Skip to content

Commit

Permalink
fixed bugs in retrieving s3 private results, uploading results, and sqs message timeouts
Browse files Browse the repository at this point in the history

Signed-off-by: Duncan Ragsdale <[email protected]>
  • Loading branch information
Thistleman committed Dec 6, 2023
1 parent af1801f commit 32cdd7f
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 9 deletions.
29 changes: 29 additions & 0 deletions delete.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
from solardatatools.dataio import load_cassandra_data
from solardatatools import DataHandler
from statistical_clear_sky import SCSF
from dask.distributed import Client, SSHCluster
import dask

# EC2 hosts for the Dask SSH cluster; by SSHCluster convention the first
# address runs the scheduler and the rest run workers.
ec2_ips = ["172.31.26.35", "172.31.28.129"]
# SECURITY(review): plaintext credential committed to source control — it is
# also unused anywhere in this script. Rotate it and load secrets from the
# environment or a secrets manager instead.
password = "slacgismo"
# Path to the SSH private key used to reach the EC2 hosts.
pem = "~/.pem/dask-ssh-ed25519.pem"

cluster = SSHCluster(
    ec2_ips,
    # NOTE(review): asyncssh's option for client *authentication* keys is
    # "client_keys"; "client_host_keys" is for host-based auth — confirm
    # which is intended, otherwise the key may never be offered.
    connect_options={"known_hosts": None, "client_host_keys": pem},
    worker_options={"nthreads": 2, "memory_limit": "15GiB"},
)
# NOTE(review): "processes" is a LocalCluster creation option; when an
# explicit cluster object is passed it is likely ignored (or rejected) by
# Client — confirm and drop if so.
client = Client(cluster, processes=False)

@dask.delayed
def pull_and_run(site_id):
    """Load one site's raw data, run the solar-data-tools pipeline, and
    return its report values.

    Runs lazily on a Dask worker; the caller materializes the result
    through ``dask.compute``.
    """
    raw = load_cassandra_data(site_id, cluster_ip="54.176.95.208")
    handler = DataHandler(raw, convert_to_ts=True)
    handler.run_pipeline(power_col='ac_power_01')
    return handler.report(return_values=True)

# Site identifiers to analyze; each one becomes a single delayed task.
site_ids = ["TAAJ01021775", "001C4B0008A5", "TABG01081601"]
# Build the lazy task graph (no work happens here).
results = [pull_and_run(si) for si in site_ids]
# BUG FIX: dask.compute returns the materialized reports, but the original
# script discarded them, so the whole run produced no observable output.
# Capture the single returned tuple element and surface it.
(reports,) = dask.compute(results)
print(reports)
Original file line number Diff line number Diff line change
Expand Up @@ -11,24 +11,24 @@
"file_name",
"run_time",
"data_requirements",
"mean_absolute_error_time_series",
"mean_absolute_error",
"data_sampling_frequency",
"issue"
],
"plots": [
{
"type": "histogram",
"x_val": "mean_absolute_error_time_series",
"x_val": "mean_absolute_error",
"color_code": "issue",
"title": "Time Series MAE Distribution by Issue",
"save_file_path": "mean_absolute_error_time_series_dist.png"
"save_file_path": "mean_absolute_error_dist.png"
},
{
"type": "histogram",
"x_val": "mean_absolute_error_time_series",
"x_val": "mean_absolute_error",
"color_code": "data_sampling_frequency",
"title": "Time Series MAE Distribution by Sampling Frequency",
"save_file_path": "mean_absolute_error_time_series_dist.png"
"save_file_path": "mean_absolute_error_dist.png"
},
{
"type": "histogram",
Expand Down
13 changes: 11 additions & 2 deletions valhub/submissions/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,8 +315,17 @@ def get_submission_results(request, submission_id):
status=status.HTTP_500_INTERNAL_SERVER_ERROR)
file_list = response.json()
else:
storage = default_storage
_, file_list = storage.listdir(results_directory)
# get the list of files in the results directory
s3 = boto3.client('s3')
response = s3.list_objects_v2(Bucket=bucket_name, Prefix=results_directory)
if response['KeyCount'] == 0:
return JsonResponse({"error": "No files found in the results directory"}, status=status.HTTP_404_NOT_FOUND)
# remove the first entry if it is the same as results_directory
if response['Contents'][0]['Key'] == results_directory:
file_list = [file['Key'] for file in response['Contents'][1:]]
else:
file_list = [file['Key'] for file in response['Contents']]


png_files = [file for file in file_list if file.lower().endswith(".png")]

Expand Down
4 changes: 2 additions & 2 deletions workers/submission_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -584,7 +584,7 @@ def process_submission_message(message):
for file_name in file_names:
full_file_name = os.path.join(dir_path, file_name)
relative_file_name = full_file_name[len(f'{res_files_path}/'):]

if is_s3_emulation:
s3_full_path = f'pv-validation-hub-bucket/submission_files/submission_user_{user_id}/submission_{submission_id}/results/{relative_file_name}'
else:
Expand Down Expand Up @@ -688,7 +688,7 @@ def main():
while True:
messages = queue.receive_messages(
MaxNumberOfMessages=1,
VisibilityTimeout=7200
VisibilityTimeout=28800
)
for message in messages:
logger.info(
Expand Down

0 comments on commit 32cdd7f

Please sign in to comment.