From 32cdd7fda5c0ae84c98b88d9113f286c12011360 Mon Sep 17 00:00:00 2001
From: Duncan Ragsdale <88173870+Thistleman@users.noreply.github.com>
Date: Tue, 5 Dec 2023 16:52:34 -0800
Subject: [PATCH] fixed bugs in retrieving s3 private results, uploading
 results, and sqs message timeouts

Signed-off-by: Duncan Ragsdale <88173870+Thistleman@users.noreply.github.com>
---
 delete.py                                     | 29 +++++++++++++++++++
 .../evaluation_scripts/1/config.json          | 10 +++----
 valhub/submissions/views.py                   | 13 +++++++--
 workers/submission_worker.py                  |  4 +--
 4 files changed, 47 insertions(+), 9 deletions(-)
 create mode 100644 delete.py

diff --git a/delete.py b/delete.py
new file mode 100644
index 00000000..9171d921
--- /dev/null
+++ b/delete.py
@@ -0,0 +1,29 @@
+from solardatatools.dataio import load_cassandra_data
+from solardatatools import DataHandler
+from statistical_clear_sky import SCSF
+from dask.distributed import Client, SSHCluster
+import dask
+
+ec2_ips = ["172.31.26.35", "172.31.28.129"]
+password="slacgismo"
+pem = "~/.pem/dask-ssh-ed25519.pem"
+
+cluster = SSHCluster(
+    ec2_ips,
+    connect_options={"known_hosts": None, "client_host_keys": pem},
+    worker_options={"nthreads": 2, "memory_limit":'15GiB'}
+)
+client = Client(cluster, processes=False)
+
+@dask.delayed
+def pull_and_run(site_id):
+    df = load_cassandra_data(site_id, cluster_ip="54.176.95.208")
+    dh = DataHandler(df, convert_to_ts=True)
+    dh.run_pipeline(power_col='ac_power_01')
+    return dh.report(return_values=True)
+
+results = []
+site_ids = ["TAAJ01021775", "001C4B0008A5", "TABG01081601"]
+for si in site_ids:
+    results.append(pull_and_run(si))
+dask.compute(results)
\ No newline at end of file
diff --git a/s3Emulator/pv-validation-hub-bucket/evaluation_scripts/1/config.json b/s3Emulator/pv-validation-hub-bucket/evaluation_scripts/1/config.json
index d32529aa..9d80255a 100644
--- a/s3Emulator/pv-validation-hub-bucket/evaluation_scripts/1/config.json
+++ b/s3Emulator/pv-validation-hub-bucket/evaluation_scripts/1/config.json
@@ -11,24 +11,24 @@
     "file_name",
     "run_time",
     "data_requirements",
-    "mean_absolute_error_time_series",
+    "mean_absolute_error",
     "data_sampling_frequency",
     "issue"
   ],
   "plots": [
     {
       "type": "histogram",
-      "x_val": "mean_absolute_error_time_series",
+      "x_val": "mean_absolute_error",
       "color_code": "issue",
       "title": "Time Series MAE Distribution by Issue",
-      "save_file_path": "mean_absolute_error_time_series_dist.png"
+      "save_file_path": "mean_absolute_error_dist.png"
     },
     {
       "type": "histogram",
-      "x_val": "mean_absolute_error_time_series",
+      "x_val": "mean_absolute_error",
       "color_code": "data_sampling_frequency",
       "title": "Time Series MAE Distribution by Sampling Frequency",
-      "save_file_path": "mean_absolute_error_time_series_dist.png"
+      "save_file_path": "mean_absolute_error_dist.png"
     },
     {
       "type": "histogram",
diff --git a/valhub/submissions/views.py b/valhub/submissions/views.py
index dc9764d2..f47ef029 100644
--- a/valhub/submissions/views.py
+++ b/valhub/submissions/views.py
@@ -315,8 +315,17 @@ def get_submission_results(request, submission_id):
                 status=status.HTTP_500_INTERNAL_SERVER_ERROR)
         file_list = response.json()
     else:
-        storage = default_storage
-        _, file_list = storage.listdir(results_directory)
+        # get the list of files in the results directory
+        s3 = boto3.client('s3')
+        response = s3.list_objects_v2(Bucket=bucket_name, Prefix=results_directory)
+        if response['KeyCount'] == 0:
+            return JsonResponse({"error": "No files found in the results directory"}, status=status.HTTP_404_NOT_FOUND)
+        # remove the first entry if it is the same as results_directory
+        if response['Contents'][0]['Key'] == results_directory:
+            file_list = [file['Key'] for file in response['Contents'][1:]]
+        else:
+            file_list = [file['Key'] for file in response['Contents']]
+
     png_files = [file for file in file_list if file.lower().endswith(".png")]
 
 
diff --git a/workers/submission_worker.py b/workers/submission_worker.py
index 4b6d03c2..c81d5c98 100644
--- a/workers/submission_worker.py
+++ b/workers/submission_worker.py
@@ -584,7 +584,7 @@ def process_submission_message(message):
     for file_name in file_names:
         full_file_name = os.path.join(dir_path, file_name)
         relative_file_name = full_file_name[len(f'{res_files_path}/'):]
-        
+
         if is_s3_emulation:
             s3_full_path = f'pv-validation-hub-bucket/submission_files/submission_user_{user_id}/submission_{submission_id}/results/{relative_file_name}'
         else:
@@ -688,7 +688,7 @@ def main():
     while True:
         messages = queue.receive_messages(
             MaxNumberOfMessages=1,
-            VisibilityTimeout=7200
+            VisibilityTimeout=28800
         )
         for message in messages:
             logger.info(
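
Note on the S3 listing in get_submission_results: list_objects_v2 returns at
most 1,000 keys per call, so a results prefix holding more objects than that
would be silently truncated. A minimal paginated sketch of the same listing,
assuming the bucket_name and results_directory names from the hunk above (the
list_result_files helper itself is hypothetical, not part of this patch):

    import boto3

    def list_result_files(bucket_name, results_directory):
        s3 = boto3.client('s3')
        paginator = s3.get_paginator('list_objects_v2')
        file_list = []
        for page in paginator.paginate(Bucket=bucket_name, Prefix=results_directory):
            # 'Contents' is absent when a page (or the whole prefix) is empty
            for obj in page.get('Contents', []):
                # skip the zero-byte "directory" placeholder key, as the hunk does
                if obj['Key'] != results_directory:
                    file_list.append(obj['Key'])
        return file_list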
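
Note on the VisibilityTimeout bump (7200 s to 28800 s): a fixed timeout has to
guess the worst-case submission runtime up front. One alternative is a
heartbeat that keeps extending the in-flight message via the boto3 SQS
Message.change_visibility call; the keep_invisible wrapper below is an
illustrative sketch, not part of this patch:

    import threading

    def keep_invisible(message, interval=600, extension=1200):
        # Re-extend the SQS message's visibility every `interval` seconds
        # until the caller signals stop.set() after processing finishes.
        stop = threading.Event()

        def heartbeat():
            while not stop.wait(interval):
                message.change_visibility(VisibilityTimeout=extension)

        threading.Thread(target=heartbeat, daemon=True).start()
        return stop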