Skip to content

Commit

Permalink
fixed bugs in retrieving s3 private results, uploading results, and sqs message timeouts
Browse files Browse the repository at this point in the history

Signed-off-by: Duncan Ragsdale <[email protected]>
  • Loading branch information
Thistleman committed Dec 6, 2023
1 parent af1801f commit 32cdd7f
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 9 deletions.
29 changes: 29 additions & 0 deletions delete.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
from solardatatools.dataio import load_cassandra_data
from solardatatools import DataHandler
from statistical_clear_sky import SCSF
from dask.distributed import Client, SSHCluster
import dask

# EC2 hosts for the Dask SSH cluster; by SSHCluster convention the first
# address runs the scheduler and the rest run workers.
ec2_ips = ["172.31.26.35", "172.31.28.129"]
# SECURITY(review): plaintext credential committed to source control — it is
# also unused anywhere in this script. Rotate it and load secrets from the
# environment or a secrets manager instead.
password = "slacgismo"
# Path to the SSH private key used to reach the EC2 hosts.
pem = "~/.pem/dask-ssh-ed25519.pem"

cluster = SSHCluster(
    ec2_ips,
    # NOTE(review): asyncssh's option for client *authentication* keys is
    # "client_keys"; "client_host_keys" is for host-based auth — confirm
    # which is intended, otherwise the key may never be offered.
    connect_options={"known_hosts": None, "client_host_keys": pem},
    worker_options={"nthreads": 2, "memory_limit": "15GiB"},
)
# NOTE(review): "processes" is a LocalCluster creation option; when an
# explicit cluster object is passed it is likely ignored (or rejected) by
# Client — confirm and drop if so.
client = Client(cluster, processes=False)

@dask.delayed
def pull_and_run(site_id):
    """Load one site's raw data, run the solar-data-tools pipeline, and
    return its report values.

    Runs lazily on a Dask worker; the caller materializes the result
    through ``dask.compute``.
    """
    raw = load_cassandra_data(site_id, cluster_ip="54.176.95.208")
    handler = DataHandler(raw, convert_to_ts=True)
    handler.run_pipeline(power_col='ac_power_01')
    return handler.report(return_values=True)

# Site identifiers to analyze; each one becomes a single delayed task.
site_ids = ["TAAJ01021775", "001C4B0008A5", "TABG01081601"]
# Build the lazy task graph (no work happens here).
results = [pull_and_run(si) for si in site_ids]
# BUG FIX: dask.compute returns the materialized reports, but the original
# script discarded them, so the whole run produced no observable output.
# Capture the single returned tuple element and surface it.
(reports,) = dask.compute(results)
print(reports)
Original file line number Diff line number Diff line change
Expand Up @@ -11,24 +11,24 @@
"file_name",
"run_time",
"data_requirements",
"mean_absolute_error_time_series",
"mean_absolute_error",
"data_sampling_frequency",
"issue"
],
"plots": [
{
"type": "histogram",
"x_val": "mean_absolute_error_time_series",
"x_val": "mean_absolute_error",
"color_code": "issue",
"title": "Time Series MAE Distribution by Issue",
"save_file_path": "mean_absolute_error_time_series_dist.png"
"save_file_path": "mean_absolute_error_dist.png"
},
{
"type": "histogram",
"x_val": "mean_absolute_error_time_series",
"x_val": "mean_absolute_error",
"color_code": "data_sampling_frequency",
"title": "Time Series MAE Distribution by Sampling Frequency",
"save_file_path": "mean_absolute_error_time_series_dist.png"
"save_file_path": "mean_absolute_error_dist.png"
},
{
"type": "histogram",
Expand Down
13 changes: 11 additions & 2 deletions valhub/submissions/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,8 +315,17 @@ def get_submission_results(request, submission_id):
status=status.HTTP_500_INTERNAL_SERVER_ERROR)
file_list = response.json()
else:
storage = default_storage
_, file_list = storage.listdir(results_directory)
# get the list of files in the results directory
s3 = boto3.client('s3')
response = s3.list_objects_v2(Bucket=bucket_name, Prefix=results_directory)
if response['KeyCount'] == 0:
return JsonResponse({"error": "No files found in the results directory"}, status=status.HTTP_404_NOT_FOUND)
# remove the first entry if it is the same as results_directory
if response['Contents'][0]['Key'] == results_directory:
file_list = [file['Key'] for file in response['Contents'][1:]]
else:
file_list = [file['Key'] for file in response['Contents']]


png_files = [file for file in file_list if file.lower().endswith(".png")]

Expand Down
4 changes: 2 additions & 2 deletions workers/submission_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -584,7 +584,7 @@ def process_submission_message(message):
for file_name in file_names:
full_file_name = os.path.join(dir_path, file_name)
relative_file_name = full_file_name[len(f'{res_files_path}/'):]

if is_s3_emulation:
s3_full_path = f'pv-validation-hub-bucket/submission_files/submission_user_{user_id}/submission_{submission_id}/results/{relative_file_name}'
else:
Expand Down Expand Up @@ -688,7 +688,7 @@ def main():
while True:
messages = queue.receive_messages(
MaxNumberOfMessages=1,
VisibilityTimeout=7200
VisibilityTimeout=28800
)
for message in messages:
logger.info(
Expand Down

0 comments on commit 32cdd7f

Please sign in to comment.