From de11ab2ff972cbd8e7c2b6d4b14f08bdba61d05b Mon Sep 17 00:00:00 2001
From: Duncan Ragsdale <88173870+Thistleman@users.noreply.github.com>
Date: Wed, 29 Nov 2023 14:57:18 -0800
Subject: [PATCH] added more extensive logging to identify s3 issues

Signed-off-by: Duncan Ragsdale <88173870+Thistleman@users.noreply.github.com>
---
 .../evaluation_scripts/1/requirements.txt | 16 +++++-----
 workers/pvinsight-validation-runner.py    |  2 ++
 workers/submission_worker.py              | 30 ++++++++++---------
 3 files changed, 26 insertions(+), 22 deletions(-)

diff --git a/s3Emulator/pv-validation-hub-bucket/evaluation_scripts/1/requirements.txt b/s3Emulator/pv-validation-hub-bucket/evaluation_scripts/1/requirements.txt
index a6a17638..cc7e4607 100644
--- a/s3Emulator/pv-validation-hub-bucket/evaluation_scripts/1/requirements.txt
+++ b/s3Emulator/pv-validation-hub-bucket/evaluation_scripts/1/requirements.txt
@@ -1,8 +1,8 @@
-matplotlib==3.7.1
-numpy==1.22.0
-pandas==1.5.3
-pvanalytics==0.1.3
-pvlib==0.9.4
-ruptures==1.1.7
-seaborn==0.11.1
-solar_data_tools==0.7.0
\ No newline at end of file
+matplotlib
+numpy
+pandas
+pvanalytics
+pvlib
+ruptures
+seaborn
+solar_data_tools
\ No newline at end of file
diff --git a/workers/pvinsight-validation-runner.py b/workers/pvinsight-validation-runner.py
index a6274041..8188c8f1 100644
--- a/workers/pvinsight-validation-runner.py
+++ b/workers/pvinsight-validation-runner.py
@@ -62,8 +62,10 @@ def is_local():
 S3_BUCKET_NAME = "pv-validation-hub-bucket"
 
 def pull_from_s3(s3_file_path):
+    logger.info(f"pulling file {s3_file_path} from s3")
     if s3_file_path.startswith('/'):
         s3_file_path = s3_file_path[1:]
+    logger.info(f"modified path to {s3_file_path}")
 
     if is_s3_emulation:
         s3_file_full_path = 'http://s3:5000/get_object/' + s3_file_path
diff --git a/workers/submission_worker.py b/workers/submission_worker.py
index 14551265..b7670cb8 100644
--- a/workers/submission_worker.py
+++ b/workers/submission_worker.py
@@ -36,8 +36,19 @@ def is_local():
 FAILED = "failed"
 FINISHED = "finished"
 
+formatter = logging.Formatter(
+    "[%(asctime)s] %(levelname)s %(message)s", datefmt="%Y-%m-%d %H:%M:%S"
+)
+
+handler = logging.StreamHandler(sys.stdout)
+handler.setFormatter(formatter)
+
+logger = logging.getLogger(__name__)
+logger.addHandler(handler)
+logger.setLevel(logging.INFO)
 
 def pull_from_s3(s3_file_path):
+    logger.info(f'pull file {s3_file_path} from s3')
     if s3_file_path.startswith('/'):
         s3_file_path = s3_file_path[1:]
 
@@ -67,6 +78,7 @@
 
 
 def push_to_s3(local_file_path, s3_file_path):
+    logger.info(f'push file {local_file_path} to s3')
     if s3_file_path.startswith('/'):
         s3_file_path = s3_file_path[1:]
 
@@ -91,6 +103,7 @@
     return None
 
 def list_s3_bucket(s3_dir):
+    logger.info(f'list s3 bucket {s3_dir}')
     if s3_dir.startswith('/'):
         s3_dir = s3_dir[1:]
 
@@ -107,6 +120,7 @@
         for entry in ret['Contents']:
             all_files.append(os.path.join(s3_dir.split('/')[0], entry['Key']))
     else:
+        logger.info(f'list s3 bucket {s3_dir_full_path}')
         s3 = boto3.client('s3')
         paginator = s3.get_paginator('list_objects_v2')
         pages = paginator.paginate(Bucket=S3_BUCKET_NAME, Prefix=s3_dir)
@@ -115,6 +129,7 @@
             for entry in page['Contents']:
                 all_files.append(entry['Key'])
 
+    logger.info(f'list s3 bucket {s3_dir_full_path} returns {all_files}')
     return all_files
 
 
@@ -207,20 +222,6 @@ def get_module_name(module_dir):
 SUBMISSION_ALGORITHMS = {}
 ANNOTATION_FILE_NAME_MAP = {}
 
-formatter = logging.Formatter(
-    "[%(asctime)s] %(levelname)s %(message)s", datefmt="%Y-%m-%d %H:%M:%S"
-)
-
-handler = logging.StreamHandler(sys.stdout)
-handler.setFormatter(formatter)
-
-logger = logging.getLogger(__name__)
-logger.addHandler(handler)
-logger.setLevel(logging.INFO)
-
-# django.db.close_old_connections()
-
-
 class GracefulKiller:
     kill_now = False
 
@@ -339,6 +340,7 @@ def extract_analysis_data(analysis_id, current_evaluation_dir):
 
     # download evaluation scripts and requirements.txt etc.
     files = list_s3_bucket(f'pv-validation-hub-bucket/evaluation_scripts/{analysis_id}/')
+    logger.info(f'pull evaluation scripts from s3')
    for file in files:
         tmp_path = pull_from_s3(file)
         shutil.move(tmp_path, os.path.join(current_evaluation_dir, tmp_path.split('/')[-1]))