From de11ab2ff972cbd8e7c2b6d4b14f08bdba61d05b Mon Sep 17 00:00:00 2001
From: Duncan Ragsdale <88173870+Thistleman@users.noreply.github.com>
Date: Wed, 29 Nov 2023 14:57:18 -0800
Subject: [PATCH] added more extensive logging to identify s3 issues

Signed-off-by: Duncan Ragsdale <88173870+Thistleman@users.noreply.github.com>
---
 .../evaluation_scripts/1/requirements.txt | 16 +++++-----
 workers/pvinsight-validation-runner.py    |  2 ++
 workers/submission_worker.py              | 30 ++++++++++---------
 3 files changed, 26 insertions(+), 22 deletions(-)

diff --git a/s3Emulator/pv-validation-hub-bucket/evaluation_scripts/1/requirements.txt b/s3Emulator/pv-validation-hub-bucket/evaluation_scripts/1/requirements.txt
index a6a17638..cc7e4607 100644
--- a/s3Emulator/pv-validation-hub-bucket/evaluation_scripts/1/requirements.txt
+++ b/s3Emulator/pv-validation-hub-bucket/evaluation_scripts/1/requirements.txt
@@ -1,8 +1,8 @@
-matplotlib==3.7.1
-numpy==1.22.0
-pandas==1.5.3
-pvanalytics==0.1.3
-pvlib==0.9.4
-ruptures==1.1.7
-seaborn==0.11.1
-solar_data_tools==0.7.0
\ No newline at end of file
+matplotlib
+numpy
+pandas
+pvanalytics
+pvlib
+ruptures
+seaborn
+solar_data_tools
\ No newline at end of file
diff --git a/workers/pvinsight-validation-runner.py b/workers/pvinsight-validation-runner.py
index a6274041..8188c8f1 100644
--- a/workers/pvinsight-validation-runner.py
+++ b/workers/pvinsight-validation-runner.py
@@ -62,8 +62,10 @@ def is_local():
 S3_BUCKET_NAME = "pv-validation-hub-bucket"
 
 def pull_from_s3(s3_file_path):
+    logger.info(f"pulling file {s3_file_path} from s3")
     if s3_file_path.startswith('/'):
         s3_file_path = s3_file_path[1:]
+    logger.info(f"modified path to {s3_file_path}")
 
     if is_s3_emulation:
         s3_file_full_path = 'http://s3:5000/get_object/' + s3_file_path
diff --git a/workers/submission_worker.py b/workers/submission_worker.py
index 14551265..b7670cb8 100644
--- a/workers/submission_worker.py
+++ b/workers/submission_worker.py
@@ -36,8 +36,19 @@ def is_local():
 FAILED = "failed"
 FINISHED = "finished"
 
+formatter = logging.Formatter(
+    "[%(asctime)s] %(levelname)s %(message)s", datefmt="%Y-%m-%d %H:%M:%S"
+)
+
+handler = logging.StreamHandler(sys.stdout)
+handler.setFormatter(formatter)
+
+logger = logging.getLogger(__name__)
+logger.addHandler(handler)
+logger.setLevel(logging.INFO)
 
 def pull_from_s3(s3_file_path):
+    logger.info(f'pull file {s3_file_path} from s3')
     if s3_file_path.startswith('/'):
         s3_file_path = s3_file_path[1:]
 
@@ -67,6 +78,7 @@
 
 
 def push_to_s3(local_file_path, s3_file_path):
+    logger.info(f'push file {local_file_path} to s3')
     if s3_file_path.startswith('/'):
         s3_file_path = s3_file_path[1:]
 
@@ -91,6 +103,7 @@
     return None
 
 def list_s3_bucket(s3_dir):
+    logger.info(f'list s3 bucket {s3_dir}')
     if s3_dir.startswith('/'):
         s3_dir = s3_dir[1:]
 
@@ -107,6 +120,7 @@
         for entry in ret['Contents']:
             all_files.append(os.path.join(s3_dir.split('/')[0], entry['Key']))
     else:
+        logger.info(f'list s3 bucket {s3_dir_full_path}')
         s3 = boto3.client('s3')
         paginator = s3.get_paginator('list_objects_v2')
         pages = paginator.paginate(Bucket=S3_BUCKET_NAME, Prefix=s3_dir)
@@ -115,6 +129,7 @@
             for entry in page['Contents']:
                 all_files.append(entry['Key'])
 
+    logger.info(f'list s3 bucket {s3_dir_full_path} returns {all_files}')
     return all_files
 
 
@@ -207,20 +222,6 @@ def get_module_name(module_dir):
 SUBMISSION_ALGORITHMS = {}
 ANNOTATION_FILE_NAME_MAP = {}
 
-formatter = logging.Formatter(
-    "[%(asctime)s] %(levelname)s %(message)s", datefmt="%Y-%m-%d %H:%M:%S"
-)
-
-handler = logging.StreamHandler(sys.stdout)
-handler.setFormatter(formatter)
-
-logger = logging.getLogger(__name__)
-logger.addHandler(handler)
-logger.setLevel(logging.INFO)
-
-# django.db.close_old_connections()
-
-
 class GracefulKiller:
     kill_now = False
 
@@ -339,6 +340,7 @@ def extract_analysis_data(analysis_id, current_evaluation_dir):
 
     # download evaluation scripts and requirements.txt etc.
     files = list_s3_bucket(f'pv-validation-hub-bucket/evaluation_scripts/{analysis_id}/')
+    logger.info(f'pull evaluation scripts from s3')
    for file in files:
         tmp_path = pull_from_s3(file)
         shutil.move(tmp_path, os.path.join(current_evaluation_dir, tmp_path.split('/')[-1]))