Skip to content

Commit

Permalink
added more extensive logging to identify s3 issues
Browse files Browse the repository at this point in the history
Signed-off-by: Duncan Ragsdale <[email protected]>
  • Loading branch information
Thistleman committed Nov 29, 2023
1 parent 9082309 commit de11ab2
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 22 deletions.
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
matplotlib==3.7.1
numpy==1.22.0
pandas==1.5.3
pvanalytics==0.1.3
pvlib==0.9.4
ruptures==1.1.7
seaborn==0.11.1
solar_data_tools==0.7.0
matplotlib
numpy
pandas
pvanalytics
pvlib
ruptures
seaborn
solar_data_tools
2 changes: 2 additions & 0 deletions workers/pvinsight-validation-runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,10 @@ def is_local():
S3_BUCKET_NAME = "pv-validation-hub-bucket"

def pull_from_s3(s3_file_path):
logger.info(f"pulling file {s3_file_path} from s3")
if s3_file_path.startswith('/'):
s3_file_path = s3_file_path[1:]
logger.info(f"modified path to {s3_file_path}")

if is_s3_emulation:
s3_file_full_path = 'http://s3:5000/get_object/' + s3_file_path
Expand Down
30 changes: 16 additions & 14 deletions workers/submission_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,19 @@ def is_local():
FAILED = "failed"
FINISHED = "finished"

# Module-level logging setup: records at INFO and above from this module's
# logger are written to stdout, prefixed with a timestamp and the level name.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# The handler writes to sys.stdout rather than StreamHandler's default stream.
handler = logging.StreamHandler(stream=sys.stdout)

# Line layout: "[YYYY-MM-DD HH:MM:SS] LEVEL message".
formatter = logging.Formatter(
    fmt="[%(asctime)s] %(levelname)s %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
)

handler.setFormatter(formatter)
logger.addHandler(handler)

def pull_from_s3(s3_file_path):
logger.info(f'pull file {s3_file_path} from s3')
if s3_file_path.startswith('/'):
s3_file_path = s3_file_path[1:]

Expand Down Expand Up @@ -67,6 +78,7 @@ def pull_from_s3(s3_file_path):


def push_to_s3(local_file_path, s3_file_path):
logger.info(f'push file {local_file_path} to s3')
if s3_file_path.startswith('/'):
s3_file_path = s3_file_path[1:]

Expand All @@ -91,6 +103,7 @@ def push_to_s3(local_file_path, s3_file_path):
return None

def list_s3_bucket(s3_dir):
logger.info(f'list s3 bucket {s3_dir}')
if s3_dir.startswith('/'):
s3_dir = s3_dir[1:]

Expand All @@ -107,6 +120,7 @@ def list_s3_bucket(s3_dir):
for entry in ret['Contents']:
all_files.append(os.path.join(s3_dir.split('/')[0], entry['Key']))
else:
logger.info(f'list s3 bucket {s3_dir_full_path}')
s3 = boto3.client('s3')
paginator = s3.get_paginator('list_objects_v2')
pages = paginator.paginate(Bucket=S3_BUCKET_NAME, Prefix=s3_dir)
Expand All @@ -115,6 +129,7 @@ def list_s3_bucket(s3_dir):
for entry in page['Contents']:
all_files.append(entry['Key'])

logger.info(f'list s3 bucket {s3_dir_full_path} returns {all_files}')
return all_files


Expand Down Expand Up @@ -207,20 +222,6 @@ def get_module_name(module_dir):
SUBMISSION_ALGORITHMS = {}
ANNOTATION_FILE_NAME_MAP = {}

# Module-level logging setup: records at INFO and above from this module's
# logger are written to stdout, prefixed with a timestamp and the level name.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# The handler writes to sys.stdout rather than StreamHandler's default stream.
handler = logging.StreamHandler(stream=sys.stdout)

# Line layout: "[YYYY-MM-DD HH:MM:SS] LEVEL message".
formatter = logging.Formatter(
    fmt="[%(asctime)s] %(levelname)s %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
)

handler.setFormatter(formatter)
logger.addHandler(handler)

# django.db.close_old_connections()


class GracefulKiller:
kill_now = False

Expand Down Expand Up @@ -339,6 +340,7 @@ def extract_analysis_data(analysis_id, current_evaluation_dir):

# download evaluation scripts and requirements.txt etc.
files = list_s3_bucket(f'pv-validation-hub-bucket/evaluation_scripts/{analysis_id}/')
logger.info(f'pull evaluation scripts from s3')
for file in files:
tmp_path = pull_from_s3(file)
shutil.move(tmp_path, os.path.join(current_evaluation_dir, tmp_path.split('/')[-1]))
Expand Down

0 comments on commit de11ab2

Please sign in to comment.