From 3ce2a40b9a131e3e478210be19618883222d7ff7 Mon Sep 17 00:00:00 2001
From: Dave McKay
Date: Fri, 2 Aug 2024 16:41:38 +0100
Subject: [PATCH] modified to use pandas Series for current_objects (#21)

---
 csd3-side/scripts/lsst-backup.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/csd3-side/scripts/lsst-backup.py b/csd3-side/scripts/lsst-backup.py
index 067c0f8..6f20a07 100644
--- a/csd3-side/scripts/lsst-backup.py
+++ b/csd3-side/scripts/lsst-backup.py
@@ -505,12 +505,12 @@ def process_files(s3_host, access_key, secret_key, bucket_name, current_objects,
         # remove current objects - avoids reuploading
         # could provide overwrite flag if this is desirable
         # print(f'current_objects: {current_objects}')
-        if all([obj in current_objects for obj in object_names]):
+        if current_objects.isin(object_names).all():
             #all files in this subfolder already in bucket
             print(f'Skipping subfolder - all files exist.')
             continue
         for oni, on in enumerate(object_names):
-            if on in current_objects:
+            if current_objects.isin([on]).any():
                 object_names.remove(on)
                 del folder_files[oni]
         pre_linkcheck_file_count = len(object_names)
@@ -589,12 +589,12 @@ def process_files(s3_host, access_key, secret_key, bucket_name, current_objects,
         # remove current objects - avoids reuploading
         # could provide overwrite flag if this is desirable
         # print(f'current_objects: {current_objects}')
-        if all([obj in current_objects for obj in object_names]):
+        if current_objects.isin(object_names).all():
             #all files in this subfolder already in bucket
             print(f'Skipping subfolder - all files exist.')
             continue
         for oni, on in enumerate(object_names):
-            if on in current_objects:
+            if current_objects.isin([on]).any():
                 object_names.remove(on)
                 del folder_files[oni]
         pre_linkcheck_file_count = len(object_names)
@@ -677,7 +677,7 @@ def process_files(s3_host, access_key, secret_key, bucket_name, current_objects,
                 # to_collate[parent_folder][id]['zip_object_name'] =
                 # check if zip_object_name exists in bucket and get its checksum
-                if to_collate[parent_folder]['zips'][-1]['zip_object_name'] in current_objects:
+                if current_objects.isin([to_collate[parent_folder]['zips'][-1]['zip_object_name']]).any():
                     existing_zip_checksum = bm.get_resource(access_key, secret_key, s3_host).Object(bucket_name,to_collate[parent_folder]['zips'][-1]['zip_object_name']).e_tag.strip('"')
                     checksum_hash = hashlib.md5(zip_data)
                     checksum_string = checksum_hash.hexdigest()
@@ -874,12 +874,13 @@ def error(self, message):
     current_objects = bm.object_list(bucket)
     print(f'Done.\nFinished at {datetime.now()}, elapsed time = {datetime.now() - start}')

+    current_objects = pd.Series(current_objects)
     ## check if log exists in the bucket, and download it and append top it if it does
     # TODO: integrate this with local check for log file
-    if log in current_objects:
+    if current_objects.isin([log]).any():
         print(f'Log file {log} already exists in bucket. Downloading.')
         bucket.download_file(log, log)
-    elif previous_log in current_objects:
+    elif current_objects.isin([previous_log]).any():
         print(f'Previous log file {previous_log} already exists in bucket. Downloading.')
         bucket.download_file(previous_log, log)
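
A note on the semantics of the .isin() conversions above: Series.isin(values) tests each element of the Series for membership in values, so current_objects.isin([on]).any() is a faithful replacement for on in current_objects. The subfolder short-circuit runs in the other direction, however: the old all([obj in current_objects for obj in object_names]) asks whether every local name is already in the bucket, while the patched current_objects.isin(object_names).all() asks whether every bucket object is among the local names. A minimal sketch of the distinction, with hypothetical sample values standing in for the script's variables:

import pandas as pd

# Hypothetical stand-ins: current_objects is the bucket listing as a Series,
# object_names the local names about to be uploaded.
current_objects = pd.Series(['a/x', 'a/y', 'b/z'])
object_names = ['a/x', 'a/y']

# Original semantics: every local name already exists in the bucket.
all_local_in_bucket = pd.Series(object_names).isin(current_objects).all()  # True

# Patched expression: every bucket object is among the local names,
# which is a different (and here much stricter) condition.
all_bucket_in_local = current_objects.isin(object_names).all()  # False

print(all_local_in_bucket, all_bucket_in_local)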
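Separately, the per-file filter the patch leaves in place mutates object_names while iterating over it with enumerate, which skips the element after each removal and leaves oni pointing at a shifted position by the time del folder_files[oni] runs. One possible rewrite, sketched here under the assumption that folder_files is index-aligned with object_names, builds a boolean mask once and filters both lists in lockstep instead of mutating them mid-loop:

import pandas as pd

# Hypothetical sample data mirroring the loop's inputs.
object_names = ['a/x', 'a/y', 'a/new']
folder_files = ['/data/a/x', '/data/a/y', '/data/a/new']
current_objects = pd.Series(['a/x', 'a/y'])

# True where the name is NOT yet in the bucket; computed once, no mutation.
keep = ~pd.Series(object_names).isin(current_objects)

# Apply the same mask to both lists so they stay aligned.
object_names = [n for n, k in zip(object_names, keep) if k]
folder_files = [f for f, k in zip(folder_files, keep) if k]

print(object_names)  # ['a/new']
print(folder_files)  # ['/data/a/new']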