Skip to content

Commit

Permalink
modified to use pandas Series for current_objects (#21)
Browse files Browse the repository at this point in the history
  • Loading branch information
davedavemckay authored Aug 2, 2024
1 parent af612e9 commit 3ce2a40
Showing 1 changed file with 8 additions and 7 deletions.
15 changes: 8 additions & 7 deletions csd3-side/scripts/lsst-backup.py
Original file line number Diff line number Diff line change
Expand Up @@ -505,12 +505,12 @@ def process_files(s3_host, access_key, secret_key, bucket_name, current_objects,
# remove current objects - avoids reuploading
# could provide overwrite flag if this is desirable
# print(f'current_objects: {current_objects}')
if all([obj in current_objects for obj in object_names]):
if current_objects.isin(object_names).all():
#all files in this subfolder already in bucket
print(f'Skipping subfolder - all files exist.')
continue
for oni, on in enumerate(object_names):
if on in current_objects:
if current_objects.isin([on]).any():
object_names.remove(on)
del folder_files[oni]
pre_linkcheck_file_count = len(object_names)
Expand Down Expand Up @@ -589,12 +589,12 @@ def process_files(s3_host, access_key, secret_key, bucket_name, current_objects,
# remove current objects - avoids reuploading
# could provide overwrite flag if this is desirable
# print(f'current_objects: {current_objects}')
if all([obj in current_objects for obj in object_names]):
if current_objects.isin(object_names).all():
#all files in this subfolder already in bucket
print(f'Skipping subfolder - all files exist.')
continue
for oni, on in enumerate(object_names):
if on in current_objects:
if current_objects.isin([on]).any():
object_names.remove(on)
del folder_files[oni]
pre_linkcheck_file_count = len(object_names)
Expand Down Expand Up @@ -677,7 +677,7 @@ def process_files(s3_host, access_key, secret_key, bucket_name, current_objects,
# to_collate[parent_folder][id]['zip_object_name'] =

# check if zip_object_name exists in bucket and get its checksum
if to_collate[parent_folder]['zips'][-1]['zip_object_name'] in current_objects:
if current_objects.isin([to_collate[parent_folder]['zips'][-1]['zip_object_name']]).any():
existing_zip_checksum = bm.get_resource(access_key, secret_key, s3_host).Object(bucket_name,to_collate[parent_folder]['zips'][-1]['zip_object_name']).e_tag.strip('"')
checksum_hash = hashlib.md5(zip_data)
checksum_string = checksum_hash.hexdigest()
Expand Down Expand Up @@ -874,12 +874,13 @@ def error(self, message):
current_objects = bm.object_list(bucket)
print(f'Done.\nFinished at {datetime.now()}, elapsed time = {datetime.now() - start}')

current_objects = pd.Series(current_objects)
## check if log exists in the bucket, and download it and append top it if it does
# TODO: integrate this with local check for log file
if log in current_objects:
if current_objects.isin([log]).any():
print(f'Log file {log} already exists in bucket. Downloading.')
bucket.download_file(log, log)
elif previous_log in current_objects:
elif current_objects.isin([previous_log]).any():
print(f'Previous log file {previous_log} already exists in bucket. Downloading.')
bucket.download_file(previous_log, log)

Expand Down

0 comments on commit 3ce2a40

Please sign in to comment.