Skip to content

Commit

Permalink
compile droplist
Browse files: browse the repository at this point in the history
  • Loading branch information
davedavemckay committed Oct 23, 2024
1 parent 2aabdef commit dd021ab
Showing 1 changed file with 3 additions and 1 deletion.
4 changes: 3 additions & 1 deletion csd3-side/scripts/lsst-backup.py
Original file line number Diff line number Diff line change
Expand Up @@ -1005,6 +1005,7 @@ def process_files(s3_host, access_key, secret_key, bucket_name, current_objects,
print(f'Loaded collate list from {collate_list_file}, len={len(to_collate)}.', flush=True)
if not current_objects.empty:
# now using pandas for both current_objects and to_collate - this could be rewritten to use vectorised operations
droplist = []
for i in range(len(to_collate['object_names'])):
# print(zip_object_names)
cmp = [x.replace(destination_dir+'/', '') for x in to_collate.iloc[i]['object_names']]
Expand All @@ -1015,10 +1016,11 @@ def process_files(s3_host, access_key, secret_key, bucket_name, current_objects,
existing_zip_contents = current_objects[current_objects['METADATA'].isin([cmp])]['METADATA'].values[0]
if all([x in existing_zip_contents for x in cmp]):
print(f'Zip file {destination_dir}/collated_{i+1}.zip from {collate_list_file} already exists and file lists match - skipping.')
to_collate = to_collate.drop(i)
droplist.append(i)
continue
else:
print(f'Zip file {destination_dir}/collated_{i+1}.zip from {collate_list_file} already exists but file lists do not match - reuploading.')
to_collate.drop(droplist, inplace=True)
if save_collate_file:
print(f'Saving collate list to {collate_list_file}, len={len(to_collate)}.')
# with open(collate_list_file, 'w') as f:
Expand Down

0 comments on commit dd021ab

Please sign in to comment.