diff --git a/csd3-side/scripts/lsst-backup.py b/csd3-side/scripts/lsst-backup.py index 06073ea..74d89a5 100644 --- a/csd3-side/scripts/lsst-backup.py +++ b/csd3-side/scripts/lsst-backup.py @@ -1005,6 +1005,7 @@ def process_files(s3_host, access_key, secret_key, bucket_name, current_objects, print(f'Loaded collate list from {collate_list_file}, len={len(to_collate)}.', flush=True) if not current_objects.empty: # now using pandas for both current_objects and to_collate - this could be re-written to using vectorised operations + droplist = [] for i in range(len(to_collate['object_names'])): # print(zip_object_names) cmp = [x.replace(destination_dir+'/', '') for x in to_collate.iloc[i]['object_names']] @@ -1015,10 +1016,11 @@ def process_files(s3_host, access_key, secret_key, bucket_name, current_objects, existing_zip_contents = current_objects[current_objects['METADATA'].isin([cmp])]['METADATA'].values[0] if all([x in existing_zip_contents for x in cmp]): print(f'Zip file {destination_dir}/collated_{i+1}.zip from {collate_list_file} already exists and file lists match - skipping.') - to_collate = to_collate.drop(i) + droplist.append(i) continue else: print(f'Zip file {destination_dir}/collated_{i+1}.zip from {collate_list_file} already exists but file lists do not match - reuploading.') + to_collate.drop(droplist, inplace=True) if save_collate_file: print(f'Saving collate list to {collate_list_file}, len={len(to_collate)}.') # with open(collate_list_file, 'w') as f: