
Commit

saving csv file list
davedavemckay committed Jul 1, 2024
1 parent 1e6cd53 commit 5e50f57
Showing 2 changed files with 25 additions and 2 deletions.
10 changes: 9 additions & 1 deletion echo-side/dags/monitor.py
@@ -5,6 +5,13 @@
 from airflow.models import Variable
 from datetime import timedelta
 
+from kubernetes.client import models
+from datetime import datetime
+
+# Create k8s storage mount for large, persistent NFS disk space
+
+volume_dss_mount = models.V1VolumeMount(name="logs-volume", mount_path="/lsst-backup-logs", sub_path=None, read_only=False,)
+
 # Define default arguments for the DAG
 default_args = {
     'owner': 'airflow',
@@ -27,12 +34,13 @@
     task_id='list_csv_files',
     image='ghcr.io/lsst-uk/csd3-echo-somerville:latest',
     cmds=['./entrypoint.sh'],
-    arguments=['python', 'csd3-echo-somerville/scripts/list_backup_csvs.py', '--bucket_name', 'LSST-IR-FUSION-Butlers'],
+    arguments=['python', 'csd3-echo-somerville/scripts/list_backup_csvs.py', '--bucket_name', 'LSST-IR-FUSION-Butlers', '--save-list', ''.join(['/lsst-backup-logs/lsst-backup-logs-','{{ ts_nodash }}','.csv'])],
     env_vars={
         'ECHO_S3_ACCESS_KEY': Variable.get("ECHO_S3_ACCESS_KEY"),
         'ECHO_S3_SECRET_KEY': Variable.get("ECHO_S3_SECRET_KEY"),
     },
     dag=dag,
+    volume_mounts=[volume_dss_mount],
     get_logs=True,
 )

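In the new --save-list argument, '{{ ts_nodash }}' is an Airflow template macro: arguments is a templated field of KubernetesPodOperator, so at run time it expands to the run's logical timestamp (e.g. 20240701T000000), giving each DAG run its own CSV under /lsst-backup-logs. The volume_mounts entry also relies on a V1Volume named "logs-volume" being attached to the pod; that volume is not part of this diff, so the pairing below is only a minimal sketch with a hypothetical PVC name.

from kubernetes.client import models

# Sketch only: a V1Volume the "logs-volume" mount could bind to.
# The claim name "lsst-backup-logs-pvc" is an assumption, not part of this commit.
volume_dss = models.V1Volume(
    name="logs-volume",
    persistent_volume_claim=models.V1PersistentVolumeClaimVolumeSource(
        claim_name="lsst-backup-logs-pvc",
    ),
)

# It would then be passed to the operator alongside the mount from the diff:
#   KubernetesPodOperator(..., volumes=[volume_dss], volume_mounts=[volume_dss_mount], ...)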
17 changes: 16 additions & 1 deletion scripts/list_backup_csvs.py
@@ -14,11 +14,22 @@
 parser = argparse.ArgumentParser()
 parser.add_argument('--bucket_name', '-b', type=str, help='The name of the S3 bucket.')
 parser.add_argument('--download', action='store_true', default=False, help='Download the backup log.')
+parser.add_argument('--save-list', type=str, help='Write the list to the file at the given absolute path.')
 args = parser.parse_args()
 
 bucket_name = args.bucket_name
 download = args.download
+save_list = args.save_list
 
+if save_list:
+    if os.path.exists(save_list):
+        print(f'{save_list} already exists. Exiting.')
+        sys.exit()
+    save_folder = os.path.dirname(save_list)
+    if not os.path.exists(save_folder):
+        print(f'{save_folder} does not exist. Exiting.')
+        sys.exit()
+
 try:
     keys = bm.get_keys('S3')
 except KeyError as e:
@@ -44,7 +55,11 @@
     if ob.key.count('/') > 0:
         continue
     if log_suffix in ob.key or previous_log_suffix in ob.key:
-        print(f'{ob.key}, {ob.size/1024**2:.2f}, {ob.last_modified}')
+        if save_list:
+            with open(save_list,'a') as f:
+                f.write(f'{ob.key},{ob.size/1024**2:.2f},{ob.last_modified}\n')
+        else:
+            print(f'{ob.key},{ob.size/1024**2:.2f},{ob.last_modified}')
         if download:
             with tqdm(total=ob.size/1024**2, unit='MiB', unit_scale=True, unit_divisor=1024) as pbar:
                 bucket.download_file(ob.key,ob.key,Callback=pbar.update)
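With --save-list set, the script appends one headerless row per matching log object, formatted as key, size in MiB, last-modified timestamp, instead of printing it. A minimal sketch of reading such a file back, assuming an illustrative path (the real filename is templated per DAG run via ts_nodash):

import csv

# Illustrative path only; the timestamp portion is whatever ts_nodash rendered to.
list_path = '/lsst-backup-logs/lsst-backup-logs-20240701T000000.csv'

with open(list_path, newline='') as f:
    for key, size_mib, last_modified in csv.reader(f):
        print(f'{key}: {float(size_mib):.2f} MiB, last modified {last_modified}')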
