Skip to content

Commit

Permalink
.
Browse files Browse the repository at this point in the history
  • Loading branch information
davedavemckay committed Aug 15, 2024
1 parent 518b7cc commit 9ba50b0
Showing 1 changed file with 14 additions and 22 deletions.
36 changes: 14 additions & 22 deletions echo-side/dags/process_new_zips.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,15 @@
from datetime import timedelta, datetime



def dl_bucket_names(url):
import json
import requests
bucket_names = []
# url = kwargs['url']
r = requests.get(url)
buckets = json.loads(r.text)
for bucket in buckets:
bucket_names.append(bucket['name'])
print(f'Bucket names found: {bucket_names}')
# kwargs['ti'].xcom_push(key='bucket_names', value=bucket_names)
return bucket_names

bucket_names = dl_bucket_names('https://raw.githubusercontent.com/lsst-uk/csd3-echo-somerville/main/echo-side/bucket_names/bucket_names.json')
Expand All @@ -45,12 +42,6 @@ def print_bucket_name(bucket_name):
catchup=False,
) as dag:

# get_bucket_names = PythonOperator(
# task_id = 'get_bucket_names',
# python_callable = dl_bucket_names,
# op_kwargs={'url':'https://raw.githubusercontent.com/lsst-uk/csd3-echo-somerville/main/echo-side/bucket_names/bucket_names.json'},
# )

print_bucket_name_task = [
PythonOperator(
task_id=f'print_bucket_name_{bucket_name}',
Expand All @@ -60,20 +51,21 @@ def print_bucket_name(bucket_name):

# if len(bucket_names) > 0:
# print(f'Bucket names found: {bucket_names}')
# process_zips_task = [
# KubernetesPodOperator(
# task_id=f'process_zips_{bucket_name}',
# image='ghcr.io/lsst-uk/csd3-echo-somerville:latest',
# cmds=['./entrypoint.sh'],
# arguments=['python', 'csd3-echo-somerville/scripts/process_collated_zips.py', '--bucket_name', bucket_name, '--extract', '--nprocs', '16'],
# env_vars={
# 'ECHO_S3_ACCESS_KEY': Variable.get("ECHO_S3_ACCESS_KEY"),
# 'ECHO_S3_SECRET_KEY': Variable.get("ECHO_S3_SECRET_KEY"),
# },
# get_logs=True,
# ) for bucket_name in bucket_names]
process_zips_task = [
KubernetesPodOperator(
task_id=f'process_zips_{bucket_name}',
image='ghcr.io/lsst-uk/csd3-echo-somerville:latest',
cmds=['./entrypoint.sh'],
arguments=['python', 'csd3-echo-somerville/scripts/process_collated_zips.py', '--bucket_name', bucket_name, '--extract', '--nprocs', '16'],
env_vars={
'ECHO_S3_ACCESS_KEY': Variable.get("ECHO_S3_ACCESS_KEY"),
'ECHO_S3_SECRET_KEY': Variable.get("ECHO_S3_SECRET_KEY"),
},
get_logs=True,
) for bucket_name in bucket_names]
# else:
# print('No bucket names found.')

print_bucket_name_task #>> process_zips_task
print_bucket_name_task
process_zips_task

0 comments on commit 9ba50b0

Please sign in to comment.