From 360da7de92799dfe7628d83bd287ac9615ee714a Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Wed, 2 Oct 2024 10:36:34 -0800 Subject: [PATCH 001/163] rough draft of SRG_TIME_SERIES.yml --- job_spec/SRG_TIME_SERIES.yml | 84 ++++++++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) create mode 100644 job_spec/SRG_TIME_SERIES.yml diff --git a/job_spec/SRG_TIME_SERIES.yml b/job_spec/SRG_TIME_SERIES.yml new file mode 100644 index 000000000..4545f536f --- /dev/null +++ b/job_spec/SRG_TIME_SERIES.yml @@ -0,0 +1,84 @@ +SRG_TIME_SERIES: + required_parameters: + - granules + parameters: + granules: + default: '""' + api_schema: + type: array + minItems: 1 + # TODO determine appropriate max + maxItems: 6 + example: + - S1A_IW_RAW__0SDV_20231229T134339_20231229T134411_051870_064437_4F42 + - S1A_IW_RAW__0SDV_20231229T134404_20231229T134436_051870_064437_5F38 + items: + description: Name of the Level-0 Sentinel-1 scenes to process + type: string + pattern: "^S1[AB]_IW_RAW" + minLength: 67 + maxLength: 67 + example: S1A_IW_RAW__0SDV_20231229T134404_20231229T134436_051870_064437_5F38 + bucket_prefix: + default: '""' + bounds: + default: '""' + api_schema: + type: array + default: [0.0, 0.0, 0.0, 0.0] + minItems: 4 + maxItems: 4 + example: + - -116.583 + - 35.714 + - -113.209 + - 38.138 + items: + description: min lon, min lat, max lon, max lat in EPSG:4326 + type: number + example: -116.583 + validators: [] + cost_profiles: + DEFAULT: + cost: 1.0 + compute_environment: + # TODO do we need to use different compute environments for back_projection and time_series? + name: SrgGslc # TODO rename? + instance_types: g6.2xlarge + # Image ID for: /aws/service/ecs/optimized-ami/amazon-linux-2/gpu/recommended/image_id + ami_id: ami-0729c079aae647cb3 + steps: + - name: CREATE_GSLC_PRODUCTS + type: Map + steps: + - name: BACK_PROJECTION + type: Task + image: ghcr.io/asfhyp3/hyp3-srg + image_tag: latest.gpu + command: + - ++process + - back_projection + - --bounds + - Ref::bounds + - --gpu + - --bucket + - '!Ref Bucket' + - --bucket-prefix + - Ref::bucket_prefix + - Ref::granules + timeout: 10800 + vcpu: 1 + gpu: 1 + memory: 30500 + secrets: + - EARTHDATA_USERNAME + - EARTHDATA_PASSWORD + - name: TIME_SERIES + type: Task + image: ghcr.io/asfhyp3/hyp3-srg + image_tag: latest.gpu # TODO use GPU for time series? 
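A brief aside on the granules schema defined at the top of this new job spec: the draft constrains each name to the pattern `^S1[AB]_IW_RAW` and exactly 67 characters, with 1 to 6 names per job. The check is easy to reproduce locally; this sketch is not part of the patch, and `is_valid_granule` is an illustrative name.

```python
import re

# Mirrors the api_schema above: a fixed 67-character Level-0 scene name
# that starts with the S1A/S1B IW RAW prefix.
GRANULE_PATTERN = re.compile(r'^S1[AB]_IW_RAW')

def is_valid_granule(name: str) -> bool:
    return len(name) == 67 and GRANULE_PATTERN.match(name) is not None

assert is_valid_granule('S1A_IW_RAW__0SDV_20231229T134404_20231229T134436_051870_064437_5F38')
```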
+ command: + - ++process + - time_series + # TODO remaining command arguments + # TODO remaining task fields From 49ad39d3e60d9d09c5cbc1253fda293ba2af7807 Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Wed, 2 Oct 2024 10:47:50 -0800 Subject: [PATCH 002/163] items: granules --- job_spec/SRG_TIME_SERIES.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/job_spec/SRG_TIME_SERIES.yml b/job_spec/SRG_TIME_SERIES.yml index 4545f536f..0c9371c46 100644 --- a/job_spec/SRG_TIME_SERIES.yml +++ b/job_spec/SRG_TIME_SERIES.yml @@ -50,6 +50,7 @@ SRG_TIME_SERIES: steps: - name: CREATE_GSLC_PRODUCTS type: Map + items: granules steps: - name: BACK_PROJECTION type: Task @@ -65,7 +66,7 @@ SRG_TIME_SERIES: - '!Ref Bucket' - --bucket-prefix - Ref::bucket_prefix - - Ref::granules + - Ref::granules # TODO determine syntax for passing an individual granule timeout: 10800 vcpu: 1 gpu: 1 From 9a7cf51b1a4c88f51770a49c67f0a5f2ee5498d6 Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Wed, 2 Oct 2024 10:51:33 -0800 Subject: [PATCH 003/163] rename steps to states --- job_spec/SRG_TIME_SERIES.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/job_spec/SRG_TIME_SERIES.yml b/job_spec/SRG_TIME_SERIES.yml index 0c9371c46..c7f004e97 100644 --- a/job_spec/SRG_TIME_SERIES.yml +++ b/job_spec/SRG_TIME_SERIES.yml @@ -47,11 +47,11 @@ SRG_TIME_SERIES: instance_types: g6.2xlarge # Image ID for: /aws/service/ecs/optimized-ami/amazon-linux-2/gpu/recommended/image_id ami_id: ami-0729c079aae647cb3 - steps: + states: - name: CREATE_GSLC_PRODUCTS type: Map items: granules - steps: + states: - name: BACK_PROJECTION type: Task image: ghcr.io/asfhyp3/hyp3-srg From fd9fc5eafa2d2dab96bd06b4cb61677025e88265 Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Wed, 2 Oct 2024 10:55:33 -0800 Subject: [PATCH 004/163] rename back to steps --- job_spec/SRG_TIME_SERIES.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/job_spec/SRG_TIME_SERIES.yml b/job_spec/SRG_TIME_SERIES.yml index c7f004e97..0c9371c46 100644 --- a/job_spec/SRG_TIME_SERIES.yml +++ b/job_spec/SRG_TIME_SERIES.yml @@ -47,11 +47,11 @@ SRG_TIME_SERIES: instance_types: g6.2xlarge # Image ID for: /aws/service/ecs/optimized-ami/amazon-linux-2/gpu/recommended/image_id ami_id: ami-0729c079aae647cb3 - states: + steps: - name: CREATE_GSLC_PRODUCTS type: Map items: granules - states: + steps: - name: BACK_PROJECTION type: Task image: ghcr.io/asfhyp3/hyp3-srg From f127a660e3b9f636fa02e1ae14eadc54b7f836da Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Wed, 2 Oct 2024 11:06:33 -0800 Subject: [PATCH 005/163] eliminate map nesting --- job_spec/SRG_TIME_SERIES.yml | 45 +++++++++++++++++------------------- 1 file changed, 21 insertions(+), 24 deletions(-) diff --git a/job_spec/SRG_TIME_SERIES.yml b/job_spec/SRG_TIME_SERIES.yml index 0c9371c46..2ff315a40 100644 --- a/job_spec/SRG_TIME_SERIES.yml +++ b/job_spec/SRG_TIME_SERIES.yml @@ -48,32 +48,29 @@ SRG_TIME_SERIES: # Image ID for: /aws/service/ecs/optimized-ami/amazon-linux-2/gpu/recommended/image_id ami_id: ami-0729c079aae647cb3 steps: - - name: CREATE_GSLC_PRODUCTS + - name: BACK_PROJECTION type: Map items: granules - steps: - - name: BACK_PROJECTION - type: Task - image: ghcr.io/asfhyp3/hyp3-srg - image_tag: latest.gpu - command: - - ++process - - back_projection - - --bounds - - Ref::bounds - - --gpu - - --bucket - - '!Ref Bucket' - - --bucket-prefix - - Ref::bucket_prefix - - Ref::granules # TODO determine syntax for passing an individual granule - timeout: 10800 - vcpu: 1 - 
gpu: 1 - memory: 30500 - secrets: - - EARTHDATA_USERNAME - - EARTHDATA_PASSWORD + image: ghcr.io/asfhyp3/hyp3-srg + image_tag: latest.gpu + command: + - ++process + - back_projection + - --bounds + - Ref::bounds + - --gpu + - --bucket + - '!Ref Bucket' + - --bucket-prefix + - Ref::bucket_prefix + - Ref::granules # TODO figure out syntax for passing an individual granule + timeout: 10800 + vcpu: 1 + gpu: 1 + memory: 30500 + secrets: + - EARTHDATA_USERNAME + - EARTHDATA_PASSWORD - name: TIME_SERIES type: Task image: ghcr.io/asfhyp3/hyp3-srg From a6f991dd3c98880148970cbd6059b0de661caec0 Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Wed, 2 Oct 2024 11:07:54 -0800 Subject: [PATCH 006/163] simplify tasks --- job_spec/SRG_TIME_SERIES.yml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/job_spec/SRG_TIME_SERIES.yml b/job_spec/SRG_TIME_SERIES.yml index 2ff315a40..72d575caf 100644 --- a/job_spec/SRG_TIME_SERIES.yml +++ b/job_spec/SRG_TIME_SERIES.yml @@ -47,10 +47,9 @@ SRG_TIME_SERIES: instance_types: g6.2xlarge # Image ID for: /aws/service/ecs/optimized-ami/amazon-linux-2/gpu/recommended/image_id ami_id: ami-0729c079aae647cb3 - steps: + tasks: - name: BACK_PROJECTION - type: Map - items: granules + for: granules image: ghcr.io/asfhyp3/hyp3-srg image_tag: latest.gpu command: @@ -72,7 +71,6 @@ SRG_TIME_SERIES: - EARTHDATA_USERNAME - EARTHDATA_PASSWORD - name: TIME_SERIES - type: Task image: ghcr.io/asfhyp3/hyp3-srg image_tag: latest.gpu # TODO use GPU for time series? command: From d31a620d35b2d2055ab407003c3c6bd06b419bd4 Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Thu, 3 Oct 2024 15:21:55 -0800 Subject: [PATCH 007/163] construct step function json for jobs in render.py --- apps/render_cf.py | 81 ++++++++++++++++++++++++++++++++++++++ apps/step-function.json.j2 | 49 +---------------------- 2 files changed, 83 insertions(+), 47 deletions(-) diff --git a/apps/render_cf.py b/apps/render_cf.py index 1907d065a..708232e00 100644 --- a/apps/render_cf.py +++ b/apps/render_cf.py @@ -11,7 +11,84 @@ def snake_to_pascal_case(input_string: str): return ''.join([i.title() for i in split_string]) +def get_steps_for_jobs(job_types: dict) -> dict: + steps = {} + for job_spec in job_types.values(): + steps.update(get_steps_for_job(job_spec)) + return steps + + +def get_steps_for_job(job_spec: dict) -> dict: + steps = {} + tasks = job_spec["tasks"] + for i in range(len(tasks)): + task = tasks[i] + next_step_name = tasks[i + 1]["name"] if i < len(tasks) - 1 else "GET_FILES" + steps[task["name"]] = get_step_for_task(task, i, next_step_name, job_spec) + return steps + + +def get_step_for_task(task: dict, index: int, next_step_name: str, job_spec: dict) -> dict: + if "map" in task: + return get_step_for_map_task(task, index, next_step_name, job_spec) + return get_step_for_normal_task(task, index, next_step_name, job_spec) + + +def get_step_for_normal_task(task: dict, index: int, next_step_name: str, job_spec: dict) -> dict: + compute_environment = job_spec["compute_environment"]["name"] + job_queue = "JobQueueArn" if compute_environment == "Default" else compute_environment + "JobQueueArn" + return { + "Type": "Task", + "Resource": "arn:aws:states:::batch:submitJob.sync", + "Parameters": { + "JobDefinition": "${"+ snake_to_pascal_case(task["name"]) + "}", + "JobName.$": "$.job_id", + "JobQueue": "${" + job_queue + "}", + "ShareIdentifier": "default", + "SchedulingPriorityOverride.$": "$.priority", + "Parameters.$": "$.job_parameters", + "ContainerOverrides.$": "$.container_overrides", + 
"RetryStrategy": { + "Attempts": 3 + }, + }, + "ResultPath": f"$.results.processing_results.step_{index}", + "Next": next_step_name, + "Retry": [ + { + "ErrorEquals": [ + "Batch.ServerException", + "Batch.AWSBatchException" + ], + "MaxAttempts": 2 + }, + { + "ErrorEquals": [ + "States.ALL" + ], + "MaxAttempts": 0 + } + ], + "Catch": [ + { + "ErrorEquals": [ + "States.ALL" + ], + "Next": "PROCESSING_FAILED", + "ResultPath": f"$.results.processing_results.step_{index}" + } + ], + } + + +def get_step_for_map_task(task: dict, index: int, next_step_name: str, job_spec: dict) -> dict: + # TODO + pass + + def render_templates(job_types, security_environment, api_name): + job_steps = get_steps_for_jobs(job_types) + env = jinja2.Environment( loader=jinja2.FileSystemLoader('./'), autoescape=jinja2.select_autoescape(default=True, disabled_extensions=('j2',)), @@ -30,8 +107,12 @@ def render_templates(job_types, security_environment, api_name): api_name=api_name, json=json, snake_to_pascal_case=snake_to_pascal_case, + job_steps=job_steps, ) + if str(template_file).endswith('.json.j2'): + output = json.dumps(json.loads(output), indent=2) + template_file.with_suffix('').write_text(output) diff --git a/apps/step-function.json.j2 b/apps/step-function.json.j2 index bc168208d..646d81da0 100644 --- a/apps/step-function.json.j2 +++ b/apps/step-function.json.j2 @@ -78,53 +78,8 @@ ], "Default": "JOB_FAILED" }, - {% for job_type, job_spec in job_types.items() %} - {% for task in job_spec['tasks'] %} - "{{ task['name'] }}": { - "Type": "Task", - "Resource": "arn:aws:states:::batch:submitJob.sync", - "Parameters": { - "JobDefinition": "{{ '${'+ snake_to_pascal_case(task['name']) + '}' }}", - "JobName.$": "$.job_id", - {% set name = job_spec['compute_environment']['name'] %} - {% set job_queue = name + 'JobQueueArn' if 'Default' != name else 'JobQueueArn' %} - "JobQueue": "{{ '${' + job_queue + '}' }}", - "ShareIdentifier": "default", - "SchedulingPriorityOverride.$": "$.priority", - "Parameters.$": "$.job_parameters", - "ContainerOverrides.$": "$.container_overrides", - "RetryStrategy": { - "Attempts": 3 - } - }, - "ResultPath": "$.results.processing_results.step_{{ loop.index0 }}", - "Next": "{% if not loop.last %}{{ loop.nextitem['name'] }}{% else %}GET_FILES{% endif %}", - "Retry": [ - { - "ErrorEquals": [ - "Batch.ServerException", - "Batch.AWSBatchException" - ], - "MaxAttempts": 2 - }, - { - "ErrorEquals": [ - "States.ALL" - ], - "MaxAttempts": 0 - } - ], - "Catch": [ - { - "ErrorEquals": [ - "States.ALL" - ], - "Next": "PROCESSING_FAILED", - "ResultPath": "$.results.processing_results.step_{{ loop.index0 }}" - } - ] - }, - {% endfor %} + {% for name, body in job_steps.items() %} + "{{ name }}": {{ json.dumps(body) }}, {% endfor %} "PROCESSING_FAILED": { "Type": "Pass", From 6ff153f0bf38b23126fd22aa86c05dbb64540883 Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Thu, 3 Oct 2024 16:38:01 -0800 Subject: [PATCH 008/163] render map state for step function --- apps/render_cf.py | 133 +++++++++++++++++++++++------------ job_spec/SRG_TIME_SERIES.yml | 11 +-- 2 files changed, 94 insertions(+), 50 deletions(-) diff --git a/apps/render_cf.py b/apps/render_cf.py index 708232e00..596737e2a 100644 --- a/apps/render_cf.py +++ b/apps/render_cf.py @@ -20,72 +20,113 @@ def get_steps_for_jobs(job_types: dict) -> dict: def get_steps_for_job(job_spec: dict) -> dict: steps = {} - tasks = job_spec["tasks"] + tasks = job_spec['tasks'] for i in range(len(tasks)): task = tasks[i] - next_step_name = tasks[i + 1]["name"] if i < 
len(tasks) - 1 else "GET_FILES" - steps[task["name"]] = get_step_for_task(task, i, next_step_name, job_spec) + next_step_name = tasks[i + 1]['name'] if i < len(tasks) - 1 else 'GET_FILES' + steps[task['name']] = get_step_for_task(task, i, next_step_name, job_spec) return steps def get_step_for_task(task: dict, index: int, next_step_name: str, job_spec: dict) -> dict: - if "map" in task: - return get_step_for_map_task(task, index, next_step_name, job_spec) - return get_step_for_normal_task(task, index, next_step_name, job_spec) + if 'map' in task: + step = get_step_for_map_task(task, job_spec) + else: + step = get_step_for_batch_submit_job(task, job_spec) + step.update( + { + 'Catch': [ + { + 'ErrorEquals': [ + 'States.ALL' + ], + 'ResultPath': f'$.results.processing_results.step_{index}', + 'Next': 'PROCESSING_FAILED', + }, + ], + 'ResultPath': f'$.results.processing_results.step_{index}', + 'Next': next_step_name, + } + ) + return step + + +def get_step_for_map_task(task: dict, job_spec: dict) -> dict: + item, items = parse_task_map(task['map']) + job_parameters = get_job_parameters(item, items, job_spec) + submit_job_step = get_step_for_batch_submit_job(task, job_spec) + submit_job_step['End'] = True + return { + 'Type': 'Map', + 'ItemsPath': f'$.job_parameters.{items}', + 'ItemSelector': { + 'job_id.$': '$.job_id', + 'priority.$': '$.priority', + 'container_overrides.$': '$.container_overrides', + 'job_parameters': job_parameters, + }, + 'ItemProcessor': { + 'StartAt': task['name'], + 'States': { + task['name']: submit_job_step, + } + } + } -def get_step_for_normal_task(task: dict, index: int, next_step_name: str, job_spec: dict) -> dict: - compute_environment = job_spec["compute_environment"]["name"] - job_queue = "JobQueueArn" if compute_environment == "Default" else compute_environment + "JobQueueArn" +def parse_task_map(task_map: str) -> tuple[str, str]: + tokens = task_map.split(' ') + assert len(tokens) == 4 + assert tokens[0], tokens[2] == ('for', 'in') + return tokens[1], tokens[3] + + +def get_job_parameters(item: str, items: str, job_spec: dict) -> dict: + job_parameters = { + f'{param}.$': f'$.job_parameters.{param}' + for param in job_spec['parameters'] + if param != items + } + job_parameters[f'{item}.$'] = '$$.Map.Item.Value' + return job_parameters + + +def get_step_for_batch_submit_job(task: dict, job_spec: dict) -> dict: + compute_environment = job_spec['compute_environment']['name'] + job_queue = 'JobQueueArn' if compute_environment == 'Default' else compute_environment + 'JobQueueArn' return { - "Type": "Task", - "Resource": "arn:aws:states:::batch:submitJob.sync", - "Parameters": { - "JobDefinition": "${"+ snake_to_pascal_case(task["name"]) + "}", - "JobName.$": "$.job_id", - "JobQueue": "${" + job_queue + "}", - "ShareIdentifier": "default", - "SchedulingPriorityOverride.$": "$.priority", - "Parameters.$": "$.job_parameters", - "ContainerOverrides.$": "$.container_overrides", - "RetryStrategy": { - "Attempts": 3 + 'Type': 'Task', + 'Resource': 'arn:aws:states:::batch:submitJob.sync', + 'Parameters': { + 'JobDefinition': '${'+ snake_to_pascal_case(task['name']) + '}', + 'JobName.$': '$.job_id', + 'JobQueue': '${' + job_queue + '}', + 'ShareIdentifier': 'default', + 'SchedulingPriorityOverride.$': '$.priority', + 'Parameters.$': '$.job_parameters', + 'ContainerOverrides.$': '$.container_overrides', + 'RetryStrategy': { + 'Attempts': 3 }, }, - "ResultPath": f"$.results.processing_results.step_{index}", - "Next": next_step_name, - "Retry": [ + 'Retry': [ { - 
"ErrorEquals": [ - "Batch.ServerException", - "Batch.AWSBatchException" + 'ErrorEquals': [ + 'Batch.ServerException', + 'Batch.AWSBatchException' ], - "MaxAttempts": 2 + 'MaxAttempts': 2 }, { - "ErrorEquals": [ - "States.ALL" + 'ErrorEquals': [ + 'States.ALL' ], - "MaxAttempts": 0 + 'MaxAttempts': 0 } - ], - "Catch": [ - { - "ErrorEquals": [ - "States.ALL" - ], - "Next": "PROCESSING_FAILED", - "ResultPath": f"$.results.processing_results.step_{index}" - } - ], + ] } -def get_step_for_map_task(task: dict, index: int, next_step_name: str, job_spec: dict) -> dict: - # TODO - pass - - def render_templates(job_types, security_environment, api_name): job_steps = get_steps_for_jobs(job_types) diff --git a/job_spec/SRG_TIME_SERIES.yml b/job_spec/SRG_TIME_SERIES.yml index 72d575caf..181557bc0 100644 --- a/job_spec/SRG_TIME_SERIES.yml +++ b/job_spec/SRG_TIME_SERIES.yml @@ -49,7 +49,7 @@ SRG_TIME_SERIES: ami_id: ami-0729c079aae647cb3 tasks: - name: BACK_PROJECTION - for: granules + map: for granule in granules image: ghcr.io/asfhyp3/hyp3-srg image_tag: latest.gpu command: @@ -62,7 +62,7 @@ SRG_TIME_SERIES: - '!Ref Bucket' - --bucket-prefix - Ref::bucket_prefix - - Ref::granules # TODO figure out syntax for passing an individual granule + - Ref::granule timeout: 10800 vcpu: 1 gpu: 1 @@ -70,11 +70,14 @@ SRG_TIME_SERIES: secrets: - EARTHDATA_USERNAME - EARTHDATA_PASSWORD - - name: TIME_SERIES + - name: '' image: ghcr.io/asfhyp3/hyp3-srg image_tag: latest.gpu # TODO use GPU for time series? command: - ++process - time_series # TODO remaining command arguments - # TODO remaining task fields + timeout: 0 # TODO + vcpu: 1 + memory: 0 # TODO + # TODO secrets? From 3cd00269cd88c1315b21d95638caae4ea6655af1 Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Thu, 3 Oct 2024 16:38:45 -0800 Subject: [PATCH 009/163] max 300 granules --- job_spec/SRG_TIME_SERIES.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/job_spec/SRG_TIME_SERIES.yml b/job_spec/SRG_TIME_SERIES.yml index 181557bc0..626a0dea2 100644 --- a/job_spec/SRG_TIME_SERIES.yml +++ b/job_spec/SRG_TIME_SERIES.yml @@ -7,8 +7,7 @@ SRG_TIME_SERIES: api_schema: type: array minItems: 1 - # TODO determine appropriate max - maxItems: 6 + maxItems: 300 example: - S1A_IW_RAW__0SDV_20231229T134339_20231229T134411_051870_064437_4F42 - S1A_IW_RAW__0SDV_20231229T134404_20231229T134436_051870_064437_5F38 From c217c330679ea436db492643d020d024f03a33d4 Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Fri, 4 Oct 2024 10:05:54 -0800 Subject: [PATCH 010/163] do not specify default params in Batch job definition --- apps/workflow-cf.yml.j2 | 4 ---- 1 file changed, 4 deletions(-) diff --git a/apps/workflow-cf.yml.j2 b/apps/workflow-cf.yml.j2 index 0859387c6..260fe7d5e 100644 --- a/apps/workflow-cf.yml.j2 +++ b/apps/workflow-cf.yml.j2 @@ -49,10 +49,6 @@ Resources: Type: AWS::Batch::JobDefinition Properties: Type: container - Parameters: - {% for k, v in job_spec['parameters'].items() %} - {{ k }}: {{ v.get('default') or v['api_schema'].get('default') }} - {% endfor %} ContainerProperties: Image: {% if 'image_tag' in task -%} "{{ task['image'] }}:{{ task['image_tag'] }}" From 0dbe15778d4f52ae15a3a0d5fb416fda2e03e40e Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Fri, 4 Oct 2024 10:06:29 -0800 Subject: [PATCH 011/163] rename SRG_TIME_SERIES custom compute env --- job_spec/SRG_TIME_SERIES.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/job_spec/SRG_TIME_SERIES.yml b/job_spec/SRG_TIME_SERIES.yml index 626a0dea2..a8148cac6 100644 
--- a/job_spec/SRG_TIME_SERIES.yml +++ b/job_spec/SRG_TIME_SERIES.yml @@ -41,8 +41,9 @@ SRG_TIME_SERIES: DEFAULT: cost: 1.0 compute_environment: - # TODO do we need to use different compute environments for back_projection and time_series? - name: SrgGslc # TODO rename? + # TODO use different compute environments for back_projection and time_series + # TODO use same compute env as SRG_GSLC.yml + name: SrgBackProjection instance_types: g6.2xlarge # Image ID for: /aws/service/ecs/optimized-ami/amazon-linux-2/gpu/recommended/image_id ami_id: ami-0729c079aae647cb3 From 2aa1699ab8bd5d37419da99cfd151bc918c056b5 Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Fri, 4 Oct 2024 10:18:40 -0800 Subject: [PATCH 012/163] update hyp3-multi-burst-sandbox to match hyp3-lavas-test --- .github/workflows/deploy-multi-burst-sandbox.yml | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/.github/workflows/deploy-multi-burst-sandbox.yml b/.github/workflows/deploy-multi-burst-sandbox.yml index c4a2a4518..06f8c4b9f 100644 --- a/.github/workflows/deploy-multi-burst-sandbox.yml +++ b/.github/workflows/deploy-multi-burst-sandbox.yml @@ -25,11 +25,8 @@ jobs: deploy_ref: refs/heads/multi-burst-sandbox job_files: >- job_spec/INSAR_ISCE_BURST.yml - job_spec/INSAR_ISCE_MULTI_BURST.yml - job_spec/AUTORIFT.yml - job_spec/RTC_GAMMA.yml - job_spec/WATER_MAP.yml - job_spec/WATER_MAP_EQ.yml + job_spec/SRG_GSLC.yml + job_spec/SRG_TIME_SERIES.yml instance_types: r6id.xlarge,r6id.2xlarge,r6id.4xlarge,r6id.8xlarge,r6idn.xlarge,r6idn.2xlarge,r6idn.4xlarge,r6idn.8xlarge default_max_vcpus: 640 expanded_max_vcpus: 640 From 603a4873bc8ba77c5cea0b46241dc6c5ba6ee2ec Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Fri, 4 Oct 2024 10:26:22 -0800 Subject: [PATCH 013/163] set placeholder timeout and memory for srg time series --- job_spec/SRG_TIME_SERIES.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/job_spec/SRG_TIME_SERIES.yml b/job_spec/SRG_TIME_SERIES.yml index a8148cac6..3a605a268 100644 --- a/job_spec/SRG_TIME_SERIES.yml +++ b/job_spec/SRG_TIME_SERIES.yml @@ -77,7 +77,7 @@ SRG_TIME_SERIES: - ++process - time_series # TODO remaining command arguments - timeout: 0 # TODO + timeout: 86400 # TODO vcpu: 1 - memory: 0 # TODO + memory: 30500 # TODO # TODO secrets? 
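For reference, the Map state that patch 008's `get_step_for_map_task` renders for `BACK_PROJECTION` (declared as `map: for granule in granules`) comes out roughly as below: `get_job_parameters` forwards `bounds` and `bucket_prefix` unchanged and replaces the `granules` array with one `granule` per iteration. This is a hand-written sketch rather than captured output, and the `Catch`/`ResultPath`/`Next` wrapping added by `get_step_for_task` is omitted; the next two patches rename the inner state to avoid colliding with the Map state's own name.

```python
# Approximate rendered state, as a Python dict of the Step Functions JSON:
back_projection_map_state = {
    'Type': 'Map',
    'ItemsPath': '$.job_parameters.granules',
    'ItemSelector': {
        'job_id.$': '$.job_id',
        'priority.$': '$.priority',
        'container_overrides.$': '$.container_overrides',
        'job_parameters': {
            'bounds.$': '$.job_parameters.bounds',
            'bucket_prefix.$': '$.job_parameters.bucket_prefix',
            'granule.$': '$$.Map.Item.Value',  # one granule per Map iteration
        },
    },
    'ItemProcessor': {
        'StartAt': 'BACK_PROJECTION',
        'States': {
            'BACK_PROJECTION': {'Type': 'Task'},  # Batch submitJob step, elided
        },
    },
}
```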
From 8eae2342860e66012ef69dc50874929786922d1f Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Fri, 4 Oct 2024 10:35:50 -0800 Subject: [PATCH 014/163] Deploy multi-burst-sandbox from srg branch, fix sfn name collision --- .github/workflows/deploy-multi-burst-sandbox.yml | 2 +- apps/render_cf.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/deploy-multi-burst-sandbox.yml b/.github/workflows/deploy-multi-burst-sandbox.yml index 06f8c4b9f..270894afa 100644 --- a/.github/workflows/deploy-multi-burst-sandbox.yml +++ b/.github/workflows/deploy-multi-burst-sandbox.yml @@ -3,7 +3,7 @@ name: Deploy Multi-Burst Sandbox Stack to AWS on: push: branches: - - multi-burst-sandbox + - srg concurrency: ${{ github.workflow }}-${{ github.ref }} diff --git a/apps/render_cf.py b/apps/render_cf.py index 596737e2a..9d8a67e46 100644 --- a/apps/render_cf.py +++ b/apps/render_cf.py @@ -68,7 +68,7 @@ def get_step_for_map_task(task: dict, job_spec: dict) -> dict: 'ItemProcessor': { 'StartAt': task['name'], 'States': { - task['name']: submit_job_step, + task['name'] + '_TASK': submit_job_step, } } } From 33e758a65f3c171cca881e781ebcff297c5a72ed Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Fri, 4 Oct 2024 10:50:03 -0800 Subject: [PATCH 015/163] fix sfn step name, use hyp3-srg dev image --- apps/render_cf.py | 5 +++-- job_spec/SRG_TIME_SERIES.yml | 4 +++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/apps/render_cf.py b/apps/render_cf.py index 9d8a67e46..e5339d39b 100644 --- a/apps/render_cf.py +++ b/apps/render_cf.py @@ -56,6 +56,7 @@ def get_step_for_map_task(task: dict, job_spec: dict) -> dict: job_parameters = get_job_parameters(item, items, job_spec) submit_job_step = get_step_for_batch_submit_job(task, job_spec) submit_job_step['End'] = True + submit_job_step_name = task['name'] + '_SUBMIT_JOB' return { 'Type': 'Map', 'ItemsPath': f'$.job_parameters.{items}', @@ -66,9 +67,9 @@ def get_step_for_map_task(task: dict, job_spec: dict) -> dict: 'job_parameters': job_parameters, }, 'ItemProcessor': { - 'StartAt': task['name'], + 'StartAt': submit_job_step_name, 'States': { - task['name'] + '_TASK': submit_job_step, + submit_job_step_name: submit_job_step, } } } diff --git a/job_spec/SRG_TIME_SERIES.yml b/job_spec/SRG_TIME_SERIES.yml index 3a605a268..bcc4e84c3 100644 --- a/job_spec/SRG_TIME_SERIES.yml +++ b/job_spec/SRG_TIME_SERIES.yml @@ -51,7 +51,9 @@ SRG_TIME_SERIES: - name: BACK_PROJECTION map: for granule in granules image: ghcr.io/asfhyp3/hyp3-srg - image_tag: latest.gpu + # TODO: revert image_tag when done testing + #image_tag: latest.gpu + image_tag: 0.8.1.dev11_ge4608fc command: - ++process - back_projection From 4088b18aaa3c0d2346d8db26dfc6ac4292c899ca Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Fri, 4 Oct 2024 11:37:01 -0800 Subject: [PATCH 016/163] pass original array to Map state rather than stringified version --- apps/render_cf.py | 2 +- apps/start-execution-worker/src/start_execution_worker.py | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/apps/render_cf.py b/apps/render_cf.py index e5339d39b..1064ab320 100644 --- a/apps/render_cf.py +++ b/apps/render_cf.py @@ -59,7 +59,7 @@ def get_step_for_map_task(task: dict, job_spec: dict) -> dict: submit_job_step_name = task['name'] + '_SUBMIT_JOB' return { 'Type': 'Map', - 'ItemsPath': f'$.job_parameters.{items}', + 'ItemsPath': f'$.original_job_parameters.{items}', 'ItemSelector': { 'job_id.$': '$.job_id', 'priority.$': '$.priority', diff --git 
a/apps/start-execution-worker/src/start_execution_worker.py b/apps/start-execution-worker/src/start_execution_worker.py index 7becf0093..6031ad43e 100644 --- a/apps/start-execution-worker/src/start_execution_worker.py +++ b/apps/start-execution-worker/src/start_execution_worker.py @@ -25,6 +25,11 @@ def submit_jobs(jobs: list[dict]) -> None: for job in jobs: # Convert parameters to strings so they can be passed to Batch; see: # https://docs.aws.amazon.com/batch/latest/APIReference/API_SubmitJob.html#Batch-SubmitJob-request-parameters + + # TODO: instead, assign the stringified params to a batch_job_parameters key, and keep the original job_parameters + # value unchanged; will require searching for usage of job_parameters in step-function.json.j2 and render_cf.py + # and updating how they're used + job['original_job_parameters'] = job['job_parameters'] job['job_parameters'] = convert_parameters_to_strings(job['job_parameters']) STEP_FUNCTION.start_execution( stateMachineArn=step_function_arn, From da11d2627067b2ed44794ddd75b98dbb2dc88432 Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Fri, 4 Oct 2024 13:55:31 -0800 Subject: [PATCH 017/163] add back job definition default params, fix image tags --- apps/workflow-cf.yml.j2 | 4 ++++ job_spec/SRG_TIME_SERIES.yml | 7 +++---- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/apps/workflow-cf.yml.j2 b/apps/workflow-cf.yml.j2 index 260fe7d5e..0859387c6 100644 --- a/apps/workflow-cf.yml.j2 +++ b/apps/workflow-cf.yml.j2 @@ -49,6 +49,10 @@ Resources: Type: AWS::Batch::JobDefinition Properties: Type: container + Parameters: + {% for k, v in job_spec['parameters'].items() %} + {{ k }}: {{ v.get('default') or v['api_schema'].get('default') }} + {% endfor %} ContainerProperties: Image: {% if 'image_tag' in task -%} "{{ task['image'] }}:{{ task['image_tag'] }}" diff --git a/job_spec/SRG_TIME_SERIES.yml b/job_spec/SRG_TIME_SERIES.yml index bcc4e84c3..fca2587dd 100644 --- a/job_spec/SRG_TIME_SERIES.yml +++ b/job_spec/SRG_TIME_SERIES.yml @@ -51,9 +51,7 @@ SRG_TIME_SERIES: - name: BACK_PROJECTION map: for granule in granules image: ghcr.io/asfhyp3/hyp3-srg - # TODO: revert image_tag when done testing - #image_tag: latest.gpu - image_tag: 0.8.1.dev11_ge4608fc + image_tag: latest.gpu command: - ++process - back_projection @@ -74,7 +72,8 @@ SRG_TIME_SERIES: - EARTHDATA_PASSWORD - name: '' image: ghcr.io/asfhyp3/hyp3-srg - image_tag: latest.gpu # TODO use GPU for time series? + # TODO decide on appropriate image_tag + image_tag: 0.8.1.dev11_ge4608fc command: - ++process - time_series From 6b99bbf61d4f782011b99b7ac2ce81c053dd59cd Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Fri, 4 Oct 2024 14:51:13 -0800 Subject: [PATCH 018/163] pass bucket prefix and other params to SRG jobs --- apps/render_cf.py | 2 ++ job_spec/SRG_TIME_SERIES.yml | 9 ++++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/apps/render_cf.py b/apps/render_cf.py index 1064ab320..76d97f59b 100644 --- a/apps/render_cf.py +++ b/apps/render_cf.py @@ -89,6 +89,8 @@ def get_job_parameters(item: str, items: str, job_spec: dict) -> dict: if param != items } job_parameters[f'{item}.$'] = '$$.Map.Item.Value' + # TODO: concat items sub-prefix to bucket_prefix in the job spec rather than here? 
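The hunk continues below by appending a per-job sub-prefix through an intrinsic function. With `items` set to `granules`, the generated `ItemSelector` entry would look roughly like the sketch that follows. Note that the third `States.Format` argument renders as the bare token `granules` rather than a JSONPath such as `$.job_parameters.granules`, which Step Functions would likely reject; patch 020 removes this line again in favor of concatenating in the job spec, and patch 021 backs that out as well.

```python
# Approximate rendered job_parameters block for SRG_TIME_SERIES (a sketch):
item_selector_job_parameters = {
    'bounds.$': '$.job_parameters.bounds',
    'granule.$': '$$.Map.Item.Value',
    # As generated by the line added below; 'granules' is a literal token
    # here, not a path:
    'bucket_prefix.$': "States.Format('{}/{}', $.job_parameters.bucket_prefix, granules)",
}
```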
+ job_parameters['bucket_prefix.$'] = "States.Format('{}/{}', $.job_parameters.bucket_prefix, " + items + ")" return job_parameters diff --git a/job_spec/SRG_TIME_SERIES.yml b/job_spec/SRG_TIME_SERIES.yml index fca2587dd..f6b061198 100644 --- a/job_spec/SRG_TIME_SERIES.yml +++ b/job_spec/SRG_TIME_SERIES.yml @@ -77,7 +77,14 @@ SRG_TIME_SERIES: command: - ++process - time_series - # TODO remaining command arguments + - --bounds + - Ref::bounds + - --bucket + - '!Ref Bucket' + - --bucket-prefix + - Ref::bucket_prefix + - --gslc-bucket-prefix + - 'granules' timeout: 86400 # TODO vcpu: 1 memory: 30500 # TODO From f0c1a2414253a425c7373188f6c4c0086a63edaa Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Fri, 4 Oct 2024 15:01:06 -0800 Subject: [PATCH 019/163] update image tag --- job_spec/SRG_TIME_SERIES.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/job_spec/SRG_TIME_SERIES.yml b/job_spec/SRG_TIME_SERIES.yml index f6b061198..7e8365043 100644 --- a/job_spec/SRG_TIME_SERIES.yml +++ b/job_spec/SRG_TIME_SERIES.yml @@ -73,7 +73,7 @@ SRG_TIME_SERIES: - name: '' image: ghcr.io/asfhyp3/hyp3-srg # TODO decide on appropriate image_tag - image_tag: 0.8.1.dev11_ge4608fc + image_tag: 0.8.1.dev12_gd765228 command: - ++process - time_series From 2622ebe19186faea752a8930d2de61fa4563e459 Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Fri, 4 Oct 2024 15:10:07 -0800 Subject: [PATCH 020/163] add granules to bucket prefix in job spec --- apps/render_cf.py | 2 -- job_spec/SRG_TIME_SERIES.yml | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/apps/render_cf.py b/apps/render_cf.py index 76d97f59b..1064ab320 100644 --- a/apps/render_cf.py +++ b/apps/render_cf.py @@ -89,8 +89,6 @@ def get_job_parameters(item: str, items: str, job_spec: dict) -> dict: if param != items } job_parameters[f'{item}.$'] = '$$.Map.Item.Value' - # TODO: concat items sub-prefix to bucket_prefix in the job spec rather than here? 
- job_parameters['bucket_prefix.$'] = "States.Format('{}/{}', $.job_parameters.bucket_prefix, " + items + ")" return job_parameters diff --git a/job_spec/SRG_TIME_SERIES.yml b/job_spec/SRG_TIME_SERIES.yml index 7e8365043..90d852710 100644 --- a/job_spec/SRG_TIME_SERIES.yml +++ b/job_spec/SRG_TIME_SERIES.yml @@ -61,7 +61,7 @@ SRG_TIME_SERIES: - --bucket - '!Ref Bucket' - --bucket-prefix - - Ref::bucket_prefix + - Ref::bucket_prefix/granules - Ref::granule timeout: 10800 vcpu: 1 From 52815852db977398fb3b8d9fac508373aa9a06fb Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Fri, 4 Oct 2024 15:24:26 -0800 Subject: [PATCH 021/163] do not add granules to bucket prefix in job spec --- job_spec/SRG_TIME_SERIES.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/job_spec/SRG_TIME_SERIES.yml b/job_spec/SRG_TIME_SERIES.yml index 90d852710..7e8365043 100644 --- a/job_spec/SRG_TIME_SERIES.yml +++ b/job_spec/SRG_TIME_SERIES.yml @@ -61,7 +61,7 @@ SRG_TIME_SERIES: - --bucket - '!Ref Bucket' - --bucket-prefix - - Ref::bucket_prefix/granules + - Ref::bucket_prefix - Ref::granule timeout: 10800 vcpu: 1 From e0000a440365b16c56b694a546181081e713549b Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Fri, 4 Oct 2024 15:51:00 -0800 Subject: [PATCH 022/163] run time series back projection step in default compute env for now --- job_spec/SRG_TIME_SERIES.yml | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/job_spec/SRG_TIME_SERIES.yml b/job_spec/SRG_TIME_SERIES.yml index 7e8365043..c5c4e27d1 100644 --- a/job_spec/SRG_TIME_SERIES.yml +++ b/job_spec/SRG_TIME_SERIES.yml @@ -41,23 +41,29 @@ SRG_TIME_SERIES: DEFAULT: cost: 1.0 compute_environment: - # TODO use different compute environments for back_projection and time_series - # TODO use same compute env as SRG_GSLC.yml - name: SrgBackProjection - instance_types: g6.2xlarge - # Image ID for: /aws/service/ecs/optimized-ami/amazon-linux-2/gpu/recommended/image_id - ami_id: ami-0729c079aae647cb3 + name: 'Default' +# TODO revert compute_environment +# compute_environment: +# # TODO use different compute environments for back_projection and time_series +# # TODO use same compute env as SRG_GSLC.yml +# name: SrgBackProjection +# instance_types: g6.2xlarge +# # Image ID for: /aws/service/ecs/optimized-ami/amazon-linux-2/gpu/recommended/image_id +# ami_id: ami-0729c079aae647cb3 tasks: - name: BACK_PROJECTION map: for granule in granules image: ghcr.io/asfhyp3/hyp3-srg - image_tag: latest.gpu + # TODO revert image_tag + #image_tag: latest.gpu + image_tag: 0.8.1.dev13_g0de6d63 command: - ++process - back_projection - --bounds - Ref::bounds - - --gpu + # TODO revert --gpu + #- --gpu - --bucket - '!Ref Bucket' - --bucket-prefix @@ -65,7 +71,8 @@ SRG_TIME_SERIES: - Ref::granule timeout: 10800 vcpu: 1 - gpu: 1 + # TODO revert gpu + #gpu: 1 memory: 30500 secrets: - EARTHDATA_USERNAME @@ -73,7 +80,7 @@ SRG_TIME_SERIES: - name: '' image: ghcr.io/asfhyp3/hyp3-srg # TODO decide on appropriate image_tag - image_tag: 0.8.1.dev12_gd765228 + image_tag: 0.8.1.dev13_g0de6d63 command: - ++process - time_series From 3d7337f46532918bce31500f095dbff592486fb6 Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Fri, 4 Oct 2024 16:03:00 -0800 Subject: [PATCH 023/163] pass --use-granules-from-s3 to time_series workflow --- job_spec/SRG_TIME_SERIES.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/job_spec/SRG_TIME_SERIES.yml b/job_spec/SRG_TIME_SERIES.yml index c5c4e27d1..96bd5e1b6 100644 --- 
a/job_spec/SRG_TIME_SERIES.yml +++ b/job_spec/SRG_TIME_SERIES.yml @@ -90,8 +90,7 @@ SRG_TIME_SERIES: - '!Ref Bucket' - --bucket-prefix - Ref::bucket_prefix - - --gslc-bucket-prefix - - 'granules' + - --use-granules-from-s3 timeout: 86400 # TODO vcpu: 1 memory: 30500 # TODO From bd8f4a586f9df8b8fa55a61306efdf92792d8c1b Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Fri, 4 Oct 2024 16:05:38 -0800 Subject: [PATCH 024/163] image tag --- job_spec/SRG_TIME_SERIES.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/job_spec/SRG_TIME_SERIES.yml b/job_spec/SRG_TIME_SERIES.yml index 96bd5e1b6..73fc07ba6 100644 --- a/job_spec/SRG_TIME_SERIES.yml +++ b/job_spec/SRG_TIME_SERIES.yml @@ -56,7 +56,7 @@ SRG_TIME_SERIES: image: ghcr.io/asfhyp3/hyp3-srg # TODO revert image_tag #image_tag: latest.gpu - image_tag: 0.8.1.dev13_g0de6d63 + image_tag: 0.8.1.dev14_g01f0452 command: - ++process - back_projection @@ -80,7 +80,7 @@ SRG_TIME_SERIES: - name: '' image: ghcr.io/asfhyp3/hyp3-srg # TODO decide on appropriate image_tag - image_tag: 0.8.1.dev13_g0de6d63 + image_tag: 0.8.1.dev14_g01f0452 command: - ++process - time_series From b27b909c7c77f94623651b7b0976e4f90ea3f64c Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Fri, 4 Oct 2024 16:15:27 -0800 Subject: [PATCH 025/163] increase timeout for time series gslc step --- job_spec/SRG_TIME_SERIES.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/job_spec/SRG_TIME_SERIES.yml b/job_spec/SRG_TIME_SERIES.yml index 73fc07ba6..79ea0fb0f 100644 --- a/job_spec/SRG_TIME_SERIES.yml +++ b/job_spec/SRG_TIME_SERIES.yml @@ -69,7 +69,9 @@ SRG_TIME_SERIES: - --bucket-prefix - Ref::bucket_prefix - Ref::granule - timeout: 10800 + # TODO revert timeout for gpu env + #timeout: 10800 + timeout: 86400 vcpu: 1 # TODO revert gpu #gpu: 1 From 8021bcdc518aac718be589fa1ebd5066db392a6f Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Fri, 4 Oct 2024 17:11:51 -0800 Subject: [PATCH 026/163] image tag --- job_spec/SRG_TIME_SERIES.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/job_spec/SRG_TIME_SERIES.yml b/job_spec/SRG_TIME_SERIES.yml index 79ea0fb0f..70650f9d8 100644 --- a/job_spec/SRG_TIME_SERIES.yml +++ b/job_spec/SRG_TIME_SERIES.yml @@ -56,7 +56,7 @@ SRG_TIME_SERIES: image: ghcr.io/asfhyp3/hyp3-srg # TODO revert image_tag #image_tag: latest.gpu - image_tag: 0.8.1.dev14_g01f0452 + image_tag: 0.8.1.dev15_g50d06f4 command: - ++process - back_projection @@ -82,7 +82,7 @@ SRG_TIME_SERIES: - name: '' image: ghcr.io/asfhyp3/hyp3-srg # TODO decide on appropriate image_tag - image_tag: 0.8.1.dev14_g01f0452 + image_tag: 0.8.1.dev15_g50d06f4 command: - ++process - time_series From f3c9ed124b08d81851be70b0f16ca4060a9132d8 Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Mon, 7 Oct 2024 09:52:51 -0800 Subject: [PATCH 027/163] image tag --- job_spec/SRG_TIME_SERIES.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/job_spec/SRG_TIME_SERIES.yml b/job_spec/SRG_TIME_SERIES.yml index 70650f9d8..3b212c2f4 100644 --- a/job_spec/SRG_TIME_SERIES.yml +++ b/job_spec/SRG_TIME_SERIES.yml @@ -56,7 +56,7 @@ SRG_TIME_SERIES: image: ghcr.io/asfhyp3/hyp3-srg # TODO revert image_tag #image_tag: latest.gpu - image_tag: 0.8.1.dev15_g50d06f4 + image_tag: 0.8.1.dev16_g3983395 command: - ++process - back_projection @@ -82,7 +82,7 @@ SRG_TIME_SERIES: - name: '' image: ghcr.io/asfhyp3/hyp3-srg # TODO decide on appropriate image_tag - image_tag: 0.8.1.dev15_g50d06f4 + image_tag: 0.8.1.dev16_g3983395 command: - ++process - 
time_series From fac1172275e2d6a122c56b4378b399ef0bd1f1d8 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Mon, 7 Oct 2024 14:16:35 -0400 Subject: [PATCH 028/163] per task compute_envs --- apps/compute-cf.yml.j2 | 18 +++++++++++---- .../handle-batch-event-cf.yml.j2 | 17 ++++++++++---- apps/main-cf.yml.j2 | 18 ++++++++++----- apps/scale-cluster/scale-cluster-cf.yml.j2 | 22 ++++++++++++------- apps/step-function.json.j2 | 2 +- apps/workflow-cf.yml.j2 | 20 ++++++++++++----- 6 files changed, 68 insertions(+), 29 deletions(-) diff --git a/apps/compute-cf.yml.j2 b/apps/compute-cf.yml.j2 index 80e52b489..e708a11f0 100644 --- a/apps/compute-cf.yml.j2 +++ b/apps/compute-cf.yml.j2 @@ -1,5 +1,16 @@ AWSTemplateFormatVersion: 2010-09-09 + +{%- set compute_envs = [] -%} +{%- for job_type, job_spec in job_types.items() -%} +{%- for task in job_spec['tasks'] if task['compute_environment']['name'] != 'Default' -%} + +{%- do compute_envs.append(task['compute_environment']) -%} + +{% endfor %} +{% endfor %} + + Parameters: VpcId: @@ -29,8 +40,8 @@ Outputs: JobQueueArn: Value: !Ref BatchJobQueue - {% for job_type, job_spec in job_types.items() if 'Default' != job_spec['compute_environment']['name'] %} - {% set name = job_spec['compute_environment']['name'] %} + {% for env in compute_envs %} + {% set name = env['name'] %} {{ name }}ComputeEnvironmentArn: Value: !Ref {{ name }}ComputeEnvironment @@ -106,8 +117,7 @@ Resources: SchedulingPolicy: Type: AWS::Batch::SchedulingPolicy - {% for job_type, job_spec in job_types.items() if 'Default' != job_spec['compute_environment']['name'] %} - {% set env = job_spec['compute_environment'] %} + {% for env in compute_envs %} {% set name = env['name'] %} {% set instance_types = env['instance_types'].split(',') if 'instance_types' in env else '!Ref InstanceTypes' %} {% set ami_id = env['ami_id'] if 'ami_id' in env else '!Ref AmiId' %} diff --git a/apps/handle-batch-event/handle-batch-event-cf.yml.j2 b/apps/handle-batch-event/handle-batch-event-cf.yml.j2 index 386ae87c6..a83670b64 100644 --- a/apps/handle-batch-event/handle-batch-event-cf.yml.j2 +++ b/apps/handle-batch-event/handle-batch-event-cf.yml.j2 @@ -1,12 +1,21 @@ AWSTemplateFormatVersion: 2010-09-09 +{%- set compute_env_names = [] -%} +{%- for job_type, job_spec in job_types.items() -%} +{%- for task in job_spec['tasks'] if task['compute_environment']['name'] != 'Default' -%} + +{%- do compute_env_names.append(task['compute_environment']['name']) -%} + +{% endfor %} +{% endfor %} + Parameters: JobQueueArn: Type: String - {% for job_type, job_spec in job_types.items() if 'Default' != job_spec['compute_environment']['name'] %} - {{ job_spec['compute_environment']['name'] }}JobQueueArn: + {% for name in compute_env_names %} + {{ name }}JobQueueArn: Type: String {% endfor %} @@ -100,8 +109,8 @@ Resources: detail: jobQueue: - !Ref JobQueueArn - {% for job_type, job_spec in job_types.items() if 'Default' != job_spec['compute_environment']['name'] %} - - !Ref {{ job_spec['compute_environment']['name'] }}JobQueueArn + {% for name in compute_env_names %} + - !Ref {{ name }}JobQueueArn {% endfor %} status: - RUNNING diff --git a/apps/main-cf.yml.j2 b/apps/main-cf.yml.j2 index 13b7405d5..7ad3ba060 100644 --- a/apps/main-cf.yml.j2 +++ b/apps/main-cf.yml.j2 @@ -1,5 +1,14 @@ AWSTemplateFormatVersion: 2010-09-09 +{%- set compute_env_names = [] -%} +{%- for job_type, job_spec in job_types.items() -%} +{%- for task in job_spec['tasks'] if task['compute_environment']['name'] != 'Default' -%} + +{%- do 
compute_env_names.append(task['compute_environment']['name']) -%} + +{% endfor %} +{% endfor %} + Parameters: VpcId: @@ -154,8 +163,7 @@ Resources: Properties: Parameters: ComputeEnvironmentArn: !GetAtt Cluster.Outputs.ComputeEnvironmentArn - {% for job_type, job_spec in job_types.items() if 'Default' != job_spec['compute_environment']['name'] %} - {% set name = job_spec['compute_environment']['name'] %} + {% for name in compute_env_names %} {{ name }}ComputeEnvironmentArn: !GetAtt Cluster.Outputs.{{ name }}ComputeEnvironmentArn {% endfor %} DefaultMaxvCpus: !Ref DefaultMaxvCpus @@ -173,8 +181,7 @@ Resources: Properties: Parameters: JobQueueArn: !GetAtt Cluster.Outputs.JobQueueArn - {% for job_type, job_spec in job_types.items() if 'Default' != job_spec['compute_environment']['name'] %} - {% set name = job_spec['compute_environment']['name'] %} + {% for name in compute_env_names %} {{ name }}JobQueueArn: !GetAtt Cluster.Outputs.{{ name }}JobQueueArn {% endfor %} JobsTable: !Ref JobsTable @@ -189,8 +196,7 @@ Resources: Properties: Parameters: JobQueueArn: !GetAtt Cluster.Outputs.JobQueueArn - {% for job_type, job_spec in job_types.items() if 'Default' != job_spec['compute_environment']['name'] %} - {% set name = job_spec['compute_environment']['name'] %} + {% for name in compute_env_names %} {{ name }}JobQueueArn: !GetAtt Cluster.Outputs.{{ name }}JobQueueArn {% endfor %} TaskRoleArn: !GetAtt Cluster.Outputs.TaskRoleArn diff --git a/apps/scale-cluster/scale-cluster-cf.yml.j2 b/apps/scale-cluster/scale-cluster-cf.yml.j2 index 358f095b3..98bff1bff 100644 --- a/apps/scale-cluster/scale-cluster-cf.yml.j2 +++ b/apps/scale-cluster/scale-cluster-cf.yml.j2 @@ -1,12 +1,20 @@ AWSTemplateFormatVersion: 2010-09-09 +{%- set compute_env_names = [] -%} +{%- for job_type, job_spec in job_types.items() -%} +{%- for task in job_spec['tasks'] if task['compute_environment']['name'] != 'Default' -%} + +{%- do compute_env_names.append(task['compute_environment']['name']) -%} + +{% endfor %} +{% endfor %} + Parameters: ComputeEnvironmentArn: Type: String - {% for job_type, job_spec in job_types.items() if 'Default' != job_spec['compute_environment']['name'] %} - {% set name = job_spec['compute_environment']['name'] %} + {% for name in compute_env_names %} {{ name }}ComputeEnvironmentArn: Type: String {% endfor %} @@ -87,8 +95,8 @@ Resources: Action: batch:UpdateComputeEnvironment Resource: - !Ref ComputeEnvironmentArn - {% for job_type, job_spec in job_types.items() if 'Default' != job_spec['compute_environment']['name'] %} - - !Ref {{ job_spec['compute_environment']['name'] }}ComputeEnvironmentArn + {% for name in compute_env_names %} + - !Ref {{ name }}ComputeEnvironmentArn {% endfor %} Lambda: @@ -128,8 +136,7 @@ Resources: Targets: - Arn: !GetAtt Lambda.Arn Id: lambda - {% for job_type, job_spec in job_types.items() if 'Default' != job_spec['compute_environment']['name'] %} - {% set name = job_spec['compute_environment']['name'] %} + {% for name in compute_env_names %} - Arn: !GetAtt {{ name }}Lambda.Arn Id: {{ name }}lambda {% endfor %} @@ -142,8 +149,7 @@ Resources: Principal: events.amazonaws.com SourceArn: !GetAtt Schedule.Arn - {% for job_type, job_spec in job_types.items() if 'Default' != job_spec['compute_environment']['name'] %} - {% set name = job_spec['compute_environment']['name'] %} + {% for name in compute_env_names %} {{ name }}LogGroup: Type: AWS::Logs::LogGroup Properties: diff --git a/apps/step-function.json.j2 b/apps/step-function.json.j2 index bc168208d..de216bc61 100644 --- 
a/apps/step-function.json.j2 +++ b/apps/step-function.json.j2 @@ -86,7 +86,7 @@ "Parameters": { "JobDefinition": "{{ '${'+ snake_to_pascal_case(task['name']) + '}' }}", "JobName.$": "$.job_id", - {% set name = job_spec['compute_environment']['name'] %} + {% set name = task['compute_environment']['name'] %} {% set job_queue = name + 'JobQueueArn' if 'Default' != name else 'JobQueueArn' %} "JobQueue": "{{ '${' + job_queue + '}' }}", "ShareIdentifier": "default", diff --git a/apps/workflow-cf.yml.j2 b/apps/workflow-cf.yml.j2 index 0859387c6..1942865fb 100644 --- a/apps/workflow-cf.yml.j2 +++ b/apps/workflow-cf.yml.j2 @@ -1,12 +1,21 @@ AWSTemplateFormatVersion: 2010-09-09 +{%- set compute_env_names = [] -%} +{%- for job_type, job_spec in job_types.items() -%} +{%- for task in job_spec['tasks'] if task['compute_environment']['name'] != 'Default' -%} + +{%- do compute_env_names.append(task['compute_environment']['name']) -%} + +{% endfor %} +{% endfor %} + Parameters: JobQueueArn: Type: String - {% for job_type, job_spec in job_types.items() if 'Default' != job_spec['compute_environment']['name'] %} - {{ job_spec['compute_environment']['name'] }}JobQueueArn: + {% for name in compute_env_names %} + {{ name }}JobQueueArn: Type: String {% endfor %} @@ -93,8 +102,7 @@ Resources: DefinitionS3Location: step-function.json DefinitionSubstitutions: JobQueueArn: !Ref JobQueueArn - {% for job_type, job_spec in job_types.items() if 'Default' != job_spec['compute_environment']['name'] %} - {% set name = job_spec['compute_environment']['name'] %} + {% for name in compute_env_names %} {{ name }}JobQueueArn: !Ref {{ name }}JobQueueArn {% endfor %} {% for job_type, job_spec in job_types.items() %} @@ -139,8 +147,8 @@ Resources: Action: batch:SubmitJob Resource: - !Ref JobQueueArn - {% for job_type, job_spec in job_types.items() if 'Default' != job_spec['compute_environment']['name'] %} - - !Ref {{ job_spec['compute_environment']['name'] }}JobQueueArn + {% for name in compute_env_names %} + - !Ref {{ name }}JobQueueArn {% endfor %} {% for job_type, job_spec in job_types.items() %} {% for task in job_spec['tasks'] %} From 782a8c723b46497bc5176c95deb4bb26fd0d1d53 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Mon, 7 Oct 2024 14:29:00 -0400 Subject: [PATCH 029/163] add `do` extension --- apps/render_cf.py | 1 + 1 file changed, 1 insertion(+) diff --git a/apps/render_cf.py b/apps/render_cf.py index 1907d065a..61a7a1593 100644 --- a/apps/render_cf.py +++ b/apps/render_cf.py @@ -19,6 +19,7 @@ def render_templates(job_types, security_environment, api_name): trim_blocks=True, lstrip_blocks=True, keep_trailing_newline=True, + extensions=['jinja2.ext.do'], ) for template_file in Path('.').glob('**/*.j2'): From 50d0e6ced6d5aecf582861d2f308fd77478bb1e9 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Mon, 7 Oct 2024 14:29:12 -0400 Subject: [PATCH 030/163] move compute env into tasks --- job_spec/ARIA_AUTORIFT.yml | 6 +++--- job_spec/ARIA_RAIDER.yml | 4 ++-- job_spec/AUTORIFT.yml | 4 ++-- job_spec/AUTORIFT_ITS_LIVE.yml | 4 ++-- job_spec/INSAR_GAMMA.yml | 4 ++-- job_spec/INSAR_ISCE.yml | 4 ++++ job_spec/INSAR_ISCE_BURST.yml | 4 ++-- job_spec/INSAR_ISCE_MULTI_BURST.yml | 4 ++-- job_spec/RTC_GAMMA.yml | 4 ++-- job_spec/S1_CORRECTION_ITS_LIVE.yml | 4 ++-- job_spec/SRG_GSLC.yml | 12 +++++++----- job_spec/WATER_MAP.yml | 8 ++++++-- job_spec/WATER_MAP_EQ.yml | 6 ++++-- 13 files changed, 40 insertions(+), 28 deletions(-) diff --git a/job_spec/ARIA_AUTORIFT.yml b/job_spec/ARIA_AUTORIFT.yml index 5d92c9cce..9d7d681bd 100644 --- 
a/job_spec/ARIA_AUTORIFT.yml +++ b/job_spec/ARIA_AUTORIFT.yml @@ -42,9 +42,6 @@ AUTORIFT: DEFAULT: cost: 1.0 validators: [] - compute_environment: - name: 'AriaAutorift' - instance_types: r6id.xlarge,r6id.2xlarge,r6id.4xlarge,r6id.8xlarge,r6idn.xlarge,r6idn.2xlarge,r6idn.4xlarge,r6idn.8xlarge tasks: - name: '' image: ghcr.io/asfhyp3/hyp3-autorift @@ -61,6 +58,9 @@ AUTORIFT: - ITS_LIVE_OD - Ref::granules timeout: 10800 + compute_environment: + name: 'AriaAutorift' + instance_types: r6id.xlarge,r6id.2xlarge,r6id.4xlarge,r6id.8xlarge,r6idn.xlarge,r6idn.2xlarge,r6idn.4xlarge,r6idn.8xlarge vcpu: 1 memory: 31500 secrets: diff --git a/job_spec/ARIA_RAIDER.yml b/job_spec/ARIA_RAIDER.yml index 840a67154..0dc69f4bf 100644 --- a/job_spec/ARIA_RAIDER.yml +++ b/job_spec/ARIA_RAIDER.yml @@ -25,8 +25,6 @@ ARIA_RAIDER: DEFAULT: cost: 1.0 validators: [] - compute_environment: - name: 'Default' tasks: - name: '' image: ghcr.io/dbekaert/raider @@ -42,6 +40,8 @@ ARIA_RAIDER: - --input-bucket-prefix - Ref::job_id timeout: 10800 + compute_environment: + name: 'Default' vcpu: 1 memory: 7500 secrets: diff --git a/job_spec/AUTORIFT.yml b/job_spec/AUTORIFT.yml index e4bc9ef39..f4a4a244c 100644 --- a/job_spec/AUTORIFT.yml +++ b/job_spec/AUTORIFT.yml @@ -39,8 +39,6 @@ AUTORIFT: DEFAULT: cost: 1.0 validators: [] - compute_environment: - name: 'Default' tasks: - name: '' image: ghcr.io/asfhyp3/hyp3-autorift @@ -57,6 +55,8 @@ AUTORIFT: - ITS_LIVE_OD - Ref::granules timeout: 10800 + compute_environment: + name: 'Default' vcpu: 1 memory: 31500 secrets: diff --git a/job_spec/AUTORIFT_ITS_LIVE.yml b/job_spec/AUTORIFT_ITS_LIVE.yml index 2a9840d28..ac4fd0fb1 100644 --- a/job_spec/AUTORIFT_ITS_LIVE.yml +++ b/job_spec/AUTORIFT_ITS_LIVE.yml @@ -51,8 +51,6 @@ AUTORIFT: DEFAULT: cost: 1.0 validators: [] - compute_environment: - name: 'Default' tasks: - name: '' image: ghcr.io/asfhyp3/hyp3-autorift @@ -71,6 +69,8 @@ AUTORIFT: - ITS_LIVE_PROD - Ref::granules timeout: 10800 + compute_environment: + name: 'Default' vcpu: 1 memory: 31500 secrets: diff --git a/job_spec/INSAR_GAMMA.yml b/job_spec/INSAR_GAMMA.yml index 2d376bf8d..bf5af6c84 100644 --- a/job_spec/INSAR_GAMMA.yml +++ b/job_spec/INSAR_GAMMA.yml @@ -82,8 +82,6 @@ INSAR_GAMMA: cost: 1.0 validators: - check_dem_coverage - compute_environment: - name: 'Default' tasks: - name: '' image: 845172464411.dkr.ecr.us-west-2.amazonaws.com/hyp3-gamma @@ -116,6 +114,8 @@ INSAR_GAMMA: - Ref::phase_filter_parameter - Ref::granules timeout: 10800 + compute_environment: + name: 'Default' vcpu: 1 memory: 31500 secrets: diff --git a/job_spec/INSAR_ISCE.yml b/job_spec/INSAR_ISCE.yml index 75b62104c..edca7a1c8 100644 --- a/job_spec/INSAR_ISCE.yml +++ b/job_spec/INSAR_ISCE.yml @@ -127,6 +127,8 @@ INSAR_ISCE: - --unfiltered-coherence - Ref::unfiltered_coherence timeout: 21600 + compute_environment: + name: 'Default' vcpu: 1 memory: 15500 secrets: @@ -146,6 +148,8 @@ INSAR_ISCE: - --weather-model - Ref::weather_model timeout: 10800 + compute_environment: + name: 'Default' vcpu: 1 memory: 7500 secrets: diff --git a/job_spec/INSAR_ISCE_BURST.yml b/job_spec/INSAR_ISCE_BURST.yml index d741b5aa4..53c871799 100644 --- a/job_spec/INSAR_ISCE_BURST.yml +++ b/job_spec/INSAR_ISCE_BURST.yml @@ -44,8 +44,6 @@ INSAR_ISCE_BURST: - check_valid_polarizations - check_same_burst_ids - check_not_antimeridian - compute_environment: - name: 'Default' tasks: - name: '' image: ghcr.io/asfhyp3/hyp3-isce2 @@ -64,6 +62,8 @@ INSAR_ISCE_BURST: - Ref::looks - Ref::granules timeout: 5400 + compute_environment: + name: 'Default' 
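Patches 028 and 030 make the same move from opposite sides: the templates now read `compute_environment` from each task, and these job-spec hunks supply it, so a multi-step job such as SRG_TIME_SERIES can eventually run `back_projection` and `time_series` in different environments. In Python terms, the collection loop added to each template amounts to the sketch below; note the Jinja2 version appends without de-duplicating, so an environment shared by two tasks would be emitted twice (the de-duplication here is added for clarity, not taken from the patch).

```python
def get_compute_env_names(job_types: dict) -> list[str]:
    """Collect non-default compute environment names across all tasks."""
    names = []
    for job_spec in job_types.values():
        for task in job_spec['tasks']:
            name = task['compute_environment']['name']
            if name != 'Default' and name not in names:
                names.append(name)
    return names
```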
vcpu: 1 memory: 7600 secrets: diff --git a/job_spec/INSAR_ISCE_MULTI_BURST.yml b/job_spec/INSAR_ISCE_MULTI_BURST.yml index e6b3a89e5..5f0a3aa15 100644 --- a/job_spec/INSAR_ISCE_MULTI_BURST.yml +++ b/job_spec/INSAR_ISCE_MULTI_BURST.yml @@ -60,8 +60,6 @@ INSAR_ISCE_MULTI_BURST: - check_valid_polarizations - check_same_burst_ids - check_not_antimeridian - compute_environment: - name: 'Default' tasks: - name: '' image: ghcr.io/asfhyp3/hyp3-isce2 @@ -81,6 +79,8 @@ INSAR_ISCE_MULTI_BURST: - --secondary - Ref::secondary timeout: 126000 # 35 hours + compute_environment: + name: 'Default' vcpu: 1 memory: 4 # Memory is always overridden by the step function secrets: diff --git a/job_spec/RTC_GAMMA.yml b/job_spec/RTC_GAMMA.yml index 393814074..3949d8518 100644 --- a/job_spec/RTC_GAMMA.yml +++ b/job_spec/RTC_GAMMA.yml @@ -103,8 +103,6 @@ RTC_GAMMA: cost: 1.0 validators: - check_dem_coverage - compute_environment: - name: 'Default' tasks: - name: '' image: 845172464411.dkr.ecr.us-west-2.amazonaws.com/hyp3-gamma @@ -139,6 +137,8 @@ RTC_GAMMA: - Ref::include_rgb - Ref::granules timeout: 36000 + compute_environment: + name: 'Default' vcpu: 1 memory: 31500 secrets: diff --git a/job_spec/S1_CORRECTION_ITS_LIVE.yml b/job_spec/S1_CORRECTION_ITS_LIVE.yml index 8de729ce6..c3732596f 100644 --- a/job_spec/S1_CORRECTION_ITS_LIVE.yml +++ b/job_spec/S1_CORRECTION_ITS_LIVE.yml @@ -29,8 +29,6 @@ S1_CORRECTION_TEST: DEFAULT: cost: 1.0 validators: [] - compute_environment: - name: 'Default' tasks: - name: '' image: ghcr.io/asfhyp3/hyp3-autorift @@ -49,6 +47,8 @@ S1_CORRECTION_TEST: - '/vsicurl/http://its-live-data.s3.amazonaws.com/autorift_parameters/v001/autorift_landice_0120m.shp' - Ref::granules timeout: 10800 + compute_environment: + name: 'Default' vcpu: 1 memory: 15750 secrets: diff --git a/job_spec/SRG_GSLC.yml b/job_spec/SRG_GSLC.yml index 21154e0e8..2340f2671 100644 --- a/job_spec/SRG_GSLC.yml +++ b/job_spec/SRG_GSLC.yml @@ -40,11 +40,6 @@ SRG_GSLC: cost_profiles: DEFAULT: cost: 1.0 - compute_environment: - name: SrgGslc - instance_types: g6.2xlarge - # Image ID for: /aws/service/ecs/optimized-ami/amazon-linux-2/gpu/recommended/image_id - ami_id: ami-0729c079aae647cb3 tasks: - name: '' image: ghcr.io/asfhyp3/hyp3-srg @@ -61,6 +56,13 @@ SRG_GSLC: - Ref::bucket_prefix - Ref::granules timeout: 10800 + # TODO: This compute env should come from a file, since it will be duped + # in the time-series workflow. 
+ compute_environment: + name: SrgGslc + instance_types: g6.2xlarge + # Image ID for: /aws/service/ecs/optimized-ami/amazon-linux-2/gpu/recommended/image_id + ami_id: ami-0729c079aae647cb3 vcpu: 1 gpu: 1 memory: 30500 diff --git a/job_spec/WATER_MAP.yml b/job_spec/WATER_MAP.yml index 89105c97a..4fd0c0770 100644 --- a/job_spec/WATER_MAP.yml +++ b/job_spec/WATER_MAP.yml @@ -116,8 +116,6 @@ WATER_MAP: cost: 1.0 validators: - check_dem_coverage - compute_environment: - name: 'Default' tasks: - name: RTC image: 845172464411.dkr.ecr.us-west-2.amazonaws.com/hyp3-gamma @@ -152,6 +150,8 @@ WATER_MAP: - 'copernicus' - Ref::granules timeout: 36000 + compute_environment: + name: 'Default' vcpu: 1 memory: 31500 secrets: @@ -177,6 +177,8 @@ WATER_MAP: - --membership-threshold - Ref::membership_threshold timeout: 36000 + compute_environment: + name: 'Default' vcpu: 1 memory: 126000 - name: FLOOD_MAP @@ -203,5 +205,7 @@ WATER_MAP: - --minimization-metric - Ref::minimization_metric timeout: 86400 + compute_environment: + name: 'Default' vcpu: 1 memory: 126000 diff --git a/job_spec/WATER_MAP_EQ.yml b/job_spec/WATER_MAP_EQ.yml index d4a16d23f..8daf295b4 100644 --- a/job_spec/WATER_MAP_EQ.yml +++ b/job_spec/WATER_MAP_EQ.yml @@ -58,8 +58,6 @@ WATER_MAP_EQ: cost: 1.0 validators: - check_dem_coverage - compute_environment: - name: 'Default' tasks: - name: RTC image: 845172464411.dkr.ecr.us-west-2.amazonaws.com/hyp3-gamma @@ -94,6 +92,8 @@ WATER_MAP_EQ: - 'copernicus' - Ref::granules timeout: 36000 + compute_environment: + name: 'Default' vcpu: 1 memory: 31500 secrets: @@ -113,5 +113,7 @@ WATER_MAP_EQ: - --hand-fraction - Ref::hand_fraction timeout: 36000 + compute_environment: + name: 'Default' vcpu: 1 memory: 126000 From 7820251eed0913f76b4d4086bf601b5451e1a2cc Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Mon, 7 Oct 2024 14:38:50 -0400 Subject: [PATCH 031/163] moving SRG compute env to file --- job_spec/SRG_GSLC.yml | 5 +---- job_spec/compute_environments/SRG_GSLC.yml | 6 ++++++ 2 files changed, 7 insertions(+), 4 deletions(-) create mode 100644 job_spec/compute_environments/SRG_GSLC.yml diff --git a/job_spec/SRG_GSLC.yml b/job_spec/SRG_GSLC.yml index 2340f2671..63adfaae2 100644 --- a/job_spec/SRG_GSLC.yml +++ b/job_spec/SRG_GSLC.yml @@ -59,10 +59,7 @@ SRG_GSLC: # TODO: This compute env should come from a file, since it will be duped # in the time-series workflow. 
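This hunk swaps the inline definition for a `from_file` reference, which the templates resolve in the next patch via the `compute_envs_from_files` lookup. In Python terms the resolution amounts to roughly:

```python
def resolve_compute_env(task: dict, compute_envs_from_files: dict) -> dict:
    # Mirrors the Jinja2 logic added in patch 032: a from_file entry is
    # looked up in the shared definitions and tagged with the file key
    # as its name.
    env = task['compute_environment']
    if 'from_file' in env:
        resolved = dict(compute_envs_from_files[env['from_file']])
        resolved['name'] = env['from_file']
        return resolved
    return env
```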
compute_environment: - name: SrgGslc - instance_types: g6.2xlarge - # Image ID for: /aws/service/ecs/optimized-ami/amazon-linux-2/gpu/recommended/image_id - ami_id: ami-0729c079aae647cb3 + from_file: SRG_GSLC vcpu: 1 gpu: 1 memory: 30500 diff --git a/job_spec/compute_environments/SRG_GSLC.yml b/job_spec/compute_environments/SRG_GSLC.yml new file mode 100644 index 000000000..9ee305ec7 --- /dev/null +++ b/job_spec/compute_environments/SRG_GSLC.yml @@ -0,0 +1,6 @@ +SRG_GSLC: + compute_environment: + name: SrgGslc + instance_types: g6.2xlarge + # Image ID for: /aws/service/ecs/optimized-ami/amazon-linux-2/gpu/recommended/image_id + ami_id: ami-0729c079aae647cb3 From 61634047cba17e56490daea18cfc900fde119062 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Mon, 7 Oct 2024 16:11:42 -0400 Subject: [PATCH 032/163] added from_file compute env capability --- Makefile | 5 ++-- apps/compute-cf.yml.j2 | 17 ++++++++++---- .../handle-batch-event-cf.yml.j2 | 14 +++++++---- apps/main-cf.yml.j2 | 14 +++++++---- apps/render_cf.py | 23 ++++++++++++++----- apps/scale-cluster/scale-cluster-cf.yml.j2 | 14 +++++++---- apps/workflow-cf.yml.j2 | 14 +++++++---- job_spec/SRG_GSLC.yml | 4 +--- job_spec/SRG_TIME_SERIES.yml | 6 +++-- ...{SRG_GSLC.yml => COMPUTE_ENVIRONMENTS.yml} | 6 ++--- 10 files changed, 76 insertions(+), 41 deletions(-) rename job_spec/compute_environments/{SRG_GSLC.yml => COMPUTE_ENVIRONMENTS.yml} (58%) diff --git a/Makefile b/Makefile index c76c9a847..66ddc15e9 100644 --- a/Makefile +++ b/Makefile @@ -26,7 +26,7 @@ build: render test_file ?= tests/ tests: render export $$(xargs < tests/cfg.env); \ - pytest $(test_file) + pytest -vv $(test_file) run: render export $$(xargs < tests/cfg.env); \ @@ -36,11 +36,12 @@ install: python -m pip install -r requirements-all.txt files ?= job_spec/*.yml +compute_env_files ?= job_spec/compute_environments/*.yml security_environment ?= ASF api_name ?= local cost_profile ?= DEFAULT render: - @echo rendering $(files) for API $(api_name) and security environment $(security_environment); python apps/render_cf.py -j $(files) -s $(security_environment) -n $(api_name) -c $(cost_profile) + @echo rendering $(files) for API $(api_name) and security environment $(security_environment); python apps/render_cf.py -j $(files) -e $(compute_env_files) -s $(security_environment) -n $(api_name) -c $(cost_profile) static: flake8 openapi-validate cfn-lint diff --git a/apps/compute-cf.yml.j2 b/apps/compute-cf.yml.j2 index e708a11f0..c4df37a39 100644 --- a/apps/compute-cf.yml.j2 +++ b/apps/compute-cf.yml.j2 @@ -3,11 +3,18 @@ AWSTemplateFormatVersion: 2010-09-09 {%- set compute_envs = [] -%} {%- for job_type, job_spec in job_types.items() -%} -{%- for task in job_spec['tasks'] if task['compute_environment']['name'] != 'Default' -%} - -{%- do compute_envs.append(task['compute_environment']) -%} - -{% endfor %} + {%- for task in job_spec['tasks'] -%} + {% set env = task['compute_environment'] %} + {% if 'name' in env and env['name'] != 'Default' %} + {%- do compute_envs.append(env) -%} + {% endif %} + {% if 'from_file' in env %} + {% set name = env['from_file'] %} + {% set env = compute_envs_from_files[name] %} + {% do env.update({'name': name}) %} + {% do compute_envs.append(env) %} + {% endif %} + {% endfor %} {% endfor %} diff --git a/apps/handle-batch-event/handle-batch-event-cf.yml.j2 b/apps/handle-batch-event/handle-batch-event-cf.yml.j2 index a83670b64..2c864c27f 100644 --- a/apps/handle-batch-event/handle-batch-event-cf.yml.j2 +++ 
b/apps/handle-batch-event/handle-batch-event-cf.yml.j2 @@ -2,11 +2,15 @@ AWSTemplateFormatVersion: 2010-09-09 {%- set compute_env_names = [] -%} {%- for job_type, job_spec in job_types.items() -%} -{%- for task in job_spec['tasks'] if task['compute_environment']['name'] != 'Default' -%} - -{%- do compute_env_names.append(task['compute_environment']['name']) -%} - -{% endfor %} + {%- for task in job_spec['tasks'] -%} + {% set env = task['compute_environment'] %} + {% if 'name' in env and env['name'] != 'Default' %} + {%- do compute_env_names.append(env['name']) -%} + {% endif %} + {% if 'from_file' in env %} + {%- do compute_env_names.append(env['from_file']) -%} + {% endif %} + {% endfor %} {% endfor %} Parameters: diff --git a/apps/main-cf.yml.j2 b/apps/main-cf.yml.j2 index 7ad3ba060..888983462 100644 --- a/apps/main-cf.yml.j2 +++ b/apps/main-cf.yml.j2 @@ -2,11 +2,15 @@ AWSTemplateFormatVersion: 2010-09-09 {%- set compute_env_names = [] -%} {%- for job_type, job_spec in job_types.items() -%} -{%- for task in job_spec['tasks'] if task['compute_environment']['name'] != 'Default' -%} - -{%- do compute_env_names.append(task['compute_environment']['name']) -%} - -{% endfor %} + {%- for task in job_spec['tasks'] -%} + {% set env = task['compute_environment'] %} + {% if 'name' in env and env['name'] != 'Default' %} + {%- do compute_env_names.append(env['name']) -%} + {% endif %} + {% if 'from_file' in env %} + {%- do compute_env_names.append(env['from_file']) -%} + {% endif %} + {% endfor %} {% endfor %} Parameters: diff --git a/apps/render_cf.py b/apps/render_cf.py index 30e33321b..9051a2fc8 100644 --- a/apps/render_cf.py +++ b/apps/render_cf.py @@ -32,7 +32,7 @@ def get_step_for_task(task: dict, index: int, next_step_name: str, job_spec: dic if 'map' in task: step = get_step_for_map_task(task, job_spec) else: - step = get_step_for_batch_submit_job(task, job_spec) + step = get_step_for_batch_submit_job(task) step.update( { 'Catch': [ @@ -54,7 +54,7 @@ def get_step_for_task(task: dict, index: int, next_step_name: str, job_spec: dic def get_step_for_map_task(task: dict, job_spec: dict) -> dict: item, items = parse_task_map(task['map']) job_parameters = get_job_parameters(item, items, job_spec) - submit_job_step = get_step_for_batch_submit_job(task, job_spec) + submit_job_step = get_step_for_batch_submit_job(task) submit_job_step['End'] = True submit_job_step_name = task['name'] + '_SUBMIT_JOB' return { @@ -92,8 +92,13 @@ def get_job_parameters(item: str, items: str, job_spec: dict) -> dict: return job_parameters -def get_step_for_batch_submit_job(task: dict, job_spec: dict) -> dict: - compute_environment = job_spec['compute_environment']['name'] +def get_step_for_batch_submit_job(task: dict) -> dict: + compute_environment = None + if 'compute_environment' in task: + if 'from_file' in task['compute_environment']: + compute_environment = task['compute_environment']['from_file'] + else: + compute_environment = task['compute_environment']['name'] job_queue = 'JobQueueArn' if compute_environment == 'Default' else compute_environment + 'JobQueueArn' return { 'Type': 'Task', @@ -128,7 +133,7 @@ def get_step_for_batch_submit_job(task: dict, job_spec: dict) -> dict: } -def render_templates(job_types, security_environment, api_name): +def render_templates(job_types, compute_envs, security_environment, api_name): job_steps = get_steps_for_jobs(job_types) env = jinja2.Environment( @@ -146,6 +151,7 @@ def render_templates(job_types, security_environment, api_name): output = template.render( 
job_types=job_types, + compute_envs_from_files=compute_envs, security_environment=security_environment, api_name=api_name, json=json, @@ -186,6 +192,7 @@ def render_costs(job_types: dict, cost_profile: str) -> None: def main(): parser = argparse.ArgumentParser() parser.add_argument('-j', '--job-spec-files', required=True, nargs='+', type=Path) + parser.add_argument('-e', '--compute-environment-files', nargs='+', type=Path) parser.add_argument('-s', '--security-environment', default='ASF', choices=['ASF', 'EDC', 'JPL', 'JPL-public']) parser.add_argument('-n', '--api-name', required=True) parser.add_argument('-c', '--cost-profile', default='DEFAULT', choices=['DEFAULT', 'EDC']) @@ -199,9 +206,13 @@ def main(): for task in job_spec['tasks']: task['name'] = job_type + '_' + task['name'] if task['name'] else job_type + compute_envs_from_files = {} + for file in args.compute_environment_files: + compute_envs_from_files.update(yaml.safe_load(file.read_text())['compute_environments']) + render_default_params_by_job_type(job_types) render_costs(job_types, args.cost_profile) - render_templates(job_types, args.security_environment, args.api_name) + render_templates(job_types, compute_envs_from_files, args.security_environment, args.api_name) if __name__ == '__main__': diff --git a/apps/scale-cluster/scale-cluster-cf.yml.j2 b/apps/scale-cluster/scale-cluster-cf.yml.j2 index 98bff1bff..9a2b97dcb 100644 --- a/apps/scale-cluster/scale-cluster-cf.yml.j2 +++ b/apps/scale-cluster/scale-cluster-cf.yml.j2 @@ -2,11 +2,15 @@ AWSTemplateFormatVersion: 2010-09-09 {%- set compute_env_names = [] -%} {%- for job_type, job_spec in job_types.items() -%} -{%- for task in job_spec['tasks'] if task['compute_environment']['name'] != 'Default' -%} - -{%- do compute_env_names.append(task['compute_environment']['name']) -%} - -{% endfor %} + {%- for task in job_spec['tasks'] -%} + {% set env = task['compute_environment'] %} + {% if 'name' in env and env['name'] != 'Default' %} + {%- do compute_env_names.append(env['name']) -%} + {% endif %} + {% if 'from_file' in env %} + {%- do compute_env_names.append(env['from_file']) -%} + {% endif %} + {% endfor %} {% endfor %} Parameters: diff --git a/apps/workflow-cf.yml.j2 b/apps/workflow-cf.yml.j2 index 1942865fb..1ffcdf9dd 100644 --- a/apps/workflow-cf.yml.j2 +++ b/apps/workflow-cf.yml.j2 @@ -2,11 +2,15 @@ AWSTemplateFormatVersion: 2010-09-09 {%- set compute_env_names = [] -%} {%- for job_type, job_spec in job_types.items() -%} -{%- for task in job_spec['tasks'] if task['compute_environment']['name'] != 'Default' -%} - -{%- do compute_env_names.append(task['compute_environment']['name']) -%} - -{% endfor %} + {%- for task in job_spec['tasks'] -%} + {% set env = task['compute_environment'] %} + {% if 'name' in env and env['name'] != 'Default' %} + {%- do compute_env_names.append(env['name']) -%} + {% endif %} + {% if 'from_file' in env %} + {%- do compute_env_names.append(env['from_file']) -%} + {% endif %} + {% endfor %} {% endfor %} Parameters: diff --git a/job_spec/SRG_GSLC.yml b/job_spec/SRG_GSLC.yml index 63adfaae2..cc78f9265 100644 --- a/job_spec/SRG_GSLC.yml +++ b/job_spec/SRG_GSLC.yml @@ -56,10 +56,8 @@ SRG_GSLC: - Ref::bucket_prefix - Ref::granules timeout: 10800 - # TODO: This compute env should come from a file, since it will be duped - # in the time-series workflow. 
compute_environment: - from_file: SRG_GSLC + from_file: SrgGslc vcpu: 1 gpu: 1 memory: 30500 diff --git a/job_spec/SRG_TIME_SERIES.yml b/job_spec/SRG_TIME_SERIES.yml index 3b212c2f4..1e0c1c328 100644 --- a/job_spec/SRG_TIME_SERIES.yml +++ b/job_spec/SRG_TIME_SERIES.yml @@ -40,8 +40,6 @@ SRG_TIME_SERIES: cost_profiles: DEFAULT: cost: 1.0 - compute_environment: - name: 'Default' # TODO revert compute_environment # compute_environment: # # TODO use different compute environments for back_projection and time_series @@ -72,6 +70,8 @@ SRG_TIME_SERIES: # TODO revert timeout for gpu env #timeout: 10800 timeout: 86400 + compute_environment: + from_file: 'SrgGslc' vcpu: 1 # TODO revert gpu #gpu: 1 @@ -94,6 +94,8 @@ SRG_TIME_SERIES: - Ref::bucket_prefix - --use-granules-from-s3 timeout: 86400 # TODO + compute_environment: + name: 'Default' vcpu: 1 memory: 30500 # TODO # TODO secrets? diff --git a/job_spec/compute_environments/SRG_GSLC.yml b/job_spec/compute_environments/COMPUTE_ENVIRONMENTS.yml similarity index 58% rename from job_spec/compute_environments/SRG_GSLC.yml rename to job_spec/compute_environments/COMPUTE_ENVIRONMENTS.yml index 9ee305ec7..24249b63d 100644 --- a/job_spec/compute_environments/SRG_GSLC.yml +++ b/job_spec/compute_environments/COMPUTE_ENVIRONMENTS.yml @@ -1,6 +1,6 @@ -SRG_GSLC: - compute_environment: - name: SrgGslc +compute_environments: + # Format is the same as in the job_spec(s), except the name becomes the key. + SrgGslc: instance_types: g6.2xlarge # Image ID for: /aws/service/ecs/optimized-ami/amazon-linux-2/gpu/recommended/image_id ami_id: ami-0729c079aae647cb3 From 21aca1b13dc950e97660d634e3a3c667dfbb452b Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Mon, 7 Oct 2024 16:25:28 -0400 Subject: [PATCH 033/163] removed -vv for tests --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 66ddc15e9..2b1526462 100644 --- a/Makefile +++ b/Makefile @@ -26,7 +26,7 @@ build: render test_file ?= tests/ tests: render export $$(xargs < tests/cfg.env); \ - pytest -vv $(test_file) + pytest $(test_file) run: render export $$(xargs < tests/cfg.env); \ From 3dfb367b9641a83e38d206649de07a650d87d6b5 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Mon, 7 Oct 2024 16:30:15 -0400 Subject: [PATCH 034/163] dont dupe names --- apps/handle-batch-event/handle-batch-event-cf.yml.j2 | 2 +- apps/main-cf.yml.j2 | 2 +- apps/scale-cluster/scale-cluster-cf.yml.j2 | 2 +- apps/workflow-cf.yml.j2 | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/apps/handle-batch-event/handle-batch-event-cf.yml.j2 b/apps/handle-batch-event/handle-batch-event-cf.yml.j2 index 2c864c27f..6e0eee99f 100644 --- a/apps/handle-batch-event/handle-batch-event-cf.yml.j2 +++ b/apps/handle-batch-event/handle-batch-event-cf.yml.j2 @@ -7,7 +7,7 @@ AWSTemplateFormatVersion: 2010-09-09 {% if 'name' in env and env['name'] != 'Default' %} {%- do compute_env_names.append(env['name']) -%} {% endif %} - {% if 'from_file' in env %} + {% if 'from_file' in env and env['from_file'] not in compute_env_names %} {%- do compute_env_names.append(env['from_file']) -%} {% endif %} {% endfor %} diff --git a/apps/main-cf.yml.j2 b/apps/main-cf.yml.j2 index 888983462..07c49edeb 100644 --- a/apps/main-cf.yml.j2 +++ b/apps/main-cf.yml.j2 @@ -7,7 +7,7 @@ AWSTemplateFormatVersion: 2010-09-09 {% if 'name' in env and env['name'] != 'Default' %} {%- do compute_env_names.append(env['name']) -%} {% endif %} - {% if 'from_file' in env %} + {% if 'from_file' in env and env['from_file'] not in 
compute_env_names %} {%- do compute_env_names.append(env['from_file']) -%} {% endif %} {% endfor %} diff --git a/apps/scale-cluster/scale-cluster-cf.yml.j2 b/apps/scale-cluster/scale-cluster-cf.yml.j2 index 9a2b97dcb..3d4ae3825 100644 --- a/apps/scale-cluster/scale-cluster-cf.yml.j2 +++ b/apps/scale-cluster/scale-cluster-cf.yml.j2 @@ -7,7 +7,7 @@ AWSTemplateFormatVersion: 2010-09-09 {% if 'name' in env and env['name'] != 'Default' %} {%- do compute_env_names.append(env['name']) -%} {% endif %} - {% if 'from_file' in env %} + {% if 'from_file' in env and env['from_file'] not in compute_env_names %} {%- do compute_env_names.append(env['from_file']) -%} {% endif %} {% endfor %} diff --git a/apps/workflow-cf.yml.j2 b/apps/workflow-cf.yml.j2 index 1ffcdf9dd..018524c09 100644 --- a/apps/workflow-cf.yml.j2 +++ b/apps/workflow-cf.yml.j2 @@ -7,7 +7,7 @@ AWSTemplateFormatVersion: 2010-09-09 {% if 'name' in env and env['name'] != 'Default' %} {%- do compute_env_names.append(env['name']) -%} {% endif %} - {% if 'from_file' in env %} + {% if 'from_file' in env and env['from_file'] not in compute_env_names %} {%- do compute_env_names.append(env['from_file']) -%} {% endif %} {% endfor %} From 721394e708dbddbd6a52cfd1972e7e66a80bd982 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Mon, 7 Oct 2024 16:34:23 -0400 Subject: [PATCH 035/163] dont dupe envs --- apps/compute-cf.yml.j2 | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/apps/compute-cf.yml.j2 b/apps/compute-cf.yml.j2 index c4df37a39..624f353dc 100644 --- a/apps/compute-cf.yml.j2 +++ b/apps/compute-cf.yml.j2 @@ -2,17 +2,20 @@ AWSTemplateFormatVersion: 2010-09-09 {%- set compute_envs = [] -%} +{%- set compute_env_names = [] -%} {%- for job_type, job_spec in job_types.items() -%} {%- for task in job_spec['tasks'] -%} {% set env = task['compute_environment'] %} - {% if 'name' in env and env['name'] != 'Default' %} + {% if 'name' in env and env['name'] != 'Default' and env['name'] not in compute_env_names %} {%- do compute_envs.append(env) -%} + {%- do compute_env_names.append(env['name']) -%} {% endif %} - {% if 'from_file' in env %} + {% if 'from_file' in env and env['from_file'] not in compute_env_names %} {% set name = env['from_file'] %} {% set env = compute_envs_from_files[name] %} {% do env.update({'name': name}) %} {% do compute_envs.append(env) %} + {% do compute_env_names.append(name) %} {% endif %} {% endfor %} {% endfor %} From 5d80472bf00e36ba00a8292c14b006e72e750624 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Tue, 8 Oct 2024 13:26:05 -0400 Subject: [PATCH 036/163] changed 'from_file' to 'import' --- apps/compute-cf.yml.j2 | 6 +++--- apps/handle-batch-event/handle-batch-event-cf.yml.j2 | 4 ++-- apps/main-cf.yml.j2 | 4 ++-- apps/render_cf.py | 6 +++--- apps/scale-cluster/scale-cluster-cf.yml.j2 | 4 ++-- apps/workflow-cf.yml.j2 | 4 ++-- job_spec/SRG_GSLC.yml | 2 +- job_spec/SRG_TIME_SERIES.yml | 2 +- 8 files changed, 16 insertions(+), 16 deletions(-) diff --git a/apps/compute-cf.yml.j2 b/apps/compute-cf.yml.j2 index 624f353dc..58bd29ec1 100644 --- a/apps/compute-cf.yml.j2 +++ b/apps/compute-cf.yml.j2 @@ -10,9 +10,9 @@ AWSTemplateFormatVersion: 2010-09-09 {%- do compute_envs.append(env) -%} {%- do compute_env_names.append(env['name']) -%} {% endif %} - {% if 'from_file' in env and env['from_file'] not in compute_env_names %} - {% set name = env['from_file'] %} - {% set env = compute_envs_from_files[name] %} + {% if 'import' in env and env['import'] not in compute_env_names %} + {% set name = env['import'] %} + 
{% set env = compute_env_imports[name] %} {% do env.update({'name': name}) %} {% do compute_envs.append(env) %} {% do compute_env_names.append(name) %} diff --git a/apps/handle-batch-event/handle-batch-event-cf.yml.j2 b/apps/handle-batch-event/handle-batch-event-cf.yml.j2 index 6e0eee99f..f9a81400a 100644 --- a/apps/handle-batch-event/handle-batch-event-cf.yml.j2 +++ b/apps/handle-batch-event/handle-batch-event-cf.yml.j2 @@ -7,8 +7,8 @@ AWSTemplateFormatVersion: 2010-09-09 {% if 'name' in env and env['name'] != 'Default' %} {%- do compute_env_names.append(env['name']) -%} {% endif %} - {% if 'from_file' in env and env['from_file'] not in compute_env_names %} - {%- do compute_env_names.append(env['from_file']) -%} + {% if 'import' in env and env['import'] not in compute_env_names %} + {%- do compute_env_names.append(env['import']) -%} {% endif %} {% endfor %} {% endfor %} diff --git a/apps/main-cf.yml.j2 b/apps/main-cf.yml.j2 index 07c49edeb..dcc47750e 100644 --- a/apps/main-cf.yml.j2 +++ b/apps/main-cf.yml.j2 @@ -7,8 +7,8 @@ AWSTemplateFormatVersion: 2010-09-09 {% if 'name' in env and env['name'] != 'Default' %} {%- do compute_env_names.append(env['name']) -%} {% endif %} - {% if 'from_file' in env and env['from_file'] not in compute_env_names %} - {%- do compute_env_names.append(env['from_file']) -%} + {% if 'import' in env and env['import'] not in compute_env_names %} + {%- do compute_env_names.append(env['import']) -%} {% endif %} {% endfor %} {% endfor %} diff --git a/apps/render_cf.py b/apps/render_cf.py index 9051a2fc8..cd44bba63 100644 --- a/apps/render_cf.py +++ b/apps/render_cf.py @@ -95,8 +95,8 @@ def get_job_parameters(item: str, items: str, job_spec: dict) -> dict: def get_step_for_batch_submit_job(task: dict) -> dict: compute_environment = None if 'compute_environment' in task: - if 'from_file' in task['compute_environment']: - compute_environment = task['compute_environment']['from_file'] + if 'import' in task['compute_environment']: + compute_environment = task['compute_environment']['import'] else: compute_environment = task['compute_environment']['name'] job_queue = 'JobQueueArn' if compute_environment == 'Default' else compute_environment + 'JobQueueArn' @@ -151,7 +151,7 @@ def render_templates(job_types, compute_envs, security_environment, api_name): output = template.render( job_types=job_types, - compute_envs_from_files=compute_envs, + compute_env_imports=compute_envs, security_environment=security_environment, api_name=api_name, json=json, diff --git a/apps/scale-cluster/scale-cluster-cf.yml.j2 b/apps/scale-cluster/scale-cluster-cf.yml.j2 index 3d4ae3825..8573c19eb 100644 --- a/apps/scale-cluster/scale-cluster-cf.yml.j2 +++ b/apps/scale-cluster/scale-cluster-cf.yml.j2 @@ -7,8 +7,8 @@ AWSTemplateFormatVersion: 2010-09-09 {% if 'name' in env and env['name'] != 'Default' %} {%- do compute_env_names.append(env['name']) -%} {% endif %} - {% if 'from_file' in env and env['from_file'] not in compute_env_names %} - {%- do compute_env_names.append(env['from_file']) -%} + {% if 'import' in env and env['import'] not in compute_env_names %} + {%- do compute_env_names.append(env['import']) -%} {% endif %} {% endfor %} {% endfor %} diff --git a/apps/workflow-cf.yml.j2 b/apps/workflow-cf.yml.j2 index 018524c09..d98606dc9 100644 --- a/apps/workflow-cf.yml.j2 +++ b/apps/workflow-cf.yml.j2 @@ -7,8 +7,8 @@ AWSTemplateFormatVersion: 2010-09-09 {% if 'name' in env and env['name'] != 'Default' %} {%- do compute_env_names.append(env['name']) -%} {% endif %} - {% if 'from_file' in env and 
env['from_file'] not in compute_env_names %} - {%- do compute_env_names.append(env['from_file']) -%} + {% if 'import' in env and env['import'] not in compute_env_names %} + {%- do compute_env_names.append(env['import']) -%} {% endif %} {% endfor %} {% endfor %} diff --git a/job_spec/SRG_GSLC.yml b/job_spec/SRG_GSLC.yml index cc78f9265..d326cd0d4 100644 --- a/job_spec/SRG_GSLC.yml +++ b/job_spec/SRG_GSLC.yml @@ -57,7 +57,7 @@ SRG_GSLC: - Ref::granules timeout: 10800 compute_environment: - from_file: SrgGslc + import: 'SrgGslc' vcpu: 1 gpu: 1 memory: 30500 diff --git a/job_spec/SRG_TIME_SERIES.yml b/job_spec/SRG_TIME_SERIES.yml index 1e0c1c328..519fe7329 100644 --- a/job_spec/SRG_TIME_SERIES.yml +++ b/job_spec/SRG_TIME_SERIES.yml @@ -71,7 +71,7 @@ SRG_TIME_SERIES: #timeout: 10800 timeout: 86400 compute_environment: - from_file: 'SrgGslc' + import: 'SrgGslc' vcpu: 1 # TODO revert gpu #gpu: 1 From bb2aad1e50f7a4eb6ece525f911ecc2fc0ed3c15 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Tue, 8 Oct 2024 13:37:14 -0400 Subject: [PATCH 037/163] rename compute_environments folder --- Makefile | 2 +- .../compute_environments.yml} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename job_spec/{compute_environments/COMPUTE_ENVIRONMENTS.yml => config/compute_environments.yml} (100%) diff --git a/Makefile b/Makefile index 2b1526462..54eea5d6d 100644 --- a/Makefile +++ b/Makefile @@ -36,7 +36,7 @@ install: python -m pip install -r requirements-all.txt files ?= job_spec/*.yml -compute_env_files ?= job_spec/compute_environments/*.yml +compute_env_files ?= job_spec/config/compute_environments.yml security_environment ?= ASF api_name ?= local cost_profile ?= DEFAULT diff --git a/job_spec/compute_environments/COMPUTE_ENVIRONMENTS.yml b/job_spec/config/compute_environments.yml similarity index 100% rename from job_spec/compute_environments/COMPUTE_ENVIRONMENTS.yml rename to job_spec/config/compute_environments.yml From 55504ff065682f55f5a40ff7d843df9d9ada060d Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Tue, 8 Oct 2024 13:53:16 -0400 Subject: [PATCH 038/163] insariscearia compute env --- job_spec/INSAR_ISCE.yml | 9 +++------ job_spec/config/compute_environments.yml | 3 +++ 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/job_spec/INSAR_ISCE.yml b/job_spec/INSAR_ISCE.yml index edca7a1c8..0b501b87b 100644 --- a/job_spec/INSAR_ISCE.yml +++ b/job_spec/INSAR_ISCE.yml @@ -92,10 +92,6 @@ INSAR_ISCE: DEFAULT: cost: 1.0 validators: [] - compute_environment: - name: 'InsarIsceAria' - allocation_type: EC2 - allocation_strategy: BEST_FIT_PROGRESSIVE tasks: - name: '' image: ghcr.io/access-cloud-based-insar/dockerizedtopsapp @@ -128,7 +124,7 @@ INSAR_ISCE: - Ref::unfiltered_coherence timeout: 21600 compute_environment: - name: 'Default' + import: 'InsarIsceAria' vcpu: 1 memory: 15500 secrets: @@ -149,7 +145,8 @@ INSAR_ISCE: - Ref::weather_model timeout: 10800 compute_environment: - name: 'Default' + # TODO: Do we want this to use this env or Default? 
+ import: 'InsarIsceAria' vcpu: 1 memory: 7500 secrets: diff --git a/job_spec/config/compute_environments.yml b/job_spec/config/compute_environments.yml index 24249b63d..5305aa630 100644 --- a/job_spec/config/compute_environments.yml +++ b/job_spec/config/compute_environments.yml @@ -4,3 +4,6 @@ compute_environments: instance_types: g6.2xlarge # Image ID for: /aws/service/ecs/optimized-ami/amazon-linux-2/gpu/recommended/image_id ami_id: ami-0729c079aae647cb3 + InsarIsceAria: + allocation_type: EC2 + allocation_strategy: BEST_FIT_PROGRESSIVE From 80a0f4936f8ade109cf58f4765fd7fc50608cdba Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Tue, 8 Oct 2024 11:41:54 -0800 Subject: [PATCH 039/163] update check_processing_time to handle list of results --- .../src/check_processing_time.py | 21 +++++++----- tests/test_check_processing_time.py | 34 ++++++++++++++++++- 2 files changed, 45 insertions(+), 10 deletions(-) diff --git a/apps/check-processing-time/src/check_processing_time.py b/apps/check-processing-time/src/check_processing_time.py index 7cc979b7a..27187d09b 100644 --- a/apps/check-processing-time/src/check_processing_time.py +++ b/apps/check-processing-time/src/check_processing_time.py @@ -1,4 +1,5 @@ import json +from typing import Union def get_time_from_attempts(attempts: list[dict]) -> float: @@ -9,15 +10,17 @@ def get_time_from_attempts(attempts: list[dict]) -> float: return (final_attempt['StoppedAt'] - final_attempt['StartedAt']) / 1000 -def get_time_from_result(result: dict) -> float: +def get_time_from_result(result: Union[list, dict]) -> Union[list, float]: + if isinstance(result, list): + return [get_time_from_result(item) for item in result] + if 'Attempts' in result: - attempts = result['Attempts'] - else: - attempts = json.loads(result['Cause'])['Attempts'] - return get_time_from_attempts(attempts) + return get_time_from_attempts(result['Attempts']) + + return get_time_from_attempts(json.loads(result['Cause'])['Attempts']) -def lambda_handler(event, context) -> list[float]: - results_dict = event['processing_results'] - results = [results_dict[key] for key in sorted(results_dict.keys())] - return list(map(get_time_from_result, results)) +def lambda_handler(event, _) -> list[Union[list, float]]: + processing_results = event['processing_results'] + result_list = [processing_results[key] for key in sorted(processing_results.keys())] + return get_time_from_result(result_list) diff --git a/tests/test_check_processing_time.py b/tests/test_check_processing_time.py index 3c2f7b662..7f1597732 100644 --- a/tests/test_check_processing_time.py +++ b/tests/test_check_processing_time.py @@ -49,6 +49,24 @@ def test_get_time_from_result(): assert check_processing_time.get_time_from_result(result) == 5.7 +def test_get_time_from_result_list(): + result = [ + { + 'Attempts': [ + {'Container': {}, 'StartedAt': 500, 'StatusReason': '', 'StoppedAt': 1000}, + {'Container': {}, 'StartedAt': 3000, 'StatusReason': '', 'StoppedAt': 8900} + ] + }, + { + 'Attempts': [ + {'Container': {}, 'StartedAt': 500, 'StatusReason': '', 'StoppedAt': 3000}, + {'Container': {}, 'StartedAt': 4000, 'StatusReason': '', 'StoppedAt': 4200} + ] + }, + ] + assert check_processing_time.get_time_from_result(result) == [5.9, 0.2] + + def test_get_time_from_result_failed(): result = { 'Error': 'States.TaskFailed', @@ -76,6 +94,20 @@ def test_lambda_handler(): '{"Container": {}, "StartedAt": 1500, "StatusReason": "", "StoppedAt": 2000}, ' '{"Container": {}, "StartedAt": 3000, "StatusReason": "", "StoppedAt": 9400}]}' }, + 
'step_2': [ + { + 'Attempts': [ + {'Container': {}, 'StartedAt': 500, 'StatusReason': '', 'StoppedAt': 1000}, + {'Container': {}, 'StartedAt': 3000, 'StatusReason': '', 'StoppedAt': 8900} + ] + }, + { + 'Attempts': [ + {'Container': {}, 'StartedAt': 500, 'StatusReason': '', 'StoppedAt': 3000}, + {'Container': {}, 'StartedAt': 4000, 'StatusReason': '', 'StoppedAt': 4200} + ] + }, + ] } } - assert check_processing_time.lambda_handler(event, None) == [5.7, 6.4] + assert check_processing_time.lambda_handler(event, None) == [5.7, 6.4, [5.9, 0.2]] From 9ce5fdcda5132bec12be70e1076c5fe901226ce4 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Tue, 8 Oct 2024 17:04:41 -0400 Subject: [PATCH 040/163] moved compute env logic to render_cf.py --- apps/compute-cf.yml.j2 | 21 ------------- .../handle-batch-event-cf.yml.j2 | 13 -------- apps/main-cf.yml.j2 | 13 -------- apps/render_cf.py | 30 +++++++++++++++++-- apps/scale-cluster/scale-cluster-cf.yml.j2 | 13 -------- apps/workflow-cf.yml.j2 | 13 -------- job_spec/ARIA_RAIDER.yml | 2 +- job_spec/AUTORIFT.yml | 2 +- job_spec/AUTORIFT_ITS_LIVE.yml | 2 +- job_spec/INSAR_GAMMA.yml | 2 +- job_spec/INSAR_ISCE_BURST.yml | 2 +- job_spec/INSAR_ISCE_MULTI_BURST.yml | 2 +- job_spec/RTC_GAMMA.yml | 2 +- job_spec/S1_CORRECTION_ITS_LIVE.yml | 2 +- job_spec/SRG_TIME_SERIES.yml | 2 +- job_spec/WATER_MAP.yml | 6 ++-- job_spec/WATER_MAP_EQ.yml | 4 +-- 17 files changed, 42 insertions(+), 89 deletions(-) diff --git a/apps/compute-cf.yml.j2 b/apps/compute-cf.yml.j2 index 58bd29ec1..ff689299a 100644 --- a/apps/compute-cf.yml.j2 +++ b/apps/compute-cf.yml.j2 @@ -1,26 +1,5 @@ AWSTemplateFormatVersion: 2010-09-09 - -{%- set compute_envs = [] -%} -{%- set compute_env_names = [] -%} -{%- for job_type, job_spec in job_types.items() -%} - {%- for task in job_spec['tasks'] -%} - {% set env = task['compute_environment'] %} - {% if 'name' in env and env['name'] != 'Default' and env['name'] not in compute_env_names %} - {%- do compute_envs.append(env) -%} - {%- do compute_env_names.append(env['name']) -%} - {% endif %} - {% if 'import' in env and env['import'] not in compute_env_names %} - {% set name = env['import'] %} - {% set env = compute_env_imports[name] %} - {% do env.update({'name': name}) %} - {% do compute_envs.append(env) %} - {% do compute_env_names.append(name) %} - {% endif %} - {% endfor %} -{% endfor %} - - Parameters: VpcId: diff --git a/apps/handle-batch-event/handle-batch-event-cf.yml.j2 b/apps/handle-batch-event/handle-batch-event-cf.yml.j2 index f9a81400a..ecbfea0ee 100644 --- a/apps/handle-batch-event/handle-batch-event-cf.yml.j2 +++ b/apps/handle-batch-event/handle-batch-event-cf.yml.j2 @@ -1,18 +1,5 @@ AWSTemplateFormatVersion: 2010-09-09 -{%- set compute_env_names = [] -%} -{%- for job_type, job_spec in job_types.items() -%} - {%- for task in job_spec['tasks'] -%} - {% set env = task['compute_environment'] %} - {% if 'name' in env and env['name'] != 'Default' %} - {%- do compute_env_names.append(env['name']) -%} - {% endif %} - {% if 'import' in env and env['import'] not in compute_env_names %} - {%- do compute_env_names.append(env['import']) -%} - {% endif %} - {% endfor %} -{% endfor %} - Parameters: JobQueueArn: diff --git a/apps/main-cf.yml.j2 b/apps/main-cf.yml.j2 index dcc47750e..ee7061246 100644 --- a/apps/main-cf.yml.j2 +++ b/apps/main-cf.yml.j2 @@ -1,18 +1,5 @@ AWSTemplateFormatVersion: 2010-09-09 -{%- set compute_env_names = [] -%} -{%- for job_type, job_spec in job_types.items() -%} - {%- for task in job_spec['tasks'] -%} - {% set env = 
task['compute_environment'] %} - {% if 'name' in env and env['name'] != 'Default' %} - {%- do compute_env_names.append(env['name']) -%} - {% endif %} - {% if 'import' in env and env['import'] not in compute_env_names %} - {%- do compute_env_names.append(env['import']) -%} - {% endif %} - {% endfor %} -{% endfor %} - Parameters: VpcId: diff --git a/apps/render_cf.py b/apps/render_cf.py index cd44bba63..40e86d715 100644 --- a/apps/render_cf.py +++ b/apps/render_cf.py @@ -151,7 +151,8 @@ def render_templates(job_types, compute_envs, security_environment, api_name): output = template.render( job_types=job_types, - compute_env_imports=compute_envs, + compute_envs=compute_envs, + compute_env_names=[env['name'] for env in compute_envs], security_environment=security_environment, api_name=api_name, json=json, @@ -165,6 +166,29 @@ def render_templates(job_types, compute_envs, security_environment, api_name): template_file.with_suffix('').write_text(output) +def get_compute_environments(job_types, compute_env_files): + compute_envs = [] + compute_env_names = [] + for _, job_spec in job_types.items(): + for task in job_spec['tasks']: + compute_env = task['compute_environment'] + if 'name' in compute_env: + name = compute_env['name'] + if name in compute_env_names: + raise ValueError(f'Compute environments must have unique names but the following is defined more than once: {name}.') + compute_envs.append(compute_env) + compute_env_names.append(name) + + for file in compute_env_files: + compute_envs_from_file = yaml.safe_load(file.read_text())['compute_environments'] + for compute_env_name in compute_envs_from_file: + compute_env = compute_envs_from_file[compute_env_name] + compute_env['name'] = compute_env_name + compute_envs.append(compute_env) + + return compute_envs + + def render_default_params_by_job_type(job_types: dict) -> None: default_params_by_job_type = { job_type: { @@ -210,9 +234,11 @@ def main(): for file in args.compute_environment_files: compute_envs_from_files.update(yaml.safe_load(file.read_text())['compute_environments']) + compute_envs = get_compute_environments(job_types, args.compute_environment_files) + render_default_params_by_job_type(job_types) render_costs(job_types, args.cost_profile) - render_templates(job_types, compute_envs_from_files, args.security_environment, args.api_name) + render_templates(job_types, compute_envs, args.security_environment, args.api_name) if __name__ == '__main__': diff --git a/apps/scale-cluster/scale-cluster-cf.yml.j2 b/apps/scale-cluster/scale-cluster-cf.yml.j2 index 8573c19eb..01e12c5bf 100644 --- a/apps/scale-cluster/scale-cluster-cf.yml.j2 +++ b/apps/scale-cluster/scale-cluster-cf.yml.j2 @@ -1,18 +1,5 @@ AWSTemplateFormatVersion: 2010-09-09 -{%- set compute_env_names = [] -%} -{%- for job_type, job_spec in job_types.items() -%} - {%- for task in job_spec['tasks'] -%} - {% set env = task['compute_environment'] %} - {% if 'name' in env and env['name'] != 'Default' %} - {%- do compute_env_names.append(env['name']) -%} - {% endif %} - {% if 'import' in env and env['import'] not in compute_env_names %} - {%- do compute_env_names.append(env['import']) -%} - {% endif %} - {% endfor %} -{% endfor %} - Parameters: ComputeEnvironmentArn: diff --git a/apps/workflow-cf.yml.j2 b/apps/workflow-cf.yml.j2 index d98606dc9..48e4a262d 100644 --- a/apps/workflow-cf.yml.j2 +++ b/apps/workflow-cf.yml.j2 @@ -1,18 +1,5 @@ AWSTemplateFormatVersion: 2010-09-09 -{%- set compute_env_names = [] -%} -{%- for job_type, job_spec in job_types.items() -%} - {%- for task in 
job_spec['tasks'] -%} - {% set env = task['compute_environment'] %} - {% if 'name' in env and env['name'] != 'Default' %} - {%- do compute_env_names.append(env['name']) -%} - {% endif %} - {% if 'import' in env and env['import'] not in compute_env_names %} - {%- do compute_env_names.append(env['import']) -%} - {% endif %} - {% endfor %} -{% endfor %} - Parameters: JobQueueArn: diff --git a/job_spec/ARIA_RAIDER.yml b/job_spec/ARIA_RAIDER.yml index 0dc69f4bf..2cbe8ac23 100644 --- a/job_spec/ARIA_RAIDER.yml +++ b/job_spec/ARIA_RAIDER.yml @@ -41,7 +41,7 @@ ARIA_RAIDER: - Ref::job_id timeout: 10800 compute_environment: - name: 'Default' + import: 'Default' vcpu: 1 memory: 7500 secrets: diff --git a/job_spec/AUTORIFT.yml b/job_spec/AUTORIFT.yml index f4a4a244c..e0496c07c 100644 --- a/job_spec/AUTORIFT.yml +++ b/job_spec/AUTORIFT.yml @@ -56,7 +56,7 @@ AUTORIFT: - Ref::granules timeout: 10800 compute_environment: - name: 'Default' + import: 'Default' vcpu: 1 memory: 31500 secrets: diff --git a/job_spec/AUTORIFT_ITS_LIVE.yml b/job_spec/AUTORIFT_ITS_LIVE.yml index ac4fd0fb1..3776fea73 100644 --- a/job_spec/AUTORIFT_ITS_LIVE.yml +++ b/job_spec/AUTORIFT_ITS_LIVE.yml @@ -70,7 +70,7 @@ AUTORIFT: - Ref::granules timeout: 10800 compute_environment: - name: 'Default' + import: 'Default' vcpu: 1 memory: 31500 secrets: diff --git a/job_spec/INSAR_GAMMA.yml b/job_spec/INSAR_GAMMA.yml index bf5af6c84..70cae14b1 100644 --- a/job_spec/INSAR_GAMMA.yml +++ b/job_spec/INSAR_GAMMA.yml @@ -115,7 +115,7 @@ INSAR_GAMMA: - Ref::granules timeout: 10800 compute_environment: - name: 'Default' + import: 'Default' vcpu: 1 memory: 31500 secrets: diff --git a/job_spec/INSAR_ISCE_BURST.yml b/job_spec/INSAR_ISCE_BURST.yml index 53c871799..3bade566c 100644 --- a/job_spec/INSAR_ISCE_BURST.yml +++ b/job_spec/INSAR_ISCE_BURST.yml @@ -63,7 +63,7 @@ INSAR_ISCE_BURST: - Ref::granules timeout: 5400 compute_environment: - name: 'Default' + import: 'Default' vcpu: 1 memory: 7600 secrets: diff --git a/job_spec/INSAR_ISCE_MULTI_BURST.yml b/job_spec/INSAR_ISCE_MULTI_BURST.yml index 5f0a3aa15..30bc3c3fb 100644 --- a/job_spec/INSAR_ISCE_MULTI_BURST.yml +++ b/job_spec/INSAR_ISCE_MULTI_BURST.yml @@ -80,7 +80,7 @@ INSAR_ISCE_MULTI_BURST: - Ref::secondary timeout: 126000 # 35 hours compute_environment: - name: 'Default' + import: 'Default' vcpu: 1 memory: 4 # Memory is always overridden by the step function secrets: diff --git a/job_spec/RTC_GAMMA.yml b/job_spec/RTC_GAMMA.yml index 3949d8518..75913e5ed 100644 --- a/job_spec/RTC_GAMMA.yml +++ b/job_spec/RTC_GAMMA.yml @@ -138,7 +138,7 @@ RTC_GAMMA: - Ref::granules timeout: 36000 compute_environment: - name: 'Default' + import: 'Default' vcpu: 1 memory: 31500 secrets: diff --git a/job_spec/S1_CORRECTION_ITS_LIVE.yml b/job_spec/S1_CORRECTION_ITS_LIVE.yml index c3732596f..e2a222672 100644 --- a/job_spec/S1_CORRECTION_ITS_LIVE.yml +++ b/job_spec/S1_CORRECTION_ITS_LIVE.yml @@ -48,7 +48,7 @@ S1_CORRECTION_TEST: - Ref::granules timeout: 10800 compute_environment: - name: 'Default' + import: 'Default' vcpu: 1 memory: 15750 secrets: diff --git a/job_spec/SRG_TIME_SERIES.yml b/job_spec/SRG_TIME_SERIES.yml index 519fe7329..622bdd40d 100644 --- a/job_spec/SRG_TIME_SERIES.yml +++ b/job_spec/SRG_TIME_SERIES.yml @@ -95,7 +95,7 @@ SRG_TIME_SERIES: - --use-granules-from-s3 timeout: 86400 # TODO compute_environment: - name: 'Default' + import: 'Default' vcpu: 1 memory: 30500 # TODO # TODO secrets? 
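Note: every job spec touched in this patch replaces an inline compute_environment name with an import reference, and render_cf.py resolves that reference when it builds each Batch submit-job step. A minimal sketch of the queue resolution, following the logic introduced in PATCH 036; the helper name get_job_queue_ref and the sample tasks below are illustrative only, not part of the repo:

def get_job_queue_ref(task: dict) -> str:
    # Resolve the compute environment name from either an inline definition
    # ('name') or a reference into the shared compute environments file
    # ('import'), mirroring get_step_for_batch_submit_job in render_cf.py.
    env = task['compute_environment']
    compute_environment = env['import'] if 'import' in env else env['name']
    # 'Default' maps to the stack's main job queue; any other environment
    # gets its own CloudFormation parameter, e.g. 'SrgGslcJobQueueArn'.
    if compute_environment == 'Default':
        return 'JobQueueArn'
    return compute_environment + 'JobQueueArn'


if __name__ == '__main__':
    assert get_job_queue_ref({'compute_environment': {'import': 'Default'}}) == 'JobQueueArn'
    assert get_job_queue_ref({'compute_environment': {'import': 'SrgGslc'}}) == 'SrgGslcJobQueueArn'
    assert get_job_queue_ref({'compute_environment': {'name': 'InsarIsceAria'}}) == 'InsarIsceAriaJobQueueArn'

Queues for non-default environments are emitted per compute environment by the templates, which is why the name is simply suffixed with JobQueueArn.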
diff --git a/job_spec/WATER_MAP.yml b/job_spec/WATER_MAP.yml index 4fd0c0770..26e07f10d 100644 --- a/job_spec/WATER_MAP.yml +++ b/job_spec/WATER_MAP.yml @@ -151,7 +151,7 @@ WATER_MAP: - Ref::granules timeout: 36000 compute_environment: - name: 'Default' + import: 'Default' vcpu: 1 memory: 31500 secrets: @@ -178,7 +178,7 @@ WATER_MAP: - Ref::membership_threshold timeout: 36000 compute_environment: - name: 'Default' + import: 'Default' vcpu: 1 memory: 126000 - name: FLOOD_MAP @@ -206,6 +206,6 @@ WATER_MAP: - Ref::minimization_metric timeout: 86400 compute_environment: - name: 'Default' + import: 'Default' vcpu: 1 memory: 126000 diff --git a/job_spec/WATER_MAP_EQ.yml b/job_spec/WATER_MAP_EQ.yml index 8daf295b4..f46c97afa 100644 --- a/job_spec/WATER_MAP_EQ.yml +++ b/job_spec/WATER_MAP_EQ.yml @@ -93,7 +93,7 @@ WATER_MAP_EQ: - Ref::granules timeout: 36000 compute_environment: - name: 'Default' + import: 'Default' vcpu: 1 memory: 31500 secrets: @@ -114,6 +114,6 @@ WATER_MAP_EQ: - Ref::hand_fraction timeout: 36000 compute_environment: - name: 'Default' + import: 'Default' vcpu: 1 memory: 126000 From d3d57026bf169cf2334b807206a00af10f4fdd2d Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Wed, 9 Oct 2024 12:58:24 -0400 Subject: [PATCH 041/163] refactor --- apps/render_cf.py | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/apps/render_cf.py b/apps/render_cf.py index 40e86d715..2184fd537 100644 --- a/apps/render_cf.py +++ b/apps/render_cf.py @@ -166,7 +166,7 @@ def render_templates(job_types, compute_envs, security_environment, api_name): template_file.with_suffix('').write_text(output) -def get_compute_environments(job_types, compute_env_files): +def get_compute_environments(job_types: dict, compute_env_file: Path) -> list[dict]: compute_envs = [] compute_env_names = [] for _, job_spec in job_types.items(): @@ -179,12 +179,11 @@ def get_compute_environments(job_types, compute_env_files): compute_envs.append(compute_env) compute_env_names.append(name) - for file in compute_env_files: - compute_envs_from_file = yaml.safe_load(file.read_text())['compute_environments'] - for compute_env_name in compute_envs_from_file: - compute_env = compute_envs_from_file[compute_env_name] - compute_env['name'] = compute_env_name - compute_envs.append(compute_env) + compute_envs_from_file = yaml.safe_load(compute_env_file.read_text())['compute_environments'] + for compute_env_name in compute_envs_from_file: + compute_env = compute_envs_from_file[compute_env_name] + compute_env['name'] = compute_env_name + compute_envs.append(compute_env) return compute_envs @@ -216,7 +215,7 @@ def render_costs(job_types: dict, cost_profile: str) -> None: def main(): parser = argparse.ArgumentParser() parser.add_argument('-j', '--job-spec-files', required=True, nargs='+', type=Path) - parser.add_argument('-e', '--compute-environment-files', nargs='+', type=Path) + parser.add_argument('-e', '--compute-environment-file', type=Path) parser.add_argument('-s', '--security-environment', default='ASF', choices=['ASF', 'EDC', 'JPL', 'JPL-public']) parser.add_argument('-n', '--api-name', required=True) parser.add_argument('-c', '--cost-profile', default='DEFAULT', choices=['DEFAULT', 'EDC']) @@ -230,11 +229,7 @@ def main(): for task in job_spec['tasks']: task['name'] = job_type + '_' + task['name'] if task['name'] else job_type - compute_envs_from_files = {} - for file in args.compute_environment_files: - compute_envs_from_files.update(yaml.safe_load(file.read_text())['compute_environments']) - - 
compute_envs = get_compute_environments(job_types, args.compute_environment_files) + compute_envs = get_compute_environments(job_types, args.compute_environment_file) render_default_params_by_job_type(job_types) render_costs(job_types, args.cost_profile) From 95c63ebac3c3bd82297a11215eabba3fc3329dc0 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Wed, 9 Oct 2024 13:07:41 -0400 Subject: [PATCH 042/163] add exception to imports --- apps/render_cf.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/apps/render_cf.py b/apps/render_cf.py index 2184fd537..05143b529 100644 --- a/apps/render_cf.py +++ b/apps/render_cf.py @@ -181,6 +181,8 @@ def get_compute_environments(job_types: dict, compute_env_file: Path) -> list[di compute_envs_from_file = yaml.safe_load(compute_env_file.read_text())['compute_environments'] for compute_env_name in compute_envs_from_file: + if compute_env_name in compute_env_names: + raise ValueError(f'Compute environments must have unique names but the following is defined more than once: {name}.') compute_env = compute_envs_from_file[compute_env_name] compute_env['name'] = compute_env_name compute_envs.append(compute_env) From 3b39f145656031532058ac4cda2e27486fc1f90f Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Wed, 9 Oct 2024 13:09:26 -0400 Subject: [PATCH 043/163] Update apps/render_cf.py Co-authored-by: Jake Herrmann --- apps/render_cf.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/apps/render_cf.py b/apps/render_cf.py index 05143b529..cb028d909 100644 --- a/apps/render_cf.py +++ b/apps/render_cf.py @@ -93,12 +93,10 @@ def get_job_parameters(item: str, items: str, job_spec: dict) -> dict: def get_step_for_batch_submit_job(task: dict) -> dict: - compute_environment = None - if 'compute_environment' in task: - if 'import' in task['compute_environment']: - compute_environment = task['compute_environment']['import'] - else: - compute_environment = task['compute_environment']['name'] + if 'import' in task['compute_environment']: + compute_environment = task['compute_environment']['import'] + else: + compute_environment = task['compute_environment']['name'] job_queue = 'JobQueueArn' if compute_environment == 'Default' else compute_environment + 'JobQueueArn' return { 'Type': 'Task', From bc96682538afcc0f580945256661d20240b70dfa Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Wed, 9 Oct 2024 13:14:56 -0400 Subject: [PATCH 044/163] compute_env_name to name --- apps/render_cf.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/apps/render_cf.py b/apps/render_cf.py index 05143b529..f6bc48a07 100644 --- a/apps/render_cf.py +++ b/apps/render_cf.py @@ -180,11 +180,11 @@ def get_compute_environments(job_types: dict, compute_env_file: Path) -> list[di compute_env_names.append(name) compute_envs_from_file = yaml.safe_load(compute_env_file.read_text())['compute_environments'] - for compute_env_name in compute_envs_from_file: - if compute_env_name in compute_env_names: + for name in compute_envs_from_file: + if name in compute_env_names: raise ValueError(f'Compute environments must have unique names but the following is defined more than once: {name}.') - compute_env = compute_envs_from_file[compute_env_name] - compute_env['name'] = compute_env_name + compute_env = compute_envs_from_file[name] + compute_env['name'] = name compute_envs.append(compute_env) return compute_envs From 456561b06ee3efd43abaac19e97231c2658e8750 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Wed, 9 Oct 2024 17:37:54 -0400 Subject: [PATCH 
045/163] only parse compute env file if one was given --- apps/render_cf.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/apps/render_cf.py b/apps/render_cf.py index c33a76b27..e3bf7cd7f 100644 --- a/apps/render_cf.py +++ b/apps/render_cf.py @@ -177,13 +177,14 @@ def get_compute_environments(job_types: dict, compute_env_file: Path) -> list[di compute_envs.append(compute_env) compute_env_names.append(name) - compute_envs_from_file = yaml.safe_load(compute_env_file.read_text())['compute_environments'] - for name in compute_envs_from_file: - if name in compute_env_names: - raise ValueError(f'Compute environments must have unique names but the following is defined more than once: {name}.') - compute_env = compute_envs_from_file[name] - compute_env['name'] = name - compute_envs.append(compute_env) + if compute_env_file: + compute_envs_from_file = yaml.safe_load(compute_env_file.read_text())['compute_environments'] + for name in compute_envs_from_file: + if name in compute_env_names: + raise ValueError(f'Compute environments must have unique names but the following is defined more than once: {name}.') + compute_env = compute_envs_from_file[name] + compute_env['name'] = name + compute_envs.append(compute_env) return compute_envs From fd748b178993bcc8905b668dcd36423b4a3fd6ca Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Wed, 9 Oct 2024 17:58:30 -0400 Subject: [PATCH 046/163] ensure that imported compute envs are defined --- apps/render_cf.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/apps/render_cf.py b/apps/render_cf.py index e3bf7cd7f..18a9db8d1 100644 --- a/apps/render_cf.py +++ b/apps/render_cf.py @@ -167,24 +167,38 @@ def render_templates(job_types, compute_envs, security_environment, api_name): def get_compute_environments(job_types: dict, compute_env_file: Path) -> list[dict]: compute_envs = [] compute_env_names = [] + compute_env_imports = set() for _, job_spec in job_types.items(): for task in job_spec['tasks']: compute_env = task['compute_environment'] if 'name' in compute_env: name = compute_env['name'] if name in compute_env_names: - raise ValueError(f'Compute environments must have unique names but the following is defined more than once: {name}.') + raise NameError( + f'Compute envs must have unique names but the following is defined more than once: {name}.' + ) compute_envs.append(compute_env) compute_env_names.append(name) + elif 'import' in compute_env and compute_env['import'] != 'Default': + compute_env_imports.add(compute_env['import']) if compute_env_file: compute_envs_from_file = yaml.safe_load(compute_env_file.read_text())['compute_environments'] for name in compute_envs_from_file: if name in compute_env_names: - raise ValueError(f'Compute environments must have unique names but the following is defined more than once: {name}.') + raise NameError( + f'Compute envs must have unique names but the following is defined more than once: {name}.' + ) compute_env = compute_envs_from_file[name] compute_env['name'] = name compute_envs.append(compute_env) + compute_env_names.append(name) + + for name in compute_env_imports: + if name not in compute_envs_from_file: + raise NotImplementedError( + f'The following compute env is imported but not defined in the compute envs file: {name}.' 
+ ) return compute_envs From 6bbb1def3e62b65188b79a044bb8a9d314958a10 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Thu, 10 Oct 2024 11:59:16 -0400 Subject: [PATCH 047/163] Update apps/render_cf.py Co-authored-by: Jake Herrmann --- apps/render_cf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/render_cf.py b/apps/render_cf.py index 18a9db8d1..3d76cc0de 100644 --- a/apps/render_cf.py +++ b/apps/render_cf.py @@ -174,7 +174,7 @@ def get_compute_environments(job_types: dict, compute_env_file: Path) -> list[di if 'name' in compute_env: name = compute_env['name'] if name in compute_env_names: - raise NameError( + raise ValueError( f'Compute envs must have unique names but the following is defined more than once: {name}.' ) compute_envs.append(compute_env) From 9ae8fb1a31ae3e5020ab0fd379e8fe1faed9fe20 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Thu, 10 Oct 2024 11:59:25 -0400 Subject: [PATCH 048/163] Update apps/render_cf.py Co-authored-by: Jake Herrmann --- apps/render_cf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/render_cf.py b/apps/render_cf.py index 3d76cc0de..f44dad04d 100644 --- a/apps/render_cf.py +++ b/apps/render_cf.py @@ -186,7 +186,7 @@ def get_compute_environments(job_types: dict, compute_env_file: Path) -> list[di compute_envs_from_file = yaml.safe_load(compute_env_file.read_text())['compute_environments'] for name in compute_envs_from_file: if name in compute_env_names: - raise NameError( + raise ValueError( f'Compute envs must have unique names but the following is defined more than once: {name}.' ) compute_env = compute_envs_from_file[name] From 76e0fc4b2cb0d5a9594ab279ab3a7be13bc5ef99 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Thu, 10 Oct 2024 11:59:48 -0400 Subject: [PATCH 049/163] Update apps/render_cf.py Co-authored-by: Jake Herrmann --- apps/render_cf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/render_cf.py b/apps/render_cf.py index f44dad04d..ec7c1b628 100644 --- a/apps/render_cf.py +++ b/apps/render_cf.py @@ -196,7 +196,7 @@ def get_compute_environments(job_types: dict, compute_env_file: Path) -> list[di for name in compute_env_imports: if name not in compute_envs_from_file: - raise NotImplementedError( + raise ValueError( f'The following compute env is imported but not defined in the compute envs file: {name}.' 
) From 146769c0429bcf9a6003e0d6d7914efafa2f939d Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Thu, 10 Oct 2024 12:03:05 -0400 Subject: [PATCH 050/163] Update Makefile Co-authored-by: Jake Herrmann --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 54eea5d6d..bfd4de1d8 100644 --- a/Makefile +++ b/Makefile @@ -36,7 +36,7 @@ install: python -m pip install -r requirements-all.txt files ?= job_spec/*.yml -compute_env_files ?= job_spec/config/compute_environments.yml +compute_env_file ?= job_spec/config/compute_environments.yml security_environment ?= ASF api_name ?= local cost_profile ?= DEFAULT From 071ce78ba76ee3201af93dd0aca52d69629b96ee Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Thu, 10 Oct 2024 12:03:16 -0400 Subject: [PATCH 051/163] Update Makefile Co-authored-by: Jake Herrmann --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index bfd4de1d8..0effa7bc1 100644 --- a/Makefile +++ b/Makefile @@ -41,7 +41,7 @@ security_environment ?= ASF api_name ?= local cost_profile ?= DEFAULT render: - @echo rendering $(files) for API $(api_name) and security environment $(security_environment); python apps/render_cf.py -j $(files) -e $(compute_env_files) -s $(security_environment) -n $(api_name) -c $(cost_profile) + @echo rendering $(files) for API $(api_name) and security environment $(security_environment); python apps/render_cf.py -j $(files) -e $(compute_env_file) -s $(security_environment) -n $(api_name) -c $(cost_profile) static: flake8 openapi-validate cfn-lint From 692a8ef407d8277d9bf59c350ae5b8eeef9b690b Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Thu, 10 Oct 2024 12:03:39 -0400 Subject: [PATCH 052/163] Update apps/compute-cf.yml.j2 Co-authored-by: Jake Herrmann --- apps/compute-cf.yml.j2 | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/apps/compute-cf.yml.j2 b/apps/compute-cf.yml.j2 index ff689299a..d863a0198 100644 --- a/apps/compute-cf.yml.j2 +++ b/apps/compute-cf.yml.j2 @@ -29,8 +29,7 @@ Outputs: JobQueueArn: Value: !Ref BatchJobQueue - {% for env in compute_envs %} - {% set name = env['name'] %} + {% for name in compute_env_names %} {{ name }}ComputeEnvironmentArn: Value: !Ref {{ name }}ComputeEnvironment From 802e8b43c481af4b81c04053c0eed9f6dcc1a714 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Thu, 10 Oct 2024 12:25:05 -0400 Subject: [PATCH 053/163] Update apps/render_cf.py Co-authored-by: Jake Herrmann --- apps/render_cf.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/apps/render_cf.py b/apps/render_cf.py index ec7c1b628..53917e330 100644 --- a/apps/render_cf.py +++ b/apps/render_cf.py @@ -181,6 +181,8 @@ def get_compute_environments(job_types: dict, compute_env_file: Path) -> list[di compute_env_names.append(name) elif 'import' in compute_env and compute_env['import'] != 'Default': compute_env_imports.add(compute_env['import']) + else: + assert compute_env['import'] == 'Default' if compute_env_file: compute_envs_from_file = yaml.safe_load(compute_env_file.read_text())['compute_environments'] From 2a2ce44324e786a4a7839caca496b71710677d3d Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Thu, 10 Oct 2024 12:25:31 -0400 Subject: [PATCH 054/163] Update apps/render_cf.py Co-authored-by: Jake Herrmann --- apps/render_cf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/render_cf.py b/apps/render_cf.py index 53917e330..f73e0dcd5 100644 --- a/apps/render_cf.py +++ b/apps/render_cf.py @@ -164,7 +164,7 @@ def 
render_templates(job_types, compute_envs, security_environment, api_name): template_file.with_suffix('').write_text(output) -def get_compute_environments(job_types: dict, compute_env_file: Path) -> list[dict]: +def get_compute_environments(job_types: dict, compute_env_file: Optional[Path]) -> list[dict]: compute_envs = [] compute_env_names = [] compute_env_imports = set() From 78b8dc6cb3c938f716bdaade876c59b2e249b226 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Thu, 10 Oct 2024 12:28:36 -0400 Subject: [PATCH 055/163] add Optional and change list to set --- apps/render_cf.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/apps/render_cf.py b/apps/render_cf.py index f73e0dcd5..4db0af06b 100644 --- a/apps/render_cf.py +++ b/apps/render_cf.py @@ -1,6 +1,7 @@ import argparse import json from pathlib import Path +from typing import Optional import jinja2 import yaml @@ -166,7 +167,7 @@ def render_templates(job_types, compute_envs, security_environment, api_name): def get_compute_environments(job_types: dict, compute_env_file: Optional[Path]) -> list[dict]: compute_envs = [] - compute_env_names = [] + compute_env_names = set() compute_env_imports = set() for _, job_spec in job_types.items(): for task in job_spec['tasks']: @@ -178,7 +179,7 @@ def get_compute_environments(job_types: dict, compute_env_file: Optional[Path]) f'Compute envs must have unique names but the following is defined more than once: {name}.' ) compute_envs.append(compute_env) - compute_env_names.append(name) + compute_env_names.add(name) elif 'import' in compute_env and compute_env['import'] != 'Default': compute_env_imports.add(compute_env['import']) else: @@ -194,7 +195,7 @@ def get_compute_environments(job_types: dict, compute_env_file: Optional[Path]) compute_env = compute_envs_from_file[name] compute_env['name'] = name compute_envs.append(compute_env) - compute_env_names.append(name) + compute_env_names.add(name) for name in compute_env_imports: if name not in compute_envs_from_file: From ae71a169426319404e5ad252cad3c414f46e3b5b Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Thu, 10 Oct 2024 15:19:48 -0400 Subject: [PATCH 056/163] Update apps/render_cf.py Co-authored-by: Jake Herrmann --- apps/render_cf.py | 1 + 1 file changed, 1 insertion(+) diff --git a/apps/render_cf.py b/apps/render_cf.py index 4db0af06b..b72087da8 100644 --- a/apps/render_cf.py +++ b/apps/render_cf.py @@ -174,6 +174,7 @@ def get_compute_environments(job_types: dict, compute_env_file: Optional[Path]) compute_env = task['compute_environment'] if 'name' in compute_env: name = compute_env['name'] + assert name != 'Default' if name in compute_env_names: raise ValueError( f'Compute envs must have unique names but the following is defined more than once: {name}.' 
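Note: patches 040 through 056 converge on three invariants for get_compute_environments: inline environment names must be unique, names loaded from the shared config file must not collide with inline ones, and every non-Default import must be defined in that file. A condensed sketch of the resulting behavior, simplified from apps/render_cf.py; the abbreviated error messages and the empty-dict default for envs_from_file are assumptions of this sketch, not the shipped code:

from pathlib import Path
from typing import Optional

import yaml


def get_compute_environments(job_types: dict, compute_env_file: Optional[Path]) -> list[dict]:
    compute_envs: list[dict] = []
    names: set[str] = set()
    imports: set[str] = set()

    # Pass 1: collect inline environments from the job specs and remember
    # which named environments are imported rather than defined inline.
    for job_spec in job_types.values():
        for task in job_spec['tasks']:
            env = task['compute_environment']
            if 'name' in env:
                if env['name'] in names:
                    raise ValueError(f'duplicate compute env: {env["name"]}')
                compute_envs.append(env)
                names.add(env['name'])
            elif env['import'] != 'Default':
                imports.add(env['import'])

    # Pass 2: merge environments from the shared config file, then verify
    # that every imported name was actually defined there.
    envs_from_file: dict = {}
    if compute_env_file:
        envs_from_file = yaml.safe_load(compute_env_file.read_text())['compute_environments']
        for name, env in envs_from_file.items():
            if name in names:
                raise ValueError(f'duplicate compute env: {name}')
            compute_envs.append({'name': name, **env})
            names.add(name)

    undefined = imports - set(envs_from_file)
    if undefined:
        raise ValueError(f'imported but undefined compute envs: {undefined}')

    return compute_envs

The Default name itself is reserved: PATCH 056 above asserts it is never used for an inline environment, and PATCH 057 below adds the same guard for file-defined environments, a check this sketch omits for brevity.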
From 7956857eadfcd07877bedb2de71ce566c468cf67 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Thu, 10 Oct 2024 15:22:55 -0400 Subject: [PATCH 057/163] Update apps/render_cf.py Co-authored-by: Jake Herrmann --- apps/render_cf.py | 1 + 1 file changed, 1 insertion(+) diff --git a/apps/render_cf.py b/apps/render_cf.py index b72087da8..1836881a2 100644 --- a/apps/render_cf.py +++ b/apps/render_cf.py @@ -189,6 +189,7 @@ def get_compute_environments(job_types: dict, compute_env_file: Optional[Path]) if compute_env_file: compute_envs_from_file = yaml.safe_load(compute_env_file.read_text())['compute_environments'] for name in compute_envs_from_file: + assert name != 'Default' if name in compute_env_names: raise ValueError( f'Compute envs must have unique names but the following is defined more than once: {name}.' From c4f14fbf06f315cde69ae4a028f136290a423c25 Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Thu, 10 Oct 2024 14:32:46 -0800 Subject: [PATCH 058/163] update srg time series job spec --- job_spec/SRG_TIME_SERIES.yml | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/job_spec/SRG_TIME_SERIES.yml b/job_spec/SRG_TIME_SERIES.yml index 622bdd40d..a7ef8d5fc 100644 --- a/job_spec/SRG_TIME_SERIES.yml +++ b/job_spec/SRG_TIME_SERIES.yml @@ -36,25 +36,18 @@ SRG_TIME_SERIES: description: min lon, min lat, max lon, max lat in EPSG:4326 type: number example: -116.583 + # TODO validators validators: [] cost_profiles: DEFAULT: cost: 1.0 -# TODO revert compute_environment -# compute_environment: -# # TODO use different compute environments for back_projection and time_series -# # TODO use same compute env as SRG_GSLC.yml -# name: SrgBackProjection -# instance_types: g6.2xlarge -# # Image ID for: /aws/service/ecs/optimized-ami/amazon-linux-2/gpu/recommended/image_id -# ami_id: ami-0729c079aae647cb3 tasks: - name: BACK_PROJECTION map: for granule in granules image: ghcr.io/asfhyp3/hyp3-srg # TODO revert image_tag #image_tag: latest.gpu - image_tag: 0.8.1.dev16_g3983395 + image_tag: 0.8.1.dev20_g0eb9d69 command: - ++process - back_projection @@ -66,12 +59,15 @@ SRG_TIME_SERIES: - '!Ref Bucket' - --bucket-prefix - Ref::bucket_prefix + - --use-gslc-prefix - Ref::granule # TODO revert timeout for gpu env #timeout: 10800 timeout: 86400 compute_environment: - import: 'SrgGslc' + # TODO revert import + #import: SrgGslc + import: Default vcpu: 1 # TODO revert gpu #gpu: 1 @@ -81,8 +77,8 @@ SRG_TIME_SERIES: - EARTHDATA_PASSWORD - name: '' image: ghcr.io/asfhyp3/hyp3-srg - # TODO decide on appropriate image_tag - image_tag: 0.8.1.dev16_g3983395 + # TODO image tag + image_tag: 0.8.1.dev20_g0eb9d69 command: - ++process - time_series @@ -92,10 +88,10 @@ SRG_TIME_SERIES: - '!Ref Bucket' - --bucket-prefix - Ref::bucket_prefix - - --use-granules-from-s3 + - --use-gslc-prefix timeout: 86400 # TODO compute_environment: - import: 'Default' + import: Default vcpu: 1 memory: 30500 # TODO # TODO secrets? 
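The `map: for granule in granules` entry added to SRG_TIME_SERIES.yml above is the four-token expression that render_cf.py's map parser turns into a Step Functions Map state: the second token names the per-iteration parameter and the fourth names the array parameter being fanned out over (the parser appears as parse_task_map in the next patch and is renamed parse_job_step_map in patch 060). One caveat in the shipped code: `assert tokens[0], tokens[2] == ('for', 'in')` is parsed by Python as an assert on `tokens[0]` with the comparison serving as the failure message, so only the token count and the truthiness of the first token are actually checked. A sketch with the tuple comparison genuinely asserted:

    def parse_job_step_map(job_step_map: str) -> tuple[str, str]:
        # 'for granule in granules' -> ('granule', 'granules')
        tokens = job_step_map.split(' ')
        assert len(tokens) == 4
        assert (tokens[0], tokens[2]) == ('for', 'in')  # now a real structural check
        return tokens[1], tokens[3]

    item, items = parse_job_step_map('for granule in granules')
    assert (item, items) == ('granule', 'granules')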
From 6d832e4110f16d341f341dcf9680c930ece31c49 Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Thu, 10 Oct 2024 15:54:26 -0800 Subject: [PATCH 059/163] do not overwrite original job params in start execution worker --- apps/render_cf.py | 18 +++++++++--------- .../src/set_batch_overrides.py | 10 +++++----- .../src/start_execution_worker.py | 7 +------ apps/step-function.json.j2 | 2 +- tests/test_set_batch_overrides.py | 18 +++++++++--------- tests/test_start_execution_worker.py | 13 ++++++++++++- 6 files changed, 37 insertions(+), 31 deletions(-) diff --git a/apps/render_cf.py b/apps/render_cf.py index 1836881a2..5133c1f5d 100644 --- a/apps/render_cf.py +++ b/apps/render_cf.py @@ -54,18 +54,18 @@ def get_step_for_task(task: dict, index: int, next_step_name: str, job_spec: dic def get_step_for_map_task(task: dict, job_spec: dict) -> dict: item, items = parse_task_map(task['map']) - job_parameters = get_job_parameters(item, items, job_spec) + batch_job_parameters = get_batch_job_parameters(item, items, job_spec) submit_job_step = get_step_for_batch_submit_job(task) submit_job_step['End'] = True submit_job_step_name = task['name'] + '_SUBMIT_JOB' return { 'Type': 'Map', - 'ItemsPath': f'$.original_job_parameters.{items}', + 'ItemsPath': f'$.job_parameters.{items}', 'ItemSelector': { 'job_id.$': '$.job_id', 'priority.$': '$.priority', 'container_overrides.$': '$.container_overrides', - 'job_parameters': job_parameters, + 'batch_job_parameters': batch_job_parameters, }, 'ItemProcessor': { 'StartAt': submit_job_step_name, @@ -83,14 +83,14 @@ def parse_task_map(task_map: str) -> tuple[str, str]: return tokens[1], tokens[3] -def get_job_parameters(item: str, items: str, job_spec: dict) -> dict: - job_parameters = { - f'{param}.$': f'$.job_parameters.{param}' +def get_batch_job_parameters(item: str, items: str, job_spec: dict) -> dict: + batch_job_parameters = { + f'{param}.$': f'$.batch_job_parameters.{param}' for param in job_spec['parameters'] if param != items } - job_parameters[f'{item}.$'] = '$$.Map.Item.Value' - return job_parameters + batch_job_parameters[f'{item}.$'] = '$$.Map.Item.Value' + return batch_job_parameters def get_step_for_batch_submit_job(task: dict) -> dict: @@ -108,7 +108,7 @@ def get_step_for_batch_submit_job(task: dict) -> dict: 'JobQueue': '${' + job_queue + '}', 'ShareIdentifier': 'default', 'SchedulingPriorityOverride.$': '$.priority', - 'Parameters.$': '$.job_parameters', + 'Parameters.$': '$.batch_job_parameters', 'ContainerOverrides.$': '$.container_overrides', 'RetryStrategy': { 'Attempts': 3 diff --git a/apps/set-batch-overrides/src/set_batch_overrides.py b/apps/set-batch-overrides/src/set_batch_overrides.py index 0a9dc6627..9d419ec31 100644 --- a/apps/set-batch-overrides/src/set_batch_overrides.py +++ b/apps/set-batch-overrides/src/set_batch_overrides.py @@ -35,7 +35,7 @@ def get_container_overrides(memory: str, omp_num_threads: str = None) -> dict: def get_insar_isce_burst_memory(job_parameters: dict) -> str: looks = job_parameters['looks'] - bursts = len(job_parameters['reference'].split(' ')) + bursts = len(job_parameters['reference']) if looks == '5x1': if bursts < 2: return INSAR_ISCE_BURST_MEMORY_8G @@ -70,16 +70,16 @@ def lambda_handler(event: dict, _) -> dict: omp_num_threads = INSAR_ISCE_BURST_OMP_NUM_THREADS[memory] return get_container_overrides(memory, omp_num_threads) - if job_type == 'AUTORIFT' and job_parameters['granules'].startswith('S2'): + if job_type == 'AUTORIFT' and job_parameters['granules'][0].startswith('S2'): return 
get_container_overrides(AUTORIFT_S2_MEMORY) - if job_type == 'AUTORIFT' and job_parameters['granules'].startswith('L'): + if job_type == 'AUTORIFT' and job_parameters['granules'][0].startswith('L'): return get_container_overrides(AUTORIFT_LANDSAT_MEMORY) - if job_type == 'RTC_GAMMA' and job_parameters['resolution'] in ['10', '20']: + if job_type == 'RTC_GAMMA' and job_parameters['resolution'] in [10, 20]: return get_container_overrides(RTC_GAMMA_10M_MEMORY) - if job_type in ['WATER_MAP', 'WATER_MAP_EQ'] and job_parameters['resolution'] in ['10', '20']: + if job_type in ['WATER_MAP', 'WATER_MAP_EQ'] and job_parameters['resolution'] in [10, 20]: return get_container_overrides(WATER_MAP_10M_MEMORY) return {} diff --git a/apps/start-execution-worker/src/start_execution_worker.py b/apps/start-execution-worker/src/start_execution_worker.py index 6031ad43e..7c314ea5d 100644 --- a/apps/start-execution-worker/src/start_execution_worker.py +++ b/apps/start-execution-worker/src/start_execution_worker.py @@ -25,12 +25,7 @@ def submit_jobs(jobs: list[dict]) -> None: for job in jobs: # Convert parameters to strings so they can be passed to Batch; see: # https://docs.aws.amazon.com/batch/latest/APIReference/API_SubmitJob.html#Batch-SubmitJob-request-parameters - - # TODO: instead, assign the stringified params to a batch_job_parameters key, and keep the original job_parameters - # value unchanged; will require searching for usage of job_parameters in step-function.json.j2 and render_cf.py - # and updating how they're used - job['original_job_parameters'] = job['job_parameters'] - job['job_parameters'] = convert_parameters_to_strings(job['job_parameters']) + job['batch_job_parameters'] = convert_parameters_to_strings(job['job_parameters']) STEP_FUNCTION.start_execution( stateMachineArn=step_function_arn, input=json.dumps(job, sort_keys=True), diff --git a/apps/step-function.json.j2 b/apps/step-function.json.j2 index 646d81da0..79c6f60ea 100644 --- a/apps/step-function.json.j2 +++ b/apps/step-function.json.j2 @@ -35,7 +35,7 @@ "ADD_PREFIX_TO_JOB_PARAMETERS": { "Type": "Pass", "InputPath": "$.job_id", - "ResultPath": "$.job_parameters.bucket_prefix", + "ResultPath": "$.batch_job_parameters.bucket_prefix", "Next": "SET_BATCH_OVERRIDES" }, "SET_BATCH_OVERRIDES": { diff --git a/tests/test_set_batch_overrides.py b/tests/test_set_batch_overrides.py index 375413556..3b9bafefe 100644 --- a/tests/test_set_batch_overrides.py +++ b/tests/test_set_batch_overrides.py @@ -19,7 +19,7 @@ def mock_insar_isce_burst_job(looks: str, bursts: int) -> dict: 'job_type': 'INSAR_ISCE_MULTI_BURST', 'job_parameters': { 'looks': looks, - 'reference': ' '.join('foo' for _ in range(bursts)), + 'reference': ['foo' for _ in range(bursts)], } } @@ -154,7 +154,7 @@ def test_set_batch_overrides_autorift_s2(): assert lambda_handler( { 'job_type': 'AUTORIFT', - 'job_parameters': {'granules': 'S2B_'}, + 'job_parameters': {'granules': ['S2B_']}, }, None, ) == { @@ -171,7 +171,7 @@ def test_set_batch_overrides_autorift_landsat(): assert lambda_handler( { 'job_type': 'AUTORIFT', - 'job_parameters': {'granules': 'LC08_'}, + 'job_parameters': {'granules': ['LC08_']}, }, None, ) == { @@ -188,7 +188,7 @@ def test_set_batch_overrides_rtc_gamma_10m(): assert lambda_handler( { 'job_type': 'RTC_GAMMA', - 'job_parameters': {'resolution': '10'}, + 'job_parameters': {'resolution': 10}, }, None, ) == { @@ -202,7 +202,7 @@ def test_set_batch_overrides_rtc_gamma_10m(): assert lambda_handler( { 'job_type': 'RTC_GAMMA', - 'job_parameters': {'resolution': '20'}, + 
'job_parameters': {'resolution': 20}, }, None, ) == { @@ -219,7 +219,7 @@ def test_set_batch_overrides_water_map_10m(): assert lambda_handler( { 'job_type': 'WATER_MAP', - 'job_parameters': {'resolution': '10'}, + 'job_parameters': {'resolution': 10}, }, None, ) == { @@ -233,7 +233,7 @@ def test_set_batch_overrides_water_map_10m(): assert lambda_handler( { 'job_type': 'WATER_MAP', - 'job_parameters': {'resolution': '20'}, + 'job_parameters': {'resolution': 20}, }, None, ) == { @@ -247,7 +247,7 @@ def test_set_batch_overrides_water_map_10m(): assert lambda_handler( { 'job_type': 'WATER_MAP_EQ', - 'job_parameters': {'resolution': '10'}, + 'job_parameters': {'resolution': 10}, }, None, ) == { @@ -261,7 +261,7 @@ def test_set_batch_overrides_water_map_10m(): assert lambda_handler( { 'job_type': 'WATER_MAP_EQ', - 'job_parameters': {'resolution': '20'}, + 'job_parameters': {'resolution': 20}, }, None, ) == { diff --git a/tests/test_start_execution_worker.py b/tests/test_start_execution_worker.py index f7ab6a8a4..c391286e9 100644 --- a/tests/test_start_execution_worker.py +++ b/tests/test_start_execution_worker.py @@ -63,6 +63,16 @@ def test_submit_jobs(): 'float_field': 10.1, 'integer_field': 10, 'job_parameters': { + 'granules': [ + 'granule1', + 'granule2', + ], + 'string_field': 'value1', + 'boolean_field': True, + 'float_field': 10.1, + 'integer_field': 10, + }, + 'batch_job_parameters': { 'granules': 'granule1 granule2', 'string_field': 'value1', 'boolean_field': 'True', @@ -76,7 +86,8 @@ def test_submit_jobs(): expected_input_job1 = json.dumps( { 'job_id': 'job1', - 'job_parameters': {'granules': 'granule1'}, + 'job_parameters': {'granules': ['granule1']}, + 'batch_job_parameters': {'granules': 'granule1'}, }, sort_keys=True, ) From 4362d4955eca2143dd9c5b6900581262c86bfc4f Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Fri, 11 Oct 2024 10:04:59 -0800 Subject: [PATCH 060/163] rename job tasks to job steps --- .github/actions/deploy-hyp3/action.yml | 2 +- apps/main-cf.yml.j2 | 2 +- apps/render_cf.py | 78 +++++++++++++------------- apps/step-function.json.j2 | 4 +- apps/workflow-cf.yml.j2 | 34 +++++------ job_spec/ARIA_AUTORIFT.yml | 2 +- job_spec/ARIA_RAIDER.yml | 2 +- job_spec/AUTORIFT.yml | 2 +- job_spec/AUTORIFT_ITS_LIVE.yml | 2 +- job_spec/INSAR_GAMMA.yml | 2 +- job_spec/INSAR_ISCE.yml | 2 +- job_spec/INSAR_ISCE_BURST.yml | 2 +- job_spec/INSAR_ISCE_MULTI_BURST.yml | 2 +- job_spec/RTC_GAMMA.yml | 2 +- job_spec/S1_CORRECTION_ITS_LIVE.yml | 2 +- job_spec/SRG_GSLC.yml | 2 +- job_spec/SRG_TIME_SERIES.yml | 2 +- job_spec/WATER_MAP.yml | 2 +- job_spec/WATER_MAP_EQ.yml | 2 +- 19 files changed, 74 insertions(+), 74 deletions(-) diff --git a/.github/actions/deploy-hyp3/action.yml b/.github/actions/deploy-hyp3/action.yml index 52986fd3e..d096c7e8c 100644 --- a/.github/actions/deploy-hyp3/action.yml +++ b/.github/actions/deploy-hyp3/action.yml @@ -30,7 +30,7 @@ inputs: description: "Comma separated list of Subnet IDs" required: true SECRET_ARN: - description: "ARN of Secret in AWS Secrets Manager containing all the credentials needed for job tasks" + description: "ARN of Secret in AWS Secrets Manager containing all the credentials needed for job steps" required: true CLOUDFORMATION_ROLE_ARN: description: "The CloudFormation role to use for this deployment" diff --git a/apps/main-cf.yml.j2 b/apps/main-cf.yml.j2 index ee7061246..47835d3b9 100644 --- a/apps/main-cf.yml.j2 +++ b/apps/main-cf.yml.j2 @@ -9,7 +9,7 @@ Parameters: Type: List SecretArn: - Description: ARN of Secret in AWS Secrets Manager 
containing all the credentials needed for job tasks. + Description: ARN of Secret in AWS Secrets Manager containing all the credentials needed for job steps. Type: String ImageTag: diff --git a/apps/render_cf.py b/apps/render_cf.py index 5133c1f5d..0bfba6915 100644 --- a/apps/render_cf.py +++ b/apps/render_cf.py @@ -12,29 +12,29 @@ def snake_to_pascal_case(input_string: str): return ''.join([i.title() for i in split_string]) -def get_steps_for_jobs(job_types: dict) -> dict: - steps = {} +def get_states_for_jobs(job_types: dict) -> dict: + states = {} for job_spec in job_types.values(): - steps.update(get_steps_for_job(job_spec)) - return steps + states.update(get_states_for_job(job_spec)) + return states -def get_steps_for_job(job_spec: dict) -> dict: - steps = {} - tasks = job_spec['tasks'] - for i in range(len(tasks)): - task = tasks[i] - next_step_name = tasks[i + 1]['name'] if i < len(tasks) - 1 else 'GET_FILES' - steps[task['name']] = get_step_for_task(task, i, next_step_name, job_spec) - return steps +def get_states_for_job(job_spec: dict) -> dict: + states = {} + job_steps = job_spec['steps'] + for i in range(len(job_steps)): + job_step = job_steps[i] + next_state_name = job_steps[i + 1]['name'] if i < len(job_steps) - 1 else 'GET_FILES' + states[job_step['name']] = get_state_for_job_step(job_step, i, next_state_name, job_spec) + return states -def get_step_for_task(task: dict, index: int, next_step_name: str, job_spec: dict) -> dict: - if 'map' in task: - step = get_step_for_map_task(task, job_spec) +def get_state_for_job_step(job_step: dict, index: int, next_state_name: str, job_spec: dict) -> dict: + if 'map' in job_step: + state = get_map_state(job_step, job_spec) else: - step = get_step_for_batch_submit_job(task) - step.update( + state = get_batch_submit_job_state(job_step) + state.update( { 'Catch': [ { @@ -46,18 +46,18 @@ def get_step_for_task(task: dict, index: int, next_step_name: str, job_spec: dic }, ], 'ResultPath': f'$.results.processing_results.step_{index}', - 'Next': next_step_name, + 'Next': next_state_name, } ) - return step + return state -def get_step_for_map_task(task: dict, job_spec: dict) -> dict: - item, items = parse_task_map(task['map']) +def get_map_state(job_step: dict, job_spec: dict) -> dict: + item, items = parse_job_step_map(job_step['map']) batch_job_parameters = get_batch_job_parameters(item, items, job_spec) - submit_job_step = get_step_for_batch_submit_job(task) - submit_job_step['End'] = True - submit_job_step_name = task['name'] + '_SUBMIT_JOB' + submit_job_state = get_batch_submit_job_state(job_step) + submit_job_state['End'] = True + submit_job_state_name = job_step['name'] + '_SUBMIT_JOB' return { 'Type': 'Map', 'ItemsPath': f'$.job_parameters.{items}', @@ -68,16 +68,16 @@ def get_step_for_map_task(task: dict, job_spec: dict) -> dict: 'batch_job_parameters': batch_job_parameters, }, 'ItemProcessor': { - 'StartAt': submit_job_step_name, + 'StartAt': submit_job_state_name, 'States': { - submit_job_step_name: submit_job_step, + submit_job_state_name: submit_job_state, } } } -def parse_task_map(task_map: str) -> tuple[str, str]: - tokens = task_map.split(' ') +def parse_job_step_map(job_step_map: str) -> tuple[str, str]: + tokens = job_step_map.split(' ') assert len(tokens) == 4 assert tokens[0], tokens[2] == ('for', 'in') return tokens[1], tokens[3] @@ -93,17 +93,17 @@ def get_batch_job_parameters(item: str, items: str, job_spec: dict) -> dict: return batch_job_parameters -def get_step_for_batch_submit_job(task: dict) -> dict: - if 'import' in 
task['compute_environment']: - compute_environment = task['compute_environment']['import'] +def get_batch_submit_job_state(job_step: dict) -> dict: + if 'import' in job_step['compute_environment']: + compute_environment = job_step['compute_environment']['import'] else: - compute_environment = task['compute_environment']['name'] + compute_environment = job_step['compute_environment']['name'] job_queue = 'JobQueueArn' if compute_environment == 'Default' else compute_environment + 'JobQueueArn' return { 'Type': 'Task', 'Resource': 'arn:aws:states:::batch:submitJob.sync', 'Parameters': { - 'JobDefinition': '${'+ snake_to_pascal_case(task['name']) + '}', + 'JobDefinition': '${' + snake_to_pascal_case(job_step['name']) + '}', 'JobName.$': '$.job_id', 'JobQueue': '${' + job_queue + '}', 'ShareIdentifier': 'default', @@ -133,7 +133,7 @@ def get_step_for_batch_submit_job(task: dict) -> dict: def render_templates(job_types, compute_envs, security_environment, api_name): - job_steps = get_steps_for_jobs(job_types) + job_states = get_states_for_jobs(job_types) env = jinja2.Environment( loader=jinja2.FileSystemLoader('./'), @@ -156,7 +156,7 @@ def render_templates(job_types, compute_envs, security_environment, api_name): api_name=api_name, json=json, snake_to_pascal_case=snake_to_pascal_case, - job_steps=job_steps, + job_states=job_states, ) if str(template_file).endswith('.json.j2'): @@ -170,8 +170,8 @@ def get_compute_environments(job_types: dict, compute_env_file: Optional[Path]) compute_env_names = set() compute_env_imports = set() for _, job_spec in job_types.items(): - for task in job_spec['tasks']: - compute_env = task['compute_environment'] + for job_step in job_spec['steps']: + compute_env = job_step['compute_environment'] if 'name' in compute_env: name = compute_env['name'] assert name != 'Default' @@ -246,8 +246,8 @@ def main(): job_types.update(yaml.safe_load(file.read_text())) for job_type, job_spec in job_types.items(): - for task in job_spec['tasks']: - task['name'] = job_type + '_' + task['name'] if task['name'] else job_type + for job_step in job_spec['steps']: + job_step['name'] = job_type + '_' + job_step['name'] if job_step['name'] else job_type compute_envs = get_compute_environments(job_types, args.compute_environment_file) diff --git a/apps/step-function.json.j2 b/apps/step-function.json.j2 index 79c6f60ea..567f1cef4 100644 --- a/apps/step-function.json.j2 +++ b/apps/step-function.json.j2 @@ -72,13 +72,13 @@ { "Variable": "$.job_type", "StringEquals": "{{ job_type }}", - "Next": "{{ job_spec['tasks'][0]['name'] }}" + "Next": "{{ job_spec['steps'][0]['name'] }}" }{% if not loop.last %},{% endif %} {% endfor %} ], "Default": "JOB_FAILED" }, - {% for name, body in job_steps.items() %} + {% for name, body in job_states.items() %} "{{ name }}": {{ json.dumps(body) }}, {% endfor %} "PROCESSING_FAILED": { diff --git a/apps/workflow-cf.yml.j2 b/apps/workflow-cf.yml.j2 index 48e4a262d..4db02e93b 100644 --- a/apps/workflow-cf.yml.j2 +++ b/apps/workflow-cf.yml.j2 @@ -44,8 +44,8 @@ Outputs: Resources: {% for job_type, job_spec in job_types.items() %} - {% for task in job_spec['tasks'] %} - {{ snake_to_pascal_case(task['name']) }}: + {% for job_step in job_spec['steps'] %} + {{ snake_to_pascal_case(job_step['name']) }}: Type: AWS::Batch::JobDefinition Properties: Type: container @@ -54,35 +54,35 @@ Resources: {{ k }}: {{ v.get('default') or v['api_schema'].get('default') }} {% endfor %} ContainerProperties: - Image: {% if 'image_tag' in task -%} - "{{ task['image'] }}:{{ task['image_tag'] }}" 
+ Image: {% if 'image_tag' in job_step -%} + "{{ job_step['image'] }}:{{ job_step['image_tag'] }}" {% else -%} - !Sub "{{ task['image'] }}:${ImageTag}" + !Sub "{{ job_step['image'] }}:${ImageTag}" {% endif %} JobRoleArn: !Ref TaskRoleArn ExecutionRoleArn: !GetAtt ExecutionRole.Arn ResourceRequirements: - Type: VCPU - Value: "{{ task['vcpu'] }}" + Value: "{{ job_step['vcpu'] }}" - Type: MEMORY - Value: "{{ task['memory'] }}" - {% if 'gpu' in task %} + Value: "{{ job_step['memory'] }}" + {% if 'gpu' in job_step %} - Type: GPU - Value: "{{ task['gpu'] }}" + Value: "{{ job_step['gpu'] }}" {% endif %} Command: - {% for command in task['command'] %} + {% for command in job_step['command'] %} - {{ command }} {% endfor %} - {% if task.get('secrets') %} + {% if job_step.get('secrets') %} Secrets: - {% for secret in task['secrets'] %} + {% for secret in job_step['secrets'] %} - Name: {{ secret }} ValueFrom: !Sub "${SecretArn}:{{ secret }}::" {% endfor %} {% endif %} Timeout: - AttemptDurationSeconds: {{ task['timeout'] }} + AttemptDurationSeconds: {{ job_step['timeout'] }} {% endfor %} {% endfor %} @@ -97,8 +97,8 @@ Resources: {{ name }}JobQueueArn: !Ref {{ name }}JobQueueArn {% endfor %} {% for job_type, job_spec in job_types.items() %} - {% for task in job_spec['tasks'] %} - {{ snake_to_pascal_case(task['name']) }}: !Ref {{ snake_to_pascal_case(task['name']) }} + {% for job_step in job_spec['steps'] %} + {{ snake_to_pascal_case(job_step['name']) }}: !Ref {{ snake_to_pascal_case(job_step['name']) }} {% endfor %} {% endfor %} UpdateDBLambdaArn: !GetAtt UpdateDB.Outputs.LambdaArn @@ -142,8 +142,8 @@ Resources: - !Ref {{ name }}JobQueueArn {% endfor %} {% for job_type, job_spec in job_types.items() %} - {% for task in job_spec['tasks'] %} - - !Ref {{ snake_to_pascal_case(task['name']) }} + {% for job_step in job_spec['steps'] %} + - !Ref {{ snake_to_pascal_case(job_step['name']) }} {% endfor %} {% endfor %} - Effect: Allow diff --git a/job_spec/ARIA_AUTORIFT.yml b/job_spec/ARIA_AUTORIFT.yml index 9d7d681bd..09e4f7d02 100644 --- a/job_spec/ARIA_AUTORIFT.yml +++ b/job_spec/ARIA_AUTORIFT.yml @@ -42,7 +42,7 @@ AUTORIFT: DEFAULT: cost: 1.0 validators: [] - tasks: + steps: - name: '' image: ghcr.io/asfhyp3/hyp3-autorift command: diff --git a/job_spec/ARIA_RAIDER.yml b/job_spec/ARIA_RAIDER.yml index 2cbe8ac23..823b2478b 100644 --- a/job_spec/ARIA_RAIDER.yml +++ b/job_spec/ARIA_RAIDER.yml @@ -25,7 +25,7 @@ ARIA_RAIDER: DEFAULT: cost: 1.0 validators: [] - tasks: + steps: - name: '' image: ghcr.io/dbekaert/raider command: diff --git a/job_spec/AUTORIFT.yml b/job_spec/AUTORIFT.yml index e0496c07c..20b108b64 100644 --- a/job_spec/AUTORIFT.yml +++ b/job_spec/AUTORIFT.yml @@ -39,7 +39,7 @@ AUTORIFT: DEFAULT: cost: 1.0 validators: [] - tasks: + steps: - name: '' image: ghcr.io/asfhyp3/hyp3-autorift command: diff --git a/job_spec/AUTORIFT_ITS_LIVE.yml b/job_spec/AUTORIFT_ITS_LIVE.yml index 3776fea73..ab6f0fd2d 100644 --- a/job_spec/AUTORIFT_ITS_LIVE.yml +++ b/job_spec/AUTORIFT_ITS_LIVE.yml @@ -51,7 +51,7 @@ AUTORIFT: DEFAULT: cost: 1.0 validators: [] - tasks: + steps: - name: '' image: ghcr.io/asfhyp3/hyp3-autorift command: diff --git a/job_spec/INSAR_GAMMA.yml b/job_spec/INSAR_GAMMA.yml index 70cae14b1..5fbc01510 100644 --- a/job_spec/INSAR_GAMMA.yml +++ b/job_spec/INSAR_GAMMA.yml @@ -82,7 +82,7 @@ INSAR_GAMMA: cost: 1.0 validators: - check_dem_coverage - tasks: + steps: - name: '' image: 845172464411.dkr.ecr.us-west-2.amazonaws.com/hyp3-gamma command: diff --git a/job_spec/INSAR_ISCE.yml b/job_spec/INSAR_ISCE.yml 
index 0b501b87b..f1b6a1625 100644 --- a/job_spec/INSAR_ISCE.yml +++ b/job_spec/INSAR_ISCE.yml @@ -92,7 +92,7 @@ INSAR_ISCE: DEFAULT: cost: 1.0 validators: [] - tasks: + steps: - name: '' image: ghcr.io/access-cloud-based-insar/dockerizedtopsapp command: diff --git a/job_spec/INSAR_ISCE_BURST.yml b/job_spec/INSAR_ISCE_BURST.yml index 3bade566c..1ae8de7f9 100644 --- a/job_spec/INSAR_ISCE_BURST.yml +++ b/job_spec/INSAR_ISCE_BURST.yml @@ -44,7 +44,7 @@ INSAR_ISCE_BURST: - check_valid_polarizations - check_same_burst_ids - check_not_antimeridian - tasks: + steps: - name: '' image: ghcr.io/asfhyp3/hyp3-isce2 command: diff --git a/job_spec/INSAR_ISCE_MULTI_BURST.yml b/job_spec/INSAR_ISCE_MULTI_BURST.yml index 30bc3c3fb..9ade842ec 100644 --- a/job_spec/INSAR_ISCE_MULTI_BURST.yml +++ b/job_spec/INSAR_ISCE_MULTI_BURST.yml @@ -60,7 +60,7 @@ INSAR_ISCE_MULTI_BURST: - check_valid_polarizations - check_same_burst_ids - check_not_antimeridian - tasks: + steps: - name: '' image: ghcr.io/asfhyp3/hyp3-isce2 command: diff --git a/job_spec/RTC_GAMMA.yml b/job_spec/RTC_GAMMA.yml index 75913e5ed..e0b3a867e 100644 --- a/job_spec/RTC_GAMMA.yml +++ b/job_spec/RTC_GAMMA.yml @@ -103,7 +103,7 @@ RTC_GAMMA: cost: 1.0 validators: - check_dem_coverage - tasks: + steps: - name: '' image: 845172464411.dkr.ecr.us-west-2.amazonaws.com/hyp3-gamma command: diff --git a/job_spec/S1_CORRECTION_ITS_LIVE.yml b/job_spec/S1_CORRECTION_ITS_LIVE.yml index e2a222672..3c956e9ee 100644 --- a/job_spec/S1_CORRECTION_ITS_LIVE.yml +++ b/job_spec/S1_CORRECTION_ITS_LIVE.yml @@ -29,7 +29,7 @@ S1_CORRECTION_TEST: DEFAULT: cost: 1.0 validators: [] - tasks: + steps: - name: '' image: ghcr.io/asfhyp3/hyp3-autorift command: diff --git a/job_spec/SRG_GSLC.yml b/job_spec/SRG_GSLC.yml index d326cd0d4..0fee4bd5b 100644 --- a/job_spec/SRG_GSLC.yml +++ b/job_spec/SRG_GSLC.yml @@ -40,7 +40,7 @@ SRG_GSLC: cost_profiles: DEFAULT: cost: 1.0 - tasks: + steps: - name: '' image: ghcr.io/asfhyp3/hyp3-srg image_tag: latest.gpu diff --git a/job_spec/SRG_TIME_SERIES.yml b/job_spec/SRG_TIME_SERIES.yml index a7ef8d5fc..a3b8fd811 100644 --- a/job_spec/SRG_TIME_SERIES.yml +++ b/job_spec/SRG_TIME_SERIES.yml @@ -41,7 +41,7 @@ SRG_TIME_SERIES: cost_profiles: DEFAULT: cost: 1.0 - tasks: + steps: - name: BACK_PROJECTION map: for granule in granules image: ghcr.io/asfhyp3/hyp3-srg diff --git a/job_spec/WATER_MAP.yml b/job_spec/WATER_MAP.yml index 26e07f10d..0501820fd 100644 --- a/job_spec/WATER_MAP.yml +++ b/job_spec/WATER_MAP.yml @@ -116,7 +116,7 @@ WATER_MAP: cost: 1.0 validators: - check_dem_coverage - tasks: + steps: - name: RTC image: 845172464411.dkr.ecr.us-west-2.amazonaws.com/hyp3-gamma command: diff --git a/job_spec/WATER_MAP_EQ.yml b/job_spec/WATER_MAP_EQ.yml index f46c97afa..9c6207cfd 100644 --- a/job_spec/WATER_MAP_EQ.yml +++ b/job_spec/WATER_MAP_EQ.yml @@ -58,7 +58,7 @@ WATER_MAP_EQ: cost: 1.0 validators: - check_dem_coverage - tasks: + steps: - name: RTC image: 845172464411.dkr.ecr.us-west-2.amazonaws.com/hyp3-gamma command: From 6e1f4d6f41da46a47bae315e211c1fd0736f50f5 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Fri, 11 Oct 2024 14:34:32 -0400 Subject: [PATCH 061/163] add validators --- apps/api/src/hyp3_api/validation.py | 39 +++++++++++++++++++++++ tests/test_api/test_validation.py | 48 +++++++++++++++++++++++++++++ 2 files changed, 87 insertions(+) diff --git a/apps/api/src/hyp3_api/validation.py b/apps/api/src/hyp3_api/validation.py index 0f5acbe5e..feedc4829 100644 --- a/apps/api/src/hyp3_api/validation.py +++ b/apps/api/src/hyp3_api/validation.py 
@@ -18,6 +18,9 @@ class GranuleValidationError(Exception): pass +class BoundsValidationError(Exception): + pass + with open(Path(__file__).parent / 'job_validation_map.yml') as f: JOB_VALIDATION_MAP = yaml.safe_load(f.read()) @@ -136,6 +139,42 @@ def get_multipolygon_from_geojson(input_file): return MultiPolygon(polygons) +def check_bounds_formatting(job, _): + bounds = job['job_parameters']['bounds'] + if bounds == [0.0, 0.0, 0.0, 0.0]: + pass + is_invalid_order = bounds[0] >= bounds[2] or bounds[1] >= bounds[3] + if is_invalid_order: + raise BoundsValidationError( + 'Invalid order for bounds. Bounds should be ordered [min lon, min lat, max lon, max lat].' + ) + + def bad_lat(lat): + return lat > 90 or lat < -90 + + def bad_lon(lon): + return lon > 180 or lon < -180 + + has_invalid_value = [bad_lon(bounds[0]), bad_lon(bounds[2]), bad_lat(bounds[1]), bad_lat(bounds[3])] + if sum(has_invalid_value): + raise BoundsValidationError( + 'Invalid lat/lon value in bounds. Bounds should be ordered [min lon, min lat, max lon, max lat].' + ) + + +def check_granules_intersecting_bounds(job, granule_metadata): + bounds = job['job_parameters']['bounds'] + bad_granules = [] + for granule in granule_metadata: + bbox = granule['polygon'] + if not bbox.intersection(Polygon.from_bounds(bounds[0], bounds[1], bounds[2], bounds[3])): + bad_granules.append(granule['name']) + if bad_granules: + raise GranuleValidationError( + f'The following granules do not intersect the provided bounds: {bad_granules}.' + ) + + def convert_single_burst_jobs(jobs: list[dict]) -> list[dict]: jobs = deepcopy(jobs) for job in jobs: diff --git a/tests/test_api/test_validation.py b/tests/test_api/test_validation.py index 67ccee8d2..e528fd434 100644 --- a/tests/test_api/test_validation.py +++ b/tests/test_api/test_validation.py @@ -483,3 +483,51 @@ def test_validate_jobs(): ] with raises(validation.GranuleValidationError): validation.validate_jobs(jobs) + + +def test_check_bounds_formatting(): + valid_jobs = [ + {'job_parameters': {"bounds": [-10, 0, 10, 10]}}, + {'job_parameters': {"bounds": [-180, -90, -170, -80]}}, + {'job_parameters': {"bounds": [170, 75, 180, 90]}}, + ] + invalid_jobs_valid_values = [ + {'job_parameters': {"bounds": [10, 0, -10, 10]}}, + {'job_parameters': {"bounds": [-10, 10, 10, 0]}}, + ] + invalid_jobs_invalid_values = [ + {'job_parameters': {"bounds": [-10, 0, 10, 100]}}, + {'job_parameters': {"bounds": [-200, 0, 10, 10]}}, + ] + for valid_job in valid_jobs: + validation.check_bounds_formatting(valid_job, {}) + for invalid_job in invalid_jobs_valid_values: + with raises(validation.BoundsValidationError, match=r'.*Invalid order for bounds.*'): + validation.check_bounds_formatting(invalid_job, {}) + for invalid_job in invalid_jobs_invalid_values: + with raises(validation.BoundsValidationError, match=r'.*Invalid lat/lon value in bounds.*'): + validation.check_bounds_formatting(invalid_job, {}) + + +def test_check_granules_intersecting_bounds(): + job = { + 'job_parameters': { + "bounds": [-10, 0, 10, 10] + } + } + valid_granule_metadata = [ + {'name': 'intersects1', 'polygon': Polygon.from_bounds(-10.0, 0.0, 10.0, 10.0)}, + {'name': 'intersects2', 'polygon': Polygon.from_bounds(-9.0, -1.0, 20.0, 11.0)}, + {'name': 'intersects3', 'polygon': Polygon.from_bounds(0.0, 5.0, 15.0, 15.0)} + ] + invalid_granule_metadata = [ + {'name': 'intersects1', 'polygon': Polygon.from_bounds(-10.0, 0.0, 10.0, 10.0)}, + {'name': 'does_not_intersect1', 'polygon': Polygon.from_bounds(10.1, -10, 20.0, -0.1)}, + {'name': 'intersects2', 
'polygon': Polygon.from_bounds(-9.0, -1.0, 20.0, 11.0)}, + {'name': 'does_not_intersect2', 'polygon': Polygon.from_bounds(-80.0, 20.0, -60.0, 90.0)}, + {'name': 'does_not_intersect3', 'polygon': Polygon.from_bounds(100.0, -50.0, 120.0, -0.1)}, + ] + validation.check_granules_intersecting_bounds(job, valid_granule_metadata) + error_pattern = r".*bounds: \['does_not_intersect1', 'does_not_intersect2', 'does_not_intersect3'\]*" + with raises(validation.GranuleValidationError, match=error_pattern): + validation.check_granules_intersecting_bounds(job, invalid_granule_metadata) From 0f585d95477146f4ef3390129363c416081151c6 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Fri, 11 Oct 2024 14:35:23 -0400 Subject: [PATCH 062/163] add validators to job_spec --- job_spec/SRG_GSLC.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/job_spec/SRG_GSLC.yml b/job_spec/SRG_GSLC.yml index d326cd0d4..28482bcbd 100644 --- a/job_spec/SRG_GSLC.yml +++ b/job_spec/SRG_GSLC.yml @@ -36,7 +36,10 @@ SRG_GSLC: description: min lon, min lat, max lon, max lat in EPSG:4326 type: number example: -116.583 - validators: [] + validators: [ + check_bounds_formatting, + check_granules_intersecting_bounds + ] cost_profiles: DEFAULT: cost: 1.0 From b7dcfbff78e8052541851d76060554b14d00248a Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Fri, 11 Oct 2024 14:35:45 -0400 Subject: [PATCH 063/163] add validators to job_spec --- job_spec/SRG_TIME_SERIES.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/job_spec/SRG_TIME_SERIES.yml b/job_spec/SRG_TIME_SERIES.yml index a7ef8d5fc..8fb638d45 100644 --- a/job_spec/SRG_TIME_SERIES.yml +++ b/job_spec/SRG_TIME_SERIES.yml @@ -37,7 +37,10 @@ SRG_TIME_SERIES: type: number example: -116.583 # TODO validators - validators: [] + validators: [ + check_bounds_formatting, + check_granules_intersecting_bounds + ] cost_profiles: DEFAULT: cost: 1.0 From 227dc5040cbba218228ff0e6a2dd5e0989a990ce Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Fri, 11 Oct 2024 14:41:06 -0400 Subject: [PATCH 064/163] flake8 --- apps/api/src/hyp3_api/validation.py | 3 ++- apps/render_cf.py | 2 +- tests/test_api/test_validation.py | 8 ++------ 3 files changed, 5 insertions(+), 8 deletions(-) diff --git a/apps/api/src/hyp3_api/validation.py b/apps/api/src/hyp3_api/validation.py index feedc4829..e7013d3cb 100644 --- a/apps/api/src/hyp3_api/validation.py +++ b/apps/api/src/hyp3_api/validation.py @@ -21,6 +21,7 @@ class GranuleValidationError(Exception): class BoundsValidationError(Exception): pass + with open(Path(__file__).parent / 'job_validation_map.yml') as f: JOB_VALIDATION_MAP = yaml.safe_load(f.read()) @@ -141,7 +142,7 @@ def get_multipolygon_from_geojson(input_file): def check_bounds_formatting(job, _): bounds = job['job_parameters']['bounds'] - if bounds == [0.0, 0.0, 0.0, 0.0]: + if bounds == [0.0, 0.0, 0.0, 0.0]: pass is_invalid_order = bounds[0] >= bounds[2] or bounds[1] >= bounds[3] if is_invalid_order: diff --git a/apps/render_cf.py b/apps/render_cf.py index 5133c1f5d..4bcb49782 100644 --- a/apps/render_cf.py +++ b/apps/render_cf.py @@ -103,7 +103,7 @@ def get_step_for_batch_submit_job(task: dict) -> dict: 'Type': 'Task', 'Resource': 'arn:aws:states:::batch:submitJob.sync', 'Parameters': { - 'JobDefinition': '${'+ snake_to_pascal_case(task['name']) + '}', + 'JobDefinition': '${' + snake_to_pascal_case(task['name']) + '}', 'JobName.$': '$.job_id', 'JobQueue': '${' + job_queue + '}', 'ShareIdentifier': 'default', diff --git a/tests/test_api/test_validation.py 
b/tests/test_api/test_validation.py index e528fd434..66500b4a0 100644 --- a/tests/test_api/test_validation.py +++ b/tests/test_api/test_validation.py @@ -510,11 +510,7 @@ def test_check_bounds_formatting(): def test_check_granules_intersecting_bounds(): - job = { - 'job_parameters': { - "bounds": [-10, 0, 10, 10] - } - } + job = {'job_parameters': {"bounds": [-10, 0, 10, 10]}} valid_granule_metadata = [ {'name': 'intersects1', 'polygon': Polygon.from_bounds(-10.0, 0.0, 10.0, 10.0)}, {'name': 'intersects2', 'polygon': Polygon.from_bounds(-9.0, -1.0, 20.0, 11.0)}, @@ -529,5 +525,5 @@ def test_check_granules_intersecting_bounds(): ] validation.check_granules_intersecting_bounds(job, valid_granule_metadata) error_pattern = r".*bounds: \['does_not_intersect1', 'does_not_intersect2', 'does_not_intersect3'\]*" - with raises(validation.GranuleValidationError, match=error_pattern): + with raises(validation.GranuleValidationError, match=error_pattern): validation.check_granules_intersecting_bounds(job, invalid_granule_metadata) From b6a7094fd4cd0c81d6e57bc9ddf67c0b5a6e4d67 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Fri, 11 Oct 2024 15:01:05 -0400 Subject: [PATCH 065/163] better bounds description --- job_spec/SRG_GSLC.yml | 2 +- job_spec/SRG_TIME_SERIES.yml | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/job_spec/SRG_GSLC.yml b/job_spec/SRG_GSLC.yml index aec3877dd..1c24d8562 100644 --- a/job_spec/SRG_GSLC.yml +++ b/job_spec/SRG_GSLC.yml @@ -24,6 +24,7 @@ SRG_GSLC: default: '""' api_schema: type: array + description: Bounds for extent of processing formatted like [min lon, min lat, max lon, max lat] in EPSG:4326. Setting to [0, 0, 0, 0] will use the extent of the first granule. default: [0.0, 0.0, 0.0, 0.0] minItems: 4 maxItems: 4 @@ -33,7 +34,6 @@ SRG_GSLC: - -113.209 - 38.138 items: - description: min lon, min lat, max lon, max lat in EPSG:4326 type: number example: -116.583 validators: [ diff --git a/job_spec/SRG_TIME_SERIES.yml b/job_spec/SRG_TIME_SERIES.yml index ff624beab..14bf8eb9d 100644 --- a/job_spec/SRG_TIME_SERIES.yml +++ b/job_spec/SRG_TIME_SERIES.yml @@ -24,6 +24,7 @@ SRG_TIME_SERIES: default: '""' api_schema: type: array + description: Bounds for extent of processing formatted like [min lon, min lat, max lon, max lat] in EPSG:4326. Setting to [0, 0, 0, 0] will use the extent of the first granule. default: [0.0, 0.0, 0.0, 0.0] minItems: 4 maxItems: 4 @@ -33,10 +34,8 @@ SRG_TIME_SERIES: - -113.209 - 38.138 items: - description: min lon, min lat, max lon, max lat in EPSG:4326 type: number example: -116.583 - # TODO validators validators: [ check_bounds_formatting, check_granules_intersecting_bounds From ee21a5dedf774c55588d81a9012e4d59762a2801 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Fri, 11 Oct 2024 15:04:29 -0400 Subject: [PATCH 066/163] Update job_spec/SRG_GSLC.yml Co-authored-by: Jake Herrmann --- job_spec/SRG_GSLC.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/job_spec/SRG_GSLC.yml b/job_spec/SRG_GSLC.yml index 1c24d8562..26609f537 100644 --- a/job_spec/SRG_GSLC.yml +++ b/job_spec/SRG_GSLC.yml @@ -24,7 +24,7 @@ SRG_GSLC: default: '""' api_schema: type: array - description: Bounds for extent of processing formatted like [min lon, min lat, max lon, max lat] in EPSG:4326. Setting to [0, 0, 0, 0] will use the extent of the first granule. + description: Bounds for extent of processing, formatted like [min lon, min lat, max lon, max lat] in EPSG:4326. Setting to [0, 0, 0, 0] will use the extent of the first granule. 
default: [0.0, 0.0, 0.0, 0.0] minItems: 4 maxItems: 4 From 5515e47794f79138421a2e92417744dd9d4fe3e9 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Fri, 11 Oct 2024 15:04:34 -0400 Subject: [PATCH 067/163] Update job_spec/SRG_TIME_SERIES.yml Co-authored-by: Jake Herrmann --- job_spec/SRG_TIME_SERIES.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/job_spec/SRG_TIME_SERIES.yml b/job_spec/SRG_TIME_SERIES.yml index 14bf8eb9d..2cfe31af0 100644 --- a/job_spec/SRG_TIME_SERIES.yml +++ b/job_spec/SRG_TIME_SERIES.yml @@ -24,7 +24,7 @@ SRG_TIME_SERIES: default: '""' api_schema: type: array - description: Bounds for extent of processing formatted like [min lon, min lat, max lon, max lat] in EPSG:4326. Setting to [0, 0, 0, 0] will use the extent of the first granule. + description: Bounds for extent of processing, formatted like [min lon, min lat, max lon, max lat] in EPSG:4326. Setting to [0, 0, 0, 0] will use the extent of the first granule. default: [0.0, 0.0, 0.0, 0.0] minItems: 4 maxItems: 4 From 48045b8dead2e560b6a4e6e41a91b6d90fe2e58c Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Fri, 11 Oct 2024 15:32:24 -0400 Subject: [PATCH 068/163] Update apps/api/src/hyp3_api/validation.py Co-authored-by: Jake Herrmann --- apps/api/src/hyp3_api/validation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/apps/api/src/hyp3_api/validation.py b/apps/api/src/hyp3_api/validation.py index e7013d3cb..d2b3a4d2c 100644 --- a/apps/api/src/hyp3_api/validation.py +++ b/apps/api/src/hyp3_api/validation.py @@ -144,8 +144,8 @@ def check_bounds_formatting(job, _): bounds = job['job_parameters']['bounds'] if bounds == [0.0, 0.0, 0.0, 0.0]: pass - is_invalid_order = bounds[0] >= bounds[2] or bounds[1] >= bounds[3] - if is_invalid_order: + + if bounds[0] >= bounds[2] or bounds[1] >= bounds[3]: raise BoundsValidationError( 'Invalid order for bounds. Bounds should be ordered [min lon, min lat, max lon, max lat].' ) From 1c40c135fd57be2e3716affec446c2ecbef29f0a Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Fri, 11 Oct 2024 15:32:54 -0400 Subject: [PATCH 069/163] Update apps/api/src/hyp3_api/validation.py Co-authored-by: Jake Herrmann --- apps/api/src/hyp3_api/validation.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/apps/api/src/hyp3_api/validation.py b/apps/api/src/hyp3_api/validation.py index d2b3a4d2c..535043f46 100644 --- a/apps/api/src/hyp3_api/validation.py +++ b/apps/api/src/hyp3_api/validation.py @@ -156,8 +156,7 @@ def bad_lat(lat): def bad_lon(lon): return lon > 180 or lon < -180 - has_invalid_value = [bad_lon(bounds[0]), bad_lon(bounds[2]), bad_lat(bounds[1]), bad_lat(bounds[3])] - if sum(has_invalid_value): + if any([bad_lon(bounds[0]), bad_lon(bounds[2]), bad_lat(bounds[1]), bad_lat(bounds[3])]): raise BoundsValidationError( 'Invalid lat/lon value in bounds. Bounds should be ordered [min lon, min lat, max lon, max lat].' 
) From aae02acc7f1751c5280ed7e70d2e72d6d9e32a9d Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Fri, 11 Oct 2024 15:34:21 -0400 Subject: [PATCH 070/163] Update apps/api/src/hyp3_api/validation.py Co-authored-by: Jake Herrmann --- apps/api/src/hyp3_api/validation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/apps/api/src/hyp3_api/validation.py b/apps/api/src/hyp3_api/validation.py index 535043f46..1bb9573ad 100644 --- a/apps/api/src/hyp3_api/validation.py +++ b/apps/api/src/hyp3_api/validation.py @@ -163,11 +163,11 @@ def bad_lon(lon): def check_granules_intersecting_bounds(job, granule_metadata): - bounds = job['job_parameters']['bounds'] + bounds = Polygon.from_bounds(*job['job_parameters']['bounds']) bad_granules = [] for granule in granule_metadata: bbox = granule['polygon'] - if not bbox.intersection(Polygon.from_bounds(bounds[0], bounds[1], bounds[2], bounds[3])): + if not bbox.intersection(bounds): bad_granules.append(granule['name']) if bad_granules: raise GranuleValidationError( From 0a572b7f5f619cc9850a78c777a206769a4c50b8 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Fri, 11 Oct 2024 15:50:14 -0400 Subject: [PATCH 071/163] handle default bounds in check_granules_intersecting_bounds --- apps/api/src/hyp3_api/validation.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/apps/api/src/hyp3_api/validation.py b/apps/api/src/hyp3_api/validation.py index 1bb9573ad..6d9ff9646 100644 --- a/apps/api/src/hyp3_api/validation.py +++ b/apps/api/src/hyp3_api/validation.py @@ -143,7 +143,7 @@ def get_multipolygon_from_geojson(input_file): def check_bounds_formatting(job, _): bounds = job['job_parameters']['bounds'] if bounds == [0.0, 0.0, 0.0, 0.0]: - pass + return if bounds[0] >= bounds[2] or bounds[1] >= bounds[3]: raise BoundsValidationError( @@ -158,12 +158,16 @@ def bad_lon(lon): if any([bad_lon(bounds[0]), bad_lon(bounds[2]), bad_lat(bounds[1]), bad_lat(bounds[3])]): raise BoundsValidationError( - 'Invalid lat/lon value in bounds. Bounds should be ordered [min lon, min lat, max lon, max lat].' + 'Invalid lon/lat value(s) in bounds. Bounds should be ordered [min lon, min lat, max lon, max lat].' 
) def check_granules_intersecting_bounds(job, granule_metadata): - bounds = Polygon.from_bounds(*job['job_parameters']['bounds']) + bounds = job['job_parameters']['bounds'] + if bounds == [0.0, 0.0, 0.0, 0.0]: + bounds = granule_metadata[0]['polygon'] + else: + bounds = Polygon.from_bounds(*bounds) bad_granules = [] for granule in granule_metadata: bbox = granule['polygon'] From ec9929aa0533dc854c20f82b9eadbeb9a9889c1f Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Fri, 11 Oct 2024 15:50:28 -0400 Subject: [PATCH 072/163] handle default bounds case --- tests/test_api/test_validation.py | 35 ++++++++++++++++++------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/tests/test_api/test_validation.py b/tests/test_api/test_validation.py index 66500b4a0..00ffe856a 100644 --- a/tests/test_api/test_validation.py +++ b/tests/test_api/test_validation.py @@ -487,30 +487,32 @@ def test_validate_jobs(): def test_check_bounds_formatting(): valid_jobs = [ - {'job_parameters': {"bounds": [-10, 0, 10, 10]}}, - {'job_parameters': {"bounds": [-180, -90, -170, -80]}}, - {'job_parameters': {"bounds": [170, 75, 180, 90]}}, + {'job_parameters': {'bounds': [-10, 0, 10, 10]}}, + {'job_parameters': {'bounds': [-180, -90, -170, -80]}}, + {'job_parameters': {'bounds': [170, 75, 180, 90]}}, + {'job_parameters': {'bounds': [0, 0, 0, 0]}} ] - invalid_jobs_valid_values = [ - {'job_parameters': {"bounds": [10, 0, -10, 10]}}, - {'job_parameters': {"bounds": [-10, 10, 10, 0]}}, + invalid_jobs_bad_order = [ + {'job_parameters': {'bounds': [10, 0, -10, 10]}}, + {'job_parameters': {'bounds': [-10, 10, 10, 0]}}, ] - invalid_jobs_invalid_values = [ - {'job_parameters': {"bounds": [-10, 0, 10, 100]}}, - {'job_parameters': {"bounds": [-200, 0, 10, 10]}}, + invalid_jobs_bad_values = [ + {'job_parameters': {'bounds': [-10, 0, 10, 100]}}, + {'job_parameters': {'bounds': [-200, 0, 10, 10]}}, ] for valid_job in valid_jobs: validation.check_bounds_formatting(valid_job, {}) - for invalid_job in invalid_jobs_valid_values: + for invalid_job in invalid_jobs_bad_order: with raises(validation.BoundsValidationError, match=r'.*Invalid order for bounds.*'): validation.check_bounds_formatting(invalid_job, {}) - for invalid_job in invalid_jobs_invalid_values: - with raises(validation.BoundsValidationError, match=r'.*Invalid lat/lon value in bounds.*'): + for invalid_job in invalid_jobs_bad_values: + with raises(validation.BoundsValidationError, match=r'.*Invalid lon/lat value(s)*'): validation.check_bounds_formatting(invalid_job, {}) def test_check_granules_intersecting_bounds(): - job = {'job_parameters': {"bounds": [-10, 0, 10, 10]}} + job_with_specified_bounds = {'job_parameters': {"bounds": [-10, 0, 10, 10]}} + job_with_default_bounds = {'job_parameters': {'bounds': [0, 0, 0, 0]}} valid_granule_metadata = [ {'name': 'intersects1', 'polygon': Polygon.from_bounds(-10.0, 0.0, 10.0, 10.0)}, {'name': 'intersects2', 'polygon': Polygon.from_bounds(-9.0, -1.0, 20.0, 11.0)}, @@ -523,7 +525,10 @@ def test_check_granules_intersecting_bounds(): {'name': 'does_not_intersect2', 'polygon': Polygon.from_bounds(-80.0, 20.0, -60.0, 90.0)}, {'name': 'does_not_intersect3', 'polygon': Polygon.from_bounds(100.0, -50.0, 120.0, -0.1)}, ] - validation.check_granules_intersecting_bounds(job, valid_granule_metadata) + validation.check_granules_intersecting_bounds(job_with_specified_bounds, valid_granule_metadata) + validation.check_granules_intersecting_bounds(job_with_default_bounds, valid_granule_metadata) error_pattern = r".*bounds: 
\['does_not_intersect1', 'does_not_intersect2', 'does_not_intersect3'\]*" with raises(validation.GranuleValidationError, match=error_pattern): - validation.check_granules_intersecting_bounds(job, invalid_granule_metadata) + validation.check_granules_intersecting_bounds(job_with_specified_bounds, invalid_granule_metadata) + with raises(validation.GranuleValidationError, match=error_pattern): + validation.check_granules_intersecting_bounds(job_with_default_bounds, invalid_granule_metadata) From 7d21eb265a4fd1b79537b41c51746cdb63c1ba15 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Fri, 11 Oct 2024 15:52:58 -0400 Subject: [PATCH 073/163] extra test cases --- tests/test_api/test_validation.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/test_api/test_validation.py b/tests/test_api/test_validation.py index 00ffe856a..410d01d77 100644 --- a/tests/test_api/test_validation.py +++ b/tests/test_api/test_validation.py @@ -495,10 +495,14 @@ def test_check_bounds_formatting(): invalid_jobs_bad_order = [ {'job_parameters': {'bounds': [10, 0, -10, 10]}}, {'job_parameters': {'bounds': [-10, 10, 10, 0]}}, + {'job_parameters': {'bounds': [10, 0, 10, 10]}}, + {'job_parameters': {'bounds': [-10, 0, 10, 0]}}, ] invalid_jobs_bad_values = [ {'job_parameters': {'bounds': [-10, 0, 10, 100]}}, {'job_parameters': {'bounds': [-200, 0, 10, 10]}}, + {'job_parameters': {'bounds': [-10, -100, 10, 80]}}, + {'job_parameters': {'bounds': [-100, 0, 200, 10]}}, ] for valid_job in valid_jobs: validation.check_bounds_formatting(valid_job, {}) From 0964444a39c4ebe09f2620597dca39895ba59833 Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Fri, 11 Oct 2024 13:19:24 -0800 Subject: [PATCH 074/163] use gpu image for gslc map step --- job_spec/SRG_TIME_SERIES.yml | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/job_spec/SRG_TIME_SERIES.yml b/job_spec/SRG_TIME_SERIES.yml index a3b8fd811..2f9bda730 100644 --- a/job_spec/SRG_TIME_SERIES.yml +++ b/job_spec/SRG_TIME_SERIES.yml @@ -47,30 +47,24 @@ SRG_TIME_SERIES: image: ghcr.io/asfhyp3/hyp3-srg # TODO revert image_tag #image_tag: latest.gpu - image_tag: 0.8.1.dev20_g0eb9d69 + image_tag: 0.8.1.dev21_g991363d command: - ++process - back_projection - --bounds - Ref::bounds - # TODO revert --gpu - #- --gpu + - --gpu - --bucket - '!Ref Bucket' - --bucket-prefix - Ref::bucket_prefix - --use-gslc-prefix - Ref::granule - # TODO revert timeout for gpu env - #timeout: 10800 - timeout: 86400 + timeout: 10800 compute_environment: - # TODO revert import - #import: SrgGslc - import: Default + import: SrgGslc vcpu: 1 - # TODO revert gpu - #gpu: 1 + gpu: 1 memory: 30500 secrets: - EARTHDATA_USERNAME @@ -78,7 +72,7 @@ SRG_TIME_SERIES: - name: '' image: ghcr.io/asfhyp3/hyp3-srg # TODO image tag - image_tag: 0.8.1.dev20_g0eb9d69 + image_tag: 0.8.1.dev21_g991363d command: - ++process - time_series From 6d486a6e785b945b3a17e96017943b87d225f1e4 Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Fri, 11 Oct 2024 13:35:33 -0800 Subject: [PATCH 075/163] Update tests/test_api/test_validation.py --- tests/test_api/test_validation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_api/test_validation.py b/tests/test_api/test_validation.py index 410d01d77..2bf251e7f 100644 --- a/tests/test_api/test_validation.py +++ b/tests/test_api/test_validation.py @@ -515,7 +515,7 @@ def test_check_bounds_formatting(): def test_check_granules_intersecting_bounds(): - job_with_specified_bounds = {'job_parameters': {"bounds": [-10, 0, 10, 10]}} + 
job_with_specified_bounds = {'job_parameters': {'bounds': [-10, 0, 10, 10]}} job_with_default_bounds = {'job_parameters': {'bounds': [0, 0, 0, 0]}} valid_granule_metadata = [ {'name': 'intersects1', 'polygon': Polygon.from_bounds(-10.0, 0.0, 10.0, 10.0)}, From 2212e4a978dfe352433019b0522317b93ee7295c Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Fri, 11 Oct 2024 13:38:31 -0800 Subject: [PATCH 076/163] revert image tag for time_series step back to CPU version --- job_spec/SRG_TIME_SERIES.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/job_spec/SRG_TIME_SERIES.yml b/job_spec/SRG_TIME_SERIES.yml index 5654280f0..23c2118da 100644 --- a/job_spec/SRG_TIME_SERIES.yml +++ b/job_spec/SRG_TIME_SERIES.yml @@ -74,7 +74,7 @@ SRG_TIME_SERIES: - name: '' image: ghcr.io/asfhyp3/hyp3-srg # TODO image tag - image_tag: 0.8.1.dev21_g991363d + image_tag: 0.8.1.dev20_g0eb9d69 command: - ++process - time_series From dd94f5d846702a4f59844d7c8f1ed5c9861ca260 Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Fri, 11 Oct 2024 15:45:59 -0800 Subject: [PATCH 077/163] image tags --- job_spec/SRG_TIME_SERIES.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/job_spec/SRG_TIME_SERIES.yml b/job_spec/SRG_TIME_SERIES.yml index 23c2118da..7dfe49fee 100644 --- a/job_spec/SRG_TIME_SERIES.yml +++ b/job_spec/SRG_TIME_SERIES.yml @@ -49,7 +49,7 @@ SRG_TIME_SERIES: image: ghcr.io/asfhyp3/hyp3-srg # TODO revert image_tag #image_tag: latest.gpu - image_tag: 0.8.1.dev21_g991363d + image_tag: 0.8.1.dev23_gce1c8a5 command: - ++process - back_projection @@ -74,7 +74,7 @@ SRG_TIME_SERIES: - name: '' image: ghcr.io/asfhyp3/hyp3-srg # TODO image tag - image_tag: 0.8.1.dev20_g0eb9d69 + image_tag: 0.8.1.dev24_gdd8285e command: - ++process - time_series From 3fe2592c59a9c050e1a1d468adf4c94ce0fcf11c Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Mon, 14 Oct 2024 10:54:08 -0800 Subject: [PATCH 078/163] image tags --- job_spec/SRG_TIME_SERIES.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/job_spec/SRG_TIME_SERIES.yml b/job_spec/SRG_TIME_SERIES.yml index 7dfe49fee..ef25ecc93 100644 --- a/job_spec/SRG_TIME_SERIES.yml +++ b/job_spec/SRG_TIME_SERIES.yml @@ -49,7 +49,7 @@ SRG_TIME_SERIES: image: ghcr.io/asfhyp3/hyp3-srg # TODO revert image_tag #image_tag: latest.gpu - image_tag: 0.8.1.dev23_gce1c8a5 + image_tag: 0.8.1.dev26_g096e7e7.gpu command: - ++process - back_projection @@ -74,7 +74,7 @@ SRG_TIME_SERIES: - name: '' image: ghcr.io/asfhyp3/hyp3-srg # TODO image tag - image_tag: 0.8.1.dev24_gdd8285e + image_tag: 0.8.1.dev26_g096e7e7.cpu command: - ++process - time_series From 77b32728101fec955126c48359eef420743975c2 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 14 Oct 2024 19:33:09 +0000 Subject: [PATCH 079/163] Bump cfn-lint from 1.16.0 to 1.16.1 Bumps [cfn-lint](https://github.com/aws-cloudformation/cfn-lint) from 1.16.0 to 1.16.1. - [Release notes](https://github.com/aws-cloudformation/cfn-lint/releases) - [Changelog](https://github.com/aws-cloudformation/cfn-lint/blob/main/CHANGELOG.md) - [Commits](https://github.com/aws-cloudformation/cfn-lint/compare/v1.16.0...v1.16.1) --- updated-dependencies: - dependency-name: cfn-lint dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- requirements-all.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements-all.txt b/requirements-all.txt index c34af36e6..95d1b76cb 100644 --- a/requirements-all.txt +++ b/requirements-all.txt @@ -17,4 +17,4 @@ flake8-blind-except==0.2.1 flake8-builtins==2.5.0 setuptools==75.1.0 openapi-spec-validator==0.7.1 -cfn-lint==1.16.0 +cfn-lint==1.16.1 From e557088b532da0a9a39cc11acccf682e2c8a64e6 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 14 Oct 2024 19:33:36 +0000 Subject: [PATCH 080/163] Bump boto3 from 1.35.34 to 1.35.40 Bumps [boto3](https://github.com/boto/boto3) from 1.35.34 to 1.35.40. - [Release notes](https://github.com/boto/boto3/releases) - [Commits](https://github.com/boto/boto3/compare/1.35.34...1.35.40) --- updated-dependencies: - dependency-name: boto3 dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- requirements-all.txt | 2 +- requirements-apps-disable-private-dns.txt | 2 +- requirements-apps-start-execution-manager.txt | 2 +- requirements-apps-start-execution-worker.txt | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/requirements-all.txt b/requirements-all.txt index c34af36e6..d1c9b1632 100644 --- a/requirements-all.txt +++ b/requirements-all.txt @@ -5,7 +5,7 @@ -r requirements-apps-start-execution-worker.txt -r requirements-apps-disable-private-dns.txt -r requirements-apps-update-db.txt -boto3==1.35.34 +boto3==1.35.40 jinja2==3.1.4 moto[dynamodb]==5.0.16 pytest==8.3.3 diff --git a/requirements-apps-disable-private-dns.txt b/requirements-apps-disable-private-dns.txt index 591888472..9abbd0a9b 100644 --- a/requirements-apps-disable-private-dns.txt +++ b/requirements-apps-disable-private-dns.txt @@ -1 +1 @@ -boto3==1.35.34 +boto3==1.35.40 diff --git a/requirements-apps-start-execution-manager.txt b/requirements-apps-start-execution-manager.txt index 3a73b98e4..81b056a7b 100644 --- a/requirements-apps-start-execution-manager.txt +++ b/requirements-apps-start-execution-manager.txt @@ -1,3 +1,3 @@ -boto3==1.35.34 +boto3==1.35.40 ./lib/dynamo/ ./lib/lambda_logging/ diff --git a/requirements-apps-start-execution-worker.txt b/requirements-apps-start-execution-worker.txt index ad5be7290..c8fcb15e5 100644 --- a/requirements-apps-start-execution-worker.txt +++ b/requirements-apps-start-execution-worker.txt @@ -1,2 +1,2 @@ -boto3==1.35.34 +boto3==1.35.40 ./lib/lambda_logging/ From 94130854c0bceb272e481cf2f08a655fc8047731 Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Mon, 14 Oct 2024 11:51:46 -0800 Subject: [PATCH 081/163] image tag --- job_spec/SRG_TIME_SERIES.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/job_spec/SRG_TIME_SERIES.yml b/job_spec/SRG_TIME_SERIES.yml index ef25ecc93..de59702c3 100644 --- a/job_spec/SRG_TIME_SERIES.yml +++ b/job_spec/SRG_TIME_SERIES.yml @@ -49,7 +49,7 @@ SRG_TIME_SERIES: image: ghcr.io/asfhyp3/hyp3-srg # TODO revert image_tag #image_tag: latest.gpu - image_tag: 0.8.1.dev26_g096e7e7.gpu + image_tag: 0.8.1.dev29_g94579f4.gpu command: - ++process - back_projection From d65e9066fdbfd550ac567eb20c47b18096fb6104 Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Mon, 14 Oct 2024 13:44:00 -0800 Subject: [PATCH 082/163] remove old todo --- apps/upload-log/src/upload_log.py | 1 - 1 file changed, 1 deletion(-) diff --git a/apps/upload-log/src/upload_log.py b/apps/upload-log/src/upload_log.py index 284641093..aecab3043 100644 --- 
a/apps/upload-log/src/upload_log.py +++ b/apps/upload-log/src/upload_log.py @@ -58,7 +58,6 @@ def write_log_to_s3(bucket, prefix, content): def lambda_handler(event, context): - # TODO handle all results, not just the last one results_dict = event['processing_results'] result = results_dict[max(results_dict.keys())] From dea1718985299ed057214d55b1629c6663229bb0 Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Mon, 14 Oct 2024 14:12:00 -0800 Subject: [PATCH 083/163] image tags --- job_spec/SRG_TIME_SERIES.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/job_spec/SRG_TIME_SERIES.yml b/job_spec/SRG_TIME_SERIES.yml index de59702c3..0c54a882b 100644 --- a/job_spec/SRG_TIME_SERIES.yml +++ b/job_spec/SRG_TIME_SERIES.yml @@ -49,7 +49,7 @@ SRG_TIME_SERIES: image: ghcr.io/asfhyp3/hyp3-srg # TODO revert image_tag #image_tag: latest.gpu - image_tag: 0.8.1.dev29_g94579f4.gpu + image_tag: 0.8.1.dev30_gb0a4a1c.gpu command: - ++process - back_projection @@ -74,7 +74,7 @@ SRG_TIME_SERIES: - name: '' image: ghcr.io/asfhyp3/hyp3-srg # TODO image tag - image_tag: 0.8.1.dev26_g096e7e7.cpu + image_tag: 0.8.1.dev30_gb0a4a1c.cpu command: - ++process - time_series From f05c6c96324d4ea84ac57e497e7dfff9317527c3 Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Mon, 14 Oct 2024 14:20:39 -0800 Subject: [PATCH 084/163] increase srg time series max to 600, fix default example --- job_spec/SRG_TIME_SERIES.yml | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/job_spec/SRG_TIME_SERIES.yml b/job_spec/SRG_TIME_SERIES.yml index 0c54a882b..1e473812b 100644 --- a/job_spec/SRG_TIME_SERIES.yml +++ b/job_spec/SRG_TIME_SERIES.yml @@ -7,17 +7,18 @@ SRG_TIME_SERIES: api_schema: type: array minItems: 1 - maxItems: 300 + maxItems: 600 example: - - S1A_IW_RAW__0SDV_20231229T134339_20231229T134411_051870_064437_4F42 - - S1A_IW_RAW__0SDV_20231229T134404_20231229T134436_051870_064437_5F38 + - S1A_IW_RAW__0SDV_20240629T020812_20240629T020845_054532_06A2F8_8276 + - S1A_IW_RAW__0SDV_20240723T020812_20240723T020844_054882_06AF26_2CE5 + - S1A_IW_RAW__0SDV_20240804T020812_20240804T020844_055057_06B527_1346 items: description: Name of the Level-0 Sentinel-1 scenes to process type: string pattern: "^S1[AB]_IW_RAW" minLength: 67 maxLength: 67 - example: S1A_IW_RAW__0SDV_20231229T134404_20231229T134436_051870_064437_5F38 + example: S1A_IW_RAW__0SDV_20240629T020812_20240629T020845_054532_06A2F8_8276 bucket_prefix: default: '""' bounds: @@ -29,13 +30,13 @@ SRG_TIME_SERIES: minItems: 4 maxItems: 4 example: - - -116.583 - - 35.714 - - -113.209 - - 38.138 + - -124.41473278572731 + - 37.098700238673814 + - -120.9825007499895 + - 39.52359974376425 items: type: number - example: -116.583 + example: -124.41473278572731 validators: [ check_bounds_formatting, check_granules_intersecting_bounds From 6b6da4a4862275d855e2516d9b428673eef21376 Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Mon, 14 Oct 2024 15:04:38 -0800 Subject: [PATCH 085/163] update processing_times api spec --- apps/api/src/hyp3_api/api-spec/openapi-spec.yml.j2 | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/apps/api/src/hyp3_api/api-spec/openapi-spec.yml.j2 b/apps/api/src/hyp3_api/api-spec/openapi-spec.yml.j2 index 16c014488..bdf598caf 100644 --- a/apps/api/src/hyp3_api/api-spec/openapi-spec.yml.j2 +++ b/apps/api/src/hyp3_api/api-spec/openapi-spec.yml.j2 @@ -408,17 +408,17 @@ components: processing_times: description: > - A list of run times for the job's processing steps in the order that they 
were executed. For example, - a job comprised of a single processing step would yield a list containing one processing time, while a job - comprised of three processing steps would yield a list containing three processing times. An empty list - represents a failure to calculate processing times. + List of run times for the job's processing steps in the order that they were executed. + An empty list represents a failure to calculate processing times. type: array items: - $ref: '#/components/schemas/processing_time_in_seconds' + oneOf: + - $ref: "#/components/schemas/processing_times" + - $ref: '#/components/schemas/processing_time_in_seconds' processing_time_in_seconds: description: > - Run time in seconds for a particular processing step's final attempt (regardless of whether it succeeded). + Run time in seconds for a processing step's final attempt (regardless of whether it succeeded). A value of zero indicates that there were no attempts. type: number minimum: 0 From 11704f9ee1e60a16f679bb80b01e889113b78b75 Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Mon, 14 Oct 2024 15:34:36 -0800 Subject: [PATCH 086/163] fix proc time api schema --- apps/api/src/hyp3_api/api-spec/openapi-spec.yml.j2 | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/apps/api/src/hyp3_api/api-spec/openapi-spec.yml.j2 b/apps/api/src/hyp3_api/api-spec/openapi-spec.yml.j2 index bdf598caf..ebee991a0 100644 --- a/apps/api/src/hyp3_api/api-spec/openapi-spec.yml.j2 +++ b/apps/api/src/hyp3_api/api-spec/openapi-spec.yml.j2 @@ -413,15 +413,20 @@ components: type: array items: oneOf: - - $ref: "#/components/schemas/processing_times" + - type: array + items: + $ref: '#/components/schemas/processing_time_in_seconds' + example: [200, 100, 150] - $ref: '#/components/schemas/processing_time_in_seconds' + processing_time_in_seconds: description: > Run time in seconds for a processing step's final attempt (regardless of whether it succeeded). A value of zero indicates that there were no attempts. type: number minimum: 0 + example: 50 securitySchemes: EarthDataLogin: From be3d81bbe96cb3b737bf3a8179e9f0c9f1bc1548 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Tue, 15 Oct 2024 16:42:12 -0400 Subject: [PATCH 087/163] add relative orbit check for srg time series --- apps/api/src/hyp3_api/validation.py | 18 ++++++++++++++++++ job_spec/SRG_TIME_SERIES.yml | 3 ++- tests/test_api/test_validation.py | 17 +++++++++++++++++ 3 files changed, 37 insertions(+), 1 deletion(-) diff --git a/apps/api/src/hyp3_api/validation.py b/apps/api/src/hyp3_api/validation.py index 6d9ff9646..d99cb0bb7 100644 --- a/apps/api/src/hyp3_api/validation.py +++ b/apps/api/src/hyp3_api/validation.py @@ -179,6 +179,24 @@ def check_granules_intersecting_bounds(job, granule_metadata): ) +def check_same_relative_orbits(job, granule_metadata): + relative_orbit_numbers = [] + for granule in granule_metadata: + name = granule['name'].split('_') + absolute_orbit = name[7] + if name[0] == 'S1A': + relative_orbit = str(((int(absolute_orbit) - 73) % 175) + 1) + else: + relative_orbit = str(((int(absolute_orbit) - 27) % 175) + 1) + if not relative_orbit_numbers: + relative_orbit_numbers.append(relative_orbit) + else: + if relative_orbit not in relative_orbit_numbers: + raise GranuleValidationError( + f'The relative orbit number for {granule["name"]} does not match the previous granules: {relative_orbit} is not {relative_orbit_numbers[0]}.' 
+ ) + + def convert_single_burst_jobs(jobs: list[dict]) -> list[dict]: jobs = deepcopy(jobs) for job in jobs: diff --git a/job_spec/SRG_TIME_SERIES.yml b/job_spec/SRG_TIME_SERIES.yml index 2cfe31af0..c081bf8c4 100644 --- a/job_spec/SRG_TIME_SERIES.yml +++ b/job_spec/SRG_TIME_SERIES.yml @@ -38,7 +38,8 @@ SRG_TIME_SERIES: example: -116.583 validators: [ check_bounds_formatting, - check_granules_intersecting_bounds + check_granules_intersecting_bounds, + check_same_relative_orbits ] cost_profiles: DEFAULT: diff --git a/tests/test_api/test_validation.py b/tests/test_api/test_validation.py index 410d01d77..534039409 100644 --- a/tests/test_api/test_validation.py +++ b/tests/test_api/test_validation.py @@ -536,3 +536,20 @@ def test_check_granules_intersecting_bounds(): validation.check_granules_intersecting_bounds(job_with_specified_bounds, invalid_granule_metadata) with raises(validation.GranuleValidationError, match=error_pattern): validation.check_granules_intersecting_bounds(job_with_default_bounds, invalid_granule_metadata) + + +def check_same_relative_orbits(): + valid_granule_metadata = [ + {'name': 'S1A_IW_RAW__0SDV_20201015T161622_20201015T161654_034809_040E95_AF3C'}, + {'name': 'S1A_IW_RAW__0SDV_20200816T161620_20200816T161652_033934_03EFCE_5730'}, + {'name': 'S1B_IW_RAW__0SDV_20200810T161537_20200810T161610_022863_02B66A_F7D7'}, + {'name': 'S1B_IW_RAW__0SDV_20200623T161535_20200623T161607_022163_02A10F_7FD6'} + ] + invalid_granule_metadata = valid_granule_metadata + invalid_granule_metadata.append( + {'name': 'S1B_IW_RAW__0SDV_20200623T161535_20200623T161607_012345_02A10F_7FD6'} # 23 + ) + validation.check_same_relative_orbits({}, valid_granule_metadata) + error_pattern = r'.*S1B_IW_RAW__0SDV_20200623T161535_20200623T161607_012345_02A10F_7FD6 does not match the previous granules: 23 is not 87.*' + with raises(validation.GranuleValidationError, match=error_pattern): + validation.check_same_relative_orbits({}, invalid_granule_metadata) From 4e4d024cf61f17b9b174d7fd3cc9799aec6536d1 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Tue, 15 Oct 2024 16:44:12 -0400 Subject: [PATCH 088/163] remove else --- apps/api/src/hyp3_api/validation.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/apps/api/src/hyp3_api/validation.py b/apps/api/src/hyp3_api/validation.py index d99cb0bb7..9980f9383 100644 --- a/apps/api/src/hyp3_api/validation.py +++ b/apps/api/src/hyp3_api/validation.py @@ -190,11 +190,10 @@ def check_same_relative_orbits(job, granule_metadata): relative_orbit = str(((int(absolute_orbit) - 27) % 175) + 1) if not relative_orbit_numbers: relative_orbit_numbers.append(relative_orbit) - else: - if relative_orbit not in relative_orbit_numbers: - raise GranuleValidationError( - f'The relative orbit number for {granule["name"]} does not match the previous granules: {relative_orbit} is not {relative_orbit_numbers[0]}.' - ) + if relative_orbit not in relative_orbit_numbers: + raise GranuleValidationError( + f'The relative orbit number for {granule["name"]} does not match the previous granules: {relative_orbit} is not {relative_orbit_numbers[0]}.' 
+ ) def convert_single_burst_jobs(jobs: list[dict]) -> list[dict]: From c02c4eb4f43596b8cbaea7032790304bee4580d6 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Tue, 15 Oct 2024 16:48:40 -0400 Subject: [PATCH 089/163] refactor --- apps/api/src/hyp3_api/validation.py | 5 +++-- tests/test_api/test_validation.py | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/apps/api/src/hyp3_api/validation.py b/apps/api/src/hyp3_api/validation.py index 9980f9383..bb6cea021 100644 --- a/apps/api/src/hyp3_api/validation.py +++ b/apps/api/src/hyp3_api/validation.py @@ -190,9 +190,10 @@ def check_same_relative_orbits(job, granule_metadata): relative_orbit = str(((int(absolute_orbit) - 27) % 175) + 1) if not relative_orbit_numbers: relative_orbit_numbers.append(relative_orbit) - if relative_orbit not in relative_orbit_numbers: + if relative_orbit not in relative_orbit_numbers: raise GranuleValidationError( - f'The relative orbit number for {granule["name"]} does not match the previous granules: {relative_orbit} is not {relative_orbit_numbers[0]}.' + f'Relative orbit number for {granule["name"]} does not match that of the previous granules: ' + f'{relative_orbit} is not {relative_orbit_numbers[0]}.' ) diff --git a/tests/test_api/test_validation.py b/tests/test_api/test_validation.py index 534039409..b936653e2 100644 --- a/tests/test_api/test_validation.py +++ b/tests/test_api/test_validation.py @@ -550,6 +550,6 @@ def check_same_relative_orbits(): {'name': 'S1B_IW_RAW__0SDV_20200623T161535_20200623T161607_012345_02A10F_7FD6'} # 23 ) validation.check_same_relative_orbits({}, valid_granule_metadata) - error_pattern = r'.*S1B_IW_RAW__0SDV_20200623T161535_20200623T161607_012345_02A10F_7FD6 does not match the previous granules: 23 is not 87.*' + error_pattern = r'.*23 is not 87.*' with raises(validation.GranuleValidationError, match=error_pattern): validation.check_same_relative_orbits({}, invalid_granule_metadata) From 54d898f4b6723ff419d827124625ad73cd89a0ad Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Tue, 15 Oct 2024 16:50:41 -0400 Subject: [PATCH 090/163] list to number --- apps/api/src/hyp3_api/validation.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/apps/api/src/hyp3_api/validation.py b/apps/api/src/hyp3_api/validation.py index bb6cea021..466ea95ee 100644 --- a/apps/api/src/hyp3_api/validation.py +++ b/apps/api/src/hyp3_api/validation.py @@ -180,7 +180,7 @@ def check_granules_intersecting_bounds(job, granule_metadata): def check_same_relative_orbits(job, granule_metadata): - relative_orbit_numbers = [] + relative_orbit_number = None for granule in granule_metadata: name = granule['name'].split('_') absolute_orbit = name[7] @@ -188,12 +188,12 @@ def check_same_relative_orbits(job, granule_metadata): relative_orbit = str(((int(absolute_orbit) - 73) % 175) + 1) else: relative_orbit = str(((int(absolute_orbit) - 27) % 175) + 1) - if not relative_orbit_numbers: - relative_orbit_numbers.append(relative_orbit) - if relative_orbit not in relative_orbit_numbers: + if not relative_orbit_number: + relative_orbit_number = relative_orbit + if relative_orbit != relative_orbit_number: raise GranuleValidationError( f'Relative orbit number for {granule["name"]} does not match that of the previous granules: ' - f'{relative_orbit} is not {relative_orbit_numbers[0]}.' + f'{relative_orbit} is not {relative_orbit_number}.' 
) From 5c564897d8e26efd0e45525f6fd93686f9174b13 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Tue, 15 Oct 2024 16:52:13 -0400 Subject: [PATCH 091/163] flake8 --- apps/api/src/hyp3_api/validation.py | 2 +- tests/test_api/test_validation.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/apps/api/src/hyp3_api/validation.py b/apps/api/src/hyp3_api/validation.py index 466ea95ee..2beab0651 100644 --- a/apps/api/src/hyp3_api/validation.py +++ b/apps/api/src/hyp3_api/validation.py @@ -190,7 +190,7 @@ def check_same_relative_orbits(job, granule_metadata): relative_orbit = str(((int(absolute_orbit) - 27) % 175) + 1) if not relative_orbit_number: relative_orbit_number = relative_orbit - if relative_orbit != relative_orbit_number: + if relative_orbit != relative_orbit_number: raise GranuleValidationError( f'Relative orbit number for {granule["name"]} does not match that of the previous granules: ' f'{relative_orbit} is not {relative_orbit_number}.' diff --git a/tests/test_api/test_validation.py b/tests/test_api/test_validation.py index b936653e2..6a32aefc4 100644 --- a/tests/test_api/test_validation.py +++ b/tests/test_api/test_validation.py @@ -547,7 +547,7 @@ def check_same_relative_orbits(): ] invalid_granule_metadata = valid_granule_metadata invalid_granule_metadata.append( - {'name': 'S1B_IW_RAW__0SDV_20200623T161535_20200623T161607_012345_02A10F_7FD6'} # 23 + {'name': 'S1B_IW_RAW__0SDV_20200623T161535_20200623T161607_012345_02A10F_7FD6'} ) validation.check_same_relative_orbits({}, valid_granule_metadata) error_pattern = r'.*23 is not 87.*' From 1ef5d156b85524a26593ceb046a2a27035c9d58f Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Tue, 15 Oct 2024 17:05:06 -0400 Subject: [PATCH 092/163] refactor --- apps/api/src/hyp3_api/validation.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/apps/api/src/hyp3_api/validation.py b/apps/api/src/hyp3_api/validation.py index 2beab0651..382ebac8e 100644 --- a/apps/api/src/hyp3_api/validation.py +++ b/apps/api/src/hyp3_api/validation.py @@ -182,12 +182,10 @@ def check_granules_intersecting_bounds(job, granule_metadata): def check_same_relative_orbits(job, granule_metadata): relative_orbit_number = None for granule in granule_metadata: - name = granule['name'].split('_') - absolute_orbit = name[7] - if name[0] == 'S1A': - relative_orbit = str(((int(absolute_orbit) - 73) % 175) + 1) - else: - relative_orbit = str(((int(absolute_orbit) - 27) % 175) + 1) + name_split = granule['name'].split('_') + absolute_orbit = name_split[7] + offset = 73 if name_split[0] == 'S1A' else 27 + relative_orbit = str(((int(absolute_orbit) - offset) % 175) + 1) if not relative_orbit_number: relative_orbit_number = relative_orbit if relative_orbit != relative_orbit_number: From 168472450ee70d972e6dabad4e680a39fea24946 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Tue, 15 Oct 2024 17:08:50 -0400 Subject: [PATCH 093/163] add relative orbit check to srg_gslc --- job_spec/SRG_GSLC.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/job_spec/SRG_GSLC.yml b/job_spec/SRG_GSLC.yml index 26609f537..ccff7c84e 100644 --- a/job_spec/SRG_GSLC.yml +++ b/job_spec/SRG_GSLC.yml @@ -38,7 +38,8 @@ SRG_GSLC: example: -116.583 validators: [ check_bounds_formatting, - check_granules_intersecting_bounds + check_granules_intersecting_bounds, + check_same_relative_orbits ] cost_profiles: DEFAULT: From 4666beaf06d03fc967c4e4a05ed58f16c121e608 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Tue, 15 Oct 2024 17:23:24 -0400 
Subject: [PATCH 094/163] add reference for relative orbit calc --- apps/api/src/hyp3_api/validation.py | 1 + 1 file changed, 1 insertion(+) diff --git a/apps/api/src/hyp3_api/validation.py b/apps/api/src/hyp3_api/validation.py index 382ebac8e..60e80edb6 100644 --- a/apps/api/src/hyp3_api/validation.py +++ b/apps/api/src/hyp3_api/validation.py @@ -184,6 +184,7 @@ def check_same_relative_orbits(job, granule_metadata): for granule in granule_metadata: name_split = granule['name'].split('_') absolute_orbit = name_split[7] + # "Relationship between relative and absolute orbit numbers": https://sentiwiki.copernicus.eu/web/s1-products offset = 73 if name_split[0] == 'S1A' else 27 relative_orbit = str(((int(absolute_orbit) - offset) % 175) + 1) if not relative_orbit_number: From 51fea7bc8f5462588dde2fac0d7c30a1090e3e9d Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Tue, 15 Oct 2024 14:22:23 -0800 Subject: [PATCH 095/163] image tags --- job_spec/SRG_TIME_SERIES.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/job_spec/SRG_TIME_SERIES.yml b/job_spec/SRG_TIME_SERIES.yml index 1e473812b..e821c8591 100644 --- a/job_spec/SRG_TIME_SERIES.yml +++ b/job_spec/SRG_TIME_SERIES.yml @@ -50,7 +50,7 @@ SRG_TIME_SERIES: image: ghcr.io/asfhyp3/hyp3-srg # TODO revert image_tag #image_tag: latest.gpu - image_tag: 0.8.1.dev30_gb0a4a1c.gpu + image_tag: 0.8.1.dev47_g2d96d14.gpu command: - ++process - back_projection @@ -75,7 +75,7 @@ SRG_TIME_SERIES: - name: '' image: ghcr.io/asfhyp3/hyp3-srg # TODO image tag - image_tag: 0.8.1.dev30_gb0a4a1c.cpu + image_tag: 0.8.1.dev47_g2d96d14.cpu command: - ++process - time_series From d507b456bef72b82a533cb966365b3122a13c7a3 Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Tue, 15 Oct 2024 15:50:54 -0800 Subject: [PATCH 096/163] filter batch job output in step function --- apps/check-processing-time/src/check_processing_time.py | 6 ++++-- apps/render_cf.py | 4 ++++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/apps/check-processing-time/src/check_processing_time.py b/apps/check-processing-time/src/check_processing_time.py index 27187d09b..c0e1d16ac 100644 --- a/apps/check-processing-time/src/check_processing_time.py +++ b/apps/check-processing-time/src/check_processing_time.py @@ -14,8 +14,10 @@ def get_time_from_result(result: Union[list, dict]) -> Union[list, float]: if isinstance(result, list): return [get_time_from_result(item) for item in result] - if 'Attempts' in result: - return get_time_from_attempts(result['Attempts']) + if 'start' in result: + # TODO: update tests + attempts = [{'StartedAt': start, 'StoppedAt': stop} for start, stop in zip(result['start'], result['stop'])] + return get_time_from_attempts(attempts) return get_time_from_attempts(json.loads(result['Cause'])['Attempts']) diff --git a/apps/render_cf.py b/apps/render_cf.py index 0bfba6915..e49062f4d 100644 --- a/apps/render_cf.py +++ b/apps/render_cf.py @@ -114,6 +114,10 @@ def get_batch_submit_job_state(job_step: dict) -> dict: 'Attempts': 3 }, }, + 'ResultSelector': { + 'start.$': '$.Attempts[0:].StartedAt', + 'stop.$': '$.Attempts[0:].StoppedAt', + }, 'Retry': [ { 'ErrorEquals': [ From d9eadf023d2c420014902a4d7216290aa76bb730 Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Tue, 15 Oct 2024 15:56:05 -0800 Subject: [PATCH 097/163] json path * --- apps/render_cf.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/apps/render_cf.py b/apps/render_cf.py index e49062f4d..bc10c82d5 100644 --- a/apps/render_cf.py +++ b/apps/render_cf.py @@ -115,8 
+115,8 @@ def get_batch_submit_job_state(job_step: dict) -> dict: }, }, 'ResultSelector': { - 'start.$': '$.Attempts[0:].StartedAt', - 'stop.$': '$.Attempts[0:].StoppedAt', + 'start.$': '$.Attempts[*].StartedAt', + 'stop.$': '$.Attempts[*].StoppedAt', }, 'Retry': [ { From 15c7813d32e07b75eed47ef67c5c99cc1d1a3d07 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Wed, 16 Oct 2024 14:23:43 -0400 Subject: [PATCH 098/163] refactor --- apps/api/src/hyp3_api/validation.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/apps/api/src/hyp3_api/validation.py b/apps/api/src/hyp3_api/validation.py index 60e80edb6..4355f71f6 100644 --- a/apps/api/src/hyp3_api/validation.py +++ b/apps/api/src/hyp3_api/validation.py @@ -180,19 +180,19 @@ def check_granules_intersecting_bounds(job, granule_metadata): def check_same_relative_orbits(job, granule_metadata): - relative_orbit_number = None + previous_relative_orbit = None for granule in granule_metadata: name_split = granule['name'].split('_') absolute_orbit = name_split[7] # "Relationship between relative and absolute orbit numbers": https://sentiwiki.copernicus.eu/web/s1-products offset = 73 if name_split[0] == 'S1A' else 27 - relative_orbit = str(((int(absolute_orbit) - offset) % 175) + 1) - if not relative_orbit_number: - relative_orbit_number = relative_orbit - if relative_orbit != relative_orbit_number: + relative_orbit = ((int(absolute_orbit) - offset) % 175) + 1 + if not previous_relative_orbit: + previous_relative_orbit = relative_orbit + if relative_orbit != previous_relative_orbit: raise GranuleValidationError( f'Relative orbit number for {granule["name"]} does not match that of the previous granules: ' - f'{relative_orbit} is not {relative_orbit_number}.' + f'{relative_orbit} is not {previous_relative_orbit}.' 
) From 61cf1d3883ac887d5ef0ef1e2e4c471803383a5c Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Wed, 16 Oct 2024 10:23:44 -0800 Subject: [PATCH 099/163] allow excluding job params --- apps/render_cf.py | 55 ++++++++++++++++++++---------------- job_spec/SRG_TIME_SERIES.yml | 2 ++ 2 files changed, 33 insertions(+), 24 deletions(-) diff --git a/apps/render_cf.py b/apps/render_cf.py index bc10c82d5..e80746b95 100644 --- a/apps/render_cf.py +++ b/apps/render_cf.py @@ -30,10 +30,11 @@ def get_states_for_job(job_spec: dict) -> dict: def get_state_for_job_step(job_step: dict, index: int, next_state_name: str, job_spec: dict) -> dict: + excluded_parameters = set(job_step.get('excluded_parameters', [])) if 'map' in job_step: - state = get_map_state(job_step, job_spec) + state = get_map_state(job_spec, job_step, excluded_parameters) else: - state = get_batch_submit_job_state(job_step) + state = get_batch_submit_job_state(job_spec, job_step, excluded_parameters) state.update( { 'Catch': [ @@ -52,10 +53,14 @@ def get_state_for_job_step(job_step: dict, index: int, next_state_name: str, job return state -def get_map_state(job_step: dict, job_spec: dict) -> dict: +def get_map_state(job_spec: dict, job_step: dict, excluded_parameters: set[str]) -> dict: item, items = parse_job_step_map(job_step['map']) - batch_job_parameters = get_batch_job_parameters(item, items, job_spec) - submit_job_state = get_batch_submit_job_state(job_step) + + excluded_parameters.add(items) + batch_job_parameters = get_batch_job_parameters(job_spec, excluded_parameters) + batch_job_parameters[f'{item}.$'] = '$$.Map.Item.Value' + + submit_job_state = get_batch_submit_job_state(job_spec, job_step) submit_job_state['End'] = True submit_job_state_name = job_step['name'] + '_SUBMIT_JOB' return { @@ -76,24 +81,11 @@ def get_map_state(job_step: dict, job_spec: dict) -> dict: } -def parse_job_step_map(job_step_map: str) -> tuple[str, str]: - tokens = job_step_map.split(' ') - assert len(tokens) == 4 - assert tokens[0], tokens[2] == ('for', 'in') - return tokens[1], tokens[3] - - -def get_batch_job_parameters(item: str, items: str, job_spec: dict) -> dict: - batch_job_parameters = { - f'{param}.$': f'$.batch_job_parameters.{param}' - for param in job_spec['parameters'] - if param != items - } - batch_job_parameters[f'{item}.$'] = '$$.Map.Item.Value' - return batch_job_parameters - - -def get_batch_submit_job_state(job_step: dict) -> dict: +def get_batch_submit_job_state(job_spec: dict, job_step: dict, excluded_parameters: set[str] = None) -> dict: + if not excluded_parameters: + batch_job_parameters = '$.batch_job_parameters' + else: + batch_job_parameters = get_batch_job_parameters(job_spec, excluded_parameters) if 'import' in job_step['compute_environment']: compute_environment = job_step['compute_environment']['import'] else: @@ -108,7 +100,7 @@ def get_batch_submit_job_state(job_step: dict) -> dict: 'JobQueue': '${' + job_queue + '}', 'ShareIdentifier': 'default', 'SchedulingPriorityOverride.$': '$.priority', - 'Parameters.$': '$.batch_job_parameters', + 'Parameters.$': batch_job_parameters, 'ContainerOverrides.$': '$.container_overrides', 'RetryStrategy': { 'Attempts': 3 @@ -136,6 +128,21 @@ def get_batch_submit_job_state(job_step: dict) -> dict: } +def parse_job_step_map(job_step_map: str) -> tuple[str, str]: + tokens = job_step_map.split(' ') + assert len(tokens) == 4 + assert tokens[0], tokens[2] == ('for', 'in') + return tokens[1], tokens[3] + + +def get_batch_job_parameters(job_spec: dict, excluded_parameters: set[str]) -> 
dict: + return { + f'{param}.$': f'$.batch_job_parameters.{param}' + for param in job_spec['parameters'] + if param not in excluded_parameters + } + + def render_templates(job_types, compute_envs, security_environment, api_name): job_states = get_states_for_jobs(job_types) diff --git a/job_spec/SRG_TIME_SERIES.yml b/job_spec/SRG_TIME_SERIES.yml index e821c8591..564abda2f 100644 --- a/job_spec/SRG_TIME_SERIES.yml +++ b/job_spec/SRG_TIME_SERIES.yml @@ -73,6 +73,8 @@ SRG_TIME_SERIES: - EARTHDATA_USERNAME - EARTHDATA_PASSWORD - name: '' + exclude_parameters: + - granules image: ghcr.io/asfhyp3/hyp3-srg # TODO image tag image_tag: 0.8.1.dev47_g2d96d14.cpu From 7585feda066546521a5c04a9cefd701e97bf2ee0 Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Wed, 16 Oct 2024 10:28:33 -0800 Subject: [PATCH 100/163] fix typo --- apps/render_cf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/render_cf.py b/apps/render_cf.py index e80746b95..ec0de929e 100644 --- a/apps/render_cf.py +++ b/apps/render_cf.py @@ -30,7 +30,7 @@ def get_states_for_job(job_spec: dict) -> dict: def get_state_for_job_step(job_step: dict, index: int, next_state_name: str, job_spec: dict) -> dict: - excluded_parameters = set(job_step.get('excluded_parameters', [])) + excluded_parameters = set(job_step.get('exclude_parameters', [])) if 'map' in job_step: state = get_map_state(job_spec, job_step, excluded_parameters) else: From 0ba47e36aab3d1c76ede21f349fdf360a86b7384 Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Wed, 16 Oct 2024 10:52:35 -0800 Subject: [PATCH 101/163] fix step function batch params key --- apps/render_cf.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/apps/render_cf.py b/apps/render_cf.py index ec0de929e..d7846078c 100644 --- a/apps/render_cf.py +++ b/apps/render_cf.py @@ -84,8 +84,10 @@ def get_map_state(job_spec: dict, job_step: dict, excluded_parameters: set[str]) def get_batch_submit_job_state(job_spec: dict, job_step: dict, excluded_parameters: set[str] = None) -> dict: if not excluded_parameters: batch_job_parameters = '$.batch_job_parameters' + parameters_key = 'Parameters.$' else: batch_job_parameters = get_batch_job_parameters(job_spec, excluded_parameters) + parameters_key = 'Parameters' if 'import' in job_step['compute_environment']: compute_environment = job_step['compute_environment']['import'] else: @@ -100,7 +102,7 @@ def get_batch_submit_job_state(job_spec: dict, job_step: dict, excluded_paramete 'JobQueue': '${' + job_queue + '}', 'ShareIdentifier': 'default', 'SchedulingPriorityOverride.$': '$.priority', - 'Parameters.$': batch_job_parameters, + parameters_key: batch_job_parameters, 'ContainerOverrides.$': '$.container_overrides', 'RetryStrategy': { 'Attempts': 3 From 865308b47da0d2908ddf999214fcef9008ab278f Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Wed, 16 Oct 2024 12:00:50 -0800 Subject: [PATCH 102/163] relative orbits unit test is failing --- tests/test_api/test_validation.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_api/test_validation.py b/tests/test_api/test_validation.py index 2a85600a1..bbd40be87 100644 --- a/tests/test_api/test_validation.py +++ b/tests/test_api/test_validation.py @@ -538,7 +538,8 @@ def test_check_granules_intersecting_bounds(): validation.check_granules_intersecting_bounds(job_with_default_bounds, invalid_granule_metadata) -def check_same_relative_orbits(): +def test_check_same_relative_orbits(): + # FIXME: this test fails valid_granule_metadata = [ {'name': 
'S1A_IW_RAW__0SDV_20201015T161622_20201015T161654_034809_040E95_AF3C'}, {'name': 'S1A_IW_RAW__0SDV_20200816T161620_20200816T161652_033934_03EFCE_5730'}, From 83a374a5b181799a8ff7340509d2eb0d53d82c77 Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Wed, 16 Oct 2024 13:47:50 -0800 Subject: [PATCH 103/163] automatically filter batch params in step function --- apps/render_cf.py | 33 ++++++++++++++++++++------------- job_spec/SRG_TIME_SERIES.yml | 2 -- 2 files changed, 20 insertions(+), 15 deletions(-) diff --git a/apps/render_cf.py b/apps/render_cf.py index d7846078c..c908f9f40 100644 --- a/apps/render_cf.py +++ b/apps/render_cf.py @@ -30,11 +30,10 @@ def get_states_for_job(job_spec: dict) -> dict: def get_state_for_job_step(job_step: dict, index: int, next_state_name: str, job_spec: dict) -> dict: - excluded_parameters = set(job_step.get('exclude_parameters', [])) if 'map' in job_step: - state = get_map_state(job_spec, job_step, excluded_parameters) + state = get_map_state(job_spec, job_step) else: - state = get_batch_submit_job_state(job_spec, job_step, excluded_parameters) + state = get_batch_submit_job_state(job_spec, job_step, filter_batch_params=True) state.update( { 'Catch': [ @@ -53,12 +52,10 @@ def get_state_for_job_step(job_step: dict, index: int, next_state_name: str, job return state -def get_map_state(job_spec: dict, job_step: dict, excluded_parameters: set[str]) -> dict: +def get_map_state(job_spec: dict, job_step: dict) -> dict: item, items = parse_job_step_map(job_step['map']) - excluded_parameters.add(items) - batch_job_parameters = get_batch_job_parameters(job_spec, excluded_parameters) - batch_job_parameters[f'{item}.$'] = '$$.Map.Item.Value' + batch_job_parameters = get_batch_job_parameters(job_spec, job_step, map_item=item) submit_job_state = get_batch_submit_job_state(job_spec, job_step) submit_job_state['End'] = True @@ -81,12 +78,12 @@ def get_map_state(job_spec: dict, job_step: dict, excluded_parameters: set[str]) } -def get_batch_submit_job_state(job_spec: dict, job_step: dict, excluded_parameters: set[str] = None) -> dict: - if not excluded_parameters: +def get_batch_submit_job_state(job_spec: dict, job_step: dict, filter_batch_params = False) -> dict: + if not filter_batch_params: batch_job_parameters = '$.batch_job_parameters' parameters_key = 'Parameters.$' else: - batch_job_parameters = get_batch_job_parameters(job_spec, excluded_parameters) + batch_job_parameters = get_batch_job_parameters(job_spec, job_step) parameters_key = 'Parameters' if 'import' in job_step['compute_environment']: compute_environment = job_step['compute_environment']['import'] @@ -137,12 +134,22 @@ def parse_job_step_map(job_step_map: str) -> tuple[str, str]: return tokens[1], tokens[3] -def get_batch_job_parameters(job_spec: dict, excluded_parameters: set[str]) -> dict: - return { +def get_batch_job_parameters(job_spec: dict, job_step: dict, map_item: str = None) -> dict: + ref_prefix = 'Ref::' + param_names = { + arg.removeprefix(ref_prefix) + for arg in job_step['command'] + if arg.startswith(ref_prefix) + } + batch_params = { f'{param}.$': f'$.batch_job_parameters.{param}' for param in job_spec['parameters'] - if param not in excluded_parameters + if param in param_names } + if map_item is not None: + assert map_item in param_names + batch_params[f'{map_item}.$'] = '$$.Map.Item.Value' + return batch_params def render_templates(job_types, compute_envs, security_environment, api_name): diff --git a/job_spec/SRG_TIME_SERIES.yml b/job_spec/SRG_TIME_SERIES.yml index 38e9e5456..21b0b43e0 
100644 --- a/job_spec/SRG_TIME_SERIES.yml +++ b/job_spec/SRG_TIME_SERIES.yml @@ -74,8 +74,6 @@ SRG_TIME_SERIES: - EARTHDATA_USERNAME - EARTHDATA_PASSWORD - name: '' - exclude_parameters: - - granules image: ghcr.io/asfhyp3/hyp3-srg # TODO image tag image_tag: 0.8.1.dev47_g2d96d14.cpu From e5a731d8075bc9d8cb6c4bee9d77c5a15e7817a4 Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Wed, 16 Oct 2024 13:56:21 -0800 Subject: [PATCH 104/163] readability --- apps/render_cf.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/apps/render_cf.py b/apps/render_cf.py index c908f9f40..07c27c6a0 100644 --- a/apps/render_cf.py +++ b/apps/render_cf.py @@ -79,16 +79,18 @@ def get_map_state(job_spec: dict, job_step: dict) -> dict: def get_batch_submit_job_state(job_spec: dict, job_step: dict, filter_batch_params = False) -> dict: - if not filter_batch_params: - batch_job_parameters = '$.batch_job_parameters' - parameters_key = 'Parameters.$' - else: + if filter_batch_params: batch_job_parameters = get_batch_job_parameters(job_spec, job_step) parameters_key = 'Parameters' + else: + batch_job_parameters = '$.batch_job_parameters' + parameters_key = 'Parameters.$' + if 'import' in job_step['compute_environment']: compute_environment = job_step['compute_environment']['import'] else: compute_environment = job_step['compute_environment']['name'] + job_queue = 'JobQueueArn' if compute_environment == 'Default' else compute_environment + 'JobQueueArn' return { 'Type': 'Task', From 5de24c86fbd751bf0fcb5ff646db5c2cc96252bd Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Wed, 16 Oct 2024 14:51:46 -0800 Subject: [PATCH 105/163] filter stringified batch params in start_execution_worker --- apps/render_cf.py | 28 +++++++++++++++---- .../src/start_execution_worker.py | 14 ++++++++-- 2 files changed, 33 insertions(+), 9 deletions(-) diff --git a/apps/render_cf.py b/apps/render_cf.py index 07c27c6a0..143a22870 100644 --- a/apps/render_cf.py +++ b/apps/render_cf.py @@ -137,12 +137,7 @@ def parse_job_step_map(job_step_map: str) -> tuple[str, str]: def get_batch_job_parameters(job_spec: dict, job_step: dict, map_item: str = None) -> dict: - ref_prefix = 'Ref::' - param_names = { - arg.removeprefix(ref_prefix) - for arg in job_step['command'] - if arg.startswith(ref_prefix) - } + param_names = get_batch_param_names_for_job_step(job_step) batch_params = { f'{param}.$': f'$.batch_job_parameters.{param}' for param in job_spec['parameters'] @@ -154,6 +149,15 @@ def get_batch_job_parameters(job_spec: dict, job_step: dict, map_item: str = Non return batch_params +def get_batch_param_names_for_job_step(job_step: dict) -> set[str]: + ref_prefix = 'Ref::' + return { + arg.removeprefix(ref_prefix) + for arg in job_step['command'] + if arg.startswith(ref_prefix) + } + + def render_templates(job_types, compute_envs, security_environment, api_name): job_states = get_states_for_jobs(job_types) @@ -230,6 +234,17 @@ def get_compute_environments(job_types: dict, compute_env_file: Optional[Path]) return compute_envs +def render_batch_params_by_job_type(job_types: dict) -> None: + batch_params_by_job_type = {} + for job_type, job_spec in job_types.items(): + params = set() + for job_step in job_spec['steps']: + params.update(get_batch_param_names_for_job_step(job_step)) + batch_params_by_job_type[job_type] = list(params) + with (Path('apps') / 'start-execution-worker' / 'src' / 'batch_params_by_job_type.json').open('w') as f: + json.dump(batch_params_by_job_type, f, indent=2) + + def 
render_default_params_by_job_type(job_types: dict) -> None: default_params_by_job_type = { job_type: { @@ -273,6 +288,7 @@ def main(): compute_envs = get_compute_environments(job_types, args.compute_environment_file) + render_batch_params_by_job_type(job_types) render_default_params_by_job_type(job_types) render_costs(job_types, args.cost_profile) render_templates(job_types, compute_envs, args.security_environment, args.api_name) diff --git a/apps/start-execution-worker/src/start_execution_worker.py b/apps/start-execution-worker/src/start_execution_worker.py index 7c314ea5d..8d1caa168 100644 --- a/apps/start-execution-worker/src/start_execution_worker.py +++ b/apps/start-execution-worker/src/start_execution_worker.py @@ -1,5 +1,6 @@ import json import os +from pathlib import Path from typing import Any import boto3 @@ -8,6 +9,9 @@ STEP_FUNCTION = boto3.client('stepfunctions') +batch_params_file = Path(__file__).parent / 'batch_params_by_job_type.json' +BATCH_PARAMS_BY_JOB_TYPE = json.loads(batch_params_file.read_text()) + def convert_to_string(obj: Any) -> str: if isinstance(obj, list): @@ -15,8 +19,12 @@ def convert_to_string(obj: Any) -> str: return str(obj) -def convert_parameters_to_strings(parameters: dict[str, Any]) -> dict[str, str]: - return {key: convert_to_string(value) for key, value in parameters.items()} +def get_batch_job_parameters(job: dict) -> dict[str, str]: + return { + key: convert_to_string(value) + for key, value in job['job_parameters'].items() + if key in BATCH_PARAMS_BY_JOB_TYPE[job['job_type']] + } def submit_jobs(jobs: list[dict]) -> None: @@ -25,7 +33,7 @@ def submit_jobs(jobs: list[dict]) -> None: for job in jobs: # Convert parameters to strings so they can be passed to Batch; see: # https://docs.aws.amazon.com/batch/latest/APIReference/API_SubmitJob.html#Batch-SubmitJob-request-parameters - job['batch_job_parameters'] = convert_parameters_to_strings(job['job_parameters']) + job['batch_job_parameters'] = get_batch_job_parameters(job['job_parameters']) STEP_FUNCTION.start_execution( stateMachineArn=step_function_arn, input=json.dumps(job, sort_keys=True), From dfb93fa46085ae65858e4a43eb74b97ae9ccc34a Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Wed, 16 Oct 2024 14:58:48 -0800 Subject: [PATCH 106/163] pass entire job --- apps/start-execution-worker/src/start_execution_worker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/start-execution-worker/src/start_execution_worker.py b/apps/start-execution-worker/src/start_execution_worker.py index 8d1caa168..53e2cf56e 100644 --- a/apps/start-execution-worker/src/start_execution_worker.py +++ b/apps/start-execution-worker/src/start_execution_worker.py @@ -33,7 +33,7 @@ def submit_jobs(jobs: list[dict]) -> None: for job in jobs: # Convert parameters to strings so they can be passed to Batch; see: # https://docs.aws.amazon.com/batch/latest/APIReference/API_SubmitJob.html#Batch-SubmitJob-request-parameters - job['batch_job_parameters'] = get_batch_job_parameters(job['job_parameters']) + job['batch_job_parameters'] = get_batch_job_parameters(job) STEP_FUNCTION.start_execution( stateMachineArn=step_function_arn, input=json.dumps(job, sort_keys=True), From ef1efd113d162866dc2de4149ec32e7780ec858b Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Wed, 16 Oct 2024 15:04:42 -0800 Subject: [PATCH 107/163] gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index f1e057d6a..d37283ec2 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,7 @@ 
apps/api/src/hyp3_api/api-spec/job_parameters.yml apps/api/src/hyp3_api/job_validation_map.yml apps/step-function.json apps/**/*-cf.yml +apps/start-execution-worker/src/batch_params_by_job_type.json lib/dynamo/dynamo/*.json lib/dynamo/dynamo/*.yml From 88f2f06f668b3762d307d3e18c5e6d97932000c1 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Thu, 17 Oct 2024 12:51:37 -0400 Subject: [PATCH 108/163] test fix --- tests/test_api/test_validation.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/test_api/test_validation.py b/tests/test_api/test_validation.py index bbd40be87..01912dda3 100644 --- a/tests/test_api/test_validation.py +++ b/tests/test_api/test_validation.py @@ -539,18 +539,17 @@ def test_check_granules_intersecting_bounds(): def test_check_same_relative_orbits(): - # FIXME: this test fails valid_granule_metadata = [ {'name': 'S1A_IW_RAW__0SDV_20201015T161622_20201015T161654_034809_040E95_AF3C'}, {'name': 'S1A_IW_RAW__0SDV_20200816T161620_20200816T161652_033934_03EFCE_5730'}, {'name': 'S1B_IW_RAW__0SDV_20200810T161537_20200810T161610_022863_02B66A_F7D7'}, {'name': 'S1B_IW_RAW__0SDV_20200623T161535_20200623T161607_022163_02A10F_7FD6'} ] - invalid_granule_metadata = valid_granule_metadata + invalid_granule_metadata = valid_granule_metadata.copy() invalid_granule_metadata.append( {'name': 'S1B_IW_RAW__0SDV_20200623T161535_20200623T161607_012345_02A10F_7FD6'} ) validation.check_same_relative_orbits({}, valid_granule_metadata) - error_pattern = r'.*23 is not 87.*' + error_pattern = r'.*69 is not 87.*' with raises(validation.GranuleValidationError, match=error_pattern): validation.check_same_relative_orbits({}, invalid_granule_metadata) From 94eb5ec15472bdcaa8867eac9cf8774fcde9f083 Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Thu, 17 Oct 2024 09:52:53 -0800 Subject: [PATCH 109/163] remove job definition default param values --- apps/workflow-cf.yml.j2 | 4 ---- 1 file changed, 4 deletions(-) diff --git a/apps/workflow-cf.yml.j2 b/apps/workflow-cf.yml.j2 index 4db02e93b..1f53a4392 100644 --- a/apps/workflow-cf.yml.j2 +++ b/apps/workflow-cf.yml.j2 @@ -49,10 +49,6 @@ Resources: Type: AWS::Batch::JobDefinition Properties: Type: container - Parameters: - {% for k, v in job_spec['parameters'].items() %} - {{ k }}: {{ v.get('default') or v['api_schema'].get('default') }} - {% endfor %} ContainerProperties: Image: {% if 'image_tag' in job_step -%} "{{ job_step['image'] }}:{{ job_step['image_tag'] }}" From 8e54a2a0e9a92507ae9a8c084d8128813297cdfc Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Thu, 17 Oct 2024 10:06:41 -0800 Subject: [PATCH 110/163] try removing non-api-schema default job params --- apps/render_cf.py | 2 +- job_spec/INSAR_ISCE_BURST.yml | 3 --- job_spec/SRG_TIME_SERIES.yml | 4 ---- 3 files changed, 1 insertion(+), 8 deletions(-) diff --git a/apps/render_cf.py b/apps/render_cf.py index 143a22870..131e6004a 100644 --- a/apps/render_cf.py +++ b/apps/render_cf.py @@ -249,7 +249,7 @@ def render_default_params_by_job_type(job_types: dict) -> None: default_params_by_job_type = { job_type: { key: value['api_schema']['default'] for key, value in job_spec['parameters'].items() - if key not in job_spec['required_parameters'] and 'api_schema' in value + if key not in job_spec['required_parameters'] } for job_type, job_spec in job_types.items() } diff --git a/job_spec/INSAR_ISCE_BURST.yml b/job_spec/INSAR_ISCE_BURST.yml index 1ae8de7f9..8a464acc6 100644 --- a/job_spec/INSAR_ISCE_BURST.yml +++ b/job_spec/INSAR_ISCE_BURST.yml @@ -3,7 +3,6 @@ 
INSAR_ISCE_BURST: - granules parameters: granules: - default: '""' api_schema: type: array minItems: 2 @@ -18,8 +17,6 @@ INSAR_ISCE_BURST: minLength: 43 maxLength: 43 example: S1_136231_IW2_20200604T022312_VV_7C85-BURST - bucket_prefix: - default: '""' apply_water_mask: api_schema: description: Sets pixels over coastal and large inland waterbodies as invalid for phase unwrapping. diff --git a/job_spec/SRG_TIME_SERIES.yml b/job_spec/SRG_TIME_SERIES.yml index 21b0b43e0..7d9735e57 100644 --- a/job_spec/SRG_TIME_SERIES.yml +++ b/job_spec/SRG_TIME_SERIES.yml @@ -3,7 +3,6 @@ SRG_TIME_SERIES: - granules parameters: granules: - default: '""' api_schema: type: array minItems: 1 @@ -19,10 +18,7 @@ SRG_TIME_SERIES: minLength: 67 maxLength: 67 example: S1A_IW_RAW__0SDV_20240629T020812_20240629T020845_054532_06A2F8_8276 - bucket_prefix: - default: '""' bounds: - default: '""' api_schema: type: array description: Bounds for extent of processing, formatted like [min lon, min lat, max lon, max lat] in EPSG:4326. Setting to [0, 0, 0, 0] will use the extent of the first granule. From 0b171b54f442efe737f48ff9d1dfddb029ebb8d2 Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Thu, 17 Oct 2024 10:08:45 -0800 Subject: [PATCH 111/163] fix SRG_GSLC --- job_spec/SRG_GSLC.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/job_spec/SRG_GSLC.yml b/job_spec/SRG_GSLC.yml index ccff7c84e..0ac0f690c 100644 --- a/job_spec/SRG_GSLC.yml +++ b/job_spec/SRG_GSLC.yml @@ -3,7 +3,6 @@ SRG_GSLC: - granules parameters: granules: - default: '""' api_schema: type: array minItems: 1 @@ -18,10 +17,7 @@ SRG_GSLC: minLength: 67 maxLength: 67 example: S1A_IW_RAW__0SDV_20231229T134404_20231229T134436_051870_064437_5F38 - bucket_prefix: - default: '""' bounds: - default: '""' api_schema: type: array description: Bounds for extent of processing, formatted like [min lon, min lat, max lon, max lat] in EPSG:4326. Setting to [0, 0, 0, 0] will use the extent of the first granule. 
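A condensed, runnable sketch of the Ref:: parameter filtering that the next patch builds on. The two helper bodies mirror the render_cf.py diffs from PATCH 103 above; the job_spec and job_step values are invented here purely for illustration and do not come from a real job spec.

def get_batch_param_names_for_job_step(job_step: dict) -> set[str]:
    # A Batch container command references a job parameter as 'Ref::<name>'.
    ref_prefix = 'Ref::'
    return {
        arg.removeprefix(ref_prefix)
        for arg in job_step['command']
        if arg.startswith(ref_prefix)
    }

def get_batch_job_parameters(job_spec: dict, job_step: dict) -> dict:
    # Keep only the spec parameters that this step's command actually uses,
    # mapping each to a JSONPath into the step function execution input.
    step_params = get_batch_param_names_for_job_step(job_step)
    return {
        f'{param}.$': f'$.batch_job_parameters.{param}'
        for param in job_spec['parameters']
        if param in step_params
    }

job_spec = {'parameters': {'granules': {}, 'bounds': {}}}
job_step = {'command': ['++process', 'time_series', '--bounds', 'Ref::bounds', 'Ref::granules']}

assert get_batch_job_parameters(job_spec, job_step) == {
    'bounds.$': '$.batch_job_parameters.bounds',
    'granules.$': '$.batch_job_parameters.granules',
}

Because bucket_prefix is injected by the workflow rather than declared under parameters in the job specs, this filter would drop it; the next patch adds it to the candidate list explicitly.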
From 06422a2a435f0c185ec898fc7d374ba2465cf23b Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Thu, 17 Oct 2024 10:33:04 -0800 Subject: [PATCH 112/163] include bucket_prefix in batch params --- apps/render_cf.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/apps/render_cf.py b/apps/render_cf.py index 131e6004a..fd356e0ed 100644 --- a/apps/render_cf.py +++ b/apps/render_cf.py @@ -137,14 +137,15 @@ def parse_job_step_map(job_step_map: str) -> tuple[str, str]: def get_batch_job_parameters(job_spec: dict, job_step: dict, map_item: str = None) -> dict: - param_names = get_batch_param_names_for_job_step(job_step) + job_params = ['bucket_prefix', *job_spec['parameters'].keys()] + step_params = get_batch_param_names_for_job_step(job_step) batch_params = { f'{param}.$': f'$.batch_job_parameters.{param}' - for param in job_spec['parameters'] - if param in param_names + for param in job_params + if param in step_params } if map_item is not None: - assert map_item in param_names + assert map_item in step_params batch_params[f'{map_item}.$'] = '$$.Map.Item.Value' return batch_params From 7e2502e5ed0136d322760649f294740441e06226 Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Thu, 17 Oct 2024 10:34:30 -0800 Subject: [PATCH 113/163] assume all params have api_schema in api spec template --- apps/api/src/hyp3_api/api-spec/job_parameters.yml.j2 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/apps/api/src/hyp3_api/api-spec/job_parameters.yml.j2 b/apps/api/src/hyp3_api/api-spec/job_parameters.yml.j2 index 0351fbf7a..83ea5c963 100644 --- a/apps/api/src/hyp3_api/api-spec/job_parameters.yml.j2 +++ b/apps/api/src/hyp3_api/api-spec/job_parameters.yml.j2 @@ -26,13 +26,13 @@ components: description: Parameters for running {{ job_type }} jobs type: object additionalProperties: false - {% for parameter, parameter_spec in job_spec['parameters'].items() if 'api_schema' in parameter_spec and parameter in job_spec.get('required_parameters', []) %} + {% for parameter, parameter_spec in job_spec['parameters'].items() if parameter in job_spec.get('required_parameters', []) %} {% if loop.first %} required: {% endif %} - {{ parameter }} {% endfor %} - {% for parameter, parameter_spec in job_spec['parameters'].items() if 'api_schema' in parameter_spec %} + {% for parameter, parameter_spec in job_spec['parameters'].items() %} {% if loop.first %} properties: {% endif %} From 98727e65e4091f16073167310c80189a82a8e921 Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Thu, 17 Oct 2024 10:58:31 -0800 Subject: [PATCH 114/163] finish removing non-api-schema defaults from job specs --- job_spec/ARIA_AUTORIFT.yml | 3 --- job_spec/ARIA_RAIDER.yml | 2 -- job_spec/AUTORIFT.yml | 3 --- job_spec/AUTORIFT_ITS_LIVE.yml | 3 --- job_spec/INSAR_GAMMA.yml | 3 --- job_spec/INSAR_ISCE.yml | 4 ---- job_spec/INSAR_ISCE_MULTI_BURST.yml | 4 ---- job_spec/RTC_GAMMA.yml | 3 --- job_spec/S1_CORRECTION_ITS_LIVE.yml | 3 --- job_spec/WATER_MAP.yml | 3 --- job_spec/WATER_MAP_EQ.yml | 3 --- 11 files changed, 34 deletions(-) diff --git a/job_spec/ARIA_AUTORIFT.yml b/job_spec/ARIA_AUTORIFT.yml index 09e4f7d02..fcfe08748 100644 --- a/job_spec/ARIA_AUTORIFT.yml +++ b/job_spec/ARIA_AUTORIFT.yml @@ -3,7 +3,6 @@ AUTORIFT: - granules parameters: granules: - default: '""' api_schema: type: array minItems: 2 @@ -31,8 +30,6 @@ AUTORIFT: minLength: 40 maxLength: 40 example: LC08_L1GT_118112_20210107_20210107_02_T2 - bucket_prefix: - default: '""' parameter_file: api_schema: description: Shapefile for determining the 
correct search parameters by geographic location. Path to shapefile must be understood by GDAL. diff --git a/job_spec/ARIA_RAIDER.yml b/job_spec/ARIA_RAIDER.yml index 823b2478b..951491f5a 100644 --- a/job_spec/ARIA_RAIDER.yml +++ b/job_spec/ARIA_RAIDER.yml @@ -19,8 +19,6 @@ ARIA_RAIDER: - GMAO - HRES - HRRR - bucket_prefix: - default: '""' cost_profiles: DEFAULT: cost: 1.0 diff --git a/job_spec/AUTORIFT.yml b/job_spec/AUTORIFT.yml index 20b108b64..7797fd52a 100644 --- a/job_spec/AUTORIFT.yml +++ b/job_spec/AUTORIFT.yml @@ -3,7 +3,6 @@ AUTORIFT: - granules parameters: granules: - default: '""' api_schema: type: array minItems: 2 @@ -31,8 +30,6 @@ AUTORIFT: minLength: 40 maxLength: 40 example: LC08_L1GT_118112_20210107_20210107_02_T2 - bucket_prefix: - default: '""' cost_profiles: EDC: cost: 25.0 diff --git a/job_spec/AUTORIFT_ITS_LIVE.yml b/job_spec/AUTORIFT_ITS_LIVE.yml index ab6f0fd2d..ecfeafc74 100644 --- a/job_spec/AUTORIFT_ITS_LIVE.yml +++ b/job_spec/AUTORIFT_ITS_LIVE.yml @@ -3,7 +3,6 @@ AUTORIFT: - granules parameters: granules: - default: '""' api_schema: type: array minItems: 2 @@ -31,8 +30,6 @@ AUTORIFT: minLength: 40 maxLength: 40 example: LC08_L1GT_118112_20210107_20210107_02_T2 - bucket_prefix: - default: '""' parameter_file: api_schema: description: Shapefile for determining the correct search parameters by geographic location. Path to shapefile must be understood by GDAL. diff --git a/job_spec/INSAR_GAMMA.yml b/job_spec/INSAR_GAMMA.yml index 5fbc01510..37f4c5fa0 100644 --- a/job_spec/INSAR_GAMMA.yml +++ b/job_spec/INSAR_GAMMA.yml @@ -3,7 +3,6 @@ INSAR_GAMMA: - granules parameters: granules: - default: '""' api_schema: type: array minItems: 2 @@ -18,8 +17,6 @@ INSAR_GAMMA: minLength: 67 maxLength: 67 example: S1A_IW_SLC__1SSV_20150621T120220_20150621T120232_006471_008934_72D8 - bucket_prefix: - default: '""' include_look_vectors: api_schema: description: Include the look vector theta and phi files in the product package diff --git a/job_spec/INSAR_ISCE.yml b/job_spec/INSAR_ISCE.yml index f1b6a1625..2ef7dd3fb 100644 --- a/job_spec/INSAR_ISCE.yml +++ b/job_spec/INSAR_ISCE.yml @@ -5,7 +5,6 @@ INSAR_ISCE: - frame_id parameters: granules: - default: '""' api_schema: type: array minItems: 1 @@ -17,7 +16,6 @@ INSAR_ISCE: maxLength: 67 example: S1B_IW_SLC__1SDV_20210723T014947_20210723T015014_027915_0354B4_B3A9 secondary_granules: - default: '""' api_schema: type: array minItems: 1 @@ -86,8 +84,6 @@ INSAR_ISCE: - GMAO - HRES - HRRR - bucket_prefix: - default: '""' cost_profiles: DEFAULT: cost: 1.0 diff --git a/job_spec/INSAR_ISCE_MULTI_BURST.yml b/job_spec/INSAR_ISCE_MULTI_BURST.yml index 9ade842ec..289421c4b 100644 --- a/job_spec/INSAR_ISCE_MULTI_BURST.yml +++ b/job_spec/INSAR_ISCE_MULTI_BURST.yml @@ -4,7 +4,6 @@ INSAR_ISCE_MULTI_BURST: - secondary parameters: reference: - default: '""' api_schema: type: array minItems: 1 @@ -20,7 +19,6 @@ INSAR_ISCE_MULTI_BURST: maxLength: 43 example: S1_136231_IW2_20200604T022312_VV_7C85-BURST secondary: - default: '""' api_schema: type: array minItems: 1 @@ -34,8 +32,6 @@ INSAR_ISCE_MULTI_BURST: minLength: 43 maxLength: 43 example: S1_136231_IW2_20200616T022313_VV_5D11-BURST - bucket_prefix: - default: '""' apply_water_mask: api_schema: description: Sets pixels over coastal and large inland waterbodies as invalid for phase unwrapping. 
diff --git a/job_spec/RTC_GAMMA.yml b/job_spec/RTC_GAMMA.yml index e0b3a867e..dcc5a8496 100644 --- a/job_spec/RTC_GAMMA.yml +++ b/job_spec/RTC_GAMMA.yml @@ -3,7 +3,6 @@ RTC_GAMMA: - granules parameters: granules: - default: '""' api_schema: type: array minItems: 1 @@ -24,8 +23,6 @@ RTC_GAMMA: minLength: 67 maxLength: 67 example: S1A_IW_SLC__1SSV_20150621T120220_20150621T120232_006471_008934_72D8 - bucket_prefix: - default: '""' resolution: api_schema: default: 30.0 diff --git a/job_spec/S1_CORRECTION_ITS_LIVE.yml b/job_spec/S1_CORRECTION_ITS_LIVE.yml index 3c956e9ee..16bdb9944 100644 --- a/job_spec/S1_CORRECTION_ITS_LIVE.yml +++ b/job_spec/S1_CORRECTION_ITS_LIVE.yml @@ -3,7 +3,6 @@ S1_CORRECTION_TEST: - granules parameters: granules: - default: '""' api_schema: type: array minItems: 1 @@ -23,8 +22,6 @@ S1_CORRECTION_TEST: default: 0 type: integer minimum: 0 - bucket_prefix: - default: '""' cost_profiles: DEFAULT: cost: 1.0 diff --git a/job_spec/WATER_MAP.yml b/job_spec/WATER_MAP.yml index 0501820fd..599a54a1b 100644 --- a/job_spec/WATER_MAP.yml +++ b/job_spec/WATER_MAP.yml @@ -3,7 +3,6 @@ WATER_MAP: - granules parameters: granules: - default: '""' api_schema: type: array minItems: 1 @@ -22,8 +21,6 @@ WATER_MAP: minLength: 67 maxLength: 67 example: S1A_IW_SLC__1SDV_20211110T234815_20211110T234842_040516_04CE0A_E717 - bucket_prefix: - default: '""' resolution: api_schema: default: 30.0 diff --git a/job_spec/WATER_MAP_EQ.yml b/job_spec/WATER_MAP_EQ.yml index 9c6207cfd..37561e499 100644 --- a/job_spec/WATER_MAP_EQ.yml +++ b/job_spec/WATER_MAP_EQ.yml @@ -3,7 +3,6 @@ WATER_MAP_EQ: - granules parameters: granules: - default: '""' api_schema: type: array minItems: 1 @@ -22,8 +21,6 @@ WATER_MAP_EQ: minLength: 67 maxLength: 67 example: S1A_IW_SLC__1SDV_20211110T234815_20211110T234842_040516_04CE0A_E717 - bucket_prefix: - default: '""' resolution: api_schema: default: 30.0 From f91914969afc64ebcde2b3b1c9f5aeb3d87502d7 Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Thu, 17 Oct 2024 11:05:22 -0800 Subject: [PATCH 115/163] image tag --- job_spec/SRG_TIME_SERIES.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/job_spec/SRG_TIME_SERIES.yml b/job_spec/SRG_TIME_SERIES.yml index 7d9735e57..7efa865f5 100644 --- a/job_spec/SRG_TIME_SERIES.yml +++ b/job_spec/SRG_TIME_SERIES.yml @@ -47,7 +47,7 @@ SRG_TIME_SERIES: image: ghcr.io/asfhyp3/hyp3-srg # TODO revert image_tag #image_tag: latest.gpu - image_tag: 0.8.1.dev47_g2d96d14.gpu + image_tag: 0.8.1.dev50_gf0b8347.gpu command: - ++process - back_projection @@ -72,7 +72,7 @@ SRG_TIME_SERIES: - name: '' image: ghcr.io/asfhyp3/hyp3-srg # TODO image tag - image_tag: 0.8.1.dev47_g2d96d14.cpu + image_tag: 0.8.1.dev50_gf0b8347.cpu command: - ++process - time_series From 6cdec5e76718f438a1b0f544f0c6a6542f18491a Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Thu, 17 Oct 2024 13:33:42 -0800 Subject: [PATCH 116/163] fix check_processing_time unit tests --- .../src/check_processing_time.py | 1 - tests/test_check_processing_time.py | 36 +++++++------------ 2 files changed, 12 insertions(+), 25 deletions(-) diff --git a/apps/check-processing-time/src/check_processing_time.py b/apps/check-processing-time/src/check_processing_time.py index c0e1d16ac..bb5456e13 100644 --- a/apps/check-processing-time/src/check_processing_time.py +++ b/apps/check-processing-time/src/check_processing_time.py @@ -15,7 +15,6 @@ def get_time_from_result(result: Union[list, dict]) -> Union[list, float]: return [get_time_from_result(item) for item in result] if 'start' in 
result: - # TODO: update tests attempts = [{'StartedAt': start, 'StoppedAt': stop} for start, stop in zip(result['start'], result['stop'])] return get_time_from_attempts(attempts) diff --git a/tests/test_check_processing_time.py b/tests/test_check_processing_time.py index 7f1597732..9ad6d1e90 100644 --- a/tests/test_check_processing_time.py +++ b/tests/test_check_processing_time.py @@ -41,10 +41,8 @@ def test_no_attempts(): def test_get_time_from_result(): result = { - 'Attempts': [ - {'Container': {}, 'StartedAt': 500, 'StatusReason': '', 'StoppedAt': 1000}, - {'Container': {}, 'StartedAt': 3000, 'StatusReason': '', 'StoppedAt': 8700} - ] + 'start': [500, 3000], + 'stop': [1000, 8700], } assert check_processing_time.get_time_from_result(result) == 5.7 @@ -52,16 +50,12 @@ def test_get_time_from_result(): def test_get_time_from_result_list(): result = [ { - 'Attempts': [ - {'Container': {}, 'StartedAt': 500, 'StatusReason': '', 'StoppedAt': 1000}, - {'Container': {}, 'StartedAt': 3000, 'StatusReason': '', 'StoppedAt': 8900} - ] + 'start': [500, 3000], + 'stop': [1000, 8900], }, { - 'Attempts': [ - {'Container': {}, 'StartedAt': 500, 'StatusReason': '', 'StoppedAt': 3000}, - {'Container': {}, 'StartedAt': 4000, 'StatusReason': '', 'StoppedAt': 4200} - ] + 'start': [500, 4000], + 'stop': [3000, 4200], }, ] assert check_processing_time.get_time_from_result(result) == [5.9, 0.2] @@ -82,10 +76,8 @@ def test_lambda_handler(): event = { 'processing_results': { 'step_0': { - 'Attempts': [ - {'Container': {}, 'StartedAt': 500, 'StatusReason': '', 'StoppedAt': 1000}, - {'Container': {}, 'StartedAt': 3000, 'StatusReason': '', 'StoppedAt': 8700} - ] + 'start': [500, 3000], + 'stop': [1000, 8700], }, 'step_1': { 'Error': 'States.TaskFailed', @@ -96,16 +88,12 @@ def test_lambda_handler(): }, 'step_2': [ { - 'Attempts': [ - {'Container': {}, 'StartedAt': 500, 'StatusReason': '', 'StoppedAt': 1000}, - {'Container': {}, 'StartedAt': 3000, 'StatusReason': '', 'StoppedAt': 8900} - ] + 'start': [500, 3000], + 'stop': [1000, 8900], }, { - 'Attempts': [ - {'Container': {}, 'StartedAt': 500, 'StatusReason': '', 'StoppedAt': 3000}, - {'Container': {}, 'StartedAt': 4000, 'StatusReason': '', 'StoppedAt': 4200} - ] + 'start': [500, 4000], + 'stop': [3000, 4200], }, ] } From 01e4f435ccd9dedb5b8ef9b23fe4d3b57bcfb3b7 Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Thu, 17 Oct 2024 14:01:35 -0800 Subject: [PATCH 117/163] fix `test_start_execution_worker.py::test_submit_jobs` --- .../src/start_execution_worker.py | 6 +- tests/test_start_execution_worker.py | 98 +++++++++++++++++-- 2 files changed, 97 insertions(+), 7 deletions(-) diff --git a/apps/start-execution-worker/src/start_execution_worker.py b/apps/start-execution-worker/src/start_execution_worker.py index 53e2cf56e..97cb406b0 100644 --- a/apps/start-execution-worker/src/start_execution_worker.py +++ b/apps/start-execution-worker/src/start_execution_worker.py @@ -10,7 +10,11 @@ STEP_FUNCTION = boto3.client('stepfunctions') batch_params_file = Path(__file__).parent / 'batch_params_by_job_type.json' -BATCH_PARAMS_BY_JOB_TYPE = json.loads(batch_params_file.read_text()) +if batch_params_file.exists(): + BATCH_PARAMS_BY_JOB_TYPE = json.loads(batch_params_file.read_text()) +else: + # Allows mocking with unittest.mock.patch + BATCH_PARAMS_BY_JOB_TYPE = {} def convert_to_string(obj: Any) -> str: diff --git a/tests/test_start_execution_worker.py b/tests/test_start_execution_worker.py index c391286e9..4bd641612 100644 --- a/tests/test_start_execution_worker.py +++ 
b/tests/test_start_execution_worker.py @@ -31,9 +31,16 @@ def test_convert_parameters_to_string(): def test_submit_jobs(): + batch_params_by_job_type = { + 'JOB_0': ['granules', 'string_field', 'boolean_field', 'float_field', 'integer_field'], + 'JOB_1': ['string_field', 'boolean_field'], + 'JOB_2': [], + } + jobs = [ { 'job_id': 'job0', + 'job_type': 'JOB_0', 'string_field': 'value1', 'boolean_field': True, 'float_field': 10.1, @@ -51,13 +58,46 @@ def test_submit_jobs(): }, { 'job_id': 'job1', - 'job_parameters': {'granules': ['granule1']}, - } + 'job_type': 'JOB_1', + 'string_field': 'value1', + 'boolean_field': True, + 'float_field': 10.1, + 'integer_field': 10, + 'job_parameters': { + 'granules': [ + 'granule1', + 'granule2', + ], + 'string_field': 'value1', + 'boolean_field': True, + 'float_field': 10.1, + 'integer_field': 10, + }, + }, + { + 'job_id': 'job2', + 'job_type': 'JOB_2', + 'string_field': 'value1', + 'boolean_field': True, + 'float_field': 10.1, + 'integer_field': 10, + 'job_parameters': { + 'granules': [ + 'granule1', + 'granule2', + ], + 'string_field': 'value1', + 'boolean_field': True, + 'float_field': 10.1, + 'integer_field': 10, + }, + }, ] expected_input_job0 = json.dumps( { 'job_id': 'job0', + 'job_type': 'JOB_0', 'string_field': 'value1', 'boolean_field': True, 'float_field': 10.1, @@ -86,14 +126,55 @@ def test_submit_jobs(): expected_input_job1 = json.dumps( { 'job_id': 'job1', - 'job_parameters': {'granules': ['granule1']}, - 'batch_job_parameters': {'granules': 'granule1'}, + 'job_type': 'JOB_1', + 'string_field': 'value1', + 'boolean_field': True, + 'float_field': 10.1, + 'integer_field': 10, + 'job_parameters': { + 'granules': [ + 'granule1', + 'granule2', + ], + 'string_field': 'value1', + 'boolean_field': True, + 'float_field': 10.1, + 'integer_field': 10, + }, + 'batch_job_parameters': { + 'string_field': 'value1', + 'boolean_field': 'True', + }, + }, + sort_keys=True, + ) + + expected_input_job2 = json.dumps( + { + 'job_id': 'job2', + 'job_type': 'JOB_2', + 'string_field': 'value1', + 'boolean_field': True, + 'float_field': 10.1, + 'integer_field': 10, + 'job_parameters': { + 'granules': [ + 'granule1', + 'granule2', + ], + 'string_field': 'value1', + 'boolean_field': True, + 'float_field': 10.1, + 'integer_field': 10, + }, + 'batch_job_parameters': {}, }, sort_keys=True, ) with patch('start_execution_worker.STEP_FUNCTION.start_execution') as mock_start_execution, \ - patch.dict(os.environ, {'STEP_FUNCTION_ARN': 'test-state-machine-arn'}, clear=True): + patch.dict(os.environ, {'STEP_FUNCTION_ARN': 'test-state-machine-arn'}, clear=True), \ + patch('start_execution_worker.BATCH_PARAMS_BY_JOB_TYPE', batch_params_by_job_type): start_execution_worker.submit_jobs(jobs) assert mock_start_execution.mock_calls == [ @@ -106,7 +187,12 @@ def test_submit_jobs(): stateMachineArn='test-state-machine-arn', input=expected_input_job1, name='job1', - ) + ), + call( + stateMachineArn='test-state-machine-arn', + input=expected_input_job2, + name='job2', + ), ] From e19abb7a6f2e8334876873be32d84ee301d6bed0 Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Thu, 17 Oct 2024 14:06:11 -0800 Subject: [PATCH 118/163] move a comment, delete old unit test --- .../src/start_execution_worker.py | 4 ++-- tests/test_start_execution_worker.py | 17 ----------------- 2 files changed, 2 insertions(+), 19 deletions(-) diff --git a/apps/start-execution-worker/src/start_execution_worker.py b/apps/start-execution-worker/src/start_execution_worker.py index 97cb406b0..63b3b2a72 100644 --- 
a/apps/start-execution-worker/src/start_execution_worker.py +++ b/apps/start-execution-worker/src/start_execution_worker.py @@ -24,6 +24,8 @@ def convert_to_string(obj: Any) -> str: def get_batch_job_parameters(job: dict) -> dict[str, str]: + # Convert parameters to strings so they can be passed to Batch; see: + # https://docs.aws.amazon.com/batch/latest/APIReference/API_SubmitJob.html#Batch-SubmitJob-request-parameters return { key: convert_to_string(value) for key, value in job['job_parameters'].items() @@ -35,8 +37,6 @@ def submit_jobs(jobs: list[dict]) -> None: step_function_arn = os.environ['STEP_FUNCTION_ARN'] logger.info(f'Step function ARN: {step_function_arn}') for job in jobs: - # Convert parameters to strings so they can be passed to Batch; see: - # https://docs.aws.amazon.com/batch/latest/APIReference/API_SubmitJob.html#Batch-SubmitJob-request-parameters job['batch_job_parameters'] = get_batch_job_parameters(job) STEP_FUNCTION.start_execution( stateMachineArn=step_function_arn, diff --git a/tests/test_start_execution_worker.py b/tests/test_start_execution_worker.py index 4bd641612..1a26d469b 100644 --- a/tests/test_start_execution_worker.py +++ b/tests/test_start_execution_worker.py @@ -13,23 +13,6 @@ def test_convert_to_string(): assert start_execution_worker.convert_to_string('abc') == 'abc' -def test_convert_parameters_to_string(): - parameters = { - 'param1': 1, - 'param2': True, - 'param3': [1, 2], - 'param4': ['abc', 'bcd'], - 'param5': 'abc', - } - assert start_execution_worker.convert_parameters_to_strings(parameters) == { - 'param1': '1', - 'param2': 'True', - 'param3': '1 2', - 'param4': 'abc bcd', - 'param5': 'abc', - } - - def test_submit_jobs(): batch_params_by_job_type = { 'JOB_0': ['granules', 'string_field', 'boolean_field', 'float_field', 'integer_field'], From 335457c383801a034150e72ea64037a91a7937d6 Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Thu, 17 Oct 2024 15:55:18 -0800 Subject: [PATCH 119/163] image tags --- job_spec/SRG_TIME_SERIES.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/job_spec/SRG_TIME_SERIES.yml b/job_spec/SRG_TIME_SERIES.yml index 7efa865f5..307275dd5 100644 --- a/job_spec/SRG_TIME_SERIES.yml +++ b/job_spec/SRG_TIME_SERIES.yml @@ -47,7 +47,7 @@ SRG_TIME_SERIES: image: ghcr.io/asfhyp3/hyp3-srg # TODO revert image_tag #image_tag: latest.gpu - image_tag: 0.8.1.dev50_gf0b8347.gpu + image_tag: 0.8.1.dev51_g6a0dec3.gpu command: - ++process - back_projection @@ -72,7 +72,7 @@ SRG_TIME_SERIES: - name: '' image: ghcr.io/asfhyp3/hyp3-srg # TODO image tag - image_tag: 0.8.1.dev50_gf0b8347.cpu + image_tag: 0.8.1.dev51_g6a0dec3.cpu command: - ++process - time_series From bd25c2bb76ad9dac94f28ecb88c51c10700e826d Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Thu, 17 Oct 2024 16:50:40 -0800 Subject: [PATCH 120/163] image tags --- job_spec/SRG_TIME_SERIES.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/job_spec/SRG_TIME_SERIES.yml b/job_spec/SRG_TIME_SERIES.yml index 307275dd5..7efa865f5 100644 --- a/job_spec/SRG_TIME_SERIES.yml +++ b/job_spec/SRG_TIME_SERIES.yml @@ -47,7 +47,7 @@ SRG_TIME_SERIES: image: ghcr.io/asfhyp3/hyp3-srg # TODO revert image_tag #image_tag: latest.gpu - image_tag: 0.8.1.dev51_g6a0dec3.gpu + image_tag: 0.8.1.dev50_gf0b8347.gpu command: - ++process - back_projection @@ -72,7 +72,7 @@ SRG_TIME_SERIES: - name: '' image: ghcr.io/asfhyp3/hyp3-srg # TODO image tag - image_tag: 0.8.1.dev51_g6a0dec3.cpu + image_tag: 0.8.1.dev50_gf0b8347.cpu command: - ++process - time_series From 
aaa8f5cb1d4f77d80bacf05d3f7bff2b5a9634a2 Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Thu, 17 Oct 2024 17:41:51 -0800 Subject: [PATCH 121/163] validate job specs --- apps/render_cf.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/apps/render_cf.py b/apps/render_cf.py index fd356e0ed..80c40e3a9 100644 --- a/apps/render_cf.py +++ b/apps/render_cf.py @@ -270,6 +270,28 @@ def render_costs(job_types: dict, cost_profile: str) -> None: json.dump(costs, f, indent=2) +def validate_job_spec(job_type: str, job_spec: dict) -> None: + # Non-comprehensive job spec validator. More checks could be added if we want to be more thorough. + + expected_fields = sorted(['required_parameters', 'parameters', 'cost_profiles', 'validators', 'steps']) + actual_fields = sorted(job_spec.keys()) + if actual_fields != expected_fields: + raise ValueError(f'{job_type} has fields {actual_fields} but should have {expected_fields}') + + reserved_params = {'bucket_prefix'} + reserved_params_in_spec = reserved_params.intersection(set(job_spec['parameters'].keys())) + if reserved_params_in_spec: + raise ValueError(f'{job_type} contains reserved parameter names: {sorted(reserved_params_in_spec)}') + + expected_param_fields = ['api_schema'] + for param_name, param_dict in job_spec['parameters'].items(): + actual_param_fields = sorted(param_dict.keys()) + if actual_param_fields != expected_param_fields: + raise ValueError( + f"parameter '{param_name}' for {job_type} has fields {actual_param_fields} but should have {expected_param_fields}" + ) + + def main(): parser = argparse.ArgumentParser() parser.add_argument('-j', '--job-spec-files', required=True, nargs='+', type=Path) @@ -283,6 +305,9 @@ def main(): for file in args.job_spec_files: job_types.update(yaml.safe_load(file.read_text())) + for job_type, job_spec in job_types.items(): + validate_job_spec(job_type, job_spec) + for job_type, job_spec in job_types.items(): for job_step in job_spec['steps']: job_step['name'] = job_type + '_' + job_step['name'] if job_step['name'] else job_type From 2be9e83f8c887dfb441c25b5f9f5cc85040b21f2 Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Thu, 17 Oct 2024 18:30:11 -0800 Subject: [PATCH 122/163] job_step -> step --- apps/render_cf.py | 64 ++++++++++++++++++++--------------------- apps/workflow-cf.yml.j2 | 34 +++++++++++----------- 2 files changed, 49 insertions(+), 49 deletions(-) diff --git a/apps/render_cf.py b/apps/render_cf.py index 80c40e3a9..f102babe6 100644 --- a/apps/render_cf.py +++ b/apps/render_cf.py @@ -21,19 +21,19 @@ def get_states_for_jobs(job_types: dict) -> dict: def get_states_for_job(job_spec: dict) -> dict: states = {} - job_steps = job_spec['steps'] - for i in range(len(job_steps)): - job_step = job_steps[i] - next_state_name = job_steps[i + 1]['name'] if i < len(job_steps) - 1 else 'GET_FILES' - states[job_step['name']] = get_state_for_job_step(job_step, i, next_state_name, job_spec) + steps = job_spec['steps'] + for i in range(len(steps)): + step = steps[i] + next_state_name = steps[i + 1]['name'] if i < len(steps) - 1 else 'GET_FILES' + states[step['name']] = get_state_for_job_step(step, i, next_state_name, job_spec) return states -def get_state_for_job_step(job_step: dict, index: int, next_state_name: str, job_spec: dict) -> dict: - if 'map' in job_step: - state = get_map_state(job_spec, job_step) +def get_state_for_job_step(step: dict, index: int, next_state_name: str, job_spec: dict) -> dict: + if 'map' in step: + state = get_map_state(job_spec, step) else: - state = 
get_batch_submit_job_state(job_spec, job_step, filter_batch_params=True) + state = get_batch_submit_job_state(job_spec, step, filter_batch_params=True) state.update( { 'Catch': [ @@ -52,14 +52,14 @@ def get_state_for_job_step(job_step: dict, index: int, next_state_name: str, job return state -def get_map_state(job_spec: dict, job_step: dict) -> dict: - item, items = parse_job_step_map(job_step['map']) +def get_map_state(job_spec: dict, step: dict) -> dict: + item, items = parse_job_step_map(step['map']) - batch_job_parameters = get_batch_job_parameters(job_spec, job_step, map_item=item) + batch_job_parameters = get_batch_job_parameters(job_spec, step, map_item=item) - submit_job_state = get_batch_submit_job_state(job_spec, job_step) + submit_job_state = get_batch_submit_job_state(job_spec, step) submit_job_state['End'] = True - submit_job_state_name = job_step['name'] + '_SUBMIT_JOB' + submit_job_state_name = step['name'] + '_SUBMIT_JOB' return { 'Type': 'Map', 'ItemsPath': f'$.job_parameters.{items}', @@ -78,25 +78,25 @@ def get_map_state(job_spec: dict, job_step: dict) -> dict: } -def get_batch_submit_job_state(job_spec: dict, job_step: dict, filter_batch_params = False) -> dict: +def get_batch_submit_job_state(job_spec: dict, step: dict, filter_batch_params = False) -> dict: if filter_batch_params: - batch_job_parameters = get_batch_job_parameters(job_spec, job_step) + batch_job_parameters = get_batch_job_parameters(job_spec, step) parameters_key = 'Parameters' else: batch_job_parameters = '$.batch_job_parameters' parameters_key = 'Parameters.$' - if 'import' in job_step['compute_environment']: - compute_environment = job_step['compute_environment']['import'] + if 'import' in step['compute_environment']: + compute_environment = step['compute_environment']['import'] else: - compute_environment = job_step['compute_environment']['name'] + compute_environment = step['compute_environment']['name'] job_queue = 'JobQueueArn' if compute_environment == 'Default' else compute_environment + 'JobQueueArn' return { 'Type': 'Task', 'Resource': 'arn:aws:states:::batch:submitJob.sync', 'Parameters': { - 'JobDefinition': '${' + snake_to_pascal_case(job_step['name']) + '}', + 'JobDefinition': '${' + snake_to_pascal_case(step['name']) + '}', 'JobName.$': '$.job_id', 'JobQueue': '${' + job_queue + '}', 'ShareIdentifier': 'default', @@ -129,16 +129,16 @@ def get_batch_submit_job_state(job_spec: dict, job_step: dict, filter_batch_para } -def parse_job_step_map(job_step_map: str) -> tuple[str, str]: - tokens = job_step_map.split(' ') +def parse_job_step_map(step_map: str) -> tuple[str, str]: + tokens = step_map.split(' ') assert len(tokens) == 4 assert tokens[0], tokens[2] == ('for', 'in') return tokens[1], tokens[3] -def get_batch_job_parameters(job_spec: dict, job_step: dict, map_item: str = None) -> dict: +def get_batch_job_parameters(job_spec: dict, step: dict, map_item: str = None) -> dict: job_params = ['bucket_prefix', *job_spec['parameters'].keys()] - step_params = get_batch_param_names_for_job_step(job_step) + step_params = get_batch_param_names_for_job_step(step) batch_params = { f'{param}.$': f'$.batch_job_parameters.{param}' for param in job_params @@ -150,11 +150,11 @@ def get_batch_job_parameters(job_spec: dict, job_step: dict, map_item: str = Non return batch_params -def get_batch_param_names_for_job_step(job_step: dict) -> set[str]: +def get_batch_param_names_for_job_step(step: dict) -> set[str]: ref_prefix = 'Ref::' return { arg.removeprefix(ref_prefix) - for arg in job_step['command'] + for 
arg in step['command'] if arg.startswith(ref_prefix) } @@ -197,8 +197,8 @@ def get_compute_environments(job_types: dict, compute_env_file: Optional[Path]) compute_env_names = set() compute_env_imports = set() for _, job_spec in job_types.items(): - for job_step in job_spec['steps']: - compute_env = job_step['compute_environment'] + for step in job_spec['steps']: + compute_env = step['compute_environment'] if 'name' in compute_env: name = compute_env['name'] assert name != 'Default' @@ -239,8 +239,8 @@ def render_batch_params_by_job_type(job_types: dict) -> None: batch_params_by_job_type = {} for job_type, job_spec in job_types.items(): params = set() - for job_step in job_spec['steps']: - params.update(get_batch_param_names_for_job_step(job_step)) + for step in job_spec['steps']: + params.update(get_batch_param_names_for_job_step(step)) batch_params_by_job_type[job_type] = list(params) with (Path('apps') / 'start-execution-worker' / 'src' / 'batch_params_by_job_type.json').open('w') as f: json.dump(batch_params_by_job_type, f, indent=2) @@ -309,8 +309,8 @@ def main(): validate_job_spec(job_type, job_spec) for job_type, job_spec in job_types.items(): - for job_step in job_spec['steps']: - job_step['name'] = job_type + '_' + job_step['name'] if job_step['name'] else job_type + for step in job_spec['steps']: + step['name'] = job_type + '_' + step['name'] if step['name'] else job_type compute_envs = get_compute_environments(job_types, args.compute_environment_file) diff --git a/apps/workflow-cf.yml.j2 b/apps/workflow-cf.yml.j2 index 1f53a4392..e04a1068c 100644 --- a/apps/workflow-cf.yml.j2 +++ b/apps/workflow-cf.yml.j2 @@ -44,41 +44,41 @@ Outputs: Resources: {% for job_type, job_spec in job_types.items() %} - {% for job_step in job_spec['steps'] %} - {{ snake_to_pascal_case(job_step['name']) }}: + {% for step in job_spec['steps'] %} + {{ snake_to_pascal_case(step['name']) }}: Type: AWS::Batch::JobDefinition Properties: Type: container ContainerProperties: - Image: {% if 'image_tag' in job_step -%} - "{{ job_step['image'] }}:{{ job_step['image_tag'] }}" + Image: {% if 'image_tag' in step -%} + "{{ step['image'] }}:{{ step['image_tag'] }}" {% else -%} - !Sub "{{ job_step['image'] }}:${ImageTag}" + !Sub "{{ step['image'] }}:${ImageTag}" {% endif %} JobRoleArn: !Ref TaskRoleArn ExecutionRoleArn: !GetAtt ExecutionRole.Arn ResourceRequirements: - Type: VCPU - Value: "{{ job_step['vcpu'] }}" + Value: "{{ step['vcpu'] }}" - Type: MEMORY - Value: "{{ job_step['memory'] }}" - {% if 'gpu' in job_step %} + Value: "{{ step['memory'] }}" + {% if 'gpu' in step %} - Type: GPU - Value: "{{ job_step['gpu'] }}" + Value: "{{ step['gpu'] }}" {% endif %} Command: - {% for command in job_step['command'] %} + {% for command in step['command'] %} - {{ command }} {% endfor %} - {% if job_step.get('secrets') %} + {% if step.get('secrets') %} Secrets: - {% for secret in job_step['secrets'] %} + {% for secret in step['secrets'] %} - Name: {{ secret }} ValueFrom: !Sub "${SecretArn}:{{ secret }}::" {% endfor %} {% endif %} Timeout: - AttemptDurationSeconds: {{ job_step['timeout'] }} + AttemptDurationSeconds: {{ step['timeout'] }} {% endfor %} {% endfor %} @@ -93,8 +93,8 @@ Resources: {{ name }}JobQueueArn: !Ref {{ name }}JobQueueArn {% endfor %} {% for job_type, job_spec in job_types.items() %} - {% for job_step in job_spec['steps'] %} - {{ snake_to_pascal_case(job_step['name']) }}: !Ref {{ snake_to_pascal_case(job_step['name']) }} + {% for step in job_spec['steps'] %} + {{ snake_to_pascal_case(step['name']) }}: !Ref {{ 
snake_to_pascal_case(step['name']) }}
 {% endfor %}
 {% endfor %}
 UpdateDBLambdaArn: !GetAtt UpdateDB.Outputs.LambdaArn
@@ -138,8 +138,8 @@ Resources:
 - !Ref {{ name }}JobQueueArn
 {% endfor %}
 {% for job_type, job_spec in job_types.items() %}
- {% for job_step in job_spec['steps'] %}
- - !Ref {{ snake_to_pascal_case(job_step['name']) }}
+ {% for step in job_spec['steps'] %}
+ - !Ref {{ snake_to_pascal_case(step['name']) }}
 {% endfor %}
 {% endfor %}
 - Effect: Allow

From 0bbbadccab7f29db2640f1836eb2b3c3fe87bde0 Mon Sep 17 00:00:00 2001
From: Jake Herrmann
Date: Thu, 17 Oct 2024 18:32:17 -0800
Subject: [PATCH 123/163] time series todos, start changelog

---
 CHANGELOG.md | 6 ++++++
 job_spec/SRG_TIME_SERIES.yml | 5 ++---
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index c546afe62..2d26e0686 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,12 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+## [8.0.0]
+
+### Added
+
+### Changed
+
 ## [7.12.0]

 ### Changed
diff --git a/job_spec/SRG_TIME_SERIES.yml b/job_spec/SRG_TIME_SERIES.yml
index 7efa865f5..2bd682508 100644
--- a/job_spec/SRG_TIME_SERIES.yml
+++ b/job_spec/SRG_TIME_SERIES.yml
@@ -83,9 +83,8 @@ SRG_TIME_SERIES:
 - --bucket-prefix
 - Ref::bucket_prefix
 - --use-gslc-prefix
- timeout: 86400 # TODO
+ timeout: 21600 # 6 hr
 compute_environment:
 import: Default
 vcpu: 1
- memory: 30500 # TODO
- # TODO secrets?
+ memory: 30500 # TODO how much is needed?

From 573d9562ba044c215c46c83a820f25754af9042c Mon Sep 17 00:00:00 2001
From: Jake Herrmann
Date: Thu, 17 Oct 2024 19:36:46 -0800
Subject: [PATCH 124/163] add new srg job to lavas deployments, finish changelog

---
 .github/workflows/deploy-enterprise-test.yml | 1 +
 .github/workflows/deploy-enterprise.yml | 1 +
 CHANGELOG.md | 22 ++++++++++++++++++--
 3 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/deploy-enterprise-test.yml b/.github/workflows/deploy-enterprise-test.yml
index ab7b964d6..d4479499a 100644
--- a/.github/workflows/deploy-enterprise-test.yml
+++ b/.github/workflows/deploy-enterprise-test.yml
@@ -88,6 +88,7 @@ jobs:
 job_files: >-
 job_spec/INSAR_ISCE_BURST.yml
 job_spec/SRG_GSLC.yml
+ job_spec/SRG_TIME_SERIES.yml
 instance_types: r6id.xlarge,r6id.2xlarge,r6id.4xlarge,r6id.8xlarge,r6idn.xlarge,r6idn.2xlarge,r6idn.4xlarge,r6idn.8xlarge
 default_max_vcpus: 640
 expanded_max_vcpus: 640
diff --git a/.github/workflows/deploy-enterprise.yml b/.github/workflows/deploy-enterprise.yml
index 98bb769a3..024c27fdd 100644
--- a/.github/workflows/deploy-enterprise.yml
+++ b/.github/workflows/deploy-enterprise.yml
@@ -245,6 +245,7 @@ jobs:
 job_files: >-
 job_spec/INSAR_ISCE_BURST.yml
 job_spec/SRG_GSLC.yml
+ job_spec/SRG_TIME_SERIES.yml
 instance_types: r6id.xlarge,r6id.2xlarge,r6id.4xlarge,r6id.8xlarge,r6idn.xlarge,r6idn.2xlarge,r6idn.4xlarge,r6idn.8xlarge
 default_max_vcpus: 640
 expanded_max_vcpus: 640
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2d26e0686..3d163df90 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,8 +7,26 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## [8.0.0]

 ### Added
-
-### Changed
+- A job step can now be applied to every item in a list using a new `map: for <item> in <items>` syntax.
For example, given a job spec with a `granules` parameter, a step that includes a `map: for granule in granules` field is applied to each item in the `granules` list and can refer to `Ref::granule` within its `command` field. +- If a job contains a `map` step, the processing time value for that step (in the `processing_times` list in the job's API response) is a sub-list of processing times for the step's iterations, in the same order as the items in the input list. +- A new `SRG_TIME_SERIES` job type has been added to the `hyp3-lavas` and `hyp3-lavas-test` deployments. This workflow uses the new `map` syntax described above to produce a GSLC for each level-0 Sentinel-1 granule passed via the `granules` parameter and then produces a time series product from the GSLCs. See the [HyP3 SRG](https://github.com/ASFHyP3/hyp3-srg) plugin. +- The `SRG_GSLC` job type now includes parameter validation. + +### Changed +- Changes to custom compute environments: + - Custom compute environments are now applied to individual job steps rather than to entire jobs. The `compute_environment` field is now provided at the step level rather than at the top level of the job spec. + - Custom compute environments can optionally be defined within `job_spec/config/compute_environments.yml`. Job steps can import these environments using the following syntax: + ```yaml + compute_environment: + import: MyComputeEnvironment + ``` + If the `import` value is `Default`, then the job step uses the deployment's default compute environment. + + The `compute_environment` field can still be used to define a custom compute environment directly within the job spec, as before. +- Other changes to the job spec syntax: + - The `tasks` key has been renamed to `steps`. + - Parameters no longer contain a top-level `default` key. The `default` key within each parameter's `api_schema` mapping is still supported. + - Job specs no longer explicitly define a `bucket_prefix` parameter. Instead, `bucket_prefix` is automatically defined and can still be referenced as `Ref::bucket_prefix` within each step's `command` field. ## [7.12.0] From 6fc0908a92776e62007f05858e1907eefccf5b91 Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Thu, 17 Oct 2024 19:45:00 -0800 Subject: [PATCH 125/163] tweaks --- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3d163df90..1951e825c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,8 +24,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 The `compute_environment` field can still be used to define a custom compute environment directly within the job spec, as before. - Other changes to the job spec syntax: - - The `tasks` key has been renamed to `steps`. - - Parameters no longer contain a top-level `default` key. The `default` key within each parameter's `api_schema` mapping is still supported. + - The `tasks` field has been renamed to `steps`. + - Parameters no longer contain a top-level `default` field. The `default` field within each parameter's `api_schema` mapping is still supported. - Job specs no longer explicitly define a `bucket_prefix` parameter. Instead, `bucket_prefix` is automatically defined and can still be referenced as `Ref::bucket_prefix` within each step's `command` field. 
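  As an illustrative sketch only (the `EXAMPLE_JOB` job type, its `do_thing` command, and the image name below are invented for this example and are not an actual HyP3 job type or plugin), a minimal job spec using the new `map` syntax might look like:
  ```yaml
  EXAMPLE_JOB:  # hypothetical job type, for illustration only
    required_parameters:
      - granules
    parameters:
      granules:
        api_schema:
          type: array
          items:
            type: string
    # cost_profiles and validators omitted for brevity
    steps:
      - name: PROCESS_GRANULE
        map: for granule in granules  # run this step once per item in `granules`
        image: ghcr.io/example/example-image  # hypothetical image
        command:
          - ++process
          - do_thing
          - Ref::granule  # refers to the current item of `granules`
        timeout: 3600
        compute_environment:
          import: Default
        vcpu: 1
        memory: 7500
  ```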
## [7.12.0] From 5ac84a61409558368c1a3bad1acf991dd3973e63 Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Thu, 17 Oct 2024 19:46:16 -0800 Subject: [PATCH 126/163] wording --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1951e825c..15941ede5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,7 +25,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 The `compute_environment` field can still be used to define a custom compute environment directly within the job spec, as before. - Other changes to the job spec syntax: - The `tasks` field has been renamed to `steps`. - - Parameters no longer contain a top-level `default` field. The `default` field within each parameter's `api_schema` mapping is still supported. + - Job parameters no longer contain a top-level `default` field. The `default` field within each parameter's `api_schema` mapping is still supported. - Job specs no longer explicitly define a `bucket_prefix` parameter. Instead, `bucket_prefix` is automatically defined and can still be referenced as `Ref::bucket_prefix` within each step's `command` field. ## [7.12.0] From 1b8532a22b60e6f3dc61675ad434229325abc3cf Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Thu, 17 Oct 2024 22:16:05 -0800 Subject: [PATCH 127/163] flake8 --- apps/render_cf.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/apps/render_cf.py b/apps/render_cf.py index f102babe6..76549e12d 100644 --- a/apps/render_cf.py +++ b/apps/render_cf.py @@ -78,7 +78,7 @@ def get_map_state(job_spec: dict, step: dict) -> dict: } -def get_batch_submit_job_state(job_spec: dict, step: dict, filter_batch_params = False) -> dict: +def get_batch_submit_job_state(job_spec: dict, step: dict, filter_batch_params=False) -> dict: if filter_batch_params: batch_job_parameters = get_batch_job_parameters(job_spec, step) parameters_key = 'Parameters' @@ -288,7 +288,8 @@ def validate_job_spec(job_type: str, job_spec: dict) -> None: actual_param_fields = sorted(param_dict.keys()) if actual_param_fields != expected_param_fields: raise ValueError( - f"parameter '{param_name}' for {job_type} has fields {actual_param_fields} but should have {expected_param_fields}" + f"parameter '{param_name}' for {job_type} has fields {actual_param_fields} " + f"but should have {expected_param_fields}" ) From 2b596c555f1e0c5218ead0590198c93f35664186 Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Fri, 18 Oct 2024 09:57:37 -0800 Subject: [PATCH 128/163] Update job_spec/INSAR_ISCE.yml --- job_spec/INSAR_ISCE.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/job_spec/INSAR_ISCE.yml b/job_spec/INSAR_ISCE.yml index 2ef7dd3fb..97b33becd 100644 --- a/job_spec/INSAR_ISCE.yml +++ b/job_spec/INSAR_ISCE.yml @@ -141,7 +141,6 @@ INSAR_ISCE: - Ref::weather_model timeout: 10800 compute_environment: - # TODO: Do we want this to use this env or Default? 
import: 'InsarIsceAria' vcpu: 1 memory: 7500 From 9ec66765dfd7b06d5cf7c4d0568868bd198192c0 Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Fri, 18 Oct 2024 10:00:38 -0800 Subject: [PATCH 129/163] Update image tags Co-authored-by: Andrew Player --- job_spec/SRG_TIME_SERIES.yml | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/job_spec/SRG_TIME_SERIES.yml b/job_spec/SRG_TIME_SERIES.yml index 2bd682508..d39493065 100644 --- a/job_spec/SRG_TIME_SERIES.yml +++ b/job_spec/SRG_TIME_SERIES.yml @@ -45,9 +45,7 @@ SRG_TIME_SERIES: - name: BACK_PROJECTION map: for granule in granules image: ghcr.io/asfhyp3/hyp3-srg - # TODO revert image_tag - #image_tag: latest.gpu - image_tag: 0.8.1.dev50_gf0b8347.gpu + image_tag: test.gpu command: - ++process - back_projection @@ -71,8 +69,7 @@ SRG_TIME_SERIES: - EARTHDATA_PASSWORD - name: '' image: ghcr.io/asfhyp3/hyp3-srg - # TODO image tag - image_tag: 0.8.1.dev50_gf0b8347.cpu + image_tag: test.cpu command: - ++process - time_series From 5f10cce9543985b0f6f4ed018a872b95536e84b5 Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Fri, 18 Oct 2024 10:04:45 -0800 Subject: [PATCH 130/163] Update job_spec/SRG_TIME_SERIES.yml Co-authored-by: Andrew Player --- job_spec/SRG_TIME_SERIES.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/job_spec/SRG_TIME_SERIES.yml b/job_spec/SRG_TIME_SERIES.yml index d39493065..ce4931dca 100644 --- a/job_spec/SRG_TIME_SERIES.yml +++ b/job_spec/SRG_TIME_SERIES.yml @@ -84,4 +84,4 @@ SRG_TIME_SERIES: compute_environment: import: Default vcpu: 1 - memory: 30500 # TODO how much is needed? + memory: 30500 From 24856d556123958d0deea91ba2e55539e128308f Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Fri, 18 Oct 2024 10:17:01 -0800 Subject: [PATCH 131/163] Update apps/render_cf.py Co-authored-by: Andrew Player --- apps/render_cf.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/apps/render_cf.py b/apps/render_cf.py index 76549e12d..c179e52a8 100644 --- a/apps/render_cf.py +++ b/apps/render_cf.py @@ -271,8 +271,6 @@ def render_costs(job_types: dict, cost_profile: str) -> None: def validate_job_spec(job_type: str, job_spec: dict) -> None: - # Non-comprehensive job spec validator. More checks could be added if we want to be more thorough. - expected_fields = sorted(['required_parameters', 'parameters', 'cost_profiles', 'validators', 'steps']) actual_fields = sorted(job_spec.keys()) if actual_fields != expected_fields: From ab5f6af12d715701e4f47fe65baa77c5501976cd Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 18 Oct 2024 18:38:52 +0000 Subject: [PATCH 132/163] Bump moto[dynamodb] from 5.0.16 to 5.0.17 Bumps [moto[dynamodb]](https://github.com/getmoto/moto) from 5.0.16 to 5.0.17. - [Release notes](https://github.com/getmoto/moto/releases) - [Changelog](https://github.com/getmoto/moto/blob/master/CHANGELOG.md) - [Commits](https://github.com/getmoto/moto/compare/5.0.16...5.0.17) --- updated-dependencies: - dependency-name: moto[dynamodb] dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- requirements-all.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements-all.txt b/requirements-all.txt index 3f053a09d..996831a5b 100644 --- a/requirements-all.txt +++ b/requirements-all.txt @@ -7,7 +7,7 @@ -r requirements-apps-update-db.txt boto3==1.35.40 jinja2==3.1.4 -moto[dynamodb]==5.0.16 +moto[dynamodb]==5.0.17 pytest==8.3.3 PyYAML==6.0.2 responses==0.25.3 From 24a7703e7bc78f2f5576d4a6409f8be84abb45b8 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Fri, 18 Oct 2024 15:29:03 -0400 Subject: [PATCH 133/163] Remove .cpu and .gpu image tags from SRG job_specs --- job_spec/SRG_GSLC.yml | 1 - job_spec/SRG_TIME_SERIES.yml | 2 -- 2 files changed, 3 deletions(-) diff --git a/job_spec/SRG_GSLC.yml b/job_spec/SRG_GSLC.yml index 0ac0f690c..7f6ed1d3b 100644 --- a/job_spec/SRG_GSLC.yml +++ b/job_spec/SRG_GSLC.yml @@ -43,7 +43,6 @@ SRG_GSLC: steps: - name: '' image: ghcr.io/asfhyp3/hyp3-srg - image_tag: latest.gpu command: - ++process - back_projection diff --git a/job_spec/SRG_TIME_SERIES.yml b/job_spec/SRG_TIME_SERIES.yml index ce4931dca..b330cd088 100644 --- a/job_spec/SRG_TIME_SERIES.yml +++ b/job_spec/SRG_TIME_SERIES.yml @@ -45,7 +45,6 @@ SRG_TIME_SERIES: - name: BACK_PROJECTION map: for granule in granules image: ghcr.io/asfhyp3/hyp3-srg - image_tag: test.gpu command: - ++process - back_projection @@ -69,7 +68,6 @@ SRG_TIME_SERIES: - EARTHDATA_PASSWORD - name: '' image: ghcr.io/asfhyp3/hyp3-srg - image_tag: test.cpu command: - ++process - time_series From c6dc8424b507c881371202f3d2413acaa1865fb5 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Fri, 18 Oct 2024 17:27:39 -0400 Subject: [PATCH 134/163] refactor compute env parsing so that only used compute envs are rendered --- apps/render_cf.py | 58 +++++++++++++++++++++++++++++------------------ 1 file changed, 36 insertions(+), 22 deletions(-) diff --git a/apps/render_cf.py b/apps/render_cf.py index c179e52a8..0a22c2447 100644 --- a/apps/render_cf.py +++ b/apps/render_cf.py @@ -192,45 +192,59 @@ def render_templates(job_types, compute_envs, security_environment, api_name): template_file.with_suffix('').write_text(output) +def parse_compute_environments_file( + compute_env_names: set, + compute_env_imports: set, + compute_env_file: Path +) -> list[dict]: + compute_envs = [] + compute_envs_from_file = yaml.safe_load(compute_env_file.read_text())['compute_environments'] + + for name in compute_envs_from_file: + if name in compute_env_imports: + if name in compute_env_names: + raise ValueError( + f'Compute envs must have unique names but the following is defined more than once: {name}.' + ) + compute_envs_from_file[name].update({'name': name}) + compute_envs.append(compute_envs_from_file[name]) + compute_env_names.add(name) + + for name in compute_env_imports: + if name not in compute_envs_from_file and name != 'Default': + raise ValueError( + f'The following compute env is imported but not defined in the compute envs file: {name}.' 
+ ) + + return compute_envs + + def get_compute_environments(job_types: dict, compute_env_file: Optional[Path]) -> list[dict]: compute_envs = [] - compute_env_names = set() + compute_env_names = set({'Default'}) compute_env_imports = set() + for _, job_spec in job_types.items(): for step in job_spec['steps']: compute_env = step['compute_environment'] if 'name' in compute_env: name = compute_env['name'] - assert name != 'Default' if name in compute_env_names: raise ValueError( f'Compute envs must have unique names but the following is defined more than once: {name}.' ) compute_envs.append(compute_env) compute_env_names.add(name) - elif 'import' in compute_env and compute_env['import'] != 'Default': + elif 'import' in compute_env: compute_env_imports.add(compute_env['import']) - else: - assert compute_env['import'] == 'Default' if compute_env_file: - compute_envs_from_file = yaml.safe_load(compute_env_file.read_text())['compute_environments'] - for name in compute_envs_from_file: - assert name != 'Default' - if name in compute_env_names: - raise ValueError( - f'Compute envs must have unique names but the following is defined more than once: {name}.' - ) - compute_env = compute_envs_from_file[name] - compute_env['name'] = name - compute_envs.append(compute_env) - compute_env_names.add(name) - - for name in compute_env_imports: - if name not in compute_envs_from_file: - raise ValueError( - f'The following compute env is imported but not defined in the compute envs file: {name}.' - ) + compute_envs_from_file = parse_compute_environments_file( + compute_env_names, + compute_env_imports, + compute_env_file + ) + compute_envs.extend(compute_envs_from_file) return compute_envs From 7d0ade182174d546d68af884d0e6424eb4960a50 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Fri, 18 Oct 2024 18:03:50 -0400 Subject: [PATCH 135/163] added all important documentation --- job_spec/config/compute_environments.yml | 29 +++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/job_spec/config/compute_environments.yml b/job_spec/config/compute_environments.yml index 5305aa630..6bfd52469 100644 --- a/job_spec/config/compute_environments.yml +++ b/job_spec/config/compute_environments.yml @@ -1,9 +1,32 @@ +# Compute Environments that are shared between multiple jobs and/or steps should be defined here, +# rather than in the job spec. The format is the same, except the name becomes the key: + +# ExampleComputeEnvName: +# instance_types: ... +# ami_id: ... +# allocation_type: ... +# allocation_strategy: ... + +# Note that the name must be unique, i.e. a compute environment that is defined in a job spec using +# the `name` key cannot also be defined here, UNLESS it is not imported in the same deployment. +# "UNLESS" is due to the fact that compute environments defined here are only parsed if they are imported. + +# Once the environment is defined here, it can be imported in the job spec step +# by using the `import` key: + +# JOB_NAME: +# ... +# steps: +# step_name: +# ... +# compute_environment: +# import: 'ExampleComputeEnvName' + compute_environments: - # Format is the same as in the job_spec(s), except the name becomes the key. + # Default is defined per-deployment in the deployment actions. 
SrgGslc: instance_types: g6.2xlarge - # Image ID for: /aws/service/ecs/optimized-ami/amazon-linux-2/gpu/recommended/image_id - ami_id: ami-0729c079aae647cb3 + ami_id: ami-0729c079aae647cb3 # /aws/service/ecs/optimized-ami/amazon-linux-2/gpu/recommended/image_id InsarIsceAria: allocation_type: EC2 allocation_strategy: BEST_FIT_PROGRESSIVE From 5123669ba08f50dee3575d33f51f97a3fe5165a2 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Fri, 18 Oct 2024 18:17:15 -0400 Subject: [PATCH 136/163] re-ordered comment --- job_spec/config/compute_environments.yml | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/job_spec/config/compute_environments.yml b/job_spec/config/compute_environments.yml index 6bfd52469..3d14fab9f 100644 --- a/job_spec/config/compute_environments.yml +++ b/job_spec/config/compute_environments.yml @@ -7,10 +7,6 @@ # allocation_type: ... # allocation_strategy: ... -# Note that the name must be unique, i.e. a compute environment that is defined in a job spec using -# the `name` key cannot also be defined here, UNLESS it is not imported in the same deployment. -# "UNLESS" is due to the fact that compute environments defined here are only parsed if they are imported. - # Once the environment is defined here, it can be imported in the job spec step # by using the `import` key: @@ -22,6 +18,11 @@ # compute_environment: # import: 'ExampleComputeEnvName' +# Note that the name must be unique, i.e. a compute environment that is defined in a job spec using +# the `name` key cannot also be defined here, unless it is not imported in the same deployment (envs here +# are only rendered if they are imported). Also note that the name should be in pascal case to avoid +# cfn-lint errors. + compute_environments: # Default is defined per-deployment in the deployment actions. 
SrgGslc:

From 4a8e490ccec9005970517451d60bfe756a403b3b Mon Sep 17 00:00:00 2001
From: Andrew Player
Date: Mon, 21 Oct 2024 13:45:48 -0400
Subject: [PATCH 137/163] add test for get_compute_environments

---
 Makefile | 3 +-
 tests/test_render_cf.py | 69 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 71 insertions(+), 1 deletion(-)
 create mode 100644 tests/test_render_cf.py

diff --git a/Makefile b/Makefile
index 0effa7bc1..ed72389d1 100644
--- a/Makefile
+++ b/Makefile
@@ -1,4 +1,5 @@
 API = ${PWD}/apps/api/src
+RENDER_CF = ${PWD}/apps
 CHECK_PROCESSING_TIME = ${PWD}/apps/check-processing-time/src
 GET_FILES = ${PWD}/apps/get-files/src
 HANDLE_BATCH_EVENT = ${PWD}/apps/handle-batch-event/src
@@ -10,7 +11,7 @@ DISABLE_PRIVATE_DNS = ${PWD}/apps/disable-private-dns/src
 UPDATE_DB = ${PWD}/apps/update-db/src
 UPLOAD_LOG = ${PWD}/apps/upload-log/src
 DYNAMO = ${PWD}/lib/dynamo
-export PYTHONPATH = ${API}:${CHECK_PROCESSING_TIME}:${GET_FILES}:${HANDLE_BATCH_EVENT}:${SET_BATCH_OVERRIDES}:${SCALE_CLUSTER}:${START_EXECUTION_MANAGER}:${START_EXECUTION_WORKER}:${DISABLE_PRIVATE_DNS}:${UPDATE_DB}:${UPLOAD_LOG}:${DYNAMO}
+export PYTHONPATH = ${API}:${CHECK_PROCESSING_TIME}:${GET_FILES}:${HANDLE_BATCH_EVENT}:${SET_BATCH_OVERRIDES}:${SCALE_CLUSTER}:${START_EXECUTION_MANAGER}:${START_EXECUTION_WORKER}:${DISABLE_PRIVATE_DNS}:${UPDATE_DB}:${UPLOAD_LOG}:${DYNAMO}:${RENDER_CF}

 build: render

diff --git a/tests/test_render_cf.py b/tests/test_render_cf.py
new file mode 100644
index 000000000..fc1126b40
--- /dev/null
+++ b/tests/test_render_cf.py
@@ -0,0 +1,69 @@
+import yaml
+
+import render_cf
+
+def test_get_compute_environments(tmp_path):
+    job_types = {
+        'FOO': {
+            'steps': [
+                {
+                    'compute_environment': {
+                        'name': 'compute_environment_1',
+                        'instance_types': ['type1', 'type2']
+                    }
+                },
+                {'compute_environment': {'import': 'Default'}}
+            ]
+        },
+        'BAR': {
+            'steps': [
+                {'compute_environment': {'import': 'compute_environment_2'}},
+                {
+                    'compute_environment': {
+                        'name': 'compute_environment_3',
+                        'allocation_type': 'alloc_type_1',
+                        'allocation_strategy': 'alloc_strat_1'
+                    }
+                }
+            ]
+        }
+    }
+
+    compute_env_file = {
+        'compute_environments': {
+            'compute_environment_2': {
+                'instance_types': ['type_3'],
+                'ami_id': 'ami_id_1',
+                'allocation_type': 'alloc_type_2',
+                'allocation_strategy': 'alloc_strat_2'
+            },
+            'compute_environment_4': {
+                'instance_types': ['type_4']
+            }
+        }
+    }
+    compute_env_filepath = tmp_path / 'compute_environments.yml'
+    yaml.dump(compute_env_file, open(compute_env_filepath, 'w'))
+
+    expected_compute_envs = [
+        {
+            'name': 'compute_environment_1',
+            'instance_types': ['type1', 'type2']
+        },
+        {
+            'name': 'compute_environment_3',
+            'allocation_type': 'alloc_type_1',
+            'allocation_strategy': 'alloc_strat_1'
+        },
+        {
+            'name': 'compute_environment_2',
+            'instance_types': ['type_3'],
+            'ami_id': 'ami_id_1',
+            'allocation_type': 'alloc_type_2',
+            'allocation_strategy': 'alloc_strat_2'
+        }
+    ]
+    compute_envs = render_cf.get_compute_environments(job_types, compute_env_filepath)
+    assert compute_envs == expected_compute_envs
+
+    #TODO: Invalid Case

From 1852c3673c29fb642f520b5b16e691cbca3aa33a Mon Sep 17 00:00:00 2001
From: Andrew Player
Date: Mon, 21 Oct 2024 14:13:05 -0400
Subject: [PATCH 138/163] error when user imports but doesn't provide file

---
 apps/render_cf.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/apps/render_cf.py b/apps/render_cf.py
index 0a22c2447..8ba90d6d5 100644
--- a/apps/render_cf.py
+++ b/apps/render_cf.py
@@ -219,7 +219,7 @@ def
parse_compute_environments_file( return compute_envs -def get_compute_environments(job_types: dict, compute_env_file: Optional[Path]) -> list[dict]: +def get_compute_environments(job_types: dict, compute_env_file: Optional[Path] = None) -> list[dict]: compute_envs = [] compute_env_names = set({'Default'}) compute_env_imports = set() @@ -245,6 +245,11 @@ def get_compute_environments(job_types: dict, compute_env_file: Optional[Path]) compute_env_file ) compute_envs.extend(compute_envs_from_file) + elif compute_env_imports is not None: + raise ValueError( + f'The following compute envs are imported but no compute env file was provided: {compute_env_imports}.' + ) + return compute_envs From 2ac2480a5472ba036022688fbc8442ded31f623d Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Mon, 21 Oct 2024 14:13:21 -0400 Subject: [PATCH 139/163] add error cases to test_get_compute_environments --- tests/test_render_cf.py | 49 ++++++++++++++++++++++++++++++++++------- 1 file changed, 41 insertions(+), 8 deletions(-) diff --git a/tests/test_render_cf.py b/tests/test_render_cf.py index fc1126b40..fd65d8358 100644 --- a/tests/test_render_cf.py +++ b/tests/test_render_cf.py @@ -1,3 +1,4 @@ +import pytest import yaml import render_cf @@ -28,7 +29,6 @@ def test_get_compute_environments(tmp_path): ] } } - compute_env_file = { 'compute_environments': { 'compute_environment_2': { @@ -37,14 +37,9 @@ def test_get_compute_environments(tmp_path): 'allocation_type': 'alloc_type_2', 'allocation_strategy': 'alloc_strat_2' }, - 'compute_environment_4': { - 'instance_types': ['type_4'] - } + 'compute_environment_4': {'instance_types': ['type_4']} } } - compute_env_filepath = tmp_path / 'compute_environments.yml' - yaml.dump(compute_env_file, open(compute_env_filepath, 'w')) - expected_compute_envs = [ { 'name': 'compute_environment_1', @@ -63,7 +58,45 @@ def test_get_compute_environments(tmp_path): 'allocation_strategy': 'alloc_strat_2' } ] + compute_env_filepath = tmp_path / 'compute_environments.yml' + yaml.dump(compute_env_file, open(compute_env_filepath, 'w')) compute_envs = render_cf.get_compute_environments(job_types, compute_env_filepath) assert compute_envs == expected_compute_envs - #TODO: Invalid Case + job_types_redefined_default = { + 'FOO': {'steps': [{'compute_environment': {'name': 'Default'}}]}} + with pytest.raises(ValueError, match=r'.*defined more than once: Default*'): + compute_envs = render_cf.get_compute_environments(job_types_redefined_default) + + job_types_duplicate_env = { + 'FOO': {'steps': [{'compute_environment': {'name': 'compute_environment_1'}}]}, + 'BAR': {'steps': [{'compute_environment': {'name': 'compute_environment_1'}}]} + } + with pytest.raises(ValueError, match=r'.*defined more than once: compute_environment_1*'): + compute_envs = render_cf.get_compute_environments(job_types_duplicate_env) + + job_types_import_undefined = { + 'FOO': {'steps': [{'compute_environment': {'import': 'undefined_compute_environment'}}]} + } + with pytest.raises(ValueError, match=r'.*not defined in the compute envs file: undefined_compute_environment*'): + compute_envs = render_cf.get_compute_environments(job_types_import_undefined, compute_env_filepath) + with pytest.raises(ValueError, match=r'.*no compute env file was provided: {\'undefined_compute_environment\'}*'): + compute_envs = render_cf.get_compute_environments(job_types_import_undefined) + + compute_env_file_redefined_default = {'compute_environments': {'Default': {}}} + yaml.dump(compute_env_file_redefined_default, open(compute_env_filepath, 
'w')) + with pytest.raises(ValueError, match=r'.*defined more than once: Default*'): + compute_envs = render_cf.get_compute_environments(job_types, compute_env_filepath) + + job_types = { + 'FOO': { + 'steps': [ + {'compute_environment': {'name': 'compute_environment_1'}}, + {'compute_environment': {'import': 'compute_environment_1'}} + ] + } + } + compute_env_file_duplicate = {'compute_environments': {'compute_environment_1': {}}} + yaml.dump(compute_env_file_duplicate, open(compute_env_filepath, 'w')) + with pytest.raises(ValueError, match=r'.*defined more than once: compute_environment_1*'): + compute_envs = render_cf.get_compute_environments(job_types, compute_env_filepath) From ddc4c10371ad6ceee1c3bf89b84d3010198c6817 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Mon, 21 Oct 2024 14:15:44 -0400 Subject: [PATCH 140/163] flake8 --- tests/test_render_cf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_render_cf.py b/tests/test_render_cf.py index fd65d8358..a1e1c5049 100644 --- a/tests/test_render_cf.py +++ b/tests/test_render_cf.py @@ -1,7 +1,7 @@ import pytest +import render_cf import yaml -import render_cf def test_get_compute_environments(tmp_path): job_types = { From c5fbbe4b8d3c66fa6c0c0fd85edfe4b1e957f86f Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Mon, 21 Oct 2024 14:16:49 -0400 Subject: [PATCH 141/163] flake8 --- apps/render_cf.py | 1 - tests/test_render_cf.py | 34 +++++++++++++++++----------------- 2 files changed, 17 insertions(+), 18 deletions(-) diff --git a/apps/render_cf.py b/apps/render_cf.py index 8ba90d6d5..c762b2485 100644 --- a/apps/render_cf.py +++ b/apps/render_cf.py @@ -250,7 +250,6 @@ def get_compute_environments(job_types: dict, compute_env_file: Optional[Path] = f'The following compute envs are imported but no compute env file was provided: {compute_env_imports}.' 
)
-
     return compute_envs
diff --git a/tests/test_render_cf.py b/tests/test_render_cf.py
index a1e1c5049..beeb8959d 100644
--- a/tests/test_render_cf.py
+++ b/tests/test_render_cf.py
@@ -41,22 +41,22 @@ def test_get_compute_environments(tmp_path):
 }
 }
 expected_compute_envs = [
-        {
-            'name': 'compute_environment_1',
-            'instance_types': ['type1', 'type2']
-        },
-        {
-            'name': 'compute_environment_3',
-            'allocation_type': 'alloc_type_1',
-            'allocation_strategy': 'alloc_strat_1'
-        },
-        {
-            'name': 'compute_environment_2',
-            'instance_types': ['type_3'],
-            'ami_id': 'ami_id_1',
-            'allocation_type': 'alloc_type_2',
-            'allocation_strategy': 'alloc_strat_2'
-        }
+        {
+            'name': 'compute_environment_1',
+            'instance_types': ['type1', 'type2']
+        },
+        {
+            'name': 'compute_environment_3',
+            'allocation_type': 'alloc_type_1',
+            'allocation_strategy': 'alloc_strat_1'
+        },
+        {
+            'name': 'compute_environment_2',
+            'instance_types': ['type_3'],
+            'ami_id': 'ami_id_1',
+            'allocation_type': 'alloc_type_2',
+            'allocation_strategy': 'alloc_strat_2'
+        }
 ]
 compute_env_filepath = tmp_path / 'compute_environments.yml'
 yaml.dump(compute_env_file, open(compute_env_filepath, 'w'))
 compute_envs = render_cf.get_compute_environments(job_types, compute_env_filepath)
 assert compute_envs == expected_compute_envs
@@ -81,7 +81,7 @@ def test_get_compute_environments(tmp_path):
 with pytest.raises(ValueError, match=r'.*not defined in the compute envs file: undefined_compute_environment*'):
 compute_envs = render_cf.get_compute_environments(job_types_import_undefined, compute_env_filepath)
 with pytest.raises(ValueError, match=r'.*no compute env file was provided: {\'undefined_compute_environment\'}*'):
-        compute_envs = render_cf.get_compute_environments(job_types_import_undefined)
+        compute_envs = render_cf.get_compute_environments(job_types_import_undefined)

 compute_env_file_redefined_default = {'compute_environments': {'Default': {}}}
 yaml.dump(compute_env_file_redefined_default, open(compute_env_filepath, 'w'))

From ec0d26c13f30c4bff6fc4c5103ebb263912241fd Mon Sep 17 00:00:00 2001
From: Andrew Player
Date: Mon, 21 Oct 2024 14:31:31 -0400
Subject: [PATCH 142/163] Update Makefile

Co-authored-by: Jake Herrmann
---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index ed72389d1..6a596f51c 100644
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,5 @@
 API = ${PWD}/apps/api/src
-RENDER_CF = ${PWD}/apps
+APPS = ${PWD}/apps
 CHECK_PROCESSING_TIME = ${PWD}/apps/check-processing-time/src
 GET_FILES = ${PWD}/apps/get-files/src
 HANDLE_BATCH_EVENT = ${PWD}/apps/handle-batch-event/src

From 3062516924dd9dca354c447473910a7a2275bd29 Mon Sep 17 00:00:00 2001
From: Andrew Player
Date: Mon, 21 Oct 2024 14:31:36 -0400
Subject: [PATCH 143/163] Update Makefile

Co-authored-by: Jake Herrmann
---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 6a596f51c..81d462c22 100644
--- a/Makefile
+++ b/Makefile
@@ -11,7 +11,7 @@ DISABLE_PRIVATE_DNS = ${PWD}/apps/disable-private-dns/src
 UPDATE_DB = ${PWD}/apps/update-db/src
 UPLOAD_LOG = ${PWD}/apps/upload-log/src
 DYNAMO = ${PWD}/lib/dynamo
-export PYTHONPATH = ${API}:${CHECK_PROCESSING_TIME}:${GET_FILES}:${HANDLE_BATCH_EVENT}:${SET_BATCH_OVERRIDES}:${SCALE_CLUSTER}:${START_EXECUTION_MANAGER}:${START_EXECUTION_WORKER}:${DISABLE_PRIVATE_DNS}:${UPDATE_DB}:${UPLOAD_LOG}:${DYNAMO}:${RENDER_CF}
+export PYTHONPATH = ${API}:${CHECK_PROCESSING_TIME}:${GET_FILES}:${HANDLE_BATCH_EVENT}:${SET_BATCH_OVERRIDES}:${SCALE_CLUSTER}:${START_EXECUTION_MANAGER}:${START_EXECUTION_WORKER}:${DISABLE_PRIVATE_DNS}:${UPDATE_DB}:${UPLOAD_LOG}:${DYNAMO}:${APPS}

 build: render

From
4538d57afc7d1ef1aaf1329f4d85693569076e18 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Mon, 21 Oct 2024 14:56:50 -0400 Subject: [PATCH 144/163] Update apps/render_cf.py Co-authored-by: Jake Herrmann --- apps/render_cf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/render_cf.py b/apps/render_cf.py index c762b2485..c66f34d5e 100644 --- a/apps/render_cf.py +++ b/apps/render_cf.py @@ -221,7 +221,7 @@ def parse_compute_environments_file( def get_compute_environments(job_types: dict, compute_env_file: Optional[Path] = None) -> list[dict]: compute_envs = [] - compute_env_names = set({'Default'}) + compute_env_names = {'Default'} compute_env_imports = set() for _, job_spec in job_types.items(): From dcad83b5495d26aeb8293843689f6e7f7b9c178b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 21 Oct 2024 19:01:58 +0000 Subject: [PATCH 145/163] Bump cryptography from 43.0.1 to 43.0.3 Bumps [cryptography](https://github.com/pyca/cryptography) from 43.0.1 to 43.0.3. - [Changelog](https://github.com/pyca/cryptography/blob/main/CHANGELOG.rst) - [Commits](https://github.com/pyca/cryptography/compare/43.0.1...43.0.3) --- updated-dependencies: - dependency-name: cryptography dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- requirements-apps-api-binary.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements-apps-api-binary.txt b/requirements-apps-api-binary.txt index ccb822680..75287dd9f 100644 --- a/requirements-apps-api-binary.txt +++ b/requirements-apps-api-binary.txt @@ -1 +1 @@ -cryptography==43.0.1 +cryptography==43.0.3 From 2dabafb4ac4847884d868e9e5d2ee03c2c4287a1 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 21 Oct 2024 19:02:12 +0000 Subject: [PATCH 146/163] Bump setuptools from 75.1.0 to 75.2.0 Bumps [setuptools](https://github.com/pypa/setuptools) from 75.1.0 to 75.2.0. - [Release notes](https://github.com/pypa/setuptools/releases) - [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst) - [Commits](https://github.com/pypa/setuptools/compare/v75.1.0...v75.2.0) --- updated-dependencies: - dependency-name: setuptools dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- requirements-all.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements-all.txt b/requirements-all.txt index 996831a5b..d30a2b5f2 100644 --- a/requirements-all.txt +++ b/requirements-all.txt @@ -15,6 +15,6 @@ flake8==7.1.1 flake8-import-order==0.18.2 flake8-blind-except==0.2.1 flake8-builtins==2.5.0 -setuptools==75.1.0 +setuptools==75.2.0 openapi-spec-validator==0.7.1 cfn-lint==1.16.1 From 0b95b532db4477e3bf3605fd76c3441d891e55a9 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 21 Oct 2024 19:02:24 +0000 Subject: [PATCH 147/163] Bump boto3 from 1.35.40 to 1.35.44 Bumps [boto3](https://github.com/boto/boto3) from 1.35.40 to 1.35.44. - [Release notes](https://github.com/boto/boto3/releases) - [Commits](https://github.com/boto/boto3/compare/1.35.40...1.35.44) --- updated-dependencies: - dependency-name: boto3 dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- requirements-all.txt | 2 +- requirements-apps-disable-private-dns.txt | 2 +- requirements-apps-start-execution-manager.txt | 2 +- requirements-apps-start-execution-worker.txt | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/requirements-all.txt b/requirements-all.txt index 996831a5b..a0ba085d2 100644 --- a/requirements-all.txt +++ b/requirements-all.txt @@ -5,7 +5,7 @@ -r requirements-apps-start-execution-worker.txt -r requirements-apps-disable-private-dns.txt -r requirements-apps-update-db.txt -boto3==1.35.40 +boto3==1.35.44 jinja2==3.1.4 moto[dynamodb]==5.0.17 pytest==8.3.3 diff --git a/requirements-apps-disable-private-dns.txt b/requirements-apps-disable-private-dns.txt index 9abbd0a9b..a54ad61db 100644 --- a/requirements-apps-disable-private-dns.txt +++ b/requirements-apps-disable-private-dns.txt @@ -1 +1 @@ -boto3==1.35.40 +boto3==1.35.44 diff --git a/requirements-apps-start-execution-manager.txt b/requirements-apps-start-execution-manager.txt index 81b056a7b..d5e084a91 100644 --- a/requirements-apps-start-execution-manager.txt +++ b/requirements-apps-start-execution-manager.txt @@ -1,3 +1,3 @@ -boto3==1.35.40 +boto3==1.35.44 ./lib/dynamo/ ./lib/lambda_logging/ diff --git a/requirements-apps-start-execution-worker.txt b/requirements-apps-start-execution-worker.txt index c8fcb15e5..0b09d31f4 100644 --- a/requirements-apps-start-execution-worker.txt +++ b/requirements-apps-start-execution-worker.txt @@ -1,2 +1,2 @@ -boto3==1.35.40 +boto3==1.35.44 ./lib/lambda_logging/ From b0c8e43360b80b4639d6e00e5d2900a5244f24c7 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 21 Oct 2024 19:32:54 +0000 Subject: [PATCH 148/163] Bump ASFHyP3/actions from 0.11.2 to 0.12.0 Bumps [ASFHyP3/actions](https://github.com/asfhyp3/actions) from 0.11.2 to 0.12.0. - [Release notes](https://github.com/asfhyp3/actions/releases) - [Changelog](https://github.com/ASFHyP3/actions/blob/develop/CHANGELOG.md) - [Commits](https://github.com/asfhyp3/actions/compare/v0.11.2...v0.12.0) --- updated-dependencies: - dependency-name: ASFHyP3/actions dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] --- .github/workflows/changelog.yml | 2 +- .github/workflows/create-jira-issue.yml | 2 +- .github/workflows/deploy-daac.yml | 2 +- .github/workflows/labeled-pr.yml | 2 +- .github/workflows/release-template-comment.yml | 2 +- .github/workflows/release.yml | 2 +- .github/workflows/static-analysis.yml | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/changelog.yml b/.github/workflows/changelog.yml index 7740de9ed..3b1e74062 100644 --- a/.github/workflows/changelog.yml +++ b/.github/workflows/changelog.yml @@ -13,4 +13,4 @@ on: jobs: call-changelog-check-workflow: - uses: ASFHyP3/actions/.github/workflows/reusable-changelog-check.yml@v0.11.2 + uses: ASFHyP3/actions/.github/workflows/reusable-changelog-check.yml@v0.12.0 diff --git a/.github/workflows/create-jira-issue.yml b/.github/workflows/create-jira-issue.yml index 99489d507..d95ef849e 100644 --- a/.github/workflows/create-jira-issue.yml +++ b/.github/workflows/create-jira-issue.yml @@ -6,7 +6,7 @@ on: jobs: call-create-jira-issue-workflow: - uses: ASFHyP3/actions/.github/workflows/reusable-create-jira-issue.yml@v0.11.2 + uses: ASFHyP3/actions/.github/workflows/reusable-create-jira-issue.yml@v0.12.0 secrets: JIRA_BASE_URL: ${{ secrets.JIRA_BASE_URL }} JIRA_USER_EMAIL: ${{ secrets.JIRA_USER_EMAIL }} diff --git a/.github/workflows/deploy-daac.yml b/.github/workflows/deploy-daac.yml index 9abfb18b2..d1bfc1ba7 100644 --- a/.github/workflows/deploy-daac.yml +++ b/.github/workflows/deploy-daac.yml @@ -111,6 +111,6 @@ jobs: call-bump-version-workflow: if: github.ref == 'refs/heads/main' needs: deploy - uses: ASFHyP3/actions/.github/workflows/reusable-bump-version.yml@v0.11.2 + uses: ASFHyP3/actions/.github/workflows/reusable-bump-version.yml@v0.12.0 secrets: USER_TOKEN: ${{ secrets.TOOLS_BOT_PAK }} diff --git a/.github/workflows/labeled-pr.yml b/.github/workflows/labeled-pr.yml index f89f3e3bd..f408f3b37 100644 --- a/.github/workflows/labeled-pr.yml +++ b/.github/workflows/labeled-pr.yml @@ -12,4 +12,4 @@ on: jobs: call-labeled-pr-check-workflow: - uses: ASFHyP3/actions/.github/workflows/reusable-labeled-pr-check.yml@v0.11.2 + uses: ASFHyP3/actions/.github/workflows/reusable-labeled-pr-check.yml@v0.12.0 diff --git a/.github/workflows/release-template-comment.yml b/.github/workflows/release-template-comment.yml index 8311cb7b1..1c0dcccea 100644 --- a/.github/workflows/release-template-comment.yml +++ b/.github/workflows/release-template-comment.yml @@ -7,7 +7,7 @@ on: jobs: call-release-checklist-workflow: - uses: ASFHyP3/actions/.github/workflows/reusable-release-checklist-comment.yml@v0.11.2 + uses: ASFHyP3/actions/.github/workflows/reusable-release-checklist-comment.yml@v0.12.0 permissions: pull-requests: write with: diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 06194fe08..1bd429336 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -7,7 +7,7 @@ on: jobs: call-release-workflow: - uses: ASFHyP3/actions/.github/workflows/reusable-release.yml@v0.11.2 + uses: ASFHyP3/actions/.github/workflows/reusable-release.yml@v0.12.0 with: release_prefix: HyP3 secrets: diff --git a/.github/workflows/static-analysis.yml b/.github/workflows/static-analysis.yml index d94186bc6..0fa2dc434 100644 --- a/.github/workflows/static-analysis.yml +++ b/.github/workflows/static-analysis.yml @@ -87,4 +87,4 @@ jobs: snyk iac test --severity-threshold=high call-secrets-analysis-workflow: - uses: 
ASFHyP3/actions/.github/workflows/reusable-secrets-analysis.yml@v0.11.2 + uses: ASFHyP3/actions/.github/workflows/reusable-secrets-analysis.yml@v0.12.0 From 4c4802ef9da88ff74410a19b608ffe2f9ce21fd2 Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Mon, 21 Oct 2024 12:01:09 -0800 Subject: [PATCH 149/163] test parse_job_step_map --- apps/render_cf.py | 2 +- tests/test_render_cf_2.py | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) create mode 100644 tests/test_render_cf_2.py diff --git a/apps/render_cf.py b/apps/render_cf.py index c66f34d5e..ed90167ef 100644 --- a/apps/render_cf.py +++ b/apps/render_cf.py @@ -132,7 +132,7 @@ def get_batch_submit_job_state(job_spec: dict, step: dict, filter_batch_params=F def parse_job_step_map(step_map: str) -> tuple[str, str]: tokens = step_map.split(' ') assert len(tokens) == 4 - assert tokens[0], tokens[2] == ('for', 'in') + assert (tokens[0], tokens[2]) == ('for', 'in') return tokens[1], tokens[3] diff --git a/tests/test_render_cf_2.py b/tests/test_render_cf_2.py new file mode 100644 index 000000000..dc61d6f92 --- /dev/null +++ b/tests/test_render_cf_2.py @@ -0,0 +1,5 @@ +import render_cf + +def test_parse_job_step_map(): + assert render_cf.parse_job_step_map('for item in items') == ('item', 'items') + assert render_cf.parse_job_step_map('for foo in bar') == ('foo', 'bar') From 2f0b38708684cb8d5913a48dfc9c47dd25503c63 Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Mon, 21 Oct 2024 12:50:13 -0800 Subject: [PATCH 150/163] improve map statement parser error handling --- apps/render_cf.py | 14 +++++++++----- tests/test_render_cf_2.py | 20 +++++++++++++++++--- 2 files changed, 26 insertions(+), 8 deletions(-) diff --git a/apps/render_cf.py b/apps/render_cf.py index ed90167ef..7d975f480 100644 --- a/apps/render_cf.py +++ b/apps/render_cf.py @@ -53,7 +53,7 @@ def get_state_for_job_step(step: dict, index: int, next_state_name: str, job_spe def get_map_state(job_spec: dict, step: dict) -> dict: - item, items = parse_job_step_map(step['map']) + item, items = parse_map_statement(step['map']) batch_job_parameters = get_batch_job_parameters(job_spec, step, map_item=item) @@ -129,10 +129,14 @@ def get_batch_submit_job_state(job_spec: dict, step: dict, filter_batch_params=F } -def parse_job_step_map(step_map: str) -> tuple[str, str]: - tokens = step_map.split(' ') - assert len(tokens) == 4 - assert (tokens[0], tokens[2]) == ('for', 'in') +def parse_map_statement(map_statement: str) -> tuple[str, str]: + tokens = map_statement.split(' ') + if len(tokens) != 4: + raise ValueError(f'expected 4 tokens in map statement but got {len(tokens)}: {map_statement}') + if tokens[0] != 'for': + raise ValueError(f"expected 'for', got '{tokens[0]}': {map_statement}") + if tokens[2] != 'in': + raise ValueError(f"expected 'in', got '{tokens[2]}': {map_statement}") return tokens[1], tokens[3] diff --git a/tests/test_render_cf_2.py b/tests/test_render_cf_2.py index dc61d6f92..113658259 100644 --- a/tests/test_render_cf_2.py +++ b/tests/test_render_cf_2.py @@ -1,5 +1,19 @@ +import pytest + import render_cf -def test_parse_job_step_map(): - assert render_cf.parse_job_step_map('for item in items') == ('item', 'items') - assert render_cf.parse_job_step_map('for foo in bar') == ('foo', 'bar') +def test_parse_map_statement(): + assert render_cf.parse_map_statement('for item in items') == ('item', 'items') + assert render_cf.parse_map_statement('for foo in bar') == ('foo', 'bar') + + with pytest.raises(ValueError, match='expected 4 tokens in map statement but got 3: item in 
items'): + render_cf.parse_map_statement('item in items') + + with pytest.raises(ValueError, match='expected 4 tokens in map statement but got 5: for for item in items'): + render_cf.parse_map_statement('for for item in items') + + with pytest.raises(ValueError, match="expected 'for', got 'fr': fr item in items"): + render_cf.parse_map_statement('fr item in items') + + with pytest.raises(ValueError, match="expected 'in', got 'ib': for item ib items"): + render_cf.parse_map_statement('for item ib items') From 08c72ba31272b20178becc38567b6edd7615846e Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Mon, 21 Oct 2024 12:53:26 -0800 Subject: [PATCH 151/163] placeholder tests --- tests/test_render_cf_2.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/test_render_cf_2.py b/tests/test_render_cf_2.py index 113658259..357092231 100644 --- a/tests/test_render_cf_2.py +++ b/tests/test_render_cf_2.py @@ -17,3 +17,11 @@ def test_parse_map_statement(): with pytest.raises(ValueError, match="expected 'in', got 'ib': for item ib items"): render_cf.parse_map_statement('for item ib items') + + +def test_get_batch_job_parameters(): + assert False + + +def test_get_batch_param_names_for_job_step(): + assert False From 2161a8654ab481c42ef7a829205e6716ea7cdc8a Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Mon, 21 Oct 2024 15:51:24 -0800 Subject: [PATCH 152/163] restrict custom compute env defs to one file --- apps/compute-cf.yml.j2 | 5 +- .../handle-batch-event-cf.yml.j2 | 4 +- apps/main-cf.yml.j2 | 6 +- apps/render_cf.py | 81 ++++--------------- apps/scale-cluster/scale-cluster-cf.yml.j2 | 8 +- apps/workflow-cf.yml.j2 | 6 +- job_spec/ARIA_AUTORIFT.yml | 4 +- job_spec/ARIA_RAIDER.yml | 3 +- job_spec/AUTORIFT.yml | 3 +- job_spec/AUTORIFT_ITS_LIVE.yml | 3 +- job_spec/INSAR_GAMMA.yml | 3 +- job_spec/INSAR_ISCE.yml | 6 +- job_spec/INSAR_ISCE_BURST.yml | 3 +- job_spec/INSAR_ISCE_MULTI_BURST.yml | 3 +- job_spec/RTC_GAMMA.yml | 3 +- job_spec/S1_CORRECTION_ITS_LIVE.yml | 3 +- job_spec/SRG_GSLC.yml | 3 +- job_spec/SRG_TIME_SERIES.yml | 6 +- job_spec/WATER_MAP.yml | 9 +-- job_spec/WATER_MAP_EQ.yml | 6 +- job_spec/config/compute_environments.yml | 33 ++------ 21 files changed, 54 insertions(+), 147 deletions(-) diff --git a/apps/compute-cf.yml.j2 b/apps/compute-cf.yml.j2 index d863a0198..aeb4cd099 100644 --- a/apps/compute-cf.yml.j2 +++ b/apps/compute-cf.yml.j2 @@ -29,7 +29,7 @@ Outputs: JobQueueArn: Value: !Ref BatchJobQueue - {% for name in compute_env_names %} + {% for name in compute_envs %} {{ name }}ComputeEnvironmentArn: Value: !Ref {{ name }}ComputeEnvironment @@ -105,8 +105,7 @@ Resources: SchedulingPolicy: Type: AWS::Batch::SchedulingPolicy - {% for env in compute_envs %} - {% set name = env['name'] %} + {% for name, env in compute_envs.items() %} {% set instance_types = env['instance_types'].split(',') if 'instance_types' in env else '!Ref InstanceTypes' %} {% set ami_id = env['ami_id'] if 'ami_id' in env else '!Ref AmiId' %} {% set type = env['allocation_type'] if 'allocation_type' in env else 'SPOT' %} diff --git a/apps/handle-batch-event/handle-batch-event-cf.yml.j2 b/apps/handle-batch-event/handle-batch-event-cf.yml.j2 index ecbfea0ee..cf39fd7a2 100644 --- a/apps/handle-batch-event/handle-batch-event-cf.yml.j2 +++ b/apps/handle-batch-event/handle-batch-event-cf.yml.j2 @@ -5,7 +5,7 @@ Parameters: JobQueueArn: Type: String - {% for name in compute_env_names %} + {% for name in compute_envs %} {{ name }}JobQueueArn: Type: String {% endfor %} @@ -100,7 +100,7 @@ Resources: detail: 
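A note on the one-line fix back in PATCH 149, before the PATCH 152 diff continues: in Python, everything after the first comma in an `assert` statement is the failure *message*, not part of the condition. The original `assert tokens[0], tokens[2] == ('for', 'in')` therefore only checked that `tokens[0]` was truthy, so a malformed map statement could never be rejected. A minimal standalone sketch of the pitfall (not part of the patch series):

```python
# Buggy form: Python reads this as `assert tokens[0]` with the comparison
# relegated to the failure message (which is never evaluated on success),
# so only the truthiness of tokens[0] is checked.
tokens = 'fr item in items'.split(' ')
assert tokens[0], tokens[2] == ('for', 'in')  # passes, even though tokens[0] != 'for'

# Corrected form from PATCH 149: compare the tuple itself.
try:
    assert (tokens[0], tokens[2]) == ('for', 'in')
except AssertionError:
    print('malformed map statement rejected')  # reached, as intended
```

PATCH 150 then replaces the asserts with explicit `ValueError`s, which both yields actionable messages (as the new tests pin down) and keeps the validation active when Python runs with assertions disabled via `python -O`. The PATCH 152 diff continues below.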
jobQueue: - !Ref JobQueueArn - {% for name in compute_env_names %} + {% for name in compute_envs %} - !Ref {{ name }}JobQueueArn {% endfor %} status: diff --git a/apps/main-cf.yml.j2 b/apps/main-cf.yml.j2 index 47835d3b9..5f33c52f4 100644 --- a/apps/main-cf.yml.j2 +++ b/apps/main-cf.yml.j2 @@ -154,7 +154,7 @@ Resources: Properties: Parameters: ComputeEnvironmentArn: !GetAtt Cluster.Outputs.ComputeEnvironmentArn - {% for name in compute_env_names %} + {% for name in compute_envs %} {{ name }}ComputeEnvironmentArn: !GetAtt Cluster.Outputs.{{ name }}ComputeEnvironmentArn {% endfor %} DefaultMaxvCpus: !Ref DefaultMaxvCpus @@ -172,7 +172,7 @@ Resources: Properties: Parameters: JobQueueArn: !GetAtt Cluster.Outputs.JobQueueArn - {% for name in compute_env_names %} + {% for name in compute_envs %} {{ name }}JobQueueArn: !GetAtt Cluster.Outputs.{{ name }}JobQueueArn {% endfor %} JobsTable: !Ref JobsTable @@ -187,7 +187,7 @@ Resources: Properties: Parameters: JobQueueArn: !GetAtt Cluster.Outputs.JobQueueArn - {% for name in compute_env_names %} + {% for name in compute_envs %} {{ name }}JobQueueArn: !GetAtt Cluster.Outputs.{{ name }}JobQueueArn {% endfor %} TaskRoleArn: !GetAtt Cluster.Outputs.TaskRoleArn diff --git a/apps/render_cf.py b/apps/render_cf.py index 7d975f480..945f9df57 100644 --- a/apps/render_cf.py +++ b/apps/render_cf.py @@ -1,7 +1,6 @@ import argparse import json from pathlib import Path -from typing import Optional import jinja2 import yaml @@ -86,11 +85,7 @@ def get_batch_submit_job_state(job_spec: dict, step: dict, filter_batch_params=F batch_job_parameters = '$.batch_job_parameters' parameters_key = 'Parameters.$' - if 'import' in step['compute_environment']: - compute_environment = step['compute_environment']['import'] - else: - compute_environment = step['compute_environment']['name'] - + compute_environment = step['compute_environment'] job_queue = 'JobQueueArn' if compute_environment == 'Default' else compute_environment + 'JobQueueArn' return { 'Type': 'Task', @@ -163,7 +158,7 @@ def get_batch_param_names_for_job_step(step: dict) -> set[str]: } -def render_templates(job_types, compute_envs, security_environment, api_name): +def render_templates(job_types: dict, compute_envs: dict, security_environment: str, api_name: str): job_states = get_states_for_jobs(job_types) env = jinja2.Environment( @@ -182,7 +177,6 @@ def render_templates(job_types, compute_envs, security_environment, api_name): output = template.render( job_types=job_types, compute_envs=compute_envs, - compute_env_names=[env['name'] for env in compute_envs], security_environment=security_environment, api_name=api_name, json=json, @@ -196,65 +190,18 @@ def render_templates(job_types, compute_envs, security_environment, api_name): template_file.with_suffix('').write_text(output) -def parse_compute_environments_file( - compute_env_names: set, - compute_env_imports: set, - compute_env_file: Path -) -> list[dict]: - compute_envs = [] - compute_envs_from_file = yaml.safe_load(compute_env_file.read_text())['compute_environments'] - - for name in compute_envs_from_file: - if name in compute_env_imports: - if name in compute_env_names: - raise ValueError( - f'Compute envs must have unique names but the following is defined more than once: {name}.' 
- ) - compute_envs_from_file[name].update({'name': name}) - compute_envs.append(compute_envs_from_file[name]) - compute_env_names.add(name) - - for name in compute_env_imports: - if name not in compute_envs_from_file and name != 'Default': - raise ValueError( - f'The following compute env is imported but not defined in the compute envs file: {name}.' - ) - - return compute_envs - +def get_compute_environments_for_deployment(job_types: dict, compute_env_file: Path) -> dict: + compute_envs = yaml.safe_load(compute_env_file.read_text())['compute_environments'] -def get_compute_environments(job_types: dict, compute_env_file: Optional[Path] = None) -> list[dict]: - compute_envs = [] - compute_env_names = {'Default'} - compute_env_imports = set() + if 'Default' in compute_envs: + raise ValueError(f"'Default' is a reserved compute environment name") - for _, job_spec in job_types.items(): - for step in job_spec['steps']: - compute_env = step['compute_environment'] - if 'name' in compute_env: - name = compute_env['name'] - if name in compute_env_names: - raise ValueError( - f'Compute envs must have unique names but the following is defined more than once: {name}.' - ) - compute_envs.append(compute_env) - compute_env_names.add(name) - elif 'import' in compute_env: - compute_env_imports.add(compute_env['import']) - - if compute_env_file: - compute_envs_from_file = parse_compute_environments_file( - compute_env_names, - compute_env_imports, - compute_env_file - ) - compute_envs.extend(compute_envs_from_file) - elif compute_env_imports is not None: - raise ValueError( - f'The following compute envs are imported but no compute env file was provided: {compute_env_imports}.' - ) - - return compute_envs + return { + step['compute_environment']: compute_envs[step['compute_environment']] + for job_spec in job_types.values() + for step in job_spec['steps'] + if step['compute_environment'] != 'Default' + } def render_batch_params_by_job_type(job_types: dict) -> None: @@ -316,7 +263,7 @@ def validate_job_spec(job_type: str, job_spec: dict) -> None: def main(): parser = argparse.ArgumentParser() parser.add_argument('-j', '--job-spec-files', required=True, nargs='+', type=Path) - parser.add_argument('-e', '--compute-environment-file', type=Path) + parser.add_argument('-e', '--compute-environment-file', required=True, type=Path) parser.add_argument('-s', '--security-environment', default='ASF', choices=['ASF', 'EDC', 'JPL', 'JPL-public']) parser.add_argument('-n', '--api-name', required=True) parser.add_argument('-c', '--cost-profile', default='DEFAULT', choices=['DEFAULT', 'EDC']) @@ -333,7 +280,7 @@ def main(): for step in job_spec['steps']: step['name'] = job_type + '_' + step['name'] if step['name'] else job_type - compute_envs = get_compute_environments(job_types, args.compute_environment_file) + compute_envs = get_compute_environments_for_deployment(job_types, args.compute_environment_file) render_batch_params_by_job_type(job_types) render_default_params_by_job_type(job_types) diff --git a/apps/scale-cluster/scale-cluster-cf.yml.j2 b/apps/scale-cluster/scale-cluster-cf.yml.j2 index 01e12c5bf..0fa44cd90 100644 --- a/apps/scale-cluster/scale-cluster-cf.yml.j2 +++ b/apps/scale-cluster/scale-cluster-cf.yml.j2 @@ -5,7 +5,7 @@ Parameters: ComputeEnvironmentArn: Type: String - {% for name in compute_env_names %} + {% for name in compute_envs %} {{ name }}ComputeEnvironmentArn: Type: String {% endfor %} @@ -86,7 +86,7 @@ Resources: Action: batch:UpdateComputeEnvironment Resource: - !Ref ComputeEnvironmentArn - {% 
for name in compute_env_names %} + {% for name in compute_envs %} - !Ref {{ name }}ComputeEnvironmentArn {% endfor %} @@ -127,7 +127,7 @@ Resources: Targets: - Arn: !GetAtt Lambda.Arn Id: lambda - {% for name in compute_env_names %} + {% for name in compute_envs %} - Arn: !GetAtt {{ name }}Lambda.Arn Id: {{ name }}lambda {% endfor %} @@ -140,7 +140,7 @@ Resources: Principal: events.amazonaws.com SourceArn: !GetAtt Schedule.Arn - {% for name in compute_env_names %} + {% for name in compute_envs %} {{ name }}LogGroup: Type: AWS::Logs::LogGroup Properties: diff --git a/apps/workflow-cf.yml.j2 b/apps/workflow-cf.yml.j2 index e04a1068c..011bbf813 100644 --- a/apps/workflow-cf.yml.j2 +++ b/apps/workflow-cf.yml.j2 @@ -5,7 +5,7 @@ Parameters: JobQueueArn: Type: String - {% for name in compute_env_names %} + {% for name in compute_envs %} {{ name }}JobQueueArn: Type: String {% endfor %} @@ -89,7 +89,7 @@ Resources: DefinitionS3Location: step-function.json DefinitionSubstitutions: JobQueueArn: !Ref JobQueueArn - {% for name in compute_env_names %} + {% for name in compute_envs %} {{ name }}JobQueueArn: !Ref {{ name }}JobQueueArn {% endfor %} {% for job_type, job_spec in job_types.items() %} @@ -134,7 +134,7 @@ Resources: Action: batch:SubmitJob Resource: - !Ref JobQueueArn - {% for name in compute_env_names %} + {% for name in compute_envs %} - !Ref {{ name }}JobQueueArn {% endfor %} {% for job_type, job_spec in job_types.items() %} diff --git a/job_spec/ARIA_AUTORIFT.yml b/job_spec/ARIA_AUTORIFT.yml index fcfe08748..39adeb7a9 100644 --- a/job_spec/ARIA_AUTORIFT.yml +++ b/job_spec/ARIA_AUTORIFT.yml @@ -55,9 +55,7 @@ AUTORIFT: - ITS_LIVE_OD - Ref::granules timeout: 10800 - compute_environment: - name: 'AriaAutorift' - instance_types: r6id.xlarge,r6id.2xlarge,r6id.4xlarge,r6id.8xlarge,r6idn.xlarge,r6idn.2xlarge,r6idn.4xlarge,r6idn.8xlarge + compute_environment: AriaAutorift vcpu: 1 memory: 31500 secrets: diff --git a/job_spec/ARIA_RAIDER.yml b/job_spec/ARIA_RAIDER.yml index 951491f5a..fc41b41ea 100644 --- a/job_spec/ARIA_RAIDER.yml +++ b/job_spec/ARIA_RAIDER.yml @@ -38,8 +38,7 @@ ARIA_RAIDER: - --input-bucket-prefix - Ref::job_id timeout: 10800 - compute_environment: - import: 'Default' + compute_environment: Default vcpu: 1 memory: 7500 secrets: diff --git a/job_spec/AUTORIFT.yml b/job_spec/AUTORIFT.yml index 7797fd52a..0943fd578 100644 --- a/job_spec/AUTORIFT.yml +++ b/job_spec/AUTORIFT.yml @@ -52,8 +52,7 @@ AUTORIFT: - ITS_LIVE_OD - Ref::granules timeout: 10800 - compute_environment: - import: 'Default' + compute_environment: Default vcpu: 1 memory: 31500 secrets: diff --git a/job_spec/AUTORIFT_ITS_LIVE.yml b/job_spec/AUTORIFT_ITS_LIVE.yml index ecfeafc74..698d53ee3 100644 --- a/job_spec/AUTORIFT_ITS_LIVE.yml +++ b/job_spec/AUTORIFT_ITS_LIVE.yml @@ -66,8 +66,7 @@ AUTORIFT: - ITS_LIVE_PROD - Ref::granules timeout: 10800 - compute_environment: - import: 'Default' + compute_environment: Default vcpu: 1 memory: 31500 secrets: diff --git a/job_spec/INSAR_GAMMA.yml b/job_spec/INSAR_GAMMA.yml index 37f4c5fa0..7a6473bac 100644 --- a/job_spec/INSAR_GAMMA.yml +++ b/job_spec/INSAR_GAMMA.yml @@ -111,8 +111,7 @@ INSAR_GAMMA: - Ref::phase_filter_parameter - Ref::granules timeout: 10800 - compute_environment: - import: 'Default' + compute_environment: Default vcpu: 1 memory: 31500 secrets: diff --git a/job_spec/INSAR_ISCE.yml b/job_spec/INSAR_ISCE.yml index 97b33becd..7c9ee8a5c 100644 --- a/job_spec/INSAR_ISCE.yml +++ b/job_spec/INSAR_ISCE.yml @@ -119,8 +119,7 @@ INSAR_ISCE: - --unfiltered-coherence - 
Ref::unfiltered_coherence timeout: 21600 - compute_environment: - import: 'InsarIsceAria' + compute_environment: InsarIsceAria vcpu: 1 memory: 15500 secrets: @@ -140,8 +139,7 @@ INSAR_ISCE: - --weather-model - Ref::weather_model timeout: 10800 - compute_environment: - import: 'InsarIsceAria' + compute_environment: InsarIsceAria vcpu: 1 memory: 7500 secrets: diff --git a/job_spec/INSAR_ISCE_BURST.yml b/job_spec/INSAR_ISCE_BURST.yml index 8a464acc6..77c8b254d 100644 --- a/job_spec/INSAR_ISCE_BURST.yml +++ b/job_spec/INSAR_ISCE_BURST.yml @@ -59,8 +59,7 @@ INSAR_ISCE_BURST: - Ref::looks - Ref::granules timeout: 5400 - compute_environment: - import: 'Default' + compute_environment: Default vcpu: 1 memory: 7600 secrets: diff --git a/job_spec/INSAR_ISCE_MULTI_BURST.yml b/job_spec/INSAR_ISCE_MULTI_BURST.yml index 289421c4b..0ce01289c 100644 --- a/job_spec/INSAR_ISCE_MULTI_BURST.yml +++ b/job_spec/INSAR_ISCE_MULTI_BURST.yml @@ -75,8 +75,7 @@ INSAR_ISCE_MULTI_BURST: - --secondary - Ref::secondary timeout: 126000 # 35 hours - compute_environment: - import: 'Default' + compute_environment: Default vcpu: 1 memory: 4 # Memory is always overridden by the step function secrets: diff --git a/job_spec/RTC_GAMMA.yml b/job_spec/RTC_GAMMA.yml index dcc5a8496..36277f549 100644 --- a/job_spec/RTC_GAMMA.yml +++ b/job_spec/RTC_GAMMA.yml @@ -134,8 +134,7 @@ RTC_GAMMA: - Ref::include_rgb - Ref::granules timeout: 36000 - compute_environment: - import: 'Default' + compute_environment: Default vcpu: 1 memory: 31500 secrets: diff --git a/job_spec/S1_CORRECTION_ITS_LIVE.yml b/job_spec/S1_CORRECTION_ITS_LIVE.yml index 16bdb9944..ccddb38d9 100644 --- a/job_spec/S1_CORRECTION_ITS_LIVE.yml +++ b/job_spec/S1_CORRECTION_ITS_LIVE.yml @@ -44,8 +44,7 @@ S1_CORRECTION_TEST: - '/vsicurl/http://its-live-data.s3.amazonaws.com/autorift_parameters/v001/autorift_landice_0120m.shp' - Ref::granules timeout: 10800 - compute_environment: - import: 'Default' + compute_environment: Default vcpu: 1 memory: 15750 secrets: diff --git a/job_spec/SRG_GSLC.yml b/job_spec/SRG_GSLC.yml index 7f6ed1d3b..92421982e 100644 --- a/job_spec/SRG_GSLC.yml +++ b/job_spec/SRG_GSLC.yml @@ -55,8 +55,7 @@ SRG_GSLC: - Ref::bucket_prefix - Ref::granules timeout: 10800 - compute_environment: - import: 'SrgGslc' + compute_environment: SrgGslc vcpu: 1 gpu: 1 memory: 30500 diff --git a/job_spec/SRG_TIME_SERIES.yml b/job_spec/SRG_TIME_SERIES.yml index b330cd088..f073a338c 100644 --- a/job_spec/SRG_TIME_SERIES.yml +++ b/job_spec/SRG_TIME_SERIES.yml @@ -58,8 +58,7 @@ SRG_TIME_SERIES: - --use-gslc-prefix - Ref::granule timeout: 10800 - compute_environment: - import: SrgGslc + compute_environment: SrgGslc vcpu: 1 gpu: 1 memory: 30500 @@ -79,7 +78,6 @@ SRG_TIME_SERIES: - Ref::bucket_prefix - --use-gslc-prefix timeout: 21600 # 6 hr - compute_environment: - import: Default + compute_environment: Default vcpu: 1 memory: 30500 diff --git a/job_spec/WATER_MAP.yml b/job_spec/WATER_MAP.yml index 599a54a1b..70ac8c352 100644 --- a/job_spec/WATER_MAP.yml +++ b/job_spec/WATER_MAP.yml @@ -147,8 +147,7 @@ WATER_MAP: - 'copernicus' - Ref::granules timeout: 36000 - compute_environment: - import: 'Default' + compute_environment: Default vcpu: 1 memory: 31500 secrets: @@ -174,8 +173,7 @@ WATER_MAP: - --membership-threshold - Ref::membership_threshold timeout: 36000 - compute_environment: - import: 'Default' + compute_environment: Default vcpu: 1 memory: 126000 - name: FLOOD_MAP @@ -202,7 +200,6 @@ WATER_MAP: - --minimization-metric - Ref::minimization_metric timeout: 86400 - 
compute_environment: - import: 'Default' + compute_environment: Default vcpu: 1 memory: 126000 diff --git a/job_spec/WATER_MAP_EQ.yml b/job_spec/WATER_MAP_EQ.yml index 37561e499..deeb575ee 100644 --- a/job_spec/WATER_MAP_EQ.yml +++ b/job_spec/WATER_MAP_EQ.yml @@ -89,8 +89,7 @@ WATER_MAP_EQ: - 'copernicus' - Ref::granules timeout: 36000 - compute_environment: - import: 'Default' + compute_environment: Default vcpu: 1 memory: 31500 secrets: @@ -110,7 +109,6 @@ WATER_MAP_EQ: - --hand-fraction - Ref::hand_fraction timeout: 36000 - compute_environment: - import: 'Default' + compute_environment: Default vcpu: 1 memory: 126000 diff --git a/job_spec/config/compute_environments.yml b/job_spec/config/compute_environments.yml index 3d14fab9f..01de096b7 100644 --- a/job_spec/config/compute_environments.yml +++ b/job_spec/config/compute_environments.yml @@ -1,33 +1,14 @@ -# Compute Environments that are shared between multiple jobs and/or steps should be defined here, -# rather than in the job spec. The format is the same, except the name becomes the key: -# ExampleComputeEnvName: -# instance_types: ... -# ami_id: ... -# allocation_type: ... -# allocation_strategy: ... -# Once the environment is defined here, it can be imported in the job spec step -# by using the `import` key: -# JOB_NAME: -# ... -# steps: -# step_name: -# ... -# compute_environment: -# import: 'ExampleComputeEnvName' -# Note that the name must be unique, i.e. a compute environment that is defined in a job spec using -# the `name` key cannot also be defined here, unless it is not imported in the same deployment (envs here -# are only rendered if they are imported). Also note that the name should be in pascal case to avoid -# cfn-lint errors. compute_environments: - # Default is defined per-deployment in the deployment actions.
+ # Supported fields: + # instance_types + # ami_id + # allocation_type + # allocation_strategy SrgGslc: instance_types: g6.2xlarge ami_id: ami-0729c079aae647cb3 # /aws/service/ecs/optimized-ami/amazon-linux-2/gpu/recommended/image_id + AriaAutorift: + instance_types: r6id.xlarge,r6id.2xlarge,r6id.4xlarge,r6id.8xlarge,r6idn.xlarge,r6idn.2xlarge,r6idn.4xlarge,r6idn.8xlarge InsarIsceAria: allocation_type: EC2 allocation_strategy: BEST_FIT_PROGRESSIVE From e258b70d60a7e8b1c98dae1b10acaad2b9dd3a29 Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Mon, 21 Oct 2024 15:53:25 -0800 Subject: [PATCH 153/163] combine test files --- tests/test_render_cf.py | 26 ++++++++++++++++++++++++++ tests/test_render_cf_2.py | 27 --------------------------- 2 files changed, 26 insertions(+), 27 deletions(-) delete mode 100644 tests/test_render_cf_2.py diff --git a/tests/test_render_cf.py b/tests/test_render_cf.py index beeb8959d..24620f70c 100644 --- a/tests/test_render_cf.py +++ b/tests/test_render_cf.py @@ -3,7 +3,33 @@ import yaml +def test_parse_map_statement(): + assert render_cf.parse_map_statement('for item in items') == ('item', 'items') + assert render_cf.parse_map_statement('for foo in bar') == ('foo', 'bar') + + with pytest.raises(ValueError, match='expected 4 tokens in map statement but got 3: item in items'): + render_cf.parse_map_statement('item in items') + + with pytest.raises(ValueError, match='expected 4 tokens in map statement but got 5: for for item in items'): + render_cf.parse_map_statement('for for item in items') + + with pytest.raises(ValueError, match="expected 'for', got 'fr': fr item in items"): + render_cf.parse_map_statement('fr item in items') + + with pytest.raises(ValueError, match="expected 'in', got 'ib': for item ib items"): + render_cf.parse_map_statement('for item ib items') + + +def test_get_batch_job_parameters(): + assert False + + +def test_get_batch_param_names_for_job_step(): + assert False + + def test_get_compute_environments(tmp_path): + # TODO update job_types = { 'FOO': { 'steps': [ diff --git a/tests/test_render_cf_2.py b/tests/test_render_cf_2.py deleted file mode 100644 index 357092231..000000000 --- a/tests/test_render_cf_2.py +++ /dev/null @@ -1,27 +0,0 @@ -import pytest - -import render_cf - -def test_parse_map_statement(): - assert render_cf.parse_map_statement('for item in items') == ('item', 'items') - assert render_cf.parse_map_statement('for foo in bar') == ('foo', 'bar') - - with pytest.raises(ValueError, match='expected 4 tokens in map statement but got 3: item in items'): - render_cf.parse_map_statement('item in items') - - with pytest.raises(ValueError, match='expected 4 tokens in map statement but got 5: for for item in items'): - render_cf.parse_map_statement('for for item in items') - - with pytest.raises(ValueError, match="expected 'for', got 'fr': fr item in items"): - render_cf.parse_map_statement('fr item in items') - - with pytest.raises(ValueError, match="expected 'in', got 'ib': for item ib items"): - render_cf.parse_map_statement('for item ib items') - - -def test_get_batch_job_parameters(): - assert False - - -def test_get_batch_param_names_for_job_step(): - assert False From 7c0557e757a8ed35583f01af57e781f2cec17ed6 Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Mon, 21 Oct 2024 16:34:25 -0800 Subject: [PATCH 154/163] update a test --- apps/render_cf.py | 2 +- tests/test_render_cf.py | 119 +++++++++++++--------------------------- 2 files changed, 39 insertions(+), 82 deletions(-) diff --git a/apps/render_cf.py b/apps/render_cf.py 
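Before PATCH 154's diff resumes below, a concrete illustration of the compute-environment contract that PATCH 152 introduces: each job step names its environment with a plain string, `Default` is reserved for the deployment's default environment, and any other name must be defined in `job_spec/config/compute_environments.yml`. A hedged sketch with hypothetical job specs (the real ones live in `job_spec/*.yml`), mirroring the logic of `get_compute_environments_for_deployment()`:

```python
# Hypothetical job specs; in HyP3 these come from parsing the job spec YAML files.
job_types = {
    'SRG_GSLC': {'steps': [{'compute_environment': 'SrgGslc'}]},
    'RTC_GAMMA': {'steps': [{'compute_environment': 'Default'}]},
}
# As parsed from job_spec/config/compute_environments.yml ('Default' is
# reserved and must never appear in the file):
compute_envs_from_file = {
    'SrgGslc': {'instance_types': 'g6.2xlarge', 'ami_id': 'ami-0729c079aae647cb3'},
    'AriaAutorift': {'instance_types': 'r6id.xlarge'},  # abbreviated; unreferenced here
}
# Only environments that some job step actually references are rendered into
# the deployment:
rendered = {
    step['compute_environment']: compute_envs_from_file[step['compute_environment']]
    for job_spec in job_types.values()
    for step in job_spec['steps']
    if step['compute_environment'] != 'Default'
}
assert rendered == {
    'SrgGslc': {'instance_types': 'g6.2xlarge', 'ami_id': 'ami-0729c079aae647cb3'}
}
```

Referencing an environment the file does not define fails fast with a `KeyError`, which is exactly what the updated test pins down. PATCH 154's diff continues below.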
index 945f9df57..c3958744b 100644 --- a/apps/render_cf.py +++ b/apps/render_cf.py @@ -194,7 +194,7 @@ def get_compute_environments_for_deployment(job_types: dict, compute_env_file: P compute_envs = yaml.safe_load(compute_env_file.read_text())['compute_environments'] if 'Default' in compute_envs: - raise ValueError(f"'Default' is a reserved compute environment name") + raise ValueError("'Default' is a reserved compute environment name") return { step['compute_environment']: compute_envs[step['compute_environment']] diff --git a/tests/test_render_cf.py b/tests/test_render_cf.py index 24620f70c..4ae98cfb8 100644 --- a/tests/test_render_cf.py +++ b/tests/test_render_cf.py @@ -29,100 +29,57 @@ def test_get_batch_param_names_for_job_step(): def test_get_compute_environments(tmp_path): - # TODO update job_types = { 'FOO': { 'steps': [ - { - 'compute_environment': { - 'name': 'compute_environment_1', - 'intance_types': ['type1', 'type2'] - } - }, - {'compute_environment': {'import': 'Default'}} - ] + {'compute_environment': 'ComputeEnvironment1'}, + {'compute_environment': 'Default'}, + ], }, 'BAR': { 'steps': [ - {'compute_environment': {'import': 'compute_environment_2'}}, - { - 'compute_environment': { - 'name': 'compute_environment_3', - 'allocation_type': 'alloc_type_1', - 'allocation_strategy': 'alloc_strat_1' - } - } - ] - } + {'compute_environment': 'ComputeEnvironment2'}, + ], + }, + 'BAZ': { + 'steps': [ + {'compute_environment': 'ComputeEnvironment1'}, + {'compute_environment': 'ComputeEnvironment2'}, + ], + }, } - compute_env_file = { + compute_env_file_contents = { 'compute_environments': { - 'compute_environment_2': { - 'instance_types': ['type_3'], - 'ami_id': 'ami_id_1', - 'allocation_type': 'alloc_type_2', - 'allocation_strategy': 'alloc_strat_2' - }, - 'compute_environment_4': {'instance_types': ['type_4']} + 'ComputeEnvironment1': {'key1': 'value1'}, + 'ComputeEnvironment2': {'key2': 'value2'}, + 'ComputeEnvironment3': {'key3': 'value3'}, } } - expected_compute_envs = [ - { - 'name': 'compute_environment_1', - 'intance_types': ['type1', 'type2'] - }, - { - 'name': 'compute_environment_3', - 'allocation_type': 'alloc_type_1', - 'allocation_strategy': 'alloc_strat_1' - }, - { - 'name': 'compute_environment_2', - 'instance_types': ['type_3'], - 'ami_id': 'ami_id_1', - 'allocation_type': 'alloc_type_2', - 'allocation_strategy': 'alloc_strat_2' - } - ] - compute_env_filepath = tmp_path / 'compute_environments.yml' - yaml.dump(compute_env_file, open(compute_env_filepath, 'w')) - compute_envs = render_cf.get_compute_environments(job_types, compute_env_filepath) - assert compute_envs == expected_compute_envs - - job_types_redefined_default = { - 'FOO': {'steps': [{'compute_environment': {'name': 'Default'}}]}} - with pytest.raises(ValueError, match=r'.*defined more than once: Default*'): - compute_envs = render_cf.get_compute_environments(job_types_redefined_default) - - job_types_duplicate_env = { - 'FOO': {'steps': [{'compute_environment': {'name': 'compute_environment_1'}}]}, - 'BAR': {'steps': [{'compute_environment': {'name': 'compute_environment_1'}}]} + expected_compute_envs = { + 'ComputeEnvironment1': {'key1': 'value1'}, + 'ComputeEnvironment2': {'key2': 'value2'}, } - with pytest.raises(ValueError, match=r'.*defined more than once: compute_environment_1*'): - compute_envs = render_cf.get_compute_environments(job_types_duplicate_env) + compute_env_file = tmp_path / 'compute_environments.yml' + yaml.dump(compute_env_file_contents, open(compute_env_file, 'w')) + assert 
render_cf.get_compute_environments_for_deployment(job_types, compute_env_file) == expected_compute_envs - job_types_import_undefined = { - 'FOO': {'steps': [{'compute_environment': {'import': 'undefined_compute_environment'}}]} + compute_env_file_contents = { + 'compute_environments': { + 'ComputeEnvironment1': {'key1': 'value1'}, + 'ComputeEnvironment2': {'key2': 'value2'}, + 'ComputeEnvironment3': {'key3': 'value3'}, + 'Default': {'key', 'value'}, + } } - with pytest.raises(ValueError, match=r'.*not defined in the compute envs file: undefined_compute_environment*'): - compute_envs = render_cf.get_compute_environments(job_types_import_undefined, compute_env_filepath) - with pytest.raises(ValueError, match=r'.*no compute env file was provided: {\'undefined_compute_environment\'}*'): - compute_envs = render_cf.get_compute_environments(job_types_import_undefined) - - compute_env_file_redefined_default = {'compute_environments': {'Default': {}}} - yaml.dump(compute_env_file_redefined_default, open(compute_env_filepath, 'w')) - with pytest.raises(ValueError, match=r'.*defined more than once: Default*'): - compute_envs = render_cf.get_compute_environments(job_types, compute_env_filepath) + yaml.dump(compute_env_file_contents, open(compute_env_file, 'w')) + with pytest.raises(ValueError, match="'Default' is a reserved compute environment name"): + render_cf.get_compute_environments_for_deployment(job_types, compute_env_file) - job_types = { - 'FOO': { - 'steps': [ - {'compute_environment': {'name': 'compute_environment_1'}}, - {'compute_environment': {'import': 'compute_environment_1'}} - ] + compute_env_file_contents = { + 'compute_environments': { + 'ComputeEnvironment1': {'key1': 'value1'}, } } - compute_env_file_duplicate = {'compute_environments': {'compute_environment_1': {}}} - yaml.dump(compute_env_file_duplicate, open(compute_env_filepath, 'w')) - with pytest.raises(ValueError, match=r'.*defined more than once: compute_environment_1*'): - compute_envs = render_cf.get_compute_environments(job_types, compute_env_filepath) + yaml.dump(compute_env_file_contents, open(compute_env_file, 'w')) + with pytest.raises(KeyError, match='ComputeEnvironment2'): + render_cf.get_compute_environments_for_deployment(job_types, compute_env_file) From 25e91e8709f87bc029f79cfa49e75fb5ded62c80 Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Mon, 21 Oct 2024 16:49:28 -0800 Subject: [PATCH 155/163] implement test --- tests/test_render_cf.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_render_cf.py b/tests/test_render_cf.py index 4ae98cfb8..cdeb82fb7 100644 --- a/tests/test_render_cf.py +++ b/tests/test_render_cf.py @@ -25,7 +25,8 @@ def test_get_batch_job_parameters(): def test_get_batch_param_names_for_job_step(): - assert False + step = {'command': ['param1', 'Ref::param2', 'Ref::param3', 'Ref::param2', 'param4', 'Ref::param5']} + assert render_cf.get_batch_param_names_for_job_step(step) == {'param2', 'param3', 'param5'} def test_get_compute_environments(tmp_path): From 25d9b7e785b955785cd888b25e6a57be48bccd93 Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Mon, 21 Oct 2024 17:30:06 -0800 Subject: [PATCH 156/163] implement a test and improve function --- apps/render_cf.py | 18 +++++++++--------- tests/test_render_cf.py | 20 +++++++++++++++++++- 2 files changed, 28 insertions(+), 10 deletions(-) diff --git a/apps/render_cf.py b/apps/render_cf.py index c3958744b..3d5986d73 100644 --- a/apps/render_cf.py +++ b/apps/render_cf.py @@ -136,16 +136,16 @@ def 
parse_map_statement(map_statement: str) -> tuple[str, str]: def get_batch_job_parameters(job_spec: dict, step: dict, map_item: str = None) -> dict: - job_params = ['bucket_prefix', *job_spec['parameters'].keys()] + job_params = {'bucket_prefix', *job_spec['parameters'].keys()} step_params = get_batch_param_names_for_job_step(step) - batch_params = { - f'{param}.$': f'$.batch_job_parameters.{param}' - for param in job_params - if param in step_params - } - if map_item is not None: - assert map_item in step_params - batch_params[f'{map_item}.$'] = '$$.Map.Item.Value' + batch_params = {} + for param in step_params: + if param == map_item: + batch_params[f'{map_item}.$'] = '$$.Map.Item.Value' + else: + if param not in job_params: + raise ValueError(f"job parameter '{param}' has not been defined") + batch_params[f'{param}.$'] = f'$.batch_job_parameters.{param}' return batch_params diff --git a/tests/test_render_cf.py b/tests/test_render_cf.py index cdeb82fb7..ecb27bacd 100644 --- a/tests/test_render_cf.py +++ b/tests/test_render_cf.py @@ -21,7 +21,25 @@ def test_parse_map_statement(): def test_get_batch_job_parameters(): - assert False + job_spec = {'parameters': {'param1': {}, 'param2': {}, 'param3': {}, 'param4': {}}} + + step = {'command': ['foo', 'Ref::param2', 'Ref::param3', 'bar', 'Ref::bucket_prefix']} + assert render_cf.get_batch_job_parameters(job_spec, step) == { + 'param2.$': '$.batch_job_parameters.param2', + 'param3.$': '$.batch_job_parameters.param3', + 'bucket_prefix.$': '$.batch_job_parameters.bucket_prefix', + } + + step = {'command': ['foo', 'Ref::param2', 'Ref::param3', 'bar', 'Ref::param5']} + assert render_cf.get_batch_job_parameters(job_spec, step, map_item='param5') == { + 'param2.$': '$.batch_job_parameters.param2', + 'param3.$': '$.batch_job_parameters.param3', + 'param5.$': '$$.Map.Item.Value', + } + + step = {'command': ['foo', 'Ref::param2', 'Ref::param3', 'bar', 'Ref::param5']} + with pytest.raises(ValueError, match="job parameter 'param5' has not been defined"): + render_cf.get_batch_job_parameters(job_spec, step) def test_get_batch_param_names_for_job_step(): From 7f86585bea076caf3312599d4e11ecaa436c6c02 Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Mon, 21 Oct 2024 17:57:40 -0800 Subject: [PATCH 157/163] adjust newline --- apps/compute-cf.yml.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/compute-cf.yml.j2 b/apps/compute-cf.yml.j2 index aeb4cd099..fab82396b 100644 --- a/apps/compute-cf.yml.j2 +++ b/apps/compute-cf.yml.j2 @@ -35,8 +35,8 @@ Outputs: {{ name }}JobQueueArn: Value: !Ref {{ name }}JobQueue - {% endfor %} + {% endfor %} TaskRoleArn: Value: !GetAtt TaskRole.Arn From 631e8a65dd6be510fb221e9eea9ea8c023bb697c Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Tue, 22 Oct 2024 11:12:42 -0800 Subject: [PATCH 158/163] update changelog --- CHANGELOG.md | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 15941ede5..18de461ca 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,14 +15,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed - Changes to custom compute environments: - Custom compute environments are now applied to individual job steps rather than to entire jobs. The `compute_environment` field is now provided at the step level rather than at the top level of the job spec. - - Custom compute environments can optionally be defined within `job_spec/config/compute_environments.yml`. 
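Stepping back to PATCH 156 for a moment (the CHANGELOG hunk resumes just below): the rewritten `get_batch_job_parameters` is what wires job parameters into the AWS Batch submit-job states. A condensed, self-contained sketch of its behavior, for illustration only (the helper name is invented here; the real implementation lives in `apps/render_cf.py`):

```python
def batch_params_for(job_spec: dict, step: dict, map_item: str = None) -> dict:
    # A step's Batch parameters are the Ref:: tokens in its command; each must
    # be a declared job parameter (or the enclosing Map state's current item).
    job_params = {'bucket_prefix', *job_spec['parameters'].keys()}
    step_params = {arg.removeprefix('Ref::') for arg in step['command'] if arg.startswith('Ref::')}
    batch_params = {}
    for param in step_params:
        if param == map_item:
            batch_params[f'{param}.$'] = '$$.Map.Item.Value'  # current Map item
        elif param in job_params:
            batch_params[f'{param}.$'] = f'$.batch_job_parameters.{param}'
        else:
            raise ValueError(f"job parameter '{param}' has not been defined")
    return batch_params

job_spec = {'parameters': {'param2': {}, 'param3': {}}}
step = {'command': ['foo', 'Ref::param2', 'Ref::item', 'Ref::bucket_prefix']}
assert batch_params_for(job_spec, step, map_item='item') == {
    'param2.$': '$.batch_job_parameters.param2',
    'item.$': '$$.Map.Item.Value',
    'bucket_prefix.$': '$.batch_job_parameters.bucket_prefix',
}
```

Compared with the dict comprehension it replaces, the explicit loop turns an undeclared `Ref::` token into a hard `ValueError` instead of a silent no-op. The CHANGELOG hunk resumes below.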
Job steps can import these environments using the following syntax: - ```yaml - compute_environment: - import: MyComputeEnvironment - ``` - If the `import` value is `Default`, then the job step uses the deployment's default compute environment. - - The `compute_environment` field can still be used to define a custom compute environment directly within the job spec, as before. + - If the value of the `compute_environment` field is `Default`, then the step uses the deployment's default compute environment. Otherwise, the value must be the name of a custom compute environment defined in `job_spec/config/compute_environments.yml`. - Other changes to the job spec syntax: - The `tasks` field has been renamed to `steps`. - Job parameters no longer contain a top-level `default` field. The `default` field within each parameter's `api_schema` mapping is still supported. From d2cb0aeea2145403b0589a6ae11ceddc6d682199 Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Tue, 22 Oct 2024 12:03:32 -0800 Subject: [PATCH 159/163] revert SRG changes to multi-burst-sandbox --- .github/workflows/deploy-multi-burst-sandbox.yml | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/.github/workflows/deploy-multi-burst-sandbox.yml b/.github/workflows/deploy-multi-burst-sandbox.yml index 5c26b891e..95eea8fde 100644 --- a/.github/workflows/deploy-multi-burst-sandbox.yml +++ b/.github/workflows/deploy-multi-burst-sandbox.yml @@ -3,7 +3,7 @@ name: Deploy Multi-Burst Sandbox Stack to AWS on: push: branches: - - srg + - multi-burst-sandbox concurrency: ${{ github.workflow }}-${{ github.ref }} @@ -25,8 +25,11 @@ jobs: deploy_ref: refs/heads/multi-burst-sandbox job_files: >- job_spec/INSAR_ISCE_BURST.yml - job_spec/SRG_GSLC.yml - job_spec/SRG_TIME_SERIES.yml + job_spec/INSAR_ISCE_MULTI_BURST.yml + job_spec/AUTORIFT.yml + job_spec/RTC_GAMMA.yml + job_spec/WATER_MAP.yml + job_spec/WATER_MAP_EQ.yml instance_types: r6id.xlarge,r6id.2xlarge,r6id.4xlarge,r6id.8xlarge,r6idn.xlarge,r6idn.2xlarge,r6idn.4xlarge,r6idn.8xlarge default_max_vcpus: 640 expanded_max_vcpus: 640 From 09f6acd6edebb227972934201559636bc39ab441 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Tue, 22 Oct 2024 16:35:44 -0400 Subject: [PATCH 160/163] add boundsvalidationerror --- apps/api/src/hyp3_api/handlers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/apps/api/src/hyp3_api/handlers.py b/apps/api/src/hyp3_api/handlers.py index 393fc4ac6..fbe92de96 100644 --- a/apps/api/src/hyp3_api/handlers.py +++ b/apps/api/src/hyp3_api/handlers.py @@ -6,7 +6,7 @@ import dynamo from dynamo.exceptions import AccessCodeError, InsufficientCreditsError, UnexpectedApplicationStatusError from hyp3_api import util -from hyp3_api.validation import GranuleValidationError, validate_jobs +from hyp3_api.validation import BoundsValidationError, GranuleValidationError, validate_jobs def problem_format(status, message): @@ -28,7 +28,7 @@ def post_jobs(body, user): validate_jobs(body['jobs']) except requests.HTTPError as e: print(f'WARN: CMR search failed: {e}') - except GranuleValidationError as e: + except (BoundsValidationError, GranuleValidationError) as e: abort(problem_format(400, str(e))) try: From 6fb1c8d24ebb2e4aebecd73746fe7c4c6bd69a78 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Tue, 22 Oct 2024 16:40:33 -0400 Subject: [PATCH 161/163] remove do extension --- apps/render_cf.py | 1 - 1 file changed, 1 deletion(-) diff --git a/apps/render_cf.py b/apps/render_cf.py index 3d5986d73..37f248602 100644 --- a/apps/render_cf.py +++ 
b/apps/render_cf.py @@ -168,7 +168,6 @@ def render_templates(job_types: dict, compute_envs: dict, security_environment: trim_blocks=True, lstrip_blocks=True, keep_trailing_newline=True, - extensions=['jinja2.ext.do'], ) for template_file in Path('.').glob('**/*.j2'): From b49b4e18d6d210b0efd066c6f036426cfa18dda8 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 23 Oct 2024 19:17:39 +0000 Subject: [PATCH 162/163] Bump cfn-lint from 1.16.1 to 1.18.1 Bumps [cfn-lint](https://github.com/aws-cloudformation/cfn-lint) from 1.16.1 to 1.18.1. - [Release notes](https://github.com/aws-cloudformation/cfn-lint/releases) - [Changelog](https://github.com/aws-cloudformation/cfn-lint/blob/main/CHANGELOG.md) - [Commits](https://github.com/aws-cloudformation/cfn-lint/compare/v1.16.1...v1.18.1) --- updated-dependencies: - dependency-name: cfn-lint dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- requirements-all.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements-all.txt b/requirements-all.txt index 1bc0c4e66..61c66c2ab 100644 --- a/requirements-all.txt +++ b/requirements-all.txt @@ -17,4 +17,4 @@ flake8-blind-except==0.2.1 flake8-builtins==2.5.0 setuptools==75.2.0 openapi-spec-validator==0.7.1 -cfn-lint==1.16.1 +cfn-lint==1.18.1 From 19471a8742ffcdac07ac95d3882f1b9e7686ad5a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 23 Oct 2024 19:21:53 +0000 Subject: [PATCH 163/163] Bump moto[dynamodb] from 5.0.17 to 5.0.18 Bumps [moto[dynamodb]](https://github.com/getmoto/moto) from 5.0.17 to 5.0.18. - [Release notes](https://github.com/getmoto/moto/releases) - [Changelog](https://github.com/getmoto/moto/blob/master/CHANGELOG.md) - [Commits](https://github.com/getmoto/moto/compare/5.0.17...5.0.18) --- updated-dependencies: - dependency-name: moto[dynamodb] dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- requirements-all.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements-all.txt b/requirements-all.txt index 1bc0c4e66..4e154c1b5 100644 --- a/requirements-all.txt +++ b/requirements-all.txt @@ -7,7 +7,7 @@ -r requirements-apps-update-db.txt boto3==1.35.44 jinja2==3.1.4 -moto[dynamodb]==5.0.17 +moto[dynamodb]==5.0.18 pytest==8.3.3 PyYAML==6.0.2 responses==0.25.3
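One last note, on PATCH 160 farther up: `hyp3_api.handlers` turns validation failures into HTTP 400 problem responses, and the patch simply widens the caught exception types so the new `BoundsValidationError` gets the same treatment as `GranuleValidationError` (a `requests.HTTPError` from a failed CMR search remains just a logged warning). A minimal, hypothetical sketch of the pattern; `problem_format`'s body is not shown in the diff, so the shape returned here is an assumption:

```python
class GranuleValidationError(Exception): ...
class BoundsValidationError(Exception): ...

def problem_format(status: int, message: str) -> dict:
    # Assumed shape only: something like an RFC 7807 problem document.
    return {'status': status, 'title': 'Bad Request', 'detail': message}

def validate_jobs(jobs: list) -> None:
    # Stand-in for hyp3_api.validation.validate_jobs; always fails here.
    raise BoundsValidationError('bounds must describe a non-degenerate bounding box')

def post_jobs(body: dict) -> dict:
    try:
        validate_jobs(body['jobs'])
    except (BoundsValidationError, GranuleValidationError) as e:
        # Both are user-input problems, so both map to the same 400 response.
        return problem_format(400, str(e))
    return {'status': 200}

print(post_jobs({'jobs': []}))
# {'status': 400, 'title': 'Bad Request', 'detail': 'bounds must describe a ...'}
```

And on PATCH 161 just above: dropping `jinja2.ext.do` from the template `Environment` is safe so long as no template still uses a `{% do %}` tag, which the patch implies; the `trim_blocks`/`lstrip_blocks`/`keep_trailing_newline` settings that control template whitespace are unchanged.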