diff --git a/research_datastream/terraform/GETTING_STARTED.md b/research_datastream/terraform/GETTING_STARTED.md index f72757e72..ee4de37c2 100644 --- a/research_datastream/terraform/GETTING_STARTED.md +++ b/research_datastream/terraform/GETTING_STARTED.md @@ -100,11 +100,12 @@ Starting from execution_template_general_purpose. Make sure to wrap commands in ``` ### Edit Run Options -The state machine is capable of confirming a complete execution by checking for the existence output data in the form of an s3 object. Set booleans here. If `s3_bucket` and `s3_prefix` are provided in `datastream_command_options`, `ngen-datastream` will create a `ngen-run.tar.gz` file that can be found at `s3://<s3_bucket>/<s3_prefix>/ngen-run.tar.gz` +The state machine is capable of confirming a complete execution by checking for the existence of output data in the form of an s3 object. Set booleans here. If `s3_bucket` and `s3_prefix` are provided in `datastream_command_options`, `ngen-datastream` will create a `ngen-run.tar.gz` file that can be found at `s3://<s3_bucket>/<s3_prefix>/ngen-run.tar.gz`. `timeout_s` is a timeout for the commands issued during execution. This is valuable for shutting down hanging instances that may become unresponsive due to memory overflow, etc. Default is 3600. 
``` "run_options":{ "ii_delete_volume" : false, - "ii_check_s3" : true + "ii_check_s3" : true, + "timeout_s" : 3600 }, ``` diff --git a/research_datastream/terraform/executions/execution_datastream_example.json b/research_datastream/terraform/executions/execution_datastream_example.json index 773f7103d..72d04d904 100644 --- a/research_datastream/terraform/executions/execution_datastream_example.json +++ b/research_datastream/terraform/executions/execution_datastream_example.json @@ -13,7 +13,8 @@ }, "run_options":{ "ii_delete_volume" : true, - "ii_check_s3" : true + "ii_check_s3" : true, + "timeout_s" : 3600 }, "instance_parameters": { "ImageId": "ami-062bdcbb454b8d833", diff --git a/research_datastream/terraform/executions/execution_gp_example.json b/research_datastream/terraform/executions/execution_gp_example.json index 1db212df8..3e6c7e86c 100644 --- a/research_datastream/terraform/executions/execution_gp_example.json +++ b/research_datastream/terraform/executions/execution_gp_example.json @@ -4,11 +4,12 @@ ], "run_options":{ "ii_delete_volume" : false, - "ii_check_s3" : false + "ii_check_s3" : false, + "timeout_s" : 3600 }, "instance_parameters" : { - "ImageId" : "ami-03b72f226b125860d", + "ImageId" : "ami-07161bb3f4b6e5b6d", "InstanceType" : "t4g.large", "KeyName" : "jlaser_west2", "SecurityGroupIds" : ["sg-04365a4248fe126bc"], diff --git a/research_datastream/terraform/executions/execution_template_datastream.json b/research_datastream/terraform/executions/execution_template_datastream.json index 5b5cc5e51..080cbdabc 100644 --- a/research_datastream/terraform/executions/execution_template_datastream.json +++ b/research_datastream/terraform/executions/execution_template_datastream.json @@ -13,7 +13,8 @@ }, "run_options":{ "ii_delete_volume" : true, - "ii_check_s3" : true + "ii_check_s3" : true, + "timeout_s" : 3600 }, "instance_parameters": { "ImageId": "", diff --git a/research_datastream/terraform/lambda_functions/checker_lambda.zip 
b/research_datastream/terraform/lambda_functions/checker_lambda.zip deleted file mode 100644 index 8b1549618..000000000 Binary files a/research_datastream/terraform/lambda_functions/checker_lambda.zip and /dev/null differ diff --git a/research_datastream/terraform/lambda_functions/commander_lambda.zip b/research_datastream/terraform/lambda_functions/commander_lambda.zip deleted file mode 100644 index 7c2fcd703..000000000 Binary files a/research_datastream/terraform/lambda_functions/commander_lambda.zip and /dev/null differ diff --git a/research_datastream/terraform/lambda_functions/poller/lambda_function.py b/research_datastream/terraform/lambda_functions/poller/lambda_function.py index fdc7bcde6..fc39800e6 100644 --- a/research_datastream/terraform/lambda_functions/poller/lambda_function.py +++ b/research_datastream/terraform/lambda_functions/poller/lambda_function.py @@ -20,6 +20,7 @@ def lambda_handler(event, context): Generic Poller funcion """ t0 = time.perf_counter() + timeout_s = event['run_options']['timeout_s'] global client_ssm, client_ec2 client_ssm = boto3.client('ssm',region_name=event['region']) @@ -40,9 +41,13 @@ def lambda_handler(event, context): elif output['Status'] == 'InProgress': ii_pass = False print(f'Commands are still in progress. 
Waiting 5 seconds and checking again') - if (time.perf_counter() - t0) > 850: + if (time.perf_counter() - t0) > 800: print(f'Cycling...') ii_time = True + duration = time.time() - event['t0'] + if duration >= timeout_s: + print(f'Duration -> {duration}\nTimeout -> {timeout_s}') + raise Exception(f'Commands duration have exceed the timeout specified in the execution') time.sleep(5) else: raise Exception(f'Command failed {output}') diff --git a/research_datastream/terraform/lambda_functions/poller_lambda.zip b/research_datastream/terraform/lambda_functions/poller_lambda.zip deleted file mode 100644 index 54256bf28..000000000 Binary files a/research_datastream/terraform/lambda_functions/poller_lambda.zip and /dev/null differ diff --git a/research_datastream/terraform/lambda_functions/start_ami/lambda_function.py b/research_datastream/terraform/lambda_functions/start_ami/lambda_function.py index 0958ea543..9ac4fc57b 100644 --- a/research_datastream/terraform/lambda_functions/start_ami/lambda_function.py +++ b/research_datastream/terraform/lambda_functions/start_ami/lambda_function.py @@ -34,6 +34,12 @@ def replace_in_dict(d, pattern, replacement): def lambda_handler(event, context): + t0 = time.time() + event['t0'] = t0 + if not "timeout_s" in event['run_options']: + print(f'Setting timeout_s to default 3600 seconds') + event['run_options']['timeout_s'] = 3600 + event['region'] = os.environ['AWS_REGION'] global client_ec2 client_ec2 = boto3.client('ec2',region_name=event['region']) diff --git a/research_datastream/terraform/lambda_functions/starter_lambda.zip b/research_datastream/terraform/lambda_functions/starter_lambda.zip deleted file mode 100644 index bda47efd5..000000000 Binary files a/research_datastream/terraform/lambda_functions/starter_lambda.zip and /dev/null differ diff --git a/research_datastream/terraform/lambda_functions/stopper_lambda.zip b/research_datastream/terraform/lambda_functions/stopper_lambda.zip deleted file mode 100644 index 
01ba661c4..000000000 Binary files a/research_datastream/terraform/lambda_functions/stopper_lambda.zip and /dev/null differ