From a37ba7fe5dd217110cdfd0635570e5ae993509f4 Mon Sep 17 00:00:00 2001 From: Jordan Laser Date: Tue, 6 Feb 2024 07:29:30 -0700 Subject: [PATCH] cloud updates --- cloud/AWS/execution_dailyrun.json | 2 +- cloud/AWS/execution_dailyrun_arm.json | 39 ++++++++++++++++++++ cloud/AWS/poller/lambda_function.py | 40 +++++++++++---------- cloud/AWS/startup_ec2.sh | 13 +++++-- cloud/AWS/streamcommander/lambda_handler.py | 4 ++- 5 files changed, 75 insertions(+), 23 deletions(-) create mode 100644 cloud/AWS/execution_dailyrun_arm.json diff --git a/cloud/AWS/execution_dailyrun.json b/cloud/AWS/execution_dailyrun.json index a1040176..7856e8f7 100644 --- a/cloud/AWS/execution_dailyrun.json +++ b/cloud/AWS/execution_dailyrun.json @@ -33,7 +33,7 @@ }, "region" : "us-east-2", - "commands" : ["mount-s3 ngen-datastream /home/ec2-user/ngen-datastream/data/mount", "/home/ec2-user/ngen-datastream/scripts/stream.sh -c /home/ec2-user/ngen-datastream/configs/conf_datastream_daily.sh"], + "commands" : ["mount-s3 ngen-datastream /home/ec2-user/ngen-datastream/data/mount", "/home/ec2-user/ngen-datastream/scripts/stream.sh -c /home/ec2-user/ngen-datastream/configs/conf_datastream_daily.sh > /home/ec2-user/ngen-datastream/log.txt"], "bucket" : "ngen-datastream", "obj_key" : "daily/DATE/ngen-run/outputs/cat-1.csv" } diff --git a/cloud/AWS/execution_dailyrun_arm.json b/cloud/AWS/execution_dailyrun_arm.json new file mode 100644 index 00000000..af25c915 --- /dev/null +++ b/cloud/AWS/execution_dailyrun_arm.json @@ -0,0 +1,39 @@ +{ + "instance_parameters" : + { + "ImageId" : "ami-0f3181afcca1d96cd", + "InstanceType" : "hpc7g.16xlarge", + "KeyName" : "Processor", + "MinCount" : 1, + "MaxCount" : 1, + "SecurityGroupIds" : ["sg-0ab5f40d1eb97d163"], + "IamInstanceProfile" : { + "Name" : "Processor" + }, + "TagSpecifications" :[ + { + "ResourceType": "instance", + "Tags": [ + { + "Key" : "Name", + "Value" : "ngendatastream_DATE" + } + ] + } + ], + "BlockDeviceMappings":[ + { + "DeviceName": "/dev/xvda", + "Ebs": { + "VolumeSize": 64, + "VolumeType": "gp2" + } + } + ] + }, + + "region" : "us-east-1", + "commands" : ["mount-s3 ngen-datastream /home/ec2-user/ngen-datastream/data/mount", "/home/ec2-user/ngen-datastream/scripts/stream.sh -c /home/ec2-user/ngen-datastream/configs/conf_datastream_daily.sh > /home/ec2-user/ngen-datastream/log.txt"], + "bucket" : "ngen-datastream", + "obj_key" : "daily/DATE/ngen-run/outputs/cat-1.csv" +} diff --git a/cloud/AWS/poller/lambda_function.py b/cloud/AWS/poller/lambda_function.py index 997f2ad1..4d19a3fd 100644 --- a/cloud/AWS/poller/lambda_function.py +++ b/cloud/AWS/poller/lambda_function.py @@ -5,17 +5,16 @@ client_ssm = boto3.client('ssm') def get_command_result(command_id,instance_id): - for j in range(200): - try: - output = client_ssm.get_command_invocation( - CommandId=command_id, - InstanceId=instance_id, - ) - if output['Status'] in ['Success', 'Failed', 'Canceled']: - print(f'Command has completed -> {output}') - except: - print(f'waiting for command to finish...') - time.sleep(4) + + try: + output = client_ssm.get_command_invocation( + CommandId=command_id, + InstanceId=instance_id, + ) + if output['Status'] in ['Success', 'Failed', 'Canceled']: + print(f'Command has completed -> {output}') + except: + output = None return output @@ -29,14 +28,17 @@ def lambda_handler(event, context): output = get_command_result(command_id,instance_id) ii_pass = False - if output['Status'] == 'Success': - print(f'Command has succeeded!') - ii_pass = True - elif output['Status'] == 'InProgress': - ii_pass = False - else: - raise Exception(f'Command failed {output}') - ii_pass = False + for j in range(13): + output = get_command_result(command_id,instance_id) + if output['Status'] == 'Success': + print(f'Command has succeeded!') + ii_pass = True + break + elif output['Status'] == 'InProgress': + ii_pass = False + time.sleep(60) + else: + raise Exception(f'Command failed {output}') event['ii_pass'] = ii_pass return event diff --git a/cloud/AWS/startup_ec2.sh b/cloud/AWS/startup_ec2.sh index 7620b5ac..dbee5e87 100644 --- a/cloud/AWS/startup_ec2.sh +++ b/cloud/AWS/startup_ec2.sh @@ -1,8 +1,16 @@ #!/bin/bash +arch=$(dpkg --print-architecture) +if [ "$arch" == "amd64" ]; then + curl -L -O https://github.com/LynkerIntel/hfsubset/releases/download/hfsubset-release-12/hfsubset-linux_amd64.tar.gz + curl -L -O https://s3.amazonaws.com/mountpoint-s3-release/latest/x86_64/mount-s3.rpm +else + echo "no release for hfsubset on arm64" + curl -L -O https://s3.amazonaws.com/mountpoint-s3-release/latest/arm64/mount-s3.rpm +fi sudo dnf update -y sudo dnf install git pip pigz awscli -y -curl -L -O https://github.com/LynkerIntel/hfsubset/releases/download/hfsubset-release-12/hfsubset-linux_amd64.tar.gz tar -xzvf hfsubset-linux_amd64.tar.gz +rm hfsubset-linux_amd64.tar.gz sudo mv hfsubset /usr/bin/hfsubset git clone https://github.com/CIROH-UA/ngen-datastream.git python3 -m pip install --upgrade pip @@ -16,7 +24,8 @@ sudo dnf -y install dnf-plugins-core sudo dnf install docker -y sudo systemctl start docker sudo usermod -aG docker ${USER} -curl -L -O https://s3.amazonaws.com/mountpoint-s3-release/latest/x86_64/mount-s3.rpm +mkdir -p /home/ec2-user/ngen-datastream/data/mount + sudo yum install -y ./mount-s3.rpm echo "cd docker && sudo docker build -t awiciroh/ngen-deps:latest -f Dockerfile.ngen-deps --no-cache . && docker build -t awiciroh/t-route:latest -f Dockerfile.t-route . --no-cache && docker build -t awiciroh/ngen -f Dockerfile.ngen . --no-cache && docker build -t awiciroh/ciroh-ngen-image:latest-local -f Dockerfile . --no-cache" echo "copy that ^^ and log out of session, log back in and run that command" diff --git a/cloud/AWS/streamcommander/lambda_handler.py b/cloud/AWS/streamcommander/lambda_handler.py index 11622913..09ea3291 100644 --- a/cloud/AWS/streamcommander/lambda_handler.py +++ b/cloud/AWS/streamcommander/lambda_handler.py @@ -36,7 +36,9 @@ def lambda_handler(event, context): response = client_ssm.send_command( InstanceIds=[instance_id], DocumentName='AWS-RunShellScript', - Parameters={'commands': event['commands']} + Parameters={'commands': event['commands'], + "executionTimeout": [f"{3600*24}" for x in range(len(event['commands']))] + } ) wait_for_command_response(response,instance_id) print(f'{instance_id} is launched and processing forcings')