Skip to content

Commit

Permalink
Merge pull request #133 from RLOpenCatalyst/topic-release
Browse files Browse the repository at this point in the history
Topic release
  • Loading branch information
ravigurram8 authored Feb 21, 2023
2 parents 6cc0a6a + c7556c4 commit 48eef3a
Show file tree
Hide file tree
Showing 56 changed files with 410 additions and 23 deletions.
48 changes: 37 additions & 11 deletions cft-templates/sagemaker-template-with-url.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,20 @@ Description: 'AWS CloudFormation Sample Template SageMaker NotebookInstance: Thi
the creation of a SageMaker NotebookInstance with encryption. You will be billed for the AWS resources used if you create a stack from
this template. (fdp-1qj64b3fd)'
Parameters:
Namespace:
Type: String
Description: An environment name that will be prefixed to resource names
S3Mounts:
Type: String
Description: A JSON array of objects with name, bucket and prefix properties used to mount data
IamPolicyDocument:
Type: String
Description: The IAM policy to be associated with the launched workstation
EnvironmentInstanceFiles:
Type: String
Description: >-
An S3 URI (starting with "s3://") that specifies the location of files to be copied to
the environment instance, including any bootstrap scripts
NotebookInstanceType:
AllowedValues:
- ml.t2.medium
Expand All @@ -21,10 +35,15 @@ Parameters:
Default: ml.t3.medium
Description: Select Instance type for the SageMaker Notebook. e.g.ml.t3.medium
Type: String
Conditions:
IamPolicyEmpty: !Equals [!Ref IamPolicyDocument, '{}']

Resources:
SageMakerRole:
IAMRole:
Type: AWS::IAM::Role
Properties:
RoleName: !Join ['-', [Ref: Namespace, 'sagemaker-notebook-role']]
Path: '/'
AssumeRolePolicyDocument:
Version: 2012-10-17
Statement:
Expand All @@ -34,6 +53,12 @@ Resources:
- "sagemaker.amazonaws.com"
Action:
- "sts:AssumeRole"
Policies:
- !If
- IamPolicyEmpty
- !Ref 'AWS::NoValue'
- PolicyName: !Join ['-', [Ref: Namespace, 's3-studydata-policy']]
PolicyDocument: !Ref IamPolicyDocument
ManagedPolicyArns:
- "arn:aws:iam::aws:policy/AmazonSageMakerFullAccess"
- "arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess"
Expand All @@ -42,8 +67,8 @@ Resources:
Type: "AWS::SageMaker::NotebookInstance"
Properties:
InstanceType: !Ref NotebookInstanceType
RoleArn: !GetAtt SageMakerRole.Arn
PlatformIdentifier: notebook-al2-v2
RoleArn: !GetAtt IAMRole.Arn
PlatformIdentifier: notebook-al2-v1
LifecycleConfigName: !GetAtt NotebookLifeCycleConfig.NotebookInstanceLifecycleConfigName
Tags:
- Key: cost_resource
Expand All @@ -57,10 +82,11 @@ Resources:
OnStart:
- Content:
Fn::Base64: !Sub |
#!/bin/bash

set -e

#!/usr/bin/env bash
aws s3 cp "${EnvironmentInstanceFiles}/get_bootstrap.sh" "/tmp"
chmod 500 "/tmp/get_bootstrap.sh"
/tmp/get_bootstrap.sh "${EnvironmentInstanceFiles}" '${S3Mounts}'

function get_tag()
{

Expand All @@ -80,7 +106,6 @@ Resources:
echo "export 'DATA_BUCKET'=$DATA_BUCKET" >> /etc/profile.d/jupyter-env.sh
fi


#Copy Sample notebook from S3

TEMPLATE_NOTEBOOK_BUCKET=$(get_tag "TEMPLATE_NOTEBOOK_BUCKET")
Expand All @@ -98,12 +123,13 @@ Resources:

systemctl restart jupyter-server



Outputs:
SageMakerNotebookInstanceARN:
Description: "ARN for the newly created SageMaker Notebook Instance"
Value: !Ref SageMakerNotebookInstance
NotebookInstanceName:
Description: "Name for the newly created SageMaker Notebook Instance"
Value: !GetAtt [SageMakerNotebookInstance, NotebookInstanceName]
Value: !GetAtt [SageMakerNotebookInstance, NotebookInstanceName]
WorkspaceInstanceRoleArn:
Description: IAM role assumed by the SageMaker workspace instance
Value: !GetAtt IAMRole.Arn
4 changes: 2 additions & 2 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ services:
- sp2net

cc-3102:
image: 045938549113.dkr.ecr.us-east-2.amazonaws.com/researchportal:_fd_1.16.0_b1600
image: 045938549113.dkr.ecr.us-east-2.amazonaws.com/researchportal:_fd_1.17.0_b1675
secrets:
- source: sp2prod-config.json
target: /rlc/cc/server/app/config/config.json
Expand All @@ -42,7 +42,7 @@ services:


scheduler-3102:
image: 045938549113.dkr.ecr.us-east-2.amazonaws.com/researchportal:_fd_1.16.0_b1600
image: 045938549113.dkr.ecr.us-east-2.amazonaws.com/researchportal:_fd_1.17.0_b1675
secrets:
- source: sp2prod-config.json
target: /rlc/cc/server/app/config/config.json
Expand Down
7 changes: 4 additions & 3 deletions dump/configs.json
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@
},
"key": "projectStorage",
"value": {
"productList": ["ec2", "rstudio", "nextflow", "cromwell", "ec2-dcv"],
"productList": ["ec2", "rstudio", "nextflow", "cromwell", "ec2-dcv", "sagemaker"],
"s3Mounts": {
"ProjectStorage": {
"id": "ProjectStorage",
Expand Down Expand Up @@ -404,10 +404,11 @@
"ec2",
"rstudio",
"nextflow",
"cromwell"
"cromwell",
"sagemaker"
]
},
"studySelectionCount" : 4
"studySelectionCount" : 5.0
},
{
"_id" : {
Expand Down
24 changes: 23 additions & 1 deletion dump/standardcatalogitems.json
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,29 @@
"availableRegions": [],
"assignedOU": [],
"metaData": {
"pre_provisioning": [],
"pre_provisioning": [
{
"code" : "CFT_PARAMS",
"params" : [
{
"name" : "EnvironmentInstanceFiles",
"type" : "RL::SC::PARAM::HD"
},
{
"name" : "IamPolicyDocument",
"type" : "RL::SC::PARAM::HD"
},
{
"name" : "S3Mounts",
"type" : "RL::SC::PARAM::HD"
},
{
"name" : "Namespace",
"type" : "RL::SC::PARAM::HD"
}
]
}
],
"post_provisioning": [],
"checks_before_assigning_product": [],
"checks_after_assigning_product": [],
Expand Down
2 changes: 1 addition & 1 deletion packer-rg.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
"awsRegion": "",
"RG_HOME": "/opt/deploy/sp2",
"RG_SRC": "/home/ubuntu",
"amiName": "RG-AMI-1.16.0"
"amiName": "RG-AMI-1.17.0"
},
"builders": [
{
Expand Down
2 changes: 1 addition & 1 deletion provisioners/provision-ecr.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/bin/bash -xe
sudo docker login -u AWS -p $(aws ecr get-login-password --region us-east-2) 045938549113.dkr.ecr.us-east-2.amazonaws.com
sudo docker pull 045938549113.dkr.ecr.us-east-2.amazonaws.com/researchportal:_fd_1.16.0_b1600
sudo docker pull 045938549113.dkr.ecr.us-east-2.amazonaws.com/researchportal:_fd_1.17.0_b1675
sudo docker pull 045938549113.dkr.ecr.us-east-2.amazonaws.com/nginx:latest
sudo docker pull 045938549113.dkr.ecr.us-east-2.amazonaws.com/notificationsink:1.15.0_b1
1 change: 1 addition & 0 deletions rg_document_db.yml
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,7 @@ Resources:
DBSubnetGroupName : !Ref DocumentDBSubnetGroup
VpcSecurityGroupIds:
- !GetAtt DocumentDBSecurityGroup.GroupId
StorageEncrypted : true
DBInstance:
Type: "AWS::DocDB::DBInstance"
Properties:
Expand Down
27 changes: 27 additions & 0 deletions scripts/bootstrap-scripts/bin/mount_s3.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,14 @@ AWS_CONFIG_DIR="${HOME}/.aws"
# Exit if CONFIG doesn't exist or is 0 bytes
[ ! -s "$CONFIG" ] && exit 0

# Report the environment type on stdout.
# Only SageMaker is detected here, by the presence of its home directory;
# for any other environment nothing is printed.
env_type() {
    if test -d "/home/ec2-user/SageMaker"; then
        printf "sagemaker"
    fi
}

# Add roleArn for a study to credentials file if not present already
append_role_to_credentials() {
study_id=$1
Expand All @@ -32,6 +40,7 @@ append_role_to_credentials() {
fi
}

export AWS_SDK_LOAD_CONFIG=1
# Mount S3 buckets
mounts="$(cat "$CONFIG")"
num_mounts=$(printf "%s" "$mounts" | jq ". | length" -)
Expand Down Expand Up @@ -73,3 +82,21 @@ do
fi
fi
done

# Define where the Jupyter notebook (if any) should be running
notebook_dir=""
case "$(env_type)" in
    "sagemaker")
        notebook_dir="/home/ec2-user/SageMaker"
        ;;
esac

# Add a link to the mount in the notebook directory.
# (The user gets easy access, but it won't check the bucket into a git repo.)
# Only create a link if Jupyter is running, there are studies mounted, and the link
# doesn't already exist.
#
# The two conditions are separate, quoted `[ ]` commands rather than a single
# `-a` expression, and num_mounts defaults to 0, so an empty num_mounts cannot
# turn the test into a syntax error.
# The link check is a full `if` (not `[ ... ] && ...`) so that an already
# existing link does not become the script's non-zero exit status.
if [ -n "$notebook_dir" ] && [ "${num_mounts:-0}" -ne 0 ]
then
    symlink_name="$notebook_dir/studies"
    if [ ! -L "$symlink_name" ]
    then
        sudo ln -s "$MOUNT_DIR" "$symlink_name"
    fi
fi
82 changes: 79 additions & 3 deletions scripts/bootstrap-scripts/bootstrap.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ RSTUDIO_USER="$2"

# Get directory in which this script is stored and define URL from which to download goofys
FILES_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
GOOFYS_URL="https://github.com/kahing/goofys/releases/download/v0.21.0/goofys"
GOOFYS_URL="https://github.com/kahing/goofys/releases/download/v0.24.0/goofys"

# Define a function to determine what type of environment this is (RStudio, or EC2 Linux)
env_type() {
Expand All @@ -24,23 +24,74 @@ env_type() {
elif [ -f "/usr/bin/nextflow" ]
then
printf "nextflow"
elif [ -d "/home/ec2-user/SageMaker" ]
then
printf "sagemaker"
else
printf "ec2-linux"
fi
}

# Define a function to update Jupyter configuration files
#
# Usage: update_jupyter_config <config_file>
#
# Appends a Python snippet to <config_file>. The snippet subclasses the
# notebook SessionManager so that mount_s3.sh is executed the first time
# sessions are listed, logs the script's output, and registers the subclass
# as c.NotebookApp.session_manager_class.
update_jupyter_config() {

    config_file="$1"

    # HACK: Update the default SessionManager class used by Jupyter notebooks
    # so that it runs the S3 mount script the first time sessions are listed
    #
    # `cut -b5-` drops the first four bytes of every heredoc line, so each
    # payload line below MUST keep its four-space prefix; the indentation
    # beyond that prefix is the Python block structure written to the file.
    cat << EOF | cut -b5- >> "$config_file"
    import subprocess
    from notebook.services.sessions.sessionmanager import SessionManager as BaseSessionManager
    class SessionManager(BaseSessionManager):
        def list_sessions(self, *args, **kwargs):
            """Override default list_sessions() method"""
            self.mount_studies()
            result = super(SessionManager, self).list_sessions(*args, **kwargs)
            return result
        def mount_studies(self):
            """Execute mount_s3.sh if it hasn't already been run"""
            if not hasattr(self, 'studies_mounted'):
                mounting_result = subprocess.run(
                    "mount_s3.sh",
                    stdout=subprocess.PIPE, stderr=subprocess.STDOUT
                )
                # Log results
                if mounting_result.stdout:
                    for line in mounting_result.stdout.decode("utf-8").split("\n"):
                        if line: # Skip empty lines
                            self.log.info(line)
                self.studies_mounted = True
    c.NotebookApp.session_manager_class = SessionManager
EOF
}

# Install dependencies
yum install -y jq-1.5
curl -LSs -o "/usr/local/bin/goofys" "$GOOFYS_URL"
echo "Installing JQ"
sudo mv "${FILES_DIR}/offline-packages/jq-1.5-linux64" "/usr/local/bin/jq"
chmod +x "/usr/local/bin/jq"
echo "Finish installing jq"

echo "Copying Goofys from bootstrap.sh"
cp "${FILES_DIR}/offline-packages/goofys" /usr/local/bin/goofys
chmod +x "/usr/local/bin/goofys"

# Install ec2 instance connect agent
sudo yum install ec2-instance-connect-1.1

# Create S3 mount script and config file
echo "Mounting S3"
chmod +x "${FILES_DIR}/bin/mount_s3.sh"
ln -s "${FILES_DIR}/bin/mount_s3.sh" "/usr/local/bin/mount_s3.sh"
printf "%s" "$S3_MOUNTS" > "/usr/local/etc/s3-mounts.json"
echo "Finish mounting S3"

OS_VERSION=`cat /etc/os-release | grep VERSION= | sed 's/VERSION="//' | sed 's/"//'`

# Apply updates to environments based on environment type
case "$(env_type)" in
Expand All @@ -56,6 +107,31 @@ case "$(env_type)" in
yum install -y fuse-2.9.2
printf "\n# Mount S3 study data\nmount_s3.sh\n\n" >> "/home/ec2-user/.bash_profile"
;;
"sagemaker") # Update config and restart Jupyter
    # Installs the offline fuse (and, when OS_VERSION is '2', boto3) RPMs,
    # appends the S3-mount hook to the Jupyter config, then restarts Jupyter
    # with systemctl or initctl depending on OS_VERSION.
    #
    # OS_VERSION is quoted in every test: an empty or multi-word value would
    # otherwise make `[` fail with a syntax error instead of comparing.
    # The RPMs are handed to yum by path rather than after a `cd`, so a
    # missing package directory cannot cause unrelated *.rpm files from the
    # current directory to be installed. '*' is quoted so that yum, not the
    # shell, interprets the --disablerepo pattern.
    sagemaker_pkgs="${FILES_DIR}/offline-packages/sagemaker"
    if [ "$OS_VERSION" = '2' ]
    then
        echo "Installing fuse for AL2"
        sudo yum --disablerepo='*' localinstall -y "${sagemaker_pkgs}/fuse-2.9.4_AL2"/*.rpm
        echo "Finish installing fuse"
        echo "Installing boto3 for AL2"
        sudo yum --disablerepo='*' localinstall -y "${sagemaker_pkgs}/boto3/python2-boto3-1.4.4-1.amzn2.noarch.rpm"
        echo "Finish installing boto3"
    else
        echo "Installing fuse for AL1"
        sudo yum --disablerepo='*' localinstall -y "${sagemaker_pkgs}/fuse-2.9.4"/*.rpm
        echo "Finish installing fuse"
    fi
    update_jupyter_config "/home/ec2-user/.jupyter/jupyter_notebook_config.py"
    if [ "$OS_VERSION" = '2' ]
    then
        systemctl restart jupyter-server
    else
        initctl restart jupyter-server --no-wait
    fi
    ;;
esac

exit 0
Loading

0 comments on commit 48eef3a

Please sign in to comment.