Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Migrate several env variables to SSM to prevent reaching lambda… #589

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions lib/chatbot-api/functions/api-handler/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from pydantic import ValidationError

from genai_core.types import CommonError
from genai_core.parameters import load_all_from_ssm
from routes.health import router as health_router
from routes.embeddings import router as embeddings_router
from routes.cross_encoders import router as cross_encoders_router
Expand Down Expand Up @@ -44,7 +45,9 @@
)
@tracer.capture_lambda_handler
def handler(event: dict, context: LambdaContext) -> dict:

try:
load_all_from_ssm()
logger.info(
"Incoming request for " + event["info"]["fieldName"],
arguments=event["arguments"],
Expand Down
77 changes: 57 additions & 20 deletions lib/chatbot-api/rest-api.ts
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,36 @@ export class ApiResolvers extends Construct {
vpc: props.shared.vpc,
});

// Lambda has a size limit of 4KB in the env variables.
// To reduce the size, store verbose values in SSM Parameter store.
// Only a subset uses this method because env variables are faster
const parameterStores: { [key: string]: string } = {
AURORA_DB_HOST:
props.ragEngines?.auroraPgVector?.database?.clusterEndpoint?.hostname ??
"None",
DELETE_WORKSPACE_WORKFLOW_ARN:
props.ragEngines?.deleteWorkspaceWorkflow?.stateMachineArn ?? "None",
DELETE_DOCUMENT_WORKFLOW_ARN:
props.ragEngines?.deleteDocumentWorkflow?.stateMachineArn ?? "None",
CREATE_AURORA_WORKSPACE_WORKFLOW_ARN:
props.ragEngines?.auroraPgVector?.createAuroraWorkspaceWorkflow
?.stateMachineArn ?? "None",
CREATE_OPEN_SEARCH_WORKSPACE_WORKFLOW_ARN:
props.ragEngines?.openSearchVector?.createOpenSearchWorkspaceWorkflow
?.stateMachineArn ?? "None",
CREATE_KENDRA_WORKSPACE_WORKFLOW_ARN:
props.ragEngines?.kendraRetrieval?.createKendraWorkspaceWorkflow
?.stateMachineArn ?? "None",
};

const ssmPrefix = "/" + props.config.prefix + "/GraphQLApiHandler/";

for (const parameter in parameterStores) {
new ssm.StringParameter(this, parameter, {
parameterName: ssmPrefix + parameter,
stringValue: parameterStores[parameter],
});
}
const appSyncLambdaResolver = new lambda.Function(
this,
"GraphQLApiHandler",
Expand All @@ -62,8 +92,21 @@ export class ApiResolvers extends Construct {
vpc: props.shared.vpc,
securityGroups: [apiSecurityGroup],
vpcSubnets: props.shared.vpc.privateSubnets as ec2.SubnetSelection,
paramsAndSecrets: lambda.ParamsAndSecretsLayerVersion.fromVersion(
lambda.ParamsAndSecretsVersions.V1_0_103,
{
cacheSize: 500,
logLevel: lambda.ParamsAndSecretsLogLevel.INFO,
}
),
environment: {
...props.shared.defaultEnvironmentVariables,
LOAD_FROM_SSM_PREFIX: ssmPrefix,
LOAD_FROM_SSM: Object.keys(parameterStores).join(","),
AURORA_DB_USER: AURORA_DB_USERS.READ_ONLY.toString(),
AURORA_DB_PORT:
props.ragEngines?.auroraPgVector?.database?.clusterEndpoint?.port +
"",
CONFIG_PARAMETER_NAME: props.shared.configParameter.parameterName,
MODELS_PARAMETER_NAME: props.modelsParameter.parameterName,
X_ORIGIN_VERIFY_SECRET_ARN:
Expand All @@ -76,13 +119,6 @@ export class ApiResolvers extends Construct {
CHATBOT_FILES_BUCKET_NAME: props.filesBucket.bucketName,
PROCESSING_BUCKET_NAME:
props.ragEngines?.processingBucket?.bucketName ?? "",
AURORA_DB_USER: AURORA_DB_USERS.READ_ONLY,
AURORA_DB_HOST:
props.ragEngines?.auroraPgVector?.database?.clusterEndpoint
?.hostname ?? "",
AURORA_DB_PORT:
props.ragEngines?.auroraPgVector?.database?.clusterEndpoint?.port +
"",
WORKSPACES_TABLE_NAME:
props.ragEngines?.workspacesTable.tableName ?? "",
WORKSPACES_BY_OBJECT_TYPE_INDEX_NAME:
Expand All @@ -96,19 +132,6 @@ export class ApiResolvers extends Construct {
SAGEMAKER_RAG_MODELS_ENDPOINT:
props.ragEngines?.sageMakerRagModels?.model?.endpoint
?.attrEndpointName ?? "",
DELETE_WORKSPACE_WORKFLOW_ARN:
props.ragEngines?.deleteWorkspaceWorkflow?.stateMachineArn ?? "",
DELETE_DOCUMENT_WORKFLOW_ARN:
props.ragEngines?.deleteDocumentWorkflow?.stateMachineArn ?? "",
CREATE_AURORA_WORKSPACE_WORKFLOW_ARN:
props.ragEngines?.auroraPgVector?.createAuroraWorkspaceWorkflow
?.stateMachineArn ?? "",
CREATE_OPEN_SEARCH_WORKSPACE_WORKFLOW_ARN:
props.ragEngines?.openSearchVector
?.createOpenSearchWorkspaceWorkflow?.stateMachineArn ?? "",
CREATE_KENDRA_WORKSPACE_WORKFLOW_ARN:
props.ragEngines?.kendraRetrieval?.createKendraWorkspaceWorkflow
?.stateMachineArn ?? "",
FILE_IMPORT_WORKFLOW_ARN:
props.ragEngines?.fileImportWorkflow?.stateMachineArn ?? "",
WEBSITE_CRAWLING_WORKFLOW_ARN:
Expand All @@ -130,6 +153,20 @@ export class ApiResolvers extends Construct {
},
}
);

appSyncLambdaResolver.addToRolePolicy(
new iam.PolicyStatement({
actions: ["ssm:GetParameter"],
resources: [
`arn:aws:ssm:${cdk.Stack.of(scope).region}:${
cdk.Stack.of(scope).account
}:parameter` +
ssmPrefix +
"*",
],
})
);

this.appSyncLambdaResolver = appSyncLambdaResolver;

function addPermissions(apiHandler: lambda.Function) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
client = boto3.client("rds")

AURORA_DB_USER = os.environ.get("AURORA_DB_USER")
AURORA_DB_HOST = os.environ.get("AURORA_DB_HOST")
AURORA_DB_PORT = os.environ.get("AURORA_DB_PORT")
AURORA_DB_REGION = os.environ.get("AWS_REGION")

Expand All @@ -18,6 +17,7 @@ class AuroraConnection(object):
token_refresh = datetime.now() - timedelta(minutes=1)

def __init__(self, autocommit=True):
aurora_db_host = os.environ.get("AURORA_DB_HOST")
now = datetime.now()
if AuroraConnection.token_refresh < now:
AuroraConnection.token_refresh = now + timedelta(
Expand All @@ -26,14 +26,14 @@ def __init__(self, autocommit=True):
# Based on
# https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/UsingWithRDS.IAMDBAuth.Connecting.Python.html
AuroraConnection.token = client.generate_db_auth_token(
DBHostname=AURORA_DB_HOST,
DBHostname=aurora_db_host,
Port=AURORA_DB_PORT,
DBUsername=AURORA_DB_USER,
Region=AURORA_DB_REGION,
)
self.autocommit = autocommit

self.dbhost = AURORA_DB_HOST
self.dbhost = aurora_db_host
self.dbport = AURORA_DB_PORT
self.dbuser = AURORA_DB_USER
self.dbpass = AuroraConnection.token
Expand Down
3 changes: 1 addition & 2 deletions lib/shared/layers/python-sdk/python/genai_core/documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@
"DEFAULT_KENDRA_S3_DATA_SOURCE_BUCKET_NAME"
)

DELETE_DOCUMENT_WORKFLOW_ARN = os.environ.get("DELETE_DOCUMENT_WORKFLOW_ARN")
RSS_FEED_INGESTOR_FUNCTION = os.environ.get("RSS_FEED_INGESTOR_FUNCTION", "")
RSS_FEED_SCHEDULE_ROLE_ARN = os.environ.get("RSS_FEED_SCHEDULE_ROLE_ARN", "")
DOCUMENTS_BY_STATUS_INDEX = os.environ.get("DOCUMENTS_BY_STATUS_INDEX", "")
Expand Down Expand Up @@ -196,7 +195,7 @@ def delete_document(workspace_id: str, document_id: str):
raise genai_core.types.CommonError("Document not ready for deletion")

response = sfn_client.start_execution(
stateMachineArn=DELETE_DOCUMENT_WORKFLOW_ARN,
stateMachineArn=os.environ.get("DELETE_DOCUMENT_WORKFLOW_ARN"),
input=json.dumps(
{
"workspace_id": workspace_id,
Expand Down
28 changes: 28 additions & 0 deletions lib/shared/layers/python-sdk/python/genai_core/parameters.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import os
import urllib
import json
from aws_lambda_powertools.utilities import parameters

X_ORIGIN_VERIFY_SECRET_ARN = os.environ.get("X_ORIGIN_VERIFY_SECRET_ARN")
Expand All @@ -7,6 +9,32 @@
MODELS_PARAMETER_NAME = os.environ.get("MODELS_PARAMETER_NAME")


def load_all_from_ssm():
    """Populate ``os.environ`` with the parameters stored in SSM.

    ``LOAD_FROM_SSM`` holds a comma-separated list of variable names and
    ``LOAD_FROM_SSM_PREFIX`` the SSM path prefix they are stored under.
    Every listed name that is not already present in ``os.environ`` is
    fetched with ``get_ssm_parameter`` (which goes through the Lambda
    extension for Parameter Store) and written to ``os.environ`` under that
    name. Names that are already set, for example by a previous call, are
    not fetched again.

    Raises:
        Exception: if ``AWS_SESSION_TOKEN``, ``LOAD_FROM_SSM`` or
            ``LOAD_FROM_SSM_PREFIX`` is missing or empty.
    """
    load_from_ssm = os.environ.get("LOAD_FROM_SSM")
    load_from_ssm_prefix = os.environ.get("LOAD_FROM_SSM_PREFIX")
    # Passed to get_ssm_parameter, which sends it as the request auth header.
    token = os.environ.get("AWS_SESSION_TOKEN")

    if not token or not load_from_ssm or not load_from_ssm_prefix:
        raise Exception("Please make sure env variables are set.")

    for raw_name in load_from_ssm.split(","):
        var = raw_name.strip()
        # Ignore blanks left by stray/trailing commas: they would otherwise
        # trigger a lookup of the bare prefix and an empty variable name.
        if not var or var in os.environ:
            continue
        os.environ[var] = get_ssm_parameter(load_from_ssm_prefix + var, token)


def get_ssm_parameter(ssm_parameter_path: str, token: str):
    """Fetch one parameter value through the Lambda parameters extension.

    Args:
        ssm_parameter_path: Full name of the SSM parameter; sent URL-encoded
            as the ``name`` query argument.
        token: Value sent in the ``X-Aws-Parameters-Secrets-Token`` header.

    Returns:
        The ``Parameter.Value`` field of the JSON response.
    """
    # https://docs.aws.amazon.com/systems-manager/latest/userguide/ps-integration-lambda-extensions.html#ps-integration-lambda-extensions-how-it-works
    # A bare ``import urllib`` does not guarantee these submodules are loaded,
    # so import them explicitly where they are used.
    import urllib.parse
    import urllib.request

    params = urllib.parse.urlencode({"name": ssm_parameter_path})
    url = "http://localhost:2773/systemsmanager/parameters/get/?%s" % params
    request = urllib.request.Request(url)
    request.add_header("X-Aws-Parameters-Secrets-Token", token)
    # Bandit false positive. The url used cannot use a different scheme
    # The context manager closes the HTTP response once the body is read.
    with urllib.request.urlopen(request) as response:  # nosec
        config = json.loads(response.read())
    return config["Parameter"]["Value"]


def get_external_api_key(name: str):
api_keys = parameters.get_secret(API_KEYS_SECRETS_ARN, transform="json", max_age=60)

Expand Down
18 changes: 4 additions & 14 deletions lib/shared/layers/python-sdk/python/genai_core/workspaces.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,6 @@
WORKSPACES_BY_OBJECT_TYPE_INDEX_NAME = os.environ.get(
"WORKSPACES_BY_OBJECT_TYPE_INDEX_NAME"
)
CREATE_AURORA_WORKSPACE_WORKFLOW_ARN = os.environ.get(
"CREATE_AURORA_WORKSPACE_WORKFLOW_ARN"
)
CREATE_OPEN_SEARCH_WORKSPACE_WORKFLOW_ARN = os.environ.get(
"CREATE_OPEN_SEARCH_WORKSPACE_WORKFLOW_ARN"
)
CREATE_KENDRA_WORKSPACE_WORKFLOW_ARN = os.environ.get(
"CREATE_KENDRA_WORKSPACE_WORKFLOW_ARN"
)
DELETE_WORKSPACE_WORKFLOW_ARN = os.environ.get("DELETE_WORKSPACE_WORKFLOW_ARN")

WORKSPACE_OBJECT_TYPE = "workspace"

Expand Down Expand Up @@ -147,7 +137,7 @@ def create_workspace_aurora(
ddb_response = table.put_item(Item=item)

response = sfn_client.start_execution(
stateMachineArn=CREATE_AURORA_WORKSPACE_WORKFLOW_ARN,
stateMachineArn=os.environ.get("CREATE_AURORA_WORKSPACE_WORKFLOW_ARN"),
input=json.dumps(
{
"workspace_id": workspace_id,
Expand Down Expand Up @@ -217,7 +207,7 @@ def create_workspace_open_search(
ddb_response = table.put_item(Item=item)

response = sfn_client.start_execution(
stateMachineArn=CREATE_OPEN_SEARCH_WORKSPACE_WORKFLOW_ARN,
stateMachineArn=os.environ.get("CREATE_OPEN_SEARCH_WORKSPACE_WORKFLOW_ARN"),
input=json.dumps(
{
"workspace_id": workspace_id,
Expand Down Expand Up @@ -263,7 +253,7 @@ def create_workspace_kendra(
ddb_response = table.put_item(Item=item)

response = sfn_client.start_execution(
stateMachineArn=CREATE_KENDRA_WORKSPACE_WORKFLOW_ARN,
stateMachineArn=os.environ.get("CREATE_KENDRA_WORKSPACE_WORKFLOW_ARN"),
input=json.dumps(
{
"workspace_id": workspace_id,
Expand Down Expand Up @@ -325,7 +315,7 @@ def delete_workspace(workspace_id: str):
raise genai_core.types.CommonError("Workspace not ready for deletion")

response = sfn_client.start_execution(
stateMachineArn=DELETE_WORKSPACE_WORKFLOW_ARN,
stateMachineArn=os.environ.get("DELETE_WORKSPACE_WORKFLOW_ARN"),
input=json.dumps(
{
"workspace_id": workspace_id,
Expand Down
Loading
Loading