Bsweger/add hub specific lambda infra (#38)
* Create an S3 bucket for storing shared hubverse assets

This is where we plan to publish the data transformation function
that we want to run via Lambda.
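
A minimal Pulumi sketch of what creating that shared bucket could look like (the `create_bucket` helper name and settings here are illustrative assumptions, not necessarily what this repo does):

```python
import pulumi_aws as aws


def create_bucket(bucket_name: str) -> aws.s3.Bucket:
    """Create the shared S3 bucket that will hold hubverse assets (e.g., lambda packages)."""
    return aws.s3.Bucket(resource_name=bucket_name, bucket=bucket_name)
```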

* Add function to create the model-output transform lambda

Create the lambda that will be triggered when new model-output files
are pushed to a hub's S3 bucket. This definition points to a lambda
package on S3 (rather than defining the function code inline). Another
repo will be responsible for creating the lambda package and deploying
it to the S3 bucket.
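
A hedged sketch of a lambda definition that points at an S3 deployment package rather than inline code. The real `create_transform_lambda` in this repo takes a CloudPath (see the diff below); the simplified bucket/key signature, handler, and runtime here are assumptions:

```python
import pulumi_aws as aws


def create_transform_lambda(
    lambda_name: str, s3_bucket: str, s3_key: str, role: aws.iam.Role
) -> aws.lambda_.Function:
    """Define the model-output transform lambda from a deployment package stored on S3."""
    return aws.lambda_.Function(
        resource_name=lambda_name,
        name=lambda_name,
        s3_bucket=s3_bucket,  # bucket that holds the packaged code
        s3_key=s3_key,  # key of the .zip deployment package
        role=role.arn,  # execution role the function assumes
        handler="lambda_function.lambda_handler",  # assumed handler name
        runtime="python3.12",  # assumed runtime
    )
```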

* Make the S3 location of the lambda package easier to find and change

It's still hard-coded, but it's hard-coded in a better place, with
some CloudPath magic sprinkled in for more robust path parsing.
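
For reference, cloudpathlib's S3-flavored paths can split an s3:// URI into its bucket and key, which is roughly the "magic" in play here (a sketch, not the exact code in this repo):

```python
from cloudpathlib import CloudPath

lambda_package_location = "s3://hubverse-assets/lambda/hubverse-transform-model-output.zip"
lambda_package_path = CloudPath(lambda_package_location)

# For an s3:// URI, CloudPath dispatches to an S3Path, which exposes
# the bucket and key as separate attributes.
print(lambda_package_path.bucket)  # hubverse-assets
print(lambda_package_path.key)     # lambda/hubverse-transform-model-output.zip
```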

* Move permissions components of lambda to their own function

This changeset also creates an IAM policy that allows writes to CloudWatch
logs and attaches that policy to the IAM role assumed by our
hubverse-transform lambda function.
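
A hedged sketch of the CloudWatch piece. Only the `create_lambda_execution_permissions` helper name appears in the diff below; the `attach_cloudwatch_logs_policy` name, policy document, and resource names here are assumptions:

```python
import json

import pulumi_aws as aws


def attach_cloudwatch_logs_policy(lambda_name: str, lambda_role: aws.iam.Role) -> None:
    """Allow the lambda's execution role to create log groups/streams and write log events."""
    log_write_policy = aws.iam.Policy(
        resource_name=f"{lambda_name}-cloudwatch-logs",
        policy=json.dumps(
            {
                "Version": "2012-10-17",
                "Statement": [
                    {
                        "Effect": "Allow",
                        "Action": [
                            "logs:CreateLogGroup",
                            "logs:CreateLogStream",
                            "logs:PutLogEvents",
                        ],
                        "Resource": "arn:aws:logs:*:*:*",
                    }
                ],
            }
        ),
    )
    aws.iam.RolePolicyAttachment(
        resource_name=f"{lambda_name}-cloudwatch-logs",
        role=lambda_role.name,
        policy_arn=log_write_policy.arn,
    )
```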

* Specify that lambda role can only be assumed by a specific function
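
One common way to do this is to scope the role's trust policy with an `aws:SourceArn` condition so that the Lambda service can only assume the role on behalf of the named function. A hedged sketch of that pattern; the exact mechanism used in this commit may differ:

```python
import json

import pulumi_aws as aws

lambda_name = "hubverse-transform-model-output"

# Trust policy: the Lambda service may assume this role, but only on behalf
# of the specific transform function (enforced by the aws:SourceArn condition).
assume_role_policy = json.dumps(
    {
        "Version": "2012-10-17",
        "Statement": [
            {
                "Effect": "Allow",
                "Principal": {"Service": "lambda.amazonaws.com"},
                "Action": "sts:AssumeRole",
                "Condition": {
                    "ArnLike": {"aws:SourceArn": f"arn:aws:lambda:*:*:function:{lambda_name}"}
                },
            }
        ],
    }
)

lambda_role = aws.iam.Role(
    resource_name=f"{lambda_name}-role",
    assume_role_policy=assume_role_policy,
)
```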

* Create a placeholder lambda package on S3

Annoyingly, the "create lambda" function will fail if it's pointing
to a lambda code package that doesn't yet exist on S3. It creates a
chicken-and-egg problem for us, since we haven't deployed the transform
function's code to S3 yet. Might be overkill, but this changeset
creates a placeholder .zip to use as lambda code package until
we have the official deployment pipeline up and running (in the
hubverse-transform repo)
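
A sketch of the chicken-and-egg workaround: check whether a package already exists at the expected key and, if not, upload a tiny placeholder zip. The function name and error message match the diff below, but the boto3 check and placeholder contents are assumptions:

```python
import io
import zipfile

import boto3
from botocore.exceptions import ClientError


def create_lambda_package_placeholder(s3_bucket: str, s3_key: str):
    """Upload a minimal .zip to S3 if no lambda package exists there yet."""
    s3 = boto3.client("s3")
    try:
        # If a real package has already been deployed, leave it alone.
        s3.head_object(Bucket=s3_bucket, Key=s3_key)
        return
    except ClientError as e:
        if e.response["Error"]["Code"] != "404":
            raise Exception(f"Error when checking for existing lambda package: {s3_bucket}/{s3_key}") from e

    # Build an in-memory zip containing a no-op handler so the lambda
    # definition has something valid to point at.
    buffer = io.BytesIO()
    with zipfile.ZipFile(buffer, "w") as zf:
        zf.writestr("lambda_function.py", "def lambda_handler(event, context):\n    return None\n")
    s3.put_object(Bucket=s3_bucket, Key=s3_key, Body=buffer.getvalue())
```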

* Remove an unused test hub from the config

Not related to the current lambda work, but because I removed
the related assets from our Pulumi stack, they'll be recreated
unless the config is updated.

* Update README and do a little cleanup

* Tell mypy to ignore CloudPath.key

Mypy fails in GitHub CI on CloudPath.key (which not only works but also passes
the mypy check locally and in pre-commit). Going to ignore this one instead
of trying to run it down.

* Give the transform-model-output lambda permission to write to hubs' S3 buckets

Noting that there's a limit of 10 policies per IAM role, so once
we're hosting more than a few hubs, we'll need to request a limit increase.
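
A hedged sketch of what the hub bucket write policy might grant. The `create_bucket_write_policy` name comes from the diff below; the specific actions and the assumption that the bucket name matches the hub name are illustrative:

```python
import json

import pulumi_aws as aws


def create_bucket_write_policy(hub_name: str) -> aws.iam.Policy:
    """Policy that allows listing the hub's bucket and reading/writing its objects."""
    return aws.iam.Policy(
        resource_name=f"{hub_name}-write",
        policy=json.dumps(
            {
                "Version": "2012-10-17",
                "Statement": [
                    {
                        "Effect": "Allow",
                        "Action": ["s3:ListBucket"],
                        "Resource": f"arn:aws:s3:::{hub_name}",
                    },
                    {
                        "Effect": "Allow",
                        "Action": ["s3:GetObject", "s3:PutObject", "s3:DeleteObject"],
                        "Resource": f"arn:aws:s3:::{hub_name}/*",
                    },
                ],
            }
        ),
    )
```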

* Trigger the hubverse transform lambda when model-output files arrive

For each hub being created, add an S3 ObjectCreated trigger that
will invoke the hubverse-transform-model-output lambda.

* Don't typecheck the CloudPath class
bsweger authored May 2, 2024
1 parent 0299985 commit d83a596
Showing 4 changed files with 50 additions and 8 deletions.
3 changes: 2 additions & 1 deletion src/hubverse_infrastructure/hubs/hub_setup.py
@@ -12,5 +12,6 @@ def set_up_hub(hub_info: dict):
a different bucket name.
"""

create_s3_infrastructure(hub_info)
hub_bucket = create_s3_infrastructure(hub_info)
hub_info["hub_bucket"] = hub_bucket
create_iam_infrastructure(hub_info)
40 changes: 38 additions & 2 deletions src/hubverse_infrastructure/hubs/iam.py
@@ -1,3 +1,4 @@
import pulumi
import pulumi_aws as aws


@@ -93,20 +94,55 @@ def create_bucket_write_policy(hub_name: str):
return bucket_write_policy


def attach_bucket_write_policy(hub_name: str, github_role, bucket_write_policy):
def attach_bucket_write_policy(resource_name: str, role: aws.iam.Role, bucket_write_policy: aws.iam.Policy):
"""Attach the S3 write policy to the role that Github Actions assumes."""

# Update the role we created for Github Actions by attaching the
# policy that allows writes to the hub's S3 bucket
aws.iam.RolePolicyAttachment(resource_name=hub_name, role=github_role.name, policy_arn=bucket_write_policy.id)
aws.iam.RolePolicyAttachment(resource_name=resource_name, role=role.name, policy_arn=bucket_write_policy.id)


def create_model_output_lambda_trigger(
hub_name: str, hub_bucket: aws.s3.Bucket, model_output_lambda: aws.lambda_.Function
) -> aws.s3.BucketNotification:
"""Create the trigger that will invoke the model output lambda when a new file is written to the hub's S3 bucket."""
allow_bucket = aws.lambda_.Permission(
resource_name=f"{hub_name}-allow",
statement_id="AllowExecutionFromS3Bucket",
action="lambda:InvokeFunction",
function=model_output_lambda.arn.apply(lambda arn: f"{arn}"),
principal="s3.amazonaws.com",
source_arn=hub_bucket.arn.apply(lambda arn: f"{arn}"),
)

bucket_notification = aws.s3.BucketNotification(
resource_name=f"{hub_name}-create-notification",
bucket=hub_bucket.id,
lambda_functions=[
aws.s3.BucketNotificationLambdaFunctionArgs(
lambda_function_arn=model_output_lambda.arn.apply(lambda arn: f"{arn}"),
events=["s3:ObjectCreated:*"],
filter_prefix="raw/",
)
],
opts=pulumi.ResourceOptions(depends_on=[allow_bucket]),
)

return bucket_notification


def create_iam_infrastructure(hub_info: dict):
"""Create the IAM infrastructure needed for a hub."""
org = hub_info["org"]
repo = hub_info["repo"]
hub = hub_info["hub"]
hub_bucket = hub_info["hub_bucket"]
model_output_lambda = hub_info["model_output_lambda"]
model_output_lambda_role = hub_info["model_output_lambda_role"]

trust_policy = create_trust_policy(org, repo)
github_role = create_github_role(hub, trust_policy)
s3_write_policy = create_bucket_write_policy(hub)
attach_bucket_write_policy(hub, github_role, s3_write_policy)
attach_bucket_write_policy(f"{hub}-transform-model-output-lambda", model_output_lambda_role, s3_write_policy)
create_model_output_lambda_trigger(hub, hub_bucket, model_output_lambda)
4 changes: 3 additions & 1 deletion src/hubverse_infrastructure/main.py
@@ -6,7 +6,7 @@
from hubverse_infrastructure.shared.hubverse_transforms import create_transform_infrastructure

# First, create infrastructure components that are shared across hubs.
create_transform_infrastructure()
model_output_lambda, model_output_lambda_role = create_transform_infrastructure()


# Then, create hub-specific infrastructure.
@@ -19,4 +19,6 @@ def get_hubs() -> list[dict]:

hub_list = get_hubs()
for hub in hub_list:
hub["model_output_lambda"] = model_output_lambda
hub["model_output_lambda_role"] = model_output_lambda_role
set_up_hub(hub)
11 changes: 7 additions & 4 deletions src/hubverse_infrastructure/shared/hubverse_transforms.py
@@ -210,16 +210,19 @@ def create_lambda_package_placeholder(s3_bucket: str, s3_key: str):
raise Exception(f"Error when checking for existing lambda package: {s3_bucket}/{s3_key}") from e


def create_transform_infrastructure():
def create_transform_infrastructure() -> tuple[aws.lambda_.Function, aws.iam.Role]:
"""
Create all AWS infrastructure required to support the lambda function that will
operate on cloud-based model-output files.
"""
bucket_name = "hubverse-assets"
lambda_name = "hubverse-transform-model-output"
lambda_package_location = "s3://hubverse-assets/lambda/hubverse-transform-model-output.zip"
lambda_package_path = CloudPath(lambda_package_location)
lambda_package_path = CloudPath(lambda_package_location) # type: ignore

bucket = create_bucket(bucket_name)
lambda_role = create_lambda_execution_permissions(lambda_name)
create_transform_lambda(lambda_name, lambda_package_path, lambda_role, bucket)
model_output_lambda_role = create_lambda_execution_permissions(lambda_name)
model_output_lambda = create_transform_lambda(lambda_name, lambda_package_path, model_output_lambda_role, bucket)

# return the lambda's role so we can attach hub-specific policies to it
return model_output_lambda, model_output_lambda_role
