Commit
Bsweger/add hub specific lambda infra (#38)
* Create an S3 bucket for storing shared hubverse assets
  This is where we'll plan to publish the data transformation function that we want to run via Lambda
* Add function to create the model-output transform lambda
  Create the lambda that will be triggered when new model-output files are pushed to a hub's S3 bucket. This definition points to a lambda package on S3 (rather than defining the function code inline). Another repo will be responsible for creating the lambda package and deploying to the S3 bucket.
* Make the S3 location of the lambda package easier to find and change
  It's still hard-coded, but it's hard-coded in a better place, with some CloudPath magic sprinkled in for more robust path parsing.
* Move permissions components of lambda to their own function
  This changeset also creates an IAM policy to allow writes to CloudWatch logs and attaches that policy to the IAM role assumed by our hubverse-transform lambda function.
* Specify that lambda role can only be assumed by a specific function
* Create a placeholder lambda package on S3
  Annoyingly, the "create lambda" function will fail if it's pointing to a lambda code package that doesn't yet exist on S3. It creates a chicken-and-egg problem for us, since we haven't deployed the transform function's code to S3 yet. Might be overkill, but this changeset creates a placeholder .zip to use as lambda code package until we have the official deployment pipeline up and running (in the hubverse-transform repo)
* Remove an unused test hub from the config
  Not related to the current lambda work, but because I removed the related assets from our Pulumi stack, they'll be recreated unless the config is updated.
* Update README and do a little cleanup
* Tell mypy to ignore CloudPath.key
  Mypy fails in GitHub CI on CloudPath.key (which not only works, it passes the mypy check locally and in pre-commit). Gonna ignore this one instead of trying to run it down.
* Give the transform-model-output lambda permission to write to hubs' S3
  Noting that there's a limit of 10 policies per IAM role, so once we're hosting more than a few hubs, we'll need to request a limit increase.
* Trigger the hubverse transform lambda when model-output files arrive
  For each hub being created, add an S3 ObjectCreated trigger that will invoke the hubverse-transform-model-output transform.
* Don't typecheck CloudPath class