From 4507f3599992b2263cc250ad8e5bf05c200c5cb3 Mon Sep 17 00:00:00 2001 From: Roger Ng Date: Wed, 4 Dec 2024 16:54:10 +0000 Subject: [PATCH 1/4] Update conformance codelab instructions (#375) --- cmd/conformance/README.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cmd/conformance/README.md b/cmd/conformance/README.md index 74293cfd..ecc15d04 100644 --- a/cmd/conformance/README.md +++ b/cmd/conformance/README.md @@ -38,10 +38,11 @@ curl -d 'two!' -H "Content-Type: application/data" -X POST ${WRITE_URL}add & curl -d 'three!' -H "Content-Type: application/data" -X POST ${WRITE_URL}add & wait -# Check that the checkpoint is of the correct size +# Check that the checkpoint is of the correct size (i.e. 3). +# If the checkpoint size is zero, this is expected. It may take a second to integrate the entries and publish the checkpoint. curl -s ${READ_URL}checkpoint -# Look at the leaves. Piping into xxd to reveal the leaf sizes. +# Look at the leaves after confirming the checkpoint size. Piping into xxd to reveal the leaf sizes. 
curl -s ${READ_URL}tile/entries/000.p/3 | xxd ``` From 19e302b7cebd59a595f7860216c4ff63d5656c44 Mon Sep 17 00:00:00 2001 From: Philippe Boneff Date: Wed, 4 Dec 2024 17:55:56 +0000 Subject: [PATCH 2/4] AWS CI pipeline (#370) * conformance terraform module * terragrunt live * github action * add concurrency * concurrency test * try and run from main repo * on open PRs * try and trigger on draft PR * allow access to secrets before submission * back to other branch * better IAM * push on main * address comments * downgrade version * upgrade version --- .github/workflows/aws_integration_test.yml | 154 ++++++++++ .../live/aws/conformance/ci/terragrunt.hcl | 4 - .../live/aws/conformance/terragrunt.hcl | 24 +- deployment/modules/aws/conformance/main.tf | 274 ++++++++++++++++++ deployment/modules/aws/conformance/outputs.tf | 9 + .../modules/aws/conformance/variables.tf | 54 ++++ 6 files changed, 506 insertions(+), 13 deletions(-) create mode 100644 .github/workflows/aws_integration_test.yml create mode 100644 deployment/modules/aws/conformance/main.tf create mode 100644 deployment/modules/aws/conformance/outputs.tf create mode 100644 deployment/modules/aws/conformance/variables.tf diff --git a/.github/workflows/aws_integration_test.yml b/.github/workflows/aws_integration_test.yml new file mode 100644 index 00000000..e8e5acb2 --- /dev/null +++ b/.github/workflows/aws_integration_test.yml @@ -0,0 +1,154 @@ +name: AWS Conformance Test + +on: + push: + branches: + - main + +# This prevents two workflows from running at the same time. +# This workflow calls terragrunt, which does not allow concurrent runs. 
+concurrency: + group: aws-conformance + cancel-in-progress: false + +permissions: + contents: read + +env: + TF_VERSION: "1.10.0" + TG_VERSION: "0.67.0" + TG_DIR: "deployment/live/aws/conformance/ci/" + TESSERA_PREFIX_NAME: trillian-tessera + ECR_REGISTRY: 864981736166.dkr.ecr.us-east-1.amazonaws.com + ECR_REPOSITORY_CONFORMANCE: trillian-tessera/conformance:latest + ECR_REPOSITORY_HAMMER: trillian-tessera/hammer:latest + AWS_REGION: us-east-1 + +jobs: + aws-integration: + runs-on: ubuntu-latest + + steps: + ## Authenticate to AWS with the credentials stored in Github Secrets. + - name: Configure AWS Credentials + uses: aws-actions/configure-aws-credentials@v4 + with: + # TODO(phboneff): use a better form of authentication + aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} + aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + aws-region: ${{ env.AWS_REGION }} + + - name: Checkout code + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + + ## Authenticate with ECR to push the conformance and hammer images. + - name: Login to Amazon ECR + id: login-ecr + uses: aws-actions/amazon-ecr-login@v2 + + ## Build the conformance image and push it to ECR. This will be used + ## later on by Terragrunt. + - name: Build, tag, and push Conformance image to Amazon ECR + id: build-publish-conformance + shell: bash + env: + ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }} + ECR_REPOSITORY: ${{ env.ECR_REPOSITORY_CONFORMANCE }} + run: | + docker build -f ./cmd/conformance/aws/Dockerfile . -t "$ECR_REGISTRY/$ECR_REPOSITORY" + docker push "$ECR_REGISTRY/$ECR_REPOSITORY" + echo "Pushed image to $ECR_REGISTRY/$ECR_REPOSITORY" + + ## Build the hammer image and push it to ECR. This will be used + ## later on by Terragrunt. 
+ - name: Build, tag, and push Hammer image to Amazon ECR + id: build-publish-hammer + shell: bash + env: + ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }} + ECR_REPOSITORY: ${{ env.ECR_REPOSITORY_HAMMER }} + run: | + docker build -f ./internal/hammer/Dockerfile . -t "$ECR_REGISTRY/$ECR_REPOSITORY" + docker push "$ECR_REGISTRY/$ECR_REPOSITORY" + echo "Pushed image to $ECR_REGISTRY/$ECR_REPOSITORY" + + ## Destroy any pre-existing deployment/live/aws/conformance/ci env. + ## This might happen if a previous integration test workflow has failed. + - name: Terragrunt destroy pre conformance test + id: terragrunt-destroy-pre + uses: gruntwork-io/terragrunt-action@v2 + with: + tf_version: ${{ env.TF_VERSION }} + tg_version: ${{ env.TG_VERSION }} + tg_dir: ${{ env.TG_DIR }} + tg_command: "destroy" + env: + TESSERA_SIGNER: unused + TESSERA_VERIFIER: unused + + ## Generate new keys for the log to use, and export them to environment + ## variables for Terragrunt to use. + - name: Generate Tessera keys + id: generate-keys + shell: bash + run: | + go run github.com/transparency-dev/serverless-log/cmd/generate_keys@80334bc9dc573e8f6c5b3694efad6358da50abd4 \ + --key_name=tessera/test/conformance \ + --out_priv=${{ runner.temp }}/key.sec \ + --out_pub=${{ runner.temp }}/key.pub + cat ${{ runner.temp }}/key.pub + echo "TESSERA_SIGNER=$(cat ${{ runner.temp }}/key.sec)" >> "$GITHUB_ENV" + echo "TESSERA_VERIFIER=$(cat ${{ runner.temp }}/key.pub)" >> "$GITHUB_ENV" + + ## Apply the deployment/live/aws/conformance/ci terragrunt config. 
+ ## This will bring up the conformance infrastructure which consists of: + ## - the storage module + ## - a private S3 <--> ECS network link for the hammer to read the log + ## - an ECS cluster to run Fargate tasks + ## - a conformance service, with multiple conformance binary instances + ## - a hammer task definition (but no execution) + # TODO(phboneff): AuroraDB takes a long time to be brought up and down + # consider keeping it around between tests / using Aurora Serverless + - name: Terragrunt apply + id: terragrunt-apply + uses: gruntwork-io/terragrunt-action@v2 + with: + tf_version: ${{ env.TF_VERSION }} + tg_version: ${{ env.TG_VERSION }} + tg_dir: ${{ env.TG_DIR }} + tg_command: "apply" + env: + INPUT_POST_EXEC_1: | + echo "ECS_CLUSTER=$(terragrunt output -raw ecs_cluster)" >> "$GITHUB_ENV" + INPUT_POST_EXEC_2: | + echo "VPC_SUBNETS=$(terragrunt output -json vpc_subnets)" >> "$GITHUB_ENV" + + ## Now we can run the hammer using the task definition, against the + ## conformance service. This step returns the hammer task's exit code. + - name: Run Hammer + id: hammer + shell: bash + run: | + cat ${{ runner.temp }}/key.pub + echo "Will launch a hammer ECS task." + HAMMER_ARN=$(aws ecs run-task \ + --cluster="$ECS_CLUSTER" \ + --task-definition=hammer \ + --count=1 \ + --launch-type=FARGATE \ + --network-configuration='{"awsvpcConfiguration": {"assignPublicIp":"ENABLED","subnets": '$VPC_SUBNETS'}}' \ + --query 'tasks[0].taskArn') + echo "Hammer task running, ARN: $HAMMER_ARN." + echo "Waiting for task to stop..." + aws ecs wait tasks-stopped --cluster="$ECS_CLUSTER" --tasks=[$HAMMER_ARN] + echo "The task has stopped. Fetching exit code and returning this action with it." 
+ exit $(aws ecs describe-tasks --cluster="$ECS_CLUSTER" --tasks=[$HAMMER_ARN] --query 'tasks[0].containers[0].exitCode') + + - name: Terragrunt destroy post conformance test + id: terragrunt-destroy-post + uses: gruntwork-io/terragrunt-action@v2 + with: + tf_version: ${{ env.TF_VERSION }} + tg_version: ${{ env.TG_VERSION }} + tg_dir: ${{ env.TG_DIR }} + tg_command: "destroy" diff --git a/deployment/live/aws/conformance/ci/terragrunt.hcl b/deployment/live/aws/conformance/ci/terragrunt.hcl index 80c48bfa..19cbfa39 100644 --- a/deployment/live/aws/conformance/ci/terragrunt.hcl +++ b/deployment/live/aws/conformance/ci/terragrunt.hcl @@ -1,7 +1,3 @@ -terraform { - source = "${get_repo_root()}/deployment/modules/aws//storage" -} - include "root" { path = find_in_parent_folders() expose = true diff --git a/deployment/live/aws/conformance/terragrunt.hcl b/deployment/live/aws/conformance/terragrunt.hcl index 65b4d5e4..2b2cd3e3 100644 --- a/deployment/live/aws/conformance/terragrunt.hcl +++ b/deployment/live/aws/conformance/terragrunt.hcl @@ -1,15 +1,22 @@ terraform { - source = "${get_repo_root()}/deployment/modules/aws//storage" + source = "${get_repo_root()}/deployment/modules/aws//conformance" } locals { - env = path_relative_to_include() - account_id = "${get_aws_account_id()}" - region = get_env("AWS_REGION", "us-east-1") - profile = get_env("AWS_PROFILE", "default") - base_name = get_env("TESSERA_BASE_NAME", "${local.env}-conformance") - prefix_name = get_env("TESSERA_PREFIX_NAME", "trillian-tessera") - ephemeral = true + env = path_relative_to_include() + account_id = "${get_aws_account_id()}" + region = get_env("AWS_REGION", "us-east-1") + base_name = get_env("TESSERA_BASE_NAME", "${local.env}-conformance") + prefix_name = get_env("TESSERA_PREFIX_NAME", "trillian-tessera") + ecr_registry = get_env("ECR_REGISTRY", "${local.account_id}.dkr.ecr.${local.region}.amazonaws.com") + ecr_repository_conformance = get_env("ECR_REPOSITORY_CONFORMANCE", 
"trillian-tessera/conformance:latest") + ecr_repository_hammer = get_env("ECR_REPOSITORY_HAMMER", "trillian-tessera/hammer:latest") + signer = get_env("TESSERA_SIGNER") + verifier = get_env("TESSERA_VERIFIER") + # Roles are defined externally + ecs_execution_role = "arn:aws:iam::864981736166:role/ecsTaskExecutionRole" + ecs_conformance_task_role = "arn:aws:iam::864981736166:role/ConformanceECSTaskRolePolicy" + ephemeral = true } remote_state { @@ -17,7 +24,6 @@ remote_state { config = { region = local.region - profile = local.profile bucket = "${local.prefix_name}-${local.base_name}-terraform-state" key = "${local.env}/terraform.tfstate" dynamodb_table = "${local.prefix_name}-${local.base_name}-terraform-lock" diff --git a/deployment/modules/aws/conformance/main.tf b/deployment/modules/aws/conformance/main.tf new file mode 100644 index 00000000..a31c6509 --- /dev/null +++ b/deployment/modules/aws/conformance/main.tf @@ -0,0 +1,274 @@ +# Header ###################################################################### +terraform { + backend "s3" {} + required_providers { + aws = { + source = "hashicorp/aws" + version = "5.76.0" + } + } +} + +locals { + name = "${var.prefix_name}-${var.base_name}" + port = 2024 +} + +provider "aws" { + region = var.region +} + +module "storage" { + source = "../storage" + + prefix_name = var.prefix_name + base_name = var.base_name + region = var.region + ephemeral = true +} + +# Resources #################################################################### +## ECS cluster ################################################################# +# This will be used to run the conformance and hammer binaries on Fargate. 
+resource "aws_ecs_cluster" "ecs_cluster" { + name = "${local.name}" +} + +resource "aws_ecs_cluster_capacity_providers" "ecs_capacity" { + cluster_name = aws_ecs_cluster.ecs_cluster.name + + capacity_providers = ["FARGATE"] +} + +## Virtual private network ##################################################### +# This will be used for the containers to communicate between themselves, and +# the S3 bucket. +resource "aws_default_vpc" "default" { + tags = { + Name = "Default VPC" + } +} + +data "aws_subnets" "subnets" { + filter { + name = "vpc-id" + values = [aws_default_vpc.default.id] + } +} + +## Service discovery ########################################################### +# This will be used by the hammer to contact multiple conformance tasks with +# a single DNS name. +resource "aws_service_discovery_private_dns_namespace" "internal" { + name = "internal" + vpc = aws_default_vpc.default.id +} + +resource "aws_service_discovery_service" "conformance_discovery" { + name = "conformance-discovery" + + dns_config { + namespace_id = aws_service_discovery_private_dns_namespace.internal.id + + dns_records { + ttl = 10 + type = "A" + } + + // TODO(phboneff): make sure that the hammer uses multiple IPs + // otherwise, set a low TTL and use WEIGHTED. + routing_policy = "MULTIVALUE" + } + + health_check_custom_config { + failure_threshold = 1 + } +} + +## Connect S3 bucket to VPC #################################################### +# This allows the hammer to talk to a non public S3 bucket over HTTP. 
+resource "aws_vpc_endpoint" "s3" { + vpc_id = aws_default_vpc.default.id + service_name = "com.amazonaws.${var.region}.s3" +} + + +resource "aws_vpc_endpoint_route_table_association" "private_s3" { + vpc_endpoint_id = aws_vpc_endpoint.s3.id + route_table_id = aws_default_vpc.default.default_route_table_id +} + +resource "aws_s3_bucket_policy" "allow_access_from_vpce" { + bucket = module.storage.log_bucket.id + policy = data.aws_iam_policy_document.allow_access_from_vpce.json +} + +data "aws_iam_policy_document" "allow_access_from_vpce" { + statement { + principals { + type = "*" + identifiers = ["*"] + } + + actions = [ + "s3:GetObject", + ] + + resources = [ + "${module.storage.log_bucket.arn}/*", + ] + + condition { + test = "StringEquals" + variable = "aws:sourceVpce" + values = [aws_vpc_endpoint.s3.id] + } + } + depends_on = [aws_vpc_endpoint.s3] +} + +## Conformance task and service ################################################ +# This will start multiple conformance tasks on Fargate within a service. +resource "aws_ecs_task_definition" "conformance" { + family = "conformance" + requires_compatibilities = ["FARGATE"] + # Required network_mode for tasks running on Fargate. + network_mode = "awsvpc" + cpu = 1024 + memory = 2048 + execution_role_arn = var.ecs_execution_role + # We need a special role that has access to S3. 
+ task_role_arn = var.ecs_conformance_task_role + container_definitions = jsonencode([{ + "name": "${local.name}-conformance", + "image": "${var.ecr_registry}/${var.ecr_repository_conformance}", + "cpu": 0, + "portMappings": [{ + "name": "conformance-${local.port}-tcp", + "containerPort": local.port, + "hostPort": local.port, + "protocol": "tcp", + "appProtocol": "http" + }], + "essential": true, + "command": [ + "--signer=${var.signer}", + "--bucket=${module.storage.log_bucket.id}", + "--db_user=root", + "--db_password=password", + "--db_name=tessera", + "--db_host=${module.storage.log_rds_db.endpoint}", + "-v=2" + ], + "logConfiguration": { + "logDriver": "awslogs", + "options": { + "awslogs-group": "/ecs/${local.name}", + "mode": "non-blocking", + "awslogs-create-group": "true", + "max-buffer-size": "25m", + "awslogs-region": "us-east-1", + "awslogs-stream-prefix": "ecs" + }, + }, + }]) + + runtime_platform { + operating_system_family = "LINUX" + cpu_architecture = "X86_64" + } + + depends_on = [module.storage] +} + +resource "aws_ecs_service" "conformance_service" { + name = "${local.name}" + task_definition = aws_ecs_task_definition.conformance.arn + cluster = aws_ecs_cluster.ecs_cluster.arn + launch_type = "FARGATE" + desired_count = 3 + wait_for_steady_state = true + + network_configuration { + subnets = data.aws_subnets.subnets.ids + # required to access container registry + assign_public_ip = true + } + + # connect the service with the service discovery defined above + service_registries { + registry_arn = aws_service_discovery_service.conformance_discovery.arn + } + + depends_on = [ + aws_service_discovery_private_dns_namespace.internal, + aws_service_discovery_service.conformance_discovery, + aws_ecs_cluster.ecs_cluster, + aws_ecs_task_definition.conformance, + ] +} + +## Hammer task definition and execution ######################################## +# The hammer can also be launched manually with the following command: +# aws ecs run-task \ +# 
--cluster="$(terragrunt output -raw ecs_cluster)" \ +# --task-definition=hammer \ +# --count=1 \ +# --launch-type=FARGATE \ +# --network-configuration='{"awsvpcConfiguration": {"assignPublicIp":"ENABLED","subnets": '$(terragrunt output -json vpc_subnets)'}}' + +resource "aws_ecs_task_definition" "hammer" { + family = "hammer" + requires_compatibilities = ["FARGATE"] + # Required network_mode for tasks running on Fargate + network_mode = "awsvpc" + cpu = 1024 + memory = 2048 + execution_role_arn = var.ecs_execution_role + container_definitions = jsonencode([{ + "name": "${local.name}-hammer", + "image": "${var.ecr_registry}/${var.ecr_repository_hammer}", + "cpu": 0, + "portMappings": [{ + "name": "hammer-80-tcp", + "containerPort": 80, + "hostPort": 80, + "protocol": "tcp", + "appProtocol": "http" + }], + "essential": true, + "command": [ + "--log_public_key=${var.verifier}", + "--log_url=https://${module.storage.log_bucket.bucket_regional_domain_name}", + "--write_log_url=http://${aws_service_discovery_service.conformance_discovery.name}.${aws_service_discovery_private_dns_namespace.internal.name}:${local.port}", + "-v=3", + "--show_ui=false", + "--logtostderr", + "--num_writers=1100", + "--max_write_ops=1500", + "--leaf_min_size=1024", + "--leaf_write_goal=50000" + ], + "logConfiguration": { + "logDriver": "awslogs", + "options": { + "awslogs-group": "/ecs/${local.name}-hammer", + "mode": "non-blocking", + "awslogs-create-group": "true", + "max-buffer-size": "25m", + "awslogs-region": "us-east-1", + "awslogs-stream-prefix": "ecs" + }, + }, + }]) + + runtime_platform { + operating_system_family = "LINUX" + cpu_architecture = "X86_64" + } + + depends_on = [ + module.storage, + aws_ecs_cluster.ecs_cluster, + ] +} diff --git a/deployment/modules/aws/conformance/outputs.tf b/deployment/modules/aws/conformance/outputs.tf new file mode 100644 index 00000000..e3b45d3d --- /dev/null +++ b/deployment/modules/aws/conformance/outputs.tf @@ -0,0 +1,9 @@ +output "ecs_cluster" { 
+ description = "ECS cluster name" + value = aws_ecs_cluster.ecs_cluster.id +} + +output "vpc_subnets" { + description = "VPC subnets list" + value = data.aws_subnets.subnets.ids +} diff --git a/deployment/modules/aws/conformance/variables.tf b/deployment/modules/aws/conformance/variables.tf new file mode 100644 index 00000000..56d5ec71 --- /dev/null +++ b/deployment/modules/aws/conformance/variables.tf @@ -0,0 +1,54 @@ +variable "prefix_name" { + description = "Common prefix to use when naming resources, ensures unicity of the s3 bucket name." + type = string +} + +variable "base_name" { + description = "Common name to use when naming resources." + type = string +} + +variable "region" { + description = "Region in which to create resources." + type = string +} + +variable "ephemeral" { + description = "Set to true if this is a throwaway/temporary log instance. Will set attributes on created resources to allow them to be disabled/deleted more easily." + type = bool +} + +variable "ecr_registry" { + description = "Container registry address, with the conformance and hammer repositories." + type = string +} + +variable "ecr_repository_conformance" { + description = "Container repository for the conformance binary, with the tag." + type = string +} + +variable "ecr_repository_hammer" { + description = "Container repository for the hammer binary, with the tag." + type = string +} + +variable "signer" { + description = "The note signer which used to sign checkpoints." + type = string +} + +variable "verifier" { + description = "The note verifier used to verify checkpoints." + type = string +} + +variable "ecs_execution_role" { + description = "Role used to run the ECS task." + type = string +} + +variable "ecs_conformance_task_role" { + description = "Role assumed by conformance containers when they run." 
+ type = string +} From 7fe185e3233a69e71b4224c163e329e373a1d462 Mon Sep 17 00:00:00 2001 From: Philippe Boneff Date: Wed, 4 Dec 2024 19:09:45 +0000 Subject: [PATCH 3/4] improve main readme (#368) --- README.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index b9d38bc8..346d739f 100644 --- a/README.md +++ b/README.md @@ -101,8 +101,10 @@ Take a look at the example personalities in the `/cmd/` directory: - This example runs an HTTP web server that takes arbitrary data and adds it to a file-based log. - [mysql](./cmd/conformance/mysql/): example of operating a log that uses MySQL - This example is easiest deployed via `docker compose`, which allows for easy setup and teardown. - - [gcp](./cmd/conformance/gcp/): example of operating a log running in GCP - - This example can be deployed via terraform (see the [deployment](./deployment/) directory). + - [gcp](./cmd/conformance/gcp/): example of operating a log running in GCP. + - This example can be deployed via terraform, see the [deployment instructions](./deployment/live/gcp/conformance#manual-deployment). + - [aws](./cmd/conformance/aws/): example of operating a log running on AWS. + - This example can be deployed via terraform, see the [deployment instructions](./deployment/live/aws/codelab#aws-codelab-deployment). - [posix-oneshot](./cmd/examples/posix-oneshot/): example of a command line tool to add entries to a log stored on the local filesystem - This example is not a long-lived process; running the command integrates entries into the log which lives only as files. 
From 3caa3329e874e996521f078905345505803580f2 Mon Sep 17 00:00:00 2001 From: Philippe Boneff Date: Thu, 5 Dec 2024 08:40:00 +0000 Subject: [PATCH 4/4] fix AWS links (#378) --- cmd/conformance/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmd/conformance/README.md b/cmd/conformance/README.md index ecc15d04..09fd63e7 100644 --- a/cmd/conformance/README.md +++ b/cmd/conformance/README.md @@ -9,7 +9,7 @@ Implementations are provided that use: - [A local POSIX-compliant filesystem](./posix/) - [MySQL](./mysql/) - [GCP](./gcp/) - - [AWS](deployment/live/aws/codelab/) + - [AWS](./aws/) Each of these personalities exposes an endpoint that accepts `POST` requests at a `/add` URL. The contents of any request body will be appended to the log, and the decimal index assigned to this newly _sequenced_ entry will be returned. @@ -21,7 +21,7 @@ First, you need to bring up personality (a server built with Tessera which manag - [A local POSIX-compliant filesystem](./posix#bring-up-a-log) - [MySQL](./mysql#bring-up-a-log) - [GCP](./gcp) - - [AWS](./aws) + - [AWS](/deployment/live/aws/codelab#aws-codelab-deployment) Choose one of the implementations above and deploy it. In the shell you are going to run this codelab in, define the following environment variables (check the logging output from the implementation you deployed, as these may have been output):