diff --git a/.github/workflows/aws_integration_test.yml b/.github/workflows/aws_integration_test.yml
new file mode 100644
index 00000000..e8e5acb2
--- /dev/null
+++ b/.github/workflows/aws_integration_test.yml
@@ -0,0 +1,154 @@
+name: AWS Conformance Test
+
+on:
+  push:
+    branches:
+      - main
+
+# This prevents two workflows from running at the same time.
+# This workflow calls terragrunt, which does not allow concurrent runs.
+concurrency:
+  group: aws-conformance
+  cancel-in-progress: false
+
+permissions:
+  contents: read
+
+env:
+  TF_VERSION: "1.10.0"
+  TG_VERSION: "0.67.0"
+  TG_DIR: "deployment/live/aws/conformance/ci/"
+  TESSERA_PREFIX_NAME: trillian-tessera
+  ECR_REGISTRY: 864981736166.dkr.ecr.us-east-1.amazonaws.com
+  ECR_REPOSITORY_CONFORMANCE: trillian-tessera/conformance:latest
+  ECR_REPOSITORY_HAMMER: trillian-tessera/hammer:latest
+  AWS_REGION: us-east-1
+
+jobs:
+  aws-integration:
+    runs-on: ubuntu-latest
+
+    steps:
+      ## Authenticate to AWS with the credentials stored in GitHub Secrets.
+      - name: Configure AWS Credentials
+        uses: aws-actions/configure-aws-credentials@v4
+        with:
+          # TODO(phboneff): use a better form of authentication
+          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
+          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+          aws-region: ${{ env.AWS_REGION }}
+
+      - name: Checkout code
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+
+      ## Authenticate with ECR to push the conformance and hammer images.
+      - name: Login to Amazon ECR
+        id: login-ecr
+        uses: aws-actions/amazon-ecr-login@v2
+
+      ## Build the conformance image and push it to ECR. This will be used
+      ## later on by Terragrunt.
+      - name: Build, tag, and push Conformance image to Amazon ECR
+        id: build-publish-conformance
+        shell: bash
+        env:
+          ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }}
+          ECR_REPOSITORY: ${{ env.ECR_REPOSITORY_CONFORMANCE }}
+        run: |
+          docker build -f ./cmd/conformance/aws/Dockerfile . -t "$ECR_REGISTRY/$ECR_REPOSITORY"
+          docker push "$ECR_REGISTRY/$ECR_REPOSITORY"
+          echo "Pushed image to $ECR_REGISTRY/$ECR_REPOSITORY"
+
+      ## Build the hammer image and push it to ECR. This will be used
+      ## later on by Terragrunt.
+      - name: Build, tag, and push Hammer image to Amazon ECR
+        id: build-publish-hammer
+        shell: bash
+        env:
+          ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }}
+          ECR_REPOSITORY: ${{ env.ECR_REPOSITORY_HAMMER }}
+        run: |
+          docker build -f ./internal/hammer/Dockerfile . -t "$ECR_REGISTRY/$ECR_REPOSITORY"
+          docker push "$ECR_REGISTRY/$ECR_REPOSITORY"
+          echo "Pushed image to $ECR_REGISTRY/$ECR_REPOSITORY"
+
+      ## Destroy any pre-existing deployment/live/aws/conformance/ci env.
+      ## This might happen if a previous integration test workflow has failed.
+      - name: Terragrunt destroy pre conformance test
+        id: terragrunt-destroy-pre
+        uses: gruntwork-io/terragrunt-action@v2
+        with:
+          tf_version: ${{ env.TF_VERSION }}
+          tg_version: ${{ env.TG_VERSION }}
+          tg_dir: ${{ env.TG_DIR }}
+          tg_command: "destroy"
+        env:
+          TESSERA_SIGNER: unused
+          TESSERA_VERIFIER: unused
+
+      ## Generate new keys for the log to use, and export them to environment
+      ## variables for Terragrunt to use.
+      - name: Generate Tessera keys
+        id: generate-keys
+        shell: bash
+        run: |
+          go run github.com/transparency-dev/serverless-log/cmd/generate_keys@80334bc9dc573e8f6c5b3694efad6358da50abd4 \
+            --key_name=tessera/test/conformance \
+            --out_priv=${{ runner.temp }}/key.sec \
+            --out_pub=${{ runner.temp }}/key.pub
+          cat ${{ runner.temp }}/key.pub
+          echo "TESSERA_SIGNER=$(cat ${{ runner.temp }}/key.sec)" >> "$GITHUB_ENV"
+          echo "TESSERA_VERIFIER=$(cat ${{ runner.temp }}/key.pub)" >> "$GITHUB_ENV"
+
+      ## Apply the deployment/live/aws/conformance/ci terragrunt config.
+      ## This will bring up the conformance infrastructure which consists of:
+      ##  - the storage module
+      ##  - a private S3 <--> ECS network link for the hammer to read the log
+      ##  - an ECS cluster to run Fargate tasks
+      ##  - a conformance service, with multiple conformance binary instances
+      ##  - a hammer task definition (but no execution)
+      # TODO(phboneff): AuroraDB takes a long time to be brought up and down
+      # consider keeping it around between tests / using Aurora Serverless
+      - name: Terragrunt apply
+        id: terragrunt-apply
+        uses: gruntwork-io/terragrunt-action@v2
+        with:
+          tf_version: ${{ env.TF_VERSION }}
+          tg_version: ${{ env.TG_VERSION }}
+          tg_dir: ${{ env.TG_DIR }}
+          tg_command: "apply"
+        env:
+          INPUT_POST_EXEC_1: |
+            echo "ECS_CLUSTER=$(terragrunt output -raw ecs_cluster)" >> "$GITHUB_ENV"
+          INPUT_POST_EXEC_2: |
+            echo "VPC_SUBNETS=$(terragrunt output -json vpc_subnets)" >> "$GITHUB_ENV"
+
+      ## Now we can run the hammer using the task definition, against the
+      ## conformance service. This step returns the hammer task's exit code.
+      - name: Run Hammer
+        id: hammer
+        shell: bash
+        run: |
+          cat ${{ runner.temp }}/key.pub
+          echo "Will launch a hammer ECS task."
+          HAMMER_ARN=$(aws ecs run-task \
+            --cluster="$ECS_CLUSTER" \
+            --task-definition=hammer \
+            --count=1 \
+            --launch-type=FARGATE \
+            --network-configuration='{"awsvpcConfiguration": {"assignPublicIp":"ENABLED","subnets": '$VPC_SUBNETS'}}' \
+            --query 'tasks[0].taskArn')
+          echo "Hammer task running, ARN: $HAMMER_ARN."
+          echo "Waiting for task to stop..."
+          aws ecs wait tasks-stopped --cluster="$ECS_CLUSTER" --tasks=[$HAMMER_ARN]
+          echo "The task has stopped. Fetching exit code and returning this action with it."
+ exit $(aws ecs describe-tasks --cluster="$ECS_CLUSTER" --tasks=[$HAMMER_ARN] --query 'tasks[0].containers[0].exitCode') + + - name: Terragrunt destroy post conformance test + id: terragrunt-destroy-post + uses: gruntwork-io/terragrunt-action@v2 + with: + tf_version: ${{ env.TF_VERSION }} + tg_version: ${{ env.TG_VERSION }} + tg_dir: ${{ env.TG_DIR }} + tg_command: "destroy" diff --git a/README.md b/README.md index b9d38bc8..346d739f 100644 --- a/README.md +++ b/README.md @@ -101,8 +101,10 @@ Take a look at the example personalities in the `/cmd/` directory: - This example runs an HTTP web server that takes arbitrary data and adds it to a file-based log. - [mysql](./cmd/conformance/mysql/): example of operating a log that uses MySQL - This example is easiest deployed via `docker compose`, which allows for easy setup and teardown. - - [gcp](./cmd/conformance/gcp/): example of operating a log running in GCP - - This example can be deployed via terraform (see the [deployment](./deployment/) directory). + - [gcp](./cmd/conformance/gcp/): example of operating a log running in GCP. + - This example can be deployed via terraform, see the [deployment instructions](./deployment/live/gcp/conformance#manual-deployment). + - [aws](./cmd/conformance/aws/): example of operating a log running on AWS. + - This example can be deployed via terraform, see the [deployment instructions](./deployment/live/aws/codelab#aws-codelab-deployment). - [posix-oneshot](./cmd/examples/posix-oneshot/): example of a command line tool to add entries to a log stored on the local filesystem - This example is not a long-lived process; running the command integrates entries into the log which lives only as files. 
diff --git a/cmd/conformance/README.md b/cmd/conformance/README.md index 74293cfd..09fd63e7 100644 --- a/cmd/conformance/README.md +++ b/cmd/conformance/README.md @@ -9,7 +9,7 @@ Implementations are provided that use: - [A local POSIX-compliant filesystem](./posix/) - [MySQL](./mysql/) - [GCP](./gcp/) - - [AWS](deployment/live/aws/codelab/) + - [AWS](./aws/) Each of these personalities exposes an endpoint that accepts `POST` requests at a `/add` URL. The contents of any request body will be appended to the log, and the decimal index assigned to this newly _sequenced_ entry will be returned. @@ -21,7 +21,7 @@ First, you need to bring up personality (a server built with Tessera which manag - [A local POSIX-compliant filesystem](./posix#bring-up-a-log) - [MySQL](./mysql#bring-up-a-log) - [GCP](./gcp) - - [AWS](./aws) + - [AWS](/deployment/live/aws/codelab#aws-codelab-deployment) Choose one of the implementations above and deploy it. In the shell you are going to run this codelab in, define the following environment variables (check the logging output from the implementation you deployed, as these may have been output): @@ -38,10 +38,11 @@ curl -d 'two!' -H "Content-Type: application/data" -X POST ${WRITE_URL}add & curl -d 'three!' -H "Content-Type: application/data" -X POST ${WRITE_URL}add & wait -# Check that the checkpoint is of the correct size +# Check that the checkpoint is of the correct size (i.e. 3). +# If the checkpoint size is zero, this is expected. It may take a second to integrate the entries and publish the checkpoint. curl -s ${READ_URL}checkpoint -# Look at the leaves. Piping into xxd to reveal the leaf sizes. +# Look at the leaves after confirming the checkpoint size. Piping into xxd to reveal the leaf sizes. 
curl -s ${READ_URL}tile/entries/000.p/3 | xxd ``` diff --git a/deployment/live/aws/conformance/ci/terragrunt.hcl b/deployment/live/aws/conformance/ci/terragrunt.hcl index 80c48bfa..19cbfa39 100644 --- a/deployment/live/aws/conformance/ci/terragrunt.hcl +++ b/deployment/live/aws/conformance/ci/terragrunt.hcl @@ -1,7 +1,3 @@ -terraform { - source = "${get_repo_root()}/deployment/modules/aws//storage" -} - include "root" { path = find_in_parent_folders() expose = true diff --git a/deployment/live/aws/conformance/terragrunt.hcl b/deployment/live/aws/conformance/terragrunt.hcl index 65b4d5e4..2b2cd3e3 100644 --- a/deployment/live/aws/conformance/terragrunt.hcl +++ b/deployment/live/aws/conformance/terragrunt.hcl @@ -1,15 +1,22 @@ terraform { - source = "${get_repo_root()}/deployment/modules/aws//storage" + source = "${get_repo_root()}/deployment/modules/aws//conformance" } locals { - env = path_relative_to_include() - account_id = "${get_aws_account_id()}" - region = get_env("AWS_REGION", "us-east-1") - profile = get_env("AWS_PROFILE", "default") - base_name = get_env("TESSERA_BASE_NAME", "${local.env}-conformance") - prefix_name = get_env("TESSERA_PREFIX_NAME", "trillian-tessera") - ephemeral = true + env = path_relative_to_include() + account_id = "${get_aws_account_id()}" + region = get_env("AWS_REGION", "us-east-1") + base_name = get_env("TESSERA_BASE_NAME", "${local.env}-conformance") + prefix_name = get_env("TESSERA_PREFIX_NAME", "trillian-tessera") + ecr_registry = get_env("ECR_REGISTRY", "${local.account_id}.dkr.ecr.${local.region}.amazonaws.com") + ecr_repository_conformance = get_env("ECR_REPOSITORY_CONFORMANCE", "trillian-tessera/conformance:latest") + ecr_repository_hammer = get_env("ECR_REPOSITORY_HAMMER", "trillian-tessera/hammer:latest") + signer = get_env("TESSERA_SIGNER") + verifier = get_env("TESSERA_VERIFIER") + # Roles are defined externally + ecs_execution_role = "arn:aws:iam::864981736166:role/ecsTaskExecutionRole" + ecs_conformance_task_role = 
"arn:aws:iam::864981736166:role/ConformanceECSTaskRolePolicy" + ephemeral = true } remote_state { @@ -17,7 +24,6 @@ remote_state { config = { region = local.region - profile = local.profile bucket = "${local.prefix_name}-${local.base_name}-terraform-state" key = "${local.env}/terraform.tfstate" dynamodb_table = "${local.prefix_name}-${local.base_name}-terraform-lock" diff --git a/deployment/modules/aws/conformance/main.tf b/deployment/modules/aws/conformance/main.tf new file mode 100644 index 00000000..a31c6509 --- /dev/null +++ b/deployment/modules/aws/conformance/main.tf @@ -0,0 +1,274 @@ +# Header ###################################################################### +terraform { + backend "s3" {} + required_providers { + aws = { + source = "hashicorp/aws" + version = "5.76.0" + } + } +} + +locals { + name = "${var.prefix_name}-${var.base_name}" + port = 2024 +} + +provider "aws" { + region = var.region +} + +module "storage" { + source = "../storage" + + prefix_name = var.prefix_name + base_name = var.base_name + region = var.region + ephemeral = true +} + +# Resources #################################################################### +## ECS cluster ################################################################# +# This will be used to run the conformance and hammer binaries on Fargate. +resource "aws_ecs_cluster" "ecs_cluster" { + name = "${local.name}" +} + +resource "aws_ecs_cluster_capacity_providers" "ecs_capacity" { + cluster_name = aws_ecs_cluster.ecs_cluster.name + + capacity_providers = ["FARGATE"] +} + +## Virtual private network ##################################################### +# This will be used for the containers to communicate between themselves, and +# the S3 bucket. 
+resource "aws_default_vpc" "default" {
+  tags = {
+    Name = "Default VPC"
+  }
+}
+
+data "aws_subnets" "subnets" {
+  filter {
+    name   = "vpc-id"
+    values = [aws_default_vpc.default.id]
+  }
+}
+
+## Service discovery ###########################################################
+# This will be used by the hammer to contact multiple conformance tasks with a
+# single DNS name.
+resource "aws_service_discovery_private_dns_namespace" "internal" {
+  name = "internal"
+  vpc  = aws_default_vpc.default.id
+}
+
+resource "aws_service_discovery_service" "conformance_discovery" {
+  name = "conformance-discovery"
+
+  dns_config {
+    namespace_id = aws_service_discovery_private_dns_namespace.internal.id
+
+    dns_records {
+      ttl  = 10
+      type = "A"
+    }
+
+    // TODO(phboneff): make sure that the hammer uses multiple IPs
+    // otherwise, set a low TTL and use WEIGHTED.
+    routing_policy = "MULTIVALUE"
+  }
+
+  health_check_custom_config {
+    failure_threshold = 1
+  }
+}
+
+## Connect S3 bucket to VPC ####################################################
+# This allows the hammer to talk to a non public S3 bucket over HTTP.
+resource "aws_vpc_endpoint" "s3" { + vpc_id = aws_default_vpc.default.id + service_name = "com.amazonaws.${var.region}.s3" +} + + +resource "aws_vpc_endpoint_route_table_association" "private_s3" { + vpc_endpoint_id = aws_vpc_endpoint.s3.id + route_table_id = aws_default_vpc.default.default_route_table_id +} + +resource "aws_s3_bucket_policy" "allow_access_from_vpce" { + bucket = module.storage.log_bucket.id + policy = data.aws_iam_policy_document.allow_access_from_vpce.json +} + +data "aws_iam_policy_document" "allow_access_from_vpce" { + statement { + principals { + type = "*" + identifiers = ["*"] + } + + actions = [ + "s3:GetObject", + ] + + resources = [ + "${module.storage.log_bucket.arn}/*", + ] + + condition { + test = "StringEquals" + variable = "aws:sourceVpce" + values = [aws_vpc_endpoint.s3.id] + } + } + depends_on = [aws_vpc_endpoint.s3] +} + +## Conformance task and service ################################################ +# This will start multiple conformance tasks on Fargate within a service. +resource "aws_ecs_task_definition" "conformance" { + family = "conformance" + requires_compatibilities = ["FARGATE"] + # Required network_mode for tasks running on Fargate. + network_mode = "awsvpc" + cpu = 1024 + memory = 2048 + execution_role_arn = var.ecs_execution_role + # We need a special role that has access to S3. 
+ task_role_arn = var.ecs_conformance_task_role + container_definitions = jsonencode([{ + "name": "${local.name}-conformance", + "image": "${var.ecr_registry}/${var.ecr_repository_conformance}", + "cpu": 0, + "portMappings": [{ + "name": "conformance-${local.port}-tcp", + "containerPort": local.port, + "hostPort": local.port, + "protocol": "tcp", + "appProtocol": "http" + }], + "essential": true, + "command": [ + "--signer=${var.signer}", + "--bucket=${module.storage.log_bucket.id}", + "--db_user=root", + "--db_password=password", + "--db_name=tessera", + "--db_host=${module.storage.log_rds_db.endpoint}", + "-v=2" + ], + "logConfiguration": { + "logDriver": "awslogs", + "options": { + "awslogs-group": "/ecs/${local.name}", + "mode": "non-blocking", + "awslogs-create-group": "true", + "max-buffer-size": "25m", + "awslogs-region": "us-east-1", + "awslogs-stream-prefix": "ecs" + }, + }, + }]) + + runtime_platform { + operating_system_family = "LINUX" + cpu_architecture = "X86_64" + } + + depends_on = [module.storage] +} + +resource "aws_ecs_service" "conformance_service" { + name = "${local.name}" + task_definition = aws_ecs_task_definition.conformance.arn + cluster = aws_ecs_cluster.ecs_cluster.arn + launch_type = "FARGATE" + desired_count = 3 + wait_for_steady_state = true + + network_configuration { + subnets = data.aws_subnets.subnets.ids + # required to access container registry + assign_public_ip = true + } + + # connect the service with the service discovery defined above + service_registries { + registry_arn = aws_service_discovery_service.conformance_discovery.arn + } + + depends_on = [ + aws_service_discovery_private_dns_namespace.internal, + aws_service_discovery_service.conformance_discovery, + aws_ecs_cluster.ecs_cluster, + aws_ecs_task_definition.conformance, + ] +} + +## Hammer task definition and execution ######################################## +# The hammer can also be launched manually with the following command: +# aws ecs run-task \ +# 
--cluster="$(terragrunt output -raw ecs_cluster)" \ +# --task-definition=hammer \ +# --count=1 \ +# --launch-type=FARGATE \ +# --network-configuration='{"awsvpcConfiguration": {"assignPublicIp":"ENABLED","subnets": '$(terragrunt output -json vpc_subnets)'}}' + +resource "aws_ecs_task_definition" "hammer" { + family = "hammer" + requires_compatibilities = ["FARGATE"] + # Required network_mode for tasks running on Fargate + network_mode = "awsvpc" + cpu = 1024 + memory = 2048 + execution_role_arn = var.ecs_execution_role + container_definitions = jsonencode([{ + "name": "${local.name}-hammer", + "image": "${var.ecr_registry}/${var.ecr_repository_hammer}", + "cpu": 0, + "portMappings": [{ + "name": "hammer-80-tcp", + "containerPort": 80, + "hostPort": 80, + "protocol": "tcp", + "appProtocol": "http" + }], + "essential": true, + "command": [ + "--log_public_key=${var.verifier}", + "--log_url=https://${module.storage.log_bucket.bucket_regional_domain_name}", + "--write_log_url=http://${aws_service_discovery_service.conformance_discovery.name}.${aws_service_discovery_private_dns_namespace.internal.name}:${local.port}", + "-v=3", + "--show_ui=false", + "--logtostderr", + "--num_writers=1100", + "--max_write_ops=1500", + "--leaf_min_size=1024", + "--leaf_write_goal=50000" + ], + "logConfiguration": { + "logDriver": "awslogs", + "options": { + "awslogs-group": "/ecs/${local.name}-hammer", + "mode": "non-blocking", + "awslogs-create-group": "true", + "max-buffer-size": "25m", + "awslogs-region": "us-east-1", + "awslogs-stream-prefix": "ecs" + }, + }, + }]) + + runtime_platform { + operating_system_family = "LINUX" + cpu_architecture = "X86_64" + } + + depends_on = [ + module.storage, + aws_ecs_cluster.ecs_cluster, + ] +} diff --git a/deployment/modules/aws/conformance/outputs.tf b/deployment/modules/aws/conformance/outputs.tf new file mode 100644 index 00000000..e3b45d3d --- /dev/null +++ b/deployment/modules/aws/conformance/outputs.tf @@ -0,0 +1,9 @@ +output "ecs_cluster" { 
+  description = "ECS cluster name"
+  value       = aws_ecs_cluster.ecs_cluster.id
+}
+
+output "vpc_subnets" {
+  description = "VPC subnets list"
+  value       = data.aws_subnets.subnets.ids
+}
diff --git a/deployment/modules/aws/conformance/variables.tf b/deployment/modules/aws/conformance/variables.tf
new file mode 100644
index 00000000..56d5ec71
--- /dev/null
+++ b/deployment/modules/aws/conformance/variables.tf
@@ -0,0 +1,54 @@
+variable "prefix_name" {
+  description = "Common prefix to use when naming resources, ensures uniqueness of the S3 bucket name."
+  type        = string
+}
+
+variable "base_name" {
+  description = "Common name to use when naming resources."
+  type        = string
+}
+
+variable "region" {
+  description = "Region in which to create resources."
+  type        = string
+}
+
+variable "ephemeral" {
+  description = "Set to true if this is a throwaway/temporary log instance. Will set attributes on created resources to allow them to be disabled/deleted more easily."
+  type        = bool
+}
+
+variable "ecr_registry" {
+  description = "Container registry address, with the conformance and hammer repositories."
+  type        = string
+}
+
+variable "ecr_repository_conformance" {
+  description = "Container repository for the conformance binary, with the tag."
+  type        = string
+}
+
+variable "ecr_repository_hammer" {
+  description = "Container repository for the hammer binary, with the tag."
+  type        = string
+}
+
+variable "signer" {
+  description = "The note signer used to sign checkpoints."
+  type        = string
+}
+
+variable "verifier" {
+  description = "The note verifier used to verify checkpoints."
+  type        = string
+}
+
+variable "ecs_execution_role" {
+  description = "Role used to run the ECS task."
+  type        = string
+}
+
+variable "ecs_conformance_task_role" {
+  description = "Role assumed by conformance containers when they run."
+  type        = string
+}