From 9a83f0cff695e23ee5f68f10af864b317e071fd8 Mon Sep 17 00:00:00 2001 From: Pat Heard Date: Thu, 12 Oct 2023 20:21:35 +0000 Subject: [PATCH] fix: add API CloudWatch error alarms Add a CloudWatch metric filter and alarm that triggers when the API logs an error. Remove the CloudWatch 4xx alarms as these are only triggering during fuzzing attacks. Update to the latest version of Terraform and Terragrunt. --- .devcontainer/docker-compose.yml | 4 +- .github/workflows/tf_apply.yml | 4 +- .github/workflows/tf_plan.yml | 4 +- bin/test_dockerfilebuild.sh | 4 +- terragrunt/aws/api/cloudwatch_api.tf | 63 ++++++++++++---------------- 5 files changed, 35 insertions(+), 44 deletions(-) diff --git a/.devcontainer/docker-compose.yml b/.devcontainer/docker-compose.yml index 5dee3449..a289621f 100644 --- a/.devcontainer/docker-compose.yml +++ b/.devcontainer/docker-compose.yml @@ -15,9 +15,9 @@ services: NODE_VERSION: "lts/*" SHELLCHECK_VERSION: "0.7.2" SHELLCHECK_CHECKSUM: "70423609f27b504d6c0c47e340f33652aea975e45f312324f2dbf91c95a3b188" - TERRAFORM_VERSION: "1.0.3" + TERRAFORM_VERSION: "1.6.1" TERRAFORM_CHECKSUM: "99c4866ffc4d3a749671b1f74d37f907eda1d67d7fc29ed5485aeff592980644" - TERRAGRUNT_VERSION: "0.31.1" + TERRAGRUNT_VERSION: "0.52.1" TERRAGRUNT_CHECKSUM: "76b253919ad688025a4a37338e5602543b0426cae1be1f863b4f3d60dd95ac28" AWS_CLI_VERSION: "2.2.29" volumes: diff --git a/.github/workflows/tf_apply.yml b/.github/workflows/tf_apply.yml index 02093c58..1da79535 100644 --- a/.github/workflows/tf_apply.yml +++ b/.github/workflows/tf_apply.yml @@ -7,8 +7,8 @@ on: env: AWS_REGION: ca-central-1 - TERRAFORM_VERSION: 1.0.3 - TERRAGRUNT_VERSION: 0.31.1 + TERRAFORM_VERSION: 1.6.1 + TERRAGRUNT_VERSION: 0.52.1 TF_VAR_api_auth_token: ${{ secrets.TF_VARS_API_AUTH_TOKEN }} TF_VAR_notify_key: ${{ secrets.TF_VARS_NOTIFY_KEY }} TF_VAR_rds_password: ${{ secrets.TF_VARS_RDS_PASSWORD }} diff --git a/.github/workflows/tf_plan.yml b/.github/workflows/tf_plan.yml index 7f6df19d..ed3f2b99 100644 --- 
a/.github/workflows/tf_plan.yml +++ b/.github/workflows/tf_plan.yml @@ -7,8 +7,8 @@ on: - ".github/workflows/**" env: AWS_REGION: ca-central-1 - TERRAFORM_VERSION: 1.0.3 - TERRAGRUNT_VERSION: 0.31.1 + TERRAFORM_VERSION: 1.6.1 + TERRAGRUNT_VERSION: 0.52.1 TF_VAR_api_auth_token: ${{ secrets.TF_VARS_API_AUTH_TOKEN }} TF_VAR_notify_key: ${{ secrets.TF_VARS_NOTIFY_KEY }} TF_VAR_rds_password: ${{ secrets.TF_VARS_RDS_PASSWORD }} diff --git a/bin/test_dockerfilebuild.sh b/bin/test_dockerfilebuild.sh index b2400796..4f3fc7d5 100755 --- a/bin/test_dockerfilebuild.sh +++ b/bin/test_dockerfilebuild.sh @@ -13,9 +13,9 @@ DOCKER_BUILDKIT=0 docker build . -f .devcontainer/Dockerfile \ --build-arg NODE_VERSION="lts/*" \ --build-arg SHELLCHECK_VERSION="0.7.2" \ --build-arg SHELLCHECK_CHECKSUM="70423609f27b504d6c0c47e340f33652aea975e45f312324f2dbf91c95a3b188" \ - --build-arg TERRAFORM_VERSION="1.0.3" \ + --build-arg TERRAFORM_VERSION="1.6.1" \ --build-arg TERRAFORM_CHECKSUM="99c4866ffc4d3a749671b1f74d37f907eda1d67d7fc29ed5485aeff592980644" \ - --build-arg TERRAGRUNT_VERSION="0.31.1" \ + --build-arg TERRAGRUNT_VERSION="0.52.1" \ --build-arg TERRAGRUNT_CHECKSUM="76b253919ad688025a4a37338e5602543b0426cae1be1f863b4f3d60dd95ac28" \ --build-arg AWS_CLI_VERSION="2.2.29" rm .dockerignore \ No newline at end of file diff --git a/terragrunt/aws/api/cloudwatch_api.tf b/terragrunt/aws/api/cloudwatch_api.tf index 93e04b85..2386d0be 100644 --- a/terragrunt/aws/api/cloudwatch_api.tf +++ b/terragrunt/aws/api/cloudwatch_api.tf @@ -13,6 +13,18 @@ resource "aws_cloudwatch_log_metric_filter" "lambda-429-errors" { } } +resource "aws_cloudwatch_log_metric_filter" "lambda-api-errors" { + name = "api-errors" + pattern = "?ERROR ?Error ?error" 
+ log_group_name = "/aws/lambda/${aws_lambda_function.api.function_name}" + + metric_transformation { + name = "api-errors" + namespace = "LogMetrics" + value = "1" + } +} + resource "aws_cloudwatch_metric_alarm" "logs-1-5XX-error-1-minute-warning" { alarm_name = "logs-1-5XX-error-1-minute-warning" alarm_description = "One 5XX error in 1 minute" @@ -49,42 +61,6 @@ resource "aws_cloudwatch_metric_alarm" "logs-10-5XX-error-5-minutes-critical" { } } -resource "aws_cloudwatch_metric_alarm" "logs-5-4xx-error-1-minute-warning" { - alarm_name = "logs-5-4xx-error-1-minute-warning" - alarm_description = "Five 4xx error in 1 minute" - comparison_operator = "GreaterThanOrEqualToThreshold" - evaluation_periods = "1" - metric_name = "4XXError" - namespace = "AWS/ApiGateway" - period = "60" - statistic = "Sum" - threshold = 5 - treat_missing_data = "notBreaching" - alarm_actions = [aws_sns_topic.warning.arn] - ok_actions = [aws_sns_topic.warning.arn] - dimensions = { - ApiName = aws_api_gateway_rest_api.api.name - } -} - -resource "aws_cloudwatch_metric_alarm" "logs-10-4xx-error-5-minutes-critical" { - alarm_name = "logs-10-4xx-error-5-minutes-critical" - alarm_description = "Ten 4xx errors in 5 minutes" - comparison_operator = "GreaterThanOrEqualToThreshold" - evaluation_periods = "1" - metric_name = "4XXError" - namespace = "AWS/ApiGateway" - period = "300" - statistic = "Sum" - threshold = 10 - treat_missing_data = "notBreaching" - alarm_actions = [aws_sns_topic.critical.arn] - ok_actions = [aws_sns_topic.critical.arn] - dimensions = { - ApiName = aws_api_gateway_rest_api.api.name - } -} - resource "aws_cloudwatch_metric_alarm" "logs-1-429-error-1-minute-warning" { alarm_name = "logs-1-429-error-1-minute-warning" alarm_description = "One 429 error in 1 minute" @@ -115,6 +91,21 @@ resource "aws_cloudwatch_metric_alarm" "logs-10-429-error-5-minutes-critical" { ok_actions = [aws_sns_topic.critical.arn] } +resource "aws_cloudwatch_metric_alarm" "logs-1-api-error-1-minute-warning" 
{ + alarm_name = "logs-1-api-error-1-minute-warning" + alarm_description = "One API error in 1 minute" + comparison_operator = "GreaterThanOrEqualToThreshold" + evaluation_periods = "1" + metric_name = aws_cloudwatch_log_metric_filter.lambda-api-errors.metric_transformation[0].name + namespace = aws_cloudwatch_log_metric_filter.lambda-api-errors.metric_transformation[0].namespace + period = "60" + statistic = "Sum" + threshold = 1 + treat_missing_data = "notBreaching" + alarm_actions = [aws_sns_topic.warning.arn] + ok_actions = [aws_sns_topic.warning.arn] +} + resource "aws_cloudwatch_metric_alarm" "api-gateway-above-maximum-latency-warning" { alarm_name = "api-gateway-above-maximum-latency-warning" alarm_description = "API gateway latency between request and response above 1500ms"