Cleanup unused queues #1230

Merged · 7 commits · Apr 2, 2024
Changes from all commits
41 changes: 0 additions & 41 deletions aws/common/cloudwatch_alarms_email.tf
@@ -71,47 +71,6 @@ resource "aws_cloudwatch_metric_alarm" "ses-complaint-rate-critical" {
treat_missing_data = "notBreaching"
}

# TODO: delete this alarm and queue once we verify that we've transitioned to the new queues
resource "aws_cloudwatch_metric_alarm" "sqs-email-queue-delay-warning" {
count = var.cloudwatch_enabled ? 1 : 0
alarm_name = "sqs-email-queue-delay-warning"
alarm_description = "ApproximateAgeOfOldestMessage in email queue >= 30 minutes for 5 minutes"
comparison_operator = "GreaterThanOrEqualToThreshold"
evaluation_periods = "5"
metric_name = "ApproximateAgeOfOldestMessage"
namespace = "AWS/SQS"
period = 60
statistic = "Maximum"
threshold = 60 * 30
treat_missing_data = "missing"
alarm_actions = [aws_sns_topic.notification-canada-ca-alert-warning.arn]
dimensions = {
QueueName = "${var.celery_queue_prefix}${var.sqs_email_queue_name}"
}
}

# TODO: delete this alarm and queue once we verify that we've transitioned to the new queues
resource "aws_cloudwatch_metric_alarm" "sqs-email-queue-delay-critical" {
count = var.cloudwatch_enabled ? 1 : 0
alarm_name = "sqs-email-queue-delay-critical"
alarm_description = "ApproximateAgeOfOldestMessage in email queue >= 45 minutes for 5 minutes"
comparison_operator = "GreaterThanOrEqualToThreshold"
evaluation_periods = "5"
metric_name = "ApproximateAgeOfOldestMessage"
namespace = "AWS/SQS"
period = 60
statistic = "Maximum"
threshold = 60 * 45
treat_missing_data = "missing"
alarm_actions = [aws_sns_topic.notification-canada-ca-alert-critical.arn]
insufficient_data_actions = [aws_sns_topic.notification-canada-ca-alert-warning.arn]
ok_actions = [aws_sns_topic.notification-canada-ca-alert-ok.arn]
dimensions = {
QueueName = "${var.celery_queue_prefix}${var.sqs_email_queue_name}"
}
}


resource "aws_cloudwatch_metric_alarm" "sqs-send-email-high-queue-delay-warning" {
count = var.cloudwatch_enabled ? 1 : 0
alarm_name = "sqs-send-email-high-queue-delay-warning"
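The alarms removed above are superseded by per-priority alarms such as `sqs-send-email-high-queue-delay-warning`, visible in the surrounding context. A minimal sketch of what such an alarm presumably looks like, modelled on the deleted resource: the queue variable, threshold, and alert topic are assumptions rather than values taken from this diff.

```hcl
# Sketch only: modelled on the removed sqs-email-queue-delay-warning alarm.
# Queue variable, threshold, and alert topic are assumptions, not values from this PR.
resource "aws_cloudwatch_metric_alarm" "sqs-send-email-high-queue-delay-warning" {
  count               = var.cloudwatch_enabled ? 1 : 0
  alarm_name          = "sqs-send-email-high-queue-delay-warning"
  alarm_description   = "ApproximateAgeOfOldestMessage in send-email-high queue >= 30 minutes for 5 minutes"
  comparison_operator = "GreaterThanOrEqualToThreshold"
  evaluation_periods  = "5"
  metric_name         = "ApproximateAgeOfOldestMessage"
  namespace           = "AWS/SQS"
  period              = 60
  statistic           = "Maximum"
  threshold           = 60 * 30
  treat_missing_data  = "missing"
  alarm_actions       = [aws_sns_topic.notification-canada-ca-alert-warning.arn]

  dimensions = {
    # Full queue name is the Celery prefix plus the per-priority queue name variable.
    QueueName = "${var.celery_queue_prefix}${var.sqs_send_email_high_queue_name}"
  }
}
```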
38 changes: 0 additions & 38 deletions aws/common/cloudwatch_alarms_sms.tf
@@ -253,44 +253,6 @@ resource "aws_cloudwatch_metric_alarm" "sns-sms-rate-exceeded-us-west-2-warning"
treat_missing_data = "notBreaching"
}

resource "aws_cloudwatch_metric_alarm" "sqs-sms-stuck-in-queue-warning" {
count = var.cloudwatch_enabled ? 1 : 0
alarm_name = "sqs-sms-stuck-in-queue-warning"
alarm_description = "ApproximateAgeOfOldestMessage in SMS queue is older than 10 minutes for 5 minutes"
comparison_operator = "GreaterThanOrEqualToThreshold"
evaluation_periods = "5"
metric_name = "ApproximateAgeOfOldestMessage"
namespace = "AWS/SQS"
period = 60
statistic = "Average"
threshold = 60 * 10
treat_missing_data = "missing"
alarm_actions = [aws_sns_topic.notification-canada-ca-alert-warning.arn]
dimensions = {
QueueName = "${var.celery_queue_prefix}${var.sqs_sms_queue_name}"
}
}

resource "aws_cloudwatch_metric_alarm" "sqs-sms-stuck-in-queue-critical" {
count = var.cloudwatch_enabled ? 1 : 0
alarm_name = "sqs-sms-stuck-in-queue-critical"
alarm_description = "ApproximateAgeOfOldestMessage in SMS queue is older than 15 minutes for 5 minutes"
comparison_operator = "GreaterThanOrEqualToThreshold"
evaluation_periods = "5"
metric_name = "ApproximateAgeOfOldestMessage"
namespace = "AWS/SQS"
period = 60
statistic = "Average"
threshold = 60 * 15
treat_missing_data = "missing"
alarm_actions = [aws_sns_topic.notification-canada-ca-alert-critical.arn]
insufficient_data_actions = [aws_sns_topic.notification-canada-ca-alert-warning.arn]
ok_actions = [aws_sns_topic.notification-canada-ca-alert-ok.arn]
dimensions = {
QueueName = "${var.celery_queue_prefix}${var.sqs_sms_queue_name}"
}
}

resource "aws_cloudwatch_metric_alarm" "sqs-send-sms-high-queue-delay-warning" {
count = var.cloudwatch_enabled ? 1 : 0
alarm_name = "sqs-send-sms-high-queue-delay-warning"
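As with the email alarms, the deleted SMS alarms are superseded by per-priority alarms such as `sqs-send-sms-high-queue-delay-warning`, which appears in the surrounding context. A minimal sketch of what such an alarm presumably looks like, reusing the settings of the removed resource; the real thresholds and topics for the new queues are not part of this diff.

```hcl
# Sketch only: modelled on the removed sqs-sms-stuck-in-queue-warning alarm.
# Queue variable, threshold, and alert topic are assumptions, not values from this PR.
resource "aws_cloudwatch_metric_alarm" "sqs-send-sms-high-queue-delay-warning" {
  count               = var.cloudwatch_enabled ? 1 : 0
  alarm_name          = "sqs-send-sms-high-queue-delay-warning"
  alarm_description   = "ApproximateAgeOfOldestMessage in send-sms-high queue is older than 10 minutes for 5 minutes"
  comparison_operator = "GreaterThanOrEqualToThreshold"
  evaluation_periods  = "5"
  metric_name         = "ApproximateAgeOfOldestMessage"
  namespace           = "AWS/SQS"
  period              = 60
  statistic           = "Average"
  threshold           = 60 * 10
  treat_missing_data  = "missing"
  alarm_actions       = [aws_sns_topic.notification-canada-ca-alert-warning.arn]

  dimensions = {
    QueueName = "${var.celery_queue_prefix}${var.sqs_send_sms_high_queue_name}"
  }
}
```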
77 changes: 10 additions & 67 deletions aws/common/dashboards.tf
@@ -223,7 +223,6 @@ resource "aws_cloudwatch_dashboard" "emails" {
"${aws_cloudwatch_metric_alarm.ses-bounce-rate-warning[0].arn}",
"${aws_cloudwatch_metric_alarm.ses-complaint-rate-warning[0].arn}",
"${aws_cloudwatch_metric_alarm.ses-complaint-rate-critical[0].arn}",
"${aws_cloudwatch_metric_alarm.sqs-email-queue-delay-critical[0].arn}",
"${aws_cloudwatch_metric_alarm.no-emails-sent-5-minutes-critical[0].arn}",
"${aws_cloudwatch_metric_alarm.no-emails-sent-5-minutes-warning[0].arn}"
]
@@ -310,7 +309,7 @@ resource "aws_cloudwatch_dashboard" "emails" {
"x": 18,
"type": "text",
"properties": {
"markdown": "\n# Sending emails\n\nEmails are sent with [SES](https://${var.region}.console.aws.amazon.com/ses/home?region=${var.region}#dashboard:).\n\nOur limits are:\n- 1,000,000 emails per 24 hour period\n- 100 emails/second\n\nEmails are sent by Celery through the `deliver_email` task through the [send-email-tasks](https://${var.region}.console.aws.amazon.com/sqs/v2/home?region=${var.region}#/queues/https%3A%2F%2Fsqs.${var.region}.amazonaws.com%2F${var.account_id}%2Feks-notification-canada-casend-email-tasks) queue.\n\n## Message flow\n\nAfter a notification has been created in the database, Celery sends the email to the provider using the deliver_email Celery task. This Celery task is assigned to the SQS queue eks-notification-canada-casend-email-tasks, unless a specific queue has been assigned to the queue (for example: eks-notification-canada-capriority-tasks for priority notifications or eks-notification-canada-cabulk-tasks through the API REST service). This task calls the AWS SES API to send a text message.\n\n## Delivery receipts\n\nReceipts from SES are dispatched to SNS -> [Lambda](https://${var.region}.console.aws.amazon.com/lambda/home?region=${var.region}#/functions/ses-to-sqs-email-callbacks) -> [SQS](https://${var.region}.console.aws.amazon.com/sqs/v2/home?region=${var.region}#/queues/https%3A%2F%2Fsqs.${var.region}.amazonaws.com%2F${var.account_id}%2Feks-notification-canada-cadelivery-receipts) in the `delivery-receipts` queue.\n\nA delay in this queue means that we are slow to process delivery receipts (delivered, bounce, complaints).\n"
"markdown": "\n# Sending emails\n\nEmails are sent with [SES](https://${var.region}.console.aws.amazon.com/ses/home?region=${var.region}#dashboard:).\n\nOur limits are:\n- 1,000,000 emails per 24 hour period\n- 100 emails/second\n\nEmails are sent by Celery through the `deliver_email` task through the [send-email-low](https://${var.region}.console.aws.amazon.com/sqs/v2/home?region=${var.region}#/queues/https%3A%2F%2Fsqs.${var.region}.amazonaws.com%2F${var.account_id}%2Feks-notification-canada-casend-email-low), [send-email-medium](https://${var.region}.console.aws.amazon.com/sqs/v2/home?region=${var.region}#/queues/https%3A%2F%2Fsqs.${var.region}.amazonaws.com%2F${var.account_id}%2Feks-notification-canada-casend-email-medium), or [send-email-high](https://${var.region}.console.aws.amazon.com/sqs/v2/home?region=${var.region}#/queues/https%3A%2F%2Fsqs.${var.region}.amazonaws.com%2F${var.account_id}%2Feks-notification-canada-casend-email-high) queues.\n\n## Message flow\n\nAfter a notification has been created in the database, Celery sends the email to the provider using the deliver_email Celery task. This Celery task is assigned to the send-email-low, send-email-medium, or send-email-high SQS queue depending on the email's priority. This task calls the AWS SES API to send a text message.\n\n## Delivery receipts\n\nReceipts from SES are dispatched to SNS -> [Lambda](https://${var.region}.console.aws.amazon.com/lambda/home?region=${var.region}#/functions/ses-to-sqs-email-callbacks) -> [SQS](https://${var.region}.console.aws.amazon.com/sqs/v2/home?region=${var.region}#/queues/https%3A%2F%2Fsqs.${var.region}.amazonaws.com%2F${var.account_id}%2Feks-notification-canada-cadelivery-receipts) in the `delivery-receipts` queue.\n\nA delay in this queue means that we are slow to process delivery receipts (delivered, bounce, complaints).\n"
}
},
{
@@ -388,9 +387,9 @@ resource "aws_cloudwatch_dashboard" "emails" {
},
{
"height": 6,
"width": 9,
"y": 42,
"x": 9,
"width": 8,
"y": 36,
"x": 8,
"type": "metric",
"properties": {
"metrics": [
@@ -406,8 +405,8 @@
},
{
"height": 6,
"width": 9,
"y": 48,
"width": 8,
"y": 36,
"x": 0,
"type": "metric",
"properties": {
@@ -424,9 +423,9 @@
},
{
"height": 6,
"width": 9,
"y": 48,
"x": 9,
"width": 8,
"y": 36,
"x": 16,
"type": "metric",
"properties": {
"metrics": [
@@ -450,60 +449,6 @@
"markdown": "# Delivery Queues\n"
}
},
{
"height": 6,
"width": 9,
"y": 42,
"x": 0,
"type": "metric",
"properties": {
"metrics": [
[ "AWS/SQS", "ApproximateAgeOfOldestMessage", "QueueName", "eks-notification-canada-casend-email-tasks", { "color": "#1f77b4" } ]
],
"view": "timeSeries",
"stacked": true,
"region": "${var.region}",
"stat": "Average",
"period": 60,
"title": "Average approximate age of oldest message in send-email-tasks"
}
},
{
"height": 6,
"width": 9,
"y": 36,
"x": 0,
"type": "metric",
"properties": {
"metrics": [
[ "AWS/SQS", "ApproximateNumberOfMessagesVisible", "QueueName", "eks-notification-canada-casend-email-tasks" ]
],
"view": "timeSeries",
"stacked": false,
"region": "${var.region}",
"title": "Approximate number of messages in send-email-tasks",
"period": 60,
"stat": "Average"
}
},
{
"height": 6,
"width": 9,
"y": 36,
"x": 9,
"type": "metric",
"properties": {
"metrics": [
[ "AWS/SQS", "ApproximateAgeOfOldestMessage", "QueueName", "eks-notification-canada-casend-email-tasks", { "color": "#1f77b4" } ]
],
"view": "timeSeries",
"stacked": true,
"region": "${var.region}",
"stat": "Average",
"period": 60,
"title": "Average approximate age of oldest message in send-email-tasks"
}
},
{
"height": 6,
"width": 8,
@@ -703,8 +648,6 @@ resource "aws_cloudwatch_dashboard" "sms" {
"alarms": [
"${aws_cloudwatch_metric_alarm.sns-sms-success-rate-canadian-numbers-critical[0].arn}",
"${aws_cloudwatch_metric_alarm.sns-sms-success-rate-canadian-numbers-warning[0].arn}",
"${aws_cloudwatch_metric_alarm.sqs-sms-stuck-in-queue-warning[0].arn}",
"${aws_cloudwatch_metric_alarm.sqs-sms-stuck-in-queue-critical[0].arn}",
"${aws_cloudwatch_metric_alarm.sqs-send-sms-high-queue-delay-warning[0].arn}",
"${aws_cloudwatch_metric_alarm.sqs-send-sms-high-queue-delay-critical[0].arn}",
"${aws_cloudwatch_metric_alarm.sqs-send-sms-medium-queue-delay-warning[0].arn}",
@@ -900,7 +843,7 @@ resource "aws_cloudwatch_dashboard" "sms" {
"x": 18,
"type": "text",
"properties": {
"markdown": "\n## Limits\n- SNS [maximum sending rate](https://docs.aws.amazon.com/general/latest/gr/sns.html#limits_sns): 20 SMS/second\n- [Spending limit](https://${var.region}.console.aws.amazon.com/sns/v3/home?region=${var.region}#/mobile/text-messaging) of 30,000 USD/month\n\n## Message flow\nAfter a notification has been created in the database, Celery sends the SMS to the provider using the `deliver_sms` Celery task. This Celery task is assigned to the SQS queue [eks-notification-canada-casend-sms-tasks](#/queues/https%3A%2F%2Fsqs.${var.region}.amazonaws.com%2F${var.account_id}%2Feks-notification-canada-casend-sms-tasks), unless a specific queue has been assigned to the queue (for example priority templates, SMS sent by the Notify service etc.). This task calls the SNS API to send a text message.\n\n## SNS IDs\nSNS keeps track of SMS with a `messageId`, the value of SNS' `messageId` is stored in the `Notification` object in the `reference` column.\n\n## Logging\nCelery tasks output multiple messages when processing tasks/calling the SNS API, take a look at the relevant Celery code to know more.\n\nAfter an SMS has been sent by SNS, the delivery details are stored in CloudWatch Log groups:\n\n- [sns/${var.region}/${var.account_id}/DirectPublishToPhoneNumber](#logsV2:log-groups/log-group/sns$252F${var.region}$252F${var.account_id}$252FDirectPublishToPhoneNumber) for successful deliveries\n- [sns/${var.region}/${var.account_id}/DirectPublishToPhoneNumber/Failure](#logsV2:log-groups/log-group/sns$252F${var.region}$252F${var.account_id}$252FDirectPublishToPhoneNumber$252FFailure) for failures\n\n## Phone numbers\n\nSMS sent in `${var.region}` use random phone numbers managed by AWS.\n\n### ⚠️ SNS in `us-west-2`\nIf a Notify service has an inbound number attached, SMS will be sent with SNS using a long code phone number ordered on Pinpoint in the `us-west-2` region. Statistics for this region and alarms are **not visible on this dashboard**.\n"
"markdown": "\n## Limits\n- SNS [maximum sending rate](https://docs.aws.amazon.com/general/latest/gr/sns.html#limits_sns): 20 SMS/second\n- [Spending limit](https://${var.region}.console.aws.amazon.com/sns/v3/home?region=${var.region}#/mobile/text-messaging) of 30,000 USD/month\n\n## Message flow\nAfter a notification has been created in the database, Celery sends the SMS to the provider using the `deliver_sms` Celery task. This Celery task is assigned to the SQS queue [${var.celery_queue_prefix}send-sms-low](#/queues/https%3A%2F%2Fsqs.${var.region}.amazonaws.com%2F${var.account_id}%2F${var.celery_queue_prefix}send-sms-low), [${var.celery_queue_prefix}send-sms-medium](#/queues/https%3A%2F%2Fsqs.${var.region}.amazonaws.com%2F${var.account_id}%2F${var.celery_queue_prefix}send-sms-medium), or [${var.celery_queue_prefix}send-sms-high](#/queues/https%3A%2F%2Fsqs.${var.region}.amazonaws.com%2F${var.account_id}%2F${var.celery_queue_prefix}send-sms-high) depending on the SMS priority. This task calls the SNS API to send a text message.\n\n## SNS IDs\nSNS keeps track of SMS with a `messageId`, the value of SNS' `messageId` is stored in the `Notification` object in the `reference` column.\n\n## Logging\nCelery tasks output multiple messages when processing tasks/calling the SNS API, take a look at the relevant Celery code to know more.\n\nAfter an SMS has been sent by SNS, the delivery details are stored in CloudWatch Log groups:\n\n- [sns/${var.region}/${var.account_id}/DirectPublishToPhoneNumber](#logsV2:log-groups/log-group/sns$252F${var.region}$252F${var.account_id}$252FDirectPublishToPhoneNumber) for successful deliveries\n- [sns/${var.region}/${var.account_id}/DirectPublishToPhoneNumber/Failure](#logsV2:log-groups/log-group/sns$252F${var.region}$252F${var.account_id}$252FDirectPublishToPhoneNumber$252FFailure) for failures\n\n## Phone numbers\n\nSMS sent in `${var.region}` use random phone numbers managed by AWS.\n\n### ⚠️ SNS in `us-west-2`\nIf a Notify service has an inbound number attached, SMS will be sent with SNS using a long code phone number ordered on Pinpoint in the `us-west-2` region. Statistics for this region and alarms are **not visible on this dashboard**.\n"
}
},
{
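The deleted dashboard widgets tracked the legacy `eks-notification-canada-casend-email-tasks` queue. A replacement widget for one of the per-priority queues would presumably take the same shape; a sketch in object form matching the dashboard body, where the queue name, title, and placement values are assumptions based on the removed widgets rather than the actual widgets added in this PR.

```hcl
# Sketch only: a per-priority replacement for the removed send-email-tasks widgets.
# Queue name, title, and x/y/width placement are assumptions; var.region is assumed
# to be declared as in aws/common/variables.tf.
locals {
  send_email_high_queue_depth_widget = {
    height = 6
    width  = 8
    y      = 36
    x      = 0
    type   = "metric"
    properties = {
      metrics = [
        ["AWS/SQS", "ApproximateNumberOfMessagesVisible", "QueueName", "eks-notification-canada-casend-email-high"]
      ]
      view    = "timeSeries"
      stacked = false
      region  = var.region
      title   = "Approximate number of messages in send-email-high"
      period  = 60
      stat    = "Average"
    }
  }
}
```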
18 changes: 1 addition & 17 deletions aws/common/variables.tf
@@ -68,14 +68,6 @@ variable "sqs_visibility_timeout_priority_high" {
default = 26
}

# TODO: delete this variable once we verify that we've transitioned to the new queues
variable "sqs_email_queue_name" {
type = string
# See QueueNames in
# https://github.com/cds-snc/notification-api/blob/master/app/config.py
default = "send-email-tasks"
}

variable "sqs_send_email_high_queue_name" {
type = string
# See QueueNames in
@@ -97,14 +89,6 @@ variable "sqs_send_email_low_queue_name" {
default = "send-email-low"
}

# TODO: delete this variable once we verify that we've transitioned to the new queues
variable "sqs_sms_queue_name" {
type = string
# See QueueNames in
# https://github.com/cds-snc/notification-api/blob/master/app/config.py
default = "send-sms-tasks"
}

variable "sqs_send_sms_high_queue_name" {
type = string
# See QueueNames in
@@ -313,4 +297,4 @@ variable "budget_sre_bot_webhook" {
description = "Slack webhook used to post budget alerts to the SRE bot"
type = string
sensitive = true
}
}
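The deleted variables only named the legacy queues. Full SQS queue names are composed from `celery_queue_prefix` plus a per-queue variable, which is why the alarm dimensions and dashboard URLs reference names like `eks-notification-canada-casend-email-high`. A small sketch of that composition; the prefix default shown is inferred from the queue URLs in the dashboard markdown and is an assumption.

```hcl
# Sketch only: how full queue names are composed from the prefix and per-queue variables.
# The prefix default is inferred from the queue URLs in this PR, not defined in this file.
variable "celery_queue_prefix" {
  type    = string
  default = "eks-notification-canada-ca" # assumption
}

variable "sqs_send_email_high_queue_name" {
  type    = string
  default = "send-email-high" # assumed from the send-email-low / send-email-medium pattern
}

locals {
  # e.g. "eks-notification-canada-casend-email-high", as used in alarm dimensions above
  send_email_high_queue_name = "${var.celery_queue_prefix}${var.sqs_send_email_high_queue_name}"
}
```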
1 change: 0 additions & 1 deletion aws/eks/dashboards.tf
@@ -158,7 +158,6 @@ resource "aws_cloudwatch_dashboard" "notify_system" {
"arn:aws:cloudwatch:${var.region}:${var.account_id}:alarm:logs-10-500-error-5-minutes-critical-ses_to_sqs_email_callbacks-500-errors-api",
"arn:aws:cloudwatch:${var.region}:${var.account_id}:alarm:lambda-ses-delivery-receipts-errors-critical",
"arn:aws:cloudwatch:${var.region}:${var.account_id}:alarm:sqs-sms-stuck-in-queue-critical",
"arn:aws:cloudwatch:${var.region}:${var.account_id}:alarm:sqs-email-queue-delay-critical",
"arn:aws:cloudwatch:${var.region}:${var.account_id}:alarm:sns-sms-success-rate-canadian-numbers-critical",
"arn:aws:cloudwatch:${var.region}:${var.account_id}:alarm:ddos-detected-load-balancer-critical",
"arn:aws:cloudwatch:${var.region}:${var.account_id}:alarm:inflights-not-being-processed-critical",
8 changes: 0 additions & 8 deletions aws/eks/variables.tf
@@ -206,14 +206,6 @@ variable "sqs_send_email_low_queue_name" {
default = "send-email-low"
}

# TODO: delete this variable once we verify that we've transitioned to the new queues
variable "sqs_sms_queue_name" {
type = string
# See QueueNames in
# https://github.com/cds-snc/notification-api/blob/master/app/config.py
default = "send-sms-tasks"
}

variable "sqs_send_sms_high_queue_name" {
type = string
# See QueueNames in