Skip to content

Commit

Permalink
add warnings for throttling exceptions (#1555)
Browse files Browse the repository at this point in the history
  • Loading branch information
sastels authored Sep 26, 2024
1 parent d06e598 commit 74166db
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 1 deletion.
32 changes: 31 additions & 1 deletion aws/eks/cloudwatch_alarms.tf
Original file line number Diff line number Diff line change
Expand Up @@ -972,7 +972,7 @@ resource "aws_cloudwatch_metric_alarm" "service-callback-too-many-failures-warni

resource "aws_cloudwatch_metric_alarm" "service-callback-too-many-failures-critical" {
count = var.cloudwatch_enabled ? 1 : 0
alarm_name = "service-callback-too-many-failures-warning"
alarm_name = "service-callback-too-many-failures-critical"
alarm_description = "Service reached the max number of callback retries 100 times in 10 minutes"
comparison_operator = "GreaterThanOrEqualToThreshold"
evaluation_periods = "1"
Expand All @@ -984,3 +984,33 @@ resource "aws_cloudwatch_metric_alarm" "service-callback-too-many-failures-criti
treat_missing_data = "notBreaching"
alarm_actions = [var.sns_alert_critical_arn]
}

# Warning alarm on the throttling-exceptions log metric: goes into ALARM when
# the metric's Sum over a single 60-second period is greater than or equal to 1.
resource "aws_cloudwatch_metric_alarm" "throttling-exception-warning" {
  # Only created when CloudWatch resources are enabled.
  count = var.cloudwatch_enabled ? 1 : 0

  alarm_name        = "throttling-exception-warning"
  alarm_description = "Have received a throttling exception in the last minute"

  # Metric identity is read from the log metric filter's transformation so the
  # alarm and the filter always refer to the same namespace/name pair.
  namespace   = aws_cloudwatch_log_metric_filter.throttling-exceptions[0].metric_transformation[0].namespace
  metric_name = aws_cloudwatch_log_metric_filter.throttling-exceptions[0].metric_transformation[0].name
  statistic   = "Sum"
  period      = 60

  # Breach condition: Sum >= 1 in one evaluation period.
  comparison_operator = "GreaterThanOrEqualToThreshold"
  threshold           = 1
  evaluation_periods  = "1"

  # Periods with no data points are treated as not breaching.
  treat_missing_data = "notBreaching"

  # Notify the warning SNS topic.
  alarm_actions = [var.sns_alert_warning_arn]
}

# Warning alarm for a burst of throttling exceptions: goes into ALARM when the
# throttling-exceptions log metric's Sum over a single 60-second period is
# greater than or equal to 100.
resource "aws_cloudwatch_metric_alarm" "many-throttling-exceptions-warning" {
  # Only created when CloudWatch resources are enabled.
  count = var.cloudwatch_enabled ? 1 : 0

  alarm_name = "many-throttling-exceptions-warning"
  # Description corrected to the plural "exceptions" and to "at least 100",
  # matching the GreaterThanOrEqualToThreshold comparison below.
  alarm_description = "Have received at least 100 throttling exceptions in the last minute"

  # Metric identity is read from the log metric filter's transformation so the
  # alarm and the filter always refer to the same namespace/name pair.
  namespace   = aws_cloudwatch_log_metric_filter.throttling-exceptions[0].metric_transformation[0].namespace
  metric_name = aws_cloudwatch_log_metric_filter.throttling-exceptions[0].metric_transformation[0].name
  statistic   = "Sum"
  period      = 60

  # Breach condition: Sum >= 100 in one evaluation period.
  comparison_operator = "GreaterThanOrEqualToThreshold"
  threshold           = 100
  evaluation_periods  = "1"

  # Periods with no data points are treated as not breaching.
  treat_missing_data = "notBreaching"

  # Notify the warning SNS topic.
  alarm_actions = [var.sns_alert_warning_arn]
}
13 changes: 13 additions & 0 deletions aws/eks/cloudwatch_log.tf
Original file line number Diff line number Diff line change
Expand Up @@ -192,3 +192,16 @@ resource "aws_cloudwatch_log_metric_filter" "callback-request-failures" {
value = "1"
}
}

# Log metric filter that publishes a value of 1 to the "throttling-exceptions"
# metric in the "LogMetrics" namespace for log events matching the pattern below.
resource "aws_cloudwatch_log_metric_filter" "throttling-exceptions" {
  # Only created when CloudWatch resources are enabled.
  count = var.cloudwatch_enabled ? 1 : 0

  name = "throttling-exceptions"

  # Log group whose events are scanned by this filter.
  log_group_name = aws_cloudwatch_log_group.notification-canada-ca-eks-application-logs[0].name

  # Filter pattern applied to each log event.
  pattern = "ThrottlingException"

  metric_transformation {
    namespace = "LogMetrics"
    name      = "throttling-exceptions"
    value     = "1"
  }
}

0 comments on commit 74166db

Please sign in to comment.