Skip to content

Commit

Permalink
feat: setup an alarm when no log within 5 mins
Browse files Browse the repository at this point in the history
  • Loading branch information
gcharest authored Sep 20, 2024
1 parent 84c115c commit 08340bd
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 0 deletions.
31 changes: 31 additions & 0 deletions terraform/alarms.tf
Original file line number Diff line number Diff line change
Expand Up @@ -55,3 +55,34 @@ resource "aws_cloudwatch_metric_alarm" "sre_bot_warning" {
alarm_actions = [aws_sns_topic.cloudwatch_warning.arn]
ok_actions = [aws_sns_topic.cloudwatch_warning.arn]
}


# Turns every "Scheduler is running at" log line written to the API log group
# into one datapoint of the scheduled-tasks metric, so that the scheduler's
# heartbeat can be alarmed on.
resource "aws_cloudwatch_log_metric_filter" "sre_bot_scheduled_tasks" {
  log_group_name = local.api_cloudwatch_log_group
  name           = local.scheduled_tasks_logged

  # Plain-text term match on the scheduler's heartbeat log line.
  pattern = "Scheduler is running at"

  metric_transformation {
    namespace = local.error_namespace
    name      = local.scheduled_tasks_logged

    # Each matching log event contributes 1 to the metric.
    value = "1"
  }
}


# Heartbeat alarm: fires when the scheduler has not logged its
# "Scheduler is running at" line during a 5-minute window.
#
# The metric comes from the sre_bot_scheduled_tasks log metric filter, which
# publishes a datapoint only when a log line matches. When the scheduler stops
# logging there are therefore no datapoints at all (missing data, not a 0).
resource "aws_cloudwatch_metric_alarm" "sre_bot_scheduled_tasks" {
  alarm_name          = "SRE Bot Scheduled Tasks"
  alarm_description   = "SRE Bot Scheduled Tasks logs missing"
  comparison_operator = "LessThanThreshold"

  metric_name = aws_cloudwatch_log_metric_filter.sre_bot_scheduled_tasks.metric_transformation[0].name
  namespace   = aws_cloudwatch_log_metric_filter.sre_bot_scheduled_tasks.metric_transformation[0].namespace

  # One 5-minute period; alarm when fewer than 1 matching log line was counted.
  period             = 300
  evaluation_periods = 1
  statistic          = "Sum"
  threshold          = 1

  # Missing data must count as breaching: the absence of matching log lines
  # produces no datapoints, and that absence is exactly the condition this
  # alarm exists to detect. With "notBreaching" the alarm could never fire
  # when the scheduler goes silent.
  treat_missing_data = "breaching"

  alarm_actions = [aws_sns_topic.cloudwatch_warning.arn]
  ok_actions    = [aws_sns_topic.cloudwatch_warning.arn]
}
1 change: 1 addition & 0 deletions terraform/local.tf
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@ locals {
error_logged = "SREBotErrorLogged"
error_namespace = "SREBot"
warning_logged = "SREBotWarningLogged"
scheduled_tasks_logged = "SREBotScheduledTasksLogged"
}
15 changes: 15 additions & 0 deletions terraform/queries.tf
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,18 @@ resource "aws_cloudwatch_query_definition" "api_warnings" {
| limit 20
QUERY
}

# Saved CloudWatch Logs Insights query listing the 20 most recent
# "Scheduler is running at" heartbeat lines from the API log group.
# NOTE(review): the resource label says "errors" but the query returns
# heartbeat lines; the label is kept so the Terraform address does not change.
resource "aws_cloudwatch_query_definition" "scheduled_tasks_errors" {
  name            = "SRE Bot Scheduled Tasks Logged"
  log_group_names = [local.api_cloudwatch_log_group]

  # The indented heredoc (<<-) strips the common leading whitespace, so the
  # query text sent to CloudWatch starts at column 0.
  query_string = <<-QUERY
    fields @timestamp, @message, @logStream
    | filter @message like /Scheduler is running at/
    | sort @timestamp desc
    | limit 20
    QUERY
}

0 comments on commit 08340bd

Please sign in to comment.