diff --git a/terraform/alarms.tf b/terraform/alarms.tf
index e1b747da..12a34b0f 100644
--- a/terraform/alarms.tf
+++ b/terraform/alarms.tf
@@ -55,3 +55,38 @@ resource "aws_cloudwatch_metric_alarm" "sre_bot_warning" {
   alarm_actions = [aws_sns_topic.cloudwatch_warning.arn]
   ok_actions    = [aws_sns_topic.cloudwatch_warning.arn]
 }
+
+# Heartbeat metric: records a value of 1 for every log event in the API log
+# group that contains the text "Scheduler is running at".
+resource "aws_cloudwatch_log_metric_filter" "sre_bot_scheduled_tasks" {
+  name           = local.scheduled_tasks_logged
+  pattern        = "Scheduler is running at"
+  log_group_name = local.api_cloudwatch_log_group
+
+  metric_transformation {
+    name      = local.scheduled_tasks_logged
+    namespace = local.error_namespace
+    value     = "1"
+  }
+}
+
+# Alarms when the heartbeat count over a 300-second period is below 1.
+# The metric filter above sets no default_value, so it publishes no datapoint
+# when nothing matches; missing data must therefore count as breaching,
+# otherwise the alarm can never fire in the "logs missing" case it describes.
+resource "aws_cloudwatch_metric_alarm" "sre_bot_scheduled_tasks" {
+  alarm_name          = "SRE Bot Scheduled Tasks"
+  alarm_description   = "SRE Bot Scheduled Tasks logs missing"
+  comparison_operator = "LessThanThreshold"
+
+  metric_name        = aws_cloudwatch_log_metric_filter.sre_bot_scheduled_tasks.metric_transformation[0].name
+  namespace          = aws_cloudwatch_log_metric_filter.sre_bot_scheduled_tasks.metric_transformation[0].namespace
+  period             = "300"
+  evaluation_periods = "1"
+  statistic          = "Sum"
+  threshold          = "1"
+  treat_missing_data = "breaching"
+
+  alarm_actions = [aws_sns_topic.cloudwatch_warning.arn]
+  ok_actions    = [aws_sns_topic.cloudwatch_warning.arn]
+}
diff --git a/terraform/local.tf b/terraform/local.tf
index 18e858c7..f4895c60 100644
--- a/terraform/local.tf
+++ b/terraform/local.tf
@@ -3,4 +3,5 @@ locals {
   error_logged             = "SREBotErrorLogged"
   error_namespace          = "SREBot"
   warning_logged           = "SREBotWarningLogged"
+  scheduled_tasks_logged   = "SREBotScheduledTasksLogged"
 }
diff --git a/terraform/queries.tf b/terraform/queries.tf
index 8f09358b..3a7cf019 100644
--- a/terraform/queries.tf
+++ b/terraform/queries.tf
@@ -27,3 +27,19 @@ resource "aws_cloudwatch_query_definition" "api_warnings" {
     | limit 20
   QUERY
 }
+
+# Saved Logs Insights query: the 20 most recent "Scheduler is running at" events.
+resource "aws_cloudwatch_query_definition" "scheduled_tasks_errors" {
+  name = "SRE Bot Scheduled Tasks Logged"
+
+  log_group_names = [
+    local.api_cloudwatch_log_group
+  ]
+
+  query_string = <<-QUERY
+    fields @timestamp, @message, @logStream
+    | filter @message like /Scheduler is running at/
+    | sort @timestamp desc
+    | limit 20
+  QUERY
+}