From 5cec3171c73f635fc83c64a15f5118b6e3d61ec9 Mon Sep 17 00:00:00 2001
From: Ben Larabie
Date: Mon, 13 May 2024 09:52:01 -0400
Subject: [PATCH] Adding alarms for github arc runner failures (#1242)

Co-authored-by: Mike Pond <32133001+P0NDER0SA@users.noreply.github.com>
---
 aws/eks/cloudwatch_alarms.tf | 21 +++++++++++++++++++++
 aws/eks/cloudwatch_log.tf    | 19 +++++++++++++++++++
 2 files changed, 40 insertions(+)

diff --git a/aws/eks/cloudwatch_alarms.tf b/aws/eks/cloudwatch_alarms.tf
index 2009c6c66..679535613 100644
--- a/aws/eks/cloudwatch_alarms.tf
+++ b/aws/eks/cloudwatch_alarms.tf
@@ -938,3 +938,24 @@ resource "aws_cloudwatch_metric_alarm" "aggregating-queues-not-active-5-minutes-
   alarm_actions       = [var.sns_alert_critical_arn]
   ok_actions          = [var.sns_alert_critical_arn]
 }
+
+# Alarms when the github-arc-write-alarm log metric filter records at least one
+# matching log event within a single 300-second period.
+# The filter only publishes a data point (value 1) when its pattern matches, so
+# the comparison must be Sum >= 1: a "less than 1" comparison can never be met
+# by real data, and missing data is treated as not breaching.
+resource "aws_cloudwatch_metric_alarm" "github-arc-runner-write-alarm" {
+  count               = var.cloudwatch_enabled ? 1 : 0
+  alarm_name          = "github-arc-runner-write-alarm"
+  alarm_description   = "GitHub ARC Runners Are Failing - Check Version Deprecation"
+  comparison_operator = "GreaterThanOrEqualToThreshold"
+  evaluation_periods  = "1"
+  metric_name         = aws_cloudwatch_log_metric_filter.github-arc-write-alarm[0].metric_transformation[0].name
+  namespace           = aws_cloudwatch_log_metric_filter.github-arc-write-alarm[0].metric_transformation[0].namespace
+  period              = "300"
+  statistic           = "Sum"
+  threshold           = 1
+  treat_missing_data  = "notBreaching"
+  alarm_actions       = [var.sns_alert_critical_arn]
+  ok_actions          = [var.sns_alert_critical_arn]
+}
diff --git a/aws/eks/cloudwatch_log.tf b/aws/eks/cloudwatch_log.tf
index 1397e9767..540d5eac2 100644
--- a/aws/eks/cloudwatch_log.tf
+++ b/aws/eks/cloudwatch_log.tf
@@ -166,3 +166,22 @@ resource "aws_cloudwatch_log_metric_filter" "aggregating-queues-are-active" {
     value     = "1"
   }
 }
+
+# Counts log events in the notification-canada-ca-eks-application-logs group
+# that match the pattern below, publishing a value of 1 per match to the
+# LogMetrics namespace.
+# NOTE(review): "occured" presumably mirrors the spelling in the emitted log
+# line, and unquoted multi-word patterns are matched term by term rather than
+# as one exact phrase - TODO confirm both against real runner logs.
+resource "aws_cloudwatch_log_metric_filter" "github-arc-write-alarm" {
+  count          = var.cloudwatch_enabled ? 1 : 0
+  name           = "GitHub ARC Runners Write Alarm"
+  pattern        = "WRITE ERROR: An error occured:"
+  log_group_name = aws_cloudwatch_log_group.notification-canada-ca-eks-application-logs[0].name
+
+  metric_transformation {
+    name      = "aggregating-github-arc-write-alarm"
+    namespace = "LogMetrics"
+    value     = "1"
+  }
+}