Skip to content

Commit

Permalink
Update CloudWatch alarms and log metric filters to reflect the new pod and deployment names
Browse files: browse the repository at this point in the history
[review]
  • Loading branch information
ben851 committed Dec 3, 2024
1 parent 98cc11b commit 0cdff2d
Show file tree
Hide file tree
Showing 6 changed files with 65 additions and 157 deletions.
58 changes: 21 additions & 37 deletions aws/eks/cloudwatch_alarms.tf
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ resource "aws_cloudwatch_metric_alarm" "admin-pods-high-cpu-warning" {
treat_missing_data = "missing"
dimensions = {
Namespace = "notification-canada-ca"
Service = "admin"
Service = "notify-admin"
ClusterName = aws_eks_cluster.notification-canada-ca-eks-cluster.name
}
}
Expand All @@ -197,7 +197,7 @@ resource "aws_cloudwatch_metric_alarm" "api-pods-high-cpu-warning" {
treat_missing_data = "missing"
dimensions = {
Namespace = "notification-canada-ca"
Service = "api"
Service = "notify-api"
ClusterName = aws_eks_cluster.notification-canada-ca-eks-cluster.name
}
}
Expand All @@ -218,7 +218,7 @@ resource "aws_cloudwatch_metric_alarm" "celery-primary-pods-high-cpu-warning" {
treat_missing_data = "missing"
dimensions = {
Namespace = "notification-canada-ca"
Service = "celery-primary"
Service = "notify-celery-primary"
ClusterName = aws_eks_cluster.notification-canada-ca-eks-cluster.name
}
}
Expand All @@ -239,7 +239,7 @@ resource "aws_cloudwatch_metric_alarm" "celery-scalable-pods-high-cpu-warning" {
treat_missing_data = "missing"
dimensions = {
Namespace = "notification-canada-ca"
Service = "celery-scalable"
Service = "notify-celery-scalable"
ClusterName = aws_eks_cluster.notification-canada-ca-eks-cluster.name
}
}
Expand All @@ -260,7 +260,7 @@ resource "aws_cloudwatch_metric_alarm" "celery-sms-pods-high-cpu-warning" {
treat_missing_data = "missing"
dimensions = {
Namespace = "notification-canada-ca"
Service = "celery-sms"
Service = "notify-celery-sms"
ClusterName = aws_eks_cluster.notification-canada-ca-eks-cluster.name
}
}
Expand All @@ -282,7 +282,7 @@ resource "aws_cloudwatch_metric_alarm" "admin-pods-high-memory-warning" {
treat_missing_data = "missing"
dimensions = {
Namespace = "notification-canada-ca"
Service = "admin"
Service = "notify-admin"
ClusterName = aws_eks_cluster.notification-canada-ca-eks-cluster.name
}
}
Expand All @@ -303,7 +303,7 @@ resource "aws_cloudwatch_metric_alarm" "api-pods-high-memory-warning" {
treat_missing_data = "missing"
dimensions = {
Namespace = "notification-canada-ca"
Service = "api"
Service = "notify-api"
ClusterName = aws_eks_cluster.notification-canada-ca-eks-cluster.name
}
}
Expand All @@ -324,7 +324,7 @@ resource "aws_cloudwatch_metric_alarm" "celery-primary-pods-high-memory-warning"
treat_missing_data = "missing"
dimensions = {
Namespace = "notification-canada-ca"
Service = "celery-primary"
Service = "notify-celery-primary"
ClusterName = aws_eks_cluster.notification-canada-ca-eks-cluster.name
}
}
Expand All @@ -345,7 +345,7 @@ resource "aws_cloudwatch_metric_alarm" "celery-sms-pods-high-memory-warning" {
treat_missing_data = "missing"
dimensions = {
Namespace = "notification-canada-ca"
Service = "celery-sms"
Service = "notify-celery-sms"
ClusterName = aws_eks_cluster.notification-canada-ca-eks-cluster.name
}
}
Expand Down Expand Up @@ -478,7 +478,7 @@ resource "aws_cloudwatch_metric_alarm" "celery-primary-replicas-unavailable" {
dimensions = {
ClusterName = aws_eks_cluster.notification-canada-ca-eks-cluster.name
namespace = var.notify_k8s_namespace
deployment = "celery-primary"
deployment = "notify-celery-primary"
}
}
}
Expand Down Expand Up @@ -507,7 +507,7 @@ resource "aws_cloudwatch_metric_alarm" "celery-scalable-replicas-unavailable" {
dimensions = {
ClusterName = aws_eks_cluster.notification-canada-ca-eks-cluster.name
namespace = var.notify_k8s_namespace
deployment = "celery-scalable"
deployment = "notify-celery-scalable"
}
}
}
Expand Down Expand Up @@ -535,7 +535,7 @@ resource "aws_cloudwatch_metric_alarm" "celery-beat-replicas-unavailable" {
dimensions = {
ClusterName = aws_eks_cluster.notification-canada-ca-eks-cluster.name
namespace = var.notify_k8s_namespace
deployment = "celery-beat"
deployment = "notify-celery-beat"
}
}
}
Expand Down Expand Up @@ -563,7 +563,7 @@ resource "aws_cloudwatch_metric_alarm" "celery-sms-replicas-unavailable" {
dimensions = {
ClusterName = aws_eks_cluster.notification-canada-ca-eks-cluster.name
namespace = var.notify_k8s_namespace
deployment = "celery-sms"
deployment = "notify-celery-sms"
}
}
}
Expand Down Expand Up @@ -591,7 +591,7 @@ resource "aws_cloudwatch_metric_alarm" "celery-email-send-primary-replicas-unava
dimensions = {
ClusterName = aws_eks_cluster.notification-canada-ca-eks-cluster.name
namespace = var.notify_k8s_namespace
deployment = "celery-email-send-primary"
deployment = "notify-celery-email-send-primary"
}
}
}
Expand Down Expand Up @@ -620,7 +620,7 @@ resource "aws_cloudwatch_metric_alarm" "celery-email-send-scalable-replicas-unav
dimensions = {
ClusterName = aws_eks_cluster.notification-canada-ca-eks-cluster.name
namespace = var.notify_k8s_namespace
deployment = "celery-email-send-scalable"
deployment = "notify-celery-email-send-scalable"
}
}
}
Expand Down Expand Up @@ -648,7 +648,7 @@ resource "aws_cloudwatch_metric_alarm" "celery-sms-send-primary-replicas-unavail
dimensions = {
ClusterName = aws_eks_cluster.notification-canada-ca-eks-cluster.name
namespace = var.notify_k8s_namespace
deployment = "celery-sms-send-primary"
deployment = "notify-celery-sms-send-primary"
}
}
}
Expand Down Expand Up @@ -677,7 +677,7 @@ resource "aws_cloudwatch_metric_alarm" "celery-sms-send-scalable-replicas-unavai
dimensions = {
ClusterName = aws_eks_cluster.notification-canada-ca-eks-cluster.name
namespace = var.notify_k8s_namespace
deployment = "celery-sms-send-scalable"
deployment = "notify-celery-sms-send-scalable"
}
}
}
Expand Down Expand Up @@ -705,7 +705,7 @@ resource "aws_cloudwatch_metric_alarm" "admin-replicas-unavailable" {
dimensions = {
ClusterName = aws_eks_cluster.notification-canada-ca-eks-cluster.name
namespace = var.notify_k8s_namespace
deployment = "admin"
deployment = "notify-admin"
}
}
}
Expand Down Expand Up @@ -733,7 +733,7 @@ resource "aws_cloudwatch_metric_alarm" "api-replicas-unavailable" {
dimensions = {
ClusterName = aws_eks_cluster.notification-canada-ca-eks-cluster.name
namespace = var.notify_k8s_namespace
deployment = "api"
deployment = "notify-api"
}
}
}
Expand Down Expand Up @@ -761,7 +761,7 @@ resource "aws_cloudwatch_metric_alarm" "documentation-replicas-unavailable" {
dimensions = {
ClusterName = aws_eks_cluster.notification-canada-ca-eks-cluster.name
namespace = var.notify_k8s_namespace
deployment = "documentation"
deployment = "notify-documentation"
}
}
}
Expand Down Expand Up @@ -789,7 +789,7 @@ resource "aws_cloudwatch_metric_alarm" "document-download-api-replicas-unavailab
dimensions = {
ClusterName = aws_eks_cluster.notification-canada-ca-eks-cluster.name
namespace = var.notify_k8s_namespace
deployment = "document-download-api"
deployment = "notify-document-download"
}
}
}
Expand Down Expand Up @@ -939,22 +939,6 @@ resource "aws_cloudwatch_metric_alarm" "aggregating-queues-not-active-5-minutes-
ok_actions = [var.sns_alert_critical_arn]
}

# Critical alarm raised when GitHub ARC runner pods log errors.
#
# The metric name and namespace are read from the metric transformation of
# aws_cloudwatch_log_metric_filter.github-arc-runner-alarm, which publishes a
# value of 1 for every matching ERROR log line and sets no default value.
# The 5-minute Sum is therefore either >= 1 (errors were logged) or has no
# datapoint at all (no errors).
#
# The comparison must be "greater than or equal": with "LessThanThreshold"
# and a threshold of 1 the condition could never be met while data exists,
# and missing data is treated as notBreaching, so the alarm would never
# leave the OK state.
resource "aws_cloudwatch_metric_alarm" "github-arc-runner-error-alarm" {
  # Only created when CloudWatch resources are enabled for this environment.
  count             = var.cloudwatch_enabled ? 1 : 0
  alarm_name        = "github-arc-runner-error-alarm"
  alarm_description = "GitHub ARC Runners Are Failing"

  # Fire as soon as at least one matching error is counted in a period.
  comparison_operator = "GreaterThanOrEqualToThreshold"
  evaluation_periods  = "1"
  metric_name         = aws_cloudwatch_log_metric_filter.github-arc-runner-alarm[0].metric_transformation[0].name
  namespace           = aws_cloudwatch_log_metric_filter.github-arc-runner-alarm[0].metric_transformation[0].namespace
  period              = "300"
  statistic           = "Sum"
  threshold           = 1

  # No datapoints means no matching error lines were logged: stay OK.
  treat_missing_data = "notBreaching"

  # Notify the critical SNS topic both on entering ALARM and on recovery.
  alarm_actions = [var.sns_alert_critical_arn]
  ok_actions    = [var.sns_alert_critical_arn]
}

resource "aws_cloudwatch_metric_alarm" "service-callback-too-many-failures-warning" {
count = var.cloudwatch_enabled ? 1 : 0
alarm_name = "service-callback-too-many-failures-warning"
Expand Down
23 changes: 5 additions & 18 deletions aws/eks/cloudwatch_log.tf
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ resource "aws_cloudwatch_log_metric_filter" "bounce-rate-critical" {
resource "aws_cloudwatch_log_metric_filter" "api-evicted-pods" {
count = var.cloudwatch_enabled ? 1 : 0
name = "api-evicted-pods"
pattern = "{ ($.reason = \"Evicted\") && ($.kube_pod_status_reason = 1) && ($.pod = \"api-*\") }"
pattern = "{ ($.reason = \"Evicted\") && ($.kube_pod_status_reason = 1) && ($.pod = \"notify-api-*\") }"
log_group_name = aws_cloudwatch_log_group.notification-canada-ca-eks-prometheus-logs[0].name

metric_transformation {
Expand All @@ -105,7 +105,7 @@ resource "aws_cloudwatch_log_metric_filter" "api-evicted-pods" {
resource "aws_cloudwatch_log_metric_filter" "celery-evicted-pods" {
count = var.cloudwatch_enabled ? 1 : 0
name = "celery-evicted-pods"
pattern = "{ ($.reason = \"Evicted\") && ($.kube_pod_status_reason = 1) && ($.pod = \"celery-*\") }"
pattern = "{ ($.reason = \"Evicted\") && ($.kube_pod_status_reason = 1) && ($.pod = \"notify-celery-*\") }"
log_group_name = aws_cloudwatch_log_group.notification-canada-ca-eks-prometheus-logs[0].name

metric_transformation {
Expand All @@ -118,7 +118,7 @@ resource "aws_cloudwatch_log_metric_filter" "celery-evicted-pods" {
resource "aws_cloudwatch_log_metric_filter" "admin-evicted-pods" {
count = var.cloudwatch_enabled ? 1 : 0
name = "admin-evicted-pods"
pattern = "{ ($.reason = \"Evicted\") && ($.kube_pod_status_reason = 1) && ($.pod = \"admin-*\") }"
pattern = "{ ($.reason = \"Evicted\") && ($.kube_pod_status_reason = 1) && ($.pod = \"notify-admin-*\") }"
log_group_name = aws_cloudwatch_log_group.notification-canada-ca-eks-prometheus-logs[0].name

metric_transformation {
Expand All @@ -131,7 +131,7 @@ resource "aws_cloudwatch_log_metric_filter" "admin-evicted-pods" {
resource "aws_cloudwatch_log_metric_filter" "document-download-evicted-pods" {
count = var.cloudwatch_enabled ? 1 : 0
name = "document-download-evicted-pods"
pattern = "{ ($.reason = \"Evicted\") && ($.kube_pod_status_reason = 1) && ($.pod = \"document-download-*\") }"
pattern = "{ ($.reason = \"Evicted\") && ($.kube_pod_status_reason = 1) && ($.pod = \"notify-document-download-*\") }"
log_group_name = aws_cloudwatch_log_group.notification-canada-ca-eks-prometheus-logs[0].name

metric_transformation {
Expand All @@ -144,7 +144,7 @@ resource "aws_cloudwatch_log_metric_filter" "document-download-evicted-pods" {
resource "aws_cloudwatch_log_metric_filter" "documentation-evicted-pods" {
count = var.cloudwatch_enabled ? 1 : 0
name = "documentation-evicted-pods"
pattern = "{ ($.reason = \"Evicted\") && ($.kube_pod_status_reason = 1) && ($.pod = \"documentation-*\") }"
pattern = "{ ($.reason = \"Evicted\") && ($.kube_pod_status_reason = 1) && ($.pod = \"notify-documentation-*\") }"
log_group_name = aws_cloudwatch_log_group.notification-canada-ca-eks-prometheus-logs[0].name

metric_transformation {
Expand All @@ -167,19 +167,6 @@ resource "aws_cloudwatch_log_metric_filter" "aggregating-queues-are-active" {
}
}

# Log metric filter that counts ERROR lines written by GitHub ARC runner pods.
#
# Reads the EKS application log group and matches JSON log events whose
# kubernetes.pod_name matches the per-environment runner pod name pattern and
# whose log field contains "ERROR". Each match publishes a value of 1 to the
# "aggregating-github-arc-runner-alarm" metric in the "LogMetrics" namespace.
resource "aws_cloudwatch_log_metric_filter" "github-arc-runner-alarm" {
  # Only created when CloudWatch resources are enabled for this environment.
  count = var.cloudwatch_enabled ? 1 : 0

  name           = "GitHub ARC Runners Write Alarm"
  log_group_name = aws_cloudwatch_log_group.notification-canada-ca-eks-application-logs[0].name
  pattern        = "{ $.kubernetes.pod_name = \"github-arc-ss-${var.env}-*-runner-*\" && $.log = \"*ERROR*\" }"

  metric_transformation {
    namespace = "LogMetrics"
    name      = "aggregating-github-arc-runner-alarm"
    value     = "1"
  }
}

resource "aws_cloudwatch_log_metric_filter" "callback-request-failures" {
count = var.cloudwatch_enabled ? 1 : 0
name = "callback-request-failures"
Expand Down
Loading

0 comments on commit 0cdff2d

Please sign in to comment.