Skip to content

Commit

Permalink
making these conditional for our production rollout. Need them to wor…
Browse files Browse the repository at this point in the history
…k on staging first only for a while

[review]
  • Loading branch information
P0NDER0SA committed Dec 12, 2024
1 parent 97b97e6 commit 621d0fe
Show file tree
Hide file tree
Showing 10 changed files with 4,099 additions and 92 deletions.
96 changes: 48 additions & 48 deletions aws/eks/cloudwatch_alarms.tf

Large diffs are not rendered by default.

1,016 changes: 1,016 additions & 0 deletions aws/eks/cloudwatch_alarms_kustomize.tf

Large diffs are not rendered by default.

34 changes: 17 additions & 17 deletions aws/eks/cloudwatch_log.tf
Original file line number Diff line number Diff line change
Expand Up @@ -3,25 +3,25 @@
###

# CloudWatch log group named "/aws/eks/<eks_cluster_name>/cluster", with
# retention taken from var.log_retention_period_days.
# NOTE(review): this is a rendered diff, so two `count` lines appear below.
# The first (cloudwatch_enabled only) looks like the removed line and the
# second (additionally requiring env != "production") like the added line;
# a real .tf file must contain only one of them — confirm against the repo.
resource "aws_cloudwatch_log_group" "notification-canada-ca-eks-cluster-logs" {
count = var.cloudwatch_enabled ? 1 : 0
count = var.cloudwatch_enabled && var.env != "production" ? 1 : 0
name = "/aws/eks/${var.eks_cluster_name}/cluster"
retention_in_days = var.log_retention_period_days
}

# CloudWatch log group named
# "/aws/containerinsights/<eks_cluster_name>/application", with retention taken
# from var.log_retention_period_days.
# NOTE(review): this is a rendered diff, so two `count` lines appear below.
# The first looks like the removed line and the second (additionally requiring
# env != "production") like the added line — confirm against the repo.
resource "aws_cloudwatch_log_group" "notification-canada-ca-eks-application-logs" {
count = var.cloudwatch_enabled ? 1 : 0
count = var.cloudwatch_enabled && var.env != "production" ? 1 : 0
name = "/aws/containerinsights/${var.eks_cluster_name}/application"
retention_in_days = var.log_retention_period_days
}

# CloudWatch log group named
# "/aws/containerinsights/<eks_cluster_name>/prometheus", with retention taken
# from var.log_retention_period_days.
# NOTE(review): this is a rendered diff, so two `count` lines appear below.
# The first looks like the removed line and the second (additionally requiring
# env != "production") like the added line — confirm against the repo.
resource "aws_cloudwatch_log_group" "notification-canada-ca-eks-prometheus-logs" {
count = var.cloudwatch_enabled ? 1 : 0
count = var.cloudwatch_enabled && var.env != "production" ? 1 : 0
name = "/aws/containerinsights/${var.eks_cluster_name}/prometheus"
retention_in_days = var.log_retention_period_days
}

# CloudWatch log group named "blazer" with a fixed retention of 1827 days.
# NOTE(review): this is a rendered diff, so two `count` lines appear below.
# The first looks like the removed line and the second (additionally requiring
# env != "production") like the added line — confirm against the repo.
resource "aws_cloudwatch_log_group" "blazer" {
count = var.cloudwatch_enabled ? 1 : 0
count = var.cloudwatch_enabled && var.env != "production" ? 1 : 0
name = "blazer"
retention_in_days = 1827 # 5 years
}
Expand All @@ -31,7 +31,7 @@ resource "aws_cloudwatch_log_group" "blazer" {
# AWS EKS Cloudwatch log metric filters
###
resource "aws_cloudwatch_log_metric_filter" "web-500-errors" {
count = var.cloudwatch_enabled ? 1 : 0
count = var.cloudwatch_enabled && var.env != "production" ? 1 : 0
name = "web-500-errors"
pattern = "\"\\\" 500 \""
log_group_name = aws_cloudwatch_log_group.notification-canada-ca-eks-application-logs[0].name
Expand All @@ -44,7 +44,7 @@ resource "aws_cloudwatch_log_metric_filter" "web-500-errors" {
}

resource "aws_cloudwatch_log_metric_filter" "celery-error" {
count = var.cloudwatch_enabled ? 1 : 0
count = var.cloudwatch_enabled && var.env != "production" ? 1 : 0
name = "celery-error"
pattern = "%ERROR/.*Worker|ERROR/MainProcess%"
log_group_name = aws_cloudwatch_log_group.notification-canada-ca-eks-application-logs[0].name
Expand All @@ -57,7 +57,7 @@ resource "aws_cloudwatch_log_metric_filter" "celery-error" {
}

resource "aws_cloudwatch_log_metric_filter" "malware-detected" {
count = var.cloudwatch_enabled ? 1 : 0
count = var.cloudwatch_enabled && var.env != "production" ? 1 : 0
name = "malware-detected"
pattern = jsonencode("Malicious content detected! Download and attachment failed")
log_group_name = aws_cloudwatch_log_group.notification-canada-ca-eks-application-logs[0].name
Expand All @@ -70,7 +70,7 @@ resource "aws_cloudwatch_log_metric_filter" "malware-detected" {
}

resource "aws_cloudwatch_log_metric_filter" "scanfiles-timeout" {
count = var.cloudwatch_enabled ? 1 : 0
count = var.cloudwatch_enabled && var.env != "production" ? 1 : 0
name = "scanfiles-timeout"
pattern = "Malware scan timed out for notification.id"
log_group_name = aws_cloudwatch_log_group.notification-canada-ca-eks-application-logs[0].name
Expand All @@ -83,7 +83,7 @@ resource "aws_cloudwatch_log_metric_filter" "scanfiles-timeout" {
}

resource "aws_cloudwatch_log_metric_filter" "bounce-rate-critical" {
count = var.cloudwatch_enabled ? 1 : 0
count = var.cloudwatch_enabled && var.env != "production" ? 1 : 0
name = "bounce-rate-critical"
pattern = "critical bounce rate threshold of 10"
log_group_name = aws_cloudwatch_log_group.notification-canada-ca-eks-application-logs[0].name
Expand All @@ -96,7 +96,7 @@ resource "aws_cloudwatch_log_metric_filter" "bounce-rate-critical" {
}

resource "aws_cloudwatch_log_metric_filter" "api-evicted-pods" {
count = var.cloudwatch_enabled ? 1 : 0
count = var.cloudwatch_enabled && var.env != "production" ? 1 : 0
name = "api-evicted-pods"
pattern = "{ ($.reason = \"Evicted\") && ($.kube_pod_status_reason = 1) && ($.pod = \"notify-api-*\") }"
log_group_name = aws_cloudwatch_log_group.notification-canada-ca-eks-prometheus-logs[0].name
Expand All @@ -109,7 +109,7 @@ resource "aws_cloudwatch_log_metric_filter" "api-evicted-pods" {
}

resource "aws_cloudwatch_log_metric_filter" "celery-evicted-pods" {
count = var.cloudwatch_enabled ? 1 : 0
count = var.cloudwatch_enabled && var.env != "production" ? 1 : 0
name = "celery-evicted-pods"
pattern = "{ ($.reason = \"Evicted\") && ($.kube_pod_status_reason = 1) && ($.pod = \"notify-celery-*\") }"
log_group_name = aws_cloudwatch_log_group.notification-canada-ca-eks-prometheus-logs[0].name
Expand All @@ -122,7 +122,7 @@ resource "aws_cloudwatch_log_metric_filter" "celery-evicted-pods" {
}

resource "aws_cloudwatch_log_metric_filter" "admin-evicted-pods" {
count = var.cloudwatch_enabled ? 1 : 0
count = var.cloudwatch_enabled && var.env != "production" ? 1 : 0
name = "admin-evicted-pods"
pattern = "{ ($.reason = \"Evicted\") && ($.kube_pod_status_reason = 1) && ($.pod = \"notify-admin-*\") }"
log_group_name = aws_cloudwatch_log_group.notification-canada-ca-eks-prometheus-logs[0].name
Expand All @@ -135,7 +135,7 @@ resource "aws_cloudwatch_log_metric_filter" "admin-evicted-pods" {
}

resource "aws_cloudwatch_log_metric_filter" "document-download-evicted-pods" {
count = var.cloudwatch_enabled ? 1 : 0
count = var.cloudwatch_enabled && var.env != "production" ? 1 : 0
name = "document-download-evicted-pods"
pattern = "{ ($.reason = \"Evicted\") && ($.kube_pod_status_reason = 1) && ($.pod = \"notify-document-download-*\") }"
log_group_name = aws_cloudwatch_log_group.notification-canada-ca-eks-prometheus-logs[0].name
Expand All @@ -148,7 +148,7 @@ resource "aws_cloudwatch_log_metric_filter" "document-download-evicted-pods" {
}

resource "aws_cloudwatch_log_metric_filter" "documentation-evicted-pods" {
count = var.cloudwatch_enabled ? 1 : 0
count = var.cloudwatch_enabled && var.env != "production" ? 1 : 0
name = "documentation-evicted-pods"
pattern = "{ ($.reason = \"Evicted\") && ($.kube_pod_status_reason = 1) && ($.pod = \"notify-documentation-*\") }"
log_group_name = aws_cloudwatch_log_group.notification-canada-ca-eks-prometheus-logs[0].name
Expand All @@ -161,7 +161,7 @@ resource "aws_cloudwatch_log_metric_filter" "documentation-evicted-pods" {
}

resource "aws_cloudwatch_log_metric_filter" "aggregating-queues-are-active" {
count = var.cloudwatch_enabled ? 1 : 0
count = var.cloudwatch_enabled && var.env != "production" ? 1 : 0
name = "aggregating-queues-are-active"
pattern = "Batch saving with"
log_group_name = aws_cloudwatch_log_group.notification-canada-ca-eks-application-logs[0].name
Expand All @@ -174,7 +174,7 @@ resource "aws_cloudwatch_log_metric_filter" "aggregating-queues-are-active" {
}

resource "aws_cloudwatch_log_metric_filter" "callback-request-failures" {
count = var.cloudwatch_enabled ? 1 : 0
count = var.cloudwatch_enabled && var.env != "production" ? 1 : 0
name = "callback-request-failures"
pattern = "send_delivery_status_to_service request failed for notification_id"
log_group_name = aws_cloudwatch_log_group.notification-canada-ca-eks-application-logs[0].name
Expand All @@ -187,7 +187,7 @@ resource "aws_cloudwatch_log_metric_filter" "callback-request-failures" {
}

resource "aws_cloudwatch_log_metric_filter" "throttling-exceptions" {
count = var.cloudwatch_enabled ? 1 : 0
count = var.cloudwatch_enabled && var.env != "production" ? 1 : 0
name = "throttling-exceptions"
pattern = "ThrottlingException"
log_group_name = aws_cloudwatch_log_group.notification-canada-ca-eks-application-logs[0].name
Expand Down
200 changes: 200 additions & 0 deletions aws/eks/cloudwatch_log_kustomize.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,200 @@
###
# AWS EKS CloudWatch log groups
###

# Log group "/aws/eks/<eks_cluster_name>/cluster".
# One instance is created only when CloudWatch is enabled AND the environment
# is "production"; otherwise zero instances are created.
# NOTE(review): aws/eks/cloudwatch_log.tf in this same commit declares a
# resource with this same type and name (gated on env != "production").
# Terraform requires resource type + name to be unique within a module, so
# this may fail validation — confirm.
resource "aws_cloudwatch_log_group" "notification-canada-ca-eks-cluster-logs" {
  count = (var.cloudwatch_enabled && var.env == "production") ? 1 : 0

  retention_in_days = var.log_retention_period_days
  name              = "/aws/eks/${var.eks_cluster_name}/cluster"
}

# Log group "/aws/containerinsights/<eks_cluster_name>/application".
# One instance is created only when CloudWatch is enabled AND the environment
# is "production"; otherwise zero instances are created.
resource "aws_cloudwatch_log_group" "notification-canada-ca-eks-application-logs" {
  count = (var.cloudwatch_enabled && var.env == "production") ? 1 : 0

  retention_in_days = var.log_retention_period_days
  name              = "/aws/containerinsights/${var.eks_cluster_name}/application"
}

# Log group "/aws/containerinsights/<eks_cluster_name>/prometheus".
# One instance is created only when CloudWatch is enabled AND the environment
# is "production"; otherwise zero instances are created.
resource "aws_cloudwatch_log_group" "notification-canada-ca-eks-prometheus-logs" {
  count = (var.cloudwatch_enabled && var.env == "production") ? 1 : 0

  retention_in_days = var.log_retention_period_days
  name              = "/aws/containerinsights/${var.eks_cluster_name}/prometheus"
}

# Log group "blazer" with a fixed retention period instead of the shared
# var.log_retention_period_days used by the other groups in this file.
# One instance is created only when CloudWatch is enabled AND the environment
# is "production"; otherwise zero instances are created.
resource "aws_cloudwatch_log_group" "blazer" {
  count = (var.cloudwatch_enabled && var.env == "production") ? 1 : 0

  retention_in_days = 1827 # 5 years
  name              = "blazer"
}


###
# AWS EKS CloudWatch log metric filters
###
# Metric filter on the application log group.
# After HCL unescaping the filter pattern is the quoted phrase `"\" 500 "`,
# i.e. a literal double quote followed by " 500 ".
# Each matching log event publishes the value 1 to LogMetrics/500-errors.
# Created only when CloudWatch is enabled AND the environment is "production".
# NOTE(review): aws/eks/cloudwatch_log.tf in this same commit declares metric
# filters with the same type and names as the ones in this file (gated on
# env != "production"). Terraform requires resource type + name to be unique
# within a module, so this may fail validation — confirm.
resource "aws_cloudwatch_log_metric_filter" "web-500-errors" {
  count = (var.cloudwatch_enabled && var.env == "production") ? 1 : 0

  name           = "web-500-errors"
  log_group_name = aws_cloudwatch_log_group.notification-canada-ca-eks-application-logs[0].name
  pattern        = "\"\\\" 500 \""

  metric_transformation {
    namespace = "LogMetrics"
    name      = "500-errors"
    value     = "1"
  }
}

# Metric filter on the application log group.
# The pattern is a percent-delimited regular expression with two alternatives:
# "ERROR/.*Worker" or "ERROR/MainProcess".
# Each matching log event publishes the value 1 to LogMetrics/celery-error.
# Created only when CloudWatch is enabled AND the environment is "production".
resource "aws_cloudwatch_log_metric_filter" "celery-error" {
  count = (var.cloudwatch_enabled && var.env == "production") ? 1 : 0

  name           = "celery-error"
  log_group_name = aws_cloudwatch_log_group.notification-canada-ca-eks-application-logs[0].name
  pattern        = "%ERROR/.*Worker|ERROR/MainProcess%"

  metric_transformation {
    namespace = "LogMetrics"
    name      = "celery-error"
    value     = "1"
  }
}

# Metric filter on the application log group.
# jsonencode() wraps the message in double quotes, so the filter pattern is a
# single quoted phrase rather than a list of separate words.
# Each matching log event publishes the value 1 to LogMetrics/malware-detected.
# Created only when CloudWatch is enabled AND the environment is "production".
resource "aws_cloudwatch_log_metric_filter" "malware-detected" {
  count = (var.cloudwatch_enabled && var.env == "production") ? 1 : 0

  name           = "malware-detected"
  log_group_name = aws_cloudwatch_log_group.notification-canada-ca-eks-application-logs[0].name
  pattern        = jsonencode("Malicious content detected! Download and attachment failed")

  metric_transformation {
    namespace = "LogMetrics"
    name      = "malware-detected"
    value     = "1"
  }
}

# Metric filter on the application log group for malware-scan timeout messages.
# Each matching log event publishes the value 1 to LogMetrics/scanfiles-timeout.
# Created only when CloudWatch is enabled AND the environment is "production".
# NOTE(review): the pattern is unquoted, so CloudWatch presumably treats each
# space-separated word as its own required term rather than one exact phrase;
# "notification.id" also contains a non-alphanumeric character — confirm this
# matches as intended.
resource "aws_cloudwatch_log_metric_filter" "scanfiles-timeout" {
  count = (var.cloudwatch_enabled && var.env == "production") ? 1 : 0

  name           = "scanfiles-timeout"
  log_group_name = aws_cloudwatch_log_group.notification-canada-ca-eks-application-logs[0].name
  pattern        = "Malware scan timed out for notification.id"

  metric_transformation {
    namespace = "LogMetrics"
    name      = "scanfiles-timeout"
    value     = "1"
  }
}

# Metric filter on the application log group for the critical bounce-rate
# message (pattern words: "critical bounce rate threshold of 10").
# Each matching log event publishes the value 1 to
# LogMetrics/bounce-rate-critical.
# Created only when CloudWatch is enabled AND the environment is "production".
resource "aws_cloudwatch_log_metric_filter" "bounce-rate-critical" {
  count = (var.cloudwatch_enabled && var.env == "production") ? 1 : 0

  name           = "bounce-rate-critical"
  log_group_name = aws_cloudwatch_log_group.notification-canada-ca-eks-application-logs[0].name
  pattern        = "critical bounce rate threshold of 10"

  metric_transformation {
    namespace = "LogMetrics"
    name      = "bounce-rate-critical"
    value     = "1"
  }
}

# Metric filter on the prometheus log group.
# JSON filter pattern requiring all three conditions: reason = "Evicted",
# kube_pod_status_reason = 1, and pod matching "notify-api-*".
# Each matching log event publishes the value 1 to LogMetrics/api-evicted-pods.
# Created only when CloudWatch is enabled AND the environment is "production".
resource "aws_cloudwatch_log_metric_filter" "api-evicted-pods" {
  count = (var.cloudwatch_enabled && var.env == "production") ? 1 : 0

  name           = "api-evicted-pods"
  log_group_name = aws_cloudwatch_log_group.notification-canada-ca-eks-prometheus-logs[0].name
  pattern        = "{ ($.reason = \"Evicted\") && ($.kube_pod_status_reason = 1) && ($.pod = \"notify-api-*\") }"

  metric_transformation {
    namespace = "LogMetrics"
    name      = "api-evicted-pods"
    value     = "1"
  }
}

# Metric filter on the prometheus log group.
# JSON filter pattern requiring all three conditions: reason = "Evicted",
# kube_pod_status_reason = 1, and pod matching "notify-celery-*".
# Each matching log event publishes the value 1 to
# LogMetrics/celery-evicted-pods.
# Created only when CloudWatch is enabled AND the environment is "production".
resource "aws_cloudwatch_log_metric_filter" "celery-evicted-pods" {
  count = (var.cloudwatch_enabled && var.env == "production") ? 1 : 0

  name           = "celery-evicted-pods"
  log_group_name = aws_cloudwatch_log_group.notification-canada-ca-eks-prometheus-logs[0].name
  pattern        = "{ ($.reason = \"Evicted\") && ($.kube_pod_status_reason = 1) && ($.pod = \"notify-celery-*\") }"

  metric_transformation {
    namespace = "LogMetrics"
    name      = "celery-evicted-pods"
    value     = "1"
  }
}

# Metric filter on the prometheus log group.
# JSON filter pattern requiring all three conditions: reason = "Evicted",
# kube_pod_status_reason = 1, and pod matching "notify-admin-*".
# Each matching log event publishes the value 1 to
# LogMetrics/admin-evicted-pods.
# Created only when CloudWatch is enabled AND the environment is "production".
resource "aws_cloudwatch_log_metric_filter" "admin-evicted-pods" {
  count = (var.cloudwatch_enabled && var.env == "production") ? 1 : 0

  name           = "admin-evicted-pods"
  log_group_name = aws_cloudwatch_log_group.notification-canada-ca-eks-prometheus-logs[0].name
  pattern        = "{ ($.reason = \"Evicted\") && ($.kube_pod_status_reason = 1) && ($.pod = \"notify-admin-*\") }"

  metric_transformation {
    namespace = "LogMetrics"
    name      = "admin-evicted-pods"
    value     = "1"
  }
}

# Metric filter on the prometheus log group.
# JSON filter pattern requiring all three conditions: reason = "Evicted",
# kube_pod_status_reason = 1, and pod matching "notify-document-download-*".
# Each matching log event publishes the value 1 to
# LogMetrics/document-download-evicted-pods.
# Created only when CloudWatch is enabled AND the environment is "production".
resource "aws_cloudwatch_log_metric_filter" "document-download-evicted-pods" {
  count = (var.cloudwatch_enabled && var.env == "production") ? 1 : 0

  name           = "document-download-evicted-pods"
  log_group_name = aws_cloudwatch_log_group.notification-canada-ca-eks-prometheus-logs[0].name
  pattern        = "{ ($.reason = \"Evicted\") && ($.kube_pod_status_reason = 1) && ($.pod = \"notify-document-download-*\") }"

  metric_transformation {
    namespace = "LogMetrics"
    name      = "document-download-evicted-pods"
    value     = "1"
  }
}

# Metric filter on the prometheus log group.
# JSON filter pattern requiring all three conditions: reason = "Evicted",
# kube_pod_status_reason = 1, and pod matching "notify-documentation-*".
# Each matching log event publishes the value 1 to
# LogMetrics/documentation-evicted-pods.
# Created only when CloudWatch is enabled AND the environment is "production".
resource "aws_cloudwatch_log_metric_filter" "documentation-evicted-pods" {
  count = (var.cloudwatch_enabled && var.env == "production") ? 1 : 0

  name           = "documentation-evicted-pods"
  log_group_name = aws_cloudwatch_log_group.notification-canada-ca-eks-prometheus-logs[0].name
  pattern        = "{ ($.reason = \"Evicted\") && ($.kube_pod_status_reason = 1) && ($.pod = \"notify-documentation-*\") }"

  metric_transformation {
    namespace = "LogMetrics"
    name      = "documentation-evicted-pods"
    value     = "1"
  }
}

# Metric filter on the application log group for log events containing the
# pattern words "Batch saving with".
# Each matching log event publishes the value 1 to
# LogMetrics/aggregating-queues-are-active.
# Created only when CloudWatch is enabled AND the environment is "production".
resource "aws_cloudwatch_log_metric_filter" "aggregating-queues-are-active" {
  count = (var.cloudwatch_enabled && var.env == "production") ? 1 : 0

  name           = "aggregating-queues-are-active"
  log_group_name = aws_cloudwatch_log_group.notification-canada-ca-eks-application-logs[0].name
  pattern        = "Batch saving with"

  metric_transformation {
    namespace = "LogMetrics"
    name      = "aggregating-queues-are-active"
    value     = "1"
  }
}

# Metric filter on the application log group for failed delivery-status
# callback requests (pattern words: "send_delivery_status_to_service request
# failed for notification_id").
# Each matching log event publishes the value 1 to
# LogMetrics/callback-max-retry-failures.
# Created only when CloudWatch is enabled AND the environment is "production".
# NOTE(review): the metric name ("callback-max-retry-failures") differs from
# the filter name ("callback-request-failures"). It is kept unchanged because
# anything consuming the metric would reference it by this name — confirm the
# mismatch is intentional.
resource "aws_cloudwatch_log_metric_filter" "callback-request-failures" {
  count = (var.cloudwatch_enabled && var.env == "production") ? 1 : 0

  name           = "callback-request-failures"
  log_group_name = aws_cloudwatch_log_group.notification-canada-ca-eks-application-logs[0].name
  pattern        = "send_delivery_status_to_service request failed for notification_id"

  metric_transformation {
    namespace = "LogMetrics"
    name      = "callback-max-retry-failures"
    value     = "1"
  }
}

# Metric filter on the application log group for log events containing the
# term "ThrottlingException".
# Each matching log event publishes the value 1 to
# LogMetrics/throttling-exceptions.
# Created only when CloudWatch is enabled AND the environment is "production".
resource "aws_cloudwatch_log_metric_filter" "throttling-exceptions" {
  count = (var.cloudwatch_enabled && var.env == "production") ? 1 : 0

  name           = "throttling-exceptions"
  log_group_name = aws_cloudwatch_log_group.notification-canada-ca-eks-application-logs[0].name
  pattern        = "ThrottlingException"

  metric_transformation {
    namespace = "LogMetrics"
    name      = "throttling-exceptions"
    value     = "1"
  }
}
Loading

0 comments on commit 621d0fe

Please sign in to comment.