Skip to content

Commit

Permalink
Add low severity alerts for cloud sql metrics
Browse files Browse the repository at this point in the history
  • Loading branch information
codysoyland committed Sep 14, 2023
1 parent 2ed4752 commit 8467ab2
Showing 1 changed file with 134 additions and 0 deletions.
134 changes: 134 additions & 0 deletions terraform/gcp/modules/monitoring/infra/alerts.tf
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,45 @@ resource "google_monitoring_alert_policy" "ssl_cert_expiry_alert" {

### Cloud SQL Alerts

# Cloud SQL Database CPU Utilization > 80% (low-severity warning)
#
# Warning-level companion to the "> 90%" CPU policy defined later in this
# file. It needs its own resource name: Terraform rejects two resources that
# share the same type and name, so this one carries a `_warning` suffix.
resource "google_monitoring_alert_policy" "cloud_sql_cpu_utilization_warning" {
  # In the absence of data, incident will auto-close in 7 days
  alert_strategy {
    auto_close = "604800s"
  }

  # A single condition is defined, so OR simply means "fire when it matches".
  combiner = "OR"

  conditions {
    condition_threshold {
      # Average each time series over 5-minute windows before comparing.
      aggregations {
        alignment_period   = "300s"
        per_series_aligner = "ALIGN_MEAN"
      }

      comparison = "COMPARISON_GT"
      duration   = "0s"
      filter     = "metric.type=\"cloudsql.googleapis.com/database/cpu/utilization\" resource.type=\"cloudsql_database\""

      # Utilization is compared as a fraction, so 0.8 is the 80% promised by
      # the display name and documentation (0.9 would merely duplicate the
      # higher-severity policy).
      threshold_value = 0.8

      # One violating time series is enough to open an incident.
      trigger {
        count   = 1
        percent = 0
      }
    }

    display_name = "Cloud SQL Database - CPU Utilization [MEAN]"
  }

  display_name = "Cloud SQL Database CPU Utilization > 80%"

  documentation {
    content   = "Cloud SQL Database CPU Utilization is >80%. Please increase CPU capacity."
    mime_type = "text/markdown"
  }

  enabled               = true
  notification_channels = local.notification_channels
  project               = var.project_id

  # Label used to mark this policy as low severity.
  user_labels = {
    severity = "warning"
  }
}

# Cloud SQL Database CPU Utilization > 90%
resource "google_monitoring_alert_policy" "cloud_sql_cpu_utilization" {
# In the absence of data, incident will auto-close in 7 days
Expand Down Expand Up @@ -101,6 +140,53 @@ resource "google_monitoring_alert_policy" "cloud_sql_cpu_utilization" {
project = var.project_id
}


# Cloud SQL Database Memory Utilization > 80% (low-severity warning)
#
# Warning-level companion to the "> 90%" memory policy defined later in this
# file. It needs its own resource name: Terraform rejects two resources that
# share the same type and name, so this one carries a `_warning` suffix.
resource "google_monitoring_alert_policy" "cloud_sql_memory_utilization_warning" {
  # In the absence of data, incident will auto-close in 7 days
  alert_strategy {
    auto_close = "604800s"
  }

  # A single condition is defined, so OR simply means "fire when it matches".
  combiner = "OR"

  conditions {
    condition_threshold {
      # Average each time series over 5-minute windows before comparing.
      aggregations {
        alignment_period   = "300s"
        per_series_aligner = "ALIGN_MEAN"
      }

      comparison = "COMPARISON_GT"
      duration   = "0s"
      filter     = "metric.type=\"cloudsql.googleapis.com/database/memory/utilization\" resource.type=\"cloudsql_database\""

      # Utilization is compared as a fraction, so 0.8 is the 80% promised by
      # the display name and documentation (0.9 would merely duplicate the
      # higher-severity policy).
      threshold_value = 0.8

      # One violating time series is enough to open an incident.
      trigger {
        count   = 1
        percent = 0
      }
    }

    display_name = "Cloud SQL Database - Memory utilization [MEAN]"
  }

  display_name = "Cloud SQL Database Memory Utilization > 80%"

  documentation {
    content   = "Cloud SQL Database Memory Utilization is >80%. Please increase memory capacity."
    mime_type = "text/markdown"
  }

  enabled               = true
  notification_channels = local.notification_channels
  project               = var.project_id

  # Label used to mark this policy as low severity.
  user_labels = {
    severity = "warning"
  }
}

# Cloud SQL Database Memory Utilization > 90%
resource "google_monitoring_alert_policy" "cloud_sql_memory_utilization" {
# In the absence of data, incident will auto-close in 7 days
Expand Down Expand Up @@ -143,6 +229,54 @@ resource "google_monitoring_alert_policy" "cloud_sql_memory_utilization" {
project = var.project_id
}

# Cloud SQL Database Disk has < 30GiB Free (low-severity warning)
#
# Warning-level companion to the "< 20GiB Free" disk policy defined later in
# this file. It needs its own resource name: Terraform rejects two resources
# that share the same type and name, so this one carries a `_warning` suffix.
resource "google_monitoring_alert_policy" "cloud_sql_disk_utilization_warning" {
  # In the absence of data, incident will auto-close in 7 days
  alert_strategy {
    auto_close = "604800s"
  }

  # A single condition is defined, so OR simply means "fire when it matches".
  combiner = "OR"

  # Disk has less than 30GiB free
  conditions {
    # < 30GiB disk space free AND utilization above 98%.
    # The query joins bytes_used, quota and utilization, averages each over
    # 5-minute windows, derives free space as (quota - bytes_used) per
    # database_id, and fires only when both thresholds are crossed.
    condition_monitoring_query_language {
      duration = "0s"
      query    = <<-EOT
        fetch cloudsql_database
        | { bytes: metric 'cloudsql.googleapis.com/database/disk/bytes_used'
        ; quota: metric 'cloudsql.googleapis.com/database/disk/quota'
        ; utilization: metric 'cloudsql.googleapis.com/database/disk/utilization' }
        | join
        | group_by 5m, [q_mean: mean(value.quota), b_mean: mean(value.bytes_used), u_mean: mean(value.utilization)]
        | every 5m
        | group_by [resource.database_id], [free_space: sub(mean(q_mean), mean(b_mean)), u: mean(u_mean)]
        | condition and(free_space < 30 'GiBy', u > 0.98)
      EOT

      # One violating time series is enough to open an incident.
      trigger {
        count   = 1
        percent = 0
      }
    }

    display_name = "Cloud SQL Database - Disk free space and utilization [MEAN]"
  }

  display_name = "Cloud SQL Database Disk has < 30GiB Free and Utilization > 98%"

  documentation {
    content   = "Cloud SQL disk has less than 30GiB free space remaining. Please increase capacity. Note that autoresize should be enabled for the database. Ensure there is no issue with the autoresize process."
    mime_type = "text/markdown"
  }

  enabled               = true
  notification_channels = local.notification_channels
  project               = var.project_id

  # Severity is expressed as a user label, consistent with the CPU and memory
  # warning policies in this file (a bare top-level `severity = "warning"` is
  # not what those policies use).
  user_labels = {
    severity = "warning"
  }
}

# Cloud SQL Database Disk has < 20GiB Free
resource "google_monitoring_alert_policy" "cloud_sql_disk_utilization" {
# In the absence of data, incident will auto-close in 7 days
Expand Down

0 comments on commit 8467ab2

Please sign in to comment.