diff --git a/database/postgresql/README.md b/database/postgresql/README.md
index a0232a31..8eb7dbdc 100644
--- a/database/postgresql/README.md
+++ b/database/postgresql/README.md
@@ -19,6 +19,7 @@ Creates DataDog monitors with the following checks:
 
 - PostgreSQL Connections
 - PostgreSQL disk queue depth
+- PostgreSQL replication delay on {{host}}:{{port}}
 - PostgreSQL server does not respond
 - PostgreSQL too many locks
 
@@ -49,6 +50,7 @@ Creates DataDog monitors with the following checks:
 | [datadog_monitor.postgresql_availability](https://registry.terraform.io/providers/DataDog/datadog/latest/docs/resources/monitor) | resource |
 | [datadog_monitor.postgresql_connection_too_high](https://registry.terraform.io/providers/DataDog/datadog/latest/docs/resources/monitor) | resource |
 | [datadog_monitor.postgresql_disk_queue_depth](https://registry.terraform.io/providers/DataDog/datadog/latest/docs/resources/monitor) | resource |
+| [datadog_monitor.postgresql_replication_delay](https://registry.terraform.io/providers/DataDog/datadog/latest/docs/resources/monitor) | resource |
 | [datadog_monitor.postgresql_too_many_locks](https://registry.terraform.io/providers/DataDog/datadog/latest/docs/resources/monitor) | resource |
 
 ## Inputs
@@ -90,6 +92,13 @@ Creates DataDog monitors with the following checks:
 | [postgresql\_lock\_threshold\_warning](#input\_postgresql\_lock\_threshold\_warning) | Maximum warning acceptable number of locks | `number` | `70` | no |
 | [postgresql\_lock\_time\_aggregator](#input\_postgresql\_lock\_time\_aggregator) | Monitor time aggregator for PostgreSQL lock monitor [available values: min, max or avg] | `string` | `"min"` | no |
 | [postgresql\_lock\_timeframe](#input\_postgresql\_lock\_timeframe) | Monitor timeframe for PostgreSQL lock monitor [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no |
+| [postgresql\_replication\_delay\_aggregator](#input\_postgresql\_replication\_delay\_aggregator) | Monitor time aggregator for PostgreSQL replication delay [available values: min, max or avg] | `string` | `"avg"` | no |
+| [postgresql\_replication\_delay\_enabled](#input\_postgresql\_replication\_delay\_enabled) | Flag to enable PostgreSQL replication delay monitor | `bool` | `true` | no |
+| [postgresql\_replication\_delay\_extra\_tags](#input\_postgresql\_replication\_delay\_extra\_tags) | Extra tags for PostgreSQL replication delay monitor | `list(string)` | `[]` | no |
+| [postgresql\_replication\_delay\_message](#input\_postgresql\_replication\_delay\_message) | Custom message for PostgreSQL replication delay monitor | `string` | `""` | no |
+| [postgresql\_replication\_delay\_threshold\_critical](#input\_postgresql\_replication\_delay\_threshold\_critical) | Critical threshold in seconds | `number` | `200` | no |
+| [postgresql\_replication\_delay\_threshold\_warning](#input\_postgresql\_replication\_delay\_threshold\_warning) | Warning threshold in seconds | `number` | `100` | no |
+| [postgresql\_replication\_delay\_timeframe](#input\_postgresql\_replication\_delay\_timeframe) | Monitor timeframe for PostgreSQL replication delay [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | `string` | `"last_15m"` | no |
 | [prefix\_slug](#input\_prefix\_slug) | Prefix string to prepend between brackets on every monitors names | `string` | `""` | no |
 | [timeout\_h](#input\_timeout\_h) | Default auto-resolving state (in hours) | `number` | `0` | no |
 
@@ -100,6 +109,7 @@ Creates DataDog monitors with the following checks:
 | [postgresql\_availability\_id](#output\_postgresql\_availability\_id) | id for monitor postgresql\_availability |
 | [postgresql\_connection\_too\_high\_id](#output\_postgresql\_connection\_too\_high\_id) | id for monitor postgresql\_connection\_too\_high |
 | [postgresql\_disk\_queue\_depth\_id](#output\_postgresql\_disk\_queue\_depth\_id) | id for monitor postgresql\_disk\_queue\_depth |
+| [postgresql\_replication\_delay\_id](#output\_postgresql\_replication\_delay\_id) | id for monitor postgresql\_replication\_delay |
 | [postgresql\_too\_many\_locks\_id](#output\_postgresql\_too\_many\_locks\_id) | id for monitor postgresql\_too\_many\_locks |
 
 ## Related documentation
diff --git a/database/postgresql/inputs.tf b/database/postgresql/inputs.tf
index 47ab4dba..0e3dfc08 100644
--- a/database/postgresql/inputs.tf
+++ b/database/postgresql/inputs.tf
@@ -219,3 +219,49 @@ variable "postgresql_disk_queue_message" {
   type        = string
   default     = ""
 }
+
+########################################
+###   PostgreSQL replication delay   ###
+########################################
+
+variable "postgresql_replication_delay_aggregator" {
+  description = "Monitor time aggregator for PostgreSQL replication delay [available values: min, max or avg]"
+  type        = string
+  default     = "avg"
+}
+
+variable "postgresql_replication_delay_timeframe" {
+  description = "Monitor timeframe for PostgreSQL replication delay [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
+  type        = string
+  default     = "last_15m"
+}
+
+variable "postgresql_replication_delay_threshold_critical" {
+  description = "Critical threshold in seconds"
+  type        = number
+  default     = 200
+}
+
+variable "postgresql_replication_delay_threshold_warning" {
+  description = "Warning threshold in seconds"
+  type        = number
+  default     = 100
+}
+
+variable "postgresql_replication_delay_enabled" {
+  description = "Flag to enable PostgreSQL replication delay monitor"
+  type        = bool
+  default     = true
+}
+
+variable "postgresql_replication_delay_extra_tags" {
+  description = "Extra tags for PostgreSQL replication delay monitor"
+  type        = list(string)
+  default     = []
+}
+
+variable "postgresql_replication_delay_message" {
+  description = "Custom message for PostgreSQL replication delay monitor"
+  type        = string
+  default     = ""
+}
diff --git a/database/postgresql/monitors-postgresql.tf b/database/postgresql/monitors-postgresql.tf
index 29713fd1..93b94d6f 100644
--- a/database/postgresql/monitors-postgresql.tf
+++ b/database/postgresql/monitors-postgresql.tf
@@ -111,3 +111,36 @@ EOQ
 
   tags = concat(["env:${var.environment}", "type:database", "provider:postgres", "resource:postgresql", "team:claranet", "created-by:terraform"], var.postgresql_disk_queue_extra_tags)
 }
+
+# Query alert on PostgreSQL replication delay; created only when
+# var.postgresql_replication_delay_enabled is true. Thresholds are in seconds.
+resource "datadog_monitor" "postgresql_replication_delay" {
+  count   = var.postgresql_replication_delay_enabled ? 1 : 0
+  name    = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] PostgreSQL replication delay on {{host}}:{{port}}"
+  message = coalesce(var.postgresql_replication_delay_message, var.message)
+  type    = "query alert"
+
+  # NOTE(review): the heredoc body was lost in the mangled patch text. The metric
+  # name, the module.filter-tags filter and the {host,port} grouping below are
+  # reconstructed -- confirm against the sibling monitors before applying.
+  query = <<EOQ
+  ${var.postgresql_replication_delay_aggregator}(${var.postgresql_replication_delay_timeframe}):
+    avg:postgresql.replication_delay${module.filter-tags.query_alert} by {host,port}
+  > ${var.postgresql_replication_delay_threshold_critical}
+EOQ
+
+  monitor_thresholds {
+    warning  = var.postgresql_replication_delay_threshold_warning
+    critical = var.postgresql_replication_delay_threshold_critical
+  }
+
+  evaluation_delay    = var.evaluation_delay
+  new_host_delay      = var.new_host_delay
+  notify_no_data      = false
+  renotify_interval   = 0
+  require_full_window = true
+  timeout_h           = var.timeout_h # use the module-wide auto-resolve input (default 0) instead of a literal
+  include_tags        = true
+
+  tags = concat(["env:${var.environment}", "type:database", "provider:postgres", "resource:postgresql", "team:claranet", "created-by:terraform"], var.postgresql_replication_delay_extra_tags)
+}
diff --git a/database/postgresql/outputs.tf b/database/postgresql/outputs.tf
index 63596ec7..d0a3f432 100644
--- a/database/postgresql/outputs.tf
+++ b/database/postgresql/outputs.tf
@@ -13,6 +13,11 @@ output "postgresql_disk_queue_depth_id" {
   value       = datadog_monitor.postgresql_disk_queue_depth.*.id
 }
 
+output "postgresql_replication_delay_id" {
+  description = "id for monitor postgresql_replication_delay"
+  value       = datadog_monitor.postgresql_replication_delay.*.id
+}
+
 output "postgresql_too_many_locks_id" {
   description = "id for monitor postgresql_too_many_locks"
   value       = datadog_monitor.postgresql_too_many_locks.*.id