Skip to content

Commit

Permalink
Added rds & load balancer monitors in TF
Browse files Browse the repository at this point in the history
  • Loading branch information
himsharma01 committed Dec 17, 2024
1 parent 728438f commit ecc123c
Show file tree
Hide file tree
Showing 4 changed files with 272 additions and 0 deletions.
68 changes: 68 additions & 0 deletions aws-observability-terraform/app-modules/alb/app.tf
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,74 @@ module "alb_module" {
connection_notifications = var.connection_notifications
email_notifications = var.email_notifications
},
"AWSApplicationLoadBalancerDeletionAlert" = {
  # Logs monitor that counts CloudTrail DeleteLoadBalancer events for the
  # aws/applicationelb namespace (ELB API version 2015-12-01).
  monitor_name             = "AWS Application Load Balancer - Deletion Alert"
  monitor_description      = "This alert fires when we detect greater than or equal to 2 application load balancers are deleted over a 5 minute time-period."
  monitor_monitor_type     = "Logs"
  monitor_parent_id        = var.monitor_folder_id
  monitor_is_disabled      = var.monitors_disabled
  monitor_evaluation_delay = "0m"
  queries = {
    # Deliberately no "errorCode" keyword in the search scope: CloudTrail adds
    # that field only to failed API calls, so requiring it would exclude the
    # successful deletions this alert is described as detecting.
    A = "account=* region=* \"\"eventsource\":\"elasticloadbalancing.amazonaws.com\"\" \"2015-12-01\" | json \"eventSource\", \"eventName\",\"apiVersion\" as event_source, event_name, api_version nodrop | where event_source = \"elasticloadbalancing.amazonaws.com\" and api_version matches \"2015-12-01\" and namespace matches \"aws/applicationelb\" | where event_name matches \"DeleteLoadBalancer\""
  }
  triggers = [
    # Critical: at least 2 matching results in the trailing 5 minutes.
    {
      detection_method = "StaticCondition",
      time_range       = "-5m",
      trigger_type     = "Critical",
      threshold        = 2,
      threshold_type   = "GreaterThanOrEqual",
      occurrence_type  = "ResultCount",
      trigger_source   = "AllResults"
    },
    # Resolution: fewer than 2 matching results in the trailing 5 minutes.
    {
      detection_method = "StaticCondition",
      time_range       = "-5m",
      trigger_type     = "ResolvedCritical",
      threshold        = 2,
      threshold_type   = "LessThan",
      occurrence_type  = "ResultCount",
      trigger_source   = "AllResults"
    }
  ],
  group_notifications      = var.group_notifications
  connection_notifications = var.connection_notifications
  email_notifications      = var.email_notifications
},
"AWSApplicationLoadBalancerTargetsDeregistered" = {
  # Logs monitor that counts CloudTrail DeregisterTargets events for the
  # aws/applicationelb namespace (ELB API version 2015-12-01).
  monitor_name             = "AWS Application Load Balancer - Targets Deregistered"
  monitor_description      = "This alert fires when we detect greater than or equal to 1 target is de-registered over a 5 minute time-period."
  monitor_monitor_type     = "Logs"
  monitor_parent_id        = var.monitor_folder_id
  monitor_is_disabled      = var.monitors_disabled
  monitor_evaluation_delay = "0m"
  queries = {
    # Deliberately no "errorCode" keyword in the search scope: CloudTrail adds
    # that field only to failed API calls, so requiring it would exclude the
    # successful de-registrations this alert is described as detecting.
    A = "account=* region=* \"\"eventsource\":\"elasticloadbalancing.amazonaws.com\"\" \"2015-12-01\" | json \"eventSource\", \"eventName\",\"apiVersion\" as event_source, event_name, api_version nodrop | where event_source = \"elasticloadbalancing.amazonaws.com\" and api_version matches \"2015-12-01\" | where namespace matches \"aws/applicationelb\" and event_name=\"DeregisterTargets\""
  }
  triggers = [
    # Critical: at least 1 matching result in the trailing 5 minutes.
    {
      detection_method = "StaticCondition",
      time_range       = "-5m",
      trigger_type     = "Critical",
      threshold        = 1,
      threshold_type   = "GreaterThanOrEqual",
      occurrence_type  = "ResultCount",
      trigger_source   = "AllResults"
    },
    # Resolution: no matching results in the trailing 5 minutes.
    {
      detection_method = "StaticCondition",
      time_range       = "-5m",
      trigger_type     = "ResolvedCritical",
      threshold        = 1,
      threshold_type   = "LessThan",
      occurrence_type  = "ResultCount",
      trigger_source   = "AllResults"
    }
  ],
  group_notifications      = var.group_notifications
  connection_notifications = var.connection_notifications
  email_notifications      = var.email_notifications
},
"AWSApplicationLoadBalancerHigh5XXErrors" = {
monitor_name = "AWS Application Load Balancer - High 5XX Errors"
monitor_description = "This alert fires where there are too many HTTP requests (>5%) with a response status of 5xx within an interval of 5 minutes."
Expand Down
68 changes: 68 additions & 0 deletions aws-observability-terraform/app-modules/elb/app.tf
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,74 @@ module "classic_elb_module" {
connection_notifications = var.connection_notifications
email_notifications = var.email_notifications
},
"AWSClassicLoadBalancerDeletionAlert" = {
  # Identity and placement of the monitor.
  monitor_name        = "AWS Classic Load Balancer - Deletion Alert"
  monitor_description = "This alert fires when we detect greater than or equal to 2 classic load balancers are deleted over a 5 minute time-period."
  monitor_parent_id   = var.monitor_folder_id

  # Evaluation settings: a Logs monitor evaluated with no delay.
  monitor_monitor_type     = "Logs"
  monitor_evaluation_delay = "0m"
  monitor_is_disabled      = var.monitors_disabled

  # Search scoped to namespace aws/elb and apiVersion 2012-06-01, keeping only
  # events whose eventName matches DeleteLoadBalancer.
  queries = {
    A = "account=* region=* namespace=aws/elb \"\"eventsource\":\"elasticloadbalancing.amazonaws.com\"\" \"\"apiVersion\":\"2012-06-01\"\" | json \"eventSource\", \"eventName\" as event_source, event_name nodrop | where event_source = \"elasticloadbalancing.amazonaws.com\" | where event_name matches \"DeleteLoadBalancer\""
  }

  triggers = [
    {
      # Raise Critical at 2 or more results inside the 5 minute window.
      trigger_type     = "Critical"
      detection_method = "StaticCondition"
      trigger_source   = "AllResults"
      occurrence_type  = "ResultCount"
      threshold_type   = "GreaterThanOrEqual"
      threshold        = 2
      time_range       = "-5m"
    },
    {
      # Resolve once the result count drops below 2 for the same window.
      trigger_type     = "ResolvedCritical"
      detection_method = "StaticCondition"
      trigger_source   = "AllResults"
      occurrence_type  = "ResultCount"
      threshold_type   = "LessThan"
      threshold        = 2
      time_range       = "-5m"
    }
  ]

  # Notification targets are supplied by the caller through module variables.
  email_notifications      = var.email_notifications
  connection_notifications = var.connection_notifications
  group_notifications      = var.group_notifications
},
"AWSClassicLoadBalancerTargetsDeregistered" = {
  # Identity and placement of the monitor.
  monitor_name        = "AWS Classic Load Balancer - Targets Deregistered"
  monitor_description = "This alert fires when we detect greater than or equal to 1 target is de-registered over a 5 minute time-period."
  monitor_parent_id   = var.monitor_folder_id

  # Evaluation settings: a Logs monitor evaluated with no delay.
  monitor_monitor_type     = "Logs"
  monitor_evaluation_delay = "0m"
  monitor_is_disabled      = var.monitors_disabled

  # Search scoped to namespace aws/elb and apiVersion 2012-06-01, keeping only
  # events whose eventName matches DeregisterInstancesFromLoadBalancer.
  # The "\n" escapes place each pipeline stage on its own line of the query.
  queries = {
    A = "account=* region=* namespace=aws/elb \"\"eventsource\":\"elasticloadbalancing.amazonaws.com\"\" \"\"apiVersion\":\"2012-06-01\"\" \n| json \"eventSource\", \"eventName\" as event_source, event_name nodrop \n| where event_source = \"elasticloadbalancing.amazonaws.com\" \n| where event_name matches \"DeregisterInstancesFromLoadBalancer\""
  }

  triggers = [
    {
      # Raise Critical at 1 or more results inside the 5 minute window.
      trigger_type     = "Critical"
      detection_method = "StaticCondition"
      trigger_source   = "AllResults"
      occurrence_type  = "ResultCount"
      threshold_type   = "GreaterThanOrEqual"
      threshold        = 1
      time_range       = "-5m"
    },
    {
      # Resolve once the result count drops below 1 for the same window.
      trigger_type     = "ResolvedCritical"
      detection_method = "StaticCondition"
      trigger_source   = "AllResults"
      occurrence_type  = "ResultCount"
      threshold_type   = "LessThan"
      threshold        = 1
      time_range       = "-5m"
    }
  ]

  # Notification targets are supplied by the caller through module variables.
  email_notifications      = var.email_notifications
  connection_notifications = var.connection_notifications
  group_notifications      = var.group_notifications
},
"AWSClassicLoadBalancerHigh5XXErrors" = {
monitor_name = "AWS Classic Load Balancer - High 5XX Errors"
monitor_description = "This alert fires where there are too many HTTP requests (>5%) with a response status of 5xx within an interval of 5 minutes."
Expand Down
68 changes: 68 additions & 0 deletions aws-observability-terraform/app-modules/nlb/app.tf
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,74 @@ module "nlb_module" {
connection_notifications = var.connection_notifications
email_notifications = var.email_notifications
},
"AWSNetworkLoadBalancerDeletionAlert" = {
  # Logs monitor that counts CloudTrail DeleteLoadBalancer events for the
  # aws/networkelb namespace (ELB API version 2015-12-01).
  monitor_name             = "AWS Network Load Balancer - Deletion Alert"
  monitor_description      = "This alert fires when we detect greater than or equal to 2 network load balancers are deleted over a 5 minute time-period."
  monitor_monitor_type     = "Logs"
  monitor_parent_id        = var.monitor_folder_id
  monitor_is_disabled      = var.monitors_disabled
  monitor_evaluation_delay = "0m"
  queries = {
    # Deliberately no "errorCode" keyword in the search scope: CloudTrail adds
    # that field only to failed API calls, so requiring it would exclude the
    # successful deletions this alert is described as detecting.
    A = "account=* region=* \"\"eventsource\":\"elasticloadbalancing.amazonaws.com\"\" \"2015-12-01\" | json \"eventSource\", \"eventName\",\"apiVersion\" as event_source, event_name, api_version nodrop | where event_source = \"elasticloadbalancing.amazonaws.com\" and api_version matches \"2015-12-01\" and namespace matches \"aws/networkelb\" | where event_name matches \"DeleteLoadBalancer\""
  }
  triggers = [
    # Critical: at least 2 matching results in the trailing 5 minutes.
    {
      detection_method = "StaticCondition",
      time_range       = "-5m",
      trigger_type     = "Critical",
      threshold        = 2,
      threshold_type   = "GreaterThanOrEqual",
      occurrence_type  = "ResultCount",
      trigger_source   = "AllResults"
    },
    # Resolution: fewer than 2 matching results in the trailing 5 minutes.
    {
      detection_method = "StaticCondition",
      time_range       = "-5m",
      trigger_type     = "ResolvedCritical",
      threshold        = 2,
      threshold_type   = "LessThan",
      occurrence_type  = "ResultCount",
      trigger_source   = "AllResults"
    }
  ],
  group_notifications      = var.group_notifications
  connection_notifications = var.connection_notifications
  email_notifications      = var.email_notifications
},
"AWSNetworkLoadBalancerTargetsDeregistered" = {
  # Logs monitor that counts CloudTrail DeregisterTargets events for the
  # aws/networkelb namespace (ELB API version 2015-12-01).
  monitor_name             = "AWS Network Load Balancer - Targets Deregistered"
  monitor_description      = "This alert fires when we detect greater than or equal to 1 target is de-registered over a 5 minute time-period."
  monitor_monitor_type     = "Logs"
  monitor_parent_id        = var.monitor_folder_id
  monitor_is_disabled      = var.monitors_disabled
  monitor_evaluation_delay = "0m"
  queries = {
    # Deliberately no "errorCode" keyword in the search scope: CloudTrail adds
    # that field only to failed API calls, so requiring it would exclude the
    # successful de-registrations this alert is described as detecting.
    A = "account=* region=* \"\"eventsource\":\"elasticloadbalancing.amazonaws.com\"\" \"2015-12-01\" | json \"eventSource\", \"eventName\",\"apiVersion\" as event_source, event_name, api_version nodrop | where event_source = \"elasticloadbalancing.amazonaws.com\" and api_version matches \"2015-12-01\" and namespace matches \"aws/networkelb\" | where event_name matches \"DeregisterTargets\""
  }
  triggers = [
    # Critical: at least 1 matching result in the trailing 5 minutes.
    {
      detection_method = "StaticCondition",
      time_range       = "-5m",
      trigger_type     = "Critical",
      threshold        = 1,
      threshold_type   = "GreaterThanOrEqual",
      occurrence_type  = "ResultCount",
      trigger_source   = "AllResults"
    },
    # Resolution: no matching results in the trailing 5 minutes.
    {
      detection_method = "StaticCondition",
      time_range       = "-5m",
      trigger_type     = "ResolvedCritical",
      threshold        = 1,
      threshold_type   = "LessThan",
      occurrence_type  = "ResultCount",
      trigger_source   = "AllResults"
    }
  ],
  group_notifications      = var.group_notifications
  connection_notifications = var.connection_notifications
  email_notifications      = var.email_notifications
},
"AWSNetworkLoadBalancerHighUnhealthyHosts" = {
monitor_name = "AWS Network Load Balancer - High Unhealthy Hosts"
monitor_description = "This alert fires when we detect that are there are too many unhealthy hosts (>=10%) within an interval of 5 minutes for a given network load balancer"
Expand Down
68 changes: 68 additions & 0 deletions aws-observability-terraform/app-modules/rds/app.tf
Original file line number Diff line number Diff line change
Expand Up @@ -588,6 +588,74 @@ module "rds_module" {
group_notifications = var.group_notifications
connection_notifications = var.connection_notifications
email_notifications = var.email_notifications
},
"RDSOracleLogsDBCrash" = {
  # Identity and placement of the monitor.
  monitor_name        = "Amazon RDS - Oracle Logs - DB Crash"
  monitor_description = "This alert fires when we detect greater than or equal to 1 Oracle DB crash over a 5 minute time-period."
  monitor_parent_id   = var.monitor_folder_id

  # Evaluation settings: a Logs monitor evaluated with no delay.
  monitor_monitor_type     = "Logs"
  monitor_evaluation_delay = "0m"
  monitor_is_disabled      = var.monitors_disabled

  # Search over RDS sources whose _sourceHost ends in "alert", extracting every
  # ORA-NNNNN code and its message text, then counting the matches.
  # NOTE(review): the scope keyword is the generic "ORA-*", so any ORA- error
  # appears to match, not only crash-related codes - confirm this is intended.
  queries = {
    A = "account=* region=* namespace=aws/rds dbidentifier=* _sourceHost=/aws/rds/*alert ORA-* | json \"message\" nodrop | if (_raw matches \"{*\", message, _raw) as message | parse regex field=message \"(?<oraerr>ORA-\\d{5}): (?<oramsg>.*)\" multi | count"
  }

  triggers = [
    {
      # Raise Critical at 1 or more results inside the 5 minute window.
      trigger_type     = "Critical"
      detection_method = "StaticCondition"
      trigger_source   = "AllResults"
      occurrence_type  = "ResultCount"
      threshold_type   = "GreaterThanOrEqual"
      threshold        = 1
      time_range       = "-5m"
    },
    {
      # Resolve once the result count drops below 1 for the same window.
      trigger_type     = "ResolvedCritical"
      detection_method = "StaticCondition"
      trigger_source   = "AllResults"
      occurrence_type  = "ResultCount"
      threshold_type   = "LessThan"
      threshold        = 1
      time_range       = "-5m"
    }
  ]

  # Notification targets are supplied by the caller through module variables.
  email_notifications      = var.email_notifications
  connection_notifications = var.connection_notifications
  group_notifications      = var.group_notifications
},
"RDSOracleLogsFailedConnectionAttempts" = {
  # Identity and placement of the monitor.
  monitor_name        = "Amazon RDS - Oracle Logs - Failed Connection Attempts"
  monitor_description = "This alert fires when we detect greater than or equal to 25 failed connection attempts over a 5 minute time-period."
  monitor_parent_id   = var.monitor_folder_id

  # Evaluation settings: a Logs monitor evaluated with no delay.
  monitor_monitor_type     = "Logs"
  monitor_evaluation_delay = "0m"
  monitor_is_disabled      = var.monitors_disabled

  # Search over RDS sources whose _sourceHost ends in "listener". It parses the
  # "establish" status code plus service name, SID, client program/host/user,
  # client address and TNS error (all with nodrop), then keeps rows whose
  # status is not "0".
  queries = {
    A = "account=* region=* namespace=aws/rds dbidentifier=* _sourceHost=/aws/rds/*listener establish \"PROGRAM=\" (\"SID=\" or \"SERVICE_NAME=\") and (\"\nTNS-\" or \"TNS-\") | json \"message\" nodrop | if (_raw matches \"{*\", message, _raw) as message | parse regex field=message \"\\* \\(CONNECT_DATA[\\s\\S]+?\\* establish \\* \\S+ \\* (?<status>\\d+)\" nodrop | parse regex field=message \"CONNECT_DATA[\\s\\S]+?SERVICE_NAME=(?<serviceName>[^)]*)\\)[\\s\\S]+establish\" nodrop | parse regex field=message \"CONNECT_DATA[\\s\\S]+?service_name=(?<serviceName>[^)]*)\\)[\\s\\S]+establish\" nodrop | parse regex field=message \"CONNECT_DATA[\\s\\S]+?SID=(?<SID>[^)]*)\\)[\\s\\S]+establish\" nodrop | parse regex field=message \"CONNECT_DATA[\\s\\S]+?sid=(?<SID>[^)]*)\\)[\\s\\S]+establish\" nodrop | parse regex field=message \"CONNECT_DATA[\\s\\S]+?PROGRAM=(?<userProgramName>[^)]*)\\)[\\s\\S]+?HOST=(?<userHost>[^)]*)\\)[\\s\\S]+?USER=(?<databaseUser>[^)]*)\\)\" nodrop | parse field=message \"(ADDRESS=(PROTOCOL=*)(HOST=*)(PORT=*))\" as clientProtocol, clientHost, clientPort nodrop | parse regex field=message \"(?<TNSerr>TNS-\\d{5}): (?<tnsmsg>.*)\" nodrop | where status != \"0\""
  }

  triggers = [
    {
      # Raise Critical at 25 or more results inside the 5 minute window.
      trigger_type     = "Critical"
      detection_method = "StaticCondition"
      trigger_source   = "AllResults"
      occurrence_type  = "ResultCount"
      threshold_type   = "GreaterThanOrEqual"
      threshold        = 25
      time_range       = "-5m"
    },
    {
      # Resolve once the result count drops below 25 for the same window.
      trigger_type     = "ResolvedCritical"
      detection_method = "StaticCondition"
      trigger_source   = "AllResults"
      occurrence_type  = "ResultCount"
      threshold_type   = "LessThan"
      threshold        = 25
      time_range       = "-5m"
    }
  ]

  # Notification targets are supplied by the caller through module variables.
  email_notifications      = var.email_notifications
  connection_notifications = var.connection_notifications
  group_notifications      = var.group_notifications
}
}
}

0 comments on commit ecc123c

Please sign in to comment.