diff --git a/app/integrations/opsgenie.py b/app/integrations/opsgenie.py index b11b34ca..2d5aaf66 100644 --- a/app/integrations/opsgenie.py +++ b/app/integrations/opsgenie.py @@ -44,14 +44,14 @@ def create_alert(description): def healthcheck(): """Check if the bot can interact with the Opsgenie API.""" healthy = False - content = api_get_request( - "https://api.opsgenie.com/v2/account", - {"name": "GenieKey", "token": OPSGENIE_KEY}, - ) try: + content = api_get_request( + "https://api.opsgenie.com/v1/services", + {"name": "GenieKey", "token": OPSGENIE_KEY}, + ) result = json.loads(content) - logging.info(f"OpsGenie healthcheck result: {result}") healthy = "data" in result + logging.info(f"OpsGenie healthcheck result: {result}") except Exception as error: logging.error(f"OpsGenie healthcheck failed: {error}") return healthy diff --git a/app/jobs/scheduled_tasks.py b/app/jobs/scheduled_tasks.py index 94891e65..433f4fe0 100644 --- a/app/jobs/scheduled_tasks.py +++ b/app/jobs/scheduled_tasks.py @@ -28,10 +28,14 @@ def scheduler_heartbeat(): def integration_healthchecks(): logging.info("Running integration healthchecks ...") - healthchecks = [opsgenie.healthcheck] - for healthcheck in healthchecks: + healthchecks = { + "opsgenie": opsgenie.healthcheck, + } + for key, healthcheck in healthchecks.items(): if not healthcheck(): - logging.error(f"Integration {healthcheck.__name__} is unhealthy 💀") + logging.error(f"Integration {key} is unhealthy 💀") + else: + logging.info(f"Integration {key} is healthy 🌈") def run_continuously(interval=1): diff --git a/terraform/alarms.tf b/terraform/alarms.tf index b631192e..e1b747da 100644 --- a/terraform/alarms.tf +++ b/terraform/alarms.tf @@ -1,6 +1,6 @@ resource "aws_cloudwatch_log_metric_filter" "sre_bot_error" { name = local.error_logged - pattern = "\"ERROR:slack_bolt.App\"" + pattern = "?ERROR ?Exception" log_group_name = local.api_cloudwatch_log_group metric_transformation { @@ -29,7 +29,7 @@ resource "aws_cloudwatch_metric_alarm" "sre_bot_error" { resource "aws_cloudwatch_log_metric_filter" "sre_bot_warning" { name = local.warning_logged - pattern = "\"WARNING:slack_bolt.App\"" + pattern = "WARNING" log_group_name = local.api_cloudwatch_log_group metric_transformation {