diff --git a/app/integrations/opsgenie.py b/app/integrations/opsgenie.py
index 6d2f1340..b11b34ca 100644
--- a/app/integrations/opsgenie.py
+++ b/app/integrations/opsgenie.py
@@ -41,6 +41,29 @@ def create_alert(description):
     return "Could not issue alert to Opsgenie!"
 
 
+def healthcheck():
+    """Check if the bot can interact with the Opsgenie API.
+
+    Returns True only when the account endpoint answers with a JSON object
+    containing a "data" key; any request or parsing failure is logged and
+    reported as False.
+    """
+    healthy = False
+    try:
+        # The request stays inside the try block so that a failure raised by
+        # the call itself is reported as "unhealthy" instead of propagating.
+        content = api_get_request(
+            "https://api.opsgenie.com/v2/account",
+            {"name": "GenieKey", "token": OPSGENIE_KEY},
+        )
+        result = json.loads(content)
+        logging.info(f"OpsGenie healthcheck result: {result}")
+        healthy = isinstance(result, dict) and "data" in result
+    except Exception as error:
+        logging.error(f"OpsGenie healthcheck failed: {error}")
+    return healthy
+
+
 def api_get_request(url, auth):
     req = Request(url)
     req.add_header("Authorization", f"{auth['name']} {auth['token']}")
diff --git a/app/jobs/scheduled_tasks.py b/app/jobs/scheduled_tasks.py
index 8f37e20a..94891e65 100644
--- a/app/jobs/scheduled_tasks.py
+++ b/app/jobs/scheduled_tasks.py
@@ -5,6 +5,8 @@ import schedule
 
 import logging
 
+from integrations import opsgenie
+
 logging.basicConfig(level=logging.INFO)
 
 
@@ -17,12 +19,21 @@ def init(bot):
 
     schedule.every(10).seconds.do(revoke_aws_sso_access, client=bot.client)
     schedule.every(5).minutes.do(scheduler_heartbeat)
+    schedule.every(5).minutes.do(integration_healthchecks)
 
 
 def scheduler_heartbeat():
     logging.info("Scheduler is running at %s", time.ctime())
 
 
+def integration_healthchecks():
+    logging.info("Running integration healthchecks ...")
+    healthchecks = [opsgenie.healthcheck]
+    for healthcheck in healthchecks:
+        if not healthcheck():
+            logging.error(f"Integration {healthcheck.__name__} is unhealthy 💀")
+
+
 def run_continuously(interval=1):
     """Continuously run, while executing pending jobs at each
     elapsed time interval.
diff --git a/app/tests/intergrations/test_opsgenie.py b/app/tests/intergrations/test_opsgenie.py
index bc6682a6..56cb6c26 100644
--- a/app/tests/intergrations/test_opsgenie.py
+++ b/app/tests/intergrations/test_opsgenie.py
@@ -86,3 +86,27 @@ def test_api_post_request(urlopen_mock, request_mock):
         "GenieKey OPSGENIE_KEY",
     )
     urlopen_mock.assert_called_once_with(request_mock.return_value)
+
+
+@patch("integrations.opsgenie.api_get_request")
+def test_healthcheck_healthy(api_get_request_mock):
+    api_get_request_mock.return_value = '{"data": {"name": "test_user"}}'
+    assert opsgenie.healthcheck() is True
+
+
+@patch("integrations.opsgenie.api_get_request")
+def test_healthcheck_unhealthy(api_get_request_mock):
+    api_get_request_mock.return_value = '{"error": "failed"}'
+    assert opsgenie.healthcheck() is False
+
+
+@patch("integrations.opsgenie.api_get_request")
+def test_healthcheck_unhealthy_error(api_get_request_mock):
+    api_get_request_mock.return_value = "{]"
+    assert opsgenie.healthcheck() is False
+
+
+@patch("integrations.opsgenie.api_get_request")
+def test_healthcheck_unhealthy_request_error(api_get_request_mock):
+    api_get_request_mock.side_effect = Exception("connection refused")
+    assert opsgenie.healthcheck() is False
diff --git a/app/tests/jobs/test_scheduled_tasks.py b/app/tests/jobs/test_scheduled_tasks.py
index 415b8735..fd51bb59 100644
--- a/app/tests/jobs/test_scheduled_tasks.py
+++ b/app/tests/jobs/test_scheduled_tasks.py
@@ -22,3 +22,22 @@ def test_run_continuously(time_mock, threading_mock, schedule_mock):
     threading_mock.Event.return_value = cease_continuous_run
     result = scheduled_tasks.run_continuously(interval=1)
     assert result == cease_continuous_run
+
+
+@patch("jobs.scheduled_tasks.opsgenie")
+@patch("jobs.scheduled_tasks.logging")
+def test_integration_healthchecks_healthy(mock_logging, mock_opsgenie):
+    mock_opsgenie.healthcheck.return_value = True
+    scheduled_tasks.integration_healthchecks()
+    assert mock_opsgenie.healthcheck.call_count == 1
+    assert mock_logging.error.call_count == 0
+
+
+@patch("jobs.scheduled_tasks.opsgenie")
+@patch("jobs.scheduled_tasks.logging")
+def test_integration_healthchecks_unhealthy(mock_logging, mock_opsgenie):
+    mock_opsgenie.healthcheck.return_value = False
+    mock_opsgenie.healthcheck.__name__ = "test_integration"
+    scheduled_tasks.integration_healthchecks()
+    assert mock_opsgenie.healthcheck.call_count == 1
+    assert mock_logging.error.call_count == 1