From 4878ab918991922ae50886fa037cf950a4b7f4dd Mon Sep 17 00:00:00 2001 From: wbanks Date: Tue, 4 Jun 2024 16:36:53 -0400 Subject: [PATCH 1/8] Add callback failure warning email - Alphabeticalized the list of Notify's templates in config.py, because readability is nice. --- app/celery/service_callback_tasks.py | 2 +- app/config.py | 65 ++++++++++++++-------------- 2 files changed, 34 insertions(+), 33 deletions(-) diff --git a/app/celery/service_callback_tasks.py b/app/celery/service_callback_tasks.py index 9296958f85..57ad8e9c8a 100644 --- a/app/celery/service_callback_tasks.py +++ b/app/celery/service_callback_tasks.py @@ -86,5 +86,5 @@ def _send_data_to_service_callback_api(self, data, service_callback_url, token, self.retry(queue=QueueNames.CALLBACKS_RETRY) except self.MaxRetriesExceededError: current_app.logger.warning( - "Retry: {function_name} has retried the max num of times for callback url {service_callback_url} and notification_id: {notification_id}" + f"Retry: {function_name} has retried the max num of times for callback url {service_callback_url} and notification_id: {notification_id}" ) diff --git a/app/config.py b/app/config.py index aab8422f27..3ca9e70576 100644 --- a/app/config.py +++ b/app/config.py @@ -299,47 +299,48 @@ class Config(object): CHECK_PROXY_HEADER = False # Notify's notifications templates - NOTIFY_SERVICE_ID = "d6aa2c68-a2d9-4437-ab19-3ae8eb202553" - NOTIFY_USER_ID = "6af522d0-2915-4e52-83a3-3690455a5fe6" - INVITATION_EMAIL_TEMPLATE_ID = "4f46df42-f795-4cc4-83bb-65ca312f49cc" - SMS_CODE_TEMPLATE_ID = "36fb0730-6259-4da1-8a80-c8de22ad4246" - EMAIL_2FA_TEMPLATE_ID = "299726d2-dba6-42b8-8209-30e1d66ea164" - EMAIL_MAGIC_LINK_TEMPLATE_ID = "6e97fd09-6da0-4cc8-829d-33cf5b818103" - NEW_USER_EMAIL_VERIFICATION_TEMPLATE_ID = "ece42649-22a8-4d06-b87f-d52d5d3f0a27" - PASSWORD_RESET_TEMPLATE_ID = "474e9242-823b-4f99-813d-ed392e7f1201" - FORCED_PASSWORD_RESET_TEMPLATE_ID = "e9a65a6b-497b-42f2-8f43-1736e43e13b3" + ACCOUNT_CHANGE_TEMPLATE_ID = 
"5b39e16a-9ff8-487c-9bfb-9e06bdb70f36" ALREADY_REGISTERED_EMAIL_TEMPLATE_ID = "0880fbb1-a0c6-46f0-9a8e-36c986381ceb" + APIKEY_REVOKE_TEMPLATE_ID = "a0a4e7b8-8a6a-4eaa-9f4e-9c3a5b2dbcf3" + BRANDING_REQUEST_TEMPLATE_ID = "7d423d9e-e94e-4118-879d-d52f383206ae" + CALLBACK_FAILURE_TEMPLATE_ID = "d8d580f4-86b3-4ba4-9d7c-263a630af354" CHANGE_EMAIL_CONFIRMATION_TEMPLATE_ID = "eb4d9930-87ab-4aef-9bce-786762687884" - SERVICE_NOW_LIVE_TEMPLATE_ID = "618185c6-3636-49cd-b7d2-6f6f5eb3bdde" - ORGANISATION_INVITATION_EMAIL_TEMPLATE_ID = "203566f0-d835-47c5-aa06-932439c86573" - TEAM_MEMBER_EDIT_EMAIL_TEMPLATE_ID = "c73f1d71-4049-46d5-a647-d013bdeca3f0" - TEAM_MEMBER_EDIT_MOBILE_TEMPLATE_ID = "8a31520f-4751-4789-8ea1-fe54496725eb" - REPLY_TO_EMAIL_ADDRESS_VERIFICATION_TEMPLATE_ID = "a42f1d17-9404-46d5-a647-d013bdfca3e1" - MOU_SIGNER_RECEIPT_TEMPLATE_ID = "4fd2e43c-309b-4e50-8fb8-1955852d9d71" - MOU_SIGNED_ON_BEHALF_SIGNER_RECEIPT_TEMPLATE_ID = "c20206d5-bf03-4002-9a90-37d5032d9e84" - MOU_SIGNED_ON_BEHALF_ON_BEHALF_RECEIPT_TEMPLATE_ID = "522b6657-5ca5-4368-a294-6b527703bd0b" - MOU_NOTIFY_TEAM_ALERT_TEMPLATE_ID = "d0e66c4c-0c50-43f0-94f5-f85b613202d4" + CONTACT_FORM_DIRECT_EMAIL_TEMPLATE_ID = "b04beb4a-8408-4280-9a5c-6a046b6f7704" CONTACT_US_TEMPLATE_ID = "8ea9b7a0-a824-4dd3-a4c3-1f508ed20a69" - ACCOUNT_CHANGE_TEMPLATE_ID = "5b39e16a-9ff8-487c-9bfb-9e06bdb70f36" - BRANDING_REQUEST_TEMPLATE_ID = "7d423d9e-e94e-4118-879d-d52f383206ae" - NO_REPLY_TEMPLATE_ID = "86950840-6da4-4865-841b-16028110e980" - NEAR_DAILY_LIMIT_TEMPLATE_ID = "5d3e4322-4ee6-457a-a710-c48755f6b643" - REACHED_DAILY_LIMIT_TEMPLATE_ID = "fd29f796-fcdc-471b-a0d4-0093880d9173" + DAILY_EMAIL_LIMIT_UPDATED_TEMPLATE_ID = "97dade64-ea8d-460f-8a34-900b74ee5eb0" DAILY_LIMIT_UPDATED_TEMPLATE_ID = "b3c766e6-be32-4edf-b8db-0f04ef404edc" - NEAR_DAILY_SMS_LIMIT_TEMPLATE_ID = "a796568f-a89b-468e-b635-8105554301b9" - REACHED_DAILY_SMS_LIMIT_TEMPLATE_ID = "a646e614-c527-4f94-a955-ed7185d577f4" DAILY_SMS_LIMIT_UPDATED_TEMPLATE_ID = 
"6ec12dd0-680a-4073-8d58-91d17cc8442f" - CONTACT_FORM_DIRECT_EMAIL_TEMPLATE_ID = "b04beb4a-8408-4280-9a5c-6a046b6f7704" - NEAR_DAILY_EMAIL_LIMIT_TEMPLATE_ID = "9aa60ad7-2d7f-46f0-8cbe-2bac3d4d77d8" - REACHED_DAILY_EMAIL_LIMIT_TEMPLATE_ID = "ee036547-e51b-49f1-862b-10ea982cfceb" - DAILY_EMAIL_LIMIT_UPDATED_TEMPLATE_ID = "97dade64-ea8d-460f-8a34-900b74ee5eb0" - APIKEY_REVOKE_TEMPLATE_ID = "a0a4e7b8-8a6a-4eaa-9f4e-9c3a5b2dbcf3" + EMAIL_2FA_TEMPLATE_ID = "299726d2-dba6-42b8-8209-30e1d66ea164" + EMAIL_MAGIC_LINK_TEMPLATE_ID = "6e97fd09-6da0-4cc8-829d-33cf5b818103" + FORCED_PASSWORD_RESET_TEMPLATE_ID = "e9a65a6b-497b-42f2-8f43-1736e43e13b3" + HEARTBEAT_TEMPLATE_EMAIL_HIGH = "276da251-3103-49f3-9054-cbf6b5d74411" HEARTBEAT_TEMPLATE_EMAIL_LOW = "73079cb9-c169-44ea-8cf4-8d397711cc9d" HEARTBEAT_TEMPLATE_EMAIL_MEDIUM = "c75c4539-3014-4c4c-96b5-94d326758a74" - HEARTBEAT_TEMPLATE_EMAIL_HIGH = "276da251-3103-49f3-9054-cbf6b5d74411" + HEARTBEAT_TEMPLATE_SMS_HIGH = "4969a9e9-ddfd-476e-8b93-6231e6f1be4a" HEARTBEAT_TEMPLATE_SMS_LOW = "ab3a603b-d602-46ea-8c83-e05cb280b950" HEARTBEAT_TEMPLATE_SMS_MEDIUM = "a48b54ce-40f6-4e4a-abe8-1e2fa389455b" - HEARTBEAT_TEMPLATE_SMS_HIGH = "4969a9e9-ddfd-476e-8b93-6231e6f1be4a" + INVITATION_EMAIL_TEMPLATE_ID = "4f46df42-f795-4cc4-83bb-65ca312f49cc" + MOU_NOTIFY_TEAM_ALERT_TEMPLATE_ID = "d0e66c4c-0c50-43f0-94f5-f85b613202d4" + MOU_SIGNED_ON_BEHALF_ON_BEHALF_RECEIPT_TEMPLATE_ID = "522b6657-5ca5-4368-a294-6b527703bd0b" + MOU_SIGNED_ON_BEHALF_SIGNER_RECEIPT_TEMPLATE_ID = "c20206d5-bf03-4002-9a90-37d5032d9e84" + MOU_SIGNER_RECEIPT_TEMPLATE_ID = "4fd2e43c-309b-4e50-8fb8-1955852d9d71" + NEAR_DAILY_EMAIL_LIMIT_TEMPLATE_ID = "9aa60ad7-2d7f-46f0-8cbe-2bac3d4d77d8" + NEAR_DAILY_LIMIT_TEMPLATE_ID = "5d3e4322-4ee6-457a-a710-c48755f6b643" + NEAR_DAILY_SMS_LIMIT_TEMPLATE_ID = "a796568f-a89b-468e-b635-8105554301b9" + NEW_USER_EMAIL_VERIFICATION_TEMPLATE_ID = "ece42649-22a8-4d06-b87f-d52d5d3f0a27" + NO_REPLY_TEMPLATE_ID = "86950840-6da4-4865-841b-16028110e980" + 
NOTIFY_SERVICE_ID = "d6aa2c68-a2d9-4437-ab19-3ae8eb202553" + NOTIFY_USER_ID = "6af522d0-2915-4e52-83a3-3690455a5fe6" + ORGANISATION_INVITATION_EMAIL_TEMPLATE_ID = "203566f0-d835-47c5-aa06-932439c86573" + PASSWORD_RESET_TEMPLATE_ID = "474e9242-823b-4f99-813d-ed392e7f1201" + REACHED_DAILY_EMAIL_LIMIT_TEMPLATE_ID = "ee036547-e51b-49f1-862b-10ea982cfceb" + REACHED_DAILY_LIMIT_TEMPLATE_ID = "fd29f796-fcdc-471b-a0d4-0093880d9173" + REACHED_DAILY_SMS_LIMIT_TEMPLATE_ID = "a646e614-c527-4f94-a955-ed7185d577f4" + REPLY_TO_EMAIL_ADDRESS_VERIFICATION_TEMPLATE_ID = "a42f1d17-9404-46d5-a647-d013bdfca3e1" + SERVICE_NOW_LIVE_TEMPLATE_ID = "618185c6-3636-49cd-b7d2-6f6f5eb3bdde" + SMS_CODE_TEMPLATE_ID = "36fb0730-6259-4da1-8a80-c8de22ad4246" + TEAM_MEMBER_EDIT_EMAIL_TEMPLATE_ID = "c73f1d71-4049-46d5-a647-d013bdeca3f0" + TEAM_MEMBER_EDIT_MOBILE_TEMPLATE_ID = "8a31520f-4751-4789-8ea1-fe54496725eb" # Allowed service IDs able to send HTML through their templates. ALLOW_HTML_SERVICE_IDS: List[str] = [id.strip() for id in os.getenv("ALLOW_HTML_SERVICE_IDS", "").split(",")] From ece47da90288706db98699fe98aacb8b1a1efdde Mon Sep 17 00:00:00 2001 From: wbanks Date: Tue, 4 Jun 2024 17:06:16 -0400 Subject: [PATCH 2/8] Add callback failure warning email (actually this time) - Added CALLBACK_FAILURE_THRESHOLD_PERCENTAGE env var --- app/config.py | 3 + .../0453_add_callback_failure_email.py | 108 ++++++++++++++++++ 2 files changed, 111 insertions(+) create mode 100644 migrations/versions/0453_add_callback_failure_email.py diff --git a/app/config.py b/app/config.py index 3ca9e70576..7a4ff76412 100644 --- a/app/config.py +++ b/app/config.py @@ -565,6 +565,9 @@ class Config(object): SRE_USER_NAME = "SRE_CLIENT_USER" SRE_CLIENT_SECRET = os.getenv("SRE_CLIENT_SECRET") + # TODO: Adjust when actual threshold is defined + CALLBACK_FAILURE_THRESHOLD_PERCENTAGE = 0.5 + @classmethod def get_sensitive_config(cls) -> list[str]: "List of config keys that contain sensitive information" diff --git 
a/migrations/versions/0453_add_callback_failure_email.py b/migrations/versions/0453_add_callback_failure_email.py new file mode 100644 index 0000000000..709c37fa3b --- /dev/null +++ b/migrations/versions/0453_add_callback_failure_email.py @@ -0,0 +1,108 @@ +""" +Revision ID: 0453_add_callback_failure_email +Revises: 0452_set_pgaudit_config +Create Date: 2024-06-04 16:09:00 +""" +from datetime import datetime + +from alembic import op +from flask import current_app + +revision = "0453_add_callback_failure_email" +down_revision = "0452_set_pgaudit_config" + +callback_failure_template_id = current_app.config["CALLBACK_FAILURE_TEMPLATE_ID"] + +def upgrade(): + template_insert = """ + INSERT INTO templates (id, name, template_type, created_at, content, archived, service_id, subject, + created_by_id, version, process_type, hidden) + VALUES('{}', '{}', '{}', '{}', '{}', False, '{}', '{}', '{}', 1, '{}', false) + """ + template_history_insert = """ + INSERT INTO templates_history (id, name, template_type, created_at, content, archived, service_id, subject, + created_by_id, version, process_type, hidden) + VALUES ('{}', '{}', '{}', '{}', '{}', False, '{}', '{}', '{}', 1, '{}', false) + """ + + callback_failure_content = "\n".join( + [ + "[[fr]]", + "(la version française suit)", + "[[/fr]]", + "", + "[[en]]", + "Hello ((name)),", + "", + "The callbacks for “((service_name))” are not working. This could mean that:", + "", + "(1) Your callback service is down.", + "(2) Your service is using a proxy that we cannot access.", + "(3) We’re able to reach your service, but it responds with errors.", + "", + "It’s important to check your callback service is running, check your callback service’s logs for errors and repair any errors in your logs. 
Once you’ve taken these steps, request confirmation that your callbacks are working again by contacting us.", + "", + "The GC Notify team", + "[[/en]]", + "", + "---", + "", + "[[fr]]", + "Bonjour ((name)),", + "", + "Les rappels pour « ((service_name)) » ne fonctionnent pas. Cela pourrait signifier que :" + "", + "Votre service de rappel est hors service.", + "Votre service utilise un proxy auquel nous ne pouvons pas accéder.", + "Nous parvenons à joindre votre service, mais il répond avec des erreurs.", + "Il est important de vérifier que votre service de rappel fonctionne, de consulter les journaux de votre service de rappel pour y détecter des erreurs et de corriger les erreurs dans vos journaux. Une fois ces étapes effectuées, veuillez demander une confirmation que vos rappels fonctionnent à nouveau en nous contactant.", + "", + "L’équipe GC Notify", + "[[/fr]]", + ] + ) + + templates = [ + { + "id": callback_failure_template_id, + "name": "Callback failures EMAIL", + "subject": "Your callbacks are not working | Vos rappels ne fonctionnent pas", + "content": callback_failure_content, + } + ] + + for template in templates: + op.execute( + sqltext=template_insert.format( + template["id"], + template["name"], + "email", + datetime.utcnow(), + template["content"], + current_app.config["NOTIFY_SERVICE_ID"], + template["subject"], + current_app.config["NOTIFY_USER_ID"], + "normal", + ) + ) + + op.execute( + template_history_insert.format( + template["id"], + template["name"], + "email", + datetime.utcnow(), + template["content"], + current_app.config["NOTIFY_SERVICE_ID"], + template["subject"], + current_app.config["NOTIFY_USER_ID"], + "normal", + ) + ) + +def downgrade(): + op.execute("DELETE FROM notifications WHERE template_id = '{}'".format(callback_failure_template_id)) + op.execute("DELETE FROM notification_history WHERE template_id = '{}'".format(callback_failure_template_id)) + op.execute("DELETE FROM template_redacted WHERE template_id = 
'{}'".format(callback_failure_template_id)) + op.execute("DELETE FROM templates_history WHERE id = '{}'".format(callback_failure_template_id)) + op.execute("DELETE FROM templates WHERE id = '{}'".format(callback_failure_template_id)) \ No newline at end of file From 36f38c0fd640251e48b9d92afb35da79691a1bd4 Mon Sep 17 00:00:00 2001 From: wbanks Date: Wed, 5 Jun 2024 16:00:14 -0400 Subject: [PATCH 3/8] Add email sending functionality for callback email - Updated the callback email migration script with the latest changes from content - Added a method to send the callback failure email to service owners - Stubbed a method to query cloudwatch so we can determine if callbacks for the service have failed at least 5 times in a 30 minute time period before we send the email to the service owner --- app/celery/service_callback_tasks.py | 65 ++++++++++++++++++- .../0453_add_callback_failure_email.py | 4 +- 2 files changed, 67 insertions(+), 2 deletions(-) diff --git a/app/celery/service_callback_tasks.py b/app/celery/service_callback_tasks.py index 57ad8e9c8a..8b6db5c6cd 100644 --- a/app/celery/service_callback_tasks.py +++ b/app/celery/service_callback_tasks.py @@ -2,10 +2,12 @@ from flask import current_app from notifications_utils.statsd_decorators import statsd -from requests import HTTPError, RequestException, request +from requests import HTTPError, RequestException, TimeoutError, InvalidURL, request from app import notify_celery, signer_complaint, signer_delivery_status from app.config import QueueNames +from app.models import Service +from app.service.sender import send_notification_to_service_users @notify_celery.task(bind=True, name="send-delivery-status", max_retries=5, default_retry_delay=300) @@ -80,6 +82,15 @@ def _send_data_to_service_callback_api(self, data, service_callback_url, token, current_app.logger.warning( f"{function_name} request failed for notification_id: {notification_id} and url: {service_callback_url}. 
exc: {e}" ) + + # TODO: Instate once we monitor alarms to determine how often this happens and we implement + # check_cloudwatch_for_callback_failures(), otherwise we risk flooding the service + # owner's inbox with callback failure email notifications. + + # if isinstance(e, TimeoutError) or isinstance(e, InvalidURL) or e.response.status_code == 500: + # if check_cloudwatch_for_callback_failures(): + # send_email_callback_failure_email(current_app.service) + # Retry if the response status code is server-side or 429 (too many requests). if not isinstance(e, HTTPError) or e.response.status_code >= 500 or e.response.status_code == 429: try: @@ -88,3 +99,55 @@ def _send_data_to_service_callback_api(self, data, service_callback_url, token, current_app.logger.warning( f"Retry: {function_name} has retried the max num of times for callback url {service_callback_url} and notification_id: {notification_id}" ) + +def send_email_callback_failure_email(service: Service): + send_notification_to_service_users( + service_id=service.id, + template_id=current_app.config["CALLBACK_FAILURE_TEMPLATE_ID"], + personalisation={ + "service_name": service.name, + "contact_url": f"{current_app.config['ADMIN_BASE_URL']}/contact", + "callback_doc_url": f"{current_app.config['DOCUMENTATION_DOAMIN']}/en/callbacks.html" + }, + include_user_fields=["name"], + ) + + +def check_cloudwatch_for_callback_failures(): + """ + TODO: Use boto3 to check cloudwatch for callback failures + https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/logs/client/start_query.html + + Check if a service has failed 5 callbacks in a 30 minute time period + + ---------------- + + import boto3 + from datetime import datetime, timedelta + import time + + client = boto3.client('logs') + + query = "TODO" + + log_group = 'TODO' + + start_query_response = client.start_query( + logGroupName=log_group, + startTime=int((datetime.today() - timedelta(minutes=30)).timestamp()), + 
endTime=int(datetime.now().timestamp()), + queryString=query, + ) + + query_id = start_query_response['queryId'] + + response = None + + while response == None or response['status'] == 'Running': + print('Waiting for query to complete ...') + time.sleep(1) + response = client.get_query_results( + queryId=query_id + ) + + """ \ No newline at end of file diff --git a/migrations/versions/0453_add_callback_failure_email.py b/migrations/versions/0453_add_callback_failure_email.py index 709c37fa3b..e8ecc69423 100644 --- a/migrations/versions/0453_add_callback_failure_email.py +++ b/migrations/versions/0453_add_callback_failure_email.py @@ -40,7 +40,9 @@ def upgrade(): "(2) Your service is using a proxy that we cannot access.", "(3) We’re able to reach your service, but it responds with errors.", "", - "It’s important to check your callback service is running, check your callback service’s logs for errors and repair any errors in your logs. Once you’ve taken these steps, request confirmation that your callbacks are working again by contacting us.", + "It’s important to check your callback service is running, check your callback service’s logs for errors and repair any errors in your logs. To find your callback configuration, sign into your account, visit the API integration page for “((service_name))” and select callbacks.", + "", + "Once you’ve taken these steps, request confirmation that your callbacks are working again by [contacting us](((contact_url))). 
For more information, you can also access our [API documentation on callbacks](((callback_docs_url))).", "", "The GC Notify team", "[[/en]]", From 02b533fa1d8ffce17129c2231c15c2985db70de9 Mon Sep 17 00:00:00 2001 From: wbanks Date: Thu, 6 Jun 2024 14:56:49 -0400 Subject: [PATCH 4/8] Update email content - fix circular dependency - formatting --- app/celery/service_callback_tasks.py | 65 ++++++++++--------- app/config.py | 3 - .../0453_add_callback_failure_email.py | 38 ++++++----- 3 files changed, 55 insertions(+), 51 deletions(-) diff --git a/app/celery/service_callback_tasks.py b/app/celery/service_callback_tasks.py index 8b6db5c6cd..f75cbb0561 100644 --- a/app/celery/service_callback_tasks.py +++ b/app/celery/service_callback_tasks.py @@ -2,12 +2,14 @@ from flask import current_app from notifications_utils.statsd_decorators import statsd -from requests import HTTPError, RequestException, TimeoutError, InvalidURL, request +from requests import HTTPError, RequestException, request from app import notify_celery, signer_complaint, signer_delivery_status from app.config import QueueNames from app.models import Service -from app.service.sender import send_notification_to_service_users + +# Uncomment when we implement email sending for callback failures +# from requests.exceptions import InvalidURL, Timeout @notify_celery.task(bind=True, name="send-delivery-status", max_retries=5, default_retry_delay=300) @@ -87,7 +89,7 @@ def _send_data_to_service_callback_api(self, data, service_callback_url, token, # check_cloudwatch_for_callback_failures(), otherwise we risk flooding the service # owner's inbox with callback failure email notifications. 
- # if isinstance(e, TimeoutError) or isinstance(e, InvalidURL) or e.response.status_code == 500: + # if isinstance(e, Timeout) or isinstance(e, InvalidURL) or e.response.status_code == 500: # if check_cloudwatch_for_callback_failures(): # send_email_callback_failure_email(current_app.service) @@ -97,17 +99,18 @@ def _send_data_to_service_callback_api(self, data, service_callback_url, token, self.retry(queue=QueueNames.CALLBACKS_RETRY) except self.MaxRetriesExceededError: current_app.logger.warning( - f"Retry: {function_name} has retried the max num of times for callback url {service_callback_url} and notification_id: {notification_id}" + f"Retry: {function_name} has retried the max num of times for callback url {service_callback_url} and notification_id: {notification_id} for service: {current_app.service.id}" ) + def send_email_callback_failure_email(service: Service): - send_notification_to_service_users( + service.send_notification_to_service_users( service_id=service.id, template_id=current_app.config["CALLBACK_FAILURE_TEMPLATE_ID"], personalisation={ "service_name": service.name, "contact_url": f"{current_app.config['ADMIN_BASE_URL']}/contact", - "callback_doc_url": f"{current_app.config['DOCUMENTATION_DOAMIN']}/en/callbacks.html" + "callback_docs_url": f"{current_app.config['DOCUMENTATION_DOMAIN']}/en/callbacks.html", }, include_user_fields=["name"], ) @@ -115,39 +118,39 @@ def send_email_callback_failure_email(service: Service): def check_cloudwatch_for_callback_failures(): """ - TODO: Use boto3 to check cloudwatch for callback failures - https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/logs/client/start_query.html + TODO: Use boto3 to check cloudwatch for callback failures + https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/logs/client/start_query.html - Check if a service has failed 5 callbacks in a 30 minute time period + Check if a service has failed 5 callbacks in a 30 minute time period - 
---------------- + ---------------- - import boto3 - from datetime import datetime, timedelta - import time + import boto3 + from datetime import datetime, timedelta + import time - client = boto3.client('logs') + client = boto3.client('logs') - query = "TODO" + query = "TODO" - log_group = 'TODO' + log_group = 'TODO' - start_query_response = client.start_query( - logGroupName=log_group, - startTime=int((datetime.today() - timedelta(minutes=30)).timestamp()), - endTime=int(datetime.now().timestamp()), - queryString=query, - ) + start_query_response = client.start_query( + logGroupName=log_group, + startTime=int((datetime.today() - timedelta(minutes=30)).timestamp()), + endTime=int(datetime.now().timestamp()), + queryString=query, + ) - query_id = start_query_response['queryId'] + query_id = start_query_response['queryId'] - response = None + response = None - while response == None or response['status'] == 'Running': - print('Waiting for query to complete ...') - time.sleep(1) - response = client.get_query_results( - queryId=query_id - ) + while response == None or response['status'] == 'Running': + print('Waiting for query to complete ...') + time.sleep(1) + response = client.get_query_results( + queryId=query_id + ) - """ \ No newline at end of file + """ diff --git a/app/config.py b/app/config.py index 8e40497508..341fba9d75 100644 --- a/app/config.py +++ b/app/config.py @@ -565,9 +565,6 @@ class Config(object): SRE_USER_NAME = "SRE_CLIENT_USER" SRE_CLIENT_SECRET = os.getenv("SRE_CLIENT_SECRET") - # TODO: Adjust when actual threshold is defined - CALLBACK_FAILURE_THRESHOLD_PERCENTAGE = 0.5 - @classmethod def get_sensitive_config(cls) -> list[str]: "List of config keys that contain sensitive information" diff --git a/migrations/versions/0453_add_callback_failure_email.py b/migrations/versions/0453_add_callback_failure_email.py index e8ecc69423..53ac2d3695 100644 --- a/migrations/versions/0453_add_callback_failure_email.py +++ 
b/migrations/versions/0453_add_callback_failure_email.py @@ -13,6 +13,7 @@ callback_failure_template_id = current_app.config["CALLBACK_FAILURE_TEMPLATE_ID"] + def upgrade(): template_insert = """ INSERT INTO templates (id, name, template_type, created_at, content, archived, service_id, subject, @@ -52,12 +53,14 @@ def upgrade(): "[[fr]]", "Bonjour ((name)),", "", - "Les rappels pour « ((service_name)) » ne fonctionnent pas. Cela pourrait signifier que :" + "Les rappels pour « ((service_name)) » ne fonctionnent pas. Cela pourrait signifier que :" "", + "(1) Votre service de rappel est hors service.", + "(2) Votre service utilise un proxy auquel nous ne pouvons pas accéder.", + "(3) Nous parvenons à joindre votre service, mais il répond avec des erreurs.", + "", + "Il est important de vérifier que votre service de rappel fonctionne, de vérifier les journaux de votre service de rappel pour détecter des erreurs et de corriger toute erreur dans vos journaux. Pour trouver votre configuration de rappel, connectez-vous à votre compte, visitez la page d’intégration API pour « ((service_name)) » et sélectionnez rappels.", "", - "Votre service de rappel est hors service.", - "Votre service utilise un proxy auquel nous ne pouvons pas accéder.", - "Nous parvenons à joindre votre service, mais il répond avec des erreurs.", - "Il est important de vérifier que votre service de rappel fonctionne, de consulter les journaux de votre service de rappel pour y détecter des erreurs et de corriger les erreurs dans vos journaux. Une fois ces étapes effectuées, veuillez demander une confirmation que vos rappels fonctionnent à nouveau en nous contactant.", + "Une fois ces étapes effectuées, demandez une confirmation que vos rappels fonctionnent à nouveau en nous contactant. 
Pour plus d’informations, vous pouvez également consulter notre documentation API sur les rappels.", "", "L’équipe GC Notify", "[[/fr]]", @@ -89,22 +92,23 @@ def upgrade(): ) op.execute( - template_history_insert.format( - template["id"], - template["name"], - "email", - datetime.utcnow(), - template["content"], - current_app.config["NOTIFY_SERVICE_ID"], - template["subject"], - current_app.config["NOTIFY_USER_ID"], - "normal", - ) + template_history_insert.format( + template["id"], + template["name"], + "email", + datetime.utcnow(), + template["content"], + current_app.config["NOTIFY_SERVICE_ID"], + template["subject"], + current_app.config["NOTIFY_USER_ID"], + "normal", ) + ) + def downgrade(): op.execute("DELETE FROM notifications WHERE template_id = '{}'".format(callback_failure_template_id)) op.execute("DELETE FROM notification_history WHERE template_id = '{}'".format(callback_failure_template_id)) op.execute("DELETE FROM template_redacted WHERE template_id = '{}'".format(callback_failure_template_id)) op.execute("DELETE FROM templates_history WHERE id = '{}'".format(callback_failure_template_id)) - op.execute("DELETE FROM templates WHERE id = '{}'".format(callback_failure_template_id)) \ No newline at end of file + op.execute("DELETE FROM templates WHERE id = '{}'".format(callback_failure_template_id)) From f490ccfefbf70a90f1f23ce09565a16b39476a6e Mon Sep 17 00:00:00 2001 From: wbanks Date: Thu, 6 Jun 2024 17:03:49 -0400 Subject: [PATCH 5/8] Fix implicit string concat --- migrations/versions/0453_add_callback_failure_email.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/migrations/versions/0453_add_callback_failure_email.py b/migrations/versions/0453_add_callback_failure_email.py index 53ac2d3695..63628f08dd 100644 --- a/migrations/versions/0453_add_callback_failure_email.py +++ b/migrations/versions/0453_add_callback_failure_email.py @@ -53,7 +53,8 @@ def upgrade(): "[[fr]]", "Bonjour ((name)),", "", - "Les rappels pour « 
((service_name)) » ne fonctionnent pas. Cela pourrait signifier que :" "", + "Les rappels pour « ((service_name)) » ne fonctionnent pas. Cela pourrait signifier que :" + "", "(1) Votre service de rappel est hors service.", "(2) Votre service utilise un proxy auquel nous ne pouvons pas accéder.", "(3) Nous parvenons à joindre votre service, mais il répond avec des erreurs.", From 8a41ad84a27b97e2bf94f14255b007e138922211 Mon Sep 17 00:00:00 2001 From: wbanks Date: Tue, 11 Jun 2024 10:05:32 -0400 Subject: [PATCH 6/8] Formatting --- migrations/versions/0453_add_callback_failure_email.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/migrations/versions/0453_add_callback_failure_email.py b/migrations/versions/0453_add_callback_failure_email.py index 63628f08dd..53ac2d3695 100644 --- a/migrations/versions/0453_add_callback_failure_email.py +++ b/migrations/versions/0453_add_callback_failure_email.py @@ -53,8 +53,7 @@ def upgrade(): "[[fr]]", "Bonjour ((name)),", "", - "Les rappels pour « ((service_name)) » ne fonctionnent pas. Cela pourrait signifier que :" - "", + "Les rappels pour « ((service_name)) » ne fonctionnent pas. 
Cela pourrait signifier que :" "", "(1) Votre service de rappel est hors service.", "(2) Votre service utilise un proxy auquel nous ne pouvons pas accéder.", "(3) Nous parvenons à joindre votre service, mais il répond avec des erreurs.", From 55076433981c589ff28b826674a7c6ef8bcda404 Mon Sep 17 00:00:00 2001 From: wbanks Date: Tue, 30 Jul 2024 15:52:41 -0400 Subject: [PATCH 7/8] Update migration --- migrations/versions/0453_add_callback_failure_email.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/migrations/versions/0453_add_callback_failure_email.py b/migrations/versions/0453_add_callback_failure_email.py index 53ac2d3695..cba644bb28 100644 --- a/migrations/versions/0453_add_callback_failure_email.py +++ b/migrations/versions/0453_add_callback_failure_email.py @@ -1,15 +1,15 @@ """ -Revision ID: 0453_add_callback_failure_email -Revises: 0452_set_pgaudit_config -Create Date: 2024-06-04 16:09:00 +Revision ID: 0458_add_callback_failure_email +Revises: 0457_update_categories +Create Date: 2024-07-30 15:51:00 """ from datetime import datetime from alembic import op from flask import current_app -revision = "0453_add_callback_failure_email" -down_revision = "0452_set_pgaudit_config" +revision = "0458_add_callback_failure_email" +down_revision = "0457_update_categories" callback_failure_template_id = current_app.config["CALLBACK_FAILURE_TEMPLATE_ID"] From d11521dd16cfee5935abfe6608cdcec40a0e1c28 Mon Sep 17 00:00:00 2001 From: wbanks Date: Tue, 13 Aug 2024 16:34:22 -0400 Subject: [PATCH 8/8] Timeout callback requests after 1 sec --- app/celery/service_callback_tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/celery/service_callback_tasks.py b/app/celery/service_callback_tasks.py index f75cbb0561..47aaa4d11c 100644 --- a/app/celery/service_callback_tasks.py +++ b/app/celery/service_callback_tasks.py @@ -72,7 +72,7 @@ def _send_data_to_service_callback_api(self, data, service_callback_url, token, "Content-Type": 
"application/json", "Authorization": f"Bearer {token}", }, - timeout=5, + timeout=1, ) current_app.logger.info(