From ec14c43591a0f469dbf1772f94950b157b388c2c Mon Sep 17 00:00:00 2001 From: Stephen McMurtry Date: Wed, 27 Sep 2023 07:52:07 -0700 Subject: [PATCH] Encoding unicode in "friendly-from" field (#1990) This PR changes the way we are encoding the "friendly from" field in the email headers, so that we can include non-ascii characters that show up in service names. The AWS documentation provided examples of how to do this. There are 2 steps: - encode the utf-8 characters as base64 - replace the ascii-encoded "friendly from" field with "MIME encoded-word syntax" that includes the base64 encoded string Here is a link to the AWS docs that I used: https://docs.aws.amazon.com/ses/latest/dg/send-email-raw.html#send-email-mime-encoding-headers --- app/clients/email/aws_ses.py | 3 +- app/delivery/send_to_providers.py | 29 +++++++++- tests/app/delivery/test_send_to_providers.py | 59 +++++++++++++++++++- 3 files changed, 85 insertions(+), 6 deletions(-) diff --git a/app/clients/email/aws_ses.py b/app/clients/email/aws_ses.py index a7131d6232..624f0f1fbe 100644 --- a/app/clients/email/aws_ses.py +++ b/app/clients/email/aws_ses.py @@ -8,7 +8,6 @@ from flask import current_app from notifications_utils.recipients import InvalidEmailError from notifications_utils.statsd_decorators import statsd -from unidecode import unidecode from app.clients.email import EmailClient, EmailClientException @@ -63,7 +62,7 @@ def attach_html(m, content): attachments = attachments or [] if isinstance(to_addresses, str): to_addresses = [to_addresses] - source = unidecode(source) + reply_to_addresses = [reply_to_address] if reply_to_address else [] # - If sending a TXT email without attachments: diff --git a/app/delivery/send_to_providers.py b/app/delivery/send_to_providers.py index 567f0f1442..e39872bcb0 100644 --- a/app/delivery/send_to_providers.py +++ b/app/delivery/send_to_providers.py @@ -1,3 +1,4 @@ +import base64 import os import re import urllib.request @@ -15,6 +16,7 @@ PlainTextEmailTemplate, SMSMessageTemplate, ) +from unidecode import unidecode from app import bounce_rate_client, clients, document_download_client, statsd_client from app.celery.research_mode_tasks import send_email_response, send_sms_response @@ -185,6 +187,28 @@ def check_service_over_bounce_rate(service_id: str): ) +def mime_encoded_word_syntax(encoded_text="", charset="utf-8", encoding="B") -> str: + """MIME encoded-word syntax is a way to encode non-ASCII characters in email headers. + It is described here: + https://docs.aws.amazon.com/ses/latest/dg/send-email-raw.html#send-email-mime-encoding-headers + """ + return f"=?{charset}?{encoding}?{encoded_text}?=" + + +def get_from_address(friendly_from: str, email_from: str, sending_domain: str) -> str: + """ + This function returns the from_address or source in MIME encoded-word syntax + friendly_from is the sender's display name and may contain accents so we need to encode it to base64 + email_from and sending_domain should be ASCII only + https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/ses/client/send_raw_email.html + "If you want to use Unicode characters in the “friendly from” name, you must encode the “friendly from” + name using MIME encoded-word syntax, as described in Sending raw email using the Amazon SES API." + """ + friendly_from_b64 = base64.b64encode(friendly_from.encode()).decode("utf-8") + friendly_from_mime = mime_encoded_word_syntax(encoded_text=friendly_from_b64, charset="utf-8", encoding="B") + return f'"{friendly_from_mime}" <{unidecode(email_from)}@{unidecode(sending_domain)}>' + + def send_email_to_provider(notification: Notification): current_app.logger.info(f"Sending email to provider for notification id {notification.id}") service = notification.service @@ -267,8 +291,9 @@ def send_email_to_provider(notification: Notification): else: sending_domain = service.sending_domain - from_address = '"{}" <{}@{}>'.format(service.name, service.email_from, sending_domain) - + from_address = get_from_address( + friendly_from=service.name, email_from=service.email_from, sending_domain=sending_domain + ) email_reply_to = notification.reply_to_text reference = provider.send_email( diff --git a/tests/app/delivery/test_send_to_providers.py b/tests/app/delivery/test_send_to_providers.py index d08e654289..57cd6fba41 100644 --- a/tests/app/delivery/test_send_to_providers.py +++ b/tests/app/delivery/test_send_to_providers.py @@ -1,6 +1,7 @@ import uuid from collections import namedtuple from datetime import datetime +from unittest import TestCase from unittest.mock import ANY, MagicMock, call import pytest @@ -153,7 +154,7 @@ def test_should_send_personalised_template_to_correct_email_provider_and_persist send_to_providers.send_email_to_provider(db_notification) app.aws_ses_client.send_email.assert_called_once_with( - '"Sample service" ', + '"=?utf-8?B?U2FtcGxlIHNlcnZpY2U=?=" ', "jo.smith@example.com", "Jo some HTML", body="Hello Jo\nThis is an email from GOV.\u200bUK with some HTML\n", @@ -244,7 +245,7 @@ def test_should_respect_custom_sending_domains(sample_service, mocker, sample_em send_to_providers.send_email_to_provider(db_notification) app.aws_ses_client.send_email.assert_called_once_with( - '"Sample service" ', + '"=?utf-8?B?U2FtcGxlIHNlcnZpY2U=?=" ', "jo.smith@example.com", "Jo some HTML", body="Hello Jo\nThis is an email from GOV.\u200bUK with some HTML\n", @@ -1221,3 +1222,57 @@ def test_check_service_over_bounce_rate_normal(self, mocker: MockFixture, notify mock_logger = mocker.patch("app.notifications.validators.current_app.logger.warning") assert send_to_providers.check_service_over_bounce_rate(fake_uuid) is None mock_logger.assert_not_called() + + +@pytest.mark.parametrize( + "encoded_text, charset, encoding, expected", + [ + ("hello_world", "utf-8", "B", "=?utf-8?B?hello_world?="), + ("hello_world", "utf-8", "Q", "=?utf-8?Q?hello_world?="), + ("hello_world2", "utf-8", "B", "=?utf-8?B?hello_world2?="), + ], +) +def test_mime_encoded_word_syntax_encoding(encoded_text, charset, encoding, expected): + result = send_to_providers.mime_encoded_word_syntax(encoded_text=encoded_text, charset=charset, encoding=encoding) + assert result == expected + + +class TestGetFromAddress(TestCase): + def test_get_from_address_ascii(self): + # Arrange + friendly_from = "John Doe" + email_from = "johndoe" + sending_domain = "example.com" + + # Act + result = send_to_providers.get_from_address(friendly_from, email_from, sending_domain) + + # Assert + expected_result = '"=?utf-8?B?Sm9obiBEb2U=?=" ' + self.assertEqual(result, expected_result) + + def test_get_from_address_non_ascii(self): + # Arrange + friendly_from = "Jöhn Döe" + email_from = "johndoe" + sending_domain = "example.com" + + # Act + result = send_to_providers.get_from_address(friendly_from, email_from, sending_domain) + + # Assert + expected_result = '"=?utf-8?B?SsO2aG4gRMO2ZQ==?=" ' + self.assertEqual(result, expected_result) + + def test_get_from_address_empty_friendly_from(self): + # Arrange + friendly_from = "" + email_from = "johndoe" + sending_domain = "example.com" + + # Act + result = send_to_providers.get_from_address(friendly_from, email_from, sending_domain) + + # Assert + expected_result = '"=?utf-8?B??=" ' + self.assertEqual(result, expected_result)