Skip to content

Commit

Permalink
Encoding unicode in "friendly-from" field (#1990)
Browse files Browse the repository at this point in the history
This PR changes the way we are encoding the "friendly from" field in the email headers, so that we can include non-ascii characters that show up in service names. The AWS documentation provided examples of how to do this. There are 2 steps:
- encode the utf-8 characters as base64
- replace the ascii-encoded "friendly from" field with "MIME encoded-word syntax" that includes the base64 encoded string
Here is a link to the AWS docs that I used: https://docs.aws.amazon.com/ses/latest/dg/send-email-raw.html#send-email-mime-encoding-headers
  • Loading branch information
smcmurtry authored Sep 27, 2023
1 parent 0d5bc29 commit ec14c43
Show file tree
Hide file tree
Showing 3 changed files with 85 additions and 6 deletions.
3 changes: 1 addition & 2 deletions app/clients/email/aws_ses.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
from flask import current_app
from notifications_utils.recipients import InvalidEmailError
from notifications_utils.statsd_decorators import statsd
from unidecode import unidecode

from app.clients.email import EmailClient, EmailClientException

Expand Down Expand Up @@ -63,7 +62,7 @@ def attach_html(m, content):
attachments = attachments or []
if isinstance(to_addresses, str):
to_addresses = [to_addresses]
source = unidecode(source)

reply_to_addresses = [reply_to_address] if reply_to_address else []

# - If sending a TXT email without attachments:
Expand Down
29 changes: 27 additions & 2 deletions app/delivery/send_to_providers.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import base64
import os
import re
import urllib.request
Expand All @@ -15,6 +16,7 @@
PlainTextEmailTemplate,
SMSMessageTemplate,
)
from unidecode import unidecode

from app import bounce_rate_client, clients, document_download_client, statsd_client
from app.celery.research_mode_tasks import send_email_response, send_sms_response
Expand Down Expand Up @@ -185,6 +187,28 @@ def check_service_over_bounce_rate(service_id: str):
)


def mime_encoded_word_syntax(encoded_text="", charset="utf-8", encoding="B") -> str:
"""MIME encoded-word syntax is a way to encode non-ASCII characters in email headers.
It is described here:
https://docs.aws.amazon.com/ses/latest/dg/send-email-raw.html#send-email-mime-encoding-headers
"""
return f"=?{charset}?{encoding}?{encoded_text}?="


def get_from_address(friendly_from: str, email_from: str, sending_domain: str) -> str:
"""
This function returns the from_address or source in MIME encoded-word syntax
friendly_from is the sender's display name and may contain accents so we need to encode it to base64
email_from and sending_domain should be ASCII only
https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/ses/client/send_raw_email.html
"If you want to use Unicode characters in the “friendly from” name, you must encode the “friendly from”
name using MIME encoded-word syntax, as described in Sending raw email using the Amazon SES API."
"""
friendly_from_b64 = base64.b64encode(friendly_from.encode()).decode("utf-8")
friendly_from_mime = mime_encoded_word_syntax(encoded_text=friendly_from_b64, charset="utf-8", encoding="B")
return f'"{friendly_from_mime}" <{unidecode(email_from)}@{unidecode(sending_domain)}>'


def send_email_to_provider(notification: Notification):
current_app.logger.info(f"Sending email to provider for notification id {notification.id}")
service = notification.service
Expand Down Expand Up @@ -267,8 +291,9 @@ def send_email_to_provider(notification: Notification):
else:
sending_domain = service.sending_domain

from_address = '"{}" <{}@{}>'.format(service.name, service.email_from, sending_domain)

from_address = get_from_address(
friendly_from=service.name, email_from=service.email_from, sending_domain=sending_domain
)
email_reply_to = notification.reply_to_text

reference = provider.send_email(
Expand Down
59 changes: 57 additions & 2 deletions tests/app/delivery/test_send_to_providers.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import uuid
from collections import namedtuple
from datetime import datetime
from unittest import TestCase
from unittest.mock import ANY, MagicMock, call

import pytest
Expand Down Expand Up @@ -153,7 +154,7 @@ def test_should_send_personalised_template_to_correct_email_provider_and_persist
send_to_providers.send_email_to_provider(db_notification)

app.aws_ses_client.send_email.assert_called_once_with(
'"Sample service" <[email protected]>',
'"=?utf-8?B?U2FtcGxlIHNlcnZpY2U=?=" <[email protected]>',
"[email protected]",
"Jo <em>some HTML</em>",
body="Hello Jo\nThis is an email from GOV.\u200bUK with <em>some HTML</em>\n",
Expand Down Expand Up @@ -244,7 +245,7 @@ def test_should_respect_custom_sending_domains(sample_service, mocker, sample_em
send_to_providers.send_email_to_provider(db_notification)

app.aws_ses_client.send_email.assert_called_once_with(
'"Sample service" <[email protected]>',
'"=?utf-8?B?U2FtcGxlIHNlcnZpY2U=?=" <[email protected]>',
"[email protected]",
"Jo <em>some HTML</em>",
body="Hello Jo\nThis is an email from GOV.\u200bUK with <em>some HTML</em>\n",
Expand Down Expand Up @@ -1221,3 +1222,57 @@ def test_check_service_over_bounce_rate_normal(self, mocker: MockFixture, notify
mock_logger = mocker.patch("app.notifications.validators.current_app.logger.warning")
assert send_to_providers.check_service_over_bounce_rate(fake_uuid) is None
mock_logger.assert_not_called()


@pytest.mark.parametrize(
"encoded_text, charset, encoding, expected",
[
("hello_world", "utf-8", "B", "=?utf-8?B?hello_world?="),
("hello_world", "utf-8", "Q", "=?utf-8?Q?hello_world?="),
("hello_world2", "utf-8", "B", "=?utf-8?B?hello_world2?="),
],
)
def test_mime_encoded_word_syntax_encoding(encoded_text, charset, encoding, expected):
result = send_to_providers.mime_encoded_word_syntax(encoded_text=encoded_text, charset=charset, encoding=encoding)
assert result == expected


class TestGetFromAddress(TestCase):
def test_get_from_address_ascii(self):
# Arrange
friendly_from = "John Doe"
email_from = "johndoe"
sending_domain = "example.com"

# Act
result = send_to_providers.get_from_address(friendly_from, email_from, sending_domain)

# Assert
expected_result = '"=?utf-8?B?Sm9obiBEb2U=?=" <[email protected]>'
self.assertEqual(result, expected_result)

def test_get_from_address_non_ascii(self):
# Arrange
friendly_from = "Jöhn Döe"
email_from = "johndoe"
sending_domain = "example.com"

# Act
result = send_to_providers.get_from_address(friendly_from, email_from, sending_domain)

# Assert
expected_result = '"=?utf-8?B?SsO2aG4gRMO2ZQ==?=" <[email protected]>'
self.assertEqual(result, expected_result)

def test_get_from_address_empty_friendly_from(self):
# Arrange
friendly_from = ""
email_from = "johndoe"
sending_domain = "example.com"

# Act
result = send_to_providers.get_from_address(friendly_from, email_from, sending_domain)

# Assert
expected_result = '"=?utf-8?B??=" <[email protected]>'
self.assertEqual(result, expected_result)

0 comments on commit ec14c43

Please sign in to comment.