Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Encoding unicode in "friendly-from" field #1990

Merged
merged 6 commits into from
Sep 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions app/clients/email/aws_ses.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
from flask import current_app
from notifications_utils.recipients import InvalidEmailError
from notifications_utils.statsd_decorators import statsd
from unidecode import unidecode

from app.clients.email import EmailClient, EmailClientException

Expand Down Expand Up @@ -63,7 +62,7 @@ def attach_html(m, content):
attachments = attachments or []
if isinstance(to_addresses, str):
to_addresses = [to_addresses]
source = unidecode(source)

reply_to_addresses = [reply_to_address] if reply_to_address else []

# - If sending a TXT email without attachments:
Expand Down
29 changes: 27 additions & 2 deletions app/delivery/send_to_providers.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import base64
import os
import re
import urllib.request
Expand All @@ -15,6 +16,7 @@
PlainTextEmailTemplate,
SMSMessageTemplate,
)
from unidecode import unidecode

from app import bounce_rate_client, clients, document_download_client, statsd_client
from app.celery.research_mode_tasks import send_email_response, send_sms_response
Expand Down Expand Up @@ -185,6 +187,28 @@ def check_service_over_bounce_rate(service_id: str):
)


def mime_encoded_word_syntax(encoded_text="", charset="utf-8", encoding="B") -> str:
    """Wrap already-encoded text in a MIME encoded-word envelope.

    The result has the shape ``=?<charset>?<encoding>?<encoded_text>?=``, which is
    how non-ASCII characters are carried in email headers. See:
    https://docs.aws.amazon.com/ses/latest/dg/send-email-raw.html#send-email-mime-encoding-headers

    This function only assembles the envelope: ``encoded_text`` is inserted
    verbatim, so the caller must have encoded it beforehand in a way that
    matches ``encoding``.
    """
    return "=?{}?{}?{}?=".format(charset, encoding, encoded_text)


def get_from_address(friendly_from: str, email_from: str, sending_domain: str) -> str:
    """
    Build the from_address (a.k.a. source) with the display name in MIME encoded-word syntax.

    friendly_from is the sender's display name. It may contain accents, so it is
    base64-encoded and wrapped as a utf-8 "B" encoded-word.
    email_from and sending_domain are expected to be ASCII only; both are passed
    through unidecode before being joined as <email_from@sending_domain>.

    Background, from the boto3 documentation:
    https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/ses/client/send_raw_email.html
    "If you want to use Unicode characters in the “friendly from” name, you must encode the “friendly from”
    name using MIME encoded-word syntax, as described in Sending raw email using the Amazon SES API."
    """
    # str.encode() defaults to utf-8, matching the charset declared in the encoded-word.
    display_name_bytes = friendly_from.encode()
    display_name_b64 = base64.b64encode(display_name_bytes).decode("utf-8")
    display_name = mime_encoded_word_syntax(encoded_text=display_name_b64, charset="utf-8", encoding="B")

    local_part = unidecode(email_from)
    domain = unidecode(sending_domain)
    return '"{}" <{}@{}>'.format(display_name, local_part, domain)


def send_email_to_provider(notification: Notification):
current_app.logger.info(f"Sending email to provider for notification id {notification.id}")
service = notification.service
Expand Down Expand Up @@ -267,8 +291,9 @@ def send_email_to_provider(notification: Notification):
else:
sending_domain = service.sending_domain

from_address = '"{}" <{}@{}>'.format(service.name, service.email_from, sending_domain)

from_address = get_from_address(
friendly_from=service.name, email_from=service.email_from, sending_domain=sending_domain
)
email_reply_to = notification.reply_to_text

reference = provider.send_email(
Expand Down
59 changes: 57 additions & 2 deletions tests/app/delivery/test_send_to_providers.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import uuid
from collections import namedtuple
from datetime import datetime
from unittest import TestCase
from unittest.mock import ANY, MagicMock, call

import pytest
Expand Down Expand Up @@ -153,7 +154,7 @@ def test_should_send_personalised_template_to_correct_email_provider_and_persist
send_to_providers.send_email_to_provider(db_notification)

app.aws_ses_client.send_email.assert_called_once_with(
'"Sample service" <[email protected]>',
'"=?utf-8?B?U2FtcGxlIHNlcnZpY2U=?=" <[email protected]>',
"[email protected]",
"Jo <em>some HTML</em>",
body="Hello Jo\nThis is an email from GOV.\u200bUK with <em>some HTML</em>\n",
Expand Down Expand Up @@ -244,7 +245,7 @@ def test_should_respect_custom_sending_domains(sample_service, mocker, sample_em
send_to_providers.send_email_to_provider(db_notification)

app.aws_ses_client.send_email.assert_called_once_with(
'"Sample service" <[email protected]>',
'"=?utf-8?B?U2FtcGxlIHNlcnZpY2U=?=" <[email protected]>',
"[email protected]",
"Jo <em>some HTML</em>",
body="Hello Jo\nThis is an email from GOV.\u200bUK with <em>some HTML</em>\n",
Expand Down Expand Up @@ -1221,3 +1222,57 @@ def test_check_service_over_bounce_rate_normal(self, mocker: MockFixture, notify
mock_logger = mocker.patch("app.notifications.validators.current_app.logger.warning")
assert send_to_providers.check_service_over_bounce_rate(fake_uuid) is None
mock_logger.assert_not_called()


@pytest.mark.parametrize(
    "encoded_text, charset, encoding, expected",
    [
        ("hello_world", "utf-8", "B", "=?utf-8?B?hello_world?="),
        ("hello_world", "utf-8", "Q", "=?utf-8?Q?hello_world?="),
        ("hello_world2", "utf-8", "B", "=?utf-8?B?hello_world2?="),
    ],
)
def test_mime_encoded_word_syntax_encoding(encoded_text, charset, encoding, expected):
    """The helper must place charset, encoding and text into the encoded-word envelope unchanged."""
    assert (
        send_to_providers.mime_encoded_word_syntax(
            encoded_text=encoded_text,
            charset=charset,
            encoding=encoding,
        )
        == expected
    )


class TestGetFromAddress(TestCase):
    """Checks the from-address string produced by send_to_providers.get_from_address.

    Each case passes a display name, a local part and a domain, and expects
    '"<encoded-word>" <local_part@domain>' where the encoded-word is the
    base64 of the utf-8 display name wrapped as =?utf-8?B?...?=.
    """

    def test_get_from_address_ascii(self):
        """A plain ASCII display name is still base64-encoded."""
        # Arrange
        friendly_from = "John Doe"
        email_from = "johndoe"
        sending_domain = "example.com"

        # Act
        result = send_to_providers.get_from_address(friendly_from, email_from, sending_domain)

        # Assert
        # The address part is email_from@sending_domain, i.e. johndoe@example.com.
        expected_result = '"=?utf-8?B?Sm9obiBEb2U=?=" <johndoe@example.com>'
        self.assertEqual(result, expected_result)

    def test_get_from_address_non_ascii(self):
        """Accented characters in the display name survive as base64 of their utf-8 bytes."""
        # Arrange
        friendly_from = "Jöhn Döe"
        email_from = "johndoe"
        sending_domain = "example.com"

        # Act
        result = send_to_providers.get_from_address(friendly_from, email_from, sending_domain)

        # Assert
        expected_result = '"=?utf-8?B?SsO2aG4gRMO2ZQ==?=" <johndoe@example.com>'
        self.assertEqual(result, expected_result)

    def test_get_from_address_empty_friendly_from(self):
        """An empty display name yields an encoded-word with empty encoded text."""
        # Arrange
        friendly_from = ""
        email_from = "johndoe"
        sending_domain = "example.com"

        # Act
        result = send_to_providers.get_from_address(friendly_from, email_from, sending_domain)

        # Assert
        expected_result = '"=?utf-8?B??=" <johndoe@example.com>'
        self.assertEqual(result, expected_result)