diff --git a/.github/actions/waffles/requirements.txt b/.github/actions/waffles/requirements.txt index de1263a9..b3d834f0 100644 --- a/.github/actions/waffles/requirements.txt +++ b/.github/actions/waffles/requirements.txt @@ -1,4 +1,4 @@ docopt==0.6.2 Flask==2.3.3 markupsafe==2.1.3 -git+https://github.com/cds-snc/notifier-utils.git@52.0.7#egg=notifications-utils \ No newline at end of file +git+https://github.com/cds-snc/notifier-utils.git@52.0.8#egg=notifications-utils diff --git a/notifications_utils/sanitise_text.py b/notifications_utils/sanitise_text.py index 8c43cba1..b6a56cc2 100644 --- a/notifications_utils/sanitise_text.py +++ b/notifications_utils/sanitise_text.py @@ -100,6 +100,7 @@ class SanitiseSMS(SanitiseText): # Welsh characters not already included in GSM WELSH_NON_GSM_CHARACTERS = set("ÂâÊêÎîÔôÛûŴŵŶŷ") + FRENCH_NON_GSM_CHARACTERS = set("ÀÂËÎÏÔŒÙÛâçêëîïôœû") ALLOWED_CHARACTERS = ( set( @@ -108,6 +109,7 @@ class SanitiseSMS(SanitiseText): + "^{}\\[~]|€" # character set extension ) | WELSH_NON_GSM_CHARACTERS + | FRENCH_NON_GSM_CHARACTERS ) diff --git a/pyproject.toml b/pyproject.toml index ede05b70..37d47a5b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,7 @@ include = '(notifications_utils|tests)/.*\.pyi?$' [tool.poetry] name = "notifications-utils" -version = "52.0.7" +version = "52.0.8" description = "Shared python code for Notification - Provides logging utils etc." 
authors = ["Canadian Digital Service"] license = "MIT license" diff --git a/tests/test_sanitise_text.py b/tests/test_sanitise_text.py index d5b46bf3..e78959bf 100644 --- a/tests/test_sanitise_text.py +++ b/tests/test_sanitise_text.py @@ -3,12 +3,16 @@ from notifications_utils.sanitise_text import SanitiseText, SanitiseSMS, SanitiseASCII +@pytest.mark.parametrize("chars, cls", [("ÀÂËÎÏÔŒÙÛâçêëîïôœû", SanitiseSMS)]) +def test_encode_chars_sms_fr_not_downgraded(chars, cls): + for char in chars: + assert cls.encode_char(char) == char + + params, ids = zip( (("a", "a"), "ascii char (a)"), # ascii control char (not in GSM) (("\t", " "), "ascii control char not in gsm (tab)"), - # these are not in GSM charset so are downgraded - (("ç", "c"), "decomposed unicode char (C with cedilla)"), # these unicode chars should change to something completely different for compatibility (("–", "-"), "compatibility transform unicode char (EN DASH (U+2013)"), (("—", "-"), "compatibility transform unicode char (EM DASH (U+2014)"),