Skip to content

Commit

Permalink
Ensure french characters do not get downgraded when sanitizing text (#…
Browse files Browse the repository at this point in the history
…241)

* Ensure french characters do not get downgraded when sanitizing text

* Bump utils & waffles version

* Remove a character from the FR exception list

* Update character set

* Update requirements.txt
  • Loading branch information
whabanks authored Sep 11, 2023
1 parent 1cf8eae commit 2fd0d33
Show file tree
Hide file tree
Showing 4 changed files with 10 additions and 4 deletions.
2 changes: 1 addition & 1 deletion .github/actions/waffles/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
docopt==0.6.2
Flask==2.3.3
markupsafe==2.1.3
git+https://github.com/cds-snc/notifier-utils.git@52.0.7#egg=notifications-utils
git+https://github.com/cds-snc/notifier-utils.git@52.0.8#egg=notifications-utils
2 changes: 2 additions & 0 deletions notifications_utils/sanitise_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ class SanitiseSMS(SanitiseText):

# Welsh characters not already included in GSM
WELSH_NON_GSM_CHARACTERS = set("ÂâÊêÎîÔôÛûŴŵŶŷ")
FRENCH_NON_GSM_CHARACTESR = set("ÀÂËÎÏÔŒÙÛâçêëîïôœû")

ALLOWED_CHARACTERS = (
set(
Expand All @@ -108,6 +109,7 @@ class SanitiseSMS(SanitiseText):
+ "^{}\\[~]|€" # character set extension
)
| WELSH_NON_GSM_CHARACTERS
| FRENCH_NON_GSM_CHARACTESR
)


Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ include = '(notifications_utils|tests)/.*\.pyi?$'

[tool.poetry]
name = "notifications-utils"
version = "52.0.7"
version = "52.0.8"
description = "Shared python code for Notification - Provides logging utils etc."
authors = ["Canadian Digital Service"]
license = "MIT license"
Expand Down
8 changes: 6 additions & 2 deletions tests/test_sanitise_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,16 @@
from notifications_utils.sanitise_text import SanitiseText, SanitiseSMS, SanitiseASCII


@pytest.mark.parametrize("chars, cls", [("ÀÂËÎÏÔŒÙÛâçêëîïôœû", SanitiseSMS)])
def test_encode_chars_sms_fr_not_downgraded(chars, cls):
    """Check that ``cls.encode_char`` returns each character of ``chars`` unchanged.

    ``chars`` is the parametrized string of accented French characters and
    ``cls`` is the sanitiser class under test; the test fails on the first
    character whose encoded form differs from the character itself.
    """
    for character in chars:
        encoded = cls.encode_char(character)
        assert encoded == character


params, ids = zip(
(("a", "a"), "ascii char (a)"),
# ascii control char (not in GSM)
(("\t", " "), "ascii control char not in gsm (tab)"),
# these are not in GSM charset so are downgraded
(("ç", "c"), "decomposed unicode char (C with cedilla)"),
# these unicode chars should change to something completely different for compatibility
(("–", "-"), "compatibility transform unicode char (EN DASH (U+2013)"),
(("—", "-"), "compatibility transform unicode char (EM DASH (U+2014)"),
Expand Down

0 comments on commit 2fd0d33

Please sign in to comment.