Skip to content

Commit

Permalink
Add Inuktitut to GCNotify (#270)
Browse files Browse the repository at this point in the history
* Add Inuktitut to GCNotify

* encode inuktitut

* Made a new version for utils
  • Loading branch information
jzbahrai authored Feb 1, 2024
1 parent 981fb6a commit 2da7468
Show file tree
Hide file tree
Showing 4 changed files with 159 additions and 2 deletions.
2 changes: 1 addition & 1 deletion .github/actions/waffles/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
docopt==0.6.2
Flask==2.3.3
markupsafe==2.1.4
git+https://github.com/cds-snc/notifier-utils.git@52.0.19#egg=notifications-utils
git+https://github.com/cds-snc/notifier-utils.git@52.1.0#egg=notifications-utils
10 changes: 10 additions & 0 deletions notifications_utils/sanitise_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,13 @@ class SanitiseSMS(SanitiseText):
# Welsh characters not already included in GSM
WELSH_NON_GSM_CHARACTERS = set("ÂâÊêÎîÔôÛûŴŵŶŷ")
FRENCH_NON_GSM_CHARACTESR = set("ÀÂËÎÏÔŒÙÛâçêëîïôœû")
INUKTITUK_CHARACTERS = set(
"ᐁᐯᑌᑫᕴᒉᒣᓀᓭᓓᔦᑦᔦᕓᕂᙯᖅᑫᙰᐃᐱᑎᑭᕵᒋᒥᓂᓯ𑪶𑪰ᓕᔨᑦᔨᖨᕕᕆᕿᖅᑭᖏᙱᖠᐄᐲᑏᑮᕶᒌᒦᓃᓰ𑪷𑪱ᓖᔩᑦᔩᖩᕖᕇᖀᖅᑮᖐᙲᖡᐅᐳᑐᑯᕷᒍᒧᓄᓱ𑪸𑪲ᓗᔪᑦᔪᖪᕗᕈᖁᖅᑯᖑᙳᖢᐊᐸᑕᑲᕹᒐᒪᓇᓴ𑪺𑪴ᓚᔭᑦᔭᖬᕙᕋᖃᖅᑲᖓᙵᖤᑉᑦᒃᕻᒡᒻᓐᔅᓪᔾᑦᔾᖮᕝᕐᖅᖅᒃᖕᖖᖦᖯᕼᑊ" # noqa: E501
)
CREE_CHARACTERS = set("ᐊᐁᐃᐅᐸᐯᐱᐳᑕᑌᑎᑐᑲᑫᑭᑯᒐᒉᒋᒍᒪᒣᒥᒧᓇᓀᓂᓄᓴᓭᓯᓱᔭᔦᔨᔪ")
OJIBWE_CHARACTERS = set(
"ᐁᐃᐅᐊᐄᐆᐋᐊᐊᐞᐊᐊᐊᐦᐊᐊᐊᐊᐦᐊᐊᐞᐊᐯᐱᐳᐸᐲᐴᐹᐊᑉᐊᣔᑌᑎᑐᑕᑏᑑᑖᐊᑦᐊᣕᑫᑭᑯᑲᑮᑰᑳᐊᒃᐊᣖᒉᒋᒍᒐᒌᒎᒑᐊᒡᐊᣗᒣᒥᒧᒪᒦᒨᒫᐊᒻᐊᣘᐊᒻᐊᐊᣘᐊᓀᓂᓄᓇᓃᓅᓈᐊᓐᐊᣙᐊᓐᐊᐊᣙᐊᓭᓯᓱᓴᓰᓲᓵᐊᔅᐊᣚᐊᔅᐊᐊᣚᐊᔐᔑᔓᔕᔒᔔᔖᐊᔥᐊᣛᐊᔥᐊᐊᣛᐊᔦᔨᔪᔭᔩᔫᔮᐊᔾᐊᐤᐊᐃᐧᐁᐧᐃᐧᐅᐧᐊᐧᐄᐧᐆᐧᐋᐊᐤᐊᐤᐊᣜᐦᐁᐦᐃᐦᐅᐦᐊᐦᐄᐦᐆᐦᐋᐊᐦᐊᐦᐊᐦᐊᐊᐦᐊ" # noqa: E501
)

ALLOWED_CHARACTERS = (
set(
Expand All @@ -110,6 +117,9 @@ class SanitiseSMS(SanitiseText):
)
| WELSH_NON_GSM_CHARACTERS
| FRENCH_NON_GSM_CHARACTESR
| INUKTITUK_CHARACTERS
| CREE_CHARACTERS
| OJIBWE_CHARACTERS
)


Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ include = '(notifications_utils|tests)/.*\.pyi?$'

[tool.poetry]
name = "notifications-utils"
version = "52.0.19"
version = "52.1.0"
description = "Shared python code for Notification - Provides logging utils etc."
authors = ["Canadian Digital Service"]
license = "MIT license"
Expand Down
147 changes: 147 additions & 0 deletions tests/test_sanitise_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,13 +47,160 @@ def test_encode_chars_the_same_for_ascii_and_sms(char, expected, cls):
(("Ŷ", "Ŷ", "Y"), "non-gsm Welsh char (capital y with hat)"),
)

# (char, expected) pairs for the Inuktitut SMS encoding test. In every pair the
# expected value is the character itself. The syllabics are grouped one row per
# string literal; order and repeated characters are kept as-is, so each
# occurrence yields its own parametrized case.
params_inuktitut = tuple(
    (syllabic, syllabic)
    for syllabic in (
        "ᐁᐯᑌᑫᕴᒉᒣᓀᓭᓓᔦᑦᔦᕓᕂᙯᖅᑫᙰ"
        "ᐃᐱᑎᑭᕵᒋᒥᓂᓯ\U00011ab6\U00011ab0ᓕᔨᑦᔨᖨᕕᕆᕿᖅᑭᖏᙱᖠ"
        "ᐄᐲᑏᑮᕶᒌᒦᓃᓰ\U00011ab7\U00011ab1ᓖᔩᑦᔩᖩᕖᕇᖀᖅᑮᖐᙲᖡ"
        "ᐅᐳᑐᑯᕷᒍᒧᓄᓱ\U00011ab8\U00011ab2ᓗᔪᑦᔪᖪᕗᕈᖁᖅᑯᖑᙳᖢ"
        "ᐊᐸᑕᑲᕹᒐᒪᓇᓴ\U00011aba\U00011ab4ᓚᔭᑦᔭᖬᕙᕋᖃᖅᑲᖓᙵᖤ"
        "ᑉᑦᒃᕻᒡᒻᓐᔅᓪᔾᑦᔾᖮᕝᕐᖅᖅᒃᖕᖖᖦᖯᕼᑊ"
    )
)


@pytest.mark.parametrize("char, expected_sms, expected_ascii", params, ids=ids)
def test_encode_chars_different_between_ascii_and_sms(char, expected_sms, expected_ascii):
    """Check a character against its expected SMS and ASCII encodings.

    The SMS encoding is checked first; the ASCII encoder is only invoked
    once the SMS assertion has passed.
    """
    sms_encoded = SanitiseSMS.encode_char(char)
    assert sms_encoded == expected_sms

    ascii_encoded = SanitiseASCII.encode_char(char)
    assert ascii_encoded == expected_ascii


@pytest.mark.parametrize("char, expected", params_inuktitut)
def test_encode_chars_inuktitut_sms(char, expected):
    """Check that SanitiseSMS.encode_char maps each Inuktitut test character to its expected value."""
    sms_encoded = SanitiseSMS.encode_char(char)
    assert sms_encoded == expected


@pytest.mark.parametrize(
"codepoint, char",
[
Expand Down

0 comments on commit 2da7468

Please sign in to comment.