Skip to content

Commit

Permalink
Properly serialize Fluent placeables generated by Pretranslation (moz…
Browse files Browse the repository at this point in the history
…illa#3061)

After Fluent pretranslations are created, we need to parse and serialize them again in order to make sure they are stored in the canonical form (e.g. with spaces within curly braces).

Fix test:
- The following code only applies to non-FTL strings: "Empty strings and strings containing whitespace only do not need translation".
  • Loading branch information
mathjazz authored Jan 18, 2024
1 parent d9f47f3 commit 2a02872
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 27 deletions.
6 changes: 5 additions & 1 deletion pontoon/pretranslation/pretranslate.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,10 @@ def get_pretranslations(entity, locale, preserve_placeables=False):

pretranslation = serializer.serialize_entry(entry)

# Parse and serialize pretranslation again in order to ensure canonical style
parsed_pretranslation = parser.parse_entry(pretranslation)
pretranslation = serializer.serialize_entry(parsed_pretranslation)

authors = [services[service] for service in pretranslate.services]
author = max(set(authors), key=authors.count) if authors else services["tm"]

Expand All @@ -82,7 +86,7 @@ def get_pretranslations(entity, locale, preserve_placeables=False):


def get_pretranslated_data(source, locale, preserve_placeables):
# Empty strings do not need translation
# Empty strings and strings containing whitespace only do not need translation
if re.search("^\\s*$", source):
return source, "tm"

Expand Down
42 changes: 16 additions & 26 deletions pontoon/pretranslation/tests/test_pretranslate.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,22 @@ def test_get_pretranslations_empty_string(entity_a, locale_b, tm_user):
assert response == [("", None, tm_user)]


@pytest.mark.django_db
def test_get_pretranslations_whitespace(entity_a, locale_b, tm_user):
    """Whitespace-only source strings are returned unchanged, authored by tm_user."""
    # Exercise a single space, a tab and a newline in turn; each one must come
    # back exactly as it went in, with no plural form (None).
    for blank in (" ", "\t", "\n"):
        entity_a.string = blank
        result = get_pretranslations(entity_a, locale_b)
        assert result == [(blank, None, tm_user)]


@pytest.mark.django_db
def test_get_pretranslations_tm_match(entity_a, entity_b, locale_b, tm_user):
# 100% TM match exists
Expand Down Expand Up @@ -145,32 +161,6 @@ def test_get_pretranslations_fluent_empty(
assert response == [(pretranslated_string, None, gt_user)]


@pytest.mark.django_db
def test_get_pretranslations_fluent_whitespace(
fluent_resource, google_translate_locale, tm_user
):
# Various types of whitespace should be preserved.
# The Fluent message below uses only empty, space, tab and newline string
# literals as its variant values.
input_string = dedent(
"""
whitespace =
{ $count ->
[0] { "" }
[1] { " " }
*[other] { "\t" } { "\n" }
}
"""
)
fluent_entity = EntityFactory(resource=fluent_resource, string=input_string)

# The expected pretranslation is the source message itself.
output_string = input_string

# Re-serialize to match whitespace
pretranslated_string = serializer.serialize_entry(parser.parse_entry(output_string))

# The result is expected to be a single entry with no plural form (None),
# attributed to tm_user.
response = get_pretranslations(fluent_entity, google_translate_locale)
assert response == [(pretranslated_string, None, tm_user)]


@patch("pontoon.pretranslation.pretranslate.get_google_translate_data")
@pytest.mark.django_db
def test_get_pretranslations_fluent_accesskeys_no_attribute_source(
Expand Down

0 comments on commit 2a02872

Please sign in to comment.