diff --git a/pontoon/pretranslation/pretranslate.py b/pontoon/pretranslation/pretranslate.py index c36471bb70..2388b2964e 100644 --- a/pontoon/pretranslation/pretranslate.py +++ b/pontoon/pretranslation/pretranslate.py @@ -58,6 +58,10 @@ def get_pretranslations(entity, locale, preserve_placeables=False): pretranslation = serializer.serialize_entry(entry) + # Parse and serialize pretranslation again in order to ensure canonical style + parsed_pretranslation = parser.parse_entry(pretranslation) + pretranslation = serializer.serialize_entry(parsed_pretranslation) + authors = [services[service] for service in pretranslate.services] author = max(set(authors), key=authors.count) if authors else services["tm"] @@ -82,7 +86,7 @@ def get_pretranslations(entity, locale, preserve_placeables=False): def get_pretranslated_data(source, locale, preserve_placeables): - # Empty strings do not need translation + # Empty strings and strings containing whitespace only do not need translation if re.search("^\\s*$", source): return source, "tm" diff --git a/pontoon/pretranslation/tests/test_pretranslate.py b/pontoon/pretranslation/tests/test_pretranslate.py index ee3a33843d..6ce9dc28ec 100644 --- a/pontoon/pretranslation/tests/test_pretranslate.py +++ b/pontoon/pretranslation/tests/test_pretranslate.py @@ -47,6 +47,22 @@ def test_get_pretranslations_empty_string(entity_a, locale_b, tm_user): assert response == [("", None, tm_user)] +@pytest.mark.django_db +def test_get_pretranslations_whitespace(entity_a, locale_b, tm_user): + # Entity.string contains only whitespace + entity_a.string = " " + response = get_pretranslations(entity_a, locale_b) + assert response == [(" ", None, tm_user)] + + entity_a.string = "\t" + response = get_pretranslations(entity_a, locale_b) + assert response == [("\t", None, tm_user)] + + entity_a.string = "\n" + response = get_pretranslations(entity_a, locale_b) + assert response == [("\n", None, tm_user)] + + @pytest.mark.django_db def 
test_get_pretranslations_tm_match(entity_a, entity_b, locale_b, tm_user): # 100% TM match exists @@ -145,32 +161,6 @@ def test_get_pretranslations_fluent_empty( assert response == [(pretranslated_string, None, gt_user)] -@pytest.mark.django_db -def test_get_pretranslations_fluent_whitespace( - fluent_resource, google_translate_locale, tm_user -): - # Various types of whitespace should be preserved - input_string = dedent( - """ - whitespace = - { $count -> - [0] { "" } - [1] { " " } - *[other] { "\t" } { "\n" } - } - """ - ) - fluent_entity = EntityFactory(resource=fluent_resource, string=input_string) - - output_string = input_string - - # Re-serialize to match whitespace - pretranslated_string = serializer.serialize_entry(parser.parse_entry(output_string)) - - response = get_pretranslations(fluent_entity, google_translate_locale) - assert response == [(pretranslated_string, None, tm_user)] - - @patch("pontoon.pretranslation.pretranslate.get_google_translate_data") @pytest.mark.django_db def test_get_pretranslations_fluent_accesskeys_no_attribute_source(