Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix deepl variable translation issue 276 #290

Closed
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions rosetta/tests/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -1009,6 +1009,44 @@ def test_47_2_deeps_ajax_translation(self):
)
self.assertContains(r, '"Salut tout le monde"')

@vcr.use_cassette(
    "fixtures/vcr_cassettes/test_deepl_ajax_translation_with_variables.yaml",
    match_on=["method", "scheme", "port", "path", "query", "raw_body"],
    record_mode="once",
)
@override_settings(
    DEEPL_AUTH_KEY="FAKE",
    AZURE_CLIENT_SECRET=None,
)
def test_deepl_ajax_translation_with_variables(self):
    """Named placeholders must survive a translation through the AJAX view.

    Each source sentence contains ``%(items)d`` and ``%(name)s``; the
    translation returned by the ``rosetta.translate_text`` view must contain
    both placeholders unchanged.
    """
    expected = "There are %(items)d %(name)s available."
    cases = {
        "de": "Es gibt %(items)d %(name)s verfügbar.",
        "it": "Ci sono %(items)d %(name)s disponibili.",
        "pt": "Há %(items)d %(name)s disponíveis.",
    }
    url = reverse("rosetta.translate_text")
    for lang, text in cases.items():
        # subTest reports which language failed instead of stopping at the first.
        with self.subTest(lang=lang):
            # Pass the parameters as data so the test client URL-encodes them;
            # the text contains '%', spaces and non-ASCII characters that are
            # not safe to interpolate into a query string by hand.
            r = self.client.get(url, {"from": lang, "to": "en", "text": text})
            self.assertEqual(r.json().get("translation"), expected)

def test_formating_text_to_and_from_deepl(self):
    """``format_text`` must round-trip placeholders in both directions."""
    from ..translate_utils import format_text

    samples = [
        "Es gibt %(items)d %(name)s verfügbar.",
        "Ci sono %(items)d %(name)s disponibili.",
        "Há %(items)d %(name)s disponíveis.",
        "Stokta %(items)d %(name)s var.",
    ]
    for sample in samples:
        with self.subTest(sample=sample):
            to_deepl = format_text(sample, "to_deepl")
            from_deepl = format_text(to_deepl, "from_deepl")
            back_to_deepl = format_text(from_deepl, "to_deepl")
            # The placeholders must actually have been wrapped in tags.
            self.assertNotIn("%(", to_deepl)
            # Real round trip: converting back must restore the original
            # text. Without this check the test would still pass if the
            # "from_deepl" branch returned its input unchanged.
            self.assertEqual(from_deepl, sample)
            self.assertEqual(to_deepl, back_to_deepl)

@override_settings(ROSETTA_REQUIRES_AUTH=True)
def test_48_requires_auth_not_respected_issue_203(self):
self.client.logout()
Expand Down
42 changes: 39 additions & 3 deletions rosetta/translate_utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import json
import re
import uuid

import requests

from django.conf import settings
Expand Down Expand Up @@ -47,7 +47,41 @@ def translate(text, from_language, to_language):
raise TranslationException("No translation API service is configured.")


# Matches a named printf-style placeholder such as %(name)s, %(count)05d or
# %(price).2f. Group 1 is the mapping key; group 2 is the whole conversion
# spec (optional flags, width, precision and length modifier, then the
# conversion type letter).
_NAMED_PLACEHOLDER_RE = re.compile(
    r"%\((\w+)\)([#0\- +]*\d*(?:\.\d+)?[hlL]?[a-zA-Z])"
)


def format_text(text, direction="to_deepl"):
    """Convert named placeholders to and from the tag form sent to DeepL.

    With ``direction="to_deepl"`` every ``%(name)s``-style placeholder is
    rewritten as ``<var>name</var><type><var>s</var></type>``, so that both
    the variable name and its conversion spec sit inside ``<var>`` tags.
    With any other ``direction`` the tags are stripped again, turning
    ``<var>name</var><type><var>s</var></type>`` back into ``%(name)s``.

    :param text: The text to convert.
    :param direction: ``"to_deepl"`` to wrap placeholders in tags; any other
        value (conventionally ``"from_deepl"``) to restore them.
    :return: The converted text.
    """
    # TODO find a better name
    if direction == "to_deepl":

        def replace_variable(match):
            # Both groups are mandatory in the pattern, so a match always
            # provides a non-empty variable name and conversion spec.
            variable = match.group(1)
            type_specifier = match.group(2)
            return f"<var>{variable}</var><type><var>{type_specifier}</var></type>"

        return _NAMED_PLACEHOLDER_RE.sub(replace_variable, text)

    # Order matters: remove the <type> wrappers first so that the only tags
    # left are the <var>...</var> pair around the variable name.
    return (
        text.replace("<type><var>", "")
        .replace("</var></type>", "")
        .replace("<var>", "%(")
        .replace("</var>", ")")
    )


def translate_by_deepl(text, to_language, auth_key):
"""
This method connects to the translator Deepl API and fetches a response with translations.
:param text: The source text to be translated
:param to_language: The target language to translate the text into
Wraps variables in <var></var> tags and instructs Deepl not to translate those.
Then from Deepl response, converts back these tags to django variable syntax.
%(name)s becomes <var>name</var><type><var>s</var></type> and back to %(name)s in the response text.
:return: Returns the response from the Deepl as a python object.
"""
if auth_key.lower().endswith(":fx"):
endpoint = "https://api-free.deepl.com"
else:
Expand All @@ -57,16 +91,18 @@ def translate_by_deepl(text, to_language, auth_key):
f"{endpoint}/v2/translate",
headers={"Authorization": f"DeepL-Auth-Key {auth_key}"},
data={
"tag_handling": "xml",
"ignore_tags": "var",
"target_lang": to_language.upper(),
"text": text,
"text": format_text(text, "to_deepl"),
},
)
if r.status_code != 200:
raise TranslationException(
f"Deepl response is {r.status_code}. Please check your API key or try again later."
)
try:
return r.json().get("translations")[0].get("text")
return format_text(r.json().get("translations")[0].get("text"), "from_deepl")
except Exception:
raise TranslationException("Deepl returned a non-JSON or unexpected response.")

Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
interactions:
- request:
body: target_lang=FR&text=hello+world
body: tag_handling=xml&ignore_tags=var&target_lang=FR&text=hello+world
headers:
Accept:
- '*/*'
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
interactions:
- request:
body: tag_handling=xml&ignore_tags=var&target_lang=EN&text=Es+gibt+%3Cvar%3Eitems%3C%2Fvar%3E%3Ctype%3E%3Cvar%3Ed%3C%2Fvar%3E%3C%2Ftype%3E+%3Cvar%3Ename%3C%2Fvar%3E%3Ctype%3E%3Cvar%3Es%3C%2Fvar%3E%3C%2Ftype%3E+verf%C3%BCgbar.
headers:
Accept:
- '*/*'
Accept-Encoding:
- gzip, deflate
Authorization:
- DeepL-Auth-Key FAKE
Connection:
- keep-alive
Content-Length:
- '219'
Content-Type:
- application/x-www-form-urlencoded
User-Agent:
- python-requests/2.32.3
method: POST
uri: https://api-free.deepl.com/v2/translate
response:
body:
string: '{"translations":[{"detected_source_language":"DE","text":"There are
<var>items</var><type><var>d</var></type> <var>name</var><type><var>s</var></type>
available."}]}'
headers:
access-control-allow-origin:
- '*'
access-control-expose-headers:
- Server-Timing, X-Trace-ID
content-type:
- application/json
date:
- Mon, 10 Jun 2024 11:47:17 GMT
server-timing:
- l7_lb_tls;dur=99, l7_lb_idle;dur=4, l7_lb_receive;dur=2, l7_lb_total;dur=165
strict-transport-security:
- max-age=63072000; includeSubDomains; preload
transfer-encoding:
- chunked
vary:
- Accept-Encoding
x-trace-id:
- ceec50b6cca24e9cbeed2bd1bdf4435a
status:
code: 200
message: OK
- request:
body: tag_handling=xml&ignore_tags=var&target_lang=EN&text=Ci+sono+%3Cvar%3Eitems%3C%2Fvar%3E%3Ctype%3E%3Cvar%3Ed%3C%2Fvar%3E%3C%2Ftype%3E+%3Cvar%3Ename%3C%2Fvar%3E%3Ctype%3E%3Cvar%3Es%3C%2Fvar%3E%3C%2Ftype%3E+disponibili.
headers:
Accept:
- '*/*'
Accept-Encoding:
- gzip, deflate
Authorization:
- DeepL-Auth-Key FAKE
Connection:
- keep-alive
Content-Length:
- '216'
Content-Type:
- application/x-www-form-urlencoded
User-Agent:
- python-requests/2.32.3
method: POST
uri: https://api-free.deepl.com/v2/translate
response:
body:
string: '{"translations":[{"detected_source_language":"IT","text":"There are
<var>items</var><type><var>d</var></type> <var>name</var><type><var>s</var></type>
available."}]}'
headers:
access-control-allow-origin:
- '*'
access-control-expose-headers:
- Server-Timing, X-Trace-ID
content-type:
- application/json
date:
- Mon, 10 Jun 2024 11:47:17 GMT
server-timing:
- l7_lb_tls;dur=112, l7_lb_idle;dur=0, l7_lb_receive;dur=0, l7_lb_total;dur=215
strict-transport-security:
- max-age=63072000; includeSubDomains; preload
transfer-encoding:
- chunked
vary:
- Accept-Encoding
x-trace-id:
- c40d4791453848babe7024b55da74cba
status:
code: 200
message: OK
- request:
body: tag_handling=xml&ignore_tags=var&target_lang=EN&text=H%C3%A1+%3Cvar%3Eitems%3C%2Fvar%3E%3Ctype%3E%3Cvar%3Ed%3C%2Fvar%3E%3C%2Ftype%3E+%3Cvar%3Ename%3C%2Fvar%3E%3Ctype%3E%3Cvar%3Es%3C%2Fvar%3E%3C%2Ftype%3E+dispon%C3%ADveis.
headers:
Accept:
- '*/*'
Accept-Encoding:
- gzip, deflate
Authorization:
- DeepL-Auth-Key FAKE
Connection:
- keep-alive
Content-Length:
- '221'
Content-Type:
- application/x-www-form-urlencoded
User-Agent:
- python-requests/2.32.3
method: POST
uri: https://api-free.deepl.com/v2/translate
response:
body:
string: '{"translations":[{"detected_source_language":"PT","text":"There are
<var>items</var><type><var>d</var></type> <var>name</var><type><var>s</var></type>
available."}]}'
headers:
access-control-allow-origin:
- '*'
access-control-expose-headers:
- Server-Timing, X-Trace-ID
content-type:
- application/json
date:
- Mon, 10 Jun 2024 11:47:17 GMT
server-timing:
- l7_lb_tls;dur=101, l7_lb_idle;dur=4, l7_lb_receive;dur=1, l7_lb_total;dur=216
strict-transport-security:
- max-age=63072000; includeSubDomains; preload
transfer-encoding:
- chunked
vary:
- Accept-Encoding
x-trace-id:
- 7f1111989f574859b3db10f1bafa4efb
status:
code: 200
message: OK
version: 1
Loading