Skip to content

Commit

Permalink
Add ability to upload personal translation memory (mozilla#3452)
Browse files Browse the repository at this point in the history
This is the final step towards Translation Memory Management: ability for team managers and translators to upload .TMX files to their locale's TM. 

Details:
- The maximum file upload size is 20 MB.
- Supported srclang values (read from <header> and <tu> elements) are en, en-US and en_US, matched case-insensitively.
- In <tuv> elements, lang or xml:lang value must match the locale code.
- Source-target combinations that already exist in the DB are not imported.

Also included:
* Cancel button that appears while editing or deleting a TM entry is now positioned to the left of the main button.
* Error messages are now presented as errors (red color), rather than regular messages (green).
  • Loading branch information
mathjazz authored Nov 27, 2024
1 parent 90eaebf commit f751a51
Show file tree
Hide file tree
Showing 6 changed files with 206 additions and 7 deletions.
5 changes: 4 additions & 1 deletion pontoon/teams/static/css/translation_memory.css
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,6 @@

.button {
background: var(--button-background-1);
margin-left: 5px;

.fa {
margin-right: 2px;
Expand All @@ -132,6 +131,10 @@
background: var(--status-translated);
}

.button.delete {
margin-left: 5px;
}

.button.delete:hover {
background: var(--status-error);
}
Expand Down
40 changes: 37 additions & 3 deletions pontoon/teams/static/js/translation_memory.js
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ $(function () {
updateURL(); // Update the URL with the new pages count and search query
},
error: function () {
Pontoon.endLoader('Error loading more TM entries.');
Pontoon.endLoader('Error loading more TM entries.', 'error');
loader.each(function () {
$(this).removeClass('loading');
});
Expand Down Expand Up @@ -125,7 +125,7 @@ $(function () {
node.html(new_target);
},
error: function () {
Pontoon.endLoader('Error editing TM entries.');
Pontoon.endLoader('Error editing TM entries.', 'error');
},
complete: function () {
row.removeClass('editing');
Expand Down Expand Up @@ -160,12 +160,46 @@ $(function () {
}, 500);
},
error: function () {
Pontoon.endLoader('Error deleting TM entries.');
Pontoon.endLoader('Error deleting TM entries.', 'error');
},
complete: function () {
row.removeClass('deleting');
},
});
},
);

// Upload TM entries: let the user pick a .tmx file and POST it to the
// locale's upload endpoint, then report the server's message.
$('body').on('click', '.translation-memory .upload-button', function () {
  // A fresh, detached file picker is created for every click.
  const picker = $('<input type="file" accept=".tmx">');

  picker.on('change', function (event) {
    // For a directly bound jQuery handler, currentTarget is the input itself.
    const selected = event.currentTarget.files[0];
    if (!selected) {
      // Nothing was chosen; there is nothing to send.
      return;
    }

    const payload = new FormData();
    payload.append('tmx_file', selected);
    payload.append('csrfmiddlewaretoken', $('body').data('csrf'));

    $.ajax({
      url: `/${locale}/ajax/translation-memory/upload/`,
      method: 'POST',
      data: payload,
      // Hand the FormData to the browser untouched so that it sets the
      // multipart boundary itself.
      processData: false,
      contentType: false,
      success(response) {
        Pontoon.endLoader(response.message);
      },
      error(xhr) {
        // Prefer the server-provided message, fall back to a generic one.
        const failure =
          xhr.responseJSON?.message ?? 'Error uploading TMX file.';
        Pontoon.endLoader(failure, 'error');
      },
    });
  });

  // Open the browser's file chooser.
  picker.click();
});
});
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,11 @@
<input class="table-filter" type="search" autocomplete="off" value="{{ search_query }}" autofocus
placeholder="Search translation memory">
</div>

<button class="upload-button button">
<span class="fa fa-upload"></span>
Upload .TMX
</button>
</menu>

<table class="translation-memory-list">
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@
<textarea>{{ entry.target }}</textarea>
</td>
<td class="actions controls">
<button class="cancel button">
<span class="fa fa-times"></span>
</button>
<button class="edit button">
<span class="fa fa-pencil-alt"></span>
Edit
Expand All @@ -34,9 +37,6 @@
<span class="fa fa-trash"></span>
Delete {{ entry.ids|length }} TM entr{{ entry.ids|length|pluralize("y,ies") }}?
</button>
<button class="cancel button">
<span class="fa fa-times"></span>
</button>
</td>
</tr>
{% endfor %}
Expand Down
6 changes: 6 additions & 0 deletions pontoon/teams/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,12 @@
views.ajax_translation_memory_delete,
name="pontoon.teams.ajax.translation-memory.delete",
),
# Upload .TMX file
path(
"translation-memory/upload/",
views.ajax_translation_memory_upload,
name="pontoon.teams.ajax.translation-memory.upload",
),
]
),
),
Expand Down
151 changes: 151 additions & 0 deletions pontoon/teams/views.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
import json
import logging
import re
import xml.etree.ElementTree as ET

import bleach

Expand Down Expand Up @@ -37,6 +40,9 @@
from pontoon.teams.forms import LocaleRequestForm


log = logging.getLogger(__name__)


def teams(request):
"""List all active localization teams."""
locales = Locale.objects.available().prefetch_related(
Expand Down Expand Up @@ -379,6 +385,151 @@ def ajax_translation_memory_delete(request, locale):
return HttpResponse("ok")


@require_AJAX
@require_POST
@permission_required_or_403("base.can_translate_locale", (Locale, "code", "locale"))
@transaction.atomic
def ajax_translation_memory_upload(request, locale):
    """Upload Translation Memory entries from a .TMX file.

    The file is read from ``request.FILES["tmx_file"]``. Every ``<tu>`` element
    whose source language (its own ``srclang`` attribute, falling back to the
    one on ``<header>``) is ``en``, ``en-US`` or ``en_US`` (case-insensitive)
    and which contains a non-empty source segment and a non-empty segment for
    the locale is turned into a ``TranslationMemoryEntry``. Source-target pairs
    that already exist for the locale, or that are repeated within the
    uploaded file, are not imported.

    Args:
        request: The POST request carrying the uploaded file.
        locale: Code of the locale whose Translation Memory is extended.

    Returns:
        A ``JsonResponse``. On success it holds ``status``, ``message`` and the
        ``parsed``, ``skipped_on_parse``, ``imported`` and ``duplicates``
        counters. With HTTP status 400 it holds ``status`` and ``message`` when
        the file is missing, larger than 20 MB, not named ``*.tmx``, not
        well-formed XML, or contains no usable entries.
    """
    try:
        file = request.FILES["tmx_file"]
    except MultiValueDictKeyError:
        return JsonResponse(
            {"status": False, "message": "No file uploaded."},
            status=400,
        )

    if file.size > 20 * 1024 * 1024:
        return JsonResponse(
            {
                "status": False,
                "message": "File size limit exceeded. The maximum allowed size is 20 MB.",
            },
            status=400,
        )

    # Compare the extension case-insensitively, so "memory.TMX" is accepted too.
    if not file.name.lower().endswith(".tmx"):
        return JsonResponse(
            {
                "status": False,
                "message": "Invalid file format. Only .TMX files are supported.",
            },
            status=400,
        )

    locale = get_object_or_404(Locale, code=locale)
    code = locale.code

    # Parse the TMX file
    # NOTE(review): the uploaded file is untrusted input and the standard
    # library's ElementTree is not hardened against maliciously constructed
    # XML; consider a hardened parser if one is available to the project.
    try:
        tree = ET.parse(file)
        root = tree.getroot()
    except ET.ParseError as e:
        return JsonResponse(
            {"status": False, "message": f"Invalid XML file: {e}"}, status=400
        )

    # Extract TM entries
    file_entries = []
    srclang_pattern = re.compile(r"^en(?:[-_](us))?$", re.IGNORECASE)
    ns = {"xml": "http://www.w3.org/XML/1998/namespace"}

    header = root.find("header")
    # An Element without children is falsy, so test against None explicitly;
    # otherwise the srclang of a childless <header> would be ignored.
    header_srclang = header.attrib.get("srclang", "") if header is not None else ""

    def get_seg_text(tu, lang, ns):
        """Return the stripped <seg> text of the <tuv> for ``lang``, or None."""
        # Try to find <tuv> with the xml:lang attribute
        seg = tu.find(f"./tuv[@xml:lang='{lang}']/seg", namespaces=ns)

        # If not found, try the lang attribute
        if seg is None:
            seg = tu.find(f"./tuv[@lang='{lang}']/seg")

        return seg.text.strip() if seg is not None and seg.text else None

    tu_elements = root.findall(".//tu")
    for tu in tu_elements:
        # Best effort: a single malformed <tu> must not abort the whole import.
        try:
            srclang = tu.attrib.get("srclang", header_srclang)

            if not srclang_pattern.match(srclang):
                # Serialize the element only when it is actually logged.
                tu_str = ET.tostring(tu, encoding="unicode")
                log.info(f"Skipping <tu> with unsupported srclang: {tu_str}")
                continue

            source = get_seg_text(tu, srclang, ns)
            target = get_seg_text(tu, code, ns)

            if source and target:
                file_entries.append({"source": source, "target": target})
            else:
                tu_str = ET.tostring(tu, encoding="unicode")
                log.info(f"Skipping <tu> with missing or empty segment: {tu_str}")

        except Exception as e:
            log.info(f"Error processing <tu>: {e}")

    if not file_entries:
        return JsonResponse(
            {"status": False, "message": "No valid translation entries found."},
            status=400,
        )

    # Create TranslationMemoryEntry objects
    tm_entries = [
        TranslationMemoryEntry(
            source=entry["source"],
            target=entry["target"],
            locale=locale,
        )
        for entry in file_entries
    ]

    # Filter out entries that already exist in the database, as well as
    # source-target pairs repeated within the uploaded file itself.
    seen_combinations = set(
        TranslationMemoryEntry.objects.filter(locale=locale).values_list(
            "source", "target"
        )
    )
    tm_entries_to_create = []
    for entry in tm_entries:
        combination = (entry.source, entry.target)
        if combination in seen_combinations:
            continue
        seen_combinations.add(combination)
        tm_entries_to_create.append(entry)

    created_entries = TranslationMemoryEntry.objects.bulk_create(
        tm_entries_to_create, batch_size=1000
    )

    log_action(
        ActionLog.ActionType.TM_ENTRIES_UPLOADED,
        request.user,
        tm_entries=created_entries,
    )

    parsed = len(file_entries)
    # <tu> elements that did not yield a usable source-target pair
    skipped_on_parse = len(tu_elements) - parsed
    imported = len(created_entries)
    duplicates = parsed - len(tm_entries_to_create)

    message = f"Importing TM entries complete. Imported: {imported}."
    if imported == 0:
        message = "No TM entries imported."

    if duplicates:
        message += f" Skipped duplicates: {duplicates}."

    return JsonResponse(
        {
            "status": True,
            "message": message,
            "parsed": parsed,
            "skipped_on_parse": skipped_on_parse,
            "imported": imported,
            "duplicates": duplicates,
        }
    )


@login_required(redirect_field_name="", login_url="/403")
@require_POST
def request_item(request, locale=None):
Expand Down

0 comments on commit f751a51

Please sign in to comment.