-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathpo_excel_translate.py
510 lines (400 loc) · 18 KB
/
po_excel_translate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
import os
import sys
import time
import click
import polib
import openpyxl
from typing import List
from pathlib import Path
from enum import Enum, unique
from collections import OrderedDict
from openpyxl.styles import Font, Alignment, Protection
from openpyxl.utils import get_column_letter
from openpyxl.cell import WriteOnlyCell
class ColumnHeaders:
    """Captions of the fixed (non-locale) columns of the translation sheet."""

    # Column holding the msgctxt (namespace) of an entry.
    message_context = "Message context"
    # Column holding the msgid, i.e. the source string to translate.
    message_id = "Message id"
    # Column holding the extracted (source) comment of an entry.
    comment_source = "Source comment"
    # Column holding the translator comment of an entry.
    comment_translator = "Translator comment"
    # Column holding the occurrences ("file:line") of an entry.
    comment_references = "References"
@unique
class CommentType(Enum):
    """Kinds of PO entry comments that can be selected for export."""

    # NONE = "None"
    SOURCE = "Extracted"
    TRANSLATOR = "Translator"
    REFERENCES = "References"
    ALL = "All"

    def __str__(self):
        """Return the member's raw value instead of ``CommentType.NAME``."""
        return self.value

    @classmethod
    def get_all(cls):
        """Return every member as a list, in definition order."""
        return [member for member in cls]
class PortableObjectFile:
    """A parsed .po file together with the locale it belongs to."""

    def __init__(self, file_path, locale=None, encoding="utf-8"):
        """
        Parse ``file_path`` with polib and determine its locale.

        ``file_path`` may also be given as "<locale>:<path>"; that form is
        only recognised when the raw value is not itself an existing path.
        The locale is taken, in order of precedence, from the prefix, the
        ``locale`` argument, the "Language" metadata key and finally the
        file name without its extension.
        """
        raw_path = str(file_path)
        self.po_file = None
        self.locale = locale

        # The user passed a <locale>:<path> value
        if ":" in raw_path and not os.path.exists(raw_path):
            self.locale, raw_path = raw_path.split(":", 1)

        self.file_path = Path(raw_path).resolve()
        self.po_file = polib.pofile(self.file_path, encoding=encoding)

        # Fallback to metadata, then to the filename without extension
        if not self.locale:
            self.locale = self.po_file.metadata.get("Language") or self.file_path.stem

    def has_any_message_context(self):
        """Return True when at least one entry carries a non-empty msgctxt."""
        for entry in self.po_file:
            if entry.msgctxt:
                return True
        return False
class PortableObjectFileToXLSX:
    """
    Convert .PO files to an XLSX file.

    po-to-xls tries to guess the locale for PO files by looking at the
    "Language" key in the PO metadata, falling back to the filename. You
    can also specify the locale manually by prefixing the filename
    with "<locale>:". For example: "nl:locales/nl/mydomain.po".
    """

    def __init__(
        self,
        po_files: List[PortableObjectFile],
        comment_types: List[CommentType],
        output_file_path: Path,
        width_message_context: int = 20,
        width_message_id: int = 80,
        width_message_translation: int = 80,
        width_comments: int = 50,
        wrap_message_id: bool = True,
        wrap_comments: bool = False,
        wrap_message_translation: bool = True,
        always_write_message_context: bool = False,
        lock_sheet: bool = False,
        font_regular_name: str = "Verdana",
        font_regular_size: int = 11,
    ):
        """
        Build the workbook and write it to ``output_file_path`` right away.

        message_context = namespace, is optional
        message_id = source string to translate

        The constructor does all the work: it styles the sheet, writes the
        header row and one row per unique (msgid, msgctxt) pair, then saves.
        """
        self.po_files = po_files
        self.output_file_path = output_file_path
        self.comment_types = comment_types

        # Widths should be in range [0, 200]
        self.width_message_context = width_message_context
        self.width_message_id = width_message_id
        self.width_message_translation = width_message_translation
        self.width_comments = width_comments

        # Wrap options
        self.wrap_message_id = wrap_message_id
        self.wrap_comments = wrap_comments
        self.wrap_message_translation = wrap_message_translation

        # Should we lock some cells for protection; when the sheet is locked
        # the translation cells are the ones that get unlocked.
        self.lock_sheet = lock_sheet
        self.unlock_message_locale = self.lock_sheet

        self.always_write_message_context = always_write_message_context

        # These three flags are filled in by get_column_names()
        self.has_comment_references = False
        self.has_comment_source = False
        self.has_comment_translator = False

        # Has message context/namespace/group name
        self.has_message_context = bool(self.always_write_message_context) or any(
            po_file.has_any_message_context() for po_file in self.po_files
        )

        # Fonts
        self.font_regular_name = font_regular_name
        self.font_regular_size = font_regular_size
        self.font_regular = Font(name=self.font_regular_name, size=self.font_regular_size)
        self.font_regular_bold = Font(name=self.font_regular_name, size=self.font_regular_size, bold=True)
        self.font_fuzzy = Font(italic=True, bold=True)

        # Alignment
        self.alignment_wrap_text = Alignment(wrap_text=True)
        self.alignment_shrink_to_fit = Alignment(shrink_to_fit=True)

        # NOTE: using optimized mode
        self.work_book = openpyxl.Workbook(write_only=True)
        self.work_sheet = self.work_book.create_sheet(title="Translations")
        self.column_names = self.get_column_names()

        # NOTE: if we are not using optimized mode we should move this
        self.apply_style()
        self.write_columns_header()
        self.write_body()
        self.save()

    def get_column_names(self):
        """
        Return the ordered list of column captions.

        Side effect: sets the ``has_comment_*`` flags according to
        ``self.comment_types``.
        """
        columns = []
        if self.has_message_context:
            columns.append(ColumnHeaders.message_context)
        columns.append(ColumnHeaders.message_id)

        # Comment headers; CommentType.ALL switches every comment column on
        include_all = CommentType.ALL in self.comment_types
        if include_all or CommentType.REFERENCES in self.comment_types:
            self.has_comment_references = True
            columns.append(ColumnHeaders.comment_references)
        if include_all or CommentType.SOURCE in self.comment_types:
            self.has_comment_source = True
            columns.append(ColumnHeaders.comment_source)
        if include_all or CommentType.TRANSLATOR in self.comment_types:
            self.has_comment_translator = True
            columns.append(ColumnHeaders.comment_translator)

        # The languages headers
        columns.extend(f.locale for f in self.po_files)
        return columns

    # NOTE: excel uses 1 base indexing
    def get_column_index_message_context(self) -> int:
        """Return the 1-based index of the message context column."""
        return self.column_names.index(ColumnHeaders.message_context) + 1

    def get_column_index_message_id(self) -> int:
        """Return the 1-based index of the message id column."""
        return self.column_names.index(ColumnHeaders.message_id) + 1

    def get_columns_indices_comments(self) -> List[int]:
        """Return the 1-based indices of the enabled comment columns."""
        indices = []
        # index is 1 based
        if self.has_comment_references:
            indices.append(self.column_names.index(ColumnHeaders.comment_references) + 1)
        if self.has_comment_source:
            indices.append(self.column_names.index(ColumnHeaders.comment_source) + 1)
        if self.has_comment_translator:
            indices.append(self.column_names.index(ColumnHeaders.comment_translator) + 1)
        return indices

    def get_column_indices_locales(self) -> List[int]:
        """Return the 1-based indices of the locale (translation) columns."""
        indices = []
        for f in self.po_files:
            try:
                indices.append(self.column_names.index(f.locale) + 1)
            except ValueError:
                # Locale does not exist
                pass
        return indices

    def get_column_letter_message_context(self) -> str:
        """Return the column letter of the message context column."""
        return get_column_letter(self.get_column_index_message_context())

    def get_column_letter_message_id(self):
        """Return the column letter of the message id column."""
        return get_column_letter(self.get_column_index_message_id())

    def get_column_message_context(self):
        """Return the column dimension object of the message context column."""
        return self.work_sheet.column_dimensions[self.get_column_letter_message_context()]

    def get_column_message_id(self):
        """Return the column dimension object of the message id column."""
        return self.work_sheet.column_dimensions[self.get_column_letter_message_id()]

    def apply_style(self):
        """Apply sheet protection, column widths, frozen panes and column fonts."""
        # NOTE: Because we are using optimized mode we must set these before writing anything
        # https://openpyxl.readthedocs.io/en/stable/optimized.html
        # Reference: https://automatetheboringstuff.com/chapter12/

        # Lock
        if self.lock_sheet:
            self.work_sheet.protection.sheet = True

        #
        # Set sizes
        #
        # Message context and id
        if self.has_message_context:
            self.get_column_message_context().width = self.width_message_context
        self.get_column_message_id().width = self.width_message_id

        # Comments
        for i in self.get_columns_indices_comments():
            self.work_sheet.column_dimensions[get_column_letter(i)].width = self.width_comments

        # Translations
        for i in self.get_column_indices_locales():
            self.work_sheet.column_dimensions[get_column_letter(i)].width = self.width_message_translation

        # Freeze the first row and the first 2 columns in a single assignment
        # (an earlier "A2" assignment was immediately overwritten by this one).
        self.work_sheet.freeze_panes = "C2"

        # Set fonts, extending 5 columns to the right of the last used one
        # index is 1 based
        for column_index in range(1, len(self.column_names) + 6):
            self.work_sheet.column_dimensions[get_column_letter(column_index)].font = self.font_regular

    def get_cell(self, value, wrap=False, shrink_to_fit=False, bold=False, unlock=False) -> WriteOnlyCell:
        """
        Create a styled write-only cell.

        ``wrap`` takes precedence over ``shrink_to_fit``; ``unlock`` marks the
        cell as not locked so it stays editable on a protected sheet.
        """
        cell = WriteOnlyCell(self.work_sheet, value=value)
        cell.font = self.font_regular_bold if bold else self.font_regular
        if wrap:
            cell.alignment = self.alignment_wrap_text
        elif shrink_to_fit:
            cell.alignment = self.alignment_shrink_to_fit
        if unlock:
            cell.protection = Protection(locked=False)
        return cell

    def _get_comment_cell(self, value) -> WriteOnlyCell:
        """Create a cell for one of the comment columns (wrap or shrink-to-fit)."""
        return self.get_cell(value, wrap=self.wrap_comments, shrink_to_fit=not self.wrap_comments)

    def write_columns_header(self):
        """Append the bold header row."""
        self.work_sheet.append([self.get_cell(name, bold=True) for name in self.column_names])

    def write_body(self):
        """
        Append one row per unique (msgid, msgctxt) pair found in any file.

        Comment columns are filled from the first PO file only; each locale
        column is filled from its own PO file.
        """
        # Collect the messages, keeping first-seen order and skipping
        # header-like (empty msgid) and obsolete entries
        messages = []
        seen = set()
        for f in self.po_files:
            for msg in f.po_file:
                if not msg.msgid or msg.obsolete:
                    continue
                key = (msg.msgid, msg.msgctxt)
                if key not in seen:
                    seen.add(key)
                    messages.append(key)

        # Used to write the first columns. Guarded so that an empty list of
        # PO files yields a header-only sheet instead of an IndexError.
        reference_po_file = self.po_files[0].po_file if self.po_files else None

        # The rest of the rows
        for msgid, msgctxt in messages:
            row = []

            # Message context
            if self.has_message_context:
                row.append(self.get_cell(msgctxt))

            # Message id
            row.append(self.get_cell(msgid, wrap=self.wrap_message_id))

            reference = reference_po_file.find(msgid, msgctxt=msgctxt)

            # Metadata comment columns
            if self.has_comment_references:
                occurrences = []
                if reference is not None:
                    for (entry, lineno) in reference.occurrences:
                        occurrences.append("%s:%s" % (entry, lineno) if lineno else entry)
                row.append(self._get_comment_cell(", ".join(occurrences) if occurrences else None))

            if self.has_comment_source:
                row.append(self._get_comment_cell(None if reference is None else reference.comment))

            if self.has_comment_translator:
                row.append(self._get_comment_cell(None if reference is None else reference.tcomment))

            # Write the language columns, aka strings to translate
            for f in self.po_files:
                translation = f.po_file.find(msgid, msgctxt=msgctxt)
                cell = self.get_cell(
                    None if translation is None else translation.msgstr,
                    wrap=self.wrap_message_translation,
                    unlock=self.unlock_message_locale,
                )
                # Fuzzy translations get a distinct font but must stay
                # wrapped and editable like any other translation cell.
                if translation is not None and "fuzzy" in translation.flags:
                    cell.font = self.font_fuzzy
                row.append(cell)

            self.work_sheet.append(row)

    def save(self):
        """Write the workbook to ``self.output_file_path``."""
        self.work_book.save(str(self.output_file_path))
class XLSXToPortableObjectFile:
    """
    Convert a locale from an XLSX file to a .PO file
    """

    def __init__(
        self,
        locale: str,
        input_file_path: Path,
        output_file_path: Path,
        wrap_width: int = 240,
        copy_metadata_from_target: bool = True,
        encoding="utf-8",
    ):
        """
        Read every sheet of ``input_file_path`` and write ``output_file_path``.

        locale: caption of the column holding the translations to export.
        input_file_path: the XLSX workbook to read.
        output_file_path: the PO file to write; when it already exists and
            ``copy_metadata_from_target`` is true its metadata is reused.
        wrap_width: line wrap width handed to polib.
        encoding: encoding used to read the existing PO file and for the new one.

        Exits the process (via ``sys.exit``) when no message was found.
        """
        self.input_file_path = input_file_path
        self.output_file_path = output_file_path
        self.copy_metadata_from_target = copy_metadata_from_target
        self.book = openpyxl.load_workbook(input_file_path)

        # Already has file?
        existing_po_file = None
        if output_file_path.exists():
            existing_po_file = polib.pofile(output_file_path, encoding=encoding)

        self.po_file = polib.POFile(wrap_width=wrap_width, encoding=encoding)
        self.po_file.header = "This file was generated from %s" % input_file_path
        self.po_file.metadata_is_fuzzy = False

        # Copy metadata. Compare against None: a PO file object is falsy when
        # it has no entries (see the "no messages" check below), and its
        # metadata should still be reused in that case.
        if copy_metadata_from_target and existing_po_file is not None:
            self.po_file.metadata = OrderedDict(existing_po_file.metadata)
        else:
            self.po_file.metadata = OrderedDict()

        root_dir = os.path.dirname(os.path.abspath(__file__))
        with open(os.path.join(root_dir, "VERSION")) as version_file:
            version = version_file.read().strip()

        self.po_file.metadata["PO-Revision-Date"] = self.po_timestamp(input_file_path)
        self.po_file.metadata["Content-Type"] = "text/plain; charset=UTF-8"
        self.po_file.metadata["Content-Transfer-Encoding"] = "8bit"
        self.po_file.metadata["Language"] = locale
        self.po_file.metadata["Generated-By"] = f"xls2po {version}"

        # Transfer data; a sheet needs a header row plus at least one data row
        for sheet in self.book.worksheets:
            if sheet.max_row < 2:
                continue
            print("Processing sheet %s" % sheet.title)
            self._import_sheet(sheet, locale)

        if not self.po_file:
            # sys.exit accepts a single argument; a string is printed to
            # stderr and results in exit status 1.
            sys.exit("No messages found, aborting")

        self.save()

    def _import_sheet(self, sheet, locale):
        """Append one PO entry per data row of ``sheet`` that has a message id."""
        row_iterator = sheet.iter_rows()
        # Map header caption -> 0-based column index
        headers = {cell.value: index for index, cell in enumerate(next(row_iterator))}

        message_context_column_index = headers.get(ColumnHeaders.message_context)
        message_id_column_index = headers.get(ColumnHeaders.message_id)
        comment_translator_column_index = headers.get(ColumnHeaders.comment_translator)
        comment_references_column_index = headers.get(ColumnHeaders.comment_references)
        comment_column_index = headers.get(ColumnHeaders.comment_source)
        message_locale_column_index = headers.get(locale)

        if message_id_column_index is None:
            print('Could not find a "%s" column' % ColumnHeaders.message_id, file=sys.stderr)
            return
        if message_locale_column_index is None:
            print('Could not find a "%s" column' % locale, file=sys.stderr)
            return

        # Process each value; the header is row 1, so data starts at row 2
        for row_index, cells in enumerate(row_iterator, start=2):
            row = [c.value for c in cells]
            if not row[message_id_column_index]:
                continue
            try:
                msgid = row[message_id_column_index]
                msgstr = row[message_locale_column_index]
                # Empty cell: export as an untranslated entry
                if msgstr is None:
                    msgstr = ""
                # Type different than default (e.g. a numeric cell)
                if not isinstance(msgstr, str):
                    print(f"[WARNING][row={row_index}] key={msgid} got value of type = {type(msgstr)}")

                entry = polib.POEntry(msgid=str(msgid), msgstr=str(msgstr))

                message_context = self._optional_cell(row, message_context_column_index)
                if message_context:
                    entry.msgctxt = str(message_context)

                # Comment columns are optional and may legitimately sit at
                # index 0; empty cells must not become the text "None".
                translator_comment = self._optional_cell(row, comment_translator_column_index)
                if translator_comment:
                    entry.tcomment = str(translator_comment)

                source_comment = self._optional_cell(row, comment_column_index)
                if source_comment:
                    entry.comment = str(source_comment)

                references = self._optional_cell(row, comment_references_column_index)
                if references:
                    entry.occurrences = self._parse_occurrences(str(references))

                self.po_file.append(entry)
            except IndexError:
                print("Row %s is too short" % row)

    @staticmethod
    def _optional_cell(row, column_index):
        """Return ``row[column_index]``, or None when the column is absent."""
        if column_index is None:
            return None
        return row[column_index]

    @staticmethod
    def _parse_occurrences(text):
        """
        Parse "path:lineno, other_path" into a list of (path, lineno) tuples.

        A chunk whose part after the last ":" is not all digits is kept
        whole with an empty line number.
        """
        occurrences = []
        for chunk in text.split(","):
            chunk = chunk.strip()
            if not chunk:
                continue
            path, separator, lineno = chunk.rpartition(":")
            if separator and lineno.isdigit():
                occurrences.append((path, lineno))
            else:
                occurrences.append((chunk, ""))
        return occurrences

    def po_timestamp(self, filename):
        """
        Return the modification time of ``filename`` as "YYYY-MM-DD HH:MM+ZZZZ".

        The time is expressed in local time followed by the local UTC offset.
        """
        local = time.localtime(os.stat(filename).st_mtime)
        # time.timezone / time.altzone are seconds WEST of UTC, hence the negation
        offset = -(time.altzone if local.tm_isdst else time.timezone)
        return "%s%s%s" % (
            time.strftime("%Y-%m-%d %H:%M", local),
            "-" if offset < 0 else "+",
            time.strftime("%H%M", time.gmtime(abs(offset))),
        )

    def save(self):
        """
        Save catalog to a PO file.
        """
        self.po_file.save(str(self.output_file_path))