diff --git a/scriptshifter/hooks/greek/__init__.py b/scriptshifter/hooks/greek/__init__.py index b0c0b10..f098375 100644 --- a/scriptshifter/hooks/greek/__init__.py +++ b/scriptshifter/hooks/greek/__init__.py @@ -77,12 +77,17 @@ def parse_numeral(ctx): characters mixed with letter characters without a space. Therefore, "͵ακακαα" would transliterate "1021kaa", and "͵αακαα", "1001kaa". """ - # Parse thousands. + # Parse ≥1000. if ctx.src[ctx.cur] == THOUSANDS_PREFIX: tk = ctx.src[ctx.cur + 1] try: - ctx.dest_ls.append(str(DIGITS[4][tk])) + # Exception for 2-letter digit. + if ctx.src[ctx.cur + 1: ctx.cur + 3] == "στ": + ctx.dest_ls.append(str(DIGITS[4]["στ"])) + ctx.cur += 1 + else: + ctx.dest_ls.append(str(DIGITS[4][tk])) ctx.cur += 2 except KeyError: @@ -104,8 +109,13 @@ def parse_numeral(ctx): ext[ext_cur] = str(DIGITS[3 - i][ctx.src[ctx.cur]]) ctx.cur += 1 except KeyError: - # If the number char is not in the correct position, pad with 0 - continue + # Exception for 2-letter digit. + if i == 2 and ctx.src[ctx.cur: ctx.cur + 2] == "στ": + ext[ext_cur] = "6" + ctx.cur += 2 + else: + # If the char is not in the correct position, pad with 0. + continue finally: ext_cur += 1 ctx.dest_ls.extend(ext) @@ -119,23 +129,51 @@ def parse_numeral(ctx): # transliterated characters. if ctx.src[ctx.cur] == NUM_SUFFIX: # Move back up to 3 positions. - for i in range(1, 4): - cur = ctx.cur - i + offset = 0 # Added offset if στ is found. + parsed = 0 # Parsed numeral to replace the alpha characters. + breakout = False # Break out of i loop. + + i = 1 # Current position in the numeral. 1 = units, 2 = tens, etc. + mark_pos = ctx.cur # Mark this position to resume parsing later. + while i < 4: + if breakout: + break + cur = ctx.cur - i - offset if cur >= 0: num_tk = ctx.src[cur] # Number to be parsed - if ctx.src[cur] in DIGITS[i]: - # Not yet reached word boundary. - ctx.dest_ls[-i] = str(DIGITS[i][num_tk]) - else: - if ctx.src[cur] != " ": # Word boundary. - # Something's wrong. + # Exception for στ. Scan one character farther left. + if ctx.src[cur - 1:cur + 1] == "στ": + num_tk = "στ" + offset = 1 + for j in range(i, 4): + i = j + if num_tk in DIGITS[j]: + # Not yet reached word boundary. + parsed += DIGITS[j][num_tk] * 10 ** (j - 1) + break + + if num_tk == " " or cur == 0: # Word boundary. + breakout = True + break + + # If we got here we tried all positions without finding a + # match. Something's wrong. + if j == 3: + # continue ctx.warnings.append( - f"Character `{ctx.src[cur] }` at position " + f"Character `{num_tk}` at position " f"{cur} is not a valid digit character " f"at place #{4 - i} in a numeral.") - ctx.cur += 1 - return CONT # Continue normal parsing. + # ctx.cur += 1 + offset + # return CONT # Continue normal parsing. + i += 1 + + if parsed > 0: + ctx.dest_ls = ( + ctx.dest_ls[:mark_pos - len(str(parsed)) - offset] + + [str(parsed)]) + + ctx.cur = mark_pos + 1 # Skip past numeral suffix. - ctx.cur += 1 return CONT diff --git a/scriptshifter/tables/data/greek_classical.yml b/scriptshifter/tables/data/greek_classical.yml index 68a8be4..15507f2 100644 --- a/scriptshifter/tables/data/greek_classical.yml +++ b/scriptshifter/tables/data/greek_classical.yml @@ -344,6 +344,7 @@ script_to_roman: "\u037C": "(." "\u037D": ".)" "\u037E": "?\u0333" + ";": "?" "\u037F": "J" # \u0380 reserved # \u0381 reserved @@ -594,6 +595,7 @@ script_to_roman: ".)\u0333": "\u03FF" ".)": "\u037D" "?\u0333": "\u037E" + "?": "\u037E" "\"\u0332": "\u201C" "\"\u0333": "\u201D" "'\u0332": "\u2018" diff --git a/tests/data/script_samples/greek.csv b/tests/data/script_samples/greek.csv index e5223de..e189653 100644 --- a/tests/data/script_samples/greek.csv +++ b/tests/data/script_samples/greek.csv @@ -10,7 +10,7 @@ greek_classical,ἀΰπνους νύκτας ἴαυον,aypnous nyktas iauon,, greek_classical,Λητοῦς καὶ Διὸς υἱός,Lētous kai Dios huios,, greek_classical,ὑϊκὸν πάσχειν,hyikon paschein,, greek_classical,εἶπε πρὸς τὸν ἄνδρα τὸν ἑωυτῆς,eipe pros ton andra ton heōutēs,, -greek_classical,τί τοῦδ’ ἂν εὕρημ’ ηὗρον εὐτυχέστερον;,ti toud’ an heurēm’ hēuron eutychesteron,, +greek_classical,τί τοῦδ’ ἂν εὕρημ’ ηὗρον εὐτυχέστερον;,ti toud’ an heurēm’ hēuron eutychesteron?,, greek_classical,Τοῦ Κατὰ πασῶν αἱρέσεων ἐλέγχου βιβλίον αʹ,Tou Kata pasōn haireseōn elenchou biblion 1,, greek_classical,καλὸν κἀγαθόν,kalon kagathon,, greek_classical,ᾤχοντο θοἰμάτιον λαβόντες μου,ōchonto thoimation labontes mou,, @@ -21,6 +21,9 @@ greek_classical,ἄλαϲτα δὲ ϝέργα πάθον κακὰ μηϲαμέ greek_classical,Δαμαρέτα τ’ ἐρατά τε Ϝιανθεμίϲ,Damareta t’ erata te Wianthemis,, greek_classical,ξένϝος,xenwos,, greek_classical,Πάτροϙλος,Patroḳlos,, +greek_classical,"λβʹ. Ἐπεὶ δὲ ἡ τύχη κράτιστον ἐπὶ πάντα τὰ ἀνθρώπεια, μηδὲ Ἡλιόδωρος ἀπαξιούσθω σοφιστῶν κύκλου παράδοξον ἀγώνισμα τύχης γενόμενος·","32. Epei de ē tychi kratiston epi panta ta anthrōpeia, mide Hēliodōros apaxiousthō sophistōn kyklou paradoxon agōnisma tychis genomenos",, +greek_classical,"κζʹ. Μὴ δεύτερα τῶν προειρημένων σοφιστῶν μηδὲ Ἱππόδρομόν τις ἡγείσθω τὸν Θετταλόν, τῶν μὲν γὰρ βελτίων φαίνεται, τῶν δὲ οὐκ οἶδα ὅ τι λείπεται","27. Mē deutera tōn proeirēmenōn sophistōn mide Ippodromon tis ēgeisthō ton Thettalon, tōn men gar beltiōn phainetai, tōn de ouk oida o ti leipetai",, +greek_classical,"ιγʹ. Πῶλον δὲ τὸν Ἀκραγαντῖνον Γοργίας σοφιστὴν ἐξεμελέτησε πολλῶν, ὥς φασι, χρημάτων, καὶ γὰρ δὴ καὶ τῶν πλουτούντων ὁ Πῶλος.","13. Pōlon de ton Akragantinon Gorgias sophistēn exemeletēse pollōn, ōs phasi, chrēmatōn, kai gar dē kai tōn ploutountōn o Pōlos",, greek_modern,"Ἐτήσια ἔκθεσις / Κυπριακὴ Δημοκρατία, Ὑπουργεῖον Ἐργασίας καὶ Κοινωνικῶν Ἀσφαλίσεων","Etēsia ekthesis / Kypriakē Dēmokratia, Hypourgeion Ergasias kai Koinōnikōn Asphaliseōn",, greek_modern,"Ετήσια έκθεση / Κυπριακή Δημοκρατία, Υπουργείο Εργασίας και Κοινωνικών Ασφαλίσεων","Etēsia ekthesē / Kypriakē Dēmokratia, Hypourgeio Ergasias kai Koinōnikōn Asphaliseōn",, greek_modern,Ελληνικό Ίδρυμα Ευρωπαϊκής και Εξωτερικής Πολιτικής,Hellēniko Hidryma Eurōpaikēs kai Exōterikēs Politikēs,,