Skip to content

Commit

Permalink
Fix Greek numerals logic; add test strings.
Browse files Browse the repository at this point in the history
  • Loading branch information
scossu committed Jul 8, 2024
1 parent f74b1ba commit 7f1c33f
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 17 deletions.
70 changes: 54 additions & 16 deletions scriptshifter/hooks/greek/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,12 +77,17 @@ def parse_numeral(ctx):
characters mixed with letter characters without a space. Therefore,
"͵ακακαα" would be transliterated as "1021kaa", and "͵αακαα" as "1001kaa".
"""
# Parse thousands.
# Parse ≥1000.
if ctx.src[ctx.cur] == THOUSANDS_PREFIX:
tk = ctx.src[ctx.cur + 1]

try:
ctx.dest_ls.append(str(DIGITS[4][tk]))
# Exception for 2-letter digit.
if ctx.src[ctx.cur + 1: ctx.cur + 3] == "στ":
ctx.dest_ls.append(str(DIGITS[4]["στ"]))
ctx.cur += 1
else:
ctx.dest_ls.append(str(DIGITS[4][tk]))
ctx.cur += 2

except KeyError:
Expand All @@ -104,8 +109,13 @@ def parse_numeral(ctx):
ext[ext_cur] = str(DIGITS[3 - i][ctx.src[ctx.cur]])
ctx.cur += 1
except KeyError:
# If the number char is not in the correct position, pad with 0
continue
# Exception for 2-letter digit.
if i == 2 and ctx.src[ctx.cur: ctx.cur + 2] == "στ":
ext[ext_cur] = "6"
ctx.cur += 2
else:
# If the char is not in the correct position, pad with 0.
continue
finally:
ext_cur += 1
ctx.dest_ls.extend(ext)
Expand All @@ -119,23 +129,51 @@ def parse_numeral(ctx):
# transliterated characters.
if ctx.src[ctx.cur] == NUM_SUFFIX:
# Move back up to 3 positions.
for i in range(1, 4):
cur = ctx.cur - i
offset = 0 # Added offset if στ is found.
parsed = 0 # Parsed numeral to replace the alpha characters.
breakout = False # Break out of i loop.

i = 1 # Current position in the numeral. 1 = units, 2 = tens, etc.
mark_pos = ctx.cur # Mark this position to resume parsing later.
while i < 4:
if breakout:
break
cur = ctx.cur - i - offset
if cur >= 0:
num_tk = ctx.src[cur] # Number to be parsed
if ctx.src[cur] in DIGITS[i]:
# Not yet reached word boundary.
ctx.dest_ls[-i] = str(DIGITS[i][num_tk])
else:
if ctx.src[cur] != " ": # Word boundary.
# Something's wrong.
# Exception for στ. Scan one character farther left.
if ctx.src[cur - 1:cur + 1] == "στ":
num_tk = "στ"
offset = 1
for j in range(i, 4):
i = j
if num_tk in DIGITS[j]:
# Not yet reached word boundary.
parsed += DIGITS[j][num_tk] * 10 ** (j - 1)
break

if num_tk == " " or cur == 0: # Word boundary.
breakout = True
break

# If we got here we tried all positions without finding a
# match. Something's wrong.
if j == 3:
# continue
ctx.warnings.append(
f"Character `{ctx.src[cur] }` at position "
f"Character `{num_tk}` at position "
f"{cur} is not a valid digit character "
f"at place #{4 - i} in a numeral.")

ctx.cur += 1
return CONT # Continue normal parsing.
# ctx.cur += 1 + offset
# return CONT # Continue normal parsing.
i += 1

if parsed > 0:
ctx.dest_ls = (
ctx.dest_ls[:mark_pos - len(str(parsed)) - offset]
+ [str(parsed)])

ctx.cur = mark_pos + 1 # Skip past numeral suffix.

ctx.cur += 1
return CONT
2 changes: 2 additions & 0 deletions scriptshifter/tables/data/greek_classical.yml
Original file line number Diff line number Diff line change
Expand Up @@ -344,6 +344,7 @@ script_to_roman:
"\u037C": "(."
"\u037D": ".)"
"\u037E": "?\u0333"
";": "?"
"\u037F": "J"
# \u0380 reserved
# \u0381 reserved
Expand Down Expand Up @@ -594,6 +595,7 @@ script_to_roman:
".)\u0333": "\u03FF"
".)": "\u037D"
"?\u0333": "\u037E"
"?": "\u037E"
"\"\u0332": "\u201C"
"\"\u0333": "\u201D"
"'\u0332": "\u2018"
Expand Down
5 changes: 4 additions & 1 deletion tests/data/script_samples/greek.csv
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ greek_classical,ἀΰπνους νύκτας ἴαυον,aypnous nyktas iauon,,
greek_classical,Λητοῦς καὶ Διὸς υἱός,Lētous kai Dios huios,,
greek_classical,ὑϊκὸν πάσχειν,hyikon paschein,,
greek_classical,εἶπε πρὸς τὸν ἄνδρα τὸν ἑωυτῆς,eipe pros ton andra ton heōutēs,,
greek_classical,τί τοῦδ’ ἂν εὕρημ’ ηὗρον εὐτυχέστερον;,ti toud’ an heurēm’ hēuron eutychesteron,,
greek_classical,τί τοῦδ’ ἂν εὕρημ’ ηὗρον εὐτυχέστερον;,ti toud’ an heurēm’ hēuron eutychesteron?,,
greek_classical,Τοῦ Κατὰ πασῶν αἱρέσεων ἐλέγχου βιβλίον αʹ,Tou Kata pasōn haireseōn elenchou biblion 1,,
greek_classical,καλὸν κἀγαθόν,kalon kagathon,,
greek_classical,ᾤχοντο θοἰμάτιον λαβόντες μου,ōchonto thoimation labontes mou,,
Expand All @@ -21,6 +21,9 @@ greek_classical,ἄλαϲτα δὲ ϝέργα πάθον κακὰ μηϲαμέ
greek_classical,Δαμαρέτα τ’ ἐρατά τε Ϝιανθεμίϲ,Damareta t’ erata te Wianthemis,,
greek_classical,ξένϝος,xenwos,,
greek_classical,Πάτροϙλος,Patroḳlos,,
greek_classical,"λβʹ. Ἐπεὶ δὲ ἡ τύχη κράτιστον ἐπὶ πάντα τὰ ἀνθρώπεια, μηδὲ Ἡλιόδωρος ἀπαξιούσθω σοφιστῶν κύκλου παράδοξον ἀγώνισμα τύχης γενόμενος·","32. Epei de ē tychi kratiston epi panta ta anthrōpeia, mide Hēliodōros apaxiousthō sophistōn kyklou paradoxon agōnisma tychis genomenos",,
greek_classical,"κζʹ. Μὴ δεύτερα τῶν προειρημένων σοφιστῶν μηδὲ Ἱππόδρομόν τις ἡγείσθω τὸν Θετταλόν, τῶν μὲν γὰρ βελτίων φαίνεται, τῶν δὲ οὐκ οἶδα ὅ τι λείπεται","27. Mē deutera tōn proeirēmenōn sophistōn mide Ippodromon tis ēgeisthō ton Thettalon, tōn men gar beltiōn phainetai, tōn de ouk oida o ti leipetai",,
greek_classical,"ιγʹ. Πῶλον δὲ τὸν Ἀκραγαντῖνον Γοργίας σοφιστὴν ἐξεμελέτησε πολλῶν, ὥς φασι, χρημάτων, καὶ γὰρ δὴ καὶ τῶν πλουτούντων ὁ Πῶλος.","13. Pōlon de ton Akragantinon Gorgias sophistēn exemeletēse pollōn, ōs phasi, chrēmatōn, kai gar dē kai tōn ploutountōn o Pōlos",,
greek_modern,"Ἐτήσια ἔκθεσις / Κυπριακὴ Δημοκρατία, Ὑπουργεῖον Ἐργασίας καὶ Κοινωνικῶν Ἀσφαλίσεων","Etēsia ekthesis / Kypriakē Dēmokratia, Hypourgeion Ergasias kai Koinōnikōn Asphaliseōn",,
greek_modern,"Ετήσια έκθεση / Κυπριακή Δημοκρατία, Υπουργείο Εργασίας και Κοινωνικών Ασφαλίσεων","Etēsia ekthesē / Kypriakē Dēmokratia, Hypourgeio Ergasias kai Koinōnikōn Asphaliseōn",,
greek_modern,Ελληνικό Ίδρυμα Ευρωπαϊκής και Εξωτερικής Πολιτικής,Hellēniko Hidryma Eurōpaikēs kai Exōterikēs Politikēs,,
Expand Down

0 comments on commit 7f1c33f

Please sign in to comment.