-
Notifications
You must be signed in to change notification settings - Fork 4
/
p2gFuntion.py
69 lines (66 loc) · 2.2 KB
/
p2gFuntion.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import os, json
from collections import defaultdict
def load_p2g():
    """Load the phoneme-to-grapheme table from ``simple_p2g.json``.

    The file is opened by its bare name, so it is resolved against the
    current working directory.

    Returns:
        The decoded JSON document. ``p2g_simple`` indexes it by phoneme and
        treats each value as a sequence of candidate graphemes.

    Raises:
        OSError: if the file cannot be opened (e.g. ``FileNotFoundError``).
        json.JSONDecodeError: if the file does not contain valid JSON.
    """
    # Decode explicitly as UTF-8 so non-ASCII graphemes are read the same way
    # on every platform instead of depending on the locale's default encoding.
    with open('simple_p2g.json', 'r', encoding='utf-8') as f:
        return json.load(f)
def p2g_simple(text, p2g=None):
    """Convert a phoneme string into its grapheme spelling.

    ``text`` is a sequence of words separated by single spaces; inside a
    word the phonemes are separated by ``_`` (e.g. ``'ng_ieej_p t_aay'``).
    Each phoneme is looked up in the table and replaced by one grapheme:

    * a phoneme with exactly one candidate uses that candidate;
    * ``g``, ``ng`` and ``c`` with several candidates are written ``gh``,
      ``ngh`` and ``k`` when they open a word and the following phoneme
      starts with ``i`` or ``e``; otherwise they are written as themselves
      (NOTE(review): this looks like the Vietnamese g/gh, ng/ngh, c/k
      spelling rule -- confirm with the data owner);
    * any other phoneme with several candidates uses the first candidate.

    Args:
        text: Phoneme string to convert.
        p2g: Optional mapping from phoneme to a sequence of candidate
            graphemes. When omitted, the table is loaded with
            ``load_p2g()``; pass it explicitly to avoid re-reading the file
            on every call.

    Returns:
        The grapheme string, with words separated by single spaces.

    Raises:
        KeyError: if a phoneme is missing from the table.
        IndexError: if a phoneme other than ``g``/``ng``/``c`` maps to an
            empty candidate list.
    """
    if p2g is None:
        p2g = load_p2g()

    # Spelling of an onset that opens a word and precedes an 'i'/'e' phoneme.
    onset_before_i_e = {'g': 'gh', 'ng': 'ngh', 'c': 'k'}

    # Turn every space into its own '<sp>' token so word boundaries survive
    # the split on '_'.
    tokens = text.replace(' ', '_<sp>_').split('_')
    pieces = []
    for i, p in enumerate(tokens):
        if p == '<sp>':
            pieces.append('<sp>')
            continue
        if p == '':
            # Produced by doubled or leading/trailing separators.
            continue

        gs = p2g[p]
        if len(gs) == 1:
            g = gs[0]
        elif p in onset_before_i_e:
            word_initial = i == 0 or tokens[i - 1] == '<sp>'
            # The onset may be the last token, or be followed by an empty
            # token; treat both as "no following vowel" instead of indexing
            # past the end (this used to raise IndexError).
            following = tokens[i + 1] if i + 1 < len(tokens) else ''
            if word_initial and following.startswith(('i', 'e')):
                g = onset_before_i_e[p]
            else:
                g = p
        else:
            g = gs[0]
        pieces.append(g)

    return ''.join(pieces).replace('<sp>', ' ')
# Demo: convert a sample phoneme sequence and print the input next to the
# resulting spelling. Runs whenever this module is executed or imported.
phoneme = 'dd_oof_ng ng_ieej_p t_aay b_aor nh_aj_c c_uar t_ooi ch_ir th_oas_ng ng_e'
grapheme = p2g_simple(phoneme)
for _label, _value in (("phoneme:", phoneme), ("grapheme:", grapheme)):
    print(_label, _value)