-
Notifications
You must be signed in to change notification settings - Fork 0
/
regex.py
31 lines (22 loc) · 1.22 KB
/
regex.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
import re
from os import path
from .grammar import Grammar
# NOTE: hyphen is not part of the word

# The grammar listing is looked up next to this module, independent of the
# caller's working directory.
my_path = path.abspath(path.dirname(__file__))
yml_path = path.join(my_path, 'listing.yml')

# Shared grammar instance: loaded from the YAML listing first, then extended
# with the derived rules in the table below.
_g = Grammar()
_g.load(yml_path)

# (rule name, pattern) pairs, registered strictly top to bottom. The
# `{name}` placeholders presumably refer to rules that are already known to
# the grammar (from listing.yml or from an earlier row), so the order of the
# rows must not be changed -- TODO confirm against Grammar.putr.
_DERIVED_RULES = (
    # Negated character class listing every character the grammar knows.
    # NOTE(review): the trailing `^` sits inside the class and is therefore a
    # literal caret, i.e. '^' is excluded from "unknown" characters. Confirm
    # this is intended and not a typo.
    ('unknown_char', '[^{consonant_with_nuktak_raw}{consonant_raw}{nuktak}{matra_raw}{halanta}{joiner}{non_joiner}{vowel_raw}{nasal_raw}{ohm}{avagraha}{dot}{laghav}{number_raw}^]'),
    # One or more unknown characters in a row.
    ('not_word', r'{unknown_char}+'),
    # A captured unknown character followed by repeats of that same
    # character (backreference to group 1).
    ('boundary', r'({unknown_char})\1*'),
    # Consonant with an optional nuktak, or a precomposed nuktak consonant.
    ('full_consonant', r'(?:{consonant}{nuktak}?|{consonant_with_nuktak})'),
    # Full consonant + halanta, optionally followed by a joiner/non-joiner.
    ('half_consonant', r'(?:{full_consonant}{halanta}[{joiner}{non_joiner}]?)'),
    # Vowel with an optional nasal mark.
    ('vowel_syllable', r'(?:{vowel}{nasal}?)'),
    # Run of one or more half consonants.
    ('half_consonant_syllable', r'(?:{half_consonant}+)'),
    # Optional half-consonant run, a full consonant, optional matra and nasal.
    ('consonant_syllable', r'(?:{half_consonant_syllable}?{full_consonant}{matra}?{nasal}?)'),
    # Either syllable kind, followed by any number of avagraha marks.
    ('syllable', r'(?:(?:{consonant_syllable}|{vowel_syllable}){avagraha}*)'),
    # Optional leading dot, one or more syllables, optional trailing
    # half-consonant run, optional trailing laghav or dot.
    ('word', '(?:[{dot}]?{syllable}+{half_consonant_syllable}?[{laghav}{dot}]?)'),
    # A word, digits optionally followed by a word, or the ohm sign alone.
    ('pragmatic_word', r'(?:{word}|{number}+{word}?|{ohm})'),
)
for _rule_name, _rule_pattern in _DERIVED_RULES:
    _g.putr(_rule_name, _rule_pattern)
# Keep the loop variables out of the module namespace.
del _rule_name, _rule_pattern

# Compiled patterns exposed by this module.
not_word_re, boundary_re = (
    re.compile(_g.get(_key)) for _key in ('not_word', 'boundary')
)
# Anchored on both ends so the whole string has to be a pragmatic word.
# NOTE(review): in Python `$` also matches just before a trailing newline;
# confirm callers never pass newline-terminated input (otherwise
# `re.fullmatch` or `\Z` would be stricter).
word_re = re.compile('^' + _g.get('pragmatic_word') + '$')