-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathtest_ngram.py
24 lines (18 loc) · 853 Bytes
/
test_ngram.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
import unittest
from corpus import *
from lm import LanguageModel
class TestCorpus(unittest.TestCase):
    """Unit tests for the corpus helpers and the n-gram language model.

    Exercises ``tokenize`` and ``detokenize`` (pulled in by the wildcard
    import from ``corpus``) and ``LanguageModel.get_ngrams`` from ``lm``.
    """

    def test_punctuation_remove_string(self):
        """Check the token list produced for a bracketed, parenthesised string.

        The expected output is lowercased, keeps the square brackets
        attached to their words, emits the comma as its own token and
        drops the parentheses around the final word.
        """
        tokens = tokenize("[to john], Hey come (here)")
        expected = ["[to", "john]", ",", "hey", "come", "here"]
        self.assertEqual(tokens, expected)

    def test_contraction_string(self):
        """Check that contractions come back as separate full words.

        ``I'd`` / ``you'd`` are expected as ``<pronoun>, would`` and
        ``won't`` as ``will, not``; ``,`` and ``?`` are expected as
        standalone tokens.
        """
        tokens = tokenize("I'd say you'd do it, won't you?")
        expected = [
            "i", "would", "say", "you", "would", "do", "it", ",",
            "will", "not", "you", "?",
        ]
        self.assertEqual(tokens, expected)

    def test_punctuation_capitalization(self):
        """Check the sentence string rebuilt from a list of tokens.

        The expected string capitalises the first word and the word
        following the period, and attaches the period to the word
        before it.
        """
        sentence = detokenize(["to", "john", ".", "hey", "come", "here"])
        self.assertEqual(sentence, "To john. Hey come here")

    def test_ngram(self):
        """Check the bigrams returned by a ``LanguageModel(2)``.

        The expected list pairs each token with its successor and has a
        ``None`` entry before the first token and after the last one.
        """
        bigrams = LanguageModel(2).get_ngrams(["hello", "world", "lmao"])
        expected = [
            (None, "hello"),
            ("hello", "world"),
            ("world", "lmao"),
            ("lmao", None),
        ]
        self.assertEqual(bigrams, expected)
# Run the suite only when this file is executed directly, not when it is
# imported by a test runner.
if __name__ == "__main__":
    unittest.main()