-
Notifications
You must be signed in to change notification settings - Fork 0
/
test_match_finder.py
41 lines (31 loc) · 1.61 KB
/
test_match_finder.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import unittest
import os
from code import match_finder
import pandas as pd
# TODO: convert os.path -> pathlib
class Ngram(unittest.TestCase):
    """Tests for ``match_finder.get_ngram_of_text`` using n-gram sizes 2 to 4."""

    def test_ngram_split_for_range_from_string(self):
        """A sentence passed directly as text is expected to yield 15 n-grams."""
        sentence = "I really like python, it's pretty awesome."
        ngrams = match_finder.get_ngram_of_text(sentence, 2, 4)
        self.assertEqual(len(ngrams), 15)

    def test_ngram_split_for_range_from_file(self):
        """The ``mock_text`` fixture, passed as a path with ``path_file=True``,
        is expected to yield 15 n-grams."""
        fixture_path = os.path.join("test_data", "mock_text")
        ngrams = match_finder.get_ngram_of_text(
            fixture_path, 2, 4, path_file=True
        )
        self.assertEqual(len(ngrams), 15)
class FuzzySet(unittest.TestCase):
    """Tests for the fuzzy-set helpers of ``match_finder``."""

    @unittest.skip("long test skipped")
    def test_init_fuzzyset_performance(self):
        """Build a fuzzy set from the large fixture file.

        Skipped by default (see the decorator); it makes no assertion and
        only checks that the call completes.
        """
        test_file = os.path.join("test_data", "long_medical_text_1.2M_words.txt")
        match_finder.init_fuzzy_set_on_ngrams(test_file, 2, 4)

    def test_search_in_fuzzyset(self):
        """A misspelled query should return the single closest n-gram.

        The result is expected to be a one-element list holding a
        ``(score, matched_text)`` pair.
        """
        test_file = os.path.join("test_data", "mock_text")
        test_fuzzyset = match_finder.init_fuzzy_set_on_ngrams(test_file, 2, 4)
        test_text = "realy like python"  # deliberate typo: "realy"

        matches = match_finder.search_in_fuzzy_set(test_fuzzyset, test_text)

        self.assertEqual(len(matches), 1)
        score, matched_text = matches[0]
        # The score is a float: compare with a tolerance rather than exact
        # equality so a last-digit rounding difference does not fail the test.
        self.assertAlmostEqual(score, 0.8947368421052632)
        self.assertEqual(matched_text, 'really like python,')
class Corpus(unittest.TestCase):
    """Tests for ``match_finder.get_list_of_text_files_in_dir`` on ``test_data``."""

    def test_get_all_text_file_from_corpus(self):
        """Exactly one entry is expected for the ``test_data`` directory."""
        found = match_finder.get_list_of_text_files_in_dir("test_data")
        self.assertEqual(len(found), 1)

    def test_return_file_stem_from_corpus(self):
        """The returned entry is expected to be the file name without its extension."""
        expected = ["long_medical_text_1.2M_words"]
        found = match_finder.get_list_of_text_files_in_dir("test_data")
        self.assertEqual(found, expected)