-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathlexer.cpp
81 lines (73 loc) · 2.05 KB
/
lexer.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
#include <cctype>
#include <string>
#include <tuple>
#include "compression_map.cpp"
// NOTE(review): presumably a marker that including files can test (e.g. with
// #ifdef) to detect that the lexer has been pulled in — confirm against the includers.
#define LEXER
// Makes std names usable without the std:: prefix in the rest of this file
// (and in any file that includes it after this point).
using namespace std;
/**
 * @brief Helper base class that strips capitalization from a word.
 *
 * The helper is a protected static member, so it is callable from derived
 * classes without an instance.
 */
class CaptitalizationProcesor
{
protected:
    /**
     * @brief Produces a lower-cased copy of a word.
     *
     * @param word The word to convert.
     * @return A tuple of (lower-cased word, changed). `changed` is true when
     *         the lower-cased word differs from @p word, i.e. when at least
     *         one character was altered by the conversion.
     */
    static std::tuple<std::string, bool> remove_capitalization(std::string word)
    {
        std::string new_word;
        new_word.reserve(word.size());
        for (const char ch : word)
        {
            // std::tolower requires a value representable as unsigned char
            // (or EOF); passing a negative plain char is undefined behaviour,
            // so convert first and narrow the int result back to char.
            new_word += static_cast<char>(std::tolower(static_cast<unsigned char>(ch)));
        }
        const bool changed = (word != new_word);
        return std::make_tuple(new_word, changed);
    }
};
/**
 * @brief Helper base class that strips non-alphabetic characters from a word.
 *
 * The helper is a protected static member, so it is callable from derived
 * classes without an instance.
 */
class PunctuationProcessor
{
protected:
    /**
     * @brief Removes every non-alphabetic character from a word.
     *
     * Anything for which std::isalpha is false (punctuation, digits,
     * whitespace, ...) is dropped.
     *
     * @param word The word to clean.
     * @return A tuple of (word with only alphabetic characters, removed
     *         character). The second element is the last character that was
     *         removed, or '\0' when nothing was removed. For a word ending in
     *         punctuation this is its final character.
     */
    static std::tuple<std::string, char> remove_punctuation(std::string word)
    {
        std::string new_word;
        new_word.reserve(word.size());
        // Track the removed character itself rather than blindly reporting the
        // word's final character, which could be a letter that was kept
        // (e.g. the 't' of "don't").
        char last_removed = '\0';
        for (const char ch : word)
        {
            // std::isalpha requires a value representable as unsigned char
            // (or EOF); a negative plain char would be undefined behaviour.
            if (std::isalpha(static_cast<unsigned char>(ch)))
            {
                new_word += ch;
            }
            else
            {
                last_removed = ch;
            }
        }
        return std::make_tuple(new_word, last_removed);
    }
};
class Lexer : public CaptitalizationProcesor, public PunctuationProcessor
{
public:
/**
* @brief Normalizes a given string by removing capitalization and punctuation.
*
* @param word The string to be normalized.
* @return The normalized string.
*/
std::tuple<std::string, CompressionType, char> static normalize_string(std::string word)
{
// Remove punctuation
tuple<string, char> punctuation_result = remove_punctuation(word);
string punctuation_removed_word = get<0>(punctuation_result);
char punctuation = get<1>(punctuation_result);
// Remove capitalization
tuple<string, bool> capitalization_result = remove_capitalization(punctuation_removed_word);
string capitalization_removed_word = get<0>(capitalization_result);
bool capitalization = get<1>(capitalization_result);
// Determine compression type
CompressionType compression_type = NONE;
if (punctuation != '\0' && capitalization)
{
compression_type = BOTH;
}
else if (capitalization)
{
compression_type = CAPITALIZATION;
}
else if (punctuation != '\0')
{
compression_type = PUNCTUATION;
}
else if (word.length() < 3 && word.length() > 0)
{
compression_type = INELIGIBLE;
}
return make_tuple(capitalization_removed_word, compression_type, punctuation);
}
};