Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Convert Input grammar To LL(1) grammar automatically #36

Merged
merged 16 commits into from
Dec 24, 2023
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,6 @@ main
.cache
.vscode
.idea/
cmake-build-debug/
cmake-build-debug/
build/

9 changes: 1 addition & 8 deletions CFG.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1 @@
# E= T E`
# E` = '+' T E`
| Epsilon
# T = F T`
# T`= '*' F T`
| Epsilon
# F = '(' E ')'
| 'id'
# A = 'a' 'd' | 'a' | 'a' 'b' | 'a' 'b' 'c' | 'b'
4 changes: 4 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ set(CMAKE_CXX_STANDARD 14)

include_directories(include)
include_directories(include/Lex)
include_directories(include/Parser)

add_executable(anycc
include/constants.h
Expand Down Expand Up @@ -38,6 +39,9 @@ add_executable(anycc
src/Parser/CellValue.cpp
include/Parser/CellValue.h
include/Parser/ParsingTableEntryType.h
include/Parser/LeftRecursionRemover.h
src/Parser/LeftRecursionRemover.cpp
src/Parser/LeftFactorer.cpp
include/Parser/PredictiveTopDownParser.h
src/Parser/PredictiveTopDownParser.cpp
)
23 changes: 23 additions & 0 deletions include/Parser/LeftFactorer.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#ifndef LEFT_FACTORER_H
#define LEFT_FACTORER_H

#include <string>
#include <vector>
#include <unordered_map>
#include <unordered_map>
#include <set>


// Stateless helper that left-factors a context-free grammar.
// A grammar maps each non-terminal name to its list of productions, and
// every production is a sequence of symbol names.
class LeftFactorer {
public:
// Returns a left-factored copy of `grammar`; the function works on its own
// copy and hands that copy back. Non-terminals introduced by factoring are
// themselves queued for factoring.
static std::unordered_map<std::string, std::vector<std::vector<std::string>>> leftFactor(std::unordered_map<std::string, std::vector<std::vector<std::string>>>& grammar);

private:
// Finds the prefix shared by the largest number (at least two) of
// `non_terminal`'s productions, preferring the longer prefix on a tie.
// Returns that prefix together with the set of productions starting with
// it, or an empty pair when no two productions share a prefix.
static std::pair<std::vector<std::string>, std::set<std::vector<std::string>>> getLongestCommonPrefix(const std::unordered_map<std::string, std::vector<std::vector<std::string>>>& grammar, std::string non_terminal);

// Performs one factoring step on `non_terminal` in place. `factored_count`
// controls how many NEW_NT_SYMBOL suffixes are appended to form the name of
// the newly created non-terminal. Returns the new non-terminal's name, or
// `non_terminal` itself when there was nothing to factor.
static std::string leftFactorProd(std::unordered_map<std::string, std::vector<std::vector<std::string>>>& grammar, std::string non_terminal, size_t factored_count);
Bazina marked this conversation as resolved.
Show resolved Hide resolved

};


#endif // LEFT_FACTORER_H
23 changes: 23 additions & 0 deletions include/Parser/LeftRecursionRemover.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#ifndef LEFT_RECUSRION_REMOVER_H
#define LEFT_RECUSRION_REMOVER_H

#include <vector>
#include <string>
#include <unordered_map>


// Stateless helper that rewrites a context-free grammar to remove left
// recursion. A grammar maps each non-terminal name to its list of
// productions, and every production is a sequence of symbol names.
class LeftRecursionRemover {
Bazina marked this conversation as resolved.
Show resolved Hide resolved
public:

// Returns a rewritten copy of `grammar`: non-terminals are processed in a
// fixed order, earlier non-terminals are substituted into later ones, and
// immediate left recursion is then removed from each non-terminal.
static std::unordered_map<std::string, std::vector<std::vector<std::string>>> removeLR(std::unordered_map<std::string, std::vector<std::vector<std::string>>>& grammar);

private:
// True when `non_terminal` exists in `grammar` and one of its productions
// starts with `non_terminal` itself.
static bool isImmediateLR(const std::unordered_map<std::string, std::vector<std::vector<std::string>>>& grammar, std::string non_terminal);

// Rewrites A -> A alpha | beta in place as A -> beta A', A' -> alpha A' | Epsilon,
// where A' is `non_terminal` with a NEW_NT_SYMBOL suffix. Does nothing when
// `non_terminal` is missing or not immediately left recursive.
static void removeImmediateLR(std::unordered_map<std::string, std::vector<std::vector<std::string>>>& grammar, std::string non_terminal);

// Expands `rhs_non_terminal` inside the productions of `lhs_non_terminal`
// using `rhs_non_terminal`'s own productions; only `lhs_non_terminal`'s
// entry in `grammar` is replaced.
static void substituteRHS(std::unordered_map<std::string, std::vector<std::vector<std::string>>>& grammar, std::string lhs_non_terminal, std::string rhs_non_terminal);
};


#endif // LEFT_RECUSRION_REMOVER_H
4 changes: 2 additions & 2 deletions include/constants.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
#include <string>

#define EPSILON "Epsilon"
#define START_SYMBOL "E"

#define START_SYMBOL "A"
#define NEW_NT_SYMBOL "`"
MuhammadKotb marked this conversation as resolved.
Show resolved Hide resolved

#endif // CONSTANTS_H
4 changes: 2 additions & 2 deletions src/Lex/Utilities.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#include "Utilities.h"
#include "Operator.h"
#include "Lex/Utilities.h"
#include "Lex/Operator.h"
#include <regex>

std::string *Utilities::cleanRegex(std::string *input) {
Expand Down
2 changes: 0 additions & 2 deletions src/Parser/FirstAndFollowGenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,8 @@ FirstAndFollowGenerator::FirstAndFollowGenerator(
for (const auto &entry: grammar) {
const std::string &nonTerminal = entry.first;
const std::vector<std::vector<std::string>> &productions = entry.second;

productionVector.push_back({nonTerminal, productions});
}

nonTerminals = collectNonTerminals(productionVector);
}

Expand Down
109 changes: 109 additions & 0 deletions src/Parser/LeftFactorer.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
#include "Parser/LeftFactorer.h"
#include "constants.h"
#include <algorithm>
#include <unordered_map>
#include <stack>
#include <map>
#include <iostream>
#include <utility>

/**
 * Left-factors every non-terminal of a grammar.
 *
 * The input is copied and only the copy is rewritten. A work stack is seeded
 * with every non-terminal; each one is factored repeatedly until
 * leftFactorProd reports no further change (by returning the same name), and
 * every non-terminal created along the way is pushed so that it gets factored
 * too.
 *
 * @param grammar non-terminal -> list of productions (each a symbol sequence).
 * @return the left-factored copy of the grammar.
 */
std::unordered_map<std::string, std::vector<std::vector<std::string>>> LeftFactorer::leftFactor(std::unordered_map<std::string, std::vector<std::vector<std::string>>>& grammar) {
    auto factored = grammar;

    // Seed the work stack before any rewriting starts, so that the map is
    // never iterated while it is being modified.
    std::stack<std::string> pending;
    for (auto it = factored.begin(); it != factored.end(); ++it) {
        pending.push(it->first);
    }

    while (!pending.empty()) {
        const std::string target = pending.top();
        pending.pop();

        // `round` is the 1-based number of the factoring step applied to
        // `target`; it is forwarded so each step can name its new symbol.
        for (size_t round = 1; ; ++round) {
            const std::string produced = LeftFactorer::leftFactorProd(factored, target, round);
            if (produced == target) {
                break;  // nothing left to factor for this non-terminal
            }
            pending.push(produced);
        }
    }
    return factored;
}


/**
 * Picks the prefix to factor out of a non-terminal's productions.
 *
 * Every non-empty prefix of every production is grouped with the productions
 * that start with it. The winner is the prefix shared by the most productions
 * (at least two); when two prefixes are shared by the same number of
 * productions the longer one wins.
 *
 * @param grammar      non-terminal -> list of productions.
 * @param non_terminal the non-terminal to inspect; must be a key of `grammar`
 *                     (std::unordered_map::at throws std::out_of_range otherwise).
 * @return {prefix, productions starting with it}, or an empty pair when no
 *         prefix is shared by two or more productions.
 */
std::pair<std::vector<std::string>, std::set<std::vector<std::string>>> LeftFactorer::getLongestCommonPrefix(const std::unordered_map<std::string, std::vector<std::vector<std::string>>>& grammar, std::string non_terminal) {
    using Production = std::vector<std::string>;

    const auto& alternatives = grammar.at(non_terminal);

    size_t longest_alternative = 0;
    for (const auto& alternative : alternatives) {
        longest_alternative = std::max(longest_alternative, alternative.size());
    }

    std::map<Production, std::set<Production>> sharers_by_prefix;
    Production best_prefix;
    size_t best_share_count = 0;

    // Grow the prefix length one symbol at a time; productions shorter than
    // the current length contribute their whole body again, which is harmless
    // because the groups are sets.
    for (size_t prefix_len = 1; prefix_len <= longest_alternative; ++prefix_len) {
        for (const auto& alternative : alternatives) {
            const size_t take = std::min(prefix_len, alternative.size());
            if (take == 0) {
                continue;  // empty production has no prefix to offer
            }
            const Production candidate(alternative.begin(), alternative.begin() + take);

            auto& sharers = sharers_by_prefix[candidate];
            sharers.insert(alternative);

            const size_t share_count = sharers.size();
            if (share_count < 2) {
                continue;  // a prefix owned by a single production is not "common"
            }

            const bool shared_by_more = share_count > best_share_count;
            const bool same_but_longer =
                share_count == best_share_count && candidate.size() > best_prefix.size();
            if (shared_by_more || same_but_longer) {
                best_prefix = candidate;
                best_share_count = share_count;
            }
        }
    }

    if (best_prefix.empty()) {
        return {};
    }
    const auto found = sharers_by_prefix.find(best_prefix);
    if (found == sharers_by_prefix.end()) {
        return {};
    }
    return {best_prefix, found->second};
}

/**
 * Performs a single left-factoring step on one non-terminal, in place.
 *
 * Given A -> p b1 | p b2 | ... | g (p being the prefix chosen by
 * getLongestCommonPrefix), rewrites the grammar to
 *     A  -> p A' | g
 *     A' -> b1 | b2 | ...
 * where an empty bi is written as the EPSILON production.
 *
 * @param grammar        grammar to rewrite.
 * @param non_terminal   the non-terminal (A) to factor; must be a key of `grammar`.
 * @param factored_count minimum number of NEW_NT_SYMBOL suffixes appended to
 *                       `non_terminal` to name the new non-terminal (A').
 * @return the name of the newly created non-terminal, or `non_terminal`
 *         unchanged when its productions share no common prefix.
 */
std::string LeftFactorer::leftFactorProd(std::unordered_map<std::string, std::vector<std::vector<std::string>>>& grammar, std::string non_terminal, size_t factored_count) {
    const auto factoring = LeftFactorer::getLongestCommonPrefix(grammar, non_terminal);
    const auto& common_prefix = factoring.first;
    if (common_prefix.empty()) return non_terminal;
    const auto& common_prefix_betas = factoring.second;

    std::string new_non_terminal = non_terminal;
    for (size_t i = 0; i < factored_count; i++) {
        new_non_terminal += NEW_NT_SYMBOL;
    }
    // The suffix count alone is not unique: the second factoring of "A" and
    // the first factoring of "A`" both yield "A``". Keep extending the name
    // until it is unused so an existing rule set is never overwritten.
    while (grammar.find(new_non_terminal) != grammar.end()) {
        new_non_terminal += NEW_NT_SYMBOL;
    }

    // A -> p A' followed by every production that did not share the prefix.
    std::vector<std::vector<std::string>> new_prods;
    new_prods.push_back(common_prefix);
    new_prods[0].push_back(new_non_terminal);
    for (const auto& prod : grammar.at(non_terminal)) {
        if (common_prefix_betas.find(prod) == common_prefix_betas.end()) {
            new_prods.push_back(prod);
        }
    }
    grammar.at(non_terminal) = new_prods;

    // A' -> whatever followed the prefix in each factored production.
    std::vector<std::vector<std::string>> new_non_terminal_prods;
    for (const auto& gamma : common_prefix_betas) {
        std::vector<std::string> suffix;
        for (size_t j = common_prefix.size(); j < gamma.size(); j++) {
            suffix.push_back(gamma[j]);
        }
        if (suffix.empty()) {
            // The production was exactly the prefix, so nothing remains.
            suffix.push_back(EPSILON);
        }
        new_non_terminal_prods.push_back(suffix);
    }
    grammar[new_non_terminal] = new_non_terminal_prods;
    return new_non_terminal;
}
96 changes: 96 additions & 0 deletions src/Parser/LeftRecursionRemover.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
#include "Parser/LeftRecursionRemover.h"
#include "constants.h"
#include <algorithm>
#include <iostream>


/**
 * Eliminates left recursion from a grammar.
 *
 * The non-terminals are put in a fixed order A_0..A_n. For each A_i, every
 * earlier A_j (j < i) that appears as the FIRST symbol of one of A_i's
 * productions is expanded via substituteRHS, after which any immediate left
 * recursion on A_i is removed. Testing A_i's own productions (rather than
 * A_j's) is what makes indirect recursion through chains of any length
 * collapse into immediate recursion.
 *
 * @param grammar non-terminal -> list of productions (each a symbol sequence).
 * @return a rewritten copy; `grammar` itself is not modified.
 */
std::unordered_map<std::string, std::vector<std::vector<std::string>>> LeftRecursionRemover::removeLR(std::unordered_map<std::string, std::vector<std::vector<std::string>>>& grammar) {
    auto new_grammar = grammar;

    // Snapshot the keys first: helpers below insert new non-terminals, which
    // must not disturb the iteration order being processed.
    std::vector<std::string> ordered_non_terminals;
    ordered_non_terminals.reserve(new_grammar.size());
    for (const auto& kv : new_grammar) {
        ordered_non_terminals.push_back(kv.first);
    }
    std::reverse(ordered_non_terminals.begin(), ordered_non_terminals.end());

    for (size_t i = 0; i < ordered_non_terminals.size(); i++) {
        const std::string& current = ordered_non_terminals[i];
        for (size_t j = 0; j < i; j++) {
            const std::string& earlier = ordered_non_terminals[j];
            const auto& current_prods = new_grammar.at(current);
            // Empty productions are skipped instead of indexing into them.
            const bool replace = std::any_of(
                current_prods.begin(), current_prods.end(),
                [&earlier](const std::vector<std::string>& prod) {
                    return !prod.empty() && prod.front() == earlier;
                });
            if (replace) {
                LeftRecursionRemover::substituteRHS(new_grammar, current, earlier);
            }
        }
        if (isImmediateLR(new_grammar, current)) {
            LeftRecursionRemover::removeImmediateLR(new_grammar, current);
        }
    }
    return new_grammar;
}

/**
 * Tells whether a non-terminal is immediately left recursive.
 *
 * @param grammar      non-terminal -> list of productions.
 * @param non_terminal the non-terminal to test.
 * @return true when `non_terminal` is in `grammar` and at least one of its
 *         productions starts with `non_terminal`; false otherwise (including
 *         when it is not in the grammar). Empty productions are ignored
 *         rather than indexed, which would be undefined behaviour.
 */
bool LeftRecursionRemover::isImmediateLR(const std::unordered_map<std::string, std::vector<std::vector<std::string>>> &grammar, std::string non_terminal) {
    const auto entry = grammar.find(non_terminal);
    if (entry == grammar.end()) return false;
    const auto& prods = entry->second;
    return std::any_of(prods.begin(), prods.end(),
                       [&non_terminal](const std::vector<std::string>& prod) {
                           return !prod.empty() && prod.front() == non_terminal;
                       });
}

/**
 * Removes immediate left recursion from one non-terminal, in place.
 *
 * Rewrites  A -> A a1 | A a2 | ... | b1 | b2 | ...  as
 *     A  -> b1 A' | b2 A' | ...
 *     A' -> a1 A' | a2 A' | ... | Epsilon
 *
 * Details handled here:
 *  - A' gets a name not yet present in the grammar, so an existing rule set
 *    is never silently reused or shadowed.
 *  - A bi that is the EPSILON production (or an empty production) becomes
 *    just "A'" instead of the ill-formed "Epsilon A'".
 *  - The useless production A -> A is dropped; it would otherwise turn into
 *    the non-terminating A' -> A'.
 *
 * Does nothing when `non_terminal` is unknown or not immediately left
 * recursive.
 *
 * @param grammar      grammar to rewrite.
 * @param non_terminal the non-terminal (A) to process.
 */
void LeftRecursionRemover::removeImmediateLR(std::unordered_map<std::string, std::vector<std::vector<std::string>>>& grammar, std::string non_terminal) {
    if(grammar.find(non_terminal) == grammar.end()) return;
    if(!isImmediateLR(grammar, non_terminal)) return;

    std::string new_non_terminal = non_terminal + NEW_NT_SYMBOL;
    while(grammar.find(new_non_terminal) != grammar.end()) {
        new_non_terminal += NEW_NT_SYMBOL;
    }

    std::vector<std::vector<std::string>> new_prods;        // A  -> beta A'
    std::vector<std::vector<std::string>> new_prods_prime;  // A' -> alpha A'
    for(const auto& prod : grammar.at(non_terminal)) {
        const bool is_epsilon = prod.empty() || (prod.size() == 1 && prod.front() == EPSILON);
        if(is_epsilon) {
            // Epsilon followed by A' is simply A'.
            new_prods.push_back(std::vector<std::string>{new_non_terminal});
        }
        else if(prod.front() != non_terminal) {
            std::vector<std::string> beta(prod);
            beta.push_back(new_non_terminal);
            new_prods.push_back(beta);
        }
        else {
            std::vector<std::string> alpha(prod.begin() + 1, prod.end());
            if(alpha.empty()) continue;  // A -> A contributes nothing
            alpha.push_back(new_non_terminal);
            new_prods_prime.push_back(alpha);
        }
    }
    new_prods_prime.push_back(std::vector<std::string>{EPSILON});

    grammar.at(non_terminal) = new_prods;
    grammar[new_non_terminal] = new_prods_prime;
}

void LeftRecursionRemover::substituteRHS(std::unordered_map<std::string, std::vector<std::vector<std::string>>> &grammar, std::string lhs_non_terminal, std::string rhs_non_terminal) {
auto& src_prods = grammar.at(rhs_non_terminal);
auto& dist_prods = grammar.at(lhs_non_terminal);
std::vector<std::vector<std::string>> new_dist_prods;
for(auto dist_prod : dist_prods) {
for(size_t i = 0; i < src_prods.size(); i++) {
std::vector<std::string> new_dist_prod;
if(std::find(dist_prod.begin(), dist_prod.end(), rhs_non_terminal) == dist_prod.end()) {
new_dist_prods.push_back(dist_prod);
break;
}
for(size_t j = 0; j < dist_prod.size(); j++) {
if(dist_prod[j] == rhs_non_terminal) {
for(auto src_prod : src_prods[i]) {
new_dist_prod.push_back(src_prod);
}
}
else {
new_dist_prod.push_back(dist_prod[j]);
}
}
new_dist_prods.push_back(new_dist_prod);
}
}
grammar.at(lhs_non_terminal) = new_dist_prods;
}
9 changes: 7 additions & 2 deletions src/anycc.cpp
Original file line number Diff line number Diff line change
@@ -1,19 +1,24 @@
#include <map>
#include "Lex/Lex.h"
#include "Parser/FirstAndFollowGenerator.h"
#include "Parser/LeftFactorer.h"
#include "Parser/LeftRecursionRemover.h"
#include "constants.h"
#include <iostream>
#include <set>
#include <unordered_map>
#include <vector>
#include "Utilities.h"
#include "Lex/Utilities.h"
#include "Parser/PredictiveTable.h"
#include "Parser/PredictiveTopDownParser.h"

int main() {
std::unordered_map<std::string, std::vector<std::vector<std::string>>> grammar = Utilities::parseCFGInput(
"../CFG.txt");
FirstAndFollowGenerator firstAndFollowGenerator(grammar);

auto lr_free_grammar = LeftRecursionRemover::removeLR(grammar);
auto left_factored_grammar = LeftFactorer::leftFactor(lr_free_grammar);
FirstAndFollowGenerator firstAndFollowGenerator(left_factored_grammar);
firstAndFollowGenerator.compute();
// Print first_sets
const auto &first_sets = firstAndFollowGenerator.getFirstSets();
Expand Down
Loading