Skip to content

Commit

Permalink
Merge pull request #145 from VeriFIT/regex_union
Browse files Browse the repository at this point in the history
Multiple membership heuristic improvement
  • Loading branch information
jurajsic authored May 22, 2024
2 parents e30c1c6 + 11a800e commit 4bcf006
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 15 deletions.
6 changes: 6 additions & 0 deletions src/smt/theory_str_noodler/theory_str_noodler.h
Original file line number Diff line number Diff line change
Expand Up @@ -375,6 +375,12 @@ namespace smt::noodler {
* We then only need to compute intersection from the regular languages and check if it is not empty.
* The heuristics sorts the regexes by expected complexity of computing nfa, and iteratively computes
* the intersection, so that if the formula is unsat, we do not need to build all automata.
* Furthermore, for all regexes that should be complemented, we compute their union and then check
* the inclusion with the intersection from the previous step, i.e., we have:
* L_1 \cap ... \cap L_m \cap \neg L_{m+1} \cap ... \cap \neg L_n = L \cap \neg (L_{m+1} \cup ... \cup L_n)
* = L \cap \neg L'
* where L = L_1 \cap ... \cap L_m, and L' = L_{m+1} \cup ... \cup L_n.
* We then want to check if L \cap \neg L' is empty (unsat), which is the same as asking if L is subset of L'.
*/
bool is_mult_membership_suitable();

Expand Down
65 changes: 50 additions & 15 deletions src/smt/theory_str_noodler/theory_str_noodler_final_check.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -608,36 +608,71 @@ namespace smt::noodler {
return ((!l.first && r.first) | (regex::get_loop_sum(l.second, m_util_s) < regex::get_loop_sum(r.second, m_util_s)));
});
STRACE("str-mult-memb-heur",
tout << "Sorted NFAs for var " << var << std::endl;
tout << "Sorted regexes for var " << var << std::endl;
unsigned i = 0;
for (const auto & [is_complement, nfa] : list_of_regexes) {
tout << i << " (" << (is_complement ? "" : "not ") <<"complemented):" << std::endl;
tout << nfa << std::endl;
for (const auto & [is_complement, reg] : list_of_regexes) {
tout << i << " (should " << (is_complement ? "" : "NOT ") <<"be complemented):" << mk_pp(reg, m) << std::endl;
}
);

std::shared_ptr<mata::nfa::Nfa> intersection = nullptr; // we save the intersected automata here
std::vector<app*> list_of_normal_regs;
std::vector<app*> list_of_compl_regs;
for (auto& [is_complement, reg] : list_of_regexes) {
STRACE("str", tout << "building intersection for var " << var << " and regex " << mk_pp(reg, m) << (is_complement ? " that needs to be first complemented" : " that does not need to be first complemented") << std::endl;);
if (is_complement) {
list_of_compl_regs.push_back(reg);
} else {
list_of_normal_regs.push_back(reg);
}
}

std::shared_ptr<mata::nfa::Nfa> nfa = std::make_shared<mata::nfa::Nfa>(regex::conv_to_nfa(reg, m_util_s, m, alph, is_complement, is_complement));
// Compute intersection L of all regexes that should not be complemented
mata::nfa::Nfa intersection(1, {0}, {0});
// initalize to universal automaton
for (const mata::Symbol& symb : alph.get_alphabet()) {
intersection.delta.add(0, symb, 0);
}

if (intersection == nullptr) {
intersection = nfa; // this is first nfa
} else {
intersection = std::make_shared<mata::nfa::Nfa>(mata::nfa::reduce(mata::nfa::intersection(*nfa, *intersection)));
bool first = true;
for (auto& reg : list_of_normal_regs) {
intersection = mata::nfa::intersection(regex::conv_to_nfa(reg, m_util_s, m, alph, false, false), intersection);
if (!first // for first iteration we won't do reduction, as it would just be done twice, once in conv_to_nfa and once here
&& intersection.num_of_states() < regex::RED_BOUND)
{
intersection = mata::nfa::reduce(intersection);
}
nfa = nullptr;

if (intersection->is_lang_empty()) {
first = false;
if (intersection.is_lang_empty()) {
STRACE("str", tout << "intersection is empty => UNSAT" << std::endl;);
block_curr_len(expr_ref(this->m.mk_false(), this->m));
return l_false;
}
}

// Compute union L' of all regexes that should be complemented (we are using de Morgan)
mata::nfa::Nfa unionn; // initialize to empty automaton
first = true;
for (auto& reg : list_of_compl_regs) {
unionn = mata::nfa::uni(regex::conv_to_nfa(reg, m_util_s, m, alph, false, false), unionn);
if (!first // for first iteration we won't do reduction, as it would just be done twice, once in conv_to_nfa and once here
&& unionn.num_of_states() < regex::RED_BOUND)
{
unionn = mata::nfa::reduce(unionn);
}
first = false;
}

STRACE("str-mult-memb-heur", tout << "computing inclusion" << std::endl;);

// We want to know if L \intersect \neg L' is empty, which is same as asking if L is subset of L'
if (mata::nfa::is_included(intersection, unionn)) {
// if inclusion holds, the intersection is empty => UNSAT
STRACE("str", tout << "inclusion holds => UNSAT" << std::endl;);
block_curr_len(expr_ref(this->m.mk_false(), this->m));
return l_false;
}
}

STRACE("str", tout << "intersection is not empty => SAT" << std::endl;);
STRACE("str", tout << "inclusion holds for all vars => SAT" << std::endl;);
return l_true;
}

Expand Down

0 comments on commit 4bcf006

Please sign in to comment.