Merge pull request #145 from VeriFIT/regex_union

Multiple membership heuristic improvement
VeriFIT · May 22, 2024 · 4bcf006 · 4bcf006
2 parents e30c1c6 + 11a800e
commit 4bcf006
Show file tree

Hide file tree

Showing 2 changed files with 56 additions and 15 deletions.
diff --git a/src/smt/theory_str_noodler/theory_str_noodler.h b/src/smt/theory_str_noodler/theory_str_noodler.h
@@ -375,6 +375,12 @@ namespace smt::noodler {
          * We then only need to compute intersection from the regular languages and check if it is not empty.
          * The heuristics sorts the regexes by expected complexity of computing nfa, and iteratively computes
          * the intersection, so that if the formula is unsat, we do not need to build all automata.
+         * Furthermore, for all regexes that should be complemented, we compute their union and then check
+         * the inclusion with the intersection from the previous step, i.e., we have:
+         * L_1 \cap ... \cap L_m \cap \neg L_{m+1} \cap ... \cap \neg L_n = L \cap \neg (L_{m+1} \cup ... \cup L_n)
+         *                                                                = L \cap \neg L'
+         * where L = L_1 \cap ... \cap L_m, and L' = L_{m+1} \cup ... \cup L_n.
+         * We then want to check if L \cap \neg L' is empty (unsat), which is the same as asking if L is subset of L'.
          */
         bool is_mult_membership_suitable();
 

diff --git a/src/smt/theory_str_noodler/theory_str_noodler_final_check.cpp b/src/smt/theory_str_noodler/theory_str_noodler_final_check.cpp
@@ -608,36 +608,71 @@ namespace smt::noodler {
                 return ((!l.first && r.first) | (regex::get_loop_sum(l.second, m_util_s) < regex::get_loop_sum(r.second, m_util_s)));
             });
             STRACE("str-mult-memb-heur",
-                tout << "Sorted NFAs for var " << var << std::endl;
+                tout << "Sorted regexes for var " << var << std::endl;
                 unsigned i = 0;
-                for (const auto & [is_complement, nfa] : list_of_regexes) {
-                    tout << i << " (" << (is_complement ? "" : "not ") <<"complemented):" << std::endl;
-                    tout << nfa << std::endl;
+                for (const auto & [is_complement, reg] : list_of_regexes) {
+                    tout << i << " (should " << (is_complement ? "" : "NOT ") <<"be complemented):" << mk_pp(reg, m) << std::endl;
                 }
             );
 
-            std::shared_ptr<mata::nfa::Nfa> intersection = nullptr; // we save the intersected automata here
+            std::vector<app*> list_of_normal_regs;
+            std::vector<app*> list_of_compl_regs;
             for (auto& [is_complement, reg] : list_of_regexes) {
-                STRACE("str", tout << "building intersection for var " << var << " and regex " << mk_pp(reg, m) << (is_complement ? " that needs to be first complemented" : " that does not need to be first complemented") << std::endl;);
+                if (is_complement) {
+                    list_of_compl_regs.push_back(reg);
+                } else {
+                    list_of_normal_regs.push_back(reg);
+                }
+            }
 
-                std::shared_ptr<mata::nfa::Nfa> nfa = std::make_shared<mata::nfa::Nfa>(regex::conv_to_nfa(reg, m_util_s, m, alph, is_complement, is_complement));
+            // Compute intersection L of all regexes that should not be complemented
+            mata::nfa::Nfa intersection(1, {0}, {0});
+            // initalize to universal automaton
+            for (const mata::Symbol& symb : alph.get_alphabet()) {
+                intersection.delta.add(0, symb, 0);
+            }
 
-                if (intersection == nullptr) {
-                    intersection = nfa; // this is first nfa
-                } else {
-                    intersection = std::make_shared<mata::nfa::Nfa>(mata::nfa::reduce(mata::nfa::intersection(*nfa, *intersection)));
+            bool first = true;
+            for (auto& reg : list_of_normal_regs) {
+                intersection = mata::nfa::intersection(regex::conv_to_nfa(reg, m_util_s, m, alph, false, false), intersection);
+                if (!first // for first iteration we won't do reduction, as it would just be done twice, once in conv_to_nfa and once here
+                    && intersection.num_of_states() < regex::RED_BOUND)
+                {
+                    intersection = mata::nfa::reduce(intersection);
                 }
-                nfa = nullptr;
-
-                if (intersection->is_lang_empty()) {
+                first = false;
+                if (intersection.is_lang_empty()) {
                     STRACE("str", tout << "intersection is empty => UNSAT" << std::endl;);
                     block_curr_len(expr_ref(this->m.mk_false(), this->m));
                     return l_false;
                 }
             }
+
+            // Compute union L' of all regexes that should be complemented (we are using de Morgan)
+            mata::nfa::Nfa unionn; // initialize to empty automaton
+            first = true;
+            for (auto& reg : list_of_compl_regs) {
+                unionn = mata::nfa::uni(regex::conv_to_nfa(reg, m_util_s, m, alph, false, false), unionn);
+                if (!first // for first iteration we won't do reduction, as it would just be done twice, once in conv_to_nfa and once here
+                    && unionn.num_of_states() < regex::RED_BOUND)
+                {
+                    unionn = mata::nfa::reduce(unionn);
+                }
+                first = false;
+            }
+
+            STRACE("str-mult-memb-heur", tout << "computing inclusion" << std::endl;);
+
+            // We want to know if L \intersect \neg L' is empty, which is same as asking if L is subset of L'
+            if (mata::nfa::is_included(intersection, unionn)) {
+                // if inclusion holds, the intersection is empty => UNSAT
+                STRACE("str", tout << "inclusion holds => UNSAT" << std::endl;);
+                block_curr_len(expr_ref(this->m.mk_false(), this->m));
+                return l_false;
+            }
         }
 
-        STRACE("str", tout << "intersection is not empty => SAT" << std::endl;);
+        STRACE("str", tout << "inclusion holds for all vars => SAT" << std::endl;);
         return l_true;
     }