From f62a8888f50d4ac135c177ae07a78560fea79f34 Mon Sep 17 00:00:00 2001 From: Luke Mewburn Date: Fri, 29 Dec 2023 23:34:25 +1100 Subject: [PATCH] reimplement find_bm() using std::search Use std::search() to implement find_bm() instead of using a local implementation of Boyer-Moore. Avoids the integer overflow reported in issue #31 and PR #31. Should also fix the build problem in issue #7. std::search is also faster for the test program in issue #31 on a system with an Intel Xeon E-2224 CPU: - g++ 8.5, find_bm(): 3.16s - g++ 8.5, std::search: 2.40s - g++ 13, std::search: 2.16s In the same test, the C++17 std::boyer_moore_searcher and std::boyer_moore_horspool_searcher were also slower than plain std::search. --- mimetic/utils.h | 37 ++----------------------------------- 1 file changed, 2 insertions(+), 35 deletions(-) diff --git a/mimetic/utils.h b/mimetic/utils.h index 2c2497c..5290b56 100644 --- a/mimetic/utils.h +++ b/mimetic/utils.h @@ -6,6 +6,7 @@ ***************************************************************************/ #ifndef _MIMETIC_UTILS_H_ #define _MIMETIC_UTILS_H_ +#include <algorithm> #include #include #include @@ -43,37 +44,6 @@ int str2int(const std::string& s); /// returns a string hexadecimal representation of \p n std::string int2hex(unsigned int n); -// find_bm specialization for random access iterators -template<typename Iterator> -Iterator find_bm(Iterator bit, Iterator eit, const std::string& word, const std::random_access_iterator_tag&) -{ - int bLen = word.length(); - const char* pWord = word.c_str(); - int i, t, shift[256]; - unsigned char c; - - for(i = 0; i < 256; ++i) - shift[i] = bLen; - - for(i = 0; i < bLen; ++i) - shift[ (unsigned char) pWord[i] ] = bLen -i - 1; - - for(i = t = bLen-1; t >= 0; --i, --t) - { - if((bit + i) >= eit) - return eit; - - while((c = *(bit + i)) != pWord[t]) - { - i += std::max(bLen-t, shift[c]); - if((bit + i) >= eit) return eit; - t = bLen-1; - } - } - - return bit + i + 1; -} - // boyer-moore find /** * find the first occurrence of \p 
word in (\p bit, \p eit] @@ -84,12 +54,9 @@ Iterator find_bm(Iterator bit, Iterator eit, const std::string& word, const std: template<typename Iterator> Iterator find_bm(Iterator bit, Iterator eit, const std::string& word) { - return find_bm(bit, eit, word, - typename std::iterator_traits<Iterator>::iterator_category()); + return std::search(bit, eit, word.begin(), word.end()); } - - } // ns utils }