From 977f0021481a565c08bd4f61d9c8f2b64e21dbc8 Mon Sep 17 00:00:00 2001 From: James Yean Date: Tue, 29 Oct 2024 19:32:39 +0800 Subject: [PATCH] feat: optimize memory footprint Signed-off-by: James Yean --- CMakeLists.txt | 10 +++-- actrie/matcher.py | 68 +++++++++++++++++++++++++------ actrie/src/wrap.c | 33 +++++++++++---- deps/alib | 2 +- include/actrie/config.h | 26 ++++++++++++ include/{ => actrie}/matcher.h | 14 +++++++ include/{ => actrie}/utf8ctx.h | 4 +- include/{ => actrie}/utf8helper.h | 0 jni/src/main/cpp/wrap.cpp | 4 +- ruff.toml | 1 + src/config.c | 30 ++++++++++++++ src/matcher.c | 38 ++++++++++++++--- src/reglet/expr/ambi.h | 6 +-- src/reglet/expr/anto.h | 6 +-- src/reglet/expr/dist.h | 6 +-- src/reglet/expr/expr0.h | 6 +-- src/reglet/expr/pass.h | 6 +-- src/reglet/expr/text.h | 6 +-- src/trie/acdat.c | 28 ++++++++++--- src/utf8ctx.c | 2 +- src/utf8helper.c | 2 +- tests/CMakeLists.txt | 2 +- tests/test_matcher.c | 4 +- 23 files changed, 241 insertions(+), 63 deletions(-) create mode 100644 include/actrie/config.h rename include/{ => actrie}/matcher.h (67%) rename include/{ => actrie}/utf8ctx.h (92%) rename include/{ => actrie}/utf8helper.h (100%) create mode 100644 ruff.toml create mode 100644 src/config.c diff --git a/CMakeLists.txt b/CMakeLists.txt index df36fde..b79fc7a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 2.8) +cmake_minimum_required(VERSION 3.5) project(actrie) set(CMAKE_EXECUTABLE_OUTPUT_DIRECTORY ${CMAKE_SOURCE_DIR}/bin) @@ -40,9 +40,10 @@ if(GENERATE_LRTABLE) endif() set(actrie_HEADER_FILES - include/matcher.h - include/utf8ctx.h - include/utf8helper.h + include/actrie/config.h + include/actrie/matcher.h + include/actrie/utf8ctx.h + include/actrie/utf8helper.h src/vocab.h src/pattern.h src/parser/lr_reduce.h @@ -74,6 +75,7 @@ set(actrie_SOURCE_FILES src/reglet/expr/dist.c src/trie/actrie.c src/trie/acdat.c + src/config.c src/matcher.c src/utf8ctx.c src/utf8helper.c) diff --git a/actrie/matcher.py b/actrie/matcher.py index 6df84b4..ec42772 100644 --- a/actrie/matcher.py +++ b/actrie/matcher.py @@ -16,49 +16,83 @@ def __del__(self): _actrie.Destruct(self._matcher) def load_from_file( - self, path, all_as_plain=False, ignore_bad_pattern=False, bad_as_plain=True, deduplicate_extra=True + self, + path, + all_as_plain=False, + ignore_bad_pattern=False, + bad_as_plain=True, + deduplicate_extra=True, + extra_hint=None, ): if self._matcher: raise MatcherError("Matcher is already initialized.") if not os.path.isfile(path): return False self._matcher = _actrie.ConstructByFile( - convert2pass(path), all_as_plain, ignore_bad_pattern, bad_as_plain, deduplicate_extra + convert2pass(path), all_as_plain, ignore_bad_pattern, bad_as_plain, deduplicate_extra, extra_hint ) return self._matcher != 0 @classmethod def create_by_file( - cls, path, all_as_plain=False, ignore_bad_pattern=False, bad_as_plain=True, deduplicate_extra=True + cls, + path, + all_as_plain=False, + ignore_bad_pattern=False, + bad_as_plain=True, + deduplicate_extra=True, + extra_hint=None, ): matcher = cls() - if matcher.load_from_file(path, all_as_plain, ignore_bad_pattern, bad_as_plain, deduplicate_extra): + if matcher.load_from_file( + path, all_as_plain, ignore_bad_pattern, bad_as_plain, deduplicate_extra, extra_hint=extra_hint + ): return matcher return None def load_from_string( - self, keywords, all_as_plain=False, ignore_bad_pattern=False, bad_as_plain=True, deduplicate_extra=True + self, + keywords, + all_as_plain=False, + ignore_bad_pattern=False, + bad_as_plain=True, + deduplicate_extra=True, + extra_hint=None, ): if self._matcher: raise MatcherError("Matcher is already initialized.") if keywords is None: return False self._matcher = _actrie.ConstructByString( - convert2pass(keywords), all_as_plain, ignore_bad_pattern, bad_as_plain, deduplicate_extra + convert2pass(keywords), all_as_plain, ignore_bad_pattern, bad_as_plain, deduplicate_extra, extra_hint ) return self._matcher != 0 @classmethod def create_by_string( - cls, keywords, all_as_plain=False, ignore_bad_pattern=False, bad_as_plain=True, deduplicate_extra=True + cls, + keywords, + all_as_plain=False, + ignore_bad_pattern=False, + bad_as_plain=True, + deduplicate_extra=True, + extra_hint=None, ): matcher = cls() - if matcher.load_from_string(keywords, all_as_plain, ignore_bad_pattern, bad_as_plain, deduplicate_extra): + if matcher.load_from_string( + keywords, all_as_plain, ignore_bad_pattern, bad_as_plain, deduplicate_extra, extra_hint=extra_hint + ): return matcher return None def load_from_collection( - self, keywords, all_as_plain=False, ignore_bad_pattern=False, bad_as_plain=True, deduplicate_extra=True + self, + keywords, + all_as_plain=False, + ignore_bad_pattern=False, + bad_as_plain=True, + deduplicate_extra=True, + extra_hint=None, ): if self._matcher: raise MatcherError("Matcher is already initialized.") @@ -68,14 +102,24 @@ def load_from_collection( keywords = "\n".join([convert2pass(keyword) for keyword in keywords if convert2pass(keyword)]) else: raise MatcherError("Keywords should be list or set.") - return self.load_from_string(keywords, all_as_plain, ignore_bad_pattern, bad_as_plain, deduplicate_extra) + return self.load_from_string( + keywords, all_as_plain, ignore_bad_pattern, bad_as_plain, deduplicate_extra, extra_hint=extra_hint + ) @classmethod def create_by_collection( - cls, keywords, all_as_plain=False, ignore_bad_pattern=False, bad_as_plain=True, deduplicate_extra=True + cls, + keywords, + all_as_plain=False, + ignore_bad_pattern=False, + bad_as_plain=True, + deduplicate_extra=True, + extra_hint=None, ): matcher = cls() - if matcher.load_from_collection(keywords, all_as_plain, ignore_bad_pattern, bad_as_plain, deduplicate_extra): + if matcher.load_from_collection( + keywords, all_as_plain, ignore_bad_pattern, bad_as_plain, deduplicate_extra, extra_hint=extra_hint + ): return matcher return None diff --git a/actrie/src/wrap.c b/actrie/src/wrap.c index afb66ef..f33beb2 100644 --- a/actrie/src/wrap.c +++ b/actrie/src/wrap.c @@ -20,7 +20,7 @@ #define PY_SSIZE_T_CLEAN #include -#include "utf8ctx.h" +#include #if PY_MAJOR_VERSION >= 3 #define IS_PY3K @@ -32,11 +32,20 @@ PyObject* wrap_construct_by_file(PyObject* dummy, PyObject* args) { PyObject* ignore_bad_pattern; PyObject* bad_as_plain; PyObject* deduplicate_extra; + PyObject* extra_hint; matcher_t matcher = NULL; - if (PyArg_ParseTuple(args, "sOOOO", &path, &all_as_plain, &ignore_bad_pattern, &bad_as_plain, &deduplicate_extra)) { - matcher = matcher_construct_by_file(path, PyObject_IsTrue(all_as_plain), PyObject_IsTrue(ignore_bad_pattern), - PyObject_IsTrue(bad_as_plain), PyObject_IsTrue(deduplicate_extra)); + if (PyArg_ParseTuple(args, "sOOOOO", &path, &all_as_plain, &ignore_bad_pattern, &bad_as_plain, &deduplicate_extra, + &extra_hint)) { + if (extra_hint != Py_None) { + segarray_config_s extra_store_config = hint_segarray(PyLong_AsSize_t(extra_hint)); + matcher = matcher_construct_by_file_ext(path, PyObject_IsTrue(all_as_plain), PyObject_IsTrue(ignore_bad_pattern), + PyObject_IsTrue(bad_as_plain), PyObject_IsTrue(deduplicate_extra), + &extra_store_config); + } else { + matcher = matcher_construct_by_file(path, PyObject_IsTrue(all_as_plain), PyObject_IsTrue(ignore_bad_pattern), + PyObject_IsTrue(bad_as_plain), PyObject_IsTrue(deduplicate_extra)); + } } return Py_BuildValue("K", matcher); @@ -49,13 +58,21 @@ PyObject* wrap_construct_by_string(PyObject* dummy, PyObject* args) { PyObject* ignore_bad_pattern; PyObject* bad_as_plain; PyObject* deduplicate_extra; + PyObject* extra_hint; matcher_t matcher = NULL; - if (PyArg_ParseTuple(args, "s#OOOO", &string, &length, &all_as_plain, &ignore_bad_pattern, &bad_as_plain, - &deduplicate_extra)) { + if (PyArg_ParseTuple(args, "s#OOOOO", &string, &length, &all_as_plain, &ignore_bad_pattern, &bad_as_plain, + &deduplicate_extra, &extra_hint)) { strlen_s vocab = {.ptr = (char*)string, .len = (size_t)length}; - matcher = matcher_construct_by_string(&vocab, PyObject_IsTrue(all_as_plain), PyObject_IsTrue(ignore_bad_pattern), - PyObject_IsTrue(bad_as_plain), PyObject_IsTrue(deduplicate_extra)); + if (extra_hint != Py_None) { + segarray_config_s extra_store_config = hint_segarray(PyLong_AsSize_t(extra_hint)); + matcher = matcher_construct_by_string_ext(&vocab, PyObject_IsTrue(all_as_plain), + PyObject_IsTrue(ignore_bad_pattern), PyObject_IsTrue(bad_as_plain), + PyObject_IsTrue(deduplicate_extra), &extra_store_config); + } else { + matcher = matcher_construct_by_string(&vocab, PyObject_IsTrue(all_as_plain), PyObject_IsTrue(ignore_bad_pattern), + PyObject_IsTrue(bad_as_plain), PyObject_IsTrue(deduplicate_extra)); + } } return Py_BuildValue("K", matcher); diff --git a/deps/alib b/deps/alib index ce45239..9e8c7dc 160000 --- a/deps/alib +++ b/deps/alib @@ -1 +1 @@ -Subproject commit ce452393f90647f6f59c18fe3a51d38ec5110d77 +Subproject commit 9e8c7dc0b68a33e91e021e1969fee8409c14a251 diff --git a/include/actrie/config.h b/include/actrie/config.h new file mode 100644 index 0000000..f402d69 --- /dev/null +++ b/include/actrie/config.h @@ -0,0 +1,26 @@ +/** + * config.h + * + * @author James Yin + */ +#ifndef __ACTRIE_CONFIG_H__ +#define __ACTRIE_CONFIG_H__ + +#include + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +typedef struct _segarray_config_ { + size_t seg_blen; + size_t region_size; +} segarray_config_s, *segarray_config_t; + +segarray_config_s hint_segarray(size_t len); + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif // __ACTRIE_CONFIG_H__ diff --git a/include/matcher.h b/include/actrie/matcher.h similarity index 67% rename from include/matcher.h rename to include/actrie/matcher.h index 71081e5..0dda009 100644 --- a/include/matcher.h +++ b/include/actrie/matcher.h @@ -8,6 +8,8 @@ #include +#include + #ifdef __cplusplus extern "C" { #endif /* __cplusplus */ @@ -26,11 +28,23 @@ matcher_t matcher_construct_by_file(const char* path, bool ignore_bad_pattern, bool bad_as_plain, bool deduplicate_extra); +matcher_t matcher_construct_by_file_ext(const char* path, + bool all_as_plain, + bool ignore_bad_pattern, + bool bad_as_plain, + bool deduplicate_extra, + segarray_config_t extra_store_config); matcher_t matcher_construct_by_string(strlen_t string, bool all_as_plain, bool ignore_bad_pattern, bool bad_as_plain, bool deduplicate_extra); +matcher_t matcher_construct_by_string_ext(strlen_t string, + bool all_as_plain, + bool ignore_bad_pattern, + bool bad_as_plain, + bool deduplicate_extra, + segarray_config_t extra_store_config); void matcher_destruct(matcher_t matcher); context_t matcher_alloc_context(matcher_t matcher); diff --git a/include/utf8ctx.h b/include/actrie/utf8ctx.h similarity index 92% rename from include/utf8ctx.h rename to include/actrie/utf8ctx.h index 37771e5..5e14023 100644 --- a/include/utf8ctx.h +++ b/include/actrie/utf8ctx.h @@ -6,8 +6,8 @@ #ifndef __ACTRIE_UTF8POS_H__ #define __ACTRIE_UTF8POS_H__ -#include -#include +#include +#include #ifdef __cplusplus extern "C" { diff --git a/include/utf8helper.h b/include/actrie/utf8helper.h similarity index 100% rename from include/utf8helper.h rename to include/actrie/utf8helper.h diff --git a/jni/src/main/cpp/wrap.cpp b/jni/src/main/cpp/wrap.cpp index f61dffb..22b0687 100644 --- a/jni/src/main/cpp/wrap.cpp +++ b/jni/src/main/cpp/wrap.cpp @@ -4,8 +4,8 @@ #include #include -#include -#include +#include +#include /* * Class: psn_ifplusor_actrie_Matcher diff --git a/ruff.toml b/ruff.toml new file mode 100644 index 0000000..f11cf63 --- /dev/null +++ b/ruff.toml @@ -0,0 +1 @@ +line-length = 120 diff --git a/src/config.c b/src/config.c new file mode 100644 index 0000000..795c961 --- /dev/null +++ b/src/config.c @@ -0,0 +1,30 @@ +/** + * matcher.c + * + * @author James Yin + */ +#include "actrie/config.h" + +#include + +segarray_config_s hint_segarray(size_t len) { + segarray_config_s config = {.seg_blen = 0}; + + size_t hint = len >> 3; + while (hint != 0) { + hint >>= 1; + config.seg_blen++; + } + if (config.seg_blen < 10) { + config.seg_blen = 10; + } + + config.region_size = (len >> config.seg_blen) + 1; + if (config.region_size < 8) { + config.region_size = 8; + } + + fprintf(stderr, "seg_blen: %zu, region_size: %zu\n", config.seg_blen, config.region_size); + + return config; +} diff --git a/src/matcher.c b/src/matcher.c index 7f7ffc8..83a83ff 100644 --- a/src/matcher.c +++ b/src/matcher.c @@ -3,7 +3,7 @@ * * @author James Yin */ -#include "matcher.h" +#include "actrie/matcher.h" #include "parser/parser.h" #include "reglet/engine.h" @@ -13,7 +13,7 @@ typedef struct _actrie_matcher_ { dat_t datrie; reglet_t reglet; - segarray_t extra_store; + segarray_t extra_store; // keep reference of extra strings. } matcher_s; static matcher_t matcher_alloc() { @@ -71,17 +71,22 @@ static matcher_t matcher_construct(vocab_t vocab, bool all_as_plain, bool ignore_bad_pattern, bool bad_as_plain, - bool deduplicate_extra) { + bool deduplicate_extra, + segarray_config_t extra_store_config) { trie_t extra_trie = deduplicate_extra ? trie_alloc() : NULL; // create matcher matcher_t matcher = matcher_alloc(); - matcher->extra_store = segarray_construct_with_type(dstr_t); + matcher->extra_store = + extra_store_config == NULL + ? segarray_construct_with_type(dstr_t) + : segarray_construct_with_type_ext(dstr_t, extra_store_config->seg_blen, extra_store_config->region_size); matcher->reglet = reglet_construct(); // load vocabulary add_pattern_params_s add_pattern_args = {.matcher = matcher, .extra_trie = extra_trie}; if (!parse_vocab(vocab, add_pattern_to_matcher, &add_pattern_args, all_as_plain, ignore_bad_pattern, bad_as_plain)) { + // release trie manually trie_free(matcher->reglet->trie, (trie_node_free_f)expr_list_free); matcher->reglet->trie = NULL; matcher_destruct(matcher); @@ -110,12 +115,22 @@ matcher_t matcher_construct_by_file(const char* path, bool ignore_bad_pattern, bool bad_as_plain, bool deduplicate_extra) { + return matcher_construct_by_file_ext(path, all_as_plain, ignore_bad_pattern, bad_as_plain, deduplicate_extra, NULL); +} + +matcher_t matcher_construct_by_file_ext(const char* path, + bool all_as_plain, + bool ignore_bad_pattern, + bool bad_as_plain, + bool deduplicate_extra, + segarray_config_t extra_store_config) { vocab_t vocab = vocab_construct(stream_type_file, (void*)path); if (vocab == NULL) { return NULL; } - matcher_t matcher = matcher_construct(vocab, all_as_plain, ignore_bad_pattern, bad_as_plain, deduplicate_extra); + matcher_t matcher = + matcher_construct(vocab, all_as_plain, ignore_bad_pattern, bad_as_plain, deduplicate_extra, extra_store_config); vocab_destruct(vocab); return matcher; } @@ -125,8 +140,19 @@ matcher_t matcher_construct_by_string(strlen_t string, bool ignore_bad_pattern, bool bad_as_plain, bool deduplicate_extra) { + return matcher_construct_by_string_ext(string, all_as_plain, ignore_bad_pattern, bad_as_plain, deduplicate_extra, + NULL); +} + +matcher_t matcher_construct_by_string_ext(strlen_t string, + bool all_as_plain, + bool ignore_bad_pattern, + bool bad_as_plain, + bool deduplicate_extra, + segarray_config_t extra_store_config) { vocab_t vocab = vocab_construct(stream_type_string, string); - matcher_t matcher = matcher_construct(vocab, all_as_plain, ignore_bad_pattern, bad_as_plain, deduplicate_extra); + matcher_t matcher = + matcher_construct(vocab, all_as_plain, ignore_bad_pattern, bad_as_plain, deduplicate_extra, extra_store_config); vocab_destruct(vocab); return matcher; } diff --git a/src/reglet/expr/ambi.h b/src/reglet/expr/ambi.h index 326d378..1872ece 100644 --- a/src/reglet/expr/ambi.h +++ b/src/reglet/expr/ambi.h @@ -3,8 +3,8 @@ * * @author James Yin */ -#ifndef __ACTRIE_EXPR_AMBI__ -#define __ACTRIE_EXPR_AMBI__ +#ifndef __ACTRIE_EXPR_AMBI_H__ +#define __ACTRIE_EXPR_AMBI_H__ #include "expr0.h" @@ -25,4 +25,4 @@ void expr_feed_ambi_center(expr_t self, pos_cache_t center, reg_ctx_t context); } #endif /* __cplusplus */ -#endif // __ACTRIE_EXPR_AMBI__ +#endif // __ACTRIE_EXPR_AMBI_H__ diff --git a/src/reglet/expr/anto.h b/src/reglet/expr/anto.h index 7af647c..14b342c 100644 --- a/src/reglet/expr/anto.h +++ b/src/reglet/expr/anto.h @@ -3,8 +3,8 @@ * * @author James Yin */ -#ifndef __ACTRIE_EXPR_ANTO__ -#define __ACTRIE_EXPR_ANTO__ +#ifndef __ACTRIE_EXPR_ANTO_H__ +#define __ACTRIE_EXPR_ANTO_H__ #include "expr0.h" @@ -25,4 +25,4 @@ void expr_feed_anto_center(expr_t self, pos_cache_t center, reg_ctx_t context); } #endif /* __cplusplus */ -#endif // __ACTRIE_EXPR_ANTO__ +#endif // __ACTRIE_EXPR_ANTO_H__ diff --git a/src/reglet/expr/dist.h b/src/reglet/expr/dist.h index da994c5..656a40f 100644 --- a/src/reglet/expr/dist.h +++ b/src/reglet/expr/dist.h @@ -3,8 +3,8 @@ * * @author James Yin */ -#ifndef __ACTRIE_EXPR_DIST__ -#define __ACTRIE_EXPR_DIST__ +#ifndef __ACTRIE_EXPR_DIST_H__ +#define __ACTRIE_EXPR_DIST_H__ #include "expr0.h" @@ -29,4 +29,4 @@ void expr_feed_ddist_suffix(expr_t self, pos_cache_t suffix, reg_ctx_t context); } #endif /* __cplusplus */ -#endif // __ACTRIE_EXPR_DIST__ +#endif // __ACTRIE_EXPR_DIST_H__ diff --git a/src/reglet/expr/expr0.h b/src/reglet/expr/expr0.h index ad252d9..766029f 100644 --- a/src/reglet/expr/expr0.h +++ b/src/reglet/expr/expr0.h @@ -3,8 +3,8 @@ * * @author James Yin */ -#ifndef __ACTRIE_EXPR_FEED_H__ -#define __ACTRIE_EXPR_FEED_H__ +#ifndef __ACTRIE_EXPR_EXPR0_H__ +#define __ACTRIE_EXPR_EXPR0_H__ #include @@ -76,4 +76,4 @@ inline void expr_ctx_init(expr_ctx_t self, expr_t expr, expr_ctx_free_f free, ex } #endif /* __cplusplus */ -#endif // __ACTRIE_EXPR_FEED_H__ +#endif // __ACTRIE_EXPR_EXPR0_H__ diff --git a/src/reglet/expr/pass.h b/src/reglet/expr/pass.h index 2f5e43c..8a4aa4d 100644 --- a/src/reglet/expr/pass.h +++ b/src/reglet/expr/pass.h @@ -3,8 +3,8 @@ * * @author James Yin */ -#ifndef __ACTRIE_EXPR_PASS__ -#define __ACTRIE_EXPR_PASS__ +#ifndef __ACTRIE_EXPR_PASS_H__ +#define __ACTRIE_EXPR_PASS_H__ #include "expr0.h" @@ -24,4 +24,4 @@ void expr_feed_pass(expr_t self, pos_cache_t keyword, reg_ctx_t context); } #endif /* __cplusplus */ -#endif // __ACTRIE_EXPR_PASS__ +#endif // __ACTRIE_EXPR_PASS_H__ diff --git a/src/reglet/expr/text.h b/src/reglet/expr/text.h index 7c771c7..8a0eaae 100644 --- a/src/reglet/expr/text.h +++ b/src/reglet/expr/text.h @@ -3,8 +3,8 @@ * * @author James Yin */ -#ifndef __ACTRIE_EXPR_TEXT__ -#define __ACTRIE_EXPR_TEXT__ +#ifndef __ACTRIE_EXPR_TEXT_H__ +#define __ACTRIE_EXPR_TEXT_H__ #include "expr0.h" @@ -25,4 +25,4 @@ void expr_feed_text(expr_t self, pos_cache_t keyword, void* context); } #endif /* __cplusplus */ -#endif // __ACTRIE_EXPR_TEXT__ +#endif // __ACTRIE_EXPR_TEXT_H__ diff --git a/src/trie/acdat.c b/src/trie/acdat.c index d26fe62..d6131f6 100644 --- a/src/trie/acdat.c +++ b/src/trie/acdat.c @@ -8,6 +8,8 @@ #include #include +#include + /* Trie 内部接口,仅限 Double-Array Trie 使用 */ size_t trie_size(trie_t self); @@ -197,9 +199,21 @@ static void dat_post_construct(dat_t self, trie_t origin) { pDatNode->failed.ptr = dat_access_node(self, pDatNode->failed.idx); } + // 回溯优化 if (self->enable_automation) { - // 回溯优化 - self->value_array = segarray_construct(sizeof(dat_value_s), NULL, NULL); + size_t value_len = 0; + for (size_t index = 0; index < len; index++) { // bfs + trie_node_t pNode = trie_access_node(origin, index); + dat_node_t pDatNode = dat_access_node(self, pNode->trie_datidx); + if (pDatNode->value.raw != NULL) { + value_len++; + } + } + + segarray_config_s value_config = hint_segarray(value_len); + self->value_array = + segarray_construct_ext(sizeof(dat_value_s), value_config.seg_blen, value_config.region_size, NULL, NULL); + for (size_t index = 0; index < len; index++) { // bfs trie_node_t pNode = trie_access_node(origin, index); dat_node_t pDatNode = dat_access_node(self, pNode->trie_datidx); @@ -219,7 +233,7 @@ static void dat_post_construct(dat_t self, trie_t origin) { } } -dat_t dat_alloc() { +dat_t dat_alloc(segarray_config_t config) { dat_t datrie = (dat_t)amalloc(sizeof(dat_s)); if (datrie == NULL) { return NULL; @@ -231,7 +245,9 @@ dat_t dat_alloc() { dat_node_s dummy_node = {0}; datrie->_sentinel = &dummy_node; - datrie->node_array = segarray_construct(sizeof(dat_node_s), dat_init_segment, datrie); + datrie->node_array = config == NULL ? segarray_construct(sizeof(dat_node_s), dat_init_segment, datrie) + : segarray_construct_ext(sizeof(dat_node_s), config->seg_blen, + config->region_size, dat_init_segment, datrie); segarray_extend(datrie->node_array, DAT_ROOT_IDX + 2); datrie->_sentinel = dat_access_node(datrie, 0); @@ -314,7 +330,9 @@ void dat_destruct(dat_t dat, dat_node_free_f node_free_func) { } dat_t dat_construct_by_trie(trie_t origin, bool enable_automation) { - dat_t dat = dat_alloc(); + segarray_config_s node_config = hint_segarray(trie_size(origin)); + + dat_t dat = dat_alloc(&node_config); if (dat == NULL) { return NULL; } diff --git a/src/utf8ctx.c b/src/utf8ctx.c index 9e90ff2..983f350 100644 --- a/src/utf8ctx.c +++ b/src/utf8ctx.c @@ -3,7 +3,7 @@ * * @author James Yin */ -#include "utf8ctx.h" +#include "actrie/utf8ctx.h" utf8ctx_t utf8ctx_alloc_context(matcher_t matcher) { context_t context; diff --git a/src/utf8helper.c b/src/utf8helper.c index 13e07b1..23f1595 100644 --- a/src/utf8helper.c +++ b/src/utf8helper.c @@ -3,7 +3,7 @@ * * @author James Yin */ -#include "utf8helper.h" +#include "actrie/utf8helper.h" #include diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 5a437a4..2a51fd2 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 2.8) +cmake_minimum_required(VERSION 3.5) if(MSVC) add_compile_options(/utf-8) diff --git a/tests/test_matcher.c b/tests/test_matcher.c index 495ce09..3a332e1 100644 --- a/tests/test_matcher.c +++ b/tests/test_matcher.c @@ -3,8 +3,8 @@ * * @author James Yin */ -#include -#include +#include +#include int main() { char* str = "不(好|会)好";