From 6e6f83f5a3e5a4f1e32c2aa339117da7970bd596 Mon Sep 17 00:00:00 2001 From: Carlo Benussi Date: Tue, 18 Jul 2017 15:43:40 +0200 Subject: [PATCH 1/6] added tags extraction for jsgf/fsg --- include/pocketsphinx.h | 18 +++++ src/libpocketsphinx/allphone_search.c | 8 +++ src/libpocketsphinx/allphone_search.h | 5 ++ src/libpocketsphinx/fsg_search.c | 75 ++++++++++++++++++--- src/libpocketsphinx/fsg_search_internal.h | 5 ++ src/libpocketsphinx/kws_search.c | 8 +++ src/libpocketsphinx/kws_search.h | 5 ++ src/libpocketsphinx/ngram_search.c | 9 +++ src/libpocketsphinx/phone_loop_search.c | 9 +++ src/libpocketsphinx/pocketsphinx.c | 11 +++ src/libpocketsphinx/pocketsphinx_internal.h | 9 +++ src/libpocketsphinx/state_align_search.c | 1 + 12 files changed, 154 insertions(+), 9 deletions(-) diff --git a/include/pocketsphinx.h b/include/pocketsphinx.h index c890b2bef..d834e94e7 100644 --- a/include/pocketsphinx.h +++ b/include/pocketsphinx.h @@ -47,6 +47,7 @@ #include #include #include +#include /* PocketSphinx headers (not many of them!) */ #include @@ -61,6 +62,11 @@ extern "C" { } #endif +/** + * PocketSphinx hyptags struct. + */ +typedef struct ps_hyptags_s ps_hyptags_t; + /** * PocketSphinx speech recognizer object. */ @@ -408,6 +414,18 @@ int ps_end_utt(ps_decoder_t *ps); POCKETSPHINX_EXPORT char const *ps_get_hyp(ps_decoder_t *ps, int32 *out_best_score); +/** + * Get glist containing word-tags pairs, as ps_hyptags_t. + * + * @param ps Decoder. + * @param out_best_score Output: path score corresponding to returned string. + * @return glist_t containing ps_hyptags_t. Each struct has a word of the best + * hypothesis at this point in decoding, and a glist_t of chars for the tags (can be NULL). + * NULL is returned if no hypothesis is available. + */ +POCKETSPHINX_EXPORT +glist_t ps_get_hyp_with_tags(ps_decoder_t *ps, int32 *out_best_score); + /** * Get posterior probability. 
* diff --git a/src/libpocketsphinx/allphone_search.c b/src/libpocketsphinx/allphone_search.c index ebb3a1172..c40c9b868 100644 --- a/src/libpocketsphinx/allphone_search.c +++ b/src/libpocketsphinx/allphone_search.c @@ -131,6 +131,7 @@ static ps_searchfuncs_t allphone_funcs = { /* free: */ allphone_search_free, /* lattice: */ allphone_search_lattice, /* hyp: */ allphone_search_hyp, + /* hyptags_list: */ allphone_search_hyp_with_tags, /* prob: */ allphone_search_prob, /* seg_iter: */ allphone_search_seg_iter, }; @@ -907,3 +908,10 @@ allphone_search_hyp(ps_search_t * search, int32 * out_score) E_INFO("Hyp: %s\n", search->hyp_str); return search->hyp_str; } + +glist_t +allphone_search_hyp_with_tags(ps_search_t * search, int32 * out_score) +{ + E_WARN("Tags extraction for allphone_search not implemented\n"); + return NULL; +} diff --git a/src/libpocketsphinx/allphone_search.h b/src/libpocketsphinx/allphone_search.h index d09a4e3ce..08e6e71fa 100644 --- a/src/libpocketsphinx/allphone_search.h +++ b/src/libpocketsphinx/allphone_search.h @@ -176,4 +176,9 @@ int allphone_search_finish(ps_search_t * search); */ char const *allphone_search_hyp(ps_search_t * search, int32 * out_score); +/** + * Get glist_t with word-tags pairs. 
+ */ +glist_t allphone_search_hyp_with_tags(ps_search_t * search, int32 * out_score); + #endif /* __ALLPHONE_SEARCH_H__ */ diff --git a/src/libpocketsphinx/fsg_search.c b/src/libpocketsphinx/fsg_search.c index 09969c908..34028ce3a 100644 --- a/src/libpocketsphinx/fsg_search.c +++ b/src/libpocketsphinx/fsg_search.c @@ -75,15 +75,16 @@ static ps_lattice_t *fsg_search_lattice(ps_search_t *search); static int fsg_search_prob(ps_search_t *search); static ps_searchfuncs_t fsg_funcs = { - /* start: */ fsg_search_start, - /* step: */ fsg_search_step, - /* finish: */ fsg_search_finish, - /* reinit: */ fsg_search_reinit, - /* free: */ fsg_search_free, - /* lattice: */ fsg_search_lattice, - /* hyp: */ fsg_search_hyp, - /* prob: */ fsg_search_prob, - /* seg_iter: */ fsg_search_seg_iter, + /* start: */ fsg_search_start, + /* step: */ fsg_search_step, + /* finish: */ fsg_search_finish, + /* reinit: */ fsg_search_reinit, + /* free: */ fsg_search_free, + /* lattice: */ fsg_search_lattice, + /* hyp: */ fsg_search_hyp, + /* hyptags_list: */ fsg_search_hyp_with_tags, + /* prob: */ fsg_search_prob, + /* seg_iter: */ fsg_search_seg_iter, }; static int @@ -1053,6 +1054,62 @@ fsg_search_hyp(ps_search_t *search, int32 *out_score) return search->hyp_str; } +glist_t +fsg_search_hyp_with_tags(ps_search_t *search, int32 *out_score) +{ + fsg_search_t *fsgs = (fsg_search_t *)search; + dict_t *dict = ps_search_dict(search); + int bp, bpidx; + + /* Get last backpointer table index. */ + bpidx = fsg_search_find_exit(fsgs, fsgs->frame, fsgs->final, out_score); + /* No hypothesis (yet). */ + if (bpidx <= 0) { + return NULL; + } + + /* If bestpath is enabled and the utterance is complete, then run it. 
*/ + if (fsgs->bestpath && fsgs->final) { + ps_lattice_t *dag; + ps_latlink_t *link; + + if ((dag = fsg_search_lattice(search)) == NULL) { + E_WARN("Failed to obtain the lattice while bestpath enabled\n"); + return NULL; + } + if ((link = fsg_search_bestpath(search, out_score, FALSE)) == NULL) { + E_WARN("Failed to find the bestpath in a lattice\n"); + return NULL; + } + return ps_lattice_hyp(dag, link); + } + + glist_free(search->hyptags_list); + bp = bpidx; + while (bp > 0) { + fsg_hist_entry_t *hist_entry = fsg_history_entry_get(fsgs->history, bp); + fsg_link_t *fl = fsg_hist_entry_fsglink(hist_entry); + + char const *baseword; + int32 wid; + glist_t tags = fsg_link_tags(fl); + bp = fsg_hist_entry_pred(hist_entry); + wid = fsg_link_wid(fl); + if (wid < 0 || fsg_model_is_filler(fsgs->fsg, wid)) + continue; + baseword = dict_basestr(dict, + dict_wordid(dict, + fsg_model_word_str(fsgs->fsg, wid))); + + ps_hyptags_t *r = ckd_calloc(1, sizeof(ps_hyptags_t)); + r->tags = tags; + r->word = baseword; + + search->hyptags_list = glist_add_ptr(search->hyptags_list, (void *)r); + } + return search->hyptags_list; +} + static void fsg_seg_bp2itor(ps_seg_t *seg, fsg_hist_entry_t *hist_entry) { diff --git a/src/libpocketsphinx/fsg_search_internal.h b/src/libpocketsphinx/fsg_search_internal.h index 7f3135926..515547c40 100644 --- a/src/libpocketsphinx/fsg_search_internal.h +++ b/src/libpocketsphinx/fsg_search_internal.h @@ -150,4 +150,9 @@ int fsg_search_finish(ps_search_t *search); */ char const *fsg_search_hyp(ps_search_t *search, int32 *out_score); +/** + * Get glist_t with word-tags pairs. 
+ */ +glist_t fsg_search_hyp_with_tags(ps_search_t *search, int32 *out_score); + #endif diff --git a/src/libpocketsphinx/kws_search.c b/src/libpocketsphinx/kws_search.c index 9f7fbd270..1b2fba449 100644 --- a/src/libpocketsphinx/kws_search.c +++ b/src/libpocketsphinx/kws_search.c @@ -147,6 +147,7 @@ static ps_searchfuncs_t kws_funcs = { /* free: */ kws_search_free, /* lattice: */ kws_search_lattice, /* hyp: */ kws_search_hyp, + /* hyptags_list: */ kws_search_hyp_with_tags, /* prob: */ kws_search_prob, /* seg_iter: */ kws_search_seg_iter, }; @@ -675,6 +676,13 @@ kws_search_hyp(ps_search_t * search, int32 * out_score) return search->hyp_str; } +glist_t +kws_search_hyp_with_tags(ps_search_t * search, int32 * out_score) +{ + E_WARN("Tags extraction for kws_search not implemented\n"); + return NULL; +} + char * kws_search_get_keyphrases(ps_search_t * search) { diff --git a/src/libpocketsphinx/kws_search.h b/src/libpocketsphinx/kws_search.h index c820afb6e..941ddc476 100644 --- a/src/libpocketsphinx/kws_search.h +++ b/src/libpocketsphinx/kws_search.h @@ -134,6 +134,11 @@ int kws_search_finish(ps_search_t * search); */ char const *kws_search_hyp(ps_search_t * search, int32 * out_score); +/** + * Get glist_t with word-tags pairs. 
+ */ +glist_t kws_search_hyp_with_tags(ps_search_t * search, int32 * out_score); + /** * Get active keyphrases */ diff --git a/src/libpocketsphinx/ngram_search.c b/src/libpocketsphinx/ngram_search.c index 153e4d0c9..59c01ff6d 100644 --- a/src/libpocketsphinx/ngram_search.c +++ b/src/libpocketsphinx/ngram_search.c @@ -60,6 +60,7 @@ static int ngram_search_step(ps_search_t *search, int frame_idx); static int ngram_search_finish(ps_search_t *search); static int ngram_search_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p); static char const *ngram_search_hyp(ps_search_t *search, int32 *out_score); +static glist_t ngram_search_hyp_with_tags(ps_search_t *search, int32 *out_score); static int32 ngram_search_prob(ps_search_t *search); static ps_seg_t *ngram_search_seg_iter(ps_search_t *search); @@ -71,6 +72,7 @@ static ps_searchfuncs_t ngram_funcs = { /* free: */ ngram_search_free, /* lattice: */ ngram_search_lattice, /* hyp: */ ngram_search_hyp, + /* hyptags_list: */ ngram_search_hyp_with_tags, /* prob: */ ngram_search_prob, /* seg_iter: */ ngram_search_seg_iter, }; @@ -887,6 +889,13 @@ ngram_search_hyp(ps_search_t *search, int32 *out_score) return NULL; } +static glist_t +ngram_search_hyp_with_tags(ps_search_t * search, int32 * out_score) +{ + E_WARN("Tags extraction for ngram_search not implemented\n"); + return NULL; +} + static void ngram_search_bp2itor(ps_seg_t *seg, int bp) { diff --git a/src/libpocketsphinx/phone_loop_search.c b/src/libpocketsphinx/phone_loop_search.c index f93c1e8aa..ce195e4c2 100644 --- a/src/libpocketsphinx/phone_loop_search.c +++ b/src/libpocketsphinx/phone_loop_search.c @@ -49,6 +49,7 @@ static int phone_loop_search_finish(ps_search_t *search); static int phone_loop_search_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p); static void phone_loop_search_free(ps_search_t *search); static char const *phone_loop_search_hyp(ps_search_t *search, int32 *out_score); +static glist_t phone_loop_search_hyp_with_tags(ps_search_t 
*search, int32 *out_score); static int32 phone_loop_search_prob(ps_search_t *search); static ps_seg_t *phone_loop_search_seg_iter(ps_search_t *search); @@ -60,6 +61,7 @@ static ps_searchfuncs_t phone_loop_search_funcs = { /* free: */ phone_loop_search_free, /* lattice: */ NULL, /* hyp: */ phone_loop_search_hyp, + /* hyptags_list: */ phone_loop_search_hyp_with_tags, /* prob: */ phone_loop_search_prob, /* seg_iter: */ phone_loop_search_seg_iter, }; @@ -351,6 +353,13 @@ phone_loop_search_hyp(ps_search_t *search, int32 *out_score) return NULL; } +static glist_t +phone_loop_search_hyp_with_tags(ps_search_t *search, int32 *out_score) +{ + E_WARN("hyptags lists are not returned from phone loop search"); + return NULL; +} + static int32 phone_loop_search_prob(ps_search_t *search) { diff --git a/src/libpocketsphinx/pocketsphinx.c b/src/libpocketsphinx/pocketsphinx.c index 0d9b09960..a673c5925 100644 --- a/src/libpocketsphinx/pocketsphinx.c +++ b/src/libpocketsphinx/pocketsphinx.c @@ -1199,6 +1199,17 @@ ps_get_hyp(ps_decoder_t *ps, int32 *out_best_score) return hyp; } +glist_t +ps_get_hyp_with_tags(ps_decoder_t *ps, int32 *out_best_score) +{ + glist_t hyptags_list; + + ptmr_start(&ps->perf); + hyptags_list = ps_search_hyp_with_tags(ps->search, out_best_score); + ptmr_stop(&ps->perf); + return hyptags_list; +} + int32 ps_get_prob(ps_decoder_t *ps) { diff --git a/src/libpocketsphinx/pocketsphinx_internal.h b/src/libpocketsphinx/pocketsphinx_internal.h index 3f7dd98f7..f5716b768 100644 --- a/src/libpocketsphinx/pocketsphinx_internal.h +++ b/src/libpocketsphinx/pocketsphinx_internal.h @@ -76,6 +76,12 @@ typedef struct ps_search_s ps_search_t; #define PS_SEARCH_TYPE_STATE_ALIGN "state_align" #define PS_SEARCH_TYPE_PHONE_LOOP "phone_loop" +struct ps_hyptags_s { + char const *word; + glist_t tags; +}; + + /** * V-table for search algorithm. 
*/ @@ -88,6 +94,7 @@ typedef struct ps_searchfuncs_s { ps_lattice_t *(*lattice)(ps_search_t *search); char const *(*hyp)(ps_search_t *search, int32 *out_score); + glist_t (*hyptags_list)(ps_search_t *search, int32 *out_score); int32 (*prob)(ps_search_t *search); ps_seg_t *(*seg_iter)(ps_search_t *search); } ps_searchfuncs_t; @@ -107,6 +114,7 @@ struct ps_search_s { dict_t *dict; /**< Pronunciation dictionary. */ dict2pid_t *d2p; /**< Dictionary to senone mappings. */ char *hyp_str; /**< Current hypothesis string. */ + glist_t hyptags_list; /**< glist containing word-tags pairs. */ ps_lattice_t *dag; /**< Current hypothesis word graph. */ ps_latlink_t *last_link; /**< Final link in best path. */ int32 post; /**< Utterance posterior probability. */ @@ -139,6 +147,7 @@ struct ps_search_s { #define ps_search_free(s) (*(ps_search_base(s)->vt->free))(s) #define ps_search_lattice(s) (*(ps_search_base(s)->vt->lattice))(s) #define ps_search_hyp(s,sc) (*(ps_search_base(s)->vt->hyp))(s,sc) +#define ps_search_hyp_with_tags(s,sc) (*(ps_search_base(s)->vt->hyptags_list))(s,sc) #define ps_search_prob(s) (*(ps_search_base(s)->vt->prob))(s) #define ps_search_seg_iter(s) (*(ps_search_base(s)->vt->seg_iter))(s) diff --git a/src/libpocketsphinx/state_align_search.c b/src/libpocketsphinx/state_align_search.c index da03af483..09eedcd00 100644 --- a/src/libpocketsphinx/state_align_search.c +++ b/src/libpocketsphinx/state_align_search.c @@ -276,6 +276,7 @@ static ps_searchfuncs_t state_align_search_funcs = { /* free: */ state_align_search_free, /* lattice: */ NULL, /* hyp: */ NULL, + /* hyptags_list: */ NULL, /* prob: */ NULL, /* seg_iter: */ NULL, }; From b0a31fac32912790839901cc1fceead8542fe1a7 Mon Sep 17 00:00:00 2001 From: Carlo Benussi Date: Tue, 18 Jul 2017 17:53:55 +0200 Subject: [PATCH 2/6] changed user access to tags --- include/pocketsphinx.h | 44 +++++++++-------- src/libpocketsphinx/fsg_search.c | 2 +- src/libpocketsphinx/pocketsphinx.c | 55 +++++++++++++-------- 
src/libpocketsphinx/pocketsphinx_internal.h | 4 +- 4 files changed, 62 insertions(+), 43 deletions(-) diff --git a/include/pocketsphinx.h b/include/pocketsphinx.h index d834e94e7..c62039211 100644 --- a/include/pocketsphinx.h +++ b/include/pocketsphinx.h @@ -8,7 +8,7 @@ * are met: * * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. + * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in @@ -16,16 +16,16 @@ * distribution. * * - * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND - * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* * ==================================================================== @@ -62,11 +62,6 @@ extern "C" { } #endif -/** - * PocketSphinx hyptags struct. - */ -typedef struct ps_hyptags_s ps_hyptags_t; - /** * PocketSphinx speech recognizer object. */ @@ -272,7 +267,7 @@ int ps_add_word(ps_decoder_t *ps, char const *phones, int update); -/** +/** * Lookup for the word in the dictionary and return phone transcription * for it. * @@ -284,7 +279,7 @@ int ps_add_word(ps_decoder_t *ps, * allocated and must be freed by the user. */ POCKETSPHINX_EXPORT -char *ps_lookup_word(ps_decoder_t *ps, +char *ps_lookup_word(ps_decoder_t *ps, const char *word); /** @@ -419,13 +414,22 @@ char const *ps_get_hyp(ps_decoder_t *ps, int32 *out_best_score); * * @param ps Decoder. * @param out_best_score Output: path score corresponding to returned string. - * @return glist_t containing ps_hyptags_t. Each struct has a word of the best - * hypothesis at this point in decoding, and a glist_t of chars for the tags (can be NULL). - * NULL is returned if no hypothesis is available. + * @return glist_t of ps_hyptags_t structs, accessed through iterators below. NULL on error */ + POCKETSPHINX_EXPORT glist_t ps_get_hyp_with_tags(ps_decoder_t *ps, int32 *out_best_score); +/** + * Get glist containing tags and word referenced. + * + * @param glist containing word-tags pairs. + * @param referenced word of the hypothesis. + * + * @return glist_t to hold extracted tags,or NULL on error. + */ +glist_t ps_get_word_and_tags(glist_t hyptags_list, char *word); + /** * Get posterior probability. * @@ -551,7 +555,7 @@ ps_nbest_t *ps_nbest(ps_decoder_t *ps); * @return Updated N-best iterator, or NULL if no more hypotheses are * available (iterator is freed ni this case). */ -POCKETSPHINX_EXPORT +POCKETSPHINX_EXPORT ps_nbest_t *ps_nbest_next(ps_nbest_t *nbest); /** @@ -629,7 +633,7 @@ void ps_set_rawdata_size(ps_decoder_t *ps, int32 size); /** * Retrieves the raw data collected during utterance decoding. 
- * + * * @param ps Decoder * @param buffer preallocated buffer to store the data, must be within the limit * set before diff --git a/src/libpocketsphinx/fsg_search.c b/src/libpocketsphinx/fsg_search.c index 34028ce3a..d3b9635e0 100644 --- a/src/libpocketsphinx/fsg_search.c +++ b/src/libpocketsphinx/fsg_search.c @@ -1101,7 +1101,7 @@ fsg_search_hyp_with_tags(ps_search_t *search, int32 *out_score) dict_wordid(dict, fsg_model_word_str(fsgs->fsg, wid))); - ps_hyptags_t *r = ckd_calloc(1, sizeof(ps_hyptags_t)); + ps_hyptags_t *r = (ps_hyptags_t *)ckd_calloc(1, sizeof(ps_hyptags_t)); r->tags = tags; r->word = baseword; diff --git a/src/libpocketsphinx/pocketsphinx.c b/src/libpocketsphinx/pocketsphinx.c index a673c5925..705d381fe 100644 --- a/src/libpocketsphinx/pocketsphinx.c +++ b/src/libpocketsphinx/pocketsphinx.c @@ -234,7 +234,7 @@ ps_reinit(ps_decoder_t *ps, cmd_ln_t *config) return -1; } } - + ps->mfclogdir = cmd_ln_str_r(ps->config, "-mfclogdir"); ps->rawlogdir = cmd_ln_str_r(ps->config, "-rawlogdir"); ps->senlogdir = cmd_ln_str_r(ps->config, "-senlogdir"); @@ -324,7 +324,7 @@ ps_reinit(ps_decoder_t *ps, cmd_ln_t *config) fsg_model_free(fsg); ps_set_search(ps, PS_DEFAULT_SEARCH); } - + /* Or load a JSGF grammar */ if ((path = cmd_ln_str_r(ps->config, "-jsgf"))) { if (ps_set_jsgf_file(ps, PS_DEFAULT_SEARCH, path) @@ -338,7 +338,7 @@ ps_reinit(ps_decoder_t *ps, cmd_ln_t *config) return -1; } - if ((path = cmd_ln_str_r(ps->config, "-lm")) && + if ((path = cmd_ln_str_r(ps->config, "-lm")) && !cmd_ln_boolean_r(ps->config, "-allphone")) { if (ps_set_lm_file(ps, PS_DEFAULT_SEARCH, path) || ps_set_search(ps, PS_DEFAULT_SEARCH)) @@ -356,8 +356,8 @@ ps_reinit(ps_decoder_t *ps, cmd_ln_t *config) } for(lmset_it = ngram_model_set_iter(lmset); - lmset_it; lmset_it = ngram_model_set_iter_next(lmset_it)) { - ngram_model_t *lm = ngram_model_set_iter_model(lmset_it, &name); + lmset_it; lmset_it = ngram_model_set_iter_next(lmset_it)) { + ngram_model_t *lm = 
ngram_model_set_iter_model(lmset_it, &name); E_INFO("adding search %s\n", name); if (ps_set_lm(ps, name, lm)) { ngram_model_set_iter_free(lmset_it); @@ -387,7 +387,7 @@ ps_decoder_t * ps_init(cmd_ln_t *config) { ps_decoder_t *ps; - + if (!config) { E_ERROR("No configuration specified"); return NULL; @@ -502,7 +502,7 @@ ps_get_search(ps_decoder_t *ps) return name; } -int +int ps_unset_search(ps_decoder_t *ps, const char *name) { ps_search_t *search = hash_table_delete(ps->searches, name); @@ -526,13 +526,13 @@ ps_search_iter_next(ps_search_iter_t *itor) return (ps_search_iter_t *)hash_table_iter_next((hash_iter_t *)itor); } -const char* +const char* ps_search_iter_val(ps_search_iter_t *itor) { return (const char*)(((hash_iter_t *)itor)->ent->key); } -void +void ps_search_iter_free(ps_search_iter_t *itor) { hash_table_iter_free((hash_iter_t *)itor); @@ -569,7 +569,7 @@ static int set_search_internal(ps_decoder_t *ps, ps_search_t *search) { ps_search_t *old_search; - + if (!search) return -1; @@ -651,7 +651,7 @@ ps_set_fsg(ps_decoder_t *ps, const char *name, fsg_model_t *fsg) return set_search_internal(ps, search); } -int +int ps_set_jsgf_file(ps_decoder_t *ps, const char *name, const char *path) { fsg_model_t *fsg; @@ -690,7 +690,7 @@ ps_set_jsgf_file(ps_decoder_t *ps, const char *name, const char *path) return result; } -int +int ps_set_jsgf_string(ps_decoder_t *ps, const char *name, const char *jsgf_string) { fsg_model_t *fsg; @@ -865,7 +865,7 @@ ps_lookup_word(ps_decoder_t *ps, const char *word) int32 phlen, j; char *phones; dict_t *dict = ps->dict; - + wid = dict_wordid(dict, word); if (wid == BAD_S3WID) return NULL; @@ -936,7 +936,7 @@ ps_start_utt(ps_decoder_t *ps) { int rv; char uttid[16]; - + if (ps->acmod->state == ACMOD_STARTED || ps->acmod->state == ACMOD_PROCESSING) { E_ERROR("Utterance already started\n"); return -1; @@ -1165,7 +1165,7 @@ ps_end_utt(ps_decoder_t *ps) int32 score; hyp = ps_get_hyp(ps, &score); - + if (hyp != NULL) { E_INFO("%s (%d)\n", 
hyp, score); E_INFO_NOFN("%-20s %-5s %-5s %-5s %-10s %-10s %-3s\n", @@ -1202,14 +1202,29 @@ ps_get_hyp(ps_decoder_t *ps, int32 *out_best_score) glist_t ps_get_hyp_with_tags(ps_decoder_t *ps, int32 *out_best_score) { - glist_t hyptags_list; - ptmr_start(&ps->perf); - hyptags_list = ps_search_hyp_with_tags(ps->search, out_best_score); + glist_t hyptags_list = ps_search_hyp_with_tags(ps->search, out_best_score); ptmr_stop(&ps->perf); return hyptags_list; } +glist_t +ps_get_word_and_tags(glist_t hyptags_list, char *word) +{ + if(!hyptags_list) + return NULL; + + ps_hyptags_t *r = (ps_hyptags_t *)gnode_ptr(hyptags_list); + if(!r){ + E_ERROR("glist_t data NULL"); + return NULL; + } + + strncpy(word, r->word, strlen(r->word)+1); + return r->tags; +} + + int32 ps_get_prob(ps_decoder_t *ps) { @@ -1374,7 +1389,7 @@ ps_get_all_time(ps_decoder_t *ps, double *out_nspeech, *out_nwall = ps->perf.t_tot_elapsed; } -uint8 +uint8 ps_get_in_speech(ps_decoder_t *ps) { return fe_get_vad_state(ps->acmod->fe); @@ -1450,7 +1465,7 @@ ps_search_base_reinit(ps_search_t *search, dict_t *dict, } void -ps_set_rawdata_size(ps_decoder_t *ps, int32 size) +ps_set_rawdata_size(ps_decoder_t *ps, int32 size) { acmod_set_rawdata_size(ps->acmod, size); } diff --git a/src/libpocketsphinx/pocketsphinx_internal.h b/src/libpocketsphinx/pocketsphinx_internal.h index f5716b768..146a6555f 100644 --- a/src/libpocketsphinx/pocketsphinx_internal.h +++ b/src/libpocketsphinx/pocketsphinx_internal.h @@ -76,10 +76,10 @@ typedef struct ps_search_s ps_search_t; #define PS_SEARCH_TYPE_STATE_ALIGN "state_align" #define PS_SEARCH_TYPE_PHONE_LOOP "phone_loop" -struct ps_hyptags_s { +typedef struct ps_hyptags_s{ char const *word; glist_t tags; -}; +}ps_hyptags_t; /** From b8a425e13eff59b962398a7fa35a072551fe1a74 Mon Sep 17 00:00:00 2001 From: Carlo Benussi Date: Fri, 28 Jul 2017 12:15:12 +0200 Subject: [PATCH 3/6] fsg_link_t changed, fixed memory leak --- include/pocketsphinx.h | 5 +- src/libpocketsphinx/fsg_search.c | 53 
+++++++++++---------- src/libpocketsphinx/pocketsphinx.c | 20 +++++--- src/libpocketsphinx/pocketsphinx_internal.h | 28 +++++------ 4 files changed, 59 insertions(+), 47 deletions(-) diff --git a/include/pocketsphinx.h b/include/pocketsphinx.h index c62039211..c02e4c698 100644 --- a/include/pocketsphinx.h +++ b/include/pocketsphinx.h @@ -428,7 +428,7 @@ glist_t ps_get_hyp_with_tags(ps_decoder_t *ps, int32 *out_best_score); * * @return glist_t to hold extracted tags,or NULL on error. */ -glist_t ps_get_word_and_tags(glist_t hyptags_list, char *word); +void ps_get_word_and_tag(glist_t hyptags_list, char *word, char *tag); /** * Get posterior probability. @@ -443,6 +443,9 @@ glist_t ps_get_word_and_tags(glist_t hyptags_list, char *word); * @param ps Decoder. * @return Posterior probability of the best hypothesis. */ + +void ps_free_tags_struct(glist_t hyptags_list); + POCKETSPHINX_EXPORT int32 ps_get_prob(ps_decoder_t *ps); diff --git a/src/libpocketsphinx/fsg_search.c b/src/libpocketsphinx/fsg_search.c index d3b9635e0..5f980c647 100644 --- a/src/libpocketsphinx/fsg_search.c +++ b/src/libpocketsphinx/fsg_search.c @@ -8,7 +8,7 @@ * are met: * * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. + * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in @@ -16,16 +16,16 @@ * distribution. * * - * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND - * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * ==================================================================== @@ -34,14 +34,14 @@ /* * fsg_search.c -- Search structures for FSM decoding. - * + * * ********************************************** * CMU ARPA Speech Project * * Copyright (c) 2004 Carnegie Mellon University. * ALL RIGHTS RESERVED. * ********************************************** - * + * * HISTORY * * 18-Feb-2004 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon @@ -246,10 +246,10 @@ fsg_search_init(const char *name, { ps_search_free(ps_search_base(fsgs)); return NULL; - + } ptmr_init(&fsgs->perf); - + return ps_search_base(fsgs); } @@ -270,6 +270,7 @@ fsg_search_free(ps_search_t *search) ps_search_base_free(search); fsg_lextree_free(fsgs->lextree); + if (fsgs->history) { fsg_history_reset(fsgs->history); fsg_history_set_fsg(fsgs->history, NULL, NULL); @@ -291,7 +292,7 @@ fsg_search_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p) /* Free old dict2pid, dict */ ps_search_base_reinit(search, dict, d2p); - + /* Update the number of words (not used by this module though). 
*/ search->n_words = dict_size(dict); @@ -913,7 +914,7 @@ fsg_search_find_exit(fsg_search_t *fsgs, int frame_idx, int final, int32 *out_sc fl = fsg_hist_entry_fsglink(hist_entry); score = fsg_hist_entry_score(hist_entry); - + if (fl == NULL) break; @@ -928,7 +929,7 @@ fsg_search_find_exit(fsg_search_t *fsgs, int frame_idx, int final, int32 *out_sc besthist = bpidx; } } - + --bpidx; if (bpidx < 0) break; @@ -1019,7 +1020,7 @@ fsg_search_hyp(ps_search_t *search, int32 *out_score) fsg_model_word_str(fsgs->fsg, wid))); len += strlen(baseword) + 1; } - + ckd_free(search->hyp_str); if (len == 0) { search->hyp_str = NULL; @@ -1084,7 +1085,8 @@ fsg_search_hyp_with_tags(ps_search_t *search, int32 *out_score) return ps_lattice_hyp(dag, link); } - glist_free(search->hyptags_list); + if(search->hyptags_list) glist_free(search->hyptags_list); + search->hyptags_list = (glist_t)ckd_calloc(1,sizeof(gnode_t)); bp = bpidx; while (bp > 0) { fsg_hist_entry_t *hist_entry = fsg_history_entry_get(fsgs->history, bp); @@ -1092,7 +1094,6 @@ fsg_search_hyp_with_tags(ps_search_t *search, int32 *out_score) char const *baseword; int32 wid; - glist_t tags = fsg_link_tags(fl); bp = fsg_hist_entry_pred(hist_entry); wid = fsg_link_wid(fl); if (wid < 0 || fsg_model_is_filler(fsgs->fsg, wid)) @@ -1102,8 +1103,8 @@ fsg_search_hyp_with_tags(ps_search_t *search, int32 *out_score) fsg_model_word_str(fsgs->fsg, wid))); ps_hyptags_t *r = (ps_hyptags_t *)ckd_calloc(1, sizeof(ps_hyptags_t)); - r->tags = tags; - r->word = baseword; + strncpy(r->tag,fsg_link_tag(fl),50); + strncpy(r->word,baseword,50); search->hyptags_list = glist_add_ptr(search->hyptags_list, (void *)r); } @@ -1220,7 +1221,7 @@ fsg_search_seg_iter(ps_search_t *search) /* Fill in relevant fields for first element. 
*/ fsg_seg_bp2itor((ps_seg_t *)itor, itor->hist[0]); - + return (ps_seg_t *)itor; } @@ -1371,7 +1372,7 @@ find_end_node(fsg_search_t *fsgs, ps_lattice_t *dag) E_INFO("End node %s.%d:%d:%d (%d)\n", fsg_model_word_str(fsgs->fsg, node->wid), node->sf, node->fef, node->lef, node->info.best_exit); - } + } else { /* If there was more than one end node candidate, then we need * to create an artificial end node with epsilon transitions @@ -1522,7 +1523,7 @@ fsg_search_lattice(ps_search_t *search) for (itor = fsg_model_arcs(fsg, fsg_link_to_state(fh->fsglink)); itor; itor = fsg_arciter_next(itor)) { fsg_link_t *link = fsg_arciter_get(itor); - + /* FIXME: Need to figure out what to do about tag transitions. */ if (link->wid >= 0) { /* @@ -1538,7 +1539,7 @@ fsg_search_lattice(ps_search_t *search) * just need to look one link forward from them. */ fsg_arciter_t *itor2; - + /* Add all non-null links out of j. */ for (itor2 = fsg_model_arcs(fsg, fsg_link_to_state(link)); itor2; itor2 = fsg_arciter_next(itor2)) { @@ -1546,7 +1547,7 @@ fsg_search_lattice(ps_search_t *search) if (link->wid == -1) continue; - + if ((dest = find_node(dag, fsg, sf, link->wid, fsg_link_to_state(link))) != NULL) { ps_lattice_link(dag, src, dest, ascr, fh->frame); } @@ -1601,7 +1602,7 @@ fsg_search_lattice(ps_search_t *search) cmd_ln_float32_r(ps_search_config(fsgs), "-fillprob")) * fsg->lw) >> SENSCR_SHIFT; - + ps_lattice_penalize_fillers(dag, silpen, fillpen); } search->dag = dag; diff --git a/src/libpocketsphinx/pocketsphinx.c b/src/libpocketsphinx/pocketsphinx.c index 705d381fe..1788b02bb 100644 --- a/src/libpocketsphinx/pocketsphinx.c +++ b/src/libpocketsphinx/pocketsphinx.c @@ -1208,20 +1208,28 @@ ps_get_hyp_with_tags(ps_decoder_t *ps, int32 *out_best_score) return hyptags_list; } -glist_t -ps_get_word_and_tags(glist_t hyptags_list, char *word) +void +ps_get_word_and_tag(glist_t hyptags_list, char *word, char *tag) { if(!hyptags_list) - return NULL; + return; ps_hyptags_t *r = (ps_hyptags_t 
*)gnode_ptr(hyptags_list); if(!r){ - E_ERROR("glist_t data NULL"); - return NULL; + E_WARN("r NULL\n"); + return; } + strncpy(tag, r->tag, strlen(r->tag)+1); strncpy(word, r->word, strlen(r->word)+1); - return r->tags; +} + +void ps_free_tags_struct(glist_t hyptags_list) +{ + ps_hyptags_t *r = (ps_hyptags_t *)gnode_ptr(hyptags_list); + if(r){ + free(r); + } } diff --git a/src/libpocketsphinx/pocketsphinx_internal.h b/src/libpocketsphinx/pocketsphinx_internal.h index 146a6555f..9fffb320b 100644 --- a/src/libpocketsphinx/pocketsphinx_internal.h +++ b/src/libpocketsphinx/pocketsphinx_internal.h @@ -8,27 +8,27 @@ * are met: * * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. + * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * - * This work was supported in part by funding from the Defense Advanced - * Research Projects Agency and the National Science Foundation of the + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the * United States of America, and the CMU Sphinx Speech Consortium. * - * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND - * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * ==================================================================== @@ -77,8 +77,8 @@ typedef struct ps_search_s ps_search_t; #define PS_SEARCH_TYPE_PHONE_LOOP "phone_loop" typedef struct ps_hyptags_s{ - char const *word; - glist_t tags; + char word[50]; + char tag[50]; }ps_hyptags_t; @@ -104,10 +104,10 @@ typedef struct ps_searchfuncs_s { */ struct ps_search_s { ps_searchfuncs_t *vt; /**< V-table of search methods. */ - + char *type; char *name; - + ps_search_t *pls; /**< Phoneme loop for lookahead. */ cmd_ln_t *config; /**< Configuration. */ acmod_t *acmod; /**< Acoustic model. 
*/ From d30a8a4ac87315a8883f80d30ca725ac4bee8303 Mon Sep 17 00:00:00 2001 From: Carlo Benussi Date: Fri, 28 Jul 2017 12:26:35 +0200 Subject: [PATCH 4/6] tag size not hard coded --- include/pocketsphinx.h | 5 +++-- src/libpocketsphinx/fsg_search.c | 4 ++-- src/libpocketsphinx/pocketsphinx_internal.h | 4 ++-- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/include/pocketsphinx.h b/include/pocketsphinx.h index c02e4c698..3746742b8 100644 --- a/include/pocketsphinx.h +++ b/include/pocketsphinx.h @@ -62,6 +62,8 @@ extern "C" { } #endif +#define MAX_TAG_SIZE 50 + /** * PocketSphinx speech recognizer object. */ @@ -425,8 +427,7 @@ glist_t ps_get_hyp_with_tags(ps_decoder_t *ps, int32 *out_best_score); * * @param glist containing word-tags pairs. * @param referenced word of the hypothesis. - * - * @return glist_t to hold extracted tags,or NULL on error. + * @param referenced tag of the hypothesis. */ void ps_get_word_and_tag(glist_t hyptags_list, char *word, char *tag); diff --git a/src/libpocketsphinx/fsg_search.c b/src/libpocketsphinx/fsg_search.c index 5f980c647..ae41ac5a2 100644 --- a/src/libpocketsphinx/fsg_search.c +++ b/src/libpocketsphinx/fsg_search.c @@ -1103,8 +1103,8 @@ fsg_search_hyp_with_tags(ps_search_t *search, int32 *out_score) fsg_model_word_str(fsgs->fsg, wid))); ps_hyptags_t *r = (ps_hyptags_t *)ckd_calloc(1, sizeof(ps_hyptags_t)); - strncpy(r->tag,fsg_link_tag(fl),50); - strncpy(r->word,baseword,50); + strncpy(r->tag,fsg_link_tag(fl),MAX_TAG_SIZE); + strncpy(r->word,baseword,MAX_TAG_SIZE); search->hyptags_list = glist_add_ptr(search->hyptags_list, (void *)r); } diff --git a/src/libpocketsphinx/pocketsphinx_internal.h b/src/libpocketsphinx/pocketsphinx_internal.h index 9fffb320b..bc3c09217 100644 --- a/src/libpocketsphinx/pocketsphinx_internal.h +++ b/src/libpocketsphinx/pocketsphinx_internal.h @@ -77,8 +77,8 @@ typedef struct ps_search_s ps_search_t; #define PS_SEARCH_TYPE_PHONE_LOOP "phone_loop" typedef struct ps_hyptags_s{ - char 
word[50]; - char tag[50]; + char word[MAX_TAG_SIZE]; + char tag[MAX_TAG_SIZE]; }ps_hyptags_t; From 4cfa3c74cdcd52aa4fe6f9233df63d48de4a10bc Mon Sep 17 00:00:00 2001 From: Carlo Benussi Date: Fri, 28 Jul 2017 15:46:10 +0200 Subject: [PATCH 5/6] better UI, no coredump at 2 rec --- include/pocketsphinx.h | 6 ++- src/libpocketsphinx/fsg_search.c | 52 ++++++++++++++++++--- src/libpocketsphinx/fsg_search_internal.h | 34 +++++++------- src/libpocketsphinx/pocketsphinx.c | 9 ++-- src/libpocketsphinx/pocketsphinx_internal.h | 5 +- 5 files changed, 74 insertions(+), 32 deletions(-) diff --git a/include/pocketsphinx.h b/include/pocketsphinx.h index 3746742b8..637ef4af4 100644 --- a/include/pocketsphinx.h +++ b/include/pocketsphinx.h @@ -416,11 +416,13 @@ char const *ps_get_hyp(ps_decoder_t *ps, int32 *out_best_score); * * @param ps Decoder. * @param out_best_score Output: path score corresponding to returned string. - * @return glist_t of ps_hyptags_t structs, accessed through iterators below. NULL on error + * @param glist pointer in which to store word-tag pairs + * @return String containing best hypothesis at this point in + * decoding. NULL if no hypothesis is available. */ POCKETSPHINX_EXPORT -glist_t ps_get_hyp_with_tags(ps_decoder_t *ps, int32 *out_best_score); +char const *ps_get_hyp_with_tags(ps_decoder_t *ps, int32 *out_best_score, glist_t *hyptagsP); /** * Get glist containing tags and word referenced. 
diff --git a/src/libpocketsphinx/fsg_search.c b/src/libpocketsphinx/fsg_search.c index ae41ac5a2..3287f6e62 100644 --- a/src/libpocketsphinx/fsg_search.c +++ b/src/libpocketsphinx/fsg_search.c @@ -1055,11 +1055,13 @@ fsg_search_hyp(ps_search_t *search, int32 *out_score) return search->hyp_str; } -glist_t -fsg_search_hyp_with_tags(ps_search_t *search, int32 *out_score) +char const * +fsg_search_hyp_with_tags(ps_search_t *search, int32 *out_score, glist_t *wordtags_listP) { fsg_search_t *fsgs = (fsg_search_t *)search; dict_t *dict = ps_search_dict(search); + char *c; + size_t len; int bp, bpidx; /* Get last backpointer table index. */ @@ -1085,15 +1087,43 @@ fsg_search_hyp_with_tags(ps_search_t *search, int32 *out_score) return ps_lattice_hyp(dag, link); } - if(search->hyptags_list) glist_free(search->hyptags_list); - search->hyptags_list = (glist_t)ckd_calloc(1,sizeof(gnode_t)); bp = bpidx; + len = 0; while (bp > 0) { fsg_hist_entry_t *hist_entry = fsg_history_entry_get(fsgs->history, bp); fsg_link_t *fl = fsg_hist_entry_fsglink(hist_entry); + char const *baseword; + int32 wid; + + bp = fsg_hist_entry_pred(hist_entry); + wid = fsg_link_wid(fl); + if (wid < 0 || fsg_model_is_filler(fsgs->fsg, wid)) + continue; + baseword = dict_basestr(dict, + dict_wordid(dict, + fsg_model_word_str(fsgs->fsg, wid))); + len += strlen(baseword) + 1; + } + ckd_free(search->hyp_str); + if (len == 0) { + search->hyp_str = NULL; + return search->hyp_str; + } + search->hyp_str = ckd_calloc(1, len); + + glist_t wt; + wt = (glist_t)ckd_calloc(1,sizeof(gnode_t)); + wt->next = NULL; + + bp = bpidx; + c = search->hyp_str + len - 1; + while (bp > 0) { + fsg_hist_entry_t *hist_entry = fsg_history_entry_get(fsgs->history, bp); + fsg_link_t *fl = fsg_hist_entry_fsglink(hist_entry); char const *baseword; int32 wid; + bp = fsg_hist_entry_pred(hist_entry); wid = fsg_link_wid(fl); if (wid < 0 || fsg_model_is_filler(fsgs->fsg, wid)) @@ -1101,14 +1131,24 @@ fsg_search_hyp_with_tags(ps_search_t *search, 
int32 *out_score) baseword = dict_basestr(dict, dict_wordid(dict, fsg_model_word_str(fsgs->fsg, wid))); + len = strlen(baseword); + c -= len; + memcpy(c, baseword, len); + if (c > search->hyp_str) { + --c; + *c = ' '; + } ps_hyptags_t *r = (ps_hyptags_t *)ckd_calloc(1, sizeof(ps_hyptags_t)); strncpy(r->tag,fsg_link_tag(fl),MAX_TAG_SIZE); strncpy(r->word,baseword,MAX_TAG_SIZE); - search->hyptags_list = glist_add_ptr(search->hyptags_list, (void *)r); + wt = glist_add_ptr(wt, (void *)r); } - return search->hyptags_list; + + if(*wordtags_listP) glist_free(*wordtags_listP); + *wordtags_listP = wt; + return search->hyp_str; } static void diff --git a/src/libpocketsphinx/fsg_search_internal.h b/src/libpocketsphinx/fsg_search_internal.h index 515547c40..bc47cd44b 100644 --- a/src/libpocketsphinx/fsg_search_internal.h +++ b/src/libpocketsphinx/fsg_search_internal.h @@ -8,7 +8,7 @@ * are met: * * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. + * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in @@ -16,16 +16,16 @@ * distribution. * * - * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND - * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * ==================================================================== @@ -74,10 +74,10 @@ typedef struct fsg_search_s { struct fsg_lextree_s *lextree;/**< Lextree structure for the currently active FSG */ struct fsg_history_s *history;/**< For storing the Viterbi search history */ - + glist_t pnode_active; /**< Those active in this frame */ glist_t pnode_active_next; /**< Those activated for the next frame */ - + int32 beam_orig; /**< Global pruning threshold */ int32 pbeam_orig; /**< Pruning threshold for phone transition */ int32 wbeam_orig; /**< Pruning threshold for word exit */ @@ -86,7 +86,7 @@ typedef struct fsg_search_s { For implementing absolute pruning. */ int32 beam, pbeam, wbeam; /**< Effective beams after applying beam_factor */ int32 lw, pip, wip; /**< Language weights */ - + frame_idx_t frame; /**< Current frame. */ uint8 final; /**< Decoding is finished for this utterance. 
*/ uint8 bestpath; /**< Whether to run bestpath search @@ -95,15 +95,15 @@ typedef struct fsg_search_s { int32 bestscore; /**< For beam pruning */ int32 bpidx_start; /**< First history entry index this frame */ - + int32 ascr, lscr; /**< Total acoustic and lm score for utt */ - + int32 n_hmm_eval; /**< Total HMMs evaluated this utt */ int32 n_sen_eval; /**< Total senones evaluated this utt */ - + ptmr_t perf; /**< Performance counter */ int32 n_tot_frame; - + } fsg_search_t; /* Access macros */ @@ -151,8 +151,8 @@ int fsg_search_finish(ps_search_t *search); char const *fsg_search_hyp(ps_search_t *search, int32 *out_score); /** - * Get glist_t with word-tags pairs. + * Get glist_t with word-tags pairs and hyp. */ -glist_t fsg_search_hyp_with_tags(ps_search_t *search, int32 *out_score); +char const *fsg_search_hyp_with_tags(ps_search_t *search, int32 *out_score, glist_t *wordtags_listP); #endif diff --git a/src/libpocketsphinx/pocketsphinx.c b/src/libpocketsphinx/pocketsphinx.c index 1788b02bb..e82b26053 100644 --- a/src/libpocketsphinx/pocketsphinx.c +++ b/src/libpocketsphinx/pocketsphinx.c @@ -1199,13 +1199,14 @@ ps_get_hyp(ps_decoder_t *ps, int32 *out_best_score) return hyp; } -glist_t -ps_get_hyp_with_tags(ps_decoder_t *ps, int32 *out_best_score) +char const * +ps_get_hyp_with_tags(ps_decoder_t *ps, int32 *out_best_score, glist_t *hyptagsP) { + char const *hyp; ptmr_start(&ps->perf); - glist_t hyptags_list = ps_search_hyp_with_tags(ps->search, out_best_score); + hyp = ps_search_hyp_with_tags(ps->search, out_best_score, hyptagsP); ptmr_stop(&ps->perf); - return hyptags_list; + return hyp; } void diff --git a/src/libpocketsphinx/pocketsphinx_internal.h b/src/libpocketsphinx/pocketsphinx_internal.h index bc3c09217..a0804bb75 100644 --- a/src/libpocketsphinx/pocketsphinx_internal.h +++ b/src/libpocketsphinx/pocketsphinx_internal.h @@ -94,7 +94,7 @@ typedef struct ps_searchfuncs_s { ps_lattice_t *(*lattice)(ps_search_t *search); char const *(*hyp)(ps_search_t *search, 
int32 *out_score); - glist_t (*hyptags_list)(ps_search_t *search, int32 *out_score); + char const *(*hyp_tags)(ps_search_t *search, int32 *out_score, glist_t *hyptags); int32 (*prob)(ps_search_t *search); ps_seg_t *(*seg_iter)(ps_search_t *search); } ps_searchfuncs_t; @@ -114,7 +114,6 @@ struct ps_search_s { dict_t *dict; /**< Pronunciation dictionary. */ dict2pid_t *d2p; /**< Dictionary to senone mappings. */ char *hyp_str; /**< Current hypothesis string. */ - glist_t hyptags_list; /**< glist containing word-tags pairs. */ ps_lattice_t *dag; /**< Current hypothesis word graph. */ ps_latlink_t *last_link; /**< Final link in best path. */ int32 post; /**< Utterance posterior probability. */ @@ -147,7 +146,7 @@ struct ps_search_s { #define ps_search_free(s) (*(ps_search_base(s)->vt->free))(s) #define ps_search_lattice(s) (*(ps_search_base(s)->vt->lattice))(s) #define ps_search_hyp(s,sc) (*(ps_search_base(s)->vt->hyp))(s,sc) -#define ps_search_hyp_with_tags(s,sc) (*(ps_search_base(s)->vt->hyptags_list))(s,sc) +#define ps_search_hyp_with_tags(s,sc,tg) (*(ps_search_base(s)->vt->hyp_tags))(s,sc,tg) #define ps_search_prob(s) (*(ps_search_base(s)->vt->prob))(s) #define ps_search_seg_iter(s) (*(ps_search_base(s)->vt->seg_iter))(s) From 1d6f9b3c39b39f7e9c4f8b818fa54f18bd2a3b80 Mon Sep 17 00:00:00 2001 From: Carlo Benussi Date: Mon, 25 Sep 2017 12:56:48 +0200 Subject: [PATCH 6/6] removed some E_INFO --- include/ps_search.h | 57 ++++++++++++++++++------------ src/libpocketsphinx/fsg_search.c | 1 - src/libpocketsphinx/pocketsphinx.c | 45 +++++++++++++++++++++-- 3 files changed, 77 insertions(+), 26 deletions(-) diff --git a/include/ps_search.h b/include/ps_search.h index 833875221..b9bd9e801 100644 --- a/include/ps_search.h +++ b/include/ps_search.h @@ -8,7 +8,7 @@ * are met: * * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. + * notice, this list of conditions and the following disclaimer. 
* * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in @@ -16,16 +16,16 @@ * distribution. * * - * THIS SOFTWARE IS PROVIDED BY ALPHA CEPHEI INC. ``AS IS'' AND - * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THIS SOFTWARE IS PROVIDED BY ALPHA CEPHEI INC. ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * ==================================================================== @@ -37,14 +37,14 @@ * provide interactive experience for the user. * * There are different possible search modes: - * + * *
    *
  • keyphrase - efficiently looks for a keyphrase and ignores other speech. Allows configuring the detection threshold.
  • *
  • grammar - recognizes speech according to a JSGF grammar. Unlike keyphrase search, grammar search doesn't ignore words which are not in the grammar but tries to recognize them.
  • *
  • ngram/lm - recognizes natural speech with a language model.
  • *
  • allphone - recognizes phonemes with a phonetic language model.
  • *
- * + * * Each search has a name and can be referenced by a name, names are * application-specific. The function ps_set_search allows to activate * the search previously added by a name. Only single search can be @@ -53,7 +53,7 @@ * To add the search one needs to point to the grammar/language model * describing the search. The location of the grammar is specific to the * application. - * + * * The exact design of a searches depends on your application. For * example, you might want to listen for activation keyphrase first and once * keyphrase is recognized switch to ngram search to recognize actual @@ -68,6 +68,7 @@ #ifndef __PS_SEARCH_H__ #define __PS_SEARCH_H__ +#include #include #include @@ -100,7 +101,7 @@ int ps_set_search(ps_decoder_t *ps, const char *name); * * @see ps_set_search */ -POCKETSPHINX_EXPORT +POCKETSPHINX_EXPORT const char* ps_get_search(ps_decoder_t *ps); /** @@ -117,7 +118,7 @@ POCKETSPHINX_EXPORT int ps_unset_search(ps_decoder_t *ps, const char *name); /** - * Returns iterator over current searches + * Returns iterator over current searches * * @see ps_set_search */ @@ -126,7 +127,7 @@ ps_search_iter_t *ps_search_iter(ps_decoder_t *ps); /** * Updates search iterator to point to the next position. - * + * * This function automatically frees the iterator object upon reaching * the final entry. * @see ps_set_search @@ -152,7 +153,7 @@ void ps_search_iter_free(ps_search_iter_t *itor); /** * Updates search iterator to point to the next position. - * + * * This function automatically frees the iterator object upon reaching * the final entry. * @see ps_set_search @@ -172,7 +173,7 @@ const char* ps_search_iter_val(ps_search_iter_t *itor); * not attempt to free it manually. Use ngram_model_retain() * if you wish to reuse it elsewhere. */ -POCKETSPHINX_EXPORT +POCKETSPHINX_EXPORT ngram_model_t *ps_get_lm(ps_decoder_t *ps, const char *name); /** @@ -182,7 +183,7 @@ ngram_model_t *ps_get_lm(ps_decoder_t *ps, const char *name); * using ps_set_search(). 
* * @see ps_set_search. - */ + */ POCKETSPHINX_EXPORT int ps_set_lm(ps_decoder_t *ps, const char *name, ngram_model_t *lm); @@ -190,7 +191,7 @@ int ps_set_lm(ps_decoder_t *ps, const char *name, ngram_model_t *lm); * Adds new search based on N-gram language model. * * Convenient method to load N-gram model and create a search. - * + * * @see ps_set_lm */ POCKETSPHINX_EXPORT @@ -219,6 +220,16 @@ fsg_model_t *ps_get_fsg(ps_decoder_t *ps, const char *name); POCKETSPHINX_EXPORT int ps_set_fsg(ps_decoder_t *ps, const char *name, fsg_model_t *fsg); +/** + * Adds new search using JSGF model. + * + * Convenient method to use already built jsgf_t structs. + * + * @see ps_set_fsg + */ +POCKETSPHINX_EXPORT +int ps_set_jsgf(ps_decoder_t *ps, const char *name, jsgf_t *jsgf); + /** * Adds new search using JSGF model. * @@ -247,7 +258,7 @@ int ps_set_jsgf_string(ps_decoder_t *ps, const char *name, const char *jsgf_stri * * @return The current keyphrase to spot */ -POCKETSPHINX_EXPORT +POCKETSPHINX_EXPORT const char* ps_get_kws(ps_decoder_t *ps, const char *name); /** @@ -258,7 +269,7 @@ const char* ps_get_kws(ps_decoder_t *ps, const char *name); * * @see ps_set_search */ -POCKETSPHINX_EXPORT +POCKETSPHINX_EXPORT int ps_set_kws(ps_decoder_t *ps, const char *name, const char *keyfile); /** @@ -269,7 +280,7 @@ int ps_set_kws(ps_decoder_t *ps, const char *name, const char *keyfile); * * @see ps_set_search */ -POCKETSPHINX_EXPORT +POCKETSPHINX_EXPORT int ps_set_keyphrase(ps_decoder_t *ps, const char *name, const char *keyphrase); /** @@ -279,7 +290,7 @@ int ps_set_keyphrase(ps_decoder_t *ps, const char *name, const char *keyphrase); * using ps_set_search(). * * @see ps_set_search. - */ + */ POCKETSPHINX_EXPORT int ps_set_allphone(ps_decoder_t *ps, const char *name, ngram_model_t *lm); @@ -287,7 +298,7 @@ int ps_set_allphone(ps_decoder_t *ps, const char *name, ngram_model_t *lm); * Adds new search based on phone N-gram language model. 
* * Convenient method to load N-gram model and create a search. - * + * * @see ps_set_allphone */ POCKETSPHINX_EXPORT diff --git a/src/libpocketsphinx/fsg_search.c b/src/libpocketsphinx/fsg_search.c index 3287f6e62..36119f222 100644 --- a/src/libpocketsphinx/fsg_search.c +++ b/src/libpocketsphinx/fsg_search.c @@ -684,7 +684,6 @@ fsg_search_word_trans(fsg_search_t *fsgs) } } - int fsg_search_step(ps_search_t *search, int frame_idx) { diff --git a/src/libpocketsphinx/pocketsphinx.c b/src/libpocketsphinx/pocketsphinx.c index e82b26053..565b2487c 100644 --- a/src/libpocketsphinx/pocketsphinx.c +++ b/src/libpocketsphinx/pocketsphinx.c @@ -651,6 +651,47 @@ ps_set_fsg(ps_decoder_t *ps, const char *name, fsg_model_t *fsg) return set_search_internal(ps, search); } +int +ps_set_jsgf(ps_decoder_t *ps, const char *name, jsgf_t *jsgf) +{ + if (!jsgf) + return -1; + + fsg_model_t *fsg; + jsgf_rule_t *rule; + char const *toprule; + float lw; + int result; + + if (!jsgf) + return -1; + + rule = NULL; + /* Take the -toprule if specified. 
*/ + if ((toprule = cmd_ln_str_r(ps->config, "-toprule"))) { + rule = jsgf_get_rule(jsgf, toprule); + if (rule == NULL) { + E_ERROR("Start rule %s not found\n", toprule); + jsgf_grammar_free(jsgf); + return -1; + } + } else { + rule = jsgf_get_public_rule(jsgf); + if (rule == NULL) { + E_ERROR("No public rules found in grammar\n"); + jsgf_grammar_free(jsgf); + return -1; + } + } + + lw = cmd_ln_float32_r(ps->config, "-lw"); + fsg = jsgf_build_fsg(jsgf, rule, ps->lmath, lw); + result = ps_set_fsg(ps, name, fsg); + fsg_model_free(fsg); + jsgf_grammar_free(jsgf); + return result; +} + int ps_set_jsgf_file(ps_decoder_t *ps, const char *name, const char *path) { @@ -951,6 +992,7 @@ ps_start_utt(ps_decoder_t *ps) ptmr_reset(&ps->perf); ptmr_start(&ps->perf); + sprintf(uttid, "%09u", ps->uttno); ++ps->uttno; @@ -964,6 +1006,7 @@ ps_start_utt(ps_decoder_t *ps) if ((rv = acmod_start_utt(ps->acmod)) < 0) return rv; + /* Start logging features and audio if requested. */ if (ps->mfclogdir) { char *logfn = string_join(ps->mfclogdir, "/", @@ -1004,11 +1047,9 @@ ps_start_utt(ps_decoder_t *ps) ckd_free(logfn); acmod_set_senfh(ps->acmod, senfh); } - /* Start auxiliary phone loop search. */ if (ps->phone_loop) ps_search_start(ps->phone_loop); - return ps_search_start(ps->search); }