1449 lines
38 KiB
C
1449 lines
38 KiB
C
/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
|
/* ====================================================================
|
|
* Copyright (c) 2008 Carnegie Mellon University. All rights
|
|
* reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
*
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
*
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in
|
|
* the documentation and/or other materials provided with the
|
|
* distribution.
|
|
*
|
|
* This work was supported in part by funding from the Defense Advanced
|
|
* Research Projects Agency and the National Science Foundation of the
|
|
* United States of America, and the CMU Sphinx Speech Consortium.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
|
|
* ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
|
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
|
|
* NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
*
|
|
* ====================================================================
|
|
*
|
|
*/
|
|
|
|
/* System headers. */
|
|
#include <stdio.h>
|
|
#include <assert.h>
|
|
|
|
#ifdef HAVE_UNISTD_H
|
|
#include <unistd.h>
|
|
#endif
|
|
|
|
/* SphinxBase headers. */
|
|
#include <sphinxbase/err.h>
|
|
#include <sphinxbase/strfuncs.h>
|
|
#include <sphinxbase/filename.h>
|
|
#include <sphinxbase/pio.h>
|
|
#include <sphinxbase/jsgf.h>
|
|
#include <sphinxbase/hash_table.h>
|
|
|
|
/* Local headers. */
|
|
#include "cmdln_macro.h"
|
|
#include "pocketsphinx.h"
|
|
#include "pocketsphinx_internal.h"
|
|
#include "ps_lattice_internal.h"
|
|
#include "phone_loop_search.h"
|
|
#include "kws_search.h"
|
|
#include "fsg_search_internal.h"
|
|
#include "ngram_search.h"
|
|
#include "ngram_search_fwdtree.h"
|
|
#include "ngram_search_fwdflat.h"
|
|
#include "allphone_search.h"
|
|
|
|
static const arg_t ps_args_def[] = {
|
|
POCKETSPHINX_OPTIONS,
|
|
CMDLN_EMPTY_OPTION
|
|
};
|
|
|
|
/* I'm not sure what the portable way to do this is. */
|
|
/*
 * Report whether `path` can be opened for reading.
 *
 * The file is opened in binary read mode and closed again straight
 * away.  Returns 1 if the open succeeded and 0 otherwise.
 */
static int
file_exists(const char *path)
{
    FILE *probe = fopen(path, "rb");

    if (probe == NULL)
        return 0;
    fclose(probe);
    return 1;
}
|
|
|
|
#ifdef MODELDIR
|
|
/*
 * Report whether the file "<path>/mdef" can be opened for reading.
 *
 * Returns 1 if it can and 0 otherwise.  The joined path is a temporary
 * that is released before returning.
 */
static int
hmmdir_exists(const char *path)
{
    char *mdef_path = string_join(path, "/mdef", NULL);
    FILE *probe = fopen(mdef_path, "rb");
    int found = (probe != NULL);

    if (found)
        fclose(probe);
    ckd_free(mdef_path);
    return found;
}
|
|
#endif
|
|
|
|
static void
|
|
ps_expand_file_config(ps_decoder_t *ps, const char *arg, const char *extra_arg,
|
|
const char *hmmdir, const char *file)
|
|
{
|
|
const char *val;
|
|
if ((val = cmd_ln_str_r(ps->config, arg)) != NULL) {
|
|
cmd_ln_set_str_extra_r(ps->config, extra_arg, val);
|
|
} else if (hmmdir == NULL) {
|
|
cmd_ln_set_str_extra_r(ps->config, extra_arg, NULL);
|
|
} else {
|
|
char *tmp = string_join(hmmdir, "/", file, NULL);
|
|
if (file_exists(tmp))
|
|
cmd_ln_set_str_extra_r(ps->config, extra_arg, tmp);
|
|
else
|
|
cmd_ln_set_str_extra_r(ps->config, extra_arg, NULL);
|
|
ckd_free(tmp);
|
|
}
|
|
}
|
|
|
|
/* Feature and front-end parameters that may be in feat.params */
|
|
static const arg_t feat_defn[] = {
|
|
waveform_to_cepstral_command_line_macro(),
|
|
cepstral_to_feature_command_line_macro(),
|
|
CMDLN_EMPTY_OPTION
|
|
};
|
|
|
|
static void
|
|
ps_expand_model_config(ps_decoder_t *ps)
|
|
{
|
|
char const *hmmdir, *featparams;
|
|
|
|
/* Disable memory mapping on Blackfin (FIXME: should be uClinux in general). */
|
|
#ifdef __ADSPBLACKFIN__
|
|
E_INFO("Will not use mmap() on uClinux/Blackfin.");
|
|
cmd_ln_set_boolean_r(ps->config, "-mmap", FALSE);
|
|
#endif
|
|
|
|
/* Get acoustic model filenames and add them to the command-line */
|
|
hmmdir = cmd_ln_str_r(ps->config, "-hmm");
|
|
ps_expand_file_config(ps, "-mdef", "_mdef", hmmdir, "mdef");
|
|
ps_expand_file_config(ps, "-mean", "_mean", hmmdir, "means");
|
|
ps_expand_file_config(ps, "-var", "_var", hmmdir, "variances");
|
|
ps_expand_file_config(ps, "-tmat", "_tmat", hmmdir, "transition_matrices");
|
|
ps_expand_file_config(ps, "-mixw", "_mixw", hmmdir, "mixture_weights");
|
|
ps_expand_file_config(ps, "-sendump", "_sendump", hmmdir, "sendump");
|
|
ps_expand_file_config(ps, "-fdict", "_fdict", hmmdir, "noisedict");
|
|
ps_expand_file_config(ps, "-lda", "_lda", hmmdir, "feature_transform");
|
|
ps_expand_file_config(ps, "-featparams", "_featparams", hmmdir, "feat.params");
|
|
ps_expand_file_config(ps, "-senmgau", "_senmgau", hmmdir, "senmgau");
|
|
|
|
/* Look for feat.params in acoustic model dir. */
|
|
if ((featparams = cmd_ln_str_r(ps->config, "_featparams"))) {
|
|
if (NULL !=
|
|
cmd_ln_parse_file_r(ps->config, feat_defn, featparams, FALSE))
|
|
E_INFO("Parsed model-specific feature parameters from %s\n",
|
|
featparams);
|
|
}
|
|
|
|
/* Print here because acmod_init might load feat.params file */
|
|
if (err_get_logfp() != NULL) {
|
|
cmd_ln_print_values_r(ps->config, err_get_logfp(), ps_args());
|
|
}
|
|
}
|
|
|
|
static void
|
|
ps_free_searches(ps_decoder_t *ps)
|
|
{
|
|
if (ps->searches) {
|
|
hash_iter_t *search_it;
|
|
for (search_it = hash_table_iter(ps->searches); search_it;
|
|
search_it = hash_table_iter_next(search_it)) {
|
|
ps_search_free(hash_entry_val(search_it->ent));
|
|
}
|
|
hash_table_free(ps->searches);
|
|
}
|
|
|
|
ps->searches = NULL;
|
|
ps->search = NULL;
|
|
}
|
|
|
|
static ps_search_t *
|
|
ps_find_search(ps_decoder_t *ps, char const *name)
|
|
{
|
|
void *search = NULL;
|
|
hash_table_lookup(ps->searches, name, &search);
|
|
|
|
return (ps_search_t *) search;
|
|
}
|
|
|
|
/* Set default acoustic and language models if they are not defined in configuration. */
|
|
void
|
|
ps_default_search_args(cmd_ln_t *config)
|
|
{
|
|
#ifdef MODELDIR
|
|
const char *hmmdir = cmd_ln_str_r(config, "-hmm");
|
|
if (hmmdir == NULL && hmmdir_exists(MODELDIR "/en-us/en-us")) {
|
|
hmmdir = MODELDIR "/en-us/en-us";
|
|
cmd_ln_set_str_r(config, "-hmm", hmmdir);
|
|
}
|
|
|
|
const char *lmfile = cmd_ln_str_r(config, "-lm");
|
|
if (lmfile == NULL && !cmd_ln_str_r(config, "-fsg")
|
|
&& !cmd_ln_str_r(config, "-jsgf")
|
|
&& !cmd_ln_str_r(config, "-lmctl")
|
|
&& !cmd_ln_str_r(config, "-kws")
|
|
&& !cmd_ln_str_r(config, "-keyphrase")
|
|
&& file_exists(MODELDIR "/en-us/en-us.lm.bin")) {
|
|
lmfile = MODELDIR "/en-us/en-us.lm.bin";
|
|
cmd_ln_set_str_r(config, "-lm", lmfile);
|
|
}
|
|
|
|
const char *dictfile = cmd_ln_str_r(config, "-dict");
|
|
if (dictfile == NULL && file_exists(MODELDIR "/en-us/cmudict-en-us.dict")) {
|
|
dictfile = MODELDIR "/en-us/cmudict-en-us.dict";
|
|
cmd_ln_set_str_r(config, "-dict", dictfile);
|
|
}
|
|
#endif
|
|
}
|
|
|
|
int
|
|
ps_reinit(ps_decoder_t *ps, cmd_ln_t *config)
|
|
{
|
|
const char *path;
|
|
const char *keyphrase;
|
|
int32 lw;
|
|
|
|
if (config && config != ps->config) {
|
|
cmd_ln_free_r(ps->config);
|
|
ps->config = cmd_ln_retain(config);
|
|
}
|
|
|
|
err_set_debug_level(cmd_ln_int32_r(ps->config, "-debug"));
|
|
/* Set up logging. We need to do this earlier because we want to dump
|
|
* the information to the configured log, not to the stderr. */
|
|
if (config && cmd_ln_str_r(ps->config, "-logfn")) {
|
|
if (err_set_logfile(cmd_ln_str_r(ps->config, "-logfn")) < 0) {
|
|
E_ERROR("Cannot redirect log output\n");
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
ps->mfclogdir = cmd_ln_str_r(ps->config, "-mfclogdir");
|
|
ps->rawlogdir = cmd_ln_str_r(ps->config, "-rawlogdir");
|
|
ps->senlogdir = cmd_ln_str_r(ps->config, "-senlogdir");
|
|
|
|
/* Fill in some default arguments. */
|
|
ps_expand_model_config(ps);
|
|
|
|
/* Free old searches (do this before other reinit) */
|
|
ps_free_searches(ps);
|
|
ps->searches = hash_table_new(3, HASH_CASE_YES);
|
|
|
|
/* Free old acmod. */
|
|
acmod_free(ps->acmod);
|
|
ps->acmod = NULL;
|
|
|
|
/* Free old dictionary (must be done after the two things above) */
|
|
dict_free(ps->dict);
|
|
ps->dict = NULL;
|
|
|
|
/* Free d2p */
|
|
dict2pid_free(ps->d2p);
|
|
ps->d2p = NULL;
|
|
|
|
/* Logmath computation (used in acmod and search) */
|
|
if (ps->lmath == NULL
|
|
|| (logmath_get_base(ps->lmath) !=
|
|
(float64)cmd_ln_float32_r(ps->config, "-logbase"))) {
|
|
if (ps->lmath)
|
|
logmath_free(ps->lmath);
|
|
ps->lmath = logmath_init
|
|
((float64)cmd_ln_float32_r(ps->config, "-logbase"), 0,
|
|
cmd_ln_boolean_r(ps->config, "-bestpath"));
|
|
}
|
|
|
|
/* Acoustic model (this is basically everything that
|
|
* uttproc.c, senscr.c, and others used to do) */
|
|
if ((ps->acmod = acmod_init(ps->config, ps->lmath, NULL, NULL)) == NULL)
|
|
return -1;
|
|
|
|
|
|
|
|
if (cmd_ln_int32_r(ps->config, "-pl_window") > 0) {
|
|
/* Initialize an auxiliary phone loop search, which will run in
|
|
* "parallel" with FSG or N-Gram search. */
|
|
if ((ps->phone_loop =
|
|
phone_loop_search_init(ps->config, ps->acmod, ps->dict)) == NULL)
|
|
return -1;
|
|
hash_table_enter(ps->searches,
|
|
ps_search_name(ps->phone_loop),
|
|
ps->phone_loop);
|
|
}
|
|
|
|
/* Dictionary and triphone mappings (depends on acmod). */
|
|
/* FIXME: pass config, change arguments, implement LTS, etc. */
|
|
if ((ps->dict = dict_init(ps->config, ps->acmod->mdef)) == NULL)
|
|
return -1;
|
|
if ((ps->d2p = dict2pid_build(ps->acmod->mdef, ps->dict)) == NULL)
|
|
return -1;
|
|
|
|
lw = cmd_ln_float32_r(ps->config, "-lw");
|
|
|
|
/* Determine whether we are starting out in FSG or N-Gram search mode.
|
|
* If neither is used skip search initialization. */
|
|
|
|
/* Load KWS if one was specified in config */
|
|
if ((keyphrase = cmd_ln_str_r(ps->config, "-keyphrase"))) {
|
|
if (ps_set_keyphrase(ps, PS_DEFAULT_SEARCH, keyphrase))
|
|
return -1;
|
|
ps_set_search(ps, PS_DEFAULT_SEARCH);
|
|
}
|
|
|
|
if ((path = cmd_ln_str_r(ps->config, "-kws"))) {
|
|
if (ps_set_kws(ps, PS_DEFAULT_SEARCH, path))
|
|
return -1;
|
|
ps_set_search(ps, PS_DEFAULT_SEARCH);
|
|
}
|
|
|
|
/* Load an FSG if one was specified in config */
|
|
if ((path = cmd_ln_str_r(ps->config, "-fsg"))) {
|
|
fsg_model_t *fsg = fsg_model_readfile(path, ps->lmath, lw);
|
|
if (!fsg)
|
|
return -1;
|
|
if (ps_set_fsg(ps, PS_DEFAULT_SEARCH, fsg)) {
|
|
fsg_model_free(fsg);
|
|
return -1;
|
|
}
|
|
fsg_model_free(fsg);
|
|
ps_set_search(ps, PS_DEFAULT_SEARCH);
|
|
}
|
|
|
|
/* Or load a JSGF grammar */
|
|
if ((path = cmd_ln_str_r(ps->config, "-jsgf"))) {
|
|
if (ps_set_jsgf_file(ps, PS_DEFAULT_SEARCH, path)
|
|
|| ps_set_search(ps, PS_DEFAULT_SEARCH))
|
|
return -1;
|
|
}
|
|
|
|
if ((path = cmd_ln_str_r(ps->config, "-allphone"))) {
|
|
if (ps_set_allphone_file(ps, PS_DEFAULT_SEARCH, path)
|
|
|| ps_set_search(ps, PS_DEFAULT_SEARCH))
|
|
return -1;
|
|
}
|
|
|
|
if ((path = cmd_ln_str_r(ps->config, "-lm")) &&
|
|
!cmd_ln_boolean_r(ps->config, "-allphone")) {
|
|
if (ps_set_lm_file(ps, PS_DEFAULT_SEARCH, path)
|
|
|| ps_set_search(ps, PS_DEFAULT_SEARCH))
|
|
return -1;
|
|
}
|
|
|
|
if ((path = cmd_ln_str_r(ps->config, "-lmctl"))) {
|
|
const char *name;
|
|
ngram_model_t *lmset;
|
|
ngram_model_set_iter_t *lmset_it;
|
|
|
|
if (!(lmset = ngram_model_set_read(ps->config, path, ps->lmath))) {
|
|
E_ERROR("Failed to read language model control file: %s\n", path);
|
|
return -1;
|
|
}
|
|
|
|
for(lmset_it = ngram_model_set_iter(lmset);
|
|
lmset_it; lmset_it = ngram_model_set_iter_next(lmset_it)) {
|
|
ngram_model_t *lm = ngram_model_set_iter_model(lmset_it, &name);
|
|
E_INFO("adding search %s\n", name);
|
|
if (ps_set_lm(ps, name, lm)) {
|
|
ngram_model_set_iter_free(lmset_it);
|
|
ngram_model_free(lmset);
|
|
return -1;
|
|
}
|
|
}
|
|
ngram_model_free(lmset);
|
|
|
|
name = cmd_ln_str_r(ps->config, "-lmname");
|
|
if (name)
|
|
ps_set_search(ps, name);
|
|
else {
|
|
E_ERROR("No default LM name (-lmname) for `-lmctl'\n");
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
/* Initialize performance timer. */
|
|
ps->perf.name = "decode";
|
|
ptmr_init(&ps->perf);
|
|
|
|
return 0;
|
|
}
|
|
|
|
ps_decoder_t *
|
|
ps_init(cmd_ln_t *config)
|
|
{
|
|
ps_decoder_t *ps;
|
|
|
|
if (!config) {
|
|
E_ERROR("No configuration specified");
|
|
return NULL;
|
|
}
|
|
|
|
ps = ckd_calloc(1, sizeof(*ps));
|
|
ps->refcount = 1;
|
|
if (ps_reinit(ps, config) < 0) {
|
|
ps_free(ps);
|
|
return NULL;
|
|
}
|
|
return ps;
|
|
}
|
|
|
|
arg_t const *
|
|
ps_args(void)
|
|
{
|
|
return ps_args_def;
|
|
}
|
|
|
|
/* Add one reference to the decoder and return the same pointer. */
ps_decoder_t *
ps_retain(ps_decoder_t *ps)
{
    ps->refcount += 1;
    return ps;
}
|
|
|
|
int
|
|
ps_free(ps_decoder_t *ps)
|
|
{
|
|
if (ps == NULL)
|
|
return 0;
|
|
if (--ps->refcount > 0)
|
|
return ps->refcount;
|
|
ps_free_searches(ps);
|
|
dict_free(ps->dict);
|
|
dict2pid_free(ps->d2p);
|
|
acmod_free(ps->acmod);
|
|
logmath_free(ps->lmath);
|
|
cmd_ln_free_r(ps->config);
|
|
ckd_free(ps);
|
|
return 0;
|
|
}
|
|
|
|
cmd_ln_t *
|
|
ps_get_config(ps_decoder_t *ps)
|
|
{
|
|
return ps->config;
|
|
}
|
|
|
|
logmath_t *
|
|
ps_get_logmath(ps_decoder_t *ps)
|
|
{
|
|
return ps->lmath;
|
|
}
|
|
|
|
fe_t *
|
|
ps_get_fe(ps_decoder_t *ps)
|
|
{
|
|
return ps->acmod->fe;
|
|
}
|
|
|
|
feat_t *
|
|
ps_get_feat(ps_decoder_t *ps)
|
|
{
|
|
return ps->acmod->fcb;
|
|
}
|
|
|
|
ps_mllr_t *
|
|
ps_update_mllr(ps_decoder_t *ps, ps_mllr_t *mllr)
|
|
{
|
|
return acmod_update_mllr(ps->acmod, mllr);
|
|
}
|
|
|
|
int
|
|
ps_set_search(ps_decoder_t *ps, const char *name)
|
|
{
|
|
ps_search_t *search;
|
|
|
|
if (ps->acmod->state != ACMOD_ENDED && ps->acmod->state != ACMOD_IDLE) {
|
|
E_ERROR("Cannot change search while decoding, end utterance first\n");
|
|
return -1;
|
|
}
|
|
|
|
if (!(search = ps_find_search(ps, name))) {
|
|
return -1;
|
|
}
|
|
|
|
ps->search = search;
|
|
/* Set pl window depending on the search */
|
|
if (!strcmp(PS_SEARCH_TYPE_NGRAM, ps_search_type(search))) {
|
|
ps->pl_window = cmd_ln_int32_r(ps->config, "-pl_window");
|
|
} else {
|
|
ps->pl_window = 0;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
const char*
|
|
ps_get_search(ps_decoder_t *ps)
|
|
{
|
|
hash_iter_t *search_it;
|
|
const char* name = NULL;
|
|
for (search_it = hash_table_iter(ps->searches); search_it;
|
|
search_it = hash_table_iter_next(search_it)) {
|
|
if (hash_entry_val(search_it->ent) == ps->search) {
|
|
name = hash_entry_key(search_it->ent);
|
|
break;
|
|
}
|
|
}
|
|
return name;
|
|
}
|
|
|
|
int
|
|
ps_unset_search(ps_decoder_t *ps, const char *name)
|
|
{
|
|
ps_search_t *search = hash_table_delete(ps->searches, name);
|
|
if (!search)
|
|
return -1;
|
|
if (ps->search == search)
|
|
ps->search = NULL;
|
|
ps_search_free(search);
|
|
return 0;
|
|
}
|
|
|
|
ps_search_iter_t *
|
|
ps_search_iter(ps_decoder_t *ps)
|
|
{
|
|
return (ps_search_iter_t *)hash_table_iter(ps->searches);
|
|
}
|
|
|
|
/* Advance a search iterator by stepping the underlying hash table
 * iterator. */
ps_search_iter_t *
ps_search_iter_next(ps_search_iter_t *itor)
{
    hash_iter_t *next = hash_table_iter_next((hash_iter_t *)itor);

    return (ps_search_iter_t *)next;
}
|
|
|
|
const char*
|
|
ps_search_iter_val(ps_search_iter_t *itor)
|
|
{
|
|
return (const char*)(((hash_iter_t *)itor)->ent->key);
|
|
}
|
|
|
|
void
|
|
ps_search_iter_free(ps_search_iter_t *itor)
|
|
{
|
|
hash_table_iter_free((hash_iter_t *)itor);
|
|
}
|
|
|
|
ngram_model_t *
|
|
ps_get_lm(ps_decoder_t *ps, const char *name)
|
|
{
|
|
ps_search_t *search = ps_find_search(ps, name);
|
|
if (search && strcmp(PS_SEARCH_TYPE_NGRAM, ps_search_type(search)))
|
|
return NULL;
|
|
return search ? ((ngram_search_t *) search)->lmset : NULL;
|
|
}
|
|
|
|
fsg_model_t *
|
|
ps_get_fsg(ps_decoder_t *ps, const char *name)
|
|
{
|
|
ps_search_t *search = ps_find_search(ps, name);
|
|
if (search && strcmp(PS_SEARCH_TYPE_FSG, ps_search_type(search)))
|
|
return NULL;
|
|
return search ? ((fsg_search_t *) search)->fsg : NULL;
|
|
}
|
|
|
|
const char*
|
|
ps_get_kws(ps_decoder_t *ps, const char* name)
|
|
{
|
|
ps_search_t *search = ps_find_search(ps, name);
|
|
if (search && strcmp(PS_SEARCH_TYPE_KWS, ps_search_type(search)))
|
|
return NULL;
|
|
return search ? kws_search_get_keywords(search) : NULL;
|
|
}
|
|
|
|
static int
|
|
set_search_internal(ps_decoder_t *ps, ps_search_t *search)
|
|
{
|
|
ps_search_t *old_search;
|
|
|
|
if (!search)
|
|
return -1;
|
|
|
|
search->pls = ps->phone_loop;
|
|
old_search = (ps_search_t *) hash_table_replace(ps->searches, ps_search_name(search), search);
|
|
if (old_search != search)
|
|
ps_search_free(old_search);
|
|
|
|
return 0;
|
|
}
|
|
|
|
int
|
|
ps_set_lm(ps_decoder_t *ps, const char *name, ngram_model_t *lm)
|
|
{
|
|
ps_search_t *search;
|
|
search = ngram_search_init(name, lm, ps->config, ps->acmod, ps->dict, ps->d2p);
|
|
return set_search_internal(ps, search);
|
|
}
|
|
|
|
int
|
|
ps_set_lm_file(ps_decoder_t *ps, const char *name, const char *path)
|
|
{
|
|
ngram_model_t *lm;
|
|
int result;
|
|
|
|
lm = ngram_model_read(ps->config, path, NGRAM_AUTO, ps->lmath);
|
|
if (!lm)
|
|
return -1;
|
|
|
|
result = ps_set_lm(ps, name, lm);
|
|
ngram_model_free(lm);
|
|
return result;
|
|
}
|
|
|
|
int
|
|
ps_set_allphone(ps_decoder_t *ps, const char *name, ngram_model_t *lm)
|
|
{
|
|
ps_search_t *search;
|
|
search = allphone_search_init(name, lm, ps->config, ps->acmod, ps->dict, ps->d2p);
|
|
return set_search_internal(ps, search);
|
|
}
|
|
|
|
int
|
|
ps_set_allphone_file(ps_decoder_t *ps, const char *name, const char *path)
|
|
{
|
|
ngram_model_t *lm;
|
|
int result;
|
|
|
|
lm = NULL;
|
|
if (path)
|
|
lm = ngram_model_read(ps->config, path, NGRAM_AUTO, ps->lmath);
|
|
result = ps_set_allphone(ps, name, lm);
|
|
if (lm)
|
|
ngram_model_free(lm);
|
|
return result;
|
|
}
|
|
|
|
int
|
|
ps_set_kws(ps_decoder_t *ps, const char *name, const char *keyfile)
|
|
{
|
|
ps_search_t *search;
|
|
search = kws_search_init(name, NULL, keyfile, ps->config, ps->acmod, ps->dict, ps->d2p);
|
|
return set_search_internal(ps, search);
|
|
}
|
|
|
|
int
|
|
ps_set_keyphrase(ps_decoder_t *ps, const char *name, const char *keyphrase)
|
|
{
|
|
ps_search_t *search;
|
|
search = kws_search_init(name, keyphrase, NULL, ps->config, ps->acmod, ps->dict, ps->d2p);
|
|
return set_search_internal(ps, search);
|
|
}
|
|
|
|
int
|
|
ps_set_fsg(ps_decoder_t *ps, const char *name, fsg_model_t *fsg)
|
|
{
|
|
ps_search_t *search;
|
|
search = fsg_search_init(name, fsg, ps->config, ps->acmod, ps->dict, ps->d2p);
|
|
return set_search_internal(ps, search);
|
|
}
|
|
|
|
int
|
|
ps_set_jsgf_file(ps_decoder_t *ps, const char *name, const char *path)
|
|
{
|
|
fsg_model_t *fsg;
|
|
jsgf_rule_t *rule;
|
|
char const *toprule;
|
|
jsgf_t *jsgf = jsgf_parse_file(path, NULL);
|
|
float lw;
|
|
int result;
|
|
|
|
if (!jsgf)
|
|
return -1;
|
|
|
|
rule = NULL;
|
|
/* Take the -toprule if specified. */
|
|
if ((toprule = cmd_ln_str_r(ps->config, "-toprule"))) {
|
|
rule = jsgf_get_rule(jsgf, toprule);
|
|
if (rule == NULL) {
|
|
E_ERROR("Start rule %s not found\n", toprule);
|
|
jsgf_grammar_free(jsgf);
|
|
return -1;
|
|
}
|
|
} else {
|
|
rule = jsgf_get_public_rule(jsgf);
|
|
if (rule == NULL) {
|
|
E_ERROR("No public rules found in %s\n", path);
|
|
jsgf_grammar_free(jsgf);
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
lw = cmd_ln_float32_r(ps->config, "-lw");
|
|
fsg = jsgf_build_fsg(jsgf, rule, ps->lmath, lw);
|
|
result = ps_set_fsg(ps, name, fsg);
|
|
fsg_model_free(fsg);
|
|
jsgf_grammar_free(jsgf);
|
|
return result;
|
|
}
|
|
|
|
int
|
|
ps_set_jsgf_string(ps_decoder_t *ps, const char *name, const char *jsgf_string)
|
|
{
|
|
fsg_model_t *fsg;
|
|
jsgf_rule_t *rule;
|
|
char const *toprule;
|
|
jsgf_t *jsgf = jsgf_parse_string(jsgf_string, NULL);
|
|
float lw;
|
|
int result;
|
|
|
|
if (!jsgf)
|
|
return -1;
|
|
|
|
rule = NULL;
|
|
/* Take the -toprule if specified. */
|
|
if ((toprule = cmd_ln_str_r(ps->config, "-toprule"))) {
|
|
rule = jsgf_get_rule(jsgf, toprule);
|
|
if (rule == NULL) {
|
|
E_ERROR("Start rule %s not found\n", toprule);
|
|
return -1;
|
|
}
|
|
} else {
|
|
rule = jsgf_get_public_rule(jsgf);
|
|
if (rule == NULL) {
|
|
E_ERROR("No public rules found in input string\n");
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
lw = cmd_ln_float32_r(ps->config, "-lw");
|
|
fsg = jsgf_build_fsg(jsgf, rule, ps->lmath, lw);
|
|
result = ps_set_fsg(ps, name, fsg);
|
|
fsg_model_free(fsg);
|
|
return result;
|
|
}
|
|
|
|
|
|
int
|
|
ps_load_dict(ps_decoder_t *ps, char const *dictfile,
|
|
char const *fdictfile, char const *format)
|
|
{
|
|
dict2pid_t *d2p;
|
|
dict_t *dict;
|
|
hash_iter_t *search_it;
|
|
cmd_ln_t *newconfig;
|
|
|
|
/* Create a new scratch config to load this dict (so existing one
|
|
* won't be affected if it fails) */
|
|
newconfig = cmd_ln_init(NULL, ps_args(), TRUE, NULL);
|
|
cmd_ln_set_boolean_r(newconfig, "-dictcase",
|
|
cmd_ln_boolean_r(ps->config, "-dictcase"));
|
|
cmd_ln_set_str_r(newconfig, "-dict", dictfile);
|
|
if (fdictfile)
|
|
cmd_ln_set_str_extra_r(newconfig, "_fdict", fdictfile);
|
|
else
|
|
cmd_ln_set_str_extra_r(newconfig, "_fdict",
|
|
cmd_ln_str_r(ps->config, "_fdict"));
|
|
|
|
/* Try to load it. */
|
|
if ((dict = dict_init(newconfig, ps->acmod->mdef)) == NULL) {
|
|
cmd_ln_free_r(newconfig);
|
|
return -1;
|
|
}
|
|
|
|
/* Reinit the dict2pid. */
|
|
if ((d2p = dict2pid_build(ps->acmod->mdef, dict)) == NULL) {
|
|
cmd_ln_free_r(newconfig);
|
|
return -1;
|
|
}
|
|
|
|
/* Success! Update the existing config to reflect new dicts and
|
|
* drop everything into place. */
|
|
cmd_ln_free_r(newconfig);
|
|
dict_free(ps->dict);
|
|
ps->dict = dict;
|
|
dict2pid_free(ps->d2p);
|
|
ps->d2p = d2p;
|
|
|
|
/* And tell all searches to reconfigure themselves. */
|
|
for (search_it = hash_table_iter(ps->searches); search_it;
|
|
search_it = hash_table_iter_next(search_it)) {
|
|
if (ps_search_reinit(hash_entry_val(search_it->ent), dict, d2p) < 0) {
|
|
hash_table_iter_free(search_it);
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
int
|
|
ps_save_dict(ps_decoder_t *ps, char const *dictfile,
|
|
char const *format)
|
|
{
|
|
return dict_write(ps->dict, dictfile, format);
|
|
}
|
|
|
|
int
|
|
ps_add_word(ps_decoder_t *ps,
|
|
char const *word,
|
|
char const *phones,
|
|
int update)
|
|
{
|
|
int32 wid;
|
|
s3cipid_t *pron;
|
|
hash_iter_t *search_it;
|
|
char **phonestr, *tmp;
|
|
int np, i, rv;
|
|
|
|
/* Parse phones into an array of phone IDs. */
|
|
tmp = ckd_salloc(phones);
|
|
np = str2words(tmp, NULL, 0);
|
|
phonestr = ckd_calloc(np, sizeof(*phonestr));
|
|
str2words(tmp, phonestr, np);
|
|
pron = ckd_calloc(np, sizeof(*pron));
|
|
for (i = 0; i < np; ++i) {
|
|
pron[i] = bin_mdef_ciphone_id(ps->acmod->mdef, phonestr[i]);
|
|
if (pron[i] == -1) {
|
|
E_ERROR("Unknown phone %s in phone string %s\n",
|
|
phonestr[i], tmp);
|
|
ckd_free(phonestr);
|
|
ckd_free(tmp);
|
|
ckd_free(pron);
|
|
return -1;
|
|
}
|
|
}
|
|
/* No longer needed. */
|
|
ckd_free(phonestr);
|
|
ckd_free(tmp);
|
|
|
|
/* Add it to the dictionary. */
|
|
if ((wid = dict_add_word(ps->dict, word, pron, np)) == -1) {
|
|
ckd_free(pron);
|
|
return -1;
|
|
}
|
|
/* No longer needed. */
|
|
ckd_free(pron);
|
|
|
|
/* Now we also have to add it to dict2pid. */
|
|
dict2pid_add_word(ps->d2p, wid);
|
|
|
|
/* TODO: we definitely need to refactor this */
|
|
for (search_it = hash_table_iter(ps->searches); search_it;
|
|
search_it = hash_table_iter_next(search_it)) {
|
|
ps_search_t *search = hash_entry_val(search_it->ent);
|
|
if (!strcmp(PS_SEARCH_TYPE_NGRAM, ps_search_type(search))) {
|
|
ngram_model_t *lmset = ((ngram_search_t *) search)->lmset;
|
|
if (ngram_model_add_word(lmset, word, 1.0) == NGRAM_INVALID_WID) {
|
|
hash_table_iter_free(search_it);
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
if (update) {
|
|
if ((rv = ps_search_reinit(search, ps->dict, ps->d2p) < 0)) {
|
|
hash_table_iter_free(search_it);
|
|
return rv;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Rebuild the widmap and search tree if requested. */
|
|
return wid;
|
|
}
|
|
|
|
char *
|
|
ps_lookup_word(ps_decoder_t *ps, const char *word)
|
|
{
|
|
s3wid_t wid;
|
|
int32 phlen, j;
|
|
char *phones;
|
|
dict_t *dict = ps->dict;
|
|
|
|
wid = dict_wordid(dict, word);
|
|
if (wid == BAD_S3WID)
|
|
return NULL;
|
|
|
|
for (phlen = j = 0; j < dict_pronlen(dict, wid); ++j)
|
|
phlen += strlen(dict_ciphone_str(dict, wid, j)) + 1;
|
|
phones = ckd_calloc(1, phlen);
|
|
for (j = 0; j < dict_pronlen(dict, wid); ++j) {
|
|
strcat(phones, dict_ciphone_str(dict, wid, j));
|
|
if (j != dict_pronlen(dict, wid) - 1)
|
|
strcat(phones, " ");
|
|
}
|
|
return phones;
|
|
}
|
|
|
|
long
|
|
ps_decode_raw(ps_decoder_t *ps, FILE *rawfh,
|
|
long maxsamps)
|
|
{
|
|
int16 *data;
|
|
long total, pos, endpos;
|
|
|
|
ps_start_stream(ps);
|
|
ps_start_utt(ps);
|
|
|
|
/* If this file is seekable or maxsamps is specified, then decode
|
|
* the whole thing at once. */
|
|
if (maxsamps != -1) {
|
|
data = ckd_calloc(maxsamps, sizeof(*data));
|
|
total = fread(data, sizeof(*data), maxsamps, rawfh);
|
|
ps_process_raw(ps, data, total, FALSE, TRUE);
|
|
ckd_free(data);
|
|
} else if ((pos = ftell(rawfh)) >= 0) {
|
|
fseek(rawfh, 0, SEEK_END);
|
|
endpos = ftell(rawfh);
|
|
fseek(rawfh, pos, SEEK_SET);
|
|
maxsamps = endpos - pos;
|
|
|
|
data = ckd_calloc(maxsamps, sizeof(*data));
|
|
total = fread(data, sizeof(*data), maxsamps, rawfh);
|
|
ps_process_raw(ps, data, total, FALSE, TRUE);
|
|
ckd_free(data);
|
|
} else {
|
|
/* Otherwise decode it in a stream. */
|
|
total = 0;
|
|
while (!feof(rawfh)) {
|
|
int16 data[256];
|
|
size_t nread;
|
|
|
|
nread = fread(data, sizeof(*data), sizeof(data)/sizeof(*data), rawfh);
|
|
ps_process_raw(ps, data, nread, FALSE, FALSE);
|
|
total += nread;
|
|
}
|
|
}
|
|
ps_end_utt(ps);
|
|
return total;
|
|
}
|
|
|
|
int
|
|
ps_start_stream(ps_decoder_t *ps)
|
|
{
|
|
acmod_start_stream(ps->acmod);
|
|
return 0;
|
|
}
|
|
|
|
int
|
|
ps_start_utt(ps_decoder_t *ps)
|
|
{
|
|
int rv;
|
|
char uttid[16];
|
|
|
|
if (ps->acmod->state == ACMOD_STARTED || ps->acmod->state == ACMOD_PROCESSING) {
|
|
E_ERROR("Utterance already started\n");
|
|
return -1;
|
|
}
|
|
|
|
if (ps->search == NULL) {
|
|
E_ERROR("No search module is selected, did you forget to "
|
|
"specify a language model or grammar?\n");
|
|
return -1;
|
|
}
|
|
|
|
ptmr_reset(&ps->perf);
|
|
ptmr_start(&ps->perf);
|
|
|
|
sprintf(uttid, "%09u", ps->uttno);
|
|
++ps->uttno;
|
|
|
|
/* Remove any residual word lattice and hypothesis. */
|
|
ps_lattice_free(ps->search->dag);
|
|
ps->search->dag = NULL;
|
|
ps->search->last_link = NULL;
|
|
ps->search->post = 0;
|
|
ckd_free(ps->search->hyp_str);
|
|
ps->search->hyp_str = NULL;
|
|
if ((rv = acmod_start_utt(ps->acmod)) < 0)
|
|
return rv;
|
|
|
|
/* Start logging features and audio if requested. */
|
|
if (ps->mfclogdir) {
|
|
char *logfn = string_join(ps->mfclogdir, "/",
|
|
uttid, ".mfc", NULL);
|
|
FILE *mfcfh;
|
|
E_INFO("Writing MFCC log file: %s\n", logfn);
|
|
if ((mfcfh = fopen(logfn, "wb")) == NULL) {
|
|
E_ERROR_SYSTEM("Failed to open MFCC log file %s", logfn);
|
|
ckd_free(logfn);
|
|
return -1;
|
|
}
|
|
ckd_free(logfn);
|
|
acmod_set_mfcfh(ps->acmod, mfcfh);
|
|
}
|
|
if (ps->rawlogdir) {
|
|
char *logfn = string_join(ps->rawlogdir, "/",
|
|
uttid, ".raw", NULL);
|
|
FILE *rawfh;
|
|
E_INFO("Writing raw audio log file: %s\n", logfn);
|
|
if ((rawfh = fopen(logfn, "wb")) == NULL) {
|
|
E_ERROR_SYSTEM("Failed to open raw audio log file %s", logfn);
|
|
ckd_free(logfn);
|
|
return -1;
|
|
}
|
|
ckd_free(logfn);
|
|
acmod_set_rawfh(ps->acmod, rawfh);
|
|
}
|
|
if (ps->senlogdir) {
|
|
char *logfn = string_join(ps->senlogdir, "/",
|
|
uttid, ".sen", NULL);
|
|
FILE *senfh;
|
|
E_INFO("Writing senone score log file: %s\n", logfn);
|
|
if ((senfh = fopen(logfn, "wb")) == NULL) {
|
|
E_ERROR_SYSTEM("Failed to open senone score log file %s", logfn);
|
|
ckd_free(logfn);
|
|
return -1;
|
|
}
|
|
ckd_free(logfn);
|
|
acmod_set_senfh(ps->acmod, senfh);
|
|
}
|
|
|
|
/* Start auxiliary phone loop search. */
|
|
if (ps->phone_loop)
|
|
ps_search_start(ps->phone_loop);
|
|
|
|
return ps_search_start(ps->search);
|
|
}
|
|
|
|
static int
|
|
ps_search_forward(ps_decoder_t *ps)
|
|
{
|
|
int nfr;
|
|
|
|
nfr = 0;
|
|
while (ps->acmod->n_feat_frame > 0) {
|
|
int k;
|
|
if (ps->pl_window > 0)
|
|
if ((k = ps_search_step(ps->phone_loop, ps->acmod->output_frame)) < 0)
|
|
return k;
|
|
if (ps->acmod->output_frame >= ps->pl_window)
|
|
if ((k = ps_search_step(ps->search,
|
|
ps->acmod->output_frame - ps->pl_window)) < 0)
|
|
return k;
|
|
acmod_advance(ps->acmod);
|
|
++ps->n_frame;
|
|
++nfr;
|
|
}
|
|
return nfr;
|
|
}
|
|
|
|
int
|
|
ps_decode_senscr(ps_decoder_t *ps, FILE *senfh)
|
|
{
|
|
int nfr, n_searchfr;
|
|
|
|
ps_start_utt(ps);
|
|
n_searchfr = 0;
|
|
acmod_set_insenfh(ps->acmod, senfh);
|
|
while ((nfr = acmod_read_scores(ps->acmod)) > 0) {
|
|
if ((nfr = ps_search_forward(ps)) < 0) {
|
|
ps_end_utt(ps);
|
|
return nfr;
|
|
}
|
|
n_searchfr += nfr;
|
|
}
|
|
ps_end_utt(ps);
|
|
acmod_set_insenfh(ps->acmod, NULL);
|
|
|
|
return n_searchfr;
|
|
}
|
|
|
|
int
|
|
ps_process_raw(ps_decoder_t *ps,
|
|
int16 const *data,
|
|
size_t n_samples,
|
|
int no_search,
|
|
int full_utt)
|
|
{
|
|
int n_searchfr = 0;
|
|
|
|
if (ps->acmod->state == ACMOD_IDLE) {
|
|
E_ERROR("Failed to process data, utterance is not started. Use start_utt to start it\n");
|
|
return 0;
|
|
}
|
|
|
|
if (no_search)
|
|
acmod_set_grow(ps->acmod, TRUE);
|
|
|
|
while (n_samples) {
|
|
int nfr;
|
|
|
|
/* Process some data into features. */
|
|
if ((nfr = acmod_process_raw(ps->acmod, &data,
|
|
&n_samples, full_utt)) < 0)
|
|
return nfr;
|
|
|
|
/* Score and search as much data as possible */
|
|
if (no_search)
|
|
continue;
|
|
if ((nfr = ps_search_forward(ps)) < 0)
|
|
return nfr;
|
|
n_searchfr += nfr;
|
|
}
|
|
|
|
return n_searchfr;
|
|
}
|
|
|
|
int
|
|
ps_process_cep(ps_decoder_t *ps,
|
|
mfcc_t **data,
|
|
int32 n_frames,
|
|
int no_search,
|
|
int full_utt)
|
|
{
|
|
int n_searchfr = 0;
|
|
|
|
if (no_search)
|
|
acmod_set_grow(ps->acmod, TRUE);
|
|
|
|
while (n_frames) {
|
|
int nfr;
|
|
|
|
/* Process some data into features. */
|
|
if ((nfr = acmod_process_cep(ps->acmod, &data,
|
|
&n_frames, full_utt)) < 0)
|
|
return nfr;
|
|
|
|
/* Score and search as much data as possible */
|
|
if (no_search)
|
|
continue;
|
|
if ((nfr = ps_search_forward(ps)) < 0)
|
|
return nfr;
|
|
n_searchfr += nfr;
|
|
}
|
|
|
|
return n_searchfr;
|
|
}
|
|
|
|
int
|
|
ps_end_utt(ps_decoder_t *ps)
|
|
{
|
|
int rv, i;
|
|
|
|
if (ps->acmod->state == ACMOD_ENDED || ps->acmod->state == ACMOD_IDLE) {
|
|
E_ERROR("Utterance is not started\n");
|
|
return -1;
|
|
}
|
|
acmod_end_utt(ps->acmod);
|
|
|
|
/* Search any remaining frames. */
|
|
if ((rv = ps_search_forward(ps)) < 0) {
|
|
ptmr_stop(&ps->perf);
|
|
return rv;
|
|
}
|
|
/* Finish phone loop search. */
|
|
if (ps->phone_loop) {
|
|
if ((rv = ps_search_finish(ps->phone_loop)) < 0) {
|
|
ptmr_stop(&ps->perf);
|
|
return rv;
|
|
}
|
|
}
|
|
/* Search any frames remaining in the lookahead window. */
|
|
if (ps->acmod->output_frame >= ps->pl_window) {
|
|
for (i = ps->acmod->output_frame - ps->pl_window;
|
|
i < ps->acmod->output_frame; ++i)
|
|
ps_search_step(ps->search, i);
|
|
}
|
|
/* Finish main search. */
|
|
if ((rv = ps_search_finish(ps->search)) < 0) {
|
|
ptmr_stop(&ps->perf);
|
|
return rv;
|
|
}
|
|
ptmr_stop(&ps->perf);
|
|
|
|
/* Log a backtrace if requested. */
|
|
if (cmd_ln_boolean_r(ps->config, "-backtrace")) {
|
|
const char* hyp;
|
|
ps_seg_t *seg;
|
|
int32 score;
|
|
|
|
hyp = ps_get_hyp(ps, &score);
|
|
|
|
if (hyp != NULL) {
|
|
E_INFO("%s (%d)\n", hyp, score);
|
|
E_INFO_NOFN("%-20s %-5s %-5s %-5s %-10s %-10s %-3s\n",
|
|
"word", "start", "end", "pprob", "ascr", "lscr", "lback");
|
|
for (seg = ps_seg_iter(ps); seg;
|
|
seg = ps_seg_next(seg)) {
|
|
char const *word;
|
|
int sf, ef;
|
|
int32 post, lscr, ascr, lback;
|
|
|
|
word = ps_seg_word(seg);
|
|
ps_seg_frames(seg, &sf, &ef);
|
|
post = ps_seg_prob(seg, &ascr, &lscr, &lback);
|
|
E_INFO_NOFN("%-20s %-5d %-5d %-1.3f %-10d %-10d %-3d\n",
|
|
word, sf, ef, logmath_exp(ps_get_logmath(ps), post),
|
|
ascr, lscr, lback);
|
|
}
|
|
}
|
|
}
|
|
return rv;
|
|
}
|
|
|
|
char const *
|
|
ps_get_hyp(ps_decoder_t *ps, int32 *out_best_score)
|
|
{
|
|
char const *hyp;
|
|
|
|
ptmr_start(&ps->perf);
|
|
hyp = ps_search_hyp(ps->search, out_best_score);
|
|
ptmr_stop(&ps->perf);
|
|
return hyp;
|
|
}
|
|
|
|
int32
|
|
ps_get_prob(ps_decoder_t *ps)
|
|
{
|
|
int32 prob;
|
|
|
|
ptmr_start(&ps->perf);
|
|
prob = ps_search_prob(ps->search);
|
|
ptmr_stop(&ps->perf);
|
|
return prob;
|
|
}
|
|
|
|
ps_seg_t *
|
|
ps_seg_iter(ps_decoder_t *ps)
|
|
{
|
|
ps_seg_t *itor;
|
|
|
|
ptmr_start(&ps->perf);
|
|
itor = ps_search_seg_iter(ps->search);
|
|
ptmr_stop(&ps->perf);
|
|
return itor;
|
|
}
|
|
|
|
/*
 * Advance a segment iterator; simply forwards to ps_search_seg_next()
 * and returns whatever it yields.
 */
ps_seg_t *
ps_seg_next(ps_seg_t *seg)
{
    ps_seg_t *following;

    following = ps_search_seg_next(seg);
    return following;
}
|
|
|
|
char const *
|
|
ps_seg_word(ps_seg_t *seg)
|
|
{
|
|
return seg->word;
|
|
}
|
|
|
|
void
|
|
ps_seg_frames(ps_seg_t *seg, int *out_sf, int *out_ef)
|
|
{
|
|
int uf;
|
|
uf = acmod_stream_offset(seg->search->acmod);
|
|
if (out_sf) *out_sf = seg->sf + uf;
|
|
if (out_ef) *out_ef = seg->ef + uf;
|
|
}
|
|
|
|
int32
|
|
ps_seg_prob(ps_seg_t *seg, int32 *out_ascr, int32 *out_lscr, int32 *out_lback)
|
|
{
|
|
if (out_ascr) *out_ascr = seg->ascr;
|
|
if (out_lscr) *out_lscr = seg->lscr;
|
|
if (out_lback) *out_lback = seg->lback;
|
|
return seg->prob;
|
|
}
|
|
|
|
void
|
|
ps_seg_free(ps_seg_t *seg)
|
|
{
|
|
ps_search_seg_free(seg);
|
|
}
|
|
|
|
ps_lattice_t *
|
|
ps_get_lattice(ps_decoder_t *ps)
|
|
{
|
|
return ps_search_lattice(ps->search);
|
|
}
|
|
|
|
ps_nbest_t *
|
|
ps_nbest(ps_decoder_t *ps)
|
|
{
|
|
ps_lattice_t *dag;
|
|
ngram_model_t *lmset;
|
|
ps_astar_t *nbest;
|
|
float32 lwf;
|
|
|
|
if (ps->search == NULL)
|
|
return NULL;
|
|
if ((dag = ps_get_lattice(ps)) == NULL)
|
|
return NULL;
|
|
|
|
/* FIXME: This is all quite specific to N-Gram search. Either we
|
|
* should make N-best a method for each search module or it needs
|
|
* to be abstracted to work for N-Gram and FSG. */
|
|
if (0 != strcmp(ps_search_type(ps->search), PS_SEARCH_TYPE_NGRAM)) {
|
|
lmset = NULL;
|
|
lwf = 1.0f;
|
|
} else {
|
|
lmset = ((ngram_search_t *)ps->search)->lmset;
|
|
lwf = ((ngram_search_t *)ps->search)->bestpath_fwdtree_lw_ratio;
|
|
}
|
|
|
|
nbest = ps_astar_start(dag, lmset, lwf, 0, -1, -1, -1);
|
|
|
|
nbest = ps_nbest_next(nbest);
|
|
|
|
return (ps_nbest_t *)nbest;
|
|
}
|
|
|
|
void
|
|
ps_nbest_free(ps_nbest_t *nbest)
|
|
{
|
|
ps_astar_finish(nbest);
|
|
}
|
|
|
|
/*
 * Step the N-best iterator with ps_astar_next().
 *
 * Returns nbest itself while ps_astar_next() keeps producing paths.
 * Once it returns NULL, nbest is passed to ps_nbest_free() and NULL is
 * returned.
 */
ps_nbest_t *
ps_nbest_next(ps_nbest_t *nbest)
{
    if (ps_astar_next(nbest) != NULL)
        return nbest;

    ps_nbest_free(nbest);
    return NULL;
}
|
|
|
|
char const *
|
|
ps_nbest_hyp(ps_nbest_t *nbest, int32 *out_score)
|
|
{
|
|
assert(nbest != NULL);
|
|
|
|
if (nbest->top == NULL)
|
|
return NULL;
|
|
if (out_score) *out_score = nbest->top->score;
|
|
return ps_astar_hyp(nbest, nbest->top);
|
|
}
|
|
|
|
ps_seg_t *
|
|
ps_nbest_seg(ps_nbest_t *nbest)
|
|
{
|
|
if (nbest->top == NULL)
|
|
return NULL;
|
|
|
|
return ps_astar_seg_iter(nbest, nbest->top, 1.0);
|
|
}
|
|
|
|
int
|
|
ps_get_n_frames(ps_decoder_t *ps)
|
|
{
|
|
return ps->acmod->output_frame + 1;
|
|
}
|
|
|
|
void
|
|
ps_get_utt_time(ps_decoder_t *ps, double *out_nspeech,
|
|
double *out_ncpu, double *out_nwall)
|
|
{
|
|
int32 frate;
|
|
|
|
frate = cmd_ln_int32_r(ps->config, "-frate");
|
|
*out_nspeech = (double)ps->acmod->output_frame / frate;
|
|
*out_ncpu = ps->perf.t_cpu;
|
|
*out_nwall = ps->perf.t_elapsed;
|
|
}
|
|
|
|
void
|
|
ps_get_all_time(ps_decoder_t *ps, double *out_nspeech,
|
|
double *out_ncpu, double *out_nwall)
|
|
{
|
|
int32 frate;
|
|
|
|
frate = cmd_ln_int32_r(ps->config, "-frate");
|
|
*out_nspeech = (double)ps->n_frame / frate;
|
|
*out_ncpu = ps->perf.t_tot_cpu;
|
|
*out_nwall = ps->perf.t_tot_elapsed;
|
|
}
|
|
|
|
uint8
|
|
ps_get_in_speech(ps_decoder_t *ps)
|
|
{
|
|
return fe_get_vad_state(ps->acmod->fe);
|
|
}
|
|
|
|
void
|
|
ps_search_init(ps_search_t *search, ps_searchfuncs_t *vt,
|
|
const char *type,
|
|
const char *name,
|
|
cmd_ln_t *config, acmod_t *acmod, dict_t *dict,
|
|
dict2pid_t *d2p)
|
|
{
|
|
search->vt = vt;
|
|
search->name = ckd_salloc(name);
|
|
search->type = ckd_salloc(type);
|
|
|
|
search->config = config;
|
|
search->acmod = acmod;
|
|
if (d2p)
|
|
search->d2p = dict2pid_retain(d2p);
|
|
else
|
|
search->d2p = NULL;
|
|
if (dict) {
|
|
search->dict = dict_retain(dict);
|
|
search->start_wid = dict_startwid(dict);
|
|
search->finish_wid = dict_finishwid(dict);
|
|
search->silence_wid = dict_silwid(dict);
|
|
search->n_words = dict_size(dict);
|
|
}
|
|
else {
|
|
search->dict = NULL;
|
|
search->start_wid = search->finish_wid = search->silence_wid = -1;
|
|
search->n_words = 0;
|
|
}
|
|
}
|
|
|
|
void
|
|
ps_search_base_free(ps_search_t *search)
|
|
{
|
|
/* FIXME: We will have refcounting on acmod, config, etc, at which
|
|
* point we will free them here too. */
|
|
ckd_free(search->name);
|
|
ckd_free(search->type);
|
|
dict_free(search->dict);
|
|
dict2pid_free(search->d2p);
|
|
ckd_free(search->hyp_str);
|
|
ps_lattice_free(search->dag);
|
|
}
|
|
|
|
void
|
|
ps_search_base_reinit(ps_search_t *search, dict_t *dict,
|
|
dict2pid_t *d2p)
|
|
{
|
|
dict_free(search->dict);
|
|
dict2pid_free(search->d2p);
|
|
/* FIXME: _retain() should just return NULL if passed NULL. */
|
|
if (dict) {
|
|
search->dict = dict_retain(dict);
|
|
search->start_wid = dict_startwid(dict);
|
|
search->finish_wid = dict_finishwid(dict);
|
|
search->silence_wid = dict_silwid(dict);
|
|
search->n_words = dict_size(dict);
|
|
}
|
|
else {
|
|
search->dict = NULL;
|
|
search->start_wid = search->finish_wid = search->silence_wid = -1;
|
|
search->n_words = 0;
|
|
}
|
|
if (d2p)
|
|
search->d2p = dict2pid_retain(d2p);
|
|
else
|
|
search->d2p = NULL;
|
|
}
|
|
|
|
void
|
|
ps_set_rawdata_size(ps_decoder_t *ps, int32 size)
|
|
{
|
|
acmod_set_rawdata_size(ps->acmod, size);
|
|
}
|
|
|
|
void
|
|
ps_get_rawdata(ps_decoder_t *ps, int16 **buffer, int32 *size)
|
|
{
|
|
acmod_get_rawdata(ps->acmod, buffer, size);
|
|
}
|