Fix gaps in phonetic recognition

Randomly, entire utterances yielded no phones with the phonetic recognizer.
The cause was a check for empty utterances that made sense for word
recognition, but not for phonetic recognition.
This commit is contained in:
Daniel Wolf 2019-01-18 22:29:26 +01:00
parent 357cb0b65e
commit f3d4cfbb31
1 changed file with 12 additions and 3 deletions

View File

@ -219,10 +219,19 @@ BoundedTimeline<string> recognizeWords(const vector<int16_t>& audioBuffer, ps_de
BoundedTimeline<string> result( BoundedTimeline<string> result(
TimeRange(0_cs, centiseconds(100 * audioBuffer.size() / sphinxSampleRate)) TimeRange(0_cs, centiseconds(100 * audioBuffer.size() / sphinxSampleRate))
); );
const bool phonetic = cmd_ln_boolean_r(decoder.config, "-allphone_ci");
if (!phonetic) {
// If the decoder is in word mode (as opposed to phonetic recognition), it expects each
// utterance to contain speech. If it doesn't, ps_seg_word() logs the annoying error
// "Couldn't find <s> in first frame".
// Not every utterance does contain speech, however. In this case, we exit early to prevent
// the log output.
// We *don't* do that in phonetic mode because here, the same code would omit valid phones.
const bool noWordsRecognized = reinterpret_cast<ngram_search_t*>(decoder.search)->bpidx == 0; const bool noWordsRecognized = reinterpret_cast<ngram_search_t*>(decoder.search)->bpidx == 0;
if (noWordsRecognized) { if (noWordsRecognized) {
return result; return result;
} }
}
// Collect words // Collect words
for (ps_seg_t* it = ps_seg_iter(&decoder); it; it = ps_seg_next(it)) { for (ps_seg_t* it = ps_seg_iter(&decoder); it; it = ps_seg_next(it)) {