Fix gaps in phonetic recognition
Randomly, entire utterances yielded no phones with the phonetic recognizer. The cause was a check for empty utterances that made sense for word recognition, but not for phonetic recognition.
This commit is contained in:
parent
357cb0b65e
commit
f3d4cfbb31
|
@ -219,10 +219,19 @@ BoundedTimeline<string> recognizeWords(const vector<int16_t>& audioBuffer, ps_de
|
||||||
BoundedTimeline<string> result(
|
BoundedTimeline<string> result(
|
||||||
TimeRange(0_cs, centiseconds(100 * audioBuffer.size() / sphinxSampleRate))
|
TimeRange(0_cs, centiseconds(100 * audioBuffer.size() / sphinxSampleRate))
|
||||||
);
|
);
|
||||||
|
const bool phonetic = cmd_ln_boolean_r(decoder.config, "-allphone_ci");
|
||||||
|
if (!phonetic) {
|
||||||
|
// If the decoder is in word mode (as opposed to phonetic recognition), it expects each
|
||||||
|
// utterance to contain speech. If it doesn't, ps_seg_word() logs the annoying error
|
||||||
|
// "Couldn't find <s> in first frame".
|
||||||
|
// Not every utterance does contain speech, however. In this case, we exit early to prevent
|
||||||
|
// the log output.
|
||||||
|
// We *don't* do that in phonetic mode because here, the same code would omit valid phones.
|
||||||
const bool noWordsRecognized = reinterpret_cast<ngram_search_t*>(decoder.search)->bpidx == 0;
|
const bool noWordsRecognized = reinterpret_cast<ngram_search_t*>(decoder.search)->bpidx == 0;
|
||||||
if (noWordsRecognized) {
|
if (noWordsRecognized) {
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Collect words
|
// Collect words
|
||||||
for (ps_seg_t* it = ps_seg_iter(&decoder); it; it = ps_seg_next(it)) {
|
for (ps_seg_t* it = ps_seg_iter(&decoder); it; it = ps_seg_next(it)) {
|
||||||
|
|
Loading…
Reference in New Issue