No longer discarding "burnt" decoders
See https://sourceforge.net/p/cmusphinx/discussion/help/thread/f1dd91c5/#1d89/0491/7f0c/60fc
This commit is contained in:
parent
eea1eb381c
commit
600b3429a7
|
@ -19,6 +19,7 @@
|
||||||
#include "audio/processing.h"
|
#include "audio/processing.h"
|
||||||
#include "parallel.h"
|
#include "parallel.h"
|
||||||
#include <boost/version.hpp>
|
#include <boost/version.hpp>
|
||||||
|
#include "ObjectPool.h"
|
||||||
|
|
||||||
extern "C" {
|
extern "C" {
|
||||||
#include <pocketsphinx.h>
|
#include <pocketsphinx.h>
|
||||||
|
@ -96,7 +97,7 @@ void sphinxLogCallback(void* user_data, err_lvl_t errorLevel, const char* format
|
||||||
logging::log(logLevel, message);
|
logging::log(logLevel, message);
|
||||||
}
|
}
|
||||||
|
|
||||||
BoundedTimeline<string> recognizeWords(const AudioClip& inputAudioClip, ps_decoder_t& decoder, bool& decoderIsStillUsable) {
|
BoundedTimeline<string> recognizeWords(const AudioClip& inputAudioClip, ps_decoder_t& decoder) {
|
||||||
// Convert audio stream to the exact format PocketSphinx requires
|
// Convert audio stream to the exact format PocketSphinx requires
|
||||||
const unique_ptr<AudioClip> audioClip = inputAudioClip.clone() | resample(sphinxSampleRate);
|
const unique_ptr<AudioClip> audioClip = inputAudioClip.clone() | resample(sphinxSampleRate);
|
||||||
|
|
||||||
|
@ -125,7 +126,6 @@ BoundedTimeline<string> recognizeWords(const AudioClip& inputAudioClip, ps_decod
|
||||||
BoundedTimeline<string> result(audioClip->getTruncatedRange());
|
BoundedTimeline<string> result(audioClip->getTruncatedRange());
|
||||||
bool noWordsRecognized = reinterpret_cast<ngram_search_t*>(decoder.search)->bpidx == 0;
|
bool noWordsRecognized = reinterpret_cast<ngram_search_t*>(decoder.search)->bpidx == 0;
|
||||||
if (noWordsRecognized) {
|
if (noWordsRecognized) {
|
||||||
decoderIsStillUsable = false;
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -136,11 +136,6 @@ BoundedTimeline<string> recognizeWords(const AudioClip& inputAudioClip, ps_decod
|
||||||
ps_seg_frames(it, &firstFrame, &lastFrame);
|
ps_seg_frames(it, &firstFrame, &lastFrame);
|
||||||
result.set(centiseconds(firstFrame), centiseconds(lastFrame + 1), word);
|
result.set(centiseconds(firstFrame), centiseconds(lastFrame + 1), word);
|
||||||
}
|
}
|
||||||
if (result.size() == 2) {
|
|
||||||
// The two recognized words are "<s>" and "</s>", which is really nothing.
|
|
||||||
// This seems to trigger a similar decoder corruption.
|
|
||||||
decoderIsStillUsable = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
@ -299,7 +294,6 @@ Timeline<Phone> utteranceToPhones(
|
||||||
const AudioClip& audioClip,
|
const AudioClip& audioClip,
|
||||||
TimeRange utterance,
|
TimeRange utterance,
|
||||||
ps_decoder_t& decoder,
|
ps_decoder_t& decoder,
|
||||||
bool& decoderIsStillUsable,
|
|
||||||
ProgressSink& utteranceProgressSink)
|
ProgressSink& utteranceProgressSink)
|
||||||
{
|
{
|
||||||
ProgressMerger utteranceProgressMerger(utteranceProgressSink);
|
ProgressMerger utteranceProgressMerger(utteranceProgressSink);
|
||||||
|
@ -309,7 +303,7 @@ Timeline<Phone> utteranceToPhones(
|
||||||
const unique_ptr<AudioClip> clipSegment = audioClip.clone() | segment(utterance);
|
const unique_ptr<AudioClip> clipSegment = audioClip.clone() | segment(utterance);
|
||||||
|
|
||||||
// Get words
|
// Get words
|
||||||
BoundedTimeline<string> words = recognizeWords(*clipSegment, decoder, decoderIsStillUsable);
|
BoundedTimeline<string> words = recognizeWords(*clipSegment, decoder);
|
||||||
wordRecognitionProgressSink.reportProgress(1.0);
|
wordRecognitionProgressSink.reportProgress(1.0);
|
||||||
for (Timed<string> timedWord : words) {
|
for (Timed<string> timedWord : words) {
|
||||||
timedWord.getTimeRange().shift(utterance.getStart());
|
timedWord.getTimeRange().shift(utterance.getStart());
|
||||||
|
@ -391,23 +385,8 @@ BoundedTimeline<Phone> detectPhones(
|
||||||
err_set_callback(sphinxLogCallback, nullptr);
|
err_set_callback(sphinxLogCallback, nullptr);
|
||||||
|
|
||||||
// Prepare pool of decoders
|
// Prepare pool of decoders
|
||||||
std::stack<lambda_unique_ptr<ps_decoder_t>> decoderPool;
|
ObjectPool<ps_decoder_t, lambda_unique_ptr<ps_decoder_t>> decoderPool(
|
||||||
std::mutex decoderPoolMutex;
|
[&dialog] { return createDecoder(dialog); });
|
||||||
auto getDecoder = [&] {
|
|
||||||
{
|
|
||||||
std::lock_guard<std::mutex> lock(decoderPoolMutex);
|
|
||||||
if (!decoderPool.empty()) {
|
|
||||||
auto decoder = std::move(decoderPool.top());
|
|
||||||
decoderPool.pop();
|
|
||||||
return std::move(decoder);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return createDecoder(dialog);
|
|
||||||
};
|
|
||||||
auto returnDecoder = [&](lambda_unique_ptr<ps_decoder_t> decoder) {
|
|
||||||
std::lock_guard<std::mutex> lock(decoderPoolMutex);
|
|
||||||
decoderPool.push(std::move(decoder));
|
|
||||||
};
|
|
||||||
|
|
||||||
BoundedTimeline<Phone> phones(audioClip->getTruncatedRange());
|
BoundedTimeline<Phone> phones(audioClip->getTruncatedRange());
|
||||||
std::mutex resultMutex;
|
std::mutex resultMutex;
|
||||||
|
@ -421,13 +400,9 @@ BoundedTimeline<Phone> detectPhones(
|
||||||
paddedTimeRange.trim(audioClip->getTruncatedRange());
|
paddedTimeRange.trim(audioClip->getTruncatedRange());
|
||||||
|
|
||||||
// Detect phones for utterance
|
// Detect phones for utterance
|
||||||
auto decoder = getDecoder();
|
auto decoder = decoderPool.acquire();
|
||||||
bool decoderIsStillUsable = true;
|
|
||||||
Timeline<Phone> utterancePhones =
|
Timeline<Phone> utterancePhones =
|
||||||
utteranceToPhones(*audioClip, paddedTimeRange, *decoder, decoderIsStillUsable, utteranceProgressSink);
|
utteranceToPhones(*audioClip, paddedTimeRange, *decoder, utteranceProgressSink);
|
||||||
if (decoderIsStillUsable) {
|
|
||||||
returnDecoder(std::move(decoder));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Copy phones to result timeline
|
// Copy phones to result timeline
|
||||||
std::lock_guard<std::mutex> lock(resultMutex);
|
std::lock_guard<std::mutex> lock(resultMutex);
|
||||||
|
|
Loading…
Reference in New Issue