From b78e418a8f0d2618602ba0931934c941d78be160 Mon Sep 17 00:00:00 2001 From: Daniel Wolf Date: Mon, 7 Mar 2016 21:28:31 +0100 Subject: [PATCH] Refactored audio streams * All streams are now mono (simplifies reasoning about samples) * Streams can be cloned * Streams can be seeked within --- CMakeLists.txt | 2 +- src/audioInput/AudioStream.cpp | 5 + src/audioInput/AudioStream.h | 15 +- src/audioInput/ChannelDownmixer.cpp | 31 ----- src/audioInput/ChannelDownmixer.h | 18 --- src/audioInput/SampleRateConverter.cpp | 76 ++++++----- src/audioInput/SampleRateConverter.h | 17 ++- src/audioInput/WaveFileReader.cpp | 181 +++++++++++++++---------- src/audioInput/WaveFileReader.h | 23 ++-- src/audioInput/waveFileWriting.cpp | 10 +- src/main.cpp | 2 +- src/phoneExtraction.cpp | 31 ++--- src/phoneExtraction.h | 3 +- 13 files changed, 214 insertions(+), 200 deletions(-) create mode 100644 src/audioInput/AudioStream.cpp delete mode 100644 src/audioInput/ChannelDownmixer.cpp delete mode 100644 src/audioInput/ChannelDownmixer.h diff --git a/CMakeLists.txt b/CMakeLists.txt index ca689e7..9a59eb2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -100,7 +100,7 @@ set(SOURCE_FILES src/phoneExtraction.cpp src/platformTools.cpp src/tools.cpp - src/audioInput/ChannelDownmixer.cpp + src/audioInput/AudioStream.cpp src/audioInput/SampleRateConverter.cpp src/audioInput/WaveFileReader.cpp src/audioInput/waveFileWriting.cpp diff --git a/src/audioInput/AudioStream.cpp b/src/audioInput/AudioStream.cpp new file mode 100644 index 0000000..a5424cc --- /dev/null +++ b/src/audioInput/AudioStream.cpp @@ -0,0 +1,5 @@ +#include "AudioStream.h" + +bool AudioStream::endOfStream() { + return getSampleIndex() >= getSampleCount(); +} diff --git a/src/audioInput/AudioStream.h b/src/audioInput/AudioStream.h index 59efb41..5b3e7ec 100644 --- a/src/audioInput/AudioStream.h +++ b/src/audioInput/AudioStream.h @@ -1,9 +1,16 @@ #pragma once +#include + +// A mono stream of floating-point samples. class AudioStream { public: - virtual int getFrameRate() = 0; - virtual int getFrameCount() = 0; - virtual int getChannelCount() = 0; - virtual bool getNextSample(float &sample) = 0; + virtual ~AudioStream() {} + virtual std::unique_ptr clone(bool reset) = 0; + virtual int getSampleRate() = 0; + virtual int getSampleCount() = 0; + virtual int getSampleIndex() = 0; + virtual void seek(int sampleIndex) = 0; + bool endOfStream(); + virtual float readSample() = 0; }; diff --git a/src/audioInput/ChannelDownmixer.cpp b/src/audioInput/ChannelDownmixer.cpp deleted file mode 100644 index 8f7aca6..0000000 --- a/src/audioInput/ChannelDownmixer.cpp +++ /dev/null @@ -1,31 +0,0 @@ -#include "ChannelDownmixer.h" - -ChannelDownmixer::ChannelDownmixer(std::unique_ptr inputStream) : - inputStream(std::move(inputStream)), - inputChannelCount(this->inputStream->getChannelCount()) -{} - -int ChannelDownmixer::getFrameRate() { - return inputStream->getFrameRate(); -} - -int ChannelDownmixer::getFrameCount() { - return inputStream->getFrameCount(); -} - -int ChannelDownmixer::getChannelCount() { - return 1; -} - -bool ChannelDownmixer::getNextSample(float &sample) { - float sum = 0; - for (int channelIndex = 0; channelIndex < inputChannelCount; channelIndex++) { - float currentSample; - if (!inputStream->getNextSample(currentSample)) return false; - - sum += currentSample; - } - - sample = sum / inputChannelCount; - return true; -} diff --git a/src/audioInput/ChannelDownmixer.h b/src/audioInput/ChannelDownmixer.h deleted file mode 100644 index 6b52ce1..0000000 --- a/src/audioInput/ChannelDownmixer.h +++ /dev/null @@ -1,18 +0,0 @@ -#pragma once - -#include "AudioStream.h" -#include - -// Converts a multi-channel audio stream to mono. -class ChannelDownmixer : public AudioStream { -public: - ChannelDownmixer(std::unique_ptr inputStream); - virtual int getFrameRate() override; - virtual int getFrameCount() override; - virtual int getChannelCount() override; - virtual bool getNextSample(float &sample) override; - -private: - std::unique_ptr inputStream; - int inputChannelCount; -}; diff --git a/src/audioInput/SampleRateConverter.cpp b/src/audioInput/SampleRateConverter.cpp index 2b4d566..5f97c87 100644 --- a/src/audioInput/SampleRateConverter.cpp +++ b/src/audioInput/SampleRateConverter.cpp @@ -1,80 +1,94 @@ #include #include "SampleRateConverter.h" +#include +#include using std::runtime_error; SampleRateConverter::SampleRateConverter(std::unique_ptr inputStream, int outputFrameRate) : inputStream(std::move(inputStream)), - downscalingFactor(static_cast(this->inputStream->getFrameRate()) / outputFrameRate), + downscalingFactor(static_cast(this->inputStream->getSampleRate()) / outputFrameRate), outputFrameRate(outputFrameRate), - outputFrameCount(std::lround(this->inputStream->getFrameCount() / downscalingFactor)), + outputFrameCount(std::lround(this->inputStream->getSampleCount() / downscalingFactor)), lastInputSample(0), lastInputSampleIndex(-1), nextOutputSampleIndex(0) { - if (this->inputStream->getChannelCount() != 1) { - throw runtime_error("Only mono input streams are supported."); - } - if (this->inputStream->getFrameRate() < outputFrameRate) { + if (this->inputStream->getSampleRate() < outputFrameRate) { throw runtime_error("Upsampling not supported."); } } -int SampleRateConverter::getFrameRate() { +SampleRateConverter::SampleRateConverter(const SampleRateConverter& rhs, bool reset) : + SampleRateConverter(rhs.inputStream->clone(reset), outputFrameRate) +{ + nextOutputSampleIndex = reset ? 0 : rhs.nextOutputSampleIndex; +} + +std::unique_ptr SampleRateConverter::clone(bool reset) { + return std::make_unique(*this, reset); +} + +int SampleRateConverter::getSampleRate() { return outputFrameRate; } -int SampleRateConverter::getFrameCount() { +int SampleRateConverter::getSampleCount() { return outputFrameCount; } -int SampleRateConverter::getChannelCount() { - return 1; +int SampleRateConverter::getSampleIndex() { + return nextOutputSampleIndex; } -bool SampleRateConverter::getNextSample(float &sample) { - if (nextOutputSampleIndex >= outputFrameCount) return false; +void SampleRateConverter::seek(int sampleIndex) { + if (sampleIndex < 0 || sampleIndex >= outputFrameCount) throw std::invalid_argument("sampleIndex out of range."); - double start = nextOutputSampleIndex * downscalingFactor; - double end = (nextOutputSampleIndex + 1) * downscalingFactor; + nextOutputSampleIndex = sampleIndex; +} + +float SampleRateConverter::readSample() { + if (nextOutputSampleIndex >= outputFrameCount) throw std::out_of_range("End of stream."); + + double inputStart = nextOutputSampleIndex * downscalingFactor; + double inputEnd = (nextOutputSampleIndex + 1) * downscalingFactor; - sample = mean(start, end); nextOutputSampleIndex++; - return true; + return mean(inputStart, inputEnd); } -float SampleRateConverter::mean(double start, double end) { +float SampleRateConverter::mean(double inputStart, double inputEnd) { // Calculate weighted sum... double sum = 0; // ... first sample (weight <= 1) - int startIndex = static_cast(start); - sum += getInputSample(startIndex) * ((startIndex + 1) - start); + int startIndex = static_cast(inputStart); + sum += getInputSample(startIndex) * ((startIndex + 1) - inputStart); // ... middle samples (weight 1 each) - int endIndex = static_cast(end); + int endIndex = static_cast(inputEnd); for (int index = startIndex + 1; index < endIndex; index++) { sum += getInputSample(index); } // ... last sample (weight < 1) - sum += getInputSample(endIndex) * (end - endIndex); + sum += getInputSample(endIndex) * (inputEnd - endIndex); - return static_cast(sum / (end - start)); + return static_cast(sum / (inputEnd - inputStart)); } float SampleRateConverter::getInputSample(int sampleIndex) { + sampleIndex = std::min(sampleIndex, inputStream->getSampleCount() - 1); + if (sampleIndex < 0) return 0.0f; + if (sampleIndex == lastInputSampleIndex) { return lastInputSample; } - if (sampleIndex == lastInputSampleIndex + 1) { - lastInputSampleIndex++; - // Read the next sample. - // If the input stream has no more samples (at the very end), - // we'll just reuse the last sample we have. - inputStream->getNextSample(lastInputSample); - return lastInputSample; - } - throw runtime_error("Can only return the last sample or the one following it."); + if (sampleIndex != inputStream->getSampleIndex()) { + inputStream->seek(sampleIndex); + } + lastInputSample = inputStream->readSample(); + lastInputSampleIndex = sampleIndex; + return lastInputSample; } diff --git a/src/audioInput/SampleRateConverter.h b/src/audioInput/SampleRateConverter.h index 4777fe8..5b9d35a 100644 --- a/src/audioInput/SampleRateConverter.h +++ b/src/audioInput/SampleRateConverter.h @@ -1,22 +1,21 @@ #pragma once #include -#include #include "AudioStream.h" class SampleRateConverter : public AudioStream { public: SampleRateConverter(std::unique_ptr inputStream, int outputFrameRate); - virtual int getFrameRate() override; - virtual int getFrameCount() override; - virtual int getChannelCount() override; - virtual bool getNextSample(float &sample) override; + SampleRateConverter(const SampleRateConverter& rhs, bool reset); + std::unique_ptr clone(bool reset) override; + int getSampleRate() override; + int getSampleCount() override; + int getSampleIndex() override; + void seek(int sampleIndex) override; + float readSample() override; private: - // The stream we're reading from std::unique_ptr inputStream; - - // input frame rate / output frame rate - double downscalingFactor; + double downscalingFactor; // input frame rate / output frame rate int outputFrameRate; int outputFrameCount; diff --git a/src/audioInput/WaveFileReader.cpp b/src/audioInput/WaveFileReader.cpp index fcf13ab..6a02323 100644 --- a/src/audioInput/WaveFileReader.cpp +++ b/src/audioInput/WaveFileReader.cpp @@ -24,10 +24,12 @@ enum class Codec { Float = 0x03 }; -WaveFileReader::WaveFileReader(boost::filesystem::path filePath) { - // Open file - file.exceptions(std::ifstream::failbit | std::ifstream::badbit); - file.open(filePath, std::ios::binary); +WaveFileReader::WaveFileReader(boost::filesystem::path filePath) : + filePath(filePath), + file(), + sampleIndex(0) +{ + openFile(); // Read header uint32_t rootChunkId = read(file); @@ -42,111 +44,148 @@ WaveFileReader::WaveFileReader(boost::filesystem::path filePath) { // Read chunks until we reach the data chunk bool reachedDataChunk = false; - int bytesPerSample = 0; + bytesPerSample = 0; do { uint32_t chunkId = read(file); int chunkSize = read(file); switch (chunkId) { - case fourcc('f', 'm', 't', ' '): { - // Read relevant data - Codec codec = (Codec) read(file); - channelCount = read(file); - frameRate = read(file); - read(file); // Bytes per second - int frameSize = read(file); - int bitsPerSample = read(file); + case fourcc('f', 'm', 't', ' '): { + // Read relevant data + Codec codec = (Codec)read(file); + channelCount = read(file); + frameRate = read(file); + read(file); // Bytes per second + int frameSize = read(file); + int bitsPerSample = read(file); - // We're read 16 bytes so far. Skip the remainder. - file.seekg(roundToEven(chunkSize) - 16, file.cur); + // We've read 16 bytes so far. Skip the remainder. + file.seekg(roundToEven(chunkSize) - 16, file.cur); - // Determine sample format - switch (codec) { - case Codec::PCM: - // Determine sample size. - // According to the WAVE standard, sample sizes that are not multiples of 8 bits - // (e.g. 12 bits) can be treated like the next-larger byte size. - if (bitsPerSample == 8) { - sampleFormat = SampleFormat::UInt8; - bytesPerSample = 1; - } else if (bitsPerSample <= 16) { - sampleFormat = SampleFormat::Int16; - bytesPerSample = 2; - } else if (bitsPerSample <= 24) { - sampleFormat = SampleFormat::Int24; - bytesPerSample = 3; - } else { - throw runtime_error( - format("Unsupported sample format: {}-bit integer samples.", bitsPerSample)); - } - if (bytesPerSample != frameSize / channelCount) { - throw runtime_error("Unsupported sample organization."); - } - break; - case Codec::Float: - if (bitsPerSample == 32) { - sampleFormat = SampleFormat::Float32; - bytesPerSample = 4; - } else { - throw runtime_error(format("Unsupported sample format: {}-bit floating-point samples.", bitsPerSample)); - } - break; - default: - throw runtime_error("Unsupported sample format. Only uncompressed formats are supported."); + // Determine sample format + switch (codec) { + case Codec::PCM: + // Determine sample size. + // According to the WAVE standard, sample sizes that are not multiples of 8 bits + // (e.g. 12 bits) can be treated like the next-larger byte size. + if (bitsPerSample == 8) { + sampleFormat = SampleFormat::UInt8; + bytesPerSample = 1; + } else if (bitsPerSample <= 16) { + sampleFormat = SampleFormat::Int16; + bytesPerSample = 2; + } else if (bitsPerSample <= 24) { + sampleFormat = SampleFormat::Int24; + bytesPerSample = 3; + } else { + throw runtime_error( + format("Unsupported sample format: {}-bit integer samples.", bitsPerSample)); + } + if (bytesPerSample != frameSize / channelCount) { + throw runtime_error("Unsupported sample organization."); } break; - } - case fourcc('d', 'a', 't', 'a'): { - reachedDataChunk = true; - remainingSamples = chunkSize / bytesPerSample; - frameCount = remainingSamples / channelCount; - break; - } - default: { - // Skip unknown chunk - file.seekg(roundToEven(chunkSize), file.cur); + case Codec::Float: + if (bitsPerSample == 32) { + sampleFormat = SampleFormat::Float32; + bytesPerSample = 4; + } else { + throw runtime_error(format("Unsupported sample format: {}-bit floating-point samples.", bitsPerSample)); + } break; + default: + throw runtime_error("Unsupported sample format. Only uncompressed formats are supported."); } + break; + } + case fourcc('d', 'a', 't', 'a'): { + reachedDataChunk = true; + dataOffset = file.tellg(); + sampleCount = chunkSize / bytesPerSample; + frameCount = sampleCount / channelCount; + break; + } + default: { + // Skip unknown chunk + file.seekg(roundToEven(chunkSize), file.cur); + break; + } } } while (!reachedDataChunk); } -int WaveFileReader::getFrameRate() { +WaveFileReader::WaveFileReader(const WaveFileReader& rhs, bool reset) : + filePath(rhs.filePath), + file(), + bytesPerSample(rhs.bytesPerSample), + sampleFormat(rhs.sampleFormat), + frameRate(rhs.frameRate), + frameCount(rhs.frameCount), + channelCount(rhs.channelCount), + sampleCount(rhs.sampleCount), + dataOffset(rhs.dataOffset), + sampleIndex(-1) +{ + openFile(); + seek(reset ? 0 : rhs.sampleIndex); +} + +std::unique_ptr WaveFileReader::clone(bool reset) { + return std::make_unique(*this, reset); +} + +void WaveFileReader::openFile() { + file.exceptions(std::ifstream::failbit | std::ifstream::badbit); + file.open(filePath, std::ios::binary); +} + +int WaveFileReader::getSampleRate() { return frameRate; } -int WaveFileReader::getFrameCount() { +int WaveFileReader::getSampleCount() { return frameCount; } -int WaveFileReader::getChannelCount() { - return channelCount; +int WaveFileReader::getSampleIndex() { + return sampleIndex; } -bool WaveFileReader::getNextSample(float &sample) { - if (remainingSamples == 0) return false; - remainingSamples--; +void WaveFileReader::seek(int sampleIndex) { + if (sampleIndex < 0 || sampleIndex >= sampleCount) throw std::invalid_argument("sampleIndex out of range."); - switch (sampleFormat) { + file.seekg(dataOffset + sampleIndex * channelCount * bytesPerSample); + this->sampleIndex = sampleIndex; +} + +float WaveFileReader::readSample() { + if (sampleIndex + channelCount > sampleCount) throw std::out_of_range("End of stream."); + sampleIndex += channelCount; + + float sum = 0; + for (int channelIndex = 0; channelIndex < channelCount; channelIndex++) { + switch (sampleFormat) { case SampleFormat::UInt8: { uint8_t raw = read(file); - sample = toNormalizedFloat(raw, 0, UINT8_MAX); + sum += toNormalizedFloat(raw, 0, UINT8_MAX); break; } case SampleFormat::Int16: { int16_t raw = read(file); - sample = toNormalizedFloat(raw, INT16_MIN, INT16_MAX); + sum += toNormalizedFloat(raw, INT16_MIN, INT16_MAX); break; } case SampleFormat::Int24: { int raw = read(file); if (raw & 0x800000) raw |= 0xFF000000; // Fix two's complement - sample = toNormalizedFloat(raw, INT24_MIN, INT24_MAX); + sum += toNormalizedFloat(raw, INT24_MIN, INT24_MAX); break; } case SampleFormat::Float32: { - sample = read(file); + sum += read(file); break; } + } } - return true; + + return sum / channelCount; } diff --git a/src/audioInput/WaveFileReader.h b/src/audioInput/WaveFileReader.h index 171defc..0e7ffd6 100644 --- a/src/audioInput/WaveFileReader.h +++ b/src/audioInput/WaveFileReader.h @@ -1,8 +1,5 @@ #pragma once -#include -#include -#include #include #include #include "AudioStream.h" @@ -17,16 +14,26 @@ enum class SampleFormat { class WaveFileReader : public AudioStream { public: WaveFileReader(boost::filesystem::path filePath); - virtual int getFrameRate() override ; - virtual int getFrameCount() override; - virtual int getChannelCount() override; - virtual bool getNextSample(float &sample) override; + WaveFileReader(const WaveFileReader& rhs, bool reset); + std::unique_ptr clone(bool reset) override; + int getSampleRate() override ; + int getSampleCount() override; + int getSampleIndex() override; + void seek(int sampleIndex) override; + float readSample() override; private: + void openFile(); + +private: + boost::filesystem::path filePath; boost::filesystem::ifstream file; + int bytesPerSample; SampleFormat sampleFormat; int frameRate; int frameCount; int channelCount; - int remainingSamples; + int sampleCount; + size_t dataOffset; + int sampleIndex; }; diff --git a/src/audioInput/waveFileWriting.cpp b/src/audioInput/waveFileWriting.cpp index f75625c..b2f014d 100644 --- a/src/audioInput/waveFileWriting.cpp +++ b/src/audioInput/waveFileWriting.cpp @@ -13,9 +13,9 @@ void createWaveFile(std::unique_ptr inputStream, std::string fileNa // Write RIFF chunk write(fourcc('R', 'I', 'F', 'F'), file); uint32_t formatChunkSize = 16; - uint16_t channelCount = static_cast(inputStream->getChannelCount()); + uint16_t channelCount = 1; uint16_t frameSize = static_cast(channelCount * sizeof(float)); - uint32_t dataChunkSize = static_cast(inputStream->getFrameCount() * frameSize); + uint32_t dataChunkSize = static_cast(inputStream->getSampleCount() * frameSize); uint32_t riffChunkSize = 4 + (8 + formatChunkSize) + (8 + dataChunkSize); write(riffChunkSize, file); write(fourcc('W', 'A', 'V', 'E'), file); @@ -26,7 +26,7 @@ void createWaveFile(std::unique_ptr inputStream, std::string fileNa uint16_t codec = 0x03; // 32-bit float write(codec, file); write(channelCount, file); - uint32_t frameRate = static_cast(inputStream->getFrameRate()); + uint32_t frameRate = static_cast(inputStream->getSampleRate()); write(frameRate, file); uint32_t bytesPerSecond = frameRate * frameSize; write(bytesPerSecond, file); @@ -37,8 +37,8 @@ void createWaveFile(std::unique_ptr inputStream, std::string fileNa // Write data chunk write(fourcc('d', 'a', 't', 'a'), file); write(dataChunkSize, file); - float sample; - while (inputStream->getNextSample(sample)) { + while (!inputStream->endOfStream()) { + float sample = inputStream->readSample(); write(sample, file); } } diff --git a/src/main.cpp b/src/main.cpp index 40294eb..8c3557d 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -107,7 +107,7 @@ int main(int argc, char *argv[]) { { ProgressBar progressBar; phones = detectPhones( - [&inputFileName]() { return createAudioStream(inputFileName.getValue()); }, + createAudioStream(inputFileName.getValue()), dialog.getValue(), progressBar); } diff --git a/src/phoneExtraction.cpp b/src/phoneExtraction.cpp index 7c99189..667206d 100644 --- a/src/phoneExtraction.cpp +++ b/src/phoneExtraction.cpp @@ -3,7 +3,6 @@ #include #include "phoneExtraction.h" #include "audioInput/SampleRateConverter.h" -#include "audioInput/ChannelDownmixer.h" #include "platformTools.h" #include "tools.h" #include @@ -33,17 +32,12 @@ using std::regex; using std::regex_replace; using std::chrono::duration; -unique_ptr to16kHzMono(unique_ptr stream) { - // Downmix, if required - if (stream->getChannelCount() != 1) { - stream.reset(new ChannelDownmixer(std::move(stream))); - } - +unique_ptr to16kHz(unique_ptr stream) { // Downsample, if required - if (stream->getFrameRate() < 16000) { + if (stream->getSampleRate() < 16000) { throw invalid_argument("Audio sample rate must not be below 16kHz."); } - if (stream->getFrameRate() != 16000) { + if (stream->getSampleRate() != 16000) { stream.reset(new SampleRateConverter(std::move(stream), 16000)); } @@ -88,7 +82,7 @@ int16_t floatSampleToInt16(float sample) { return static_cast(((sample + 1) / 2) * (INT16_MAX - INT16_MIN) + INT16_MIN); } -void processAudioStream(AudioStream& audioStream16kHzMono, function&)> processBuffer, ProgressSink& progressSink) { +void processAudioStream(AudioStream& audioStream16kHz, function&)> processBuffer, ProgressSink& progressSink) { // Process entire sound file vector buffer; const int capacity = 1600; // 0.1 second capacity @@ -97,10 +91,9 @@ void processAudioStream(AudioStream& audioStream16kHzMono, function(sampleCount) / audioStream16kHzMono.getFrameCount()); + progressSink.reportProgress(static_cast(sampleCount) / audioStream16kHz.getSampleCount()); } while (buffer.size()); } @@ -158,7 +151,7 @@ void sphinxLogCallback(void* user_data, err_lvl_t errorLevel, const char* format vector recognizeWords(unique_ptr audioStream, ps_decoder_t& recognizer, ProgressSink& progressSink) { // Convert audio stream to the exact format PocketSphinx requires - audioStream = to16kHzMono(std::move(audioStream)); + audioStream = to16kHz(std::move(audioStream)); // Start recognition int error = ps_start_utt(&recognizer); @@ -243,7 +236,7 @@ map getPhoneAlignment(const vector& wordIds, uniqu if (error) throw runtime_error("Error populating alignment struct."); // Convert audio stream to the exact format PocketSphinx requires - audioStream = to16kHzMono(std::move(audioStream)); + audioStream = to16kHz(std::move(audioStream)); // Create search structure acmod_t* acousticModel = recognizer.acmod; @@ -304,7 +297,7 @@ map getPhoneAlignment(const vector& wordIds, uniqu } map detectPhones( - std::function(void)> createAudioStream, + unique_ptr audioStream, boost::optional dialog, ProgressSink& progressSink) { @@ -329,13 +322,13 @@ map detectPhones( // Get words vector words = dialog ? extractDialogWords(*dialog) - : recognizeWords(createAudioStream(), *recognizer.get(), wordRecognitionProgressSink); + : recognizeWords(audioStream->clone(true), *recognizer.get(), wordRecognitionProgressSink); // Look up words in dictionary vector wordIds = getWordIds(words, *recognizer->dict); // Align the word's phones with speech - map result = getPhoneAlignment(wordIds, createAudioStream(), *recognizer.get(), alignmentProgressSink); + map result = getPhoneAlignment(wordIds, std::move(audioStream), *recognizer.get(), alignmentProgressSink); return result; } catch (...) { diff --git a/src/phoneExtraction.h b/src/phoneExtraction.h index a01f3b2..7d433a2 100644 --- a/src/phoneExtraction.h +++ b/src/phoneExtraction.h @@ -2,7 +2,6 @@ #include #include -#include #include "audioInput/AudioStream.h" #include "Phone.h" #include "centiseconds.h" @@ -10,6 +9,6 @@ #include std::map detectPhones( - std::function(void)> createAudioStream, + std::unique_ptr audioStream, boost::optional dialog, ProgressSink& progressSink);