Refactored audio handling
Audio clips can now be passed around as const references and no longer carry any state.
This commit is contained in:
parent
799f334fa7
commit
26cae93478
|
@ -197,11 +197,10 @@ set(SOURCE_FILES
|
|||
src/phoneExtraction.cpp src/phoneExtraction.h
|
||||
src/platformTools.cpp src/platformTools.h
|
||||
src/tools.cpp src/tools.h
|
||||
src/audio/AudioStream.cpp src/audio/AudioStream.h
|
||||
src/audio/AudioStreamSegment.cpp src/audio/AudioStreamSegment.h
|
||||
src/audio/AudioClip.cpp src/audio/AudioClip.h
|
||||
src/audio/AudioSegment.cpp src/audio/AudioSegment.h
|
||||
src/audio/DCOffset.cpp src/audio/DCOffset.h
|
||||
src/audio/SampleRateConverter.cpp src/audio/SampleRateConverter.h
|
||||
src/audio/UnboundedStream.cpp src/audio/UnboundedStream.h
|
||||
src/audio/voiceActivityDetection.cpp src/audio/voiceActivityDetection.h
|
||||
src/audio/WaveFileReader.cpp src/audio/WaveFileReader.h
|
||||
src/audio/waveFileWriting.cpp src/audio/waveFileWriting.h
|
||||
|
|
|
@ -0,0 +1,65 @@
|
|||
#include "AudioClip.h"
#include <format.h>

using std::invalid_argument;

// Returns the clip's duration, truncated to whole centiseconds.
// Integer division by the sample rate floors the result, so up to
// (sampleRate - 1) trailing samples are not reflected in the range.
TimeRange AudioClip::getTruncatedRange() const {
	return TimeRange(0cs, centiseconds(100 * size() / getSampleRate()));
}

// Wraps an unchecked sample reader with bounds checking and a one-sample
// cache: repeated reads of the same index return the cached value without
// touching the underlying reader.
class SafeSampleReader {
public:
	SafeSampleReader(SampleReader unsafeRead, AudioClip::size_type size);
	// Returns the sample at index. Throws std::invalid_argument if index is
	// negative or >= size.
	AudioClip::value_type operator()(AudioClip::size_type index);
private:
	SampleReader unsafeRead;
	AudioClip::size_type size;
	AudioClip::size_type lastIndex = -1;	// -1: nothing cached yet
	AudioClip::value_type lastSample = 0;
};

SafeSampleReader::SafeSampleReader(SampleReader unsafeRead, AudioClip::size_type size) :
	unsafeRead(unsafeRead),
	size(size)
{}

inline AudioClip::value_type SafeSampleReader::operator()(AudioClip::size_type index) {
	if (index < 0) {
		throw invalid_argument(fmt::format("Cannot read from sample index {}. Index < 0.", index));
	}
	if (index >= size) {
		throw invalid_argument(fmt::format("Cannot read from sample index {}. Clip size is {}.", index, size));
	}
	if (index == lastIndex) {
		return lastSample;
	}

	lastIndex = index;
	lastSample = unsafeRead(index);
	return lastSample;
}

// Returns a bounds-checked reader over this clip's samples.
SampleReader AudioClip::createSampleReader() const {
	return SafeSampleReader(createUnsafeSampleReader(), size());
}

AudioClip::iterator AudioClip::begin() const {
	return SampleIterator(*this, 0);
}

AudioClip::iterator AudioClip::end() const {
	return SampleIterator(*this, size());
}

// Pipe syntax: `clip | effect` applies the effect and yields the new clip.
std::unique_ptr<AudioClip> operator|(std::unique_ptr<AudioClip> clip, AudioEffect effect) {
	return effect(std::move(clip));
}

// Creates a singular iterator with no sample reader. It may be assigned and
// compared, but must not be dereferenced.
// NOTE(review): dereferencing would evaluate an empty Lazy<SampleReader> —
// confirm Lazy's behavior in that case.
SampleIterator::SampleIterator() :
	sampleIndex(0)
{}

// The sample reader is created lazily on first dereference, so iterator
// arithmetic and comparison never touch the audio data.
// NOTE(review): the lambda captures audioClip by reference — the clip must
// outlive the first dereference of this iterator.
SampleIterator::SampleIterator(const AudioClip& audioClip, size_type sampleIndex) :
	sampleReader([&audioClip] { return audioClip.createSampleReader(); }),
	sampleIndex(sampleIndex)
{}
|
|
@ -0,0 +1,141 @@
|
|||
#pragma once
#include <memory>
#include <cstdint>
#include "TimeRange.h"
#include <functional>
#include <iterator>
#include "Lazy.h"

class AudioClip;
class SampleIterator;

// A mono clip of floating-point samples. Clips carry no read position;
// reading happens through sample readers or iterators, so clips can be
// shared as const references.
class AudioClip {
public:
	using value_type = float;
	using size_type = int64_t;
	using difference_type = int64_t;
	using iterator = SampleIterator;
	// Function returning the sample at a given index.
	using SampleReader = std::function<value_type(size_type)>;

	virtual ~AudioClip() {}
	virtual std::unique_ptr<AudioClip> clone() const = 0;
	virtual int getSampleRate() const = 0;
	// Number of samples in the clip.
	virtual size_type size() const = 0;
	// Clip duration, truncated to whole centiseconds.
	TimeRange getTruncatedRange() const;
	// Returns a bounds-checked sample reader.
	SampleReader createSampleReader() const;
	iterator begin() const;
	iterator end() const;
private:
	// Implementations return a reader that performs no bounds checking.
	virtual SampleReader createUnsafeSampleReader() const = 0;
};

// A transformation from clip to clip, composable via operator|.
using AudioEffect = std::function<std::unique_ptr<AudioClip>(std::unique_ptr<AudioClip>)>;

std::unique_ptr<AudioClip> operator|(std::unique_ptr<AudioClip> clip, AudioEffect effect);

using SampleReader = AudioClip::SampleReader;

// Random-access-style iterator over the samples of an AudioClip.
class SampleIterator {
public:
	// Iterator trait typedefs so standard algorithms (std::distance,
	// std::copy, ...) can use this type through std::iterator_traits.
	// Note that operator* returns by value, making this strictly a proxy
	// iterator (like std::vector<bool>'s); the declared category is what the
	// supported operations provide.
	using iterator_category = std::random_access_iterator_tag;
	using value_type = AudioClip::value_type;
	using size_type = AudioClip::size_type;
	using difference_type = AudioClip::difference_type;
	using pointer = const value_type*;
	using reference = value_type;

	// Creates a singular iterator: assignable and comparable, but not
	// dereferenceable.
	SampleIterator();

	size_type getSampleIndex() const;
	void seek(size_type sampleIndex);
	value_type operator*() const;
	value_type operator[](difference_type n) const;

private:
	friend AudioClip;
	SampleIterator(const AudioClip& audioClip, size_type sampleIndex);

	// Created lazily on first dereference.
	Lazy<SampleReader> sampleReader;
	size_type sampleIndex;
};

inline SampleIterator::size_type SampleIterator::getSampleIndex() const {
	return sampleIndex;
}

inline void SampleIterator::seek(size_type sampleIndex) {
	this->sampleIndex = sampleIndex;
}

inline SampleIterator::value_type SampleIterator::operator*() const {
	return (*sampleReader)(sampleIndex);
}

inline SampleIterator::value_type SampleIterator::operator[](difference_type n) const {
	return (*sampleReader)(sampleIndex + n);
}

// Comparisons are index-based only; iterators from different clips are not
// distinguished.
inline bool operator==(const SampleIterator& lhs, const SampleIterator& rhs) {
	return lhs.getSampleIndex() == rhs.getSampleIndex();
}

inline bool operator!=(const SampleIterator& lhs, const SampleIterator& rhs) {
	return lhs.getSampleIndex() != rhs.getSampleIndex();
}

inline bool operator<(const SampleIterator& lhs, const SampleIterator& rhs) {
	return lhs.getSampleIndex() < rhs.getSampleIndex();
}

inline bool operator>(const SampleIterator& lhs, const SampleIterator& rhs) {
	return lhs.getSampleIndex() > rhs.getSampleIndex();
}

inline bool operator<=(const SampleIterator& lhs, const SampleIterator& rhs) {
	return lhs.getSampleIndex() <= rhs.getSampleIndex();
}

inline bool operator>=(const SampleIterator& lhs, const SampleIterator& rhs) {
	return lhs.getSampleIndex() >= rhs.getSampleIndex();
}

inline SampleIterator& operator+=(SampleIterator& it, SampleIterator::difference_type n) {
	it.seek(it.getSampleIndex() + n);
	return it;
}

inline SampleIterator& operator-=(SampleIterator& it, SampleIterator::difference_type n) {
	it.seek(it.getSampleIndex() - n);
	return it;
}

inline SampleIterator& operator++(SampleIterator& it) {
	return operator+=(it, 1);
}

inline SampleIterator operator++(SampleIterator& it, int) {
	SampleIterator tmp(it);
	operator++(it);
	return tmp;
}

inline SampleIterator& operator--(SampleIterator& it) {
	return operator-=(it, 1);
}

inline SampleIterator operator--(SampleIterator& it, int) {
	SampleIterator tmp(it);
	operator--(it);
	return tmp;
}

inline SampleIterator operator+(const SampleIterator& it, SampleIterator::difference_type n) {
	SampleIterator result(it);
	result += n;
	return result;
}

// Symmetric form required of random-access iterators: n + it.
inline SampleIterator operator+(SampleIterator::difference_type n, const SampleIterator& it) {
	return it + n;
}

inline SampleIterator operator-(const SampleIterator& it, SampleIterator::difference_type n) {
	SampleIterator result(it);
	result -= n;
	return result;
}

inline SampleIterator::difference_type operator-(const SampleIterator& lhs, const SampleIterator& rhs) {
	return lhs.getSampleIndex() - rhs.getSampleIndex();
}
|
|
@ -0,0 +1,30 @@
|
|||
#include "AudioSegment.h"

using std::unique_ptr;
using std::make_unique;

// Creates a zero-copy view onto a sub-range of inputClip.
// Throws std::invalid_argument if the range reaches outside the clip.
AudioSegment::AudioSegment(std::unique_ptr<AudioClip> inputClip, const TimeRange& range) :
	inputClip(std::move(inputClip)),
	sampleOffset(static_cast<int64_t>(range.getStart().count()) * this->inputClip->getSampleRate() / 100),
	sampleCount(static_cast<int64_t>(range.getLength().count()) * this->inputClip->getSampleRate() / 100)
{
	const bool withinClip =
		sampleOffset >= 0 && sampleOffset + sampleCount <= this->inputClip->size();
	if (!withinClip) {
		throw std::invalid_argument("Segment extends beyond input clip.");
	}
}

unique_ptr<AudioClip> AudioSegment::clone() const {
	return make_unique<AudioSegment>(*this);
}

// Delegates to the input clip's reader, shifted by the segment's offset.
SampleReader AudioSegment::createUnsafeSampleReader() const {
	auto innerRead = inputClip->createSampleReader();
	const size_type offset = sampleOffset;
	return [innerRead, offset](size_type index) {
		return innerRead(offset + index);
	};
}

// Effect that cuts a clip down to the given time range.
AudioEffect segment(const TimeRange& range) {
	return [range](unique_ptr<AudioClip> inputClip) {
		return make_unique<AudioSegment>(std::move(inputClip), range);
	};
}
|
|
@ -0,0 +1,26 @@
|
|||
#pragma once
#include "AudioClip.h"

// A clip exposing a contiguous time range of another clip without copying
// any sample data.
class AudioSegment : public AudioClip {
public:
	// Throws std::invalid_argument if range lies outside inputClip.
	AudioSegment(std::unique_ptr<AudioClip> inputClip, const TimeRange& range);
	std::unique_ptr<AudioClip> clone() const override;
	int getSampleRate() const override;
	size_type size() const override;

private:
	SampleReader createUnsafeSampleReader() const override;

	// shared_ptr so that clones of this segment share the same input clip.
	std::shared_ptr<AudioClip> inputClip;
	// Segment start and length, in samples of the input clip.
	size_type sampleOffset, sampleCount;
};

inline int AudioSegment::getSampleRate() const {
	return inputClip->getSampleRate();
}

inline AudioClip::size_type AudioSegment::size() const {
	return sampleCount;
}

// Effect that cuts a clip down to the given time range.
AudioEffect segment(const TimeRange& range);
|
|
@ -1,9 +0,0 @@
|
|||
#include "AudioStream.h"
|
||||
|
||||
TimeRange AudioStream::getTruncatedRange() const {
|
||||
return TimeRange(0cs, centiseconds(100 * getSampleCount() / getSampleRate()));
|
||||
}
|
||||
|
||||
bool AudioStream::endOfStream() const {
|
||||
return getSampleIndex() >= getSampleCount();
|
||||
}
|
|
@ -1,18 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include "TimeRange.h"
|
||||
|
||||
// A mono stream of floating-point samples.
|
||||
class AudioStream {
|
||||
public:
|
||||
virtual ~AudioStream() {}
|
||||
virtual std::unique_ptr<AudioStream> clone(bool reset) const = 0;
|
||||
virtual int getSampleRate() const = 0;
|
||||
virtual int64_t getSampleCount() const = 0;
|
||||
TimeRange getTruncatedRange() const;
|
||||
virtual int64_t getSampleIndex() const = 0;
|
||||
virtual void seek(int64_t sampleIndex) = 0;
|
||||
bool endOfStream() const;
|
||||
virtual float readSample() = 0;
|
||||
};
|
|
@ -1,50 +0,0 @@
|
|||
#include "AudioStreamSegment.h"
|
||||
#include <stdexcept>
|
||||
|
||||
AudioStreamSegment::AudioStreamSegment(std::unique_ptr<AudioStream> audioStream, const TimeRange& range) :
|
||||
audioStream(std::move(audioStream)),
|
||||
sampleOffset(static_cast<int64_t>(range.getStart().count()) * this->audioStream->getSampleRate() / 100),
|
||||
sampleCount(static_cast<int64_t>(range.getLength().count()) * this->audioStream->getSampleRate() / 100)
|
||||
{
|
||||
seek(0);
|
||||
|
||||
if (sampleOffset < 0 || sampleOffset + sampleCount > this->audioStream->getSampleCount()) {
|
||||
throw std::invalid_argument("Segment extends beyond input stream.");
|
||||
}
|
||||
}
|
||||
|
||||
AudioStreamSegment::AudioStreamSegment(const AudioStreamSegment& rhs, bool reset) :
|
||||
audioStream(rhs.audioStream->clone(false)),
|
||||
sampleOffset(rhs.sampleOffset),
|
||||
sampleCount(rhs.sampleCount)
|
||||
{
|
||||
if (reset) seek(0);
|
||||
}
|
||||
|
||||
std::unique_ptr<AudioStream> AudioStreamSegment::clone(bool reset) const {
|
||||
return std::make_unique<AudioStreamSegment>(*this, reset);
|
||||
}
|
||||
|
||||
int AudioStreamSegment::getSampleRate() const {
|
||||
return audioStream->getSampleRate();
|
||||
}
|
||||
|
||||
int64_t AudioStreamSegment::getSampleCount() const {
|
||||
return sampleCount;
|
||||
}
|
||||
|
||||
int64_t AudioStreamSegment::getSampleIndex() const {
|
||||
return audioStream->getSampleIndex() - sampleOffset;
|
||||
}
|
||||
|
||||
void AudioStreamSegment::seek(int64_t sampleIndex) {
|
||||
audioStream->seek(sampleIndex + sampleOffset);
|
||||
}
|
||||
|
||||
float AudioStreamSegment::readSample() {
|
||||
return audioStream->readSample();
|
||||
}
|
||||
|
||||
std::unique_ptr<AudioStream> createSegment(std::unique_ptr<AudioStream> audioStream, const TimeRange& range) {
|
||||
return std::make_unique<AudioStreamSegment>(std::move(audioStream), range);
|
||||
}
|
|
@ -1,21 +0,0 @@
|
|||
#pragma once
|
||||
#include <audio/AudioStream.h>
|
||||
#include <TimeRange.h>
|
||||
|
||||
class AudioStreamSegment : public AudioStream {
|
||||
public:
|
||||
AudioStreamSegment(std::unique_ptr<AudioStream> audioStream, const TimeRange& range);
|
||||
AudioStreamSegment(const AudioStreamSegment& rhs, bool reset);
|
||||
std::unique_ptr<AudioStream> clone(bool reset) const override;
|
||||
int getSampleRate() const override;
|
||||
int64_t getSampleCount() const override;
|
||||
int64_t getSampleIndex() const override;
|
||||
void seek(int64_t sampleIndex) override;
|
||||
float readSample() override;
|
||||
|
||||
private:
|
||||
std::unique_ptr<AudioStream> audioStream;
|
||||
const int64_t sampleOffset, sampleCount;
|
||||
};
|
||||
|
||||
std::unique_ptr<AudioStream> createSegment(std::unique_ptr<AudioStream> audioStream, const TimeRange& range);
|
|
@ -1,73 +1,46 @@
|
|||
#include "DCOffset.h"
|
||||
#include <gsl_util.h>
|
||||
#include <cmath>
|
||||
|
||||
DCOffset::DCOffset(std::unique_ptr<AudioStream> inputStream, float offset) :
|
||||
inputStream(std::move(inputStream)),
|
||||
using std::unique_ptr;
|
||||
using std::make_unique;
|
||||
|
||||
DCOffset::DCOffset(unique_ptr<AudioClip> inputClip, float offset) :
|
||||
inputClip(std::move(inputClip)),
|
||||
offset(offset),
|
||||
factor(1 / (1 + std::abs(offset)))
|
||||
{}
|
||||
|
||||
DCOffset::DCOffset(const DCOffset& rhs, bool reset) :
|
||||
inputStream(rhs.inputStream->clone(reset)),
|
||||
offset(rhs.offset),
|
||||
factor(rhs.factor)
|
||||
{}
|
||||
|
||||
std::unique_ptr<AudioStream> DCOffset::clone(bool reset) const {
|
||||
return std::make_unique<DCOffset>(*this, reset);
|
||||
unique_ptr<AudioClip> DCOffset::clone() const {
|
||||
return make_unique<DCOffset>(*this);
|
||||
}
|
||||
|
||||
int DCOffset::getSampleRate() const {
|
||||
return inputStream->getSampleRate();
|
||||
SampleReader DCOffset::createUnsafeSampleReader() const {
|
||||
return [read = inputClip->createSampleReader(), factor = factor, offset = offset](size_type index) {
|
||||
float sample = read(index);
|
||||
return sample * factor + offset;
|
||||
};
|
||||
}
|
||||
|
||||
int64_t DCOffset::getSampleCount() const {
|
||||
return inputStream->getSampleCount();
|
||||
}
|
||||
|
||||
int64_t DCOffset::getSampleIndex() const {
|
||||
return inputStream->getSampleIndex();
|
||||
}
|
||||
|
||||
void DCOffset::seek(int64_t sampleIndex) {
|
||||
inputStream->seek(sampleIndex);
|
||||
}
|
||||
|
||||
float DCOffset::readSample() {
|
||||
float sample = inputStream->readSample();
|
||||
return sample * factor + offset;
|
||||
}
|
||||
|
||||
std::unique_ptr<AudioStream> addDCOffset(std::unique_ptr<AudioStream> audioStream, float offset, float epsilon) {
|
||||
if (std::abs(offset) < epsilon) return audioStream;
|
||||
return std::make_unique<DCOffset>(std::move(audioStream), offset);
|
||||
}
|
||||
|
||||
float getDCOffset(AudioStream& audioStream) {
|
||||
float getDCOffset(const AudioClip& audioClip) {
|
||||
int flatMeanSampleCount, fadingMeanSampleCount;
|
||||
int sampleRate = audioStream.getSampleRate();
|
||||
if (audioStream.getSampleCount() > 4 * sampleRate) {
|
||||
int sampleRate = audioClip.getSampleRate();
|
||||
if (audioClip.size() > 4 * sampleRate) {
|
||||
// Long audio file. Average over the first 3 seconds, then fade out over the 4th.
|
||||
flatMeanSampleCount = 3 * sampleRate;
|
||||
fadingMeanSampleCount = 1 * sampleRate;
|
||||
} else {
|
||||
// Short audio file. Average over the entire length.
|
||||
flatMeanSampleCount = static_cast<int>(audioStream.getSampleCount());
|
||||
flatMeanSampleCount = static_cast<int>(audioClip.size());
|
||||
fadingMeanSampleCount = 0;
|
||||
}
|
||||
|
||||
int64_t originalSampleIndex = audioStream.getSampleIndex();
|
||||
audioStream.seek(0);
|
||||
auto restorePosition = gsl::finally([&]() { audioStream.seek(originalSampleIndex); });
|
||||
|
||||
auto read = audioClip.createSampleReader();
|
||||
double sum = 0;
|
||||
for (int i = 0; i < flatMeanSampleCount; i++) {
|
||||
sum += audioStream.readSample();
|
||||
for (int i = 0; i < flatMeanSampleCount; ++i) {
|
||||
sum += read(i);
|
||||
}
|
||||
for (int i = 0; i < fadingMeanSampleCount; i++) {
|
||||
for (int i = 0; i < fadingMeanSampleCount; ++i) {
|
||||
double weight = static_cast<double>(fadingMeanSampleCount - i) / fadingMeanSampleCount;
|
||||
sum += audioStream.readSample() * weight;
|
||||
sum += read(flatMeanSampleCount + i) * weight;
|
||||
}
|
||||
|
||||
double totalWeight = flatMeanSampleCount + (fadingMeanSampleCount + 1) / 2.0;
|
||||
|
@ -75,7 +48,16 @@ float getDCOffset(AudioStream& audioStream) {
|
|||
return static_cast<float>(offset);
|
||||
}
|
||||
|
||||
std::unique_ptr<AudioStream> removeDCOffset(std::unique_ptr<AudioStream> inputStream) {
|
||||
float offset = getDCOffset(*inputStream.get());
|
||||
return addDCOffset(std::move(inputStream), -offset);
|
||||
// Effect that adds a constant DC offset to a clip, scaling amplitude down to
// avoid clipping. Offsets smaller than epsilon leave the clip untouched.
AudioEffect addDCOffset(float offset, float epsilon) {
	return [offset, epsilon](unique_ptr<AudioClip> inputClip) -> unique_ptr<AudioClip> {
		const bool negligible = std::abs(offset) < epsilon;
		if (negligible) {
			return std::move(inputClip);
		}
		return make_unique<DCOffset>(std::move(inputClip), offset);
	};
}
|
||||
|
||||
// Effect that measures a clip's DC offset and applies the opposite offset to
// compensate for it.
AudioEffect removeDCOffset(float epsilon) {
	return [epsilon](unique_ptr<AudioClip> inputClip) {
		const float measuredOffset = getDCOffset(*inputClip);
		return std::move(inputClip) | addDCOffset(-measuredOffset, epsilon);
	};
}
|
||||
|
|
|
@ -1,26 +1,32 @@
|
|||
#pragma once
|
||||
|
||||
#include "AudioStream.h"
|
||||
#include "AudioClip.h"
|
||||
|
||||
// Applies a constant DC offset to an audio stream and reduces its amplitude
|
||||
// Applies a constant DC offset to an audio clip and reduces its amplitude
|
||||
// to prevent clipping
|
||||
class DCOffset : public AudioStream {
|
||||
class DCOffset : public AudioClip {
|
||||
public:
|
||||
DCOffset(std::unique_ptr<AudioStream> inputStream, float offset);
|
||||
DCOffset(const DCOffset& rhs, bool reset);
|
||||
std::unique_ptr<AudioStream> clone(bool reset) const override;
|
||||
DCOffset(std::unique_ptr<AudioClip> inputClip, float offset);
|
||||
std::unique_ptr<AudioClip> clone() const override;
|
||||
int getSampleRate() const override;
|
||||
int64_t getSampleCount() const override;
|
||||
int64_t getSampleIndex() const override;
|
||||
void seek(int64_t sampleIndex) override;
|
||||
float readSample() override;
|
||||
|
||||
size_type size() const override;
|
||||
private:
|
||||
std::unique_ptr<AudioStream> inputStream;
|
||||
SampleReader createUnsafeSampleReader() const override;
|
||||
|
||||
std::shared_ptr<AudioClip> inputClip;
|
||||
float offset;
|
||||
float factor;
|
||||
};
|
||||
|
||||
std::unique_ptr<AudioStream> addDCOffset(std::unique_ptr<AudioStream> audioStream, float offset, float epsilon = 1.0f / 15000);
|
||||
inline int DCOffset::getSampleRate() const {
|
||||
return inputClip->getSampleRate();
|
||||
}
|
||||
|
||||
std::unique_ptr<AudioStream> removeDCOffset(std::unique_ptr<AudioStream> audioStream);
|
||||
inline AudioClip::size_type DCOffset::size() const {
|
||||
return inputClip->size();
|
||||
}
|
||||
|
||||
float getDCOffset(const AudioClip& audioClip);
|
||||
|
||||
AudioEffect addDCOffset(float offset, float epsilon = 1.0f / 15000);
|
||||
AudioEffect removeDCOffset(float epsilon = 1.0f / 15000);
|
||||
|
|
|
@ -1,105 +1,62 @@
|
|||
#include <cmath>
|
||||
#include "SampleRateConverter.h"
|
||||
#include <stdexcept>
|
||||
#include <algorithm>
|
||||
#include <format.h>
|
||||
|
||||
using std::invalid_argument;
|
||||
using std::unique_ptr;
|
||||
using std::make_unique;
|
||||
|
||||
SampleRateConverter::SampleRateConverter(std::unique_ptr<AudioStream> inputStream, int outputSampleRate) :
|
||||
inputStream(std::move(inputStream)),
|
||||
downscalingFactor(static_cast<double>(this->inputStream->getSampleRate()) / outputSampleRate),
|
||||
SampleRateConverter::SampleRateConverter(unique_ptr<AudioClip> inputClip, int outputSampleRate) :
|
||||
inputClip(std::move(inputClip)),
|
||||
downscalingFactor(static_cast<double>(this->inputClip->getSampleRate()) / outputSampleRate),
|
||||
outputSampleRate(outputSampleRate),
|
||||
outputSampleCount(std::lround(this->inputStream->getSampleCount() / downscalingFactor)),
|
||||
lastInputSample(0),
|
||||
lastInputSampleIndex(-1),
|
||||
nextOutputSampleIndex(0)
|
||||
outputSampleCount(std::lround(this->inputClip->size() / downscalingFactor))
|
||||
{
|
||||
if (outputSampleRate <= 0) {
|
||||
throw invalid_argument("Sample rate must be positive.");
|
||||
}
|
||||
if (this->inputStream->getSampleRate() < outputSampleRate) {
|
||||
throw invalid_argument(fmt::format("Upsampling not supported. Audio sample rate must not be below {}Hz.", outputSampleRate));
|
||||
if (this->inputClip->getSampleRate() < outputSampleRate) {
|
||||
throw invalid_argument(fmt::format("Upsampling not supported. Input sample rate must not be below {}Hz.", outputSampleRate));
|
||||
}
|
||||
}
|
||||
|
||||
SampleRateConverter::SampleRateConverter(const SampleRateConverter& rhs, bool reset) :
|
||||
SampleRateConverter(rhs.inputStream->clone(reset), rhs.outputSampleRate)
|
||||
{
|
||||
nextOutputSampleIndex = reset ? 0 : rhs.nextOutputSampleIndex;
|
||||
unique_ptr<AudioClip> SampleRateConverter::clone() const {
|
||||
return make_unique<SampleRateConverter>(*this);
|
||||
}
|
||||
|
||||
std::unique_ptr<AudioStream> SampleRateConverter::clone(bool reset) const {
|
||||
return std::make_unique<SampleRateConverter>(*this, reset);
|
||||
}
|
||||
|
||||
int SampleRateConverter::getSampleRate() const {
|
||||
return outputSampleRate;
|
||||
}
|
||||
|
||||
int64_t SampleRateConverter::getSampleCount() const {
|
||||
return outputSampleCount;
|
||||
}
|
||||
|
||||
int64_t SampleRateConverter::getSampleIndex() const {
|
||||
return nextOutputSampleIndex;
|
||||
}
|
||||
|
||||
void SampleRateConverter::seek(int64_t sampleIndex) {
|
||||
if (sampleIndex < 0 || sampleIndex >= outputSampleCount) throw std::invalid_argument("sampleIndex out of range.");
|
||||
|
||||
nextOutputSampleIndex = sampleIndex;
|
||||
}
|
||||
|
||||
float SampleRateConverter::readSample() {
|
||||
if (nextOutputSampleIndex >= outputSampleCount) throw std::out_of_range("End of stream.");
|
||||
|
||||
double inputStart = nextOutputSampleIndex * downscalingFactor;
|
||||
double inputEnd = (nextOutputSampleIndex + 1) * downscalingFactor;
|
||||
|
||||
nextOutputSampleIndex++;
|
||||
return mean(inputStart, inputEnd);
|
||||
}
|
||||
|
||||
float SampleRateConverter::mean(double inputStart, double inputEnd) {
|
||||
float mean(double inputStart, double inputEnd, const SampleReader& read) {
|
||||
// Calculate weighted sum...
|
||||
double sum = 0;
|
||||
|
||||
// ... first sample (weight <= 1)
|
||||
int64_t startIndex = static_cast<int64_t>(inputStart);
|
||||
sum += getInputSample(startIndex) * ((startIndex + 1) - inputStart);
|
||||
sum += read(startIndex) * ((startIndex + 1) - inputStart);
|
||||
|
||||
// ... middle samples (weight 1 each)
|
||||
int64_t endIndex = static_cast<int64_t>(inputEnd);
|
||||
for (int64_t index = startIndex + 1; index < endIndex; ++index) {
|
||||
sum += getInputSample(index);
|
||||
sum += read(index);
|
||||
}
|
||||
|
||||
// ... last sample (weight < 1)
|
||||
sum += getInputSample(endIndex) * (inputEnd - endIndex);
|
||||
if (endIndex < inputEnd) {
|
||||
sum += read(endIndex) * (inputEnd - endIndex);
|
||||
}
|
||||
|
||||
return static_cast<float>(sum / (inputEnd - inputStart));
|
||||
}
|
||||
|
||||
float SampleRateConverter::getInputSample(int64_t sampleIndex) {
|
||||
sampleIndex = std::min(sampleIndex, inputStream->getSampleCount() - 1);
|
||||
if (sampleIndex < 0) return 0.0f;
|
||||
|
||||
if (sampleIndex == lastInputSampleIndex) {
|
||||
return lastInputSample;
|
||||
}
|
||||
|
||||
if (sampleIndex != inputStream->getSampleIndex()) {
|
||||
inputStream->seek(sampleIndex);
|
||||
}
|
||||
lastInputSample = inputStream->readSample();
|
||||
lastInputSampleIndex = sampleIndex;
|
||||
return lastInputSample;
|
||||
SampleReader SampleRateConverter::createUnsafeSampleReader() const {
|
||||
return[read = inputClip->createSampleReader(), downscalingFactor = downscalingFactor, size = size()](size_type index) {
|
||||
double inputStart = index * downscalingFactor;
|
||||
double inputEnd = std::min((index + 1) * downscalingFactor, static_cast<double>(size));
|
||||
return mean(inputStart, inputEnd, read);
|
||||
};
|
||||
}
|
||||
|
||||
std::unique_ptr<AudioStream> convertSampleRate(std::unique_ptr<AudioStream> audioStream, int sampleRate) {
|
||||
if (sampleRate == audioStream->getSampleRate()) {
|
||||
return audioStream;
|
||||
}
|
||||
return std::make_unique<SampleRateConverter>(std::move(audioStream), sampleRate);
|
||||
AudioEffect resample(int sampleRate) {
|
||||
return [sampleRate](unique_ptr<AudioClip> inputClip) {
|
||||
return make_unique<SampleRateConverter>(std::move(inputClip), sampleRate);
|
||||
};
|
||||
}
|
||||
|
|
|
@ -1,32 +1,29 @@
|
|||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include "AudioStream.h"
|
||||
#include "AudioClip.h"
|
||||
|
||||
class SampleRateConverter : public AudioStream {
|
||||
class SampleRateConverter : public AudioClip {
|
||||
public:
|
||||
SampleRateConverter(std::unique_ptr<AudioStream> inputStream, int outputSampleRate);
|
||||
SampleRateConverter(const SampleRateConverter& rhs, bool reset);
|
||||
std::unique_ptr<AudioStream> clone(bool reset) const override;
|
||||
SampleRateConverter(std::unique_ptr<AudioClip> inputClip, int outputSampleRate);
|
||||
std::unique_ptr<AudioClip> clone() const override;
|
||||
int getSampleRate() const override;
|
||||
int64_t getSampleCount() const override;
|
||||
int64_t getSampleIndex() const override;
|
||||
void seek(int64_t sampleIndex) override;
|
||||
float readSample() override;
|
||||
size_type size() const override;
|
||||
private:
|
||||
std::unique_ptr<AudioStream> inputStream;
|
||||
double downscalingFactor; // input sample rate / output sample rate
|
||||
SampleReader createUnsafeSampleReader() const override;
|
||||
|
||||
std::shared_ptr<AudioClip> inputClip;
|
||||
double downscalingFactor; // input sample rate / output sample rate
|
||||
int outputSampleRate;
|
||||
int64_t outputSampleCount;
|
||||
|
||||
float lastInputSample;
|
||||
int64_t lastInputSampleIndex;
|
||||
|
||||
int64_t nextOutputSampleIndex;
|
||||
|
||||
float mean(double start, double end);
|
||||
float getInputSample(int64_t sampleIndex);
|
||||
};
|
||||
|
||||
std::unique_ptr<AudioStream> convertSampleRate(std::unique_ptr<AudioStream> audioStream, int sampleRate);
|
||||
AudioEffect resample(int sampleRate);
|
||||
|
||||
inline int SampleRateConverter::getSampleRate() const {
|
||||
return outputSampleRate;
|
||||
}
|
||||
|
||||
inline AudioClip::size_type SampleRateConverter::size() const {
|
||||
return outputSampleCount;
|
||||
}
|
||||
|
|
|
@ -1,59 +0,0 @@
|
|||
#include "UnboundedStream.h"
|
||||
|
||||
using boost::optional;
|
||||
|
||||
// Wraps inputStream so that reads before the start return the first sample
// and reads past the end return the last sample. For an empty input stream,
// both boundary samples are pre-set to 0.
// Fixed: the original initializers dereferenced `inputStream` AFTER it had
// been moved into `innerStream` — a null unique_ptr dereference (undefined
// behavior). All member initializers now read through `innerStream`.
UnboundedStream::UnboundedStream(std::unique_ptr<AudioStream> inputStream) :
	innerStream(std::move(inputStream)),
	sampleIndex(innerStream->getSampleIndex()),
	firstSample(innerStream->getSampleCount() ? optional<float>() : 0.0f),
	lastSample(innerStream->getSampleCount() ? optional<float>() : 0.0f)
{}
|
||||
|
||||
UnboundedStream::UnboundedStream(const UnboundedStream& rhs, bool reset) :
|
||||
innerStream(rhs.innerStream->clone(reset)),
|
||||
sampleIndex(rhs.sampleIndex),
|
||||
firstSample(rhs.firstSample),
|
||||
lastSample(rhs.lastSample)
|
||||
{}
|
||||
|
||||
std::unique_ptr<AudioStream> UnboundedStream::clone(bool reset) const {
|
||||
return std::make_unique<UnboundedStream>(*this, reset);
|
||||
}
|
||||
|
||||
int UnboundedStream::getSampleRate() const {
|
||||
return innerStream->getSampleRate();
|
||||
}
|
||||
|
||||
int64_t UnboundedStream::getSampleCount() const {
|
||||
return innerStream->getSampleCount();
|
||||
}
|
||||
|
||||
int64_t UnboundedStream::getSampleIndex() const {
|
||||
return sampleIndex;
|
||||
}
|
||||
|
||||
void UnboundedStream::seek(int64_t sampleIndex) {
|
||||
this->sampleIndex = sampleIndex;
|
||||
}
|
||||
|
||||
float UnboundedStream::readSample() {
|
||||
if (sampleIndex < 0) {
|
||||
if (!firstSample) {
|
||||
innerStream->seek(0);
|
||||
firstSample = innerStream->readSample();
|
||||
}
|
||||
return firstSample.get();
|
||||
}
|
||||
if (sampleIndex >= innerStream->getSampleCount()) {
|
||||
if (!lastSample) {
|
||||
innerStream->seek(innerStream->getSampleCount() - 1);
|
||||
lastSample = innerStream->readSample();
|
||||
}
|
||||
return lastSample.get();
|
||||
}
|
||||
|
||||
if (sampleIndex != innerStream->getSampleIndex()) {
|
||||
innerStream->seek(sampleIndex);
|
||||
}
|
||||
return innerStream->readSample();
|
||||
}
|
|
@ -1,22 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
#include "AudioStream.h"
|
||||
#include <boost/optional/optional.hpp>
|
||||
|
||||
// Stream wrapper that allows reading before the start and past the end of the input stream.
|
||||
class UnboundedStream : public AudioStream {
|
||||
public:
|
||||
UnboundedStream(std::unique_ptr<AudioStream> inputStream);
|
||||
UnboundedStream(const UnboundedStream& rhs, bool reset);
|
||||
std::unique_ptr<AudioStream> clone(bool reset) const override;
|
||||
int getSampleRate() const override;
|
||||
int64_t getSampleCount() const override;
|
||||
int64_t getSampleIndex() const override;
|
||||
void seek(int64_t sampleIndex) override;
|
||||
float readSample() override;
|
||||
|
||||
private:
|
||||
std::unique_ptr<AudioStream> innerStream;
|
||||
int64_t sampleIndex;
|
||||
boost::optional<float> firstSample, lastSample;
|
||||
};
|
|
@ -7,6 +7,10 @@ using std::runtime_error;
|
|||
using fmt::format;
|
||||
using std::string;
|
||||
using namespace little_endian;
|
||||
using std::unique_ptr;
|
||||
using std::make_unique;
|
||||
using std::make_shared;
|
||||
using boost::filesystem::path;
|
||||
|
||||
#define INT24_MIN (-8388608)
|
||||
#define INT24_MAX 8388607
|
||||
|
@ -25,12 +29,34 @@ enum class Codec {
|
|||
Float = 0x03
|
||||
};
|
||||
|
||||
WaveFileReader::WaveFileReader(boost::filesystem::path filePath) :
|
||||
// Opens filePath as a binary input stream with failbit/badbit exceptions
// enabled, probing the file once so that open/read problems surface here
// with a meaningful OS error message.
// Throws runtime_error if the file cannot be opened or read.
std::ifstream openFile(path filePath) {
	try {
		std::ifstream file;
		file.exceptions(std::ifstream::failbit | std::ifstream::badbit);
		file.open(filePath.c_str(), std::ios::binary);

		// Error messages on stream exceptions are mostly useless.
		// Read some dummy data so that we can throw a decent exception in case the file is missing, locked, etc.
		file.seekg(0, std::ios_base::end);
		if (file.tellg()) {
			file.seekg(0);
			file.get();
			file.seekg(0);
		}

		// Return the local directly: a named local is implicitly treated as an
		// rvalue here, and `std::move` would only inhibit NRVO
		// (clang-tidy: performance-no-automatic-move / -Wpessimizing-move).
		return file;
	} catch (const std::ifstream::failure&) {
		// NOTE(review): strerror_s is Windows-CRT-specific; POSIX toolchains
		// need strerror_r/std::strerror instead — confirm target platforms.
		char message[256];
		strerror_s(message, sizeof message, errno);
		throw runtime_error(message);
	}
}
|
||||
|
||||
WaveFileReader::WaveFileReader(path filePath) :
|
||||
filePath(filePath),
|
||||
file(),
|
||||
frameIndex(0)
|
||||
formatInfo{}
|
||||
{
|
||||
openFile();
|
||||
auto file = openFile(filePath);
|
||||
|
||||
file.seekg(0, std::ios_base::end);
|
||||
std::streamoff fileSize = file.tellg();
|
||||
|
@ -57,16 +83,15 @@ WaveFileReader::WaveFileReader(boost::filesystem::path filePath) :
|
|||
|
||||
// Read chunks until we reach the data chunk
|
||||
bool reachedDataChunk = false;
|
||||
bytesPerSample = 0;
|
||||
while (!reachedDataChunk && remaining(8)) {
|
||||
uint32_t chunkId = read<uint32_t>(file);
|
||||
int chunkSize = read<uint32_t>(file);
|
||||
switch (chunkId) {
|
||||
case fourcc('f', 'm', 't', ' '): {
|
||||
// Read relevant data
|
||||
Codec codec = (Codec)read<uint16_t>(file);
|
||||
channelCount = read<uint16_t>(file);
|
||||
frameRate = read<uint32_t>(file);
|
||||
Codec codec = static_cast<Codec>(read<uint16_t>(file));
|
||||
formatInfo.channelCount = read<uint16_t>(file);
|
||||
formatInfo.frameRate = read<uint32_t>(file);
|
||||
read<uint32_t>(file); // Bytes per second
|
||||
int frameSize = read<uint16_t>(file);
|
||||
int bitsPerSample = read<uint16_t>(file);
|
||||
|
@ -75,31 +100,32 @@ WaveFileReader::WaveFileReader(boost::filesystem::path filePath) :
|
|||
file.seekg(roundToEven(chunkSize) - 16, file.cur);
|
||||
|
||||
// Determine sample format
|
||||
int bytesPerSample;
|
||||
switch (codec) {
|
||||
case Codec::PCM:
|
||||
// Determine sample size.
|
||||
// According to the WAVE standard, sample sizes that are not multiples of 8 bits
|
||||
// (e.g. 12 bits) can be treated like the next-larger byte size.
|
||||
if (bitsPerSample == 8) {
|
||||
sampleFormat = SampleFormat::UInt8;
|
||||
formatInfo.sampleFormat = SampleFormat::UInt8;
|
||||
bytesPerSample = 1;
|
||||
} else if (bitsPerSample <= 16) {
|
||||
sampleFormat = SampleFormat::Int16;
|
||||
formatInfo.sampleFormat = SampleFormat::Int16;
|
||||
bytesPerSample = 2;
|
||||
} else if (bitsPerSample <= 24) {
|
||||
sampleFormat = SampleFormat::Int24;
|
||||
formatInfo.sampleFormat = SampleFormat::Int24;
|
||||
bytesPerSample = 3;
|
||||
} else {
|
||||
throw runtime_error(
|
||||
format("Unsupported sample format: {}-bit integer samples.", bitsPerSample));
|
||||
}
|
||||
if (bytesPerSample != frameSize / channelCount) {
|
||||
if (bytesPerSample != frameSize / formatInfo.channelCount) {
|
||||
throw runtime_error("Unsupported sample organization.");
|
||||
}
|
||||
break;
|
||||
case Codec::Float:
|
||||
if (bitsPerSample == 32) {
|
||||
sampleFormat = SampleFormat::Float32;
|
||||
formatInfo.sampleFormat = SampleFormat::Float32;
|
||||
bytesPerSample = 4;
|
||||
} else {
|
||||
throw runtime_error(format("Unsupported sample format: {}-bit floating-point samples.", bitsPerSample));
|
||||
|
@ -108,13 +134,13 @@ WaveFileReader::WaveFileReader(boost::filesystem::path filePath) :
|
|||
default:
|
||||
throw runtime_error("Unsupported sample format. Only uncompressed formats are supported.");
|
||||
}
|
||||
formatInfo.bytesPerFrame = bytesPerSample * formatInfo.channelCount;
|
||||
break;
|
||||
}
|
||||
case fourcc('d', 'a', 't', 'a'): {
|
||||
reachedDataChunk = true;
|
||||
dataOffset = file.tellg();
|
||||
int sampleCount = chunkSize / bytesPerSample;
|
||||
frameCount = sampleCount / channelCount;
|
||||
formatInfo.dataOffset = file.tellg();
|
||||
formatInfo.frameCount = chunkSize / formatInfo.bytesPerFrame;
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
|
@ -124,75 +150,13 @@ WaveFileReader::WaveFileReader(boost::filesystem::path filePath) :
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!reachedDataChunk) {
|
||||
dataOffset = file.tellg();
|
||||
frameCount = 0;
|
||||
}
|
||||
}
|
||||
|
||||
WaveFileReader::WaveFileReader(const WaveFileReader& rhs, bool reset) :
|
||||
filePath(rhs.filePath),
|
||||
file(),
|
||||
bytesPerSample(rhs.bytesPerSample),
|
||||
sampleFormat(rhs.sampleFormat),
|
||||
frameRate(rhs.frameRate),
|
||||
frameCount(rhs.frameCount),
|
||||
channelCount(rhs.channelCount),
|
||||
dataOffset(rhs.dataOffset),
|
||||
frameIndex(-1)
|
||||
{
|
||||
openFile();
|
||||
seek(reset ? 0 : rhs.frameIndex);
|
||||
unique_ptr<AudioClip> WaveFileReader::clone() const {
|
||||
return make_unique<WaveFileReader>(*this);
|
||||
}
|
||||
|
||||
std::unique_ptr<AudioStream> WaveFileReader::clone(bool reset) const {
|
||||
return std::make_unique<WaveFileReader>(*this, reset);
|
||||
}
|
||||
|
||||
void WaveFileReader::openFile() {
|
||||
try {
|
||||
file.exceptions(std::ifstream::failbit | std::ifstream::badbit);
|
||||
file.open(filePath, std::ios::binary);
|
||||
|
||||
// Error messages on stream exceptions are mostly useless.
|
||||
// Read some dummy data so that we can throw a decent exception in case the file is missing, locked, etc.
|
||||
file.seekg(0, std::ios_base::end);
|
||||
if (file.tellg()) {
|
||||
file.seekg(0);
|
||||
file.get();
|
||||
file.seekg(0);
|
||||
}
|
||||
} catch (const std::ifstream::failure&) {
|
||||
char message[256];
|
||||
strerror_s(message, sizeof message, errno);
|
||||
throw runtime_error(message);
|
||||
}
|
||||
}
|
||||
|
||||
int WaveFileReader::getSampleRate() const {
|
||||
return frameRate;
|
||||
}
|
||||
|
||||
int64_t WaveFileReader::getSampleCount() const {
|
||||
return frameCount;
|
||||
}
|
||||
|
||||
int64_t WaveFileReader::getSampleIndex() const {
|
||||
return frameIndex;
|
||||
}
|
||||
|
||||
void WaveFileReader::seek(int64_t frameIndex) {
|
||||
if (frameIndex < 0 || frameIndex > frameCount) throw std::invalid_argument("frameIndex out of range.");
|
||||
|
||||
file.seekg(dataOffset + static_cast<std::streamoff>(frameIndex * channelCount * bytesPerSample));
|
||||
this->frameIndex = frameIndex;
|
||||
}
|
||||
|
||||
float WaveFileReader::readSample() {
|
||||
if (frameIndex >= frameCount) throw std::out_of_range("End of stream.");
|
||||
++frameIndex;
|
||||
|
||||
inline AudioClip::value_type readSample(std::ifstream& file, SampleFormat sampleFormat, int channelCount) {
|
||||
float sum = 0;
|
||||
for (int channelIndex = 0; channelIndex < channelCount; channelIndex++) {
|
||||
switch (sampleFormat) {
|
||||
|
@ -221,3 +185,13 @@ float WaveFileReader::readSample() {
|
|||
|
||||
return sum / channelCount;
|
||||
}
|
||||
|
||||
SampleReader WaveFileReader::createUnsafeSampleReader() const {
|
||||
return [formatInfo = formatInfo, file = std::make_shared<std::ifstream>(openFile(filePath)), filePos = std::streampos(0)](size_type index) mutable {
|
||||
std::streampos newFilePos = formatInfo.dataOffset + static_cast<std::streamoff>(index * formatInfo.bytesPerFrame);
|
||||
file->seekg(newFilePos);
|
||||
value_type result = readSample(*file, formatInfo.sampleFormat, formatInfo.channelCount);
|
||||
filePos = newFilePos + static_cast<std::streamoff>(formatInfo.bytesPerFrame);
|
||||
return result;
|
||||
};
|
||||
}
|
||||
|
|
|
@ -1,8 +1,7 @@
|
|||
#pragma once
|
||||
|
||||
#include <boost/filesystem/path.hpp>
|
||||
#include <boost/filesystem/fstream.hpp>
|
||||
#include "AudioStream.h"
|
||||
#include "AudioClip.h"
|
||||
|
||||
enum class SampleFormat {
|
||||
UInt8,
|
||||
|
@ -11,28 +10,33 @@ enum class SampleFormat {
|
|||
Float32
|
||||
};
|
||||
|
||||
class WaveFileReader : public AudioStream {
|
||||
class WaveFileReader : public AudioClip {
|
||||
public:
|
||||
WaveFileReader(boost::filesystem::path filePath);
|
||||
WaveFileReader(const WaveFileReader& rhs, bool reset);
|
||||
std::unique_ptr<AudioStream> clone(bool reset) const override;
|
||||
int getSampleRate() const override ;
|
||||
int64_t getSampleCount() const override;
|
||||
int64_t getSampleIndex() const override;
|
||||
void seek(int64_t sampleIndex) override;
|
||||
float readSample() override;
|
||||
std::unique_ptr<AudioClip> clone() const override;
|
||||
int getSampleRate() const override;
|
||||
size_type size() const override;
|
||||
|
||||
private:
|
||||
void openFile();
|
||||
SampleReader createUnsafeSampleReader() const override;
|
||||
|
||||
struct WaveFormatInfo {
|
||||
int bytesPerFrame;
|
||||
SampleFormat sampleFormat;
|
||||
int frameRate;
|
||||
int64_t frameCount;
|
||||
int channelCount;
|
||||
std::streampos dataOffset;
|
||||
};
|
||||
|
||||
private:
|
||||
boost::filesystem::path filePath;
|
||||
boost::filesystem::ifstream file;
|
||||
int bytesPerSample;
|
||||
SampleFormat sampleFormat;
|
||||
int frameRate;
|
||||
int64_t frameCount;
|
||||
int channelCount;
|
||||
std::streampos dataOffset;
|
||||
int64_t frameIndex;
|
||||
WaveFormatInfo formatInfo;
|
||||
};
|
||||
|
||||
inline int WaveFileReader::getSampleRate() const {
|
||||
return formatInfo.frameRate;
|
||||
}
|
||||
|
||||
inline AudioClip::size_type WaveFileReader::size() const {
|
||||
return formatInfo.frameCount;
|
||||
}
|
||||
|
|
|
@ -10,31 +10,31 @@ inline int16_t floatSampleToInt16(float sample) {
|
|||
return static_cast<int16_t>(((sample + 1) / 2) * (INT16_MAX - INT16_MIN) + INT16_MIN);
|
||||
}
|
||||
|
||||
void process16bitAudioStream(AudioStream& audioStream, function<void(const vector<int16_t>&)> processBuffer, size_t bufferCapacity, ProgressSink& progressSink) {
|
||||
void process16bitAudioClip(const AudioClip& audioClip, function<void(const vector<int16_t>&)> processBuffer, size_t bufferCapacity, ProgressSink& progressSink) {
|
||||
// Process entire sound stream
|
||||
vector<int16_t> buffer;
|
||||
buffer.reserve(bufferCapacity);
|
||||
int sampleCount = 0;
|
||||
auto it = audioClip.begin();
|
||||
auto end = audioClip.end();
|
||||
do {
|
||||
// Read to buffer
|
||||
buffer.clear();
|
||||
while (buffer.size() < bufferCapacity && !audioStream.endOfStream()) {
|
||||
// Read sample
|
||||
float floatSample = audioStream.readSample();
|
||||
int16_t sample = floatSampleToInt16(floatSample);
|
||||
buffer.push_back(sample);
|
||||
for (; buffer.size() < bufferCapacity && it != end; ++it) {
|
||||
// Read sample to buffer
|
||||
buffer.push_back(floatSampleToInt16(*it));
|
||||
}
|
||||
|
||||
// Process buffer
|
||||
processBuffer(buffer);
|
||||
|
||||
sampleCount += buffer.size();
|
||||
progressSink.reportProgress(static_cast<double>(sampleCount) / audioStream.getSampleCount());
|
||||
progressSink.reportProgress(static_cast<double>(sampleCount) / audioClip.size());
|
||||
} while (buffer.size());
|
||||
}
|
||||
|
||||
void process16bitAudioStream(AudioStream& audioStream, function<void(const vector<int16_t>&)> processBuffer, ProgressSink& progressSink) {
|
||||
void process16bitAudioClip(const AudioClip& audioClip, function<void(const vector<int16_t>&)> processBuffer, ProgressSink& progressSink) {
|
||||
const size_t capacity = 1600; // 0.1 second capacity
|
||||
process16bitAudioStream(audioStream, processBuffer, capacity, progressSink);
|
||||
process16bitAudioClip(audioClip, processBuffer, capacity, progressSink);
|
||||
}
|
||||
|
||||
|
|
|
@ -2,8 +2,8 @@
|
|||
|
||||
#include <vector>
|
||||
#include <functional>
|
||||
#include "audio/AudioStream.h"
|
||||
#include "audio/AudioClip.h"
|
||||
#include "ProgressBar.h"
|
||||
|
||||
void process16bitAudioStream(AudioStream& audioStream, std::function<void(const std::vector<int16_t>&)> processBuffer, size_t bufferCapacity, ProgressSink& progressSink);
|
||||
void process16bitAudioStream(AudioStream& audioStream, std::function<void(const std::vector<int16_t>&)> processBuffer, ProgressSink& progressSink);
|
||||
void process16bitAudioClip(const AudioClip& audioClip, std::function<void(const std::vector<int16_t>&)> processBuffer, size_t bufferCapacity, ProgressSink& progressSink);
|
||||
void process16bitAudioClip(const AudioClip& audioClip, std::function<void(const std::vector<int16_t>&)> processBuffer, ProgressSink& progressSink);
|
|
@ -8,7 +8,7 @@
|
|||
#include "processing.h"
|
||||
#include <gsl_util.h>
|
||||
#include <parallel.h>
|
||||
#include "AudioStreamSegment.h"
|
||||
#include "AudioSegment.h"
|
||||
|
||||
using std::vector;
|
||||
using boost::adaptors::transformed;
|
||||
|
@ -16,7 +16,7 @@ using fmt::format;
|
|||
using std::runtime_error;
|
||||
using std::unique_ptr;
|
||||
|
||||
BoundedTimeline<void> webRtcDetectVoiceActivity(AudioStream& audioStream, ProgressSink& progressSink) {
|
||||
BoundedTimeline<void> webRtcDetectVoiceActivity(const AudioClip& audioClip, ProgressSink& progressSink) {
|
||||
VadInst* vadHandle = WebRtcVad_Create();
|
||||
if (!vadHandle) throw runtime_error("Error creating WebRTC VAD handle.");
|
||||
|
||||
|
@ -30,14 +30,14 @@ BoundedTimeline<void> webRtcDetectVoiceActivity(AudioStream& audioStream, Progre
|
|||
if (error) throw runtime_error("Error setting WebRTC VAD aggressiveness.");
|
||||
|
||||
// Detect activity
|
||||
BoundedTimeline<void> activity(audioStream.getTruncatedRange());
|
||||
BoundedTimeline<void> activity(audioClip.getTruncatedRange());
|
||||
centiseconds time = 0cs;
|
||||
const size_t bufferCapacity = audioStream.getSampleRate() / 100;
|
||||
const size_t bufferCapacity = audioClip.getSampleRate() / 100;
|
||||
auto processBuffer = [&](const vector<int16_t>& buffer) {
|
||||
// WebRTC is picky regarding buffer size
|
||||
if (buffer.size() < bufferCapacity) return;
|
||||
|
||||
int result = WebRtcVad_Process(vadHandle, audioStream.getSampleRate(), buffer.data(), buffer.size()) == 1;
|
||||
int result = WebRtcVad_Process(vadHandle, audioClip.getSampleRate(), buffer.data(), buffer.size()) == 1;
|
||||
if (result == -1) throw runtime_error("Error processing audio buffer using WebRTC VAD.");
|
||||
|
||||
bool isActive = result != 0;
|
||||
|
@ -46,7 +46,7 @@ BoundedTimeline<void> webRtcDetectVoiceActivity(AudioStream& audioStream, Progre
|
|||
}
|
||||
time += 1cs;
|
||||
};
|
||||
process16bitAudioStream(*audioStream.clone(true), processBuffer, bufferCapacity, progressSink);
|
||||
process16bitAudioClip(audioClip, processBuffer, bufferCapacity, progressSink);
|
||||
|
||||
// WebRTC adapts to the audio. This means results may not be correct at the very beginning.
|
||||
// It sometimes returns false activity at the very beginning, mistaking the background noise for speech.
|
||||
|
@ -54,31 +54,31 @@ BoundedTimeline<void> webRtcDetectVoiceActivity(AudioStream& audioStream, Progre
|
|||
if (!activity.empty()) {
|
||||
TimeRange firstActivity = activity.begin()->getTimeRange();
|
||||
activity.clear(firstActivity);
|
||||
unique_ptr<AudioStream> streamStart = createSegment(audioStream.clone(true), TimeRange(0cs, firstActivity.getEnd()));
|
||||
unique_ptr<AudioClip> streamStart = audioClip.clone() | segment(TimeRange(0cs, firstActivity.getEnd()));
|
||||
time = 0cs;
|
||||
process16bitAudioStream(*streamStart, processBuffer, bufferCapacity, progressSink);
|
||||
process16bitAudioClip(*streamStart, processBuffer, bufferCapacity, progressSink);
|
||||
}
|
||||
|
||||
return activity;
|
||||
}
|
||||
|
||||
BoundedTimeline<void> detectVoiceActivity(std::unique_ptr<AudioStream> audioStream, ProgressSink& progressSink) {
|
||||
BoundedTimeline<void> detectVoiceActivity(const AudioClip& inputAudioClip, ProgressSink& progressSink) {
|
||||
// Prepare audio for VAD
|
||||
audioStream = removeDCOffset(convertSampleRate(std::move(audioStream), 16000));
|
||||
const unique_ptr<AudioClip> audioClip = inputAudioClip.clone() | resample(16000) | removeDCOffset();
|
||||
|
||||
BoundedTimeline<void> activity(audioStream->getTruncatedRange());
|
||||
BoundedTimeline<void> activity(audioClip->getTruncatedRange());
|
||||
std::mutex activityMutex;
|
||||
|
||||
// Split audio into segments and perform parallel VAD
|
||||
int segmentCount = getProcessorCoreCount();
|
||||
centiseconds audioLength = audioStream->getTruncatedRange().getLength();
|
||||
centiseconds audioLength = audioClip->getTruncatedRange().getLength();
|
||||
vector<TimeRange> audioSegments;
|
||||
for (int i = 0; i < segmentCount; ++i) {
|
||||
TimeRange segmentRange = TimeRange(i * audioLength / segmentCount, (i + 1) * audioLength / segmentCount);
|
||||
audioSegments.push_back(segmentRange);
|
||||
}
|
||||
runParallel([&](const TimeRange& segmentRange, ProgressSink& segmentProgressSink) {
|
||||
unique_ptr<AudioStream> audioSegment = createSegment(audioStream->clone(false), segmentRange);
|
||||
unique_ptr<AudioClip> audioSegment = audioClip->clone() | segment(segmentRange);
|
||||
BoundedTimeline<void> activitySegment = webRtcDetectVoiceActivity(*audioSegment, segmentProgressSink);
|
||||
|
||||
std::lock_guard<std::mutex> lock(activityMutex);
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
#pragma once
|
||||
#include <memory>
|
||||
#include "AudioStream.h"
|
||||
#include "AudioClip.h"
|
||||
#include <BoundedTimeline.h>
|
||||
#include <ProgressBar.h>
|
||||
|
||||
BoundedTimeline<void> detectVoiceActivity(std::unique_ptr<AudioStream> audioStream, ProgressSink& progressSink);
|
||||
BoundedTimeline<void> detectVoiceActivity(const AudioClip& audioClip, ProgressSink& progressSink);
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
|
||||
using namespace little_endian;
|
||||
|
||||
void createWaveFile(std::unique_ptr<AudioStream> inputStream, std::string fileName) {
|
||||
void createWaveFile(const AudioClip& audioClip, std::string fileName) {
|
||||
// Open file
|
||||
std::ofstream file;
|
||||
file.exceptions(std::ofstream::failbit | std::ofstream::badbit);
|
||||
|
@ -15,7 +15,7 @@ void createWaveFile(std::unique_ptr<AudioStream> inputStream, std::string fileNa
|
|||
uint32_t formatChunkSize = 16;
|
||||
uint16_t channelCount = 1;
|
||||
uint16_t frameSize = static_cast<uint16_t>(channelCount * sizeof(float));
|
||||
uint32_t dataChunkSize = static_cast<uint32_t>(inputStream->getSampleCount() * frameSize);
|
||||
uint32_t dataChunkSize = static_cast<uint32_t>(audioClip.size() * frameSize);
|
||||
uint32_t riffChunkSize = 4 + (8 + formatChunkSize) + (8 + dataChunkSize);
|
||||
write<uint32_t>(riffChunkSize, file);
|
||||
write<uint32_t>(fourcc('W', 'A', 'V', 'E'), file);
|
||||
|
@ -26,7 +26,7 @@ void createWaveFile(std::unique_ptr<AudioStream> inputStream, std::string fileNa
|
|||
uint16_t codec = 0x03; // 32-bit float
|
||||
write<uint16_t>(codec, file);
|
||||
write<uint16_t>(channelCount, file);
|
||||
uint32_t frameRate = static_cast<uint16_t>(inputStream->getSampleRate());
|
||||
uint32_t frameRate = static_cast<uint16_t>(audioClip.getSampleRate());
|
||||
write<uint32_t>(frameRate, file);
|
||||
uint32_t bytesPerSecond = frameRate * frameSize;
|
||||
write<uint32_t>(bytesPerSecond, file);
|
||||
|
@ -37,8 +37,7 @@ void createWaveFile(std::unique_ptr<AudioStream> inputStream, std::string fileNa
|
|||
// Write data chunk
|
||||
write<uint32_t>(fourcc('d', 'a', 't', 'a'), file);
|
||||
write<uint32_t>(dataChunkSize, file);
|
||||
while (!inputStream->endOfStream()) {
|
||||
float sample = inputStream->readSample();
|
||||
for (float sample : audioClip) {
|
||||
write<float>(sample, file);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,7 +1,5 @@
|
|||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include "AudioStream.h"
|
||||
#include "AudioClip.h"
|
||||
|
||||
void createWaveFile(std::unique_ptr<AudioStream> inputStream, std::string fileName);
|
||||
void createWaveFile(const AudioClip& audioClip, std::string fileName);
|
||||
|
|
|
@ -15,6 +15,7 @@
|
|||
#include <boost/filesystem/operations.hpp>
|
||||
#include "stringTools.h"
|
||||
#include <boost/range/adaptor/transformed.hpp>
|
||||
#include <boost/filesystem/fstream.hpp>
|
||||
|
||||
using std::exception;
|
||||
using std::string;
|
||||
|
@ -43,7 +44,7 @@ string getMessage(const exception& e) {
|
|||
return result;
|
||||
}
|
||||
|
||||
unique_ptr<AudioStream> createAudioStream(path filePath) {
|
||||
unique_ptr<AudioClip> createAudioClip(path filePath) {
|
||||
try {
|
||||
return std::make_unique<WaveFileReader>(filePath);
|
||||
} catch (...) {
|
||||
|
@ -144,7 +145,7 @@ int main(int argc, char *argv[]) {
|
|||
{
|
||||
ProgressBar progressBar;
|
||||
phones = detectPhones(
|
||||
createAudioStream(inputFileName.getValue()),
|
||||
*createAudioClip(inputFileName.getValue()),
|
||||
dialogFile.isSet() ? readTextFile(path(dialogFile.getValue())) : boost::optional<u32string>(),
|
||||
progressBar);
|
||||
}
|
||||
|
|
|
@ -11,7 +11,7 @@
|
|||
#include <audio/DCOffset.h>
|
||||
#include <Timeline.h>
|
||||
#include <audio/voiceActivityDetection.h>
|
||||
#include <audio/AudioStreamSegment.h>
|
||||
#include "audio/AudioSegment.h"
|
||||
#include "languageModels.h"
|
||||
#include "tokenization.h"
|
||||
#include "g2p.h"
|
||||
|
@ -95,9 +95,9 @@ void sphinxLogCallback(void* user_data, err_lvl_t errorLevel, const char* format
|
|||
logging::log(logLevel, message);
|
||||
}
|
||||
|
||||
BoundedTimeline<string> recognizeWords(unique_ptr<AudioStream> audioStream, ps_decoder_t& decoder, bool& decoderIsStillUsable, ProgressSink& progressSink) {
|
||||
BoundedTimeline<string> recognizeWords(const AudioClip& inputAudioClip, ps_decoder_t& decoder, bool& decoderIsStillUsable, ProgressSink& progressSink) {
|
||||
// Convert audio stream to the exact format PocketSphinx requires
|
||||
audioStream = convertSampleRate(std::move(audioStream), sphinxSampleRate);
|
||||
const unique_ptr<AudioClip> audioClip = inputAudioClip.clone() | resample(sphinxSampleRate);
|
||||
|
||||
// Restart timing at 0
|
||||
ps_start_stream(&decoder);
|
||||
|
@ -111,7 +111,7 @@ BoundedTimeline<string> recognizeWords(unique_ptr<AudioStream> audioStream, ps_d
|
|||
int searchedFrameCount = ps_process_raw(&decoder, buffer.data(), buffer.size(), false, false);
|
||||
if (searchedFrameCount < 0) throw runtime_error("Error analyzing raw audio data for word recognition.");
|
||||
};
|
||||
process16bitAudioStream(*audioStream.get(), processBuffer, progressSink);
|
||||
process16bitAudioClip(*audioClip, processBuffer, progressSink);
|
||||
|
||||
// End recognition
|
||||
error = ps_end_utt(&decoder);
|
||||
|
@ -121,7 +121,7 @@ BoundedTimeline<string> recognizeWords(unique_ptr<AudioStream> audioStream, ps_d
|
|||
// As a result, the following utterance will be garbage.
|
||||
// As a workaround, we throw away the decoder in this case.
|
||||
// See https://sourceforge.net/p/cmusphinx/discussion/help/thread/f1dd91c5/#7529
|
||||
BoundedTimeline<string> result(audioStream->getTruncatedRange());
|
||||
BoundedTimeline<string> result(audioClip->getTruncatedRange());
|
||||
bool noWordsRecognized = reinterpret_cast<ngram_search_t*>(decoder.search)->bpidx == 0;
|
||||
if (noWordsRecognized) {
|
||||
decoderIsStillUsable = false;
|
||||
|
@ -147,7 +147,7 @@ s3wid_t getWordId(const string& word, dict_t& dictionary) {
|
|||
|
||||
optional<Timeline<Phone>> getPhoneAlignment(
|
||||
const vector<s3wid_t>& wordIds,
|
||||
unique_ptr<AudioStream> audioStream,
|
||||
const AudioClip& inputAudioClip,
|
||||
ps_decoder_t& decoder,
|
||||
ProgressSink& progressSink)
|
||||
{
|
||||
|
@ -164,7 +164,7 @@ optional<Timeline<Phone>> getPhoneAlignment(
|
|||
if (error) throw runtime_error("Error populating alignment struct.");
|
||||
|
||||
// Convert audio stream to the exact format PocketSphinx requires
|
||||
audioStream = convertSampleRate(std::move(audioStream), sphinxSampleRate);
|
||||
const unique_ptr<AudioClip> audioClip = inputAudioClip.clone() | resample(sphinxSampleRate);
|
||||
|
||||
// Create search structure
|
||||
acmod_t* acousticModel = decoder.acmod;
|
||||
|
@ -195,7 +195,7 @@ optional<Timeline<Phone>> getPhoneAlignment(
|
|||
}
|
||||
}
|
||||
};
|
||||
process16bitAudioStream(*audioStream.get(), processBuffer, progressSink);
|
||||
process16bitAudioClip(*audioClip, processBuffer, progressSink);
|
||||
|
||||
// End search
|
||||
error = ps_search_finish(search.get());
|
||||
|
@ -288,7 +288,7 @@ lambda_unique_ptr<ps_decoder_t> createDecoder(optional<u32string> dialog) {
|
|||
}
|
||||
|
||||
Timeline<Phone> utteranceToPhones(
|
||||
AudioStream& audioStream,
|
||||
const AudioClip& audioClip,
|
||||
TimeRange utterance,
|
||||
ps_decoder_t& decoder,
|
||||
bool& decoderIsStillUsable,
|
||||
|
@ -298,10 +298,10 @@ Timeline<Phone> utteranceToPhones(
|
|||
ProgressSink& wordRecognitionProgressSink = utteranceProgressMerger.addSink(1.0);
|
||||
ProgressSink& alignmentProgressSink = utteranceProgressMerger.addSink(0.5);
|
||||
|
||||
auto streamSegment = createSegment(audioStream.clone(true), utterance);
|
||||
const unique_ptr<AudioClip> clipSegment = audioClip.clone() | segment(utterance);
|
||||
|
||||
// Get words
|
||||
BoundedTimeline<string> words = recognizeWords(streamSegment->clone(true), decoder, decoderIsStillUsable, wordRecognitionProgressSink);
|
||||
BoundedTimeline<string> words = recognizeWords(*clipSegment, decoder, decoderIsStillUsable, wordRecognitionProgressSink);
|
||||
for (Timed<string> timedWord : words) {
|
||||
timedWord.getTimeRange().shift(utterance.getStart());
|
||||
logging::logTimedEvent("word", timedWord);
|
||||
|
@ -315,8 +315,8 @@ Timeline<Phone> utteranceToPhones(
|
|||
if (wordIds.empty()) return Timeline<Phone>();
|
||||
|
||||
// Align the words' phones with speech
|
||||
Timeline<Phone> segmentPhones = getPhoneAlignment(wordIds, std::move(streamSegment), decoder, alignmentProgressSink)
|
||||
.value_or(ContinuousTimeline<Phone>(streamSegment->getTruncatedRange(), Phone::Unknown));
|
||||
Timeline<Phone> segmentPhones = getPhoneAlignment(wordIds, *clipSegment, decoder, alignmentProgressSink)
|
||||
.value_or(ContinuousTimeline<Phone>(clipSegment->getTruncatedRange(), Phone::Unknown));
|
||||
segmentPhones.shift(utterance.getStart());
|
||||
for (const auto& timedPhone : segmentPhones) {
|
||||
logging::logTimedEvent("phone", timedPhone);
|
||||
|
@ -326,7 +326,7 @@ Timeline<Phone> utteranceToPhones(
|
|||
}
|
||||
|
||||
BoundedTimeline<Phone> detectPhones(
|
||||
unique_ptr<AudioStream> audioStream,
|
||||
const AudioClip& inputAudioClip,
|
||||
optional<u32string> dialog,
|
||||
ProgressSink& progressSink)
|
||||
{
|
||||
|
@ -335,12 +335,12 @@ BoundedTimeline<Phone> detectPhones(
|
|||
ProgressSink& dialogProgressSink = totalProgressMerger.addSink(15);
|
||||
|
||||
// Make sure audio stream has no DC offset
|
||||
audioStream = removeDCOffset(std::move(audioStream));
|
||||
const unique_ptr<AudioClip> audioClip = inputAudioClip.clone() | removeDCOffset();
|
||||
|
||||
// Split audio into utterances
|
||||
BoundedTimeline<void> utterances;
|
||||
try {
|
||||
utterances = detectVoiceActivity(audioStream->clone(true), voiceActivationProgressSink);
|
||||
utterances = detectVoiceActivity(*audioClip, voiceActivationProgressSink);
|
||||
}
|
||||
catch (...) {
|
||||
std::throw_with_nested(runtime_error("Error detecting segments of speech."));
|
||||
|
@ -369,17 +369,16 @@ BoundedTimeline<Phone> detectPhones(
|
|||
decoderPool.push(std::move(decoder));
|
||||
};
|
||||
|
||||
BoundedTimeline<Phone> result(audioStream->getTruncatedRange());
|
||||
BoundedTimeline<Phone> result(audioClip->getTruncatedRange());
|
||||
std::mutex resultMutex;
|
||||
auto processUtterance = [&](Timed<void> timedUtterance, ProgressSink& utteranceProgressSink) {
|
||||
logging::logTimedEvent("utterance", timedUtterance.getTimeRange(), string(""));
|
||||
|
||||
// Detect phones for utterance
|
||||
auto decoder = getDecoder();
|
||||
auto audioStreamCopy = audioStream->clone(true);
|
||||
bool decoderIsStillUsable = true;
|
||||
Timeline<Phone> phones =
|
||||
utteranceToPhones(*audioStreamCopy, timedUtterance.getTimeRange(), *decoder, decoderIsStillUsable, utteranceProgressSink);
|
||||
utteranceToPhones(*audioClip, timedUtterance.getTimeRange(), *decoder, decoderIsStillUsable, utteranceProgressSink);
|
||||
if (decoderIsStillUsable) {
|
||||
returnDecoder(std::move(decoder));
|
||||
}
|
||||
|
@ -404,7 +403,7 @@ BoundedTimeline<Phone> detectPhones(
|
|||
// Don't use more threads than there are utterances to be processed
|
||||
static_cast<int>(utterances.size()),
|
||||
// Don't waste time creating additional threads (and decoders!) if the recording is short
|
||||
static_cast<int>(duration_cast<std::chrono::seconds>(audioStream->getTruncatedRange().getLength()).count() / 10)
|
||||
static_cast<int>(duration_cast<std::chrono::seconds>(audioClip->getTruncatedRange().getLength()).count() / 10)
|
||||
});
|
||||
logging::debug("Speech recognition -- start");
|
||||
runParallel(processUtterance, utterances, threadCount, dialogProgressSink, getUtteranceProgressWeight);
|
||||
|
|
|
@ -1,12 +1,11 @@
|
|||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include "audio/AudioStream.h"
|
||||
#include "audio/AudioClip.h"
|
||||
#include "Phone.h"
|
||||
#include "progressBar.h"
|
||||
#include "BoundedTimeline.h"
|
||||
|
||||
BoundedTimeline<Phone> detectPhones(
|
||||
std::unique_ptr<AudioStream> audioStream,
|
||||
const AudioClip& audioClip,
|
||||
boost::optional<std::u32string> dialog,
|
||||
ProgressSink& progressSink);
|
||||
|
|
Loading…
Reference in New Issue