From a8900f80ec66f71791222c952b856caeb32f69fb Mon Sep 17 00:00:00 2001 From: Daniel Wolf Date: Tue, 15 Mar 2016 19:56:02 +0100 Subject: [PATCH] Removing DC offset from audio Also a bit of refactoring regarding audio processing --- CMakeLists.txt | 2 + src/audio/DCOffset.cpp | 80 +++++++++++++++++++++++++++++++ src/audio/DCOffset.h | 26 ++++++++++ src/audio/SampleRateConverter.cpp | 35 +++++++++----- src/audio/SampleRateConverter.h | 10 ++-- src/audio/UnboundedStream.cpp | 59 +++++++++++++++++++++++ src/audio/UnboundedStream.h | 22 +++++++++ src/phoneExtraction.cpp | 20 +++----- 8 files changed, 225 insertions(+), 29 deletions(-) create mode 100644 src/audio/DCOffset.cpp create mode 100644 src/audio/DCOffset.h create mode 100644 src/audio/UnboundedStream.cpp create mode 100644 src/audio/UnboundedStream.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 107b6f1..79f9266 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -102,7 +102,9 @@ set(SOURCE_FILES src/platformTools.cpp src/tools.cpp src/audio/AudioStream.cpp + src/audio/DCOffset.cpp src/audio/SampleRateConverter.cpp + src/audio/UnboundedStream.cpp src/audio/WaveFileReader.cpp src/audio/waveFileWriting.cpp src/stringTools.cpp diff --git a/src/audio/DCOffset.cpp b/src/audio/DCOffset.cpp new file mode 100644 index 0000000..6937e1a --- /dev/null +++ b/src/audio/DCOffset.cpp @@ -0,0 +1,80 @@ +#include "DCOffset.h" +#include + +DCOffset::DCOffset(std::unique_ptr inputStream, float offset) : + inputStream(std::move(inputStream)), + offset(offset), + factor(1 / (1 + std::abs(offset))) +{} + +DCOffset::DCOffset(const DCOffset& rhs, bool reset) : + inputStream(rhs.inputStream->clone(reset)), + offset(rhs.offset), + factor(rhs.factor) +{} + +std::unique_ptr DCOffset::clone(bool reset) { + return std::make_unique(*this, reset); +} + +int DCOffset::getSampleRate() { + return inputStream->getSampleRate(); +} + +int DCOffset::getSampleCount() { + return inputStream->getSampleCount(); +} + +int DCOffset::getSampleIndex() {
+ return inputStream->getSampleIndex(); +} + +void DCOffset::seek(int sampleIndex) { + inputStream->seek(sampleIndex); +} + +float DCOffset::readSample() { + float sample = inputStream->readSample(); + return sample * factor + offset; +} + +std::unique_ptr addDCOffset(std::unique_ptr audioStream, float offset, float epsilon) { + if (std::abs(offset) < epsilon) return audioStream; + return std::make_unique(std::move(audioStream), offset); +} + +float getDCOffset(AudioStream& audioStream) { + int flatMeanSampleCount, fadingMeanSampleCount; + int sampleRate = audioStream.getSampleRate(); + if (audioStream.getSampleCount() > 4 * sampleRate) { + // Long audio file. Average over the first 3 seconds, then fade out over the 4th. + flatMeanSampleCount = 3 * sampleRate; + fadingMeanSampleCount = 1 * sampleRate; + } else { + // Short audio file. Average over the entire length. + flatMeanSampleCount = audioStream.getSampleCount(); + fadingMeanSampleCount = 0; + } + + int originalSampleIndex = audioStream.getSampleIndex(); + audioStream.seek(0); + auto restorePosition = gsl::finally([&]() { audioStream.seek(originalSampleIndex); }); + + double sum = 0; + for (int i = 0; i < flatMeanSampleCount; i++) { + sum += audioStream.readSample(); + } + for (int i = 0; i < fadingMeanSampleCount; i++) { + double weight = static_cast(fadingMeanSampleCount - i) / fadingMeanSampleCount; + sum += audioStream.readSample() * weight; + } + + double totalWeight = flatMeanSampleCount + (fadingMeanSampleCount > 0 ? (fadingMeanSampleCount + 1) / 2.0 : 0.0); // The fade-out weight only counts when fade-out samples were actually read; short files use a plain mean. + double offset = totalWeight > 0 ? sum / totalWeight : 0.0; // Nothing was read (empty stream): report no offset rather than dividing by zero. + return static_cast(offset); +} + +std::unique_ptr removeDCOffset(std::unique_ptr inputStream) { + float offset = getDCOffset(*inputStream.get()); + return addDCOffset(std::move(inputStream), -offset); +} diff --git a/src/audio/DCOffset.h b/src/audio/DCOffset.h new file mode 100644 index 0000000..070fd3d --- /dev/null +++ b/src/audio/DCOffset.h @@ -0,0 +1,26 @@ +#pragma once + +#include "AudioStream.h" + +// Applies a constant DC offset to an
audio stream and reduces its amplitude +// to prevent clipping +class DCOffset : public AudioStream { +public: + DCOffset(std::unique_ptr inputStream, float offset); + DCOffset(const DCOffset& rhs, bool reset); + std::unique_ptr clone(bool reset) override; + int getSampleRate() override; + int getSampleCount() override; + int getSampleIndex() override; + void seek(int sampleIndex) override; + float readSample() override; + +private: + std::unique_ptr inputStream; + float offset; + float factor; +}; + +std::unique_ptr addDCOffset(std::unique_ptr audioStream, float offset, float epsilon = 1.0f / 15000); + +std::unique_ptr removeDCOffset(std::unique_ptr audioStream); diff --git a/src/audio/SampleRateConverter.cpp b/src/audio/SampleRateConverter.cpp index 5f97c87..d94a8f0 100644 --- a/src/audio/SampleRateConverter.cpp +++ b/src/audio/SampleRateConverter.cpp @@ -2,25 +2,29 @@ #include "SampleRateConverter.h" #include #include +#include -using std::runtime_error; +using std::invalid_argument; -SampleRateConverter::SampleRateConverter(std::unique_ptr inputStream, int outputFrameRate) : +SampleRateConverter::SampleRateConverter(std::unique_ptr inputStream, int outputSampleRate) : inputStream(std::move(inputStream)), - downscalingFactor(static_cast(this->inputStream->getSampleRate()) / outputFrameRate), - outputFrameRate(outputFrameRate), - outputFrameCount(std::lround(this->inputStream->getSampleCount() / downscalingFactor)), + downscalingFactor(static_cast(this->inputStream->getSampleRate()) / outputSampleRate), + outputSampleRate(outputSampleRate), + outputSampleCount(std::lround(this->inputStream->getSampleCount() / downscalingFactor)), lastInputSample(0), lastInputSampleIndex(-1), nextOutputSampleIndex(0) { - if (this->inputStream->getSampleRate() < outputFrameRate) { - throw runtime_error("Upsampling not supported."); + if (outputSampleRate <= 0) { + throw invalid_argument("Sample rate must be positive."); + } + if (this->inputStream->getSampleRate() < 
outputSampleRate) { + throw invalid_argument(fmt::format("Upsampling not supported. Audio sample rate must not be below {}Hz.", outputSampleRate)); } } SampleRateConverter::SampleRateConverter(const SampleRateConverter& rhs, bool reset) : - SampleRateConverter(rhs.inputStream->clone(reset), outputFrameRate) + SampleRateConverter(rhs.inputStream->clone(reset), rhs.outputSampleRate) { nextOutputSampleIndex = reset ? 0 : rhs.nextOutputSampleIndex; } @@ -30,11 +34,11 @@ std::unique_ptr SampleRateConverter::clone(bool reset) { } int SampleRateConverter::getSampleRate() { - return outputFrameRate; + return outputSampleRate; } int SampleRateConverter::getSampleCount() { - return outputFrameCount; + return outputSampleCount; } int SampleRateConverter::getSampleIndex() { @@ -42,13 +46,13 @@ int SampleRateConverter::getSampleIndex() { } void SampleRateConverter::seek(int sampleIndex) { - if (sampleIndex < 0 || sampleIndex >= outputFrameCount) throw std::invalid_argument("sampleIndex out of range."); + if (sampleIndex < 0 || sampleIndex >= outputSampleCount) throw std::invalid_argument("sampleIndex out of range."); nextOutputSampleIndex = sampleIndex; } float SampleRateConverter::readSample() { - if (nextOutputSampleIndex >= outputFrameCount) throw std::out_of_range("End of stream."); + if (nextOutputSampleIndex >= outputSampleCount) throw std::out_of_range("End of stream."); double inputStart = nextOutputSampleIndex * downscalingFactor; double inputEnd = (nextOutputSampleIndex + 1) * downscalingFactor; @@ -92,3 +96,10 @@ float SampleRateConverter::getInputSample(int sampleIndex) { lastInputSampleIndex = sampleIndex; return lastInputSample; } + +std::unique_ptr convertSampleRate(std::unique_ptr audioStream, int sampleRate) { + if (sampleRate == audioStream->getSampleRate()) { + return audioStream; + } + return std::make_unique(std::move(audioStream), sampleRate); +} diff --git a/src/audio/SampleRateConverter.h b/src/audio/SampleRateConverter.h index 5b9d35a..50835d4 100644 
--- a/src/audio/SampleRateConverter.h +++ b/src/audio/SampleRateConverter.h @@ -5,7 +5,7 @@ class SampleRateConverter : public AudioStream { public: - SampleRateConverter(std::unique_ptr inputStream, int outputFrameRate); + SampleRateConverter(std::unique_ptr inputStream, int outputSampleRate); SampleRateConverter(const SampleRateConverter& rhs, bool reset); std::unique_ptr clone(bool reset) override; int getSampleRate() override; @@ -15,10 +15,10 @@ public: float readSample() override; private: std::unique_ptr inputStream; - double downscalingFactor; // input frame rate / output frame rate + double downscalingFactor; // input sample rate / output sample rate - int outputFrameRate; - int outputFrameCount; + int outputSampleRate; + int outputSampleCount; float lastInputSample; int lastInputSampleIndex; @@ -28,3 +28,5 @@ private: float mean(double start, double end); float getInputSample(int sampleIndex); }; + +std::unique_ptr convertSampleRate(std::unique_ptr audioStream, int sampleRate); \ No newline at end of file diff --git a/src/audio/UnboundedStream.cpp b/src/audio/UnboundedStream.cpp new file mode 100644 index 0000000..b01c197 --- /dev/null +++ b/src/audio/UnboundedStream.cpp @@ -0,0 +1,65 @@ +#include "UnboundedStream.h" + +using boost::optional; + +// Takes ownership of inputStream. Members are initialized in declaration order +// (innerStream first), so the later initializers may safely use innerStream. +UnboundedStream::UnboundedStream(std::unique_ptr inputStream) : + innerStream(std::move(inputStream)), + sampleIndex(innerStream->getSampleIndex()), + firstSample(innerStream->getSampleCount() ? optional() : 0.0f), + lastSample(innerStream->getSampleCount() ?
optional() : 0.0f) +{} + +// A reset clone starts at sample 0, like SampleRateConverter's clone constructor. +UnboundedStream::UnboundedStream(const UnboundedStream& rhs, bool reset) : + innerStream(rhs.innerStream->clone(reset)), + sampleIndex(reset ? 0 : rhs.sampleIndex), + firstSample(rhs.firstSample), + lastSample(rhs.lastSample) +{} + +std::unique_ptr UnboundedStream::clone(bool reset) { + return std::make_unique(*this, reset); +} + +int UnboundedStream::getSampleRate() { + return innerStream->getSampleRate(); +} + +int UnboundedStream::getSampleCount() { + return innerStream->getSampleCount(); +} + +int UnboundedStream::getSampleIndex() { + return sampleIndex; +} + +void UnboundedStream::seek(int sampleIndex) { + this->sampleIndex = sampleIndex; +} + +// Returns the sample at the current position and advances the position by one. +// Positions before the start or past the end yield the cached first or last sample. +float UnboundedStream::readSample() { + const int index = sampleIndex++; + if (index < 0) { + if (!firstSample) { + innerStream->seek(0); + firstSample = innerStream->readSample(); + } + return firstSample.get(); + } + if (index >= innerStream->getSampleCount()) { + if (!lastSample) { + innerStream->seek(innerStream->getSampleCount() - 1); + lastSample = innerStream->readSample(); + } + return lastSample.get(); + } + + if (index != innerStream->getSampleIndex()) { + innerStream->seek(index); + } + return innerStream->readSample(); +} diff --git a/src/audio/UnboundedStream.h b/src/audio/UnboundedStream.h new file mode 100644 index 0000000..d20e441 --- /dev/null +++ b/src/audio/UnboundedStream.h @@ -0,0 +1,22 @@ +#pragma once + +#include "AudioStream.h" +#include + +// Stream wrapper that allows reading before the start and past the end of the input stream.
+class UnboundedStream : public AudioStream { +public: + UnboundedStream(std::unique_ptr inputStream); + UnboundedStream(const UnboundedStream& rhs, bool reset); + std::unique_ptr clone(bool reset) override; + int getSampleRate() override; + int getSampleCount() override; + int getSampleIndex() override; + void seek(int sampleIndex) override; + float readSample() override; + +private: + std::unique_ptr innerStream; + int sampleIndex; + boost::optional firstSample, lastSample; +}; diff --git a/src/phoneExtraction.cpp b/src/phoneExtraction.cpp index f7b2261..1d6db77 100644 --- a/src/phoneExtraction.cpp +++ b/src/phoneExtraction.cpp @@ -10,6 +10,7 @@ #include #include #include +#include