rhubarb-lip-sync/src/audio/voiceActivityDetection.cpp

56 lines
1.7 KiB
C++
Raw Normal View History

2016-03-15 21:52:31 +00:00
#include "voiceActivityDetection.h"
#include <audio/DCOffset.h>
#include <audio/SampleRateConverter.h>
#include <boost/optional/optional.hpp>
#include <logging.h>
using std::numeric_limits;
using std::vector;
using boost::optional;
/// Computes the root mean square (RMS) of samples read from an audio stream.
///
/// Samples are pulled from the stream via readSample() until either
/// `maxSampleCount` samples have been read or the stream reports
/// endOfStream(). The samples that were read are consumed from the stream.
///
/// @param audioStream     Stream to read samples from.
/// @param maxSampleCount  Upper bound on the number of samples to read.
///                        Defaults to the largest int, i.e. "read to the end".
/// @return The square root of the mean of the squared samples, or 0.0f if
///         no sample could be read (empty stream or non-positive limit).
float getRMS(AudioStream& audioStream, int maxSampleCount = numeric_limits<int>::max()) {
    double squareSum = 0.0;
    int sampleCount = 0;
    while (sampleCount < maxSampleCount && !audioStream.endOfStream()) {
        // Accumulate in double precision. Square by multiplication rather
        // than std::pow(x, 2): it is exact and avoids a general-purpose call.
        const double sample = static_cast<double>(audioStream.readSample());
        squareSum += sample * sample;
        ++sampleCount;
    }

    // Nothing read: avoid dividing by zero.
    if (sampleCount == 0) {
        return 0.0f;
    }
    return static_cast<float>(std::sqrt(squareSum / sampleCount));
}
2016-04-09 20:07:25 +00:00
/// Detects which parts of an audio stream contain voice activity.
///
/// The stream is first passed through removeDCOffset() and resampled to
/// 2 kHz. The RMS of the whole stream is then used to derive a cutoff
/// (1/50 of the overall RMS); every centisecond whose own RMS exceeds that
/// cutoff is marked active. Finally, inactive stretches of at most
/// 10 centiseconds are filled in as active.
///
/// @param audioStream  The audio to analyze. Ownership is taken; the stream
///                     is consumed during analysis.
/// @return A timeline spanning the stream's truncated range in which active
///         stretches carry the value `true`.
Timeline<bool> detectVoiceActivity(std::unique_ptr<AudioStream> audioStream) {
    // Make sure audio stream has no DC offset
    audioStream = removeDCOffset(std::move(audioStream));

    // Resample to remove noise
    constexpr int maxFrequency = 1000;
    constexpr int sampleRate = 2 * maxFrequency;
    audioStream = convertSampleRate(std::move(audioStream), sampleRate);

    // Detect activity.
    // The overall RMS is measured on a clone, so audioStream itself is still
    // unread for the per-centisecond pass below.
    // NOTE(review): clone(true) presumably yields a clone positioned at the
    // start of the stream -- confirm against AudioStream::clone.
    const float rms = getRMS(*audioStream->clone(true));
    const float cutoff = rms / 50;
    constexpr int samplesPerCentisecond = sampleRate / 100;
    Timeline<bool> activity(audioStream->getTruncatedRange());
    for (centiseconds time = centiseconds::zero(); !audioStream->endOfStream(); ++time) {
        // Each getRMS call consumes the next centisecond worth of samples.
        const float currentRMS = getRMS(*audioStream, samplesPerCentisecond);
        if (currentRMS > cutoff) {
            activity[time] = true;
        }
    }

    // Fill small gaps in activity.
    // Iterate over a copy, because the loop body modifies `activity`.
    const centiseconds maxGap(10);
    for (const auto& element : Timeline<bool>(activity)) {
        if (!element.getValue() && element.getLength() <= maxGap) {
            activity.set(static_cast<TimeRange>(element), true);
        }
    }

    // Log. Only active elements are utterances; inactive stretches that
    // remain on the timeline must not be reported as such.
    for (const auto& element : activity) {
        if (!element.getValue()) {
            continue;
        }
        logging::logTimedEvent("utterance", static_cast<TimeRange>(element), std::string());
    }

    return activity;
}