From 04c828506db125f7e1d0383bfcc8b185d2fa097c Mon Sep 17 00:00:00 2001 From: Daniel Wolf Date: Sat, 9 Apr 2016 22:07:25 +0200 Subject: [PATCH] Simplified code using Timeline --- src/Timed.h | 4 +-- src/audio/AudioStream.cpp | 4 +++ src/audio/AudioStream.h | 2 ++ src/audio/voiceActivityDetection.cpp | 44 ++++++++++++++-------------- src/audio/voiceActivityDetection.h | 5 ++-- src/logging.cpp | 7 +---- src/logging.h | 19 +++++++++++- src/main.cpp | 29 ++++++++---------- src/mouthAnimation.cpp | 20 ++++--------- src/mouthAnimation.h | 5 ++-- src/phoneExtraction.cpp | 26 +++++++--------- src/phoneExtraction.h | 5 ++-- 12 files changed, 85 insertions(+), 85 deletions(-) diff --git a/src/Timed.h b/src/Timed.h index 0358b96..5ed0ddc 100644 --- a/src/Timed.h +++ b/src/Timed.h @@ -6,12 +6,12 @@ template class Timed : public TimeRange { public: - Timed(time_type start, time_type end, TValue value) : + Timed(time_type start, time_type end, const TValue& value) : TimeRange(start, end), value(value) {} - Timed(TimeRange timeRange, TValue value) : + Timed(const TimeRange& timeRange, const TValue& value) : TimeRange(timeRange), value(value) {} diff --git a/src/audio/AudioStream.cpp b/src/audio/AudioStream.cpp index a5424cc..0ceece5 100644 --- a/src/audio/AudioStream.cpp +++ b/src/audio/AudioStream.cpp @@ -1,5 +1,9 @@ #include "AudioStream.h" +TimeRange AudioStream::getTruncatedRange() { + return TimeRange(centiseconds::zero(), centiseconds(100 * getSampleCount() / getSampleRate())); +} + bool AudioStream::endOfStream() { return getSampleIndex() >= getSampleCount(); } diff --git a/src/audio/AudioStream.h b/src/audio/AudioStream.h index 5b3e7ec..41df2ff 100644 --- a/src/audio/AudioStream.h +++ b/src/audio/AudioStream.h @@ -1,6 +1,7 @@ #pragma once #include +#include "TimeRange.h" // A mono stream of floating-point samples. class AudioStream { @@ -9,6 +10,7 @@ public: virtual std::unique_ptr clone(bool reset) = 0; virtual int getSampleRate() = 0; virtual int getSampleCount() = 0; + TimeRange getTruncatedRange(); virtual int getSampleIndex() = 0; virtual void seek(int sampleIndex) = 0; bool endOfStream(); diff --git a/src/audio/voiceActivityDetection.cpp b/src/audio/voiceActivityDetection.cpp index 4758246..4b2f82c 100644 --- a/src/audio/voiceActivityDetection.cpp +++ b/src/audio/voiceActivityDetection.cpp @@ -17,7 +17,7 @@ float getRMS(AudioStream& audioStream, int maxSampleCount = numeric_limits: return sampleCount > 0 ? static_cast(std::sqrt(sum / sampleCount)) : 0.0f; } -vector detectVoiceActivity(std::unique_ptr audioStream) { +Timeline detectVoiceActivity(std::unique_ptr audioStream) { // Make sure audio stream has no DC offset audioStream = removeDCOffset(std::move(audioStream)); @@ -26,30 +26,30 @@ vector detectVoiceActivity(std::unique_ptr audioStream) constexpr int sampleRate = 2 * maxFrequency; audioStream = convertSampleRate(std::move(audioStream), sampleRate); - float rms = getRMS(*audioStream->clone(true)); - float cutoff = rms / 50; - centiseconds maxGap(10); - - vector result; - optional segmentStart, segmentEnd; - for (centiseconds time = centiseconds(0); !audioStream->endOfStream(); ++time) { - float currentPower = getRMS(*audioStream, sampleRate / 100); - bool active = currentPower > cutoff; + // Detect activity + const float rms = getRMS(*audioStream->clone(true)); + const float cutoff = rms / 50; + Timeline activity(audioStream->getTruncatedRange()); + for (centiseconds time = centiseconds::zero(); !audioStream->endOfStream(); ++time) { + float currentRMS = getRMS(*audioStream, sampleRate / 100); + bool active = currentRMS > cutoff; if (active) { - if (!segmentStart) { - segmentStart = time; - } - segmentEnd = time + centiseconds(1); - } else if (segmentEnd && time > segmentEnd.value() + maxGap) { - result.push_back(TimeRange(segmentStart.value(), segmentEnd.value())); - logTimedEvent("utterance", segmentStart.value(), segmentEnd.value(), ""); - segmentStart.reset(); - segmentEnd.reset(); + activity[time] = true; } } - if (segmentEnd) { - result.push_back(TimeRange(segmentStart.value(), segmentEnd.value())); + + // Fill small gaps in activity + const centiseconds maxGap(10); + for (const auto& element : Timeline(activity)) { + if (!element.getValue() && element.getLength() <= maxGap) { + activity.set(static_cast(element), true); + } } - return result; + // Log + for (const auto& element : activity) { + logTimedEvent("utterance", static_cast(element), std::string()); + } + + return activity; } diff --git a/src/audio/voiceActivityDetection.h b/src/audio/voiceActivityDetection.h index 0fce151..7e854c6 100644 --- a/src/audio/voiceActivityDetection.h +++ b/src/audio/voiceActivityDetection.h @@ -1,7 +1,6 @@ #pragma once -#include -#include #include #include "AudioStream.h" +#include -std::vector detectVoiceActivity(std::unique_ptr audioStream); +Timeline detectVoiceActivity(std::unique_ptr audioStream); diff --git a/src/logging.cpp b/src/logging.cpp index 7d5119b..0e99b4a 100644 --- a/src/logging.cpp +++ b/src/logging.cpp @@ -7,8 +7,7 @@ #include // ReSharper disable once CppUnusedIncludeDirective #include -#include -#include "tools.h" +#include using std::string; using std::lock_guard; @@ -122,7 +121,3 @@ void addFileSink(const boost::filesystem::path& logFilePath, LogLevel minLogLeve sink->set_filter(severity >= minLogLevel); boost::log::core::get()->add_sink(sink); } - -void logTimedEvent(const string& eventName, centiseconds start, centiseconds end, const string& value) { - LOG_DEBUG << "##" << eventName << "[" << formatDuration(start) << "-" << formatDuration(end) << "]: " << value; -} diff --git a/src/logging.h b/src/logging.h index 0031b4c..0f1a7b4 100644 --- a/src/logging.h +++ b/src/logging.h @@ -10,7 +10,9 @@ #include #include "centiseconds.h" #include +#include "tools.h" #include "enumTools.h" +#include "Timed.h" enum class LogLevel { Trace, @@ -66,4 +68,19 @@ boost::shared_ptr addPausableStderrSink(LogLevel minLogL void addFileSink(const boost::filesystem::path& logFilePath, LogLevel minLogLevel); -void logTimedEvent(const std::string& eventName, centiseconds start, centiseconds end, const std::string& value); +template +void logTimedEvent(const std::string& eventName, const Timed timedValue) { + LOG_DEBUG + << "##" << eventName << "[" << formatDuration(timedValue.getStart()) << "-" << formatDuration(timedValue.getEnd()) << "]: " + << timedValue.getValue(); +} + +template +void logTimedEvent(const std::string& eventName, const TimeRange& timeRange, const TValue& value) { + logTimedEvent(eventName, Timed(timeRange, value)); +} + +template +void logTimedEvent(const std::string& eventName, centiseconds start, centiseconds end, const TValue& value) { + logTimedEvent(eventName, Timed(start, end, value)); +} diff --git a/src/main.cpp b/src/main.cpp index 5ecae3b..3f77a77 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -13,6 +13,7 @@ #include "logging.h" #include #include +#include using std::exception; using std::string; @@ -39,34 +40,30 @@ string getMessage(const exception& e) { unique_ptr createAudioStream(path filePath) { try { - return unique_ptr(new WaveFileReader(filePath)); + return std::make_unique(filePath); } catch (...) { std::throw_with_nested(std::runtime_error("Could not open sound file.") ); } } -ptree createXmlTree(const path& filePath, const map& phones, const map& shapes) { +ptree createXmlTree(const path& filePath, const Timeline& phones, const Timeline& shapes) { ptree tree; // Add sound file path tree.add("rhubarbResult.info.soundFile", filePath.string()); // Add phones - for (auto it = phones.cbegin(), itNext = ++phones.cbegin(); itNext != phones.cend(); ++it, ++itNext) { - auto pair = *it; - auto nextPair = *itNext; - ptree& phoneElement = tree.add("rhubarbResult.phones.phone", pair.second); - phoneElement.add(".start", formatDuration(pair.first)); - phoneElement.add(".duration", formatDuration(nextPair.first - pair.first)); + for (auto& timedPhone : phones) { + ptree& phoneElement = tree.add("rhubarbResult.phones.phone", timedPhone.getValue()); + phoneElement.add(".start", formatDuration(timedPhone.getStart())); + phoneElement.add(".duration", formatDuration(timedPhone.getLength())); } // Add mouth cues - for (auto it = shapes.cbegin(), itNext = ++shapes.cbegin(); itNext != shapes.cend(); ++it, ++itNext) { - auto pair = *it; - auto nextPair = *itNext; - ptree& mouthCueElement = tree.add("rhubarbResult.mouthCues.mouthCue", pair.second); - mouthCueElement.add(".start", formatDuration(pair.first)); - mouthCueElement.add(".duration", formatDuration(nextPair.first - pair.first)); + for (auto& timedShape : shapes) { + ptree& mouthCueElement = tree.add("rhubarbResult.mouthCues.mouthCue", timedShape.getValue()); + mouthCueElement.add(".start", formatDuration(timedShape.getStart())); + mouthCueElement.add(".duration", formatDuration(timedShape.getLength())); } return tree; @@ -115,7 +112,7 @@ int main(int argc, char *argv[]) { const int columnWidth = 30; std::cerr << std::left; std::cerr << std::setw(columnWidth) << "Analyzing input file"; - map phones; + Timeline phones{}; { ProgressBar progressBar; phones = detectPhones( @@ -127,7 +124,7 @@ int main(int argc, char *argv[]) { // Generate mouth shapes std::cerr << std::setw(columnWidth) << "Generating mouth shapes"; - map shapes = animate(phones); + Timeline shapes = animate(phones); std::cerr << "Done" << std::endl; std::cerr << std::endl; diff --git a/src/mouthAnimation.cpp b/src/mouthAnimation.cpp index 37ef836..f802140 100644 --- a/src/mouthAnimation.cpp +++ b/src/mouthAnimation.cpp @@ -67,20 +67,12 @@ Shape getShape(Phone phone) { } } -map animate(const map &phones) { - map shapes; - Shape lastShape = Shape::Invalid; - for (auto it = phones.cbegin(); it != phones.cend(); ++it) { - Shape shape = getShape(it->second); - if (shape != lastShape || next(it) == phones.cend()) { - shapes[it->first] = shape; - lastShape = shape; - } - } - - for (auto it = shapes.cbegin(); it != shapes.cend(); ++it) { - if (next(it) == shapes.cend()) break; - logTimedEvent("shape", it->first, next(it)->first, enumToString(it->second)); +Timeline animate(const Timeline &phones) { + Timeline shapes(phones.getRange()); + for (auto& timedPhone : phones) { + Timed timedShape(static_cast(timedPhone), getShape(timedPhone.getValue())); + shapes.set(timedShape); + logTimedEvent("shape", timedShape); } return shapes; diff --git a/src/mouthAnimation.h b/src/mouthAnimation.h index 1e4d820..046ed8e 100644 --- a/src/mouthAnimation.h +++ b/src/mouthAnimation.h @@ -1,8 +1,7 @@ #pragma once -#include #include "Phone.h" -#include "centiseconds.h" #include "Shape.h" +#include "Timeline.h" -std::map animate(const std::map& phones); +Timeline animate(const Timeline& phones); diff --git a/src/phoneExtraction.cpp b/src/phoneExtraction.cpp index 1d6db77..e5fdf04 100644 --- a/src/phoneExtraction.cpp +++ b/src/phoneExtraction.cpp @@ -11,6 +11,7 @@ #include #include #include