Added AutoJoin template parameter to timeline classes
Previously, timelines would always automatically join adjacent elements if their values were equal. That behavior was usually desired for mouth shapes, but not for phones, animation rules, etc.
This commit is contained in:
parent
2eb0948c49
commit
a24fe8874c
|
@ -61,4 +61,3 @@ struct ShapeRule {
|
||||||
// The resulting timeline will always cover the entire duration of the phone (starting at 0 cs).
|
// The resulting timeline will always cover the entire duration of the phone (starting at 0 cs).
|
||||||
// It may extend into the negative time range if animation is required prior to the sound being heard.
|
// It may extend into the negative time range if animation is required prior to the sound being heard.
|
||||||
Timeline<ShapeRule> getShapeRules(Phone phone, centiseconds duration, centiseconds previousDuration);
|
Timeline<ShapeRule> getShapeRules(Phone phone, centiseconds duration, centiseconds previousDuration);
|
||||||
|
|
||||||
|
|
|
@ -20,11 +20,11 @@ using boost::adaptors::transformed;
|
||||||
using std::pair;
|
using std::pair;
|
||||||
using std::tuple;
|
using std::tuple;
|
||||||
|
|
||||||
Timeline<Shape> createTweens(ContinuousTimeline<Shape> shapes) {
|
JoiningTimeline<Shape> createTweens(JoiningContinuousTimeline<Shape> shapes) {
|
||||||
centiseconds minTweenDuration = 4_cs;
|
centiseconds minTweenDuration = 4_cs;
|
||||||
centiseconds maxTweenDuration = 10_cs;
|
centiseconds maxTweenDuration = 10_cs;
|
||||||
|
|
||||||
Timeline<Shape> tweens;
|
JoiningTimeline<Shape> tweens;
|
||||||
|
|
||||||
for (auto first = shapes.begin(), second = std::next(shapes.begin());
|
for (auto first = shapes.begin(), second = std::next(shapes.begin());
|
||||||
first != shapes.end() && second != shapes.end();
|
first != shapes.end() && second != shapes.end();
|
||||||
|
@ -66,8 +66,8 @@ Timeline<Shape> createTweens(ContinuousTimeline<Shape> shapes) {
|
||||||
return tweens;
|
return tweens;
|
||||||
}
|
}
|
||||||
|
|
||||||
Timeline<Shape> animatePauses(const ContinuousTimeline<Shape>& shapes) {
|
JoiningTimeline<Shape> animatePauses(const JoiningContinuousTimeline<Shape>& shapes) {
|
||||||
Timeline<Shape> result;
|
JoiningTimeline<Shape> result;
|
||||||
|
|
||||||
// Don't close mouth for short pauses
|
// Don't close mouth for short pauses
|
||||||
for_each_adjacent(shapes.begin(), shapes.end(), [&](const Timed<Shape>& lhs, const Timed<Shape>& pause, const Timed<Shape>& rhs) {
|
for_each_adjacent(shapes.begin(), shapes.end(), [&](const Timed<Shape>& lhs, const Timed<Shape>& pause, const Timed<Shape>& rhs) {
|
||||||
|
@ -96,8 +96,8 @@ Timeline<Shape> animatePauses(const ContinuousTimeline<Shape>& shapes) {
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename T>
|
template<typename T, bool AutoJoin>
|
||||||
ContinuousTimeline<optional<T>> boundedTimelinetoContinuousOptional(const BoundedTimeline<T>& timeline) {
|
ContinuousTimeline<optional<T>, AutoJoin> boundedTimelinetoContinuousOptional(const BoundedTimeline<T, AutoJoin>& timeline) {
|
||||||
return {
|
return {
|
||||||
timeline.getRange(), boost::none,
|
timeline.getRange(), boost::none,
|
||||||
timeline | transformed([](const Timed<T>& timedValue) { return Timed<optional<T>>(timedValue.getTimeRange(), timedValue.getValue()); })
|
timeline | transformed([](const Timed<T>& timedValue) { return Timed<optional<T>>(timedValue.getTimeRange(), timedValue.getValue()); })
|
||||||
|
@ -143,8 +143,8 @@ ContinuousTimeline<ShapeRule> getShapeRules(const BoundedTimeline<Phone>& phones
|
||||||
// always choosing a shape from the current set that resembles the last shape and is somewhat relaxed.
|
// always choosing a shape from the current set that resembles the last shape and is somewhat relaxed.
|
||||||
// * When speaking, we anticipate vowels, trying to form their shape before the actual vowel.
|
// * When speaking, we anticipate vowels, trying to form their shape before the actual vowel.
|
||||||
// So whenever we come across a one-shape set, we backtrack a little, spreading that shape to the left.
|
// So whenever we come across a one-shape set, we backtrack a little, spreading that shape to the left.
|
||||||
ContinuousTimeline<Shape> animate(const ContinuousTimeline<ShapeSet>& shapeSets) {
|
JoiningContinuousTimeline<Shape> animate(const ContinuousTimeline<ShapeSet>& shapeSets) {
|
||||||
ContinuousTimeline<Shape> shapes(shapeSets.getRange(), X);
|
JoiningContinuousTimeline<Shape> shapes(shapeSets.getRange(), X);
|
||||||
|
|
||||||
Shape referenceShape = X;
|
Shape referenceShape = X;
|
||||||
// Animate forwards
|
// Animate forwards
|
||||||
|
@ -186,7 +186,7 @@ ContinuousTimeline<Shape> animate(const ContinuousTimeline<ShapeSet>& shapeSets)
|
||||||
return shapes;
|
return shapes;
|
||||||
}
|
}
|
||||||
|
|
||||||
ContinuousTimeline<Shape> animate(const BoundedTimeline<Phone> &phones) {
|
JoiningContinuousTimeline<Shape> animate(const BoundedTimeline<Phone> &phones) {
|
||||||
// Create timeline of shape rules
|
// Create timeline of shape rules
|
||||||
ContinuousTimeline<ShapeRule> shapeRules = getShapeRules(phones);
|
ContinuousTimeline<ShapeRule> shapeRules = getShapeRules(phones);
|
||||||
|
|
||||||
|
@ -196,16 +196,16 @@ ContinuousTimeline<Shape> animate(const BoundedTimeline<Phone> &phones) {
|
||||||
shapeRules | transformed([](const Timed<ShapeRule>& timedRule) { return Timed<ShapeSet>(timedRule.getTimeRange(), timedRule.getValue().regularShapes); }));
|
shapeRules | transformed([](const Timed<ShapeRule>& timedRule) { return Timed<ShapeSet>(timedRule.getTimeRange(), timedRule.getValue().regularShapes); }));
|
||||||
|
|
||||||
// Animate
|
// Animate
|
||||||
ContinuousTimeline<Shape> shapes = animate(shapeSets);
|
JoiningContinuousTimeline<Shape> shapes = animate(shapeSets);
|
||||||
|
|
||||||
// Animate pauses
|
// Animate pauses
|
||||||
Timeline<Shape> pauses = animatePauses(shapes);
|
JoiningTimeline<Shape> pauses = animatePauses(shapes);
|
||||||
for (const auto& pause : pauses) {
|
for (const auto& pause : pauses) {
|
||||||
shapes.set(pause);
|
shapes.set(pause);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Create inbetweens for smoother animation
|
// Create inbetweens for smoother animation
|
||||||
Timeline<Shape> tweens = createTweens(shapes);
|
JoiningTimeline<Shape> tweens = createTweens(shapes);
|
||||||
for (const auto& tween : tweens) {
|
for (const auto& tween : tweens) {
|
||||||
shapes.set(tween);
|
shapes.set(tween);
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,4 +4,4 @@
|
||||||
#include "Shape.h"
|
#include "Shape.h"
|
||||||
#include "ContinuousTimeline.h"
|
#include "ContinuousTimeline.h"
|
||||||
|
|
||||||
ContinuousTimeline<Shape> animate(const BoundedTimeline<Phone>& phones);
|
JoiningContinuousTimeline<Shape> animate(const BoundedTimeline<Phone>& phones);
|
||||||
|
|
|
@ -9,6 +9,7 @@
|
||||||
#include <gsl_util.h>
|
#include <gsl_util.h>
|
||||||
#include "parallel.h"
|
#include "parallel.h"
|
||||||
#include "AudioSegment.h"
|
#include "AudioSegment.h"
|
||||||
|
#include "stringTools.h"
|
||||||
|
|
||||||
using std::vector;
|
using std::vector;
|
||||||
using boost::adaptors::transformed;
|
using boost::adaptors::transformed;
|
||||||
|
@ -16,7 +17,7 @@ using fmt::format;
|
||||||
using std::runtime_error;
|
using std::runtime_error;
|
||||||
using std::unique_ptr;
|
using std::unique_ptr;
|
||||||
|
|
||||||
BoundedTimeline<void> webRtcDetectVoiceActivity(const AudioClip& audioClip, ProgressSink& progressSink) {
|
JoiningBoundedTimeline<void> webRtcDetectVoiceActivity(const AudioClip& audioClip, ProgressSink& progressSink) {
|
||||||
VadInst* vadHandle = WebRtcVad_Create();
|
VadInst* vadHandle = WebRtcVad_Create();
|
||||||
if (!vadHandle) throw runtime_error("Error creating WebRTC VAD handle.");
|
if (!vadHandle) throw runtime_error("Error creating WebRTC VAD handle.");
|
||||||
|
|
||||||
|
@ -34,7 +35,7 @@ BoundedTimeline<void> webRtcDetectVoiceActivity(const AudioClip& audioClip, Prog
|
||||||
ProgressSink& pass2ProgressSink = progressMerger.addSink(0.3);
|
ProgressSink& pass2ProgressSink = progressMerger.addSink(0.3);
|
||||||
|
|
||||||
// Detect activity
|
// Detect activity
|
||||||
BoundedTimeline<void> activity(audioClip.getTruncatedRange());
|
JoiningBoundedTimeline<void> activity(audioClip.getTruncatedRange());
|
||||||
centiseconds time = 0_cs;
|
centiseconds time = 0_cs;
|
||||||
const size_t bufferCapacity = audioClip.getSampleRate() / 100;
|
const size_t bufferCapacity = audioClip.getSampleRate() / 100;
|
||||||
auto processBuffer = [&](const vector<int16_t>& buffer) {
|
auto processBuffer = [&](const vector<int16_t>& buffer) {
|
||||||
|
@ -66,11 +67,11 @@ BoundedTimeline<void> webRtcDetectVoiceActivity(const AudioClip& audioClip, Prog
|
||||||
return activity;
|
return activity;
|
||||||
}
|
}
|
||||||
|
|
||||||
BoundedTimeline<void> detectVoiceActivity(const AudioClip& inputAudioClip, int maxThreadCount, ProgressSink& progressSink) {
|
JoiningBoundedTimeline<void> detectVoiceActivity(const AudioClip& inputAudioClip, int maxThreadCount, ProgressSink& progressSink) {
|
||||||
// Prepare audio for VAD
|
// Prepare audio for VAD
|
||||||
const unique_ptr<AudioClip> audioClip = inputAudioClip.clone() | resample(16000) | removeDcOffset();
|
const unique_ptr<AudioClip> audioClip = inputAudioClip.clone() | resample(16000) | removeDcOffset();
|
||||||
|
|
||||||
BoundedTimeline<void> activity(audioClip->getTruncatedRange());
|
JoiningBoundedTimeline<void> activity(audioClip->getTruncatedRange());
|
||||||
std::mutex activityMutex;
|
std::mutex activityMutex;
|
||||||
|
|
||||||
// Split audio into segments and perform parallel VAD
|
// Split audio into segments and perform parallel VAD
|
||||||
|
@ -83,7 +84,7 @@ BoundedTimeline<void> detectVoiceActivity(const AudioClip& inputAudioClip, int m
|
||||||
}
|
}
|
||||||
runParallel([&](const TimeRange& segmentRange, ProgressSink& segmentProgressSink) {
|
runParallel([&](const TimeRange& segmentRange, ProgressSink& segmentProgressSink) {
|
||||||
unique_ptr<AudioClip> audioSegment = audioClip->clone() | segment(segmentRange);
|
unique_ptr<AudioClip> audioSegment = audioClip->clone() | segment(segmentRange);
|
||||||
BoundedTimeline<void> activitySegment = webRtcDetectVoiceActivity(*audioSegment, segmentProgressSink);
|
JoiningBoundedTimeline<void> activitySegment = webRtcDetectVoiceActivity(*audioSegment, segmentProgressSink);
|
||||||
|
|
||||||
std::lock_guard<std::mutex> lock(activityMutex);
|
std::lock_guard<std::mutex> lock(activityMutex);
|
||||||
for (auto activityRange : activitySegment) {
|
for (auto activityRange : activitySegment) {
|
||||||
|
@ -102,7 +103,7 @@ BoundedTimeline<void> detectVoiceActivity(const AudioClip& inputAudioClip, int m
|
||||||
|
|
||||||
// Shorten activities. WebRTC adds a bit of buffer at the end.
|
// Shorten activities. WebRTC adds a bit of buffer at the end.
|
||||||
const centiseconds tail(5);
|
const centiseconds tail(5);
|
||||||
for (const auto& utterance : Timeline<void>(activity)) {
|
for (const auto& utterance : JoiningBoundedTimeline<void>(activity)) {
|
||||||
if (utterance.getDuration() > tail && utterance.getEnd() < audioDuration) {
|
if (utterance.getDuration() > tail && utterance.getEnd() < audioDuration) {
|
||||||
activity.clear(utterance.getEnd() - tail, utterance.getEnd());
|
activity.clear(utterance.getEnd() - tail, utterance.getEnd());
|
||||||
}
|
}
|
||||||
|
|
|
@ -3,4 +3,4 @@
|
||||||
#include <BoundedTimeline.h>
|
#include <BoundedTimeline.h>
|
||||||
#include <ProgressBar.h>
|
#include <ProgressBar.h>
|
||||||
|
|
||||||
BoundedTimeline<void> detectVoiceActivity(const AudioClip& audioClip, int maxThreadCount, ProgressSink& progressSink);
|
JoiningBoundedTimeline<void> detectVoiceActivity(const AudioClip& audioClip, int maxThreadCount, ProgressSink& progressSink);
|
||||||
|
|
|
@ -7,5 +7,5 @@
|
||||||
class Exporter {
|
class Exporter {
|
||||||
public:
|
public:
|
||||||
virtual ~Exporter() {}
|
virtual ~Exporter() {}
|
||||||
virtual void exportShapes(const boost::filesystem::path& inputFilePath, const ContinuousTimeline<Shape>& shapes, std::ostream& outputStream) = 0;
|
virtual void exportShapes(const boost::filesystem::path& inputFilePath, const JoiningContinuousTimeline<Shape>& shapes, std::ostream& outputStream) = 0;
|
||||||
};
|
};
|
||||||
|
|
|
@ -25,7 +25,7 @@ string escapeJsonString(const string& s) {
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
void JsonExporter::exportShapes(const boost::filesystem::path& inputFilePath, const ContinuousTimeline<Shape>& shapes, std::ostream& outputStream) {
|
void JsonExporter::exportShapes(const boost::filesystem::path& inputFilePath, const JoiningContinuousTimeline<Shape>& shapes, std::ostream& outputStream) {
|
||||||
// Export as JSON.
|
// Export as JSON.
|
||||||
// I'm not using a library because the code is short enough without one and it lets me control the formatting.
|
// I'm not using a library because the code is short enough without one and it lets me control the formatting.
|
||||||
outputStream << "{\n";
|
outputStream << "{\n";
|
||||||
|
|
|
@ -4,5 +4,5 @@
|
||||||
|
|
||||||
class JsonExporter : public Exporter {
|
class JsonExporter : public Exporter {
|
||||||
public:
|
public:
|
||||||
void exportShapes(const boost::filesystem::path& inputFilePath, const ContinuousTimeline<Shape>& shapes, std::ostream& outputStream) override;
|
void exportShapes(const boost::filesystem::path& inputFilePath, const JoiningContinuousTimeline<Shape>& shapes, std::ostream& outputStream) override;
|
||||||
};
|
};
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
#include "TsvExporter.h"
|
#include "TsvExporter.h"
|
||||||
|
|
||||||
void TsvExporter::exportShapes(const boost::filesystem::path& inputFilePath, const ContinuousTimeline<Shape>& shapes, std::ostream& outputStream) {
|
void TsvExporter::exportShapes(const boost::filesystem::path& inputFilePath, const JoiningContinuousTimeline<Shape>& shapes, std::ostream& outputStream) {
|
||||||
UNUSED(inputFilePath);
|
UNUSED(inputFilePath);
|
||||||
|
|
||||||
// Output shapes with start times
|
// Output shapes with start times
|
||||||
|
|
|
@ -4,6 +4,6 @@
|
||||||
|
|
||||||
class TsvExporter : public Exporter {
|
class TsvExporter : public Exporter {
|
||||||
public:
|
public:
|
||||||
void exportShapes(const boost::filesystem::path& inputFilePath, const ContinuousTimeline<Shape>& shapes, std::ostream& outputStream) override;
|
void exportShapes(const boost::filesystem::path& inputFilePath, const JoiningContinuousTimeline<Shape>& shapes, std::ostream& outputStream) override;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -6,7 +6,7 @@
|
||||||
using std::string;
|
using std::string;
|
||||||
using boost::property_tree::ptree;
|
using boost::property_tree::ptree;
|
||||||
|
|
||||||
void XmlExporter::exportShapes(const boost::filesystem::path& inputFilePath, const ContinuousTimeline<Shape>& shapes, std::ostream& outputStream) {
|
void XmlExporter::exportShapes(const boost::filesystem::path& inputFilePath, const JoiningContinuousTimeline<Shape>& shapes, std::ostream& outputStream) {
|
||||||
ptree tree;
|
ptree tree;
|
||||||
|
|
||||||
// Add metadata
|
// Add metadata
|
||||||
|
|
|
@ -4,5 +4,5 @@
|
||||||
|
|
||||||
class XmlExporter : public Exporter {
|
class XmlExporter : public Exporter {
|
||||||
public:
|
public:
|
||||||
void exportShapes(const boost::filesystem::path& inputFilePath, const ContinuousTimeline<Shape>& shapes, std::ostream& outputStream) override;
|
void exportShapes(const boost::filesystem::path& inputFilePath, const JoiningContinuousTimeline<Shape>& shapes, std::ostream& outputStream) override;
|
||||||
};
|
};
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
#include "exporterTools.h"
|
#include "exporterTools.h"
|
||||||
|
|
||||||
// Makes sure there is at least one mouth shape
|
// Makes sure there is at least one mouth shape
|
||||||
std::vector<Timed<Shape>> dummyShapeIfEmpty(const Timeline<Shape>& shapes) {
|
std::vector<Timed<Shape>> dummyShapeIfEmpty(const JoiningTimeline<Shape>& shapes) {
|
||||||
std::vector<Timed<Shape>> result;
|
std::vector<Timed<Shape>> result;
|
||||||
std::copy(shapes.begin(), shapes.end(), std::back_inserter(result));
|
std::copy(shapes.begin(), shapes.end(), std::back_inserter(result));
|
||||||
if (result.empty()) {
|
if (result.empty()) {
|
||||||
|
|
|
@ -4,4 +4,4 @@
|
||||||
#include "Timeline.h"
|
#include "Timeline.h"
|
||||||
|
|
||||||
// Makes sure there is at least one mouth shape
|
// Makes sure there is at least one mouth shape
|
||||||
std::vector<Timed<Shape>> dummyShapeIfEmpty(const Timeline<Shape>& shapes);
|
std::vector<Timed<Shape>> dummyShapeIfEmpty(const JoiningTimeline<Shape>& shapes);
|
||||||
|
|
|
@ -10,14 +10,14 @@ using std::u32string;
|
||||||
using boost::filesystem::path;
|
using boost::filesystem::path;
|
||||||
using std::unique_ptr;
|
using std::unique_ptr;
|
||||||
|
|
||||||
ContinuousTimeline<Shape> animateAudioClip(
|
JoiningContinuousTimeline<Shape> animateAudioClip(
|
||||||
const AudioClip& audioClip,
|
const AudioClip& audioClip,
|
||||||
optional<u32string> dialog,
|
optional<u32string> dialog,
|
||||||
int maxThreadCount,
|
int maxThreadCount,
|
||||||
ProgressSink& progressSink)
|
ProgressSink& progressSink)
|
||||||
{
|
{
|
||||||
BoundedTimeline<Phone> phones = recognizePhones(audioClip, dialog, maxThreadCount, progressSink);
|
BoundedTimeline<Phone> phones = recognizePhones(audioClip, dialog, maxThreadCount, progressSink);
|
||||||
ContinuousTimeline<Shape> result = animate(phones);
|
JoiningContinuousTimeline<Shape> result = animate(phones);
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -29,7 +29,7 @@ unique_ptr<AudioClip> createWaveAudioClip(path filePath) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ContinuousTimeline<Shape> animateWaveFile(
|
JoiningContinuousTimeline<Shape> animateWaveFile(
|
||||||
path filePath,
|
path filePath,
|
||||||
optional<u32string> dialog,
|
optional<u32string> dialog,
|
||||||
int maxThreadCount,
|
int maxThreadCount,
|
||||||
|
|
|
@ -6,13 +6,13 @@
|
||||||
#include "ProgressBar.h"
|
#include "ProgressBar.h"
|
||||||
#include <boost/filesystem.hpp>
|
#include <boost/filesystem.hpp>
|
||||||
|
|
||||||
ContinuousTimeline<Shape> animateAudioClip(
|
JoiningContinuousTimeline<Shape> animateAudioClip(
|
||||||
const AudioClip& audioClip,
|
const AudioClip& audioClip,
|
||||||
boost::optional<std::u32string> dialog,
|
boost::optional<std::u32string> dialog,
|
||||||
int maxThreadCount,
|
int maxThreadCount,
|
||||||
ProgressSink& progressSink);
|
ProgressSink& progressSink);
|
||||||
|
|
||||||
ContinuousTimeline<Shape> animateWaveFile(
|
JoiningContinuousTimeline<Shape> animateWaveFile(
|
||||||
boost::filesystem::path filePath,
|
boost::filesystem::path filePath,
|
||||||
boost::optional<std::u32string> dialog,
|
boost::optional<std::u32string> dialog,
|
||||||
int maxThreadCount,
|
int maxThreadCount,
|
||||||
|
|
|
@ -125,7 +125,7 @@ int main(int argc, char *argv[]) {
|
||||||
vector<char*>(argv, argv + argc) | transformed([](char* arg) { return fmt::format("\"{}\"", arg); }), " "));
|
vector<char*>(argv, argv + argc) | transformed([](char* arg) { return fmt::format("\"{}\"", arg); }), " "));
|
||||||
|
|
||||||
std::cerr << "Processing input file. ";
|
std::cerr << "Processing input file. ";
|
||||||
ContinuousTimeline<Shape> animation(TimeRange::zero(), Shape::X);
|
JoiningContinuousTimeline<Shape> animation(TimeRange::zero(), Shape::X);
|
||||||
{
|
{
|
||||||
ProgressBar progressBar;
|
ProgressBar progressBar;
|
||||||
|
|
||||||
|
|
|
@ -314,8 +314,8 @@ lambda_unique_ptr<ps_decoder_t> createDecoder(optional<u32string> dialog) {
|
||||||
return decoder;
|
return decoder;
|
||||||
}
|
}
|
||||||
|
|
||||||
Timeline<void> getNoiseSounds(TimeRange utteranceTimeRange, const Timeline<Phone>& phones) {
|
JoiningTimeline<void> getNoiseSounds(TimeRange utteranceTimeRange, const Timeline<Phone>& phones) {
|
||||||
Timeline<void> noiseSounds;
|
JoiningTimeline<void> noiseSounds;
|
||||||
|
|
||||||
// Find utterance parts without recognized phones
|
// Find utterance parts without recognized phones
|
||||||
noiseSounds.set(utteranceTimeRange);
|
noiseSounds.set(utteranceTimeRange);
|
||||||
|
@ -325,7 +325,7 @@ Timeline<void> getNoiseSounds(TimeRange utteranceTimeRange, const Timeline<Phone
|
||||||
|
|
||||||
// Remove undesired elements
|
// Remove undesired elements
|
||||||
const centiseconds minSoundDuration = 12_cs;
|
const centiseconds minSoundDuration = 12_cs;
|
||||||
for (const auto& unknownSound : Timeline<void>(noiseSounds)) {
|
for (const auto& unknownSound : JoiningTimeline<void>(noiseSounds)) {
|
||||||
bool startsAtZero = unknownSound.getStart() == 0_cs;
|
bool startsAtZero = unknownSound.getStart() == 0_cs;
|
||||||
bool tooShort = unknownSound.getDuration() < minSoundDuration;
|
bool tooShort = unknownSound.getDuration() < minSoundDuration;
|
||||||
if (startsAtZero || tooShort) {
|
if (startsAtZero || tooShort) {
|
||||||
|
@ -386,7 +386,7 @@ Timeline<Phone> utteranceToPhones(
|
||||||
for (const auto& timedWord : words) {
|
for (const auto& timedWord : words) {
|
||||||
wordIds.push_back(getWordId(timedWord.getValue(), *decoder.dict));
|
wordIds.push_back(getWordId(timedWord.getValue(), *decoder.dict));
|
||||||
}
|
}
|
||||||
if (wordIds.empty()) return Timeline<Phone>();
|
if (wordIds.empty()) return {};
|
||||||
|
|
||||||
// Align the words' phones with speech
|
// Align the words' phones with speech
|
||||||
#if BOOST_VERSION < 105600 // Support legacy syntax
|
#if BOOST_VERSION < 105600 // Support legacy syntax
|
||||||
|
@ -403,7 +403,7 @@ Timeline<Phone> utteranceToPhones(
|
||||||
}
|
}
|
||||||
|
|
||||||
// Guess positions of noise sounds
|
// Guess positions of noise sounds
|
||||||
Timeline<void> noiseSounds = getNoiseSounds(utteranceTimeRange, utterancePhones);
|
JoiningTimeline<void> noiseSounds = getNoiseSounds(utteranceTimeRange, utterancePhones);
|
||||||
for (const auto& noiseSound : noiseSounds) {
|
for (const auto& noiseSound : noiseSounds) {
|
||||||
utterancePhones.set(noiseSound.getTimeRange(), Phone::Noise);
|
utterancePhones.set(noiseSound.getTimeRange(), Phone::Noise);
|
||||||
}
|
}
|
||||||
|
@ -430,7 +430,7 @@ BoundedTimeline<Phone> recognizePhones(
|
||||||
const unique_ptr<AudioClip> audioClip = inputAudioClip.clone() | removeDcOffset();
|
const unique_ptr<AudioClip> audioClip = inputAudioClip.clone() | removeDcOffset();
|
||||||
|
|
||||||
// Split audio into utterances
|
// Split audio into utterances
|
||||||
BoundedTimeline<void> utterances;
|
JoiningBoundedTimeline<void> utterances;
|
||||||
try {
|
try {
|
||||||
utterances = detectVoiceActivity(*audioClip, maxThreadCount, voiceActivationProgressSink);
|
utterances = detectVoiceActivity(*audioClip, maxThreadCount, voiceActivationProgressSink);
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,14 +2,14 @@
|
||||||
|
|
||||||
#include "Timeline.h"
|
#include "Timeline.h"
|
||||||
|
|
||||||
template<typename T>
|
template<typename T, bool AutoJoin = false>
|
||||||
class BoundedTimeline : public Timeline<T> {
|
class BoundedTimeline : public Timeline<T, AutoJoin> {
|
||||||
using typename Timeline<T>::time_type;
|
using typename Timeline<T, AutoJoin>::time_type;
|
||||||
using Timeline<T>::equals;
|
using Timeline<T, AutoJoin>::equals;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
using typename Timeline<T>::iterator;
|
using typename Timeline<T, AutoJoin>::iterator;
|
||||||
using Timeline<T>::end;
|
using Timeline<T, AutoJoin>::end;
|
||||||
|
|
||||||
BoundedTimeline() :
|
BoundedTimeline() :
|
||||||
range(TimeRange::zero())
|
range(TimeRange::zero())
|
||||||
|
@ -25,7 +25,7 @@ public:
|
||||||
{
|
{
|
||||||
for (auto it = first; it != last; ++it) {
|
for (auto it = first; it != last; ++it) {
|
||||||
// Virtual function call in constructor. Derived constructors shouldn't call this one!
|
// Virtual function call in constructor. Derived constructors shouldn't call this one!
|
||||||
BoundedTimeline<T>::set(*it);
|
BoundedTimeline::set(*it);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -42,7 +42,7 @@ public:
|
||||||
return range;
|
return range;
|
||||||
}
|
}
|
||||||
|
|
||||||
using Timeline<T>::set;
|
using Timeline<T, AutoJoin>::set;
|
||||||
|
|
||||||
iterator set(Timed<T> timedValue) override {
|
iterator set(Timed<T> timedValue) override {
|
||||||
// Exit if the value's range is completely out of bounds
|
// Exit if the value's range is completely out of bounds
|
||||||
|
@ -54,16 +54,16 @@ public:
|
||||||
TimeRange& valueRange = timedValue.getTimeRange();
|
TimeRange& valueRange = timedValue.getTimeRange();
|
||||||
valueRange.resize(max(range.getStart(), valueRange.getStart()), min(range.getEnd(), valueRange.getEnd()));
|
valueRange.resize(max(range.getStart(), valueRange.getStart()), min(range.getEnd(), valueRange.getEnd()));
|
||||||
|
|
||||||
return Timeline<T>::set(timedValue);
|
return Timeline<T, AutoJoin>::set(timedValue);
|
||||||
}
|
}
|
||||||
|
|
||||||
void shift(time_type offset) override {
|
void shift(time_type offset) override {
|
||||||
Timeline<T>::shift(offset);
|
Timeline<T, AutoJoin>::shift(offset);
|
||||||
range.shift(offset);
|
range.shift(offset);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool operator==(const BoundedTimeline& rhs) const {
|
bool operator==(const BoundedTimeline& rhs) const {
|
||||||
return Timeline<T>::equals(rhs) && range == rhs.range;
|
return Timeline<T, AutoJoin>::equals(rhs) && range == rhs.range;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool operator!=(const BoundedTimeline& rhs) const {
|
bool operator!=(const BoundedTimeline& rhs) const {
|
||||||
|
@ -73,3 +73,6 @@ public:
|
||||||
private:
|
private:
|
||||||
TimeRange range;
|
TimeRange range;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
using JoiningBoundedTimeline = BoundedTimeline<T, true>;
|
||||||
|
|
|
@ -2,16 +2,16 @@
|
||||||
|
|
||||||
#include "BoundedTimeline.h"
|
#include "BoundedTimeline.h"
|
||||||
|
|
||||||
template<typename T>
|
template<typename T, bool AutoJoin = false>
|
||||||
class ContinuousTimeline : public BoundedTimeline<T> {
|
class ContinuousTimeline : public BoundedTimeline<T, AutoJoin> {
|
||||||
|
|
||||||
public:
|
public:
|
||||||
ContinuousTimeline(TimeRange range, T defaultValue) :
|
ContinuousTimeline(TimeRange range, T defaultValue) :
|
||||||
BoundedTimeline<T>(range),
|
BoundedTimeline<T, AutoJoin>(range),
|
||||||
defaultValue(defaultValue)
|
defaultValue(defaultValue)
|
||||||
{
|
{
|
||||||
// Virtual function call in constructor. Derived constructors shouldn't call this one!
|
// Virtual function call in constructor. Derived constructors shouldn't call this one!
|
||||||
ContinuousTimeline<T>::clear(range);
|
ContinuousTimeline::clear(range);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename InputIterator>
|
template<typename InputIterator>
|
||||||
|
@ -20,7 +20,7 @@ public:
|
||||||
{
|
{
|
||||||
// Virtual function calls in constructor. Derived constructors shouldn't call this one!
|
// Virtual function calls in constructor. Derived constructors shouldn't call this one!
|
||||||
for (auto it = first; it != last; ++it) {
|
for (auto it = first; it != last; ++it) {
|
||||||
ContinuousTimeline<T>::set(*it);
|
ContinuousTimeline::set(*it);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -33,12 +33,15 @@ public:
|
||||||
ContinuousTimeline(range, defaultValue, initializerList.begin(), initializerList.end())
|
ContinuousTimeline(range, defaultValue, initializerList.begin(), initializerList.end())
|
||||||
{}
|
{}
|
||||||
|
|
||||||
using BoundedTimeline<T>::clear;
|
using BoundedTimeline<T, AutoJoin>::clear;
|
||||||
|
|
||||||
void clear(const TimeRange& range) override {
|
void clear(const TimeRange& range) override {
|
||||||
BoundedTimeline<T>::set(Timed<T>(range, defaultValue));
|
BoundedTimeline<T, AutoJoin>::set(Timed<T>(range, defaultValue));
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
T defaultValue;
|
T defaultValue;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
using JoiningContinuousTimeline = ContinuousTimeline<T, true>;
|
||||||
|
|
|
@ -26,7 +26,7 @@ namespace internal {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename T>
|
template<typename T, bool AutoJoin = false>
|
||||||
class Timeline {
|
class Timeline {
|
||||||
public:
|
public:
|
||||||
using time_type = TimeRange::time_type;
|
using time_type = TimeRange::time_type;
|
||||||
|
@ -94,7 +94,7 @@ public:
|
||||||
Timeline(InputIterator first, InputIterator last) {
|
Timeline(InputIterator first, InputIterator last) {
|
||||||
for (auto it = first; it != last; ++it) {
|
for (auto it = first; it != last; ++it) {
|
||||||
// Virtual function call in constructor. Derived constructors don't call this one.
|
// Virtual function call in constructor. Derived constructors don't call this one.
|
||||||
Timeline<T>::set(*it);
|
Timeline::set(*it);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -200,6 +200,7 @@ public:
|
||||||
return end();
|
return end();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (AutoJoin) {
|
||||||
// Extend the timed value if it touches elements with equal value
|
// Extend the timed value if it touches elements with equal value
|
||||||
iterator elementBefore = find(timedValue.getStart(), FindMode::SampleLeft);
|
iterator elementBefore = find(timedValue.getStart(), FindMode::SampleLeft);
|
||||||
if (elementBefore != end() && ::internal::valueEquals(*elementBefore, timedValue)) {
|
if (elementBefore != end() && ::internal::valueEquals(*elementBefore, timedValue)) {
|
||||||
|
@ -209,6 +210,7 @@ public:
|
||||||
if (elementAfter != end() && ::internal::valueEquals(*elementAfter, timedValue)) {
|
if (elementAfter != end() && ::internal::valueEquals(*elementAfter, timedValue)) {
|
||||||
timedValue.getTimeRange().resize(timedValue.getStart(), elementAfter->getEnd());
|
timedValue.getTimeRange().resize(timedValue.getStart(), elementAfter->getEnd());
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Erase overlapping elements
|
// Erase overlapping elements
|
||||||
Timeline::clear(timedValue.getTimeRange());
|
Timeline::clear(timedValue.getTimeRange());
|
||||||
|
@ -242,6 +244,26 @@ public:
|
||||||
return ReferenceWrapper(*this, time);
|
return ReferenceWrapper(*this, time);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Combines adjacent equal elements into one
|
||||||
|
template<bool autoJoin = AutoJoin, typename = std::enable_if_t<!autoJoin>>
|
||||||
|
void joinAdjacent() {
|
||||||
|
Timeline copy(*this);
|
||||||
|
for (auto it = copy.begin(); it != copy.end(); ++it) {
|
||||||
|
const auto rangeBegin = it;
|
||||||
|
auto rangeEnd = std::next(rangeBegin);
|
||||||
|
while (rangeEnd != copy.end() && rangeEnd->getStart() == rangeBegin->getEnd() && ::internal::valueEquals(*rangeEnd, *rangeBegin)) {
|
||||||
|
++rangeEnd;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (rangeEnd != std::next(rangeBegin)) {
|
||||||
|
Timed<T> combined = *rangeBegin;
|
||||||
|
combined.setTimeRange({rangeBegin->getStart(), rangeEnd->getEnd()});
|
||||||
|
set(combined);
|
||||||
|
it = rangeEnd;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
virtual void shift(time_type offset) {
|
virtual void shift(time_type offset) {
|
||||||
if (offset == time_type::zero()) return;
|
if (offset == time_type::zero()) return;
|
||||||
|
|
||||||
|
@ -290,7 +312,10 @@ private:
|
||||||
};
|
};
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
std::ostream& operator<<(std::ostream& stream, const Timeline<T>& timeline) {
|
using JoiningTimeline = Timeline<T, true>;
|
||||||
|
|
||||||
|
template<typename T, bool AutoJoin>
|
||||||
|
std::ostream& operator<<(std::ostream& stream, const Timeline<T, AutoJoin>& timeline) {
|
||||||
stream << "Timeline{";
|
stream << "Timeline{";
|
||||||
bool isFirst = true;
|
bool isFirst = true;
|
||||||
for (auto element : timeline) {
|
for (auto element : timeline) {
|
||||||
|
|
|
@ -231,17 +231,10 @@ void testSetter(std::function<void(const Timed<int>&, Timeline<int>&)> set) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check timeline via iterators
|
// Check timeline via iterators
|
||||||
Timed<int> lastElement(centiseconds::min(), centiseconds::min(), std::numeric_limits<int>::min());
|
|
||||||
for (const auto& element : timeline) {
|
for (const auto& element : timeline) {
|
||||||
// No element should have zero length
|
// No element should have zero length
|
||||||
EXPECT_LT(0_cs, element.getDuration());
|
EXPECT_LT(0_cs, element.getDuration());
|
||||||
|
|
||||||
// No two adjacent elements should have the same value; they should have been merged
|
|
||||||
if (element.getStart() == lastElement.getEnd()) {
|
|
||||||
EXPECT_NE(lastElement.getValue(), element.getValue());
|
|
||||||
}
|
|
||||||
lastElement = element;
|
|
||||||
|
|
||||||
// Element should match expected values
|
// Element should match expected values
|
||||||
for (centiseconds t = std::max(centiseconds::zero(), element.getStart()); t < element.getEnd(); ++t) {
|
for (centiseconds t = std::max(centiseconds::zero(), element.getStart()); t < element.getEnd(); ++t) {
|
||||||
optional<int> expectedValue = expectedValues[t.count()];
|
optional<int> expectedValue = expectedValues[t.count()];
|
||||||
|
@ -300,6 +293,51 @@ TEST(Timeline, indexer_set) {
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Verifies that joinAdjacent() on a plain Timeline<int> merges touching elements
// with equal values into one, while keeping elements separated by a gap or
// carrying a different value apart.
TEST(Timeline, joinAdjacent) {
|
||||||
|
Timeline<int> timeline{
|
||||||
|
{1_cs, 2_cs, 1},
|
||||||
|
{2_cs, 4_cs, 2},
|
||||||
|
{3_cs, 6_cs, 2}, // overlaps the previous entry; presumably truncates it to 2..3 cs -- TODO confirm
|
||||||
|
{6_cs, 7_cs, 2},
|
||||||
|
// Gap
|
||||||
|
{8_cs, 10_cs, 2},
|
||||||
|
{11_cs, 12_cs, 3}
|
||||||
|
};
|
||||||
|
EXPECT_EQ(6, timeline.size()); // nothing has been joined yet: all six entries are separate elements
|
||||||
|
timeline.joinAdjacent();
|
||||||
|
EXPECT_EQ(4, timeline.size()); // the three touching entries with value 2 collapsed into one
|
||||||
|
|
||||||
|
Timed<int> expectedJoined[] = {
|
||||||
|
{1_cs, 2_cs, 1},
|
||||||
|
{2_cs, 7_cs, 2}, // joined result of the entries covering 2..7 cs
|
||||||
|
// Gap
|
||||||
|
{8_cs, 10_cs, 2}, // same value as the joined element, but not touching it
|
||||||
|
{11_cs, 12_cs, 3}
|
||||||
|
};
|
||||||
|
EXPECT_THAT(timeline, ElementsAreArray(expectedJoined));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Verifies that a JoiningTimeline<int> merges touching elements with equal values
// as they are inserted, so no explicit join step is needed.
TEST(Timeline, autoJoin) {
|
||||||
|
JoiningTimeline<int> timeline{
|
||||||
|
{1_cs, 2_cs, 1},
|
||||||
|
{2_cs, 4_cs, 2},
|
||||||
|
{3_cs, 6_cs, 2},
|
||||||
|
{6_cs, 7_cs, 2},
|
||||||
|
// Gap
|
||||||
|
{8_cs, 10_cs, 2},
|
||||||
|
{11_cs, 12_cs, 3}
|
||||||
|
};
|
||||||
|
Timed<int> expectedJoined[] = {
|
||||||
|
{1_cs, 2_cs, 1},
|
||||||
|
{2_cs, 7_cs, 2}, // the three entries with value 2 covering 2..7 cs end up as one element
|
||||||
|
// Gap
|
||||||
|
{8_cs, 10_cs, 2}, // same value, but separated by the gap, so it stays on its own
|
||||||
|
{11_cs, 12_cs, 3}
|
||||||
|
};
|
||||||
|
EXPECT_EQ(4, timeline.size()); // already joined right after construction
|
||||||
|
EXPECT_THAT(timeline, ElementsAreArray(expectedJoined));
|
||||||
|
}
|
||||||
|
|
||||||
TEST(Timeline, shift) {
|
TEST(Timeline, shift) {
|
||||||
Timeline<int> timeline{ { 1_cs, 2_cs, 1 },{ 2_cs, 5_cs, 2 },{ 7_cs, 9_cs, 3 } };
|
Timeline<int> timeline{ { 1_cs, 2_cs, 1 },{ 2_cs, 5_cs, 2 },{ 7_cs, 9_cs, 3 } };
|
||||||
Timeline<int> expected{ { 3_cs, 4_cs, 1 },{ 4_cs, 7_cs, 2 },{ 9_cs, 11_cs, 3 } };
|
Timeline<int> expected{ { 3_cs, 4_cs, 1 },{ 4_cs, 7_cs, 2 },{ 9_cs, 11_cs, 3 } };
|
||||||
|
|
Loading…
Reference in New Issue