Refactoring: Replaced audio "length" with "duration"
parent b35e05fe7c
commit f5b7971f52
@@ -60,7 +60,7 @@ void XMLExporter::exportShapes(const boost::filesystem::path& inputFilePath, con
     // Add metadata
     tree.put("rhubarbResult.metadata.soundFile", inputFilePath.string());
-    tree.put("rhubarbResult.metadata.duration", formatDuration(shapes.getRange().getLength()));
+    tree.put("rhubarbResult.metadata.duration", formatDuration(shapes.getRange().getDuration()));

     // Add mouth cues
     for (auto& timedShape : dummyShapeIfEmpty(shapes)) {
@@ -105,7 +105,7 @@ void JSONExporter::exportShapes(const boost::filesystem::path& inputFilePath, co
     outputStream << "{\n";
     outputStream << "  \"metadata\": {\n";
     outputStream << "    \"soundFile\": \"" << escapeJSONString(inputFilePath.string()) << "\",\n";
-    outputStream << "    \"duration\": " << formatDuration(shapes.getRange().getLength()) << "\n";
+    outputStream << "    \"duration\": " << formatDuration(shapes.getRange().getDuration()) << "\n";
     outputStream << "  },\n";
     outputStream << "  \"mouthCues\": [\n";
     bool isFirst = true;
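Both exporters route the renamed accessor through formatDuration(), which is not shown in this diff. A minimal sketch of what such a helper might look like, assuming centiseconds is a std::chrono duration with ratio 1/100 and the output format is seconds with two decimal places:

#include <chrono>
#include <cstdio>
#include <ratio>
#include <string>

// Assumed alias; the real type is defined elsewhere in the project.
using centiseconds = std::chrono::duration<long long, std::centi>;

// Hypothetical stand-in for formatDuration(): renders a non-negative
// centiseconds value as seconds with two decimals, e.g. 325 cs -> "3.25".
std::string formatDuration(centiseconds duration) {
    char buffer[32];
    std::snprintf(buffer, sizeof buffer, "%lld.%02lld",
        duration.count() / 100, duration.count() % 100);
    return buffer;
}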
@@ -27,7 +27,7 @@ time_type TimeRange::getEnd() const {
     return end;
 }

-time_type TimeRange::getLength() const {
+time_type TimeRange::getDuration() const {
     return end - start;
 }

@@ -16,7 +16,7 @@ public:

     time_type getStart() const;
     time_type getEnd() const;
-    time_type getLength() const;
+    time_type getDuration() const;
     bool empty() const;

     void resize(const TimeRange& newRange);
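Taken together, the header and implementation hunks amount to a pure rename: getDuration() still returns end - start. A self-contained sketch of the class after the rename, assuming time_type is an alias for the centiseconds type used throughout the diff:

#include <chrono>
#include <ratio>

using centiseconds = std::chrono::duration<long long, std::centi>;
using time_type = centiseconds;  // assumed alias

class TimeRange {
public:
    TimeRange(time_type start, time_type end) : start(start), end(end) {}
    time_type getStart() const { return start; }
    time_type getEnd() const { return end; }
    time_type getDuration() const { return end - start; }  // formerly getLength()
    bool empty() const { return start == end; }
private:
    time_type start, end;
};

// Usage: TimeRange(centiseconds(10), centiseconds(35)).getDuration() == centiseconds(25)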
@@ -6,7 +6,7 @@ using std::make_unique;
 AudioSegment::AudioSegment(std::unique_ptr<AudioClip> inputClip, const TimeRange& range) :
     inputClip(std::move(inputClip)),
     sampleOffset(static_cast<int64_t>(range.getStart().count()) * this->inputClip->getSampleRate() / 100),
-    sampleCount(static_cast<int64_t>(range.getLength().count()) * this->inputClip->getSampleRate() / 100)
+    sampleCount(static_cast<int64_t>(range.getDuration().count()) * this->inputClip->getSampleRate() / 100)
 {
     if (sampleOffset < 0 || sampleOffset + sampleCount > this->inputClip->size()) {
         throw std::invalid_argument("Segment extends beyond input clip.");
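The initializer list converts centisecond timestamps into sample indices via count() * sampleRate / 100, i.e. sampleRate / 100 samples per centisecond. A worked example of the same arithmetic, with an assumed 16 kHz clip (the rate is not part of the diff):

#include <cstdint>
#include <iostream>

int main() {
    const int sampleRate = 16000;      // assumed sample rate
    const int64_t startCs = 150;       // range start: 1.50 s
    const int64_t durationCs = 250;    // range duration: 2.50 s
    // Same arithmetic as the constructor above.
    const int64_t sampleOffset = startCs * sampleRate / 100;    // 24000
    const int64_t sampleCount = durationCs * sampleRate / 100;  // 40000
    std::cout << sampleOffset << ", " << sampleCount << "\n";
    return 0;
}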
@@ -29,7 +29,7 @@ float getDCOffset(const AudioClip& audioClip) {
         flatMeanSampleCount = 3 * sampleRate;
         fadingMeanSampleCount = 1 * sampleRate;
     } else {
-        // Short audio file. Average over the entire length.
+        // Short audio file. Average over the entire duration.
         flatMeanSampleCount = static_cast<int>(audioClip.size());
         fadingMeanSampleCount = 0;
     }
@@ -75,10 +75,10 @@ BoundedTimeline<void> detectVoiceActivity(const AudioClip& inputAudioClip, int m

     // Split audio into segments and perform parallel VAD
     const int segmentCount = maxThreadCount;
-    centiseconds audioLength = audioClip->getTruncatedRange().getLength();
+    centiseconds audioDuration = audioClip->getTruncatedRange().getDuration();
     vector<TimeRange> audioSegments;
     for (int i = 0; i < segmentCount; ++i) {
-        TimeRange segmentRange = TimeRange(i * audioLength / segmentCount, (i + 1) * audioLength / segmentCount);
+        TimeRange segmentRange = TimeRange(i * audioDuration / segmentCount, (i + 1) * audioDuration / segmentCount);
         audioSegments.push_back(segmentRange);
     }
     runParallel([&](const TimeRange& segmentRange, ProgressSink& segmentProgressSink) {
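The loop divides the total duration into segmentCount contiguous ranges using i * duration / segmentCount boundaries; since segment i ends exactly where segment i + 1 starts, integer division cannot produce gaps or overlaps. A standalone sketch of the pattern, using plain integers in place of centiseconds for brevity:

#include <iostream>
#include <utility>
#include <vector>

std::vector<std::pair<long long, long long>> splitRange(long long duration, int segmentCount) {
    std::vector<std::pair<long long, long long>> segments;
    for (int i = 0; i < segmentCount; ++i) {
        // Boundaries land on i * duration / segmentCount, as in the hunk above.
        segments.emplace_back(i * duration / segmentCount, (i + 1) * duration / segmentCount);
    }
    return segments;
}

int main() {
    // Splitting 1000 cs across 3 segments: [0, 333), [333, 666), [666, 1000)
    for (const auto& s : splitRange(1000, 3)) {
        std::cout << "[" << s.first << ", " << s.second << ")\n";
    }
    return 0;
}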
@@ -103,7 +103,7 @@ BoundedTimeline<void> detectVoiceActivity(const AudioClip& inputAudioClip, int m
     // Shorten activities. WebRTC adds a bit of buffer at the end.
     const centiseconds tail(5);
     for (const auto& utterance : Timeline<void>(activity)) {
-        if (utterance.getTimeRange().getLength() > tail && utterance.getEnd() < audioLength) {
+        if (utterance.getTimeRange().getDuration() > tail && utterance.getEnd() < audioDuration) {
            activity.clear(utterance.getEnd() - tail, utterance.getEnd());
        }
    }
@@ -146,17 +146,17 @@ Timeline<Shape> createTweens(ContinuousTimeline<Shape> shapes) {
         centiseconds tweenStart, tweenDuration;
         switch (tweenTiming) {
         case TweenTiming::Early: {
-            tweenDuration = std::min(firstTimeRange.getLength() / 3, maxTweenDuration);
+            tweenDuration = std::min(firstTimeRange.getDuration() / 3, maxTweenDuration);
             tweenStart = firstTimeRange.getEnd() - tweenDuration;
             break;
         }
         case TweenTiming::Centered: {
-            tweenDuration = std::min({ firstTimeRange.getLength() / 3, secondTimeRange.getLength() / 3, maxTweenDuration });
+            tweenDuration = std::min({ firstTimeRange.getDuration() / 3, secondTimeRange.getDuration() / 3, maxTweenDuration });
             tweenStart = firstTimeRange.getEnd() - tweenDuration / 2;
             break;
         }
         case TweenTiming::Late: {
-            tweenDuration = std::min(secondTimeRange.getLength() / 3, maxTweenDuration);
+            tweenDuration = std::min(secondTimeRange.getDuration() / 3, maxTweenDuration);
             tweenStart = secondTimeRange.getStart();
             break;
         }
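In each case the tween is capped at a third of an adjacent shape's duration and at maxTweenDuration; the Centered case takes the minimum over both neighbors. A minimal sketch of that computation, again assuming a centiseconds alias:

#include <algorithm>
#include <chrono>
#include <ratio>

using centiseconds = std::chrono::duration<long long, std::centi>;

// Sketch of the Centered case: never longer than a third of either
// neighboring shape, and never longer than the global cap.
centiseconds centeredTweenDuration(centiseconds first, centiseconds second, centiseconds maxTween) {
    return std::min({ first / 3, second / 3, maxTween });
}

// Example: first = 30 cs, second = 90 cs, cap = 20 cs -> 10 cs (30 / 3).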
@@ -180,7 +180,7 @@ Timeline<Shape> animatePauses(const ContinuousTimeline<Shape>& shapes) {

         const centiseconds maxPausedOpenMouthDuration = 35_cs;
         const TimeRange timeRange = timedShape.getTimeRange();
-        if (timeRange.getLength() <= maxPausedOpenMouthDuration) {
+        if (timeRange.getDuration() <= maxPausedOpenMouthDuration) {
             result.set(timeRange, B);
         }
     }
@@ -189,11 +189,11 @@ Timeline<Shape> animatePauses(const ContinuousTimeline<Shape>& shapes) {
     for_each_adjacent(shapes.begin(), shapes.end(), [&](const Timed<Shape>& secondLast, const Timed<Shape>& last, const Timed<Shape>& pause) {
         if (pause.getValue() != X) return;

-        centiseconds lastLength = last.getTimeRange().getLength();
+        centiseconds lastDuration = last.getTimeRange().getDuration();
         const centiseconds minOpenDuration = 20_cs;
-        if (isClosed(secondLast.getValue()) && !isClosed(last.getValue()) && lastLength < minOpenDuration) {
+        if (isClosed(secondLast.getValue()) && !isClosed(last.getValue()) && lastDuration < minOpenDuration) {
             const centiseconds minSpillDuration = 20_cs;
-            centiseconds spillDuration = std::min(minSpillDuration, pause.getTimeRange().getLength());
+            centiseconds spillDuration = std::min(minSpillDuration, pause.getTimeRange().getDuration());
             result.set(pause.getStart(), pause.getStart() + spillDuration, B);
         }
     });
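for_each_adjacent is a project helper that does not appear in this diff. A minimal sketch, assuming it invokes the functor once for every run of three consecutive timeline elements:

#include <iterator>

template <typename Iterator, typename Function>
void for_each_adjacent(Iterator first, Iterator last, Function f) {
    // Need at least three elements for one (secondLast, last, pause) triple.
    if (first == last) return;
    Iterator second = std::next(first);
    if (second == last) return;
    for (Iterator third = std::next(second); third != last; ++first, ++second, ++third) {
        f(*first, *second, *third);
    }
}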
@@ -214,7 +214,7 @@ ContinuousTimeline<Shape> animate(const BoundedTimeline<Phone> &phones) {
     for (const auto& timedPhone : continuousPhones) {
         // Animate one phone
         optional<Phone> phone = timedPhone.getValue();
-        centiseconds duration = timedPhone.getTimeRange().getLength();
+        centiseconds duration = timedPhone.getTimeRange().getDuration();
         Timeline<Viseme> phoneVisemes = animate(phone, duration, previousPhoneDuration);

         // Result timing is relative to phone. Make absolute.
@@ -236,7 +236,7 @@ ContinuousTimeline<Shape> animate(const BoundedTimeline<Phone> &phones) {
         Viseme viseme = it->getValue();

         // Convert viseme to phone
-        Shape shape = viseme.getShape(it->getTimeRange().getLength(), lastShape);
+        Shape shape = viseme.getShape(it->getTimeRange().getDuration(), lastShape);
         shapes.set(it->getTimeRange(), shape);

         lastShape = shape;
@@ -288,10 +288,10 @@ Timeline<void> getNoiseSounds(TimeRange utteranceTimeRange, const Timeline<Phone
     }

     // Remove undesired elements
-    const centiseconds minSoundLength = 12_cs;
+    const centiseconds minSoundDuration = 12_cs;
     for (const auto& unknownSound : Timeline<void>(noiseSounds)) {
         bool startsAtZero = unknownSound.getStart() == 0_cs;
-        bool tooShort = unknownSound.getTimeRange().getLength() < minSoundLength;
+        bool tooShort = unknownSound.getTimeRange().getDuration() < minSoundDuration;
         if (startsAtZero || tooShort) {
             noiseSounds.clear(unknownSound.getTimeRange());
         }
@@ -428,7 +428,7 @@ BoundedTimeline<Phone> recognizePhones(
     };

     auto getUtteranceProgressWeight = [](const Timed<void> timedUtterance) {
-        return timedUtterance.getTimeRange().getLength().count();
+        return timedUtterance.getTimeRange().getDuration().count();
     };

     // Perform speech recognition
@@ -439,7 +439,7 @@ BoundedTimeline<Phone> recognizePhones(
         // Don't use more threads than there are utterances to be processed
         static_cast<int>(utterances.size()),
         // Don't waste time creating additional threads (and decoders!) if the recording is short
-        static_cast<int>(duration_cast<std::chrono::seconds>(audioClip->getTruncatedRange().getLength()).count() / 5)
+        static_cast<int>(duration_cast<std::chrono::seconds>(audioClip->getTruncatedRange().getDuration()).count() / 5)
     });
     if (threadCount < 1) {
         threadCount = 1;
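The surrounding code, only partly visible in this hunk, clamps the thread count by several factors and then floors it at one. A sketch of the heuristic's shape; the first std::min operand is an assumption, since it lies outside the hunk:

#include <algorithm>

// Assumed shape of the heuristic: the hunk shows only the second and third
// operands of the std::min and the floor-at-one check.
int chooseThreadCount(int hardwareThreads, int utteranceCount, long long audioSeconds) {
    int threadCount = std::min({
        hardwareThreads,                     // assumed first operand
        utteranceCount,                      // at most one thread per utterance
        static_cast<int>(audioSeconds / 5)   // one thread per 5 s of audio
    });
    return std::max(threadCount, 1);         // never fewer than one thread
}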
@@ -230,7 +230,7 @@ void testSetter(std::function<void(const Timed<int>&, Timeline<int>&)> set) {
     Timed<int> lastElement(centiseconds::min(), centiseconds::min(), std::numeric_limits<int>::min());
     for (const auto& element : timeline) {
         // No element should have zero length
-        EXPECT_LT(0_cs, element.getTimeRange().getLength());
+        EXPECT_LT(0_cs, element.getTimeRange().getDuration());

         // No two adjacent elements should have the same value; they should have been merged
         if (element.getStart() == lastElement.getEnd()) {