diff --git a/src/audio/voiceActivityDetection.cpp b/src/audio/voiceActivityDetection.cpp index c03983b..3da2d85 100644 --- a/src/audio/voiceActivityDetection.cpp +++ b/src/audio/voiceActivityDetection.cpp @@ -100,6 +100,14 @@ BoundedTimeline detectVoiceActivity(const AudioClip& inputAudioClip, int m } } + // Shorten activities. WebRTC adds a bit of buffer at the end. + const centiseconds tail(5); + for (const auto& utterance : Timeline(activity)) { + if (utterance.getTimeRange().getLength() > tail && utterance.getEnd() < audioLength) { + activity.clear(utterance.getEnd() - tail, utterance.getEnd()); + } + } + logging::debugFormat("Found {} sections of voice activity: {}", activity.size(), join(activity | transformed([](const Timed& t) { return format("{0}-{1}", t.getStart(), t.getEnd()); }), ", ")); diff --git a/src/phoneRecognition.cpp b/src/phoneRecognition.cpp index 742de17..a0717dd 100644 --- a/src/phoneRecognition.cpp +++ b/src/phoneRecognition.cpp @@ -288,7 +288,7 @@ Timeline getNoiseSounds(TimeRange utteranceTimeRange, const Timeline(noiseSounds)) { bool startsAtZero = unknownSound.getStart() == 0_cs; bool tooShort = unknownSound.getTimeRange().getLength() < minSoundLength;