From f44baaa05f038affbed14d68e7805179ca7b4bf6 Mon Sep 17 00:00:00 2001 From: Daniel Wolf Date: Thu, 29 Sep 2016 11:41:31 +0200 Subject: [PATCH] Improve noise detection heuristic --- src/audio/voiceActivityDetection.cpp | 8 ++++++++ src/phoneRecognition.cpp | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/audio/voiceActivityDetection.cpp b/src/audio/voiceActivityDetection.cpp index c03983b..3da2d85 100644 --- a/src/audio/voiceActivityDetection.cpp +++ b/src/audio/voiceActivityDetection.cpp @@ -100,6 +100,14 @@ BoundedTimeline<void> detectVoiceActivity(const AudioClip& inputAudioClip, int m } } + // Shorten activities. WebRTC adds a bit of buffer at the end. + const centiseconds tail(5); + for (const auto& utterance : Timeline<void>(activity)) { + if (utterance.getTimeRange().getLength() > tail && utterance.getEnd() < audioLength) { + activity.clear(utterance.getEnd() - tail, utterance.getEnd()); + } + } + logging::debugFormat("Found {} sections of voice activity: {}", activity.size(), join(activity | transformed([](const Timed<void>& t) { return format("{0}-{1}", t.getStart(), t.getEnd()); }), ", ")); diff --git a/src/phoneRecognition.cpp b/src/phoneRecognition.cpp index 742de17..a0717dd 100644 --- a/src/phoneRecognition.cpp +++ b/src/phoneRecognition.cpp @@ -288,7 +288,7 @@ Timeline<void> getNoiseSounds(TimeRange utteranceTimeRange, const Timeline<Phone [...] Timeline<void>(noiseSounds)) { bool startsAtZero = unknownSound.getStart() == 0_cs; bool tooShort = unknownSound.getTimeRange().getLength() < minSoundLength;