From 667edf9485ae6d8b44ed4825dc31789e534fc1a1 Mon Sep 17 00:00:00 2001
From: Daniel Wolf
Date: Wed, 10 Feb 2016 21:53:58 +0100
Subject: [PATCH] Improved dialog handling

---
 src/phoneExtraction.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/phoneExtraction.cpp b/src/phoneExtraction.cpp
index e8ccfa9..c5c9652 100644
--- a/src/phoneExtraction.cpp
+++ b/src/phoneExtraction.cpp
@@ -175,10 +175,10 @@ vector extractDialogWords(string dialog) {
 	boost::algorithm::to_lower(dialog);
 
 	// Insert silences where appropriate
-	dialog = regex_replace(dialog, regex("[,;.:!?] "), " ");
+	dialog = regex_replace(dialog, regex("[,;.:!?] |-"), " ");
 
 	// Remove all undesired characters
-	dialog = regex_replace(dialog, regex("[^a-z.'\\-0-9<>]"), " ");
+	dialog = regex_replace(dialog, regex("[^a-z.'\\0-9<>]"), " ");
 
 	// Collapse whitespace
 	dialog = regex_replace(dialog, regex("\\s+"), " ");