From 27ba3ef35751fc45b664d432e1291aed30b8cf9e Mon Sep 17 00:00:00 2001 From: Daniel Wolf Date: Wed, 25 Nov 2015 22:00:24 +0100 Subject: [PATCH] Generating XML output --- src/Shape.h | 1 + src/centiseconds.h | 2 ++ src/main.cpp | 58 ++++++++++++++++++++++++++++++++-------- src/mouth_animation.cpp | 7 ++++- src/phone_extraction.cpp | 4 +-- 5 files changed, 58 insertions(+), 14 deletions(-) diff --git a/src/Shape.h b/src/Shape.h index d1018a7..759fa41 100644 --- a/src/Shape.h +++ b/src/Shape.h @@ -7,6 +7,7 @@ // For reference, see http://sunewatts.dk/lipsync/lipsync/article_02.php // For visual examples, see https://flic.kr/s/aHsj86KR4J. Their shapes "BMP".."L" map to A..H. enum class Shape { + Invalid = -1, A, // Closed mouth (silence, M, B, P) B, // Clenched teeth (most vowels, m[e]n) C, // Mouth slightly open (b[ir]d, s[ay], w[i]n...) diff --git a/src/centiseconds.h b/src/centiseconds.h index 3d17f0f..d2ab2e5 100644 --- a/src/centiseconds.h +++ b/src/centiseconds.h @@ -1,6 +1,8 @@ #ifndef LIPSYNC_CENTISECONDS_H #define LIPSYNC_CENTISECONDS_H +#include + typedef std::chrono::duration centiseconds; std::ostream& operator <<(std::ostream& stream, const centiseconds cs); diff --git a/src/main.cpp b/src/main.cpp index 2a84772..f1fbd2a 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -1,4 +1,7 @@ #include +#include +#include +#include #include "audio_input/WaveFileReader.h" #include "phone_extraction.h" #include "mouth_animation.h" @@ -8,6 +11,11 @@ using std::exception; using std::string; using std::wstring; using std::unique_ptr; +using std::map; +using std::chrono::duration; +using std::chrono::duration_cast; +using boost::filesystem::path; +using boost::property_tree::ptree; string getMessage(const exception& e) { string result(e.what()); @@ -20,7 +28,7 @@ string getMessage(const exception& e) { return result; } -unique_ptr createAudioStream(boost::filesystem::path filePath) { +unique_ptr createAudioStream(path filePath) { try { return unique_ptr(new WaveFileReader(filePath)); } catch (...) { @@ -28,6 +36,37 @@ unique_ptr createAudioStream(boost::filesystem::path filePath) { } } +string formatDuration(duration seconds) { + return fmt::format("{0:.2f}", seconds.count()); +} + +ptree createXmlTree(const path& filePath, const map& phones, const map& shapes) { + ptree tree; + + // Add sound file path + tree.add("rhubarbResult.info.soundFile", filePath.string()); + + // Add phones + for (auto it = phones.cbegin(), itNext = ++phones.cbegin(); itNext != phones.cend(); ++it, ++itNext) { + auto pair = *it; + auto nextPair = *itNext; + ptree& phoneElement = tree.add("rhubarbResult.phones.phone", pair.second); + phoneElement.add(".start", formatDuration(pair.first)); + phoneElement.add(".duration", formatDuration(nextPair.first - pair.first)); + } + + // Add mouth cues + for (auto it = shapes.cbegin(), itNext = ++shapes.cbegin(); itNext != shapes.cend(); ++it, ++itNext) { + auto pair = *it; + auto nextPair = *itNext; + ptree& mouthCueElement = tree.add("rhubarbResult.mouthCues.mouthCue", pair.second); + mouthCueElement.add(".start", formatDuration(pair.first)); + mouthCueElement.add(".duration", formatDuration(nextPair.first - pair.first)); + } + + return tree; +} + int main(int argc, char *argv[]) { try { // Get sound file name @@ -41,21 +80,18 @@ int main(int argc, char *argv[]) { unique_ptr audioStream = createAudioStream(soundFileName); // Detect phones - std::map phones = detectPhones(std::move(audioStream)); + map phones = detectPhones(std::move(audioStream)); // Generate mouth shapes - std::map shapes = animate(phones); + map shapes = animate(phones); - for (auto &pair : phones) { - std::cout << pair.first << ": " << pair.second << "\n"; - } - for (auto &pair : shapes) { - std::cout << pair.first << ": " << pair.second << "\n"; - } + // Print XML + boost::property_tree::ptree xmlTree = createXmlTree(soundFileName, phones, shapes); + boost::property_tree::write_xml(std::cout, xmlTree, boost::property_tree::xml_writer_settings(' ', 2)); return 0; } catch (const exception& e) { - std::cout << "An error occurred. " << getMessage(e); + std::cerr << "An error occurred. " << getMessage(e); return 1; } -} \ No newline at end of file +} diff --git a/src/mouth_animation.cpp b/src/mouth_animation.cpp index dd6128e..81e998c 100644 --- a/src/mouth_animation.cpp +++ b/src/mouth_animation.cpp @@ -68,8 +68,13 @@ Shape getShape(Phone phone) { map animate(const map &phones) { map shapes; + Shape lastShape = Shape::Invalid; for (auto& pair : phones) { - shapes[pair.first] = getShape(pair.second); + Shape shape = getShape(pair.second); + if (shape != lastShape) { + shapes[pair.first] = shape; + lastShape = shape; + } } return shapes; } diff --git a/src/phone_extraction.cpp b/src/phone_extraction.cpp index 4a00336..6835764 100644 --- a/src/phone_extraction.cpp +++ b/src/phone_extraction.cpp @@ -105,6 +105,7 @@ void processAudioStream(AudioStream& audioStream16kHzMono, ps_decoder_t& recogni map getPhones(ps_decoder_t& recognizer) { map result; ps_seg_t *segmentationIter; + result[centiseconds(0)] = Phone::None; int32 score; int endFrame; for (segmentationIter = ps_seg_iter(&recognizer, &score); segmentationIter; segmentationIter = ps_seg_next(segmentationIter)) { @@ -116,9 +117,8 @@ map getPhones(ps_decoder_t& recognizer) { ps_seg_frames(segmentationIter, &startFrame, &endFrame); result[centiseconds(startFrame)] = stringToPhone(phone); + result[centiseconds(endFrame + 1)] = Phone::None; } - // Add dummy entry past the last phone - result[centiseconds(endFrame + 1)] = Phone::None; return result; };