Generating XML output

2015-11-25 22:00:24 +01:00 · 2015-11-25 22:00:24 +01:00 · 27ba3ef357
parent 2ef99119b0
commit 27ba3ef357
5 changed files with 58 additions and 14 deletions
--- a/src/Shape.h
+++ b/src/Shape.h
@ -7,6 +7,7 @@
 // For reference, see http://sunewatts.dk/lipsync/lipsync/article_02.php
 // For visual examples, see https://flic.kr/s/aHsj86KR4J. Their shapes "BMP".."L" map to A..H.
 enum class Shape {
+	Invalid = -1,
 	A,	// Closed mouth (silence, M, B, P)
 	B,	// Clenched teeth (most vowels, m[e]n)
 	C,	// Mouth slightly open (b[ir]d, s[ay], w[i]n...)
--- a/src/centiseconds.h
+++ b/src/centiseconds.h
@ -1,6 +1,8 @@
 #ifndef LIPSYNC_CENTISECONDS_H
 #define LIPSYNC_CENTISECONDS_H

+#include <chrono>
+
 typedef std::chrono::duration<int, std::centi> centiseconds;

 std::ostream& operator <<(std::ostream& stream, const centiseconds cs);
--- a/src/main.cpp
+++ b/src/main.cpp
@ -1,4 +1,7 @@
 #include <iostream>
+#include <boost/property_tree/ptree.hpp>
+#include <boost/property_tree/xml_parser.hpp>
+#include <format.h>
 #include "audio_input/WaveFileReader.h"
 #include "phone_extraction.h"
 #include "mouth_animation.h"
@ -8,6 +11,11 @@ using std::exception;
 using std::string;
 using std::wstring;
 using std::unique_ptr;
+using std::map;
+using std::chrono::duration;
+using std::chrono::duration_cast;
+using boost::filesystem::path;
+using boost::property_tree::ptree;

 string getMessage(const exception& e) {
 	string result(e.what());
@ -20,7 +28,7 @@ string getMessage(const exception& e) {
 	return result;
 }

-unique_ptr<AudioStream> createAudioStream(boost::filesystem::path filePath) {
+unique_ptr<AudioStream> createAudioStream(path filePath) {
 	try {
 		return unique_ptr<AudioStream>(new WaveFileReader(filePath));
 	} catch (...) {
@ -28,6 +36,37 @@ unique_ptr<AudioStream> createAudioStream(boost::filesystem::path filePath) {
 	}
 }

+// Renders a duration as a number of seconds in fixed-point notation
+// with two decimal places (e.g. "1.50").
+string formatDuration(duration<double> seconds) {
+	const double secondCount = seconds.count();
+	return fmt::format("{0:.2f}", secondCount);
+}
+
+// Adds one child element at elementPath for every timeline entry except the last one.
+// Each element holds the entry's value and gets a "start" and a "duration" attribute,
+// both formatted by formatDuration. The duration is the distance to the next entry's
+// timestamp, so the last entry only serves as the end marker of its predecessor.
+// Timelines with fewer than two entries produce no elements.
+template<typename TValue>
+static void addTimedElements(ptree& tree, const ptree::path_type& elementPath, const map<centiseconds, TValue>& timeline) {
+	auto current = timeline.cbegin();
+	// Guard against an empty map: advancing its begin (== end) iterator is undefined behavior
+	if (current == timeline.cend()) return;
+
+	auto following = current;
+	for (++following; following != timeline.cend(); ++current, ++following) {
+		// Access entries through the iterators to avoid copying each pair
+		ptree& element = tree.add(elementPath, current->second);
+		element.add("<xmlattr>.start", formatDuration(current->first));
+		element.add("<xmlattr>.duration", formatDuration(following->first - current->first));
+	}
+}
+
+// Builds the property tree that is written out as the XML result.
+//   filePath: path of the sound file; stored under rhubarbResult.info.soundFile
+//   phones:   phone timeline; one rhubarbResult.phones.phone element per entry but the last
+//   shapes:   shape timeline; one rhubarbResult.mouthCues.mouthCue element per entry but the last
+// Returns the populated tree. Empty timelines are allowed and simply add no elements.
+ptree createXmlTree(const path& filePath, const map<centiseconds, Phone>& phones, const map<centiseconds, Shape>& shapes) {
+	ptree tree;
+
+	// Add sound file path
+	tree.add("rhubarbResult.info.soundFile", filePath.string());
+
+	// Add phones
+	addTimedElements(tree, "rhubarbResult.phones.phone", phones);
+
+	// Add mouth cues
+	addTimedElements(tree, "rhubarbResult.mouthCues.mouthCue", shapes);
+
+	return tree;
+}
+
 int main(int argc, char *argv[]) {
 	try {
 		// Get sound file name
@ -41,21 +80,18 @@ int main(int argc, char *argv[]) {
 		unique_ptr<AudioStream> audioStream = createAudioStream(soundFileName);

 		// Detect phones
-		std::map<centiseconds, Phone> phones = detectPhones(std::move(audioStream));
+		map<centiseconds, Phone> phones = detectPhones(std::move(audioStream));

 		// Generate mouth shapes
-		std::map<centiseconds, Shape> shapes = animate(phones);
+		map<centiseconds, Shape> shapes = animate(phones);

-		for (auto &pair : phones) {
-			std::cout << pair.first << ": " << pair.second << "\n";
-		}
-		for (auto &pair : shapes) {
-			std::cout << pair.first << ": " << pair.second << "\n";
-		}
+		// Print XML
+		boost::property_tree::ptree xmlTree = createXmlTree(soundFileName, phones, shapes);
+		boost::property_tree::write_xml(std::cout, xmlTree, boost::property_tree::xml_writer_settings<string>(' ', 2));

 		return 0;
 	} catch (const exception& e) {
-		std::cout << "An error occurred. " << getMessage(e);
+		std::cerr << "An error occurred. " << getMessage(e);
 		return 1;
 	}
 }
--- a/src/mouth_animation.cpp
+++ b/src/mouth_animation.cpp
@ -68,8 +68,13 @@ Shape getShape(Phone phone) {

 map<centiseconds, Shape> animate(const map<centiseconds, Phone> &phones) {
 	map<centiseconds, Shape> shapes;
+	Shape lastShape = Shape::Invalid;
 	for (auto& pair : phones) {
-		shapes[pair.first] = getShape(pair.second);
+		Shape shape = getShape(pair.second);
+		if (shape != lastShape) {
+			shapes[pair.first] = shape;
+			lastShape = shape;
+		}
 	}
 	return shapes;
 }
--- a/src/phone_extraction.cpp
+++ b/src/phone_extraction.cpp
@ -105,6 +105,7 @@ void processAudioStream(AudioStream& audioStream16kHzMono, ps_decoder_t& recogni
 map<centiseconds, Phone> getPhones(ps_decoder_t& recognizer) {
 	map<centiseconds, Phone> result;
 	ps_seg_t *segmentationIter;
+	result[centiseconds(0)] = Phone::None;
 	int32 score;
 	int endFrame;
 	for (segmentationIter = ps_seg_iter(&recognizer, &score); segmentationIter; segmentationIter = ps_seg_next(segmentationIter)) {
@ -116,9 +117,8 @@ map<centiseconds, Phone> getPhones(ps_decoder_t& recognizer) {
 		ps_seg_frames(segmentationIter, &startFrame, &endFrame);

 		result[centiseconds(startFrame)] = stringToPhone(phone);
-	}
-	// Add dummy entry past the last phone
 		result[centiseconds(endFrame + 1)] = Phone::None;
+	}
 	return result;
 };