Generating XML output

This commit is contained in:
Daniel Wolf 2015-11-25 22:00:24 +01:00
parent 2ef99119b0
commit 27ba3ef357
5 changed files with 58 additions and 14 deletions

View File

@ -7,6 +7,7 @@
// For reference, see http://sunewatts.dk/lipsync/lipsync/article_02.php
// For visual examples, see https://flic.kr/s/aHsj86KR4J. Their shapes "BMP".."L" map to A..H.
enum class Shape {
Invalid = -1,
A, // Closed mouth (silence, M, B, P)
B, // Clenched teeth (most vowels, m[e]n)
C, // Mouth slightly open (b[ir]d, s[ay], w[i]n...)

View File

@ -1,6 +1,8 @@
#ifndef LIPSYNC_CENTISECONDS_H
#define LIPSYNC_CENTISECONDS_H
#include <chrono>
typedef std::chrono::duration<int, std::centi> centiseconds;
std::ostream& operator <<(std::ostream& stream, const centiseconds cs);

View File

@ -1,4 +1,7 @@
#include <iostream>
#include <boost/property_tree/ptree.hpp>
#include <boost/property_tree/xml_parser.hpp>
#include <format.h>
#include "audio_input/WaveFileReader.h"
#include "phone_extraction.h"
#include "mouth_animation.h"
@ -8,6 +11,11 @@ using std::exception;
using std::string;
using std::wstring;
using std::unique_ptr;
using std::map;
using std::chrono::duration;
using std::chrono::duration_cast;
using boost::filesystem::path;
using boost::property_tree::ptree;
string getMessage(const exception& e) {
string result(e.what());
@ -20,7 +28,7 @@ string getMessage(const exception& e) {
return result;
}
unique_ptr<AudioStream> createAudioStream(boost::filesystem::path filePath) {
unique_ptr<AudioStream> createAudioStream(path filePath) {
try {
return unique_ptr<AudioStream>(new WaveFileReader(filePath));
} catch (...) {
@ -28,6 +36,37 @@ unique_ptr<AudioStream> createAudioStream(boost::filesystem::path filePath) {
}
}
// Renders a duration as a number of seconds with exactly two decimal places (e.g. "1.50").
string formatDuration(duration<double> seconds) {
	const double secondCount = seconds.count();
	return fmt::format("{0:.2f}", secondCount);
}
// Builds the property tree that is later serialized as the XML result document.
//
// filePath: path of the analyzed sound file; stored at rhubarbResult.info.soundFile.
// phones:   phones keyed by their start time. Each entry lasts until the key of the
//           following entry, so the final entry only supplies the end time of its
//           predecessor and is not emitted itself.
// shapes:   mouth shapes keyed by their start time; handled the same way as phones.
//
// A map with fewer than two entries contributes no elements. In particular, an empty
// map is safe to pass in.
ptree createXmlTree(const path& filePath, const map<centiseconds, Phone>& phones, const map<centiseconds, Shape>& shapes) {
	ptree tree;

	// Add sound file path
	tree.add("rhubarbResult.info.soundFile", filePath.string());

	// Add phones
	for (auto it = phones.cbegin(); it != phones.cend(); ++it) {
		// Derive the successor here rather than in the loop header, so that the end
		// iterator of an empty map is never incremented.
		auto itNext = it;
		++itNext;
		// The last entry has no successor to compute a duration from
		if (itNext == phones.cend()) break;

		ptree& phoneElement = tree.add("rhubarbResult.phones.phone", it->second);
		phoneElement.add("<xmlattr>.start", formatDuration(it->first));
		phoneElement.add("<xmlattr>.duration", formatDuration(itNext->first - it->first));
	}

	// Add mouth cues
	for (auto it = shapes.cbegin(); it != shapes.cend(); ++it) {
		auto itNext = it;
		++itNext;
		// The last entry has no successor to compute a duration from
		if (itNext == shapes.cend()) break;

		ptree& mouthCueElement = tree.add("rhubarbResult.mouthCues.mouthCue", it->second);
		mouthCueElement.add("<xmlattr>.start", formatDuration(it->first));
		mouthCueElement.add("<xmlattr>.duration", formatDuration(itNext->first - it->first));
	}

	return tree;
}
int main(int argc, char *argv[]) {
try {
// Get sound file name
@ -41,21 +80,18 @@ int main(int argc, char *argv[]) {
unique_ptr<AudioStream> audioStream = createAudioStream(soundFileName);
// Detect phones
std::map<centiseconds, Phone> phones = detectPhones(std::move(audioStream));
map<centiseconds, Phone> phones = detectPhones(std::move(audioStream));
// Generate mouth shapes
std::map<centiseconds, Shape> shapes = animate(phones);
map<centiseconds, Shape> shapes = animate(phones);
for (auto &pair : phones) {
std::cout << pair.first << ": " << pair.second << "\n";
}
for (auto &pair : shapes) {
std::cout << pair.first << ": " << pair.second << "\n";
}
// Print XML
boost::property_tree::ptree xmlTree = createXmlTree(soundFileName, phones, shapes);
boost::property_tree::write_xml(std::cout, xmlTree, boost::property_tree::xml_writer_settings<string>(' ', 2));
return 0;
} catch (const exception& e) {
std::cout << "An error occurred. " << getMessage(e);
std::cerr << "An error occurred. " << getMessage(e);
return 1;
}
}

View File

@ -68,8 +68,13 @@ Shape getShape(Phone phone) {
// Converts a timeline of phones (keyed by start time) into a timeline of mouth shapes.
//
// An entry is written only where the shape differs from the shape of the preceding
// phone, so a run of consecutive phones that map to the same shape yields a single
// entry at the start of the run.
map<centiseconds, Shape> animate(const map<centiseconds, Phone> &phones) {
	map<centiseconds, Shape> shapes;
	// Start from Shape::Invalid so that the first phone's shape is always recorded
	// (assumes getShape never returns Shape::Invalid -- TODO confirm).
	Shape lastShape = Shape::Invalid;
	for (auto& pair : phones) {
		Shape shape = getShape(pair.second);
		// Skip phones that do not change the mouth shape
		if (shape != lastShape) {
			shapes[pair.first] = shape;
			lastShape = shape;
		}
	}
	return shapes;
}

View File

@ -105,6 +105,7 @@ void processAudioStream(AudioStream& audioStream16kHzMono, ps_decoder_t& recogni
map<centiseconds, Phone> getPhones(ps_decoder_t& recognizer) {
map<centiseconds, Phone> result;
ps_seg_t *segmentationIter;
result[centiseconds(0)] = Phone::None;
int32 score;
int endFrame;
for (segmentationIter = ps_seg_iter(&recognizer, &score); segmentationIter; segmentationIter = ps_seg_next(segmentationIter)) {
@ -116,9 +117,8 @@ map<centiseconds, Phone> getPhones(ps_decoder_t& recognizer) {
ps_seg_frames(segmentationIter, &startFrame, &endFrame);
result[centiseconds(startFrame)] = stringToPhone(phone);
result[centiseconds(endFrame + 1)] = Phone::None;
}
// Add dummy entry past the last phone
result[centiseconds(endFrame + 1)] = Phone::None;
return result;
};