Generating XML output
This commit is contained in:
parent
2ef99119b0
commit
27ba3ef357
|
@ -7,6 +7,7 @@
|
||||||
// For reference, see http://sunewatts.dk/lipsync/lipsync/article_02.php
|
// For reference, see http://sunewatts.dk/lipsync/lipsync/article_02.php
|
||||||
// For visual examples, see https://flic.kr/s/aHsj86KR4J. Their shapes "BMP".."L" map to A..H.
|
// For visual examples, see https://flic.kr/s/aHsj86KR4J. Their shapes "BMP".."L" map to A..H.
|
||||||
enum class Shape {
|
enum class Shape {
|
||||||
|
Invalid = -1,
|
||||||
A, // Closed mouth (silence, M, B, P)
|
A, // Closed mouth (silence, M, B, P)
|
||||||
B, // Clenched teeth (most vowels, m[e]n)
|
B, // Clenched teeth (most vowels, m[e]n)
|
||||||
C, // Mouth slightly open (b[ir]d, s[ay], w[i]n...)
|
C, // Mouth slightly open (b[ir]d, s[ay], w[i]n...)
|
||||||
|
|
|
@ -1,6 +1,8 @@
|
||||||
#ifndef LIPSYNC_CENTISECONDS_H
|
#ifndef LIPSYNC_CENTISECONDS_H
|
||||||
#define LIPSYNC_CENTISECONDS_H
|
#define LIPSYNC_CENTISECONDS_H
|
||||||
|
|
||||||
|
#include <chrono>
|
||||||
|
|
||||||
typedef std::chrono::duration<int, std::centi> centiseconds;
|
typedef std::chrono::duration<int, std::centi> centiseconds;
|
||||||
|
|
||||||
std::ostream& operator <<(std::ostream& stream, const centiseconds cs);
|
std::ostream& operator <<(std::ostream& stream, const centiseconds cs);
|
||||||
|
|
56
src/main.cpp
56
src/main.cpp
|
@ -1,4 +1,7 @@
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
#include <boost/property_tree/ptree.hpp>
|
||||||
|
#include <boost/property_tree/xml_parser.hpp>
|
||||||
|
#include <format.h>
|
||||||
#include "audio_input/WaveFileReader.h"
|
#include "audio_input/WaveFileReader.h"
|
||||||
#include "phone_extraction.h"
|
#include "phone_extraction.h"
|
||||||
#include "mouth_animation.h"
|
#include "mouth_animation.h"
|
||||||
|
@ -8,6 +11,11 @@ using std::exception;
|
||||||
using std::string;
|
using std::string;
|
||||||
using std::wstring;
|
using std::wstring;
|
||||||
using std::unique_ptr;
|
using std::unique_ptr;
|
||||||
|
using std::map;
|
||||||
|
using std::chrono::duration;
|
||||||
|
using std::chrono::duration_cast;
|
||||||
|
using boost::filesystem::path;
|
||||||
|
using boost::property_tree::ptree;
|
||||||
|
|
||||||
string getMessage(const exception& e) {
|
string getMessage(const exception& e) {
|
||||||
string result(e.what());
|
string result(e.what());
|
||||||
|
@ -20,7 +28,7 @@ string getMessage(const exception& e) {
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
unique_ptr<AudioStream> createAudioStream(boost::filesystem::path filePath) {
|
unique_ptr<AudioStream> createAudioStream(path filePath) {
|
||||||
try {
|
try {
|
||||||
return unique_ptr<AudioStream>(new WaveFileReader(filePath));
|
return unique_ptr<AudioStream>(new WaveFileReader(filePath));
|
||||||
} catch (...) {
|
} catch (...) {
|
||||||
|
@ -28,6 +36,37 @@ unique_ptr<AudioStream> createAudioStream(boost::filesystem::path filePath) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Renders a duration, expressed in seconds, as fixed-point text with exactly
// two digits after the decimal point (i.e. centisecond resolution).
string formatDuration(duration<double> seconds) {
	const double secondCount = seconds.count();
	return fmt::format("{0:.2f}", secondCount);
}
|
||||||
|
|
||||||
|
ptree createXmlTree(const path& filePath, const map<centiseconds, Phone>& phones, const map<centiseconds, Shape>& shapes) {
|
||||||
|
ptree tree;
|
||||||
|
|
||||||
|
// Add sound file path
|
||||||
|
tree.add("rhubarbResult.info.soundFile", filePath.string());
|
||||||
|
|
||||||
|
// Add phones
|
||||||
|
for (auto it = phones.cbegin(), itNext = ++phones.cbegin(); itNext != phones.cend(); ++it, ++itNext) {
|
||||||
|
auto pair = *it;
|
||||||
|
auto nextPair = *itNext;
|
||||||
|
ptree& phoneElement = tree.add("rhubarbResult.phones.phone", pair.second);
|
||||||
|
phoneElement.add("<xmlattr>.start", formatDuration(pair.first));
|
||||||
|
phoneElement.add("<xmlattr>.duration", formatDuration(nextPair.first - pair.first));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add mouth cues
|
||||||
|
for (auto it = shapes.cbegin(), itNext = ++shapes.cbegin(); itNext != shapes.cend(); ++it, ++itNext) {
|
||||||
|
auto pair = *it;
|
||||||
|
auto nextPair = *itNext;
|
||||||
|
ptree& mouthCueElement = tree.add("rhubarbResult.mouthCues.mouthCue", pair.second);
|
||||||
|
mouthCueElement.add("<xmlattr>.start", formatDuration(pair.first));
|
||||||
|
mouthCueElement.add("<xmlattr>.duration", formatDuration(nextPair.first - pair.first));
|
||||||
|
}
|
||||||
|
|
||||||
|
return tree;
|
||||||
|
}
|
||||||
|
|
||||||
int main(int argc, char *argv[]) {
|
int main(int argc, char *argv[]) {
|
||||||
try {
|
try {
|
||||||
// Get sound file name
|
// Get sound file name
|
||||||
|
@ -41,21 +80,18 @@ int main(int argc, char *argv[]) {
|
||||||
unique_ptr<AudioStream> audioStream = createAudioStream(soundFileName);
|
unique_ptr<AudioStream> audioStream = createAudioStream(soundFileName);
|
||||||
|
|
||||||
// Detect phones
|
// Detect phones
|
||||||
std::map<centiseconds, Phone> phones = detectPhones(std::move(audioStream));
|
map<centiseconds, Phone> phones = detectPhones(std::move(audioStream));
|
||||||
|
|
||||||
// Generate mouth shapes
|
// Generate mouth shapes
|
||||||
std::map<centiseconds, Shape> shapes = animate(phones);
|
map<centiseconds, Shape> shapes = animate(phones);
|
||||||
|
|
||||||
for (auto &pair : phones) {
|
// Print XML
|
||||||
std::cout << pair.first << ": " << pair.second << "\n";
|
boost::property_tree::ptree xmlTree = createXmlTree(soundFileName, phones, shapes);
|
||||||
}
|
boost::property_tree::write_xml(std::cout, xmlTree, boost::property_tree::xml_writer_settings<string>(' ', 2));
|
||||||
for (auto &pair : shapes) {
|
|
||||||
std::cout << pair.first << ": " << pair.second << "\n";
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
} catch (const exception& e) {
|
} catch (const exception& e) {
|
||||||
std::cout << "An error occurred. " << getMessage(e);
|
std::cerr << "An error occurred. " << getMessage(e);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
|
@ -68,8 +68,13 @@ Shape getShape(Phone phone) {
|
||||||
|
|
||||||
map<centiseconds, Shape> animate(const map<centiseconds, Phone> &phones) {
|
map<centiseconds, Shape> animate(const map<centiseconds, Phone> &phones) {
|
||||||
map<centiseconds, Shape> shapes;
|
map<centiseconds, Shape> shapes;
|
||||||
|
Shape lastShape = Shape::Invalid;
|
||||||
for (auto& pair : phones) {
|
for (auto& pair : phones) {
|
||||||
shapes[pair.first] = getShape(pair.second);
|
Shape shape = getShape(pair.second);
|
||||||
|
if (shape != lastShape) {
|
||||||
|
shapes[pair.first] = shape;
|
||||||
|
lastShape = shape;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return shapes;
|
return shapes;
|
||||||
}
|
}
|
||||||
|
|
|
@ -105,6 +105,7 @@ void processAudioStream(AudioStream& audioStream16kHzMono, ps_decoder_t& recogni
|
||||||
map<centiseconds, Phone> getPhones(ps_decoder_t& recognizer) {
|
map<centiseconds, Phone> getPhones(ps_decoder_t& recognizer) {
|
||||||
map<centiseconds, Phone> result;
|
map<centiseconds, Phone> result;
|
||||||
ps_seg_t *segmentationIter;
|
ps_seg_t *segmentationIter;
|
||||||
|
result[centiseconds(0)] = Phone::None;
|
||||||
int32 score;
|
int32 score;
|
||||||
int endFrame;
|
int endFrame;
|
||||||
for (segmentationIter = ps_seg_iter(&recognizer, &score); segmentationIter; segmentationIter = ps_seg_next(segmentationIter)) {
|
for (segmentationIter = ps_seg_iter(&recognizer, &score); segmentationIter; segmentationIter = ps_seg_next(segmentationIter)) {
|
||||||
|
@ -116,9 +117,8 @@ map<centiseconds, Phone> getPhones(ps_decoder_t& recognizer) {
|
||||||
ps_seg_frames(segmentationIter, &startFrame, &endFrame);
|
ps_seg_frames(segmentationIter, &startFrame, &endFrame);
|
||||||
|
|
||||||
result[centiseconds(startFrame)] = stringToPhone(phone);
|
result[centiseconds(startFrame)] = stringToPhone(phone);
|
||||||
|
result[centiseconds(endFrame + 1)] = Phone::None;
|
||||||
}
|
}
|
||||||
// Add dummy entry past the last phone
|
|
||||||
result[centiseconds(endFrame + 1)] = Phone::None;
|
|
||||||
return result;
|
return result;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue