Generating XML output
This commit is contained in:
parent
2ef99119b0
commit
27ba3ef357
|
@ -7,6 +7,7 @@
|
|||
// For reference, see http://sunewatts.dk/lipsync/lipsync/article_02.php
|
||||
// For visual examples, see https://flic.kr/s/aHsj86KR4J. Their shapes "BMP".."L" map to A..H.
|
||||
enum class Shape {
|
||||
Invalid = -1,
|
||||
A, // Closed mouth (silence, M, B, P)
|
||||
B, // Clenched teeth (most vowels, m[e]n)
|
||||
C, // Mouth slightly open (b[ir]d, s[ay], w[i]n...)
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
#ifndef LIPSYNC_CENTISECONDS_H
|
||||
#define LIPSYNC_CENTISECONDS_H
|
||||
|
||||
#include <chrono>
|
||||
|
||||
typedef std::chrono::duration<int, std::centi> centiseconds;
|
||||
|
||||
std::ostream& operator <<(std::ostream& stream, const centiseconds cs);
|
||||
|
|
56
src/main.cpp
56
src/main.cpp
|
@ -1,4 +1,7 @@
|
|||
#include <iostream>
|
||||
#include <boost/property_tree/ptree.hpp>
|
||||
#include <boost/property_tree/xml_parser.hpp>
|
||||
#include <format.h>
|
||||
#include "audio_input/WaveFileReader.h"
|
||||
#include "phone_extraction.h"
|
||||
#include "mouth_animation.h"
|
||||
|
@ -8,6 +11,11 @@ using std::exception;
|
|||
using std::string;
|
||||
using std::wstring;
|
||||
using std::unique_ptr;
|
||||
using std::map;
|
||||
using std::chrono::duration;
|
||||
using std::chrono::duration_cast;
|
||||
using boost::filesystem::path;
|
||||
using boost::property_tree::ptree;
|
||||
|
||||
string getMessage(const exception& e) {
|
||||
string result(e.what());
|
||||
|
@ -20,7 +28,7 @@ string getMessage(const exception& e) {
|
|||
return result;
|
||||
}
|
||||
|
||||
unique_ptr<AudioStream> createAudioStream(boost::filesystem::path filePath) {
|
||||
unique_ptr<AudioStream> createAudioStream(path filePath) {
|
||||
try {
|
||||
return unique_ptr<AudioStream>(new WaveFileReader(filePath));
|
||||
} catch (...) {
|
||||
|
@ -28,6 +36,37 @@ unique_ptr<AudioStream> createAudioStream(boost::filesystem::path filePath) {
|
|||
}
|
||||
}
|
||||
|
||||
// Renders a duration, expressed in seconds, as a decimal string with exactly
// two digits after the decimal point.
string formatDuration(duration<double> seconds) {
	const double secondCount = seconds.count();
	return fmt::format("{0:.2f}", secondCount);
}
|
||||
|
||||
// Builds the property tree that is serialized as the XML result.
//
// Layout produced under the root element "rhubarbResult":
//   info.soundFile       - the sound file path as a string
//   phones.phone         - one element per phone, with "start" and "duration" attributes
//   mouthCues.mouthCue   - one element per mouth shape, with "start" and "duration" attributes
//
// In both maps the key is the time at which an entry starts. The duration of an
// entry is the distance to the key of the following entry, so the last entry of
// each map only marks the end of its predecessor and is not written itself.
// Empty and single-element maps therefore produce no elements.
ptree createXmlTree(const path& filePath, const map<centiseconds, Phone>& phones, const map<centiseconds, Shape>& shapes) {
	ptree tree;

	// Add sound file path
	tree.add("rhubarbResult.info.soundFile", filePath.string());

	// Add phones
	for (auto it = phones.cbegin(); it != phones.cend(); ++it) {
		auto itNext = it;
		++itNext;
		// The final entry is only an end marker; it has no successor to measure against.
		if (itNext == phones.cend()) break;

		ptree& phoneElement = tree.add("rhubarbResult.phones.phone", it->second);
		phoneElement.add("<xmlattr>.start", formatDuration(it->first));
		phoneElement.add("<xmlattr>.duration", formatDuration(itNext->first - it->first));
	}

	// Add mouth cues
	for (auto it = shapes.cbegin(); it != shapes.cend(); ++it) {
		auto itNext = it;
		++itNext;
		// The final entry is only an end marker; it has no successor to measure against.
		if (itNext == shapes.cend()) break;

		ptree& mouthCueElement = tree.add("rhubarbResult.mouthCues.mouthCue", it->second);
		mouthCueElement.add("<xmlattr>.start", formatDuration(it->first));
		mouthCueElement.add("<xmlattr>.duration", formatDuration(itNext->first - it->first));
	}

	return tree;
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
try {
|
||||
// Get sound file name
|
||||
|
@ -41,21 +80,18 @@ int main(int argc, char *argv[]) {
|
|||
unique_ptr<AudioStream> audioStream = createAudioStream(soundFileName);
|
||||
|
||||
// Detect phones
|
||||
std::map<centiseconds, Phone> phones = detectPhones(std::move(audioStream));
|
||||
map<centiseconds, Phone> phones = detectPhones(std::move(audioStream));
|
||||
|
||||
// Generate mouth shapes
|
||||
std::map<centiseconds, Shape> shapes = animate(phones);
|
||||
map<centiseconds, Shape> shapes = animate(phones);
|
||||
|
||||
for (auto &pair : phones) {
|
||||
std::cout << pair.first << ": " << pair.second << "\n";
|
||||
}
|
||||
for (auto &pair : shapes) {
|
||||
std::cout << pair.first << ": " << pair.second << "\n";
|
||||
}
|
||||
// Print XML
|
||||
boost::property_tree::ptree xmlTree = createXmlTree(soundFileName, phones, shapes);
|
||||
boost::property_tree::write_xml(std::cout, xmlTree, boost::property_tree::xml_writer_settings<string>(' ', 2));
|
||||
|
||||
return 0;
|
||||
} catch (const exception& e) {
|
||||
std::cout << "An error occurred. " << getMessage(e);
|
||||
std::cerr << "An error occurred. " << getMessage(e);
|
||||
return 1;
|
||||
}
|
||||
}
|
|
@ -68,8 +68,13 @@ Shape getShape(Phone phone) {
|
|||
|
||||
// Converts a timeline of phones into a timeline of mouth shapes.
//
// Every phone is mapped through getShape(). A shape is stored only when it
// differs from the previously stored one, so a run of consecutive phones that
// share a shape collapses into a single entry at the start time of the run.
//
// phones: phones keyed by their start time.
// Returns the mouth shapes keyed by the time at which each shape begins.
//
// NOTE(review): when the last phone entry maps to the same shape as its
// predecessor it is not stored, so the shape timeline can end earlier than the
// phone timeline - confirm that consumers of the result cope with this.
map<centiseconds, Shape> animate(const map<centiseconds, Phone> &phones) {
	map<centiseconds, Shape> shapes;
	// Start from Shape::Invalid so the first phone is stored
	// (assumes getShape() never yields Shape::Invalid - TODO confirm).
	Shape lastShape = Shape::Invalid;
	for (const auto& pair : phones) {
		const Shape shape = getShape(pair.second);
		if (shape != lastShape) {
			shapes[pair.first] = shape;
			lastShape = shape;
		}
	}
	return shapes;
}
|
||||
|
|
|
@ -105,6 +105,7 @@ void processAudioStream(AudioStream& audioStream16kHzMono, ps_decoder_t& recogni
|
|||
map<centiseconds, Phone> getPhones(ps_decoder_t& recognizer) {
|
||||
map<centiseconds, Phone> result;
|
||||
ps_seg_t *segmentationIter;
|
||||
result[centiseconds(0)] = Phone::None;
|
||||
int32 score;
|
||||
int endFrame;
|
||||
for (segmentationIter = ps_seg_iter(&recognizer, &score); segmentationIter; segmentationIter = ps_seg_next(segmentationIter)) {
|
||||
|
@ -116,9 +117,8 @@ map<centiseconds, Phone> getPhones(ps_decoder_t& recognizer) {
|
|||
ps_seg_frames(segmentationIter, &startFrame, &endFrame);
|
||||
|
||||
result[centiseconds(startFrame)] = stringToPhone(phone);
|
||||
result[centiseconds(endFrame + 1)] = Phone::None;
|
||||
}
|
||||
// Add dummy entry past the last phone
|
||||
result[centiseconds(endFrame + 1)] = Phone::None;
|
||||
return result;
|
||||
};
|
||||
|
||||
|
|
Loading…
Reference in New Issue