Refactoring

- Moved phone recognition code to phone_extraction.cpp - Introduced type centiseconds - Code reorganization
2015-11-18 20:59:03 +01:00 · 2015-11-18 20:59:03 +01:00 · f2f6f75932
parent 9fbae36e70
commit f2f6f75932
17 changed files with 326 additions and 139 deletions
--- a/.idea/LipSync.iml
+++ b/.idea/LipSync.iml
@ -126,19 +126,25 @@
      <sourceFolder url="file://$MODULE_DIR$/lib/sphinxbase-5prealpha-2015-08-05/src/libsphinxbase/util/slamch.c" isTestSource="false" />
      <sourceFolder url="file://$MODULE_DIR$/lib/sphinxbase-5prealpha-2015-08-05/src/libsphinxbase/util/slapack_lite.c" isTestSource="false" />
      <sourceFolder url="file://$MODULE_DIR$/lib/sphinxbase-5prealpha-2015-08-05/src/libsphinxbase/util/strfuncs.c" isTestSource="false" />
      <sourceFolder url="file://$MODULE_DIR$/src/audio_input/16kHzMonoStream.cpp" isTestSource="false" />
      <sourceFolder url="file://$MODULE_DIR$/src/audio_input/16kHzMonoStream.h" isTestSource="false" />
      <sourceFolder url="file://$MODULE_DIR$/src/audio_input/AudioStream.h" isTestSource="false" />
      <sourceFolder url="file://$MODULE_DIR$/src/audio_input/ChannelDownmixer.cpp" isTestSource="false" />
      <sourceFolder url="file://$MODULE_DIR$/src/audio_input/ChannelDownmixer.h" isTestSource="false" />
-      <sourceFolder url="file://$MODULE_DIR$/src/audio_input/IOTools.h" isTestSource="false" />
+      <sourceFolder url="file://$MODULE_DIR$/src/audio_input/io_tools.h" isTestSource="false" />
      <sourceFolder url="file://$MODULE_DIR$/src/audio_input/SampleRateConverter.cpp" isTestSource="false" />
      <sourceFolder url="file://$MODULE_DIR$/src/audio_input/SampleRateConverter.h" isTestSource="false" />
      <sourceFolder url="file://$MODULE_DIR$/src/audio_input/wave_file_writing.cpp" isTestSource="false" />
      <sourceFolder url="file://$MODULE_DIR$/src/audio_input/wave_file_writing.h" isTestSource="false" />
      <sourceFolder url="file://$MODULE_DIR$/src/audio_input/WaveFileReader.cpp" isTestSource="false" />
      <sourceFolder url="file://$MODULE_DIR$/src/audio_input/WaveFileReader.h" isTestSource="false" />
-      <sourceFolder url="file://$MODULE_DIR$/src/audio_input/WaveFileWriter.cpp" isTestSource="false" />
+      <sourceFolder url="file://$MODULE_DIR$/src/centiseconds.cpp" isTestSource="false" />
-      <sourceFolder url="file://$MODULE_DIR$/src/audio_input/WaveFileWriter.h" isTestSource="false" />
+      <sourceFolder url="file://$MODULE_DIR$/src/centiseconds.h" isTestSource="false" />
      <sourceFolder url="file://$MODULE_DIR$/src/main.cpp" isTestSource="false" />
      <sourceFolder url="file://$MODULE_DIR$/src/Phone.cpp" isTestSource="false" />
      <sourceFolder url="file://$MODULE_DIR$/src/Phone.h" isTestSource="false" />
      <sourceFolder url="file://$MODULE_DIR$/src/phone_extraction.cpp" isTestSource="false" />
      <sourceFolder url="file://$MODULE_DIR$/src/phone_extraction.h" isTestSource="false" />
      <sourceFolder url="file://$MODULE_DIR$/src/platform_tools.h" isTestSource="false" />
      <sourceFolder url="file://$MODULE_DIR$/src/platform_tools_win.cpp" isTestSource="false" />
    </content>
    <orderEntry type="inheritedJdk" />
    <orderEntry type="sourceFolder" forTests="false" />
@ -169,8 +175,8 @@
        <excluded>
          <root url="file://$MODULE_DIR$/lib/cppformat/format.cc" />
          <root url="file://$MODULE_DIR$/lib/cppformat/posix.cc" />
          <root url="file://$MODULE_DIR$/lib/cppformat/posix.h" />
          <root url="file://$MODULE_DIR$/lib/cppformat/format.h" />
          <root url="file://$MODULE_DIR$/lib/cppformat/posix.h" />
        </excluded>
      </library>
    </orderEntry>
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -1,7 +1,7 @@
 cmake_minimum_required(VERSION 3.3)
 project(LipSync)
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -Wall")
 set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib")
 set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib")
@ -11,10 +11,15 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin")
 set(Boost_USE_STATIC_LIBS ON) # Use static libs
 set(Boost_USE_MULTITHREADED ON) # Enable multithreading support
 set(Boost_USE_STATIC_RUNTIME ON) # Use static C++ runtime
-find_package(Boost REQUIRED COMPONENTS filesystem locale )
+find_package(Boost REQUIRED COMPONENTS filesystem locale system)
 include_directories(${Boost_INCLUDE_DIRS})
-set(SOURCE_FILES src/main.cpp src/audio_input/WaveFileReader.cpp src/audio_input/WaveFileReader.h src/audio_input/ChannelDownmixer.cpp src/audio_input/ChannelDownmixer.h src/audio_input/AudioStream.h src/audio_input/SampleRateConverter.cpp src/audio_input/SampleRateConverter.h src/audio_input/16kHzMonoStream.cpp src/audio_input/16kHzMonoStream.h src/audio_input/WaveFileWriter.cpp src/audio_input/WaveFileWriter.h src/audio_input/IOTools.h)
+set(SOURCE_FILES src/main.cpp src/audio_input/WaveFileReader.cpp src/audio_input/WaveFileReader.h src/audio_input/ChannelDownmixer.cpp src/audio_input/ChannelDownmixer.h src/audio_input/AudioStream.h src/audio_input/SampleRateConverter.cpp src/audio_input/SampleRateConverter.h src/audio_input/wave_file_writing.cpp src/audio_input/wave_file_writing.h src/audio_input/io_tools.h src/platform_tools.h src/phone_extraction.cpp src/phone_extraction.h src/Phone.cpp src/Phone.h src/centiseconds.cpp src/centiseconds.h)
 if(WIN32)
 	set(SOURCE_FILES "${SOURCE_FILES};src/platform_tools_win.cpp")
 else()
 	message(FATAL_ERROR "Target platform not supported.")
 endif()
 include_directories("lib/sphinxbase-5prealpha-2015-08-05/include" "lib/pocketsphinx-5prealpha-2015-08-05/include" "lib/cppformat")
 FILE(GLOB_RECURSE SPHINX_BASE "lib/sphinxbase-5prealpha-2015-08-05/src/libsphinxbase/*.c")
@ -44,5 +49,5 @@ endfunction()
 # Copy resource files
 set(modelDir "${CMAKE_SOURCE_DIR}/lib/pocketsphinx-5prealpha-2015-08-05/model")
-copy_after_build("${modelDir}/en-us/en-us-phone.lm.bin" "res/sphinx/acoustic_model")
+copy_after_build("${modelDir}/en-us/en-us-phone.lm.bin" "res/sphinx")
-copy_after_build("${modelDir}/en-us/en-us/*" "res/sphinx/language_model")
+copy_after_build("${modelDir}/en-us/en-us/*" "res/sphinx/acoustic_model")
--- a/src/Phone.cpp
+++ b/src/Phone.cpp
@ -0,0 +1,36 @@
 #include <boost/bimap.hpp>
 #include "Phone.h"
 using std::string;
 // Builds a boost::bimap<L, R> from a braced list of left/right relations,
 // which allows a bimap to be initialized much like a std::map literal.
 template <typename L, typename R>
 boost::bimap<L, R>
 makeBimap(std::initializer_list<typename boost::bimap<L, R>::value_type> list) {
 	typedef boost::bimap<L, R> Bimap;
 	Bimap result(list.begin(), list.end());
 	return result;
 }
 // Bidirectional lookup table between phone names and Phone values.
 // The left view maps name -> Phone (used by stringToPhone), the right view
 // maps Phone -> name (used by phoneToString). Phone::Unknown must stay in
 // this table: phoneToString falls back to looking it up.
 boost::bimap<string, Phone> phonesByName = makeBimap<string, Phone>({
 	{ "None", Phone::None },
 	{ "Unknown", Phone::Unknown },
 	{ "AO",	Phone::AO },	{ "AA",	Phone::AA },	{ "IY",	Phone::IY },	{ "UW",	Phone::UW },
 	{ "EH",	Phone::EH },	{ "IH",	Phone::IH },	{ "UH",	Phone::UH },	{ "AH",	Phone::AH },
 	{ "AE",	Phone::AE },	{ "EY",	Phone::EY },	{ "AY",	Phone::AY },	{ "OW",	Phone::OW },
 	{ "AW",	Phone::AW },	{ "OY",	Phone::OY },	{ "ER",	Phone::ER },	{ "P",	Phone::P },
 	{ "B",	Phone::B },		{ "T",	Phone::T },		{ "D",	Phone::D },		{ "K",	Phone::K },
 	{ "G",	Phone::G },		{ "CH",	Phone::CH },	{ "JH",	Phone::JH },	{ "F",	Phone::F },
 	{ "V",	Phone::V },		{ "TH",	Phone::TH },	{ "DH",	Phone::DH },	{ "S",	Phone::S },
 	{ "Z",	Phone::Z },		{ "SH",	Phone::SH },	{ "ZH",	Phone::ZH },	{ "HH",	Phone::HH },
 	{ "M",	Phone::M },		{ "N",	Phone::N },		{ "NG",	Phone::NG },	{ "L",	Phone::L },
 	{ "R",	Phone::R },		{ "Y",	Phone::Y },		{ "W",	Phone::W },
 });
 // Translates a phone name into its Phone value.
 // Names that are not present in phonesByName yield Phone::Unknown.
 Phone stringToPhone(const string& s) {
 	const auto& namesToPhones = phonesByName.left;
 	const auto match = namesToPhones.find(s);
 	if (match == namesToPhones.end()) {
 		return Phone::Unknown;
 	}
 	return match->second;
 }
 // Translates a Phone value into its name.
 // A value that is missing from phonesByName is reported under the name
 // registered for Phone::Unknown.
 string phoneToString(Phone phone) {
 	const auto& phonesToNames = phonesByName.right;
 	const auto match = phonesToNames.find(phone);
 	if (match != phonesToNames.end()) {
 		return match->second;
 	}
 	return phoneToString(Phone::Unknown);
 }
--- a/src/Phone.h
+++ b/src/Phone.h
@ -0,0 +1,78 @@
 #ifndef LIPSYNC_PHONE_H
 #define LIPSYNC_PHONE_H
 // Defines a subset of the Arpabet, plus two special values.
 // Each phone is annotated with its IPA symbol(s) and example words in which
 // the bracketed letters are pronounced as that phone.
 enum class Phone {
 	None,		// No phone
 	Unknown,	// A phone whose name is not recognized
 	/////////
 	// Vowels
 	// ... monophthongs
 	AO,		// [ɔ] as in [o]ff, f[a]ll, fr[o]st
 	AA,		// [ɑ] as in f[a]ther
 	IY,		// [i] as in b[ee], sh[e]
 	UW,		// [u] as in y[ou], n[ew], f[oo]d
 	EH,		// [ɛ] as in r[e]d, m[e]n
 	IH,		// [ɪ] as in b[i]g, w[i]n
 	UH,		// [ʊ] as in sh[ou]ld, c[ou]ld
 	AH,		// [ʌ, ə] as in b[u]t, s[u]n, [a]lone, disc[u]s
 	AE,		// [æ] as in [a]t, b[a]t
 	// ... diphthongs
 	EY,		// [eɪ] as in s[ay], [ei]ght
 	AY,		// [aɪ] as in m[y], wh[y], r[i]de
 	OW,		// [oʊ] as in sh[ow], c[oa]t
 	AW,		// [aʊ] as in h[ow], n[ow]
 	OY,		// [ɔɪ] as in b[oy], t[oy]
 	// ... r-colored
 	ER,		// [ɝ] as in h[er], b[ir]d, h[ur]t
 	/////////////
 	// Consonants
 	// ... stops
 	P,		// [p] as in [p]ay
 	B,		// [b] as in [b]uy
 	T,		// [t] as in [t]ake
 	D,		// [d] as in [d]ay
 	K,		// [k] as in [k]ey
 	G,		// [g] as in [g]o
 	// ... affricates
 	CH,		// [tʃ] as in [ch]air
 	JH,		// [dʒ] as in [j]ust
 	// ... fricatives
 	F,		// [f] as in [f]or
 	V,		// [v] as in [v]ery
 	TH,		// [θ] as in [th]anks
 	DH,		// [ð] as in [th]at
 	S,		// [s] as in [s]ay
 	Z,		// [z] as in [z]oo
 	SH,		// [ʃ] as in [sh]ow
 	ZH,		// [ʒ] as in mea[s]ure, plea[s]ure
 	HH,		// [h] as in [h]ouse
 	// ... nasals
 	M,		// [m] as in [m]an
 	N,		// [n] as in [n]o
 	NG,		// [ŋ] as in si[ng]
 	// ... liquids
 	L,		// [ɫ] as in [l]ate
 	R,		// [r, ɹ] as in [r]un
 	// ... semivowels
 	Y,		// [j] as in [y]es
 	W		// [w] as in [w]ay
 };
 // Returns the Phone with the given name (e.g. "AO"); unrecognized names
 // yield Phone::Unknown.
 Phone stringToPhone(const std::string& s);
 // Returns the name of the given Phone (e.g. "AO" for Phone::AO).
 std::string phoneToString(Phone phone);
 #endif //LIPSYNC_PHONE_H
--- a/src/audio_input/16kHzMonoStream.cpp
+++ b/src/audio_input/16kHzMonoStream.cpp
@ -1,26 +0,0 @@
 #include "16kHzMonoStream.h"
 #include "WaveFileReader.h"
 #include "ChannelDownmixer.h"
 #include "SampleRateConverter.h"
 using std::runtime_error;
 std::unique_ptr<AudioStream> create16kHzMonoStream(std::string fileName) {
 	// Create audio stream
 	std::unique_ptr<AudioStream> stream(new WaveFileReader(fileName));
 	// Downmix, if required
 	if (stream->getChannelCount() != 1) {
 		stream.reset(new ChannelDownmixer(std::move(stream)));
 	}
 	// Downsample, if required
 	if (stream->getFrameRate() < 16000) {
 		throw runtime_error("Sample rate must not be below 16kHz.");
 	}
 	if (stream->getFrameRate() != 16000) {
 		stream.reset(new SampleRateConverter(std::move(stream), 16000));
 	}
 	return stream;
 }
--- a/src/audio_input/16kHzMonoStream.h
+++ b/src/audio_input/16kHzMonoStream.h
@ -1,10 +0,0 @@
 #ifndef LIPSYNC_WAVEFILEREADER16KHZMONO_H
 #define LIPSYNC_WAVEFILEREADER16KHZMONO_H
 #include "AudioStream.h"
 #include <memory>
 #include <string>
 // Opens the wave file with the given name as a mono audio stream with a
 // frame rate of 16000. Throws std::runtime_error if the file's frame rate
 // is below 16000.
 std::unique_ptr<AudioStream> create16kHzMonoStream(std::string fileName);
 #endif //LIPSYNC_WAVEFILEREADER16KHZMONO_H
--- a/src/audio_input/WaveFileReader.cpp
+++ b/src/audio_input/WaveFileReader.cpp
@ -1,6 +1,6 @@
 #include <format.h>
 #include "WaveFileReader.h"
-#include "IOTools.h"
+#include "io_tools.h"
 using std::runtime_error;
 using fmt::format;
--- a/src/audio_input/io_tools.h
+++ b/src/audio_input/io_tools.h
--- a/src/audio_input/wave_file_writing.cpp
+++ b/src/audio_input/wave_file_writing.cpp
@ -1,6 +1,6 @@
 #include <fstream>
-#include "WaveFileWriter.h"
+#include "wave_file_writing.h"
-#include "IOTools.h"
+#include "io_tools.h"
 using namespace little_endian;
--- a/src/audio_input/wave_file_writing.h
+++ b/src/audio_input/wave_file_writing.h
--- a/src/centiseconds.cpp
+++ b/src/centiseconds.cpp
@ -0,0 +1,9 @@
 #include <ratio>
 #include <chrono>
 #include <ostream>
 #include "centiseconds.h"
 // Writes a centiseconds value to the stream as its tick count followed by
 // the unit suffix "cs" (for instance "42cs") and returns the stream.
 std::ostream& operator <<(std::ostream& stream, const centiseconds cs) {
 	return stream << cs.count() << "cs";
 }
--- a/src/centiseconds.h
+++ b/src/centiseconds.h
@ -0,0 +1,8 @@
 #ifndef LIPSYNC_CENTISECONDS_H
 #define LIPSYNC_CENTISECONDS_H
 // Duration type with a resolution of 1/100 second, stored as an int tick count.
 using centiseconds = std::chrono::duration<int, std::centi>;
 // Stream insertion for centiseconds values; defined in centiseconds.cpp.
 std::ostream& operator <<(std::ostream& stream, const centiseconds cs);
 #endif //LIPSYNC_CENTISECONDS_H
--- a/src/main.cpp
+++ b/src/main.cpp
@ -1,97 +1,16 @@
 #include <pocketsphinx.h>
 #include <stdexcept>
 #include <fstream>
 #include <memory>
 #include <vector>
 #include <iostream>
-#include <chrono>
+#include "audio_input/WaveFileReader.h"
-#include "audio_input/16kHzMonoStream.h"
+#include "phone_extraction.h"
 using std::runtime_error;
 using std::shared_ptr;
 using std::unique_ptr;
 #define MODELDIR "X:/dev/projects/LipSync/lib/pocketsphinx-5prealpha-2015-08-05/model"
 // Converts a float in the range -1..1 to a signed 16-bit int
 // Maps a float sample onto the signed 16-bit range: -1 becomes INT16_MIN,
 // 1 becomes INT16_MAX. Input outside -1..1 is clamped to that range first.
 int16_t floatSampleToInt16(float sample) {
 	const float clamped = std::min(std::max(sample, -1.0f), 1.0f);
 	// Position of the sample within the range, from 0 (lowest) to 1 (highest)
 	const float position = (clamped + 1) / 2;
 	return static_cast<int16_t>(position * (INT16_MAX - INT16_MIN) + INT16_MIN);
 }
 int main(int argc, char *argv[]) {
-	shared_ptr<cmd_ln_t> config(
+	// Create audio stream
-		cmd_ln_init(
+	std::unique_ptr<AudioStream> audioStream(
-			nullptr, ps_args(), true,
+		new WaveFileReader(R"(C:\Users\Daniel\Desktop\audio-test\test 16000Hz 1ch 16bit.wav)"));
 			// Set acoustic model
 			"-hmm", MODELDIR "/en-us/en-us",
 			// Set phonetic language model
 			"-allphone", MODELDIR "/en-us/en-us-phone.lm.bin",
 			"-allphone_ci", "yes",
 			// The following settings are Voodoo to me.
 			// I copied them from http://cmusphinx.sourceforge.net/wiki/phonemerecognition
 			// Set beam width applied to every frame in Viterbi search
 			"-beam", "1e-20",
 			// Set beam width applied to phone transitions
 			"-pbeam", "1e-20",
 			// Set language model probability weight
 			"-lw", "2.0",
 			nullptr),
 		[](cmd_ln_t* config) { cmd_ln_free_r(config); });
 	if (!config) throw runtime_error("Error creating configuration.");
-	shared_ptr<ps_decoder_t> recognizer(
+	std::map<centiseconds, Phone> phones = detectPhones(std::move(audioStream));
 		ps_init(config.get()),
 		[](ps_decoder_t* recognizer) { ps_free(recognizer); });
 	if (!recognizer) throw runtime_error("Error creating speech recognizer.");
-	unique_ptr<AudioStream> audioStream =
+	for (auto& pair : phones) {
-		create16kHzMonoStream(R"(C:\Users\Daniel\Desktop\audio-test\test 16000Hz 1ch 16bit.wav)");
+		std::cout << pair.first << ": " << phoneToString(pair.second) << "\n";
 	int error = ps_start_utt(recognizer.get());
 	if (error) throw runtime_error("Error starting utterance processing.");
 	auto start = std::chrono::steady_clock::now();
 	std::vector<int16_t> buffer;
 	const int capacity = 1600; // 0.1 second capacity
 	buffer.reserve(capacity);
 	int sampleCount = 0;
 	do {
 		// Read to buffer
 		buffer.clear();
 		while (buffer.size() < capacity) {
 			float sample;
 			if (!audioStream->getNextSample(sample)) break;
 			buffer.push_back(floatSampleToInt16(sample));
 		}
 		// Analyze buffer
 		int searchedFrameCount = ps_process_raw(recognizer.get(), buffer.data(), buffer.size(), false, false);
 		if (searchedFrameCount < 0) throw runtime_error("Error decoding raw audio data.");
 		sampleCount += buffer.size();
 		std::cout << sampleCount / 16000.0 << "s\n";
 	} while (buffer.size());
 	error = ps_end_utt(recognizer.get());
 	if (error) throw runtime_error("Error ending utterance processing.");
 	auto end = std::chrono::steady_clock::now();
 	std::cout << std::chrono::duration_cast<std::chrono::duration<double>>(end - start).count() << "\n";
 	ps_seg_t *segmentationIter;
 	int32 score;
 	for (segmentationIter = ps_seg_iter(recognizer.get(), &score); segmentationIter; segmentationIter = ps_seg_next(segmentationIter)) {
 		// Get phoneme
 		char const *phoneme = ps_seg_word(segmentationIter);
 		// Get timing
 		int startFrame, endFrame;
 		ps_seg_frames(segmentationIter, &startFrame, &endFrame);
 		printf(">>> %-5s %-5d %-5d\n", phoneme, startFrame, endFrame);
 	}
 	return 0;
--- a/src/phone_extraction.cpp
+++ b/src/phone_extraction.cpp
@ -0,0 +1,116 @@
 #include <pocketsphinx.h>
 #include <iostream>
 #include <boost/filesystem.hpp>
 #include "phone_extraction.h"
 #include "audio_input/SampleRateConverter.h"
 #include "audio_input/ChannelDownmixer.h"
 #include "platform_tools.h"
 using std::runtime_error;
 using std::unique_ptr;
 using std::shared_ptr;
 using std::string;
 using std::map;
 using boost::filesystem::path;
 // Wraps the given stream, where necessary, so that the result delivers a
 // single channel at 16000 frames per second. A stream that already has this
 // format is returned unchanged.
 // Throws std::runtime_error if the stream's frame rate is below 16000.
 unique_ptr<AudioStream> to16kHzMono(unique_ptr<AudioStream> stream) {
 	// More than one channel: mix down to mono
 	const bool isMono = stream->getChannelCount() == 1;
 	if (!isMono) {
 		stream.reset(new ChannelDownmixer(std::move(stream)));
 	}
 	// Upsampling is not supported
 	if (stream->getFrameRate() < 16000) {
 		throw runtime_error("Sample rate must not be below 16kHz.");
 	}
 	// Higher frame rate: convert down to 16 kHz
 	const bool needsResampling = stream->getFrameRate() != 16000;
 	if (needsResampling) {
 		stream.reset(new SampleRateConverter(std::move(stream), 16000));
 	}
 	return stream;
 }
 // Converts a float in the range -1..1 to a signed 16-bit int
 // Maps a float sample onto the signed 16-bit range: -1 becomes INT16_MIN,
 // 1 becomes INT16_MAX. Input outside -1..1 is clamped to that range first.
 int16_t floatSampleToInt16(float sample) {
 	const float clamped = std::min(std::max(sample, -1.0f), 1.0f);
 	// Position of the sample within the range, from 0 (lowest) to 1 (highest)
 	const float position = (clamped + 1) / 2;
 	return static_cast<int16_t>(position * (INT16_MAX - INT16_MIN) + INT16_MIN);
 }
 // Detects the phones spoken in an audio stream using PocketSphinx.
 //
 // The stream is first converted to 16 kHz mono (to16kHzMono throws for frame
 // rates below 16 kHz) and then fed to the recognizer in chunks of 1600 samples.
 // The acoustic model and the phonetic language model are loaded from the
 // "res/sphinx" directory next to the directory returned by getBinDirectory().
 //
 // Returns a map from the start time of each recognized segment to its phone;
 // segment names are translated by stringToPhone. One additional Phone::None
 // entry is placed one frame past the end of the last segment. If no segment
 // was reported at all, that entry is placed at time zero.
 //
 // Throws std::runtime_error if the configuration or the recognizer cannot be
 // created, or if PocketSphinx reports an error while processing the audio.
 map<centiseconds, Phone> detectPhones(unique_ptr<AudioStream> audioStream) {
 	// Convert audio stream to the exact format PocketSphinx requires
 	audioStream = to16kHzMono(std::move(audioStream));
 	// Create PocketSphinx configuration
 	path binDirectory(getBinDirectory());
 	path resDirectory(binDirectory.parent_path() / "res");
 	// The path strings are kept in locals declared before the configuration,
 	// so the character pointers handed to the C API stay valid for as long as
 	// the configuration object exists.
 	const string acousticModelPath = (resDirectory / "sphinx/acoustic_model").string();
 	const string languageModelPath = (resDirectory / "sphinx/en-us-phone.lm.bin").string();
 	shared_ptr<cmd_ln_t> config(
 		cmd_ln_init(
 			nullptr, ps_args(), true,
 			// Set acoustic model
 			"-hmm", acousticModelPath.c_str(),
 			// Set phonetic language model
 			"-allphone", languageModelPath.c_str(),
 			"-allphone_ci", "yes",
 			// The following settings are taken from http://cmusphinx.sourceforge.net/wiki/phonemerecognition
 			// Set beam width applied to every frame in Viterbi search
 			"-beam", "1e-20",
 			// Set beam width applied to phone transitions
 			"-pbeam", "1e-20",
 			// Set language model probability weight
 			"-lw", "2.0",
 			nullptr),
 		[](cmd_ln_t* config) { cmd_ln_free_r(config); });
 	if (!config) throw runtime_error("Error creating configuration.");
 	// Create phone recognizer
 	shared_ptr<ps_decoder_t> recognizer(
 		ps_init(config.get()),
 		[](ps_decoder_t* recognizer) { ps_free(recognizer); });
 	if (!recognizer) throw runtime_error("Error creating speech recognizer.");
 	// Start recognition
 	int error = ps_start_utt(recognizer.get());
 	if (error) throw runtime_error("Error starting utterance processing.");
 	// Process entire sound file
 	std::vector<int16_t> buffer;
 	const std::size_t capacity = 1600; // 0.1 second capacity
 	buffer.reserve(capacity);
 	do {
 		// Read to buffer
 		buffer.clear();
 		while (buffer.size() < capacity) {
 			float sample;
 			if (!audioStream->getNextSample(sample)) break;
 			buffer.push_back(floatSampleToInt16(sample));
 		}
 		// Analyze buffer. The last pass of the loop hands over an empty buffer.
 		int searchedFrameCount = ps_process_raw(recognizer.get(), buffer.data(), buffer.size(), false, false);
 		if (searchedFrameCount < 0) throw runtime_error("Error decoding raw audio data.");
 	} while (buffer.size());
 	error = ps_end_utt(recognizer.get());
 	if (error) throw runtime_error("Error ending utterance processing.");
 	// Collect results into map.
 	// Frame numbers are used directly as centiseconds.
 	// NOTE(review): this presumes PocketSphinx runs at 100 frames per second
 	// (believed to be its default frame rate) -- confirm if that is ever changed.
 	map<centiseconds, Phone> result;
 	int32 score;
 	// Starts at -1 so that the dummy entry below is well-defined (time zero)
 	// even if the recognizer reports no segments at all
 	int endFrame = -1;
 	for (ps_seg_t *segmentationIter = ps_seg_iter(recognizer.get(), &score); segmentationIter; segmentationIter = ps_seg_next(segmentationIter)) {
 		// Get phone
 		char const *phone = ps_seg_word(segmentationIter);
 		// Get timing
 		int startFrame;
 		ps_seg_frames(segmentationIter, &startFrame, &endFrame);
 		// Guard against a missing segment name: a null pointer must never be
 		// converted to a std::string
 		result[centiseconds(startFrame)] = phone ? stringToPhone(phone) : Phone::Unknown;
 	}
 	// Add dummy entry past the last phone
 	result[centiseconds(endFrame + 1)] = Phone::None;
 	return result;
 }
--- a/src/phone_extraction.h
+++ b/src/phone_extraction.h
@ -0,0 +1,14 @@
 #ifndef LIPSYNC_PHONE_EXTRACTION_H
 #define LIPSYNC_PHONE_EXTRACTION_H
 #include <map>
 #include <chrono>
 #include <ratio>
 #include <memory>
 #include "audio_input/AudioStream.h"
 #include "Phone.h"
 #include "centiseconds.h"
 // Runs phone recognition on the given audio stream, taking ownership of it.
 // Returns a map from the start time of each recognized phone to that phone,
 // followed by a final Phone::None entry past the last phone.
 // Throws std::runtime_error if recognition cannot be set up or fails.
 std::map<centiseconds, Phone> detectPhones(std::unique_ptr<AudioStream> audioStream);
 #endif //LIPSYNC_PHONE_EXTRACTION_H
--- a/src/platform_tools.h
+++ b/src/platform_tools.h
@ -0,0 +1,8 @@
 #ifndef LIPSYNC_PLATFORM_TOOLS_H
 #define LIPSYNC_PLATFORM_TOOLS_H
 #include <boost/filesystem.hpp>
 // Returns the directory that contains the running executable.
 // The implementation is platform-specific (see platform_tools_win.cpp) and
 // throws std::runtime_error if the path cannot be determined.
 boost::filesystem::path getBinDirectory();
 #endif //LIPSYNC_PLATFORM_TOOLS_H
--- a/src/platform_tools_win.cpp
+++ b/src/platform_tools_win.cpp
@ -0,0 +1,24 @@
 #include "platform_tools.h"
 #include <Windows.h>
 // Returns the directory that contains the executable file of the current
 // process. Throws std::runtime_error if the executable path cannot be obtained.
 boost::filesystem::path getBinDirectory() {
 	// Start with room for MAX_PATH characters
 	std::vector<wchar_t> pathBuffer(MAX_PATH);
 	DWORD length;
 	for (;;) {
 		length = GetModuleFileNameW(0, pathBuffer.data(), pathBuffer.size());
 		// A return value equal to the buffer size signals that the buffer
 		// was too small; anything else ends the search
 		if (length != pathBuffer.size()) break;
 		// Retry with twice the room
 		pathBuffer.resize(pathBuffer.size() * 2);
 	}
 	// A return value of 0 signals failure
 	if (length == 0) {
 		throw std::runtime_error("Could not determine path of bin directory.");
 	}
 	const boost::filesystem::path executableFile(pathBuffer.data());
 	return executableFile.parent_path();
 }