Restored dialog option, this time based on language model

This approach should be more robust and error-tolerant.
2016-06-03 21:07:49 +02:00 · 2016-06-03 21:07:49 +02:00 · 0d488e8de2
parent 4ed5908627
commit 0d488e8de2
11 changed files with 357 additions and 68 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -64,7 +64,7 @@ target_compile_options(cppFormat PRIVATE ${disableWarningsFlags})
 set_target_properties(cppFormat PROPERTIES FOLDER lib)
 # ... sphinxbase
-include_directories(SYSTEM "lib/sphinxbase-5prealpha-2015-08-05/include")
+include_directories(SYSTEM "lib/sphinxbase-5prealpha-2015-08-05/include" "lib/sphinxbase-5prealpha-2015-08-05/src")
 FILE(GLOB_RECURSE sphinxbaseFiles "lib/sphinxbase-5prealpha-2015-08-05/src/libsphinxbase/*.c")
 add_library(sphinxbase ${sphinxbaseFiles})
 target_compile_options(sphinxbase PRIVATE ${disableWarningsFlags})
@ -192,6 +192,8 @@ set(SOURCE_FILES
 	src/Exporter.cpp src/Exporter.h
 	src/tokenization.cpp src/tokenization.h
 	src/g2p.cpp src/g2p.h
 	src/languageModels.cpp src/languageModels.h
 	src/tupleHash.h
 )
 add_executable(rhubarb ${SOURCE_FILES})
 target_link_libraries(rhubarb ${Boost_LIBRARIES} cppFormat sphinxbase pocketSphinx flite)
--- a/src/languageModels.cpp
+++ b/src/languageModels.cpp
@ -0,0 +1,183 @@
 #include "languageModels.h"
 #include <cmath>
 #include <map>
 #include <regex>
 #include <stdexcept>
 #include <tuple>
 #include <vector>
 #include <boost/filesystem/fstream.hpp>
 #include <boost/range/adaptor/map.hpp>
 #include <gsl_util.h>
 #include "appInfo.h"
 #include "platformTools.h"
 using std::string;
 using std::u32string;
 using std::vector;
 using std::regex;
 using std::map;
 using std::tuple;
 using std::make_tuple;
 using std::get;
 using std::endl;
 using boost::filesystem::path;
 using unigram_t = string;
 using bigram_t = tuple<string, string>;
 using trigram_t = tuple<string, string, string>;
 // Counts how often each word occurs in the given word sequence.
 // Returns a map from word to its number of occurrences; empty input yields an empty map.
 map<unigram_t, int> getUnigramCounts(const vector<string>& words) {
 	map<unigram_t, int> counts;
 	for (auto word = words.cbegin(); word != words.cend(); ++word) {
 		counts[*word] += 1;
 	}
 	return counts;
 }
 // Counts how often each pair of directly adjacent words occurs in the given word sequence.
 // Returns a map from (first word, second word) to the number of occurrences.
 // Sequences with fewer than two words contain no bigrams and yield an empty map.
 map<bigram_t, int> getBigramCounts(const vector<string>& words) {
 	map<bigram_t, int> bigramCounts;
 	// Guard against short input: on an empty vector, `words.end() - 1` would step
 	// before begin(), which is undefined behavior.
 	if (words.size() >= 2) {
 		for (auto it = words.begin(); it < words.end() - 1; ++it) {
 			++bigramCounts[bigram_t(*it, *(it + 1))];
 		}
 	}
 	return bigramCounts;
 }
 // Counts how often each run of three directly adjacent words occurs in the given word sequence.
 // Returns a map from (first, second, third word) to the number of occurrences.
 // Sequences with fewer than three words yield an empty map.
 map<trigram_t, int> getTrigramCounts(const vector<string>& words) {
 	map<trigram_t, int> counts;
 	if (words.size() < 3) return counts;
 	// The last trigram starts two words before the end
 	const auto stop = words.end() - 2;
 	for (auto first = words.begin(); first < stop; ++first) {
 		const string& second = *(first + 1);
 		const string& third = *(first + 2);
 		counts[trigram_t(*first, second, third)] += 1;
 	}
 	return counts;
 }
 // Computes the discounted probability of each word:
 //   count(word) / number of words * deflator
 // words:         the full word sequence; only its length is used
 // unigramCounts: occurrence count per word
 // deflator:      factor applied to every relative frequency
 map<unigram_t, double> getUnigramProbabilities(const vector<string>& words, const map<unigram_t, int>& unigramCounts, const double deflator) {
 	map<unigram_t, double> probabilities;
 	for (auto entry = unigramCounts.begin(); entry != unigramCounts.end(); ++entry) {
 		const int count = entry->second;
 		probabilities[entry->first] = double(count) / words.size() * deflator;
 	}
 	return probabilities;
 }
 // Computes the discounted conditional probability of each bigram:
 //   count(first, second) / count(first) * deflator
 // Throws std::out_of_range (from map::at) if a bigram's first word is missing from unigramCounts.
 map<bigram_t, double> getBigramProbabilities(const map<unigram_t, int>& unigramCounts, const map<bigram_t, int>& bigramCounts, const double deflator) {
 	map<bigram_t, double> probabilities;
 	for (auto entry = bigramCounts.begin(); entry != bigramCounts.end(); ++entry) {
 		const bigram_t& bigram = entry->first;
 		const int count = entry->second;
 		const int prefixCount = unigramCounts.at(get<0>(bigram));
 		probabilities[bigram] = double(count) / prefixCount * deflator;
 	}
 	return probabilities;
 }
 // Computes the discounted conditional probability of each trigram:
 //   count(first, second, third) / count(first, second) * deflator
 // Throws std::out_of_range (from map::at) if a trigram's two-word prefix is missing from bigramCounts.
 map<trigram_t, double> getTrigramProbabilities(const map<bigram_t, int>& bigramCounts, const map<trigram_t, int>& trigramCounts, const double deflator) {
 	map<trigram_t, double> probabilities;
 	for (auto entry = trigramCounts.begin(); entry != trigramCounts.end(); ++entry) {
 		const trigram_t& trigram = entry->first;
 		const int count = entry->second;
 		const bigram_t prefix(get<0>(trigram), get<1>(trigram));
 		const int prefixCount = bigramCounts.at(prefix);
 		probabilities[trigram] = double(count) / prefixCount * deflator;
 	}
 	return probabilities;
 }
 // Computes the backoff weight of each word:
 //   discountMass / (1 - sum of P(w2) over all bigrams (word, w2))
 // unigramCounts:        supplies the set of words to compute weights for
 // unigramProbabilities: probability per word; must contain the second word of every
 //                       bigram whose first word is in unigramCounts (else std::out_of_range)
 // bigramCounts:         supplies the set of observed bigrams
 // discountMass:         numerator of every weight
 map<unigram_t, double> getUnigramBackoffWeights(
 	const map<unigram_t, int>& unigramCounts,
 	const map<unigram_t, double>& unigramProbabilities,
 	const map<bigram_t, int>& bigramCounts,
 	const double discountMass)
 {
 	// The map first holds each word's denominator, then is converted to weights in place.
 	map<unigram_t, double> unigramBackoffWeights;
 	for (const auto& unigramEntry : unigramCounts) {
 		unigramBackoffWeights[unigramEntry.first] = 1;
 	}
 	// One pass over the bigrams instead of re-scanning all bigrams for every word.
 	// Bigrams are visited in map order, so each denominator sees its subtractions
 	// in the same order as a per-word scan would.
 	for (const auto& bigramEntry : bigramCounts) {
 		const bigram_t& bigram = bigramEntry.first;
 		auto denominator = unigramBackoffWeights.find(get<0>(bigram));
 		if (denominator == unigramBackoffWeights.end()) continue;
 		denominator->second -= unigramProbabilities.at(get<1>(bigram));
 	}
 	for (auto& weightEntry : unigramBackoffWeights) {
 		weightEntry.second = discountMass / weightEntry.second;
 	}
 	return unigramBackoffWeights;
 }
 // Computes the backoff weight of each bigram (w1, w2):
 //   discountMass / (1 - sum of P(w2, w3) over all trigrams (w1, w2, w3))
 // bigramCounts:        supplies the set of bigrams to compute weights for
 // bigramProbabilities: probability per bigram; must contain the trailing bigram (w2, w3) of
 //                      every trigram whose prefix (w1, w2) is in bigramCounts (else std::out_of_range)
 // trigramCounts:       supplies the set of observed trigrams
 // discountMass:        numerator of every weight
 map<bigram_t, double> getBigramBackoffWeights(
 	const map<bigram_t, int>& bigramCounts,
 	const map<bigram_t, double>& bigramProbabilities,
 	const map<trigram_t, int>& trigramCounts,
 	const double discountMass)
 {
 	// The map first holds each bigram's denominator, then is converted to weights in place.
 	map<bigram_t, double> bigramBackoffWeights;
 	for (const auto& bigramEntry : bigramCounts) {
 		bigramBackoffWeights[bigramEntry.first] = 1;
 	}
 	// One pass over the trigrams instead of re-scanning all trigrams for every bigram.
 	// Trigrams are visited in map order, so each denominator sees its subtractions
 	// in the same order as a per-bigram scan would.
 	for (const auto& trigramEntry : trigramCounts) {
 		const trigram_t& trigram = trigramEntry.first;
 		auto denominator = bigramBackoffWeights.find(bigram_t(get<0>(trigram), get<1>(trigram)));
 		if (denominator == bigramBackoffWeights.end()) continue;
 		denominator->second -= bigramProbabilities.at(bigram_t(get<1>(trigram), get<2>(trigram)));
 	}
 	for (auto& weightEntry : bigramBackoffWeights) {
 		weightEntry.second = discountMass / weightEntry.second;
 	}
 	return bigramBackoffWeights;
 }
 void createLanguageModelFile(const vector<string>& words, path filePath) {
 	const double discountMass = 0.5;
 	const double deflator = 1.0 - discountMass;
 	map<unigram_t, int> unigramCounts = getUnigramCounts(words);
 	map<bigram_t, int> bigramCounts = getBigramCounts(words);
 	map<trigram_t, int> trigramCounts = getTrigramCounts(words);
 	map<unigram_t, double> unigramProbabilities = getUnigramProbabilities(words, unigramCounts, deflator);
 	map<bigram_t, double> bigramProbabilities = getBigramProbabilities(unigramCounts, bigramCounts, deflator);
 	map<trigram_t, double> trigramProbabilities = getTrigramProbabilities(bigramCounts, trigramCounts, deflator);
 	map<unigram_t, double> unigramBackoffWeights = getUnigramBackoffWeights(unigramCounts, unigramProbabilities, bigramCounts, discountMass);
 	map<bigram_t, double> bigramBackoffWeights = getBigramBackoffWeights(bigramCounts, bigramProbabilities, trigramCounts, discountMass);
 	boost::filesystem::ofstream file(filePath);
 	file << "Generated by " << appName << " " << appVersion << endl << endl;
 	file << "\\data\\" << endl;
 	file << "ngram 1=" << unigramCounts.size() << endl;
 	file << "ngram 2=" << bigramCounts.size() << endl;
 	file << "ngram 3=" << trigramCounts.size() << endl << endl;
 	file.setf(std::ios::fixed, std::ios::floatfield);
 	file.precision(4);
 	file << "\\1-grams:" << endl;
 	for (const unigram_t& unigram : unigramCounts | boost::adaptors::map_keys) {
 		file << log10(unigramProbabilities.at(unigram))
 			<< " " << unigram
 			<< " " << log10(unigramBackoffWeights.at(unigram)) << endl;
 	}
 	file << endl;
 	file << "\\2-grams:" << endl;
 	for (const bigram_t& bigram : bigramCounts | boost::adaptors::map_keys) {
 		file << log10(bigramProbabilities.at(bigram))
 			<< " " << get<0>(bigram) << " " << get<1>(bigram)
 			<< " " << log10(bigramBackoffWeights.at(bigram)) << endl;
 	}
 	file << endl;
 	file << "\\3-grams:" << endl;
 	for (const trigram_t& trigram : trigramCounts | boost::adaptors::map_keys) {
 		file << log10(trigramProbabilities.at(trigram))
 			<< " " << get<0>(trigram) << " " << get<1>(trigram) << " " << get<2>(trigram) << endl;
 	}
 	file << endl;
 	file << "\\end\\" << endl;
 }
 // Creates an n-gram language model for the given word sequence.
 // The model is written to a temporary file via createLanguageModelFile, read back with
 // ngram_model_read (NGRAM_ARPA), and the temporary file is removed before returning.
 // words:   the word sequence to derive the model from
 // logMath: log-math object handed to ngram_model_read
 // Returns the owning pointer to the model; it is released with ngram_model_free.
 // Throws std::runtime_error if the model cannot be read.
 lambda_unique_ptr<ngram_model_t> createLanguageModel(const vector<string>& words, logmath_t& logMath) {
 	path tempFilePath = getTempFilePath();
 	// Register the cleanup before writing, so the file is also removed if writing fails.
 	// Use the non-throwing overload: this runs inside a destructor, where an escaping
 	// exception would terminate the program.
 	auto deleteTempFile = gsl::finally([&]() {
 		boost::system::error_code ignoredError;
 		boost::filesystem::remove(tempFilePath, ignoredError);
 	});
 	createLanguageModelFile(words, tempFilePath);
 	lambda_unique_ptr<ngram_model_t> languageModel(
 		ngram_model_read(nullptr, tempFilePath.string().c_str(), NGRAM_ARPA, &logMath),
 		[](ngram_model_t* lm) { ngram_model_free(lm); });
 	// Fail here rather than handing a null model to the caller
 	if (!languageModel) throw std::runtime_error("Error reading language model.");
 	return languageModel;
 }
--- a/src/languageModels.h
+++ b/src/languageModels.h
@ -0,0 +1,6 @@
 #pragma once
 #include <sphinxbase/ngram_model.h>
 #include <vector>
 #include "tools.h"
 // Creates an n-gram language model from the given sequence of words.
 // words:   the word sequence the model is derived from
 // logMath: log-math object used when reading the generated model
 // Returns an owning pointer to the model.
 lambda_unique_ptr<ngram_model_t> createLanguageModel(const std::vector<std::string>& words, logmath_t& logMath);
--- a/src/main.cpp
+++ b/src/main.cpp
@ -12,9 +12,12 @@
 #include <gsl_util.h>
 #include "Exporter.h"
 #include "ContinuousTimeline.h"
 #include <boost/filesystem/operations.hpp>
 #include "stringTools.h"
 using std::exception;
 using std::string;
 using std::u32string;
 using std::vector;
 using std::unique_ptr;
 using std::make_unique;
@ -75,6 +78,25 @@ void addFileSink(path path, logging::Level minLevel) {
 	logging::addSink(levelFilter);
 }
 // Reads the whole file at filePath and returns its content converted from UTF-8 to UTF-32.
 // Throws std::invalid_argument if the file does not exist.
 // Throws std::runtime_error ("Error reading file ...") with the original exception nested
 // if opening, reading, or decoding fails. A decoding failure is first wrapped in a
 // "File encoding is not ASCII or UTF-8." error, which then becomes the nested exception.
 u32string readTextFile(path filePath) {
 	if (!exists(filePath)) {
 		throw std::invalid_argument(fmt::format("File {} does not exist.", filePath));
 	}
 	try {
 		// Enable exceptions before opening so that a failed open is reported
 		boost::filesystem::ifstream file;
 		file.exceptions(std::ifstream::failbit | std::ifstream::badbit);
 		file.open(filePath);
 		string utf8Text((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>());
 		try {
 			return utf8ToUtf32(utf8Text);
 		} catch (...) {
 			// The message has no placeholder, so no format argument is passed
 			std::throw_with_nested(std::runtime_error("File encoding is not ASCII or UTF-8."));
 		}
 	} catch (...) {
 		std::throw_with_nested(std::runtime_error(fmt::format("Error reading file {0}.", filePath)));
 	}
 }
 int main(int argc, char *argv[]) {
 	auto pausableStderrSink = addPausableStdErrSink(logging::Level::Warn);
 	pausableStderrSink->pause();
@ -88,6 +110,7 @@ int main(int argc, char *argv[]) {
 	tclap::ValuesConstraint<logging::Level> logLevelConstraint(logLevels);
 	tclap::ValueArg<logging::Level> logLevel("", "logLevel", "The minimum log level to log", false, logging::Level::Debug, &logLevelConstraint, cmd);
 	tclap::ValueArg<string> logFileName("", "logFile", "The log file path.", false, string(), "string", cmd);
 	tclap::ValueArg<string> dialogFile("d", "dialogFile", "A file containing the text of the dialog.", false, string(), "string", cmd);
 	auto exportFormats = vector<ExportFormat>(ExportFormatConverter::get().getValues());
 	tclap::ValuesConstraint<ExportFormat> exportFormatConstraint(exportFormats);
 	tclap::ValueArg<ExportFormat> exportFormat("f", "exportFormat", "The export format.", false, ExportFormat::TSV, &exportFormatConstraint, cmd);
@ -117,6 +140,7 @@ int main(int argc, char *argv[]) {
 			ProgressBar progressBar;
 			phones = detectPhones(
 				createAudioStream(inputFileName.getValue()),
 				dialogFile.isSet() ? readTextFile(path(dialogFile.getValue())) : boost::optional<u32string>(),
 				progressBar);
 		}
 		std::cerr << "Done" << std::endl;
--- a/src/phoneExtraction.cpp
+++ b/src/phoneExtraction.cpp
@ -1,6 +1,5 @@
 #include <iostream>
 #include <boost/filesystem.hpp>
 #include <boost/algorithm/string.hpp>
 #include "phoneExtraction.h"
 #include "audio/SampleRateConverter.h"
 #include "platformTools.h"
@ -14,6 +13,9 @@
 #include <Timeline.h>
 #include <audio/voiceActivityDetection.h>
 #include <audio/AudioStreamSegment.h>
 #include "languageModels.h"
 #include "tokenization.h"
 #include "g2p.h"
 extern "C" {
 #include <pocketsphinx.h>
@ -35,33 +37,34 @@ using std::function;
 using std::regex;
 using std::regex_replace;
 using std::chrono::duration;
 using boost::optional;
 using std::u32string;
 constexpr int sphinxSampleRate = 16000;
-lambda_unique_ptr<cmd_ln_t> createConfig(path sphinxModelDirectory) {
+const path& getSphinxModelDirectory() {
 	static path sphinxModelDirectory(getBinDirectory() / "res/sphinx");
 	return sphinxModelDirectory;
 }
 lambda_unique_ptr<ps_decoder_t> createDecoder() {
 	lambda_unique_ptr<cmd_ln_t> config(
 		cmd_ln_init(
 			nullptr, ps_args(), true,
 			// Set acoustic model
-			"-hmm", (sphinxModelDirectory / "acoustic-model").string().c_str(),
+			"-hmm", (getSphinxModelDirectory() / "acoustic-model").string().c_str(),
-			// Set language model
+			// Set pronunciation dictionary
-			"-lm", (sphinxModelDirectory / "en-us.lm.bin").string().c_str(),
+			"-dict", (getSphinxModelDirectory() / "cmudict-en-us.dict").string().c_str(),
 			// Set pronunciation dictionary
 			"-dict", (sphinxModelDirectory / "cmudict-en-us.dict").string().c_str(),
 			// Add noise against zero silence (see http://cmusphinx.sourceforge.net/wiki/faq#qwhy_my_accuracy_is_poor)
 			"-dither", "yes",
 			nullptr),
 		[](cmd_ln_t* config) { cmd_ln_free_r(config); });
 	if (!config) throw runtime_error("Error creating configuration.");
 	return config;
 }
 lambda_unique_ptr<ps_decoder_t> createSpeechRecognizer(cmd_ln_t& config) {
 	lambda_unique_ptr<ps_decoder_t> recognizer(
-		ps_init(&config),
+		ps_init(config.get()),
 		[](ps_decoder_t* recognizer) { ps_free(recognizer); });
-	if (!recognizer) throw runtime_error("Error creating speech recognizer.");
+	if (!recognizer) throw runtime_error("Error creating speech decoder.");
 	return recognizer;
 }
@ -141,32 +144,32 @@ void sphinxLogCallback(void* user_data, err_lvl_t errorLevel, const char* format
 	logging::log(logLevel, message);
 }
-BoundedTimeline<string> recognizeWords(unique_ptr<AudioStream> audioStream, ps_decoder_t& recognizer, ProgressSink& progressSink) {
+BoundedTimeline<string> recognizeWords(unique_ptr<AudioStream> audioStream, ps_decoder_t& decoder, ProgressSink& progressSink) {
 	// Convert audio stream to the exact format PocketSphinx requires
 	audioStream = convertSampleRate(std::move(audioStream), sphinxSampleRate);
 	// Restart timing at 0
-	ps_start_stream(&recognizer);
+	ps_start_stream(&decoder);
 	// Start recognition
-	int error = ps_start_utt(&recognizer);
+	int error = ps_start_utt(&decoder);
 	if (error) throw runtime_error("Error starting utterance processing for word recognition.");
 	// Process entire sound file
-	auto processBuffer = [&recognizer](const vector<int16_t>& buffer) {
+	auto processBuffer = [&decoder](const vector<int16_t>& buffer) {
-		int searchedFrameCount = ps_process_raw(&recognizer, buffer.data(), buffer.size(), false, false);
+		int searchedFrameCount = ps_process_raw(&decoder, buffer.data(), buffer.size(), false, false);
 		if (searchedFrameCount < 0) throw runtime_error("Error analyzing raw audio data for word recognition.");
 	};
 	processAudioStream(*audioStream.get(), processBuffer, progressSink);
 	// End recognition
-	error = ps_end_utt(&recognizer);
+	error = ps_end_utt(&decoder);
 	if (error) throw runtime_error("Error ending utterance processing for word recognition.");
 	// Collect words
 	BoundedTimeline<string> result(audioStream->getTruncatedRange());
 	int32_t score;
-	for (ps_seg_t* it = ps_seg_iter(&recognizer, &score); it; it = ps_seg_next(it)) {
+	for (ps_seg_t* it = ps_seg_iter(&decoder, &score); it; it = ps_seg_next(it)) {
 		const char* word = ps_seg_word(it);
 		int firstFrame, lastFrame;
 		ps_seg_frames(it, &firstFrame, &lastFrame);
@ -176,35 +179,6 @@ BoundedTimeline<string> recognizeWords(unique_ptr<AudioStream> audioStream, ps_d
 	return result;
 }
 // Splits dialog into words, doing minimal preprocessing.
 // A robust solution should use TTS logic to cope with numbers, abbreviations, unknown words etc.
 // Splits dialog text into lower-case words with only minimal preprocessing.
 // Punctuation followed by a space, as well as any hyphen, becomes the pseudo-word "<sil>".
 // A robust solution should use TTS logic to cope with numbers, abbreviations, unknown words etc.
 vector<string> extractDialogWords(string dialog) {
 	const regex pausePattern("[,;.:!?] |-");
 	// NOTE(review): this class contains `\0-9` rather than a plain `0-9` — confirm that is intended
 	const regex undesiredPattern("[^a-z.'\\0-9<>]");
 	const regex whitespacePattern("\\s+");
 	// Normalize case first, so that the character classes only need lower-case letters
 	boost::algorithm::to_lower(dialog);
 	// Mark pauses, blank out undesired characters, then squeeze runs of whitespace
 	dialog = regex_replace(dialog, pausePattern, " <sil> ");
 	dialog = regex_replace(dialog, undesiredPattern, " ");
 	dialog = regex_replace(dialog, whitespacePattern, " ");
 	boost::algorithm::trim(dialog);
 	// Ugly hack: drop a trailing period
 	if (!dialog.empty() && dialog.back() == '.') {
 		dialog.pop_back();
 	}
 	vector<string> words;
 	boost::algorithm::split(words, dialog, boost::is_space());
 	return words;
 }
 s3wid_t getWordId(const string& word, dict_t& dictionary) {
 	s3wid_t wordId = dict_wordid(&dictionary, word.c_str());
 	if (wordId == BAD_S3WID) throw invalid_argument(fmt::format("Unknown word '{}'.", word));
@ -214,12 +188,12 @@ s3wid_t getWordId(const string& word, dict_t& dictionary) {
 BoundedTimeline<Phone> getPhoneAlignment(
 	const vector<s3wid_t>& wordIds,
 	unique_ptr<AudioStream> audioStream,
-	ps_decoder_t& recognizer,
+	ps_decoder_t& decoder,
 	ProgressSink& progressSink)
 {
 	// Create alignment list
 	lambda_unique_ptr<ps_alignment_t> alignment(
-		ps_alignment_init(recognizer.d2p),
+		ps_alignment_init(decoder.d2p),
 		[](ps_alignment_t* alignment) { ps_alignment_free(alignment); });
 	if (!alignment) throw runtime_error("Error creating alignment.");
 	for (s3wid_t wordId : wordIds) {
@ -233,9 +207,9 @@ BoundedTimeline<Phone> getPhoneAlignment(
 	audioStream = convertSampleRate(std::move(audioStream), sphinxSampleRate);
 	// Create search structure
-	acmod_t* acousticModel = recognizer.acmod;
+	acmod_t* acousticModel = decoder.acmod;
 	lambda_unique_ptr<ps_search_t> search(
-		state_align_search_init("state_align", recognizer.config, acousticModel, alignment.get()),
+		state_align_search_init("state_align", decoder.config, acousticModel, alignment.get()),
 		[](ps_search_t* search) { ps_search_free(search); });
 	if (!search) throw runtime_error("Error creating search.");
@ -247,7 +221,7 @@ BoundedTimeline<Phone> getPhoneAlignment(
 	ps_search_start(search.get());
 	// Process entire sound file
-	auto processBuffer = [&recognizer, &acousticModel, &search](const vector<int16_t>& buffer) {
+	auto processBuffer = [&decoder, &acousticModel, &search](const vector<int16_t>& buffer) {
 		const int16* nextSample = buffer.data();
 		size_t remainingSamples = buffer.size();
 		while (acmod_process_raw(acousticModel, &nextSample, &remainingSamples, false) > 0) {
@ -266,7 +240,7 @@ BoundedTimeline<Phone> getPhoneAlignment(
 	acmod_end_utt(acousticModel);
 	// Extract phones with timestamps
-	char** phoneNames = recognizer.dict->mdef->ciname;
+	char** phoneNames = decoder.dict->mdef->ciname;
 	BoundedTimeline<Phone> result(audioStream->getTruncatedRange());
 	for (ps_alignment_iter_t* it = ps_alignment_phones(alignment.get()); it; it = ps_alignment_iter_next(it)) {
 		// Get phone
@ -285,8 +259,28 @@ BoundedTimeline<Phone> getPhoneAlignment(
 	return result;
 }
 void addMissingDictionaryWords(const vector<string>& words, ps_decoder_t& decoder) {
 	map<string, string> missingPronunciations;
 	for (const string& word : words) {
 		if (dict_wordid(decoder.dict, word.c_str()) == BAD_S3WID) {
 			string pronunciation;
 			for (Phone phone : wordToPhones(word)) {
 				if (pronunciation.length() > 0) pronunciation += " ";
 				pronunciation += PhoneConverter::get().toString(phone);
 			}
 			missingPronunciations[word] = pronunciation;
 		}
 	}
 	for (auto it = missingPronunciations.begin(); it != missingPronunciations.end(); ++it) {
 		bool isLast = it == --missingPronunciations.end();
 		logging::infoFormat("Unknown word '{}'. Guessing pronunciation '{}'.", it->first, it->second);
 		ps_add_word(&decoder, it->first.c_str(), it->second.c_str(), isLast);
 	}
 }
 BoundedTimeline<Phone> detectPhones(
 	unique_ptr<AudioStream> audioStream,
 	optional<u32string> dialog,
 	ProgressSink& progressSink)
 {
 	// Pocketsphinx doesn't like empty input
@ -305,13 +299,6 @@ BoundedTimeline<Phone> detectPhones(
 	audioStream = removeDCOffset(std::move(audioStream));
 	try {
 		// Create PocketSphinx configuration
 		path sphinxModelDirectory(getBinDirectory() / "res/sphinx");
 		auto config = createConfig(sphinxModelDirectory);
 		// Create speech recognizer
 		auto recognizer = createSpeechRecognizer(*config.get());
 		// Split audio into utterances
 		BoundedTimeline<void> utterances = detectVoiceActivity(audioStream->clone(true));
@ -323,6 +310,29 @@ BoundedTimeline<Phone> detectPhones(
 		}
 		auto utteranceProgressSinkIt = utteranceProgressSinks.begin();
 		// Create speech recognizer
 		auto decoder = createDecoder();
 		// Set language model
 		lambda_unique_ptr<ngram_model_t> languageModel;
 		if (dialog) {
 			// Create dialog-specific language model
 			vector<string> words = tokenizeText(*dialog);
 			words.insert(words.begin(), "<s>");
 			words.push_back("</s>");
 			languageModel = createLanguageModel(words, *decoder->lmath);
 			// Add any dialog-specific words to the dictionary
 			addMissingDictionaryWords(words, *decoder);
 		} else {
 			path modelPath = getSphinxModelDirectory() / "en-us.lm.bin";
 			languageModel = lambda_unique_ptr<ngram_model_t>(
 				ngram_model_read(decoder->config, modelPath.string().c_str(), NGRAM_AUTO, decoder->lmath),
 				[](ngram_model_t* lm) { ngram_model_free(lm); });
 		}
 		ps_set_lm(decoder.get(), "lm", languageModel.get());
 		ps_set_search(decoder.get(), "lm");
 		BoundedTimeline<Phone> result(audioStream->getTruncatedRange());
 		for (const auto& timedUtterance : utterances) {
 			ProgressMerger utteranceProgressMerger(**utteranceProgressSinkIt++);
@ -335,7 +345,7 @@ BoundedTimeline<Phone> detectPhones(
 			auto streamSegment = createSegment(audioStream->clone(true), timeRange);
 			// Get words
-			BoundedTimeline<string> words = recognizeWords(streamSegment->clone(true), *recognizer.get(), wordRecognitionProgressSink);
+			BoundedTimeline<string> words = recognizeWords(streamSegment->clone(true), *decoder.get(), wordRecognitionProgressSink);
 			for (Timed<string> timedWord : words) {
 				timedWord.getTimeRange().shift(timedUtterance.getStart());
 				logging::logTimedEvent("word", timedWord);
@ -344,12 +354,12 @@ BoundedTimeline<Phone> detectPhones(
 			// Look up words in dictionary
 			vector<s3wid_t> wordIds;
 			for (const auto& timedWord : words) {
-				wordIds.push_back(getWordId(timedWord.getValue(), *recognizer->dict));
+				wordIds.push_back(getWordId(timedWord.getValue(), *decoder->dict));
 			}
 			if (wordIds.empty()) continue;
 			// Align the words' phones with speech
-			BoundedTimeline<Phone> segmentPhones = getPhoneAlignment(wordIds, std::move(streamSegment), *recognizer.get(), alignmentProgressSink);
+			BoundedTimeline<Phone> segmentPhones = getPhoneAlignment(wordIds, std::move(streamSegment), *decoder.get(), alignmentProgressSink);
 			segmentPhones.shift(timedUtterance.getStart());
 			for (const auto& timedPhone : segmentPhones) {
 				logging::logTimedEvent("phone", timedPhone);
--- a/src/phoneExtraction.h
+++ b/src/phoneExtraction.h
@ -8,4 +8,5 @@
 // Detects the phones spoken in the given audio stream and returns them as a timeline.
 // audioStream:  the audio to analyze; ownership is transferred to the function
 // dialog:       optional text of the spoken dialog; an empty optional means no dialog text is known
 // progressSink: receives progress information
 BoundedTimeline<Phone> detectPhones(
 	std::unique_ptr<AudioStream> audioStream,
 	boost::optional<std::u32string> dialog,
 	ProgressSink& progressSink);
--- a/src/platformTools.cpp
+++ b/src/platformTools.cpp
@ -2,10 +2,14 @@
 #include <boost/filesystem/path.hpp>
 #include <boost/predef.h>
 #include <format.h>
 #include <boost/uuid/uuid.hpp>
 #include <boost/uuid/uuid_generators.hpp>
 #include <boost/uuid/uuid_io.hpp>
 #include "platformTools.h"
 using boost::filesystem::path;
 using std::string;
 constexpr int InitialBufferSize = 256;
@ -129,3 +133,10 @@ path getBinPath() {
 // Returns the directory that contains the path reported by getBinPath().
 path getBinDirectory() {
 	const path binaryPath = getBinPath();
 	return binaryPath.parent_path();
 }
 // Returns a path inside the system's temporary directory whose file name is a random UUID.
 // Only the path is built; no file is created.
 path getTempFilePath() {
 	const path directory = boost::filesystem::temp_directory_path();
 	// The generator is created once and reused by all later calls
 	static auto uuidGenerator = boost::uuids::random_generator();
 	const string uuidText = to_string(uuidGenerator());
 	return directory / uuidText;
 }
--- a/src/platformTools.h
+++ b/src/platformTools.h
@ -4,3 +4,4 @@
 // Returns the binary's path — presumably that of the running executable; confirm against the implementation.
 boost::filesystem::path getBinPath();
 // Returns the parent directory of getBinPath().
 boost::filesystem::path getBinDirectory();
 // Returns a path in the system's temporary directory with a random UUID as file name.
 // The file itself is not created.
 boost::filesystem::path getTempFilePath();
--- a/src/stringTools.cpp
+++ b/src/stringTools.cpp
@ -1,5 +1,6 @@
 #include "stringTools.h"
 #include <boost/algorithm/string/trim.hpp>
 #include <codecvt>
 using std::string;
 using std::wstring;
@ -106,3 +107,12 @@ string toASCII(const u32string& s) {
 	}
 	return result;
 }
 u32string utf8ToUtf32(const string& s) {
 	// Visual Studio 2015 has a bug regarding char32_t:
 	// https://connect.microsoft.com/VisualStudio/feedback/details/1403302/unresolved-external-when-using-codecvt-utf8
 	// Once VS2016 is out, we can use char32_t instead of uint32_t as type arguments and get rid of the outer conversion.
 	std::wstring_convert<std::codecvt_utf8<uint32_t>, uint32_t> convert;
 	return u32string(reinterpret_cast<const char32_t*>(convert.from_bytes(s).c_str()));
 }
--- a/src/stringTools.h
+++ b/src/stringTools.h
@ -1,6 +1,5 @@
 #pragma once
 #include <string>
 #include <vector>
 #include <boost/optional.hpp>
@ -15,3 +14,5 @@ std::wstring latin1ToWide(const std::string& s);
 boost::optional<char> toASCII(char32_t ch);
 std::string toASCII(const std::u32string& s);
 // Converts the UTF-8 encoded string s to UTF-32.
 std::u32string utf8ToUtf32(const std::string& s);
--- a/src/tupleHash.h
+++ b/src/tupleHash.h
@ -0,0 +1,40 @@
 #pragma once
 #include <cstddef>
 #include <functional>
 #include <tuple>
 // Implementation details of the std::hash specialization for std::tuple below.
 // They live in a named namespace outside of std: a header must not use an anonymous
 // namespace, and only specializations may be added to namespace std.
 namespace tupleHashDetail {
 	// Mixes the hash of value into seed.
 	template <typename T>
 	void hashCombine(std::size_t& seed, const T& value) {
 		seed ^= std::hash<T>()(value) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
 	}
 	// Recursive template code derived from Matthieu M.
 	// Combines the hashes of the first Count tuple elements into seed, in element order.
 	template <typename Tuple, std::size_t Count = std::tuple_size<Tuple>::value>
 	struct HashValueImpl {
 		static void apply(std::size_t& seed, const Tuple& tuple) {
 			HashValueImpl<Tuple, Count - 1>::apply(seed, tuple);
 			hashCombine(seed, std::get<Count - 1>(tuple));
 		}
 	};
 	// Recursion end: no elements left. This also makes the empty tuple hashable.
 	template <typename Tuple>
 	struct HashValueImpl<Tuple, 0> {
 		static void apply(std::size_t&, const Tuple&) {}
 	};
 }
 namespace std {
 	// Hash for tuples, so that they can serve as keys of unordered containers.
 	// Every element type needs a std::hash specialization. The empty tuple hashes to 0.
 	template <typename ... TT>
 	struct hash<tuple<TT...>> {
 		size_t operator()(const tuple<TT...>& tt) const {
 			size_t seed = 0;
 			tupleHashDetail::HashValueImpl<tuple<TT...> >::apply(seed, tt);
 			return seed;
 		}
 	};
 }