Implemented WAVE reading, writing, and conversion

2015-09-17 21:47:58 +02:00 · 2015-09-17 21:47:58 +02:00 · 641f64022d
parent 31d3867708
commit 641f64022d
16 changed files with 529 additions and 4 deletions
--- a/.idea/LipSync.iml
+++ b/.idea/LipSync.iml
@ -3,6 +3,10 @@
  <component name="NewModuleRootManager">
    <content url="file://$MODULE_DIR$">
      <sourceFolder url="file://$MODULE_DIR$/CMakeLists.txt" isTestSource="false" />
+      <sourceFolder url="file://$MODULE_DIR$/lib/cppformat/format.cc" isTestSource="false" />
+      <sourceFolder url="file://$MODULE_DIR$/lib/cppformat/format.h" isTestSource="false" />
+      <sourceFolder url="file://$MODULE_DIR$/lib/cppformat/posix.cc" isTestSource="false" />
+      <sourceFolder url="file://$MODULE_DIR$/lib/cppformat/posix.h" isTestSource="false" />
      <sourceFolder url="file://$MODULE_DIR$/lib/pocketsphinx/src/libpocketsphinx/acmod.c" isTestSource="false" />
      <sourceFolder url="file://$MODULE_DIR$/lib/pocketsphinx/src/libpocketsphinx/acmod.h" isTestSource="false" />
      <sourceFolder url="file://$MODULE_DIR$/lib/pocketsphinx/src/libpocketsphinx/allphone_search.c" isTestSource="false" />
@ -122,8 +126,19 @@
      <sourceFolder url="file://$MODULE_DIR$/lib/sphinxbase/src/libsphinxbase/util/slamch.c" isTestSource="false" />
      <sourceFolder url="file://$MODULE_DIR$/lib/sphinxbase/src/libsphinxbase/util/slapack_lite.c" isTestSource="false" />
      <sourceFolder url="file://$MODULE_DIR$/lib/sphinxbase/src/libsphinxbase/util/strfuncs.c" isTestSource="false" />
-      <sourceFolder url="file://$MODULE_DIR$/main.cpp" isTestSource="false" />
-      <sourceFolder url="file://$MODULE_DIR$/tmp.cpp" isTestSource="false" />
+      <sourceFolder url="file://$MODULE_DIR$/src/audio_input/16kHzMonoStream.cpp" isTestSource="false" />
+      <sourceFolder url="file://$MODULE_DIR$/src/audio_input/16kHzMonoStream.h" isTestSource="false" />
+      <sourceFolder url="file://$MODULE_DIR$/src/audio_input/AudioStream.h" isTestSource="false" />
+      <sourceFolder url="file://$MODULE_DIR$/src/audio_input/ChannelDownmixer.cpp" isTestSource="false" />
+      <sourceFolder url="file://$MODULE_DIR$/src/audio_input/ChannelDownmixer.h" isTestSource="false" />
+      <sourceFolder url="file://$MODULE_DIR$/src/audio_input/IOTools.h" isTestSource="false" />
+      <sourceFolder url="file://$MODULE_DIR$/src/audio_input/SampleRateConverter.cpp" isTestSource="false" />
+      <sourceFolder url="file://$MODULE_DIR$/src/audio_input/SampleRateConverter.h" isTestSource="false" />
+      <sourceFolder url="file://$MODULE_DIR$/src/audio_input/WaveFileReader.cpp" isTestSource="false" />
+      <sourceFolder url="file://$MODULE_DIR$/src/audio_input/WaveFileReader.h" isTestSource="false" />
+      <sourceFolder url="file://$MODULE_DIR$/src/audio_input/WaveFileWriter.cpp" isTestSource="false" />
+      <sourceFolder url="file://$MODULE_DIR$/src/audio_input/WaveFileWriter.h" isTestSource="false" />
+      <sourceFolder url="file://$MODULE_DIR$/src/main.cpp" isTestSource="false" />
    </content>
    <orderEntry type="inheritedJdk" />
    <orderEntry type="sourceFolder" forTests="false" />
@ -135,7 +150,9 @@
          <root url="file://C:/MinGW/lib/gcc/mingw32/4.8.1/include-fixed" />
          <root url="file://C:/MinGW/lib/gcc/mingw32/4.8.1/include/c++" />
          <root url="file://C:/MinGW/lib/gcc/mingw32/4.8.1/include/ssp" />
+          <root url="file://$MODULE_DIR$/lib/cppformat" />
          <root url="file://$MODULE_DIR$/lib/pocketsphinx/include" />
+          <root url="file://$MODULE_DIR$/lib/r8brain-free-src-1.6" />
          <root url="file://$MODULE_DIR$/lib/sphinxbase/include" />
        </CLASSES>
        <SOURCES>
@ -144,9 +161,20 @@
          <root url="file://C:/MinGW/lib/gcc/mingw32/4.8.1/include-fixed" />
          <root url="file://C:/MinGW/lib/gcc/mingw32/4.8.1/include/c++" />
          <root url="file://C:/MinGW/lib/gcc/mingw32/4.8.1/include/ssp" />
+          <root url="file://$MODULE_DIR$/lib/cppformat" />
          <root url="file://$MODULE_DIR$/lib/pocketsphinx/include" />
+          <root url="file://$MODULE_DIR$/lib/r8brain-free-src-1.6" />
          <root url="file://$MODULE_DIR$/lib/sphinxbase/include" />
        </SOURCES>
+        <excluded>
+          <root url="file://$MODULE_DIR$/lib/cppformat/posix.cc" />
+          <root url="file://$MODULE_DIR$/lib/cppformat/posix.h" />
+          <root url="file://$MODULE_DIR$/lib/cppformat/format.h" />
+          <root url="file://$MODULE_DIR$/lib/cppformat/format.cc" />
+          <root url="file://$MODULE_DIR$/lib/r8brain-free-src-1.6/r8bbase.h" />
+          <root url="file://$MODULE_DIR$/lib/r8brain-free-src-1.6/example.cpp" />
+          <root url="file://$MODULE_DIR$/lib/r8brain-free-src-1.6/r8bbase.cpp" />
+        </excluded>
      </library>
    </orderEntry>
  </component>
--- a/.idea/vcs.xml
+++ b/.idea/vcs.xml
@ -1,6 +1,6 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
  <component name="VcsDirectoryMappings">
-    <mapping directory="" vcs="" />
+    <mapping directory="$PROJECT_DIR$" vcs="Git" />
  </component>
 </project>
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -3,7 +3,7 @@ project(LipSync)

 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")

-set(SOURCE_FILES main.cpp)
+set(SOURCE_FILES src/main.cpp src/audio_input/WaveFileReader.cpp src/audio_input/WaveFileReader.h src/audio_input/ChannelDownmixer.cpp src/audio_input/ChannelDownmixer.h src/audio_input/AudioStream.h src/audio_input/SampleRateConverter.cpp src/audio_input/SampleRateConverter.h src/audio_input/16kHzMonoStream.cpp src/audio_input/16kHzMonoStream.h src/audio_input/WaveFileWriter.cpp src/audio_input/WaveFileWriter.h src/audio_input/IOTools.h)

 include_directories("lib/sphinxbase-5prealpha-2015-08-05/include" "lib/pocketsphinx-5prealpha-2015-08-05/include" "lib/cppformat")
 FILE(GLOB_RECURSE SPHINX_BASE "lib/sphinxbase-5prealpha-2015-08-05/src/libsphinxbase/*.c")
--- a/src/audio_input/16kHzMonoStream.cpp
+++ b/src/audio_input/16kHzMonoStream.cpp
@ -0,0 +1,26 @@
+#include "16kHzMonoStream.h"
+#include "WaveFileReader.h"
+#include "ChannelDownmixer.h"
+#include "SampleRateConverter.h"
+
+using std::runtime_error;
+
+// Opens the given WAVE file and wraps it in whatever converter streams are needed
+// so that the returned stream is mono with a frame rate of 16 kHz.
+// Throws std::runtime_error if the stream's frame rate is below 16 kHz.
+std::unique_ptr<AudioStream> create16kHzMonoStream(std::string fileName) {
+	const int targetFrameRate = 16000;
+
+	// Start with the raw file contents
+	std::unique_ptr<AudioStream> result(new WaveFileReader(fileName));
+
+	// Collapse multiple channels into one
+	const bool isMono = result->getChannelCount() == 1;
+	if (!isMono) {
+		result.reset(new ChannelDownmixer(std::move(result)));
+	}
+
+	// Reduce the frame rate; upsampling is rejected
+	const int frameRate = result->getFrameRate();
+	if (frameRate < targetFrameRate) {
+		throw runtime_error("Sample rate must not be below 16kHz.");
+	}
+	if (frameRate != targetFrameRate) {
+		result.reset(new SampleRateConverter(std::move(result), targetFrameRate));
+	}
+
+	return result;
+}
--- a/src/audio_input/16kHzMonoStream.h
+++ b/src/audio_input/16kHzMonoStream.h
@ -0,0 +1,10 @@
+#ifndef LIPSYNC_WAVEFILEREADER16KHZMONO_H
+#define LIPSYNC_WAVEFILEREADER16KHZMONO_H
+
+#include "AudioStream.h"
+#include <memory>
+#include <string>
+
+// Opens the given WAVE file as a mono audio stream with a frame rate of 16 kHz.
+std::unique_ptr<AudioStream> create16kHzMonoStream(std::string fileName);
+
+#endif //LIPSYNC_WAVEFILEREADER16KHZMONO_H
--- a/src/audio_input/AudioStream.h
+++ b/src/audio_input/AudioStream.h
@ -0,0 +1,12 @@
+#ifndef LIPSYNC_AUDIOSTREAM_H
+#define LIPSYNC_AUDIOSTREAM_H
+
+// Abstract source of audio samples.
+// Streams are owned and destroyed through std::unique_ptr<AudioStream>.
+class AudioStream {
+public:
+	// Virtual so that deleting a derived stream through an AudioStream pointer is well-defined.
+	virtual ~AudioStream() = default;
+
+	// Number of frames per second
+	virtual int getFrameRate() = 0;
+	// Total number of frames in the stream
+	virtual int getFrameCount() = 0;
+	// Number of channels per frame
+	virtual int getChannelCount() = 0;
+	// Stores the next sample in `sample` and returns true,
+	// or returns false if no further sample is available.
+	virtual bool getNextSample(float &sample) = 0;
+};
+
+#endif //LIPSYNC_AUDIOSTREAM_H
--- a/src/audio_input/ChannelDownmixer.cpp
+++ b/src/audio_input/ChannelDownmixer.cpp
@ -0,0 +1,31 @@
+#include "ChannelDownmixer.h"
+
+// Takes ownership of the stream to downmix and remembers how many channels it delivers.
+ChannelDownmixer::ChannelDownmixer(std::unique_ptr<AudioStream> inputStream)
+	: inputStream(std::move(inputStream))
+	// Must go through this->inputStream: the parameter of the same name has just been moved from.
+	, inputChannelCount(this->inputStream->getChannelCount())
+{
+}
+
+// Downmixing does not change the frame rate, so the input's value is passed through.
+int ChannelDownmixer::getFrameRate() {
+	const int inputFrameRate = inputStream->getFrameRate();
+	return inputFrameRate;
+}
+
+// Downmixing does not change the number of frames, so the input's value is passed through.
+int ChannelDownmixer::getFrameCount() {
+	const int inputFrameCount = inputStream->getFrameCount();
+	return inputFrameCount;
+}
+
+// The output of a downmixer is always mono.
+int ChannelDownmixer::getChannelCount() {
+	const int monoChannelCount = 1;
+	return monoChannelCount;
+}
+
+// Reads one sample per input channel and outputs their arithmetic mean.
+// Returns false, leaving `sample` untouched, as soon as the input runs out of samples.
+bool ChannelDownmixer::getNextSample(float &sample) {
+	float total = 0;
+	int channelsLeft = inputChannelCount;
+	while (channelsLeft > 0) {
+		float channelSample;
+		const bool gotSample = inputStream->getNextSample(channelSample);
+		if (!gotSample) {
+			return false;
+		}
+		total += channelSample;
+		channelsLeft--;
+	}
+
+	sample = total / inputChannelCount;
+	return true;
+}
--- a/src/audio_input/ChannelDownmixer.h
+++ b/src/audio_input/ChannelDownmixer.h
@ -0,0 +1,21 @@
+#ifndef LIPSYNC_CHANNELDOWNMIXER_H
+#define LIPSYNC_CHANNELDOWNMIXER_H
+
+#include "AudioStream.h"
+#include <memory>
+
+// Wraps a multi-channel audio stream and exposes it as a mono stream.
+class ChannelDownmixer : public AudioStream {
+public:
+	// Takes ownership of the stream to be downmixed.
+	ChannelDownmixer(std::unique_ptr<AudioStream> inputStream);
+
+	// AudioStream interface
+	int getFrameRate() override;
+	int getFrameCount() override;
+	int getChannelCount() override;
+	bool getNextSample(float &sample) override;
+
+private:
+	// The stream we're reading from
+	std::unique_ptr<AudioStream> inputStream;
+	// Channel count of inputStream, queried once at construction
+	int inputChannelCount;
+};
+
+#endif //LIPSYNC_CHANNELDOWNMIXER_H
--- a/src/audio_input/IOTools.h
+++ b/src/audio_input/IOTools.h
@ -0,0 +1,44 @@
+#ifndef LIPSYNC_IOTOOLS_H
+#define LIPSYNC_IOTOOLS_H
+
+#include <cstdint>
+#include <fstream>
+#include <string>
+
+// Helpers for reading and writing binary values in little-endian byte order.
+// NOTE: read(), write() and fourccToString() copy bytes straight to/from the object
+// representation of a value, so they only yield little-endian data on a little-endian host.
+namespace little_endian {
+
+	// Reads bitsToRead / 8 bytes from the stream into the lowest-addressed bytes of a
+	// zero-initialized Type and returns it.
+	// If the stream ends early, it is put into the fail state; the bytes that could not
+	// be read are left at zero.
+	template <typename Type, int bitsToRead = 8 * sizeof(Type)>
+	Type read(std::istream &stream) {
+		static_assert(bitsToRead % 8 == 0, "Cannot read fractional bytes.");
+		static_assert(bitsToRead <= sizeof(Type) * 8, "Bits to read exceed target type size.");
+
+		Type result = 0;
+		stream.read(reinterpret_cast<char*>(&result), bitsToRead / 8);
+		return result;
+	}
+
+	// Writes the bitsToWrite / 8 lowest-addressed bytes of value to the stream.
+	template <typename Type, int bitsToWrite = 8 * sizeof(Type)>
+	void write(Type value, std::ostream &stream) {
+		static_assert(bitsToWrite % 8 == 0, "Cannot write fractional bytes.");
+		static_assert(bitsToWrite <= sizeof(Type) * 8, "Bits to write exceed target type size.");
+
+		stream.write(reinterpret_cast<const char*>(&value), bitsToWrite / 8);
+	}
+
+	// Packs four characters into a 32-bit code, with c0 in the least significant byte.
+	// Each byte is widened to uint32_t before shifting so that a c3 value of 0x80 or above
+	// is never shifted into the sign bit of an int.
+	constexpr uint32_t fourcc(unsigned char c0, unsigned char c1, unsigned char c2, unsigned char c3) {
+		return static_cast<uint32_t>(c0)
+			| (static_cast<uint32_t>(c1) << 8)
+			| (static_cast<uint32_t>(c2) << 16)
+			| (static_cast<uint32_t>(c3) << 24);
+	}
+
+	// Returns the four bytes of the given code, in memory order, as a string.
+	inline std::string fourccToString(uint32_t fourcc) {
+		return std::string(reinterpret_cast<char*>(&fourcc), 4);
+	}
+
+}
+
+#endif //LIPSYNC_IOTOOLS_H
--- a/src/audio_input/SampleRateConverter.cpp
+++ b/src/audio_input/SampleRateConverter.cpp
@ -0,0 +1,80 @@
+#include <cmath>
+#include "SampleRateConverter.h"
+
+using std::runtime_error;
+
+// Wraps a mono input stream so that it can be read at outputFrameRate.
+// Throws std::runtime_error if the input is not mono or if its frame rate is
+// lower than outputFrameRate.
+SampleRateConverter::SampleRateConverter(std::unique_ptr<AudioStream> inputStream, int outputFrameRate)
+	: inputStream(std::move(inputStream))
+	, downscalingFactor(static_cast<double>(this->inputStream->getFrameRate()) / outputFrameRate)
+	, outputFrameRate(outputFrameRate)
+	, outputFrameCount(std::lround(this->inputStream->getFrameCount() / downscalingFactor))
+	, lastInputSample(0)
+	, lastInputSampleIndex(-1)
+	, nextOutputSampleIndex(0)
+{
+	// The parameter inputStream has been moved from; use the member instead.
+	AudioStream &input = *this->inputStream;
+
+	const bool isMono = input.getChannelCount() == 1;
+	if (!isMono) {
+		throw runtime_error("Only mono input streams are supported.");
+	}
+
+	const bool wouldUpsample = input.getFrameRate() < outputFrameRate;
+	if (wouldUpsample) {
+		throw runtime_error("Upsampling not supported.");
+	}
+}
+
+// Returns the frame rate this converter was constructed with.
+int SampleRateConverter::getFrameRate() {
+	const int frameRate = outputFrameRate;
+	return frameRate;
+}
+
+// Returns the number of output frames, as computed at construction.
+int SampleRateConverter::getFrameCount() {
+	const int frameCount = outputFrameCount;
+	return frameCount;
+}
+
+// Only mono input is accepted, so the output is always mono.
+int SampleRateConverter::getChannelCount() {
+	const int monoChannelCount = 1;
+	return monoChannelCount;
+}
+
+// Produces the next output sample as the mean of the input samples it covers.
+// Returns false once outputFrameCount samples have been produced.
+bool SampleRateConverter::getNextSample(float &sample) {
+	const bool exhausted = nextOutputSampleIndex >= outputFrameCount;
+	if (exhausted) {
+		return false;
+	}
+
+	// Range of (fractional) input sample positions covered by this output sample
+	const double windowStart = nextOutputSampleIndex * downscalingFactor;
+	const double windowEnd = (nextOutputSampleIndex + 1) * downscalingFactor;
+	sample = mean(windowStart, windowEnd);
+
+	++nextOutputSampleIndex;
+	return true;
+}
+
+// Returns the weighted mean of the input samples in the range start..end,
+// where start and end are fractional input sample positions.
+// Input samples are requested in ascending index order.
+float SampleRateConverter::mean(double start, double end) {
+	const int firstIndex = static_cast<int>(start);
+	const int lastIndex = static_cast<int>(end);
+	double weightedSum = 0;
+
+	// Leading sample: only the part from start up to the next whole position counts
+	const double firstWeight = (firstIndex + 1) - start;
+	weightedSum += getInputSample(firstIndex) * firstWeight;
+
+	// Samples that lie completely inside the range count fully
+	int index = firstIndex + 1;
+	while (index < lastIndex) {
+		weightedSum += getInputSample(index);
+		++index;
+	}
+
+	// Trailing sample: only the part up to end counts
+	const double lastWeight = end - lastIndex;
+	weightedSum += getInputSample(lastIndex) * lastWeight;
+
+	const double totalWeight = end - start;
+	return static_cast<float>(weightedSum / totalWeight);
+}
+
+// Returns the input sample with the given index.
+// Only the most recently read sample and its direct successor can be requested;
+// any other index results in a std::runtime_error.
+float SampleRateConverter::getInputSample(int sampleIndex) {
+	if (sampleIndex != lastInputSampleIndex) {
+		const bool isSuccessor = sampleIndex == lastInputSampleIndex + 1;
+		if (!isSuccessor) {
+			throw runtime_error("Can only return the last sample or the one following it.");
+		}
+
+		lastInputSampleIndex = sampleIndex;
+		// Advance the input by one sample.
+		// The result is deliberately ignored: if the input has run out (at the very end),
+		// lastInputSample keeps its previous value and is reused.
+		inputStream->getNextSample(lastInputSample);
+	}
+
+	return lastInputSample;
+}
--- a/src/audio_input/SampleRateConverter.h
+++ b/src/audio_input/SampleRateConverter.h
@ -0,0 +1,34 @@
+#ifndef LIPSYNC_SAMPLERATECONVERTER_H
+#define LIPSYNC_SAMPLERATECONVERTER_H
+
+#include <memory>
+#include <vector>
+#include "AudioStream.h"
+
+// Wraps a mono audio stream and exposes it at a lower (or equal) frame rate.
+class SampleRateConverter : public AudioStream {
+public:
+	// Takes ownership of the stream to be converted.
+	SampleRateConverter(std::unique_ptr<AudioStream> inputStream, int outputFrameRate);
+
+	// AudioStream interface
+	int getFrameRate() override;
+	int getFrameCount() override;
+	int getChannelCount() override;
+	bool getNextSample(float &sample) override;
+
+private:
+	// The stream we're reading from
+	std::unique_ptr<AudioStream> inputStream;
+
+	// input frame rate / output frame rate
+	double downscalingFactor;
+
+	int outputFrameRate;
+	int outputFrameCount;
+
+	// Most recently read input sample and its index (-1 before the first read)
+	float lastInputSample;
+	int lastInputSampleIndex;
+
+	// Index of the output sample that getNextSample will produce next
+	int nextOutputSampleIndex;
+
+	// Weighted mean of the input samples between two fractional input positions
+	float mean(double start, double end);
+	// Sequential access to the input samples by index
+	float getInputSample(int sampleIndex);
+};
+
+#endif //LIPSYNC_SAMPLERATECONVERTER_H
--- a/src/audio_input/WaveFileReader.cpp
+++ b/src/audio_input/WaveFileReader.cpp
@ -0,0 +1,152 @@
+#include <format.h>
+#include "WaveFileReader.h"
+#include "IOTools.h"
+
+using std::runtime_error;
+using fmt::format;
+using std::string;
+using namespace little_endian;
+
+#define INT24_MIN (-8388608)
+#define INT24_MAX 8388607
+
+// Maps an integer from the range min..max linearly onto a float in the range -1..1
+float toNormalizedFloat(int value, int min, int max) {
+	const float offset = static_cast<float>(value - min);
+	const int range = max - min;
+	const float zeroToTwo = offset / range * 2;
+	return zeroToTwo - 1;
+}
+
+// Rounds i up to the nearest even number; even values are returned unchanged.
+int roundToEven(int i) {
+	const int lowestBit = i & 1;
+	return i + lowestBit;
+}
+
+// Codec identifiers as stored in the first field of the "fmt " chunk
+enum class Codec {
+	// Integer samples
+	PCM = 0x01,
+	// Floating-point samples
+	Float = 0x03
+};
+
+// Opens the given WAVE file and parses its header up to the start of the sample data.
+// Chunks other than "fmt " and "data" are skipped.
+// Throws std::runtime_error if the file is not a supported WAVE file.
+// Stream errors (including a premature end of file) surface as std::ios_base::failure,
+// because exceptions are enabled for failbit and badbit.
+WaveFileReader::WaveFileReader(std::string fileName) {
+	// Open file
+	file.exceptions(std::ifstream::failbit | std::ifstream::badbit);
+	file.open(fileName, std::ios::binary);
+
+	// Read header
+	uint32_t rootChunkId = read<uint32_t>(file);
+	if (rootChunkId != fourcc('R', 'I', 'F', 'F')) {
+		throw runtime_error("Unknown file format. Only WAVE files are supported.");
+	}
+	read<uint32_t>(file); // Chunk size
+	uint32_t waveId = read<uint32_t>(file);
+	if (waveId != fourcc('W', 'A', 'V', 'E')) {
+		throw runtime_error(format("File format is not WAVE, but {}.", fourccToString(waveId)));
+	}
+
+	// Read chunks until we reach the data chunk
+	bool reachedDataChunk = false;
+	int bytesPerSample = 0; // Stays 0 until a format chunk has been read
+	do {
+		uint32_t chunkId = read<uint32_t>(file);
+		int chunkSize = read<uint32_t>(file);
+		switch (chunkId) {
+			case fourcc('f', 'm', 't', ' '): {
+				// The fields read below occupy 16 bytes. A smaller chunk cannot hold them
+				// and would turn the skip below into a backward seek.
+				if (chunkSize < 16) {
+					throw runtime_error("Invalid WAVE file: Format chunk is too small.");
+				}
+
+				// Read relevant data
+				Codec codec = static_cast<Codec>(read<uint16_t>(file));
+				channelCount = read<uint16_t>(file);
+				frameRate = read<uint32_t>(file);
+				read<uint32_t>(file); // Bytes per second
+				int frameSize = read<uint16_t>(file);
+				int bitsPerSample = read<uint16_t>(file);
+
+				// We've read 16 bytes so far. Skip the remainder.
+				file.seekg(roundToEven(chunkSize) - 16, file.cur);
+
+				// channelCount is used as a divisor below and when computing the frame count.
+				if (channelCount < 1) {
+					throw runtime_error("Invalid WAVE file: Channel count must be at least 1.");
+				}
+
+				// Determine sample format
+				switch (codec) {
+					case Codec::PCM:
+						// Determine sample size.
+						// According to the WAVE standard, sample sizes that are not multiples of 8 bits
+						// (e.g. 12 bits) can be treated like the next-larger byte size.
+						if (bitsPerSample <= 8) {
+							sampleFormat = SampleFormat::UInt8;
+							bytesPerSample = 1;
+						} else if (bitsPerSample <= 16) {
+							sampleFormat = SampleFormat::Int16;
+							bytesPerSample = 2;
+						} else if (bitsPerSample <= 24) {
+							sampleFormat = SampleFormat::Int24;
+							bytesPerSample = 3;
+						} else {
+							throw runtime_error(
+								format("Unsupported sample format: {}-bit integer samples.", bitsPerSample));
+						}
+						if (bytesPerSample != frameSize / channelCount) {
+							throw runtime_error("Unsupported sample organization.");
+						}
+						break;
+					case Codec::Float:
+						if (bitsPerSample == 32) {
+							sampleFormat = SampleFormat::Float32;
+							bytesPerSample = 4;
+						} else {
+							throw runtime_error(format("Unsupported sample format: {}-bit floating-point samples.", bitsPerSample));
+						}
+						break;
+					default:
+						throw runtime_error("Unsupported sample format. Only uncompressed formats are supported.");
+				}
+				break;
+			}
+			case fourcc('d', 'a', 't', 'a'): {
+				// Without a preceding format chunk the sample layout is unknown,
+				// and the divisions below would divide by zero.
+				if (bytesPerSample == 0) {
+					throw runtime_error("Invalid WAVE file: Missing format chunk before data chunk.");
+				}
+
+				reachedDataChunk = true;
+				remainingSamples = chunkSize / bytesPerSample;
+				frameCount = remainingSamples / channelCount;
+				break;
+			}
+			default: {
+				// Skip unknown chunk
+				file.seekg(roundToEven(chunkSize), file.cur);
+				break;
+			}
+		}
+	} while (!reachedDataChunk);
+}
+
+// Returns the frame rate read from the file's format chunk.
+int WaveFileReader::getFrameRate() {
+	const int result = frameRate;
+	return result;
+}
+
+// Returns the number of frames, as derived from the size of the data chunk.
+int WaveFileReader::getFrameCount() {
+	const int result = frameCount;
+	return result;
+}
+
+// Returns the channel count read from the file's format chunk.
+int WaveFileReader::getChannelCount() {
+	const int result = channelCount;
+	return result;
+}
+
+// Reads the next sample from the file and converts it to a float.
+// Integer formats are scaled to the range -1..1; float samples are passed on as they are.
+// Returns false once all samples of the data chunk have been read.
+bool WaveFileReader::getNextSample(float &sample) {
+	if (remainingSamples == 0) {
+		return false;
+	}
+	--remainingSamples;
+
+	if (sampleFormat == SampleFormat::UInt8) {
+		const uint8_t value = read<uint8_t>(file);
+		sample = toNormalizedFloat(value, 0, UINT8_MAX);
+	} else if (sampleFormat == SampleFormat::Int16) {
+		const int16_t value = read<int16_t>(file);
+		sample = toNormalizedFloat(value, INT16_MIN, INT16_MAX);
+	} else if (sampleFormat == SampleFormat::Int24) {
+		int value = read<int, 24>(file);
+		// Only three bytes were read. If the 24-bit sign bit is set,
+		// fill the top byte with ones as well (two's complement).
+		const bool isNegative = (value & 0x800000) != 0;
+		if (isNegative) {
+			value |= 0xFF000000;
+		}
+		sample = toNormalizedFloat(value, INT24_MIN, INT24_MAX);
+	} else if (sampleFormat == SampleFormat::Float32) {
+		sample = read<float>(file);
+	}
+
+	return true;
+}
--- a/src/audio_input/WaveFileReader.h
+++ b/src/audio_input/WaveFileReader.h
@ -0,0 +1,33 @@
+#ifndef LIPSYNC_WAVFILEREADER_H
+#define LIPSYNC_WAVFILEREADER_H
+
+#include <string>
+#include <cstdint>
+#include <fstream>
+#include "AudioStream.h"
+
+// In-file representation of a single sample
+enum class SampleFormat {
+	// Unsigned 8-bit integer
+	UInt8,
+	// Signed 16-bit integer
+	Int16,
+	// Signed 24-bit integer (three bytes)
+	Int24,
+	// 32-bit floating point
+	Float32
+};
+
+// AudioStream that reads its samples from a WAVE file.
+class WaveFileReader : public AudioStream {
+public:
+	// Opens the file and reads its header.
+	WaveFileReader(std::string fileName);
+
+	// AudioStream interface
+	int getFrameRate() override;
+	int getFrameCount() override;
+	int getChannelCount() override;
+	bool getNextSample(float &sample) override;
+
+private:
+	// The file we're reading from
+	std::ifstream file;
+	// How each sample is stored in the file
+	SampleFormat sampleFormat;
+	int frameRate;
+	int frameCount;
+	int channelCount;
+	// Number of samples (not frames) that have not been read yet
+	int remainingSamples;
+};
+
+#endif //LIPSYNC_WAVFILEREADER_H
--- a/src/audio_input/WaveFileWriter.cpp
+++ b/src/audio_input/WaveFileWriter.cpp
@ -0,0 +1,44 @@
+#include <fstream>
+#include "WaveFileWriter.h"
+#include "IOTools.h"
+
+using namespace little_endian;
+
+// Writes all samples of the given stream to a new WAVE file, using 32-bit float samples.
+// The sizes in the header are derived from the frame count and channel count reported
+// by the stream; the samples themselves are written until the stream is exhausted.
+// Stream errors surface as std::ios_base::failure, because exceptions are enabled
+// for failbit and badbit.
+void createWaveFile(std::unique_ptr<AudioStream> inputStream, std::string fileName) {
+	// Open file
+	std::ofstream file;
+	file.exceptions(std::ofstream::failbit | std::ofstream::badbit);
+	file.open(fileName, std::ios::out | std::ios::binary);
+
+	// Write RIFF chunk
+	write<uint32_t>(fourcc('R', 'I', 'F', 'F'), file);
+	uint32_t formatChunkSize = 16;
+	uint16_t channelCount = static_cast<uint16_t>(inputStream->getChannelCount());
+	uint16_t frameSize = static_cast<uint16_t>(channelCount * sizeof(float));
+	uint32_t dataChunkSize = static_cast<uint32_t>(inputStream->getFrameCount() * frameSize);
+	// "WAVE" id (4 bytes) + format chunk with header + data chunk with header
+	uint32_t riffChunkSize = 4 + (8 + formatChunkSize) + (8 + dataChunkSize);
+	write<uint32_t>(riffChunkSize, file);
+	write<uint32_t>(fourcc('W', 'A', 'V', 'E'), file);
+
+	// Write format chunk
+	write<uint32_t>(fourcc('f', 'm', 't', ' '), file);
+	write<uint32_t>(formatChunkSize, file);
+	uint16_t codec = 0x03; // 32-bit float
+	write<uint16_t>(codec, file);
+	write<uint16_t>(channelCount, file);
+	// The frame rate is a 32-bit field. Narrowing it to 16 bits would corrupt
+	// any rate above 65535 Hz (e.g. 96000 Hz).
+	uint32_t frameRate = static_cast<uint32_t>(inputStream->getFrameRate());
+	write<uint32_t>(frameRate, file);
+	uint32_t bytesPerSecond = frameRate * frameSize;
+	write<uint32_t>(bytesPerSecond, file);
+	write<uint16_t>(frameSize, file);
+	uint16_t bitsPerSample = 8 * sizeof(float);
+	write<uint16_t>(bitsPerSample, file);
+
+	// Write data chunk
+	write<uint32_t>(fourcc('d', 'a', 't', 'a'), file);
+	write<uint32_t>(dataChunkSize, file);
+	float sample;
+	while (inputStream->getNextSample(sample)) {
+		write<float>(sample, file);
+	}
+}
--- a/src/audio_input/WaveFileWriter.h
+++ b/src/audio_input/WaveFileWriter.h
@ -0,0 +1,10 @@
+#ifndef LIPSYNC_WAVEFILEWRITER_H
+#define LIPSYNC_WAVEFILEWRITER_H
+
+#include <memory>
+#include <string>
+#include "AudioStream.h"
+
+// Writes the samples of the given stream to a new WAVE file with the given name.
+void createWaveFile(std::unique_ptr<AudioStream> inputStream, std::string fileName);
+
+#endif //LIPSYNC_WAVEFILEWRITER_H
--- a/src/main.cpp
+++ b/src/main.cpp