Implemented WAVE reading, writing, and conversion
This commit is contained in:
parent
31d3867708
commit
641f64022d
|
@ -3,6 +3,10 @@
|
||||||
<component name="NewModuleRootManager">
|
<component name="NewModuleRootManager">
|
||||||
<content url="file://$MODULE_DIR$">
|
<content url="file://$MODULE_DIR$">
|
||||||
<sourceFolder url="file://$MODULE_DIR$/CMakeLists.txt" isTestSource="false" />
|
<sourceFolder url="file://$MODULE_DIR$/CMakeLists.txt" isTestSource="false" />
|
||||||
|
<sourceFolder url="file://$MODULE_DIR$/lib/cppformat/format.cc" isTestSource="false" />
|
||||||
|
<sourceFolder url="file://$MODULE_DIR$/lib/cppformat/format.h" isTestSource="false" />
|
||||||
|
<sourceFolder url="file://$MODULE_DIR$/lib/cppformat/posix.cc" isTestSource="false" />
|
||||||
|
<sourceFolder url="file://$MODULE_DIR$/lib/cppformat/posix.h" isTestSource="false" />
|
||||||
<sourceFolder url="file://$MODULE_DIR$/lib/pocketsphinx/src/libpocketsphinx/acmod.c" isTestSource="false" />
|
<sourceFolder url="file://$MODULE_DIR$/lib/pocketsphinx/src/libpocketsphinx/acmod.c" isTestSource="false" />
|
||||||
<sourceFolder url="file://$MODULE_DIR$/lib/pocketsphinx/src/libpocketsphinx/acmod.h" isTestSource="false" />
|
<sourceFolder url="file://$MODULE_DIR$/lib/pocketsphinx/src/libpocketsphinx/acmod.h" isTestSource="false" />
|
||||||
<sourceFolder url="file://$MODULE_DIR$/lib/pocketsphinx/src/libpocketsphinx/allphone_search.c" isTestSource="false" />
|
<sourceFolder url="file://$MODULE_DIR$/lib/pocketsphinx/src/libpocketsphinx/allphone_search.c" isTestSource="false" />
|
||||||
|
@ -122,8 +126,19 @@
|
||||||
<sourceFolder url="file://$MODULE_DIR$/lib/sphinxbase/src/libsphinxbase/util/slamch.c" isTestSource="false" />
|
<sourceFolder url="file://$MODULE_DIR$/lib/sphinxbase/src/libsphinxbase/util/slamch.c" isTestSource="false" />
|
||||||
<sourceFolder url="file://$MODULE_DIR$/lib/sphinxbase/src/libsphinxbase/util/slapack_lite.c" isTestSource="false" />
|
<sourceFolder url="file://$MODULE_DIR$/lib/sphinxbase/src/libsphinxbase/util/slapack_lite.c" isTestSource="false" />
|
||||||
<sourceFolder url="file://$MODULE_DIR$/lib/sphinxbase/src/libsphinxbase/util/strfuncs.c" isTestSource="false" />
|
<sourceFolder url="file://$MODULE_DIR$/lib/sphinxbase/src/libsphinxbase/util/strfuncs.c" isTestSource="false" />
|
||||||
<sourceFolder url="file://$MODULE_DIR$/main.cpp" isTestSource="false" />
|
<sourceFolder url="file://$MODULE_DIR$/src/audio_input/16kHzMonoStream.cpp" isTestSource="false" />
|
||||||
<sourceFolder url="file://$MODULE_DIR$/tmp.cpp" isTestSource="false" />
|
<sourceFolder url="file://$MODULE_DIR$/src/audio_input/16kHzMonoStream.h" isTestSource="false" />
|
||||||
|
<sourceFolder url="file://$MODULE_DIR$/src/audio_input/AudioStream.h" isTestSource="false" />
|
||||||
|
<sourceFolder url="file://$MODULE_DIR$/src/audio_input/ChannelDownmixer.cpp" isTestSource="false" />
|
||||||
|
<sourceFolder url="file://$MODULE_DIR$/src/audio_input/ChannelDownmixer.h" isTestSource="false" />
|
||||||
|
<sourceFolder url="file://$MODULE_DIR$/src/audio_input/IOTools.h" isTestSource="false" />
|
||||||
|
<sourceFolder url="file://$MODULE_DIR$/src/audio_input/SampleRateConverter.cpp" isTestSource="false" />
|
||||||
|
<sourceFolder url="file://$MODULE_DIR$/src/audio_input/SampleRateConverter.h" isTestSource="false" />
|
||||||
|
<sourceFolder url="file://$MODULE_DIR$/src/audio_input/WaveFileReader.cpp" isTestSource="false" />
|
||||||
|
<sourceFolder url="file://$MODULE_DIR$/src/audio_input/WaveFileReader.h" isTestSource="false" />
|
||||||
|
<sourceFolder url="file://$MODULE_DIR$/src/audio_input/WaveFileWriter.cpp" isTestSource="false" />
|
||||||
|
<sourceFolder url="file://$MODULE_DIR$/src/audio_input/WaveFileWriter.h" isTestSource="false" />
|
||||||
|
<sourceFolder url="file://$MODULE_DIR$/src/main.cpp" isTestSource="false" />
|
||||||
</content>
|
</content>
|
||||||
<orderEntry type="inheritedJdk" />
|
<orderEntry type="inheritedJdk" />
|
||||||
<orderEntry type="sourceFolder" forTests="false" />
|
<orderEntry type="sourceFolder" forTests="false" />
|
||||||
|
@ -135,7 +150,9 @@
|
||||||
<root url="file://C:/MinGW/lib/gcc/mingw32/4.8.1/include-fixed" />
|
<root url="file://C:/MinGW/lib/gcc/mingw32/4.8.1/include-fixed" />
|
||||||
<root url="file://C:/MinGW/lib/gcc/mingw32/4.8.1/include/c++" />
|
<root url="file://C:/MinGW/lib/gcc/mingw32/4.8.1/include/c++" />
|
||||||
<root url="file://C:/MinGW/lib/gcc/mingw32/4.8.1/include/ssp" />
|
<root url="file://C:/MinGW/lib/gcc/mingw32/4.8.1/include/ssp" />
|
||||||
|
<root url="file://$MODULE_DIR$/lib/cppformat" />
|
||||||
<root url="file://$MODULE_DIR$/lib/pocketsphinx/include" />
|
<root url="file://$MODULE_DIR$/lib/pocketsphinx/include" />
|
||||||
|
<root url="file://$MODULE_DIR$/lib/r8brain-free-src-1.6" />
|
||||||
<root url="file://$MODULE_DIR$/lib/sphinxbase/include" />
|
<root url="file://$MODULE_DIR$/lib/sphinxbase/include" />
|
||||||
</CLASSES>
|
</CLASSES>
|
||||||
<SOURCES>
|
<SOURCES>
|
||||||
|
@ -144,9 +161,20 @@
|
||||||
<root url="file://C:/MinGW/lib/gcc/mingw32/4.8.1/include-fixed" />
|
<root url="file://C:/MinGW/lib/gcc/mingw32/4.8.1/include-fixed" />
|
||||||
<root url="file://C:/MinGW/lib/gcc/mingw32/4.8.1/include/c++" />
|
<root url="file://C:/MinGW/lib/gcc/mingw32/4.8.1/include/c++" />
|
||||||
<root url="file://C:/MinGW/lib/gcc/mingw32/4.8.1/include/ssp" />
|
<root url="file://C:/MinGW/lib/gcc/mingw32/4.8.1/include/ssp" />
|
||||||
|
<root url="file://$MODULE_DIR$/lib/cppformat" />
|
||||||
<root url="file://$MODULE_DIR$/lib/pocketsphinx/include" />
|
<root url="file://$MODULE_DIR$/lib/pocketsphinx/include" />
|
||||||
|
<root url="file://$MODULE_DIR$/lib/r8brain-free-src-1.6" />
|
||||||
<root url="file://$MODULE_DIR$/lib/sphinxbase/include" />
|
<root url="file://$MODULE_DIR$/lib/sphinxbase/include" />
|
||||||
</SOURCES>
|
</SOURCES>
|
||||||
|
<excluded>
|
||||||
|
<root url="file://$MODULE_DIR$/lib/cppformat/posix.cc" />
|
||||||
|
<root url="file://$MODULE_DIR$/lib/cppformat/posix.h" />
|
||||||
|
<root url="file://$MODULE_DIR$/lib/cppformat/format.h" />
|
||||||
|
<root url="file://$MODULE_DIR$/lib/cppformat/format.cc" />
|
||||||
|
<root url="file://$MODULE_DIR$/lib/r8brain-free-src-1.6/r8bbase.h" />
|
||||||
|
<root url="file://$MODULE_DIR$/lib/r8brain-free-src-1.6/example.cpp" />
|
||||||
|
<root url="file://$MODULE_DIR$/lib/r8brain-free-src-1.6/r8bbase.cpp" />
|
||||||
|
</excluded>
|
||||||
</library>
|
</library>
|
||||||
</orderEntry>
|
</orderEntry>
|
||||||
</component>
|
</component>
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
<?xml version="1.0" encoding="UTF-8"?>
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
<project version="4">
|
<project version="4">
|
||||||
<component name="VcsDirectoryMappings">
|
<component name="VcsDirectoryMappings">
|
||||||
<mapping directory="" vcs="" />
|
<mapping directory="$PROJECT_DIR$" vcs="Git" />
|
||||||
</component>
|
</component>
|
||||||
</project>
|
</project>
|
|
@ -3,7 +3,7 @@ project(LipSync)
|
||||||
|
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
|
||||||
|
|
||||||
set(SOURCE_FILES main.cpp)
|
set(SOURCE_FILES src/main.cpp src/audio_input/WaveFileReader.cpp src/audio_input/WaveFileReader.h src/audio_input/ChannelDownmixer.cpp src/audio_input/ChannelDownmixer.h src/audio_input/AudioStream.h src/audio_input/SampleRateConverter.cpp src/audio_input/SampleRateConverter.h src/audio_input/16kHzMonoStream.cpp src/audio_input/16kHzMonoStream.h src/audio_input/WaveFileWriter.cpp src/audio_input/WaveFileWriter.h src/audio_input/IOTools.h)
|
||||||
|
|
||||||
include_directories("lib/sphinxbase-5prealpha-2015-08-05/include" "lib/pocketsphinx-5prealpha-2015-08-05/include" "lib/cppformat")
|
include_directories("lib/sphinxbase-5prealpha-2015-08-05/include" "lib/pocketsphinx-5prealpha-2015-08-05/include" "lib/cppformat")
|
||||||
FILE(GLOB_RECURSE SPHINX_BASE "lib/sphinxbase-5prealpha-2015-08-05/src/libsphinxbase/*.c")
|
FILE(GLOB_RECURSE SPHINX_BASE "lib/sphinxbase-5prealpha-2015-08-05/src/libsphinxbase/*.c")
|
||||||
|
|
|
@ -0,0 +1,26 @@
|
||||||
|
#include "16kHzMonoStream.h"
|
||||||
|
#include "WaveFileReader.h"
|
||||||
|
#include "ChannelDownmixer.h"
|
||||||
|
#include "SampleRateConverter.h"
|
||||||
|
|
||||||
|
using std::runtime_error;
|
||||||
|
|
||||||
|
std::unique_ptr<AudioStream> create16kHzMonoStream(std::string fileName) {
|
||||||
|
// Create audio stream
|
||||||
|
std::unique_ptr<AudioStream> stream(new WaveFileReader(fileName));
|
||||||
|
|
||||||
|
// Downmix, if required
|
||||||
|
if (stream->getChannelCount() != 1) {
|
||||||
|
stream.reset(new ChannelDownmixer(std::move(stream)));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Downsample, if required
|
||||||
|
if (stream->getFrameRate() < 16000) {
|
||||||
|
throw runtime_error("Sample rate must not be below 16kHz.");
|
||||||
|
}
|
||||||
|
if (stream->getFrameRate() != 16000) {
|
||||||
|
stream.reset(new SampleRateConverter(std::move(stream), 16000));
|
||||||
|
}
|
||||||
|
|
||||||
|
return stream;
|
||||||
|
}
|
|
@ -0,0 +1,10 @@
|
||||||
|
#ifndef LIPSYNC_WAVEFILEREADER16KHZMONO_H
|
||||||
|
#define LIPSYNC_WAVEFILEREADER16KHZMONO_H
|
||||||
|
|
||||||
|
#include "AudioStream.h"
|
||||||
|
#include <memory>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
// Opens the WAVE file `fileName` and returns a stream that delivers its audio
// as a single channel at 16000 frames per second, downmixing and downsampling
// as required. Throws std::runtime_error if the file's frame rate is below 16 kHz.
std::unique_ptr<AudioStream> create16kHzMonoStream(std::string fileName);
|
||||||
|
|
||||||
|
#endif //LIPSYNC_WAVEFILEREADER16KHZMONO_H
|
|
@ -0,0 +1,12 @@
|
||||||
|
#ifndef LIPSYNC_AUDIOSTREAM_H
|
||||||
|
#define LIPSYNC_AUDIOSTREAM_H
|
||||||
|
|
||||||
|
// Abstract, pull-based source of audio samples.
// Implementations either read samples from a medium or wrap another stream
// and transform its samples on the fly.
class AudioStream {
public:
    // Streams are owned and destroyed through pointers to this base class,
    // so the destructor has to be virtual for the derived part to be cleaned up.
    virtual ~AudioStream() = default;

    // Returns the number of frames per second.
    virtual int getFrameRate() = 0;

    // Returns the total number of frames in the stream.
    virtual int getFrameCount() = 0;

    // Returns the number of channels, i.e. the number of samples per frame.
    virtual int getChannelCount() = 0;

    // Stores the next sample in `sample` and returns true,
    // or returns false if the stream has no more samples.
    // For multi-channel streams, the samples of one frame are delivered one after another.
    virtual bool getNextSample(float &sample) = 0;
};
|
||||||
|
|
||||||
|
#endif //LIPSYNC_AUDIOSTREAM_H
|
|
@ -0,0 +1,31 @@
|
||||||
|
#include "ChannelDownmixer.h"
|
||||||
|
|
||||||
|
// Takes ownership of `inputStream` and remembers how many channels it delivers per frame.
ChannelDownmixer::ChannelDownmixer(std::unique_ptr<AudioStream> inputStream) :
    inputStream(std::move(inputStream)),
    inputChannelCount(0)
{
    // The parameter has been moved from, so the member must be addressed explicitly
    inputChannelCount = this->inputStream->getChannelCount();
}
|
||||||
|
|
||||||
|
int ChannelDownmixer::getFrameRate() {
|
||||||
|
return inputStream->getFrameRate();
|
||||||
|
}
|
||||||
|
|
||||||
|
int ChannelDownmixer::getFrameCount() {
|
||||||
|
return inputStream->getFrameCount();
|
||||||
|
}
|
||||||
|
|
||||||
|
// The output of a downmixer is always mono.
int ChannelDownmixer::getChannelCount() {
    const int monoChannelCount = 1;
    return monoChannelCount;
}
|
||||||
|
|
||||||
|
bool ChannelDownmixer::getNextSample(float &sample) {
|
||||||
|
float sum = 0;
|
||||||
|
for (int channelIndex = 0; channelIndex < inputChannelCount; channelIndex++) {
|
||||||
|
float currentSample;
|
||||||
|
if (!inputStream->getNextSample(currentSample)) return false;
|
||||||
|
|
||||||
|
sum += currentSample;
|
||||||
|
}
|
||||||
|
|
||||||
|
sample = sum / inputChannelCount;
|
||||||
|
return true;
|
||||||
|
}
|
|
@ -0,0 +1,21 @@
|
||||||
|
#ifndef LIPSYNC_CHANNELDOWNMIXER_H
|
||||||
|
#define LIPSYNC_CHANNELDOWNMIXER_H
|
||||||
|
|
||||||
|
#include "AudioStream.h"
|
||||||
|
#include <memory>
|
||||||
|
|
||||||
|
// Converts a multi-channel audio stream to mono.
|
||||||
|
class ChannelDownmixer : public AudioStream {
|
||||||
|
public:
|
||||||
|
ChannelDownmixer(std::unique_ptr<AudioStream> inputStream);
|
||||||
|
virtual int getFrameRate() override;
|
||||||
|
virtual int getFrameCount() override;
|
||||||
|
virtual int getChannelCount() override;
|
||||||
|
virtual bool getNextSample(float &sample) override;
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::unique_ptr<AudioStream> inputStream;
|
||||||
|
int inputChannelCount;
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif //LIPSYNC_CHANNELDOWNMIXER_H
|
|
@ -0,0 +1,44 @@
|
||||||
|
#ifndef LIPSYNC_IOTOOLS_H
|
||||||
|
#define LIPSYNC_IOTOOLS_H
|
||||||
|
|
||||||
|
#include <algorithm>
#include <cstdint>
#include <cstring>
#include <fstream>
#include <string>
|
||||||
|
|
||||||
|
// Helpers for reading and writing little-endian binary data, independent of
// the byte order of the machine the program runs on.
namespace little_endian {

    namespace detail {
        // Returns true if this machine stores the least significant byte first.
        inline bool hostIsLittleEndian() {
            const std::uint16_t probe = 1;
            unsigned char firstByte;
            std::memcpy(&firstByte, &probe, 1);
            return firstByte == 1;
        }
    }

    // Reads `bitsToRead / 8` bytes from `stream` and returns them as a little-endian value
    // of type `Type`. If fewer bytes are read than `Type` holds, the most significant bytes
    // of the result are zero; no sign extension is performed.
    // Reading past the end of the stream sets the stream's fail state (and throws, if the
    // stream is configured to do so).
    template <typename Type, int bitsToRead = 8 * sizeof(Type)>
    Type read(std::istream &stream) {
        static_assert(bitsToRead % 8 == 0, "Cannot read fractional bytes.");
        static_assert(bitsToRead <= sizeof(Type) * 8, "Bits to read exceed target type size.");

        // Collect the bytes in file order (least significant first), zero-padded to the full width
        unsigned char bytes[sizeof(Type)] = {};
        const int bytesToRead = bitsToRead / 8;
        for (int byteIndex = 0; byteIndex < bytesToRead; byteIndex++) {
            bytes[byteIndex] = static_cast<unsigned char>(stream.get());
        }

        // Bring the bytes into the order this machine expects
        if (!detail::hostIsLittleEndian()) {
            std::reverse(bytes, bytes + sizeof(Type));
        }

        Type result{};
        std::memcpy(&result, bytes, sizeof(Type));
        return result;
    }

    // Writes the `bitsToWrite / 8` least significant bytes of `value` to `stream`,
    // least significant byte first.
    template <typename Type, int bitsToWrite = 8 * sizeof(Type)>
    void write(Type value, std::ostream &stream) {
        static_assert(bitsToWrite % 8 == 0, "Cannot write fractional bytes.");
        static_assert(bitsToWrite <= sizeof(Type) * 8, "Bits to write exceed target type size.");

        // Obtain the value's bytes, least significant first
        unsigned char bytes[sizeof(Type)];
        std::memcpy(bytes, &value, sizeof(Type));
        if (!detail::hostIsLittleEndian()) {
            std::reverse(bytes, bytes + sizeof(Type));
        }

        const int bytesToWrite = bitsToWrite / 8;
        for (int byteIndex = 0; byteIndex < bytesToWrite; byteIndex++) {
            stream.put(static_cast<char>(bytes[byteIndex]));
        }
    }

    // Packs four characters into the 32-bit code obtained by reading them from a file as a
    // little-endian integer: `c0` ends up in the least significant byte.
    // The characters are widened to an unsigned 32-bit type before shifting, so that
    // characters >= 0x80 in the last position do not overflow a signed int.
    constexpr std::uint32_t fourcc(unsigned char c0, unsigned char c1, unsigned char c2, unsigned char c3) {
        return static_cast<std::uint32_t>(c0)
            | (static_cast<std::uint32_t>(c1) << 8)
            | (static_cast<std::uint32_t>(c2) << 16)
            | (static_cast<std::uint32_t>(c3) << 24);
    }

    // Converts a code created by fourcc() back into its four characters.
    inline std::string fourccToString(std::uint32_t fourcc) {
        std::string text(4, '\0');
        for (int charIndex = 0; charIndex < 4; charIndex++) {
            text[charIndex] = static_cast<char>((fourcc >> (8 * charIndex)) & 0xFF);
        }
        return text;
    }

}
|
||||||
|
|
||||||
|
#endif //LIPSYNC_IOTOOLS_H
|
|
@ -0,0 +1,80 @@
|
||||||
|
#include <cmath>
|
||||||
|
#include "SampleRateConverter.h"
|
||||||
|
|
||||||
|
using std::runtime_error;
|
||||||
|
|
||||||
|
SampleRateConverter::SampleRateConverter(std::unique_ptr<AudioStream> inputStream, int outputFrameRate) :
|
||||||
|
inputStream(std::move(inputStream)),
|
||||||
|
downscalingFactor(static_cast<double>(this->inputStream->getFrameRate()) / outputFrameRate),
|
||||||
|
outputFrameRate(outputFrameRate),
|
||||||
|
outputFrameCount(std::lround(this->inputStream->getFrameCount() / downscalingFactor)),
|
||||||
|
lastInputSample(0),
|
||||||
|
lastInputSampleIndex(-1),
|
||||||
|
nextOutputSampleIndex(0)
|
||||||
|
{
|
||||||
|
if (this->inputStream->getChannelCount() != 1) {
|
||||||
|
throw runtime_error("Only mono input streams are supported.");
|
||||||
|
}
|
||||||
|
if (this->inputStream->getFrameRate() < outputFrameRate) {
|
||||||
|
throw runtime_error("Upsampling not supported.");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int SampleRateConverter::getFrameRate() {
|
||||||
|
return outputFrameRate;
|
||||||
|
}
|
||||||
|
|
||||||
|
int SampleRateConverter::getFrameCount() {
|
||||||
|
return outputFrameCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
// The converter only accepts mono input, hence its output is mono as well.
int SampleRateConverter::getChannelCount() {
    const int monoChannelCount = 1;
    return monoChannelCount;
}
|
||||||
|
|
||||||
|
bool SampleRateConverter::getNextSample(float &sample) {
|
||||||
|
if (nextOutputSampleIndex >= outputFrameCount) return false;
|
||||||
|
|
||||||
|
double start = nextOutputSampleIndex * downscalingFactor;
|
||||||
|
double end = (nextOutputSampleIndex + 1) * downscalingFactor;
|
||||||
|
|
||||||
|
sample = mean(start, end);
|
||||||
|
nextOutputSampleIndex++;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns the weighted mean of the input samples within the fractional
// input range start..end. Samples that lie only partly within the range
// contribute in proportion to the covered part.
float SampleRateConverter::mean(double start, double end) {
    const int firstIndex = static_cast<int>(start);
    const int lastIndex = static_cast<int>(end);
    double weightedSum = 0;

    // Leading sample: only the part from `start` to the next whole index counts
    const double leadingWeight = (firstIndex + 1) - start;
    weightedSum += getInputSample(firstIndex) * leadingWeight;

    // Samples entirely within the range count in full
    int index = firstIndex + 1;
    while (index < lastIndex) {
        weightedSum += getInputSample(index);
        ++index;
    }

    // Trailing sample: only the part up to `end` counts
    const double trailingWeight = end - lastIndex;
    weightedSum += getInputSample(lastIndex) * trailingWeight;

    return static_cast<float>(weightedSum / (end - start));
}
|
||||||
|
|
||||||
|
float SampleRateConverter::getInputSample(int sampleIndex) {
|
||||||
|
if (sampleIndex == lastInputSampleIndex) {
|
||||||
|
return lastInputSample;
|
||||||
|
}
|
||||||
|
if (sampleIndex == lastInputSampleIndex + 1) {
|
||||||
|
lastInputSampleIndex++;
|
||||||
|
// Read the next sample.
|
||||||
|
// If the input stream has no more samples (at the very end),
|
||||||
|
// we'll just reuse the last sample we have.
|
||||||
|
inputStream->getNextSample(lastInputSample);
|
||||||
|
return lastInputSample;
|
||||||
|
}
|
||||||
|
|
||||||
|
throw runtime_error("Can only return the last sample or the one following it.");
|
||||||
|
}
|
|
@ -0,0 +1,34 @@
|
||||||
|
#ifndef LIPSYNC_SAMPLERATECONVERTER_H
|
||||||
|
#define LIPSYNC_SAMPLERATECONVERTER_H
|
||||||
|
|
||||||
|
#include <memory>
|
||||||
|
#include <vector>
|
||||||
|
#include "AudioStream.h"
|
||||||
|
|
||||||
|
class SampleRateConverter : public AudioStream {
|
||||||
|
public:
|
||||||
|
SampleRateConverter(std::unique_ptr<AudioStream> inputStream, int outputFrameRate);
|
||||||
|
virtual int getFrameRate() override;
|
||||||
|
virtual int getFrameCount() override;
|
||||||
|
virtual int getChannelCount() override;
|
||||||
|
virtual bool getNextSample(float &sample) override;
|
||||||
|
private:
|
||||||
|
// The stream we're reading from
|
||||||
|
std::unique_ptr<AudioStream> inputStream;
|
||||||
|
|
||||||
|
// input frame rate / output frame rate
|
||||||
|
double downscalingFactor;
|
||||||
|
|
||||||
|
int outputFrameRate;
|
||||||
|
int outputFrameCount;
|
||||||
|
|
||||||
|
float lastInputSample;
|
||||||
|
int lastInputSampleIndex;
|
||||||
|
|
||||||
|
int nextOutputSampleIndex;
|
||||||
|
|
||||||
|
float mean(double start, double end);
|
||||||
|
float getInputSample(int sampleIndex);
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif //LIPSYNC_SAMPLERATECONVERTER_H
|
|
@ -0,0 +1,152 @@
|
||||||
|
#include <format.h>
|
||||||
|
#include "WaveFileReader.h"
|
||||||
|
#include "IOTools.h"
|
||||||
|
|
||||||
|
using std::runtime_error;
|
||||||
|
using fmt::format;
|
||||||
|
using std::string;
|
||||||
|
using namespace little_endian;
|
||||||
|
|
||||||
|
#define INT24_MIN (-8388608)
|
||||||
|
#define INT24_MAX 8388607
|
||||||
|
|
||||||
|
// Linearly maps an int from the range min..max to a float in the range -1..1:
// `min` becomes -1, `max` becomes 1.
float toNormalizedFloat(int value, int min, int max) {
    const float offset = static_cast<float>(value - min);
    const float span = static_cast<float>(max - min);
    const float unit = offset / span; // 0..1
    return unit * 2 - 1;
}
|
||||||
|
|
||||||
|
// Rounds `i` up to the next even number; even numbers are returned unchanged.
int roundToEven(int i) {
    const int oddBit = i & 1; // 1 for odd numbers, 0 for even ones
    return i + oddBit;
}
|
||||||
|
|
||||||
|
// Codec identifiers as stored in the first field of a WAVE format chunk.
// Only the codecs this reader can decode are listed.
enum class Codec {
    // Integer samples
    PCM = 1,
    // Floating-point samples
    Float = 3
};
|
||||||
|
|
||||||
|
// Opens the WAVE file `fileName` and parses its header up to the start of the sample data,
// so that samples can subsequently be fetched via getNextSample().
// Throws std::runtime_error if the file is not a WAVE file, is malformed, or uses an
// unsupported sample format. Because the stream is configured to throw, failure to open
// the file or a premature end of file surfaces as an exception from the stream.
WaveFileReader::WaveFileReader(std::string fileName) {
    // Open file
    file.exceptions(std::ifstream::failbit | std::ifstream::badbit);
    file.open(fileName, std::ios::binary);

    // Read header
    uint32_t rootChunkId = read<uint32_t>(file);
    if (rootChunkId != fourcc('R', 'I', 'F', 'F')) {
        throw runtime_error("Unknown file format. Only WAVE files are supported.");
    }
    read<uint32_t>(file); // Chunk size
    uint32_t waveId = read<uint32_t>(file);
    if (waveId != fourcc('W', 'A', 'V', 'E')) {
        throw runtime_error(format("File format is not WAVE, but {}.", fourccToString(waveId)));
    }

    // Read chunks until we reach the data chunk
    bool reachedDataChunk = false;
    int bytesPerSample = 0; // Remains 0 until a format chunk has been parsed
    do {
        uint32_t chunkId = read<uint32_t>(file);
        uint32_t rawChunkSize = read<uint32_t>(file);
        // Sizes are handled as int below. Reject values that would turn negative
        // or overflow when rounded up to an even number.
        if (rawChunkSize > 0x7FFFFFFEu) {
            throw runtime_error(format("Unsupported chunk size: {} bytes.", rawChunkSize));
        }
        int chunkSize = static_cast<int>(rawChunkSize);

        switch (chunkId) {
            case fourcc('f', 'm', 't', ' '): {
                // The fields read below occupy 16 bytes
                const int parsedByteCount = 16;
                if (chunkSize < parsedByteCount) {
                    throw runtime_error("Invalid WAVE file: format chunk is too small.");
                }

                // Read relevant data
                Codec codec = static_cast<Codec>(read<uint16_t>(file));
                channelCount = read<uint16_t>(file);
                frameRate = read<uint32_t>(file);
                read<uint32_t>(file); // Bytes per second
                int frameSize = read<uint16_t>(file);
                int bitsPerSample = read<uint16_t>(file);

                // We've read 16 bytes so far. Skip the remainder.
                file.seekg(roundToEven(chunkSize) - parsedByteCount, file.cur);

                // A channel count of zero would lead to divisions by zero further down
                if (channelCount == 0) {
                    throw runtime_error("Invalid WAVE file: channel count is zero.");
                }

                // Determine sample format
                switch (codec) {
                    case Codec::PCM:
                        // Determine sample size.
                        // According to the WAVE standard, sample sizes that are not multiples of 8 bits
                        // (e.g. 12 bits) can be treated like the next-larger byte size.
                        if (bitsPerSample == 8) {
                            sampleFormat = SampleFormat::UInt8;
                            bytesPerSample = 1;
                        } else if (bitsPerSample <= 16) {
                            sampleFormat = SampleFormat::Int16;
                            bytesPerSample = 2;
                        } else if (bitsPerSample <= 24) {
                            sampleFormat = SampleFormat::Int24;
                            bytesPerSample = 3;
                        } else {
                            throw runtime_error(
                                format("Unsupported sample format: {}-bit integer samples.", bitsPerSample));
                        }
                        if (bytesPerSample != frameSize / channelCount) {
                            throw runtime_error("Unsupported sample organization.");
                        }
                        break;
                    case Codec::Float:
                        if (bitsPerSample == 32) {
                            sampleFormat = SampleFormat::Float32;
                            bytesPerSample = 4;
                        } else {
                            throw runtime_error(format("Unsupported sample format: {}-bit floating-point samples.", bitsPerSample));
                        }
                        break;
                    default:
                        throw runtime_error("Unsupported sample format. Only uncompressed formats are supported.");
                }
                break;
            }
            case fourcc('d', 'a', 't', 'a'): {
                // Without a preceding format chunk, the sample layout is unknown
                // (and the division below would be a division by zero)
                if (bytesPerSample == 0) {
                    throw runtime_error("Invalid WAVE file: data chunk found before format chunk.");
                }
                reachedDataChunk = true;
                remainingSamples = chunkSize / bytesPerSample;
                frameCount = remainingSamples / channelCount;
                break;
            }
            default: {
                // Skip unknown chunk
                file.seekg(roundToEven(chunkSize), file.cur);
                break;
            }
        }
    } while (!reachedDataChunk);
}
|
||||||
|
|
||||||
|
int WaveFileReader::getFrameRate() {
|
||||||
|
return frameRate;
|
||||||
|
}
|
||||||
|
|
||||||
|
int WaveFileReader::getFrameCount() {
|
||||||
|
return frameCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
int WaveFileReader::getChannelCount() {
|
||||||
|
return channelCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool WaveFileReader::getNextSample(float &sample) {
|
||||||
|
if (remainingSamples == 0) return false;
|
||||||
|
remainingSamples--;
|
||||||
|
|
||||||
|
switch (sampleFormat) {
|
||||||
|
case SampleFormat::UInt8: {
|
||||||
|
uint8_t raw = read<uint8_t>(file);
|
||||||
|
sample = toNormalizedFloat(raw, 0, UINT8_MAX);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case SampleFormat::Int16: {
|
||||||
|
int16_t raw = read<int16_t>(file);
|
||||||
|
sample = toNormalizedFloat(raw, INT16_MIN, INT16_MAX);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case SampleFormat::Int24: {
|
||||||
|
int raw = read<int, 24>(file);
|
||||||
|
if (raw & 0x800000) raw |= 0xFF000000; // Fix two's complement
|
||||||
|
sample = toNormalizedFloat(raw, INT24_MIN, INT24_MAX);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case SampleFormat::Float32: {
|
||||||
|
sample = read<float>(file);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
|
@ -0,0 +1,33 @@
|
||||||
|
#ifndef LIPSYNC_WAVFILEREADER_H
|
||||||
|
#define LIPSYNC_WAVFILEREADER_H
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
#include <cstdint>
|
||||||
|
#include <fstream>
|
||||||
|
#include "AudioStream.h"
|
||||||
|
|
||||||
|
// The in-file sample representations WaveFileReader is able to decode.
enum class SampleFormat {
    UInt8,   // 8-bit unsigned integer
    Int16,   // 16-bit signed integer
    Int24,   // 24-bit signed integer
    Float32  // 32-bit floating point
};
|
||||||
|
|
||||||
|
class WaveFileReader : public AudioStream {
|
||||||
|
public:
|
||||||
|
WaveFileReader(std::string fileName);
|
||||||
|
virtual int getFrameRate() override ;
|
||||||
|
virtual int getFrameCount() override;
|
||||||
|
virtual int getChannelCount() override;
|
||||||
|
virtual bool getNextSample(float &sample) override;
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::ifstream file;
|
||||||
|
SampleFormat sampleFormat;
|
||||||
|
int frameRate;
|
||||||
|
int frameCount;
|
||||||
|
int channelCount;
|
||||||
|
int remainingSamples;
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif //LIPSYNC_WAVFILEREADER_H
|
|
@ -0,0 +1,44 @@
|
||||||
|
#include <fstream>
|
||||||
|
#include "WaveFileWriter.h"
|
||||||
|
#include "IOTools.h"
|
||||||
|
|
||||||
|
using namespace little_endian;
|
||||||
|
|
||||||
|
void createWaveFile(std::unique_ptr<AudioStream> inputStream, std::string fileName) {
|
||||||
|
// Open file
|
||||||
|
std::ofstream file;
|
||||||
|
file.exceptions(std::ofstream::failbit | std::ofstream::badbit);
|
||||||
|
file.open(fileName, std::ios::out | std::ios::binary);
|
||||||
|
|
||||||
|
// Write RIFF chunk
|
||||||
|
write<uint32_t>(fourcc('R', 'I', 'F', 'F'), file);
|
||||||
|
uint32_t formatChunkSize = 16;
|
||||||
|
uint16_t channelCount = static_cast<uint16_t>(inputStream->getChannelCount());
|
||||||
|
uint16_t frameSize = static_cast<uint16_t>(channelCount * sizeof(float));
|
||||||
|
uint32_t dataChunkSize = static_cast<uint32_t>(inputStream->getFrameCount() * frameSize);
|
||||||
|
uint32_t riffChunkSize = 4 + (8 + formatChunkSize) + (8 + dataChunkSize);
|
||||||
|
write<uint32_t>(riffChunkSize, file);
|
||||||
|
write<uint32_t>(fourcc('W', 'A', 'V', 'E'), file);
|
||||||
|
|
||||||
|
// Write format chunk
|
||||||
|
write<uint32_t>(fourcc('f', 'm', 't', ' '), file);
|
||||||
|
write<uint32_t>(formatChunkSize, file);
|
||||||
|
uint16_t codec = 0x03; // 32-bit float
|
||||||
|
write<uint16_t>(codec, file);
|
||||||
|
write<uint16_t>(channelCount, file);
|
||||||
|
uint32_t frameRate = static_cast<uint16_t>(inputStream->getFrameRate());
|
||||||
|
write<uint32_t>(frameRate, file);
|
||||||
|
uint32_t bytesPerSecond = frameRate * frameSize;
|
||||||
|
write<uint32_t>(bytesPerSecond, file);
|
||||||
|
write<uint16_t>(frameSize, file);
|
||||||
|
uint16_t bitsPerSample = 8 * sizeof(float);
|
||||||
|
write<uint16_t>(bitsPerSample, file);
|
||||||
|
|
||||||
|
// Write data chunk
|
||||||
|
write<uint32_t>(fourcc('d', 'a', 't', 'a'), file);
|
||||||
|
write<uint32_t>(dataChunkSize, file);
|
||||||
|
float sample;
|
||||||
|
while (inputStream->getNextSample(sample)) {
|
||||||
|
write<float>(sample, file);
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,10 @@
|
||||||
|
#ifndef LIPSYNC_WAVEFILEWRITER_H
|
||||||
|
#define LIPSYNC_WAVEFILEWRITER_H
|
||||||
|
|
||||||
|
#include <memory>
|
||||||
|
#include <string>
|
||||||
|
#include "AudioStream.h"
|
||||||
|
|
||||||
|
// Writes all samples of `inputStream` to a new WAVE file named `fileName`,
// stored as floating-point samples (codec 0x03). Takes ownership of the stream.
void createWaveFile(std::unique_ptr<AudioStream> inputStream, std::string fileName);
|
||||||
|
|
||||||
|
#endif //LIPSYNC_WAVEFILEWRITER_H
|
Loading…
Reference in New Issue