Implemented WAVE reading, writing, and conversion
This commit is contained in:
parent
31d3867708
commit
641f64022d
|
@ -3,6 +3,10 @@
|
|||
<component name="NewModuleRootManager">
|
||||
<content url="file://$MODULE_DIR$">
|
||||
<sourceFolder url="file://$MODULE_DIR$/CMakeLists.txt" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/lib/cppformat/format.cc" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/lib/cppformat/format.h" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/lib/cppformat/posix.cc" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/lib/cppformat/posix.h" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/lib/pocketsphinx/src/libpocketsphinx/acmod.c" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/lib/pocketsphinx/src/libpocketsphinx/acmod.h" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/lib/pocketsphinx/src/libpocketsphinx/allphone_search.c" isTestSource="false" />
|
||||
|
@ -122,8 +126,19 @@
|
|||
<sourceFolder url="file://$MODULE_DIR$/lib/sphinxbase/src/libsphinxbase/util/slamch.c" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/lib/sphinxbase/src/libsphinxbase/util/slapack_lite.c" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/lib/sphinxbase/src/libsphinxbase/util/strfuncs.c" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/main.cpp" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/tmp.cpp" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/src/audio_input/16kHzMonoStream.cpp" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/src/audio_input/16kHzMonoStream.h" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/src/audio_input/AudioStream.h" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/src/audio_input/ChannelDownmixer.cpp" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/src/audio_input/ChannelDownmixer.h" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/src/audio_input/IOTools.h" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/src/audio_input/SampleRateConverter.cpp" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/src/audio_input/SampleRateConverter.h" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/src/audio_input/WaveFileReader.cpp" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/src/audio_input/WaveFileReader.h" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/src/audio_input/WaveFileWriter.cpp" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/src/audio_input/WaveFileWriter.h" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/src/main.cpp" isTestSource="false" />
|
||||
</content>
|
||||
<orderEntry type="inheritedJdk" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
|
@ -135,7 +150,9 @@
|
|||
<root url="file://C:/MinGW/lib/gcc/mingw32/4.8.1/include-fixed" />
|
||||
<root url="file://C:/MinGW/lib/gcc/mingw32/4.8.1/include/c++" />
|
||||
<root url="file://C:/MinGW/lib/gcc/mingw32/4.8.1/include/ssp" />
|
||||
<root url="file://$MODULE_DIR$/lib/cppformat" />
|
||||
<root url="file://$MODULE_DIR$/lib/pocketsphinx/include" />
|
||||
<root url="file://$MODULE_DIR$/lib/r8brain-free-src-1.6" />
|
||||
<root url="file://$MODULE_DIR$/lib/sphinxbase/include" />
|
||||
</CLASSES>
|
||||
<SOURCES>
|
||||
|
@ -144,9 +161,20 @@
|
|||
<root url="file://C:/MinGW/lib/gcc/mingw32/4.8.1/include-fixed" />
|
||||
<root url="file://C:/MinGW/lib/gcc/mingw32/4.8.1/include/c++" />
|
||||
<root url="file://C:/MinGW/lib/gcc/mingw32/4.8.1/include/ssp" />
|
||||
<root url="file://$MODULE_DIR$/lib/cppformat" />
|
||||
<root url="file://$MODULE_DIR$/lib/pocketsphinx/include" />
|
||||
<root url="file://$MODULE_DIR$/lib/r8brain-free-src-1.6" />
|
||||
<root url="file://$MODULE_DIR$/lib/sphinxbase/include" />
|
||||
</SOURCES>
|
||||
<excluded>
|
||||
<root url="file://$MODULE_DIR$/lib/cppformat/posix.cc" />
|
||||
<root url="file://$MODULE_DIR$/lib/cppformat/posix.h" />
|
||||
<root url="file://$MODULE_DIR$/lib/cppformat/format.h" />
|
||||
<root url="file://$MODULE_DIR$/lib/cppformat/format.cc" />
|
||||
<root url="file://$MODULE_DIR$/lib/r8brain-free-src-1.6/r8bbase.h" />
|
||||
<root url="file://$MODULE_DIR$/lib/r8brain-free-src-1.6/example.cpp" />
|
||||
<root url="file://$MODULE_DIR$/lib/r8brain-free-src-1.6/r8bbase.cpp" />
|
||||
</excluded>
|
||||
</library>
|
||||
</orderEntry>
|
||||
</component>
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="VcsDirectoryMappings">
|
||||
<mapping directory="" vcs="" />
|
||||
<mapping directory="$PROJECT_DIR$" vcs="Git" />
|
||||
</component>
|
||||
</project>
|
|
@ -3,7 +3,7 @@ project(LipSync)
|
|||
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
|
||||
|
||||
set(SOURCE_FILES main.cpp)
|
||||
set(SOURCE_FILES src/main.cpp src/audio_input/WaveFileReader.cpp src/audio_input/WaveFileReader.h src/audio_input/ChannelDownmixer.cpp src/audio_input/ChannelDownmixer.h src/audio_input/AudioStream.h src/audio_input/SampleRateConverter.cpp src/audio_input/SampleRateConverter.h src/audio_input/16kHzMonoStream.cpp src/audio_input/16kHzMonoStream.h src/audio_input/WaveFileWriter.cpp src/audio_input/WaveFileWriter.h src/audio_input/IOTools.h)
|
||||
|
||||
include_directories("lib/sphinxbase-5prealpha-2015-08-05/include" "lib/pocketsphinx-5prealpha-2015-08-05/include" "lib/cppformat")
|
||||
FILE(GLOB_RECURSE SPHINX_BASE "lib/sphinxbase-5prealpha-2015-08-05/src/libsphinxbase/*.c")
|
||||
|
|
|
@ -0,0 +1,26 @@
|
|||
#include "16kHzMonoStream.h"
|
||||
#include "WaveFileReader.h"
|
||||
#include "ChannelDownmixer.h"
|
||||
#include "SampleRateConverter.h"
|
||||
|
||||
using std::runtime_error;
|
||||
|
||||
std::unique_ptr<AudioStream> create16kHzMonoStream(std::string fileName) {
|
||||
// Create audio stream
|
||||
std::unique_ptr<AudioStream> stream(new WaveFileReader(fileName));
|
||||
|
||||
// Downmix, if required
|
||||
if (stream->getChannelCount() != 1) {
|
||||
stream.reset(new ChannelDownmixer(std::move(stream)));
|
||||
}
|
||||
|
||||
// Downsample, if required
|
||||
if (stream->getFrameRate() < 16000) {
|
||||
throw runtime_error("Sample rate must not be below 16kHz.");
|
||||
}
|
||||
if (stream->getFrameRate() != 16000) {
|
||||
stream.reset(new SampleRateConverter(std::move(stream), 16000));
|
||||
}
|
||||
|
||||
return stream;
|
||||
}
|
|
@ -0,0 +1,10 @@
|
|||
#ifndef LIPSYNC_WAVEFILEREADER16KHZMONO_H
|
||||
#define LIPSYNC_WAVEFILEREADER16KHZMONO_H
|
||||
|
||||
#include "AudioStream.h"
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
std::unique_ptr<AudioStream> create16kHzMonoStream(std::string fileName);
|
||||
|
||||
#endif //LIPSYNC_WAVEFILEREADER16KHZMONO_H
|
|
@ -0,0 +1,12 @@
|
|||
#ifndef LIPSYNC_AUDIOSTREAM_H
|
||||
#define LIPSYNC_AUDIOSTREAM_H
|
||||
|
||||
// Abstract interface for a sequential source of audio samples.
// Implementations are typically owned and destroyed through a
// std::unique_ptr<AudioStream>.
class AudioStream {
public:
    // Virtual so that deleting an implementation through an AudioStream pointer
    // runs the implementation's destructor. Without it, such a delete is undefined behavior.
    virtual ~AudioStream() = default;

    // Returns the number of frames per second.
    virtual int getFrameRate() = 0;

    // Returns the total number of frames in the stream.
    virtual int getFrameCount() = 0;

    // Returns the number of channels, i.e. the number of samples per frame.
    virtual int getChannelCount() = 0;

    // Fetches the next sample into `sample` and returns true.
    // Returns false if no further sample is available.
    virtual bool getNextSample(float &sample) = 0;
};
|
||||
|
||||
#endif //LIPSYNC_AUDIOSTREAM_H
|
|
@ -0,0 +1,31 @@
|
|||
#include "ChannelDownmixer.h"
|
||||
|
||||
ChannelDownmixer::ChannelDownmixer(std::unique_ptr<AudioStream> inputStream) :
|
||||
inputStream(std::move(inputStream)),
|
||||
inputChannelCount(this->inputStream->getChannelCount())
|
||||
{}
|
||||
|
||||
int ChannelDownmixer::getFrameRate() {
|
||||
return inputStream->getFrameRate();
|
||||
}
|
||||
|
||||
int ChannelDownmixer::getFrameCount() {
|
||||
return inputStream->getFrameCount();
|
||||
}
|
||||
|
||||
// The downmixer's output is always mono.
int ChannelDownmixer::getChannelCount() {
    const int monoChannelCount = 1;
    return monoChannelCount;
}
|
||||
|
||||
bool ChannelDownmixer::getNextSample(float &sample) {
|
||||
float sum = 0;
|
||||
for (int channelIndex = 0; channelIndex < inputChannelCount; channelIndex++) {
|
||||
float currentSample;
|
||||
if (!inputStream->getNextSample(currentSample)) return false;
|
||||
|
||||
sum += currentSample;
|
||||
}
|
||||
|
||||
sample = sum / inputChannelCount;
|
||||
return true;
|
||||
}
|
|
@ -0,0 +1,21 @@
|
|||
#ifndef LIPSYNC_CHANNELDOWNMIXER_H
|
||||
#define LIPSYNC_CHANNELDOWNMIXER_H
|
||||
|
||||
#include "AudioStream.h"
|
||||
#include <memory>
|
||||
|
||||
// Converts a multi-channel audio stream to mono.
|
||||
class ChannelDownmixer : public AudioStream {
|
||||
public:
|
||||
ChannelDownmixer(std::unique_ptr<AudioStream> inputStream);
|
||||
virtual int getFrameRate() override;
|
||||
virtual int getFrameCount() override;
|
||||
virtual int getChannelCount() override;
|
||||
virtual bool getNextSample(float &sample) override;
|
||||
|
||||
private:
|
||||
std::unique_ptr<AudioStream> inputStream;
|
||||
int inputChannelCount;
|
||||
};
|
||||
|
||||
#endif //LIPSYNC_CHANNELDOWNMIXER_H
|
|
@ -0,0 +1,44 @@
|
|||
#ifndef LIPSYNC_IOTOOLS_H
|
||||
#define LIPSYNC_IOTOOLS_H
|
||||
|
||||
#include <fstream>
|
||||
|
||||
namespace little_endian {
|
||||
|
||||
// Reads a value of type Type from a binary stream in which it is stored in
// little-endian byte order.
//
// bitsToRead may be smaller than the size of Type (e.g. read<int, 24>); in that case
// only that many bits are consumed and the remaining bytes of the result stay zero.
//
// The bytes are stored at increasing addresses within the result, in stream order,
// which yields the correct value on a little-endian host.
//
// If the stream ends prematurely, the stream's failbit is set (raising an exception
// if the stream is configured to throw) and the missing bytes are left at zero
// rather than being filled with the end-of-file marker.
template <typename Type, int bitsToRead = 8 * sizeof(Type)>
Type read(std::istream &stream) {
    static_assert(bitsToRead % 8 == 0, "Cannot read fractional bytes.");
    static_assert(bitsToRead <= sizeof(Type) * 8, "Bits to read exceed target type size.");

    Type result = 0;
    stream.read(reinterpret_cast<char*>(&result), bitsToRead / 8);
    return result;
}
|
||||
|
||||
// Writes a value to a binary stream, byte by byte, starting with the byte at the
// lowest address. On a little-endian host this produces little-endian output.
//
// bitsToWrite may be smaller than the size of Type (e.g. write<int, 24>); in that case
// only that many bits are written.
template <typename Type, int bitsToWrite = 8 * sizeof(Type)>
void write(Type value, std::ostream &stream) {
    static_assert(bitsToWrite % 8 == 0, "Cannot write fractional bytes.");
    static_assert(bitsToWrite <= sizeof(Type) * 8, "Bits to write exceed target type size.");

    const char *current = reinterpret_cast<const char*>(&value);
    const char *const end = current + bitsToWrite / 8;
    while (current != end) {
        stream.put(*current);
        ++current;
    }
}
|
||||
|
||||
// Combines four characters into a 32-bit four-character code.
// c0 ends up in the least significant byte, c3 in the most significant byte.
//
// Each character is widened to uint32_t before shifting. Otherwise it would be
// promoted to (signed) int, and shifting a value >= 0x80 left by 24 bits would
// shift into the sign bit.
constexpr uint32_t fourcc(unsigned char c0, unsigned char c1, unsigned char c2, unsigned char c3) {
    return static_cast<uint32_t>(c0)
        | (static_cast<uint32_t>(c1) << 8)
        | (static_cast<uint32_t>(c2) << 16)
        | (static_cast<uint32_t>(c3) << 24);
}
|
||||
|
||||
// Converts a four-character code into a string of exactly four characters,
// starting with the character stored in the least significant byte.
//
// The characters are extracted by value (shift and mask) rather than by reading the
// integer's bytes from memory, so the result does not depend on the host's byte order.
inline std::string fourccToString(uint32_t fourcc) {
    const char characters[4] = {
        static_cast<char>(fourcc & 0xFF),
        static_cast<char>((fourcc >> 8) & 0xFF),
        static_cast<char>((fourcc >> 16) & 0xFF),
        static_cast<char>((fourcc >> 24) & 0xFF)
    };
    return std::string(characters, 4);
}
|
||||
|
||||
}
|
||||
|
||||
#endif //LIPSYNC_IOTOOLS_H
|
|
@ -0,0 +1,80 @@
|
|||
#include <cmath>
|
||||
#include "SampleRateConverter.h"
|
||||
|
||||
using std::runtime_error;
|
||||
|
||||
// Takes ownership of a mono input stream and prepares to convert it to the given
// (lower or equal) output frame rate.
// Throws std::runtime_error if the input stream is not mono or if its frame rate
// is below the requested output frame rate.
SampleRateConverter::SampleRateConverter(std::unique_ptr<AudioStream> inputStream, int outputFrameRate)
    : inputStream(std::move(inputStream)),
      downscalingFactor(static_cast<double>(this->inputStream->getFrameRate()) / outputFrameRate),
      outputFrameRate(outputFrameRate),
      outputFrameCount(std::lround(this->inputStream->getFrameCount() / downscalingFactor)),
      lastInputSample(0),
      lastInputSampleIndex(-1), // No input sample has been read yet
      nextOutputSampleIndex(0)
{
    // The constructor parameter has been moved from; use the member instead
    AudioStream &source = *this->inputStream;

    const bool isMono = source.getChannelCount() == 1;
    if (!isMono) {
        throw runtime_error("Only mono input streams are supported.");
    }

    const int inputFrameRate = source.getFrameRate();
    if (inputFrameRate < outputFrameRate) {
        throw runtime_error("Upsampling not supported.");
    }
}
|
||||
|
||||
int SampleRateConverter::getFrameRate() {
|
||||
return outputFrameRate;
|
||||
}
|
||||
|
||||
int SampleRateConverter::getFrameCount() {
|
||||
return outputFrameCount;
|
||||
}
|
||||
|
||||
// The converter only accepts mono input, so its output is mono as well.
int SampleRateConverter::getChannelCount() {
    const int monoChannelCount = 1;
    return monoChannelCount;
}
|
||||
|
||||
bool SampleRateConverter::getNextSample(float &sample) {
|
||||
if (nextOutputSampleIndex >= outputFrameCount) return false;
|
||||
|
||||
double start = nextOutputSampleIndex * downscalingFactor;
|
||||
double end = (nextOutputSampleIndex + 1) * downscalingFactor;
|
||||
|
||||
sample = mean(start, end);
|
||||
nextOutputSampleIndex++;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Returns the weighted mean of the input samples in the range start..end, where both
// bounds are fractional input sample positions. Samples that are only partially
// covered by the range are weighted by the covered fraction.
float SampleRateConverter::mean(double start, double end) {
    const int firstIndex = static_cast<int>(start);
    const int lastIndex = static_cast<int>(end);

    double weightedSum = 0;

    // First sample: only the part from `start` to the next sample boundary counts (weight <= 1)
    weightedSum += getInputSample(firstIndex) * ((firstIndex + 1) - start);

    // Samples lying entirely within the range count in full (weight 1 each)
    int index = firstIndex + 1;
    while (index < lastIndex) {
        weightedSum += getInputSample(index);
        index++;
    }

    // Last sample: only the part up to `end` counts (weight < 1)
    weightedSum += getInputSample(lastIndex) * (end - lastIndex);

    // The weights add up to the length of the range
    return static_cast<float>(weightedSum / (end - start));
}
|
||||
|
||||
float SampleRateConverter::getInputSample(int sampleIndex) {
|
||||
if (sampleIndex == lastInputSampleIndex) {
|
||||
return lastInputSample;
|
||||
}
|
||||
if (sampleIndex == lastInputSampleIndex + 1) {
|
||||
lastInputSampleIndex++;
|
||||
// Read the next sample.
|
||||
// If the input stream has no more samples (at the very end),
|
||||
// we'll just reuse the last sample we have.
|
||||
inputStream->getNextSample(lastInputSample);
|
||||
return lastInputSample;
|
||||
}
|
||||
|
||||
throw runtime_error("Can only return the last sample or the one following it.");
|
||||
}
|
|
@ -0,0 +1,34 @@
|
|||
#ifndef LIPSYNC_SAMPLERATECONVERTER_H
|
||||
#define LIPSYNC_SAMPLERATECONVERTER_H
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include "AudioStream.h"
|
||||
|
||||
class SampleRateConverter : public AudioStream {
|
||||
public:
|
||||
SampleRateConverter(std::unique_ptr<AudioStream> inputStream, int outputFrameRate);
|
||||
virtual int getFrameRate() override;
|
||||
virtual int getFrameCount() override;
|
||||
virtual int getChannelCount() override;
|
||||
virtual bool getNextSample(float &sample) override;
|
||||
private:
|
||||
// The stream we're reading from
|
||||
std::unique_ptr<AudioStream> inputStream;
|
||||
|
||||
// input frame rate / output frame rate
|
||||
double downscalingFactor;
|
||||
|
||||
int outputFrameRate;
|
||||
int outputFrameCount;
|
||||
|
||||
float lastInputSample;
|
||||
int lastInputSampleIndex;
|
||||
|
||||
int nextOutputSampleIndex;
|
||||
|
||||
float mean(double start, double end);
|
||||
float getInputSample(int sampleIndex);
|
||||
};
|
||||
|
||||
#endif //LIPSYNC_SAMPLERATECONVERTER_H
|
|
@ -0,0 +1,152 @@
|
|||
#include <format.h>
|
||||
#include "WaveFileReader.h"
|
||||
#include "IOTools.h"
|
||||
|
||||
using std::runtime_error;
|
||||
using fmt::format;
|
||||
using std::string;
|
||||
using namespace little_endian;
|
||||
|
||||
#define INT24_MIN (-8388608)
|
||||
#define INT24_MAX 8388607
|
||||
|
||||
// Maps an integer from the range min..max linearly onto the float range -1..1,
// so that min becomes -1 and max becomes 1.
float toNormalizedFloat(int value, int min, int max) {
    const float offset = static_cast<float>(value - min);
    const float span = static_cast<float>(max - min);

    // 0..1, then stretched and shifted to -1..1
    const float unit = offset / span;
    return (unit * 2) - 1;
}
|
||||
|
||||
// Rounds up to the next even number. Even numbers are returned unchanged.
int roundToEven(int i) {
    const int isOdd = i & 1;
    return i + isOdd;
}
|
||||
|
||||
// Codec identifiers, as read from the first field of the format chunk
enum class Codec {
    // Integer samples
    PCM = 1,

    // Floating-point samples
    Float = 3
};
|
||||
|
||||
// Opens the given WAVE file and parses its header and chunks up to the start of the
// sample data, so that the samples can then be fetched via getNextSample().
//
// Supported sample formats: integer PCM with up to 24 bits per sample and
// 32-bit floating-point.
//
// Throws std::runtime_error if the file is not a WAVE file, uses an unsupported
// sample format, or is malformed (format chunk too short, zero channels, data chunk
// without a preceding format chunk, oversized chunk).
// The file stream is configured to throw on failbit/badbit, so I/O problems such as a
// missing file or a premature end of file are reported as exceptions by the stream.
WaveFileReader::WaveFileReader(std::string fileName) {
    // Open file
    file.exceptions(std::ifstream::failbit | std::ifstream::badbit);
    file.open(fileName, std::ios::binary);

    // Read header
    uint32_t rootChunkId = read<uint32_t>(file);
    if (rootChunkId != fourcc('R', 'I', 'F', 'F')) {
        throw runtime_error("Unknown file format. Only WAVE files are supported.");
    }
    read<uint32_t>(file); // Chunk size
    uint32_t waveId = read<uint32_t>(file);
    if (waveId != fourcc('W', 'A', 'V', 'E')) {
        throw runtime_error(format("File format is not WAVE, but {}.", fourccToString(waveId)));
    }

    // Read chunks until we reach the data chunk
    bool reachedDataChunk = false;
    int bytesPerSample = 0; // Remains 0 until a format chunk has been parsed
    do {
        uint32_t chunkId = read<uint32_t>(file);

        // Chunk sizes are processed as int below. Reject sizes that would turn negative
        // or overflow when rounded up to the next even number.
        uint32_t rawChunkSize = read<uint32_t>(file);
        if (rawChunkSize >= static_cast<uint32_t>(INT32_MAX)) {
            throw runtime_error("Unsupported WAVE file: chunk is too large.");
        }
        int chunkSize = static_cast<int>(rawChunkSize);

        switch (chunkId) {
            case fourcc('f', 'm', 't', ' '): {
                // The fields read below occupy 16 bytes. A shorter chunk cannot contain
                // them and would make us seek backwards afterwards.
                if (chunkSize < 16) {
                    throw runtime_error("Invalid format chunk: chunk is too short.");
                }

                // Read relevant data
                Codec codec = static_cast<Codec>(read<uint16_t>(file));
                channelCount = read<uint16_t>(file);
                frameRate = read<uint32_t>(file);
                read<uint32_t>(file); // Bytes per second
                int frameSize = read<uint16_t>(file);
                int bitsPerSample = read<uint16_t>(file);

                // We've read 16 bytes so far. Skip the remainder.
                file.seekg(roundToEven(chunkSize) - 16, file.cur);

                // The channel count is used as a divisor, here and for the data chunk
                if (channelCount == 0) {
                    throw runtime_error("Invalid format chunk: channel count is 0.");
                }

                // Determine sample format
                switch (codec) {
                    case Codec::PCM:
                        // Determine sample size.
                        // According to the WAVE standard, sample sizes that are not multiples of 8 bits
                        // (e.g. 12 bits) can be treated like the next-larger byte size.
                        if (bitsPerSample == 8) {
                            sampleFormat = SampleFormat::UInt8;
                            bytesPerSample = 1;
                        } else if (bitsPerSample <= 16) {
                            sampleFormat = SampleFormat::Int16;
                            bytesPerSample = 2;
                        } else if (bitsPerSample <= 24) {
                            sampleFormat = SampleFormat::Int24;
                            bytesPerSample = 3;
                        } else {
                            throw runtime_error(
                                format("Unsupported sample format: {}-bit integer samples.", bitsPerSample));
                        }
                        if (bytesPerSample != frameSize / channelCount) {
                            throw runtime_error("Unsupported sample organization.");
                        }
                        break;
                    case Codec::Float:
                        if (bitsPerSample == 32) {
                            sampleFormat = SampleFormat::Float32;
                            bytesPerSample = 4;
                        } else {
                            throw runtime_error(format("Unsupported sample format: {}-bit floating-point samples.", bitsPerSample));
                        }
                        break;
                    default:
                        throw runtime_error("Unsupported sample format. Only uncompressed formats are supported.");
                }
                break;
            }
            case fourcc('d', 'a', 't', 'a'): {
                // Without a preceding format chunk, the sample layout is unknown
                // (and bytesPerSample would be a zero divisor).
                if (bytesPerSample == 0) {
                    throw runtime_error("Invalid WAVE file: data chunk is not preceded by a format chunk.");
                }

                reachedDataChunk = true;
                remainingSamples = chunkSize / bytesPerSample;
                frameCount = remainingSamples / channelCount;
                break;
            }
            default: {
                // Skip unknown chunk
                file.seekg(roundToEven(chunkSize), file.cur);
                break;
            }
        }
    } while (!reachedDataChunk);
}
|
||||
|
||||
int WaveFileReader::getFrameRate() {
|
||||
return frameRate;
|
||||
}
|
||||
|
||||
int WaveFileReader::getFrameCount() {
|
||||
return frameCount;
|
||||
}
|
||||
|
||||
int WaveFileReader::getChannelCount() {
|
||||
return channelCount;
|
||||
}
|
||||
|
||||
bool WaveFileReader::getNextSample(float &sample) {
|
||||
if (remainingSamples == 0) return false;
|
||||
remainingSamples--;
|
||||
|
||||
switch (sampleFormat) {
|
||||
case SampleFormat::UInt8: {
|
||||
uint8_t raw = read<uint8_t>(file);
|
||||
sample = toNormalizedFloat(raw, 0, UINT8_MAX);
|
||||
break;
|
||||
}
|
||||
case SampleFormat::Int16: {
|
||||
int16_t raw = read<int16_t>(file);
|
||||
sample = toNormalizedFloat(raw, INT16_MIN, INT16_MAX);
|
||||
break;
|
||||
}
|
||||
case SampleFormat::Int24: {
|
||||
int raw = read<int, 24>(file);
|
||||
if (raw & 0x800000) raw |= 0xFF000000; // Fix two's complement
|
||||
sample = toNormalizedFloat(raw, INT24_MIN, INT24_MAX);
|
||||
break;
|
||||
}
|
||||
case SampleFormat::Float32: {
|
||||
sample = read<float>(file);
|
||||
break;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
|
@ -0,0 +1,33 @@
|
|||
#ifndef LIPSYNC_WAVFILEREADER_H
|
||||
#define LIPSYNC_WAVFILEREADER_H
|
||||
|
||||
#include <string>
|
||||
#include <cstdint>
|
||||
#include <fstream>
|
||||
#include "AudioStream.h"
|
||||
|
||||
// In-file representations of a single sample that WaveFileReader can decode
enum class SampleFormat {
    // Unsigned integer, 1 byte
    UInt8,

    // Signed integer, 2 bytes
    Int16,

    // Signed integer, 3 bytes
    Int24,

    // Floating-point, 4 bytes
    Float32
};
|
||||
|
||||
class WaveFileReader : public AudioStream {
|
||||
public:
|
||||
WaveFileReader(std::string fileName);
|
||||
virtual int getFrameRate() override ;
|
||||
virtual int getFrameCount() override;
|
||||
virtual int getChannelCount() override;
|
||||
virtual bool getNextSample(float &sample) override;
|
||||
|
||||
private:
|
||||
std::ifstream file;
|
||||
SampleFormat sampleFormat;
|
||||
int frameRate;
|
||||
int frameCount;
|
||||
int channelCount;
|
||||
int remainingSamples;
|
||||
};
|
||||
|
||||
#endif //LIPSYNC_WAVFILEREADER_H
|
|
@ -0,0 +1,44 @@
|
|||
#include <fstream>
|
||||
#include "WaveFileWriter.h"
|
||||
#include "IOTools.h"
|
||||
|
||||
using namespace little_endian;
|
||||
|
||||
void createWaveFile(std::unique_ptr<AudioStream> inputStream, std::string fileName) {
|
||||
// Open file
|
||||
std::ofstream file;
|
||||
file.exceptions(std::ofstream::failbit | std::ofstream::badbit);
|
||||
file.open(fileName, std::ios::out | std::ios::binary);
|
||||
|
||||
// Write RIFF chunk
|
||||
write<uint32_t>(fourcc('R', 'I', 'F', 'F'), file);
|
||||
uint32_t formatChunkSize = 16;
|
||||
uint16_t channelCount = static_cast<uint16_t>(inputStream->getChannelCount());
|
||||
uint16_t frameSize = static_cast<uint16_t>(channelCount * sizeof(float));
|
||||
uint32_t dataChunkSize = static_cast<uint32_t>(inputStream->getFrameCount() * frameSize);
|
||||
uint32_t riffChunkSize = 4 + (8 + formatChunkSize) + (8 + dataChunkSize);
|
||||
write<uint32_t>(riffChunkSize, file);
|
||||
write<uint32_t>(fourcc('W', 'A', 'V', 'E'), file);
|
||||
|
||||
// Write format chunk
|
||||
write<uint32_t>(fourcc('f', 'm', 't', ' '), file);
|
||||
write<uint32_t>(formatChunkSize, file);
|
||||
uint16_t codec = 0x03; // 32-bit float
|
||||
write<uint16_t>(codec, file);
|
||||
write<uint16_t>(channelCount, file);
|
||||
uint32_t frameRate = static_cast<uint16_t>(inputStream->getFrameRate());
|
||||
write<uint32_t>(frameRate, file);
|
||||
uint32_t bytesPerSecond = frameRate * frameSize;
|
||||
write<uint32_t>(bytesPerSecond, file);
|
||||
write<uint16_t>(frameSize, file);
|
||||
uint16_t bitsPerSample = 8 * sizeof(float);
|
||||
write<uint16_t>(bitsPerSample, file);
|
||||
|
||||
// Write data chunk
|
||||
write<uint32_t>(fourcc('d', 'a', 't', 'a'), file);
|
||||
write<uint32_t>(dataChunkSize, file);
|
||||
float sample;
|
||||
while (inputStream->getNextSample(sample)) {
|
||||
write<float>(sample, file);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,10 @@
|
|||
#ifndef LIPSYNC_WAVEFILEWRITER_H
|
||||
#define LIPSYNC_WAVEFILEWRITER_H
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include "AudioStream.h"
|
||||
|
||||
void createWaveFile(std::unique_ptr<AudioStream> inputStream, std::string fileName);
|
||||
|
||||
#endif //LIPSYNC_WAVEFILEWRITER_H
|
Loading…
Reference in New Issue