Reading WAVE file
This commit is contained in:
parent
641f64022d
commit
3cd82e89f8
48
src/main.cpp
48
src/main.cpp
|
@ -2,12 +2,24 @@
|
||||||
#include <stdexcept>
|
#include <stdexcept>
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
|
#include <vector>
|
||||||
|
#include <iostream>
|
||||||
|
#include <chrono>
|
||||||
|
#include "audio_input/16kHzMonoStream.h"
|
||||||
|
|
||||||
using std::runtime_error;
|
using std::runtime_error;
|
||||||
using std::shared_ptr;
|
using std::shared_ptr;
|
||||||
|
using std::unique_ptr;
|
||||||
|
|
||||||
#define MODELDIR "X:/dev/projects/LipSync/lib/pocketsphinx/model"
|
#define MODELDIR "X:/dev/projects/LipSync/lib/pocketsphinx/model"
|
||||||
|
|
||||||
|
// Converts a float sample to a signed 16-bit int.
//
// The input is expected in the range -1..1. Values outside that range are
// clamped to it first, so -1 (and below) maps to INT16_MIN and +1 (and above)
// maps to INT16_MAX. The scaled value is truncated toward zero when it is
// converted to an integer.
//
// NaN is mapped to 0: neither std::max nor std::min replaces a NaN first
// argument, and converting NaN to an integer type is undefined behavior.
int16_t floatSampleToInt16(float sample) {
	// NaN is the only value that compares unequal to itself
	if (sample != sample) return 0;

	sample = std::max(sample, -1.0f);
	sample = std::min(sample, 1.0f);
	return static_cast<int16_t>(((sample + 1) / 2) * (INT16_MAX - INT16_MIN) + INT16_MIN);
}
|
||||||
|
|
||||||
int main(int argc, char *argv[]) {
|
int main(int argc, char *argv[]) {
|
||||||
shared_ptr<cmd_ln_t> config(
|
shared_ptr<cmd_ln_t> config(
|
||||||
cmd_ln_init(
|
cmd_ln_init(
|
||||||
|
@ -33,23 +45,41 @@ int main(int argc, char *argv[]) {
|
||||||
[](ps_decoder_t* recognizer) { ps_free(recognizer); });
|
[](ps_decoder_t* recognizer) { ps_free(recognizer); });
|
||||||
if (!recognizer) throw runtime_error("Error creating speech recognizer.");
|
if (!recognizer) throw runtime_error("Error creating speech recognizer.");
|
||||||
|
|
||||||
shared_ptr<FILE> file(
|
unique_ptr<AudioStream> audioStream =
|
||||||
fopen("X:/dev/projects/LipSync/lib/pocketsphinx/test/data/goforward.raw", "rb"),
|
create16kHzMonoStream(R"(C:\Users\Daniel\Desktop\audio-test\test 16000Hz 1ch 16bit.wav)");
|
||||||
[](FILE* file) { fclose(file); });
|
|
||||||
if (!file) throw runtime_error("Error opening sound file.");
|
|
||||||
|
|
||||||
int error = ps_start_utt(recognizer.get());
|
int error = ps_start_utt(recognizer.get());
|
||||||
if (error) throw runtime_error("Error starting utterance processing.");
|
if (error) throw runtime_error("Error starting utterance processing.");
|
||||||
|
|
||||||
int16 buffer[512];
|
auto start = std::chrono::steady_clock::now();
|
||||||
while (!feof(file.get())) {
|
|
||||||
size_t sampleCount = fread(buffer, 2, 512, file.get());
|
std::vector<int16_t> buffer;
|
||||||
int searchedFrameCount = ps_process_raw(recognizer.get(), buffer, sampleCount, false, false);
|
const int capacity = 1600;
|
||||||
|
buffer.reserve(capacity); // 0.1 second capacity
|
||||||
|
int sampleCount = 0;
|
||||||
|
do {
|
||||||
|
// Read to buffer
|
||||||
|
buffer.clear();
|
||||||
|
while (buffer.size() < capacity) {
|
||||||
|
float sample;
|
||||||
|
if (!audioStream->getNextSample(sample)) break;
|
||||||
|
buffer.push_back(floatSampleToInt16(sample));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Analyze buffer
|
||||||
|
int searchedFrameCount = ps_process_raw(recognizer.get(), buffer.data(), buffer.size(), false, false);
|
||||||
if (searchedFrameCount < 0) throw runtime_error("Error decoding raw audio data.");
|
if (searchedFrameCount < 0) throw runtime_error("Error decoding raw audio data.");
|
||||||
}
|
|
||||||
|
sampleCount += buffer.size();
|
||||||
|
|
||||||
|
std::cout << sampleCount / 16000.0 << "s\n";
|
||||||
|
} while (buffer.size());
|
||||||
error = ps_end_utt(recognizer.get());
|
error = ps_end_utt(recognizer.get());
|
||||||
if (error) throw runtime_error("Error ending utterance processing.");
|
if (error) throw runtime_error("Error ending utterance processing.");
|
||||||
|
|
||||||
|
auto end = std::chrono::steady_clock::now();
|
||||||
|
std::cout << std::chrono::duration_cast<std::chrono::duration<double>>(end - start).count() << "\n";
|
||||||
|
|
||||||
ps_seg_t *segmentationIter;
|
ps_seg_t *segmentationIter;
|
||||||
int32 score;
|
int32 score;
|
||||||
for (segmentationIter = ps_seg_iter(recognizer.get(), &score); segmentationIter; segmentationIter = ps_seg_next(segmentationIter)) {
|
for (segmentationIter = ps_seg_iter(recognizer.get(), &score); segmentationIter; segmentationIter = ps_seg_next(segmentationIter)) {
|
||||||
|
|
Loading…
Reference in New Issue