rhubarb-lip-sync/lib/sphinxbase-rev13216/test/unit/test_ngram/test_lm_class.c

124 lines
4.3 KiB
C
Raw Normal View History

2015-10-19 19:45:08 +00:00
#include <ngram_model.h>
#include <logmath.h>
#include <strfuncs.h>
#include "test_macros.h"
#include <stdio.h>
#include <string.h>
#include <math.h>
/**
 * Exercise class-based language model support on an already loaded model.
 *
 * Loads the class definitions from LMDIR "/100.probdef" into `model`, then
 * checks word IDs and unigram/bigram/trigram scores for class words, adds
 * words to an existing class, and finally adds a brand new class.
 *
 * The checks are order dependent: every step mutates `model`, and the
 * expected word IDs further down rely on the words added before them.
 *
 * @param lmath Log-math object used to build the expected scores; it is the
 *              same object main() passes to ngram_model_read().
 * @param model Model under test. Must be non-NULL (asserted first).
 *
 * NOTE(review): ngram_score() is called with the word first, followed by its
 * history words and a terminating NULL; the history order is presumably
 * most-recent-first -- confirm against ngram_model.h.
 */
void
run_tests(logmath_t *lmath, ngram_model_t *model)
{
    int32 rv, i, score;

    TEST_ASSERT(model);

    /* Baseline: the class tag itself is an ordinary word before classes load. */
    TEST_EQUAL(ngram_wid(model, "scylla"), 285);
    TEST_EQUAL(strcmp(ngram_word(model, 285), "scylla"), 0);

    rv = ngram_model_read_classdef(model, LMDIR "/100.probdef");
    TEST_EQUAL(rv, 0);

    /* Verify that class word IDs remain the same. */
    TEST_EQUAL(ngram_wid(model, "scylla"), 285);
    TEST_EQUAL(strcmp(ngram_word(model, 285), "scylla"), 0);

    /* Verify in-class word IDs (expected values have bit 31 set). */
    TEST_EQUAL(ngram_wid(model, "scylla:scylla"), 0x80000000 | 400);

    /* Verify in-class and out-class unigram scores.  The expected value for
     * an in-class word is the class unigram score plus the log of the word's
     * in-class weight (weights presumably come from 100.probdef). */
    TEST_EQUAL_LOG(ngram_score(model, "scylla:scylla", NULL),
                   logmath_log10_to_log(lmath, -2.7884) + logmath_log(lmath, 0.4));
    TEST_EQUAL_LOG(ngram_score(model, "scooby:scylla", NULL),
                   logmath_log10_to_log(lmath, -2.7884) + logmath_log(lmath, 0.1));
    TEST_EQUAL_LOG(ngram_score(model, "scylla", NULL),
                   logmath_log10_to_log(lmath, -2.7884));
    TEST_EQUAL_LOG(ngram_score(model, "oh:zero", NULL),
                   logmath_log10_to_log(lmath, -1.9038) + logmath_log(lmath, 0.7));
    TEST_EQUAL_LOG(ngram_score(model, "zero", NULL),
                   logmath_log10_to_log(lmath, -1.9038));

    /* Verify class bigram scores.  Per the expected values, no in-class
     * weight is added when the class word appears only in the history. */
    TEST_EQUAL_LOG(ngram_score(model, "scylla", "on", NULL),
                   logmath_log10_to_log(lmath, -1.2642));
    TEST_EQUAL_LOG(ngram_score(model, "scylla:scylla", "on", NULL),
                   logmath_log10_to_log(lmath, -1.2642) + logmath_log(lmath, 0.4));
    TEST_EQUAL_LOG(ngram_score(model, "apparently", "scylla", NULL),
                   logmath_log10_to_log(lmath, -0.5172));
    TEST_EQUAL_LOG(ngram_score(model, "apparently", "karybdis:scylla", NULL),
                   logmath_log10_to_log(lmath, -0.5172));
    TEST_EQUAL_LOG(ngram_score(model, "apparently", "scooby:scylla", NULL),
                   logmath_log10_to_log(lmath, -0.5172));

    /* Verify class trigram scores. */
    TEST_EQUAL_LOG(ngram_score(model, "zero", "be", "will", NULL),
                   logmath_log10_to_log(lmath, -0.5725));
    TEST_EQUAL_LOG(ngram_score(model, "oh:zero", "be", "will", NULL),
                   logmath_log10_to_log(lmath, -0.5725) + logmath_log(lmath, 0.7));
    TEST_EQUAL_LOG(ngram_score(model, "should", "variance", "zero", NULL),
                   logmath_log10_to_log(lmath, -0.9404));
    TEST_EQUAL_LOG(ngram_score(model, "should", "variance", "zero:zero", NULL),
                   logmath_log10_to_log(lmath, -0.9404));

    /* Add words to classes.  0x80000196 is 0x80000000 | 406. */
    rv = ngram_model_add_class_word(model, "scylla", "scrappy:scylla", 1.0);
    TEST_ASSERT(rv >= 0);
    TEST_EQUAL(ngram_wid(model, "scrappy:scylla"), 0x80000196);
    /* The word is added with weight 1.0 but is expected to score with 0.2;
     * presumably the class weights are renormalized on insertion. */
    TEST_EQUAL_LOG(ngram_score(model, "scrappy:scylla", NULL),
                   logmath_log10_to_log(lmath, -2.7884) + logmath_log(lmath, 0.2));
    /* Score once and reuse it; cast the ID because %x takes an unsigned int
     * and class word IDs are negative when held in an int32. */
    score = ngram_score(model, "scrappy:scylla", NULL);
    printf("scrappy:scylla %08x %d %f\n",
           (unsigned int)ngram_wid(model, "scrappy:scylla"),
           (int)score,
           logmath_exp(lmath, score));

    /* Add a lot of words to a class. */
    for (i = 0; i < 129; ++i) {
        /* Worst case "-2147483648:scylla" needs 19 bytes including the NUL. */
        char word[32];

        sprintf(word, "%d:scylla", i);
        rv = ngram_model_add_class_word(model, "scylla", word, 1.0);
        /* Fail before looking the word up if it could not be added. */
        TEST_ASSERT(rv >= 0);

        score = ngram_score(model, word, NULL);
        printf("%s %08x %d %f\n", word,
               (unsigned int)ngram_wid(model, word),
               (int)score,
               logmath_exp(lmath, score));
        /* IDs are expected to continue sequentially after scrappy:scylla. */
        TEST_EQUAL(ngram_wid(model, word), 0x80000197 + i);
    }

    /* Add a new class. */
    {
        const char *words[] = { "blatz:foobie", "hurf:foobie" };
        float32 weights[] = { 0.6, 0.4 };
        int32 foobie_prob;

        rv = ngram_model_add_class(model, "[foobie]", 1.0,
                                   words, weights, 2);
        TEST_ASSERT(rv >= 0);

        /* Each member should score as the class tag plus its log weight. */
        foobie_prob = ngram_score(model, "[foobie]", NULL);
        TEST_EQUAL_LOG(ngram_score(model, "blatz:foobie", NULL),
                       foobie_prob + logmath_log(lmath, 0.6));
        TEST_EQUAL_LOG(ngram_score(model, "hurf:foobie", NULL),
                       foobie_prob + logmath_log(lmath, 0.4));
    }
}
/**
 * Test entry point.
 *
 * Runs run_tests() twice against the same language model stored in two
 * formats: the binary dump (LMDIR "/100.lm.dmp", NGRAM_BIN) and the gzipped
 * ARPA text file (LMDIR "/100.lm.gz", NGRAM_ARPA).  A fresh model is loaded
 * for each pass because run_tests() mutates the model it is given.
 *
 * Command-line arguments are not used.
 *
 * @return 0 when all checks pass, 1 if the log-math object cannot be
 *         created.  A model that fails to load is caught by the non-NULL
 *         assertion at the top of run_tests().
 */
int
main(int argc, char *argv[])
{
    logmath_t *lmath;
    ngram_model_t *model;

    lmath = logmath_init(1.0001, 0, 0);
    /* Every later call uses lmath, so stop here if it was not created. */
    if (lmath == NULL) {
        fprintf(stderr, "FAIL: logmath_init returned NULL\n");
        return 1;
    }

    /* Pass 1: binary dump format. */
    model = ngram_model_read(NULL, LMDIR "/100.lm.dmp", NGRAM_BIN, lmath);
    run_tests(lmath, model);
    ngram_model_free(model);

    /* Pass 2: gzipped ARPA format, loaded fresh. */
    model = ngram_model_read(NULL, LMDIR "/100.lm.gz", NGRAM_ARPA, lmath);
    run_tests(lmath, model);
    ngram_model_free(model);

    logmath_free(lmath);
    return 0;
}