/*
 * rhubarb-lip-sync/lib/sphinxbase-rev13216/test/unit/test_ngram/test_lm_set.c
 *
 * 199 lines
 * 7.5 KiB
 * C
 */

#include <ngram_model.h>
#include <logmath.h>
#include <strfuncs.h>
#include "test_macros.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
/**
 * Unit test for the language model set API (ngram_model_set_*).
 *
 * Covers, in order: selecting a single model in a set, interpolating
 * between models, adding and removing models with and without word ID
 * remapping, loading a set from an lmctl file, iterating over a set,
 * class probabilities, and explicit word ID mapping.
 *
 * The TEST_* macros come from test_macros.h; they are presumed to
 * terminate the test on failure (confirm against that header).
 *
 * @return 0 when every check has passed.
 */
int
main(int argc, char *argv[])
{
    logmath_t *lmath;
    ngram_model_t *lms[3];
    ngram_model_t *lmset;
    const char *names[] = { "100", "102" };
    const char *words[] = {
        "<UNK>",
        "ROBOMAN",
        "libio",
        "sphinxtrain",
        "bigbird",
        "quuxfuzz"
    };
    const int32 n_words = sizeof(words) / sizeof(words[0]);
    float32 weights[] = { 0.6, 0.4 };

    lmath = logmath_init(1.0001, 0, 0);
    TEST_ASSERT(lmath);

    /* Fail here, rather than inside the set code, if a fixture is missing. */
    lms[0] = ngram_model_read(NULL, LMDIR "/100.lm.dmp", NGRAM_BIN, lmath);
    TEST_ASSERT(lms[0]);
    lms[1] = ngram_model_read(NULL, LMDIR "/102.lm.dmp", NGRAM_BIN, lmath);
    TEST_ASSERT(lms[1]);

    lmset = ngram_model_set_init(NULL, lms, (char **)names, NULL, 2);
    TEST_ASSERT(lmset);

    /* Selecting by name must hand back the matching submodel. */
    TEST_EQUAL(ngram_model_set_select(lmset, "102"), lms[1]);
    TEST_EQUAL(ngram_model_set_select(lmset, "100"), lms[0]);

    /* Scores with "100" selected. */
    TEST_EQUAL(ngram_score(lmset, "sphinxtrain", NULL),
               logmath_log10_to_log(lmath, -2.7884));
    TEST_EQUAL(ngram_score(lmset, "huggins", "david", NULL),
               logmath_log10_to_log(lmath, -0.0361));
    TEST_EQUAL_LOG(ngram_score(lmset, "daines", "huggins", "david", NULL),
                   logmath_log10_to_log(lmath, -0.4105));

    /* Scores with "102" selected. */
    TEST_EQUAL(ngram_model_set_select(lmset, "102"), lms[1]);
    TEST_EQUAL(ngram_score(lmset, "sphinxtrain", NULL),
               logmath_log10_to_log(lmath, -2.8192));
    TEST_EQUAL(ngram_score(lmset, "huggins", "david", NULL),
               logmath_log10_to_log(lmath, -0.1597));
    TEST_EQUAL_LOG(ngram_score(lmset, "daines", "huggins", "david", NULL),
                   logmath_log10_to_log(lmath, -0.0512));

    /* Test interpolation with default weights. */
    TEST_ASSERT(ngram_model_set_interp(lmset, NULL, NULL));
    TEST_EQUAL_LOG(ngram_score(lmset, "sphinxtrain", NULL),
                   logmath_log(lmath,
                               0.5 * pow(10, -2.7884)
                               + 0.5 * pow(10, -2.8192)));

    /* Test interpolation with set weights. */
    TEST_ASSERT(ngram_model_set_interp(lmset, names, weights));
    TEST_EQUAL_LOG(ngram_score(lmset, "sphinxtrain", NULL),
                   logmath_log(lmath,
                               0.6 * pow(10, -2.7884)
                               + 0.4 * pow(10, -2.8192)));

    /* Test switching back to selected mode. */
    TEST_EQUAL(ngram_model_set_select(lmset, "102"), lms[1]);
    TEST_EQUAL(ngram_score(lmset, "sphinxtrain", NULL),
               logmath_log10_to_log(lmath, -2.8192));
    TEST_EQUAL(ngram_score(lmset, "huggins", "david", NULL),
               logmath_log10_to_log(lmath, -0.1597));
    TEST_EQUAL_LOG(ngram_score(lmset, "daines", "huggins", "david", NULL),
                   logmath_log10_to_log(lmath, -0.0512));

    /* Test interpolation with previously set weights. */
    TEST_ASSERT(ngram_model_set_interp(lmset, NULL, NULL));
    TEST_EQUAL_LOG(ngram_score(lmset, "sphinxtrain", NULL),
                   logmath_log(lmath,
                               0.6 * pow(10, -2.7884)
                               + 0.4 * pow(10, -2.8192)));

    /* Test interpolation with closed-vocabulary models and OOVs.
     * The expected value scales the earlier 0.6/0.4 weights by 2/3 and
     * has no term for the newly added "turtle" model. */
    lms[2] = ngram_model_read(NULL, LMDIR "/turtle.lm", NGRAM_ARPA, lmath);
    TEST_ASSERT(lms[2]);
    TEST_ASSERT(ngram_model_set_add(lmset, lms[2], "turtle", 1.0, FALSE));
    TEST_EQUAL_LOG(ngram_score(lmset, "sphinxtrain", NULL),
                   logmath_log(lmath,
                               0.6 * (2.0 / 3.0) * pow(10, -2.7884)
                               + 0.4 * (2.0 / 3.0) * pow(10, -2.8192)));

    ngram_model_free(lmset);

    /* Test adding and removing language models with preserved
     * word ID mappings. */
    lms[0] = ngram_model_read(NULL, LMDIR "/100.lm.dmp", NGRAM_BIN, lmath);
    TEST_ASSERT(lms[0]);
    lms[1] = ngram_model_read(NULL, LMDIR "/102.lm.dmp", NGRAM_BIN, lmath);
    TEST_ASSERT(lms[1]);
    lms[2] = ngram_model_read(NULL, LMDIR "/turtle.lm", NGRAM_ARPA, lmath);
    TEST_ASSERT(lms[2]);

    /* Start the set with only the first model. */
    lmset = ngram_model_set_init(NULL, lms, (char **)names, NULL, 1);
    TEST_ASSERT(lmset);
    {
        int32 wid;

        wid = ngram_wid(lmset, "sphinxtrain");
        TEST_ASSERT(ngram_model_set_add(lmset, lms[1], "102", 1.0, TRUE));
        /* Verify that it is the same. */
        TEST_EQUAL(wid, ngram_wid(lmset, "sphinxtrain"));

        /* Now add another model and verify that its words
         * don't actually get added. */
        TEST_ASSERT(ngram_model_set_add(lmset, lms[2], "turtle", 1.0, TRUE));
        TEST_EQUAL(wid, ngram_wid(lmset, "sphinxtrain"));
        TEST_EQUAL(ngram_unknown_wid(lmset), ngram_wid(lmset, "FORWARD"));

        /* Remove language model, make sure this doesn't break horribly. */
        TEST_EQUAL(lms[1], ngram_model_set_remove(lmset, "102", TRUE));
        ngram_model_free(lms[1]);
        TEST_EQUAL(wid, ngram_wid(lmset, "sphinxtrain"));

        /* Now enable remapping of word IDs and verify that it works. */
        TEST_EQUAL(lms[2], ngram_model_set_remove(lmset, "turtle", TRUE));
        TEST_ASSERT(ngram_model_set_add(lmset, lms[2], "turtle", 1.0, FALSE));
        printf("FORWARD = %d\n", ngram_wid(lmset, "FORWARD"));
    }

    ngram_model_free(lmset);

    /* Now test lmctl files. */
    lmset = ngram_model_set_read(NULL, LMDIR "/100.lmctl", lmath);
    TEST_ASSERT(lmset);

    /* Test iterators: exactly three models must be visited.  Looping on
     * the iterator avoids dereferencing a NULL iterator if the set is
     * shorter than expected. */
    {
        ngram_model_set_iter_t *itor;
        char const *lmname = NULL;
        int32 n_seen = 0;

        for (itor = ngram_model_set_iter(lmset); itor != NULL;
             itor = ngram_model_set_iter_next(itor)) {
            ngram_model_t *lm = ngram_model_set_iter_model(itor, &lmname);

            TEST_ASSERT(lm);
            ++n_seen;
            printf("%d: %s\n", n_seen, lmname);
        }
        TEST_EQUAL(n_seen, 3);
    }

    /* Score straight after loading, with no explicit selection. */
    TEST_EQUAL(ngram_score(lmset, "sphinxtrain", NULL),
               logmath_log10_to_log(lmath, -2.7884));

    /* Default interpolation over the lmctl set: the expected value uses
     * a weight of 1/3 on each of the two "sphinxtrain" terms. */
    TEST_ASSERT(ngram_model_set_interp(lmset, NULL, NULL));
    TEST_EQUAL_LOG(ngram_score(lmset, "sphinxtrain", NULL),
                   logmath_log(lmath,
                               (1.0 / 3.0) * pow(10, -2.7884)
                               + (1.0 / 3.0) * pow(10, -2.8192)));

    /* A failed selection would otherwise only surface as a confusing
     * score mismatch, so check each one. */
    TEST_ASSERT(ngram_model_set_select(lmset, "102"));
    TEST_EQUAL(ngram_score(lmset, "sphinxtrain", NULL),
               logmath_log10_to_log(lmath, -2.8192));
    TEST_EQUAL(ngram_score(lmset, "huggins", "david", NULL),
               logmath_log10_to_log(lmath, -0.1597));
    TEST_EQUAL_LOG(ngram_score(lmset, "daines", "huggins", "david", NULL),
                   logmath_log10_to_log(lmath, -0.0512));

    TEST_ASSERT(ngram_model_set_select(lmset, "100"));
    TEST_EQUAL(ngram_score(lmset, "sphinxtrain", NULL),
               logmath_log10_to_log(lmath, -2.7884));
    TEST_EQUAL(ngram_score(lmset, "huggins", "david", NULL),
               logmath_log10_to_log(lmath, -0.0361));
    TEST_EQUAL_LOG(ngram_score(lmset, "daines", "huggins", "david", NULL),
                   logmath_log10_to_log(lmath, -0.4105));

    /* Test class probabilities. */
    TEST_ASSERT(ngram_model_set_select(lmset, "100"));
    TEST_EQUAL_LOG(ngram_score(lmset, "scylla:scylla", NULL),
                   logmath_log10_to_log(lmath, -2.7884) + logmath_log(lmath, 0.4));
    TEST_EQUAL_LOG(ngram_score(lmset, "scooby:scylla", NULL),
                   logmath_log10_to_log(lmath, -2.7884) + logmath_log(lmath, 0.1));
    TEST_EQUAL_LOG(ngram_score(lmset, "apparently", "karybdis:scylla", NULL),
                   logmath_log10_to_log(lmath, -0.5172));

    /* Test word ID mapping. */
    TEST_ASSERT(ngram_model_set_select(lmset, "turtle"));
    TEST_EQUAL(ngram_wid(lmset, "ROBOMAN"),
               ngram_wid(lmset, ngram_word(lmset, ngram_wid(lmset, "ROBOMAN"))));
    TEST_EQUAL(ngram_wid(lmset, "bigbird"),
               ngram_wid(lmset, ngram_word(lmset, ngram_wid(lmset, "bigbird"))));
    /* Before mapping, "quuxfuzz" resolves to the unknown word ID. */
    TEST_EQUAL(ngram_wid(lmset, "quuxfuzz"), ngram_unknown_wid(lmset));
    TEST_EQUAL(ngram_score(lmset, "quuxfuzz", NULL), ngram_zero(lmset));

    /* After mapping, word IDs follow the order of words[]: "quuxfuzz"
     * is at index 5, and it still scores as zero probability. */
    ngram_model_set_map_words(lmset, words, n_words);
    TEST_EQUAL(ngram_wid(lmset, "ROBOMAN"),
               ngram_wid(lmset, ngram_word(lmset, ngram_wid(lmset, "ROBOMAN"))));
    TEST_EQUAL(ngram_wid(lmset, "bigbird"),
               ngram_wid(lmset, ngram_word(lmset, ngram_wid(lmset, "bigbird"))));
    TEST_EQUAL(ngram_wid(lmset, "quuxfuzz"), 5);
    TEST_EQUAL(ngram_score(lmset, "quuxfuzz", NULL), ngram_zero(lmset));

    ngram_model_free(lmset);
    logmath_free(lmath);
    return 0;
}