358 lines
9.7 KiB
C
358 lines
9.7 KiB
C
/*************************************************************************/
|
|
/* */
|
|
/* Language Technologies Institute */
|
|
/* Carnegie Mellon University */
|
|
/* Copyright (c) 2000 */
|
|
/* All Rights Reserved. */
|
|
/* */
|
|
/* Permission is hereby granted, free of charge, to use and distribute */
|
|
/* this software and its documentation without restriction, including */
|
|
/* without limitation the rights to use, copy, modify, merge, publish, */
|
|
/* distribute, sublicense, and/or sell copies of this work, and to */
|
|
/* permit persons to whom this work is furnished to do so, subject to */
|
|
/* the following conditions: */
|
|
/* 1. The code must retain the above copyright notice, this list of */
|
|
/* conditions and the following disclaimer. */
|
|
/* 2. Any modifications must be clearly marked as such. */
|
|
/* 3. Original authors' names are not deleted. */
|
|
/* 4. The authors' names are not used to endorse or promote products */
|
|
/* derived from this software without specific prior written */
|
|
/* permission. */
|
|
/* */
|
|
/* CARNEGIE MELLON UNIVERSITY AND THE CONTRIBUTORS TO THIS WORK */
|
|
/* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
|
|
/* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
|
|
/* SHALL CARNEGIE MELLON UNIVERSITY NOR THE CONTRIBUTORS BE LIABLE */
|
|
/* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
|
|
/* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
|
|
/* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
|
|
/* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
|
|
/* THIS SOFTWARE. */
|
|
/* */
|
|
/*************************************************************************/
|
|
/* Author: Alan W Black (awb@cs.cmu.edu) */
|
|
/* Date: August 2000 */
|
|
/*************************************************************************/
|
|
/* */
|
|
/* A Viterbi decoder */
|
|
/* */
|
|
/*************************************************************************/
|
|
|
|
#include <limits.h>
|
|
#include "cst_viterbi.h"
|
|
|
|
static void vit_point_init_path_array(cst_vit_point *n,int num_states);
|
|
static void vit_point_init_dynamic_path_array(cst_vit_point *n,
|
|
cst_vit_cand *c);
|
|
static void vit_add_paths(cst_viterbi *vd,
|
|
cst_vit_point *point,
|
|
cst_vit_path *path);
|
|
static void vit_add_path(cst_viterbi *vd,cst_vit_point *p, cst_vit_path *np);
|
|
static cst_vit_path *find_best_path(cst_viterbi *vd);
|
|
static int betterthan(cst_viterbi *v,int a, int b);
|
|
|
|
cst_vit_cand *new_vit_cand()
|
|
{
|
|
cst_vit_cand *c = cst_alloc(struct cst_vit_cand_struct,1);
|
|
|
|
return c;
|
|
}
|
|
|
|
/* Make val the value carried by candidate vc.                           */
/*   vc  - candidate to update                                           */
/*   val - value to attach; its reference count is incremented           */
/* Any value previously held by vc is handed to delete_val().            */
void vit_cand_set(cst_vit_cand *vc, cst_val *val)
{
    cst_val *previous = vc->val;

    /* Re-setting the value already held is a no-op.  The old order      */
    /* (delete first, then increment) could touch a value that           */
    /* delete_val had just released.                                     */
    /* NOTE(review): assumes delete_val drops one reference and frees    */
    /* the value when none remain -- confirm against cst_val.            */
    if (previous == val)
        return;

    /* Take our reference on the new value before letting go of the old  */
    /* one, so val survives even if it is reachable from previous.       */
    vc->val = val;
    val_inc_refcount(val);

    if (previous)
        delete_val(previous);
}
|
|
|
|
/* Give candidate vc an integer value: the raw number is kept in         */
/* vc->ival and a value built by int_val() is attached via vit_cand_set. */
void vit_cand_set_int(cst_vit_cand *vc, int ival)
{
    cst_val *boxed = int_val(ival);

    vit_cand_set(vc, boxed);
    vc->ival = ival;
}
|
|
|
|
/* Free a whole candidate list starting at vc (NULL is accepted).        */
/* For every node its value is passed to delete_val() and the node       */
/* itself to cst_free().                                                 */
void delete_vit_cand(cst_vit_cand *vc)
{
    cst_vit_cand *following;

    while (vc != NULL)
    {
        delete_val(vc->val);
        following = vc->next;   /* remember the link before the node goes */
        cst_free(vc);
        vc = following;
    }
}
|
|
|
|
cst_vit_path *new_vit_path()
|
|
{
|
|
cst_vit_path *p = cst_alloc(struct cst_vit_path_struct,1);
|
|
|
|
return p;
|
|
}
|
|
|
|
/* Free a list of paths linked through ->next, starting at vp            */
/* (NULL is accepted).  A path's feature set, when present, is released  */
/* with delete_features().  The ->from and ->cand links are not          */
/* followed.                                                             */
void delete_vit_path(cst_vit_path *vp)
{
    cst_vit_path *rest;

    for ( ; vp != NULL; vp = rest)
    {
        if (vp->f != NULL)
            delete_features(vp->f);
        rest = vp->next;        /* remember the link before the node goes */
        cst_free(vp);
    }
}
|
|
|
|
cst_vit_point *new_vit_point()
|
|
{
|
|
cst_vit_point *p = cst_alloc(struct cst_vit_point_struct,1);
|
|
|
|
return p;
|
|
}
|
|
|
|
/* Free the timeline starting at vp (NULL is accepted).                  */
/* For each point this releases:                                         */
/*   - the ->paths list,                                                 */
/*   - every path stored in ->state_paths and the array itself,          */
/*   - the ->cands candidate list,                                       */
/*   - the point record.                                                 */
/* The timeline is walked with a loop rather than by recursing on        */
/* ->next, so stack use does not grow with the number of points.         */
void delete_vit_point(cst_vit_point *vp)
{
    cst_vit_point *next_point;
    int i;

    while (vp != NULL)
    {
        next_point = vp->next;

        if (vp->paths)
            delete_vit_path(vp->paths);

        /* Test the array pointer, not num_states.                       */
        /* vit_point_init_path_array() can be asked for zero states      */
        /* (dynamic mode with an empty candidate list); that leaves      */
        /* num_states at 0 while an array may still have been handed     */
        /* out, and it used to be leaked.                                */
        /* NOTE(review): relies on records from cst_alloc starting out   */
        /* zeroed, as the rest of this file already does.                */
        if (vp->state_paths != NULL)
        {
            for (i = 0; i < vp->num_states; i++)
            {
                if (vp->state_paths[i])
                    delete_vit_path(vp->state_paths[i]);
            }
            cst_free(vp->state_paths);
        }

        delete_vit_cand(vp->cands);
        cst_free(vp);

        vp = next_point;
    }
}
|
|
|
|
cst_viterbi *new_viterbi(cst_vit_cand_f_t *cand_func,
|
|
cst_vit_path_f_t *path_func)
|
|
{
|
|
cst_viterbi *v = cst_alloc(struct cst_viterbi_struct,1);
|
|
|
|
v->cand_func = cand_func;
|
|
v->path_func = path_func;
|
|
v->f = new_features();
|
|
return v;
|
|
}
|
|
|
|
/* Destroy a decoder: its timeline, its feature set and the decoder      */
/* record itself.  Passing NULL does nothing.                            */
void delete_viterbi(cst_viterbi *vd)
{
    if (vd == NULL)
        return;

    delete_vit_point(vd->timeline);
    delete_features(vd->f);
    cst_free(vd);
}
|
|
|
|
/* Build the decoder's timeline from relation r.                         */
/* One point is created per item, plus one extra point (whose item is    */
/* NULL) after the last item -- so even an empty relation yields a       */
/* single point.  vd->timeline is the first point and vd->last_point     */
/* the extra one.                                                        */
/* What is pre-allocated depends on vd->num_states:                      */
/*    > 0 : every point gets a state table of that size                  */
/*   == 0 : (general beam search) the first point gets one empty path    */
/*   == -1: (number of states follows the candidates) only the first     */
/*          point gets a table, holding a single state                   */
void viterbi_initialise(cst_viterbi *vd,cst_relation *r)
{
    cst_item *it = relation_head(r);
    cst_vit_point *tail = NULL;
    cst_vit_point *pt;

    for (;;)
    {
        pt = new_vit_point();
        pt->item = it;
        if (vd->num_states > 0)     /* a state based viterbi search */
            vit_point_init_path_array(pt,vd->num_states);

        /* append to the timeline */
        if (tail == NULL)
            vd->timeline = pt;
        else
            tail->next = pt;
        tail = pt;

        /* the point just added for the NULL item is the extra, final one */
        if (it == NULL)
        {
            vd->last_point = pt;
            break;
        }
        it = item_next(it);
    }

    if (vd->num_states == 0)
        vd->timeline->paths = new_vit_path();
    else if (vd->num_states == -1)
        vit_point_init_path_array(vd->timeline,1);
}
|
|
|
|
/* Give point n a state table with num_states slots (allocated with      */
/* cst_alloc) and record that size in n->num_states.                     */
static void vit_point_init_path_array(cst_vit_point *n,int num_states)
{
    n->state_paths = cst_alloc(cst_vit_path*,num_states);
    n->num_states = num_states;
}
|
|
|
|
static void vit_point_init_dynamic_path_array(cst_vit_point *n,
|
|
cst_vit_cand *c)
|
|
{
|
|
int i;
|
|
cst_vit_cand *cc;
|
|
|
|
for (cc=c,i=0; cc; i++,cc=cc->next)
|
|
cc->pos = i;
|
|
|
|
vit_point_init_path_array(n,i);
|
|
}
|
|
|
|
/* Run the search along the timeline built by viterbi_initialise().      */
/* For every point except the last:                                      */
/*   1. cand_func supplies the candidates, which are stored on the point */
/*   2. when num_states is -1 the next point's state table is sized      */
/*      from those candidates                                            */
/*   3. every live state of the point is extended by every candidate     */
/*      through path_func, and the resulting paths are offered to the    */
/*      next point                                                       */
/* A state is live when it holds a path; state 0 of the very first       */
/* point is also treated as live, so path_func is called there with a    */
/* NULL path.                                                            */
/* With num_states == 0 (general beam search) only an error message is   */
/* produced, as that mode is not implemented.                            */
void viterbi_decode(cst_viterbi *vd)
{
    cst_vit_point *pt;
    cst_vit_cand *cand;
    cst_vit_path *extended;
    int s;

    for (pt = vd->timeline; pt->next != NULL; pt = pt->next)
    {
        pt->cands = (*vd->cand_func)(pt->item,vd);

        if (vd->num_states == 0)    /* general beam search */
        {
            cst_errmsg("viterbi, general beam search not implemented\n");
            continue;
        }

        /* true viterbi from here on */
        if (vd->num_states == -1)   /* dynamic number of states (# cands) */
            vit_point_init_dynamic_path_array(pt->next,pt->cands);

        for (s = 0; s < pt->num_states; s++)
        {
            /* skip states nothing has reached, except the start state */
            if (!((pt == vd->timeline) && (s == 0)) &&
                (pt->state_paths[s] == NULL))
                continue;

            for (cand = pt->cands; cand != NULL; cand = cand->next)
            {
                extended = (*vd->path_func)(pt->state_paths[s],cand,vd);
                vit_add_paths(vd,pt->next,extended);
            }
        }
    }
}
|
|
|
|
static void vit_add_paths(cst_viterbi *vd,
|
|
cst_vit_point *point,
|
|
cst_vit_path *path)
|
|
{
|
|
/* Add a list of paths at point */
|
|
cst_vit_path *p, *next_p;
|
|
|
|
for (p=path; p; p=next_p)
|
|
{
|
|
next_p = p->next; /* as p could be deleted is not required */
|
|
vit_add_path(vd,point,p);
|
|
}
|
|
}
|
|
|
|
/* Offer a single path np to point p.                                    */
/* The slot used is p->state_paths[np->state].  np is stored there when  */
/* the slot is empty, or when np's score beats the stored path's score   */
/* (as judged by betterthan); in the latter case the stored path is      */
/* deleted.  Otherwise np itself is deleted.                             */
static void vit_add_path(cst_viterbi *vd,cst_vit_point *p, cst_vit_path *np)
{
    cst_vit_path **slot = &p->state_paths[np->state];

    if (*slot == NULL)
    {   /* nothing recorded for this state yet */
        *slot = np;
        return;
    }

    if (!betterthan(vd,np->score,(*slot)->score))
    {   /* the recorded path is at least as good: drop the newcomer */
        delete_vit_path(np);
        return;
    }

    /* the newcomer wins: replace the recorded path */
    delete_vit_path(*slot);
    *slot = np;
}
|
|
|
|
/* Write the outcome of the search back onto the items.                  */
/* The best path is followed backwards through ->from, and for every     */
/* step that has a candidate, that candidate's value is set on the       */
/* candidate's item under the name n.                                    */
/* Returns TRUE on success, including the case where the timeline is     */
/* missing or has only one point; returns FALSE when no best path can    */
/* be found.                                                             */
int viterbi_result(cst_viterbi *vd,const char *n)
{
    cst_vit_path *step;

    if ((vd->timeline == NULL) || (vd->timeline->next == NULL))
        return TRUE;    /* nothing to decode counts as success */

    step = find_best_path(vd);
    if (step == NULL)
        return FALSE;

    while (step != NULL)
    {
        if (step->cand != NULL)
            item_set(step->cand->item,n,step->cand->val);
        step = step->from;
    }

    return TRUE;
}
|
|
|
|
/* Copy the feature called featname from each step of the best path to   */
/* the item of that step's candidate.  Steps without a candidate, or     */
/* whose feature set lacks featname, are skipped.  Nothing happens when  */
/* no best path is found.                                                */
void viterbi_copy_feature(cst_viterbi *vd,const char *featname)
{
    cst_vit_path *step = find_best_path(vd);

    while (step != NULL)
    {
        if (step->cand && feat_present(step->f, featname))
            item_set(step->cand->item,featname,feat_val(step->f,featname));
        step = step->from;
    }
}
|
|
|
|
/* Return the best-scoring path stored at the decoder's last point.      */
/* Returns NULL when:                                                    */
/*   - the decoder has no last point (no timeline has been built),       */
/*   - num_states is 0 (no state tables are kept in that mode),          */
/*   - no state of the last point holds a path, or                       */
/*   - no stored score is strictly better than the starting bound        */
/*     (-INT_MAX when big is good, INT_MAX otherwise), so a path scored  */
/*     exactly at that bound is never selected.                          */
/* On equal scores the lowest-numbered state wins, because the           */
/* comparison in betterthan() is strict.                                 */
static cst_vit_path *find_best_path(cst_viterbi *vd)
{
    cst_vit_point *end = vd->last_point;
    cst_vit_path *best_p = NULL;
    cst_vit_path *candidate;
    int best;
    int i;

    /* viterbi_result() tolerates a decoder without a timeline;          */
    /* viterbi_copy_feature() gets here with no such check, so guard it  */
    /* here.                                                             */
    if (end == NULL)
        return NULL;

    if (vd->num_states == 0)
        return NULL;

    /* start from the worst possible score for the chosen ordering */
    best = vd->big_is_good ? -INT_MAX : INT_MAX;

    for (i = 0; i < end->num_states; i++)
    {
        candidate = end->state_paths[i];
        if ((candidate != NULL) &&
            betterthan(vd,candidate->score,best))
        {
            best = candidate->score;
            best_p = candidate;
        }
    }

    return best_p;
}
|
|
|
|
/* Non-zero when score a is strictly better than score b.                */
/* Which direction counts as better is chosen per decoder: with          */
/* v->big_is_good set, larger wins (e.g. probabilities); otherwise       */
/* smaller wins.                                                         */
static int betterthan(cst_viterbi *v,int a, int b)
{
    return v->big_is_good ? (a > b) : (a < b);
}
|