#!/bin/sh
###########################################################################
##                                                                       ##
##                    Language Technologies Institute                    ##
##                      Carnegie Mellon University                       ##
##                        Copyright (c) 2001-2008                        ##
##                         All Rights Reserved.                          ##
##                                                                       ##
##  Permission is hereby granted, free of charge, to use and distribute  ##
##  this software and its documentation without restriction, including   ##
##  without limitation the rights to use, copy, modify, merge, publish,  ##
##  distribute, sublicense, and/or sell copies of this work, and to      ##
##  permit persons to whom this work is furnished to do so, subject to   ##
##  the following conditions:                                            ##
##   1. The code must retain the above copyright notice, this list of    ##
##      conditions and the following disclaimer.                         ##
##   2. Any modifications must be clearly marked as such.                ##
##   3. Original authors' names are not deleted.                         ##
##   4. The authors' names are not used to endorse or promote products   ##
##      derived from this software without specific prior written        ##
##      permission.                                                      ##
##                                                                       ##
##  CARNEGIE MELLON UNIVERSITY AND THE CONTRIBUTORS TO THIS WORK         ##
##  DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING      ##
##  ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT   ##
##  SHALL CARNEGIE MELLON UNIVERSITY NOR THE CONTRIBUTORS BE LIABLE      ##
##  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES    ##
##  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN   ##
##  AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,          ##
##  ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF       ##
##  THIS SOFTWARE.                                                       ##
##                                                                       ##
###########################################################################
##                                                                       ##
##  Convert an EST WFST into C code (with awk and sed!)                  ##
##                                                                       ##
###########################################################################

# Usage: $0 NAME.wfst > output.c
#
# Reads the WFST file named by the first argument, drops everything up to
# and including the EST_Header_End line, and writes C source to standard
# output:
#   - one "#define fsm_NAME_state_S" per state line (its value is the index
#     of the first entry for that state in the transition table),
#   - one "#define fsm_NAME_trans_I" per transition-table entry,
#   - a "static short fsm_NAME_trans[]" array listing those entries,
#   - a "static cst_fsm fsm_NAME" initializer.
# NAME is the basename of the input file with a trailing ".wfst" removed.

if [ $# -lt 1 ]; then
    echo "usage: $0 file.wfst" >&2
    exit 1
fi

if [ ! -r "$1" ]; then
    echo "$0: cannot read $1" >&2
    exit 1
fi

# The name is handed to awk with -v rather than spliced into the program
# text, so unusual characters in the file name cannot alter the program.
name=$(basename "$1" .wfst)

sed '1,/EST_Header_End/d' "$1" |
awk -v name="$name" '
BEGIN {
    # mmm maps each supported symbol to its ASCII code: a-z plus "#",
    # "V" and "N".  Any other symbol looks up as empty and is therefore
    # printed as 0 by the %d in symstate().
    letters = "abcdefghijklmnopqrstuvwxyz";
    for (i = 1; i <= 26; i++)
        mmm[substr(letters, i, 1)] = 96 + i;
    mmm["#"] = 35;
    mmm["V"] = 86;
    mmm["N"] = 78;

    state = "0";
    trans = 0;      # index of the next transition-table entry
}

# Return the C expression stored for a transition on symbol SYM into state
# STATE: the start index of the target state times 128, plus the symbol
# code.  128 is the same value emitted as the vocab size in END.
function symstate(sym, state)
{
    return sprintf("((fsm_%s_state_%d * 128) + %d)", name, state, mmm[sym]);
}

# Three-field line: presumably a state header such as "((S ...", with the
# state id starting at the third character of the first field.
NF == 3 {
    state = substr($1, 3);
    if (state != "0") {
        # Every state except "0" is preceded by a 0 entry, which separates
        # its transitions from those of the previous state.
        printf("#define fsm_%s_trans_%d 0\n", name, trans);
        trans++;
    }
    printf("#define fsm_%s_state_%d %d\n", name, state, trans);
    next;
}

# Four-field line: a transition; field 2 is the symbol looked up in mmm and
# field 3 is the target state.
NF == 4 {
    printf("#define fsm_%s_trans_%d %s\n", name, trans, symstate($2, $3));
    trans++;
}

END {
    # Closing 0 entry after the transitions of the last state.
    printf("#define fsm_%s_trans_%d 0\n", name, trans);
    trans++;

    # The table itself: every entry but the last is followed by a comma.
    printf("static short fsm_%s_trans[%d] = {\n", name, trans);
    for (i = 0; i < trans - 1; i++)
        printf(" fsm_%s_trans_%d,\n", name, i);
    printf(" fsm_%s_trans_%d\n", name, i);
    printf("};\n");

    printf("static cst_fsm fsm_%s = {\n", name);
    printf(" 128, /* vocab size */\n");
    printf(" %d, /* num_transitions */\n", trans);
    printf(" fsm_%s_trans\n", name);
    printf("};\n");
}'