291 lines
11 KiB
C
291 lines
11 KiB
C
/*************************************************************************/
|
|
/* */
|
|
/* Language Technologies Institute */
|
|
/* Carnegie Mellon University */
|
|
/* Copyright (c) 2001 */
|
|
/* All Rights Reserved. */
|
|
/* */
|
|
/* Permission is hereby granted, free of charge, to use and distribute */
|
|
/* this software and its documentation without restriction, including */
|
|
/* without limitation the rights to use, copy, modify, merge, publish, */
|
|
/* distribute, sublicense, and/or sell copies of this work, and to */
|
|
/* permit persons to whom this work is furnished to do so, subject to */
|
|
/* the following conditions: */
|
|
/* 1. The code must retain the above copyright notice, this list of */
|
|
/* conditions and the following disclaimer. */
|
|
/* 2. Any modifications must be clearly marked as such. */
|
|
/* 3. Original authors' names are not deleted. */
|
|
/* 4. The authors' names are not used to endorse or promote products */
|
|
/* derived from this software without specific prior written */
|
|
/* permission. */
|
|
/* */
|
|
/* CARNEGIE MELLON UNIVERSITY AND THE CONTRIBUTORS TO THIS WORK */
|
|
/* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
|
|
/* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
|
|
/* SHALL CARNEGIE MELLON UNIVERSITY NOR THE CONTRIBUTORS BE LIABLE */
|
|
/* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
|
|
/* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
|
|
/* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
|
|
/* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
|
|
/* THIS SOFTWARE. */
|
|
/* */
|
|
/*************************************************************************/
|
|
/* Author: Alan W Black (awb@cs.cmu.edu) */
|
|
/* Date: January 2001 */
|
|
/*************************************************************************/
|
|
/* Poor man's part of speech tagger */
|
|
/*************************************************************************/
|
|
|
|
#include "cst_val.h"
|
|
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_in,"in");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_of,"of");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_for,"for");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_on,"on");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_that,"that");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_with,"with");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_by,"by");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_at,"at");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_from,"from");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_as,"as");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_if,"if");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_against,"against");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_about,"about");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_before,"before");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_because,"because");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_under,"under");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_after,"after");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_over,"over");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_into,"into");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_while,"while");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_without,"without");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_through,"through");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_new,"new"); /* ??? */
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_between,"between");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_among,"among");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_until,"until");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_per,"per");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_up,"up");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_down,"down");
|
|
|
|
static const cst_val * const gpos_in_list[] = {
|
|
(cst_val *)&gpos_in,
|
|
(cst_val *)&gpos_of,
|
|
(cst_val *)&gpos_for,
|
|
(cst_val *)&gpos_in,
|
|
(cst_val *)&gpos_on,
|
|
(cst_val *)&gpos_that,
|
|
(cst_val *)&gpos_with,
|
|
(cst_val *)&gpos_by,
|
|
(cst_val *)&gpos_at,
|
|
(cst_val *)&gpos_from,
|
|
(cst_val *)&gpos_as,
|
|
(cst_val *)&gpos_if,
|
|
(cst_val *)&gpos_that,
|
|
(cst_val *)&gpos_against,
|
|
(cst_val *)&gpos_about,
|
|
(cst_val *)&gpos_before,
|
|
(cst_val *)&gpos_because,
|
|
(cst_val *)&gpos_under,
|
|
(cst_val *)&gpos_after,
|
|
(cst_val *)&gpos_over,
|
|
(cst_val *)&gpos_into,
|
|
(cst_val *)&gpos_while,
|
|
(cst_val *)&gpos_without,
|
|
(cst_val *)&gpos_through,
|
|
(cst_val *)&gpos_new,
|
|
(cst_val *)&gpos_between,
|
|
(cst_val *)&gpos_among,
|
|
(cst_val *)&gpos_until,
|
|
(cst_val *)&gpos_per,
|
|
(cst_val *)&gpos_up,
|
|
(cst_val *)&gpos_down,
|
|
0 };
|
|
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_to,"to");
|
|
|
|
static const cst_val * const gpos_to_list[] = {
|
|
(cst_val *)&gpos_to,
|
|
(cst_val *)&gpos_to,
|
|
0 };
|
|
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_det,"det");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_the,"the");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_a,"a");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_an,"an");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_some,"some");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_this,"this");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_each,"each");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_another,"another");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_those,"those");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_every,"every");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_all,"all");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_any,"any");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_these,"these");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_both,"both");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_neither,"neither");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_no,"no");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_many,"many");
|
|
|
|
static const cst_val * const gpos_det_list[] = {
|
|
(cst_val *)&gpos_det,
|
|
(cst_val *)&gpos_the,
|
|
(cst_val *)&gpos_a,
|
|
(cst_val *)&gpos_an,
|
|
(cst_val *)&gpos_no,
|
|
(cst_val *)&gpos_some,
|
|
(cst_val *)&gpos_this,
|
|
(cst_val *)&gpos_each,
|
|
(cst_val *)&gpos_another,
|
|
(cst_val *)&gpos_those,
|
|
(cst_val *)&gpos_every,
|
|
(cst_val *)&gpos_all,
|
|
(cst_val *)&gpos_any,
|
|
(cst_val *)&gpos_these,
|
|
(cst_val *)&gpos_both,
|
|
(cst_val *)&gpos_neither,
|
|
(cst_val *)&gpos_no,
|
|
(cst_val *)&gpos_many,
|
|
0 };
|
|
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_md,"md");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_will,"will");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_may,"may");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_would,"would");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_can,"can");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_could,"could");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_should,"should");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_must,"must");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_ought,"ought");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_might,"might");
|
|
|
|
static const cst_val * const gpos_md_list[] = {
|
|
(cst_val *)&gpos_md,
|
|
(cst_val *)&gpos_will,
|
|
(cst_val *)&gpos_may,
|
|
(cst_val *)&gpos_would,
|
|
(cst_val *)&gpos_can,
|
|
(cst_val *)&gpos_could,
|
|
(cst_val *)&gpos_should,
|
|
(cst_val *)&gpos_must,
|
|
(cst_val *)&gpos_ought,
|
|
(cst_val *)&gpos_might,
|
|
0 };
|
|
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_cc,"cc");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_and,"and");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_but,"but");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_or,"or");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_plus,"plus");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_yet,"yet");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_nor,"nor");
|
|
|
|
static const cst_val * const gpos_cc_list[] = {
|
|
(cst_val *)&gpos_cc,
|
|
(cst_val *)&gpos_and,
|
|
(cst_val *)&gpos_but,
|
|
(cst_val *)&gpos_or,
|
|
(cst_val *)&gpos_plus,
|
|
(cst_val *)&gpos_yet,
|
|
(cst_val *)&gpos_nor,
|
|
0 };
|
|
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_wp,"wp");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_who,"who");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_what,"what");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_where,"where");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_how,"how");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_when,"when");
|
|
|
|
static const cst_val * const gpos_wp_list[] = {
|
|
(cst_val *)&gpos_wp,
|
|
(cst_val *)&gpos_who,
|
|
(cst_val *)&gpos_what,
|
|
(cst_val *)&gpos_where,
|
|
(cst_val *)&gpos_how,
|
|
(cst_val *)&gpos_when,
|
|
0 };
|
|
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_pps,"pps");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_her,"her");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_his,"his");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_their,"their");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_its,"its");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_our,"our");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_mine,"mine");
|
|
|
|
static const cst_val * const gpos_pps_list[] = {
|
|
(cst_val *)&gpos_pps,
|
|
(cst_val *)&gpos_her,
|
|
(cst_val *)&gpos_his,
|
|
(cst_val *)&gpos_their,
|
|
(cst_val *)&gpos_its,
|
|
(cst_val *)&gpos_our,
|
|
(cst_val *)&gpos_mine,
|
|
0 };
|
|
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_aux,"aux");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_is,"is");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_am,"am");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_are,"are");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_was,"was");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_were,"were");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_has,"has");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_have,"have");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_had,"had");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_be,"be");
|
|
|
|
static const cst_val * const gpos_aux_list[] = {
|
|
(cst_val *)&gpos_aux,
|
|
(cst_val *)&gpos_is,
|
|
(cst_val *)&gpos_am,
|
|
(cst_val *)&gpos_are,
|
|
(cst_val *)&gpos_was,
|
|
(cst_val *)&gpos_were,
|
|
(cst_val *)&gpos_has,
|
|
(cst_val *)&gpos_have,
|
|
(cst_val *)&gpos_had,
|
|
(cst_val *)&gpos_be,
|
|
0 };
|
|
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_punc,"punc");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_dot,".");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_comma,",");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_colon,":");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_semicolon,";");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_dquote,"\"");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_squote,"'");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_leftparen,"(");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_qmark,"?");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_rightparen,")");
|
|
DEF_STATIC_CONST_VAL_STRING(gpos_emark,"!");
|
|
|
|
static const cst_val * const gpos_punc_list[] = {
|
|
(cst_val *)&gpos_punc,
|
|
(cst_val *)&gpos_dot,
|
|
(cst_val *)&gpos_comma,
|
|
(cst_val *)&gpos_colon,
|
|
(cst_val *)&gpos_semicolon,
|
|
(cst_val *)&gpos_dquote,
|
|
(cst_val *)&gpos_squote,
|
|
(cst_val *)&gpos_leftparen,
|
|
(cst_val *)&gpos_qmark,
|
|
(cst_val *)&gpos_rightparen,
|
|
(cst_val *)&gpos_emark,
|
|
0 };
|
|
|
|
const cst_val * const * const us_gpos[] = {
|
|
gpos_in_list,
|
|
gpos_to_list,
|
|
gpos_det_list,
|
|
gpos_md_list,
|
|
gpos_cc_list,
|
|
gpos_wp_list,
|
|
gpos_pps_list,
|
|
gpos_aux_list,
|
|
gpos_punc_list,
|
|
0 };
|
|
|
|
|
|
|