232 lines
9.7 KiB
Scheme
232 lines
9.7 KiB
Scheme
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
;;; ;;;
|
|
;;; Language Technologies Institute ;;;
|
|
;;; Carnegie Mellon University ;;;
|
|
;;; Copyright (c) 1999 ;;;
|
|
;;; All Rights Reserved. ;;;
|
|
;;; ;;;
|
|
;;; Permission is hereby granted, free of charge, to use and distribute ;;;
|
|
;;; this software and its documentation without restriction, including ;;;
|
|
;;; without limitation the rights to use, copy, modify, merge, publish, ;;;
|
|
;;; distribute, sublicense, and/or sell copies of this work, and to ;;;
|
|
;;; permit persons to whom this work is furnished to do so, subject to ;;;
|
|
;;; the following conditions: ;;;
|
|
;;; 1. The code must retain the above copyright notice, this list of ;;;
|
|
;;; conditions and the following disclaimer. ;;;
|
|
;;; 2. Any modifications must be clearly marked as such. ;;;
|
|
;;; 3. Original authors' names are not deleted. ;;;
|
|
;;; 4. The authors' names are not used to endorse or promote products ;;;
|
|
;;; derived from this software without specific prior written ;;;
|
|
;;; permission. ;;;
|
|
;;; ;;;
|
|
;;; CARNEGIE MELLON UNIVERSITY AND THE CONTRIBUTORS TO THIS WORK ;;;
|
|
;;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ;;;
|
|
;;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ;;;
|
|
;;; SHALL CARNEGIE MELLON UNIVERSITY NOR THE CONTRIBUTORS BE LIABLE ;;;
|
|
;;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ;;;
|
|
;;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ;;;
|
|
;;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ;;;
|
|
;;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ;;;
|
|
;;; THIS SOFTWARE. ;;;
|
|
;;; ;;;
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
;;; Author: Alan W Black (awb@cs.cmu.edu) ;;;
|
|
;;; Date: December 1999 ;;;
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
;;; ;;;
|
|
;;; Generate a C compilable lts rules. ;;;
|
|
;;; ;;;
|
|
;;; Two modes, from decision graphs as wfsts or from CART trees ;;;
|
|
;;; ;;;
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
;;; These are preordained by the LTS building process
|
|
(set! lts_context_window_size 4)
|
|
(set! lts_context_extra_feats 1)
|
|
|
|
(define (ltsregextoC name trees idir odir)
|
|
"(ltsregextoC name idir odir)
|
|
Converts its wfsts to a C compilation structure for flite. Assumes
|
|
$idir/[a-z].tree.wfst to compile from."
|
|
(let
|
|
((ofde (fopen (path-append odir (string-append name "_lts_rules.c")) "w"))
|
|
(ofdm (fopen (path-append odir (string-append name "_lts_model.c")) "w"))
|
|
(ofdh (fopen (path-append odir (string-append name "_lts_model.h")) "w"))
|
|
(ifd)
|
|
(rule_index nil))
|
|
(set! lts_pos 0)
|
|
(set! phone_table (list "epsilon"))
|
|
(set! letter_table (list "nothing" "#" "0"))
|
|
(set! letter_table (append letter_table (mapcar car trees)))
|
|
|
|
(format ofde "/*******************************************************/\n")
|
|
(format ofde "/** Autogenerated lts rules (regex) for %s */\n" name)
|
|
(format ofde "/** from %s */\n" idir)
|
|
(format ofde "/*******************************************************/\n")
|
|
(format ofde "\n")
|
|
(format ofde "#include \"cst_string.h\"\n")
|
|
(format ofde "#include \"cst_lts.h\"\n")
|
|
(format ofde "#include \"cst_lexicon.h\"\n")
|
|
|
|
(format ofdm "/*******************************************************/\n")
|
|
(format ofdm "/** Autogenerated lts rules (regex) for %s */\n" name)
|
|
(format ofdm "/** from %s */\n" idir)
|
|
(format ofdm "/*******************************************************/\n")
|
|
(format ofdm "\n")
|
|
(format ofdm "#include \"cst_string.h\"\n")
|
|
(format ofdm "#include \"cst_lts.h\"\n")
|
|
(format ofdm "#include \"cst_lexicon.h\"\n")
|
|
(format ofdm "#include \"%s_lts_model.h\"\n\n" name)
|
|
(format ofdm "const cst_lts_model %s_lts_model[] = \n" name)
|
|
(format ofdm "{\n")
|
|
|
|
(set! ln 0)
|
|
(mapcar
|
|
(lambda (l)
|
|
(let ((ifd (fopen (path-append idir
|
|
(string-append l ".tree.wfst")) "r")))
|
|
(format t "doing: %s\n" l)
|
|
(format ofdm " /** letter \"%s\" **/\n" l)
|
|
(format ofdh " /** letter \"%s\" **/\n" l)
|
|
(set! rule_index (cons (list l lts_pos ln) rule_index))
|
|
(set! lts_pos (dump_lts_wfst ln ifd ofdm ofdh lts_pos))
|
|
(fclose ifd)
|
|
(set! ln (+ 1 ln))
|
|
))
|
|
(cdr (cddr letter_table))
|
|
)
|
|
(format ofdm " 0, 0, 0,0, 0,0\n")
|
|
(format ofdm "};\n")
|
|
|
|
;; Make the letter table be the same order as the rule inde (+2)
|
|
|
|
;; The phone table (bytes to phone names)
|
|
(format ofde "\n")
|
|
(format ofde "const char * const %s_lts_phone_table[%d] = \n"
|
|
name (+ 1 (length phone_table)))
|
|
(format ofde "{\n")
|
|
(mapcar (lambda (p) (format ofde " \"%s\",\n" p)) phone_table)
|
|
(format ofde " NULL\n")
|
|
(format ofde "};\n")
|
|
|
|
;; The letter_table (bytes to letter names)
|
|
(format ofde "\n")
|
|
(format ofde "const char * const %s_lts_letter_table[%d] = \n"
|
|
name (+ 1 (length letter_table)))
|
|
(format ofde "{\n")
|
|
(mapcar (lambda (p) (format ofde " \"%s\",\n" p)) letter_table)
|
|
(format ofde " NULL\n")
|
|
(format ofde "};\n")
|
|
|
|
;; Which rule starts where
|
|
(format ofde "\n")
|
|
(format ofde "const cst_lts_addr %s_lts_letter_index[%d] = \n"
|
|
name (+ 1 (length rule_index)) )
|
|
(format ofde "{\n")
|
|
(mapcar
|
|
(lambda (p) (format ofde " %d, /* %s */\n" (car (cdr p)) (car p)))
|
|
(reverse rule_index))
|
|
(format ofde " 0\n")
|
|
(format ofde "};\n")
|
|
|
|
(format ofde "\n")
|
|
|
|
; (format ofde "const cst_lts_rules %s_lts_rules = {\n" name)
|
|
; (format ofde " \"%s\",\n" name)
|
|
; (format ofde " %s_lts_letter_index,\n" name)
|
|
; (format ofde " %s_lts_model,\n" name)
|
|
; (format ofde " %s_lts_phone_table,\n" name)
|
|
; (format ofde " 4, /* context_window_size */\n")
|
|
; (format ofde " 1, /* context_extra_feats */\n")
|
|
; (format ofde " %s_lts_letter_table\n" name)
|
|
; (format ofde "};\n")
|
|
; (format ofde "\n")
|
|
|
|
(fclose ofde)
|
|
(fclose ofdh)
|
|
(fclose ofdm)
|
|
))
|
|
|
|
(define (dump_lts_wfst ln ifd ofde ofdh lts_pos)
|
|
"(dump_lts_wfst ifd ofde ofdh lts_pos)
|
|
Dump the WFST as a byte table to ifd. Jumps are dumped as
|
|
#define's to ofdh so forward references work. lts_pos is the
|
|
rule position. Each state is saves as
|
|
feature value true_addr false_addr
|
|
Feature and value are single bytes, which addrs are double bytes."
|
|
(let ((state))
|
|
;; Skip WFST header
|
|
(while (not (string-equal (set! state (readfp ifd)) "EST_Header_End"))
|
|
(if (equal? state (eof-val))
|
|
(error "eof in lts regex file")))
|
|
(while (not (equal? (set! state (readfp ifd)) (eof-val)))
|
|
(format ofdh "#define LTS_STATE_%d_%d %s\n"
|
|
ln (car (car state))
|
|
(lts_bytify lts_pos))
|
|
(cond
|
|
((string-equal "final" (car (cdr (car state))))
|
|
(set! lts_pos (- lts_pos 1))
|
|
t) ;; do nothing
|
|
((string-matches (car (car (cdr state))) ".*_.*")
|
|
(format ofde " %s, '%s', %s , %s , \n"
|
|
(lts_feat (car (car (cdr state))))
|
|
(lts_val (car (car (cdr state))))
|
|
; (lts_phone (lts_letter (car (car (cdr state)))) 0 letter_table)
|
|
(format nil "LTS_STATE_%d_%d" ln
|
|
(car (cdr (cdr (car (cdr (cdr state)))))))
|
|
(format nil "LTS_STATE_%d_%d" ln
|
|
(car (cdr (cdr (car (cdr state))))))))
|
|
(t ;; its a letter output state
|
|
(format ofde " 255, %s, 0,0 , 0,0 , \n"
|
|
(lts_phone (car (car (cdr state))) 0 phone_table))))
|
|
(set! lts_pos (+ 1 lts_pos)))
|
|
lts_pos))
|
|
|
|
(define (lts_feat trans)
|
|
"(lts_feat trans)
|
|
Returns the feature number represented in this transition name."
|
|
(let ((fname (substring trans 5 (- (length trans) 11))))
|
|
(if (string-matches fname ".*_i?")
|
|
(set! fname (string-before fname "_")))
|
|
(cond
|
|
((string-equal fname "p.p.p.p.name") 0)
|
|
((string-equal fname "p.p.p.name") 1)
|
|
((string-equal fname "p.p.name") 2)
|
|
((string-equal fname "p.name") 3)
|
|
((string-equal fname "n.name") 4)
|
|
((string-equal fname "n.n.name") 5)
|
|
((string-equal fname "n.n.n.name") 6)
|
|
((string-equal fname "n.n.n.n.name") 7)
|
|
(t (error (format nil "ltsregex2C: unknown feat %s %s\n" fname trans ))))))
|
|
|
|
(define (lts_letter trans)
|
|
"(lts_val trans)
|
|
The letter being tested."
|
|
(string-before (string-after trans "is_") "_"))
|
|
|
|
(define (lts_val trans)
|
|
"(lts_val trans)
|
|
The letter being tested."
|
|
(substring trans (- (length trans) 2) 1))
|
|
|
|
(define (lts_phone p n table)
|
|
(cond
|
|
((string-equal p (car table))
|
|
n)
|
|
((not (cdr table)) ;; new p
|
|
(set-cdr! table (list p))
|
|
(+ 1 n))
|
|
(t
|
|
(lts_phone p (+ 1 n) (cdr table)))))
|
|
|
|
(define (lts_bytify n)
|
|
"(lts_bytify n)
|
|
Return this short as a two byte comma separated string."
|
|
(let ((xx (format nil "%04x" n)))
|
|
;; This is unfortunately byte order specific
|
|
(format nil "0x%s,0x%s"
|
|
(substring xx 2 2)
|
|
(substring xx 0 2))))
|
|
|
|
(provide 'make_lts)
|