rhubarb-lip-sync/rhubarb/lib/flite-1.4/tools/make_lts.scm

232 lines
9.7 KiB
Scheme

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; ;;;
;;; Language Technologies Institute ;;;
;;; Carnegie Mellon University ;;;
;;; Copyright (c) 1999 ;;;
;;; All Rights Reserved. ;;;
;;; ;;;
;;; Permission is hereby granted, free of charge, to use and distribute ;;;
;;; this software and its documentation without restriction, including ;;;
;;; without limitation the rights to use, copy, modify, merge, publish, ;;;
;;; distribute, sublicense, and/or sell copies of this work, and to ;;;
;;; permit persons to whom this work is furnished to do so, subject to ;;;
;;; the following conditions: ;;;
;;; 1. The code must retain the above copyright notice, this list of ;;;
;;; conditions and the following disclaimer. ;;;
;;; 2. Any modifications must be clearly marked as such. ;;;
;;; 3. Original authors' names are not deleted. ;;;
;;; 4. The authors' names are not used to endorse or promote products ;;;
;;; derived from this software without specific prior written ;;;
;;; permission. ;;;
;;; ;;;
;;; CARNEGIE MELLON UNIVERSITY AND THE CONTRIBUTORS TO THIS WORK ;;;
;;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ;;;
;;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ;;;
;;; SHALL CARNEGIE MELLON UNIVERSITY NOR THE CONTRIBUTORS BE LIABLE ;;;
;;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ;;;
;;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ;;;
;;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ;;;
;;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ;;;
;;; THIS SOFTWARE. ;;;
;;; ;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Author: Alan W Black (awb@cs.cmu.edu) ;;;
;;; Date: December 1999 ;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; ;;;
;;; Generate a C compilable lts rules. ;;;
;;; ;;;
;;; Two modes, from decision graphs as wfsts or from CART trees ;;;
;;; ;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; These are preordained by the LTS building process
(set! lts_context_window_size 4)
(set! lts_context_extra_feats 1)
(define (ltsregextoC name trees idir odir)
"(ltsregextoC name idir odir)
Converts its wfsts to a C compilation structure for flite. Assumes
$idir/[a-z].tree.wfst to compile from."
(let
((ofde (fopen (path-append odir (string-append name "_lts_rules.c")) "w"))
(ofdm (fopen (path-append odir (string-append name "_lts_model.c")) "w"))
(ofdh (fopen (path-append odir (string-append name "_lts_model.h")) "w"))
(ifd)
(rule_index nil))
(set! lts_pos 0)
(set! phone_table (list "epsilon"))
(set! letter_table (list "nothing" "#" "0"))
(set! letter_table (append letter_table (mapcar car trees)))
(format ofde "/*******************************************************/\n")
(format ofde "/** Autogenerated lts rules (regex) for %s */\n" name)
(format ofde "/** from %s */\n" idir)
(format ofde "/*******************************************************/\n")
(format ofde "\n")
(format ofde "#include \"cst_string.h\"\n")
(format ofde "#include \"cst_lts.h\"\n")
(format ofde "#include \"cst_lexicon.h\"\n")
(format ofdm "/*******************************************************/\n")
(format ofdm "/** Autogenerated lts rules (regex) for %s */\n" name)
(format ofdm "/** from %s */\n" idir)
(format ofdm "/*******************************************************/\n")
(format ofdm "\n")
(format ofdm "#include \"cst_string.h\"\n")
(format ofdm "#include \"cst_lts.h\"\n")
(format ofdm "#include \"cst_lexicon.h\"\n")
(format ofdm "#include \"%s_lts_model.h\"\n\n" name)
(format ofdm "const cst_lts_model %s_lts_model[] = \n" name)
(format ofdm "{\n")
(set! ln 0)
(mapcar
(lambda (l)
(let ((ifd (fopen (path-append idir
(string-append l ".tree.wfst")) "r")))
(format t "doing: %s\n" l)
(format ofdm " /** letter \"%s\" **/\n" l)
(format ofdh " /** letter \"%s\" **/\n" l)
(set! rule_index (cons (list l lts_pos ln) rule_index))
(set! lts_pos (dump_lts_wfst ln ifd ofdm ofdh lts_pos))
(fclose ifd)
(set! ln (+ 1 ln))
))
(cdr (cddr letter_table))
)
(format ofdm " 0, 0, 0,0, 0,0\n")
(format ofdm "};\n")
;; Make the letter table be the same order as the rule inde (+2)
;; The phone table (bytes to phone names)
(format ofde "\n")
(format ofde "const char * const %s_lts_phone_table[%d] = \n"
name (+ 1 (length phone_table)))
(format ofde "{\n")
(mapcar (lambda (p) (format ofde " \"%s\",\n" p)) phone_table)
(format ofde " NULL\n")
(format ofde "};\n")
;; The letter_table (bytes to letter names)
(format ofde "\n")
(format ofde "const char * const %s_lts_letter_table[%d] = \n"
name (+ 1 (length letter_table)))
(format ofde "{\n")
(mapcar (lambda (p) (format ofde " \"%s\",\n" p)) letter_table)
(format ofde " NULL\n")
(format ofde "};\n")
;; Which rule starts where
(format ofde "\n")
(format ofde "const cst_lts_addr %s_lts_letter_index[%d] = \n"
name (+ 1 (length rule_index)) )
(format ofde "{\n")
(mapcar
(lambda (p) (format ofde " %d, /* %s */\n" (car (cdr p)) (car p)))
(reverse rule_index))
(format ofde " 0\n")
(format ofde "};\n")
(format ofde "\n")
; (format ofde "const cst_lts_rules %s_lts_rules = {\n" name)
; (format ofde " \"%s\",\n" name)
; (format ofde " %s_lts_letter_index,\n" name)
; (format ofde " %s_lts_model,\n" name)
; (format ofde " %s_lts_phone_table,\n" name)
; (format ofde " 4, /* context_window_size */\n")
; (format ofde " 1, /* context_extra_feats */\n")
; (format ofde " %s_lts_letter_table\n" name)
; (format ofde "};\n")
; (format ofde "\n")
(fclose ofde)
(fclose ofdh)
(fclose ofdm)
))
(define (dump_lts_wfst ln ifd ofde ofdh lts_pos)
"(dump_lts_wfst ifd ofde ofdh lts_pos)
Dump the WFST as a byte table to ifd. Jumps are dumped as
#define's to ofdh so forward references work. lts_pos is the
rule position. Each state is saves as
feature value true_addr false_addr
Feature and value are single bytes, which addrs are double bytes."
(let ((state))
;; Skip WFST header
(while (not (string-equal (set! state (readfp ifd)) "EST_Header_End"))
(if (equal? state (eof-val))
(error "eof in lts regex file")))
(while (not (equal? (set! state (readfp ifd)) (eof-val)))
(format ofdh "#define LTS_STATE_%d_%d %s\n"
ln (car (car state))
(lts_bytify lts_pos))
(cond
((string-equal "final" (car (cdr (car state))))
(set! lts_pos (- lts_pos 1))
t) ;; do nothing
((string-matches (car (car (cdr state))) ".*_.*")
(format ofde " %s, '%s', %s , %s , \n"
(lts_feat (car (car (cdr state))))
(lts_val (car (car (cdr state))))
; (lts_phone (lts_letter (car (car (cdr state)))) 0 letter_table)
(format nil "LTS_STATE_%d_%d" ln
(car (cdr (cdr (car (cdr (cdr state)))))))
(format nil "LTS_STATE_%d_%d" ln
(car (cdr (cdr (car (cdr state))))))))
(t ;; its a letter output state
(format ofde " 255, %s, 0,0 , 0,0 , \n"
(lts_phone (car (car (cdr state))) 0 phone_table))))
(set! lts_pos (+ 1 lts_pos)))
lts_pos))
(define (lts_feat trans)
"(lts_feat trans)
Returns the feature number represented in this transition name."
(let ((fname (substring trans 5 (- (length trans) 11))))
(if (string-matches fname ".*_i?")
(set! fname (string-before fname "_")))
(cond
((string-equal fname "p.p.p.p.name") 0)
((string-equal fname "p.p.p.name") 1)
((string-equal fname "p.p.name") 2)
((string-equal fname "p.name") 3)
((string-equal fname "n.name") 4)
((string-equal fname "n.n.name") 5)
((string-equal fname "n.n.n.name") 6)
((string-equal fname "n.n.n.n.name") 7)
(t (error (format nil "ltsregex2C: unknown feat %s %s\n" fname trans ))))))
(define (lts_letter trans)
"(lts_val trans)
The letter being tested."
(string-before (string-after trans "is_") "_"))
(define (lts_val trans)
"(lts_val trans)
The letter being tested."
(substring trans (- (length trans) 2) 1))
(define (lts_phone p n table)
(cond
((string-equal p (car table))
n)
((not (cdr table)) ;; new p
(set-cdr! table (list p))
(+ 1 n))
(t
(lts_phone p (+ 1 n) (cdr table)))))
(define (lts_bytify n)
"(lts_bytify n)
Return this short as a two byte comma separated string."
(let ((xx (format nil "%04x" n)))
;; This is unfortunately byte order specific
(format nil "0x%s,0x%s"
(substring xx 2 2)
(substring xx 0 2))))
(provide 'make_lts)