423 lines
15 KiB
Scheme
423 lines
15 KiB
Scheme
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
;;; ;;;
|
|
;;; Language Technologies Institute ;;;
|
|
;;; Carnegie Mellon University ;;;
|
|
;;; Copyright (c) 2001-2005 ;;;
|
|
;;; All Rights Reserved. ;;;
|
|
;;; ;;;
|
|
;;; Permission is hereby granted, free of charge, to use and distribute ;;;
|
|
;;; this software and its documentation without restriction, including ;;;
|
|
;;; without limitation the rights to use, copy, modify, merge, publish, ;;;
|
|
;;; distribute, sublicense, and/or sell copies of this work, and to ;;;
|
|
;;; permit persons to whom this work is furnished to do so, subject to ;;;
|
|
;;; the following conditions: ;;;
|
|
;;; 1. The code must retain the above copyright notice, this list of ;;;
|
|
;;; conditions and the following disclaimer. ;;;
|
|
;;; 2. Any modifications must be clearly marked as such. ;;;
|
|
;;; 3. Original authors' names are not deleted. ;;;
|
|
;;; 4. The authors' names are not used to endorse or promote products ;;;
|
|
;;; derived from this software without specific prior written ;;;
|
|
;;; permission. ;;;
|
|
;;; ;;;
|
|
;;; CARNEGIE MELLON UNIVERSITY AND THE CONTRIBUTORS TO THIS WORK ;;;
|
|
;;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ;;;
|
|
;;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ;;;
|
|
;;; SHALL CARNEGIE MELLON UNIVERSITY NOR THE CONTRIBUTORS BE LIABLE ;;;
|
|
;;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ;;;
|
|
;;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ;;;
|
|
;;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ;;;
|
|
;;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ;;;
|
|
;;; THIS SOFTWARE. ;;;
|
|
;;; ;;;
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
;;; Author: Alan W Black (awb@cs.cmu.edu) ;;;
|
|
;;; Date: January 2005 ;;;
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
;;; ;;;
|
|
;;; Take a diphone index and generate a single C track and residual ;;;
|
|
;;; for each of diphones in given index. A new index with pm ;;;
|
|
;;; positions is provided. ;;;
|
|
;;; ;;;
|
|
;;; Strategy 2: Dump into individual files ;;;
|
|
;;; ;;;
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
(defvar lpc_min -7.992630)
|
|
(defvar lpc_max 7.829990)
|
|
|
|
(define (diphtoC diphindexfn name lpcdir odir)
|
|
"(diphtoC diphindexfn name lpcdir odir)
|
|
Convert a diphone index and its LPC representations into three files,
|
|
an single track file with LPC coefficients, a wave file with
|
|
the residual, and an index file relating diph names to positions in
|
|
the main files."
|
|
(let
|
|
((diphindex (load diphindexfn t))
|
|
(ofdidx (fopen (path-append odir (string-append name "_diphone.c")) "w"))
|
|
(ofdlpc (fopen (path-append odir (string-append name "_lpc.c")) "w"))
|
|
(ofdres (fopen (path-append odir (string-append name "_res.c")) "w"))
|
|
(ofdresi (fopen (path-append odir (string-append name "_residx.c")) "w")))
|
|
|
|
(format ofdidx "/*****************************************************/\n")
|
|
(format ofdidx "/** Autogenerated Diphone DB for %s */\n" name)
|
|
(format ofdidx "/*****************************************************/\n")
|
|
(format ofdidx "\n")
|
|
(format ofdidx "#include \"cst_diphone.h\"\n")
|
|
(format ofdidx "\n")
|
|
(format ofdidx "extern const unsigned short %s_lpc[];\n" name)
|
|
(format ofdidx "extern const unsigned char %s_res[];\n" name)
|
|
(format ofdidx "extern const unsigned int %s_resi[];\n" name)
|
|
|
|
(format ofdlpc "/*****************************************************/\n")
|
|
(format ofdlpc "/** Autogenerated Diphone LPC for %s */\n" name)
|
|
(format ofdlpc "/*****************************************************/\n")
|
|
(format ofdlpc "\n")
|
|
(format ofdlpc "const unsigned short %s_lpc[] = {\n" name)
|
|
|
|
(format ofdres "/*****************************************************/\n")
|
|
(format ofdres "/** Autogenerated Diphone Residual for %s */\n" name)
|
|
(format ofdres "/*****************************************************/\n")
|
|
(format ofdres "\n")
|
|
(format ofdres "const unsigned char %s_res[] = {\n" name)
|
|
|
|
(format ofdresi "/****************************************************/\n")
|
|
(format ofdresi "/** Autogenerated Diphone Residual index for %s */\n" name)
|
|
(format ofdresi "/****************************************************/\n")
|
|
(format ofdresi "\n")
|
|
(format ofdresi "const unsigned int %s_resi[] = {\n" name)
|
|
|
|
(set! pm_pos 0)
|
|
(set! res_byte_pos 0)
|
|
(set! sample_pos 0)
|
|
(set! times nil)
|
|
(set! diphone_entries nil)
|
|
(set! num_unit_entries (length diphindex))
|
|
|
|
(while diphindex
|
|
(set! pms (find_pm_pos
|
|
(car diphindex)
|
|
lpcdir
|
|
ofdlpc
|
|
ofdres
|
|
ofdresi
|
|
))
|
|
|
|
;; Output unit_entry for this diphone
|
|
(set! diphone_entries
|
|
(cons
|
|
(list
|
|
(nth 0 (car diphindex))
|
|
(nth 2 pms) ; start_pm
|
|
(nth 3 pms) ; phone_boundary_pm
|
|
(nth 4 pms)) ; end_pm
|
|
diphone_entries))
|
|
(set! diphindex (cdr diphindex)))
|
|
|
|
(format ofdidx "\n\n")
|
|
|
|
(set! i (length times))
|
|
|
|
(format ofdidx "cst_sts_list %s_sts = {\n" name)
|
|
(format ofdidx " 0,0,\n") ;; no general index
|
|
(format ofdidx "#ifdef CST_NO_STATIC_VOX\n") ;; don't link in data
|
|
(format ofdidx " 0,0,0,\n")
|
|
(format ofdidx "#else\n")
|
|
(format ofdidx " %s_lpc,\n" name)
|
|
(format ofdidx " %s_res,\n" name)
|
|
(format ofdidx " %s_resi,\n" name)
|
|
(format ofdidx "#endif\n")
|
|
(format ofdidx " %d,\n" i)
|
|
(format ofdidx " %d,\n" lpc_order)
|
|
(format ofdidx " %d,\n" sample_rate)
|
|
(format ofdidx " %f,\n" lpc_min)
|
|
(format ofdidx " %f\n" lpc_range)
|
|
(format ofdidx "};\n\n")
|
|
|
|
(format ofdidx "static const cst_diphone_entry %s_index[] = { \n" name)
|
|
(mapcar
|
|
(lambda (e)
|
|
(format ofdidx " { \"%s\",%d,%d,%d },\n"
|
|
(nth 0 e)
|
|
(nth 1 e) ; start_pm
|
|
(- (nth 2 e) (nth 1 e)) ; phone_boundary_pm (delta)
|
|
(- (nth 3 e) (nth 2 e)) ; end_pm (delta)
|
|
))
|
|
(reverse diphone_entries))
|
|
(format ofdidx " { 0,0,0,0 } };\n\n")
|
|
|
|
(format ofdidx "const cst_diphone_db %s_db = {\n" name)
|
|
(format ofdidx " \"%s\",\n" name)
|
|
(format ofdidx " %d,\n" num_unit_entries)
|
|
(format ofdidx " %s_index,\n" name)
|
|
(format ofdidx " &%s_sts\n" name)
|
|
(format ofdidx "};\n\n")
|
|
|
|
(format ofdlpc " 0 };\n\n")
|
|
(format ofdres " 0 };\n\n")
|
|
(format ofdresi " %d };\n\n" res_byte_pos)
|
|
|
|
(fclose ofdidx)
|
|
(fclose ofdlpc)
|
|
(fclose ofdres)
|
|
(fclose ofdresi)
|
|
))
|
|
|
|
(define (find_pm_pos entry stsdir ofdlpc ofdres ofdresi)
|
|
"(find_pm_pos entry lpddir)
|
|
Diphone dics give times in seconds here we want them as indexes. This
|
|
function converts the lpc to ascii and finds the pitch marks that
|
|
go with this unit. These are written to ofdsts with ulaw residual
|
|
as short term signal."
|
|
(let ((sts_coeffs (load
|
|
(format nil "%s/%s.sts" stsdir (cadr entry))
|
|
t))
|
|
(start_time (nth 2 entry))
|
|
(phoneboundary_time (nth 3 entry))
|
|
(end_time (nth 4 entry))
|
|
start_pm pb_pm end_pm
|
|
(ltime 0))
|
|
(format t "%l\n" entry)
|
|
(set! sts_info (car sts_coeffs))
|
|
(set! sts_coeffs (cdr sts_coeffs))
|
|
(set! ltime 0)
|
|
(set! size_to_now 0)
|
|
(while (and sts_coeffs
|
|
(> (absdiff start_time (car (car sts_coeffs)))
|
|
(absdiff start_time (car (cadr sts_coeffs)))))
|
|
(set! ltime (car (car sts_coeffs)))
|
|
(set! sts_coeffs (cdr sts_coeffs)))
|
|
(set! sample_rate (nth 2 sts_info))
|
|
(set! lpc_order (nth 1 sts_info))
|
|
(set! lpc_min (nth 3 sts_info))
|
|
(set! lpc_range (nth 4 sts_info))
|
|
(set! start_pm pm_pos)
|
|
(while (and sts_coeffs
|
|
(> (absdiff phoneboundary_time (car (car sts_coeffs)))
|
|
(absdiff phoneboundary_time (car (cadr sts_coeffs)))))
|
|
(output_sts (car sts_coeffs) ofdlpc ofdres ofdresi)
|
|
(set! sts_coeffs (cdr sts_coeffs)))
|
|
(set! pb_pm pm_pos)
|
|
(while (and sts_coeffs (cdr sts_coeffs)
|
|
(> (absdiff end_time (car (car sts_coeffs)))
|
|
(absdiff end_time (car (cadr sts_coeffs)))))
|
|
(output_sts (car sts_coeffs) ofdlpc ofdres ofdresi)
|
|
(set! sts_coeffs (cdr sts_coeffs)))
|
|
(set! end_pm pm_pos)
|
|
|
|
(list
|
|
(car entry)
|
|
(cadr entry)
|
|
start_pm
|
|
pb_pm
|
|
end_pm)))
|
|
|
|
(define (output_sts frame ofdlpc ofdres ofdresi)
|
|
"(output_sts frame residual ofd)
|
|
Ouput this LPC frame."
|
|
(let ((time (nth 0 frame))
|
|
(coeffs (nth 1 frame))
|
|
(size (nth 2 frame))
|
|
(r (nth 3 frame)))
|
|
(set! times (cons time times))
|
|
|
|
(format ofdlpc "/* %d */ " pm_pos)
|
|
(while coeffs
|
|
(format ofdlpc " %d," (car coeffs))
|
|
(set! coeffs (cdr coeffs)))
|
|
(format ofdlpc "\n")
|
|
|
|
(set! s size)
|
|
(format ofdres "/* %d */ " pm_pos)
|
|
;; Use difference between residuals indices as size
|
|
; ;; Residual size encoded as two bytes (could one at a push)
|
|
; (format ofdres " %d,%d, "
|
|
; (/ (- size (% size 256)) 256)
|
|
; (% size 256))
|
|
(while r
|
|
(format ofdres " %d," (car r))
|
|
(set! r (cdr r)))
|
|
(format ofdres " \n")
|
|
|
|
(format ofdresi " %d, /* %d */\n" res_byte_pos pm_pos)
|
|
(set! res_byte_pos (+ res_byte_pos size))
|
|
|
|
(set! pm_pos (+ 1 pm_pos))
|
|
))
|
|
|
|
(define (output_stsS frame ofdlpc ofdres ofdresi)
|
|
"(output_sts frame residual ofd)
|
|
Ouput this LPC frame."
|
|
(format t "Spike excited diphones\n")
|
|
(let ((time (nth 0 frame))
|
|
(coeffs (nth 1 frame))
|
|
(size (nth 2 frame))
|
|
(r (nth 3 frame)))
|
|
(set! times (cons time times))
|
|
|
|
(format ofdlpc "/* %d */ " pm_pos)
|
|
(while coeffs
|
|
(format ofdlpc " %d," (car coeffs))
|
|
(set! coeffs (cdr coeffs)))
|
|
(format ofdlpc "\n")
|
|
|
|
(set! s size)
|
|
(format ofdres "/* %d */ " pm_pos)
|
|
|
|
(set! sump 0)
|
|
(while (cdr r)
|
|
(set! sump (+ (* (car r) (car r)) sump))
|
|
(set! r (cdr r)))
|
|
(format ofdresi " %d, /* %d */\n" (sqrt sump) pm_pos)
|
|
|
|
(set! pm_pos (+ 1 pm_pos))
|
|
))
|
|
|
|
(defvar spike_excited t)
|
|
(if (boundp 'spike_excited)
|
|
(set! output_sts output_stsS))
|
|
|
|
(define (oldfind_pm_pos entry lpcdir ofdsts)
|
|
"(find_pm_pos entry lpddir)
|
|
Diphone dics give times in seconds here we want them as indexes. This
|
|
function converts the lpc to ascii and finds the pitch marks that
|
|
go with this unit. These are written to ofdsts with ulaw residual
|
|
as short term signal."
|
|
(let ((lpc_coeffs (load_ascii_track
|
|
(format nil "%s/%s.lpc" lpcdir (cadr entry))
|
|
(nth 2 entry)))
|
|
(ulaw_residual (load_ascii_ulaw_wave
|
|
(format nil "%s/%s.res" lpcdir (cadr entry))
|
|
(nth 2 entry)
|
|
(nth 4 entry)))
|
|
(start_time (nth 2 entry))
|
|
(phoneboundary_time (nth 3 entry))
|
|
(end_time (nth 4 entry))
|
|
start_pm pb_pm end_pm
|
|
(ltime 0))
|
|
(format t "%l\n" entry)
|
|
(while (and lpc_coeffs
|
|
(> (absdiff start_time (car (car lpc_coeffs)))
|
|
(absdiff start_time (car (cadr lpc_coeffs)))))
|
|
; (format t "%f %f\n" start_time (car (car lpc_coeffs)))
|
|
(set! ltime (car (car lpc_coeffs)))
|
|
(set! lpc_coeffs (cdr lpc_coeffs)))
|
|
; (format t "start time %f %f\n" start_time (car (car lpc_coeffs)))
|
|
(set! start_pm pm_pos)
|
|
(while (and lpc_coeffs
|
|
(> (absdiff phoneboundary_time (car (car lpc_coeffs)))
|
|
(absdiff phoneboundary_time (car (cadr lpc_coeffs)))))
|
|
(output_sts (car lpc_coeffs)
|
|
(- (car (car lpc_coeffs)) ltime)
|
|
ulaw_residual ofdsts)
|
|
(set! ulaw_residual
|
|
(cons (car ulaw_residual)
|
|
(nth_cdr (* (- (car (car lpc_coeffs)) ltime)
|
|
(car ulaw_residual))
|
|
(cdr ulaw_residual))))
|
|
(set! ltime (car (car lpc_coeffs)))
|
|
(set! lpc_coeffs (cdr lpc_coeffs)))
|
|
(set! pb_pm pm_pos)
|
|
(while (and lpc_coeffs (cdr lpc_coeffs)
|
|
(> (absdiff end_time (car (car lpc_coeffs)))
|
|
(absdiff end_time (car (cadr lpc_coeffs)))))
|
|
(output_sts (car lpc_coeffs)
|
|
(- (car (car lpc_coeffs)) ltime)
|
|
ulaw_residual
|
|
ofdsts)
|
|
(set! ulaw_residual
|
|
(cons (car ulaw_residual)
|
|
(nth_cdr (* (- (car (car lpc_coeffs)) ltime)
|
|
(car ulaw_residual))
|
|
(cdr ulaw_residual))))
|
|
(set! ltime (car (car lpc_coeffs)))
|
|
(set! lpc_coeffs (cdr lpc_coeffs)))
|
|
(set! end_pm pm_pos)
|
|
|
|
(list
|
|
(car entry)
|
|
(cadr entry)
|
|
start_pm
|
|
pb_pm
|
|
end_pm)))
|
|
|
|
(define (oldoutput_sts frame duration residual ofd)
|
|
"(output_sts frame residual ofd)
|
|
Ouput this LPC frame."
|
|
(let ((size (* duration (car residual)))
|
|
(end_samp (* (car frame) (car residual))))
|
|
(set! sample_rate (car residual))
|
|
(set! lpc_order (- (length frame) 3))
|
|
(set! times (cons (car frame) times))
|
|
|
|
(format ofd "static const unsigned short diphs_frame_%d[] = { \n" pm_pos)
|
|
(set! frame (cddr frame)) ;; skip the "1"
|
|
(set! frame (cdr frame)) ;; skip the energy
|
|
(while (cdr frame)
|
|
(format ofd " %d," (lpccoeff_norm (car frame)))
|
|
(set! frame (cdr frame))
|
|
(if (not (cdr frame)) (format ofd " %d" (lpccoeff_norm (car frame)))))
|
|
(format ofd " };\n")
|
|
|
|
(format ofd "static const unsigned char diphs_res_%d[] = { \n" pm_pos)
|
|
(set! s size)
|
|
; (set! r (nth_cdr (- end_samp size) residual))
|
|
(set! r (cdr residual))
|
|
; (format t "length of residual is dur %f %d\n" duration (length r))
|
|
(while (> s 1)
|
|
(format ofd " %d," (car r))
|
|
(set! r (cdr r))
|
|
(set! s (- s 1))
|
|
(if (equal? s 1) (format ofd " %d" (car r))))
|
|
(format ofd " };\n")
|
|
|
|
(format ofd "#define diphs_size_%d %d\n" pm_pos size)
|
|
(format ofd "\n")
|
|
|
|
(set! pm_pos (+ 1 pm_pos))
|
|
))
|
|
|
|
(define (oldlpccoeff_norm c)
|
|
(* (/ (- c lpc_min) (- lpc_max lpc_min))
|
|
65535))
|
|
|
|
(define (oldload_ascii_ulaw_wave wavefilename starttime)
|
|
"(load_ascii_ulaw_wave wavefilename starttime)
|
|
Coverts wavefilename to simple ascii representation."
|
|
(let ((tmpfile (make_tmp_filename))
|
|
b)
|
|
(system (format nil "ch_wave -info %s | grep Sample |
|
|
awk '{printf(\"%%d \\n\",$3)}' >%s"
|
|
wavefilename tmpfile))
|
|
(system (format nil "ch_wave -otype raw -ostype ascii -start %f %s | asciiS2U >>%s"
|
|
starttime wavefilename tmpfile))
|
|
(set! b (load tmpfile t))
|
|
(delete-file tmpfile)
|
|
b))
|
|
|
|
(define (oldload_ascii_track trackfilename starttime)
|
|
"(load_ascii_track trackfilename)
|
|
Coverts trackfilename to simple ascii representation."
|
|
(let ((tmpfile (make_tmp_filename))
|
|
(nicestarttime (if (> starttime 0.100)
|
|
(- starttime 0.100)
|
|
starttime))
|
|
b)
|
|
(system (format nil "ch_track -otype est -start %f %s |
|
|
awk '{if ($1 == \"EST_Header_End\")
|
|
header=1;
|
|
else if (header == 1)
|
|
printf(\"( %%s )\\n\",$0)}'>%s"
|
|
nicestarttime trackfilename tmpfile))
|
|
(set! b (load tmpfile t))
|
|
(delete-file tmpfile)
|
|
b))
|
|
|
|
(define (absdiff a b)
|
|
(let ((d (- a b )))
|
|
(if (< d 0)
|
|
(* -1 d)
|
|
d)))
|
|
|
|
|
|
(provide 'make_didb)
|