226 lines
7.9 KiB
C
226 lines
7.9 KiB
C
/*************************************************************************/
|
|
/* */
|
|
/* Language Technologies Institute */
|
|
/* Carnegie Mellon University */
|
|
/* Copyright (c) 2001 */
|
|
/* All Rights Reserved. */
|
|
/* */
|
|
/* Permission is hereby granted, free of charge, to use and distribute */
|
|
/* this software and its documentation without restriction, including */
|
|
/* without limitation the rights to use, copy, modify, merge, publish, */
|
|
/* distribute, sublicense, and/or sell copies of this work, and to */
|
|
/* permit persons to whom this work is furnished to do so, subject to */
|
|
/* the following conditions: */
|
|
/* 1. The code must retain the above copyright notice, this list of */
|
|
/* conditions and the following disclaimer. */
|
|
/* 2. Any modifications must be clearly marked as such. */
|
|
/* 3. Original authors' names are not deleted. */
|
|
/* 4. The authors' names are not used to endorse or promote products */
|
|
/* derived from this software without specific prior written */
|
|
/* permission. */
|
|
/* */
|
|
/* CARNEGIE MELLON UNIVERSITY AND THE CONTRIBUTORS TO THIS WORK */
|
|
/* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
|
|
/* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
|
|
/* SHALL CARNEGIE MELLON UNIVERSITY NOR THE CONTRIBUTORS BE LIABLE */
|
|
/* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
|
|
/* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
|
|
/* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
|
|
/* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
|
|
/* THIS SOFTWARE. */
|
|
/* */
|
|
/*************************************************************************/
|
|
/* Author: Alan W Black (awb@cs.cmu.edu) */
|
|
/* Date: January 2001 */
|
|
/*************************************************************************/
|
|
/* */
|
|
/* An F0 model */
|
|
/* This is derived from the f2b model freely distributed in Festival */
|
|
/* */
|
|
/*************************************************************************/
|
|
|
|
#include "cst_hrg.h"
|
|
#include "cst_phoneset.h"
|
|
#include "us_f0.h"
|
|
|
|
static void apply_lr_model(cst_item *s,
|
|
const us_f0_lr_term *f0_lr_terms,
|
|
float *start,
|
|
float *mid,
|
|
float *end)
|
|
{
|
|
int i;
|
|
const cst_val *v=0;
|
|
float fv;
|
|
|
|
/* Interceptors */
|
|
*start = f0_lr_terms[0].start;
|
|
*mid = f0_lr_terms[0].mid;
|
|
*end = f0_lr_terms[0].end;
|
|
for (i=1; f0_lr_terms[i].feature; i++)
|
|
{
|
|
if (!cst_streq(f0_lr_terms[i].feature,f0_lr_terms[i-1].feature))
|
|
v = ffeature(s,f0_lr_terms[i].feature);
|
|
if (f0_lr_terms[i].type)
|
|
{
|
|
if (cst_streq(val_string(v),f0_lr_terms[i].type))
|
|
fv = 1.0;
|
|
else
|
|
fv = 0.0;
|
|
}
|
|
else
|
|
fv = val_float(v);
|
|
(*start) += fv*f0_lr_terms[i].start;
|
|
(*mid) += fv*f0_lr_terms[i].mid;
|
|
(*end) += fv*f0_lr_terms[i].end;
|
|
/* printf("f %s start %f mid %f end %f\n",
|
|
f0_lr_terms[i].feature,
|
|
*start,*mid,*end); */
|
|
}
|
|
}
|
|
|
|
/*
 * Append a new item to relation targ and set its "pos" and "f0" features.
 * The stored f0 is limited to the range 50.0 .. 500.0, because the model
 * can occasionally produce silly values.
 */
static void add_target_point(cst_relation *targ,float pos, float f0)
{
    cst_item *point;
    float bounded = f0;

    if (f0 > 500.0)
        bounded = 500.0;
    else if (f0 < 50.0)
        bounded = 50.0;

    point = relation_append(targ,NULL);
    item_set_float(point,"pos",pos);
    item_set_float(point,"f0",bounded);
}
|
|
|
|
/* Model mean and stddev, taken from f2b/kal_diphone */
#define model_mean 170.0
#define model_stddev 34
/*
 * Re-scale a value v expressed relative to the model's mean/stddev so that
 * it is expressed relative to mean m and stddev s:
 *     ((v - model_mean) / model_stddev) * s + m
 * Every parameter is parenthesised so that compound argument expressions
 * (e.g. "a+b") are not re-associated by operator precedence.
 */
#define map_f0(v,m,s) (((((v)-model_mean)/model_stddev)*(s))+(m))
|
|
|
|
/*
 * Return TRUE when syl has no previous item, or when the feature path
 * "R:SylStructure.daughter.R:Segment.p.name" evaluates to "pau"
 * (presumably the segment just before the syllable's first segment);
 * FALSE otherwise.
 */
static int post_break(cst_item *syl)
{
    if (item_prev(syl) == 0)
        return TRUE;

    if (cst_streq("pau",
                  ffeature_string(syl,
                                  "R:SylStructure.daughter.R:Segment.p.name")))
        return TRUE;

    return FALSE;
}
|
|
|
|
/*
 * Return TRUE when syl has no next item, or when the feature path
 * "R:SylStructure.daughtern.R:Segment.n.name" evaluates to "pau"
 * (presumably the segment just after the syllable's last segment);
 * FALSE otherwise.
 */
static int pre_break(cst_item *syl)
{
    if (item_next(syl) == 0)
        return TRUE;

    if (cst_streq("pau",
                  ffeature_string(syl,
                                  "R:SylStructure.daughtern.R:Segment.n.name")))
        return TRUE;

    return FALSE;
}
|
|
|
|
/*
 * Return the time point midway through the vowel of syllable syl.
 *
 * The syllable's SylStructure daughters are scanned in order for the first
 * one whose phone feature "vc" is "+".  The result is the average of that
 * segment's "end" and the "R:Segment.p.end" feature evaluated on it.
 * If no daughter qualifies, the first daughter is used instead; if the
 * syllable has no daughters at all, 0 is returned.  Neither fallback is
 * expected to be reached in practice.
 */
static float vowel_mid(cst_item *syl)
{
    const cst_phoneset *ps = item_phoneset(syl);
    cst_item *first = item_daughter(item_as(syl,"SylStructure"));
    cst_item *seg;

    /* no segments, shouldn't happen */
    if (first == 0)
        return 0;

    for (seg = first; seg; seg = item_next(seg))
    {
        if (cst_streq("+", phone_feature_string(ps,
                                                item_feat_string(seg,"name"),
                                                "vc")))
            break;
    }

    /* no vowel in syllable, shouldn't happen: fall back to first segment */
    if (seg == 0)
        seg = first;

    return (item_feat_float(seg,"end")+
            ffeature_float(seg,"R:Segment.p.end"))/2.0;
}
|
|
|
|
/*
 * F0 target model (Black and Hunt, ICSLP96).
 *
 * Creates the "Target" relation on u and fills it with ("pos","f0") points:
 * for every syllable that has segments, one point at the syllable start and
 * one at the vowel midpoint, plus one at the syllable end when pre_break()
 * holds.  Afterwards the target list is extended, if needed, so that it
 * begins at position 0 and reaches the "end" of the last Segment item.
 *
 * Does nothing when the utterance carries the "no_f0_target_model" feature.
 * Always returns u.
 */
cst_utterance *us_f0_model(cst_utterance *u)
{
    cst_relation *targets;
    cst_item *syllable, *edge, *lead;
    float base_mean, base_stddev;       /* utterance-wide mean / stddev */
    float syl_mean, syl_stddev;         /* values used for this syllable */
    float shift, range;
    float f0_start, f0_mid, f0_end;     /* raw model outputs */
    float prev_end = 0;                 /* mapped end value carried over */
    float utt_end;

    if (feat_present(u->features,"no_f0_target_model"))
        return u;

    targets = utt_relation_create(u,"Target");
    base_mean = get_param_float(u->features,"int_f0_target_mean", 100.0);
    base_mean *= get_param_float(u->features,"f0_shift", 1.0);
    base_stddev = get_param_float(u->features,"int_f0_target_stddev", 12.0);

    syllable = relation_head(utt_relation(u,"Syllable"));
    while (syllable)
    {
        /* syllables without segments contribute no targets */
        if (item_daughter(item_as(syllable,"SylStructure")))
        {
            /* A non-zero local shift scales the utterance mean; a zero
               value leaves the utterance mean unchanged */
            shift = ffeature_float(syllable,"R:SylStructure.parent.R:Token.parent.local_f0_shift");
            syl_mean = shift ? shift * base_mean : base_mean;

            /* A non-zero local range replaces the utterance stddev */
            range = ffeature_float(syllable,"R:SylStructure.parent.R:Token.parent.local_f0_range");
            syl_stddev = (range == 0.0) ? base_stddev : range;

            apply_lr_model(syllable,f0_lr_terms,&f0_start,&f0_mid,&f0_end);

            /* After a break the carried-over value is reset from this
               syllable's own start.
               NOTE(review): prev_end holds an already-mapped value yet is
               averaged with the raw f0_start below before mapping again;
               kept exactly as in the original -- confirm intent. */
            if (post_break(syllable))
                prev_end = map_f0(f0_start,syl_mean,syl_stddev);

            add_target_point(targets,
                             ffeature_float(syllable,
                                            "R:SylStructure.daughter.R:Segment.p.end"),
                             map_f0((f0_start+prev_end)/2.0,syl_mean,syl_stddev));
            add_target_point(targets,
                             vowel_mid(syllable),
                             map_f0(f0_mid,syl_mean,syl_stddev));

            prev_end = map_f0(f0_end,syl_mean,syl_stddev);

            if (pre_break(syllable))
                add_target_point(targets,
                                 ffeature_float(syllable,"R:SylStructure.daughtern.end"),
                                 map_f0(f0_end,syl_mean,syl_stddev));
        }
        syllable = item_next(syllable);
    }

    /* Guarantee targets go from start to end of utterance */
    edge = relation_head(targets);
    if (edge == 0)
    {
        add_target_point(targets,0,base_mean);
    }
    else if (item_feat_float(edge,"pos") > 0)
    {
        /* copy the first target's f0 back to position 0 */
        lead = item_prepend(edge,NULL);
        item_set_float(lead,"pos",0.0);
        item_set_float(lead,"f0",item_feat_float(edge,"f0"));
    }

    edge = relation_tail(targets);
    utt_end = item_feat_float(relation_tail(utt_relation(u,"Segment")),"end");
    if (item_feat_float(edge,"pos") < utt_end)
        add_target_point(targets,utt_end,item_feat_float(edge,"f0"));

    return u;
}
|