/*************************************************************************/ /* */ /* Language Technologies Institute */ /* Carnegie Mellon University */ /* Copyright (c) 2009 */ /* All Rights Reserved. */ /* */ /* Permission is hereby granted, free of charge, to use and distribute */ /* this software and its documentation without restriction, including */ /* without limitation the rights to use, copy, modify, merge, publish, */ /* distribute, sublicense, and/or sell copies of this work, and to */ /* permit persons to whom this work is furnished to do so, subject to */ /* the following conditions: */ /* 1. The code must retain the above copyright notice, this list of */ /* conditions and the following disclaimer. */ /* 2. Any modifications must be clearly marked as such. */ /* 3. Original authors' names are not deleted. */ /* 4. The authors' names are not used to endorse or promote products */ /* derived from this software without specific prior written */ /* permission. */ /* */ /* CARNEGIE MELLON UNIVERSITY AND THE CONTRIBUTORS TO THIS WORK */ /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */ /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */ /* SHALL CARNEGIE MELLON UNIVERSITY NOR THE CONTRIBUTORS BE LIABLE */ /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */ /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */ /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */ /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */ /* THIS SOFTWARE. */ /* */ /*************************************************************************/ /* Author: Alan W Black (awb@cs.cmu.edu) */ /* Date: January 2009 */ /*************************************************************************/ /* */ /* flowm functions for flite access */ /* */ /*************************************************************************/ #include #include #include #include "cst_wchar.h" #include "flite.h" #include "flowm.h" /* For debugging its sometimes good to switch off the actual synthesis */ #define DOTTS 1 static cst_audiodev *fl_ad = 0; #ifdef DOTTS cst_voice *register_cmu_us_kal(const char *voxdir); void unregister_cmu_us_kal(cst_voice *v); cst_voice *register_cmu_us_awb(const char *voxdir); void unregister_cmu_us_awb(cst_voice *v); cst_voice *register_cmu_us_rms(const char *voxdir); void unregister_cmu_us_rms(cst_voice *v); cst_voice *register_cmu_us_slt(const char *voxdir); void unregister_cmu_us_slt(cst_voice *v); #endif cst_wave *previous_wave = NULL; typedef struct VoxDef_struct { TCHAR *name; cst_voice *(*rv)(const char *voxdir); /* register_voice */ void (*urv)(cst_voice *v); /* unregister_voice */ int min_buffsize; /* for audio streaming */ cst_voice *v; } VoxDef; VoxDef VoxDefs[] = { #ifdef cmu_us_kal { L"kal", register_cmu_us_kal, unregister_cmu_us_kal, 256, NULL }, #endif #ifdef cmu_us_awb { L"awb", register_cmu_us_awb, unregister_cmu_us_awb, 2000, NULL }, #endif #ifdef cmu_us_rms { L"rms", register_cmu_us_rms, unregister_cmu_us_rms, 2000, NULL }, #endif #ifdef cmu_us_slt { L"slt", register_cmu_us_slt, unregister_cmu_us_slt, 2000, NULL }, #endif { NULL, NULL } }; cst_utterance *flowm_print_relation_callback(cst_utterance *u); cst_utterance *flowm_utt_callback(cst_utterance *u); int flowm_audio_stream_chunk(const cst_wave *w, int start, int size, int last, void *user); float flowm_find_file_percentage() { if (flowm_file_size <= 0) return 0.0; else return (flowm_file_pos*100.0)/flowm_file_size; } TCHAR *flowm_voice_name(int i) { /* In order not to have flite things in flowm_main, we provide an */ /* interface to the voice list */ return VoxDefs[i].name; } void flowm_init() { #ifdef DOTTS int i; cst_audio_streaming_info *asi; flite_init(); /* Initialize flite interface */ for (i=0; VoxDefs[i].name; i++) { VoxDefs[i].v = (VoxDefs[i].rv)(NULL); /* register voice */ /* Set up call back function for low level audio streaming */ /* This way it plays the waveform as it synthesizes it */ /* This is necessary for the slower (CG) voices */ asi = new_audio_streaming_info(); asi->asc = flowm_audio_stream_chunk; asi->min_buffsize = VoxDefs[i].min_buffsize; feat_set(VoxDefs[i].v->features, "streaming_info", audio_streaming_info_val(asi)); /* Set up call back function for sending what tokens are being */ /* synthesized and for keeping track of the current position in */ /* the file */ feat_set(VoxDefs[i].v->features, "utt_user_callback", uttfunc_val(flowm_utt_callback)); /* For outputing results of a relation (only used in play) */ feat_set(VoxDefs[i].v->features, "post_synth_hook_func", uttfunc_val(flowm_print_relation_callback)); } #endif return; } void flowm_terminate() { #ifdef DOTTS int i; for (i=0; VoxDefs[i].name; i++) { (VoxDefs[i].urv)(VoxDefs[i].v); /* unregister voice */ } #endif if (previous_wave) { delete_wave(previous_wave); previous_wave = NULL; } return; } int flowm_save_wave(TCHAR *filename) { /* Save the Last synthesized waveform file to filename */ char *sfilename; int rc; if (!previous_wave) return -1; sfilename = cst_wstr2cstr(filename); rc = cst_wave_save_riff(previous_wave,sfilename); cst_free(sfilename); return rc; } #ifdef DOTTS int flowm_say_text(TCHAR *text) { char *s; int ns; cst_voice *v; if (previous_wave) { delete_wave(previous_wave); previous_wave = NULL; } s = cst_wstr2cstr(text); /* text to synthesize */ v = VoxDefs[flowm_selected_voice].v; /* voice to synthesize with */ feat_remove(v->features,"print_info_relation"); if (flowm_selected_relation == 1) feat_set_string(v->features, "print_info_relation", "Word"); if (flowm_selected_relation == 2) feat_set_string(v->features, "print_info_relation", "Segment"); /* Do the synthesis */ previous_wave = flite_text_to_wave(s,v); ns = cst_wave_num_samples(previous_wave); cst_free(s); audio_flush(fl_ad); audio_close(fl_ad); fl_ad = NULL; return ns; } #else int flowm_say_text(TCHAR *text) { MessageBoxW(0,text,L"SayText",0); return 0; } #endif cst_utterance *flowm_print_relation_callback(cst_utterance *u) { /* Say the details of a named relation for display */ char rst[FL_MAX_MSG_CHARS]; const char *name; const char *relname; cst_item *item; char *space; space = ""; relname = get_param_string(u->features,"print_info_relation", NULL); cst_sprintf(rst,"%s: ",relname); if (!relname) { mbstowcs(fl_tts_msg,"",FL_MAX_MSG_CHARS); return u; } for (item=relation_head(utt_relation(u,relname)); item; item=item_next(item)) { name = item_feat_string(item,"name"); if (cst_strlen(name)+1+4 < FL_MAX_MSG_CHARS) cst_sprintf(rst,"%s%s%s",rst,space,name); else if (cst_strlen(rst)+4 < FL_MAX_MSG_CHARS) cst_sprintf(rst,"%s ...",rst); else break; space = " "; } mbstowcs(fl_tts_msg,rst,FL_MAX_MSG_CHARS); return u; } cst_utterance *flowm_utt_callback(cst_utterance *u) { char rst[FL_MAX_MSG_CHARS]; const char *tok; cst_item *item; char *space; int extend_length; /* In order to stop the synthesizer if the STOP button is pressed */ /* This stops the synthesis of the next utterance */ if ((flowm_play_status == FLOWM_PLAY) || (flowm_play_status == FLOWM_SKIP)) { if (TTSWindow) { rst[0] = '\0'; space = ""; for (item=relation_head(utt_relation(u,"Token")); item; item=item_next(item)) { tok = item_feat_string(item,"name"); if (cst_streq("",space)) /* Only do this on the first token/word */ flowm_file_pos = item_feat_int(item,"file_pos"); extend_length = cst_strlen(rst) + 1 + cst_strlen(item_feat_string(item,"prepunctuation"))+ cst_strlen(item_feat_string(item,"punc")); if (cst_strlen(tok)+extend_length+4 < FL_MAX_MSG_CHARS) cst_sprintf(rst,"%s%s%s%s%s",rst,space, item_feat_string(item,"prepunctuation"), tok, item_feat_string(item,"punc")); else { if (cst_strlen(rst)+4 < FL_MAX_MSG_CHARS) cst_sprintf(rst,"%s ...",rst); break; } space = " "; } if (flowm_file_pos > flowm_prev_utt_pos[flowm_utt_pos_pos]) { if ((flowm_utt_pos_pos+1) >= FLOWM_NUM_UTT_POS) { /* Filled it up, so move it down */ memmove(flowm_prev_utt_pos,&flowm_prev_utt_pos[1], sizeof(int)*(FLOWM_NUM_UTT_POS-10)); flowm_utt_pos_pos = (FLOWM_NUM_UTT_POS-10); } flowm_utt_pos_pos++; flowm_prev_utt_pos[flowm_utt_pos_pos] = flowm_file_pos; } /* Send text to TTSWindow */ mbstowcs(fl_tts_msg,rst,FL_MAX_MSG_CHARS); SetDlgItemText(TTSWindow, FL_SYNTHTEXT, fl_tts_msg); /* Update file pos percentage in FilePos window */ cst_sprintf(rst,"%2.3f",flowm_find_file_percentage()); mbstowcs(fl_fp_msg,rst,FL_MAX_MSG_CHARS); SetDlgItemText(TTSWindow, FL_FILEPOS, fl_fp_msg); SystemIdleTimerReset(); /* keep alive while synthesizing */ if (flowm_play_status == FLOWM_SKIP) flowm_play_status = FLOWM_PLAY; } return u; } else { delete_utterance(u); return 0; } } int flowm_audio_stream_chunk(const cst_wave *w, int start, int size, int last, void *user) { if (fl_ad == NULL) { fl_ad = audio_open(w->sample_rate,w->num_channels,CST_AUDIO_LINEAR16); } if (flowm_play_status == FLOWM_PLAY) { audio_write(fl_ad,&w->samples[start],size*sizeof(short)); return CST_AUDIO_STREAM_CONT; } else if (flowm_play_status == FLOWM_BENCH) { /* Do TTS but don't actually play it */ /* How much have we played */ flowm_duration += (size*1.0)/w->sample_rate; return CST_AUDIO_STREAM_CONT; } else { /* for STOP, and the SKIPS (if they get here) */ return CST_AUDIO_STREAM_STOP; } } #ifdef DOTTS int flowm_say_file(TCHAR *tfilename) { int rc = 0; char *filename; cst_voice *v; if (previous_wave) { /* This is really tidy up from Play -- but might say space */ delete_wave(previous_wave); previous_wave = NULL; } if (fl_ad) { MessageBoxW(0,L"audio fd still open",L"SayFile",0); audio_close(fl_ad); fl_ad = NULL; } v = VoxDefs[flowm_selected_voice].v; /* Where we want to start from */ feat_set_int(v->features, "file_start_position", flowm_file_pos); /* Only do print_info in play mode */ feat_remove(v->features,"print_info_relation"); filename = cst_wstr2cstr(tfilename); rc = flite_file_to_speech(filename, v, "stream"); cst_free(filename); audio_flush(fl_ad); audio_close(fl_ad); fl_ad = NULL; return rc; } #else int flowm_say_file(TCHAR *text) { MessageBoxW(0,text,L"SayFile",0); return 0; } #endif