rhubarb-lip-sync/rhubarb/lib/sphinxbase-rev13216/include/sphinxbase/agc.h

203 lines
6.0 KiB
C

/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
/* ====================================================================
* Copyright (c) 1999-2004 Carnegie Mellon University. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* This work was supported in part by funding from the Defense Advanced
* Research Projects Agency and the National Science Foundation of the
* United States of America, and the CMU Sphinx Speech Consortium.
*
* THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
* ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
* NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* ====================================================================
*
*/
/*
* agc.h -- Various forms of automatic gain control (AGC)
*
* **********************************************
* CMU ARPA Speech Project
*
* Copyright (c) 1999 Carnegie Mellon University.
* ALL RIGHTS RESERVED.
* **********************************************
*
* HISTORY
* $Log$
* Revision 1.1 2006/04/05 20:27:30 dhdfu
* A Great Reorganization of header files and executables
*
* Revision 1.8 2005/06/21 19:25:41 arthchan2003
* 1, Fixed doxygen documentation. 2, Added $ keyword.
*
* Revision 1.4 2005/06/13 04:02:56 archan
* Fixed most doxygen-style documentation under libs3decoder.
*
* Revision 1.3 2005/03/30 01:22:46 archan
* Fixed mistakes in last updates. Add
*
*
* 28-Apr-1999 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
* Copied from previous version.
*/
#ifndef _S3_AGC_H_
#define _S3_AGC_H_
/* Win32/WinCE DLL gunk */
#include <sphinxbase/sphinxbase_export.h>
#include <sphinxbase/prim_type.h>
#include <sphinxbase/fe.h>
/** \file agc.h
* \brief Routines that implement automatic gain control (AGC)
*
* \warning These routines may not be fully compatible with
* SphinxTrain's family of AGC.
*
* This header declares the AGC types and the routines that implement AGC.
*/
#ifdef __cplusplus
extern "C" {
#endif
#if 0
/* Fool Emacs. */
}
#endif
/**
* Types of acoustic gain control to apply to the features.
*/
typedef enum agc_type_e {
AGC_NONE = 0,
AGC_MAX,
AGC_EMAX,
AGC_NOISE
} agc_type_t;
/**
 * Convert string representation (from command-line) to agc_type_t.
 *
 * \param str String naming an AGC type.
 * \return The corresponding agc_type_t value.
 *
 * NOTE(review): the behaviour for an unrecognised or NULL string is not
 * visible in this header; check the implementation before relying on it.
 */
SPHINXBASE_EXPORT
agc_type_t agc_type_from_str(const char *str);
/**
 * String representations of agc_type_t values.
 *
 * Declared here with an incomplete array type, so sizeof cannot be used on
 * it from client code; the definition lives elsewhere.
 * NOTE(review): presumably indexed by the agc_type_t value -- confirm
 * against the definition.
 */
SPHINXBASE_EXPORT
extern const char *agc_type_str[];
/**
 * Structure holding data for doing AGC.
 *
 * Instances are obtained from agc_init() and released with agc_free().
 **/
typedef struct agc_s {
mfcc_t max; /**< Estimated max for current utterance (for AGC_EMAX) */
mfcc_t obs_max; /**< Observed max in current utterance */
int32 obs_frame; /**< Whether any data was observed after prev update */
int32 obs_utt; /**< Whether any utterances have been observed */
mfcc_t obs_max_sum; /**< Undocumented in the original; presumably a running sum of observed maxima -- TODO confirm against the implementation */
mfcc_t noise_thresh; /**< Noise threshold (for AGC_NOISE only) */
} agc_t;
/**
 * Initialize AGC structure with default values.
 *
 * \return Pointer to the initialized AGC structure; release it with
 *         agc_free().
 *
 * NOTE(review): the default values and the failure behaviour are not
 * visible in this header.
 */
SPHINXBASE_EXPORT
agc_t *agc_init(void);
/**
 * Free AGC structure.
 *
 * \param agc AGC structure to free (as returned by agc_init()).
 *
 * NOTE(review): whether a NULL argument is accepted is not visible in this
 * header.
 */
SPHINXBASE_EXPORT
void agc_free(agc_t *agc);
/**
 * Apply AGC to the given mfc vectors: normalize all C0 mfc coefficients in
 * the given input such that the max C0 value is 0, by subtracting the input
 * max C0 from all of them.
 *
 * This function operates on an entire utterance at a time. Hence, the entire
 * utterance must be available beforehand (batch mode). See agc_emax() for a
 * variant that does not require the whole utterance.
 */
SPHINXBASE_EXPORT
void agc_max(agc_t *agc, /**< In: AGC structure (not used) */
mfcc_t **mfc, /**< In/Out: mfc[f] = cepstrum vector in frame f */
int32 n_frame /**< In: number of frames of cepstrum vectors supplied */
);
/**
 * Apply AGC to the given block of MFC vectors.
 *
 * Unlike agc_max() this does not require the entire utterance to be
 * available, so it can be called on successive blocks of frames.
 * Call agc_emax_update() at the end of each utterance to update the AGC
 * parameters.
 */
SPHINXBASE_EXPORT
void agc_emax(agc_t *agc, /**< In: AGC structure */
mfcc_t **mfc, /**< In/Out: mfc[f] = cepstrum vector in frame f */
int32 n_frame /**< In: number of frames of cepstrum vectors supplied */
);
/**
 * Update AGC parameters for next utterance.
 *
 * Intended to be called at the end of each utterance processed with
 * agc_emax().
 **/
SPHINXBASE_EXPORT
void agc_emax_update(agc_t *agc /**< In: AGC structure */
);
/**
 * Get the current AGC maximum estimate.
 *
 * \param agc AGC structure to query.
 * \return The current maximum estimate as a float32.
 *
 * NOTE(review): presumably this reports the `max` field of agc_t converted
 * from mfcc_t -- confirm against the implementation.
 **/
SPHINXBASE_EXPORT
float32 agc_emax_get(agc_t *agc);
/**
 * Set the current AGC maximum estimate.
 *
 * \param agc AGC structure to modify.
 * \param m   New maximum estimate, given as a float32.
 *
 * NOTE(review): which other agc_t fields (if any) are reset alongside the
 * estimate is not visible in this header.
 **/
SPHINXBASE_EXPORT
void agc_emax_set(agc_t *agc, float32 m);
/**
 * Apply AGC using noise threshold to the given block of MFC vectors.
 *
 * The threshold can be read and changed with agc_get_threshold() and
 * agc_set_threshold().
 **/
SPHINXBASE_EXPORT
void agc_noise(agc_t *agc, /**< In: AGC structure */
mfcc_t **mfc, /**< In/Out: mfc[f] = cepstrum vector in frame f */
int32 n_frame /**< In: number of frames of cepstrum vectors supplied */
);
/**
 * Get the current AGC noise threshold.
 *
 * \param agc AGC structure to query.
 * \return The current noise threshold as a float32.
 *
 * NOTE(review): presumably this reports the `noise_thresh` field of agc_t --
 * confirm against the implementation.
 **/
SPHINXBASE_EXPORT
float32 agc_get_threshold(agc_t *agc);
/**
 * Set the current AGC noise threshold.
 *
 * \param agc       AGC structure to modify.
 * \param threshold New noise threshold, given as a float32.
 **/
SPHINXBASE_EXPORT
void agc_set_threshold(agc_t *agc, float32 threshold);
#ifdef __cplusplus
}
#endif
#endif