Remove redundant semicolons

This commit is contained in:
Daniel Wolf 2019-10-02 20:54:12 +02:00
parent 62c179863e
commit 00c19a26f2
1 changed files with 136 additions and 136 deletions

View File

@ -162,8 +162,8 @@ data class GaussianProbabilityResult(
val delta: Int val delta: Int
) )
val kCompVar = 22005; val kCompVar = 22005
val kLog2Exp = 5909; // log2(exp(1)) in Q12. val kLog2Exp = 5909 // log2(exp(1)) in Q12.
// Calculates the probability for [input], given that [input] comes from a // Calculates the probability for [input], given that [input] comes from a
// normal distribution with mean and standard deviation ([mean], [std]). // normal distribution with mean and standard deviation ([mean], [std]).
@ -235,7 +235,7 @@ fun GaussianProbability(input: Int, mean: Int, std: Int): GaussianProbabilityRes
exp_value = 0x0400 or (tmp16 and 0x03FF) exp_value = 0x0400 or (tmp16 and 0x03FF)
tmp16 = tmp16 xor 0xFFFF tmp16 = tmp16 xor 0xFFFF
tmp16 = tmp16 shr 10 tmp16 = tmp16 shr 10
tmp16 += 1; tmp16 += 1
// Get [exp_value] = exp(-[tmp32]) in Q10. // Get [exp_value] = exp(-[tmp32]) in Q10.
exp_value = exp_value shr tmp16 exp_value = exp_value shr tmp16
} }
@ -397,7 +397,7 @@ fun WeightedAverage(data: IntArray, channel: Int, offset: Int, weights: IntArray
// //
// - returns : the VAD decision (0 - noise, 1 - speech). // - returns : the VAD decision (0 - noise, 1 - speech).
fun GmmProbability(self: VadInstT, features: List<Int>, total_power: Int, frame_length: Int): Int { fun GmmProbability(self: VadInstT, features: List<Int>, total_power: Int, frame_length: Int): Int {
var vadflag = 0; var vadflag = 0
var tmp_s16: Int var tmp_s16: Int
var tmp1_s16: Int var tmp1_s16: Int
var tmp2_s16: Int var tmp2_s16: Int
@ -409,7 +409,7 @@ fun GmmProbability(self: VadInstT, features: List<Int>, total_power: Int, frame_
val noise_probability = IntArray(kNumGaussians) val noise_probability = IntArray(kNumGaussians)
val speech_probability = IntArray(kNumGaussians) val speech_probability = IntArray(kNumGaussians)
assert(frame_length == 80); assert(frame_length == 80)
if (total_power > kMinEnergy) { if (total_power > kMinEnergy) {
// The signal power of current frame is large enough for processing. The // The signal power of current frame is large enough for processing. The
@ -430,21 +430,21 @@ fun GmmProbability(self: VadInstT, features: List<Int>, total_power: Int, frame_
var h0_test = 0 var h0_test = 0
var h1_test = 0 var h1_test = 0
for (k in 0 until kNumGaussians) { for (k in 0 until kNumGaussians) {
val gaussian = channel + k * kNumChannels; val gaussian = channel + k * kNumChannels
// Probability under H0, that is, probability of frame being noise. // Probability under H0, that is, probability of frame being noise.
// Value given in Q27 = Q7 * Q20. // Value given in Q27 = Q7 * Q20.
val pNoise = GaussianProbability(features[channel], self.noise_means[gaussian], self.noise_stds[gaussian]) val pNoise = GaussianProbability(features[channel], self.noise_means[gaussian], self.noise_stds[gaussian])
deltaN[gaussian] = pNoise.delta deltaN[gaussian] = pNoise.delta
noise_probability[k] = kNoiseDataWeights[gaussian] * pNoise.probability noise_probability[k] = kNoiseDataWeights[gaussian] * pNoise.probability
h0_test += noise_probability[k]; // Q27 h0_test += noise_probability[k] // Q27
// Probability under H1, that is, probability of frame being speech. // Probability under H1, that is, probability of frame being speech.
// Value given in Q27 = Q7 * Q20. // Value given in Q27 = Q7 * Q20.
val pSpeech = GaussianProbability(features[channel], self.speech_means[gaussian], self.speech_stds[gaussian]) val pSpeech = GaussianProbability(features[channel], self.speech_means[gaussian], self.speech_stds[gaussian])
speech_probability[k] = kSpeechDataWeights[gaussian] * pSpeech.probability speech_probability[k] = kSpeechDataWeights[gaussian] * pSpeech.probability
deltaS[gaussian] = pSpeech.delta deltaS[gaussian] = pSpeech.delta
h1_test += speech_probability[k]; // Q27 h1_test += speech_probability[k] // Q27
} }
// Calculate the log likelihood ratio: log2(Pr{X|H1} / Pr{X|H0}). // Calculate the log likelihood ratio: log2(Pr{X|H1} / Pr{X|H0}).
@ -461,7 +461,7 @@ fun GmmProbability(self: VadInstT, features: List<Int>, total_power: Int, frame_
// Further, b0 and b1 are independent and on the average the two terms cancel. // Further, b0 and b1 are independent and on the average the two terms cancel.
val shifts_h0 = if (h0_test != 0) NormW32(h0_test) else 31 val shifts_h0 = if (h0_test != 0) NormW32(h0_test) else 31
val shifts_h1 = if (h1_test != 0) NormW32(h1_test) else 31 val shifts_h1 = if (h1_test != 0) NormW32(h1_test) else 31
val log_likelihood_ratio = shifts_h0 - shifts_h1; val log_likelihood_ratio = shifts_h0 - shifts_h1
// Update [sum_log_likelihood_ratios] with spectrum weighting. This is // Update [sum_log_likelihood_ratios] with spectrum weighting. This is
// used for the global VAD decision. // used for the global VAD decision.
@ -480,22 +480,22 @@ fun GmmProbability(self: VadInstT, features: List<Int>, total_power: Int, frame_
// High probability of noise. Assign conditional probabilities for each // High probability of noise. Assign conditional probabilities for each
// Gaussian in the GMM. // Gaussian in the GMM.
val tmp = (noise_probability[0] and 0xFFFFF000u.toInt()) shl 2 // Q29 val tmp = (noise_probability[0] and 0xFFFFF000u.toInt()) shl 2 // Q29
ngprvec[channel] = DivW32W16(tmp, h0); // Q14 ngprvec[channel] = DivW32W16(tmp, h0) // Q14
ngprvec[channel + kNumChannels] = 16384 - ngprvec[channel]; ngprvec[channel + kNumChannels] = 16384 - ngprvec[channel]
} else { } else {
// Low noise probability. Assign conditional probability 1 to the first // Low noise probability. Assign conditional probability 1 to the first
// Gaussian and 0 to the rest (which is already set at initialization). // Gaussian and 0 to the rest (which is already set at initialization).
ngprvec[channel] = 16384; ngprvec[channel] = 16384
} }
// Calculate local speech probabilities used later when updating the GMM. // Calculate local speech probabilities used later when updating the GMM.
val h1 = (h1_test shr 12); // Q15 val h1 = (h1_test shr 12) // Q15
if (h1 > 0) { if (h1 > 0) {
// High probability of speech. Assign conditional probabilities for each // High probability of speech. Assign conditional probabilities for each
// Gaussian in the GMM. Otherwise use the initialized values, i.e., 0. // Gaussian in the GMM. Otherwise use the initialized values, i.e., 0.
val tmp = (speech_probability[0] and 0xFFFFF000u.toInt()) shl 2; // Q29 val tmp = (speech_probability[0] and 0xFFFFF000u.toInt()) shl 2 // Q29
sgprvec[channel] = DivW32W16(tmp, h1) // Q14 sgprvec[channel] = DivW32W16(tmp, h1) // Q14
sgprvec[channel + kNumChannels] = 16384 - sgprvec[channel]; sgprvec[channel + kNumChannels] = 16384 - sgprvec[channel]
} }
} }
@ -515,13 +515,13 @@ fun GmmProbability(self: VadInstT, features: List<Int>, total_power: Int, frame_
for (k in 0 until kNumGaussians) { for (k in 0 until kNumGaussians) {
val gaussian = channel + k * kNumChannels val gaussian = channel + k * kNumChannels
val nmk = self.noise_means[gaussian]; val nmk = self.noise_means[gaussian]
val smk = self.speech_means[gaussian]; val smk = self.speech_means[gaussian]
var nsk = self.noise_stds[gaussian]; var nsk = self.noise_stds[gaussian]
var ssk = self.speech_stds[gaussian]; var ssk = self.speech_stds[gaussian]
// Update noise mean vector if the frame consists of noise only. // Update noise mean vector if the frame consists of noise only.
var nmk2 = nmk; var nmk2 = nmk
if (vadflag == 0) { if (vadflag == 0) {
// deltaN = (x-mu)/sigma^2 // deltaN = (x-mu)/sigma^2
// ngprvec[k] = |noise_probability[k]| / // ngprvec[k] = |noise_probability[k]| /
@ -535,7 +535,7 @@ fun GmmProbability(self: VadInstT, features: List<Int>, total_power: Int, frame_
// Long term correction of the noise mean. // Long term correction of the noise mean.
// Q8 - Q8 = Q8. // Q8 - Q8 = Q8.
val ndelt = (feature_minimum shl 4) - tmp1_s16; val ndelt = (feature_minimum shl 4) - tmp1_s16
// Q7 + (Q8 * Q8) shr 9 = Q7. // Q7 + (Q8 * Q8) shr 9 = Q7.
var nmk3 = nmk2 + ((ndelt * kBackEta) shr 9) var nmk3 = nmk2 + ((ndelt * kBackEta) shr 9)
@ -548,7 +548,7 @@ fun GmmProbability(self: VadInstT, features: List<Int>, total_power: Int, frame_
if (nmk3 > tmp_s16) { if (nmk3 > tmp_s16) {
nmk3 = tmp_s16 nmk3 = tmp_s16
} }
self.noise_means[gaussian] = nmk3; self.noise_means[gaussian] = nmk3
if (vadflag != 0) { if (vadflag != 0) {
// Update speech mean vector: // Update speech mean vector:
@ -561,76 +561,76 @@ fun GmmProbability(self: VadInstT, features: List<Int>, total_power: Int, frame_
// Q14 * Q15 shr 21 = Q8. // Q14 * Q15 shr 21 = Q8.
tmp_s16 = (delt * kSpeechUpdateConst) shr 21 tmp_s16 = (delt * kSpeechUpdateConst) shr 21
// Q7 + (Q8 shr 1) = Q7. With rounding. // Q7 + (Q8 shr 1) = Q7. With rounding.
var smk2 = smk + ((tmp_s16 + 1) shr 1); var smk2 = smk + ((tmp_s16 + 1) shr 1)
// Control that the speech mean does not drift too much. // Control that the speech mean does not drift too much.
val maxmu = maxspe + 640; val maxmu = maxspe + 640
if (smk2 < kMinimumMean[k]) { if (smk2 < kMinimumMean[k]) {
smk2 = kMinimumMean[k]; smk2 = kMinimumMean[k]
} }
if (smk2 > maxmu) { if (smk2 > maxmu) {
smk2 = maxmu; smk2 = maxmu
} }
self.speech_means[gaussian] = smk2; // Q7. self.speech_means[gaussian] = smk2 // Q7.
// (Q7 shr 3) = Q4. With rounding. // (Q7 shr 3) = Q4. With rounding.
tmp_s16 = ((smk + 4) shr 3); tmp_s16 = ((smk + 4) shr 3)
tmp_s16 = features[channel] - tmp_s16; // Q4 tmp_s16 = features[channel] - tmp_s16 // Q4
// (Q11 * Q4 shr 3) = Q12. // (Q11 * Q4 shr 3) = Q12.
var tmp1_s32 = (deltaS[gaussian] * tmp_s16) shr 3; var tmp1_s32 = (deltaS[gaussian] * tmp_s16) shr 3
var tmp2_s32 = tmp1_s32 - 4096; var tmp2_s32 = tmp1_s32 - 4096
tmp_s16 = sgprvec[gaussian] shr 2; tmp_s16 = sgprvec[gaussian] shr 2
// (Q14 shr 2) * Q12 = Q24. // (Q14 shr 2) * Q12 = Q24.
tmp1_s32 = tmp_s16 * tmp2_s32; tmp1_s32 = tmp_s16 * tmp2_s32
tmp2_s32 = tmp1_s32 shr 4; // Q20 tmp2_s32 = tmp1_s32 shr 4 // Q20
// 0.1 * Q20 / Q7 = Q13. // 0.1 * Q20 / Q7 = Q13.
if (tmp2_s32 > 0) { if (tmp2_s32 > 0) {
tmp_s16 = DivW32W16(tmp2_s32, ssk * 10); tmp_s16 = DivW32W16(tmp2_s32, ssk * 10)
} else { } else {
tmp_s16 = DivW32W16(-tmp2_s32, ssk * 10); tmp_s16 = DivW32W16(-tmp2_s32, ssk * 10)
tmp_s16 = -tmp_s16; tmp_s16 = -tmp_s16
} }
// Divide by 4 giving an update factor of 0.025 (= 0.1 / 4). // Divide by 4 giving an update factor of 0.025 (= 0.1 / 4).
// Note that division by 4 equals shift by 2, hence, // Note that division by 4 equals shift by 2, hence,
// (Q13 shr 8) = (Q13 shr 6) / 4 = Q7. // (Q13 shr 8) = (Q13 shr 6) / 4 = Q7.
tmp_s16 += 128; // Rounding. tmp_s16 += 128 // Rounding.
ssk += (tmp_s16 shr 8); ssk += (tmp_s16 shr 8)
if (ssk < kMinStd) { if (ssk < kMinStd) {
ssk = kMinStd; ssk = kMinStd
} }
self.speech_stds[gaussian] = ssk; self.speech_stds[gaussian] = ssk
} else { } else {
// Update GMM variance vectors. // Update GMM variance vectors.
// deltaN * (features[channel] - nmk) - 1 // deltaN * (features[channel] - nmk) - 1
// Q4 - (Q7 shr 3) = Q4. // Q4 - (Q7 shr 3) = Q4.
tmp_s16 = features[channel] - (nmk shr 3); tmp_s16 = features[channel] - (nmk shr 3)
// (Q11 * Q4 shr 3) = Q12. // (Q11 * Q4 shr 3) = Q12.
var tmp1_s32 = (deltaN[gaussian] * tmp_s16) shr 3; var tmp1_s32 = (deltaN[gaussian] * tmp_s16) shr 3
tmp1_s32 -= 4096; tmp1_s32 -= 4096
// (Q14 shr 2) * Q12 = Q24. // (Q14 shr 2) * Q12 = Q24.
tmp_s16 = (ngprvec[gaussian] + 2) shr 2; tmp_s16 = (ngprvec[gaussian] + 2) shr 2
val tmp2_s32 = tmp_s16 * tmp1_s32; val tmp2_s32 = tmp_s16 * tmp1_s32
// Q20 * approx 0.001 (2^-10=0.0009766), hence, // Q20 * approx 0.001 (2^-10=0.0009766), hence,
// (Q24 shr 14) = (Q24 shr 4) / 2^10 = Q20. // (Q24 shr 14) = (Q24 shr 4) / 2^10 = Q20.
tmp1_s32 = tmp2_s32 shr 14; tmp1_s32 = tmp2_s32 shr 14
// Q20 / Q7 = Q13. // Q20 / Q7 = Q13.
if (tmp1_s32 > 0) { if (tmp1_s32 > 0) {
tmp_s16 = DivW32W16(tmp1_s32, nsk); tmp_s16 = DivW32W16(tmp1_s32, nsk)
} else { } else {
tmp_s16 = DivW32W16(-tmp1_s32, nsk); tmp_s16 = DivW32W16(-tmp1_s32, nsk)
tmp_s16 = -tmp_s16; tmp_s16 = -tmp_s16
} }
tmp_s16 += 32; // Rounding tmp_s16 += 32 // Rounding
nsk += tmp_s16 shr 6; // Q13 shr 6 = Q7. nsk += tmp_s16 shr 6 // Q13 shr 6 = Q7.
if (nsk < kMinStd) { if (nsk < kMinStd) {
nsk = kMinStd; nsk = kMinStd
} }
self.noise_stds[gaussian] = nsk; self.noise_stds[gaussian] = nsk
} }
} }
@ -643,9 +643,9 @@ fun GmmProbability(self: VadInstT, features: List<Int>, total_power: Int, frame_
// [diff] = "global" speech mean - "global" noise mean. // [diff] = "global" speech mean - "global" noise mean.
// (Q14 shr 9) - (Q14 shr 9) = Q5. // (Q14 shr 9) - (Q14 shr 9) = Q5.
val diff = (speech_global_mean shr 9) - (noise_global_mean shr 9); val diff = (speech_global_mean shr 9) - (noise_global_mean shr 9)
if (diff < kMinimumDifference[channel]) { if (diff < kMinimumDifference[channel]) {
tmp_s16 = kMinimumDifference[channel] - diff; tmp_s16 = kMinimumDifference[channel] - diff
// [tmp1_s16] = ~0.8 * (kMinimumDifference - diff) in Q7. // [tmp1_s16] = ~0.8 * (kMinimumDifference - diff) in Q7.
// [tmp2_s16] = ~0.2 * (kMinimumDifference - diff) in Q7. // [tmp2_s16] = ~0.2 * (kMinimumDifference - diff) in Q7.
@ -664,53 +664,53 @@ fun GmmProbability(self: VadInstT, features: List<Int>, total_power: Int, frame_
} }
// Control that the speech & noise means do not drift too much. // Control that the speech & noise means do not drift too much.
maxspe = kMaximumSpeech[channel]; maxspe = kMaximumSpeech[channel]
tmp2_s16 = speech_global_mean shr 7 tmp2_s16 = speech_global_mean shr 7
if (tmp2_s16 > maxspe) { if (tmp2_s16 > maxspe) {
// Upper limit of speech model. // Upper limit of speech model.
tmp2_s16 -= maxspe; tmp2_s16 -= maxspe
for (k in 0 until kNumGaussians) { for (k in 0 until kNumGaussians) {
self.speech_means[channel + k * kNumChannels] -= tmp2_s16; self.speech_means[channel + k * kNumChannels] -= tmp2_s16
} }
} }
tmp2_s16 = noise_global_mean shr 7 tmp2_s16 = noise_global_mean shr 7
if (tmp2_s16 > kMaximumNoise[channel]) { if (tmp2_s16 > kMaximumNoise[channel]) {
tmp2_s16 -= kMaximumNoise[channel]; tmp2_s16 -= kMaximumNoise[channel]
for (k in 0 until kNumGaussians) { for (k in 0 until kNumGaussians) {
self.noise_means[channel + k * kNumChannels] -= tmp2_s16; self.noise_means[channel + k * kNumChannels] -= tmp2_s16
} }
} }
} }
self.frame_counter++; self.frame_counter++
} }
// Smooth with respect to transition hysteresis. // Smooth with respect to transition hysteresis.
if (vadflag == 0) { if (vadflag == 0) {
if (self.over_hang > 0) { if (self.over_hang > 0) {
vadflag = 2 + self.over_hang; vadflag = 2 + self.over_hang
self.over_hang--; self.over_hang--
} }
self.num_of_speech = 0; self.num_of_speech = 0
} else { } else {
self.num_of_speech++; self.num_of_speech++
if (self.num_of_speech > kMaxSpeechFrames) { if (self.num_of_speech > kMaxSpeechFrames) {
self.num_of_speech = kMaxSpeechFrames; self.num_of_speech = kMaxSpeechFrames
self.over_hang = self.over_hang_max_2; self.over_hang = self.over_hang_max_2
} else { } else {
self.over_hang = self.over_hang_max_1; self.over_hang = self.over_hang_max_1
} }
} }
return vadflag; return vadflag
} }
/////////////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////////////////////
// webrtc/common_audio/vad/vad_sp.c // webrtc/common_audio/vad/vad_sp.c
val kSmoothingDown = 6553; // 0.2 in Q15. val kSmoothingDown = 6553 // 0.2 in Q15.
val kSmoothingUp = 32439; // 0.99 in Q15. val kSmoothingUp = 32439 // 0.99 in Q15.
// Updates and returns the smoothed feature minimum. As minimum we use the // Updates and returns the smoothed feature minimum. As minimum we use the
// median of the five smallest feature values in a 100 frames long window. // median of the five smallest feature values in a 100 frames long window.
@ -731,9 +731,9 @@ val kSmoothingUp = 32439; // 0.99 in Q15.
// of the five smallest values. // of the five smallest values.
fun FindMinimum(self: VadInstT, feature_value: Int, channel: Int): Int { fun FindMinimum(self: VadInstT, feature_value: Int, channel: Int): Int {
var position = -1 var position = -1
var current_median = 1600; var current_median = 1600
var alpha = 0; var alpha = 0
var tmp32 = 0; var tmp32 = 0
val offset = channel shl 4 val offset = channel shl 4
// Accessor for the age of each value of the [channel] // Accessor for the age of each value of the [channel]
@ -748,21 +748,21 @@ fun FindMinimum(self: VadInstT, feature_value: Int, channel: Int): Int {
inline operator fun set(i: Int, value: Int) { self.low_value_vector[offset + i] = value } inline operator fun set(i: Int, value: Int) { self.low_value_vector[offset + i] = value }
} }
assert(channel < kNumChannels); assert(channel < kNumChannels)
// Each value in [smallest_values] is getting 1 loop older. Update [age], and // Each value in [smallest_values] is getting 1 loop older. Update [age], and
// remove old values. // remove old values.
for (i in 0 until 16) { for (i in 0 until 16) {
if (age[i] != 100) { if (age[i] != 100) {
age[i]++; age[i]++
} else { } else {
// Too old value. Remove from memory and shift larger values downwards. // Too old value. Remove from memory and shift larger values downwards.
for (j in i until 16) { for (j in i until 16) {
smallest_values[j] = smallest_values[j + 1]; smallest_values[j] = smallest_values[j + 1]
age[j] = age[j + 1]; age[j] = age[j + 1]
} }
age[15] = 101; age[15] = 101
smallest_values[15] = 10000; smallest_values[15] = 10000
} }
} }
@ -773,49 +773,49 @@ fun FindMinimum(self: VadInstT, feature_value: Int, channel: Int): Int {
if (feature_value < smallest_values[3]) { if (feature_value < smallest_values[3]) {
if (feature_value < smallest_values[1]) { if (feature_value < smallest_values[1]) {
if (feature_value < smallest_values[0]) { if (feature_value < smallest_values[0]) {
position = 0; position = 0
} else { } else {
position = 1; position = 1
} }
} else if (feature_value < smallest_values[2]) { } else if (feature_value < smallest_values[2]) {
position = 2; position = 2
} else { } else {
position = 3; position = 3
} }
} else if (feature_value < smallest_values[5]) { } else if (feature_value < smallest_values[5]) {
if (feature_value < smallest_values[4]) { if (feature_value < smallest_values[4]) {
position = 4; position = 4
} else { } else {
position = 5; position = 5
} }
} else if (feature_value < smallest_values[6]) { } else if (feature_value < smallest_values[6]) {
position = 6; position = 6
} else { } else {
position = 7; position = 7
} }
} else if (feature_value < smallest_values[15]) { } else if (feature_value < smallest_values[15]) {
if (feature_value < smallest_values[11]) { if (feature_value < smallest_values[11]) {
if (feature_value < smallest_values[9]) { if (feature_value < smallest_values[9]) {
if (feature_value < smallest_values[8]) { if (feature_value < smallest_values[8]) {
position = 8; position = 8
} else { } else {
position = 9; position = 9
} }
} else if (feature_value < smallest_values[10]) { } else if (feature_value < smallest_values[10]) {
position = 10; position = 10
} else { } else {
position = 11; position = 11
} }
} else if (feature_value < smallest_values[13]) { } else if (feature_value < smallest_values[13]) {
if (feature_value < smallest_values[12]) { if (feature_value < smallest_values[12]) {
position = 12; position = 12
} else { } else {
position = 13; position = 13
} }
} else if (feature_value < smallest_values[14]) { } else if (feature_value < smallest_values[14]) {
position = 14; position = 14
} else { } else {
position = 15; position = 15
} }
} }
@ -823,42 +823,42 @@ fun FindMinimum(self: VadInstT, feature_value: Int, channel: Int): Int {
// and shift larger values up. // and shift larger values up.
if (position > -1) { if (position > -1) {
for (i in 15 downTo position + 1) { for (i in 15 downTo position + 1) {
smallest_values[i] = smallest_values[i - 1]; smallest_values[i] = smallest_values[i - 1]
age[i] = age[i - 1]; age[i] = age[i - 1]
} }
smallest_values[position] = feature_value; smallest_values[position] = feature_value
age[position] = 1; age[position] = 1
} }
// Get [current_median]. // Get [current_median].
if (self.frame_counter > 2) { if (self.frame_counter > 2) {
current_median = smallest_values[2]; current_median = smallest_values[2]
} else if (self.frame_counter > 0) { } else if (self.frame_counter > 0) {
current_median = smallest_values[0]; current_median = smallest_values[0]
} }
// Smooth the median value. // Smooth the median value.
if (self.frame_counter > 0) { if (self.frame_counter > 0) {
if (current_median < self.mean_value[channel]) { if (current_median < self.mean_value[channel]) {
alpha = kSmoothingDown; // 0.2 in Q15. alpha = kSmoothingDown // 0.2 in Q15.
} else { } else {
alpha = kSmoothingUp; // 0.99 in Q15. alpha = kSmoothingUp // 0.99 in Q15.
} }
} }
tmp32 = (alpha + 1) * self.mean_value[channel]; tmp32 = (alpha + 1) * self.mean_value[channel]
tmp32 += (WEBRTC_SPL_WORD16_MAX - alpha) * current_median; tmp32 += (WEBRTC_SPL_WORD16_MAX - alpha) * current_median
tmp32 += 16384; tmp32 += 16384
self.mean_value[channel] = tmp32 shr 15 self.mean_value[channel] = tmp32 shr 15
return self.mean_value[channel]; return self.mean_value[channel]
} }
/////////////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////////////////////
// webrtc/common_audio/vad/vad_filterbank.c // webrtc/common_audio/vad/vad_filterbank.c
// Constants used in LogOfEnergy(). // Constants used in LogOfEnergy().
val kLogConst = 24660; // 160*log10(2) in Q9. val kLogConst = 24660 // 160*log10(2) in Q9.
val kLogEnergyIntPart = 14336; // 14 in Q10 val kLogEnergyIntPart = 14336 // 14 in Q10
// Coefficients used by HighPassFilter, Q14. // Coefficients used by HighPassFilter, Q14.
val kHpZeroCoefs = intArrayOf(6631, -13262, 6631) val kHpZeroCoefs = intArrayOf(6631, -13262, 6631)
@ -893,15 +893,15 @@ fun HighPassFilter(input: AudioBuffer, filter_state: IntArray): AudioBuffer {
for (i in 0 until input.size) { for (i in 0 until input.size) {
// All-zero section (filter coefficients in Q14). // All-zero section (filter coefficients in Q14).
var tmp32 = kHpZeroCoefs[0] * input[i] var tmp32 = kHpZeroCoefs[0] * input[i]
tmp32 += kHpZeroCoefs[1] * filter_state[0]; tmp32 += kHpZeroCoefs[1] * filter_state[0]
tmp32 += kHpZeroCoefs[2] * filter_state[1]; tmp32 += kHpZeroCoefs[2] * filter_state[1]
filter_state[1] = filter_state[0]; filter_state[1] = filter_state[0]
filter_state[0] = input[i].toInt() filter_state[0] = input[i].toInt()
// All-pole section (filter coefficients in Q14). // All-pole section (filter coefficients in Q14).
tmp32 -= kHpPoleCoefs[1] * filter_state[2]; tmp32 -= kHpPoleCoefs[1] * filter_state[2]
tmp32 -= kHpPoleCoefs[2] * filter_state[3]; tmp32 -= kHpPoleCoefs[2] * filter_state[3]
filter_state[3] = filter_state[2]; filter_state[3] = filter_state[2]
filter_state[2] = tmp32 shr 14 filter_state[2] = tmp32 shr 14
result[i] = filter_state[2].toShort() result[i] = filter_state[2].toShort()
} }
@ -931,8 +931,8 @@ fun AllPassFilter(input: AudioBuffer, filter_coefficient: Int, filter_state: Mut
val tmp32 = state32 + filter_coefficient * input[i] val tmp32 = state32 + filter_coefficient * input[i]
val tmp16 = tmp32 shr 16 // Q(-1) val tmp16 = tmp32 shr 16 // Q(-1)
result[i / 2] = tmp16.toShort() result[i / 2] = tmp16.toShort()
state32 = (input[i] * (1 shl 14)) - filter_coefficient * tmp16; // Q14 state32 = (input[i] * (1 shl 14)) - filter_coefficient * tmp16 // Q14
state32 *= 2; // Q15. state32 *= 2 // Q15.
} }
filter_state.setValue(state32 shr 16) // Q(-1) filter_state.setValue(state32 shr 16) // Q(-1)
@ -989,7 +989,7 @@ fun SplitFilter(input: AudioBuffer, upper_state: MutableInt, lower_state: Mutabl
// [total_energy] <= [kMinEnergy]. // [total_energy] <= [kMinEnergy].
// - log_energy [o] : 10 * log10("energy of [data_in]") given in Q4. // - log_energy [o] : 10 * log10("energy of [data_in]") given in Q4.
fun LogOfEnergy(input: AudioBuffer, offset: Int, total_energy: MutableInt): Int { fun LogOfEnergy(input: AudioBuffer, offset: Int, total_energy: MutableInt): Int {
assert(input.size > 0); assert(input.size > 0)
val energyResult = Energy(input) val energyResult = Energy(input)
// [tot_rshifts] accumulates the number of right shifts performed on [energy]. // [tot_rshifts] accumulates the number of right shifts performed on [energy].
@ -1004,13 +1004,13 @@ fun LogOfEnergy(input: AudioBuffer, offset: Int, total_energy: MutableInt): Int
// By construction, normalizing to 15 bits is equivalent with 17 leading // By construction, normalizing to 15 bits is equivalent with 17 leading
// zeros of an unsigned 32 bit value. // zeros of an unsigned 32 bit value.
val normalizing_rshifts = 17 - NormU32(energy); val normalizing_rshifts = 17 - NormU32(energy)
// In a 15 bit representation the leading bit is 2^14. log2(2^14) in Q10 is // In a 15 bit representation the leading bit is 2^14. log2(2^14) in Q10 is
// (14 shl 10), which is what we initialize [log2_energy] with. For a more // (14 shl 10), which is what we initialize [log2_energy] with. For a more
// detailed derivations, see below. // detailed derivations, see below.
var log2_energy = kLogEnergyIntPart; var log2_energy = kLogEnergyIntPart
tot_rshifts += normalizing_rshifts; tot_rshifts += normalizing_rshifts
// Normalize [energy] to 15 bits. // Normalize [energy] to 15 bits.
// [tot_rshifts] is now the total number of right shifts performed on // [tot_rshifts] is now the total number of right shifts performed on
// [energy] after normalization. This means that [energy] is in // [energy] after normalization. This means that [energy] is in
@ -1048,10 +1048,10 @@ fun LogOfEnergy(input: AudioBuffer, offset: Int, total_energy: MutableInt): Int
var log_energy = (((kLogConst * log2_energy) shr 19) + (tot_rshifts * kLogConst) shr 9) var log_energy = (((kLogConst * log2_energy) shr 19) + (tot_rshifts * kLogConst) shr 9)
if (log_energy < 0) { if (log_energy < 0) {
log_energy = 0; log_energy = 0
} }
log_energy += offset; log_energy += offset
// Update the approximate [total_energy] with the energy of [data_in], if // Update the approximate [total_energy] with the energy of [data_in], if
// [total_energy] has not exceeded [kMinEnergy]. [total_energy] is used as an // [total_energy] has not exceeded [kMinEnergy]. [total_energy] is used as an
@ -1066,7 +1066,7 @@ fun LogOfEnergy(input: AudioBuffer, offset: Int, total_energy: MutableInt): Int
// right shifted [energy] will fit in an Int. In addition, adding the // right shifted [energy] will fit in an Int. In addition, adding the
// value to [total_energy] is wrap around safe as long as // value to [total_energy] is wrap around safe as long as
// [kMinEnergy] < 8192. // [kMinEnergy] < 8192.
total_energy.add((energy shr -tot_rshifts).toInt()); // Q0. total_energy.add((energy shr -tot_rshifts).toInt()) // Q0.
} }
} }
@ -1108,7 +1108,7 @@ fun CalculateFeatures(self: VadInstT, input: AudioBuffer): FeatureResult {
var frequency_band = 0 var frequency_band = 0
val `0 to 4000 Hz` = input val `0 to 4000 Hz` = input
val (`2000 to 4000 Hz`, `0 to 2000 Hz`) = val (`2000 to 4000 Hz`, `0 to 2000 Hz`) =
SplitFilter(`0 to 4000 Hz`, self.upper_state[frequency_band], self.lower_state[frequency_band]); SplitFilter(`0 to 4000 Hz`, self.upper_state[frequency_band], self.lower_state[frequency_band])
// For the upper band (2000 to 4000 Hz) split at 3000 Hz and downsample. // For the upper band (2000 to 4000 Hz) split at 3000 Hz and downsample.
frequency_band = 1 frequency_band = 1
@ -1116,17 +1116,17 @@ fun CalculateFeatures(self: VadInstT, input: AudioBuffer): FeatureResult {
SplitFilter(`2000 to 4000 Hz`, self.upper_state[frequency_band], self.lower_state[frequency_band]) SplitFilter(`2000 to 4000 Hz`, self.upper_state[frequency_band], self.lower_state[frequency_band])
// For the lower band (0 to 2000 Hz) split at 1000 Hz and downsample. // For the lower band (0 to 2000 Hz) split at 1000 Hz and downsample.
frequency_band = 2; frequency_band = 2
val (`1000 to 2000 Hz`, `0 to 1000 Hz`) = val (`1000 to 2000 Hz`, `0 to 1000 Hz`) =
SplitFilter(`0 to 2000 Hz`, self.upper_state[frequency_band], self.lower_state[frequency_band]) SplitFilter(`0 to 2000 Hz`, self.upper_state[frequency_band], self.lower_state[frequency_band])
// For the lower band (0 to 1000 Hz) split at 500 Hz and downsample. // For the lower band (0 to 1000 Hz) split at 500 Hz and downsample.
frequency_band = 3; frequency_band = 3
val (`500 to 1000 Hz`, `0 to 500 Hz`) = val (`500 to 1000 Hz`, `0 to 500 Hz`) =
SplitFilter(`0 to 1000 Hz`, self.upper_state[frequency_band], self.lower_state[frequency_band]); SplitFilter(`0 to 1000 Hz`, self.upper_state[frequency_band], self.lower_state[frequency_band])
// For the lower band (0 to 500 Hz) split at 250 Hz and downsample. // For the lower band (0 to 500 Hz) split at 250 Hz and downsample.
frequency_band = 4; frequency_band = 4
val (`250 to 500 Hz`, `0 to 250 Hz`) = val (`250 to 500 Hz`, `0 to 250 Hz`) =
SplitFilter(`0 to 500 Hz`, self.upper_state[frequency_band], self.lower_state[frequency_band]) SplitFilter(`0 to 500 Hz`, self.upper_state[frequency_band], self.lower_state[frequency_band])
@ -1139,7 +1139,7 @@ fun CalculateFeatures(self: VadInstT, input: AudioBuffer): FeatureResult {
val `energy in 1000 to 2000 Hz` = LogOfEnergy(`1000 to 2000 Hz`, kOffsetVector[3], total_energy) val `energy in 1000 to 2000 Hz` = LogOfEnergy(`1000 to 2000 Hz`, kOffsetVector[3], total_energy)
val `energy in 500 to 1000 Hz` = LogOfEnergy(`500 to 1000 Hz`, kOffsetVector[2], total_energy) val `energy in 500 to 1000 Hz` = LogOfEnergy(`500 to 1000 Hz`, kOffsetVector[2], total_energy)
val `energy in 250 to 500 Hz` = LogOfEnergy(`250 to 500 Hz`, kOffsetVector[1], total_energy) val `energy in 250 to 500 Hz` = LogOfEnergy(`250 to 500 Hz`, kOffsetVector[1], total_energy)
val `energy in 50 to 250 Hz` = LogOfEnergy(`80 to 250 Hz`, kOffsetVector[0], total_energy); val `energy in 50 to 250 Hz` = LogOfEnergy(`80 to 250 Hz`, kOffsetVector[0], total_energy)
val features = listOf( val features = listOf(
`energy in 50 to 250 Hz`, `energy in 50 to 250 Hz`,
@ -1179,12 +1179,12 @@ fun CalculateFeatures(self: VadInstT, input: AudioBuffer): FeatureResult {
*/ */
fun CalcVad8khz(inst: VadInstT, speech_frame: AudioBuffer): Int { fun CalcVad8khz(inst: VadInstT, speech_frame: AudioBuffer): Int {
// Get power in the bands // Get power in the bands
val (features, totalEnergy) = CalculateFeatures(inst, speech_frame); val (features, totalEnergy) = CalculateFeatures(inst, speech_frame)
// Make a VAD // Make a VAD
inst.vad = GmmProbability(inst, features, totalEnergy, speech_frame.size); inst.vad = GmmProbability(inst, features, totalEnergy, speech_frame.size)
return inst.vad; return inst.vad
} }
// Calculates a VAD decision for the [audio_frame]. For valid sampling rates // Calculates a VAD decision for the [audio_frame]. For valid sampling rates
@ -1202,6 +1202,6 @@ fun CalcVad8khz(inst: VadInstT, speech_frame: AudioBuffer): Int {
fun ProcessVad(self: VadInstT, fs: Int, audio_frame: AudioBuffer): Boolean { fun ProcessVad(self: VadInstT, fs: Int, audio_frame: AudioBuffer): Boolean {
assert(fs == 8000) assert(fs == 8000)
val vad = CalcVad8khz(self, audio_frame); val vad = CalcVad8khz(self, audio_frame)
return vad != 0 return vad != 0
} }