/*
 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
| 10 | |
| 11 | |
/*
 * This header file includes the descriptions of the core VAD calls.
 */
| 15 | |
| 16 | #ifndef WEBRTC_COMMON_AUDIO_VAD_VAD_CORE_H_ |
| 17 | #define WEBRTC_COMMON_AUDIO_VAD_VAD_CORE_H_ |
| 18 | |
pbos@webrtc.org | f24ac59 | 2013-05-27 09:49:58 | [diff] [blame] | 19 | #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" |
| 20 | #include "webrtc/typedefs.h" |
andrew@webrtc.org | b015cbe | 2012-10-22 18:19:23 | [diff] [blame] | 21 | |
// Compile-time sizes and thresholds shared by the core VAD declarations.
enum { kNumChannels = 6 };   // Number of frequency bands (named channels).
enum { kNumGaussians = 2 };  // Number of Gaussians per channel in the GMM.
// Length of the per-Gaussian tables: one entry per (channel, Gaussian) pair.
enum { kTableSize = kNumChannels * kNumGaussians };
enum { kMinEnergy = 10 };  // Minimum energy required to trigger audio signal.
| 26 | |
oprypin | 0f20d58 | 2017-03-09 14:25:06 | [diff] [blame] | 27 | typedef struct VadInstT_ { |
andrew@webrtc.org | b015cbe | 2012-10-22 18:19:23 | [diff] [blame] | 28 | int vad; |
| 29 | int32_t downsampling_filter_states[4]; |
| 30 | WebRtcSpl_State48khzTo8khz state_48_to_8; |
| 31 | int16_t noise_means[kTableSize]; |
| 32 | int16_t speech_means[kTableSize]; |
| 33 | int16_t noise_stds[kTableSize]; |
| 34 | int16_t speech_stds[kTableSize]; |
| 35 | // TODO(bjornv): Change to |frame_count|. |
| 36 | int32_t frame_counter; |
oprypin | 0f20d58 | 2017-03-09 14:25:06 | [diff] [blame] | 37 | int16_t over_hang; // Over Hang |
andrew@webrtc.org | b015cbe | 2012-10-22 18:19:23 | [diff] [blame] | 38 | int16_t num_of_speech; |
| 39 | // TODO(bjornv): Change to |age_vector|. |
| 40 | int16_t index_vector[16 * kNumChannels]; |
| 41 | int16_t low_value_vector[16 * kNumChannels]; |
| 42 | // TODO(bjornv): Change to |median|. |
| 43 | int16_t mean_value[kNumChannels]; |
| 44 | int16_t upper_state[5]; |
| 45 | int16_t lower_state[5]; |
| 46 | int16_t hp_filter_state[4]; |
| 47 | int16_t over_hang_max_1[3]; |
| 48 | int16_t over_hang_max_2[3]; |
| 49 | int16_t individual[3]; |
| 50 | int16_t total[3]; |
| 51 | |
| 52 | int init_flag; |
andrew@webrtc.org | b015cbe | 2012-10-22 18:19:23 | [diff] [blame] | 53 | } VadInstT; |
| 54 | |
| 55 | // Initializes the core VAD component. The default aggressiveness mode is |
| 56 | // controlled by |kDefaultMode| in vad_core.c. |
| 57 | // |
| 58 | // - self [i/o] : Instance that should be initialized |
| 59 | // |
deadbeef | 9617a87 | 2017-02-26 12:18:12 | [diff] [blame] | 60 | // returns : 0 (OK), -1 (null pointer in or if the default mode can't be |
andrew@webrtc.org | b015cbe | 2012-10-22 18:19:23 | [diff] [blame] | 61 | // set) |
| 62 | int WebRtcVad_InitCore(VadInstT* self); |
| 63 | |
| 64 | /**************************************************************************** |
| 65 | * WebRtcVad_set_mode_core(...) |
| 66 | * |
| 67 | * This function changes the VAD settings |
| 68 | * |
| 69 | * Input: |
| 70 | * - inst : VAD instance |
| 71 | * - mode : Aggressiveness degree |
| 72 | * 0 (High quality) - 3 (Highly aggressive) |
| 73 | * |
| 74 | * Output: |
| 75 | * - inst : Changed instance |
| 76 | * |
| 77 | * Return value : 0 - Ok |
| 78 | * -1 - Error |
| 79 | */ |
| 80 | |
| 81 | int WebRtcVad_set_mode_core(VadInstT* self, int mode); |
| 82 | |
| 83 | /**************************************************************************** |
| 84 | * WebRtcVad_CalcVad48khz(...) |
andrew@webrtc.org | 785c2fd | 2014-04-30 16:44:13 | [diff] [blame] | 85 | * WebRtcVad_CalcVad32khz(...) |
| 86 | * WebRtcVad_CalcVad16khz(...) |
| 87 | * WebRtcVad_CalcVad8khz(...) |
andrew@webrtc.org | b015cbe | 2012-10-22 18:19:23 | [diff] [blame] | 88 | * |
| 89 | * Calculate probability for active speech and make VAD decision. |
| 90 | * |
| 91 | * Input: |
| 92 | * - inst : Instance that should be initialized |
| 93 | * - speech_frame : Input speech frame |
| 94 | * - frame_length : Number of input samples |
| 95 | * |
| 96 | * Output: |
| 97 | * - inst : Updated filter states etc. |
| 98 | * |
| 99 | * Return value : VAD decision |
| 100 | * 0 - No active speech |
| 101 | * 1-6 - Active speech |
| 102 | */ |
andrew@webrtc.org | 785c2fd | 2014-04-30 16:44:13 | [diff] [blame] | 103 | int WebRtcVad_CalcVad48khz(VadInstT* inst, const int16_t* speech_frame, |
Peter Kasting | a0ad248 | 2015-08-24 21:52:23 | [diff] [blame] | 104 | size_t frame_length); |
andrew@webrtc.org | 785c2fd | 2014-04-30 16:44:13 | [diff] [blame] | 105 | int WebRtcVad_CalcVad32khz(VadInstT* inst, const int16_t* speech_frame, |
Peter Kasting | a0ad248 | 2015-08-24 21:52:23 | [diff] [blame] | 106 | size_t frame_length); |
andrew@webrtc.org | 785c2fd | 2014-04-30 16:44:13 | [diff] [blame] | 107 | int WebRtcVad_CalcVad16khz(VadInstT* inst, const int16_t* speech_frame, |
Peter Kasting | a0ad248 | 2015-08-24 21:52:23 | [diff] [blame] | 108 | size_t frame_length); |
andrew@webrtc.org | 785c2fd | 2014-04-30 16:44:13 | [diff] [blame] | 109 | int WebRtcVad_CalcVad8khz(VadInstT* inst, const int16_t* speech_frame, |
Peter Kasting | a0ad248 | 2015-08-24 21:52:23 | [diff] [blame] | 110 | size_t frame_length); |
andrew@webrtc.org | b015cbe | 2012-10-22 18:19:23 | [diff] [blame] | 111 | |
| 112 | #endif // WEBRTC_COMMON_AUDIO_VAD_VAD_CORE_H_ |