blob: 47e2d8843ebe848c3bdd297aa009e7993a2fc7f5 [file] [log] [blame]
/*
 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */
10
11
/*
 * This header file declares the state and the functions of the core VAD.
 */
15
16#ifndef WEBRTC_COMMON_AUDIO_VAD_VAD_CORE_H_
17#define WEBRTC_COMMON_AUDIO_VAD_VAD_CORE_H_
18
#include <stddef.h>

#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/typedefs.h"
andrew@webrtc.orgb015cbe2012-10-22 18:19:2321
// Compile-time constants of the core VAD. They are declared as anonymous
// enums (rather than const variables) so that they are integer constant
// expressions usable as array sizes in C.
enum { kNumChannels = 6 };  // Number of frequency bands (named channels).
enum { kNumGaussians = 2 };  // Number of Gaussians per channel in the GMM.
// Number of entries in a table holding one value per (channel, Gaussian) pair.
enum { kTableSize = kNumChannels * kNumGaussians };
enum { kMinEnergy = 10 };  // Minimum energy required to trigger audio signal.
26
oprypin0f20d582017-03-09 14:25:0627typedef struct VadInstT_ {
andrew@webrtc.orgb015cbe2012-10-22 18:19:2328 int vad;
29 int32_t downsampling_filter_states[4];
30 WebRtcSpl_State48khzTo8khz state_48_to_8;
31 int16_t noise_means[kTableSize];
32 int16_t speech_means[kTableSize];
33 int16_t noise_stds[kTableSize];
34 int16_t speech_stds[kTableSize];
35 // TODO(bjornv): Change to |frame_count|.
36 int32_t frame_counter;
oprypin0f20d582017-03-09 14:25:0637 int16_t over_hang; // Over Hang
andrew@webrtc.orgb015cbe2012-10-22 18:19:2338 int16_t num_of_speech;
39 // TODO(bjornv): Change to |age_vector|.
40 int16_t index_vector[16 * kNumChannels];
41 int16_t low_value_vector[16 * kNumChannels];
42 // TODO(bjornv): Change to |median|.
43 int16_t mean_value[kNumChannels];
44 int16_t upper_state[5];
45 int16_t lower_state[5];
46 int16_t hp_filter_state[4];
47 int16_t over_hang_max_1[3];
48 int16_t over_hang_max_2[3];
49 int16_t individual[3];
50 int16_t total[3];
51
52 int init_flag;
andrew@webrtc.orgb015cbe2012-10-22 18:19:2353} VadInstT;
54
55// Initializes the core VAD component. The default aggressiveness mode is
56// controlled by |kDefaultMode| in vad_core.c.
57//
58// - self [i/o] : Instance that should be initialized
59//
deadbeef9617a872017-02-26 12:18:1260// returns : 0 (OK), -1 (null pointer in or if the default mode can't be
andrew@webrtc.orgb015cbe2012-10-22 18:19:2361// set)
62int WebRtcVad_InitCore(VadInstT* self);
63
64/****************************************************************************
65 * WebRtcVad_set_mode_core(...)
66 *
67 * This function changes the VAD settings
68 *
69 * Input:
70 * - inst : VAD instance
71 * - mode : Aggressiveness degree
72 * 0 (High quality) - 3 (Highly aggressive)
73 *
74 * Output:
75 * - inst : Changed instance
76 *
77 * Return value : 0 - Ok
78 * -1 - Error
79 */
80
81int WebRtcVad_set_mode_core(VadInstT* self, int mode);
82
83/****************************************************************************
84 * WebRtcVad_CalcVad48khz(...)
andrew@webrtc.org785c2fd2014-04-30 16:44:1385 * WebRtcVad_CalcVad32khz(...)
86 * WebRtcVad_CalcVad16khz(...)
87 * WebRtcVad_CalcVad8khz(...)
andrew@webrtc.orgb015cbe2012-10-22 18:19:2388 *
89 * Calculate probability for active speech and make VAD decision.
90 *
91 * Input:
92 * - inst : Instance that should be initialized
93 * - speech_frame : Input speech frame
94 * - frame_length : Number of input samples
95 *
96 * Output:
97 * - inst : Updated filter states etc.
98 *
99 * Return value : VAD decision
100 * 0 - No active speech
101 * 1-6 - Active speech
102 */
andrew@webrtc.org785c2fd2014-04-30 16:44:13103int WebRtcVad_CalcVad48khz(VadInstT* inst, const int16_t* speech_frame,
Peter Kastinga0ad2482015-08-24 21:52:23104 size_t frame_length);
andrew@webrtc.org785c2fd2014-04-30 16:44:13105int WebRtcVad_CalcVad32khz(VadInstT* inst, const int16_t* speech_frame,
Peter Kastinga0ad2482015-08-24 21:52:23106 size_t frame_length);
andrew@webrtc.org785c2fd2014-04-30 16:44:13107int WebRtcVad_CalcVad16khz(VadInstT* inst, const int16_t* speech_frame,
Peter Kastinga0ad2482015-08-24 21:52:23108 size_t frame_length);
andrew@webrtc.org785c2fd2014-04-30 16:44:13109int WebRtcVad_CalcVad8khz(VadInstT* inst, const int16_t* speech_frame,
Peter Kastinga0ad2482015-08-24 21:52:23110 size_t frame_length);
andrew@webrtc.orgb015cbe2012-10-22 18:19:23111
112#endif // WEBRTC_COMMON_AUDIO_VAD_VAD_CORE_H_