blob: dd484be4f1bc00785a754e02edf127a0603cfe78 [file] [log] [blame]
niklase@google.com470e71d2011-07-07 08:21:251/*
andrew@webrtc.org648af742012-02-08 01:57:292 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
niklase@google.com470e71d2011-07-07 08:21:253 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
Mirko Bonadei92ea95e2017-09-15 04:47:3111#ifndef MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_PROCESSING_H_
12#define MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_PROCESSING_H_
niklase@google.com470e71d2011-07-07 08:21:2513
Alejandro Luebscb3f9bd2015-10-30 01:21:3414// MSVC++ requires this to be set before any other includes to get M_PI.
Patrik Höglund3ff90f12017-12-12 13:41:5315#ifndef _USE_MATH_DEFINES
Alejandro Luebscb3f9bd2015-10-30 01:21:3416#define _USE_MATH_DEFINES
Patrik Höglund3ff90f12017-12-12 13:41:5317#endif
Alejandro Luebscb3f9bd2015-10-30 01:21:3418
19#include <math.h>
andrew@webrtc.orgd72b3d62012-11-15 21:46:0620#include <stddef.h> // size_t
Yves Gerey665174f2018-06-19 13:03:0521#include <stdio.h> // FILE
peah8cee56f2017-08-25 05:36:5322#include <string.h>
Jonas Olssona4d87372019-07-05 17:08:3323
aluebs@webrtc.orgfb7a0392015-01-05 21:58:5824#include <vector>
ajm@google.com22e65152011-07-18 18:03:0125
Danil Chapovalov1ecf29c2024-01-09 10:52:1026#include "absl/base/nullability.h"
Ali Tofigh1fa87c42022-07-25 20:07:0827#include "absl/strings/string_view.h"
Danil Chapovalovdb9f7ab2018-06-19 08:50:1128#include "absl/types/optional.h"
Sam Zackrissonab866a22020-05-07 11:07:4929#include "api/array_view.h"
Gustaf Ullbergbffa3002018-02-14 14:12:0030#include "api/audio/echo_canceller3_config.h"
Gustaf Ullbergfd4ce502018-02-15 09:09:0931#include "api/audio/echo_control.h"
Harald Alvestrand78f905e2023-11-02 14:09:2632#include "api/ref_count.h"
Mirko Bonadeid9708072019-01-25 19:26:4833#include "api/scoped_refptr.h"
Danil Chapovalov1ecf29c2024-01-09 10:52:1034#include "api/task_queue/task_queue_base.h"
Ivo Creusen56d460902017-11-24 16:29:5935#include "modules/audio_processing/include/audio_processing_statistics.h"
Mirko Bonadei92ea95e2017-09-15 04:47:3136#include "rtc_base/arraysize.h"
Per Åhgren09e9a832020-05-11 09:03:4737#include "rtc_base/system/file_wrapper.h"
Mirko Bonadei3d255302018-10-11 08:50:4538#include "rtc_base/system/rtc_export.h"
niklase@google.com470e71d2011-07-07 08:21:2539
niklase@google.com470e71d2011-07-07 08:21:2540namespace webrtc {
41
aleloi868f32f2017-05-23 14:20:0542class AecDump;
Sam Zackrisson0beac582017-09-25 10:04:0243class AudioBuffer;
Michael Graczykdfa36052015-03-25 23:37:2744
Michael Graczyk86c6d332015-07-23 18:41:3945class StreamConfig;
46class ProcessingConfig;
47
Ivo Creusen09fa4b02018-01-11 15:08:5448class EchoDetector;
Valeriia Nemychnikovaf06eb572018-08-29 08:37:0949class CustomAudioAnalyzer;
Alex Loiko5825aa62017-12-18 15:02:4050class CustomProcessing;
niklase@google.com470e71d2011-07-07 08:21:2551
52// The Audio Processing Module (APM) provides a collection of voice processing
53// components designed for real-time communications software.
54//
55// APM operates on two audio streams on a frame-by-frame basis. Frames of the
56// primary stream, on which all processing is applied, are passed to
Artem Titov0b489302021-07-28 18:50:0357// `ProcessStream()`. Frames of the reverse direction stream are passed to
58// `ProcessReverseStream()`. On the client-side, this will typically be the
aluebsb0319552016-03-18 03:39:5359// near-end (capture) and far-end (render) streams, respectively. APM should be
60// placed in the signal chain as close to the audio hardware abstraction layer
61// (HAL) as possible.
niklase@google.com470e71d2011-07-07 08:21:2562//
63// On the server-side, the reverse stream will normally not be used, with
64// processing occurring on each incoming stream.
65//
66// Component interfaces follow a similar pattern and are accessed through
67// corresponding getters in APM. All components are disabled at create-time,
68// with default settings that are recommended for most situations. New settings
69// can be applied without enabling a component. Enabling a component triggers
70// memory allocation and initialization to allow it to start processing the
71// streams.
72//
73// Thread safety is provided with the following assumptions to reduce locking
74// overhead:
75// 1. The stream getters and setters are called from the same thread as
76// ProcessStream(). More precisely, stream functions are never called
77// concurrently with ProcessStream().
78// 2. Parameter getters are never called concurrently with the corresponding
79// setter.
80//
Sam Zackrisson3bd444f2022-08-03 12:37:0081// APM accepts only linear PCM audio data in chunks of ~10 ms (see
Sam Zackrisson5dd54822022-11-17 10:26:5882// AudioProcessing::GetFrameSize() for details) and sample rates ranging from
83// 8000 Hz to 384000 Hz. The int16 interfaces use interleaved data, while the
84// float interfaces use deinterleaved data.
niklase@google.com470e71d2011-07-07 08:21:2585//
86// Usage example, omitting error checking:
Sam Zackrisson5dd54822022-11-17 10:26:5887// rtc::scoped_refptr<AudioProcessing> apm = AudioProcessingBuilder().Create();
niklase@google.com470e71d2011-07-07 08:21:2588//
peah88ac8532016-09-12 23:47:2589// AudioProcessing::Config config;
Sam Zackrissoncdf0e6d2018-09-17 09:05:1790// config.echo_canceller.enabled = true;
91// config.echo_canceller.mobile_mode = false;
Sam Zackrisson41478c72019-10-15 08:10:2692//
93// config.gain_controller1.enabled = true;
94// config.gain_controller1.mode =
95// AudioProcessing::Config::GainController1::kAdaptiveAnalog;
96// config.gain_controller1.analog_level_minimum = 0;
97// config.gain_controller1.analog_level_maximum = 255;
98//
Sam Zackrissonab1aee02018-03-05 14:59:0699// config.gain_controller2.enabled = true;
Sam Zackrisson41478c72019-10-15 08:10:26100//
101// config.high_pass_filter.enabled = true;
102//
peah88ac8532016-09-12 23:47:25103// apm->ApplyConfig(config)
104//
niklase@google.com470e71d2011-07-07 08:21:25105// // Start a voice call...
106//
107// // ... Render frame arrives bound for the audio HAL ...
aluebsb0319552016-03-18 03:39:53108// apm->ProcessReverseStream(render_frame);
niklase@google.com470e71d2011-07-07 08:21:25109//
110// // ... Capture frame arrives from the audio HAL ...
111// // Call required set_stream_ functions.
112// apm->set_stream_delay_ms(delay_ms);
Sam Zackrisson41478c72019-10-15 08:10:26113// apm->set_stream_analog_level(analog_level);
niklase@google.com470e71d2011-07-07 08:21:25114//
115// apm->ProcessStream(capture_frame);
116//
117// // Call required stream_ functions.
Sam Zackrisson41478c72019-10-15 08:10:26118// analog_level = apm->recommended_stream_analog_level();
niklase@google.com470e71d2011-07-07 08:21:25119// has_voice = apm->stream_has_voice();
120//
Hua, Chunboe61a40e2021-01-08 08:34:49121// // Repeat render and capture processing for the duration of the call...
niklase@google.com470e71d2011-07-07 08:21:25122// // Start a new call...
123// apm->Initialize();
124//
125// // Close the application...
Sam Zackrisson5dd54822022-11-17 10:26:58126// apm.reset();
niklase@google.com470e71d2011-07-07 08:21:25127//
Harald Alvestrand78f905e2023-11-02 14:09:26128class RTC_EXPORT AudioProcessing : public RefCountInterface {
niklase@google.com470e71d2011-07-07 08:21:25129 public:
peah88ac8532016-09-12 23:47:25130 // The struct below constitutes the new parameter scheme for the audio
131 // processing. It is being introduced gradually and until it is fully
132 // introduced, it is prone to change.
133 // TODO(peah): Remove this comment once the new config scheme is fully rolled
134 // out.
135 //
136 // The parameters and behavior of the audio processing module are controlled
137 // by changing the default values in the AudioProcessing::Config struct.
138 // The config is applied by passing the struct to the ApplyConfig method.
Sam Zackrissonf0d1c032019-03-27 12:28:08139 //
140 // This config is intended to be used during setup, and to enable/disable
141 // top-level processing effects. Use during processing may cause undesired
142 // submodule resets, affecting the audio quality. Use the RuntimeSetting
143 // construct for runtime configuration.
Mirko Bonadeid4002a72019-11-12 19:11:48144 struct RTC_EXPORT Config {
Per Åhgrenfcbe4072019-09-14 22:27:58145 // Sets the properties of the audio processing pipeline.
Mirko Bonadeid4002a72019-11-12 19:11:48146 struct RTC_EXPORT Pipeline {
Alessio Bazzica504bd592022-12-01 12:26:26147 // Ways to downmix a multi-channel track to mono.
148 enum class DownmixMethod {
149 kAverageChannels, // Average across channels.
150 kUseFirstChannel // Use the first channel.
151 };
152
Per Åhgrenfcbe4072019-09-14 22:27:58153 // Maximum allowed processing rate used internally. May only be set to
Per Åhgren68c225d2021-01-21 22:03:32154 // 32000 or 48000 and any differing values will be treated as 48000.
155 int maximum_internal_processing_rate = 48000;
Per Åhgrene14cb992019-11-27 08:34:22156 // Allow multi-channel processing of render audio.
157 bool multi_channel_render = false;
158 // Allow multi-channel processing of capture audio when AEC3 is active
159 // or a custom AEC is injected..
160 bool multi_channel_capture = false;
Alessio Bazzica504bd592022-12-01 12:26:26161 // Indicates how to downmix multi-channel capture audio to mono (when
162 // needed).
163 DownmixMethod capture_downmix_method = DownmixMethod::kAverageChannels;
Per Åhgrenfcbe4072019-09-14 22:27:58164 } pipeline;
165
Sam Zackrisson23513132019-01-11 14:10:32166 // Enabled the pre-amplifier. It amplifies the capture signal
167 // before any other processing is done.
Per Åhgrendb5d7282021-03-15 16:31:04168 // TODO(webrtc:5298): Deprecate and use the pre-gain functionality in
169 // capture_level_adjustment instead.
Sam Zackrisson23513132019-01-11 14:10:32170 struct PreAmplifier {
171 bool enabled = false;
Alessio Bazzica841d74e2021-03-31 13:04:03172 float fixed_gain_factor = 1.0f;
Sam Zackrisson23513132019-01-11 14:10:32173 } pre_amplifier;
174
Per Åhgrendb5d7282021-03-15 16:31:04175 // Functionality for general level adjustment in the capture pipeline. This
176 // should not be used together with the legacy PreAmplifier functionality.
177 struct CaptureLevelAdjustment {
178 bool operator==(const CaptureLevelAdjustment& rhs) const;
179 bool operator!=(const CaptureLevelAdjustment& rhs) const {
180 return !(*this == rhs);
181 }
182 bool enabled = false;
183 // The `pre_gain_factor` scales the signal before any processing is done.
Alessio Bazzica841d74e2021-03-31 13:04:03184 float pre_gain_factor = 1.0f;
Per Åhgrendb5d7282021-03-15 16:31:04185 // The `post_gain_factor` scales the signal after all processing is done.
Alessio Bazzica841d74e2021-03-31 13:04:03186 float post_gain_factor = 1.0f;
Per Åhgrendb5d7282021-03-15 16:31:04187 struct AnalogMicGainEmulation {
188 bool operator==(const AnalogMicGainEmulation& rhs) const;
189 bool operator!=(const AnalogMicGainEmulation& rhs) const {
190 return !(*this == rhs);
191 }
192 bool enabled = false;
193 // Initial analog gain level to use for the emulated analog gain. Must
194 // be in the range [0...255].
195 int initial_level = 255;
196 } analog_mic_gain_emulation;
197 } capture_level_adjustment;
198
Sam Zackrisson23513132019-01-11 14:10:32199 struct HighPassFilter {
200 bool enabled = false;
Per Åhgrenc0424252019-12-10 12:04:15201 bool apply_in_full_band = true;
Sam Zackrisson23513132019-01-11 14:10:32202 } high_pass_filter;
203
Sam Zackrisson8b5d2cc2018-07-27 11:27:23204 struct EchoCanceller {
205 bool enabled = false;
206 bool mobile_mode = false;
Per Åhgrenc20a19c2019-11-13 10:12:29207 bool export_linear_aec_output = false;
Per Åhgrenb8106462019-12-04 07:34:12208 // Enforce the highpass filter to be on (has no effect for the mobile
209 // mode).
Per Åhgrenbcce4532019-12-03 12:52:40210 bool enforce_high_pass_filtering = true;
Sam Zackrisson8b5d2cc2018-07-27 11:27:23211 } echo_canceller;
212
Sam Zackrisson23513132019-01-11 14:10:32213 // Enables background noise suppression.
214 struct NoiseSuppression {
peah8271d042016-11-22 15:24:52215 bool enabled = false;
Sam Zackrisson23513132019-01-11 14:10:32216 enum Level { kLow, kModerate, kHigh, kVeryHigh };
217 Level level = kModerate;
Per Åhgren2e8e1c62019-12-19 23:42:22218 bool analyze_linear_aec_output_when_available = false;
Sam Zackrisson23513132019-01-11 14:10:32219 } noise_suppression;
peahe0eae3c2016-12-14 09:16:23220
Per Åhgrenc0734712020-01-02 14:15:36221 // Enables transient suppression.
222 struct TransientSuppression {
223 bool enabled = false;
224 } transient_suppression;
225
Sam Zackrissonf0d1c032019-03-27 12:28:08226 // Enables automatic gain control (AGC) functionality.
227 // The automatic gain control (AGC) component brings the signal to an
228 // appropriate range. This is done by applying a digital gain directly and,
229 // in the analog mode, prescribing an analog gain to be applied at the audio
230 // HAL.
231 // Recommended to be enabled on the client-side.
Alessio Bazzicadfc11d52021-05-07 09:58:11232 struct RTC_EXPORT GainController1 {
Alessio Bazzica3438a932020-10-14 10:47:50233 bool operator==(const GainController1& rhs) const;
234 bool operator!=(const GainController1& rhs) const {
235 return !(*this == rhs);
236 }
237
Sam Zackrissonf0d1c032019-03-27 12:28:08238 bool enabled = false;
239 enum Mode {
240 // Adaptive mode intended for use if an analog volume control is
241 // available on the capture device. It will require the user to provide
242 // coupling between the OS mixer controls and AGC through the
243 // stream_analog_level() functions.
244 // It consists of an analog gain prescription for the audio device and a
245 // digital compression stage.
246 kAdaptiveAnalog,
247 // Adaptive mode intended for situations in which an analog volume
248 // control is unavailable. It operates in a similar fashion to the
249 // adaptive analog mode, but with scaling instead applied in the digital
250 // domain. As with the analog mode, it additionally uses a digital
251 // compression stage.
252 kAdaptiveDigital,
253 // Fixed mode which enables only the digital compression stage also used
254 // by the two adaptive modes.
255 // It is distinguished from the adaptive modes by considering only a
256 // short time-window of the input signal. It applies a fixed gain
257 // through most of the input level range, and compresses (gradually
258 // reduces gain with increasing level) the input signal at higher
259 // levels. This mode is preferred on embedded devices where the capture
260 // signal level is predictable, so that a known gain can be applied.
261 kFixedDigital
262 };
263 Mode mode = kAdaptiveAnalog;
264 // Sets the target peak level (or envelope) of the AGC in dBFs (decibels
265 // from digital full-scale). The convention is to use positive values. For
266 // instance, passing in a value of 3 corresponds to -3 dBFs, or a target
267 // level 3 dB below full-scale. Limited to [0, 31].
268 int target_level_dbfs = 3;
269 // Sets the maximum gain the digital compression stage may apply, in dB. A
270 // higher number corresponds to greater compression, while a value of 0
271 // will leave the signal uncompressed. Limited to [0, 90].
272 // For updates after APM setup, use a RuntimeSetting instead.
273 int compression_gain_db = 9;
274 // When enabled, the compression stage will hard limit the signal to the
275 // target level. Otherwise, the signal will be compressed but not limited
276 // above the target level.
277 bool enable_limiter = true;
Per Åhgren0695df12020-01-13 13:43:13278
279 // Enables the analog gain controller functionality.
280 struct AnalogGainController {
281 bool enabled = true;
Alessio Bazzica7afd6982022-10-13 15:15:36282 // TODO(bugs.webrtc.org/7494): Deprecated. Stop using and remove.
283 int startup_min_volume = 0;
Per Åhgren0695df12020-01-13 13:43:13284 // Lowest analog microphone level that will be applied in response to
285 // clipping.
Alessio Bazzica488f6692022-10-13 11:06:05286 int clipped_level_min = 70;
Alessio Bazzica866caeb2022-07-19 10:18:38287 // If true, an adaptive digital gain is applied.
Per Åhgren0695df12020-01-13 13:43:13288 bool enable_digital_adaptive = true;
Hanna Silenb8dc7fa2021-05-20 15:37:56289 // Amount the microphone level is lowered with every clipping event.
290 // Limited to (0, 255].
291 int clipped_level_step = 15;
292 // Proportion of clipped samples required to declare a clipping event.
293 // Limited to (0.f, 1.f).
294 float clipped_ratio_threshold = 0.1f;
295 // Time in frames to wait after a clipping event before checking again.
296 // Limited to values higher than 0.
297 int clipped_wait_frames = 300;
Hanna Silena43953a2021-06-02 15:13:24298
299 // Enables clipping prediction functionality.
300 struct ClippingPredictor {
301 bool enabled = false;
302 enum Mode {
Alessio Bazzicab237a872021-06-11 10:37:54303 // Clipping event prediction mode with fixed step estimation.
Hanna Silena43953a2021-06-02 15:13:24304 kClippingEventPrediction,
Alessio Bazzicab237a872021-06-11 10:37:54305 // Clipped peak estimation mode with adaptive step estimation.
Hanna Silena43953a2021-06-02 15:13:24306 kAdaptiveStepClippingPeakPrediction,
Alessio Bazzicab237a872021-06-11 10:37:54307 // Clipped peak estimation mode with fixed step estimation.
Hanna Silena43953a2021-06-02 15:13:24308 kFixedStepClippingPeakPrediction,
309 };
310 Mode mode = kClippingEventPrediction;
Alessio Bazzicab237a872021-06-11 10:37:54311 // Number of frames in the sliding analysis window.
Hanna Silena43953a2021-06-02 15:13:24312 int window_length = 5;
Alessio Bazzicab237a872021-06-11 10:37:54313 // Number of frames in the sliding reference window.
Hanna Silena43953a2021-06-02 15:13:24314 int reference_window_length = 5;
Alessio Bazzicab237a872021-06-11 10:37:54315 // Reference window delay (unit: number of frames).
Hanna Silena43953a2021-06-02 15:13:24316 int reference_window_delay = 5;
Alessio Bazzicab237a872021-06-11 10:37:54317 // Clipping prediction threshold (dBFS).
Hanna Silena43953a2021-06-02 15:13:24318 float clipping_threshold = -1.0f;
319 // Crest factor drop threshold (dB).
320 float crest_factor_margin = 3.0f;
Alessio Bazzica42dacda2021-06-17 15:18:46321 // If true, the recommended clipped level step is used to modify the
322 // analog gain. Otherwise, the predictor runs without affecting the
323 // analog gain.
324 bool use_predicted_step = true;
Hanna Silena43953a2021-06-02 15:13:24325 } clipping_predictor;
Per Åhgren0695df12020-01-13 13:43:13326 } analog_gain_controller;
Sam Zackrissonf0d1c032019-03-27 12:28:08327 } gain_controller1;
328
Alessio Bazzica4366c542022-12-05 15:31:16329 // Parameters for AGC2, an Automatic Gain Control (AGC) sub-module which
330 // replaces the AGC sub-module parametrized by `gain_controller1`.
331 // AGC2 brings the captured audio signal to the desired level by combining
332 // three different controllers (namely, input volume controller, adapative
333 // digital controller and fixed digital controller) and a limiter.
334 // TODO(bugs.webrtc.org:7494): Name `GainController` when AGC1 removed.
Alessio Bazzicadfc11d52021-05-07 09:58:11335 struct RTC_EXPORT GainController2 {
Alessio Bazzica3438a932020-10-14 10:47:50336 bool operator==(const GainController2& rhs) const;
337 bool operator!=(const GainController2& rhs) const {
338 return !(*this == rhs);
339 }
340
Alessio Bazzica4366c542022-12-05 15:31:16341 // AGC2 must be created if and only if `enabled` is true.
alessiob3ec96df2017-05-22 13:57:06342 bool enabled = false;
Alessio Bazzica4366c542022-12-05 15:31:16343
344 // Parameters for the input volume controller, which adjusts the input
345 // volume applied when the audio is captured (e.g., microphone volume on
346 // a soundcard, input volume on HAL).
347 struct InputVolumeController {
348 bool operator==(const InputVolumeController& rhs) const;
349 bool operator!=(const InputVolumeController& rhs) const {
350 return !(*this == rhs);
351 }
352 bool enabled = false;
353 } input_volume_controller;
354
355 // Parameters for the adaptive digital controller, which adjusts and
356 // applies a digital gain after echo cancellation and after noise
357 // suppression.
Alessio Bazzicadfc11d52021-05-07 09:58:11358 struct RTC_EXPORT AdaptiveDigital {
Alessio Bazzicaa2efd152021-04-29 14:17:49359 bool operator==(const AdaptiveDigital& rhs) const;
360 bool operator!=(const AdaptiveDigital& rhs) const {
361 return !(*this == rhs);
362 }
Alessio Bazzica8da7b352018-11-21 09:50:58363 bool enabled = false;
Alessio Bazzicaa850e6c2021-10-04 11:35:55364 float headroom_db = 6.0f;
Alessio Bazzicaa850e6c2021-10-04 11:35:55365 float max_gain_db = 30.0f;
366 float initial_gain_db = 8.0f;
Alessio Bazzica841d74e2021-03-31 13:04:03367 float max_gain_change_db_per_second = 3.0f;
Alessio Bazzica980c4602021-04-14 17:09:17368 float max_output_noise_level_dbfs = -50.0f;
Alessio Bazzica1e2542f2018-11-13 13:44:15369 } adaptive_digital;
Hanna Silen9f06ef12022-11-01 16:17:54370
Alessio Bazzica4366c542022-12-05 15:31:16371 // Parameters for the fixed digital controller, which applies a fixed
372 // digital gain after the adaptive digital controller and before the
373 // limiter.
374 struct FixedDigital {
375 // By setting `gain_db` to a value greater than zero, the limiter can be
376 // turned into a compressor that first applies a fixed gain.
377 float gain_db = 0.0f;
378 } fixed_digital;
alessiob3ec96df2017-05-22 13:57:06379 } gain_controller2;
peah8cee56f2017-08-25 05:36:53380
Artem Titov59bbd652019-08-02 09:31:37381 std::string ToString() const;
peah88ac8532016-09-12 23:47:25382 };
383
Alessio Bazzicac054e782018-04-16 10:10:09384 // Specifies the properties of a setting to be passed to AudioProcessing at
385 // runtime.
386 class RuntimeSetting {
387 public:
Alex Loiko73ec0192018-05-15 08:52:28388 enum class Type {
389 kNotSpecified,
390 kCapturePreGain,
Sam Zackrissonf0d1c032019-03-27 12:28:08391 kCaptureCompressionGain,
Per Åhgren6ee75fd2019-04-26 09:33:37392 kCaptureFixedPostGain,
Fredrik Hernqvistca362852019-05-10 13:50:02393 kPlayoutVolumeChange,
Alessio Bazzica7c19a702019-11-07 12:22:00394 kCustomRenderProcessingRuntimeSetting,
Per Åhgren552d3e32020-08-12 06:46:47395 kPlayoutAudioDeviceChange,
Per Åhgrendb5d7282021-03-15 16:31:04396 kCapturePostGain,
Per Åhgren552d3e32020-08-12 06:46:47397 kCaptureOutputUsed
Alessio Bazzica7c19a702019-11-07 12:22:00398 };
399
400 // Play-out audio device properties.
401 struct PlayoutAudioDeviceInfo {
402 int id; // Identifies the audio device.
403 int max_volume; // Maximum play-out volume.
Alex Loiko73ec0192018-05-15 08:52:28404 };
Alessio Bazzicac054e782018-04-16 10:10:09405
Alessio Bazzica841d74e2021-03-31 13:04:03406 RuntimeSetting() : type_(Type::kNotSpecified), value_(0.0f) {}
Alessio Bazzicac054e782018-04-16 10:10:09407 ~RuntimeSetting() = default;
408
409 static RuntimeSetting CreateCapturePreGain(float gain) {
Alessio Bazzicac054e782018-04-16 10:10:09410 return {Type::kCapturePreGain, gain};
411 }
412
Per Åhgrendb5d7282021-03-15 16:31:04413 static RuntimeSetting CreateCapturePostGain(float gain) {
414 return {Type::kCapturePostGain, gain};
415 }
416
Sam Zackrissonf0d1c032019-03-27 12:28:08417 // Corresponds to Config::GainController1::compression_gain_db, but for
418 // runtime configuration.
419 static RuntimeSetting CreateCompressionGainDb(int gain_db) {
420 RTC_DCHECK_GE(gain_db, 0);
421 RTC_DCHECK_LE(gain_db, 90);
422 return {Type::kCaptureCompressionGain, static_cast<float>(gain_db)};
423 }
424
Per Åhgren6ee75fd2019-04-26 09:33:37425 // Corresponds to Config::GainController2::fixed_digital::gain_db, but for
426 // runtime configuration.
427 static RuntimeSetting CreateCaptureFixedPostGain(float gain_db) {
Alessio Bazzica841d74e2021-03-31 13:04:03428 RTC_DCHECK_GE(gain_db, 0.0f);
429 RTC_DCHECK_LE(gain_db, 90.0f);
Per Åhgren6ee75fd2019-04-26 09:33:37430 return {Type::kCaptureFixedPostGain, gain_db};
431 }
432
Alessio Bazzica7c19a702019-11-07 12:22:00433 // Creates a runtime setting to notify play-out (aka render) audio device
434 // changes.
435 static RuntimeSetting CreatePlayoutAudioDeviceChange(
436 PlayoutAudioDeviceInfo audio_device) {
437 return {Type::kPlayoutAudioDeviceChange, audio_device};
438 }
439
440 // Creates a runtime setting to notify play-out (aka render) volume changes.
Artem Titov0b489302021-07-28 18:50:03441 // `volume` is the unnormalized volume, the maximum of which
Fredrik Hernqvistca362852019-05-10 13:50:02442 static RuntimeSetting CreatePlayoutVolumeChange(int volume) {
443 return {Type::kPlayoutVolumeChange, volume};
444 }
445
Alex Loiko73ec0192018-05-15 08:52:28446 static RuntimeSetting CreateCustomRenderSetting(float payload) {
447 return {Type::kCustomRenderProcessingRuntimeSetting, payload};
448 }
449
Per Åhgren652ada52021-03-03 10:52:44450 static RuntimeSetting CreateCaptureOutputUsedSetting(
451 bool capture_output_used) {
452 return {Type::kCaptureOutputUsed, capture_output_used};
Per Åhgren552d3e32020-08-12 06:46:47453 }
454
Alessio Bazzicac054e782018-04-16 10:10:09455 Type type() const { return type_; }
Alessio Bazzica7c19a702019-11-07 12:22:00456 // Getters do not return a value but instead modify the argument to protect
457 // from implicit casting.
Alessio Bazzicac054e782018-04-16 10:10:09458 void GetFloat(float* value) const {
459 RTC_DCHECK(value);
Fredrik Hernqvistca362852019-05-10 13:50:02460 *value = value_.float_value;
461 }
462 void GetInt(int* value) const {
463 RTC_DCHECK(value);
464 *value = value_.int_value;
Alessio Bazzicac054e782018-04-16 10:10:09465 }
Per Åhgren552d3e32020-08-12 06:46:47466 void GetBool(bool* value) const {
467 RTC_DCHECK(value);
468 *value = value_.bool_value;
469 }
Alessio Bazzica7c19a702019-11-07 12:22:00470 void GetPlayoutAudioDeviceInfo(PlayoutAudioDeviceInfo* value) const {
471 RTC_DCHECK(value);
472 *value = value_.playout_audio_device_info;
473 }
Alessio Bazzicac054e782018-04-16 10:10:09474
475 private:
476 RuntimeSetting(Type id, float value) : type_(id), value_(value) {}
Fredrik Hernqvistca362852019-05-10 13:50:02477 RuntimeSetting(Type id, int value) : type_(id), value_(value) {}
Alessio Bazzica7c19a702019-11-07 12:22:00478 RuntimeSetting(Type id, PlayoutAudioDeviceInfo value)
479 : type_(id), value_(value) {}
Alessio Bazzicac054e782018-04-16 10:10:09480 Type type_;
Fredrik Hernqvistca362852019-05-10 13:50:02481 union U {
482 U() {}
483 U(int value) : int_value(value) {}
484 U(float value) : float_value(value) {}
Alessio Bazzica7c19a702019-11-07 12:22:00485 U(PlayoutAudioDeviceInfo value) : playout_audio_device_info(value) {}
Fredrik Hernqvistca362852019-05-10 13:50:02486 float float_value;
487 int int_value;
Per Åhgren552d3e32020-08-12 06:46:47488 bool bool_value;
Alessio Bazzica7c19a702019-11-07 12:22:00489 PlayoutAudioDeviceInfo playout_audio_device_info;
Fredrik Hernqvistca362852019-05-10 13:50:02490 } value_;
Alessio Bazzicac054e782018-04-16 10:10:09491 };
492
peaha9cc40b2017-06-29 15:32:09493 ~AudioProcessing() override {}
niklase@google.com470e71d2011-07-07 08:21:25494
niklase@google.com470e71d2011-07-07 08:21:25495 // Initializes internal states, while retaining all user settings. This
496 // should be called before beginning to process a new audio stream. However,
497 // it is not necessary to call before processing the first stream after
andrew@webrtc.orgddbb8a22014-04-22 21:00:04498 // creation.
499 //
500 // It is also not necessary to call if the audio parameters (sample
andrew@webrtc.org60730cf2014-01-07 17:45:09501 // rate and number of channels) have changed. Passing updated parameters
Artem Titov0b489302021-07-28 18:50:03502 // directly to `ProcessStream()` and `ProcessReverseStream()` is permissible.
andrew@webrtc.orgddbb8a22014-04-22 21:00:04503 // If the parameters are known at init-time though, they may be provided.
Per Åhgren0ade9832020-09-01 21:57:20504 // TODO(webrtc:5298): Change to return void.
niklase@google.com470e71d2011-07-07 08:21:25505 virtual int Initialize() = 0;
andrew@webrtc.orgddbb8a22014-04-22 21:00:04506
507 // The int16 interfaces require:
Artem Titov0b489302021-07-28 18:50:03508 // - only `NativeRate`s be used
andrew@webrtc.orgddbb8a22014-04-22 21:00:04509 // - that the input, output and reverse rates must match
Artem Titovcfea2182021-08-09 23:22:31510 // - that `processing_config.output_stream()` matches
511 // `processing_config.input_stream()`.
andrew@webrtc.orgddbb8a22014-04-22 21:00:04512 //
Michael Graczyk86c6d332015-07-23 18:41:39513 // The float interfaces accept arbitrary rates and support differing input and
514 // output layouts, but the output must have either one channel or the same
515 // number of channels as the input.
516 virtual int Initialize(const ProcessingConfig& processing_config) = 0;
517
peah88ac8532016-09-12 23:47:25518 // TODO(peah): This method is a temporary solution used to take control
519 // over the parameters in the audio processing module and is likely to change.
520 virtual void ApplyConfig(const Config& config) = 0;
521
andrew@webrtc.orgddbb8a22014-04-22 21:00:04522 // TODO(ajm): Only intended for internal use. Make private and friend the
523 // necessary classes?
524 virtual int proc_sample_rate_hz() const = 0;
525 virtual int proc_split_sample_rate_hz() const = 0;
Peter Kasting69558702016-01-13 00:26:35526 virtual size_t num_input_channels() const = 0;
527 virtual size_t num_proc_channels() const = 0;
528 virtual size_t num_output_channels() const = 0;
529 virtual size_t num_reverse_channels() const = 0;
niklase@google.com470e71d2011-07-07 08:21:25530
andrew@webrtc.org17342e52014-02-12 22:28:31531 // Set to true when the output of AudioProcessing will be muted or in some
532 // other way not used. Ideally, the captured audio would still be processed,
533 // but some components may change behavior based on this information.
Per Åhgren0a144a72021-02-09 07:47:51534 // Default false. This method takes a lock. To achieve this in a lock-less
535 // manner the PostRuntimeSetting can instead be used.
andrew@webrtc.org17342e52014-02-12 22:28:31536 virtual void set_output_will_be_muted(bool muted) = 0;
andrew@webrtc.org17342e52014-02-12 22:28:31537
Per Åhgren0a144a72021-02-09 07:47:51538 // Enqueues a runtime setting.
Alessio Bazzicac054e782018-04-16 10:10:09539 virtual void SetRuntimeSetting(RuntimeSetting setting) = 0;
540
Per Åhgren0a144a72021-02-09 07:47:51541 // Enqueues a runtime setting. Returns a bool indicating whether the
542 // enqueueing was successfull.
Per Åhgren8eea1172021-02-09 22:15:07543 virtual bool PostRuntimeSetting(RuntimeSetting setting) = 0;
Per Åhgren0a144a72021-02-09 07:47:51544
Sam Zackrisson3bd444f2022-08-03 12:37:00545 // Accepts and produces a ~10 ms frame of interleaved 16 bit integer audio as
Artem Titov0b489302021-07-28 18:50:03546 // specified in `input_config` and `output_config`. `src` and `dest` may use
Per Åhgren645f24c2020-03-16 11:06:02547 // the same memory, if desired.
548 virtual int ProcessStream(const int16_t* const src,
549 const StreamConfig& input_config,
550 const StreamConfig& output_config,
Per Åhgrendc5522b2020-03-19 13:55:58551 int16_t* const dest) = 0;
Per Åhgren645f24c2020-03-16 11:06:02552
Michael Graczyk86c6d332015-07-23 18:41:39553 // Accepts deinterleaved float audio with the range [-1, 1]. Each element of
Artem Titov0b489302021-07-28 18:50:03554 // `src` points to a channel buffer, arranged according to `input_stream`. At
555 // output, the channels will be arranged according to `output_stream` in
556 // `dest`.
Michael Graczyk86c6d332015-07-23 18:41:39557 //
Artem Titov0b489302021-07-28 18:50:03558 // The output must have one channel or as many channels as the input. `src`
559 // and `dest` may use the same memory, if desired.
Michael Graczyk86c6d332015-07-23 18:41:39560 virtual int ProcessStream(const float* const* src,
561 const StreamConfig& input_config,
562 const StreamConfig& output_config,
563 float* const* dest) = 0;
564
Sam Zackrisson3bd444f2022-08-03 12:37:00565 // Accepts and produces a ~10 ms frame of interleaved 16 bit integer audio for
Artem Titov0b489302021-07-28 18:50:03566 // the reverse direction audio stream as specified in `input_config` and
567 // `output_config`. `src` and `dest` may use the same memory, if desired.
Per Åhgren645f24c2020-03-16 11:06:02568 virtual int ProcessReverseStream(const int16_t* const src,
569 const StreamConfig& input_config,
570 const StreamConfig& output_config,
571 int16_t* const dest) = 0;
572
Michael Graczyk86c6d332015-07-23 18:41:39573 // Accepts deinterleaved float audio with the range [-1, 1]. Each element of
Artem Titov0b489302021-07-28 18:50:03574 // `data` points to a channel buffer, arranged according to `reverse_config`.
ekmeyerson60d9b332015-08-14 17:35:55575 virtual int ProcessReverseStream(const float* const* src,
peahde65ddc2016-09-16 22:02:15576 const StreamConfig& input_config,
577 const StreamConfig& output_config,
ekmeyerson60d9b332015-08-14 17:35:55578 float* const* dest) = 0;
Michael Graczyk86c6d332015-07-23 18:41:39579
Gustaf Ullbergcb307262019-10-29 08:30:44580 // Accepts deinterleaved float audio with the range [-1, 1]. Each element
Artem Titov0b489302021-07-28 18:50:03581 // of `data` points to a channel buffer, arranged according to
582 // `reverse_config`.
Gustaf Ullbergcb307262019-10-29 08:30:44583 virtual int AnalyzeReverseStream(const float* const* data,
584 const StreamConfig& reverse_config) = 0;
585
Sam Zackrisson3bd444f2022-08-03 12:37:00586 // Returns the most recently produced ~10 ms of the linear AEC output at a
587 // rate of 16 kHz. If there is more than one capture channel, a mono
588 // representation of the input is returned. Returns true/false to indicate
589 // whether an output returned.
Per Åhgrenc20a19c2019-11-13 10:12:29590 virtual bool GetLinearAecOutput(
591 rtc::ArrayView<std::array<float, 160>> linear_output) const = 0;
592
Sam Zackrissonf0d1c032019-03-27 12:28:08593 // This must be called prior to ProcessStream() if and only if adaptive analog
594 // gain control is enabled, to pass the current analog level from the audio
Hanna Silencd597042021-11-02 10:02:48595 // HAL. Must be within the range [0, 255].
Sam Zackrissonf0d1c032019-03-27 12:28:08596 virtual void set_stream_analog_level(int level) = 0;
597
Alessio Bazzicafcf1af32022-09-07 15:14:26598 // When an analog mode is set, this should be called after
599 // `set_stream_analog_level()` and `ProcessStream()` to obtain the recommended
600 // new analog level for the audio HAL. It is the user's responsibility to
601 // apply this level.
Sam Zackrissonf0d1c032019-03-27 12:28:08602 virtual int recommended_stream_analog_level() const = 0;
603
niklase@google.com470e71d2011-07-07 08:21:25604 // This must be called if and only if echo processing is enabled.
605 //
Artem Titov0b489302021-07-28 18:50:03606 // Sets the `delay` in ms between ProcessReverseStream() receiving a far-end
niklase@google.com470e71d2011-07-07 08:21:25607 // frame and ProcessStream() receiving a near-end frame containing the
608 // corresponding echo. On the client-side this can be expressed as
609 // delay = (t_render - t_analyze) + (t_process - t_capture)
610 // where,
aluebsb0319552016-03-18 03:39:53611 // - t_analyze is the time a frame is passed to ProcessReverseStream() and
niklase@google.com470e71d2011-07-07 08:21:25612 // t_render is the time the first sample of the same frame is rendered by
613 // the audio hardware.
614 // - t_capture is the time the first sample of a frame is captured by the
alessiob13fc1802017-04-19 12:35:51615 // audio hardware and t_process is the time the same frame is passed to
niklase@google.com470e71d2011-07-07 08:21:25616 // ProcessStream().
617 virtual int set_stream_delay_ms(int delay) = 0;
618 virtual int stream_delay_ms() const = 0;
619
andrew@webrtc.org75dd2882014-02-11 20:52:30620 // Call to signal that a key press occurred (true) or did not occur (false)
621 // with this chunk of audio.
622 virtual void set_stream_key_pressed(bool key_pressed) = 0;
andrew@webrtc.org75dd2882014-02-11 20:52:30623
Per Åhgren09e9a832020-05-11 09:03:47624 // Creates and attaches an webrtc::AecDump for recording debugging
625 // information.
Artem Titov0b489302021-07-28 18:50:03626 // The `worker_queue` may not be null and must outlive the created
Per Åhgren09e9a832020-05-11 09:03:47627 // AecDump instance. |max_log_size_bytes == -1| means the log size
Artem Titov0b489302021-07-28 18:50:03628 // will be unlimited. `handle` may not be null. The AecDump takes
629 // responsibility for `handle` and closes it in the destructor. A
Per Åhgren09e9a832020-05-11 09:03:47630 // return value of true indicates that the file has been
631 // sucessfully opened, while a value of false indicates that
632 // opening the file failed.
Danil Chapovalov1ecf29c2024-01-09 10:52:10633 virtual bool CreateAndAttachAecDump(
634 absl::string_view file_name,
635 int64_t max_log_size_bytes,
Danil Chapovalove052eee2024-01-15 10:42:13636 absl::Nonnull<TaskQueueBase*> worker_queue) = 0;
Danil Chapovalov1ecf29c2024-01-09 10:52:10637 virtual bool CreateAndAttachAecDump(
638 absl::Nonnull<FILE*> handle,
639 int64_t max_log_size_bytes,
Danil Chapovalove052eee2024-01-15 10:42:13640 absl::Nonnull<TaskQueueBase*> worker_queue) = 0;
Per Åhgren09e9a832020-05-11 09:03:47641
642 // TODO(webrtc:5298) Deprecated variant.
aleloi868f32f2017-05-23 14:20:05643 // Attaches provided webrtc::AecDump for recording debugging
644 // information. Log file and maximum file size logic is supposed to
645 // be handled by implementing instance of AecDump. Calling this
646 // method when another AecDump is attached resets the active AecDump
647 // with a new one. This causes the d-tor of the earlier AecDump to
648 // be called. The d-tor call may block until all pending logging
649 // tasks are completed.
Alex Loikobe767e02017-06-08 07:45:03650 virtual void AttachAecDump(std::unique_ptr<AecDump> aec_dump) = 0;
aleloi868f32f2017-05-23 14:20:05651
652 // If no AecDump is attached, this has no effect. If an AecDump is
653 // attached, it's destructor is called. The d-tor may block until
654 // all pending logging tasks are completed.
Alex Loikobe767e02017-06-08 07:45:03655 virtual void DetachAecDump() = 0;
aleloi868f32f2017-05-23 14:20:05656
Per Åhgrencf4c8722019-12-30 13:32:14657 // Get audio processing statistics.
658 virtual AudioProcessingStats GetStatistics() = 0;
Artem Titov0b489302021-07-28 18:50:03659 // TODO(webrtc:5298) Deprecated variant. The `has_remote_tracks` argument
Per Åhgrencf4c8722019-12-30 13:32:14660 // should be set if there are active remote tracks (this would usually be true
661 // during a call). If there are no remote tracks some of the stats will not be
662 // set by AudioProcessing, because they only make sense if there is at least
663 // one remote track.
664 virtual AudioProcessingStats GetStatistics(bool has_remote_tracks) = 0;
Ivo Creusenae0260962017-11-20 12:07:16665
henrik.lundinadf06352017-04-05 12:48:24666 // Returns the last applied configuration.
henrik.lundin77492862017-04-07 06:28:09667 virtual AudioProcessing::Config GetConfig() const = 0;
henrik.lundinadf06352017-04-05 12:48:24668
// Error and warning codes. kNoError is zero; every other code is negative.
enum Error {
  // Fatal errors.
  kNoError = 0,
  kUnspecifiedError = -1,
  kCreationFailedError = -2,
  kUnsupportedComponentError = -3,
  kUnsupportedFunctionError = -4,
  kNullPointerError = -5,
  kBadParameterError = -6,
  kBadSampleRateError = -7,
  kBadDataLengthError = -8,
  kBadNumberChannelsError = -9,
  kFileError = -10,
  kStreamParameterNotSetError = -11,
  kNotEnabledError = -12,

  // Warnings are non-fatal.
  // Reported when a set_stream_ parameter is out of range. Processing
  // continues, but the parameter may have been truncated.
  kBadStreamParameterWarning = -13
};
andrew@webrtc.org56e4a052014-02-27 22:23:17690
// Native rates supported by the integer interfaces. Each enumerator's value
// is the rate in Hz.
enum NativeRate {
  kSampleRate8kHz = 8000,
  kSampleRate16kHz = 16000,
  kSampleRate32kHz = 32000,
  kSampleRate48kHz = 48000
};
andrew@webrtc.orgddbb8a22014-04-22 21:00:04698
kwibergd59d3bb2016-09-13 14:49:33699 // TODO(kwiberg): We currently need to support a compiler (Visual C++) that
700 // complains if we don't explicitly state the size of the array here. Remove
701 // the size when that's no longer the case.
702 static constexpr int kNativeSampleRatesHz[4] = {
703 kSampleRate8kHz, kSampleRate16kHz, kSampleRate32kHz, kSampleRate48kHz};
704 static constexpr size_t kNumNativeSampleRates =
705 arraysize(kNativeSampleRatesHz);
706 static constexpr int kMaxNativeSampleRateHz =
707 kNativeSampleRatesHz[kNumNativeSampleRates - 1];
Alejandro Luebscdfe20b2015-09-23 19:49:12708
// Nominal duration, in ms, of the chunks in which APM processes audio. The
// chunks are only about 10 ms long; GetFrameSize() has the details.
static constexpr int kChunkSizeMs = 10;
Sam Zackrisson3bd444f2022-08-03 12:37:00712
// Returns floor(sample_rate_hz/100): the number of samples per channel used
// as input and output to the audio processing module in calls to
// ProcessStream, ProcessReverseStream, AnalyzeReverseStream, and
// GetLinearAecOutput.
//
// For sample rates divisible by 100 this is exactly 10 ms. For example:
//  - 48000 Hz (480 samples per channel),
//  - 44100 Hz (441 samples per channel),
//  - 16000 Hz (160 samples per channel).
//
// Sample rates not divisible by 100 are received/produced in frames of
// approximately 10 ms. For example:
//  - 22050 Hz (220 samples per channel, or ~9.98 ms per frame),
//  - 11025 Hz (110 samples per channel, or ~9.98 ms per frame).
// These nondivisible sample rates yield lower audio quality compared to
// multiples of 100. Internal resampling to 10 ms frames causes a simulated
// clock drift effect which impacts the performance of (for example) echo
// cancellation.
//
// Declared constexpr so that frame sizes can also be computed in constant
// expressions (array bounds, static_assert, other constexpr members).
static constexpr int GetFrameSize(int sample_rate_hz) {
  return sample_rate_hz / 100;
}
niklase@google.com470e71d2011-07-07 08:21:25732};
733
Mirko Bonadei3d255302018-10-11 08:50:45734class RTC_EXPORT AudioProcessingBuilder {
Ivo Creusen5ec7e122017-12-22 10:35:59735 public:
736 AudioProcessingBuilder();
Alessio Bazzica20a9ac62021-10-14 08:55:08737 AudioProcessingBuilder(const AudioProcessingBuilder&) = delete;
738 AudioProcessingBuilder& operator=(const AudioProcessingBuilder&) = delete;
Ivo Creusen5ec7e122017-12-22 10:35:59739 ~AudioProcessingBuilder();
Alessio Bazzica20a9ac62021-10-14 08:55:08740
741 // Sets the APM configuration.
742 AudioProcessingBuilder& SetConfig(const AudioProcessing::Config& config) {
743 config_ = config;
744 return *this;
745 }
746
747 // Sets the echo controller factory to inject when APM is created.
Ivo Creusen5ec7e122017-12-22 10:35:59748 AudioProcessingBuilder& SetEchoControlFactory(
Per Åhgrencc73ed32020-04-26 21:56:17749 std::unique_ptr<EchoControlFactory> echo_control_factory) {
750 echo_control_factory_ = std::move(echo_control_factory);
751 return *this;
752 }
Alessio Bazzica20a9ac62021-10-14 08:55:08753
754 // Sets the capture post-processing sub-module to inject when APM is created.
Ivo Creusen5ec7e122017-12-22 10:35:59755 AudioProcessingBuilder& SetCapturePostProcessing(
Per Åhgrencc73ed32020-04-26 21:56:17756 std::unique_ptr<CustomProcessing> capture_post_processing) {
757 capture_post_processing_ = std::move(capture_post_processing);
758 return *this;
759 }
Alessio Bazzica20a9ac62021-10-14 08:55:08760
761 // Sets the render pre-processing sub-module to inject when APM is created.
Ivo Creusen5ec7e122017-12-22 10:35:59762 AudioProcessingBuilder& SetRenderPreProcessing(
Per Åhgrencc73ed32020-04-26 21:56:17763 std::unique_ptr<CustomProcessing> render_pre_processing) {
764 render_pre_processing_ = std::move(render_pre_processing);
765 return *this;
766 }
Alessio Bazzica20a9ac62021-10-14 08:55:08767
768 // Sets the echo detector to inject when APM is created.
Ivo Creusen09fa4b02018-01-11 15:08:54769 AudioProcessingBuilder& SetEchoDetector(
Per Åhgrencc73ed32020-04-26 21:56:17770 rtc::scoped_refptr<EchoDetector> echo_detector) {
771 echo_detector_ = std::move(echo_detector);
772 return *this;
773 }
Alessio Bazzica20a9ac62021-10-14 08:55:08774
775 // Sets the capture analyzer sub-module to inject when APM is created.
Valeriia Nemychnikovaf06eb572018-08-29 08:37:09776 AudioProcessingBuilder& SetCaptureAnalyzer(
Per Åhgrencc73ed32020-04-26 21:56:17777 std::unique_ptr<CustomAudioAnalyzer> capture_analyzer) {
778 capture_analyzer_ = std::move(capture_analyzer);
779 return *this;
780 }
Alessio Bazzica20a9ac62021-10-14 08:55:08781
782 // Creates an APM instance with the specified config or the default one if
783 // unspecified. Injects the specified components transferring the ownership
784 // to the newly created APM instance - i.e., except for the config, the
785 // builder is reset to its initial state.
Niels Möller4f776ac2021-07-02 09:30:54786 rtc::scoped_refptr<AudioProcessing> Create();
Ivo Creusen5ec7e122017-12-22 10:35:59787
788 private:
Alessio Bazzica20a9ac62021-10-14 08:55:08789 AudioProcessing::Config config_;
Ivo Creusen5ec7e122017-12-22 10:35:59790 std::unique_ptr<EchoControlFactory> echo_control_factory_;
791 std::unique_ptr<CustomProcessing> capture_post_processing_;
792 std::unique_ptr<CustomProcessing> render_pre_processing_;
Ivo Creusend1f970d2018-06-14 09:02:03793 rtc::scoped_refptr<EchoDetector> echo_detector_;
Valeriia Nemychnikovaf06eb572018-08-29 08:37:09794 std::unique_ptr<CustomAudioAnalyzer> capture_analyzer_;
Ivo Creusen5ec7e122017-12-22 10:35:59795};
796
Michael Graczyk86c6d332015-07-23 18:41:39797class StreamConfig {
798 public:
799 // sample_rate_hz: The sampling rate of the stream.
Henrik Lundin64253a92022-02-04 09:02:48800 // num_channels: The number of audio channels in the stream.
Alessio Bazzicac7d0e422022-02-04 16:06:55801 StreamConfig(int sample_rate_hz = 0, size_t num_channels = 0)
Michael Graczyk86c6d332015-07-23 18:41:39802 : sample_rate_hz_(sample_rate_hz),
803 num_channels_(num_channels),
Michael Graczyk86c6d332015-07-23 18:41:39804 num_frames_(calculate_frames(sample_rate_hz)) {}
805
806 void set_sample_rate_hz(int value) {
807 sample_rate_hz_ = value;
808 num_frames_ = calculate_frames(value);
809 }
Peter Kasting69558702016-01-13 00:26:35810 void set_num_channels(size_t value) { num_channels_ = value; }
Michael Graczyk86c6d332015-07-23 18:41:39811
812 int sample_rate_hz() const { return sample_rate_hz_; }
813
Henrik Lundin64253a92022-02-04 09:02:48814 // The number of channels in the stream.
Peter Kasting69558702016-01-13 00:26:35815 size_t num_channels() const { return num_channels_; }
Michael Graczyk86c6d332015-07-23 18:41:39816
Peter Kastingdce40cf2015-08-24 21:52:23817 size_t num_frames() const { return num_frames_; }
818 size_t num_samples() const { return num_channels_ * num_frames_; }
Michael Graczyk86c6d332015-07-23 18:41:39819
820 bool operator==(const StreamConfig& other) const {
821 return sample_rate_hz_ == other.sample_rate_hz_ &&
Henrik Lundin64253a92022-02-04 09:02:48822 num_channels_ == other.num_channels_;
Michael Graczyk86c6d332015-07-23 18:41:39823 }
824
825 bool operator!=(const StreamConfig& other) const { return !(*this == other); }
826
827 private:
Peter Kastingdce40cf2015-08-24 21:52:23828 static size_t calculate_frames(int sample_rate_hz) {
Sam Zackrisson3bd444f2022-08-03 12:37:00829 return static_cast<size_t>(AudioProcessing::GetFrameSize(sample_rate_hz));
Michael Graczyk86c6d332015-07-23 18:41:39830 }
831
832 int sample_rate_hz_;
Peter Kasting69558702016-01-13 00:26:35833 size_t num_channels_;
Peter Kastingdce40cf2015-08-24 21:52:23834 size_t num_frames_;
Michael Graczyk86c6d332015-07-23 18:41:39835};
836
837class ProcessingConfig {
838 public:
839 enum StreamName {
840 kInputStream,
841 kOutputStream,
ekmeyerson60d9b332015-08-14 17:35:55842 kReverseInputStream,
843 kReverseOutputStream,
Michael Graczyk86c6d332015-07-23 18:41:39844 kNumStreamNames,
845 };
846
847 const StreamConfig& input_stream() const {
848 return streams[StreamName::kInputStream];
849 }
850 const StreamConfig& output_stream() const {
851 return streams[StreamName::kOutputStream];
852 }
ekmeyerson60d9b332015-08-14 17:35:55853 const StreamConfig& reverse_input_stream() const {
854 return streams[StreamName::kReverseInputStream];
855 }
856 const StreamConfig& reverse_output_stream() const {
857 return streams[StreamName::kReverseOutputStream];
Michael Graczyk86c6d332015-07-23 18:41:39858 }
859
860 StreamConfig& input_stream() { return streams[StreamName::kInputStream]; }
861 StreamConfig& output_stream() { return streams[StreamName::kOutputStream]; }
ekmeyerson60d9b332015-08-14 17:35:55862 StreamConfig& reverse_input_stream() {
863 return streams[StreamName::kReverseInputStream];
864 }
865 StreamConfig& reverse_output_stream() {
866 return streams[StreamName::kReverseOutputStream];
867 }
Michael Graczyk86c6d332015-07-23 18:41:39868
869 bool operator==(const ProcessingConfig& other) const {
870 for (int i = 0; i < StreamName::kNumStreamNames; ++i) {
871 if (this->streams[i] != other.streams[i]) {
872 return false;
873 }
874 }
875 return true;
876 }
877
878 bool operator!=(const ProcessingConfig& other) const {
879 return !(*this == other);
880 }
881
882 StreamConfig streams[StreamName::kNumStreamNames];
883};
884
Valeriia Nemychnikovaf06eb572018-08-29 08:37:09885// Experimental interface for a custom analysis submodule.
886class CustomAudioAnalyzer {
887 public:
888 // (Re-) Initializes the submodule.
889 virtual void Initialize(int sample_rate_hz, int num_channels) = 0;
890 // Analyzes the given capture or render signal.
891 virtual void Analyze(const AudioBuffer* audio) = 0;
892 // Returns a string representation of the module state.
893 virtual std::string ToString() const = 0;
894
895 virtual ~CustomAudioAnalyzer() {}
896};
897
Alex Loiko5825aa62017-12-18 15:02:40898// Interface for a custom processing submodule.
899class CustomProcessing {
Sam Zackrisson0beac582017-09-25 10:04:02900 public:
901 // (Re-)Initializes the submodule.
902 virtual void Initialize(int sample_rate_hz, int num_channels) = 0;
903 // Processes the given capture or render signal.
904 virtual void Process(AudioBuffer* audio) = 0;
905 // Returns a string representation of the module state.
906 virtual std::string ToString() const = 0;
Alex Loiko73ec0192018-05-15 08:52:28907 // Handles RuntimeSettings. TODO(webrtc:9262): make pure virtual
908 // after updating dependencies.
909 virtual void SetRuntimeSetting(AudioProcessing::RuntimeSetting setting);
Sam Zackrisson0beac582017-09-25 10:04:02910
Alex Loiko5825aa62017-12-18 15:02:40911 virtual ~CustomProcessing() {}
Sam Zackrisson0beac582017-09-25 10:04:02912};
913
Ivo Creusen09fa4b02018-01-11 15:08:54914// Interface for an echo detector submodule.
Harald Alvestrand78f905e2023-11-02 14:09:26915class EchoDetector : public RefCountInterface {
Ivo Creusen09fa4b02018-01-11 15:08:54916 public:
917 // (Re-)Initializes the submodule.
Ivo Creusen647ef092018-03-14 16:13:48918 virtual void Initialize(int capture_sample_rate_hz,
919 int num_capture_channels,
920 int render_sample_rate_hz,
921 int num_render_channels) = 0;
Ivo Creusen09fa4b02018-01-11 15:08:54922
Sam Zackrisson03cb7e52021-12-06 14:40:04923 // Analysis (not changing) of the first channel of the render signal.
Ivo Creusen09fa4b02018-01-11 15:08:54924 virtual void AnalyzeRenderAudio(rtc::ArrayView<const float> render_audio) = 0;
925
926 // Analysis (not changing) of the capture signal.
927 virtual void AnalyzeCaptureAudio(
928 rtc::ArrayView<const float> capture_audio) = 0;
929
Ivo Creusen09fa4b02018-01-11 15:08:54930 struct Metrics {
Ivo Creusenbb826c92020-04-29 12:34:48931 absl::optional<double> echo_likelihood;
932 absl::optional<double> echo_likelihood_recent_max;
Ivo Creusen09fa4b02018-01-11 15:08:54933 };
934
935 // Collect current metrics from the echo detector.
936 virtual Metrics GetMetrics() const = 0;
Ivo Creusen09fa4b02018-01-11 15:08:54937};
938
niklase@google.com470e71d2011-07-07 08:21:25939} // namespace webrtc
940
Mirko Bonadei92ea95e2017-09-15 04:47:31941#endif // MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_PROCESSING_H_