/*
 *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10
11#include "audio/audio_transport_impl.h"
12
13#include <algorithm>
14#include <memory>
15#include <utility>
16
Fredrik Solenberga8b7c7f2018-01-17 10:18:3117#include "audio/remix_resample.h"
Fredrik Solenberg2a877972017-12-15 15:42:1518#include "audio/utility/audio_frame_operations.h"
Tim Nab8c775a2020-01-10 18:33:0519#include "call/audio_sender.h"
Olga Sharonova09ceed22020-09-30 16:27:3920#include "modules/async_audio_processing/async_audio_processing.h"
Per Åhgren71652f42020-03-17 12:23:5821#include "modules/audio_processing/include/audio_frame_proxies.h"
Yves Gerey988cc082018-10-23 10:03:0122#include "rtc_base/checks.h"
Fredrik Solenberg2a877972017-12-15 15:42:1523
24namespace webrtc {
25
26namespace {
27
28// We want to process at the lowest sample rate and channel count possible
29// without losing information. Choose the lowest native rate at least equal to
30// the minimum of input and codec rates, choose lowest channel count, and
31// configure the audio frame.
32void InitializeCaptureFrame(int input_sample_rate,
33 int send_sample_rate_hz,
34 size_t input_num_channels,
35 size_t send_num_channels,
36 AudioFrame* audio_frame) {
37 RTC_DCHECK(audio_frame);
38 int min_processing_rate_hz = std::min(input_sample_rate, send_sample_rate_hz);
39 for (int native_rate_hz : AudioProcessing::kNativeSampleRatesHz) {
40 audio_frame->sample_rate_hz_ = native_rate_hz;
41 if (audio_frame->sample_rate_hz_ >= min_processing_rate_hz) {
42 break;
43 }
44 }
45 audio_frame->num_channels_ = std::min(input_num_channels, send_num_channels);
46}
47
henrika649a3852017-12-22 12:58:2948void ProcessCaptureFrame(uint32_t delay_ms,
Fredrik Solenberg2a877972017-12-15 15:42:1549 bool key_pressed,
50 bool swap_stereo_channels,
51 AudioProcessing* audio_processing,
52 AudioFrame* audio_frame) {
Fredrik Solenberg2a877972017-12-15 15:42:1553 RTC_DCHECK(audio_frame);
Per Åhgrencc73ed32020-04-26 21:56:1754 if (audio_processing) {
55 audio_processing->set_stream_delay_ms(delay_ms);
56 audio_processing->set_stream_key_pressed(key_pressed);
57 int error = ProcessAudioFrame(audio_processing, audio_frame);
Per Åhgren71652f42020-03-17 12:23:5858
Per Åhgrencc73ed32020-04-26 21:56:1759 RTC_DCHECK_EQ(0, error) << "ProcessStream() error: " << error;
60 }
61
Fredrik Solenberg2a877972017-12-15 15:42:1562 if (swap_stereo_channels) {
63 AudioFrameOperations::SwapStereoChannels(audio_frame);
64 }
65}
66
67// Resample audio in |frame| to given sample rate preserving the
68// channel count and place the result in |destination|.
69int Resample(const AudioFrame& frame,
70 const int destination_sample_rate,
71 PushResampler<int16_t>* resampler,
72 int16_t* destination) {
73 const int number_of_channels = static_cast<int>(frame.num_channels_);
74 const int target_number_of_samples_per_channel =
75 destination_sample_rate / 100;
76 resampler->InitializeIfNeeded(frame.sample_rate_hz_, destination_sample_rate,
77 number_of_channels);
78
79 // TODO(yujo): make resampler take an AudioFrame, and add special case
80 // handling of muted frames.
81 return resampler->Resample(
82 frame.data(), frame.samples_per_channel_ * number_of_channels,
83 destination, number_of_channels * target_number_of_samples_per_channel);
84}
85} // namespace
86
Olga Sharonova09ceed22020-09-30 16:27:3987AudioTransportImpl::AudioTransportImpl(
88 AudioMixer* mixer,
89 AudioProcessing* audio_processing,
90 AsyncAudioProcessing::Factory* async_audio_processing_factory)
91 : audio_processing_(audio_processing),
92 async_audio_processing_(
93 async_audio_processing_factory
94 ? async_audio_processing_factory->CreateAsyncAudioProcessing(
95 [this](std::unique_ptr<AudioFrame> frame) {
96 this->SendProcessedData(std::move(frame));
97 })
98 : nullptr),
99 mixer_(mixer) {
Fredrik Solenberg2a877972017-12-15 15:42:15100 RTC_DCHECK(mixer);
Fredrik Solenberg2a877972017-12-15 15:42:15101}
102
103AudioTransportImpl::~AudioTransportImpl() {}
104
105// Not used in Chromium. Process captured audio and distribute to all sending
106// streams, and try to do this at the lowest possible sample rate.
107int32_t AudioTransportImpl::RecordedDataIsAvailable(
108 const void* audio_data,
109 const size_t number_of_frames,
110 const size_t bytes_per_sample,
111 const size_t number_of_channels,
112 const uint32_t sample_rate,
113 const uint32_t audio_delay_milliseconds,
114 const int32_t /*clock_drift*/,
henrika649a3852017-12-22 12:58:29115 const uint32_t /*volume*/,
Fredrik Solenberg2a877972017-12-15 15:42:15116 const bool key_pressed,
117 uint32_t& /*new_mic_volume*/) { // NOLINT: to avoid changing APIs
118 RTC_DCHECK(audio_data);
119 RTC_DCHECK_GE(number_of_channels, 1);
120 RTC_DCHECK_LE(number_of_channels, 2);
121 RTC_DCHECK_EQ(2 * number_of_channels, bytes_per_sample);
122 RTC_DCHECK_GE(sample_rate, AudioProcessing::NativeRate::kSampleRate8kHz);
123 // 100 = 1 second / data duration (10 ms).
124 RTC_DCHECK_EQ(number_of_frames * 100, sample_rate);
125 RTC_DCHECK_LE(bytes_per_sample * number_of_frames * number_of_channels,
126 AudioFrame::kMaxDataSizeBytes);
127
Fredrik Solenberg2a877972017-12-15 15:42:15128 int send_sample_rate_hz = 0;
129 size_t send_num_channels = 0;
130 bool swap_stereo_channels = false;
131 {
Markus Handell62872802020-07-06 13:15:07132 MutexLock lock(&capture_lock_);
Fredrik Solenberg2a877972017-12-15 15:42:15133 send_sample_rate_hz = send_sample_rate_hz_;
134 send_num_channels = send_num_channels_;
135 swap_stereo_channels = swap_stereo_channels_;
136 }
137
138 std::unique_ptr<AudioFrame> audio_frame(new AudioFrame());
Yves Gerey665174f2018-06-19 13:03:05139 InitializeCaptureFrame(sample_rate, send_sample_rate_hz, number_of_channels,
140 send_num_channels, audio_frame.get());
Fredrik Solenberg2a877972017-12-15 15:42:15141 voe::RemixAndResample(static_cast<const int16_t*>(audio_data),
142 number_of_frames, number_of_channels, sample_rate,
143 &capture_resampler_, audio_frame.get());
henrika649a3852017-12-22 12:58:29144 ProcessCaptureFrame(audio_delay_milliseconds, key_pressed,
Fredrik Solenberg2a877972017-12-15 15:42:15145 swap_stereo_channels, audio_processing_,
146 audio_frame.get());
147
148 // Typing detection (utilizes the APM/VAD decision). We let the VAD determine
149 // if we're using this feature or not.
Sam Zackrissonba502232019-01-04 09:36:48150 // TODO(solenberg): GetConfig() takes a lock. Work around that.
Fredrik Solenberg2a877972017-12-15 15:42:15151 bool typing_detected = false;
Per Åhgrencc73ed32020-04-26 21:56:17152 if (audio_processing_ &&
153 audio_processing_->GetConfig().voice_detection.enabled) {
Fredrik Solenberg2a877972017-12-15 15:42:15154 if (audio_frame->vad_activity_ != AudioFrame::kVadUnknown) {
155 bool vad_active = audio_frame->vad_activity_ == AudioFrame::kVadActive;
156 typing_detected = typing_detection_.Process(key_pressed, vad_active);
157 }
158 }
159
Fredrik Solenberg2a877972017-12-15 15:42:15160 // Copy frame and push to each sending stream. The copy is required since an
161 // encoding task will be posted internally to each stream.
162 {
Markus Handell62872802020-07-06 13:15:07163 MutexLock lock(&capture_lock_);
Fredrik Solenberg2a877972017-12-15 15:42:15164 typing_noise_detected_ = typing_detected;
Fredrik Solenberg2a877972017-12-15 15:42:15165 }
166
Olga Sharonova09ceed22020-09-30 16:27:39167 RTC_DCHECK_GT(audio_frame->samples_per_channel_, 0);
168 if (async_audio_processing_)
169 async_audio_processing_->Process(std::move(audio_frame));
170 else
171 SendProcessedData(std::move(audio_frame));
172
Fredrik Solenberg2a877972017-12-15 15:42:15173 return 0;
174}
175
Olga Sharonova09ceed22020-09-30 16:27:39176void AudioTransportImpl::SendProcessedData(
177 std::unique_ptr<AudioFrame> audio_frame) {
178 RTC_DCHECK_GT(audio_frame->samples_per_channel_, 0);
179 MutexLock lock(&capture_lock_);
180 if (audio_senders_.empty())
181 return;
182
183 auto it = audio_senders_.begin();
184 while (++it != audio_senders_.end()) {
185 auto audio_frame_copy = std::make_unique<AudioFrame>();
186 audio_frame_copy->CopyFrom(*audio_frame);
187 (*it)->SendAudioData(std::move(audio_frame_copy));
188 }
189 // Send the original frame to the first stream w/o copying.
190 (*audio_senders_.begin())->SendAudioData(std::move(audio_frame));
191}
192
Fredrik Solenberg2a877972017-12-15 15:42:15193// Mix all received streams, feed the result to the AudioProcessing module, then
194// resample the result to the requested output rate.
195int32_t AudioTransportImpl::NeedMorePlayData(const size_t nSamples,
Yves Gerey665174f2018-06-19 13:03:05196 const size_t nBytesPerSample,
197 const size_t nChannels,
198 const uint32_t samplesPerSec,
199 void* audioSamples,
200 size_t& nSamplesOut,
201 int64_t* elapsed_time_ms,
202 int64_t* ntp_time_ms) {
Fredrik Solenberg2a877972017-12-15 15:42:15203 RTC_DCHECK_EQ(sizeof(int16_t) * nChannels, nBytesPerSample);
204 RTC_DCHECK_GE(nChannels, 1);
205 RTC_DCHECK_LE(nChannels, 2);
206 RTC_DCHECK_GE(
207 samplesPerSec,
208 static_cast<uint32_t>(AudioProcessing::NativeRate::kSampleRate8kHz));
209
210 // 100 = 1 second / data duration (10 ms).
211 RTC_DCHECK_EQ(nSamples * 100, samplesPerSec);
212 RTC_DCHECK_LE(nBytesPerSample * nSamples * nChannels,
213 AudioFrame::kMaxDataSizeBytes);
214
215 mixer_->Mix(nChannels, &mixed_frame_);
216 *elapsed_time_ms = mixed_frame_.elapsed_time_ms_;
217 *ntp_time_ms = mixed_frame_.ntp_time_ms_;
218
Per Åhgrencc73ed32020-04-26 21:56:17219 if (audio_processing_) {
220 const auto error =
221 ProcessReverseAudioFrame(audio_processing_, &mixed_frame_);
222 RTC_DCHECK_EQ(error, AudioProcessing::kNoError);
223 }
Fredrik Solenberg2a877972017-12-15 15:42:15224
225 nSamplesOut = Resample(mixed_frame_, samplesPerSec, &render_resampler_,
226 static_cast<int16_t*>(audioSamples));
227 RTC_DCHECK_EQ(nSamplesOut, nChannels * nSamples);
228 return 0;
229}
230
231// Used by Chromium - same as NeedMorePlayData() but because Chrome has its
232// own APM instance, does not call audio_processing_->ProcessReverseStream().
233void AudioTransportImpl::PullRenderData(int bits_per_sample,
Yves Gerey665174f2018-06-19 13:03:05234 int sample_rate,
235 size_t number_of_channels,
236 size_t number_of_frames,
237 void* audio_data,
238 int64_t* elapsed_time_ms,
239 int64_t* ntp_time_ms) {
Fredrik Solenberg2a877972017-12-15 15:42:15240 RTC_DCHECK_EQ(bits_per_sample, 16);
241 RTC_DCHECK_GE(number_of_channels, 1);
Fredrik Solenberg2a877972017-12-15 15:42:15242 RTC_DCHECK_GE(sample_rate, AudioProcessing::NativeRate::kSampleRate8kHz);
243
244 // 100 = 1 second / data duration (10 ms).
245 RTC_DCHECK_EQ(number_of_frames * 100, sample_rate);
246
247 // 8 = bits per byte.
248 RTC_DCHECK_LE(bits_per_sample / 8 * number_of_frames * number_of_channels,
249 AudioFrame::kMaxDataSizeBytes);
250 mixer_->Mix(number_of_channels, &mixed_frame_);
251 *elapsed_time_ms = mixed_frame_.elapsed_time_ms_;
252 *ntp_time_ms = mixed_frame_.ntp_time_ms_;
253
254 auto output_samples = Resample(mixed_frame_, sample_rate, &render_resampler_,
255 static_cast<int16_t*>(audio_data));
256 RTC_DCHECK_EQ(output_samples, number_of_channels * number_of_frames);
257}
258
Tim Nab8c775a2020-01-10 18:33:05259void AudioTransportImpl::UpdateAudioSenders(std::vector<AudioSender*> senders,
260 int send_sample_rate_hz,
261 size_t send_num_channels) {
Markus Handell62872802020-07-06 13:15:07262 MutexLock lock(&capture_lock_);
Tim Nab8c775a2020-01-10 18:33:05263 audio_senders_ = std::move(senders);
Fredrik Solenberg2a877972017-12-15 15:42:15264 send_sample_rate_hz_ = send_sample_rate_hz;
265 send_num_channels_ = send_num_channels;
266}
267
268void AudioTransportImpl::SetStereoChannelSwapping(bool enable) {
Markus Handell62872802020-07-06 13:15:07269 MutexLock lock(&capture_lock_);
Fredrik Solenberg2a877972017-12-15 15:42:15270 swap_stereo_channels_ = enable;
271}
272
273bool AudioTransportImpl::typing_noise_detected() const {
Markus Handell62872802020-07-06 13:15:07274 MutexLock lock(&capture_lock_);
Fredrik Solenberg2a877972017-12-15 15:42:15275 return typing_noise_detected_;
276}
277} // namespace webrtc