aleloi | 24899e5 | 2017-02-21 13:06:29 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. |
| 3 | * |
| 4 | * Use of this source code is governed by a BSD-style license |
| 5 | * that can be found in the LICENSE file in the root of the source |
| 6 | * tree. An additional intellectual property rights grant can be found |
| 7 | * in the file PATENTS. All contributing project authors may |
| 8 | * be found in the AUTHORS file in the root of the source tree. |
| 9 | */ |
| 10 | |
Mirko Bonadei | 92ea95e | 2017-09-15 04:47:31 | [diff] [blame] | 11 | #include "modules/audio_mixer/frame_combiner.h" |
aleloi | 24899e5 | 2017-02-21 13:06:29 | [diff] [blame] | 12 | |
| 13 | #include <algorithm> |
| 14 | #include <array> |
Yves Gerey | 3e70781 | 2018-11-28 15:47:49 | [diff] [blame] | 15 | #include <cstdint> |
| 16 | #include <iterator> |
| 17 | #include <string> |
aleloi | 24899e5 | 2017-02-21 13:06:29 | [diff] [blame] | 18 | |
Mirko Bonadei | 92ea95e | 2017-09-15 04:47:31 | [diff] [blame] | 19 | #include "api/array_view.h" |
Alex Loiko | 507e8d1 | 2018-02-27 12:51:47 | [diff] [blame] | 20 | #include "common_audio/include/audio_util.h" |
Mirko Bonadei | 92ea95e | 2017-09-15 04:47:31 | [diff] [blame] | 21 | #include "modules/audio_mixer/audio_frame_manipulator.h" |
| 22 | #include "modules/audio_mixer/audio_mixer_impl.h" |
Yves Gerey | 3e70781 | 2018-11-28 15:47:49 | [diff] [blame] | 23 | #include "modules/audio_processing/include/audio_frame_view.h" |
Alex Loiko | 8396e34 | 2018-06-21 10:04:05 | [diff] [blame] | 24 | #include "modules/audio_processing/include/audio_processing.h" |
Alex Loiko | 507e8d1 | 2018-02-27 12:51:47 | [diff] [blame] | 25 | #include "modules/audio_processing/logging/apm_data_dumper.h" |
Alex Loiko | 6f2fcb4 | 2018-03-14 11:27:05 | [diff] [blame] | 26 | #include "rtc_base/arraysize.h" |
Mirko Bonadei | 92ea95e | 2017-09-15 04:47:31 | [diff] [blame] | 27 | #include "rtc_base/checks.h" |
Alex Loiko | 6f2fcb4 | 2018-03-14 11:27:05 | [diff] [blame] | 28 | #include "system_wrappers/include/metrics.h" |
aleloi | 24899e5 | 2017-02-21 13:06:29 | [diff] [blame] | 29 | |
| 30 | namespace webrtc { |
| 31 | namespace { |
| 32 | |
// Stereo, 48 kHz, 10 ms.
// Upper bounds for the fixed-size mixing buffers used below: at most two
// channels, and 48 samples per ms times the frame duration per channel.
constexpr int kMaximumAmountOfChannels = 2;
constexpr int kMaximumChannelSize = 48 * AudioMixerImpl::kFrameDurationInMs;

// Storage for one channel's float samples covering a full frame.
using OneChannelBuffer = std::array<float, kMaximumChannelSize>;
aleloi | 24899e5 | 2017-02-21 13:06:29 | [diff] [blame] | 38 | |
Alex Loiko | 507e8d1 | 2018-02-27 12:51:47 | [diff] [blame] | 39 | void SetAudioFrameFields(const std::vector<AudioFrame*>& mix_list, |
| 40 | size_t number_of_channels, |
| 41 | int sample_rate, |
| 42 | size_t number_of_streams, |
| 43 | AudioFrame* audio_frame_for_mixing) { |
| 44 | const size_t samples_per_channel = static_cast<size_t>( |
| 45 | (sample_rate * webrtc::AudioMixerImpl::kFrameDurationInMs) / 1000); |
| 46 | |
| 47 | // TODO(minyue): Issue bugs.webrtc.org/3390. |
| 48 | // Audio frame timestamp. The 'timestamp_' field is set to dummy |
| 49 | // value '0', because it is only supported in the one channel case and |
| 50 | // is then updated in the helper functions. |
| 51 | audio_frame_for_mixing->UpdateFrame( |
| 52 | 0, nullptr, samples_per_channel, sample_rate, AudioFrame::kUndefined, |
| 53 | AudioFrame::kVadUnknown, number_of_channels); |
| 54 | |
| 55 | if (mix_list.empty()) { |
| 56 | audio_frame_for_mixing->elapsed_time_ms_ = -1; |
| 57 | } else if (mix_list.size() == 1) { |
| 58 | audio_frame_for_mixing->timestamp_ = mix_list[0]->timestamp_; |
| 59 | audio_frame_for_mixing->elapsed_time_ms_ = mix_list[0]->elapsed_time_ms_; |
Niklas Enbom | ef8a3eb | 2018-10-04 23:21:47 | [diff] [blame] | 60 | audio_frame_for_mixing->ntp_time_ms_ = mix_list[0]->ntp_time_ms_; |
Alex Loiko | 507e8d1 | 2018-02-27 12:51:47 | [diff] [blame] | 61 | } |
| 62 | } |
| 63 | |
| 64 | void MixFewFramesWithNoLimiter(const std::vector<AudioFrame*>& mix_list, |
| 65 | AudioFrame* audio_frame_for_mixing) { |
| 66 | if (mix_list.empty()) { |
| 67 | audio_frame_for_mixing->Mute(); |
| 68 | return; |
| 69 | } |
| 70 | RTC_DCHECK_LE(mix_list.size(), 1); |
| 71 | std::copy(mix_list[0]->data(), |
| 72 | mix_list[0]->data() + |
| 73 | mix_list[0]->num_channels_ * mix_list[0]->samples_per_channel_, |
| 74 | audio_frame_for_mixing->mutable_data()); |
| 75 | } |
| 76 | |
| 77 | std::array<OneChannelBuffer, kMaximumAmountOfChannels> MixToFloatFrame( |
| 78 | const std::vector<AudioFrame*>& mix_list, |
| 79 | size_t samples_per_channel, |
| 80 | size_t number_of_channels) { |
| 81 | // Convert to FloatS16 and mix. |
| 82 | using OneChannelBuffer = std::array<float, kMaximumChannelSize>; |
| 83 | std::array<OneChannelBuffer, kMaximumAmountOfChannels> mixing_buffer{}; |
| 84 | |
| 85 | for (size_t i = 0; i < mix_list.size(); ++i) { |
| 86 | const AudioFrame* const frame = mix_list[i]; |
| 87 | for (size_t j = 0; j < number_of_channels; ++j) { |
| 88 | for (size_t k = 0; k < samples_per_channel; ++k) { |
| 89 | mixing_buffer[j][k] += frame->data()[number_of_channels * k + j]; |
| 90 | } |
| 91 | } |
| 92 | } |
| 93 | return mixing_buffer; |
| 94 | } |
| 95 | |
Alessio Bazzica | 3e4c77f | 2018-11-01 20:31:38 | [diff] [blame] | 96 | void RunLimiter(AudioFrameView<float> mixing_buffer_view, Limiter* limiter) { |
Alex Loiko | 507e8d1 | 2018-02-27 12:51:47 | [diff] [blame] | 97 | const size_t sample_rate = mixing_buffer_view.samples_per_channel() * 1000 / |
| 98 | AudioMixerImpl::kFrameDurationInMs; |
Alessio Bazzica | 3e4c77f | 2018-11-01 20:31:38 | [diff] [blame] | 99 | // TODO(alessiob): Avoid calling SetSampleRate every time. |
Alex Loiko | 8396e34 | 2018-06-21 10:04:05 | [diff] [blame] | 100 | limiter->SetSampleRate(sample_rate); |
| 101 | limiter->Process(mixing_buffer_view); |
Alex Loiko | 507e8d1 | 2018-02-27 12:51:47 | [diff] [blame] | 102 | } |
| 103 | |
| 104 | // Both interleaves and rounds. |
| 105 | void InterleaveToAudioFrame(AudioFrameView<const float> mixing_buffer_view, |
| 106 | AudioFrame* audio_frame_for_mixing) { |
| 107 | const size_t number_of_channels = mixing_buffer_view.num_channels(); |
| 108 | const size_t samples_per_channel = mixing_buffer_view.samples_per_channel(); |
| 109 | // Put data in the result frame. |
| 110 | for (size_t i = 0; i < number_of_channels; ++i) { |
| 111 | for (size_t j = 0; j < samples_per_channel; ++j) { |
| 112 | audio_frame_for_mixing->mutable_data()[number_of_channels * j + i] = |
| 113 | FloatS16ToS16(mixing_buffer_view.channel(i)[j]); |
| 114 | } |
| 115 | } |
| 116 | } |
aleloi | 24899e5 | 2017-02-21 13:06:29 | [diff] [blame] | 117 | } // namespace |
| 118 | |
// Constructs the combiner. The data dumper is created with instance id 0 and
// handed (non-owning) to the limiter; the limiter is configured for 48 kHz
// here and re-configured per call in RunLimiter. |use_limiter| selects
// whether Combine() applies the limiter when mixing multiple streams.
FrameCombiner::FrameCombiner(bool use_limiter)
    : data_dumper_(new ApmDataDumper(0)),
      limiter_(static_cast<size_t>(48000), data_dumper_.get(), "AudioMixer"),
      use_limiter_(use_limiter) {}

FrameCombiner::~FrameCombiner() = default;
| 125 | |
| 126 | void FrameCombiner::Combine(const std::vector<AudioFrame*>& mix_list, |
| 127 | size_t number_of_channels, |
| 128 | int sample_rate, |
aleloi | 2c9306e | 2017-03-29 11:25:16 | [diff] [blame] | 129 | size_t number_of_streams, |
Alex Loiko | 507e8d1 | 2018-02-27 12:51:47 | [diff] [blame] | 130 | AudioFrame* audio_frame_for_mixing) { |
aleloi | 24899e5 | 2017-02-21 13:06:29 | [diff] [blame] | 131 | RTC_DCHECK(audio_frame_for_mixing); |
Alex Loiko | b9a02e5 | 2018-03-19 13:32:05 | [diff] [blame] | 132 | |
| 133 | LogMixingStats(mix_list, sample_rate, number_of_streams); |
| 134 | |
Alex Loiko | 507e8d1 | 2018-02-27 12:51:47 | [diff] [blame] | 135 | SetAudioFrameFields(mix_list, number_of_channels, sample_rate, |
| 136 | number_of_streams, audio_frame_for_mixing); |
| 137 | |
aleloi | 24899e5 | 2017-02-21 13:06:29 | [diff] [blame] | 138 | const size_t samples_per_channel = static_cast<size_t>( |
| 139 | (sample_rate * webrtc::AudioMixerImpl::kFrameDurationInMs) / 1000); |
| 140 | |
| 141 | for (const auto* frame : mix_list) { |
| 142 | RTC_DCHECK_EQ(samples_per_channel, frame->samples_per_channel_); |
| 143 | RTC_DCHECK_EQ(sample_rate, frame->sample_rate_hz_); |
| 144 | } |
| 145 | |
Alex Loiko | 507e8d1 | 2018-02-27 12:51:47 | [diff] [blame] | 146 | // The 'num_channels_' field of frames in 'mix_list' could be |
| 147 | // different from 'number_of_channels'. |
aleloi | 24899e5 | 2017-02-21 13:06:29 | [diff] [blame] | 148 | for (auto* frame : mix_list) { |
| 149 | RemixFrame(number_of_channels, frame); |
| 150 | } |
| 151 | |
Alex Loiko | 507e8d1 | 2018-02-27 12:51:47 | [diff] [blame] | 152 | if (number_of_streams <= 1) { |
| 153 | MixFewFramesWithNoLimiter(mix_list, audio_frame_for_mixing); |
| 154 | return; |
aleloi | 24899e5 | 2017-02-21 13:06:29 | [diff] [blame] | 155 | } |
Alex Loiko | 507e8d1 | 2018-02-27 12:51:47 | [diff] [blame] | 156 | |
| 157 | std::array<OneChannelBuffer, kMaximumAmountOfChannels> mixing_buffer = |
| 158 | MixToFloatFrame(mix_list, samples_per_channel, number_of_channels); |
| 159 | |
| 160 | // Put float data in an AudioFrameView. |
| 161 | std::array<float*, kMaximumAmountOfChannels> channel_pointers{}; |
| 162 | for (size_t i = 0; i < number_of_channels; ++i) { |
| 163 | channel_pointers[i] = &mixing_buffer[i][0]; |
| 164 | } |
| 165 | AudioFrameView<float> mixing_buffer_view( |
| 166 | &channel_pointers[0], number_of_channels, samples_per_channel); |
| 167 | |
Alex Loiko | 8396e34 | 2018-06-21 10:04:05 | [diff] [blame] | 168 | if (use_limiter_) { |
| 169 | RunLimiter(mixing_buffer_view, &limiter_); |
Alex Loiko | 507e8d1 | 2018-02-27 12:51:47 | [diff] [blame] | 170 | } |
| 171 | |
| 172 | InterleaveToAudioFrame(mixing_buffer_view, audio_frame_for_mixing); |
Alex Loiko | 6f2fcb4 | 2018-03-14 11:27:05 | [diff] [blame] | 173 | } |
| 174 | |
| 175 | void FrameCombiner::LogMixingStats(const std::vector<AudioFrame*>& mix_list, |
| 176 | int sample_rate, |
| 177 | size_t number_of_streams) const { |
| 178 | // Log every second. |
| 179 | uma_logging_counter_++; |
| 180 | if (uma_logging_counter_ > 1000 / AudioMixerImpl::kFrameDurationInMs) { |
| 181 | uma_logging_counter_ = 0; |
| 182 | RTC_HISTOGRAM_COUNTS_100("WebRTC.Audio.AudioMixer.NumIncomingStreams", |
| 183 | static_cast<int>(number_of_streams)); |
| 184 | RTC_HISTOGRAM_ENUMERATION( |
| 185 | "WebRTC.Audio.AudioMixer.NumIncomingActiveStreams", |
| 186 | static_cast<int>(mix_list.size()), |
| 187 | AudioMixerImpl::kMaximumAmountOfMixedAudioSources); |
| 188 | |
| 189 | using NativeRate = AudioProcessing::NativeRate; |
| 190 | static constexpr NativeRate native_rates[] = { |
| 191 | NativeRate::kSampleRate8kHz, NativeRate::kSampleRate16kHz, |
| 192 | NativeRate::kSampleRate32kHz, NativeRate::kSampleRate48kHz}; |
| 193 | const auto* rate_position = std::lower_bound( |
| 194 | std::begin(native_rates), std::end(native_rates), sample_rate); |
| 195 | |
| 196 | RTC_HISTOGRAM_ENUMERATION( |
| 197 | "WebRTC.Audio.AudioMixer.MixingRate", |
| 198 | std::distance(std::begin(native_rates), rate_position), |
| 199 | arraysize(native_rates)); |
| 200 | } |
aleloi | 24899e5 | 2017-02-21 13:06:29 | [diff] [blame] | 201 | } |
yujo | 36b1a5f | 2017-06-12 19:45:32 | [diff] [blame] | 202 | |
aleloi | 24899e5 | 2017-02-21 13:06:29 | [diff] [blame] | 203 | } // namespace webrtc |