// modules/audio_mixer/frame_combiner.cc - src/webrtc - Git at Google

 /*
  *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
  *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */

 #include "webrtc/modules/audio_mixer/frame_combiner.h"

 #include <algorithm>
 #include <array>
 #include <functional>
 #include <memory>

 #include "webrtc/audio/utility/audio_frame_operations.h"
 #include "webrtc/modules/audio_mixer/audio_frame_manipulator.h"
 #include "webrtc/modules/audio_mixer/audio_mixer_impl.h"
 #include "webrtc/rtc_base/array_view.h"
 #include "webrtc/rtc_base/checks.h"
 #include "webrtc/rtc_base/logging.h"

 namespace webrtc {
 namespace {

 // Largest supported frame, in samples: stereo (2 channels) at 48 kHz
 // (48 samples per ms) for 10 ms. Used below as the size of the int32
 // accumulation buffer in CombineMultipleFrames().
 constexpr int kMaximalFrameSize = 2 * 48 * 10;

 void CombineZeroFrames(bool use_limiter,
                        AudioProcessing* limiter,
                        AudioFrame* audio_frame_for_mixing) {
   audio_frame_for_mixing->elapsed_time_ms_ = -1;
   AudioFrameOperations::Mute(audio_frame_for_mixing);
   // The limiter should still process a zero frame to avoid jumps in
   // its gain curve.
   if (use_limiter) {
     RTC_DCHECK(limiter);
     // The limiter smoothly increases frames with half gain to full
     // volume.  Here there's no need to apply half gain, since the frame
     // is zero anyway.
     limiter->ProcessStream(audio_frame_for_mixing);
   }
 }

 void CombineOneFrame(const AudioFrame* input_frame,
                      bool use_limiter,
                      AudioProcessing* limiter,
                      AudioFrame* audio_frame_for_mixing) {
   audio_frame_for_mixing->timestamp_ = input_frame->timestamp_;
   audio_frame_for_mixing->elapsed_time_ms_ = input_frame->elapsed_time_ms_;
   // TODO(yujo): can we optimize muted frames?
   std::copy(input_frame->data(),
             input_frame->data() +
                 input_frame->num_channels_ * input_frame->samples_per_channel_,
             audio_frame_for_mixing->mutable_data());
   if (use_limiter) {
     AudioFrameOperations::ApplyHalfGain(audio_frame_for_mixing);
     RTC_DCHECK(limiter);
     limiter->ProcessStream(audio_frame_for_mixing);
     AudioFrameOperations::Add(*audio_frame_for_mixing, audio_frame_for_mixing);
   }
 }

 // Helper for Combine(...) that handles the case of several input frames.
 // All views in 'input_frames' must have the same length, which must not
 // exceed kMaximalFrameSize (both are checked in debug builds only).
 //
 // TODO(aleloi): change interface to ArrayView<int16_t> output_frame
 // once we have gotten rid of the APM limiter.
 //
 // Only the 'data' field of the output frame should be modified. The
 // remaining fields are there because the frame may be passed on to the
 // APM limiter.
 void CombineMultipleFrames(
     const std::vector<rtc::ArrayView<const int16_t>>& input_frames,
     bool use_limiter,
     AudioProcessing* limiter,
     AudioFrame* audio_frame_for_mixing) {
   RTC_DCHECK(!input_frames.empty());
   RTC_DCHECK(audio_frame_for_mixing);

   const size_t frame_length = input_frames.front().size();
   for (const auto& frame : input_frames) {
     RTC_DCHECK_EQ(frame_length, frame.size());
   }

   // The int16 inputs are summed into a zero-initialized int32 buffer of
   // fixed size. For > 2 participants this is more efficient than adding
   // in place in the int16 audio frame, and the quality loss caused by
   // halving the samples is smaller than that of 16-bit addition in place.
   RTC_DCHECK_GE(kMaximalFrameSize, frame_length);
   std::array<int32_t, kMaximalFrameSize> mix_sum{};

   for (const auto& frame : input_frames) {
     // TODO(yujo): skip this for muted frames.
     size_t index = 0;
     for (const int16_t sample : frame) {
       mix_sum[index] += sample;
       ++index;
     }
   }

   auto* const mixed = audio_frame_for_mixing->mutable_data();
   if (!use_limiter) {
     // No limiter: saturate the sums straight into the output frame.
     for (size_t i = 0; i < frame_length; ++i) {
       mixed[i] = rtc::saturated_cast<int16_t>(mix_sum[i]);
     }
     return;
   }

   // Halve all samples to avoid saturation before limiting.
   for (size_t i = 0; i < frame_length; ++i) {
     mixed[i] = rtc::saturated_cast<int16_t>(mix_sum[i] / 2);
   }

   // Smoothly limit the audio.
   RTC_DCHECK(limiter);
   const int error = limiter->ProcessStream(audio_frame_for_mixing);
   if (error != limiter->kNoError) {
     LOG_F(LS_ERROR) << "Error from AudioProcessing: " << error;
     RTC_NOTREACHED();
   }

   // Restore the level now that the frame has been limited. Some resolution
   // is lost this way, which is deemed acceptable.
   //
   // The gain could instead be applied in the AGC (target level 0 dbFS and
   // compression gain 6 dB), but in the transition frame where that gets
   // enabled (moving from one to two audio sources) it could create
   // discontinuities in the mixed frame.
   //
   // So the frame is doubled instead, using addition since left-shifting a
   // negative value is undefined.
   AudioFrameOperations::Add(*audio_frame_for_mixing, audio_frame_for_mixing);
 }

 std::unique_ptr<AudioProcessing> CreateLimiter() {
   Config config;
   config.Set<ExperimentalAgc>(new ExperimentalAgc(false));

   std::unique_ptr<AudioProcessing> limiter(AudioProcessing::Create(config));
   RTC_DCHECK(limiter);

   webrtc::AudioProcessing::Config apm_config;
   apm_config.residual_echo_detector.enabled = false;
   limiter->ApplyConfig(apm_config);

   const auto check_no_error = [](int x) {
     RTC_DCHECK_EQ(x, AudioProcessing::kNoError);
   };
   auto* const gain_control = limiter->gain_control();
   check_no_error(gain_control->set_mode(GainControl::kFixedDigital));

   // We smoothly limit the mixed frame to -7 dbFS. -6 would correspond to the
   // divide-by-2 but -7 is used instead to give a bit of headroom since the
   // AGC is not a hard limiter.
   check_no_error(gain_control->set_target_level_dbfs(7));

   check_no_error(gain_control->set_compression_gain_db(0));
   check_no_error(gain_control->enable_limiter(true));
   check_no_error(gain_control->Enable(true));
   return limiter;
 }
 }  // namespace

 // Remembers whether the APM limiter is requested and creates it via
 // CreateLimiter() only in that case; otherwise 'limiter_' is null.
 FrameCombiner::FrameCombiner(bool use_apm_limiter)
     : use_apm_limiter_(use_apm_limiter),
       limiter_(use_apm_limiter ? CreateLimiter() : nullptr) {}

 // Defaulted destructor, defined out of line in this file.
 // NOTE(review): presumably so the header does not need the complete
 // AudioProcessing type for 'limiter_' - confirm against frame_combiner.h.
 FrameCombiner::~FrameCombiner() = default;

 // Mixes the frames in 'mix_list' into 'audio_frame_for_mixing'.
 //
 // Every input frame is expected to have rate 'sample_rate' and the matching
 // number of samples per channel (checked in debug builds only). Each input
 // frame is passed to RemixFrame() together with 'number_of_channels' before
 // mixing, so the frames in 'mix_list' may be modified. The limiter is used
 // only if it was requested at construction and 'number_of_streams' is
 // greater than one.
 void FrameCombiner::Combine(const std::vector<AudioFrame*>& mix_list,
                             size_t number_of_channels,
                             int sample_rate,
                             size_t number_of_streams,
                             AudioFrame* audio_frame_for_mixing) const {
   RTC_DCHECK(audio_frame_for_mixing);
   const size_t samples_per_channel = static_cast<size_t>(
       (sample_rate * webrtc::AudioMixerImpl::kFrameDurationInMs) / 1000);

   for (const auto* frame : mix_list) {
     RTC_DCHECK_EQ(samples_per_channel, frame->samples_per_channel_);
     RTC_DCHECK_EQ(sample_rate, frame->sample_rate_hz_);
   }

   // Frames could be both stereo and mono.
   for (auto* frame : mix_list) {
     RemixFrame(number_of_channels, frame);
   }

   // TODO(aleloi): Issue bugs.webrtc.org/3390.
   // Audio frame timestamp. The 'timestamp_' field is set to dummy
   // value '0', because it is only supported in the one channel case and
   // is then updated in the helper functions.
   audio_frame_for_mixing->UpdateFrame(
       -1, 0, nullptr, samples_per_channel, sample_rate, AudioFrame::kUndefined,
       AudioFrame::kVadUnknown, number_of_channels);

   const bool use_limiter_this_round = use_apm_limiter_ && number_of_streams > 1;

   if (mix_list.empty()) {
     CombineZeroFrames(use_limiter_this_round, limiter_.get(),
                       audio_frame_for_mixing);
   } else if (mix_list.size() == 1) {
     CombineOneFrame(mix_list.front(), use_limiter_this_round, limiter_.get(),
                     audio_frame_for_mixing);
   } else {
     // Every view has the same length; compute it once.
     const size_t samples_per_frame = samples_per_channel * number_of_channels;

     // Reserve up front so that collecting the views needs one allocation
     // instead of repeated regrowth.
     std::vector<rtc::ArrayView<const int16_t>> input_frames;
     input_frames.reserve(mix_list.size());
     for (const AudioFrame* frame : mix_list) {
       input_frames.push_back(
           rtc::ArrayView<const int16_t>(frame->data(), samples_per_frame));
     }
     CombineMultipleFrames(input_frames, use_limiter_this_round, limiter_.get(),
                           audio_frame_for_mixing);
   }
 }

 }  // namespace webrtc
	/*
	* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
	*
	* Use of this source code is governed by a BSD-style license
	* that can be found in the LICENSE file in the root of the source
	* tree. An additional intellectual property rights grant can be found
	* in the file PATENTS. All contributing project authors may
	* be found in the AUTHORS file in the root of the source tree.
	*/

	#include "webrtc/modules/audio_mixer/frame_combiner.h"

	#include <algorithm>
	#include <array>
	#include <functional>
	#include <memory>

	#include "webrtc/audio/utility/audio_frame_operations.h"
	#include "webrtc/modules/audio_mixer/audio_frame_manipulator.h"
	#include "webrtc/modules/audio_mixer/audio_mixer_impl.h"
	#include "webrtc/rtc_base/array_view.h"
	#include "webrtc/rtc_base/checks.h"
	#include "webrtc/rtc_base/logging.h"

	namespace webrtc {
	namespace {

	// Largest supported frame, in samples: stereo (2 channels) at 48 kHz
	// (48 samples per ms) for 10 ms. Used below as the size of the int32
	// accumulation buffer in CombineMultipleFrames().
	constexpr int kMaximalFrameSize = 2 * 48 * 10;

	void CombineZeroFrames(bool use_limiter,
	AudioProcessing* limiter,
	AudioFrame* audio_frame_for_mixing) {
	audio_frame_for_mixing->elapsed_time_ms_ = -1;
	AudioFrameOperations::Mute(audio_frame_for_mixing);
	// The limiter should still process a zero frame to avoid jumps in
	// its gain curve.
	if (use_limiter) {
	RTC_DCHECK(limiter);
	// The limiter smoothly increases frames with half gain to full
	// volume. Here there's no need to apply half gain, since the frame
	// is zero anyway.
	limiter->ProcessStream(audio_frame_for_mixing);
	}
	}

	void CombineOneFrame(const AudioFrame* input_frame,
	bool use_limiter,
	AudioProcessing* limiter,
	AudioFrame* audio_frame_for_mixing) {
	audio_frame_for_mixing->timestamp_ = input_frame->timestamp_;
	audio_frame_for_mixing->elapsed_time_ms_ = input_frame->elapsed_time_ms_;
	// TODO(yujo): can we optimize muted frames?
	std::copy(input_frame->data(),
	input_frame->data() +
	input_frame->num_channels_ * input_frame->samples_per_channel_,
	audio_frame_for_mixing->mutable_data());
	if (use_limiter) {
	AudioFrameOperations::ApplyHalfGain(audio_frame_for_mixing);
	RTC_DCHECK(limiter);
	limiter->ProcessStream(audio_frame_for_mixing);
	AudioFrameOperations::Add(*audio_frame_for_mixing, audio_frame_for_mixing);
	}
	}

	// Helper for Combine(...) that handles the case of several input frames.
	// All views in 'input_frames' must have the same length, which must not
	// exceed kMaximalFrameSize (both are checked in debug builds only).
	//
	// TODO(aleloi): change interface to ArrayView<int16_t> output_frame
	// once we have gotten rid of the APM limiter.
	//
	// Only the 'data' field of the output frame should be modified. The
	// remaining fields are there because the frame may be passed on to the
	// APM limiter.
	void CombineMultipleFrames(
	    const std::vector<rtc::ArrayView<const int16_t>>& input_frames,
	    bool use_limiter,
	    AudioProcessing* limiter,
	    AudioFrame* audio_frame_for_mixing) {
	  RTC_DCHECK(!input_frames.empty());
	  RTC_DCHECK(audio_frame_for_mixing);

	  const size_t frame_length = input_frames.front().size();
	  for (const auto& frame : input_frames) {
	    RTC_DCHECK_EQ(frame_length, frame.size());
	  }

	  // The int16 inputs are summed into a zero-initialized int32 buffer of
	  // fixed size. For > 2 participants this is more efficient than adding
	  // in place in the int16 audio frame, and the quality loss caused by
	  // halving the samples is smaller than that of 16-bit addition in place.
	  RTC_DCHECK_GE(kMaximalFrameSize, frame_length);
	  std::array<int32_t, kMaximalFrameSize> mix_sum{};

	  for (const auto& frame : input_frames) {
	    // TODO(yujo): skip this for muted frames.
	    size_t index = 0;
	    for (const int16_t sample : frame) {
	      mix_sum[index] += sample;
	      ++index;
	    }
	  }

	  auto* const mixed = audio_frame_for_mixing->mutable_data();
	  if (!use_limiter) {
	    // No limiter: saturate the sums straight into the output frame.
	    for (size_t i = 0; i < frame_length; ++i) {
	      mixed[i] = rtc::saturated_cast<int16_t>(mix_sum[i]);
	    }
	    return;
	  }

	  // Halve all samples to avoid saturation before limiting.
	  for (size_t i = 0; i < frame_length; ++i) {
	    mixed[i] = rtc::saturated_cast<int16_t>(mix_sum[i] / 2);
	  }

	  // Smoothly limit the audio.
	  RTC_DCHECK(limiter);
	  const int error = limiter->ProcessStream(audio_frame_for_mixing);
	  if (error != limiter->kNoError) {
	    LOG_F(LS_ERROR) << "Error from AudioProcessing: " << error;
	    RTC_NOTREACHED();
	  }

	  // Restore the level now that the frame has been limited. Some resolution
	  // is lost this way, which is deemed acceptable.
	  //
	  // The gain could instead be applied in the AGC (target level 0 dbFS and
	  // compression gain 6 dB), but in the transition frame where that gets
	  // enabled (moving from one to two audio sources) it could create
	  // discontinuities in the mixed frame.
	  //
	  // So the frame is doubled instead, using addition since left-shifting a
	  // negative value is undefined.
	  AudioFrameOperations::Add(*audio_frame_for_mixing, audio_frame_for_mixing);
	}

	std::unique_ptr<AudioProcessing> CreateLimiter() {
	Config config;
	config.Set<ExperimentalAgc>(new ExperimentalAgc(false));

	std::unique_ptr<AudioProcessing> limiter(AudioProcessing::Create(config));
	RTC_DCHECK(limiter);

	webrtc::AudioProcessing::Config apm_config;
	apm_config.residual_echo_detector.enabled = false;
	limiter->ApplyConfig(apm_config);

	const auto check_no_error = [](int x) {
	RTC_DCHECK_EQ(x, AudioProcessing::kNoError);
	};
	auto* const gain_control = limiter->gain_control();
	check_no_error(gain_control->set_mode(GainControl::kFixedDigital));

	// We smoothly limit the mixed frame to -7 dbFS. -6 would correspond to the
	// divide-by-2 but -7 is used instead to give a bit of headroom since the
	// AGC is not a hard limiter.
	check_no_error(gain_control->set_target_level_dbfs(7));

	check_no_error(gain_control->set_compression_gain_db(0));
	check_no_error(gain_control->enable_limiter(true));
	check_no_error(gain_control->Enable(true));
	return limiter;
	}
	} // namespace

	// Remembers whether the APM limiter is requested and creates it via
	// CreateLimiter() only in that case; otherwise 'limiter_' is null.
	FrameCombiner::FrameCombiner(bool use_apm_limiter)
	: use_apm_limiter_(use_apm_limiter),
	limiter_(use_apm_limiter ? CreateLimiter() : nullptr) {}

	// Defaulted destructor, defined out of line in this file.
	// NOTE(review): presumably so the header does not need the complete
	// AudioProcessing type for 'limiter_' - confirm against frame_combiner.h.
	FrameCombiner::~FrameCombiner() = default;

	// Mixes the frames in 'mix_list' into 'audio_frame_for_mixing'.
	//
	// Every input frame is expected to have rate 'sample_rate' and the matching
	// number of samples per channel (checked in debug builds only). Each input
	// frame is passed to RemixFrame() together with 'number_of_channels' before
	// mixing, so the frames in 'mix_list' may be modified. The limiter is used
	// only if it was requested at construction and 'number_of_streams' is
	// greater than one.
	void FrameCombiner::Combine(const std::vector<AudioFrame*>& mix_list,
	                            size_t number_of_channels,
	                            int sample_rate,
	                            size_t number_of_streams,
	                            AudioFrame* audio_frame_for_mixing) const {
	  RTC_DCHECK(audio_frame_for_mixing);
	  const size_t samples_per_channel = static_cast<size_t>(
	      (sample_rate * webrtc::AudioMixerImpl::kFrameDurationInMs) / 1000);

	  for (const auto* frame : mix_list) {
	    RTC_DCHECK_EQ(samples_per_channel, frame->samples_per_channel_);
	    RTC_DCHECK_EQ(sample_rate, frame->sample_rate_hz_);
	  }

	  // Frames could be both stereo and mono.
	  for (auto* frame : mix_list) {
	    RemixFrame(number_of_channels, frame);
	  }

	  // TODO(aleloi): Issue bugs.webrtc.org/3390.
	  // Audio frame timestamp. The 'timestamp_' field is set to dummy
	  // value '0', because it is only supported in the one channel case and
	  // is then updated in the helper functions.
	  audio_frame_for_mixing->UpdateFrame(
	      -1, 0, nullptr, samples_per_channel, sample_rate, AudioFrame::kUndefined,
	      AudioFrame::kVadUnknown, number_of_channels);

	  const bool use_limiter_this_round = use_apm_limiter_ && number_of_streams > 1;

	  if (mix_list.empty()) {
	    CombineZeroFrames(use_limiter_this_round, limiter_.get(),
	                      audio_frame_for_mixing);
	  } else if (mix_list.size() == 1) {
	    CombineOneFrame(mix_list.front(), use_limiter_this_round, limiter_.get(),
	                    audio_frame_for_mixing);
	  } else {
	    // Every view has the same length; compute it once.
	    const size_t samples_per_frame = samples_per_channel * number_of_channels;

	    // Reserve up front so that collecting the views needs one allocation
	    // instead of repeated regrowth.
	    std::vector<rtc::ArrayView<const int16_t>> input_frames;
	    input_frames.reserve(mix_list.size());
	    for (const AudioFrame* frame : mix_list) {
	      input_frames.push_back(
	          rtc::ArrayView<const int16_t>(frame->data(), samples_per_frame));
	    }
	    CombineMultipleFrames(input_frames, use_limiter_this_round, limiter_.get(),
	                          audio_frame_for_mixing);
	  }
	}

	} // namespace webrtc