Move audio frame memory handling inside AudioMixer.

Simplify the AudioMixer::Source interface and update the mixer
implementation to the new interface.

Instead of asking a mixer source to provide a pointer to an AudioFrame
during each mixing iteration, a mixer should supply a pointer to its
own AudioFrame.

This simplifies lifetime issues as sources do not give away an
internal pointer.

Implementation: when an audio source is added, the mixer allocates a
new AudioFrame. The audio frame is kept together in the internal class
SourceStatus together with the audio source pointer until the source
is removed.

NOTRY=True
BUG=webrtc:6346

Review-Url: https://codereview.webrtc.org/2420913002
Cr-Original-Commit-Position: refs/heads/master@{#14713}
Cr-Mirrored-From: https://chromium.googlesource.com/external/webrtc
Cr-Mirrored-Commit: 6c278491adae678cdd95c71c444a02103012df56
diff --git a/api/audio/audio_mixer.h b/api/audio/audio_mixer.h
index 960adbb..ee39daa 100644
--- a/api/audio/audio_mixer.h
+++ b/api/audio/audio_mixer.h
@@ -35,33 +35,18 @@
       kError,   // The audio_frame will not be used.
     };
 
-    struct AudioFrameWithInfo {
-      AudioFrame* audio_frame;
-      AudioFrameInfo audio_frame_info;
-    };
-
-    // The implementation of GetAudioFrameWithInfo should update
-    // audio_frame with new audio every time it's called. Implementing
-    // classes are allowed to return the same AudioFrame pointer on
-    // different calls. The pointer must stay valid until the next
-    // mixing call or until this audio source is disconnected from the
-    // mixer. The mixer may modify the contents of the passed
-    // AudioFrame pointer at any time until the next call to
-    // GetAudioFrameWithInfo, or until the source is removed from the
-    // mixer.
-    virtual AudioFrameWithInfo GetAudioFrameWithInfo(int sample_rate_hz) = 0;
+    // Overwrites |audio_frame|. The data_ field is overwritten with
+    // 10 ms of new audio (either 1 or 2 interleaved channels) at
+    // |sample_rate_hz|. All fields in |audio_frame| must be updated.
+    virtual AudioFrameInfo GetAudioFrameWithInfo(int sample_rate_hz,
+                                                 AudioFrame* audio_frame) = 0;
 
     // A way for a mixer implementation to distinguish participants.
     virtual int Ssrc() = 0;
 
-   protected:
     virtual ~Source() {}
   };
 
-  // Since the mixer is reference counted, the destructor may be
-  // called from any thread.
-  ~AudioMixer() override {}
-
   // Returns true if adding/removing was successful. A source is never
   // added twice and removal is never attempted if a source has not
   // been successfully added to the mixer. Addition and removal can
@@ -70,12 +55,18 @@
   virtual bool RemoveSource(Source* audio_source) = 0;
 
   // Performs mixing by asking registered audio sources for audio. The
-  // mixed result is placed in the provided AudioFrame. Will only be
-  // called from a single thread. The rate and channels arguments
-  // specify the rate and number of channels of the mix result.
+  // mixed result is placed in the provided AudioFrame. This method
+  // will only be called from a single thread. The rate and channels
+  // arguments specify the rate and number of channels of the mix
+  // result. All fields in |audio_frame_for_mixing| must be updated.
   virtual void Mix(int sample_rate_hz,
                    size_t number_of_channels,
                    AudioFrame* audio_frame_for_mixing) = 0;
+
+ protected:
+  // Since the mixer is reference counted, the destructor may be
+  // called from any thread.
+  ~AudioMixer() override {}
 };
 }  // namespace webrtc
 
diff --git a/audio/audio_receive_stream.cc b/audio/audio_receive_stream.cc
index d059342..12b8b3f 100644
--- a/audio/audio_receive_stream.cc
+++ b/audio/audio_receive_stream.cc
@@ -272,9 +272,10 @@
   return channel_proxy_->ReceivedRTPPacket(packet, length, packet_time);
 }
 
-AudioMixer::Source::AudioFrameWithInfo
-AudioReceiveStream::GetAudioFrameWithInfo(int sample_rate_hz) {
-  return channel_proxy_->GetAudioFrameWithInfo(sample_rate_hz);
+AudioMixer::Source::AudioFrameInfo AudioReceiveStream::GetAudioFrameWithInfo(
+    int sample_rate_hz,
+    AudioFrame* audio_frame) {
+  return channel_proxy_->GetAudioFrameWithInfo(sample_rate_hz, audio_frame);
 }
 
 int AudioReceiveStream::Ssrc() {
diff --git a/audio/audio_receive_stream.h b/audio/audio_receive_stream.h
index 82c642a..e6416cd 100644
--- a/audio/audio_receive_stream.h
+++ b/audio/audio_receive_stream.h
@@ -55,7 +55,8 @@
   const webrtc::AudioReceiveStream::Config& config() const;
 
   // AudioMixer::Source
-  AudioFrameWithInfo GetAudioFrameWithInfo(int sample_rate_hz) override;
+  AudioFrameInfo GetAudioFrameWithInfo(int sample_rate_hz,
+                                       AudioFrame* audio_frame) override;
   int Ssrc() override;
 
  private:
diff --git a/modules/audio_mixer/audio_mixer_impl.cc b/modules/audio_mixer/audio_mixer_impl.cc
index 3fcd3e3..c646fcb 100644
--- a/modules/audio_mixer/audio_mixer_impl.cc
+++ b/modules/audio_mixer/audio_mixer_impl.cc
@@ -119,20 +119,22 @@
 AudioMixerImpl::SourceStatusList::const_iterator FindSourceInList(
     AudioMixerImpl::Source const* audio_source,
     AudioMixerImpl::SourceStatusList const* audio_source_list) {
-  return std::find_if(audio_source_list->begin(), audio_source_list->end(),
-                      [audio_source](const AudioMixerImpl::SourceStatus& p) {
-                        return p.audio_source == audio_source;
-                      });
+  return std::find_if(
+      audio_source_list->begin(), audio_source_list->end(),
+      [audio_source](const std::unique_ptr<AudioMixerImpl::SourceStatus>& p) {
+        return p->audio_source == audio_source;
+      });
 }
 
 // TODO(aleloi): remove non-const version when WEBRTC only supports modern STL.
 AudioMixerImpl::SourceStatusList::iterator FindSourceInList(
     AudioMixerImpl::Source const* audio_source,
     AudioMixerImpl::SourceStatusList* audio_source_list) {
-  return std::find_if(audio_source_list->begin(), audio_source_list->end(),
-                      [audio_source](const AudioMixerImpl::SourceStatus& p) {
-                        return p.audio_source == audio_source;
-                      });
+  return std::find_if(
+      audio_source_list->begin(), audio_source_list->end(),
+      [audio_source](const std::unique_ptr<AudioMixerImpl::SourceStatus>& p) {
+        return p->audio_source == audio_source;
+      });
 }
 
 }  // namespace
@@ -244,7 +246,7 @@
   RTC_DCHECK(FindSourceInList(audio_source, &audio_source_list_) ==
              audio_source_list_.end())
       << "Source already added to mixer";
-  audio_source_list_.emplace_back(audio_source, false, 0);
+  audio_source_list_.emplace_back(new SourceStatus(audio_source, false, 0));
   return true;
 }
 
@@ -263,21 +265,18 @@
   std::vector<SourceFrame> audio_source_mixing_data_list;
   std::vector<SourceFrame> ramp_list;
 
-  // Get audio source audio and put it in the struct vector.
+  // Get audio from the audio sources and put it in the SourceFrame vector.
   for (auto& source_and_status : audio_source_list_) {
-    auto audio_frame_with_info =
-        source_and_status.audio_source->GetAudioFrameWithInfo(
-            static_cast<int>(OutputFrequency()));
-
-    const auto audio_frame_info = audio_frame_with_info.audio_frame_info;
-    AudioFrame* audio_source_audio_frame = audio_frame_with_info.audio_frame;
+    const auto audio_frame_info =
+        source_and_status->audio_source->GetAudioFrameWithInfo(
+            OutputFrequency(), &source_and_status->audio_frame);
 
     if (audio_frame_info == Source::AudioFrameInfo::kError) {
       LOG_F(LS_WARNING) << "failed to GetAudioFrameWithInfo() from source";
       continue;
     }
     audio_source_mixing_data_list.emplace_back(
-        &source_and_status, audio_source_audio_frame,
+        source_and_status.get(), &source_and_status->audio_frame,
         audio_frame_info == Source::AudioFrameInfo::kMuted);
   }
 
@@ -344,10 +343,9 @@
   RTC_DCHECK_RUNS_SERIALIZED(&race_checker_);
   rtc::CritScope lock(&crit_);
 
-  const auto non_anonymous_iter =
-      FindSourceInList(audio_source, &audio_source_list_);
-  if (non_anonymous_iter != audio_source_list_.end()) {
-    return non_anonymous_iter->is_mixed;
+  const auto iter = FindSourceInList(audio_source, &audio_source_list_);
+  if (iter != audio_source_list_.end()) {
+    return (*iter)->is_mixed;
   }
 
   LOG(LS_ERROR) << "Audio source unknown";
diff --git a/modules/audio_mixer/audio_mixer_impl.h b/modules/audio_mixer/audio_mixer_impl.h
index e011e8a..876284f 100644
--- a/modules/audio_mixer/audio_mixer_impl.h
+++ b/modules/audio_mixer/audio_mixer_impl.h
@@ -35,9 +35,12 @@
     Source* audio_source = nullptr;
     bool is_mixed = false;
     float gain = 0.0f;
+
+    // A frame that will be passed to audio_source->GetAudioFrameWithInfo.
+    AudioFrame audio_frame;
   };
 
-  typedef std::vector<SourceStatus> SourceStatusList;
+  using SourceStatusList = std::vector<std::unique_ptr<SourceStatus>>;
 
   // AudioProcessing only accepts 10 ms frames.
   static const int kFrameDurationInMs = 10;
diff --git a/modules/audio_mixer/audio_mixer_impl_unittest.cc b/modules/audio_mixer/audio_mixer_impl_unittest.cc
index 9147a15..28976fa 100644
--- a/modules/audio_mixer/audio_mixer_impl_unittest.cc
+++ b/modules/audio_mixer/audio_mixer_impl_unittest.cc
@@ -53,12 +53,13 @@
  public:
   MockMixerAudioSource()
       : fake_audio_frame_info_(AudioMixer::Source::AudioFrameInfo::kNormal) {
-    ON_CALL(*this, GetAudioFrameWithInfo(_))
+    ON_CALL(*this, GetAudioFrameWithInfo(_, _))
         .WillByDefault(
             Invoke(this, &MockMixerAudioSource::FakeAudioFrameWithInfo));
   }
 
-  MOCK_METHOD1(GetAudioFrameWithInfo, AudioFrameWithInfo(int sample_rate_hz));
+  MOCK_METHOD2(GetAudioFrameWithInfo,
+               AudioFrameInfo(int sample_rate_hz, AudioFrame* audio_frame));
 
   MOCK_METHOD0(Ssrc, int());
 
@@ -69,14 +70,12 @@
   }
 
  private:
-  AudioFrame fake_frame_, fake_output_frame_;
+  AudioFrame fake_frame_;
   AudioFrameInfo fake_audio_frame_info_;
-  AudioFrameWithInfo FakeAudioFrameWithInfo(int sample_rate_hz) {
-    fake_output_frame_.CopyFrom(fake_frame_);
-    return {
-        &fake_output_frame_,  // audio_frame_pointer
-        fake_info(),          // audio_frame_info
-    };
+  AudioFrameInfo FakeAudioFrameWithInfo(int sample_rate_hz,
+                                        AudioFrame* audio_frame) {
+    audio_frame->CopyFrom(fake_frame_);
+    return fake_info();
   }
 };
 
@@ -100,7 +99,7 @@
 
   for (int i = 0; i < num_audio_sources; i++) {
     EXPECT_TRUE(mixer->AddSource(&participants[i]));
-    EXPECT_CALL(participants[i], GetAudioFrameWithInfo(kDefaultSampleRateHz))
+    EXPECT_CALL(participants[i], GetAudioFrameWithInfo(kDefaultSampleRateHz, _))
         .Times(Exactly(1));
   }
 
@@ -129,7 +128,7 @@
     participants[i].fake_frame()->data_[80] = i;
 
     EXPECT_TRUE(mixer->AddSource(&participants[i]));
-    EXPECT_CALL(participants[i], GetAudioFrameWithInfo(_)).Times(Exactly(1));
+    EXPECT_CALL(participants[i], GetAudioFrameWithInfo(_, _)).Times(Exactly(1));
   }
 
   // Last participant gives audio frame with passive VAD, although it has the
@@ -171,7 +170,7 @@
   }
 
   EXPECT_TRUE(mixer->AddSource(&participant));
-  EXPECT_CALL(participant, GetAudioFrameWithInfo(_)).Times(Exactly(2));
+  EXPECT_CALL(participant, GetAudioFrameWithInfo(_, _)).Times(Exactly(2));
 
   AudioFrame audio_frame;
   // Two mix iteration to compare after the ramp-up step.
@@ -193,7 +192,7 @@
 
   EXPECT_TRUE(mixer->AddSource(&participant));
   for (auto frequency : {8000, 16000, 32000, 48000}) {
-    EXPECT_CALL(participant, GetAudioFrameWithInfo(frequency))
+    EXPECT_CALL(participant, GetAudioFrameWithInfo(frequency, _))
         .Times(Exactly(1));
     participant.fake_frame()->sample_rate_hz_ = frequency;
     participant.fake_frame()->samples_per_channel_ = frequency / 100;
@@ -210,7 +209,7 @@
 
   EXPECT_TRUE(mixer->AddSource(&participant));
   for (size_t number_of_channels : {1, 2}) {
-    EXPECT_CALL(participant, GetAudioFrameWithInfo(kDefaultSampleRateHz))
+    EXPECT_CALL(participant, GetAudioFrameWithInfo(kDefaultSampleRateHz, _))
         .Times(Exactly(1));
     mixer->Mix(kDefaultSampleRateHz, number_of_channels, &frame_for_mixing);
     EXPECT_EQ(number_of_channels, frame_for_mixing.num_channels_);
@@ -236,7 +235,7 @@
   // Add all participants but the loudest for mixing.
   for (int i = 0; i < kAudioSources - 1; i++) {
     EXPECT_TRUE(mixer->AddSource(&participants[i]));
-    EXPECT_CALL(participants[i], GetAudioFrameWithInfo(kDefaultSampleRateHz))
+    EXPECT_CALL(participants[i], GetAudioFrameWithInfo(kDefaultSampleRateHz, _))
         .Times(Exactly(1));
   }
 
@@ -252,7 +251,7 @@
   // Add new participant with higher energy.
   EXPECT_TRUE(mixer->AddSource(&participants[kAudioSources - 1]));
   for (int i = 0; i < kAudioSources; i++) {
-    EXPECT_CALL(participants[i], GetAudioFrameWithInfo(kDefaultSampleRateHz))
+    EXPECT_CALL(participants[i], GetAudioFrameWithInfo(kDefaultSampleRateHz, _))
         .Times(Exactly(1));
   }
 
@@ -287,7 +286,7 @@
       RTC_FROM_HERE,
       rtc::Bind(&AudioMixer::AddSource, mixer.get(), &participant)));
 
-  EXPECT_CALL(participant, GetAudioFrameWithInfo(kDefaultSampleRateHz))
+  EXPECT_CALL(participant, GetAudioFrameWithInfo(kDefaultSampleRateHz, _))
       .Times(Exactly(1));
 
   // Do one mixer iteration
diff --git a/voice_engine/channel.cc b/voice_engine/channel.cc
index 44a1c73..6d05332 100644
--- a/voice_engine/channel.cc
+++ b/voice_engine/channel.cc
@@ -712,11 +712,12 @@
                : MixerParticipant::AudioFrameInfo::kNormal;
 }
 
-AudioMixer::Source::AudioFrameWithInfo Channel::GetAudioFrameWithInfo(
-    int sample_rate_hz) {
-  mix_audio_frame_.sample_rate_hz_ = sample_rate_hz;
+AudioMixer::Source::AudioFrameInfo Channel::GetAudioFrameWithInfo(
+    int sample_rate_hz,
+    AudioFrame* audio_frame) {
+  audio_frame->sample_rate_hz_ = sample_rate_hz;
 
-  const auto frame_info = GetAudioFrameWithMuted(-1, &mix_audio_frame_);
+  const auto frame_info = GetAudioFrameWithMuted(-1, audio_frame);
 
   using FrameInfo = AudioMixer::Source::AudioFrameInfo;
   FrameInfo new_audio_frame_info = FrameInfo::kError;
@@ -731,7 +732,7 @@
       new_audio_frame_info = FrameInfo::kError;
       break;
   }
-  return {&mix_audio_frame_, new_audio_frame_info};
+  return new_audio_frame_info;
 }
 
 int32_t Channel::NeededFrequency(int32_t id) const {
diff --git a/voice_engine/channel.h b/voice_engine/channel.h
index 97340be..3b478ca 100644
--- a/voice_engine/channel.h
+++ b/voice_engine/channel.h
@@ -379,8 +379,9 @@
   int32_t NeededFrequency(int32_t id) const override;
 
   // From AudioMixer::Source.
-  AudioMixer::Source::AudioFrameWithInfo GetAudioFrameWithInfo(
-      int sample_rate_hz);
+  AudioMixer::Source::AudioFrameInfo GetAudioFrameWithInfo(
+      int sample_rate_hz,
+      AudioFrame* audio_frame);
 
   // From FileCallback
   void PlayNotification(int32_t id, uint32_t durationMs) override;
@@ -475,7 +476,6 @@
   AudioLevel _outputAudioLevel;
   bool _externalTransport;
   AudioFrame _audioFrame;
-  AudioFrame mix_audio_frame_;
   // Downsamples to the codec rate if necessary.
   PushResampler<int16_t> input_resampler_;
   std::unique_ptr<FilePlayer> input_file_player_;
diff --git a/voice_engine/channel_proxy.cc b/voice_engine/channel_proxy.cc
index dc01c9b..ba58502 100644
--- a/voice_engine/channel_proxy.cc
+++ b/voice_engine/channel_proxy.cc
@@ -214,10 +214,11 @@
   channel()->SetRtcEventLog(event_log);
 }
 
-AudioMixer::Source::AudioFrameWithInfo ChannelProxy::GetAudioFrameWithInfo(
-    int sample_rate_hz) {
+AudioMixer::Source::AudioFrameInfo ChannelProxy::GetAudioFrameWithInfo(
+    int sample_rate_hz,
+    AudioFrame* audio_frame) {
   RTC_DCHECK_RUNS_SERIALIZED(&race_checker_);
-  return channel()->GetAudioFrameWithInfo(sample_rate_hz);
+  return channel()->GetAudioFrameWithInfo(sample_rate_hz, audio_frame);
 }
 
 Channel* ChannelProxy::channel() const {
diff --git a/voice_engine/channel_proxy.h b/voice_engine/channel_proxy.h
index b09a56c..d19c009 100644
--- a/voice_engine/channel_proxy.h
+++ b/voice_engine/channel_proxy.h
@@ -93,8 +93,9 @@
 
   virtual void SetRtcEventLog(RtcEventLog* event_log);
 
-  virtual AudioMixer::Source::AudioFrameWithInfo GetAudioFrameWithInfo(
-      int sample_rate_hz);
+  virtual AudioMixer::Source::AudioFrameInfo GetAudioFrameWithInfo(
+      int sample_rate_hz,
+      AudioFrame* audio_frame);
 
  private:
   Channel* channel() const;