Add a mute state field to AudioFrame and switch some callers to use it. Also make AudioFrame::data_ private and instead provide:

const int16_t* data() const;
int16_t* mutable_data();

- data() returns a zeroed static buffer on muted frames (to avoid unnecessarily zeroing the member buffer) and returns AudioFrame::data_ directly on unmuted frames.
- mutable_data() lazily zeroes AudioFrame::data_ if the frame is currently muted, sets muted=false, and returns AudioFrame::data_.

These accessors serve to "force" callers to be aware of the mute state field; lazy zeroing is not the primary motivation.
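
A minimal sketch of the intended accessor behavior, assuming a bool muted_
member (empty_data() is a hypothetical helper returning a shared, zeroed
static buffer; this is an illustration, not the exact implementation):

const int16_t* AudioFrame::data() const {
  // Muted frames never expose the member buffer, so it can stay stale.
  return muted_ ? empty_data() : data_;
}

int16_t* AudioFrame::mutable_data() {
  if (muted_) {
    // Lazily zero the member buffer on first write access, then unmute.
    memset(data_, 0, kMaxDataSizeBytes);
    muted_ = false;
  }
  return data_;
}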

This change only optimizes handling of muted frames where doing so is straightforward. Other improvements requiring more significant structural changes will come later.
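
For call sites that fill a frame, the pattern looks roughly like this
(PrepareTestFrame is an illustrative name; the real call sites are in the
diff below):

// Mark the frame muted instead of memset-ing its buffer; unmuting happens
// implicitly via mutable_data() only if samples are actually written.
void PrepareTestFrame(AudioFrame* frame, bool write_samples) {
  frame->Mute();  // Cheap: just sets the muted flag.
  if (write_samples) {
    int16_t* samples = frame->mutable_data();  // Zeroes lazily, unmutes.
    for (size_t i = 0; i < frame->samples_per_channel_; ++i)
      samples[i] = static_cast<int16_t>(100 * i);
  }
}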

BUG=webrtc:7343
TBR=henrika

Review-Url: https://codereview.webrtc.org/2750783004
Cr-Commit-Position: refs/heads/master@{#18543}
diff --git a/webrtc/voice_engine/BUILD.gn b/webrtc/voice_engine/BUILD.gn
index 253eacb..e16b176 100644
--- a/webrtc/voice_engine/BUILD.gn
+++ b/webrtc/voice_engine/BUILD.gn
@@ -57,6 +57,7 @@
   deps = [
     ":audio_coder",
     "..:webrtc_common",
+    "../audio/utility:audio_frame_operations",
     "../base:rtc_base_approved",
     "../common_audio",
     "../modules:module_api",
diff --git a/webrtc/voice_engine/audio_level.cc b/webrtc/voice_engine/audio_level.cc
index b2f4df4..27a7dde 100644
--- a/webrtc/voice_engine/audio_level.cc
+++ b/webrtc/voice_engine/audio_level.cc
@@ -50,9 +50,10 @@
 
 void AudioLevel::ComputeLevel(const AudioFrame& audioFrame) {
   // Check speech level (works for 2 channels as well)
-  int16_t abs_value = WebRtcSpl_MaxAbsValueW16(
-      audioFrame.data_,
-      audioFrame.samples_per_channel_ * audioFrame.num_channels_);
+  int16_t abs_value = audioFrame.muted() ? 0 :
+      WebRtcSpl_MaxAbsValueW16(
+          audioFrame.data(),
+          audioFrame.samples_per_channel_ * audioFrame.num_channels_);
 
   // Protect member access using a lock since this method is called on a
   // dedicated audio thread in the RecordedDataIsAvailable() callback.
diff --git a/webrtc/voice_engine/channel.cc b/webrtc/voice_engine/channel.cc
index 0a9e9fce..1612270 100644
--- a/webrtc/voice_engine/channel.cc
+++ b/webrtc/voice_engine/channel.cc
@@ -660,7 +660,7 @@
     rtc::CritScope cs(&_callbackCritSect);
     if (audio_sink_) {
       AudioSinkInterface::Data data(
-          &audioFrame->data_[0], audioFrame->samples_per_channel_,
+          audioFrame->data(), audioFrame->samples_per_channel_,
           audioFrame->sample_rate_hz_, audioFrame->num_channels_,
           audioFrame->timestamp_);
       audio_sink_->OnData(data);
@@ -2786,12 +2786,12 @@
   if (_includeAudioLevelIndication) {
     size_t length =
         audio_input->samples_per_channel_ * audio_input->num_channels_;
-    RTC_CHECK_LE(length, sizeof(audio_input->data_));
+    RTC_CHECK_LE(length, AudioFrame::kMaxDataSizeBytes);
     if (is_muted && previous_frame_muted_) {
       rms_level_.AnalyzeMuted(length);
     } else {
       rms_level_.Analyze(
-          rtc::ArrayView<const int16_t>(audio_input->data_, length));
+          rtc::ArrayView<const int16_t>(audio_input->data(), length));
     }
   }
   previous_frame_muted_ = is_muted;
@@ -2951,8 +2951,8 @@
   if (_mixFileWithMicrophone) {
     // Currently file stream is always mono.
     // TODO(xians): Change the code when FilePlayer supports real stereo.
-    MixWithSat(audio_input->data_, audio_input->num_channels_, fileBuffer.get(),
-               1, fileSamples);
+    MixWithSat(audio_input->mutable_data(), audio_input->num_channels_,
+               fileBuffer.get(), 1, fileSamples);
   } else {
     // Replace ACM audio with file.
     // Currently file stream is always mono.
@@ -2991,8 +2991,8 @@
   if (audioFrame.samples_per_channel_ == fileSamples) {
     // Currently file stream is always mono.
     // TODO(xians): Change the code when FilePlayer supports real stereo.
-    MixWithSat(audioFrame.data_, audioFrame.num_channels_, fileBuffer.get(), 1,
-               fileSamples);
+    MixWithSat(audioFrame.mutable_data(), audioFrame.num_channels_,
+               fileBuffer.get(), 1, fileSamples);
   } else {
     WEBRTC_TRACE(kTraceWarning, kTraceVoice, VoEId(_instanceId, _channelId),
                  "Channel::MixAudioWithFile() samples_per_channel_(%" PRIuS
diff --git a/webrtc/voice_engine/file_player.cc b/webrtc/voice_engine/file_player.cc
index a7db182..d2eadae 100644
--- a/webrtc/voice_engine/file_player.cc
+++ b/webrtc/voice_engine/file_player.cc
@@ -126,9 +126,9 @@
     unresampledAudioFrame.sample_rate_hz_ = _codec.plfreq;
 
     // L16 is un-encoded data. Just pull 10 ms.
-    size_t lengthInBytes = sizeof(unresampledAudioFrame.data_);
+    size_t lengthInBytes = AudioFrame::kMaxDataSizeBytes;
     if (_fileModule.PlayoutAudioData(
-            reinterpret_cast<int8_t*>(unresampledAudioFrame.data_),
+            reinterpret_cast<int8_t*>(unresampledAudioFrame.mutable_data()),
             lengthInBytes) == -1) {
       // End of file reached.
       return -1;
@@ -173,7 +173,7 @@
     memset(outBuffer, 0, outLen * sizeof(int16_t));
     return 0;
   }
-  _resampler.Push(unresampledAudioFrame.data_,
+  _resampler.Push(unresampledAudioFrame.data(),
                   unresampledAudioFrame.samples_per_channel_, outBuffer,
                   MAX_AUDIO_BUFFER_IN_SAMPLES, outLen);
 
diff --git a/webrtc/voice_engine/file_recorder.cc b/webrtc/voice_engine/file_recorder.cc
index 5448451..eed3c07 100644
--- a/webrtc/voice_engine/file_recorder.cc
+++ b/webrtc/voice_engine/file_recorder.cc
@@ -12,6 +12,7 @@
 
 #include <list>
 
+#include "webrtc/audio/utility/audio_frame_operations.h"
 #include "webrtc/base/logging.h"
 #include "webrtc/base/platform_thread.h"
 #include "webrtc/common_audio/resampler/include/resampler.h"
@@ -159,12 +160,10 @@
     tempAudioFrame.sample_rate_hz_ = incomingAudioFrame.sample_rate_hz_;
     tempAudioFrame.samples_per_channel_ =
         incomingAudioFrame.samples_per_channel_;
-    for (size_t i = 0; i < (incomingAudioFrame.samples_per_channel_); i++) {
-      // Sample value is the average of left and right buffer rounded to
-      // closest integer value. Note samples can be either 1 or 2 byte.
-      tempAudioFrame.data_[i] = ((incomingAudioFrame.data_[2 * i] +
-                                  incomingAudioFrame.data_[(2 * i) + 1] + 1) >>
-                                 1);
+    if (!incomingAudioFrame.muted()) {
+      AudioFrameOperations::StereoToMono(
+          incomingAudioFrame.data(), incomingAudioFrame.samples_per_channel_,
+          tempAudioFrame.mutable_data());
     }
   } else if (incomingAudioFrame.num_channels_ == 1 && _moduleFile->IsStereo()) {
     // Recording stereo but incoming audio is mono.
@@ -172,10 +171,10 @@
     tempAudioFrame.sample_rate_hz_ = incomingAudioFrame.sample_rate_hz_;
     tempAudioFrame.samples_per_channel_ =
         incomingAudioFrame.samples_per_channel_;
-    for (size_t i = 0; i < (incomingAudioFrame.samples_per_channel_); i++) {
-      // Duplicate sample to both channels
-      tempAudioFrame.data_[2 * i] = incomingAudioFrame.data_[i];
-      tempAudioFrame.data_[2 * i + 1] = incomingAudioFrame.data_[i];
+    if (!incomingAudioFrame.muted()) {
+      AudioFrameOperations::MonoToStereo(
+          incomingAudioFrame.data(), incomingAudioFrame.samples_per_channel_,
+          tempAudioFrame.mutable_data());
     }
   }
 
@@ -204,8 +203,9 @@
     _audioResampler.ResetIfNeeded(ptrAudioFrame->sample_rate_hz_,
                                   codec_info_.plfreq,
                                   ptrAudioFrame->num_channels_);
+    // TODO(yujo): skip resample if frame is muted.
     _audioResampler.Push(
-        ptrAudioFrame->data_,
+        ptrAudioFrame->data(),
         ptrAudioFrame->samples_per_channel_ * ptrAudioFrame->num_channels_,
         reinterpret_cast<int16_t*>(_audioBuffer), MAX_AUDIO_BUFFER_IN_BYTES,
         outLen);
diff --git a/webrtc/voice_engine/transmit_mixer.cc b/webrtc/voice_engine/transmit_mixer.cc
index e14b03f..6796f84 100644
--- a/webrtc/voice_engine/transmit_mixer.cc
+++ b/webrtc/voice_engine/transmit_mixer.cc
@@ -936,7 +936,7 @@
     {
         // Currently file stream is always mono.
         // TODO(xians): Change the code when FilePlayer supports real stereo.
-        MixWithSat(_audioFrame.data_,
+        MixWithSat(_audioFrame.mutable_data(),
                    _audioFrame.num_channels_,
                    fileBuffer.get(),
                    1,
diff --git a/webrtc/voice_engine/utility.cc b/webrtc/voice_engine/utility.cc
index f394762..f877c43 100644
--- a/webrtc/voice_engine/utility.cc
+++ b/webrtc/voice_engine/utility.cc
@@ -25,7 +25,7 @@
 void RemixAndResample(const AudioFrame& src_frame,
                       PushResampler<int16_t>* resampler,
                       AudioFrame* dst_frame) {
-  RemixAndResample(src_frame.data_, src_frame.samples_per_channel_,
+  RemixAndResample(src_frame.data(), src_frame.samples_per_channel_,
                    src_frame.num_channels_, src_frame.sample_rate_hz_,
                    resampler, dst_frame);
   dst_frame->timestamp_ = src_frame.timestamp_;
@@ -64,13 +64,18 @@
             << ", audio_ptr_num_channels = " << audio_ptr_num_channels;
   }
 
+  // TODO(yujo): for muted input frames, don't resample. Either 1) allow
+  // resampler to return output length without doing the resample, so we know
+  // how much to zero here; or 2) make resampler accept a hint that the input is
+  // zeroed.
   const size_t src_length = samples_per_channel * audio_ptr_num_channels;
-  int out_length = resampler->Resample(audio_ptr, src_length, dst_frame->data_,
+  int out_length = resampler->Resample(audio_ptr, src_length,
+                                       dst_frame->mutable_data(),
                                        AudioFrame::kMaxDataSizeSamples);
   if (out_length == -1) {
     FATAL() << "Resample failed: audio_ptr = " << audio_ptr
             << ", src_length = " << src_length
-            << ", dst_frame->data_ = " << dst_frame->data_;
+            << ", dst_frame->mutable_data() = " << dst_frame->mutable_data();
   }
   dst_frame->samples_per_channel_ = out_length / audio_ptr_num_channels;
 
diff --git a/webrtc/voice_engine/utility_unittest.cc b/webrtc/voice_engine/utility_unittest.cc
index 94abc0f..992cb71 100644
--- a/webrtc/voice_engine/utility_unittest.cc
+++ b/webrtc/voice_engine/utility_unittest.cc
@@ -47,12 +47,13 @@
 // used so non-integer values result in rounding error, but not an accumulating
 // error.
 void SetMonoFrame(float data, int sample_rate_hz, AudioFrame* frame) {
-  memset(frame->data_, 0, sizeof(frame->data_));
+  frame->Mute();
   frame->num_channels_ = 1;
   frame->sample_rate_hz_ = sample_rate_hz;
   frame->samples_per_channel_ = rtc::CheckedDivExact(sample_rate_hz, 100);
+  int16_t* frame_data = frame->mutable_data();
   for (size_t i = 0; i < frame->samples_per_channel_; i++) {
-    frame->data_[i] = static_cast<int16_t>(data * i);
+    frame_data[i] = static_cast<int16_t>(data * i);
   }
 }
 
@@ -67,13 +68,14 @@
                     float right,
                     int sample_rate_hz,
                     AudioFrame* frame) {
-  memset(frame->data_, 0, sizeof(frame->data_));
+  frame->Mute();
   frame->num_channels_ = 2;
   frame->sample_rate_hz_ = sample_rate_hz;
   frame->samples_per_channel_ = rtc::CheckedDivExact(sample_rate_hz, 100);
+  int16_t* frame_data = frame->mutable_data();
   for (size_t i = 0; i < frame->samples_per_channel_; i++) {
-    frame->data_[i * 2] = static_cast<int16_t>(left * i);
-    frame->data_[i * 2 + 1] = static_cast<int16_t>(right * i);
+    frame_data[i * 2] = static_cast<int16_t>(left * i);
+    frame_data[i * 2 + 1] = static_cast<int16_t>(right * i);
   }
 }
 
@@ -90,15 +92,16 @@
                   float ch4,
                   int sample_rate_hz,
                   AudioFrame* frame) {
-  memset(frame->data_, 0, sizeof(frame->data_));
+  frame->Mute();
   frame->num_channels_ = 4;
   frame->sample_rate_hz_ = sample_rate_hz;
   frame->samples_per_channel_ = rtc::CheckedDivExact(sample_rate_hz, 100);
+  int16_t* frame_data = frame->mutable_data();
   for (size_t i = 0; i < frame->samples_per_channel_; i++) {
-    frame->data_[i * 4] = static_cast<int16_t>(ch1 * i);
-    frame->data_[i * 4 + 1] = static_cast<int16_t>(ch2 * i);
-    frame->data_[i * 4 + 2] = static_cast<int16_t>(ch3 * i);
-    frame->data_[i * 4 + 3] = static_cast<int16_t>(ch4 * i);
+    frame_data[i * 4] = static_cast<int16_t>(ch1 * i);
+    frame_data[i * 4 + 1] = static_cast<int16_t>(ch2 * i);
+    frame_data[i * 4 + 2] = static_cast<int16_t>(ch3 * i);
+    frame_data[i * 4 + 3] = static_cast<int16_t>(ch4 * i);
   }
 }
 
@@ -119,11 +122,13 @@
   for (size_t delay = 0; delay <= max_delay; delay++) {
     float mse = 0;
     float variance = 0;
+    const int16_t* ref_frame_data = ref_frame.data();
+    const int16_t* test_frame_data = test_frame.data();
     for (size_t i = 0; i < ref_frame.samples_per_channel_ *
         ref_frame.num_channels_ - delay; i++) {
-      int error = ref_frame.data_[i] - test_frame.data_[i + delay];
+      int error = ref_frame_data[i] - test_frame_data[i + delay];
       mse += error * error;
-      variance += ref_frame.data_[i] * ref_frame.data_[i];
+      variance += ref_frame_data[i] * ref_frame_data[i];
     }
     float snr = 100;  // We assign 100 dB to the zero-error case.
     if (mse > 0)
@@ -140,9 +145,11 @@
 void VerifyFramesAreEqual(const AudioFrame& ref_frame,
                           const AudioFrame& test_frame) {
   VerifyParams(ref_frame, test_frame);
+  const int16_t* ref_frame_data = ref_frame.data();
+  const int16_t* test_frame_data = test_frame.data();
   for (size_t i = 0;
        i < ref_frame.samples_per_channel_ * ref_frame.num_channels_; i++) {
-    EXPECT_EQ(ref_frame.data_[i], test_frame.data_[i]);
+    EXPECT_EQ(ref_frame_data[i], test_frame_data[i]);
   }
 }
 
diff --git a/webrtc/voice_engine/voe_base_impl.cc b/webrtc/voice_engine/voe_base_impl.cc
index 8072cc8..1ddf53c 100644
--- a/webrtc/voice_engine/voe_base_impl.cc
+++ b/webrtc/voice_engine/voe_base_impl.cc
@@ -716,7 +716,7 @@
   assert(sample_rate == audioFrame_.sample_rate_hz_);
 
   // Deliver audio (PCM) samples to the ADM
-  memcpy(audio_data, audioFrame_.data_,
+  memcpy(audio_data, audioFrame_.data(),
          sizeof(int16_t) * number_of_frames * number_of_channels);
 
   *elapsed_time_ms = audioFrame_.elapsed_time_ms_;