Permit mixing mono and stereo streams.

Add mixing tests based on older ones from the extended tests.

BUG=issue534
TEST=manual, voe_auto_test

Review URL: https://webrtc-codereview.appspot.com/576014

git-svn-id: http://webrtc.googlecode.com/svn/trunk@2265 4adac7df-926f-26a2-2b94-8c16560cd09d
diff --git a/src/modules/audio_conference_mixer/source/audio_conference_mixer_impl.cc b/src/modules/audio_conference_mixer/source/audio_conference_mixer_impl.cc
index 64e6fbc..a2f2184 100644
--- a/src/modules/audio_conference_mixer/source/audio_conference_mixer_impl.cc
+++ b/src/modules/audio_conference_mixer/source/audio_conference_mixer_impl.cc
@@ -14,16 +14,51 @@
 #include "audio_processing.h"
 #include "critical_section_wrapper.h"
 #include "map_wrapper.h"
+#include "voice_engine/main/source/audio_frame_operations.h"
 #include "trace.h"
 
 namespace webrtc {
 namespace {
+
+// Mixes |frame| into |mixed_frame|. |frame| is modified in place first: it is
+// halved as saturation protection and, if it has fewer channels than
+// |mixed_frame|, upmixed from mono to stereo. |mixed_frame| must have at
+// least as many channels as |frame| (asserted); stereo is the maximum.
+//
+// TODO(andrew): consider not modifying |frame| here.
+void MixFrames(AudioFrame* mixed_frame, AudioFrame* frame) {
+  assert(mixed_frame->num_channels_ >= frame->num_channels_);
+  // Divide by two to avoid saturation in the mixing.
+  *frame >>= 1;
+  if (mixed_frame->num_channels_ > frame->num_channels_) {
+    // We only support mono-to-stereo.
+    assert(mixed_frame->num_channels_ == 2 &&
+           frame->num_channels_ == 1);
+    AudioFrameOperations::MonoToStereo(*frame);  // Return value is ignored.
+  }
+
+  *mixed_frame += *frame;
+}
+
+// Returns the largest channel count found among the AudioFrames in |list|.
+// An empty |list| yields 1, i.e. mono is the floor.
+int MaxNumChannels(const ListWrapper& list) {
+  int widest = 1;
+  for (ListItem* node = list.First(); node != NULL;
+       node = list.Next(node)) {
+    AudioFrame* frame = static_cast<AudioFrame*>(node->GetItem());
+    widest = std::max(widest, frame->num_channels_);
+  }
+  return widest;
+}
+
 void SetParticipantStatistics(ParticipantStatistics* stats,
                               const AudioFrame& frame)
 {
     stats->participant = frame.id_;
     stats->level = 0;  // TODO(andrew): to what should this be set?
 }
+
 }  // namespace
 
 MixerParticipant::MixerParticipant()
@@ -283,25 +318,22 @@
     int retval = 0;
     WebRtc_Word32 audioLevel = 0;
     {
-        const ListItem* firstItem = mixList.First();
-        // Assume mono.
-        WebRtc_UWord8 numberOfChannels = 1;
-        if(firstItem != NULL)
-        {
-            // Use the same number of channels as the first frame to be mixed.
-            numberOfChannels = static_cast<const AudioFrame*>(
-                firstItem->GetItem())->num_channels_;
-        }
+        CriticalSectionScoped cs(_crit.get());
+
         // TODO(henrike): it might be better to decide the number of channels
         //                with an API instead of dynamically.
 
-        CriticalSectionScoped cs(_crit.get());
-        if (!SetNumLimiterChannels(numberOfChannels))
+        // Find the max channels over all mixing lists.
+        const int num_mixed_channels = std::max(MaxNumChannels(mixList),
+            std::max(MaxNumChannels(additionalFramesList),
+                     MaxNumChannels(rampOutList)));
+
+        if (!SetNumLimiterChannels(num_mixed_channels))
             retval = -1;
 
         mixedAudio->UpdateFrame(-1, _timeStamp, NULL, 0, _outputFrequency,
                                 AudioFrame::kNormalSpeech,
-                                AudioFrame::kVadPassive, numberOfChannels);
+                                AudioFrame::kVadPassive, num_mixed_channels);
 
         _timeStamp += _sampleSize;
 
@@ -1108,10 +1140,7 @@
             position = 0;
         }
         AudioFrame* audioFrame = static_cast<AudioFrame*>(item->GetItem());
-
-        // Divide by two to avoid saturation in the mixing.
-        *audioFrame >>= 1;
-        mixedAudio += *audioFrame;
+        MixFrames(&mixedAudio, audioFrame);
 
         SetParticipantStatistics(&_scratchMixedParticipants[position],
                                  *audioFrame);
@@ -1145,9 +1174,7 @@
     while(item != NULL)
     {
         AudioFrame* audioFrame = static_cast<AudioFrame*>(item->GetItem());
-        // Divide by two to avoid saturation in the mixing.
-        *audioFrame >>= 1;
-        mixedAudio += *audioFrame;
+        MixFrames(&mixedAudio, audioFrame);
         item = audioFrameList.Next(item);
     }
     return 0;
diff --git a/src/voice_engine/main/source/audio_frame_operations.cc b/src/voice_engine/main/source/audio_frame_operations.cc
index 123dc92..28f5ca8 100644
--- a/src/voice_engine/main/source/audio_frame_operations.cc
+++ b/src/voice_engine/main/source/audio_frame_operations.cc
@@ -12,7 +12,6 @@
 #include "module_common_types.h"
 
 namespace webrtc {
-namespace voe {
 
 int AudioFrameOperations::MonoToStereo(AudioFrame& frame) {
   if (frame.num_channels_ != 1) {
@@ -101,6 +100,5 @@
   return 0;
 }
 
-}  //  namespace voe
 }  //  namespace webrtc
 
diff --git a/src/voice_engine/main/source/audio_frame_operations.h b/src/voice_engine/main/source/audio_frame_operations.h
index e680dcb..753e4bf 100644
--- a/src/voice_engine/main/source/audio_frame_operations.h
+++ b/src/voice_engine/main/source/audio_frame_operations.h
@@ -17,10 +17,9 @@
 
 class AudioFrame;
 
-namespace voe {
-
-// TODO(andrew): unify this with utility.h. Change reference parameters to
-// pointers.
+// TODO(andrew): consolidate this with utility.h and audio_frame_manipulator.h.
+// Change reference parameters to pointers. Move out of VoE to a common place.
+// Consider using a namespace rather than class.
 class AudioFrameOperations {
  public:
   static int MonoToStereo(AudioFrame& frame);
@@ -38,7 +37,6 @@
   static int ScaleWithSat(float scale, AudioFrame& frame);
 };
 
-}  //  namespace voe
 }  //  namespace webrtc
 
 #endif  // #ifndef WEBRTC_VOICE_ENGINE_AUDIO_FRAME_OPERATIONS_H_
diff --git a/src/voice_engine/main/test/auto_test/standard/mixing_test.cc b/src/voice_engine/main/test/auto_test/standard/mixing_test.cc
new file mode 100644
index 0000000..5e6ca4a
--- /dev/null
+++ b/src/voice_engine/main/test/auto_test/standard/mixing_test.cc
@@ -0,0 +1,243 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <stdio.h>
+
+#include <string>
+
+#include "after_initialization_fixture.h"
+#include "test/testsupport/fileutils.h"
+
+namespace webrtc {
+namespace {
+
+const int16_t kLimiterHeadroom = 29204;  // == -1 dBFS
+const int16_t kInt16Max = 0x7fff;  // Largest int16_t sample value.
+const int kSampleRateHz = 16000;  // Rate of the generated and recorded PCM.
+const int kTestDurationMs = 4000;  // How long the mixed output is recorded.
+
+}  // namespace
+
+// Fixture that mixes file-driven VoiceEngine streams and checks the recording.
+class MixingTest : public AfterInitializationFixture {
+ protected:
+  MixingTest()
+    : input_filename_(test::OutputPath() + "mixing_test_input.pcm"),
+      output_filename_(test::OutputPath() + "mixing_test_output.pcm") {}
+
+  // Creates and mixes |num_remote_streams| which play a file "as microphone"
+  // with |num_local_streams| which play a file "locally", using a constant
+  // amplitude of |input_value|. The local streams manifest as "anonymous"
+  // mixing participants, meaning they will be mixed regardless of the number
+  // of participants. (A stream is a VoiceEngine "channel").
+  //
+  // The mixed output is verified to always fall between |max_output_value| and
+  // |min_output_value|, after a startup phase. The first
+  // |num_remote_streams_using_mono| remote streams use mono, with the
+  // remainder using stereo.
+  void RunMixingTest(int num_remote_streams,
+                     int num_local_streams,
+                     int num_remote_streams_using_mono,
+                     int16_t input_value,
+                     int16_t max_output_value,
+                     int16_t min_output_value) {
+    ASSERT_LE(num_remote_streams_using_mono, num_remote_streams);
+    // Stop here if the input file is unusable; no streams exist yet.
+    ASSERT_NO_FATAL_FAILURE(GenerateInputFile(input_value));
+
+    std::vector<int> local_streams(num_local_streams);
+    for (size_t i = 0; i < local_streams.size(); ++i) {
+      local_streams[i] = voe_base_->CreateChannel();
+      EXPECT_NE(-1, local_streams[i]);
+    }
+    StartLocalStreams(local_streams);
+    TEST_LOG("Playing %d local streams.\n", num_local_streams);
+
+    std::vector<int> remote_streams(num_remote_streams);
+    for (size_t i = 0; i < remote_streams.size(); ++i) {
+      remote_streams[i] = voe_base_->CreateChannel();
+      EXPECT_NE(-1, remote_streams[i]);
+    }
+    StartRemoteStreams(remote_streams, num_remote_streams_using_mono);
+    TEST_LOG("Playing %d remote streams.\n", num_remote_streams);
+
+    // Start recording the mixed output and wait.
+    EXPECT_EQ(0, voe_file_->StartRecordingPlayout(-1 /* record meeting */,
+        output_filename_.c_str()));
+    Sleep(kTestDurationMs);
+    EXPECT_EQ(0, voe_file_->StopRecordingPlayout(-1));
+
+    StopLocalStreams(local_streams);
+    StopRemoteStreams(remote_streams);
+
+    VerifyMixedOutput(max_output_value, min_output_value);
+
+    // Cleanup the files in case another test uses different lengths.
+    EXPECT_EQ(0, remove(input_filename_.c_str()));  // Keep going on failure.
+    EXPECT_EQ(0, remove(output_filename_.c_str()));
+  }
+
+ private:
+  // Generate input file with constant values equal to |input_value|. The file
+  // will be one second longer than the duration of the test.
+  void GenerateInputFile(int16_t input_value) {
+    FILE* input_file = fopen(input_filename_.c_str(), "wb");
+    ASSERT_TRUE(input_file != NULL);
+    for (int i = 0; i < kSampleRateHz / 1000 * (kTestDurationMs + 1000); i++) {
+      ASSERT_EQ(1u, fwrite(&input_value, sizeof(input_value), 1, input_file));
+    }
+    ASSERT_EQ(0, fclose(input_file));
+  }
+
+  // Verifies that every recorded sample past the startup phase is in bounds.
+  void VerifyMixedOutput(int16_t max_output_value, int16_t min_output_value) {
+    FILE* output_file = fopen(output_filename_.c_str(), "rb");
+    ASSERT_TRUE(output_file != NULL);
+    int16_t output_value = 0;
+    // Skip the first 100 ms to avoid initialization and ramping-in effects.
+    EXPECT_EQ(0, fseek(output_file, sizeof(output_value) * kSampleRateHz / 10,
+                       SEEK_SET));
+    int samples_read = 0;
+    while (fread(&output_value, sizeof(output_value), 1, output_file) == 1) {
+      samples_read++;
+      EXPECT_LE(output_value, max_output_value);
+      EXPECT_GE(output_value, min_output_value);
+    }
+    // Ensure the recording length is close to the duration of the test.
+    ASSERT_GE((samples_read * 1000.0f) / kSampleRateHz,
+              0.9f * kTestDurationMs);
+    // Ensure we read the entire file.
+    ASSERT_NE(0, feof(output_file));
+    ASSERT_EQ(0, fclose(output_file));
+  }
+
+  // Start up local streams ("anonymous" participants).
+  void StartLocalStreams(const std::vector<int>& streams) {
+    for (size_t i = 0; i < streams.size(); ++i) {
+      EXPECT_EQ(0, voe_base_->StartPlayout(streams[i]));
+      EXPECT_EQ(0, voe_file_->StartPlayingFileLocally(streams[i],
+          input_filename_.c_str(), true));
+    }
+  }
+
+  // Stop playout on each local stream, then delete its channel.
+  void StopLocalStreams(const std::vector<int>& streams) {
+    for (size_t i = 0; i < streams.size(); ++i) {
+      EXPECT_EQ(0, voe_base_->StopPlayout(streams[i]));
+      EXPECT_EQ(0, voe_base_->DeleteChannel(streams[i]));
+    }
+  }
+
+  // Start up remote streams ("normal" participants), each on its own port.
+  void StartRemoteStreams(const std::vector<int>& streams,
+                          int num_remote_streams_using_mono) {
+    // Use L16 at 16kHz (the file recording rate) so no resampling distorts it.
+    CodecInst codec_inst;
+    strcpy(codec_inst.plname, "L16");
+    codec_inst.channels = 1;
+    codec_inst.plfreq = kSampleRateHz;
+    codec_inst.pltype = 105;
+    codec_inst.pacsize = codec_inst.plfreq / 100;  // Samples in 10 ms.
+    codec_inst.rate = codec_inst.plfreq * sizeof(int16_t) * 8;  // 8 bits/byte.
+
+    for (int i = 0; i < num_remote_streams_using_mono; ++i) {
+      StartRemoteStream(streams[i], codec_inst, 1234 + 2 * i);
+    }
+
+    // The remainder of the streams will use stereo.
+    codec_inst.channels = 2;
+    codec_inst.pltype++;
+    for (size_t i = num_remote_streams_using_mono; i < streams.size(); ++i) {
+      StartRemoteStream(streams[i], codec_inst, 1234 + 2 * i);
+    }
+  }
+
+  // Start up a single remote stream that sends to itself on local |port|.
+  void StartRemoteStream(int stream, const CodecInst& codec_inst, int port) {
+    EXPECT_EQ(0, voe_codec_->SetRecPayloadType(stream, codec_inst));
+    EXPECT_EQ(0, voe_base_->SetLocalReceiver(stream, port));
+    EXPECT_EQ(0, voe_base_->SetSendDestination(stream, port, "127.0.0.1"));
+    EXPECT_EQ(0, voe_base_->StartReceive(stream));
+    EXPECT_EQ(0, voe_base_->StartPlayout(stream));
+    EXPECT_EQ(0, voe_codec_->SetSendCodec(stream, codec_inst));
+    EXPECT_EQ(0, voe_base_->StartSend(stream));
+    EXPECT_EQ(0, voe_file_->StartPlayingFileAsMicrophone(stream,
+        input_filename_.c_str(), true));
+  }
+
+  // Stop send, playout and receive on each remote stream, then delete it.
+  void StopRemoteStreams(const std::vector<int>& streams) {
+    for (size_t i = 0; i < streams.size(); ++i) {
+      EXPECT_EQ(0, voe_base_->StopSend(streams[i]));
+      EXPECT_EQ(0, voe_base_->StopPlayout(streams[i]));
+      EXPECT_EQ(0, voe_base_->StopReceive(streams[i]));
+      EXPECT_EQ(0, voe_base_->DeleteChannel(streams[i]));
+    }
+  }
+
+  const std::string input_filename_;
+  const std::string output_filename_;
+};
+
+// All tests below assume the mixer mixes at most three participants. Bounds
+// are usually +/- 10% of the expected level, which leaves room for distortion
+// introduced by coding and processing in the loopback chain.
+TEST_F(MixingTest, FourChannelsWithOnlyThreeMixed) {
+  const int16_t kAmplitude = 1000;  // Four mono remote streams, no local ones.
+  const int16_t kUpperBound = 1.1 * (kAmplitude * 3);
+  const int16_t kLowerBound = 0.9 * (kAmplitude * 3);
+  RunMixingTest(4, 0, 4, kAmplitude, kUpperBound, kLowerBound);
+}
+
+// Checks that the mixer's saturation protection is active. This is observable
+// because the mixing limiter keeps some headroom, which puts the expected
+// output below full scale.
+TEST_F(MixingTest, VerifySaturationProtection) {
+  const int16_t kAmplitude = 20000;
+  const double kUpperBound = 1.1 * kLimiterHeadroom;
+  // Preconditions: without them this test would not exercise the limiter.
+  ASSERT_GT(kAmplitude * 3, kInt16Max);
+  ASSERT_LT(kUpperBound, kInt16Max);
+  RunMixingTest(3, 0, 3, kAmplitude, kUpperBound,
+                0.9 * kLimiterHeadroom);
+}
+
+TEST_F(MixingTest, SaturationProtectionHasNoEffectOnOneChannel) {
+  // A single full-scale stream: output may range from 95% up to full scale.
+  const double kLowerBound = 0.95 * kInt16Max;
+  // The lower bound must sit above the limiter headroom for this to be a test.
+  ASSERT_GT(kLowerBound, kLimiterHeadroom);
+  // The window is tighter than the usual +/- 10% to properly test this.
+  RunMixingTest(1, 0, 1, kInt16Max, kInt16Max,
+                kLowerBound);
+}
+
+TEST_F(MixingTest, VerifyAnonymousAndNormalParticipantMixing) {
+  const int16_t kAmplitude = 1000;  // One remote stream plus one local stream.
+  const int16_t kUpperBound = 1.1 * (kAmplitude * 2);
+  const int16_t kLowerBound = 0.9 * (kAmplitude * 2);
+  RunMixingTest(1, 1, 1, kAmplitude, kUpperBound, kLowerBound);
+}
+
+TEST_F(MixingTest, AnonymousParticipantsAreAlwaysMixed) {
+  const int16_t kAmplitude = 1000;  // Three remote streams plus a local one.
+  const int16_t kUpperBound = 1.1 * (kAmplitude * 4);
+  const int16_t kLowerBound = 0.9 * (kAmplitude * 4);
+  RunMixingTest(3, 1, 3, kAmplitude, kUpperBound, kLowerBound);
+}
+
+TEST_F(MixingTest, VerifyStereoAndMonoMixing) {
+  const int16_t kAmplitude = 1000;  // One mono and one stereo remote stream.
+  const int16_t kUpperBound = 1.1 * (kAmplitude * 2);
+  const int16_t kLowerBound = 0.9 * (kAmplitude * 2);
+  RunMixingTest(2, 0, 1, kAmplitude, kUpperBound, kLowerBound);
+}
+
+}  // namespace webrtc
diff --git a/src/voice_engine/main/test/voice_engine_tests.gypi b/src/voice_engine/main/test/voice_engine_tests.gypi
index ee5f407..4cd192e 100644
--- a/src/voice_engine/main/test/voice_engine_tests.gypi
+++ b/src/voice_engine/main/test/voice_engine_tests.gypi
@@ -52,6 +52,7 @@
         'auto_test/standard/hardware_before_streaming_test.cc',
         'auto_test/standard/hardware_test.cc',
         'auto_test/standard/manual_hold_test.cc',
+        'auto_test/standard/mixing_test.cc',
         'auto_test/standard/neteq_stats_test.cc',
         'auto_test/standard/neteq_test.cc',
         'auto_test/standard/network_before_streaming_test.cc',