Support 4-channel mic in Windows Core Audio
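
The new downmix paths average adjacent channels, widening each sum to 32
bits before the shift so that e.g. -32768 + -32768 does not wrap. A
minimal standalone sketch of the quad -> stereo math (plain C++, no
WebRTC headers; the function name is illustrative):

  #include <cstddef>
  #include <cstdint>

  // Per sample: left = avg(ch1, ch2), right = avg(ch3, ch4).
  void QuadToStereoSketch(const int16_t* src, size_t samples_per_channel,
                          int16_t* dst) {
    for (size_t i = 0; i < samples_per_channel; ++i) {
      dst[2 * i] = (static_cast<int32_t>(src[4 * i]) + src[4 * i + 1]) >> 1;
      dst[2 * i + 1] =
          (static_cast<int32_t>(src[4 * i + 2]) + src[4 * i + 3]) >> 1;
    }
  }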

BUG=webrtc:7220

Review-Url: https://codereview.webrtc.org/2712743004
Cr-Commit-Position: refs/heads/master@{#16940}
diff --git a/AUTHORS b/AUTHORS
index a1c8a67..e014bfb 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -18,6 +18,7 @@
 Graham Yoakum <gyoakum@skobalt.com>
 Jake Hilton <jakehilton@gmail.com>
 James H. Brown <jbrown@burgoyne.com>
+Jens Nielsen <jens.nielsen@berotec.se>
 Jiawei Ou <jiawei.ou@gmail.com>
 Jie Mao <maojie0924@gmail.com>
 Luke Weber <luke.weber@gmail.com>
diff --git a/webrtc/audio/utility/audio_frame_operations.cc b/webrtc/audio/utility/audio_frame_operations.cc
index 6fcb84e..475539f 100644
--- a/webrtc/audio/utility/audio_frame_operations.cc
+++ b/webrtc/audio/utility/audio_frame_operations.cc
@@ -99,7 +99,8 @@
                                         size_t samples_per_channel,
                                         int16_t* dst_audio) {
   for (size_t i = 0; i < samples_per_channel; i++) {
-    dst_audio[i] = (src_audio[2 * i] + src_audio[2 * i + 1]) >> 1;
+    dst_audio[i] =
+        (static_cast<int32_t>(src_audio[2 * i]) + src_audio[2 * i + 1]) >> 1;
   }
 }
 
@@ -108,12 +109,98 @@
     return -1;
   }
 
+  RTC_DCHECK_LE(frame->samples_per_channel_ * 2,
+                AudioFrame::kMaxDataSizeSamples);
+
   StereoToMono(frame->data_, frame->samples_per_channel_, frame->data_);
   frame->num_channels_ = 1;
 
   return 0;
 }
 
+void AudioFrameOperations::QuadToStereo(const int16_t* src_audio,
+                                        size_t samples_per_channel,
+                                        int16_t* dst_audio) {
+  for (size_t i = 0; i < samples_per_channel; i++) {
+    dst_audio[i * 2] =
+        (static_cast<int32_t>(src_audio[4 * i]) + src_audio[4 * i + 1]) >> 1;
+    dst_audio[i * 2 + 1] =
+        (static_cast<int32_t>(src_audio[4 * i + 2]) + src_audio[4 * i + 3]) >>
+        1;
+  }
+}
+
+int AudioFrameOperations::QuadToStereo(AudioFrame* frame) {
+  if (frame->num_channels_ != 4) {
+    return -1;
+  }
+
+  RTC_DCHECK_LE(frame->samples_per_channel_ * 4,
+                AudioFrame::kMaxDataSizeSamples);
+
+  QuadToStereo(frame->data_, frame->samples_per_channel_, frame->data_);
+  frame->num_channels_ = 2;
+
+  return 0;
+}
+
+void AudioFrameOperations::QuadToMono(const int16_t* src_audio,
+                                      size_t samples_per_channel,
+                                      int16_t* dst_audio) {
+  for (size_t i = 0; i < samples_per_channel; i++) {
+    dst_audio[i] =
+        (static_cast<int32_t>(src_audio[4 * i]) + src_audio[4 * i + 1] +
+         src_audio[4 * i + 2] + src_audio[4 * i + 3]) >> 2;
+  }
+}
+
+int AudioFrameOperations::QuadToMono(AudioFrame* frame) {
+  if (frame->num_channels_ != 4) {
+    return -1;
+  }
+
+  RTC_DCHECK_LE(frame->samples_per_channel_ * 4,
+                AudioFrame::kMaxDataSizeSamples);
+
+  QuadToMono(frame->data_, frame->samples_per_channel_, frame->data_);
+  frame->num_channels_ = 1;
+
+  return 0;
+}
+
+void AudioFrameOperations::DownmixChannels(const int16_t* src_audio,
+                                           size_t src_channels,
+                                           size_t samples_per_channel,
+                                           size_t dst_channels,
+                                           int16_t* dst_audio) {
+  if (src_channels == 2 && dst_channels == 1) {
+    StereoToMono(src_audio, samples_per_channel, dst_audio);
+    return;
+  } else if (src_channels == 4 && dst_channels == 2) {
+    QuadToStereo(src_audio, samples_per_channel, dst_audio);
+    return;
+  } else if (src_channels == 4 && dst_channels == 1) {
+    QuadToMono(src_audio, samples_per_channel, dst_audio);
+    return;
+  }
+
+  RTC_NOTREACHED() << "src_channels: " << src_channels
+                   << ", dst_channels: " << dst_channels;
+}
+
+int AudioFrameOperations::DownmixChannels(size_t dst_channels,
+                                          AudioFrame* frame) {
+  if (frame->num_channels_ == 2 && dst_channels == 1) {
+    return StereoToMono(frame);
+  } else if (frame->num_channels_ == 4 && dst_channels == 2) {
+    return QuadToStereo(frame);
+  } else if (frame->num_channels_ == 4 && dst_channels == 1) {
+    return QuadToMono(frame);
+  }
+
+  return -1;
+}
+
 void AudioFrameOperations::SwapStereoChannels(AudioFrame* frame) {
   RTC_DCHECK(frame);
   if (frame->num_channels_ != 2) {
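
Note: a hedged usage sketch of the raw-buffer DownmixChannels entry point
added above (frame size and names are illustrative; the function supports
stereo -> mono, quad -> stereo, and quad -> mono, and may run in-place):

  #include <cstddef>
  #include <cstdint>

  #include "webrtc/audio/utility/audio_frame_operations.h"

  void DownmixQuadToStereo10ms48kHz(const int16_t* quad_in,
                                    int16_t* stereo_out) {
    constexpr size_t kSamplesPerChannel = 480;  // 10 ms at 48 kHz.
    webrtc::AudioFrameOperations::DownmixChannels(
        quad_in, /*src_channels=*/4, kSamplesPerChannel,
        /*dst_channels=*/2, stereo_out);
  }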
diff --git a/webrtc/audio/utility/audio_frame_operations.h b/webrtc/audio/utility/audio_frame_operations.h
index d16b163..de6fdc4 100644
--- a/webrtc/audio/utility/audio_frame_operations.h
+++ b/webrtc/audio/utility/audio_frame_operations.h
@@ -40,6 +40,7 @@
   static void MonoToStereo(const int16_t* src_audio,
                            size_t samples_per_channel,
                            int16_t* dst_audio);
+
   // |frame.num_channels_| will be updated. This version checks for sufficient
   // buffer size and that |num_channels_| is mono.
   static int MonoToStereo(AudioFrame* frame);
@@ -50,10 +51,49 @@
   static void StereoToMono(const int16_t* src_audio,
                            size_t samples_per_channel,
                            int16_t* dst_audio);
+
   // |frame.num_channels_| will be updated. This version checks that
   // |num_channels_| is stereo.
   static int StereoToMono(AudioFrame* frame);
 
+  // Downmixes 4 channels |src_audio| to stereo |dst_audio|. This is an in-place
+  // operation, meaning |src_audio| and |dst_audio| may point to the same
+  // buffer.
+  static void QuadToStereo(const int16_t* src_audio,
+                           size_t samples_per_channel,
+                           int16_t* dst_audio);
+
+  // |frame.num_channels_| will be updated. This version checks that
+  // |num_channels_| is 4.
+  static int QuadToStereo(AudioFrame* frame);
+
+  // Downmixes 4 channels |src_audio| to mono |dst_audio|. This is an in-place
+  // operation, meaning |src_audio| and |dst_audio| may point to the same
+  // buffer.
+  static void QuadToMono(const int16_t* src_audio,
+                         size_t samples_per_channel,
+                         int16_t* dst_audio);
+
+  // |frame.num_channels_| will be updated. This version checks that
+  // |num_channels_| is 4.
+  static int QuadToMono(AudioFrame* frame);
+
+  // Downmixes |src_audio| with |src_channels| channels to |dst_audio| with
+  // |dst_channels| channels. This is an in-place operation, meaning
+  // |src_audio| and |dst_audio| may point to the same buffer. Supported
+  // combinations are Stereo to Mono, Quad to Mono, and Quad to Stereo.
+  static void DownmixChannels(const int16_t* src_audio,
+                              size_t src_channels,
+                              size_t samples_per_channel,
+                              size_t dst_channels,
+                              int16_t* dst_audio);
+
+  // |frame.num_channels_| will be updated. This version checks that
+  // |num_channels_| and |dst_channels| are valid and performs the relevant
+  // downmix. Supported channel combinations are Stereo to Mono, Quad to Mono,
+  // and Quad to Stereo.
+  static int DownmixChannels(size_t dst_channels, AudioFrame* frame);
+
   // Swap the left and right channels of |frame|. Fails silently if |frame| is
   // not stereo.
   static void SwapStereoChannels(AudioFrame* frame);
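
The AudioFrame overloads update |num_channels_| on success and return -1
for unsupported combinations; a small illustrative helper (not part of
this CL):

  // In-place quad -> stereo; returns -1 unless frame->num_channels_ == 4.
  int DownmixCapturedFrame(webrtc::AudioFrame* frame) {
    return webrtc::AudioFrameOperations::DownmixChannels(2, frame);
  }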
diff --git a/webrtc/audio/utility/audio_frame_operations_unittest.cc b/webrtc/audio/utility/audio_frame_operations_unittest.cc
index 36377bd..af8ae24 100644
--- a/webrtc/audio/utility/audio_frame_operations_unittest.cc
+++ b/webrtc/audio/utility/audio_frame_operations_unittest.cc
@@ -27,14 +27,27 @@
   AudioFrame frame_;
 };
 
-void SetFrameData(AudioFrame* frame, int16_t left, int16_t right) {
+void SetFrameData(int16_t ch1,
+                  int16_t ch2,
+                  int16_t ch3,
+                  int16_t ch4,
+                  AudioFrame* frame) {
+  for (size_t i = 0; i < frame->samples_per_channel_ * 4; i += 4) {
+    frame->data_[i] = ch1;
+    frame->data_[i + 1] = ch2;
+    frame->data_[i + 2] = ch3;
+    frame->data_[i + 3] = ch4;
+  }
+}
+
+void SetFrameData(int16_t left, int16_t right, AudioFrame* frame) {
   for (size_t i = 0; i < frame->samples_per_channel_ * 2; i += 2) {
     frame->data_[i] = left;
     frame->data_[i + 1] = right;
   }
 }
 
-void SetFrameData(AudioFrame* frame, int16_t data) {
+void SetFrameData(int16_t data, AudioFrame* frame) {
   for (size_t i = 0; i < frame->samples_per_channel_; i++) {
     frame->data_[i] = data;
   }
@@ -59,9 +72,9 @@
   frame->samples_per_channel_ = samples_per_channel;
   frame->num_channels_ = channels;
   if (channels == 2) {
-    SetFrameData(frame, left_data, right_data);
+    SetFrameData(left_data, right_data, frame);
   } else if (channels == 1) {
-    SetFrameData(frame, left_data);
+    SetFrameData(left_data, frame);
   }
 }
 
@@ -90,23 +103,33 @@
 
 TEST_F(AudioFrameOperationsTest, MonoToStereoSucceeds) {
   frame_.num_channels_ = 1;
-  SetFrameData(&frame_, 1);
-  AudioFrame temp_frame;
-  temp_frame.CopyFrom(frame_);
+  SetFrameData(1, &frame_);
+
   EXPECT_EQ(0, AudioFrameOperations::MonoToStereo(&frame_));
 
   AudioFrame stereo_frame;
   stereo_frame.samples_per_channel_ = 320;
   stereo_frame.num_channels_ = 2;
-  SetFrameData(&stereo_frame, 1, 1);
+  SetFrameData(1, 1, &stereo_frame);
   VerifyFramesAreEqual(stereo_frame, frame_);
+}
 
-  SetFrameData(&frame_, 0);
-  AudioFrameOperations::MonoToStereo(temp_frame.data_,
-                                     frame_.samples_per_channel_,
-                                     frame_.data_);
-  frame_.num_channels_ = 2;  // Need to set manually.
-  VerifyFramesAreEqual(stereo_frame, frame_);
+TEST_F(AudioFrameOperationsTest, MonoToStereoBufferSucceeds) {
+  AudioFrame target_frame;
+  frame_.num_channels_ = 1;
+  SetFrameData(4, &frame_);
+
+  target_frame.num_channels_ = 2;
+  target_frame.samples_per_channel_ = frame_.samples_per_channel_;
+
+  AudioFrameOperations::MonoToStereo(frame_.data_, frame_.samples_per_channel_,
+                                     target_frame.data_);
+
+  AudioFrame stereo_frame;
+  stereo_frame.samples_per_channel_ = 320;
+  stereo_frame.num_channels_ = 2;
+  SetFrameData(4, 4, &stereo_frame);
+  VerifyFramesAreEqual(stereo_frame, target_frame);
 }
 
 TEST_F(AudioFrameOperationsTest, StereoToMonoFailsWithBadParameters) {
@@ -115,43 +138,148 @@
 }
 
 TEST_F(AudioFrameOperationsTest, StereoToMonoSucceeds) {
-  SetFrameData(&frame_, 4, 2);
-  AudioFrame temp_frame;
-  temp_frame.CopyFrom(frame_);
+  SetFrameData(4, 2, &frame_);
   EXPECT_EQ(0, AudioFrameOperations::StereoToMono(&frame_));
 
   AudioFrame mono_frame;
   mono_frame.samples_per_channel_ = 320;
   mono_frame.num_channels_ = 1;
-  SetFrameData(&mono_frame, 3);
+  SetFrameData(3, &mono_frame);
   VerifyFramesAreEqual(mono_frame, frame_);
+}
 
-  SetFrameData(&frame_, 0);
-  AudioFrameOperations::StereoToMono(temp_frame.data_,
-                                     frame_.samples_per_channel_,
-                                     frame_.data_);
-  frame_.num_channels_ = 1;  // Need to set manually.
-  VerifyFramesAreEqual(mono_frame, frame_);
+TEST_F(AudioFrameOperationsTest, StereoToMonoBufferSucceeds) {
+  AudioFrame target_frame;
+  SetFrameData(4, 2, &frame_);
+
+  target_frame.num_channels_ = 1;
+  target_frame.samples_per_channel_ = frame_.samples_per_channel_;
+
+  AudioFrameOperations::StereoToMono(frame_.data_, frame_.samples_per_channel_,
+                                     target_frame.data_);
+
+  AudioFrame mono_frame;
+  mono_frame.samples_per_channel_ = 320;
+  mono_frame.num_channels_ = 1;
+  SetFrameData(3, &mono_frame);
+  VerifyFramesAreEqual(mono_frame, target_frame);
 }
 
 TEST_F(AudioFrameOperationsTest, StereoToMonoDoesNotWrapAround) {
-  SetFrameData(&frame_, -32768, -32768);
+  SetFrameData(-32768, -32768, &frame_);
   EXPECT_EQ(0, AudioFrameOperations::StereoToMono(&frame_));
 
   AudioFrame mono_frame;
   mono_frame.samples_per_channel_ = 320;
   mono_frame.num_channels_ = 1;
-  SetFrameData(&mono_frame, -32768);
+  SetFrameData(-32768, &mono_frame);
   VerifyFramesAreEqual(mono_frame, frame_);
 }
 
+TEST_F(AudioFrameOperationsTest, QuadToMonoFailsWithBadParameters) {
+  frame_.num_channels_ = 1;
+  EXPECT_EQ(-1, AudioFrameOperations::QuadToMono(&frame_));
+  frame_.num_channels_ = 2;
+  EXPECT_EQ(-1, AudioFrameOperations::QuadToMono(&frame_));
+}
+
+TEST_F(AudioFrameOperationsTest, QuadToMonoSucceeds) {
+  frame_.num_channels_ = 4;
+  SetFrameData(4, 2, 6, 8, &frame_);
+
+  EXPECT_EQ(0, AudioFrameOperations::QuadToMono(&frame_));
+
+  AudioFrame mono_frame;
+  mono_frame.samples_per_channel_ = 320;
+  mono_frame.num_channels_ = 1;
+  SetFrameData(5, &mono_frame);
+  VerifyFramesAreEqual(mono_frame, frame_);
+}
+
+TEST_F(AudioFrameOperationsTest, QuadToMonoBufferSucceeds) {
+  AudioFrame target_frame;
+  frame_.num_channels_ = 4;
+  SetFrameData(4, 2, 6, 8, &frame_);
+
+  target_frame.num_channels_ = 1;
+  target_frame.samples_per_channel_ = frame_.samples_per_channel_;
+
+  AudioFrameOperations::QuadToMono(frame_.data_, frame_.samples_per_channel_,
+                                   target_frame.data_);
+  AudioFrame mono_frame;
+  mono_frame.samples_per_channel_ = 320;
+  mono_frame.num_channels_ = 1;
+  SetFrameData(5, &mono_frame);
+  VerifyFramesAreEqual(mono_frame, target_frame);
+}
+
+TEST_F(AudioFrameOperationsTest, QuadToMonoDoesNotWrapAround) {
+  frame_.num_channels_ = 4;
+  SetFrameData(-32768, -32768, -32768, -32768, &frame_);
+  EXPECT_EQ(0, AudioFrameOperations::QuadToMono(&frame_));
+
+  AudioFrame mono_frame;
+  mono_frame.samples_per_channel_ = 320;
+  mono_frame.num_channels_ = 1;
+  SetFrameData(-32768, &mono_frame);
+  VerifyFramesAreEqual(mono_frame, frame_);
+}
+
+TEST_F(AudioFrameOperationsTest, QuadToStereoFailsWithBadParameters) {
+  frame_.num_channels_ = 1;
+  EXPECT_EQ(-1, AudioFrameOperations::QuadToStereo(&frame_));
+  frame_.num_channels_ = 2;
+  EXPECT_EQ(-1, AudioFrameOperations::QuadToStereo(&frame_));
+}
+
+TEST_F(AudioFrameOperationsTest, QuadToStereoSucceeds) {
+  frame_.num_channels_ = 4;
+  SetFrameData(4, 2, 6, 8, &frame_);
+  EXPECT_EQ(0, AudioFrameOperations::QuadToStereo(&frame_));
+
+  AudioFrame stereo_frame;
+  stereo_frame.samples_per_channel_ = 320;
+  stereo_frame.num_channels_ = 2;
+  SetFrameData(3, 7, &stereo_frame);
+  VerifyFramesAreEqual(stereo_frame, frame_);
+}
+
+TEST_F(AudioFrameOperationsTest, QuadToStereoBufferSucceeds) {
+  AudioFrame target_frame;
+  frame_.num_channels_ = 4;
+  SetFrameData(4, 2, 6, 8, &frame_);
+
+  target_frame.num_channels_ = 2;
+  target_frame.samples_per_channel_ = frame_.samples_per_channel_;
+
+  AudioFrameOperations::QuadToStereo(frame_.data_, frame_.samples_per_channel_,
+                                     target_frame.data_);
+  AudioFrame stereo_frame;
+  stereo_frame.samples_per_channel_ = 320;
+  stereo_frame.num_channels_ = 2;
+  SetFrameData(3, 7, &stereo_frame);
+  VerifyFramesAreEqual(stereo_frame, target_frame);
+}
+
+TEST_F(AudioFrameOperationsTest, QuadToStereoDoesNotWrapAround) {
+  frame_.num_channels_ = 4;
+  SetFrameData(-32768, -32768, -32768, -32768, &frame_);
+  EXPECT_EQ(0, AudioFrameOperations::QuadToStereo(&frame_));
+
+  AudioFrame stereo_frame;
+  stereo_frame.samples_per_channel_ = 320;
+  stereo_frame.num_channels_ = 2;
+  SetFrameData(-32768, -32768, &stereo_frame);
+  VerifyFramesAreEqual(stereo_frame, frame_);
+}
+
 TEST_F(AudioFrameOperationsTest, SwapStereoChannelsSucceedsOnStereo) {
-  SetFrameData(&frame_, 0, 1);
+  SetFrameData(0, 1, &frame_);
 
   AudioFrame swapped_frame;
   swapped_frame.samples_per_channel_ = 320;
   swapped_frame.num_channels_ = 2;
-  SetFrameData(&swapped_frame, 1, 0);
+  SetFrameData(1, 0, &swapped_frame);
 
   AudioFrameOperations::SwapStereoChannels(&frame_);
   VerifyFramesAreEqual(swapped_frame, frame_);
@@ -160,7 +288,7 @@
 TEST_F(AudioFrameOperationsTest, SwapStereoChannelsFailsOnMono) {
   frame_.num_channels_ = 1;
   // Set data to "stereo", despite it being a mono frame.
-  SetFrameData(&frame_, 0, 1);
+  SetFrameData(0, 1, &frame_);
 
   AudioFrame orig_frame;
   orig_frame.CopyFrom(frame_);
@@ -170,24 +298,24 @@
 }
 
 TEST_F(AudioFrameOperationsTest, MuteDisabled) {
-  SetFrameData(&frame_, 1000, -1000);
+  SetFrameData(1000, -1000, &frame_);
   AudioFrameOperations::Mute(&frame_, false, false);
 
   AudioFrame muted_frame;
   muted_frame.samples_per_channel_ = 320;
   muted_frame.num_channels_ = 2;
-  SetFrameData(&muted_frame, 1000, -1000);
+  SetFrameData(1000, -1000, &muted_frame);
   VerifyFramesAreEqual(muted_frame, frame_);
 }
 
 TEST_F(AudioFrameOperationsTest, MuteEnabled) {
-  SetFrameData(&frame_, 1000, -1000);
+  SetFrameData(1000, -1000, &frame_);
   AudioFrameOperations::Mute(&frame_, true, true);
 
   AudioFrame muted_frame;
   muted_frame.samples_per_channel_ = 320;
   muted_frame.num_channels_ = 2;
-  SetFrameData(&muted_frame, 0, 0);
+  SetFrameData(0, 0, &muted_frame);
   VerifyFramesAreEqual(muted_frame, frame_);
 }
 
@@ -310,24 +438,24 @@
 
 // TODO(andrew): fix the wraparound bug. We should always saturate.
 TEST_F(AudioFrameOperationsTest, DISABLED_ScaleDoesNotWrapAround) {
-  SetFrameData(&frame_, 4000, -4000);
+  SetFrameData(4000, -4000, &frame_);
   EXPECT_EQ(0, AudioFrameOperations::Scale(10.0, 10.0, frame_));
 
   AudioFrame clipped_frame;
   clipped_frame.samples_per_channel_ = 320;
   clipped_frame.num_channels_ = 2;
-  SetFrameData(&clipped_frame, 32767, -32768);
+  SetFrameData(32767, -32768, &clipped_frame);
   VerifyFramesAreEqual(clipped_frame, frame_);
 }
 
 TEST_F(AudioFrameOperationsTest, ScaleSucceeds) {
-  SetFrameData(&frame_, 1, -1);
+  SetFrameData(1, -1, &frame_);
   EXPECT_EQ(0, AudioFrameOperations::Scale(2.0, 3.0, frame_));
 
   AudioFrame scaled_frame;
   scaled_frame.samples_per_channel_ = 320;
   scaled_frame.num_channels_ = 2;
-  SetFrameData(&scaled_frame, 2, -3);
+  SetFrameData(2, -3, &scaled_frame);
   VerifyFramesAreEqual(scaled_frame, frame_);
 }
 
@@ -338,30 +466,30 @@
 
 TEST_F(AudioFrameOperationsTest, ScaleWithSatDoesNotWrapAround) {
   frame_.num_channels_ = 1;
-  SetFrameData(&frame_, 4000);
+  SetFrameData(4000, &frame_);
   EXPECT_EQ(0, AudioFrameOperations::ScaleWithSat(10.0, frame_));
 
   AudioFrame clipped_frame;
   clipped_frame.samples_per_channel_ = 320;
   clipped_frame.num_channels_ = 1;
-  SetFrameData(&clipped_frame, 32767);
+  SetFrameData(32767, &clipped_frame);
   VerifyFramesAreEqual(clipped_frame, frame_);
 
-  SetFrameData(&frame_, -4000);
+  SetFrameData(-4000, &frame_);
   EXPECT_EQ(0, AudioFrameOperations::ScaleWithSat(10.0, frame_));
-  SetFrameData(&clipped_frame, -32768);
+  SetFrameData(-32768, &clipped_frame);
   VerifyFramesAreEqual(clipped_frame, frame_);
 }
 
 TEST_F(AudioFrameOperationsTest, ScaleWithSatSucceeds) {
   frame_.num_channels_ = 1;
-  SetFrameData(&frame_, 1);
+  SetFrameData(1, &frame_);
   EXPECT_EQ(0, AudioFrameOperations::ScaleWithSat(2.0, frame_));
 
   AudioFrame scaled_frame;
   scaled_frame.samples_per_channel_ = 320;
   scaled_frame.num_channels_ = 1;
-  SetFrameData(&scaled_frame, 2);
+  SetFrameData(2, &scaled_frame);
   VerifyFramesAreEqual(scaled_frame, frame_);
 }
 
@@ -379,10 +507,10 @@
   AudioFrame frame_to_add_to;
   frame_to_add_to.samples_per_channel_ = frame_.samples_per_channel_;
   frame_to_add_to.num_channels_ = frame_.num_channels_;
-  SetFrameData(&frame_to_add_to, 1000);
+  SetFrameData(1000, &frame_to_add_to);
 
   AudioFrameOperations::Add(frame_, &frame_to_add_to);
-  SetFrameData(&frame_, frame_.data_[0] + 1000);
+  SetFrameData(frame_.data_[0] + 1000, &frame_);
   VerifyFramesAreEqual(frame_, frame_to_add_to);
 }
 }  // namespace
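
The golden values above follow directly from the averaging math:
(4 + 2) / 2 = 3, (6 + 8) / 2 = 7, and (4 + 2 + 6 + 8) / 4 = 5. A test for
the frame-based DownmixChannels wrapper, in the same style (the test name
is illustrative, not part of this CL), could read:

  TEST_F(AudioFrameOperationsTest, DownmixChannelsQuadToStereoSucceeds) {
    frame_.num_channels_ = 4;
    SetFrameData(4, 2, 6, 8, &frame_);
    EXPECT_EQ(0, AudioFrameOperations::DownmixChannels(2, &frame_));

    AudioFrame stereo_frame;
    stereo_frame.samples_per_channel_ = 320;
    stereo_frame.num_channels_ = 2;
    SetFrameData(3, 7, &stereo_frame);
    VerifyFramesAreEqual(stereo_frame, frame_);
  }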
diff --git a/webrtc/modules/audio_device/win/audio_device_core_win.cc b/webrtc/modules/audio_device/win/audio_device_core_win.cc
index 9a70239..111457b 100644
--- a/webrtc/modules/audio_device/win/audio_device_core_win.cc
+++ b/webrtc/modules/audio_device/win/audio_device_core_win.cc
@@ -524,6 +524,7 @@
     // list of number of channels to use on recording side
     _recChannelsPrioList[0] = 2;    // stereo is prio 1
     _recChannelsPrioList[1] = 1;    // mono is prio 2
+    _recChannelsPrioList[2] = 4;    // quad is prio 3
 
     // list of number of channels to use on playout side
     _playChannelsPrioList[0] = 2;    // stereo is prio 1
@@ -2531,7 +2532,7 @@
 
     HRESULT hr = S_OK;
     WAVEFORMATEX* pWfxIn = NULL;
-    WAVEFORMATEX Wfx = WAVEFORMATEX();
+    WAVEFORMATEXTENSIBLE Wfx = WAVEFORMATEXTENSIBLE();
     WAVEFORMATEX* pWfxClosestMatch = NULL;
 
     // Create COM object with IAudioClient interface.
@@ -2565,9 +2566,12 @@
     }
 
     // Set wave format
-    Wfx.wFormatTag = WAVE_FORMAT_PCM;
-    Wfx.wBitsPerSample = 16;
-    Wfx.cbSize = 0;
+    Wfx.Format.wFormatTag = WAVE_FORMAT_EXTENSIBLE;
+    Wfx.Format.wBitsPerSample = 16;
+    Wfx.Format.cbSize = 22;
+    Wfx.dwChannelMask = 0;
+    Wfx.Samples.wValidBitsPerSample = Wfx.Format.wBitsPerSample;
+    Wfx.SubFormat = KSDATAFORMAT_SUBTYPE_PCM;
 
     const int freqs[6] = {48000, 44100, 16000, 96000, 32000, 8000};
     hr = S_FALSE;
@@ -2577,16 +2581,18 @@
     {
         for (unsigned int chan = 0; chan < sizeof(_recChannelsPrioList)/sizeof(_recChannelsPrioList[0]); chan++)
         {
-            Wfx.nChannels = _recChannelsPrioList[chan];
-            Wfx.nSamplesPerSec = freqs[freq];
-            Wfx.nBlockAlign = Wfx.nChannels * Wfx.wBitsPerSample / 8;
-            Wfx.nAvgBytesPerSec = Wfx.nSamplesPerSec * Wfx.nBlockAlign;
+            Wfx.Format.nChannels = _recChannelsPrioList[chan];
+            Wfx.Format.nSamplesPerSec = freqs[freq];
+            Wfx.Format.nBlockAlign = Wfx.Format.nChannels *
+                                     Wfx.Format.wBitsPerSample / 8;
+            Wfx.Format.nAvgBytesPerSec = Wfx.Format.nSamplesPerSec *
+                                         Wfx.Format.nBlockAlign;
             // If the method succeeds and the audio endpoint device supports the specified stream format,
             // it returns S_OK. If the method succeeds and provides a closest match to the specified format,
             // it returns S_FALSE.
             hr = _ptrClientIn->IsFormatSupported(
                                   AUDCLNT_SHAREMODE_SHARED,
-                                  &Wfx,
+                                  (WAVEFORMATEX*)&Wfx,
                                   &pWfxClosestMatch);
             if (hr == S_OK)
             {
@@ -2595,7 +2601,7 @@
             else
             {
                 WEBRTC_TRACE(kTraceInfo, kTraceAudioDevice, _id, "nChannels=%d, nSamplesPerSec=%d is not supported",
-                    Wfx.nChannels, Wfx.nSamplesPerSec);
+                    Wfx.Format.nChannels, Wfx.Format.nSamplesPerSec);
             }
         }
         if (hr == S_OK)
@@ -2604,19 +2610,20 @@
 
     if (hr == S_OK)
     {
-        _recAudioFrameSize = Wfx.nBlockAlign;
-        _recSampleRate = Wfx.nSamplesPerSec;
-        _recBlockSize = Wfx.nSamplesPerSec/100;
-        _recChannels = Wfx.nChannels;
+        _recAudioFrameSize = Wfx.Format.nBlockAlign;
+        _recSampleRate = Wfx.Format.nSamplesPerSec;
+        _recBlockSize = Wfx.Format.nSamplesPerSec/100;
+        _recChannels = Wfx.Format.nChannels;
 
         WEBRTC_TRACE(kTraceInfo, kTraceAudioDevice, _id, "VoE selected this capturing format:");
-        WEBRTC_TRACE(kTraceInfo, kTraceAudioDevice, _id, "wFormatTag        : 0x%X (%u)", Wfx.wFormatTag, Wfx.wFormatTag);
-        WEBRTC_TRACE(kTraceInfo, kTraceAudioDevice, _id, "nChannels         : %d", Wfx.nChannels);
-        WEBRTC_TRACE(kTraceInfo, kTraceAudioDevice, _id, "nSamplesPerSec    : %d", Wfx.nSamplesPerSec);
-        WEBRTC_TRACE(kTraceInfo, kTraceAudioDevice, _id, "nAvgBytesPerSec   : %d", Wfx.nAvgBytesPerSec);
-        WEBRTC_TRACE(kTraceInfo, kTraceAudioDevice, _id, "nBlockAlign       : %d", Wfx.nBlockAlign);
-        WEBRTC_TRACE(kTraceInfo, kTraceAudioDevice, _id, "wBitsPerSample    : %d", Wfx.wBitsPerSample);
-        WEBRTC_TRACE(kTraceInfo, kTraceAudioDevice, _id, "cbSize            : %d", Wfx.cbSize);
+        WEBRTC_TRACE(kTraceInfo, kTraceAudioDevice, _id, "wFormatTag        : 0x%X (%u)", Wfx.Format.wFormatTag,
+                                                                                          Wfx.Format.wFormatTag);
+        WEBRTC_TRACE(kTraceInfo, kTraceAudioDevice, _id, "nChannels         : %d", Wfx.Format.nChannels);
+        WEBRTC_TRACE(kTraceInfo, kTraceAudioDevice, _id, "nSamplesPerSec    : %d", Wfx.Format.nSamplesPerSec);
+        WEBRTC_TRACE(kTraceInfo, kTraceAudioDevice, _id, "nAvgBytesPerSec   : %d", Wfx.Format.nAvgBytesPerSec);
+        WEBRTC_TRACE(kTraceInfo, kTraceAudioDevice, _id, "nBlockAlign       : %d", Wfx.Format.nBlockAlign);
+        WEBRTC_TRACE(kTraceInfo, kTraceAudioDevice, _id, "wBitsPerSample    : %d", Wfx.Format.wBitsPerSample);
+        WEBRTC_TRACE(kTraceInfo, kTraceAudioDevice, _id, "cbSize            : %d", Wfx.Format.cbSize);
         WEBRTC_TRACE(kTraceInfo, kTraceAudioDevice, _id, "Additional settings:");
         WEBRTC_TRACE(kTraceInfo, kTraceAudioDevice, _id, "_recAudioFrameSize: %d", _recAudioFrameSize);
         WEBRTC_TRACE(kTraceInfo, kTraceAudioDevice, _id, "_recBlockSize     : %d", _recBlockSize);
@@ -2630,7 +2637,7 @@
                           AUDCLNT_STREAMFLAGS_NOPERSIST,        // volume and mute settings for an audio session will not persist across system restarts
                           0,                                    // required for event-driven shared mode
                           0,                                    // periodicity
-                          &Wfx,                                 // selected wave format
+                          (WAVEFORMATEX*)&Wfx,                  // selected wave format
                           NULL);                                // session GUID
 
 
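Formats with more than two channels generally have to be described with
WAVE_FORMAT_EXTENSIBLE rather than WAVE_FORMAT_PCM, which is why Wfx is
switched to WAVEFORMATEXTENSIBLE above. A condensed sketch of the
equivalent setup (Windows-only; KSAUDIO_SPEAKER_QUAD is shown for
illustration, whereas the CL leaves dwChannelMask at 0):

  #include <windows.h>
  #include <mmreg.h>
  #include <ksmedia.h>

  WAVEFORMATEXTENSIBLE MakeQuadPcm16(DWORD sample_rate) {
    WAVEFORMATEXTENSIBLE wfx = {};
    wfx.Format.wFormatTag = WAVE_FORMAT_EXTENSIBLE;
    wfx.Format.nChannels = 4;
    wfx.Format.nSamplesPerSec = sample_rate;
    wfx.Format.wBitsPerSample = 16;
    wfx.Format.nBlockAlign =
        wfx.Format.nChannels * wfx.Format.wBitsPerSample / 8;
    wfx.Format.nAvgBytesPerSec =
        wfx.Format.nSamplesPerSec * wfx.Format.nBlockAlign;
    // 22: size of the members that extend WAVEFORMATEX.
    wfx.Format.cbSize = sizeof(WAVEFORMATEXTENSIBLE) - sizeof(WAVEFORMATEX);
    wfx.Samples.wValidBitsPerSample = wfx.Format.wBitsPerSample;
    wfx.dwChannelMask = KSAUDIO_SPEAKER_QUAD;  // CL uses 0 (unspecified).
    wfx.SubFormat = KSDATAFORMAT_SUBTYPE_PCM;
    return wfx;
  }
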
diff --git a/webrtc/modules/audio_device/win/audio_device_core_win.h b/webrtc/modules/audio_device/win/audio_device_core_win.h
index 0ff2474..5e813cc 100644
--- a/webrtc/modules/audio_device/win/audio_device_core_win.h
+++ b/webrtc/modules/audio_device/win/audio_device_core_win.h
@@ -332,7 +332,7 @@
     UINT64                                  _readSamples;
     uint32_t                          _sndCardRecDelay;
 
-    uint16_t                          _recChannelsPrioList[2];
+    uint16_t                          _recChannelsPrioList[3];
     uint16_t                          _playChannelsPrioList[2];
 
     LARGE_INTEGER                           _perfCounterFreq;
diff --git a/webrtc/voice_engine/utility.cc b/webrtc/voice_engine/utility.cc
index 595c711..720817f 100644
--- a/webrtc/voice_engine/utility.cc
+++ b/webrtc/voice_engine/utility.cc
@@ -41,14 +41,20 @@
                       AudioFrame* dst_frame) {
   const int16_t* audio_ptr = src_data;
   size_t audio_ptr_num_channels = num_channels;
-  int16_t mono_audio[AudioFrame::kMaxDataSizeSamples];
+  int16_t downmixed_audio[AudioFrame::kMaxDataSizeSamples];
 
   // Downmix before resampling.
-  if (num_channels == 2 && dst_frame->num_channels_ == 1) {
-    AudioFrameOperations::StereoToMono(src_data, samples_per_channel,
-                                       mono_audio);
-    audio_ptr = mono_audio;
-    audio_ptr_num_channels = 1;
+  if (num_channels > dst_frame->num_channels_) {
+    RTC_DCHECK(num_channels == 2 || num_channels == 4)
+        << "num_channels: " << num_channels;
+    RTC_DCHECK(dst_frame->num_channels_ == 1 || dst_frame->num_channels_ == 2)
+        << "dst_frame->num_channels_: " << dst_frame->num_channels_;
+
+    AudioFrameOperations::DownmixChannels(
+        src_data, num_channels, samples_per_channel, dst_frame->num_channels_,
+        downmixed_audio);
+    audio_ptr = downmixed_audio;
+    audio_ptr_num_channels = dst_frame->num_channels_;
   }
 
   if (resampler->InitializeIfNeeded(sample_rate_hz, dst_frame->sample_rate_hz_,
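
Downmixing happens before resampling so the resampler only has to
process the destination channel count. A hedged sketch of the resulting
decision (name illustrative; these are the only channel pairs expected
to reach DownmixChannels here):

  // True when RemixAndResample should downmix before resampling:
  // {2, 4} source channels down to a smaller count in {1, 2}.
  bool NeedsDownmix(size_t src_channels, size_t dst_channels) {
    return src_channels > dst_channels &&
           (src_channels == 2 || src_channels == 4) &&
           (dst_channels == 1 || dst_channels == 2);
  }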
diff --git a/webrtc/voice_engine/utility_unittest.cc b/webrtc/voice_engine/utility_unittest.cc
index ecd0baa..94abc0f 100644
--- a/webrtc/voice_engine/utility_unittest.cc
+++ b/webrtc/voice_engine/utility_unittest.cc
@@ -16,6 +16,7 @@
 #include "webrtc/test/gtest.h"
 #include "webrtc/voice_engine/utility.h"
 #include "webrtc/voice_engine/voice_engine_defines.h"
+#include "webrtc/base/arraysize.h"
 
 namespace webrtc {
 namespace voe {
@@ -45,29 +46,31 @@
 // Sets the signal value to increase by |data| with every sample. Floats are
 // used so non-integer values result in rounding error, but not an accumulating
 // error.
-void SetMonoFrame(AudioFrame* frame, float data, int sample_rate_hz) {
+void SetMonoFrame(float data, int sample_rate_hz, AudioFrame* frame) {
   memset(frame->data_, 0, sizeof(frame->data_));
   frame->num_channels_ = 1;
   frame->sample_rate_hz_ = sample_rate_hz;
-  frame->samples_per_channel_ = sample_rate_hz / 100;
+  frame->samples_per_channel_ = rtc::CheckedDivExact(sample_rate_hz, 100);
   for (size_t i = 0; i < frame->samples_per_channel_; i++) {
     frame->data_[i] = static_cast<int16_t>(data * i);
   }
 }
 
 // Keep the existing sample rate.
-void SetMonoFrame(AudioFrame* frame, float data) {
-  SetMonoFrame(frame, data, frame->sample_rate_hz_);
+void SetMonoFrame(float data, AudioFrame* frame) {
+  SetMonoFrame(data, frame->sample_rate_hz_, frame);
 }
 
 // Sets the signal value to increase by |left| and |right| with every sample in
 // each channel respectively.
-void SetStereoFrame(AudioFrame* frame, float left, float right,
-                    int sample_rate_hz) {
+void SetStereoFrame(float left,
+                    float right,
+                    int sample_rate_hz,
+                    AudioFrame* frame) {
   memset(frame->data_, 0, sizeof(frame->data_));
   frame->num_channels_ = 2;
   frame->sample_rate_hz_ = sample_rate_hz;
-  frame->samples_per_channel_ = sample_rate_hz / 100;
+  frame->samples_per_channel_ = rtc::CheckedDivExact(sample_rate_hz, 100);
   for (size_t i = 0; i < frame->samples_per_channel_; i++) {
     frame->data_[i * 2] = static_cast<int16_t>(left * i);
     frame->data_[i * 2 + 1] = static_cast<int16_t>(right * i);
@@ -75,8 +78,28 @@
 }
 
 // Keep the existing sample rate.
-void SetStereoFrame(AudioFrame* frame, float left, float right) {
-  SetStereoFrame(frame, left, right, frame->sample_rate_hz_);
+void SetStereoFrame(float left, float right, AudioFrame* frame) {
+  SetStereoFrame(left, right, frame->sample_rate_hz_, frame);
+}
+
+// Sets the signal value to increase by |ch1|, |ch2|, |ch3|, |ch4| with every
+// sample in each channel respectively.
+void SetQuadFrame(float ch1,
+                  float ch2,
+                  float ch3,
+                  float ch4,
+                  int sample_rate_hz,
+                  AudioFrame* frame) {
+  memset(frame->data_, 0, sizeof(frame->data_));
+  frame->num_channels_ = 4;
+  frame->sample_rate_hz_ = sample_rate_hz;
+  frame->samples_per_channel_ = rtc::CheckedDivExact(sample_rate_hz, 100);
+  for (size_t i = 0; i < frame->samples_per_channel_; i++) {
+    frame->data_[i * 4] = static_cast<int16_t>(ch1 * i);
+    frame->data_[i * 4 + 1] = static_cast<int16_t>(ch2 * i);
+    frame->data_[i * 4 + 2] = static_cast<int16_t>(ch3 * i);
+    frame->data_[i * 4 + 3] = static_cast<int16_t>(ch4 * i);
+  }
 }
 
 void VerifyParams(const AudioFrame& ref_frame, const AudioFrame& test_frame) {
@@ -128,30 +151,45 @@
                                   int dst_channels,
                                   int dst_sample_rate_hz) {
   PushResampler<int16_t> resampler;  // Create a new one with every test.
-  const int16_t kSrcLeft = 30;  // Shouldn't overflow for any used sample rate.
-  const int16_t kSrcRight = 15;
+  const int16_t kSrcCh1 = 30;  // Shouldn't overflow for any used sample rate.
+  const int16_t kSrcCh2 = 15;
+  const int16_t kSrcCh3 = 22;
+  const int16_t kSrcCh4 = 8;
   const float resampling_factor = (1.0 * src_sample_rate_hz) /
       dst_sample_rate_hz;
-  const float dst_left = resampling_factor * kSrcLeft;
-  const float dst_right = resampling_factor * kSrcRight;
-  const float dst_mono = (dst_left + dst_right) / 2;
+  const float dst_ch1 = resampling_factor * kSrcCh1;
+  const float dst_ch2 = resampling_factor * kSrcCh2;
+  const float dst_ch3 = resampling_factor * kSrcCh3;
+  const float dst_ch4 = resampling_factor * kSrcCh4;
+  const float dst_stereo_to_mono = (dst_ch1 + dst_ch2) / 2;
+  const float dst_quad_to_mono = (dst_ch1 + dst_ch2 + dst_ch3 + dst_ch4) / 4;
+  const float dst_quad_to_stereo_ch1 = (dst_ch1 + dst_ch2) / 2;
+  const float dst_quad_to_stereo_ch2 = (dst_ch3 + dst_ch4) / 2;
   if (src_channels == 1)
-    SetMonoFrame(&src_frame_, kSrcLeft, src_sample_rate_hz);
+    SetMonoFrame(kSrcCh1, src_sample_rate_hz, &src_frame_);
+  else if (src_channels == 2)
+    SetStereoFrame(kSrcCh1, kSrcCh2, src_sample_rate_hz, &src_frame_);
   else
-    SetStereoFrame(&src_frame_, kSrcLeft, kSrcRight, src_sample_rate_hz);
+    SetQuadFrame(kSrcCh1, kSrcCh2, kSrcCh3, kSrcCh4, src_sample_rate_hz,
+                 &src_frame_);
 
   if (dst_channels == 1) {
-    SetMonoFrame(&dst_frame_, 0, dst_sample_rate_hz);
+    SetMonoFrame(0, dst_sample_rate_hz, &dst_frame_);
     if (src_channels == 1)
-      SetMonoFrame(&golden_frame_, dst_left, dst_sample_rate_hz);
+      SetMonoFrame(dst_ch1, dst_sample_rate_hz, &golden_frame_);
+    else if (src_channels == 2)
+      SetMonoFrame(dst_stereo_to_mono, dst_sample_rate_hz, &golden_frame_);
     else
-      SetMonoFrame(&golden_frame_, dst_mono, dst_sample_rate_hz);
+      SetMonoFrame(dst_quad_to_mono, dst_sample_rate_hz, &golden_frame_);
   } else {
-    SetStereoFrame(&dst_frame_, 0, 0, dst_sample_rate_hz);
+    SetStereoFrame(0, 0, dst_sample_rate_hz, &dst_frame_);
     if (src_channels == 1)
-      SetStereoFrame(&golden_frame_, dst_left, dst_left, dst_sample_rate_hz);
+      SetStereoFrame(dst_ch1, dst_ch1, dst_sample_rate_hz, &golden_frame_);
+    else if (src_channels == 2)
+      SetStereoFrame(dst_ch1, dst_ch2, dst_sample_rate_hz, &golden_frame_);
     else
-      SetStereoFrame(&golden_frame_, dst_left, dst_right, dst_sample_rate_hz);
+      SetStereoFrame(dst_quad_to_stereo_ch1, dst_quad_to_stereo_ch2,
+                     dst_sample_rate_hz, &golden_frame_);
   }
 
   // The sinc resampler has a known delay, which we compute here. Multiplying by
@@ -176,45 +214,50 @@
 
 TEST_F(UtilityTest, RemixAndResampleCopyFrameSucceeds) {
   // Stereo -> stereo.
-  SetStereoFrame(&src_frame_, 10, 10);
-  SetStereoFrame(&dst_frame_, 0, 0);
+  SetStereoFrame(10, 10, &src_frame_);
+  SetStereoFrame(0, 0, &dst_frame_);
   RemixAndResample(src_frame_, &resampler_, &dst_frame_);
   VerifyFramesAreEqual(src_frame_, dst_frame_);
 
   // Mono -> mono.
-  SetMonoFrame(&src_frame_, 20);
-  SetMonoFrame(&dst_frame_, 0);
+  SetMonoFrame(20, &src_frame_);
+  SetMonoFrame(0, &dst_frame_);
   RemixAndResample(src_frame_, &resampler_, &dst_frame_);
   VerifyFramesAreEqual(src_frame_, dst_frame_);
 }
 
 TEST_F(UtilityTest, RemixAndResampleMixingOnlySucceeds) {
   // Stereo -> mono.
-  SetStereoFrame(&dst_frame_, 0, 0);
-  SetMonoFrame(&src_frame_, 10);
-  SetStereoFrame(&golden_frame_, 10, 10);
+  SetStereoFrame(0, 0, &dst_frame_);
+  SetMonoFrame(10, &src_frame_);
+  SetStereoFrame(10, 10, &golden_frame_);
   RemixAndResample(src_frame_, &resampler_, &dst_frame_);
   VerifyFramesAreEqual(dst_frame_, golden_frame_);
 
   // Mono -> stereo.
-  SetMonoFrame(&dst_frame_, 0);
-  SetStereoFrame(&src_frame_, 10, 20);
-  SetMonoFrame(&golden_frame_, 15);
+  SetMonoFrame(0, &dst_frame_);
+  SetStereoFrame(10, 20, &src_frame_);
+  SetMonoFrame(15, &golden_frame_);
   RemixAndResample(src_frame_, &resampler_, &dst_frame_);
   VerifyFramesAreEqual(golden_frame_, dst_frame_);
 }
 
 TEST_F(UtilityTest, RemixAndResampleSucceeds) {
   const int kSampleRates[] = {8000, 16000, 32000, 44100, 48000, 96000};
-  const int kSampleRatesSize = sizeof(kSampleRates) / sizeof(*kSampleRates);
-  const int kChannels[] = {1, 2};
-  const int kChannelsSize = sizeof(kChannels) / sizeof(*kChannels);
+  const int kSampleRatesSize = arraysize(kSampleRates);
+  const int kSrcChannels[] = {1, 2, 4};
+  const int kSrcChannelsSize = arraysize(kSrcChannels);
+  const int kDstChannels[] = {1, 2};
+  const int kDstChannelsSize = arraysize(kDstChannels);
+
   for (int src_rate = 0; src_rate < kSampleRatesSize; src_rate++) {
     for (int dst_rate = 0; dst_rate < kSampleRatesSize; dst_rate++) {
-      for (int src_channel = 0; src_channel < kChannelsSize; src_channel++) {
-        for (int dst_channel = 0; dst_channel < kChannelsSize; dst_channel++) {
-          RunResampleTest(kChannels[src_channel], kSampleRates[src_rate],
-                          kChannels[dst_channel], kSampleRates[dst_rate]);
+      for (int src_channel = 0; src_channel < kSrcChannelsSize;
+           src_channel++) {
+        for (int dst_channel = 0; dst_channel < kDstChannelsSize;
+             dst_channel++) {
+          RunResampleTest(kSrcChannels[src_channel], kSampleRates[src_rate],
+                          kDstChannels[dst_channel], kSampleRates[dst_rate]);
         }
       }
     }