Audio: Reduce max channels to 16 to prevent buffer overflow

Lowers kMaxNumberOfAudioChannels from 24 to 16 so that a 10 ms frame at 48 kHz fits within the statically allocated AudioFrame buffer (7680 max samples = 16 channels * 480 samples). Also caps the channel count in the resampler's failure path to prevent buffer overflows.

Bug: webrtc:495018167
Change-Id: I60b30832b34508deff036a3a63fcabac1a35c889
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/460680
Reviewed-by: Per Åhgren <peah@webrtc.org>
Commit-Queue: Tomas Gunnarsson <tommi@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#47284}
diff --git a/api/audio/audio_view.h b/api/audio/audio_view.h
index b35c95e..f3fcf8d 100644
--- a/api/audio/audio_view.h
+++ b/api/audio/audio_view.h
@@ -49,10 +49,9 @@
 
 // The maximum number of audio channels supported by WebRTC encoders, decoders
 // and the AudioFrame class.
-// TODO(peah, tommi): Should kMaxNumberOfAudioChannels be 16 rather than 24?
-// The reason is that AudioFrame's max number of samples is 7680, which can
-// hold 16 10ms 16bit channels at 48 kHz (and not 24 channels).
-static constexpr size_t kMaxNumberOfAudioChannels = 24;
+// AudioFrame's max number of samples is 7680, which can hold 16 10ms 16bit
+// channels at 48 kHz.
+static constexpr size_t kMaxNumberOfAudioChannels = 16;
 
 // InterleavedView<> is a view over an interleaved audio buffer (e.g. from
 // AudioFrame).
diff --git a/modules/audio_coding/acm2/acm_resampler.cc b/modules/audio_coding/acm2/acm_resampler.cc
index e9c2861..ddefdbe 100644
--- a/modules/audio_coding/acm2/acm_resampler.cc
+++ b/modules/audio_coding/acm2/acm_resampler.cc
@@ -17,6 +17,7 @@
 #include "absl/algorithm/container.h"
 #include "api/audio/audio_frame.h"
 #include "api/audio/audio_view.h"
+#include "api/audio/channel_layout.h"
 #include "audio/utility/audio_frame_operations.h"
 #include "rtc_base/checks.h"
 #include "rtc_base/logging.h"
@@ -49,6 +50,9 @@
       RTC_LOG(LS_ERROR) << "AudioFrame cannot hold resampled data.";
       AudioFrameOperations::Mute(audio_frame);
       audio_frame->SetSampleRateAndChannelSize(desired_sample_rate_hz);
+      audio_frame->SetLayoutAndNumChannels(
+          CHANNEL_LAYOUT_UNSUPPORTED,
+          AudioFrame::kMaxDataSizeSamples / audio_frame->samples_per_channel());
       return false;
     }
   }
diff --git a/modules/audio_coding/acm2/acm_resampler_unittest.cc b/modules/audio_coding/acm2/acm_resampler_unittest.cc
index f597151..49713a9 100644
--- a/modules/audio_coding/acm2/acm_resampler_unittest.cc
+++ b/modules/audio_coding/acm2/acm_resampler_unittest.cc
@@ -24,30 +24,33 @@
   ResamplerHelper resampler;
   AudioFrame audio_frame;
 
-  // Create an audio frame that requires resampling from 32kHz to 48kHz
-  // with a very high number of channels (24).
-  const int kCurrentSampleRateHz = 32000;
-  const int kDesiredSampleRateHz = 48000;
-  const size_t kChannels = 24;
+  // Create an audio frame that requires resampling from 48kHz to 96kHz
+  // with a high number of channels (16) to exceed the buffer size.
+  const int kCurrentSampleRateHz = 48000;
+  const int kDesiredSampleRateHz = 96000;
+  const size_t kChannels = 16;
 
-  // 10 ms of data at 32kHz = 320 samples per channel.
-  std::vector<int16_t> dummy_data(320 * 24, 0);
-  audio_frame.UpdateFrame(0, dummy_data.data(), 320, kCurrentSampleRateHz,
+  // 10 ms of data at 48kHz = 480 samples per channel.
+  std::vector<int16_t> dummy_data(480 * 16, 0);
+  audio_frame.UpdateFrame(0, dummy_data.data(), 480, kCurrentSampleRateHz,
                           AudioFrame::kNormalSpeech, AudioFrame::kVadActive,
                           kChannels);
 
   // The resampler prime path will attempt to allocate a buffer that is
-  // kChannels * (kDesiredSampleRateHz / 100) = 24 * 480 = 11520 samples,
+  // kChannels * (kDesiredSampleRateHz / 100) = 16 * 960 = 15360 samples,
   // which exceeds AudioFrame::kMaxDataSizeSamples (7680).
   const bool resample_success =
       resampler.MaybeResample(kDesiredSampleRateHz, &audio_frame);
 
   // Verify that MaybeResample correctly detects the buffer size condition and
-  // safely aborts the operation by returning false and muting the frame.
+  // safely aborts the operation by returning false, muting the frame, and
+  // capping the channel count to avoid a buffer overflow in the muted data
+  // array.
   EXPECT_FALSE(resample_success);
   EXPECT_TRUE(audio_frame.muted());
   EXPECT_EQ(audio_frame.sample_rate_hz_, kDesiredSampleRateHz);
-  EXPECT_EQ(audio_frame.num_channels_, kChannels);
+  EXPECT_EQ(audio_frame.num_channels_, AudioFrame::kMaxDataSizeSamples /
+                                           audio_frame.samples_per_channel());
 }
 
 TEST(ResamplerHelperTest, MaybeResampleValidMaxSize) {
diff --git a/modules/audio_coding/codecs/builtin_audio_decoder_factory_unittest.cc b/modules/audio_coding/codecs/builtin_audio_decoder_factory_unittest.cc
index fe3adbd..0d678de 100644
--- a/modules/audio_coding/codecs/builtin_audio_decoder_factory_unittest.cc
+++ b/modules/audio_coding/codecs/builtin_audio_decoder_factory_unittest.cc
@@ -64,9 +64,9 @@
   const Environment env = CreateEnvironment();
   scoped_refptr<AudioDecoderFactory> adf = CreateBuiltinAudioDecoderFactory();
   ASSERT_TRUE(adf);
-  // L16 supports any clock rate and any number of channels up to 24.
+  // L16 supports any clock rate and any number of channels up to 16.
   const int clockrates[] = {8000, 16000, 32000, 48000};
-  const int num_channels[] = {1, 2, 3, 24};
+  const int num_channels[] = {1, 2, 3, 16};
   for (int clockrate : clockrates) {
     EXPECT_FALSE(
         adf->Create(env, SdpAudioFormat("l16", clockrate, 0), std::nullopt));
diff --git a/modules/audio_mixer/frame_combiner_unittest.cc b/modules/audio_mixer/frame_combiner_unittest.cc
index df01237..ae44046 100644
--- a/modules/audio_mixer/frame_combiner_unittest.cc
+++ b/modules/audio_mixer/frame_combiner_unittest.cc
@@ -19,7 +19,6 @@
 
 #include "api/array_view.h"
 #include "api/audio/audio_frame.h"
-#include "api/audio/audio_view.h"
 #include "api/audio/channel_layout.h"
 #include "api/rtp_packet_info.h"
 #include "api/rtp_packet_infos.h"
@@ -146,15 +145,12 @@
 TEST(FrameCombinerDeathTest, BuildCrashesWithManyChannels) {
   FrameCombiner combiner(true);
   for (const int rate : {8000, 18000, 34000, 48000}) {
-    for (const int number_of_channels : {10, 20, 21}) {
-      RTC_DCHECK_LE(number_of_channels, kMaxNumberOfAudioChannels);
+    for (const int number_of_channels : {10, 15, 17}) {
       if (static_cast<size_t>(rate / 100 * number_of_channels) >
           AudioFrame::kMaxDataSizeSamples) {
         continue;
       }
       const std::vector<AudioFrame*> all_frames = {&frame1, &frame2};
-      SetUpFrames(rate, number_of_channels);
-
       const int number_of_frames = 2;
       SCOPED_TRACE(
           ProduceDebugText(rate, number_of_channels, number_of_frames));
@@ -162,8 +158,11 @@
           all_frames.begin(), all_frames.begin() + number_of_frames);
       AudioFrame audio_frame_for_mixing;
       EXPECT_DEATH(
-          combiner.Combine(frames_to_combine, number_of_channels, rate,
-                           frames_to_combine.size(), &audio_frame_for_mixing),
+          {
+            SetUpFrames(rate, number_of_channels);
+            combiner.Combine(frames_to_combine, number_of_channels, rate,
+                             frames_to_combine.size(), &audio_frame_for_mixing);
+          },
           "");
     }
   }