Hide AV1-specific logic from RtpVideoReceiver behind the depacketizer interface.

Bug: None
Change-Id: I0498d9e82cbc876d54bebc7f3265e3ae6da61614
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/171062
Reviewed-by: Ilya Nikolaevskiy <ilnik@webrtc.org>
Reviewed-by: Sam Zackrisson <saza@webrtc.org>
Commit-Queue: Danil Chapovalov <danilchap@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#30872}
diff --git a/modules/rtp_rtcp/BUILD.gn b/modules/rtp_rtcp/BUILD.gn
index 2826d0f..997cacc 100644
--- a/modules/rtp_rtcp/BUILD.gn
+++ b/modules/rtp_rtcp/BUILD.gn
@@ -215,6 +215,7 @@
     "source/ulpfec_receiver_impl.cc",
     "source/ulpfec_receiver_impl.h",
     "source/video_fec_generator.h",
+    "source/video_rtp_depacketizer.cc",
     "source/video_rtp_depacketizer.h",
     "source/video_rtp_depacketizer_av1.cc",
     "source/video_rtp_depacketizer_av1.h",
diff --git a/modules/rtp_rtcp/source/rtp_packetizer_av1_unittest.cc b/modules/rtp_rtcp/source/rtp_packetizer_av1_unittest.cc
index 5930f4c..0529e98 100644
--- a/modules/rtp_rtcp/source/rtp_packetizer_av1_unittest.cc
+++ b/modules/rtp_rtcp/source/rtp_packetizer_av1_unittest.cc
@@ -104,7 +104,7 @@
   for (size_t i = 0; i < rtp_payloads.size(); ++i) {
     payloads[i] = rtp_payloads[i];
   }
-  return Av1Frame(VideoRtpDepacketizerAv1::AssembleFrame(payloads));
+  return Av1Frame(VideoRtpDepacketizerAv1().AssembleFrame(payloads));
 }
 
 class Obu {
diff --git a/modules/rtp_rtcp/source/video_rtp_depacketizer.cc b/modules/rtp_rtcp/source/video_rtp_depacketizer.cc
new file mode 100644
index 0000000..bb0bf09
--- /dev/null
+++ b/modules/rtp_rtcp/source/video_rtp_depacketizer.cc
@@ -0,0 +1,42 @@
+/*
+ *  Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/rtp_rtcp/source/video_rtp_depacketizer.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "api/array_view.h"
+#include "api/scoped_refptr.h"
+#include "api/video/encoded_image.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+rtc::scoped_refptr<EncodedImageBuffer> VideoRtpDepacketizer::AssembleFrame(
+    rtc::ArrayView<const rtc::ArrayView<const uint8_t>> rtp_payloads) {
+  size_t frame_size = 0;  // Sum of all payload sizes; the buffer size.
+  for (rtc::ArrayView<const uint8_t> payload : rtp_payloads) {
+    frame_size += payload.size();
+  }
+
+  rtc::scoped_refptr<EncodedImageBuffer> bitstream =
+      EncodedImageBuffer::Create(frame_size);
+
+  uint8_t* write_at = bitstream->data();  // Next write position.
+  for (rtc::ArrayView<const uint8_t> payload : rtp_payloads) {
+    memcpy(write_at, payload.data(), payload.size());  // Append in order.
+    write_at += payload.size();
+  }
+  RTC_DCHECK_EQ(write_at - bitstream->data(), bitstream->size());
+  return bitstream;
+}
+
+}  // namespace webrtc
diff --git a/modules/rtp_rtcp/source/video_rtp_depacketizer.h b/modules/rtp_rtcp/source/video_rtp_depacketizer.h
index 0420e4e..2266120 100644
--- a/modules/rtp_rtcp/source/video_rtp_depacketizer.h
+++ b/modules/rtp_rtcp/source/video_rtp_depacketizer.h
@@ -11,7 +11,12 @@
 #ifndef MODULES_RTP_RTCP_SOURCE_VIDEO_RTP_DEPACKETIZER_H_
 #define MODULES_RTP_RTCP_SOURCE_VIDEO_RTP_DEPACKETIZER_H_
 
+#include <stdint.h>
+
 #include "absl/types/optional.h"
+#include "api/array_view.h"
+#include "api/scoped_refptr.h"
+#include "api/video/encoded_image.h"
 #include "modules/rtp_rtcp/source/rtp_video_header.h"
 #include "rtc_base/copy_on_write_buffer.h"
 
@@ -27,6 +32,8 @@
   virtual ~VideoRtpDepacketizer() = default;
   virtual absl::optional<ParsedRtpPayload> Parse(
       rtc::CopyOnWriteBuffer rtp_payload) = 0;
+  virtual rtc::scoped_refptr<EncodedImageBuffer> AssembleFrame(
+      rtc::ArrayView<const rtc::ArrayView<const uint8_t>> rtp_payloads);
 };
 
 }  // namespace webrtc
diff --git a/modules/rtp_rtcp/source/video_rtp_depacketizer_av1.h b/modules/rtp_rtcp/source/video_rtp_depacketizer_av1.h
index 9758d84..ac8c7e6 100644
--- a/modules/rtp_rtcp/source/video_rtp_depacketizer_av1.h
+++ b/modules/rtp_rtcp/source/video_rtp_depacketizer_av1.h
@@ -30,8 +30,9 @@
   VideoRtpDepacketizerAv1& operator=(const VideoRtpDepacketizerAv1&) = delete;
   ~VideoRtpDepacketizerAv1() override = default;
 
-  static rtc::scoped_refptr<EncodedImageBuffer> AssembleFrame(
-      rtc::ArrayView<const rtc::ArrayView<const uint8_t>> rtp_payloads);
+  rtc::scoped_refptr<EncodedImageBuffer> AssembleFrame(
+      rtc::ArrayView<const rtc::ArrayView<const uint8_t>> rtp_payloads)
+      override;
 
   absl::optional<ParsedRtpPayload> Parse(
       rtc::CopyOnWriteBuffer rtp_payload) override;
diff --git a/modules/rtp_rtcp/source/video_rtp_depacketizer_av1_unittest.cc b/modules/rtp_rtcp/source/video_rtp_depacketizer_av1_unittest.cc
index 9a3e1f7..e9ad1a1 100644
--- a/modules/rtp_rtcp/source/video_rtp_depacketizer_av1_unittest.cc
+++ b/modules/rtp_rtcp/source/video_rtp_depacketizer_av1_unittest.cc
@@ -118,7 +118,7 @@
                               0b0'0110'000,  // /  Frame
                               20, 30, 40};   // \  OBU
   rtc::ArrayView<const uint8_t> payloads[] = {payload1};
-  auto frame = VideoRtpDepacketizerAv1::AssembleFrame(payloads);
+  auto frame = VideoRtpDepacketizerAv1().AssembleFrame(payloads);
   ASSERT_TRUE(frame);
   rtc::ArrayView<const uint8_t> frame_view(*frame);
   EXPECT_TRUE(frame_view[0] & kObuHeaderHasSize);
@@ -133,7 +133,7 @@
                               30,
                               40};  // \  obu_payload
   rtc::ArrayView<const uint8_t> payloads[] = {payload1};
-  auto frame = VideoRtpDepacketizerAv1::AssembleFrame(payloads);
+  auto frame = VideoRtpDepacketizerAv1().AssembleFrame(payloads);
   ASSERT_TRUE(frame);
   rtc::ArrayView<const uint8_t> frame_view(*frame);
   EXPECT_TRUE(frame_view[0] & kObuHeaderHasSize);
@@ -147,7 +147,7 @@
                               0b010'01'000,           // | extension_header
                               20,           30, 40};  // \  OBU
   rtc::ArrayView<const uint8_t> payloads[] = {payload1};
-  auto frame = VideoRtpDepacketizerAv1::AssembleFrame(payloads);
+  auto frame = VideoRtpDepacketizerAv1().AssembleFrame(payloads);
   ASSERT_TRUE(frame);
   rtc::ArrayView<const uint8_t> frame_view(*frame);
   EXPECT_TRUE(frame_view[0] & kObuHeaderHasSize);
@@ -164,7 +164,7 @@
                               30,
                               40};  // \  obu_payload
   rtc::ArrayView<const uint8_t> payloads[] = {payload1};
-  auto frame = VideoRtpDepacketizerAv1::AssembleFrame(payloads);
+  auto frame = VideoRtpDepacketizerAv1().AssembleFrame(payloads);
   ASSERT_TRUE(frame);
   rtc::ArrayView<const uint8_t> frame_view(*frame);
   EXPECT_TRUE(frame_view[0] & kObuHeaderHasSize);
@@ -176,7 +176,7 @@
                               0b0'0110'000,  // /  Frame
                               20};           // \  OBU
   rtc::ArrayView<const uint8_t> payloads[] = {payload1};
-  auto frame = VideoRtpDepacketizerAv1::AssembleFrame(payloads);
+  auto frame = VideoRtpDepacketizerAv1().AssembleFrame(payloads);
   ASSERT_TRUE(frame);
   EXPECT_THAT(rtc::ArrayView<const uint8_t>(*frame),
               ElementsAre(0b0'0110'010, 1, 20));
@@ -190,7 +190,7 @@
                               0b0'0110'000,  // /  Frame
                               20};           // \  OBU
   rtc::ArrayView<const uint8_t> payloads[] = {payload1};
-  auto frame = VideoRtpDepacketizerAv1::AssembleFrame(payloads);
+  auto frame = VideoRtpDepacketizerAv1().AssembleFrame(payloads);
   ASSERT_TRUE(frame);
   EXPECT_THAT(rtc::ArrayView<const uint8_t>(*frame),
               ElementsAre(0b0'0001'010, 1, 10,    // Sequence Header OBU
@@ -203,7 +203,7 @@
   const uint8_t payload2[] = {0b10'01'0000,  // aggregation header
                               40};
   rtc::ArrayView<const uint8_t> payloads[] = {payload1, payload2};
-  auto frame = VideoRtpDepacketizerAv1::AssembleFrame(payloads);
+  auto frame = VideoRtpDepacketizerAv1().AssembleFrame(payloads);
   ASSERT_TRUE(frame);
   EXPECT_THAT(rtc::ArrayView<const uint8_t>(*frame),
               ElementsAre(0b0'0110'010, 3, 20, 30, 40));
@@ -220,7 +220,7 @@
   const uint8_t payload2[] = {0b10'01'0000,  // aggregation header
                               40};           //
   rtc::ArrayView<const uint8_t> payloads[] = {payload1, payload2};
-  auto frame = VideoRtpDepacketizerAv1::AssembleFrame(payloads);
+  auto frame = VideoRtpDepacketizerAv1().AssembleFrame(payloads);
   ASSERT_TRUE(frame);
   EXPECT_THAT(rtc::ArrayView<const uint8_t>(*frame),
               ElementsAre(0b0'0001'010, 1, 10,            // SH
@@ -251,7 +251,7 @@
                               70, 80, 90};   // \  tail of the frame OBU
 
   rtc::ArrayView<const uint8_t> payloads[] = {payload1, payload2};
-  auto frame = VideoRtpDepacketizerAv1::AssembleFrame(payloads);
+  auto frame = VideoRtpDepacketizerAv1().AssembleFrame(payloads);
   ASSERT_TRUE(frame);
   EXPECT_THAT(rtc::ArrayView<const uint8_t>(*frame),
               ElementsAre(  // Sequence header OBU
@@ -276,7 +276,7 @@
 
   rtc::ArrayView<const uint8_t> payloads[] = {payload1, payload2, payload3,
                                               payload4};
-  auto frame = VideoRtpDepacketizerAv1::AssembleFrame(payloads);
+  auto frame = VideoRtpDepacketizerAv1().AssembleFrame(payloads);
   ASSERT_TRUE(frame);
   EXPECT_THAT(rtc::ArrayView<const uint8_t>(*frame),
               ElementsAre(0b0'0110'010, 8, 11, 12, 13, 14, 15, 16, 17, 18));
@@ -308,7 +308,7 @@
                               33, 34, 35, 36};
   rtc::ArrayView<const uint8_t> payloads[] = {payload1, payload2, payload3,
                                               payload4};
-  auto frame = VideoRtpDepacketizerAv1::AssembleFrame(payloads);
+  auto frame = VideoRtpDepacketizerAv1().AssembleFrame(payloads);
   ASSERT_TRUE(frame);
   EXPECT_THAT(rtc::ArrayView<const uint8_t>(*frame),
               ElementsAre(0b0'0011'010, 2, 11, 12,  // Frame header
@@ -327,7 +327,7 @@
   payload1[3] = 0b0'0110'000;  // obu_header with size and extension bits unset.
   payload1[4 + 42] = 0x42;
   rtc::ArrayView<const uint8_t> payloads[] = {payload1};
-  auto frame = VideoRtpDepacketizerAv1::AssembleFrame(payloads);
+  auto frame = VideoRtpDepacketizerAv1().AssembleFrame(payloads);
   ASSERT_TRUE(frame);
   EXPECT_EQ(frame->size(), 2 + 127u);
   rtc::ArrayView<const uint8_t> frame_view(*frame);
@@ -352,7 +352,7 @@
   payload2[2 + 20] = 0x20;
 
   rtc::ArrayView<const uint8_t> payloads[] = {payload1, payload2};
-  auto frame = VideoRtpDepacketizerAv1::AssembleFrame(payloads);
+  auto frame = VideoRtpDepacketizerAv1().AssembleFrame(payloads);
   ASSERT_TRUE(frame);
   EXPECT_EQ(frame->size(), 3 + 128u);
   rtc::ArrayView<const uint8_t> frame_view(*frame);
@@ -370,7 +370,7 @@
   const uint8_t payload2[] = {0b10'01'0000, 0b0'0110'000, 10, 20, 30};
   rtc::ArrayView<const uint8_t> payloads[] = {payload1, payload2};
 
-  auto frame = VideoRtpDepacketizerAv1::AssembleFrame(payloads);
+  auto frame = VideoRtpDepacketizerAv1().AssembleFrame(payloads);
   ASSERT_TRUE(frame);
   EXPECT_THAT(rtc::ArrayView<const uint8_t>(*frame),
               ElementsAre(0b0'0110'010, 3, 10, 20, 30));
@@ -382,7 +382,7 @@
   const uint8_t payload2[] = {0b10'01'0000};
   rtc::ArrayView<const uint8_t> payloads[] = {payload1, payload2};
 
-  auto frame = VideoRtpDepacketizerAv1::AssembleFrame(payloads);
+  auto frame = VideoRtpDepacketizerAv1().AssembleFrame(payloads);
   ASSERT_TRUE(frame);
   EXPECT_THAT(rtc::ArrayView<const uint8_t>(*frame),
               ElementsAre(0b0'0110'010, 3, 10, 20, 30));
diff --git a/test/fuzzers/rtp_depacketizer_av1_assemble_frame_fuzzer.cc b/test/fuzzers/rtp_depacketizer_av1_assemble_frame_fuzzer.cc
index 6c6b1d3..168e7b6 100644
--- a/test/fuzzers/rtp_depacketizer_av1_assemble_frame_fuzzer.cc
+++ b/test/fuzzers/rtp_depacketizer_av1_assemble_frame_fuzzer.cc
@@ -34,6 +34,6 @@
     rtp_payloads.push_back(fuzz_input.ReadByteArray(next_size));
   }
   // Run code under test.
-  VideoRtpDepacketizerAv1::AssembleFrame(rtp_payloads);
+  VideoRtpDepacketizerAv1().AssembleFrame(rtp_payloads);
 }
 }  // namespace webrtc
diff --git a/video/rtp_video_stream_receiver.cc b/video/rtp_video_stream_receiver.cc
index df060cf..4a2eb8d 100644
--- a/video/rtp_video_stream_receiver.cc
+++ b/video/rtp_video_stream_receiver.cc
@@ -36,7 +36,6 @@
 #include "modules/rtp_rtcp/source/rtp_packet_received.h"
 #include "modules/rtp_rtcp/source/rtp_rtcp_config.h"
 #include "modules/rtp_rtcp/source/video_rtp_depacketizer.h"
-#include "modules/rtp_rtcp/source/video_rtp_depacketizer_av1.h"
 #include "modules/rtp_rtcp/source/video_rtp_depacketizer_raw.h"
 #include "modules/utility/include/process_thread.h"
 #include "modules/video_coding/frame_object.h"
@@ -712,7 +711,6 @@
   int max_nack_count;
   int64_t min_recv_time;
   int64_t max_recv_time;
-  int frame_size;
   std::vector<rtc::ArrayView<const uint8_t>> payloads;
   RtpPacketInfos::vector_type packet_infos;
 
@@ -726,7 +724,6 @@
       max_nack_count = packet->times_nacked;
       min_recv_time = packet->packet_info.receive_time_ms();
       max_recv_time = packet->packet_info.receive_time_ms();
-      frame_size = packet->video_payload.size();
       payloads.clear();
       packet_infos.clear();
     } else {
@@ -735,31 +732,22 @@
           std::min(min_recv_time, packet->packet_info.receive_time_ms());
       max_recv_time =
           std::max(max_recv_time, packet->packet_info.receive_time_ms());
-      frame_size += packet->video_payload.size();
     }
     payloads.emplace_back(packet->video_payload);
     packet_infos.push_back(packet->packet_info);
 
     frame_boundary = packet->is_last_packet_in_frame();
     if (packet->is_last_packet_in_frame()) {
-      rtc::scoped_refptr<EncodedImageBuffer> bitstream;
-      // TODO(danilchap): Hide codec-specific code paths behind an interface.
-      if (first_packet->codec() == VideoCodecType::kVideoCodecAV1) {
-        bitstream = VideoRtpDepacketizerAv1::AssembleFrame(payloads);
-        if (!bitstream) {
-          // Failed to assemble a frame. Discard and continue.
-          continue;
-        }
-      } else {
-        bitstream = EncodedImageBuffer::Create(frame_size);
+      auto depacketizer_it = payload_type_map_.find(first_packet->payload_type);
+      RTC_CHECK(depacketizer_it != payload_type_map_.end());
 
-        uint8_t* write_at = bitstream->data();
-        for (rtc::ArrayView<const uint8_t> payload : payloads) {
-          memcpy(write_at, payload.data(), payload.size());
-          write_at += payload.size();
-        }
-        RTC_DCHECK_EQ(write_at - bitstream->data(), bitstream->size());
+      rtc::scoped_refptr<EncodedImageBuffer> bitstream =
+          depacketizer_it->second->AssembleFrame(payloads);
+      if (!bitstream) {
+        // Failed to assemble a frame. Discard and continue.
+        continue;
       }
+
       const video_coding::PacketBuffer::Packet& last_packet = *packet;
       OnAssembledFrame(std::make_unique<video_coding::RtpFrameObject>(
           first_packet->seq_num,                    //