Use AV1 packetizer/depacketizer for AV1 bitstreams

Bug: webrtc:11042
Change-Id: Ibf45a99d8016dccbe109d946ac967efa927312e4
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/161011
Reviewed-by: Philip Eliasson <philipel@webrtc.org>
Commit-Queue: Danil Chapovalov <danilchap@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#29953}
diff --git a/modules/rtp_rtcp/source/rtp_format.cc b/modules/rtp_rtcp/source/rtp_format.cc
index 47838cb..2448f82 100644
--- a/modules/rtp_rtcp/source/rtp_format.cc
+++ b/modules/rtp_rtcp/source/rtp_format.cc
@@ -13,10 +13,12 @@
 #include <memory>
 
 #include "absl/types/variant.h"
+#include "modules/rtp_rtcp/source/rtp_depacketizer_av1.h"
 #include "modules/rtp_rtcp/source/rtp_format_h264.h"
 #include "modules/rtp_rtcp/source/rtp_format_video_generic.h"
 #include "modules/rtp_rtcp/source/rtp_format_vp8.h"
 #include "modules/rtp_rtcp/source/rtp_format_vp9.h"
+#include "modules/rtp_rtcp/source/rtp_packetizer_av1.h"
 #include "modules/video_coding/codecs/h264/include/h264_globals.h"
 #include "modules/video_coding/codecs/vp8/include/vp8_globals.h"
 #include "modules/video_coding/codecs/vp9/include/vp9_globals.h"
@@ -54,6 +56,9 @@
           absl::get<RTPVideoHeaderVP9>(rtp_video_header.video_type_header);
       return std::make_unique<RtpPacketizerVp9>(payload, limits, vp9);
     }
+    case kVideoCodecAV1:
+      return std::make_unique<RtpPacketizerAv1>(payload, limits,
+                                                rtp_video_header.frame_type);
     default: {
       return std::make_unique<RtpPacketizerGeneric>(payload, limits,
                                                     rtp_video_header);
@@ -151,6 +156,8 @@
       return new RtpDepacketizerVp8();
     case kVideoCodecVP9:
       return new RtpDepacketizerVp9();
+    case kVideoCodecAV1:
+      return new RtpDepacketizerAv1();
     default:
       return new RtpDepacketizerGeneric(/*generic_header_enabled=*/true);
   }
diff --git a/modules/rtp_rtcp/source/rtp_sender_video.cc b/modules/rtp_rtcp/source/rtp_sender_video.cc
index 9592b06..5f7735e 100644
--- a/modules/rtp_rtcp/source/rtp_sender_video.cc
+++ b/modules/rtp_rtcp/source/rtp_sender_video.cc
@@ -366,10 +366,12 @@
           continue;
       }
     }
-    RTC_DCHECK_GE(packetized_payload_size, unpacketized_payload_size);
-    packetization_overhead_bitrate_.Update(
-        packetized_payload_size - unpacketized_payload_size,
-        clock_->TimeInMilliseconds());
+    // AV1 packetizer may produce fewer packetized bytes than unpacketized.
+    if (packetized_payload_size >= unpacketized_payload_size) {
+      packetization_overhead_bitrate_.Update(
+          packetized_payload_size - unpacketized_payload_size,
+          clock_->TimeInMilliseconds());
+    }
   }
 
   rtp_sender_->EnqueuePackets(std::move(packets));
diff --git a/modules/video_coding/BUILD.gn b/modules/video_coding/BUILD.gn
index 71e14fa..ceee019 100644
--- a/modules/video_coding/BUILD.gn
+++ b/modules/video_coding/BUILD.gn
@@ -136,6 +136,7 @@
     "../../api/video:video_bitrate_allocator",
     "../../api/video:video_frame",
     "../../api/video:video_frame_i420",
+    "../../api/video:video_frame_type",
     "../../api/video:video_rtp_headers",
     "../../api/video_codecs:video_codecs_api",
     "../../common_video",
@@ -156,6 +157,7 @@
     "../../rtc_base/third_party/base64",
     "../../rtc_base/time:timestamp_extrapolator",
     "../../system_wrappers",
+    "../rtp_rtcp",
     "../rtp_rtcp:rtp_rtcp_format",
     "//third_party/abseil-cpp/absl/container:inlined_vector",
     "//third_party/abseil-cpp/absl/types:optional",
diff --git a/modules/video_coding/packet_buffer.cc b/modules/video_coding/packet_buffer.cc
index 3e8b11f..30dfc21 100644
--- a/modules/video_coding/packet_buffer.cc
+++ b/modules/video_coding/packet_buffer.cc
@@ -22,7 +22,9 @@
 #include "api/array_view.h"
 #include "api/rtp_packet_info.h"
 #include "api/video/encoded_frame.h"
+#include "api/video/video_frame_type.h"
 #include "common_video/h264/h264_common.h"
+#include "modules/rtp_rtcp/source/rtp_depacketizer_av1.h"
 #include "modules/rtp_rtcp/source/rtp_header_extensions.h"
 #include "modules/rtp_rtcp/source/rtp_packet_received.h"
 #include "modules/rtp_rtcp/source/rtp_video_header.h"
@@ -398,7 +400,12 @@
         }
       }
 
-      found_frames.push_back(AssembleFrame(start_seq_num, seq_num));
+      if (auto frame = AssembleFrame(start_seq_num, seq_num)) {
+        found_frames.push_back(std::move(frame));
+      } else {
+        RTC_LOG(LS_ERROR) << "Failed to assemble frame from packets "
+                          << start_seq_num << "-" << seq_num;
+      }
 
       missing_packets_.erase(missing_packets_.begin(),
                              missing_packets_.upper_bound(seq_num));
@@ -437,16 +444,25 @@
     packet_infos.push_back(packet.packet_info);
   }
 
-  auto bitstream = EncodedImageBuffer::Create(frame_size);
-
-  uint8_t* write_at = bitstream->data();
-  for (rtc::ArrayView<const uint8_t> payload : payloads) {
-    memcpy(write_at, payload.data(), payload.size());
-    write_at += payload.size();
-  }
-  RTC_DCHECK_EQ(write_at - bitstream->data(), bitstream->size());
-
   const Packet& first_packet = GetPacket(first_seq_num);
+  rtc::scoped_refptr<EncodedImageBuffer> bitstream;
+  // TODO(danilchap): Hide codec-specific code paths behind an interface.
+  if (first_packet.codec() == VideoCodecType::kVideoCodecAV1) {
+    bitstream = RtpDepacketizerAv1::AssembleFrame(payloads);
+    if (!bitstream) {
+      // Failed to assemble a frame. Discard and continue.
+      return nullptr;
+    }
+  } else {
+    bitstream = EncodedImageBuffer::Create(frame_size);
+
+    uint8_t* write_at = bitstream->data();
+    for (rtc::ArrayView<const uint8_t> payload : payloads) {
+      memcpy(write_at, payload.data(), payload.size());
+      write_at += payload.size();
+    }
+    RTC_DCHECK_EQ(write_at - bitstream->data(), bitstream->size());
+  }
   const Packet& last_packet = GetPacket(last_seq_num);
   return std::make_unique<RtpFrameObject>(
       first_seq_num,                            //
diff --git a/modules/video_coding/packet_buffer_unittest.cc b/modules/video_coding/packet_buffer_unittest.cc
index 3385f78..b146853 100644
--- a/modules/video_coding/packet_buffer_unittest.cc
+++ b/modules/video_coding/packet_buffer_unittest.cc
@@ -444,6 +444,98 @@
               ElementsAreArray(expected));
 }
 
+// Checks that two AV1 payloads carrying one OBU split across two packets are
+// assembled into a single frame: the OBU header, an obu_size field, then the
+// concatenated OBU payload bytes.
+TEST_F(PacketBufferTest, GetBitstreamAv1) {
+  // The first byte of each payload is the AV1 aggregation header (Z|Y|W|N,
+  // see the AV1 RTP payload format). data1 has Y=1 (its OBU continues in the
+  // next packet) and data2 has Z=1 (it starts with a continuation).
+  const uint8_t data1[] = {0b01'01'0000, 0b0'0100'000, 'm', 'a', 'n', 'y', ' '};
+  const uint8_t data2[] = {0b10'01'0000, 'b', 'i', 't', 's', 0};
+
+  // Heap copies that are handed to the packets through raw `data` pointers.
+  // NOTE(review): presumably the packet buffer frees them - confirm.
+  uint8_t* new_data1 = new uint8_t[sizeof(data1)];
+  memcpy(new_data1, data1, sizeof(data1));
+  uint8_t* new_data2 = new uint8_t[sizeof(data2)];
+  memcpy(new_data2, data2, sizeof(data2));
+
+  // First packet of the frame: on its own it must not produce a frame.
+  PacketBuffer::Packet packet1;
+  packet1.video_header.codec = kVideoCodecAV1;
+  packet1.seq_num = 13;
+  packet1.video_header.is_first_packet_in_frame = true;
+  packet1.video_header.is_last_packet_in_frame = false;
+  packet1.size_bytes = sizeof(data1);
+  packet1.data = new_data1;
+  auto frames = packet_buffer_.InsertPacket(&packet1).frames;
+  EXPECT_THAT(frames, IsEmpty());
+
+  // Last packet of the frame: inserting it completes the frame.
+  PacketBuffer::Packet packet2;
+  packet2.video_header.codec = kVideoCodecAV1;
+  packet2.seq_num = 14;
+  packet2.video_header.is_first_packet_in_frame = false;
+  packet2.video_header.is_last_packet_in_frame = true;
+  packet2.size_bytes = sizeof(data2);
+  packet2.data = new_data2;
+  frames = packet_buffer_.InsertPacket(&packet2).frames;
+
+  ASSERT_THAT(frames, SizeIs(1));
+  EXPECT_EQ(frames[0]->first_seq_num(), 13);
+  // Stop here if the frame is shorter than the two bytes inspected below;
+  // otherwise `size() - 2` would wrap around and the views would read past
+  // the end of the buffer.
+  ASSERT_GE(frames[0]->size(), 2u);
+  EXPECT_THAT(rtc::MakeArrayView(frames[0]->data(), 2),
+              ElementsAre(0b0'0100'010, 10));  // obu_header and obu_size.
+  // The string literal is matched including its terminating NUL, which
+  // corresponds to the trailing 0 byte of data2.
+  EXPECT_THAT(rtc::MakeArrayView(frames[0]->data() + 2, frames[0]->size() - 2),
+              ElementsAreArray("many bits"));
+}
+
+// Checks that no frame is emitted when the AV1 payloads of a complete packet
+// sequence are inconsistent and cannot be assembled into a bitstream.
+TEST_F(PacketBufferTest, GetBitstreamInvalidAv1) {
+  // Two av1 payloads that can't be combined into proper frame.
+  // data1 announces that its OBU continues in the next packet (Y=1), but
+  // data2 does not mark its first element as a continuation (Z=0).
+  const uint8_t data1[] = {0b01'01'0000, 0b0'0100'000, 'm', 'a', 'n', 'y', ' '};
+  const uint8_t data2[] = {0b00'01'0000, 'b', 'i', 't', 's', 0};
+
+  // Heap copies that are handed to the packets through raw `data` pointers.
+  // NOTE(review): presumably the packet buffer frees them - confirm.
+  uint8_t* new_data1 = new uint8_t[sizeof(data1)];
+  memcpy(new_data1, data1, sizeof(data1));
+  uint8_t* new_data2 = new uint8_t[sizeof(data2)];
+  memcpy(new_data2, data2, sizeof(data2));
+
+  // First packet of the frame: on its own it must not produce a frame.
+  PacketBuffer::Packet packet1;
+  packet1.video_header.codec = kVideoCodecAV1;
+  packet1.seq_num = 13;
+  packet1.video_header.is_first_packet_in_frame = true;
+  packet1.video_header.is_last_packet_in_frame = false;
+  packet1.size_bytes = sizeof(data1);
+  packet1.data = new_data1;
+  auto frames = packet_buffer_.InsertPacket(&packet1).frames;
+  EXPECT_THAT(frames, IsEmpty());
+
+  // Last packet completes the sequence, yet no frame may be returned.
+  PacketBuffer::Packet packet2;
+  packet2.video_header.codec = kVideoCodecAV1;
+  packet2.seq_num = 14;
+  packet2.video_header.is_first_packet_in_frame = false;
+  packet2.video_header.is_last_packet_in_frame = true;
+  packet2.size_bytes = sizeof(data2);
+  packet2.data = new_data2;
+  frames = packet_buffer_.InsertPacket(&packet2).frames;
+
+  EXPECT_THAT(frames, IsEmpty());
+}
+
 TEST_F(PacketBufferTest, InsertPacketAfterSequenceNumberWrapAround) {
   uint16_t kFirstSeqNum = 0;
   uint32_t kTimestampDelta = 100;