Use H26xPacketBuffer for H.264 and H.265 packets.

This CL updates RtpVideoStreamReceiver2 to use H26xPacketBuffer for
H.264 and H.265 packets. H.264 specific fixes are moved to
H26xPacketBuffer as well.

H26xPacketBuffer is behind field trial WebRTC-Video-H26xPacketBuffer.

Bug: webrtc:13485
Change-Id: I1874c5a624b94c2d75ce607cf10c939619d7b5b9
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/346280
Reviewed-by: Philip Eliasson <philipel@webrtc.org>
Commit-Queue: Philip Eliasson <philipel@webrtc.org>
Reviewed-by: Sergey Silkin <ssilkin@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#42062}
diff --git a/experiments/field_trials.py b/experiments/field_trials.py
index 58f6a47..10c94e6 100755
--- a/experiments/field_trials.py
+++ b/experiments/field_trials.py
@@ -150,6 +150,9 @@
     FieldTrial('WebRTC-ZeroHertzQueueOverload',
                'webrtc:332381',
                date(2024, 7, 1)),
+    FieldTrial('WebRTC-Video-H26xPacketBuffer',
+               'webrtc:13485',
+               date(2024, 6, 1)),
     # keep-sorted end
 ])  # yapf: disable
 
diff --git a/video/BUILD.gn b/video/BUILD.gn
index 9f20746..cf724b7 100644
--- a/video/BUILD.gn
+++ b/video/BUILD.gn
@@ -129,6 +129,7 @@
     "../modules/rtp_rtcp:rtp_video_header",
     "../modules/video_coding",
     "../modules/video_coding:h264_sprop_parameter_sets",
+    "../modules/video_coding:h26x_packet_buffer",
     "../modules/video_coding:nack_requester",
     "../modules/video_coding:packet_buffer",
     "../modules/video_coding:video_codec_interface",
@@ -902,6 +903,7 @@
       "../modules/video_coding",
       "../modules/video_coding:codec_globals_headers",
       "../modules/video_coding:encoded_frame",
+      "../modules/video_coding:h26x_packet_buffer",
       "../modules/video_coding:packet_buffer",
       "../modules/video_coding:video_codec_interface",
       "../modules/video_coding:video_coding_utility",
diff --git a/video/rtp_video_stream_receiver2.cc b/video/rtp_video_stream_receiver2.cc
index 6b41acc..8fef92c 100644
--- a/video/rtp_video_stream_receiver2.cc
+++ b/video/rtp_video_stream_receiver2.cc
@@ -362,6 +362,7 @@
   if (codec_params.count(cricket::kH264FmtpSpsPpsIdrInKeyframe) > 0 ||
       field_trials_.IsEnabled("WebRTC-SpsPpsIdrIsH264Keyframe")) {
     packet_buffer_.ForceSpsPpsIdrIsH264Keyframe();
+    sps_pps_idr_is_h264_keyframe_ = true;
   }
   payload_type_map_.emplace(
       payload_type, raw_payload ? std::make_unique<VideoRtpDepacketizerRaw>()
@@ -369,39 +370,13 @@
   pt_codec_params_.emplace(payload_type, codec_params);
 }
 
-void RtpVideoStreamReceiver2::RemoveReceiveCodec(uint8_t payload_type) {
-  RTC_DCHECK_RUN_ON(&packet_sequence_checker_);
-  auto codec_params_it = pt_codec_params_.find(payload_type);
-  if (codec_params_it == pt_codec_params_.end())
-    return;
-
-  const bool sps_pps_idr_in_key_frame =
-      codec_params_it->second.count(cricket::kH264FmtpSpsPpsIdrInKeyframe) > 0;
-
-  pt_codec_params_.erase(codec_params_it);
-  payload_type_map_.erase(payload_type);
-
-  if (sps_pps_idr_in_key_frame) {
-    bool reset_setting = true;
-    for (auto& [unused, codec_params] : pt_codec_params_) {
-      if (codec_params.count(cricket::kH264FmtpSpsPpsIdrInKeyframe) > 0) {
-        reset_setting = false;
-        break;
-      }
-    }
-
-    if (reset_setting) {
-      packet_buffer_.ResetSpsPpsIdrIsH264Keyframe();
-    }
-  }
-}
-
 void RtpVideoStreamReceiver2::RemoveReceiveCodecs() {
   RTC_DCHECK_RUN_ON(&packet_sequence_checker_);
 
   pt_codec_params_.clear();
   payload_type_map_.clear();
   packet_buffer_.ResetSpsPpsIdrIsH264Keyframe();
+  h26x_packet_buffer_.reset();
 }
 
 absl::optional<Syncable::Info> RtpVideoStreamReceiver2::GetSyncInfo() const {
@@ -676,7 +651,9 @@
       last_payload_type_ = packet->payload_type;
       InsertSpsPpsIntoTracker(packet->payload_type);
     }
+  }
 
+  if (packet->codec() == kVideoCodecH264 && !h26x_packet_buffer_) {
     video_coding::H264SpsPpsTracker::FixedBitstream fixed =
         tracker_.CopyAndFixBitstream(
             rtc::MakeArrayView(codec_payload.cdata(), codec_payload.size()),
@@ -700,7 +677,14 @@
 
   rtcp_feedback_buffer_.SendBufferedRtcpFeedback();
   frame_counter_.Add(packet->timestamp);
-  OnInsertedPacket(packet_buffer_.InsertPacket(std::move(packet)));
+
+  if ((packet->codec() == kVideoCodecH264 ||
+       packet->codec() == kVideoCodecH265) &&
+      h26x_packet_buffer_) {
+    OnInsertedPacket(h26x_packet_buffer_->InsertPacket(std::move(packet)));
+  } else {
+    OnInsertedPacket(packet_buffer_.InsertPacket(std::move(packet)));
+  }
   return false;
 }
 
@@ -1254,6 +1238,13 @@
 
 void RtpVideoStreamReceiver2::StartReceive() {
   RTC_DCHECK_RUN_ON(&packet_sequence_checker_);
+  // |h26x_packet_buffer_| is created here instead of in the ctor because we
+  // need to know the value of |sps_pps_idr_is_h264_keyframe_|.
+  if (field_trials_.IsEnabled("WebRTC-Video-H26xPacketBuffer") &&
+      !h26x_packet_buffer_) {
+    h26x_packet_buffer_ =
+        std::make_unique<H26xPacketBuffer>(!sps_pps_idr_is_h264_keyframe_);
+  }
   if (!receiving_ && packet_router_) {
     // Change REMB candidate egibility.
     packet_router_->RemoveReceiveRtpModule(rtp_rtcp_.get());
@@ -1298,6 +1289,10 @@
 
   tracker_.InsertSpsPpsNalus(sprop_decoder.sps_nalu(),
                              sprop_decoder.pps_nalu());
+
+  if (h26x_packet_buffer_) {
+    h26x_packet_buffer_->SetSpropParameterSets(sprop_base64_it->second);
+  }
 }
 
 void RtpVideoStreamReceiver2::UpdatePacketReceiveTimestamps(
diff --git a/video/rtp_video_stream_receiver2.h b/video/rtp_video_stream_receiver2.h
index d436aa3..1621283 100644
--- a/video/rtp_video_stream_receiver2.h
+++ b/video/rtp_video_stream_receiver2.h
@@ -39,6 +39,7 @@
 #include "modules/rtp_rtcp/source/rtp_video_stream_receiver_frame_transformer_delegate.h"
 #include "modules/rtp_rtcp/source/video_rtp_depacketizer.h"
 #include "modules/video_coding/h264_sps_pps_tracker.h"
+#include "modules/video_coding/h26x_packet_buffer.h"
 #include "modules/video_coding/loss_notification_controller.h"
 #include "modules/video_coding/nack_requester.h"
 #include "modules/video_coding/packet_buffer.h"
@@ -104,7 +105,6 @@
                        VideoCodecType video_codec,
                        const webrtc::CodecParameterMap& codec_params,
                        bool raw_payload);
-  void RemoveReceiveCodec(uint8_t payload_type);
 
   // Clears state for all receive codecs added via `AddReceiveCodec`.
   void RemoveReceiveCodecs();
@@ -360,6 +360,10 @@
 
   video_coding::PacketBuffer packet_buffer_
       RTC_GUARDED_BY(packet_sequence_checker_);
+  // h26x_packet_buffer_ is nullptr if codec list doesn't contain H.264 or
+  // H.265, or field trial WebRTC-Video-H26xPacketBuffer is not enabled.
+  std::unique_ptr<H26xPacketBuffer> h26x_packet_buffer_
+      RTC_GUARDED_BY(packet_sequence_checker_);
   UniqueTimestampCounter frame_counter_
       RTC_GUARDED_BY(packet_sequence_checker_);
   SeqNumUnwrapper<uint16_t> frame_id_unwrapper_
@@ -438,6 +442,7 @@
 
   Timestamp next_keyframe_request_for_missing_video_structure_ =
       Timestamp::MinusInfinity();
+  bool sps_pps_idr_is_h264_keyframe_ = false;
 };
 
 }  // namespace webrtc
diff --git a/video/rtp_video_stream_receiver2_unittest.cc b/video/rtp_video_stream_receiver2_unittest.cc
index 6962540..2d5956a 100644
--- a/video/rtp_video_stream_receiver2_unittest.cc
+++ b/video/rtp_video_stream_receiver2_unittest.cc
@@ -518,17 +518,28 @@
   RtpVideoStreamReceiver2TestH264() : RtpVideoStreamReceiver2Test(GetParam()) {}
 };
 
-INSTANTIATE_TEST_SUITE_P(SpsPpsIdrIsKeyframe,
+INSTANTIATE_TEST_SUITE_P(SpsPpsIdrIsKeyframeAndH26xPacketBuffer,
                          RtpVideoStreamReceiver2TestH264,
-                         Values("", "WebRTC-SpsPpsIdrIsH264Keyframe/Enabled/"));
+                         Values("",
+                                "WebRTC-SpsPpsIdrIsH264Keyframe/Enabled/",
+                                "WebRTC-Video-H26xPacketBuffer/Enabled/",
+                                "WebRTC-SpsPpsIdrIsH264Keyframe/Enabled/"
+                                "WebRTC-Video-H26xPacketBuffer/Enabled/"));
 
 TEST_P(RtpVideoStreamReceiver2TestH264, InBandSpsPps) {
+  constexpr int kH264PayloadType = 98;
+  webrtc::CodecParameterMap codec_params;
+  rtp_video_stream_receiver_->AddReceiveCodec(kH264PayloadType, kVideoCodecH264,
+                                              codec_params,
+                                              /*raw_payload=*/false);
+  rtp_video_stream_receiver_->StartReceive();
+
   rtc::CopyOnWriteBuffer sps_data;
   RtpPacketReceived rtp_packet;
   RTPVideoHeader sps_video_header = GetDefaultH264VideoHeader();
   AddSps(&sps_video_header, 0, &sps_data);
   rtp_packet.SetSequenceNumber(0);
-  rtp_packet.SetPayloadType(kPayloadType);
+  rtp_packet.SetPayloadType(kH264PayloadType);
   sps_video_header.is_first_packet_in_frame = true;
   sps_video_header.frame_type = VideoFrameType::kEmptyFrame;
   mock_on_complete_frame_callback_.AppendExpectedBitstream(
@@ -542,6 +553,7 @@
   RTPVideoHeader pps_video_header = GetDefaultH264VideoHeader();
   AddPps(&pps_video_header, 0, 1, &pps_data);
   rtp_packet.SetSequenceNumber(1);
+  rtp_packet.SetPayloadType(kH264PayloadType);
   pps_video_header.is_first_packet_in_frame = true;
   pps_video_header.frame_type = VideoFrameType::kEmptyFrame;
   mock_on_complete_frame_callback_.AppendExpectedBitstream(
@@ -555,6 +567,8 @@
   RTPVideoHeader idr_video_header = GetDefaultH264VideoHeader();
   AddIdr(&idr_video_header, 1);
   rtp_packet.SetSequenceNumber(2);
+  rtp_packet.SetPayloadType(kH264PayloadType);
+  rtp_packet.SetMarker(true);
   idr_video_header.is_first_packet_in_frame = true;
   idr_video_header.is_last_packet_in_frame = true;
   idr_video_header.frame_type = VideoFrameType::kVideoFrameKey;
@@ -579,6 +593,7 @@
   rtp_video_stream_receiver_->AddReceiveCodec(kPayloadType, kVideoCodecH264,
                                               codec_params,
                                               /*raw_payload=*/false);
+  rtp_video_stream_receiver_->StartReceive();
   const uint8_t binary_sps[] = {0x67, 0x42, 0x00, 0x0a, 0x96,
                                 0x53, 0x05, 0x89, 0x88};
   mock_on_complete_frame_callback_.AppendExpectedBitstream(
@@ -596,6 +611,7 @@
   AddIdr(&video_header, 0);
   rtp_packet.SetPayloadType(kPayloadType);
   rtp_packet.SetSequenceNumber(2);
+  rtp_packet.SetMarker(true);
   video_header.is_first_packet_in_frame = true;
   video_header.is_last_packet_in_frame = true;
   video_header.codec = kVideoCodecH264;
@@ -605,7 +621,13 @@
       kH264StartCode, sizeof(kH264StartCode));
   mock_on_complete_frame_callback_.AppendExpectedBitstream(data.data(),
                                                            data.size());
-  EXPECT_CALL(mock_on_complete_frame_callback_, DoOnCompleteFrame(_));
+  // IDR frames without SPS/PPS are not returned by
+  // |H26xPacketBuffer.InsertPacket| until SPS and PPS are received when
+  // WebRTC-SpsPpsIdrIsH264Keyframe is enabled.
+  if (!field_trials_.IsEnabled("WebRTC-SpsPpsIdrIsH264Keyframe") ||
+      !field_trials_.IsEnabled("WebRTC-Video-H26xPacketBuffer")) {
+    EXPECT_CALL(mock_on_complete_frame_callback_, DoOnCompleteFrame(_));
+  }
   rtp_video_stream_receiver_->OnReceivedPayloadData(data, rtp_packet,
                                                     video_header, 0);
 }
@@ -613,13 +635,14 @@
 TEST_P(RtpVideoStreamReceiver2TestH264, ForceSpsPpsIdrIsKeyframe) {
   constexpr int kPayloadType = 99;
   webrtc::CodecParameterMap codec_params;
-  if (GetParam() ==
-      "") {  // Forcing can be done either with field trial or codec_params.
+  // Forcing can be done either with field trial or codec_params.
+  if (!field_trials_.IsEnabled("WebRTC-SpsPpsIdrIsH264Keyframe")) {
     codec_params.insert({cricket::kH264FmtpSpsPpsIdrInKeyframe, ""});
   }
   rtp_video_stream_receiver_->AddReceiveCodec(kPayloadType, kVideoCodecH264,
                                               codec_params,
                                               /*raw_payload=*/false);
+  rtp_video_stream_receiver_->StartReceive();
   rtc::CopyOnWriteBuffer sps_data;
   RtpPacketReceived rtp_packet;
   RTPVideoHeader sps_video_header = GetDefaultH264VideoHeader();
@@ -652,6 +675,7 @@
   RTPVideoHeader idr_video_header = GetDefaultH264VideoHeader();
   AddIdr(&idr_video_header, 1);
   rtp_packet.SetSequenceNumber(2);
+  rtp_packet.SetMarker(true);
   idr_video_header.is_first_packet_in_frame = true;
   idr_video_header.is_last_packet_in_frame = true;
   idr_video_header.frame_type = VideoFrameType::kVideoFrameKey;
@@ -672,9 +696,16 @@
   mock_on_complete_frame_callback_.AppendExpectedBitstream(idr_data.data(),
                                                            idr_data.size());
   rtp_packet.SetSequenceNumber(3);
-  EXPECT_CALL(mock_on_complete_frame_callback_, DoOnCompleteFrame)
-      .WillOnce(
-          [&](EncodedFrame* frame) { EXPECT_FALSE(frame->is_keyframe()); });
+  // IDR frames without SPS/PPS are not returned by
+  // |H26xPacketBuffer.InsertPacket| until SPS and PPS are received, while
+  // |PacketBuffer| returns it as a delta frame.
+  if (field_trials_.IsEnabled("WebRTC-Video-H26xPacketBuffer")) {
+    EXPECT_CALL(mock_on_complete_frame_callback_, DoOnCompleteFrame).Times(0);
+  } else {
+    EXPECT_CALL(mock_on_complete_frame_callback_, DoOnCompleteFrame)
+        .WillOnce(
+            [&](EncodedFrame* frame) { EXPECT_FALSE(frame->is_keyframe()); });
+  }
   rtp_video_stream_receiver_->OnReceivedPayloadData(idr_data, rtp_packet,
                                                     idr_video_header, 0);
 }
@@ -1333,4 +1364,73 @@
       received_packet.PayloadBuffer(), received_packet, video_header, 0);
 }
 
+#ifdef RTC_ENABLE_H265
+RTPVideoHeader GetDefaultH265VideoHeader() {
+  RTPVideoHeader video_header;
+  video_header.codec = kVideoCodecH265;
+  return video_header;
+}
+
+class RtpVideoStreamReceiver2TestH265 : public RtpVideoStreamReceiver2Test {
+ protected:
+  RtpVideoStreamReceiver2TestH265()
+      : RtpVideoStreamReceiver2Test("WebRTC-Video-H26xPacketBuffer/Enabled/") {}
+};
+
+TEST_F(RtpVideoStreamReceiver2TestH265, H265Bitstream) {
+  constexpr int kH265PayloadType = 98;
+  webrtc::CodecParameterMap codec_params;
+  rtp_video_stream_receiver_->AddReceiveCodec(kH265PayloadType, kVideoCodecH265,
+                                              codec_params,
+                                              /*raw_payload=*/false);
+  rtp_video_stream_receiver_->StartReceive();
+
+  // Data is generated by WebCodecs H265 encoder, with 720p fake media capturer.
+  // IDR is not complete.
+  constexpr uint8_t vps[] = {0x00, 0x00, 0x00, 0x01, 0x40, 0x01, 0x0c, 0x01,
+                             0xff, 0xff, 0x21, 0x40, 0x00, 0x00, 0x03, 0x00,
+                             0x90, 0x00, 0x00, 0x03, 0x00, 0x00, 0x03, 0x00,
+                             0x7b, 0x3c, 0x0c, 0x00, 0x00, 0x03, 0x00, 0x04,
+                             0x00, 0x00, 0x03, 0x00, 0x79, 0x40};
+  constexpr uint8_t sps[] = {
+      0x00, 0x00, 0x00, 0x01, 0x42, 0x01, 0x01, 0x21, 0x40, 0x00, 0x00, 0x03,
+      0x00, 0x90, 0x00, 0x00, 0x03, 0x00, 0x00, 0x03, 0x00, 0x7b, 0xa0, 0x02,
+      0x80, 0x80, 0x2d, 0x16, 0x8f, 0x92, 0x46, 0xd9, 0x3f, 0xf6, 0x02, 0x80,
+      0x10, 0x00, 0x00, 0x03, 0x00, 0x10, 0x00, 0x00, 0x03, 0x01, 0xe6, 0x45,
+      0xde, 0xf7, 0xe0, 0x04, 0x65, 0x00, 0x23, 0x28, 0x80};
+  constexpr uint8_t pps[] = {0x00, 0x00, 0x00, 0x01, 0x44, 0x01,
+                             0xc0, 0x25, 0x64, 0xc0, 0xed, 0x90};
+  constexpr uint8_t idr[] = {0x00, 0x00, 0x00, 0x01, 0x26, 0x01, 0xaf,
+                             0xb0, 0x87, 0x11, 0x7a, 0xc1, 0x45, 0x57,
+                             0x3f, 0xff, 0x57, 0x14, 0x5f, 0xf7, 0x7a,
+                             0x37, 0xfd, 0xe3, 0xd9};
+
+  RtpPacketReceived rtp_packet;
+  rtp_packet.SetPayloadType(kPayloadType);
+  rtp_packet.SetSequenceNumber(0);
+  rtp_packet.SetPayloadType(kH265PayloadType);
+  RTPVideoHeader video_header = GetDefaultH265VideoHeader();
+  mock_on_complete_frame_callback_.AppendExpectedBitstream(vps, sizeof(vps));
+  rtp_video_stream_receiver_->OnReceivedPayloadData(
+      rtc::CopyOnWriteBuffer(vps, sizeof(vps)), rtp_packet, video_header, 0);
+
+  rtp_packet.SetSequenceNumber(1);
+  mock_on_complete_frame_callback_.AppendExpectedBitstream(sps, sizeof(sps));
+  rtp_video_stream_receiver_->OnReceivedPayloadData(
+      rtc::CopyOnWriteBuffer(sps, sizeof(sps)), rtp_packet, video_header, 0);
+
+  rtp_packet.SetSequenceNumber(2);
+  mock_on_complete_frame_callback_.AppendExpectedBitstream(pps, sizeof(pps));
+  rtp_video_stream_receiver_->OnReceivedPayloadData(
+      rtc::CopyOnWriteBuffer(pps, sizeof(pps)), rtp_packet, video_header, 0);
+
+  rtp_packet.SetSequenceNumber(3);
+  rtp_packet.SetMarker(true);
+  mock_on_complete_frame_callback_.AppendExpectedBitstream(idr, sizeof(idr));
+  EXPECT_CALL(mock_on_complete_frame_callback_, DoOnCompleteFrame(_));
+  rtp_video_stream_receiver_->OnReceivedPayloadData(
+      rtc::CopyOnWriteBuffer(idr, sizeof(idr)), rtp_packet, video_header, 0);
+}
+#endif  // RTC_ENABLE_H265
+
 }  // namespace webrtc