Continue looking for frames after padding packets

In H264, reordered packets can cause a frame following padding to become stuck in the packet buffer.
A minimal example:
_, P, 1  - padding packet p and frame 1. Frame 1 has not been returned because of missing packet 0
0, P, 1  - when packet 0 arrives, FindFrames will stop incrementing i when it sees padding packet P, and frame 1 will never be returned

Bug: webrtc:14216
Change-Id: I78b76df9709fa8593c5025d647e2b868af3749f2
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/266465
Reviewed-by: Philip Eliasson <philipel@webrtc.org>
Commit-Queue: Philip Eliasson <philipel@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#37357}
diff --git a/modules/video_coding/packet_buffer.cc b/modules/video_coding/packet_buffer.cc
index adc18dd..477ba10 100644
--- a/modules/video_coding/packet_buffer.cc
+++ b/modules/video_coding/packet_buffer.cc
@@ -107,6 +107,10 @@
 
   UpdateMissingPackets(seq_num);
 
+  received_padding_.erase(
+      received_padding_.begin(),
+      received_padding_.lower_bound(seq_num - (buffer_.size() / 4)));
+
   result.packets = FindFrames(seq_num);
   return result;
 }
@@ -140,11 +144,11 @@
   first_seq_num_ = seq_num;
 
   is_cleared_to_first_seq_num_ = true;
-  auto clear_to_it = missing_packets_.upper_bound(seq_num);
-  if (clear_to_it != missing_packets_.begin()) {
-    --clear_to_it;
-    missing_packets_.erase(missing_packets_.begin(), clear_to_it);
-  }
+  missing_packets_.erase(missing_packets_.begin(),
+                         missing_packets_.lower_bound(seq_num));
+
+  received_padding_.erase(received_padding_.begin(),
+                          received_padding_.lower_bound(seq_num));
 }
 
 void PacketBuffer::Clear() {
@@ -154,6 +158,7 @@
 PacketBuffer::InsertResult PacketBuffer::InsertPadding(uint16_t seq_num) {
   PacketBuffer::InsertResult result;
   UpdateMissingPackets(seq_num);
+  received_padding_.insert(seq_num);
   result.packets = FindFrames(static_cast<uint16_t>(seq_num + 1));
   return result;
 }
@@ -171,6 +176,7 @@
   is_cleared_to_first_seq_num_ = false;
   newest_inserted_seq_num_.reset();
   missing_packets_.clear();
+  received_padding_.clear();
 }
 
 bool PacketBuffer::ExpandBufferSize() {
@@ -219,7 +225,18 @@
 std::vector<std::unique_ptr<PacketBuffer::Packet>> PacketBuffer::FindFrames(
     uint16_t seq_num) {
   std::vector<std::unique_ptr<PacketBuffer::Packet>> found_frames;
-  for (size_t i = 0; i < buffer_.size() && PotentialNewFrame(seq_num); ++i) {
+  auto start = seq_num;
+
+  for (size_t i = 0; i < buffer_.size(); ++i) {
+    if (received_padding_.find(seq_num) != received_padding_.end()) {
+      seq_num += 1;
+      continue;
+    }
+
+    if (!PotentialNewFrame(seq_num)) {
+      break;
+    }
+
     size_t index = seq_num % buffer_.size();
     buffer_[index]->continuous = true;
 
@@ -359,6 +376,8 @@
 
         missing_packets_.erase(missing_packets_.begin(),
                                missing_packets_.upper_bound(seq_num));
+        received_padding_.erase(received_padding_.lower_bound(start),
+                                received_padding_.upper_bound(seq_num));
       }
     }
     ++seq_num;
diff --git a/modules/video_coding/packet_buffer.h b/modules/video_coding/packet_buffer.h
index 51528a6..680955a 100644
--- a/modules/video_coding/packet_buffer.h
+++ b/modules/video_coding/packet_buffer.h
@@ -117,6 +117,8 @@
   absl::optional<uint16_t> newest_inserted_seq_num_;
   std::set<uint16_t, DescendingSeqNumComp<uint16_t>> missing_packets_;
 
+  std::set<uint16_t, DescendingSeqNumComp<uint16_t>> received_padding_;
+
   // Indicates if we should require SPS, PPS, and IDR for a particular
   // RTP timestamp to treat the corresponding frame as a keyframe.
   bool sps_pps_idr_is_h264_keyframe_;
diff --git a/modules/video_coding/packet_buffer_unittest.cc b/modules/video_coding/packet_buffer_unittest.cc
index 2ad7714..49afa14 100644
--- a/modules/video_coding/packet_buffer_unittest.cc
+++ b/modules/video_coding/packet_buffer_unittest.cc
@@ -719,6 +719,18 @@
   EXPECT_THAT(packet_buffer_.InsertPadding(1), StartSeqNumsAre(2));
 }
 
+TEST_P(PacketBufferH264ParameterizedTest, FindFramesOnReorderedPadding) {
+  EXPECT_THAT(InsertH264(0, kKeyFrame, kFirst, kLast, 1001),
+              StartSeqNumsAre(0));
+  EXPECT_THAT(InsertH264(1, kDeltaFrame, kFirst, kNotLast, 1002).packets,
+              IsEmpty());
+  EXPECT_THAT(packet_buffer_.InsertPadding(3).packets, IsEmpty());
+  EXPECT_THAT(InsertH264(4, kDeltaFrame, kFirst, kLast, 1003).packets,
+              IsEmpty());
+  EXPECT_THAT(InsertH264(2, kDeltaFrame, kNotFirst, kLast, 1002),
+              StartSeqNumsAre(1, 4));
+}
+
 class PacketBufferH264XIsKeyframeTest : public PacketBufferH264Test {
  protected:
   const uint16_t kSeqNum = 5;