Save unwrapped `tl0_pic_idx` for inserted VP9 frames.

As stashed frames are retried their `tl0_pic_idx` are again unwrapped which can lead to the `tl0_unwrapper_` to unwrap the `tl0_pic_idx` of newer frames backwards. Instead unwrap the `tl0_pid_idx` only once and save it with the frame if necessary.

In this CL
  - Only unwrap the TL0 once in ManageFrame.
  - Split ManageFrameInternal into ManageFrameFlexible and ManageFrameGof.
  - Save the unwrapped TL0 with the stashed frame.

Bug: none
Change-Id: I56e6b071c0082682e010c049c537d66060635567
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/253844
Reviewed-by: Danil Chapovalov <danilchap@webrtc.org>
Commit-Queue: Philip Eliasson <philipel@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#36146}
diff --git a/modules/video_coding/rtp_vp9_ref_finder.cc b/modules/video_coding/rtp_vp9_ref_finder.cc
index d4e3005..7a1f946 100644
--- a/modules/video_coding/rtp_vp9_ref_finder.cc
+++ b/modules/video_coding/rtp_vp9_ref_finder.cc
@@ -16,17 +16,44 @@
 #include "rtc_base/logging.h"
 
 namespace webrtc {
-
 RtpFrameReferenceFinder::ReturnVector RtpVp9RefFinder::ManageFrame(
     std::unique_ptr<RtpFrameObject> frame) {
-  FrameDecision decision = ManageFrameInternal(frame.get());
+  const RTPVideoHeaderVP9& codec_header = absl::get<RTPVideoHeaderVP9>(
+      frame->GetRtpVideoHeader().video_type_header);
+
+  frame->SetSpatialIndex(codec_header.spatial_idx);
+  frame->SetId(codec_header.picture_id & (kFrameIdLength - 1));
+
+  FrameDecision decision;
+  if (codec_header.temporal_idx >= kMaxTemporalLayers ||
+      codec_header.spatial_idx >= kMaxSpatialLayers) {
+    decision = kDrop;
+  } else if (codec_header.flexible_mode) {
+    decision = ManageFrameFlexible(frame.get(), codec_header);
+  } else {
+    if (codec_header.tl0_pic_idx == kNoTl0PicIdx) {
+      RTC_LOG(LS_WARNING) << "TL0PICIDX is expected to be present in "
+                             "non-flexible mode.";
+      decision = kDrop;
+    } else {
+      int64_t unwrapped_tl0 =
+          tl0_unwrapper_.Unwrap(codec_header.tl0_pic_idx & 0xFF);
+      decision = ManageFrameGof(frame.get(), codec_header, unwrapped_tl0);
+
+      if (decision == kStash) {
+        if (stashed_frames_.size() > kMaxStashedFrames) {
+          stashed_frames_.pop_back();
+        }
+
+        stashed_frames_.push_front(
+            {.unwrapped_tl0 = unwrapped_tl0, .frame = std::move(frame)});
+      }
+    }
+  }
 
   RtpFrameReferenceFinder::ReturnVector res;
   switch (decision) {
     case kStash:
-      if (stashed_frames_.size() > kMaxStashedFrames)
-        stashed_frames_.pop_back();
-      stashed_frames_.push_front(std::move(frame));
       return res;
     case kHandOff:
       res.push_back(std::move(frame));
@@ -39,43 +66,28 @@
   return res;
 }
 
-RtpVp9RefFinder::FrameDecision RtpVp9RefFinder::ManageFrameInternal(
-    RtpFrameObject* frame) {
-  const RTPVideoHeader& video_header = frame->GetRtpVideoHeader();
-  const RTPVideoHeaderVP9& codec_header =
-      absl::get<RTPVideoHeaderVP9>(video_header.video_type_header);
-
-  // Protect against corrupted packets with arbitrary large temporal idx.
-  if (codec_header.temporal_idx >= kMaxTemporalLayers ||
-      codec_header.spatial_idx >= kMaxSpatialLayers)
-    return kDrop;
-
-  frame->SetSpatialIndex(codec_header.spatial_idx);
-  frame->SetId(codec_header.picture_id & (kFrameIdLength - 1));
-
-  if (codec_header.flexible_mode) {
-    if (codec_header.num_ref_pics > EncodedFrame::kMaxFrameReferences) {
-      return kDrop;
-    }
-    frame->num_references = codec_header.num_ref_pics;
-    for (size_t i = 0; i < frame->num_references; ++i) {
-      frame->references[i] =
-          Subtract<kFrameIdLength>(frame->Id(), codec_header.pid_diff[i]);
-    }
-
-    FlattenFrameIdAndRefs(frame, codec_header.inter_layer_predicted);
-    return kHandOff;
-  }
-
-  if (codec_header.tl0_pic_idx == kNoTl0PicIdx) {
-    RTC_LOG(LS_WARNING) << "TL0PICIDX is expected to be present in "
-                           "non-flexible mode.";
+RtpVp9RefFinder::FrameDecision RtpVp9RefFinder::ManageFrameFlexible(
+    RtpFrameObject* frame,
+    const RTPVideoHeaderVP9& codec_header) {
+  if (codec_header.num_ref_pics > EncodedFrame::kMaxFrameReferences) {
     return kDrop;
   }
 
+  frame->num_references = codec_header.num_ref_pics;
+  for (size_t i = 0; i < frame->num_references; ++i) {
+    frame->references[i] =
+        Subtract<kFrameIdLength>(frame->Id(), codec_header.pid_diff[i]);
+  }
+
+  FlattenFrameIdAndRefs(frame, codec_header.inter_layer_predicted);
+  return kHandOff;
+}
+
+RtpVp9RefFinder::FrameDecision RtpVp9RefFinder::ManageFrameGof(
+    RtpFrameObject* frame,
+    const RTPVideoHeaderVP9& codec_header,
+    int64_t unwrapped_tl0) {
   GofInfo* info;
-  int64_t unwrapped_tl0 =
-      tl0_unwrapper_.Unwrap(codec_header.tl0_pic_idx & 0xFF);
   if (codec_header.ss_data_available) {
     if (codec_header.temporal_idx != 0) {
       RTC_LOG(LS_WARNING) << "Received scalability structure on a non base "
@@ -300,20 +312,23 @@
   bool complete_frame = false;
   do {
     complete_frame = false;
-    for (auto frame_it = stashed_frames_.begin();
-         frame_it != stashed_frames_.end();) {
-      FrameDecision decision = ManageFrameInternal(frame_it->get());
+    for (auto it = stashed_frames_.begin(); it != stashed_frames_.end();) {
+      const RTPVideoHeaderVP9& codec_header = absl::get<RTPVideoHeaderVP9>(
+          it->frame->GetRtpVideoHeader().video_type_header);
+      RTC_DCHECK(!codec_header.flexible_mode);
+      FrameDecision decision =
+          ManageFrameGof(it->frame.get(), codec_header, it->unwrapped_tl0);
 
       switch (decision) {
         case kStash:
-          ++frame_it;
+          ++it;
           break;
         case kHandOff:
           complete_frame = true;
-          res.push_back(std::move(*frame_it));
+          res.push_back(std::move(it->frame));
           [[fallthrough]];
         case kDrop:
-          frame_it = stashed_frames_.erase(frame_it);
+          it = stashed_frames_.erase(it);
       }
     }
   } while (complete_frame);
@@ -339,7 +354,7 @@
 void RtpVp9RefFinder::ClearTo(uint16_t seq_num) {
   auto it = stashed_frames_.begin();
   while (it != stashed_frames_.end()) {
-    if (AheadOf<uint16_t>(seq_num, (*it)->first_seq_num())) {
+    if (AheadOf<uint16_t>(seq_num, it->frame->first_seq_num())) {
       it = stashed_frames_.erase(it);
     } else {
       ++it;
diff --git a/modules/video_coding/rtp_vp9_ref_finder.h b/modules/video_coding/rtp_vp9_ref_finder.h
index 00de8ca..2971f68 100644
--- a/modules/video_coding/rtp_vp9_ref_finder.h
+++ b/modules/video_coding/rtp_vp9_ref_finder.h
@@ -48,7 +48,16 @@
     uint16_t last_picture_id;
   };
 
-  FrameDecision ManageFrameInternal(RtpFrameObject* frame);
+  struct UnwrappedTl0Frame {
+    int64_t unwrapped_tl0;
+    std::unique_ptr<RtpFrameObject> frame;
+  };
+
+  FrameDecision ManageFrameFlexible(RtpFrameObject* frame,
+                                    const RTPVideoHeaderVP9& vp9_header);
+  FrameDecision ManageFrameGof(RtpFrameObject* frame,
+                               const RTPVideoHeaderVP9& vp9_header,
+                               int64_t unwrapped_tl0);
   void RetryStashedFrames(RtpFrameReferenceFinder::ReturnVector& res);
 
   bool MissingRequiredFrameVp9(uint16_t picture_id, const GofInfo& info);
@@ -62,7 +71,7 @@
 
   // Frames that have been fully received but didn't have all the information
   // needed to determine their references.
-  std::deque<std::unique_ptr<RtpFrameObject>> stashed_frames_;
+  std::deque<UnwrappedTl0Frame> stashed_frames_;
 
   // Where the current scalability structure is in the
   // `scalability_structures_` array.
diff --git a/modules/video_coding/rtp_vp9_ref_finder_unittest.cc b/modules/video_coding/rtp_vp9_ref_finder_unittest.cc
index 6de7ce1..66b284f 100644
--- a/modules/video_coding/rtp_vp9_ref_finder_unittest.cc
+++ b/modules/video_coding/rtp_vp9_ref_finder_unittest.cc
@@ -23,6 +23,7 @@
 using ::testing::MatchResultListener;
 using ::testing::Pointee;
 using ::testing::Property;
+using ::testing::SizeIs;
 using ::testing::UnorderedElementsAreArray;
 
 namespace webrtc {
@@ -702,4 +703,17 @@
               Contains(Pointee(Property(&EncodedFrame::SpatialIndex, 2))));
 }
 
+TEST_F(RtpVp9RefFinderTest, StashedFramesDoNotWrapTl0Backwards) {
+  GofInfoVP9 ss;
+  ss.SetGofInfoVP9(kTemporalStructureMode1);
+
+  Insert(Frame().Pid(0).SidAndTid(0, 0).Tl0(0));
+  EXPECT_THAT(frames_, SizeIs(0));
+
+  Insert(Frame().Pid(128).SidAndTid(0, 0).Tl0(128).AsKeyFrame().Gof(&ss));
+  EXPECT_THAT(frames_, SizeIs(1));
+  Insert(Frame().Pid(129).SidAndTid(0, 0).Tl0(129));
+  EXPECT_THAT(frames_, SizeIs(2));
+}
+
 }  // namespace webrtc