Save unwrapped `tl0_pic_idx` for inserted VP9 frames.
As stashed frames are retried their `tl0_pic_idx` are again unwrapped which can lead to the `tl0_unwrapper_` to unwrap the `tl0_pic_idx` of newer frames backwards. Instead unwrap the `tl0_pid_idx` only once and save it with the frame if necessary.
In this CL
- Only unwrap the TL0 once in ManageFrame.
- Split ManageFrameInternal into ManageFrameFlexible and ManageFrameGof.
- Save the unwrapped TL0 with the stashed frame.
Bug: none
Change-Id: I56e6b071c0082682e010c049c537d66060635567
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/253844
Reviewed-by: Danil Chapovalov <danilchap@webrtc.org>
Commit-Queue: Philip Eliasson <philipel@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#36146}
diff --git a/modules/video_coding/rtp_vp9_ref_finder.cc b/modules/video_coding/rtp_vp9_ref_finder.cc
index d4e3005..7a1f946 100644
--- a/modules/video_coding/rtp_vp9_ref_finder.cc
+++ b/modules/video_coding/rtp_vp9_ref_finder.cc
@@ -16,17 +16,44 @@
#include "rtc_base/logging.h"
namespace webrtc {
-
RtpFrameReferenceFinder::ReturnVector RtpVp9RefFinder::ManageFrame(
std::unique_ptr<RtpFrameObject> frame) {
- FrameDecision decision = ManageFrameInternal(frame.get());
+ const RTPVideoHeaderVP9& codec_header = absl::get<RTPVideoHeaderVP9>(
+ frame->GetRtpVideoHeader().video_type_header);
+
+ frame->SetSpatialIndex(codec_header.spatial_idx);
+ frame->SetId(codec_header.picture_id & (kFrameIdLength - 1));
+
+ FrameDecision decision;
+ if (codec_header.temporal_idx >= kMaxTemporalLayers ||
+ codec_header.spatial_idx >= kMaxSpatialLayers) {
+ decision = kDrop;
+ } else if (codec_header.flexible_mode) {
+ decision = ManageFrameFlexible(frame.get(), codec_header);
+ } else {
+ if (codec_header.tl0_pic_idx == kNoTl0PicIdx) {
+ RTC_LOG(LS_WARNING) << "TL0PICIDX is expected to be present in "
+ "non-flexible mode.";
+ decision = kDrop;
+ } else {
+ int64_t unwrapped_tl0 =
+ tl0_unwrapper_.Unwrap(codec_header.tl0_pic_idx & 0xFF);
+ decision = ManageFrameGof(frame.get(), codec_header, unwrapped_tl0);
+
+ if (decision == kStash) {
+ if (stashed_frames_.size() > kMaxStashedFrames) {
+ stashed_frames_.pop_back();
+ }
+
+ stashed_frames_.push_front(
+ {.unwrapped_tl0 = unwrapped_tl0, .frame = std::move(frame)});
+ }
+ }
+ }
RtpFrameReferenceFinder::ReturnVector res;
switch (decision) {
case kStash:
- if (stashed_frames_.size() > kMaxStashedFrames)
- stashed_frames_.pop_back();
- stashed_frames_.push_front(std::move(frame));
return res;
case kHandOff:
res.push_back(std::move(frame));
@@ -39,43 +66,28 @@
return res;
}
-RtpVp9RefFinder::FrameDecision RtpVp9RefFinder::ManageFrameInternal(
- RtpFrameObject* frame) {
- const RTPVideoHeader& video_header = frame->GetRtpVideoHeader();
- const RTPVideoHeaderVP9& codec_header =
- absl::get<RTPVideoHeaderVP9>(video_header.video_type_header);
-
- // Protect against corrupted packets with arbitrary large temporal idx.
- if (codec_header.temporal_idx >= kMaxTemporalLayers ||
- codec_header.spatial_idx >= kMaxSpatialLayers)
- return kDrop;
-
- frame->SetSpatialIndex(codec_header.spatial_idx);
- frame->SetId(codec_header.picture_id & (kFrameIdLength - 1));
-
- if (codec_header.flexible_mode) {
- if (codec_header.num_ref_pics > EncodedFrame::kMaxFrameReferences) {
- return kDrop;
- }
- frame->num_references = codec_header.num_ref_pics;
- for (size_t i = 0; i < frame->num_references; ++i) {
- frame->references[i] =
- Subtract<kFrameIdLength>(frame->Id(), codec_header.pid_diff[i]);
- }
-
- FlattenFrameIdAndRefs(frame, codec_header.inter_layer_predicted);
- return kHandOff;
- }
-
- if (codec_header.tl0_pic_idx == kNoTl0PicIdx) {
- RTC_LOG(LS_WARNING) << "TL0PICIDX is expected to be present in "
- "non-flexible mode.";
+RtpVp9RefFinder::FrameDecision RtpVp9RefFinder::ManageFrameFlexible(
+ RtpFrameObject* frame,
+ const RTPVideoHeaderVP9& codec_header) {
+ if (codec_header.num_ref_pics > EncodedFrame::kMaxFrameReferences) {
return kDrop;
}
+ frame->num_references = codec_header.num_ref_pics;
+ for (size_t i = 0; i < frame->num_references; ++i) {
+ frame->references[i] =
+ Subtract<kFrameIdLength>(frame->Id(), codec_header.pid_diff[i]);
+ }
+
+ FlattenFrameIdAndRefs(frame, codec_header.inter_layer_predicted);
+ return kHandOff;
+}
+
+RtpVp9RefFinder::FrameDecision RtpVp9RefFinder::ManageFrameGof(
+ RtpFrameObject* frame,
+ const RTPVideoHeaderVP9& codec_header,
+ int64_t unwrapped_tl0) {
GofInfo* info;
- int64_t unwrapped_tl0 =
- tl0_unwrapper_.Unwrap(codec_header.tl0_pic_idx & 0xFF);
if (codec_header.ss_data_available) {
if (codec_header.temporal_idx != 0) {
RTC_LOG(LS_WARNING) << "Received scalability structure on a non base "
@@ -300,20 +312,23 @@
bool complete_frame = false;
do {
complete_frame = false;
- for (auto frame_it = stashed_frames_.begin();
- frame_it != stashed_frames_.end();) {
- FrameDecision decision = ManageFrameInternal(frame_it->get());
+ for (auto it = stashed_frames_.begin(); it != stashed_frames_.end();) {
+ const RTPVideoHeaderVP9& codec_header = absl::get<RTPVideoHeaderVP9>(
+ it->frame->GetRtpVideoHeader().video_type_header);
+ RTC_DCHECK(!codec_header.flexible_mode);
+ FrameDecision decision =
+ ManageFrameGof(it->frame.get(), codec_header, it->unwrapped_tl0);
switch (decision) {
case kStash:
- ++frame_it;
+ ++it;
break;
case kHandOff:
complete_frame = true;
- res.push_back(std::move(*frame_it));
+ res.push_back(std::move(it->frame));
[[fallthrough]];
case kDrop:
- frame_it = stashed_frames_.erase(frame_it);
+ it = stashed_frames_.erase(it);
}
}
} while (complete_frame);
@@ -339,7 +354,7 @@
void RtpVp9RefFinder::ClearTo(uint16_t seq_num) {
auto it = stashed_frames_.begin();
while (it != stashed_frames_.end()) {
- if (AheadOf<uint16_t>(seq_num, (*it)->first_seq_num())) {
+ if (AheadOf<uint16_t>(seq_num, it->frame->first_seq_num())) {
it = stashed_frames_.erase(it);
} else {
++it;
diff --git a/modules/video_coding/rtp_vp9_ref_finder.h b/modules/video_coding/rtp_vp9_ref_finder.h
index 00de8ca..2971f68 100644
--- a/modules/video_coding/rtp_vp9_ref_finder.h
+++ b/modules/video_coding/rtp_vp9_ref_finder.h
@@ -48,7 +48,16 @@
uint16_t last_picture_id;
};
- FrameDecision ManageFrameInternal(RtpFrameObject* frame);
+ struct UnwrappedTl0Frame {
+ int64_t unwrapped_tl0;
+ std::unique_ptr<RtpFrameObject> frame;
+ };
+
+ FrameDecision ManageFrameFlexible(RtpFrameObject* frame,
+ const RTPVideoHeaderVP9& vp9_header);
+ FrameDecision ManageFrameGof(RtpFrameObject* frame,
+ const RTPVideoHeaderVP9& vp9_header,
+ int64_t unwrapped_tl0);
void RetryStashedFrames(RtpFrameReferenceFinder::ReturnVector& res);
bool MissingRequiredFrameVp9(uint16_t picture_id, const GofInfo& info);
@@ -62,7 +71,7 @@
// Frames that have been fully received but didn't have all the information
// needed to determine their references.
- std::deque<std::unique_ptr<RtpFrameObject>> stashed_frames_;
+ std::deque<UnwrappedTl0Frame> stashed_frames_;
// Where the current scalability structure is in the
// `scalability_structures_` array.
diff --git a/modules/video_coding/rtp_vp9_ref_finder_unittest.cc b/modules/video_coding/rtp_vp9_ref_finder_unittest.cc
index 6de7ce1..66b284f 100644
--- a/modules/video_coding/rtp_vp9_ref_finder_unittest.cc
+++ b/modules/video_coding/rtp_vp9_ref_finder_unittest.cc
@@ -23,6 +23,7 @@
using ::testing::MatchResultListener;
using ::testing::Pointee;
using ::testing::Property;
+using ::testing::SizeIs;
using ::testing::UnorderedElementsAreArray;
namespace webrtc {
@@ -702,4 +703,17 @@
Contains(Pointee(Property(&EncodedFrame::SpatialIndex, 2))));
}
+TEST_F(RtpVp9RefFinderTest, StashedFramesDoNotWrapTl0Backwards) {
+ GofInfoVP9 ss;
+ ss.SetGofInfoVP9(kTemporalStructureMode1);
+
+ Insert(Frame().Pid(0).SidAndTid(0, 0).Tl0(0));
+ EXPECT_THAT(frames_, SizeIs(0));
+
+ Insert(Frame().Pid(128).SidAndTid(0, 0).Tl0(128).AsKeyFrame().Gof(&ss));
+ EXPECT_THAT(frames_, SizeIs(1));
+ Insert(Frame().Pid(129).SidAndTid(0, 0).Tl0(129));
+ EXPECT_THAT(frames_, SizeIs(2));
+}
+
} // namespace webrtc