Support conversion of VP9 non-flexible mode to generic descriptor for non-layered streams only.
When VP9 HW encoders don't provide any metadata, a minimal non-flexible mode structure is generated for the stream (see https://source.chromium.org/chromium/chromium/src/+/refs/heads/main:third_party/blink/renderer/platform/peerconnection/rtc_video_encoder.cc;l=1275-1298;drc=f80633b34538615fcb73515ad8c4bc56a748abfe).
Bug: chromium:1455428, b/286993839, b/287458300
Change-Id: I72628f20927d685e9c8ba1744126d763896bd804
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/309380
Commit-Queue: Philip Eliasson <philipel@webrtc.org>
Reviewed-by: Erik Språng <sprang@webrtc.org>
Reviewed-by: Henrik Boström <hbos@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#40316}
diff --git a/call/rtp_payload_params.cc b/call/rtp_payload_params.cc
index 18e6d91..f0347bc 100644
--- a/call/rtp_payload_params.cc
+++ b/call/rtp_payload_params.cc
@@ -604,23 +604,42 @@
// Create the array only if it is ever used.
last_vp9_frame_id_.resize(kPictureDiffLimit);
}
- if (vp9_header.inter_layer_predicted && spatial_index > 0) {
- result.dependencies.push_back(
- last_vp9_frame_id_[vp9_header.picture_id % kPictureDiffLimit]
- [spatial_index - 1]);
- }
- if (vp9_header.inter_pic_predicted) {
- for (size_t i = 0; i < vp9_header.num_ref_pics; ++i) {
- // picture_id is 15 bit number that wraps around. Though undeflow may
- // produce picture that exceeds 2^15, it is ok because in this
- // code block only last 7 bits of the picture_id are used.
- uint16_t depend_on = vp9_header.picture_id - vp9_header.pid_diff[i];
+
+ if (vp9_header.flexible_mode) {
+ if (vp9_header.inter_layer_predicted && spatial_index > 0) {
result.dependencies.push_back(
- last_vp9_frame_id_[depend_on % kPictureDiffLimit][spatial_index]);
+ last_vp9_frame_id_[vp9_header.picture_id % kPictureDiffLimit]
+ [spatial_index - 1]);
}
+ if (vp9_header.inter_pic_predicted) {
+ for (size_t i = 0; i < vp9_header.num_ref_pics; ++i) {
+ // picture_id is 15 bit number that wraps around. Though underflow may
+ // produce picture that exceeds 2^15, it is ok because in this
+ // code block only last 7 bits of the picture_id are used.
+ uint16_t depend_on = vp9_header.picture_id - vp9_header.pid_diff[i];
+ result.dependencies.push_back(
+ last_vp9_frame_id_[depend_on % kPictureDiffLimit][spatial_index]);
+ }
+ }
+ last_vp9_frame_id_[vp9_header.picture_id % kPictureDiffLimit]
+ [spatial_index] = shared_frame_id;
+ } else {
+ // Implementing general conversion logic for non-flexible mode requires some
+ // work and we will almost certainly never need it, so for now support only
+ // non-layered streams.
+ if (spatial_index > 0 || temporal_index > 0) {
+ // Prefer to generate no generic layering rather than an inconsistent one.
+ rtp_video_header.generic.reset();
+ return;
+ }
+
+ if (vp9_header.inter_pic_predicted) {
+ // Since we only support non-scalable streams we only need to save the
+ // last frame id.
+ result.dependencies.push_back(last_vp9_frame_id_[0][0]);
+ }
+ last_vp9_frame_id_[0][0] = shared_frame_id;
}
- last_vp9_frame_id_[vp9_header.picture_id % kPictureDiffLimit][spatial_index] =
- shared_frame_id;
result.active_decode_targets =
((uint32_t{1} << num_temporal_layers * num_active_spatial_layers) - 1);
diff --git a/call/rtp_payload_params_unittest.cc b/call/rtp_payload_params_unittest.cc
index cfd0101..8481b5f 100644
--- a/call/rtp_payload_params_unittest.cc
+++ b/call/rtp_payload_params_unittest.cc
@@ -567,6 +567,7 @@
EncodedImage encoded_image;
CodecSpecificInfo codec_info;
codec_info.codecType = kVideoCodecVP9;
+ codec_info.codecSpecific.VP9.flexible_mode = true;
codec_info.codecSpecific.VP9.num_spatial_layers = 1;
codec_info.codecSpecific.VP9.temporal_idx = kNoTemporalIdx;
codec_info.codecSpecific.VP9.first_frame_in_picture = true;
@@ -611,6 +612,55 @@
EXPECT_EQ(header.generic->chain_diffs[0], 3 - 1);
}
+TEST(RtpPayloadParamsVp9ToGenericTest, NoScalabilityNonFlexibleMode) {
+ RtpPayloadState state;
+ RtpPayloadParams params(/*ssrc=*/123, &state, FieldTrialBasedConfig());
+
+ EncodedImage encoded_image;
+ CodecSpecificInfo codec_info;
+ codec_info.codecType = kVideoCodecVP9;
+ codec_info.codecSpecific.VP9.flexible_mode = false;
+ codec_info.codecSpecific.VP9.num_spatial_layers = 1;
+ codec_info.codecSpecific.VP9.temporal_idx = kNoTemporalIdx;
+ codec_info.codecSpecific.VP9.first_frame_in_picture = true;
+ codec_info.end_of_picture = true;
+
+ // Key frame.
+ encoded_image._frameType = VideoFrameType::kVideoFrameKey;
+ codec_info.codecSpecific.VP9.inter_pic_predicted = false;
+ RTPVideoHeader key_header =
+ params.GetRtpVideoHeader(encoded_image, &codec_info,
+ /*shared_frame_id=*/1);
+
+ ASSERT_TRUE(key_header.generic);
+ EXPECT_EQ(key_header.generic->spatial_index, 0);
+ EXPECT_EQ(key_header.generic->temporal_index, 0);
+ EXPECT_EQ(key_header.generic->frame_id, 1);
+ ASSERT_THAT(key_header.generic->decode_target_indications, Not(IsEmpty()));
+ EXPECT_EQ(key_header.generic->decode_target_indications[0],
+ DecodeTargetIndication::kSwitch);
+ EXPECT_THAT(key_header.generic->dependencies, IsEmpty());
+ ASSERT_THAT(key_header.generic->chain_diffs, Not(IsEmpty()));
+ EXPECT_EQ(key_header.generic->chain_diffs[0], 0);
+
+ encoded_image._frameType = VideoFrameType::kVideoFrameDelta;
+ codec_info.codecSpecific.VP9.inter_pic_predicted = true;
+ RTPVideoHeader delta_header =
+ params.GetRtpVideoHeader(encoded_image, &codec_info,
+ /*shared_frame_id=*/3);
+
+ ASSERT_TRUE(delta_header.generic);
+ EXPECT_EQ(delta_header.generic->spatial_index, 0);
+ EXPECT_EQ(delta_header.generic->temporal_index, 0);
+ EXPECT_EQ(delta_header.generic->frame_id, 3);
+ ASSERT_THAT(delta_header.generic->decode_target_indications, Not(IsEmpty()));
+ EXPECT_EQ(delta_header.generic->decode_target_indications[0],
+ DecodeTargetIndication::kSwitch);
+ EXPECT_THAT(delta_header.generic->dependencies, ElementsAre(1));
+ ASSERT_THAT(delta_header.generic->chain_diffs, Not(IsEmpty()));
+ EXPECT_EQ(delta_header.generic->chain_diffs[0], 3 - 1);
+}
+
TEST(RtpPayloadParamsVp9ToGenericTest, TemporalScalabilityWith2Layers) {
// Test with 2 temporal layers structure that is not used by webrtc:
// 1---3 5
@@ -622,6 +672,7 @@
EncodedImage image;
CodecSpecificInfo info;
info.codecType = kVideoCodecVP9;
+ info.codecSpecific.VP9.flexible_mode = true;
info.codecSpecific.VP9.num_spatial_layers = 1;
info.codecSpecific.VP9.first_frame_in_picture = true;
info.end_of_picture = true;
@@ -732,6 +783,7 @@
EncodedImage image;
CodecSpecificInfo info;
info.codecType = kVideoCodecVP9;
+ info.codecSpecific.VP9.flexible_mode = true;
info.codecSpecific.VP9.num_spatial_layers = 1;
info.codecSpecific.VP9.first_frame_in_picture = true;
info.end_of_picture = true;
@@ -885,6 +937,7 @@
EncodedImage image;
CodecSpecificInfo info;
info.codecType = kVideoCodecVP9;
+ info.codecSpecific.VP9.flexible_mode = true;
info.codecSpecific.VP9.num_spatial_layers = 2;
info.codecSpecific.VP9.first_frame_in_picture = true;
@@ -993,6 +1046,7 @@
EncodedImage image;
CodecSpecificInfo info;
info.codecType = kVideoCodecVP9;
+ info.codecSpecific.VP9.flexible_mode = true;
info.codecSpecific.VP9.num_spatial_layers = 1;
info.codecSpecific.VP9.first_frame_in_picture = true;