Fix enabling DependencyDescriptor for VP9 with spatial layers
DependencyDescriptor and the VP9 wrapper understand key frames differently
when it comes to the first layer frame with spatial_id > 0.
This CL adds and uses DD's interpretation of the key frame when deciding
if DD should be supported going forward.
Bug: webrtc:11999
Change-Id: I11a809a315e18bd856bb391576c6ea1f427e33be
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/202760
Commit-Queue: Danil Chapovalov <danilchap@webrtc.org>
Reviewed-by: Erik Språng <sprang@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#33046}
diff --git a/call/rtp_video_sender.cc b/call/rtp_video_sender.cc
index 041427a..e8d5db9 100644
--- a/call/rtp_video_sender.cc
+++ b/call/rtp_video_sender.cc
@@ -301,6 +301,38 @@
return ext.uri == RtpExtension::kTransportSequenceNumberUri;
});
}
+
+// Returns true when a coded video sequence can be decoded starting with
+// this frame, without requiring any previous frames.
+// E.g. it is the same as a key frame when spatial scalability is not used.
+// When spatial scalability is used, it is true only for layer frames of
+// a key frame that have no inter-layer dependencies.
+bool IsFirstFrameOfACodedVideoSequence(
+ const EncodedImage& encoded_image,
+ const CodecSpecificInfo* codec_specific_info) {
+ if (encoded_image._frameType != VideoFrameType::kVideoFrameKey) {
+ return false;
+ }
+
+ if (codec_specific_info != nullptr &&
+ codec_specific_info->generic_frame_info.has_value()) {
+ // This function is used before
+ // `codec_specific_info->generic_frame_info->frame_diffs` are calculated, so
+ // dependencies are detected by checking if any encoder buffer is referenced.
+ return absl::c_none_of(
+ codec_specific_info->generic_frame_info->encoder_buffers,
+ [](const CodecBufferUsage& buffer) { return buffer.referenced; });
+ }
+
+ // Without dependencies described in the generic format, make an educated
+ // guess. It might be wrong for VP9 with spatial layer 0 skipped, or a higher
+ // spatial layer not depending on spatial layer 0. This corner case is
+ // unimportant for the current usage of this helper function.
+
+ // Use <= to accept both 0 (i.e. the first) and nullopt (i.e. the only).
+ return encoded_image.SpatialIndex() <= 0;
+}
+
} // namespace
RtpVideoSender::RtpVideoSender(
@@ -526,7 +558,7 @@
rtp_streams_[stream_index].rtp_rtcp->ExpectedRetransmissionTimeMs();
}
- if (encoded_image._frameType == VideoFrameType::kVideoFrameKey) {
+ if (IsFirstFrameOfACodedVideoSequence(encoded_image, codec_specific_info)) {
// If encoder adapter produce FrameDependencyStructure, pass it so that
// dependency descriptor rtp header extension can be used.
// If not supported, disable using dependency descriptor by passing nullptr.
diff --git a/call/rtp_video_sender_unittest.cc b/call/rtp_video_sender_unittest.cc
index 5b14a93..b738c21 100644
--- a/call/rtp_video_sender_unittest.cc
+++ b/call/rtp_video_sender_unittest.cc
@@ -713,6 +713,61 @@
sent_packets.back().HasExtension<RtpDependencyDescriptorExtension>());
}
+TEST(RtpVideoSenderTest, SupportsDependencyDescriptorForVp9) {
+ RtpVideoSenderTestFixture test({kSsrc1}, {}, kPayloadType, {});
+ test.router()->SetActive(true);
+
+ RtpHeaderExtensionMap extensions;
+ extensions.Register<RtpDependencyDescriptorExtension>(
+ kDependencyDescriptorExtensionId);
+ std::vector<RtpPacket> sent_packets;
+ ON_CALL(test.transport(), SendRtp)
+ .WillByDefault([&](const uint8_t* packet, size_t length,
+ const PacketOptions& options) {
+ sent_packets.emplace_back(&extensions);
+ EXPECT_TRUE(sent_packets.back().Parse(packet, length));
+ return true;
+ });
+
+ const uint8_t kPayload[1] = {'a'};
+ EncodedImage encoded_image;
+ encoded_image.SetTimestamp(1);
+ encoded_image.capture_time_ms_ = 2;
+ encoded_image._frameType = VideoFrameType::kVideoFrameKey;
+ encoded_image.SetEncodedData(
+ EncodedImageBuffer::Create(kPayload, sizeof(kPayload)));
+
+ CodecSpecificInfo codec_specific;
+ codec_specific.codecType = VideoCodecType::kVideoCodecVP9;
+ codec_specific.template_structure.emplace();
+ codec_specific.template_structure->num_decode_targets = 2;
+ codec_specific.template_structure->templates = {
+ FrameDependencyTemplate().S(0).Dtis("SS"),
+ FrameDependencyTemplate().S(1).Dtis("-S").FrameDiffs({1}),
+ };
+
+ // Send two tiny images, each mapping to a single RTP packet.
+ // First, send the key frame for the base spatial layer (S0).
+ codec_specific.generic_frame_info =
+ GenericFrameInfo::Builder().S(0).Dtis("SS").Build();
+ codec_specific.generic_frame_info->encoder_buffers = {{0, false, true}};
+ EXPECT_EQ(test.router()->OnEncodedImage(encoded_image, &codec_specific).error,
+ EncodedImageCallback::Result::OK);
+ // Then send the 2nd spatial layer (S1) frame, without a template structure.
+ codec_specific.template_structure = absl::nullopt;
+ codec_specific.generic_frame_info =
+ GenericFrameInfo::Builder().S(1).Dtis("-S").Build();
+ codec_specific.generic_frame_info->encoder_buffers = {{0, true, false},
+ {1, false, true}};
+ EXPECT_EQ(test.router()->OnEncodedImage(encoded_image, &codec_specific).error,
+ EncodedImageCallback::Result::OK);
+
+ test.AdvanceTime(TimeDelta::Millis(33));
+ ASSERT_THAT(sent_packets, SizeIs(2));
+ EXPECT_TRUE(sent_packets[0].HasExtension<RtpDependencyDescriptorExtension>());
+ EXPECT_TRUE(sent_packets[1].HasExtension<RtpDependencyDescriptorExtension>());
+}
+
TEST(RtpVideoSenderTest, SupportsStoppingUsingDependencyDescriptor) {
RtpVideoSenderTestFixture test({kSsrc1}, {}, kPayloadType, {});
test.router()->SetActive(true);