In RtpDepacketizerAV1 use aggregation header to detect key frames
instead of guessing based on presence of the sequence header OBU.
Bug: webrtc:11042
Change-Id: I9a0674249feceebb07299ea965c62e87499f6baf
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/161013
Reviewed-by: Philip Eliasson <philipel@webrtc.org>
Commit-Queue: Danil Chapovalov <danilchap@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#29958}
diff --git a/modules/rtp_rtcp/source/rtp_depacketizer_av1.cc b/modules/rtp_rtcp/source/rtp_depacketizer_av1.cc
index 52c62f8..71890e9 100644
--- a/modules/rtp_rtcp/source/rtp_depacketizer_av1.cc
+++ b/modules/rtp_rtcp/source/rtp_depacketizer_av1.cc
@@ -26,7 +26,7 @@
// RTP payload syntax:
// 0 1 2 3 4 5 6 7
// +-+-+-+-+-+-+-+-+
-// |Z|Y| W |-|-|-|-| (REQUIRED)
+// |Z|Y| W |N|-|-|-| (REQUIRED)
// +=+=+=+=+=+=+=+=+ (REPEATED W-1 times, or any times if W = 0)
// |1| |
// +-+ OBU fragment|
@@ -154,7 +154,6 @@
// also has Sequence Header OBU.
using VectorObuInfo = absl::InlinedVector<ObuInfo, 4>;
-constexpr int kObuTypeSequenceHeader = 1;
constexpr uint8_t kObuSizePresentBit = 0b0'0000'010;
bool ObuHasExtension(uint8_t obu_header) {
@@ -165,10 +164,6 @@
return obu_header & kObuSizePresentBit;
}
-int ObuType(uint8_t obu_header) {
- return (obu_header & 0b0'1111'000u) >> 3;
-}
-
bool RtpStartsWithFragment(uint8_t aggregation_header) {
return aggregation_header & 0b1000'0000u;
}
@@ -178,6 +173,9 @@
int RtpNumObus(uint8_t aggregation_header) { // 0 for any number of obus.
return (aggregation_header & 0b0011'0000u) >> 4;
}
+int RtpStartsNewCodedVideoSequence(uint8_t aggregation_header) {
+ return aggregation_header & 0b0000'1000u;
+}
// Reorgonizes array of rtp payloads into array of obus:
// fills ObuInfo::data field.
@@ -373,16 +371,18 @@
RTC_DLOG(LS_ERROR) << "Empty rtp payload.";
return false;
}
+ uint8_t aggregation_header = payload_data[0];
+ if (RtpStartsNewCodedVideoSequence(aggregation_header) &&
+ RtpStartsWithFragment(aggregation_header)) {
+ // new coded video sequence can't start from an OBU fragment.
+ return false;
+ }
+
// To assemble frame, all of the rtp payload is required, including
// aggregation header.
parsed_payload->payload = payload_data;
parsed_payload->payload_length = payload_data_length;
- rtc::ByteBufferReader payload(reinterpret_cast<const char*>(payload_data),
- payload_data_length);
- uint8_t aggregation_header;
- RTC_CHECK(payload.ReadUInt8(&aggregation_header));
-
parsed_payload->video.codec = VideoCodecType::kVideoCodecAV1;
// These are not accurate since frame may consist of several packet aligned
// chunks of obus, but should be good enough for most cases. It might produce
@@ -393,57 +393,11 @@
!RtpStartsWithFragment(aggregation_header);
parsed_payload->video.is_last_packet_in_frame =
!RtpEndsWithFragment(aggregation_header);
- parsed_payload->video.frame_type = VideoFrameType::kVideoFrameDelta;
- // If packet starts a frame, check if it contains Sequence Header OBU.
- // In that case treat it as key frame packet.
- if (parsed_payload->video.is_first_packet_in_frame) {
- int num_expected_obus = RtpNumObus(aggregation_header);
- // The only OBU that can preceed SequenceHeader is a TemporalDelimiter OBU,
- // so check no more than two OBUs while searching for SH.
- for (int obu_index = 1; payload.Length() > 0 && obu_index <= 2;
- ++obu_index) {
- uint64_t fragment_size;
- // When num_expected_obus > 0, last OBU (fragment) is not preceeded by
- // the size field. See W field in
- // https://aomediacodec.github.io/av1-rtp-spec/#43-av1-aggregation-header
- bool has_fragment_size = (obu_index != num_expected_obus);
- if (has_fragment_size) {
- if (!payload.ReadUVarint(&fragment_size)) {
- RTC_DLOG(LS_WARNING)
- << "Failed to read OBU fragment size for OBU#" << obu_index;
- return false;
- }
- if (fragment_size > payload.Length()) {
- RTC_DLOG(LS_WARNING) << "OBU fragment size " << fragment_size
- << " exceeds remaining payload size "
- << payload.Length() << " for OBU#" << obu_index;
- // Malformed input: written size is larger than remaining buffer.
- return false;
- }
- } else {
- fragment_size = payload.Length();
- }
- // Though it is inpractical to pass empty fragments, it is allowed.
- if (fragment_size == 0) {
- RTC_LOG(LS_WARNING)
- << "Weird obu of size 0 at offset "
- << (payload_data_length - payload.Length()) << ", skipping.";
- continue;
- }
- uint8_t obu_header = *reinterpret_cast<const uint8_t*>(payload.Data());
- if (ObuType(obu_header) == kObuTypeSequenceHeader) {
- // TODO(bugs.webrtc.org/11042): Check frame_header OBU and/or frame OBU
- // too for other conditions of the start of a new coded video sequence.
- // For proper checks checking single packet might not be enough. See
- // https://aomediacodec.github.io/av1-spec/av1-spec.pdf section 7.5
- parsed_payload->video.frame_type = VideoFrameType::kVideoFrameKey;
- break;
- }
- payload.Consume(fragment_size);
- }
- }
-
+ parsed_payload->video.frame_type =
+ RtpStartsNewCodedVideoSequence(aggregation_header)
+ ? VideoFrameType::kVideoFrameKey
+ : VideoFrameType::kVideoFrameDelta;
return true;
}
diff --git a/modules/rtp_rtcp/source/rtp_depacketizer_av1_unittest.cc b/modules/rtp_rtcp/source/rtp_depacketizer_av1_unittest.cc
index cf55aae..d0d0670 100644
--- a/modules/rtp_rtcp/source/rtp_depacketizer_av1_unittest.cc
+++ b/modules/rtp_rtcp/source/rtp_depacketizer_av1_unittest.cc
@@ -19,12 +19,9 @@
using ::testing::ElementsAre;
// Signals number of the OBU (fragments) in the packet.
-constexpr uint8_t kObuCountAny = 0b0000'0000;
-constexpr uint8_t kObuCountOne = 0b0001'0000;
-constexpr uint8_t kObuCountTwo = 0b0010'0000;
+constexpr uint8_t kObuCountOne = 0b00'01'0000;
constexpr uint8_t kObuHeaderSequenceHeader = 0b0'0001'000;
-constexpr uint8_t kObuHeaderTemporalDelimiter = 0b0'0010'000;
constexpr uint8_t kObuHeaderFrame = 0b0'0110'000;
constexpr uint8_t kObuHeaderHasSize = 0b0'0000'010;
@@ -73,131 +70,37 @@
EXPECT_TRUE(parsed.video.is_last_packet_in_frame);
}
-TEST(RtpDepacketizerAv1Test, ParseTreatsStartOfSequenceHeaderAsKeyFrame) {
- const uint8_t packet[] = {kObuCountOne, kObuHeaderSequenceHeader};
- RtpDepacketizerAv1 depacketizer;
- RtpDepacketizer::ParsedPayload parsed;
- ASSERT_TRUE(depacketizer.Parse(&parsed, packet, sizeof(packet)));
- EXPECT_TRUE(parsed.video.is_first_packet_in_frame);
- EXPECT_TRUE(parsed.video.frame_type == VideoFrameType::kVideoFrameKey);
-}
-
-TEST(RtpDepacketizerAv1Test, ParseTreatsNotStartOfFrameAsDeltaFrame) {
- const uint8_t packet[] = {
- (uint8_t{1} << 7) | kObuCountOne,
- // Byte that look like start of sequence header, but since it is not start
- // of an OBU, it is actually not a start of sequence header.
- kObuHeaderSequenceHeader};
- RtpDepacketizerAv1 depacketizer;
- RtpDepacketizer::ParsedPayload parsed;
- ASSERT_TRUE(depacketizer.Parse(&parsed, packet, sizeof(packet)));
- EXPECT_FALSE(parsed.video.is_first_packet_in_frame);
- EXPECT_TRUE(parsed.video.frame_type == VideoFrameType::kVideoFrameDelta);
-}
-
TEST(RtpDepacketizerAv1Test,
- ParseTreatsStartOfFrameWithoutSequenceHeaderAsDeltaFrame) {
- const uint8_t packet[] = {kObuCountOne, kObuHeaderFrame};
- RtpDepacketizerAv1 depacketizer;
- RtpDepacketizer::ParsedPayload parsed;
- ASSERT_TRUE(depacketizer.Parse(&parsed, packet, sizeof(packet)));
- EXPECT_TRUE(parsed.video.is_first_packet_in_frame);
- EXPECT_TRUE(parsed.video.frame_type == VideoFrameType::kVideoFrameDelta);
-}
-
-TEST(RtpDepacketizerAv1Test, ParseFindsSequenceHeaderBehindFragmentSize1) {
- const uint8_t packet[] = {kObuCountAny,
- 1, // size of the next fragment
+ ParseUsesNewCodedVideoSequenceBitAsKeyFrameIndidcator) {
+ const uint8_t packet[] = {(uint8_t{1} << 3) | kObuCountOne,
kObuHeaderSequenceHeader};
RtpDepacketizerAv1 depacketizer;
RtpDepacketizer::ParsedPayload parsed;
ASSERT_TRUE(depacketizer.Parse(&parsed, packet, sizeof(packet)));
- EXPECT_TRUE(parsed.video.frame_type == VideoFrameType::kVideoFrameKey);
-}
-
-TEST(RtpDepacketizerAv1Test, ParseFindsSequenceHeaderBehindFragmentSize2) {
- const uint8_t packet[] = {kObuCountTwo,
- 2, // size of the next fragment
- kObuHeaderSequenceHeader,
- 42, // SH payload.
- kObuHeaderFrame};
- RtpDepacketizerAv1 depacketizer;
- RtpDepacketizer::ParsedPayload parsed;
- ASSERT_TRUE(depacketizer.Parse(&parsed, packet, sizeof(packet)));
+ EXPECT_TRUE(parsed.video.is_first_packet_in_frame);
EXPECT_TRUE(parsed.video.frame_type == VideoFrameType::kVideoFrameKey);
}
TEST(RtpDepacketizerAv1Test,
- ParseFindsSequenceHeaderBehindMultiByteFragmentSize) {
- const uint8_t packet[] = {kObuCountTwo,
- 0b1000'0101, // leb128 encoded value of 5
- 0b1000'0000, // using 3 bytes
- 0b0000'0000, // to encode the value.
- kObuHeaderSequenceHeader,
- 8, // 4 bytes of SH payload.
- 0,
- 0,
- 0,
- kObuHeaderFrame};
+ ParseUsesUnsetNewCodedVideoSequenceBitAsDeltaFrameIndidcator) {
+ const uint8_t packet[] = {(uint8_t{0} << 3) | kObuCountOne,
+ kObuHeaderSequenceHeader};
RtpDepacketizerAv1 depacketizer;
RtpDepacketizer::ParsedPayload parsed;
ASSERT_TRUE(depacketizer.Parse(&parsed, packet, sizeof(packet)));
- EXPECT_TRUE(parsed.video.frame_type == VideoFrameType::kVideoFrameKey);
-}
-
-TEST(RtpDepacketizerAv1Test, ParseFindsSequenceHeaderBehindTemporalDelimiter) {
- const uint8_t packet[] = {kObuCountTwo,
- 1, // size of the next fragment
- kObuHeaderTemporalDelimiter,
- kObuHeaderSequenceHeader,
- 8, // 4 bytes of SH payload.
- 0,
- 0,
- 0};
- RtpDepacketizerAv1 depacketizer;
- RtpDepacketizer::ParsedPayload parsed;
- ASSERT_TRUE(depacketizer.Parse(&parsed, packet, sizeof(packet)));
- EXPECT_TRUE(parsed.video.frame_type == VideoFrameType::kVideoFrameKey);
-}
-
-TEST(RtpDepacketizerAv1Test,
- ParseFindsSequenceHeaderBehindTemporalDelimiterAndSize) {
- const uint8_t packet[] = {kObuCountAny,
- 1, // size of the next fragment
- kObuHeaderTemporalDelimiter,
- 5, // size of the next fragment
- kObuHeaderSequenceHeader,
- 8, // 4 bytes of SH payload.
- 0,
- 0,
- 0,
- 1, // size of the next fragment
- kObuHeaderFrame};
- RtpDepacketizerAv1 depacketizer;
- RtpDepacketizer::ParsedPayload parsed;
- ASSERT_TRUE(depacketizer.Parse(&parsed, packet, sizeof(packet)));
- EXPECT_TRUE(parsed.video.frame_type == VideoFrameType::kVideoFrameKey);
-}
-
-TEST(RtpDepacketizerAv1Test, ParseSkipsEmptyFragments) {
- static_assert(kObuHeaderSequenceHeader == 8, "");
- const uint8_t packet[] = {kObuCountAny,
- 0, // size of the next fragment
- 8, // size of the next fragment that look like SH
- kObuHeaderFrame,
- 1,
- 2,
- 3,
- 4,
- 5,
- 6,
- 7};
- RtpDepacketizerAv1 depacketizer;
- RtpDepacketizer::ParsedPayload parsed;
- ASSERT_TRUE(depacketizer.Parse(&parsed, packet, sizeof(packet)));
+ EXPECT_TRUE(parsed.video.is_first_packet_in_frame);
EXPECT_TRUE(parsed.video.frame_type == VideoFrameType::kVideoFrameDelta);
}
+TEST(RtpDepacketizerAv1Test,
+ ParseRejectsPacketWithNewCVSAndContinuationFlagsBothSet) {
+ const uint8_t packet[] = {0b10'00'1000 | kObuCountOne,
+ kObuHeaderSequenceHeader};
+ RtpDepacketizerAv1 depacketizer;
+ RtpDepacketizer::ParsedPayload parsed;
+ EXPECT_FALSE(depacketizer.Parse(&parsed, packet, sizeof(packet)));
+}
+
TEST(RtpDepacketizerAv1Test, AssembleFrameSetsOBUPayloadSizeWhenAbsent) {
const uint8_t payload1[] = {0b00'01'0000, // aggregation header
0b0'0110'000, // / Frame