Fix header length and set layer_id/temporal_id to the lowest values among the aggregated NALUs for AP packets in the H265 RTP packetizer
Bug: webrtc:41480904
Change-Id: I56047b20933ba1f251ef88dc73a40c4967e8f89e
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/362560
Reviewed-by: Erik Språng <sprang@webrtc.org>
Commit-Queue: Qiujiao Wu <qiujiao.wu@intel.com>
Reviewed-by: Sergey Silkin <ssilkin@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#43220}
diff --git a/modules/rtp_rtcp/source/rtp_packet_h265_common.h b/modules/rtp_rtcp/source/rtp_packet_h265_common.h
index 8655a02..6f77ebf 100644
--- a/modules/rtp_rtcp/source/rtp_packet_h265_common.h
+++ b/modules/rtp_rtcp/source/rtp_packet_h265_common.h
@@ -18,6 +18,8 @@
// fields (F, Type, LayerId and TID) as the NAL unit header. Refer to
// section 4.4 in RFC 7798.
constexpr size_t kH265PayloadHeaderSizeBytes = 2;
+constexpr uint8_t kH265MaxLayerId = 127;
+constexpr uint8_t kH265MaxTemporalId = 7;
// Unlike H.264, H.265 NAL header is 2-bytes.
constexpr size_t kH265NalHeaderSizeBytes = 2;
// H.265's FU is constructed of 2-byte payload header, 1-byte FU header and FU
diff --git a/modules/rtp_rtcp/source/rtp_packetizer_h265.cc b/modules/rtp_rtcp/source/rtp_packetizer_h265.cc
index efc0d8a..599de8d 100644
--- a/modules/rtp_rtcp/source/rtp_packetizer_h265.cc
+++ b/modules/rtp_rtcp/source/rtp_packetizer_h265.cc
@@ -10,6 +10,7 @@
#include "modules/rtp_rtcp/source/rtp_packetizer_h265.h"
+#include <algorithm>
#include <optional>
#include <vector>
@@ -160,13 +161,14 @@
return fragment_size;
};
+ uint16_t header = (fragment[0] << 8) | fragment[1];
while (payload_size_left >= payload_size_needed()) {
RTC_CHECK_GT(fragment.size(), 0);
packets_.push({.source_fragment = fragment,
.first_fragment = (aggregated_fragments == 0),
.last_fragment = false,
.aggregated = true,
- .header = fragment[0]});
+ .header = header});
payload_size_left -= fragment.size();
payload_size_left -= fragment_headers_length;
@@ -234,22 +236,25 @@
|F| Type | LayerId | TID |
+-------------+-----------------+
*/
- // Refer to section section 4.4.2 for aggregation packets and modify type to
+ // Refer to section 4.4.2 for aggregation packets and modify type to
// 48 in PayloadHdr for aggregate packet. Do not support DONL for aggregation
// packets, DONL field is not present.
- uint8_t payload_hdr_h = packet->header >> 8;
- uint8_t payload_hdr_l = packet->header & 0xFF;
- uint8_t layer_id_h = payload_hdr_h & kH265LayerIDHMask;
- payload_hdr_h = (payload_hdr_h & kH265TypeMaskN) |
- (H265::NaluType::kAp << 1) | layer_id_h;
- buffer[0] = payload_hdr_h;
- buffer[1] = payload_hdr_l;
-
int index = kH265PayloadHeaderSizeBytes;
bool is_last_fragment = packet->last_fragment;
+
+ // Per section 4.4.2, the AP payload header must carry the lowest LayerId
+ // and the lowest TID found among all of the aggregated NAL units.
+ uint8_t layer_id_min = kH265MaxLayerId;
+ uint8_t temporal_id_min = kH265MaxTemporalId;
while (packet->aggregated) {
// Add NAL unit length field.
rtc::ArrayView<const uint8_t> fragment = packet->source_fragment;
+ uint8_t layer_id = ((fragment[0] & kH265LayerIDHMask) << 5) |
+ ((fragment[1] & kH265LayerIDLMask) >> 3);
+ layer_id_min = std::min(layer_id_min, layer_id);
+ uint8_t temporal_id = fragment[1] & kH265TIDMask;
+ temporal_id_min = std::min(temporal_id_min, temporal_id);
+
ByteWriter<uint16_t>::WriteBigEndian(&buffer[index], fragment.size());
index += kH265LengthFieldSizeBytes;
// Add NAL unit.
@@ -263,6 +268,9 @@
packet = &packets_.front();
is_last_fragment = packet->last_fragment;
}
+
+ buffer[0] = (H265::NaluType::kAp << 1) | (layer_id_min >> 5);
+ buffer[1] = (layer_id_min << 3) | temporal_id_min;
RTC_CHECK(is_last_fragment);
rtp_packet->SetPayloadSize(index);
}
diff --git a/modules/rtp_rtcp/source/rtp_packetizer_h265_unittest.cc b/modules/rtp_rtcp/source/rtp_packetizer_h265_unittest.cc
index 8f739e8..5355c5d 100644
--- a/modules/rtp_rtcp/source/rtp_packetizer_h265_unittest.cc
+++ b/modules/rtp_rtcp/source/rtp_packetizer_h265_unittest.cc
@@ -37,13 +37,19 @@
constexpr size_t kFuHeaderSizeBytes =
kH265FuHeaderSizeBytes + kH265PayloadHeaderSizeBytes;
-// Creates Buffer that looks like nal unit of given size.
-rtc::Buffer GenerateNalUnit(size_t size) {
+struct NalUnitHeader {
+ uint8_t forbidden_zero_bit = 0;
+ uint8_t nal_unit_type = 0;
+ uint8_t nuh_layer_id = 0;
+ uint8_t nuh_temporal_id_plus1 = 0;
+};
+
+// Creates Buffer that looks like nal unit of given header and size.
+rtc::Buffer GenerateNalUnit(NalUnitHeader header, size_t size) {
RTC_CHECK_GT(size, 0);
rtc::Buffer buffer(size);
- // Set some valid header with type TRAIL_R and temporal id
- buffer[0] = 2;
- buffer[1] = 2;
+ buffer[0] = (header.nal_unit_type << 1) | (header.nuh_layer_id >> 5);
+ buffer[1] = (header.nuh_layer_id << 3) | header.nuh_temporal_id_plus1;
for (size_t i = 2; i < size; ++i) {
buffer[i] = static_cast<uint8_t>(i);
}
@@ -123,8 +129,15 @@
TEST(RtpPacketizerH265Test, SingleNaluTwoPackets) {
RtpPacketizer::PayloadSizeLimits limits;
limits.max_payload_len = kMaxPayloadSizeBytes;
- rtc::Buffer nalus[] = {GenerateNalUnit(kMaxPayloadSizeBytes),
- GenerateNalUnit(100)};
+ rtc::Buffer nalus[] = {
+ GenerateNalUnit({.nal_unit_type = H265::NaluType::kIdrNLp,
+ .nuh_layer_id = 32,
+ .nuh_temporal_id_plus1 = 2},
+ kMaxPayloadSizeBytes),
+ GenerateNalUnit({.nal_unit_type = H265::NaluType::kIdrNLp,
+ .nuh_layer_id = 32,
+ .nuh_temporal_id_plus1 = 2},
+ 100)};
rtc::Buffer frame = CreateFrame(nalus);
RtpPacketizerH265 packetizer(frame, limits);
@@ -140,9 +153,19 @@
RtpPacketizer::PayloadSizeLimits limits;
limits.max_payload_len = 200;
limits.first_packet_reduction_len = 5;
- rtc::Buffer nalus[] = {GenerateNalUnit(/*size=*/195),
- GenerateNalUnit(/*size=*/200),
- GenerateNalUnit(/*size=*/200)};
+ rtc::Buffer nalus[] = {
+ GenerateNalUnit({.nal_unit_type = H265::NaluType::kIdrNLp,
+ .nuh_layer_id = 32,
+ .nuh_temporal_id_plus1 = 2},
+ /*size=*/195),
+ GenerateNalUnit({.nal_unit_type = H265::NaluType::kIdrNLp,
+ .nuh_layer_id = 32,
+ .nuh_temporal_id_plus1 = 2},
+ /*size=*/200),
+ GenerateNalUnit({.nal_unit_type = H265::NaluType::kIdrNLp,
+ .nuh_layer_id = 32,
+ .nuh_temporal_id_plus1 = 2},
+ /*size=*/200)};
rtc::Buffer frame = CreateFrame(nalus);
RtpPacketizerH265 packetizer(frame, limits);
@@ -159,9 +182,19 @@
RtpPacketizer::PayloadSizeLimits limits;
limits.max_payload_len = 200;
limits.last_packet_reduction_len = 5;
- rtc::Buffer nalus[] = {GenerateNalUnit(/*size=*/200),
- GenerateNalUnit(/*size=*/200),
- GenerateNalUnit(/*size=*/195)};
+ rtc::Buffer nalus[] = {
+ GenerateNalUnit({.nal_unit_type = H265::NaluType::kIdrNLp,
+ .nuh_layer_id = 32,
+ .nuh_temporal_id_plus1 = 2},
+ /*size=*/200),
+ GenerateNalUnit({.nal_unit_type = H265::NaluType::kIdrNLp,
+ .nuh_layer_id = 32,
+ .nuh_temporal_id_plus1 = 2},
+ /*size=*/200),
+ GenerateNalUnit({.nal_unit_type = H265::NaluType::kIdrNLp,
+ .nuh_layer_id = 32,
+ .nuh_temporal_id_plus1 = 2},
+ /*size=*/195)};
rtc::Buffer frame = CreateFrame(nalus);
RtpPacketizerH265 packetizer(frame, limits);
@@ -189,9 +222,19 @@
// Aggregation tests.
TEST(RtpPacketizerH265Test, ApRespectsNoPacketReduction) {
- rtc::Buffer nalus[] = {GenerateNalUnit(/*size=*/2),
- GenerateNalUnit(/*size=*/2),
- GenerateNalUnit(/*size=*/0x123)};
+ rtc::Buffer nalus[] = {
+ GenerateNalUnit({.nal_unit_type = H265::NaluType::kIdrNLp,
+ .nuh_layer_id = 32,
+ .nuh_temporal_id_plus1 = 2},
+ /*size=*/3),
+ GenerateNalUnit({.nal_unit_type = H265::NaluType::kIdrNLp,
+ .nuh_layer_id = 32,
+ .nuh_temporal_id_plus1 = 2},
+ /*size=*/3),
+ GenerateNalUnit({.nal_unit_type = H265::NaluType::kIdrNLp,
+ .nuh_layer_id = 32,
+ .nuh_temporal_id_plus1 = 2},
+ /*size=*/0x123)};
rtc::Buffer frame = CreateFrame(nalus);
RtpPacketizerH265 packetizer(frame, kNoLimits);
@@ -201,22 +244,22 @@
auto payload = packets[0].payload();
int type = H265::ParseNaluType(payload[0]);
EXPECT_EQ(payload.size(), kH265NalHeaderSizeBytes +
- 3 * kH265LengthFieldSizeBytes + 2 + 2 + 0x123);
+ 3 * kH265LengthFieldSizeBytes + 3 + 3 + 0x123);
EXPECT_EQ(type, H265::NaluType::kAp);
payload = payload.subview(kH265NalHeaderSizeBytes);
// 1st fragment.
EXPECT_THAT(payload.subview(0, kH265LengthFieldSizeBytes),
- ElementsAre(0, 2)); // Size.
- EXPECT_THAT(payload.subview(kH265LengthFieldSizeBytes, 2),
+ ElementsAre(0, 3)); // Size.
+ EXPECT_THAT(payload.subview(kH265LengthFieldSizeBytes, 3),
ElementsAreArray(nalus[0]));
- payload = payload.subview(kH265LengthFieldSizeBytes + 2);
+ payload = payload.subview(kH265LengthFieldSizeBytes + 3);
// 2nd fragment.
EXPECT_THAT(payload.subview(0, kH265LengthFieldSizeBytes),
- ElementsAre(0, 2)); // Size.
- EXPECT_THAT(payload.subview(kH265LengthFieldSizeBytes, 2),
+ ElementsAre(0, 3)); // Size.
+ EXPECT_THAT(payload.subview(kH265LengthFieldSizeBytes, 3),
ElementsAreArray(nalus[1]));
- payload = payload.subview(kH265LengthFieldSizeBytes + 2);
+ payload = payload.subview(kH265LengthFieldSizeBytes + 3);
// 3rd fragment.
EXPECT_THAT(payload.subview(0, kH265LengthFieldSizeBytes),
ElementsAre(0x1, 0x23)); // Size.
@@ -224,15 +267,80 @@
ElementsAreArray(nalus[2]));
}
+TEST(RtpPacketizerH265Test, ApRespectsLayerIdAndTemporalId) {
+ // Generate 3 NALUs: NALU 1 with nuh_layer_id 2 and nuh_temporal_id_plus1 6,
+ // NALU 2 with nuh_layer_id 0 and nuh_temporal_id_plus1 1, and NALU 3 with
+ // nuh_layer_id 32 and nuh_temporal_id_plus1 2. The AP packet header should
+ // therefore carry nuh_layer_id 0 and nuh_temporal_id_plus1 1, which are
+ // the lowest nuh_layer_id and the lowest nuh_temporal_id_plus1 values
+ // among the 3 aggregated NALUs.
+ rtc::Buffer nalus[] = {
+ GenerateNalUnit({.nal_unit_type = H265::NaluType::kIdrNLp,
+ .nuh_layer_id = 2,
+ .nuh_temporal_id_plus1 = 6},
+ /*size=*/3),
+ GenerateNalUnit({.nal_unit_type = H265::NaluType::kIdrNLp,
+ .nuh_layer_id = 0,
+ .nuh_temporal_id_plus1 = 1},
+ /*size=*/3),
+ GenerateNalUnit({.nal_unit_type = H265::NaluType::kIdrNLp,
+ .nuh_layer_id = 32,
+ .nuh_temporal_id_plus1 = 2},
+ /*size=*/0x123)};
+ rtc::Buffer frame = CreateFrame(nalus);
+
+ RtpPacketizerH265 packetizer(frame, kNoLimits);
+ std::vector<RtpPacketToSend> packets = FetchAllPackets(&packetizer);
+
+ ASSERT_THAT(packets, SizeIs(1));
+ auto payload = packets[0].payload();
+ uint8_t type = H265::ParseNaluType(payload[0]);
+ uint8_t layer_id = ((payload[0] & kH265LayerIDHMask) << 5) |
+ ((payload[1] & kH265LayerIDLMask) >> 3);
+ uint8_t temporal_id = payload[1] & kH265TIDMask;
+ EXPECT_EQ(payload.size(), kH265NalHeaderSizeBytes +
+ 3 * kH265LengthFieldSizeBytes + 3 + 3 + 0x123);
+
+ EXPECT_EQ(type, H265::NaluType::kAp);
+ EXPECT_EQ(layer_id, 0);
+ EXPECT_EQ(temporal_id, 1);
+ payload = payload.subview(kH265NalHeaderSizeBytes);
+ // 1st fragment.
+ EXPECT_THAT(payload.subview(0, kH265LengthFieldSizeBytes), ElementsAre(0, 3));
+ EXPECT_THAT(payload.subview(kH265LengthFieldSizeBytes, 3),
+ ElementsAreArray(nalus[0]));
+ payload = payload.subview(kH265LengthFieldSizeBytes + 3);
+ // 2nd fragment.
+ EXPECT_THAT(payload.subview(0, kH265LengthFieldSizeBytes), ElementsAre(0, 3));
+ EXPECT_THAT(payload.subview(kH265LengthFieldSizeBytes, 3),
+ ElementsAreArray(nalus[1]));
+ payload = payload.subview(kH265LengthFieldSizeBytes + 3);
+ // 3rd fragment.
+ EXPECT_THAT(payload.subview(0, kH265LengthFieldSizeBytes),
+ ElementsAre(0x1, 0x23));
+ EXPECT_THAT(payload.subview(kH265LengthFieldSizeBytes),
+ ElementsAreArray(nalus[2]));
+}
+
TEST(RtpPacketizerH265Test, ApRespectsFirstPacketReduction) {
RtpPacketizer::PayloadSizeLimits limits;
limits.max_payload_len = 1000;
limits.first_packet_reduction_len = 100;
const size_t kFirstFragmentSize =
limits.max_payload_len - limits.first_packet_reduction_len;
- rtc::Buffer nalus[] = {GenerateNalUnit(/*size=*/kFirstFragmentSize),
- GenerateNalUnit(/*size=*/2),
- GenerateNalUnit(/*size=*/2)};
+ rtc::Buffer nalus[] = {
+ GenerateNalUnit({.nal_unit_type = H265::NaluType::kIdrNLp,
+ .nuh_layer_id = 32,
+ .nuh_temporal_id_plus1 = 2},
+ /*size=*/kFirstFragmentSize),
+ GenerateNalUnit({.nal_unit_type = H265::NaluType::kIdrNLp,
+ .nuh_layer_id = 32,
+ .nuh_temporal_id_plus1 = 2},
+ /*size=*/3),
+ GenerateNalUnit({.nal_unit_type = H265::NaluType::kIdrNLp,
+ .nuh_layer_id = 32,
+ .nuh_temporal_id_plus1 = 2},
+ /*size=*/3)};
rtc::Buffer frame = CreateFrame(nalus);
RtpPacketizerH265 packetizer(frame, limits);
@@ -243,13 +351,13 @@
EXPECT_THAT(packets[0].payload(), ElementsAreArray(nalus[0]));
// Expect 2nd packet is aggregate of last two fragments.
// The size of H265 nal_unit_header is 2 bytes, according to 7.3.1.2
- // in H265 spec. Aggregation packet type is 48, and nuh_temporal_id_plus1
- // is 2, so the nal_unit_header should be "01100000 00000010",
- // which is 96 and 2.
+ // in H265 spec. Aggregation packet type is 48, nuh_layer_id is 32 and
+ // nuh_temporal_id_plus1 is 2, so the nal_unit_header should be "01100001
+ // 00000010", which is 97 and 2.
EXPECT_THAT(packets[1].payload(),
- ElementsAre(96, 2, //
- 0, 2, nalus[1][0], nalus[1][1], //
- 0, 2, nalus[2][0], nalus[2][1]));
+ ElementsAre(97, 2, //
+ 0, 3, nalus[1][0], nalus[1][1], nalus[1][2], //
+ 0, 3, nalus[2][0], nalus[2][1], nalus[2][2]));
}
TEST(RtpPacketizerH265Test, ApRespectsLastPacketReduction) {
@@ -258,9 +366,19 @@
limits.last_packet_reduction_len = 100;
const size_t kLastFragmentSize =
limits.max_payload_len - limits.last_packet_reduction_len;
- rtc::Buffer nalus[] = {GenerateNalUnit(/*size=*/2),
- GenerateNalUnit(/*size=*/2),
- GenerateNalUnit(/*size=*/kLastFragmentSize)};
+ rtc::Buffer nalus[] = {
+ GenerateNalUnit({.nal_unit_type = H265::NaluType::kIdrNLp,
+ .nuh_layer_id = 32,
+ .nuh_temporal_id_plus1 = 2},
+ /*size=*/3),
+ GenerateNalUnit({.nal_unit_type = H265::NaluType::kIdrNLp,
+ .nuh_layer_id = 32,
+ .nuh_temporal_id_plus1 = 2},
+ /*size=*/3),
+ GenerateNalUnit({.nal_unit_type = H265::NaluType::kIdrNLp,
+ .nuh_layer_id = 32,
+ .nuh_temporal_id_plus1 = 2},
+ /*size=*/kLastFragmentSize)};
rtc::Buffer frame = CreateFrame(nalus);
RtpPacketizerH265 packetizer(frame, limits);
@@ -269,9 +387,9 @@
ASSERT_THAT(packets, SizeIs(2));
// Expect 1st packet is aggregate of 1st two fragments.
EXPECT_THAT(packets[0].payload(),
- ElementsAre(96, 2, //
- 0, 2, nalus[0][0], nalus[0][1], //
- 0, 2, nalus[1][0], nalus[1][1]));
+ ElementsAre(97, 2, //
+ 0, 3, nalus[0][0], nalus[0][1], nalus[0][2], //
+ 0, 3, nalus[1][0], nalus[1][1], nalus[1][2]));
// Expect 2nd packet is single nalu.
EXPECT_THAT(packets[1].payload(), ElementsAreArray(nalus[2]));
}
@@ -281,9 +399,19 @@
limits.max_payload_len = 1000;
const size_t kLastFragmentSize =
limits.max_payload_len - 3 * kH265LengthFieldSizeBytes - 4;
- rtc::Buffer nalus[] = {GenerateNalUnit(/*size=*/2),
- GenerateNalUnit(/*size=*/2),
- GenerateNalUnit(/*size=*/kLastFragmentSize)};
+ rtc::Buffer nalus[] = {
+ GenerateNalUnit({.nal_unit_type = H265::NaluType::kIdrNLp,
+ .nuh_layer_id = 32,
+ .nuh_temporal_id_plus1 = 2},
+ /*size=*/3),
+ GenerateNalUnit({.nal_unit_type = H265::NaluType::kIdrNLp,
+ .nuh_layer_id = 32,
+ .nuh_temporal_id_plus1 = 2},
+ /*size=*/3),
+ GenerateNalUnit({.nal_unit_type = H265::NaluType::kIdrNLp,
+ .nuh_layer_id = 32,
+ .nuh_temporal_id_plus1 = 2},
+ /*size=*/kLastFragmentSize)};
rtc::Buffer frame = CreateFrame(nalus);
RtpPacketizerH265 packetizer(frame, limits);
@@ -292,9 +420,9 @@
ASSERT_THAT(packets, SizeIs(2));
// Expect 1st packet is aggregate of 1st two fragments.
EXPECT_THAT(packets[0].payload(),
- ElementsAre(96, 2, //
- 0, 2, nalus[0][0], nalus[0][1], //
- 0, 2, nalus[1][0], nalus[1][1]));
+ ElementsAre(97, 2, //
+ 0, 3, nalus[0][0], nalus[0][1], nalus[0][2], //
+ 0, 3, nalus[1][0], nalus[1][1], nalus[1][2]));
// Expect 2nd packet is single nalu.
EXPECT_THAT(packets[1].payload(), ElementsAreArray(nalus[2]));
}
@@ -323,7 +451,10 @@
std::vector<int> TestFu(size_t frame_payload_size,
const RtpPacketizer::PayloadSizeLimits& limits) {
rtc::Buffer nalu[] = {
- GenerateNalUnit(kH265NalHeaderSizeBytes + frame_payload_size)};
+ GenerateNalUnit({.nal_unit_type = H265::NaluType::kIdrNLp,
+ .nuh_layer_id = 32,
+ .nuh_temporal_id_plus1 = 2},
+ kH265NalHeaderSizeBytes + frame_payload_size)};
rtc::Buffer frame = CreateFrame(nalu);
RtpPacketizerH265 packetizer(frame, limits);
@@ -433,7 +564,10 @@
// Generate nalus according to size specified in paramters
for (size_t index = 0; index < params.nalus.size(); index++) {
- nalus.push_back(GenerateNalUnit(params.nalus[index]));
+ nalus.push_back(GenerateNalUnit({.nal_unit_type = H265::NaluType::kIdrNLp,
+ .nuh_layer_id = 32,
+ .nuh_temporal_id_plus1 = 2},
+ params.nalus[index]));
}
rtc::Buffer frame = CreateFrame(nalus);
@@ -462,9 +596,9 @@
uint8_t fu_header = 0;
fu_header |= (expected_packet.first_fragment ? kH265SBitMask : 0);
fu_header |= (expected_packet.last_fragment ? kH265EBitMask : 0);
- fu_header |= H265::NaluType::kTrailR;
+ fu_header |= H265::NaluType::kIdrNLp;
EXPECT_THAT(packets[i].payload().subview(0, kFuHeaderSizeBytes),
- ElementsAre(98, 2, fu_header));
+ ElementsAre(99, 2, fu_header));
EXPECT_THAT(packets[i].payload().subview(kFuHeaderSizeBytes),
ElementsAreArray(nalus[expected_packet.nalu_index].data() +
kH265NalHeaderSizeBytes +