Set marker bit on last encoded spatial layer.
To handle per-layer frame dropping, both the VP9 encoder wrapper and the
RTP packetizer were modified:
- The encoder wrapper buffers the last encoded frame and passes it to the
  packetizer after the next layer's frame is encoded or encoding of the
  superframe is finished.
- The encoder wrapper sets the end_of_superframe flag on the last encoded
  frame of the superframe before passing it to the packetizer.
- If end_of_superframe is true, the packetizer sets the marker bit on the
  last packet of the frame (see the sketch below).
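
The buffering behaves like a one-frame delay line: each encoded layer
frame is held back until it is known whether another layer follows it in
the same superframe. A minimal standalone sketch of that pattern (the
Frame and SuperframeBuffer types below are illustrative stand-ins, not
the actual EncodedImage/VP9EncoderImpl classes):

  #include <iostream>
  #include <string>
  #include <utility>
  #include <vector>

  // Illustrative stand-in for EncodedImage plus its codec-specific info.
  struct Frame {
    std::string payload;
    bool end_of_superframe = false;
  };

  class SuperframeBuffer {
   public:
    // Called for every successfully encoded layer frame. Before buffering
    // the new frame, flush the previous one: another layer followed it, so
    // it is not the end of the superframe.
    void OnLayerFrameEncoded(Frame frame) {
      Flush(/*end_of_superframe=*/false);
      buffered_ = std::move(frame);
      has_buffered_ = true;
    }

    // Called when encoding of the whole superframe is finished. Whatever
    // is still buffered is the last encoded layer, even if upper layers
    // were dropped.
    void OnSuperframeDone() { Flush(/*end_of_superframe=*/true); }

    std::vector<Frame> delivered;  // Stands in for the packetizer path.

   private:
    void Flush(bool end_of_superframe) {
      if (!has_buffered_)
        return;
      buffered_.end_of_superframe = end_of_superframe;
      delivered.push_back(std::move(buffered_));
      has_buffered_ = false;
    }

    Frame buffered_;
    bool has_buffered_ = false;
  };

  int main() {
    SuperframeBuffer buffer;
    // Superframe where the top spatial layer was dropped: only SL0 and
    // SL1 arrive.
    buffer.OnLayerFrameEncoded({"SL0"});
    buffer.OnLayerFrameEncoded({"SL1"});
    buffer.OnSuperframeDone();
    for (const Frame& f : buffer.delivered) {
      // Prints: SL0 end_of_superframe=0, then SL1 end_of_superframe=1.
      std::cout << f.payload << " end_of_superframe=" << f.end_of_superframe
                << "\n";
    }
    return 0;
  }

This one-frame delay is what lets the flag be decided correctly even when
upper layers are dropped mid-superframe.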
Bug: webrtc:9066
Change-Id: I1d45319fbe6bc63d01721ea67bfb7440d4c29275
Reviewed-on: https://webrtc-review.googlesource.com/65540
Commit-Queue: Sergey Silkin <ssilkin@webrtc.org>
Reviewed-by: Åsa Persson <asapersson@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#22722}
diff --git a/modules/rtp_rtcp/source/rtp_format_vp9.cc b/modules/rtp_rtcp/source/rtp_format_vp9.cc
index bb3edfced..ad0a7cf 100644
--- a/modules/rtp_rtcp/source/rtp_format_vp9.cc
+++ b/modules/rtp_rtcp/source/rtp_format_vp9.cc
@@ -576,9 +576,13 @@
if (!WriteHeaderAndPayload(packet_info, packet, packets_.empty())) {
return false;
}
- packet->SetMarker(packets_.empty() &&
- (hdr_.spatial_idx == kNoSpatialIdx ||
- hdr_.spatial_idx == hdr_.num_spatial_layers - 1));
+
+ // Ensure end_of_superframe is always set on the top spatial layer when
+ // that layer is not dropped.
+ RTC_DCHECK(hdr_.spatial_idx < hdr_.num_spatial_layers - 1 ||
+ hdr_.end_of_superframe);
+
+ packet->SetMarker(packets_.empty() && hdr_.end_of_superframe);
return true;
}
diff --git a/modules/rtp_rtcp/source/rtp_format_vp9_unittest.cc b/modules/rtp_rtcp/source/rtp_format_vp9_unittest.cc
index c8987df..d9083fb 100644
--- a/modules/rtp_rtcp/source/rtp_format_vp9_unittest.cc
+++ b/modules/rtp_rtcp/source/rtp_format_vp9_unittest.cc
@@ -478,7 +478,7 @@
CreateParseAndCheckPackets(kExpectedHdrSizes, kExpectedSizes, kExpectedNum);
}
-TEST_F(RtpPacketizerVp9Test, TestOnlyHighestSpatialLayerSetMarker) {
+TEST_F(RtpPacketizerVp9Test, EndOfSuperframeSetsMarker) {
const size_t kFrameSize = 10;
const size_t kPacketSize = 8;
const size_t kLastPacketReductionLen = 0;
@@ -492,32 +492,21 @@
RtpPacketToSend packet(kNoExtensions);
- vp9_header.spatial_idx = 0;
- RtpPacketizerVp9 packetizer0(vp9_header, kPacketSize,
- kLastPacketReductionLen);
- packetizer0.SetPayloadData(kFrame, sizeof(kFrame), kNoFragmentation);
- ASSERT_TRUE(packetizer0.NextPacket(&packet));
- EXPECT_FALSE(packet.Marker());
- ASSERT_TRUE(packetizer0.NextPacket(&packet));
- EXPECT_FALSE(packet.Marker());
-
- vp9_header.spatial_idx = 1;
- RtpPacketizerVp9 packetizer1(vp9_header, kPacketSize,
- kLastPacketReductionLen);
- packetizer1.SetPayloadData(kFrame, sizeof(kFrame), kNoFragmentation);
- ASSERT_TRUE(packetizer1.NextPacket(&packet));
- EXPECT_FALSE(packet.Marker());
- ASSERT_TRUE(packetizer1.NextPacket(&packet));
- EXPECT_FALSE(packet.Marker());
-
- vp9_header.spatial_idx = 2;
- RtpPacketizerVp9 packetizer2(vp9_header, kPacketSize,
- kLastPacketReductionLen);
- packetizer2.SetPayloadData(kFrame, sizeof(kFrame), kNoFragmentation);
- ASSERT_TRUE(packetizer2.NextPacket(&packet));
- EXPECT_FALSE(packet.Marker());
- ASSERT_TRUE(packetizer2.NextPacket(&packet));
- EXPECT_TRUE(packet.Marker());
+ // Drop the top layer and ensure that the marker bit is set on the last
+ // encoded layer.
+ for (size_t spatial_idx = 0; spatial_idx < vp9_header.num_spatial_layers - 1;
+ ++spatial_idx) {
+ const bool end_of_superframe =
+ spatial_idx + 1 == vp9_header.num_spatial_layers - 1;
+ vp9_header.spatial_idx = spatial_idx;
+ vp9_header.end_of_superframe = end_of_superframe;
+ RtpPacketizerVp9 packetizer(vp9_header, kPacketSize,
+ kLastPacketReductionLen);
+ packetizer.SetPayloadData(kFrame, sizeof(kFrame), kNoFragmentation);
+ ASSERT_TRUE(packetizer.NextPacket(&packet));
+ EXPECT_FALSE(packet.Marker());
+ ASSERT_TRUE(packetizer.NextPacket(&packet));
+ EXPECT_EQ(packet.Marker(), end_of_superframe);
+ }
}
TEST_F(RtpPacketizerVp9Test, TestGeneratesMinimumNumberOfPackets) {
diff --git a/modules/video_coding/codecs/vp9/include/vp9_globals.h b/modules/video_coding/codecs/vp9/include/vp9_globals.h
index 91507ed..f24ab3e 100644
--- a/modules/video_coding/codecs/vp9/include/vp9_globals.h
+++ b/modules/video_coding/codecs/vp9/include/vp9_globals.h
@@ -172,6 +172,7 @@
gof_idx = kNoGofIdx;
num_ref_pics = 0;
num_spatial_layers = 1;
+ end_of_superframe = true;
}
bool inter_pic_predicted; // This layer frame is dependent on previously
@@ -208,6 +209,8 @@
uint16_t width[kMaxVp9NumberOfSpatialLayers];
uint16_t height[kMaxVp9NumberOfSpatialLayers];
GofInfoVP9 gof;
+
+ bool end_of_superframe; // This frame is the last frame in the superframe.
};
} // namespace webrtc
diff --git a/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc b/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc
index 66ceabe..b16419b 100644
--- a/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc
+++ b/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc
@@ -262,4 +262,60 @@
}
}
+TEST_F(TestVp9Impl, EndOfSuperframe) {
+ const size_t num_spatial_layers = 2;
+ const size_t num_temporal_layers = 1;
+ codec_settings_.VP9()->numberOfSpatialLayers =
+ static_cast<unsigned char>(num_spatial_layers);
+ codec_settings_.VP9()->numberOfTemporalLayers =
+ static_cast<unsigned char>(num_temporal_layers);
+
+ std::vector<SpatialLayer> layers =
+ GetSvcConfig(codec_settings_.width, codec_settings_.height,
+ num_spatial_layers, num_temporal_layers);
+ for (size_t i = 0; i < layers.size(); ++i) {
+ codec_settings_.spatialLayers[i] = layers[i];
+ }
+
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->InitEncode(&codec_settings_, 1 /* number of cores */,
+ 0 /* max payload size (unused) */));
+
+ // Encode both base and upper layers. Check that the end-of-superframe
+ // flag is set on the upper-layer frame but not on the base-layer frame.
+ BitrateAllocation bitrate_allocation;
+ bitrate_allocation.SetBitrate(0, 0, layers[0].targetBitrate * 1000);
+ bitrate_allocation.SetBitrate(1, 0, layers[1].targetBitrate * 1000);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->SetRateAllocation(bitrate_allocation,
+ codec_settings_.maxFramerate));
+ SetWaitForEncodedFramesThreshold(2);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->Encode(*NextInputFrame(), nullptr, nullptr));
+
+ std::vector<EncodedImage> frames;
+ std::vector<CodecSpecificInfo> codec_specific;
+ ASSERT_TRUE(WaitForEncodedFrames(&frames, &codec_specific));
+ EXPECT_FALSE(codec_specific[0].codecSpecific.VP9.end_of_superframe);
+ EXPECT_TRUE(codec_specific[1].codecSpecific.VP9.end_of_superframe);
+
+ // Encode only the base layer. Check that the end-of-superframe flag is
+ // set on the base-layer frame.
+ bitrate_allocation.SetBitrate(1, 0, 0);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->SetRateAllocation(bitrate_allocation,
+ codec_settings_.maxFramerate));
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->InitEncode(&codec_settings_, 1 /* number of cores */,
+ 0 /* max payload size (unused) */));
+
+ SetWaitForEncodedFramesThreshold(1);
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ encoder_->Encode(*NextInputFrame(), nullptr, nullptr));
+
+ ASSERT_TRUE(WaitForEncodedFrames(&frames, &codec_specific));
+ EXPECT_EQ(codec_specific[0].codecSpecific.VP9.spatial_idx, 0);
+ EXPECT_TRUE(codec_specific[0].codecSpecific.VP9.end_of_superframe);
+}
+
} // namespace webrtc
diff --git a/modules/video_coding/codecs/vp9/vp9_impl.cc b/modules/video_coding/codecs/vp9/vp9_impl.cc
index e329536..c1f7d5d 100644
--- a/modules/video_coding/codecs/vp9/vp9_impl.cc
+++ b/modules/video_coding/codecs/vp9/vp9_impl.cc
@@ -586,6 +586,9 @@
}
timestamp_ += duration;
+ const bool end_of_superframe = true;
+ DeliverBufferedFrame(end_of_superframe);
+
return WEBRTC_VIDEO_CODEC_OK;
}
@@ -688,6 +691,14 @@
int VP9EncoderImpl::GetEncodedLayerFrame(const vpx_codec_cx_pkt* pkt) {
RTC_DCHECK_EQ(pkt->kind, VPX_CODEC_CX_FRAME_PKT);
+ if (pkt->data.frame.sz == 0) {
+ // Ignore dropped frame.
+ return WEBRTC_VIDEO_CODEC_OK;
+ }
+
+ const bool end_of_superframe = false;
+ DeliverBufferedFrame(end_of_superframe);
+
if (pkt->data.frame.sz > encoded_image_._size) {
delete[] encoded_image_._buffer;
encoded_image_._size = pkt->data.frame.sz;
@@ -696,15 +707,6 @@
memcpy(encoded_image_._buffer, pkt->data.frame.buf, pkt->data.frame.sz);
encoded_image_._length = pkt->data.frame.sz;
- // No data partitioning in VP9, so 1 partition only.
- int part_idx = 0;
- RTPFragmentationHeader frag_info;
- frag_info.VerifyAndAllocateFragmentationHeader(1);
- frag_info.fragmentationOffset[part_idx] = 0;
- frag_info.fragmentationLength[part_idx] = pkt->data.frame.sz;
- frag_info.fragmentationPlType[part_idx] = 0;
- frag_info.fragmentationTimeDiff[part_idx] = 0;
-
vpx_svc_layer_id_t layer_id = {0};
vpx_codec_control(encoder_, VP9E_GET_SVC_LAYER_ID, &layer_id);
if (is_flexible_mode_ && codec_.mode == kScreensharing)
@@ -720,32 +722,47 @@
}
RTC_DCHECK_LE(encoded_image_._length, encoded_image_._size);
- CodecSpecificInfo codec_specific;
- PopulateCodecSpecific(&codec_specific, *pkt, input_image_->timestamp());
+ memset(&codec_specific_, 0, sizeof(codec_specific_));
+ PopulateCodecSpecific(&codec_specific_, *pkt, input_image_->timestamp());
- if (encoded_image_._length > 0) {
- TRACE_COUNTER1("webrtc", "EncodedFrameSize", encoded_image_._length);
- encoded_image_._timeStamp = input_image_->timestamp();
- encoded_image_.capture_time_ms_ = input_image_->render_time_ms();
- encoded_image_.rotation_ = input_image_->rotation();
- encoded_image_.content_type_ = (codec_.mode == kScreensharing)
- ? VideoContentType::SCREENSHARE
- : VideoContentType::UNSPECIFIED;
- encoded_image_._encodedHeight =
- pkt->data.frame.height[layer_id.spatial_layer_id];
- encoded_image_._encodedWidth =
- pkt->data.frame.width[layer_id.spatial_layer_id];
- encoded_image_.timing_.flags = TimingFrameFlags::kInvalid;
- int qp = -1;
- vpx_codec_control(encoder_, VP8E_GET_LAST_QUANTIZER, &qp);
- encoded_image_.qp_ = qp;
+ TRACE_COUNTER1("webrtc", "EncodedFrameSize", encoded_image_._length);
+ encoded_image_._timeStamp = input_image_->timestamp();
+ encoded_image_.capture_time_ms_ = input_image_->render_time_ms();
+ encoded_image_.rotation_ = input_image_->rotation();
+ encoded_image_.content_type_ = (codec_.mode == kScreensharing)
+ ? VideoContentType::SCREENSHARE
+ : VideoContentType::UNSPECIFIED;
+ encoded_image_._encodedHeight =
+ pkt->data.frame.height[layer_id.spatial_layer_id];
+ encoded_image_._encodedWidth =
+ pkt->data.frame.width[layer_id.spatial_layer_id];
+ encoded_image_.timing_.flags = TimingFrameFlags::kInvalid;
+ int qp = -1;
+ vpx_codec_control(encoder_, VP8E_GET_LAST_QUANTIZER, &qp);
+ encoded_image_.qp_ = qp;
- encoded_complete_callback_->OnEncodedImage(encoded_image_, &codec_specific,
- &frag_info);
- }
return WEBRTC_VIDEO_CODEC_OK;
}
+void VP9EncoderImpl::DeliverBufferedFrame(bool end_of_superframe) {
+ if (encoded_image_._length > 0) {
+ codec_specific_.codecSpecific.VP9.end_of_superframe = end_of_superframe;
+
+ // No data partitioning in VP9, so 1 partition only.
+ int part_idx = 0;
+ RTPFragmentationHeader frag_info;
+ frag_info.VerifyAndAllocateFragmentationHeader(1);
+ frag_info.fragmentationOffset[part_idx] = 0;
+ frag_info.fragmentationLength[part_idx] = encoded_image_._length;
+ frag_info.fragmentationPlType[part_idx] = 0;
+ frag_info.fragmentationTimeDiff[part_idx] = 0;
+
+ encoded_complete_callback_->OnEncodedImage(encoded_image_, &codec_specific_,
+ &frag_info);
+ encoded_image_._length = 0;
+ }
+}
+
vpx_svc_ref_frame_config VP9EncoderImpl::GenerateRefsAndFlags(
const SuperFrameRefSettings& settings) {
static const vpx_enc_frame_flags_t kAllFlags =
diff --git a/modules/video_coding/codecs/vp9/vp9_impl.h b/modules/video_coding/codecs/vp9/vp9_impl.h
index 37076f8..cdc8a83 100644
--- a/modules/video_coding/codecs/vp9/vp9_impl.h
+++ b/modules/video_coding/codecs/vp9/vp9_impl.h
@@ -94,6 +94,8 @@
static void EncoderOutputCodedPacketCallback(vpx_codec_cx_pkt* pkt,
void* user_data);
+ void DeliverBufferedFrame(bool end_of_superframe);
+
// Determine maximum target for Intra frames
//
// Input:
@@ -103,6 +105,7 @@
uint32_t MaxIntraTarget(uint32_t optimal_buffer_size);
EncodedImage encoded_image_;
+ CodecSpecificInfo codec_specific_;
EncodedImageCallback* encoded_complete_callback_;
VideoCodec codec_;
bool inited_;
diff --git a/modules/video_coding/include/video_codec_interface.h b/modules/video_coding/include/video_codec_interface.h
index b534f64..204098c 100644
--- a/modules/video_coding/include/video_codec_interface.h
+++ b/modules/video_coding/include/video_codec_interface.h
@@ -68,6 +68,8 @@
// Frame reference data.
uint8_t num_ref_pics;
uint8_t p_diff[kMaxVp9RefPics];
+
+ bool end_of_superframe;
};
struct CodecSpecificInfoGeneric {
diff --git a/video/payload_router.cc b/video/payload_router.cc
index f980bc4..b06908f 100644
--- a/video/payload_router.cc
+++ b/video/payload_router.cc
@@ -67,8 +67,11 @@
}
rtp->codecHeader.VP9.num_ref_pics = info->codecSpecific.VP9.num_ref_pics;
- for (int i = 0; i < info->codecSpecific.VP9.num_ref_pics; ++i)
+ for (int i = 0; i < info->codecSpecific.VP9.num_ref_pics; ++i) {
rtp->codecHeader.VP9.pid_diff[i] = info->codecSpecific.VP9.p_diff[i];
+ }
+ rtp->codecHeader.VP9.end_of_superframe =
+ info->codecSpecific.VP9.end_of_superframe;
return;
}
case kVideoCodecH264:
diff --git a/video/payload_router_unittest.cc b/video/payload_router_unittest.cc
index 42cafc1..af57442 100644
--- a/video/payload_router_unittest.cc
+++ b/video/payload_router_unittest.cc
@@ -352,6 +352,84 @@
payload_router.OnEncodedImage(encoded_image, &codec_info, nullptr).error);
}
+TEST(PayloadRouterTest, InfoMappedToRtpVideoHeader_Vp9) {
+ RtpPayloadState state;
+ state.picture_id = kPictureId;
+ state.tl0_pic_idx = kTl0PicIdx;
+ std::map<uint32_t, RtpPayloadState> states = {{kSsrc1, state}};
+
+ NiceMock<MockRtpRtcp> rtp;
+ std::vector<RtpRtcp*> modules = {&rtp};
+ PayloadRouter router(modules, {kSsrc1}, kPayloadType, states);
+ router.SetActive(true);
+
+ EncodedImage encoded_image;
+ encoded_image.rotation_ = kVideoRotation_90;
+ encoded_image.content_type_ = VideoContentType::SCREENSHARE;
+
+ CodecSpecificInfo codec_info;
+ memset(&codec_info, 0, sizeof(CodecSpecificInfo));
+ codec_info.codecType = kVideoCodecVP9;
+ codec_info.codecSpecific.VP9.num_spatial_layers = 3;
+ codec_info.codecSpecific.VP9.first_frame_in_picture = true;
+ codec_info.codecSpecific.VP9.spatial_idx = 0;
+ codec_info.codecSpecific.VP9.temporal_idx = 2;
+ codec_info.codecSpecific.VP9.end_of_superframe = false;
+
+ EXPECT_CALL(rtp, SendOutgoingData(_, _, _, _, _, _, nullptr, _, _))
+ .WillOnce(
+ Invoke([&codec_info](Unused, Unused, Unused, Unused, Unused, Unused,
+ Unused, const RTPVideoHeader* header, Unused) {
+ EXPECT_EQ(kVideoRotation_90, header->rotation);
+ EXPECT_EQ(VideoContentType::SCREENSHARE, header->content_type);
+ EXPECT_EQ(kRtpVideoVp9, header->codec);
+ EXPECT_EQ(kPictureId + 1, header->codecHeader.VP9.picture_id);
+ EXPECT_EQ(kTl0PicIdx, header->codecHeader.VP9.tl0_pic_idx);
+ EXPECT_EQ(header->codecHeader.VP9.temporal_idx,
+ codec_info.codecSpecific.VP9.temporal_idx);
+ EXPECT_EQ(header->codecHeader.VP9.spatial_idx,
+ codec_info.codecSpecific.VP9.spatial_idx);
+ EXPECT_EQ(header->codecHeader.VP9.num_spatial_layers,
+ codec_info.codecSpecific.VP9.num_spatial_layers);
+ EXPECT_EQ(header->codecHeader.VP9.end_of_superframe,
+ codec_info.codecSpecific.VP9.end_of_superframe);
+ return true;
+ }));
+ EXPECT_CALL(rtp, Sending()).WillOnce(Return(true));
+
+ EXPECT_EQ(EncodedImageCallback::Result::OK,
+ router.OnEncodedImage(encoded_image, &codec_info, nullptr).error);
+
+ // Next spatial layer.
+ codec_info.codecSpecific.VP9.first_frame_in_picture = false;
+ codec_info.codecSpecific.VP9.spatial_idx += 1;
+ codec_info.codecSpecific.VP9.end_of_superframe = true;
+
+ EXPECT_CALL(rtp, SendOutgoingData(_, _, _, _, _, _, nullptr, _, _))
+ .WillOnce(
+ Invoke([&codec_info](Unused, Unused, Unused, Unused, Unused, Unused,
+ Unused, const RTPVideoHeader* header, Unused) {
+ EXPECT_EQ(kVideoRotation_90, header->rotation);
+ EXPECT_EQ(VideoContentType::SCREENSHARE, header->content_type);
+ EXPECT_EQ(kRtpVideoVp9, header->codec);
+ EXPECT_EQ(kPictureId + 1, header->codecHeader.VP9.picture_id);
+ EXPECT_EQ(kTl0PicIdx, header->codecHeader.VP9.tl0_pic_idx);
+ EXPECT_EQ(header->codecHeader.VP9.temporal_idx,
+ codec_info.codecSpecific.VP9.temporal_idx);
+ EXPECT_EQ(header->codecHeader.VP9.spatial_idx,
+ codec_info.codecSpecific.VP9.spatial_idx);
+ EXPECT_EQ(header->codecHeader.VP9.num_spatial_layers,
+ codec_info.codecSpecific.VP9.num_spatial_layers);
+ EXPECT_EQ(header->codecHeader.VP9.end_of_superframe,
+ codec_info.codecSpecific.VP9.end_of_superframe);
+ return true;
+ }));
+ EXPECT_CALL(rtp, Sending()).WillOnce(Return(true));
+
+ EXPECT_EQ(EncodedImageCallback::Result::OK,
+ router.OnEncodedImage(encoded_image, &codec_info, nullptr).error);
+}
+
TEST(PayloadRouterTest, InfoMappedToRtpVideoHeader_H264) {
NiceMock<MockRtpRtcp> rtp1;
std::vector<RtpRtcp*> modules = {&rtp1};