Revert "Revert of Use sps and pps to determine decodability of H.264 frames. (patchset #4 id:60001 of https://codereview.webrtc.org/2341713002/ )"
This reverts commit 3cdfcd88a14449a9b116cb6149e1348d3a1e4cb2.
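Relanding the original change: the H.264 depacketizer now records the SPS/PPS
ids carried by each NAL unit, and VCMDecodingState keeps a set of received SPS
ids plus a pps_id -> sps_id map so that a key frame or delta frame is only
reported as continuous when the parameter sets it references are available.
The sketch below illustrates that bookkeeping as a standalone class; the names
(NaluRef, ParameterSetTracker) are illustrative only, and unlike the real
HaveSpsAndPps() it does not also accept parameter sets delivered inside the
frame currently being checked.

// Illustrative sketch only -- not the WebRTC implementation. It mirrors the
// pps_id -> sps_id bookkeeping added to VCMDecodingState in this CL.
#include <map>
#include <set>
#include <vector>

struct NaluRef {
  bool is_sps = false;
  bool is_pps = false;
  int sps_id = -1;  // -1 means the id is not present in this NAL unit.
  int pps_id = -1;
};

class ParameterSetTracker {
 public:
  // Remember the parameter sets carried by a frame once it has been decoded.
  void OnFrameDecoded(const std::vector<NaluRef>& nalus) {
    for (const NaluRef& nalu : nalus) {
      if (nalu.is_sps && nalu.sps_id >= 0) {
        received_sps_.insert(nalu.sps_id);
      } else if (nalu.is_pps && nalu.pps_id >= 0 && nalu.sps_id >= 0) {
        received_pps_[nalu.pps_id] = nalu.sps_id;  // PPS references an SPS.
      }
    }
  }

  // A slice is decodable only if its PPS, and the SPS that PPS references,
  // have already been seen.
  bool HaveSpsAndPps(const std::vector<NaluRef>& nalus) const {
    for (const NaluRef& nalu : nalus) {
      if (nalu.is_sps || nalu.is_pps || nalu.pps_id < 0)
        continue;  // Parameter sets and id-less NAL units need no lookup.
      auto pps_it = received_pps_.find(nalu.pps_id);
      if (pps_it == received_pps_.end() ||
          received_sps_.count(pps_it->second) == 0) {
        return false;
      }
    }
    return true;
  }

 private:
  std::set<int> received_sps_;
  std::map<int, int> received_pps_;  // pps_id -> sps_id.
};

The depacketizer side of the CL feeds this bookkeeping by parsing pps_id and
sps_id out of SPS, PPS, IDR and non-IDR slice NAL units (see
rtp_format_h264.cc below).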
NOPRESUBMIT=true
BUG=webrtc:6208
Review-Url: https://codereview.webrtc.org/2385143002
Cr-Commit-Position: refs/heads/master@{#14551}
diff --git a/webrtc/modules/rtp_rtcp/source/rtp_format_h264.cc b/webrtc/modules/rtp_rtcp/source/rtp_format_h264.cc
index 19467ca..81dcb8b 100644
--- a/webrtc/modules/rtp_rtcp/source/rtp_format_h264.cc
+++ b/webrtc/modules/rtp_rtcp/source/rtp_format_h264.cc
@@ -484,6 +484,8 @@
parsed_payload->type.Video.width = sps->width;
parsed_payload->type.Video.height = sps->height;
nalu.sps_id = sps->id;
+ } else {
+ LOG(LS_WARNING) << "Failed to parse SPS id from SPS slice.";
}
parsed_payload->frame_type = kVideoFrameKey;
break;
@@ -502,18 +504,32 @@
}
break;
}
- case H264::NaluType::kSei:
- FALLTHROUGH();
case H264::NaluType::kIdr:
parsed_payload->frame_type = kVideoFrameKey;
FALLTHROUGH();
- default: {
+ case H264::NaluType::kSlice: {
rtc::Optional<uint32_t> pps_id = PpsParser::ParsePpsIdFromSlice(
&payload_data[start_offset], end_offset - start_offset);
- if (pps_id)
+ if (pps_id) {
nalu.pps_id = *pps_id;
+ } else {
+ LOG(LS_WARNING) << "Failed to parse PPS id from slice of type: "
+ << static_cast<int>(nalu.type);
+ }
break;
}
+ // The NAL unit types below don't contain SPS or PPS ids.
+ case H264::NaluType::kAud:
+ case H264::NaluType::kEndOfSequence:
+ case H264::NaluType::kEndOfStream:
+ case H264::NaluType::kFiller:
+ break;
+ case H264::NaluType::kSei:
+ parsed_payload->frame_type = kVideoFrameKey;
+ break;
+ case H264::NaluType::kStapA:
+ case H264::NaluType::kFuA:
+ RTC_NOTREACHED();
}
RTPVideoHeaderH264* h264 = &parsed_payload->type.Video.codecHeader.H264;
if (h264->nalus_length == kMaxNalusPerPacket) {
@@ -547,8 +563,13 @@
length_ -= kNalHeaderSize;
rtc::Optional<uint32_t> pps_id = PpsParser::ParsePpsIdFromSlice(
payload_data + 2 * kNalHeaderSize, length_ - kNalHeaderSize);
- if (pps_id)
+ if (pps_id) {
nalu.pps_id = *pps_id;
+ } else {
+ LOG(LS_WARNING) << "Failed to parse PPS id from first fragment of FU-A "
+ "NAL unit with original type: "
+ << static_cast<int>(nalu.type);
+ }
uint8_t original_nal_header = fnri | original_nal_type;
modified_buffer_.reset(new rtc::Buffer());
modified_buffer_->AppendData(payload_data + kNalHeaderSize, length_);
@@ -570,8 +591,10 @@
RTPVideoHeaderH264* h264 = &parsed_payload->type.Video.codecHeader.H264;
h264->packetization_type = kH264FuA;
h264->nalu_type = original_nal_type;
- h264->nalus[h264->nalus_length] = nalu;
- h264->nalus_length = 1;
+ if (first_fragment) {
+ h264->nalus[h264->nalus_length] = nalu;
+ h264->nalus_length = 1;
+ }
return true;
}
diff --git a/webrtc/modules/rtp_rtcp/source/rtp_format_h264_unittest.cc b/webrtc/modules/rtp_rtcp/source/rtp_format_h264_unittest.cc
index 81ff499..894415d 100644
--- a/webrtc/modules/rtp_rtcp/source/rtp_format_h264_unittest.cc
+++ b/webrtc/modules/rtp_rtcp/source/rtp_format_h264_unittest.cc
@@ -771,10 +771,8 @@
const RTPVideoHeaderH264& h264 = payload.type.Video.codecHeader.H264;
EXPECT_EQ(kH264FuA, h264.packetization_type);
EXPECT_EQ(kIdr, h264.nalu_type);
- ASSERT_EQ(1u, h264.nalus_length);
- EXPECT_EQ(static_cast<H264::NaluType>(kIdr), h264.nalus[0].type);
- EXPECT_EQ(-1, h264.nalus[0].sps_id);
- EXPECT_EQ(-1, h264.nalus[0].pps_id);
+ // NALU info is only expected for the first FU-A packet.
+ EXPECT_EQ(0u, h264.nalus_length);
}
payload = RtpDepacketizer::ParsedPayload();
@@ -787,10 +785,8 @@
const RTPVideoHeaderH264& h264 = payload.type.Video.codecHeader.H264;
EXPECT_EQ(kH264FuA, h264.packetization_type);
EXPECT_EQ(kIdr, h264.nalu_type);
- ASSERT_EQ(1u, h264.nalus_length);
- EXPECT_EQ(static_cast<H264::NaluType>(kIdr), h264.nalus[0].type);
- EXPECT_EQ(-1, h264.nalus[0].sps_id);
- EXPECT_EQ(-1, h264.nalus[0].pps_id);
+ // NALU info is only expected for the first FU-A packet.
+ ASSERT_EQ(0u, h264.nalus_length);
}
}
@@ -825,4 +821,20 @@
EXPECT_TRUE(depacketizer_->Parse(&payload, kPayload, sizeof(kPayload)));
}
+TEST_F(RtpDepacketizerH264Test, TestSeiPacket) {
+ const uint8_t kPayload[] = {
+ kSei, // F=0, NRI=0, Type=6.
+ 0x03, 0x03, 0x03, 0x03 // Payload.
+ };
+ RtpDepacketizer::ParsedPayload payload;
+ ASSERT_TRUE(depacketizer_->Parse(&payload, kPayload, sizeof(kPayload)));
+ const RTPVideoHeaderH264& h264 = payload.type.Video.codecHeader.H264;
+ EXPECT_EQ(kH264SingleNalu, h264.packetization_type);
+ EXPECT_EQ(kSei, h264.nalu_type);
+ ASSERT_EQ(1u, h264.nalus_length);
+ EXPECT_EQ(static_cast<H264::NaluType>(kSei), h264.nalus[0].type);
+ EXPECT_EQ(-1, h264.nalus[0].sps_id);
+ EXPECT_EQ(-1, h264.nalus[0].pps_id);
+}
+
} // namespace webrtc
diff --git a/webrtc/modules/video_coding/decoding_state.cc b/webrtc/modules/video_coding/decoding_state.cc
index 89be9b6..2295552 100644
--- a/webrtc/modules/video_coding/decoding_state.cc
+++ b/webrtc/modules/video_coding/decoding_state.cc
@@ -10,6 +10,8 @@
#include "webrtc/modules/video_coding/decoding_state.h"
+#include "webrtc/base/logging.h"
+#include "webrtc/common_video/h264/h264_common.h"
#include "webrtc/modules/include/module_common_types.h"
#include "webrtc/modules/video_coding/frame_buffer.h"
#include "webrtc/modules/video_coding/jitter_buffer_common.h"
@@ -40,6 +42,8 @@
full_sync_ = true;
in_initial_state_ = true;
memset(frame_decoded_, 0, sizeof(frame_decoded_));
+ received_sps_.clear();
+ received_pps_.clear();
}
uint32_t VCMDecodingState::time_stamp() const {
@@ -74,6 +78,24 @@
temporal_id_ = frame->TemporalId();
tl0_pic_id_ = frame->Tl0PicId();
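+ // Remember any SPS and PPS carried by this frame so that later frames
+ // referring to them can be classified as decodable.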
+ for (const NaluInfo& nalu : frame->GetNaluInfos()) {
+ if (nalu.type == H264::NaluType::kPps) {
+ if (nalu.pps_id < 0) {
+ LOG(LS_WARNING) << "Received pps without pps id.";
+ } else if (nalu.sps_id < 0) {
+ LOG(LS_WARNING) << "Received pps without sps id.";
+ } else {
+ received_pps_[nalu.pps_id] = nalu.sps_id;
+ }
+ } else if (nalu.type == H264::NaluType::kSps) {
+ if (nalu.sps_id < 0) {
+ LOG(LS_WARNING) << "Received sps without sps id.";
+ } else {
+ received_sps_.insert(nalu.sps_id);
+ }
+ }
+ }
+
if (UsingFlexibleMode(frame)) {
uint16_t frame_index = picture_id_ % kFrameDecodedLength;
if (in_initial_state_) {
@@ -106,6 +128,8 @@
in_initial_state_ = state.in_initial_state_;
frame_decoded_cleared_to_ = state.frame_decoded_cleared_to_;
memcpy(frame_decoded_, state.frame_decoded_, sizeof(frame_decoded_));
+ received_sps_ = state.received_sps_;
+ received_pps_ = state.received_pps_;
}
bool VCMDecodingState::UpdateEmptyFrame(const VCMFrameBuffer* frame) {
@@ -183,8 +207,10 @@
// A key frame is always considered continuous as it doesn't refer to any
// frames and therefore won't introduce any errors even if prior frames are
// missing.
- if (frame->FrameType() == kVideoFrameKey)
+ if (frame->FrameType() == kVideoFrameKey &&
+ HaveSpsAndPps(frame->GetNaluInfos())) {
return true;
+ }
// When in the initial state we always require a key frame to start decoding.
if (in_initial_state_)
return false;
@@ -205,7 +231,8 @@
return ContinuousPictureId(frame->PictureId());
}
} else {
- return ContinuousSeqNum(static_cast<uint16_t>(frame->GetLowSeqNum()));
+ return ContinuousSeqNum(static_cast<uint16_t>(frame->GetLowSeqNum())) &&
+ HaveSpsAndPps(frame->GetNaluInfos());
}
}
@@ -282,4 +309,51 @@
return diff > kFrameDecodedLength / 2;
}
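+// Returns true if each NAL unit in |nalus| that references a PPS can be
+// matched to a PPS, and transitively an SPS, received either in a previously
+// decoded frame or within |nalus| itself.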
+bool VCMDecodingState::HaveSpsAndPps(const std::vector<NaluInfo>& nalus) const {
+ std::set<int> new_sps;
+ std::map<int, int> new_pps;
+ for (const NaluInfo& nalu : nalus) {
+ // Check if this nalu actually contains sps/pps information or dependencies.
+ if (nalu.sps_id == -1 && nalu.pps_id == -1)
+ continue;
+ switch (nalu.type) {
+ case H264::NaluType::kPps:
+ if (nalu.pps_id < 0) {
+ LOG(LS_WARNING) << "Received pps without pps id.";
+ } else if (nalu.sps_id < 0) {
+ LOG(LS_WARNING) << "Received pps without sps id.";
+ } else {
+ new_pps[nalu.pps_id] = nalu.sps_id;
+ }
+ break;
+ case H264::NaluType::kSps:
+ if (nalu.sps_id < 0) {
+ LOG(LS_WARNING) << "Received sps without sps id.";
+ } else {
+ new_sps.insert(nalu.sps_id);
+ }
+ break;
+ default: {
+ int needed_sps = -1;
+ auto pps_it = new_pps.find(nalu.pps_id);
+ if (pps_it != new_pps.end()) {
+ needed_sps = pps_it->second;
+ } else {
+ auto pps_it2 = received_pps_.find(nalu.pps_id);
+ if (pps_it2 == received_pps_.end()) {
+ return false;
+ }
+ needed_sps = pps_it2->second;
+ }
+ if (new_sps.find(needed_sps) == new_sps.end() &&
+ received_sps_.find(needed_sps) == received_sps_.end()) {
+ return false;
+ }
+ break;
+ }
+ }
+ }
+ return true;
+}
+
} // namespace webrtc
diff --git a/webrtc/modules/video_coding/decoding_state.h b/webrtc/modules/video_coding/decoding_state.h
index f4ea8ae..c734080 100644
--- a/webrtc/modules/video_coding/decoding_state.h
+++ b/webrtc/modules/video_coding/decoding_state.h
@@ -11,11 +11,16 @@
#ifndef WEBRTC_MODULES_VIDEO_CODING_DECODING_STATE_H_
#define WEBRTC_MODULES_VIDEO_CODING_DECODING_STATE_H_
+#include <map>
+#include <set>
+#include <vector>
+
#include "webrtc/typedefs.h"
namespace webrtc {
// Forward declarations
+struct NaluInfo;
class VCMFrameBuffer;
class VCMPacket;
@@ -61,6 +66,7 @@
bool UsingPictureId(const VCMFrameBuffer* frame) const;
bool UsingFlexibleMode(const VCMFrameBuffer* frame) const;
bool AheadOfFramesDecodedClearedTo(uint16_t index) const;
+ bool HaveSpsAndPps(const std::vector<NaluInfo>& nalus) const;
// Keep state of last decoded frame.
// TODO(mikhal/stefan): create designated classes to handle these types.
@@ -75,6 +81,8 @@
// Used to check references in flexible mode.
bool frame_decoded_[kFrameDecodedLength];
uint16_t frame_decoded_cleared_to_;
+ std::set<int> received_sps_;
+ std::map<int, int> received_pps_;
};
} // namespace webrtc
diff --git a/webrtc/modules/video_coding/frame_buffer.cc b/webrtc/modules/video_coding/frame_buffer.cc
index b332a00c..bb8ec93 100644
--- a/webrtc/modules/video_coding/frame_buffer.cc
+++ b/webrtc/modules/video_coding/frame_buffer.cc
@@ -66,6 +66,10 @@
return _sessionInfo.NonReference();
}
+std::vector<NaluInfo> VCMFrameBuffer::GetNaluInfos() const {
+ return _sessionInfo.GetNaluInfos();
+}
+
void VCMFrameBuffer::SetGofInfo(const GofInfoVP9& gof_info, size_t idx) {
_sessionInfo.SetGofInfo(gof_info, idx);
// TODO(asapersson): Consider adding hdr->VP9.ref_picture_id for testing.
diff --git a/webrtc/modules/video_coding/frame_buffer.h b/webrtc/modules/video_coding/frame_buffer.h
index f5a707e..95c5985 100644
--- a/webrtc/modules/video_coding/frame_buffer.h
+++ b/webrtc/modules/video_coding/frame_buffer.h
@@ -11,6 +11,8 @@
#ifndef WEBRTC_MODULES_VIDEO_CODING_FRAME_BUFFER_H_
#define WEBRTC_MODULES_VIDEO_CODING_FRAME_BUFFER_H_
+#include <vector>
+
#include "webrtc/modules/include/module_common_types.h"
#include "webrtc/modules/video_coding/include/video_coding.h"
#include "webrtc/modules/video_coding/encoded_frame.h"
@@ -61,6 +63,8 @@
int Tl0PicId() const;
bool NonReference() const;
+ std::vector<NaluInfo> GetNaluInfos() const;
+
void SetGofInfo(const GofInfoVP9& gof_info, size_t idx);
// Increments a counter to keep track of the number of packets of this frame
diff --git a/webrtc/modules/video_coding/jitter_buffer_unittest.cc b/webrtc/modules/video_coding/jitter_buffer_unittest.cc
index f2e5f10..425a4e9 100644
--- a/webrtc/modules/video_coding/jitter_buffer_unittest.cc
+++ b/webrtc/modules/video_coding/jitter_buffer_unittest.cc
@@ -12,7 +12,9 @@
#include <list>
#include <memory>
+#include <vector>
+#include "webrtc/common_video/h264/h264_common.h"
#include "webrtc/modules/video_coding/frame_buffer.h"
#include "webrtc/modules/video_coding/jitter_buffer.h"
#include "webrtc/modules/video_coding/media_opt_util.h"
@@ -1160,6 +1162,89 @@
jitter_buffer_->ReleaseFrame(frame_out);
}
+TEST_F(TestBasicJitterBuffer, SpsAndPpsHandling) {
+ jitter_buffer_->SetDecodeErrorMode(kNoErrors);
+
+ packet_->timestamp = timestamp_;
+ packet_->frameType = kVideoFrameKey;
+ packet_->isFirstPacket = true;
+ packet_->markerBit = true;
+ packet_->codec = kVideoCodecH264;
+ packet_->video_header.codec = kRtpVideoH264;
+ packet_->video_header.codecHeader.H264.nalu_type = H264::NaluType::kIdr;
+ packet_->video_header.codecHeader.H264.nalus[0].type = H264::NaluType::kIdr;
+ packet_->video_header.codecHeader.H264.nalus[0].sps_id = -1;
+ packet_->video_header.codecHeader.H264.nalus[0].pps_id = 0;
+ packet_->video_header.codecHeader.H264.nalus_length = 1;
+ bool retransmitted = false;
+ EXPECT_EQ(kCompleteSession,
+ jitter_buffer_->InsertPacket(*packet_, &retransmitted));
+ // Not decodable since sps and pps are missing.
+ EXPECT_EQ(nullptr, DecodeCompleteFrame());
+
+ timestamp_ += 3000;
+ packet_->timestamp = timestamp_;
+ ++seq_num_;
+ packet_->seqNum = seq_num_;
+ packet_->frameType = kVideoFrameKey;
+ packet_->isFirstPacket = true;
+ packet_->markerBit = false;
+ packet_->codec = kVideoCodecH264;
+ packet_->video_header.codec = kRtpVideoH264;
+ packet_->video_header.codecHeader.H264.nalu_type = H264::NaluType::kStapA;
+ packet_->video_header.codecHeader.H264.nalus[0].type = H264::NaluType::kSps;
+ packet_->video_header.codecHeader.H264.nalus[0].sps_id = 0;
+ packet_->video_header.codecHeader.H264.nalus[0].pps_id = -1;
+ packet_->video_header.codecHeader.H264.nalus[1].type = H264::NaluType::kPps;
+ packet_->video_header.codecHeader.H264.nalus[1].sps_id = 0;
+ packet_->video_header.codecHeader.H264.nalus[1].pps_id = 0;
+ packet_->video_header.codecHeader.H264.nalus_length = 2;
+ // Not complete since the marker bit hasn't been received.
+ EXPECT_EQ(kIncomplete,
+ jitter_buffer_->InsertPacket(*packet_, &retransmitted));
+
+ ++seq_num_;
+ packet_->seqNum = seq_num_;
+ packet_->frameType = kVideoFrameKey;
+ packet_->isFirstPacket = false;
+ packet_->markerBit = true;
+ packet_->codec = kVideoCodecH264;
+ packet_->video_header.codec = kRtpVideoH264;
+ packet_->video_header.codecHeader.H264.nalu_type = H264::NaluType::kIdr;
+ packet_->video_header.codecHeader.H264.nalus[0].type = H264::NaluType::kIdr;
+ packet_->video_header.codecHeader.H264.nalus[0].sps_id = -1;
+ packet_->video_header.codecHeader.H264.nalus[0].pps_id = 0;
+ packet_->video_header.codecHeader.H264.nalus_length = 1;
+ // Complete and decodable since the pps and sps are received in the first
+ // packet of this frame.
+ EXPECT_EQ(kCompleteSession,
+ jitter_buffer_->InsertPacket(*packet_, &retransmitted));
+ VCMEncodedFrame* frame_out = DecodeCompleteFrame();
+ ASSERT_NE(nullptr, frame_out);
+ jitter_buffer_->ReleaseFrame(frame_out);
+
+ timestamp_ += 3000;
+ packet_->timestamp = timestamp_;
+ ++seq_num_;
+ packet_->seqNum = seq_num_;
+ packet_->frameType = kVideoFrameDelta;
+ packet_->isFirstPacket = true;
+ packet_->markerBit = true;
+ packet_->codec = kVideoCodecH264;
+ packet_->video_header.codec = kRtpVideoH264;
+ packet_->video_header.codecHeader.H264.nalu_type = H264::NaluType::kSlice;
+ packet_->video_header.codecHeader.H264.nalus[0].type = H264::NaluType::kSlice;
+ packet_->video_header.codecHeader.H264.nalus[0].sps_id = -1;
+ packet_->video_header.codecHeader.H264.nalus[0].pps_id = 0;
+ packet_->video_header.codecHeader.H264.nalus_length = 1;
+ // Complete and decodable since sps, pps and a key frame have been received.
+ EXPECT_EQ(kCompleteSession,
+ jitter_buffer_->InsertPacket(*packet_, &retransmitted));
+ frame_out = DecodeCompleteFrame();
+ ASSERT_NE(nullptr, frame_out);
+ jitter_buffer_->ReleaseFrame(frame_out);
+}
+
// Test threshold conditions of decodable state.
TEST_F(TestBasicJitterBuffer, PacketLossWithSelectiveErrorsThresholdCheck) {
jitter_buffer_->SetDecodeErrorMode(kSelectiveErrors);
@@ -2633,5 +2718,4 @@
nack_list = jitter_buffer_->GetNackList(&extended);
EXPECT_EQ(0u, nack_list.size());
}
-
} // namespace webrtc
diff --git a/webrtc/modules/video_coding/session_info.cc b/webrtc/modules/video_coding/session_info.cc
index 8f136ee..39c1c0b 100644
--- a/webrtc/modules/video_coding/session_info.cc
+++ b/webrtc/modules/video_coding/session_info.cc
@@ -111,6 +111,19 @@
return packets_.front().video_header.codecHeader.VP8.nonReference;
}
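+// Collects the NaluInfo entries of all packets in this session, in packet
+// order. Returns an empty vector for non-H.264 payloads.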
+std::vector<NaluInfo> VCMSessionInfo::GetNaluInfos() const {
+ if (packets_.empty() || packets_.front().video_header.codec != kRtpVideoH264)
+ return std::vector<NaluInfo>();
+ std::vector<NaluInfo> nalu_infos;
+ for (const VCMPacket& packet : packets_) {
+ for (size_t i = 0; i < packet.video_header.codecHeader.H264.nalus_length;
+ ++i) {
+ nalu_infos.push_back(packet.video_header.codecHeader.H264.nalus[i]);
+ }
+ }
+ return nalu_infos;
+}
+
void VCMSessionInfo::SetGofInfo(const GofInfoVP9& gof_info, size_t idx) {
if (packets_.empty() || packets_.front().video_header.codec != kRtpVideoVp9 ||
packets_.front().video_header.codecHeader.VP9.flexible_mode) {
diff --git a/webrtc/modules/video_coding/session_info.h b/webrtc/modules/video_coding/session_info.h
index c2de1ac..a4eb405 100644
--- a/webrtc/modules/video_coding/session_info.h
+++ b/webrtc/modules/video_coding/session_info.h
@@ -12,6 +12,7 @@
#define WEBRTC_MODULES_VIDEO_CODING_SESSION_INFO_H_
#include <list>
+#include <vector>
#include "webrtc/modules/include/module_common_types.h"
#include "webrtc/modules/video_coding/include/video_coding.h"
@@ -81,6 +82,8 @@
int Tl0PicId() const;
bool NonReference() const;
+ std::vector<NaluInfo> GetNaluInfos() const;
+
void SetGofInfo(const GofInfoVP9& gof_info, size_t idx);
// The number of packets discarded because the decoder can't make use of