Add H264 bitstream rewriting to limit frame reordering marker in header
The VUI part of an SPS may specify max_num_reorder_frames and
max_dec_frame_buffering. These may cause a decoder to buffer a number
of frames before allowing decode, leading to delays, even if no frames
using such references (i.e. B-frames) are sent.
Because of this, we update any SPS block emitted by the encoder.
This change also includes a fair amount of refactoring of H264-related
code to reduce code duplication.
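
For reference, a minimal sketch of the intended use (illustrative only;
sps_rbsp is a placeholder for an SPS payload that has already had its
NALU header stripped and emulation bytes removed, e.g. via
H264::ParseRbsp):

  rtc::Optional<webrtc::SpsParser::SpsState> sps;
  rtc::Buffer out_sps;
  webrtc::SpsVuiRewriter::ParseResult result =
      webrtc::SpsVuiRewriter::ParseAndRewriteSps(
          sps_rbsp.data(), sps_rbsp.size(), &sps, &out_sps);
  if (result == webrtc::SpsVuiRewriter::ParseResult::kVuiRewritten) {
    // Use out_sps (already RBSP-escaped) in place of the original SPS;
    // its VUI sets max_num_reorder_frames to 0 and caps
    // max_dec_frame_buffering at max_num_ref_frames.
  }
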
BUG=
Review-Url: https://codereview.webrtc.org/1979443004
Cr-Commit-Position: refs/heads/master@{#13010}
diff --git a/webrtc/base/bitbuffer.cc b/webrtc/base/bitbuffer.cc
index 1aa245e..48a1d0c 100644
--- a/webrtc/base/bitbuffer.cc
+++ b/webrtc/base/bitbuffer.cc
@@ -293,4 +293,18 @@
return WriteBits(val_to_encode, CountBits(val_to_encode) * 2 - 1);
}
+bool BitBufferWriter::WriteSignedExponentialGolomb(int32_t val) {
+ if (val == 0) {
+ return WriteExponentialGolomb(0);
+ } else if (val > 0) {
+ uint32_t signed_val = val;
+ return WriteExponentialGolomb((signed_val * 2) - 1);
+ } else {
+ if (val == std::numeric_limits<int32_t>::min())
+ return false; // Not supported, would cause overflow.
+ uint32_t signed_val = -val;
+ return WriteExponentialGolomb(signed_val * 2);
+ }
+}
+
} // namespace rtc
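
As a quick illustration of the mapping implemented by
WriteSignedExponentialGolomb above (a sketch, not part of the diff), the
signed values 0, 1, -1, 2, -2 are written as the unsigned exp-Golomb
code numbers 0, 1, 2, 3, 4:

  uint8_t bytes[16] = {0};
  rtc::BitBufferWriter writer(bytes, sizeof(bytes));
  writer.WriteSignedExponentialGolomb(0);   // Same bits as WriteExponentialGolomb(0).
  writer.WriteSignedExponentialGolomb(1);   // Written as ue(1).
  writer.WriteSignedExponentialGolomb(-1);  // Written as ue(2).
  writer.WriteSignedExponentialGolomb(2);   // Written as ue(3).
  writer.WriteSignedExponentialGolomb(-2);  // Written as ue(4).
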
diff --git a/webrtc/base/bitbuffer.h b/webrtc/base/bitbuffer.h
index 8ea044e..b2baaa9 100644
--- a/webrtc/base/bitbuffer.h
+++ b/webrtc/base/bitbuffer.h
@@ -109,6 +109,10 @@
// Writes the exponential golomb encoded version of the supplied value.
// Returns false if there isn't enough room left for the value.
bool WriteExponentialGolomb(uint32_t val);
+ // Writes the signed exponential golomb version of the supplied value.
+ // Signed exponential golomb values are just the unsigned values mapped to the
+ // sequence 0, 1, -1, 2, -2, etc. in order.
+ bool WriteSignedExponentialGolomb(int32_t val);
private:
// The buffer, as a writable array.
diff --git a/webrtc/base/buffer.h b/webrtc/base/buffer.h
index b8b8fc0..2bb241d 100644
--- a/webrtc/base/buffer.h
+++ b/webrtc/base/buffer.h
@@ -235,6 +235,13 @@
void AppendData(const BufferT& buf) { AppendData(buf.data(), buf.size()); }
+ template <typename U,
+ typename std::enable_if<
+ internal::BufferCompat<T, U>::value>::type* = nullptr>
+ void AppendData(const U& item) {
+ AppendData(&item, 1);
+ }
+
// Append at most |max_elements| to the end of the buffer, using the function
// |setter|, which should have the following signature:
// size_t setter(ArrayView<U> view)
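
A minimal sketch of the new single-element overload added above
(illustrative only):

  rtc::Buffer buf;
  uint8_t byte = 0x42;
  buf.AppendData(byte);      // New overload: appends a single element.
  buf.AppendData(&byte, 1);  // Equivalent call through the pointer overload.
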
diff --git a/webrtc/common_video/BUILD.gn b/webrtc/common_video/BUILD.gn
index 7474974..4714606 100644
--- a/webrtc/common_video/BUILD.gn
+++ b/webrtc/common_video/BUILD.gn
@@ -18,6 +18,14 @@
source_set("common_video") {
sources = [
"bitrate_adjuster.cc",
+ "h264/h264_common.cc",
+ "h264/h264_common.h",
+ "h264/pps_parser.cc",
+ "h264/pps_parser.h",
+ "h264/sps_parser.cc",
+ "h264/sps_parser.h",
+ "h264/sps_vui_rewriter.cc",
+ "h264/sps_vui_rewriter.h",
"i420_buffer_pool.cc",
"include/bitrate_adjuster.h",
"include/frame_callback.h",
diff --git a/webrtc/common_video/common_video.gyp b/webrtc/common_video/common_video.gyp
index 47ba3e5..e3b6fa4 100644
--- a/webrtc/common_video/common_video.gyp
+++ b/webrtc/common_video/common_video.gyp
@@ -53,6 +53,14 @@
],
'sources': [
'bitrate_adjuster.cc',
+ 'h264/sps_vui_rewriter.cc',
+ 'h264/sps_vui_rewriter.h',
+ 'h264/h264_common.cc',
+ 'h264/h264_common.h',
+ 'h264/pps_parser.cc',
+ 'h264/pps_parser.h',
+ 'h264/sps_parser.cc',
+ 'h264/sps_parser.h',
'i420_buffer_pool.cc',
'video_frame.cc',
'incoming_video_stream.cc',
diff --git a/webrtc/common_video/common_video_unittests.gyp b/webrtc/common_video/common_video_unittests.gyp
index 874a16f..cb6102f 100644
--- a/webrtc/common_video/common_video_unittests.gyp
+++ b/webrtc/common_video/common_video_unittests.gyp
@@ -21,6 +21,9 @@
],
'sources': [
'bitrate_adjuster_unittest.cc',
+ 'h264/pps_parser_unittest.cc',
+ 'h264/sps_parser_unittest.cc',
+ 'h264/sps_vui_rewriter_unittest.cc',
'i420_buffer_pool_unittest.cc',
'i420_video_frame_unittest.cc',
'libyuv/libyuv_unittest.cc',
diff --git a/webrtc/common_video/h264/h264_common.cc b/webrtc/common_video/h264/h264_common.cc
new file mode 100644
index 0000000..efdaded
--- /dev/null
+++ b/webrtc/common_video/h264/h264_common.cc
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/common_video/h264/h264_common.h"
+
+namespace webrtc {
+namespace H264 {
+
+const uint8_t kNaluTypeMask = 0x1F;
+
+std::vector<NaluIndex> FindNaluIndices(const uint8_t* buffer,
+ size_t buffer_size) {
+ // This is sorta like Boyer-Moore, but with only the first optimization step:
+ // given a 3-byte sequence we're looking at, if the 3rd byte isn't 1 or 0,
+ // skip ahead to the next 3-byte sequence. 0s and 1s are relatively rare, so
+ // this will skip the majority of reads/checks.
+ RTC_CHECK_GE(buffer_size, kNaluShortStartSequenceSize);
+ std::vector<NaluIndex> sequences;
+ const size_t end = buffer_size - kNaluShortStartSequenceSize;
+ for (size_t i = 0; i < end;) {
+ if (buffer[i + 2] > 1) {
+ i += 3;
+ } else if (buffer[i + 2] == 1 && buffer[i + 1] == 0 && buffer[i] == 0) {
+ // We found a start sequence, now check if it was a 3 or 4 byte one.
+ NaluIndex index = {i, i + 3, 0};
+ if (index.start_offset > 0 && buffer[index.start_offset - 1] == 0)
+ --index.start_offset;
+
+ // Update length of previous entry.
+ auto it = sequences.rbegin();
+ if (it != sequences.rend())
+ it->payload_size = index.start_offset - it->payload_start_offset;
+
+ sequences.push_back(index);
+
+ i += 3;
+ } else {
+ ++i;
+ }
+ }
+
+ // Update length of last entry, if any.
+ auto it = sequences.rbegin();
+ if (it != sequences.rend())
+ it->payload_size = buffer_size - it->payload_start_offset;
+
+ return sequences;
+}
+
+NaluType ParseNaluType(uint8_t data) {
+ return static_cast<NaluType>(data & kNaluTypeMask);
+}
+
+std::unique_ptr<rtc::Buffer> ParseRbsp(const uint8_t* data, size_t length) {
+ std::unique_ptr<rtc::Buffer> rbsp_buffer(new rtc::Buffer());
+ const char* sps_bytes = reinterpret_cast<const char*>(data);
+ for (size_t i = 0; i < length;) {
+ // Be careful about over/underflow here. length - 3 can underflow, and
+ // i + 3 can overflow, but length - i can't, because i < length above, and
+ // that expression will produce the number of bytes left in the stream
+ // including the byte at i.
+ if (length - i >= 3 && data[i] == 0 && data[i + 1] == 0 &&
+ data[i + 2] == 3) {
+ // Two rbsp bytes + the emulation byte.
+ rbsp_buffer->AppendData(sps_bytes + i, 2);
+ i += 3;
+ } else {
+ // Single rbsp byte.
+ rbsp_buffer->AppendData(sps_bytes[i]);
+ ++i;
+ }
+ }
+ return rbsp_buffer;
+}
+
+void WriteRbsp(const uint8_t* bytes, size_t length, rtc::Buffer* destination) {
+ static const uint8_t kZerosInStartSequence = 2;
+ static const uint8_t kEmulationByte = 0x03u;
+ size_t num_consecutive_zeros = 0;
+
+ for (size_t i = 0; i < length; ++i) {
+ uint8_t byte = bytes[i];
+ if (byte <= kEmulationByte &&
+ num_consecutive_zeros >= kZerosInStartSequence) {
+ // Need to escape.
+ destination->AppendData(kEmulationByte);
+ num_consecutive_zeros = 0;
+ }
+ destination->AppendData(byte);
+ if (byte == 0) {
+ ++num_consecutive_zeros;
+ } else {
+ num_consecutive_zeros = 0;
+ }
+ }
+}
+
+} // namespace H264
+} // namespace webrtc
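
A sketch of how these helpers compose when scanning an encoded frame for
NAL units (illustrative only; encoded_buffer and encoded_size are
placeholders):

  std::vector<webrtc::H264::NaluIndex> nalus =
      webrtc::H264::FindNaluIndices(encoded_buffer, encoded_size);
  for (const webrtc::H264::NaluIndex& nalu : nalus) {
    webrtc::H264::NaluType type =
        webrtc::H264::ParseNaluType(encoded_buffer[nalu.payload_start_offset]);
    if (type == webrtc::H264::kSps) {
      // The SPS payload (still emulation-escaped) starts kNaluTypeSize bytes
      // after payload_start_offset and is payload_size - kNaluTypeSize long.
    }
  }
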
diff --git a/webrtc/common_video/h264/h264_common.h b/webrtc/common_video/h264/h264_common.h
new file mode 100644
index 0000000..c5cbd55
--- /dev/null
+++ b/webrtc/common_video/h264/h264_common.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_COMMON_VIDEO_H264_H264_COMMON_H_
+#define WEBRTC_COMMON_VIDEO_H264_H264_COMMON_H_
+
+#include <memory>
+#include <vector>
+
+#include "webrtc/base/common.h"
+#include "webrtc/base/buffer.h"
+
+namespace webrtc {
+
+namespace H264 {
+// The size of a full NALU start sequence {0 0 0 1}, used for the first NALU
+// of an access unit, and for SPS and PPS blocks.
+const size_t kNaluLongStartSequenceSize = 4;
+
+// The size of a shortened NALU start sequence {0 0 1}, that may be used if
+// not the first NALU of an access unit or an SPS or PPS block.
+const size_t kNaluShortStartSequenceSize = 3;
+
+// The size of the NALU type byte (1).
+const size_t kNaluTypeSize = 1;
+
+enum NaluType : uint8_t {
+ kSlice = 1,
+ kIdr = 5,
+ kSei = 6,
+ kSps = 7,
+ kPps = 8,
+ kAud = 9,
+ kEndOfSequence = 10,
+ kEndOfStream = 11,
+ kFiller = 12,
+ kStapA = 24,
+ kFuA = 28
+};
+
+enum SliceType : uint8_t { kP = 0, kB = 1, kI = 2, kSp = 3, kSi = 4 };
+
+struct NaluIndex {
+ // Start index of NALU, including start sequence.
+ size_t start_offset;
+ // Start index of NALU payload, typically type header.
+ size_t payload_start_offset;
+ // Length of NALU payload, in bytes, counting from payload_start_offset.
+ size_t payload_size;
+};
+
+// Returns a vector of the NALU indices in the given buffer.
+std::vector<NaluIndex> FindNaluIndices(const uint8_t* buffer,
+ size_t buffer_size);
+
+// Get the NAL type from the header byte immediately following start sequence.
+NaluType ParseNaluType(uint8_t data);
+
+// Methods for parsing and writing RBSP. See section 7.4.1 of the H264 spec.
+//
+// The following sequences are illegal, and need to be escaped when encoding:
+// 00 00 00 -> 00 00 03 00
+// 00 00 01 -> 00 00 03 01
+// 00 00 02 -> 00 00 03 02
+// And things in the source that look like the emulation byte pattern (00 00 03)
+// need to have an extra emulation byte added, so it's removed when decoding:
+// 00 00 03 -> 00 00 03 03
+//
+// Decoding is simply a matter of finding any 00 00 03 sequence and removing
+// the 03 emulation byte.
+
+// Parse the given data and remove any emulation byte escaping.
+std::unique_ptr<rtc::Buffer> ParseRbsp(const uint8_t* data, size_t length);
+
+ // Write the given data to the destination buffer, inserting emulation
+ // bytes in order to escape any data that could be interpreted as a start
+ // sequence.
+void WriteRbsp(const uint8_t* bytes, size_t length, rtc::Buffer* destination);
+} // namespace H264
+} // namespace webrtc
+
+#endif // WEBRTC_COMMON_VIDEO_H264_H264_COMMON_H_
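
For example (a sketch, not part of the diff), the escaped sequence
00 00 03 01 parses back to the RBSP bytes 00 00 01, and writing those
bytes re-inserts the emulation byte:

  const uint8_t escaped[] = {0x00, 0x00, 0x03, 0x01};
  std::unique_ptr<rtc::Buffer> rbsp =
      webrtc::H264::ParseRbsp(escaped, sizeof(escaped));
  // rbsp now holds 00 00 01.
  rtc::Buffer re_escaped;
  webrtc::H264::WriteRbsp(rbsp->data(), rbsp->size(), &re_escaped);
  // re_escaped holds 00 00 03 01 again.
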
diff --git a/webrtc/common_video/h264/pps_parser.cc b/webrtc/common_video/h264/pps_parser.cc
new file mode 100644
index 0000000..dd5d63d
--- /dev/null
+++ b/webrtc/common_video/h264/pps_parser.cc
@@ -0,0 +1,148 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/common_video/h264/pps_parser.h"
+
+#include "webrtc/common_video/h264/h264_common.h"
+#include "webrtc/base/bitbuffer.h"
+#include "webrtc/base/buffer.h"
+#include "webrtc/base/logging.h"
+
+#define RETURN_EMPTY_ON_FAIL(x) \
+ if (!(x)) { \
+ return rtc::Optional<PpsParser::PpsState>(); \
+ }
+
+namespace webrtc {
+
+// General note: this is based off the 02/2014 version of the H.264 standard.
+// You can find it on this page:
+// http://www.itu.int/rec/T-REC-H.264
+
+rtc::Optional<PpsParser::PpsState> PpsParser::ParsePps(const uint8_t* data,
+ size_t length) {
+ // First, parse out rbsp, which is basically the source buffer minus emulation
+ // bytes (the last byte of a 0x00 0x00 0x03 sequence). RBSP is defined in
+ // section 7.3.1 of the H.264 standard.
+ std::unique_ptr<rtc::Buffer> unpacked_buffer = H264::ParseRbsp(data, length);
+ rtc::BitBuffer bit_buffer(unpacked_buffer->data(), unpacked_buffer->size());
+ return ParseInternal(&bit_buffer);
+}
+
+rtc::Optional<PpsParser::PpsState> PpsParser::ParseInternal(
+ rtc::BitBuffer* bit_buffer) {
+ PpsState pps;
+
+ uint32_t bits_tmp;
+ uint32_t golomb_ignored;
+ // pic_parameter_set_id: ue(v)
+ RETURN_EMPTY_ON_FAIL(bit_buffer->ReadExponentialGolomb(&golomb_ignored));
+ // seq_parameter_set_id: ue(v)
+ RETURN_EMPTY_ON_FAIL(bit_buffer->ReadExponentialGolomb(&golomb_ignored));
+ // entropy_coding_mode_flag: u(1)
+ uint32_t entropy_coding_mode_flag;
+ RETURN_EMPTY_ON_FAIL(bit_buffer->ReadBits(&entropy_coding_mode_flag, 1));
+ // TODO(pbos): Implement CABAC support if spotted in the wild.
+ RTC_CHECK(entropy_coding_mode_flag == 0)
+ << "Don't know how to parse CABAC streams.";
+ // bottom_field_pic_order_in_frame_present_flag: u(1)
+ uint32_t bottom_field_pic_order_in_frame_present_flag;
+ RETURN_EMPTY_ON_FAIL(
+ bit_buffer->ReadBits(&bottom_field_pic_order_in_frame_present_flag, 1));
+ pps.bottom_field_pic_order_in_frame_present_flag =
+ bottom_field_pic_order_in_frame_present_flag != 0;
+
+ // num_slice_groups_minus1: ue(v)
+ uint32_t num_slice_groups_minus1;
+ RETURN_EMPTY_ON_FAIL(
+ bit_buffer->ReadExponentialGolomb(&num_slice_groups_minus1));
+ if (num_slice_groups_minus1 > 0) {
+ uint32_t slice_group_map_type;
+ // slice_group_map_type: ue(v)
+ RETURN_EMPTY_ON_FAIL(
+ bit_buffer->ReadExponentialGolomb(&slice_group_map_type));
+ if (slice_group_map_type == 0) {
+ for (uint32_t i_group = 0; i_group <= num_slice_groups_minus1;
+ ++i_group) {
+ // run_length_minus1[iGroup]: ue(v)
+ RETURN_EMPTY_ON_FAIL(
+ bit_buffer->ReadExponentialGolomb(&golomb_ignored));
+ }
+ } else if (slice_group_map_type == 1) {
+ // TODO(sprang): Implement support for dispersed slice group map type.
+ // See 8.2.2.2 Specification for dispersed slice group map type.
+ } else if (slice_group_map_type == 2) {
+ for (uint32_t i_group = 0; i_group <= num_slice_groups_minus1;
+ ++i_group) {
+ // top_left[iGroup]: ue(v)
+ RETURN_EMPTY_ON_FAIL(
+ bit_buffer->ReadExponentialGolomb(&golomb_ignored));
+ // bottom_right[iGroup]: ue(v)
+ RETURN_EMPTY_ON_FAIL(
+ bit_buffer->ReadExponentialGolomb(&golomb_ignored));
+ }
+ } else if (slice_group_map_type == 3 || slice_group_map_type == 4 ||
+ slice_group_map_type == 5) {
+ // slice_group_change_direction_flag: u(1)
+ RETURN_EMPTY_ON_FAIL(bit_buffer->ReadBits(&bits_tmp, 1));
+ // slice_group_change_rate_minus1: ue(v)
+ RETURN_EMPTY_ON_FAIL(bit_buffer->ReadExponentialGolomb(&golomb_ignored));
+ } else if (slice_group_map_type == 6) {
+ // pic_size_in_map_units_minus1: ue(v)
+ uint32_t pic_size_in_map_units_minus1;
+ RETURN_EMPTY_ON_FAIL(
+ bit_buffer->ReadExponentialGolomb(&pic_size_in_map_units_minus1));
+ uint32_t slice_group_id_bits = 0;
+ uint32_t num_slice_groups = num_slice_groups_minus1 + 1;
+ // If num_slice_groups is not a power of two an additional bit is required
+ // to account for the ceil() of log2() below.
+ if ((num_slice_groups & (num_slice_groups - 1)) != 0)
+ ++slice_group_id_bits;
+ while (num_slice_groups > 0) {
+ num_slice_groups >>= 1;
+ ++slice_group_id_bits;
+ }
+ for (uint32_t i = 0; i <= pic_size_in_map_units_minus1; i++) {
+ // slice_group_id[i]: u(v)
+ // Represented by ceil(log2(num_slice_groups_minus1 + 1)) bits.
+ RETURN_EMPTY_ON_FAIL(
+ bit_buffer->ReadBits(&bits_tmp, slice_group_id_bits));
+ }
+ }
+ }
+ // num_ref_idx_l0_default_active_minus1: ue(v)
+ RETURN_EMPTY_ON_FAIL(bit_buffer->ReadExponentialGolomb(&golomb_ignored));
+ // num_ref_idx_l1_default_active_minus1: ue(v)
+ RETURN_EMPTY_ON_FAIL(bit_buffer->ReadExponentialGolomb(&golomb_ignored));
+ // weighted_pred_flag: u(1)
+ uint32_t weighted_pred_flag;
+ RETURN_EMPTY_ON_FAIL(bit_buffer->ReadBits(&weighted_pred_flag, 1));
+ pps.weighted_pred_flag = weighted_pred_flag != 0;
+ // weighted_bipred_idc: u(2)
+ RETURN_EMPTY_ON_FAIL(bit_buffer->ReadBits(&pps.weighted_bipred_idc, 2));
+
+ // pic_init_qp_minus26: se(v)
+ RETURN_EMPTY_ON_FAIL(
+ bit_buffer->ReadSignedExponentialGolomb(&pps.pic_init_qp_minus26));
+ // pic_init_qs_minus26: se(v)
+ RETURN_EMPTY_ON_FAIL(bit_buffer->ReadExponentialGolomb(&golomb_ignored));
+ // chroma_qp_index_offset: se(v)
+ RETURN_EMPTY_ON_FAIL(bit_buffer->ReadExponentialGolomb(&golomb_ignored));
+ // deblocking_filter_control_present_flag: u(1)
+ // constrained_intra_pred_flag: u(1)
+ RETURN_EMPTY_ON_FAIL(bit_buffer->ReadBits(&bits_tmp, 2));
+ // redundant_pic_cnt_present_flag: u(1)
+ RETURN_EMPTY_ON_FAIL(
+ bit_buffer->ReadBits(&pps.redundant_pic_cnt_present_flag, 1));
+
+ return rtc::Optional<PpsParser::PpsState>(pps);
+}
+
+} // namespace webrtc
diff --git a/webrtc/common_video/h264/pps_parser.h b/webrtc/common_video/h264/pps_parser.h
new file mode 100644
index 0000000..f91b65d
--- /dev/null
+++ b/webrtc/common_video/h264/pps_parser.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_COMMON_VIDEO_H264_PPS_PARSER_H_
+#define WEBRTC_COMMON_VIDEO_H264_PPS_PARSER_H_
+
+#include "webrtc/base/common.h"
+#include "webrtc/base/optional.h"
+
+namespace rtc {
+class BitBuffer;
+}
+
+namespace webrtc {
+
+// A class for parsing out picture parameter set (PPS) data from an H264 NALU.
+class PpsParser {
+ public:
+ // The parsed state of the PPS. Only some select values are stored.
+ // Add more as they are actually needed.
+ struct PpsState {
+ PpsState() = default;
+
+ bool bottom_field_pic_order_in_frame_present_flag = false;
+ bool weighted_pred_flag = false;
+ uint32_t weighted_bipred_idc = 0;
+ uint32_t redundant_pic_cnt_present_flag = 0;
+ int pic_init_qp_minus26 = 0;
+ };
+
+ // Unpack RBSP and parse PPS state from the supplied buffer.
+ static rtc::Optional<PpsState> ParsePps(const uint8_t* data, size_t length);
+
+ protected:
+ // Parse the PPS state, for a bit buffer where RBSP decoding has already been
+ // performed.
+ static rtc::Optional<PpsState> ParseInternal(rtc::BitBuffer* bit_buffer);
+};
+
+} // namespace webrtc
+
+#endif // WEBRTC_COMMON_VIDEO_H264_PPS_PARSER_H_
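
A minimal usage sketch (illustrative only; pps_payload and pps_length
are placeholders for a PPS NALU payload with the one-byte NALU header
already stripped):

  rtc::Optional<webrtc::PpsParser::PpsState> pps =
      webrtc::PpsParser::ParsePps(pps_payload, pps_length);
  if (pps) {
    // Fields such as pps->pic_init_qp_minus26 and pps->weighted_pred_flag
    // are now available, e.g. for slice header parsing.
  }
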
diff --git a/webrtc/common_video/h264/pps_parser_unittest.cc b/webrtc/common_video/h264/pps_parser_unittest.cc
new file mode 100644
index 0000000..1983439
--- /dev/null
+++ b/webrtc/common_video/h264/pps_parser_unittest.cc
@@ -0,0 +1,201 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/common_video/h264/pps_parser.h"
+
+#include <limits>
+
+#include "testing/gtest/include/gtest/gtest.h"
+
+#include "webrtc/base/bitbuffer.h"
+#include "webrtc/base/buffer.h"
+#include "webrtc/common_video/h264/h264_common.h"
+
+namespace webrtc {
+
+static const size_t kPpsBufferMaxSize = 256;
+static const uint32_t kIgnored = 0;
+
+void WritePps(const PpsParser::PpsState& pps,
+ int slice_group_map_type,
+ int num_slice_groups,
+ int pic_size_in_map_units,
+ rtc::Buffer* out_buffer) {
+ uint8_t data[kPpsBufferMaxSize] = {0};
+ rtc::BitBufferWriter bit_buffer(data, kPpsBufferMaxSize);
+
+ // pic_parameter_set_id: ue(v)
+ bit_buffer.WriteExponentialGolomb(kIgnored);
+ // seq_parameter_set_id: ue(v)
+ bit_buffer.WriteExponentialGolomb(kIgnored);
+ // entropy_coding_mode_flag: u(1)
+ bit_buffer.WriteBits(kIgnored, 1);
+ // bottom_field_pic_order_in_frame_present_flag: u(1)
+ bit_buffer.WriteBits(pps.bottom_field_pic_order_in_frame_present_flag ? 1 : 0,
+ 1);
+ // num_slice_groups_minus1: ue(v)
+ RTC_CHECK_GT(num_slice_groups, 0);
+ bit_buffer.WriteExponentialGolomb(num_slice_groups - 1);
+
+ if (num_slice_groups > 1) {
+ // slice_group_map_type: ue(v)
+ bit_buffer.WriteExponentialGolomb(slice_group_map_type);
+ switch (slice_group_map_type) {
+ case 0:
+ for (int i = 0; i < num_slice_groups; ++i) {
+ // run_length_minus1[iGroup]: ue(v)
+ bit_buffer.WriteExponentialGolomb(kIgnored);
+ }
+ break;
+ case 2:
+ for (int i = 0; i < num_slice_groups; ++i) {
+ // top_left[iGroup]: ue(v)
+ bit_buffer.WriteExponentialGolomb(kIgnored);
+ // bottom_right[iGroup]: ue(v)
+ bit_buffer.WriteExponentialGolomb(kIgnored);
+ }
+ break;
+ case 3:
+ case 4:
+ case 5:
+ // slice_group_change_direction_flag: u(1)
+ bit_buffer.WriteBits(kIgnored, 1);
+ // slice_group_change_rate_minus1: ue(v)
+ bit_buffer.WriteExponentialGolomb(kIgnored);
+ break;
+ case 6: {
+ bit_buffer.WriteExponentialGolomb(pic_size_in_map_units - 1);
+
+ uint32_t slice_group_id_bits = 0;
+ // If num_slice_groups is not a power of two an additional bit is
+ // required to account for the ceil() of log2() below.
+ if ((num_slice_groups & (num_slice_groups - 1)) != 0)
+ ++slice_group_id_bits;
+ while (num_slice_groups > 0) {
+ num_slice_groups >>= 1;
+ ++slice_group_id_bits;
+ }
+
+ for (int i = 0; i < pic_size_in_map_units; ++i) {
+ // slice_group_id[i]: u(v)
+ // Represented by ceil(log2(num_slice_groups_minus1 + 1)) bits.
+ bit_buffer.WriteBits(kIgnored, slice_group_id_bits);
+ }
+ break;
+ }
+ default:
+ RTC_NOTREACHED();
+ }
+ }
+
+ // num_ref_idx_l0_default_active_minus1: ue(v)
+ bit_buffer.WriteExponentialGolomb(kIgnored);
+ // num_ref_idx_l1_default_active_minus1: ue(v)
+ bit_buffer.WriteExponentialGolomb(kIgnored);
+ // weighted_pred_flag: u(1)
+ bit_buffer.WriteBits(pps.weighted_pred_flag ? 1 : 0, 1);
+ // weighted_bipred_idc: u(2)
+ bit_buffer.WriteBits(pps.weighted_bipred_idc, 2);
+
+ // pic_init_qp_minus26: se(v)
+ bit_buffer.WriteSignedExponentialGolomb(pps.pic_init_qp_minus26);
+ // pic_init_qs_minus26: se(v)
+ bit_buffer.WriteExponentialGolomb(kIgnored);
+ // chroma_qp_index_offset: se(v)
+ bit_buffer.WriteExponentialGolomb(kIgnored);
+ // deblocking_filter_control_present_flag: u(1)
+ // constrained_intra_pred_flag: u(1)
+ bit_buffer.WriteBits(kIgnored, 2);
+ // redundant_pic_cnt_present_flag: u(1)
+ bit_buffer.WriteBits(pps.redundant_pic_cnt_present_flag, 1);
+
+ size_t byte_offset;
+ size_t bit_offset;
+ bit_buffer.GetCurrentOffset(&byte_offset, &bit_offset);
+ if (bit_offset > 0) {
+ bit_buffer.WriteBits(0, 8 - bit_offset);
+ bit_buffer.GetCurrentOffset(&byte_offset, &bit_offset);
+ }
+
+ H264::WriteRbsp(data, byte_offset, out_buffer);
+}
+
+class PpsParserTest : public ::testing::Test {
+ public:
+ PpsParserTest() {}
+ virtual ~PpsParserTest() {}
+
+ void RunTest() {
+ VerifyParsing(generated_pps_, 0, 1, 0);
+ const int kMaxSliceGroups = 17; // Arbitrarily large.
+ const int kMaxMapType = 6;
+ int slice_group_bits = 0;
+ for (int slice_group = 2; slice_group < kMaxSliceGroups; ++slice_group) {
+ if ((slice_group & (slice_group - 1)) == 0) {
+ // Slice group at a new power of two - increase slice_group_bits.
+ ++slice_group_bits;
+ }
+ for (int map_type = 0; map_type <= kMaxMapType; ++map_type) {
+ if (map_type == 1) {
+ // TODO(sprang): Implement support for dispersed slice group map type.
+ // See 8.2.2.2 Specification for dispersed slice group map type.
+ continue;
+ } else if (map_type == 6) {
+ int max_pic_size = 1 << slice_group_bits;
+ for (int pic_size = 1; pic_size < max_pic_size; ++pic_size)
+ VerifyParsing(generated_pps_, map_type, slice_group, pic_size);
+ } else {
+ VerifyParsing(generated_pps_, map_type, slice_group, 0);
+ }
+ }
+ }
+ }
+
+ void VerifyParsing(const PpsParser::PpsState& pps,
+ int slice_group_map_type,
+ int num_slice_groups,
+ int pic_size_in_map_units) {
+ buffer_.Clear();
+ WritePps(pps, slice_group_map_type, num_slice_groups, pic_size_in_map_units,
+ &buffer_);
+ parsed_pps_ = PpsParser::ParsePps(buffer_.data(), buffer_.size());
+ EXPECT_TRUE(static_cast<bool>(parsed_pps_));
+ EXPECT_EQ(pps.bottom_field_pic_order_in_frame_present_flag,
+ parsed_pps_->bottom_field_pic_order_in_frame_present_flag);
+ EXPECT_EQ(pps.weighted_pred_flag, parsed_pps_->weighted_pred_flag);
+ EXPECT_EQ(pps.weighted_bipred_idc, parsed_pps_->weighted_bipred_idc);
+ EXPECT_EQ(pps.redundant_pic_cnt_present_flag,
+ parsed_pps_->redundant_pic_cnt_present_flag);
+ EXPECT_EQ(pps.pic_init_qp_minus26, parsed_pps_->pic_init_qp_minus26);
+ }
+
+ PpsParser::PpsState generated_pps_;
+ rtc::Buffer buffer_;
+ rtc::Optional<PpsParser::PpsState> parsed_pps_;
+};
+
+TEST_F(PpsParserTest, ZeroPps) {
+ RunTest();
+}
+
+TEST_F(PpsParserTest, MaxPps) {
+ generated_pps_.bottom_field_pic_order_in_frame_present_flag = true;
+ generated_pps_.pic_init_qp_minus26 = std::numeric_limits<int32_t>::max();
+ generated_pps_.redundant_pic_cnt_present_flag = 1; // 1 bit value.
+ generated_pps_.weighted_bipred_idc = (1 << 2) - 1; // 2 bit value.
+ generated_pps_.weighted_pred_flag = true;
+ RunTest();
+
+ generated_pps_.pic_init_qp_minus26 = std::numeric_limits<int32_t>::min() + 1;
+ RunTest();
+}
+
+} // namespace webrtc
diff --git a/webrtc/common_video/h264/sps_parser.cc b/webrtc/common_video/h264/sps_parser.cc
new file mode 100644
index 0000000..cf4b36d
--- /dev/null
+++ b/webrtc/common_video/h264/sps_parser.cc
@@ -0,0 +1,219 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/common_video/h264/sps_parser.h"
+
+#include "webrtc/common_video/h264/h264_common.h"
+#include "webrtc/base/bitbuffer.h"
+#include "webrtc/base/bytebuffer.h"
+#include "webrtc/base/logging.h"
+
+typedef rtc::Optional<webrtc::SpsParser::SpsState> OptionalSps;
+
+#define RETURN_EMPTY_ON_FAIL(x) \
+ if (!(x)) { \
+ return OptionalSps(); \
+ }
+
+namespace webrtc {
+
+// General note: this is based off the 02/2014 version of the H.264 standard.
+// You can find it on this page:
+// http://www.itu.int/rec/T-REC-H.264
+
+// Unpack RBSP and parse SPS state from the supplied buffer.
+rtc::Optional<SpsParser::SpsState> SpsParser::ParseSps(const uint8_t* data,
+ size_t length) {
+ std::unique_ptr<rtc::Buffer> unpacked_buffer = H264::ParseRbsp(data, length);
+ rtc::BitBuffer bit_buffer(unpacked_buffer->data(), unpacked_buffer->size());
+ return ParseSpsUpToVui(&bit_buffer);
+}
+
+rtc::Optional<SpsParser::SpsState> SpsParser::ParseSpsUpToVui(
+ rtc::BitBuffer* buffer) {
+ // Now, we need to use a bit buffer to parse through the actual AVC SPS
+ // format. See Section 7.3.2.1.1 ("Sequence parameter set data syntax") of the
+ // H.264 standard for a complete description.
+ // Since we only care about a few fields (the resolution plus some values
+ // needed later for VUI rewriting), we ignore the majority of fields, but we
+ // still have to actively parse through a lot of the data, since many of
+ // the fields have variable size.
+ // We're particularly interested in:
+ // chroma_format_idc -> affects crop units
+ // pic_{width,height}_* -> resolution of the frame in macroblocks (16x16).
+ // frame_crop_*_offset -> crop information
+
+ SpsState sps;
+
+ // Scratch variable for golomb values we need to read but don't otherwise use.
+ uint32_t golomb_ignored;
+
+ // chroma_format_idc will be ChromaArrayType if separate_colour_plane_flag is
+ // 0. It defaults to 1, when not specified.
+ uint32_t chroma_format_idc = 1;
+
+ // profile_idc: u(8). We need it to determine if we need to read/skip chroma
+ // formats.
+ uint8_t profile_idc;
+ RETURN_EMPTY_ON_FAIL(buffer->ReadUInt8(&profile_idc));
+ // constraint_set0_flag through constraint_set5_flag + reserved_zero_2bits
+ // 1 bit each for the flags + 2 bits = 8 bits = 1 byte.
+ RETURN_EMPTY_ON_FAIL(buffer->ConsumeBytes(1));
+ // level_idc: u(8)
+ RETURN_EMPTY_ON_FAIL(buffer->ConsumeBytes(1));
+ // seq_parameter_set_id: ue(v)
+ RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&golomb_ignored));
+ sps.separate_colour_plane_flag = 0;
+ // See if profile_idc has chroma format information.
+ if (profile_idc == 100 || profile_idc == 110 || profile_idc == 122 ||
+ profile_idc == 244 || profile_idc == 44 || profile_idc == 83 ||
+ profile_idc == 86 || profile_idc == 118 || profile_idc == 128 ||
+ profile_idc == 138 || profile_idc == 139 || profile_idc == 134) {
+ // chroma_format_idc: ue(v)
+ RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&chroma_format_idc));
+ if (chroma_format_idc == 3) {
+ // separate_colour_plane_flag: u(1)
+ RETURN_EMPTY_ON_FAIL(
+ buffer->ReadBits(&sps.separate_colour_plane_flag, 1));
+ }
+ // bit_depth_luma_minus8: ue(v)
+ RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&golomb_ignored));
+ // bit_depth_chroma_minus8: ue(v)
+ RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&golomb_ignored));
+ // qpprime_y_zero_transform_bypass_flag: u(1)
+ RETURN_EMPTY_ON_FAIL(buffer->ConsumeBits(1));
+ // seq_scaling_matrix_present_flag: u(1)
+ uint32_t seq_scaling_matrix_present_flag;
+ RETURN_EMPTY_ON_FAIL(buffer->ReadBits(&seq_scaling_matrix_present_flag, 1));
+ if (seq_scaling_matrix_present_flag) {
+ // seq_scaling_list_present_flags. Either 8 or 12, depending on
+ // chroma_format_idc.
+ uint32_t seq_scaling_list_present_flags;
+ if (chroma_format_idc != 3) {
+ RETURN_EMPTY_ON_FAIL(
+ buffer->ReadBits(&seq_scaling_list_present_flags, 8));
+ } else {
+ RETURN_EMPTY_ON_FAIL(
+ buffer->ReadBits(&seq_scaling_list_present_flags, 12));
+ }
+ // We don't support reading the sequence scaling list, and we don't really
+ // see/use them in practice, so we'll just reject the full sps if we see
+ // any provided.
+ if (seq_scaling_list_present_flags > 0) {
+ LOG(LS_WARNING) << "SPS contains scaling lists, which are unsupported.";
+ return OptionalSps();
+ }
+ }
+ }
+ // log2_max_frame_num_minus4: ue(v)
+ RETURN_EMPTY_ON_FAIL(
+ buffer->ReadExponentialGolomb(&sps.log2_max_frame_num_minus4));
+ // pic_order_cnt_type: ue(v)
+ RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&sps.pic_order_cnt_type));
+ if (sps.pic_order_cnt_type == 0) {
+ // log2_max_pic_order_cnt_lsb_minus4: ue(v)
+ RETURN_EMPTY_ON_FAIL(
+ buffer->ReadExponentialGolomb(&sps.log2_max_pic_order_cnt_lsb_minus4));
+ } else if (sps.pic_order_cnt_type == 1) {
+ // delta_pic_order_always_zero_flag: u(1)
+ RETURN_EMPTY_ON_FAIL(
+ buffer->ReadBits(&sps.delta_pic_order_always_zero_flag, 1));
+ // offset_for_non_ref_pic: se(v)
+ RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&golomb_ignored));
+ // offset_for_top_to_bottom_field: se(v)
+ RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&golomb_ignored));
+ // num_ref_frames_in_pic_order_cnt_cycle: ue(v)
+ uint32_t num_ref_frames_in_pic_order_cnt_cycle;
+ RETURN_EMPTY_ON_FAIL(
+ buffer->ReadExponentialGolomb(&num_ref_frames_in_pic_order_cnt_cycle));
+ for (size_t i = 0; i < num_ref_frames_in_pic_order_cnt_cycle; ++i) {
+ // offset_for_ref_frame[i]: se(v)
+ RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&golomb_ignored));
+ }
+ }
+ // max_num_ref_frames: ue(v)
+ RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&sps.max_num_ref_frames));
+ // gaps_in_frame_num_value_allowed_flag: u(1)
+ RETURN_EMPTY_ON_FAIL(buffer->ConsumeBits(1));
+ //
+ // IMPORTANT ONES! Now we're getting to resolution. First we read the pic
+ // width/height in macroblocks (16x16), which gives us the base resolution,
+ // and then we continue on until we hit the frame crop offsets, which are used
+ // to signify resolutions that aren't multiples of 16.
+ //
+ // pic_width_in_mbs_minus1: ue(v)
+ uint32_t pic_width_in_mbs_minus1;
+ RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&pic_width_in_mbs_minus1));
+ // pic_height_in_map_units_minus1: ue(v)
+ uint32_t pic_height_in_map_units_minus1;
+ RETURN_EMPTY_ON_FAIL(
+ buffer->ReadExponentialGolomb(&pic_height_in_map_units_minus1));
+ // frame_mbs_only_flag: u(1)
+ RETURN_EMPTY_ON_FAIL(buffer->ReadBits(&sps.frame_mbs_only_flag, 1));
+ if (!sps.frame_mbs_only_flag) {
+ // mb_adaptive_frame_field_flag: u(1)
+ RETURN_EMPTY_ON_FAIL(buffer->ConsumeBits(1));
+ }
+ // direct_8x8_inference_flag: u(1)
+ RETURN_EMPTY_ON_FAIL(buffer->ConsumeBits(1));
+ //
+ // MORE IMPORTANT ONES! Now we're at the frame crop information.
+ //
+ // frame_cropping_flag: u(1)
+ uint32_t frame_cropping_flag;
+ uint32_t frame_crop_left_offset = 0;
+ uint32_t frame_crop_right_offset = 0;
+ uint32_t frame_crop_top_offset = 0;
+ uint32_t frame_crop_bottom_offset = 0;
+ RETURN_EMPTY_ON_FAIL(buffer->ReadBits(&frame_cropping_flag, 1));
+ if (frame_cropping_flag) {
+ // frame_crop_{left, right, top, bottom}_offset: ue(v)
+ RETURN_EMPTY_ON_FAIL(
+ buffer->ReadExponentialGolomb(&frame_crop_left_offset));
+ RETURN_EMPTY_ON_FAIL(
+ buffer->ReadExponentialGolomb(&frame_crop_right_offset));
+ RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&frame_crop_top_offset));
+ RETURN_EMPTY_ON_FAIL(
+ buffer->ReadExponentialGolomb(&frame_crop_bottom_offset));
+ }
+ // vui_parameters_present_flag: u(1)
+ RETURN_EMPTY_ON_FAIL(buffer->ReadBits(&sps.vui_params_present, 1));
+
+ // Far enough! We don't use the rest of the SPS.
+
+ // Start with the resolution determined by the pic_width/pic_height fields.
+ sps.width = 16 * (pic_width_in_mbs_minus1 + 1);
+ sps.height =
+ 16 * (2 - sps.frame_mbs_only_flag) * (pic_height_in_map_units_minus1 + 1);
+
+ // Figure out the crop units in pixels. That's based on the chroma format's
+ // sampling, which is indicated by chroma_format_idc.
+ if (sps.separate_colour_plane_flag || chroma_format_idc == 0) {
+ frame_crop_bottom_offset *= (2 - sps.frame_mbs_only_flag);
+ frame_crop_top_offset *= (2 - sps.frame_mbs_only_flag);
+ } else if (!sps.separate_colour_plane_flag && chroma_format_idc > 0) {
+ // Width multipliers for formats 1 (4:2:0) and 2 (4:2:2).
+ if (chroma_format_idc == 1 || chroma_format_idc == 2) {
+ frame_crop_left_offset *= 2;
+ frame_crop_right_offset *= 2;
+ }
+ // Height multipliers for format 1 (4:2:0).
+ if (chroma_format_idc == 1) {
+ frame_crop_top_offset *= 2;
+ frame_crop_bottom_offset *= 2;
+ }
+ }
+ // Subtract the crop for each dimension.
+ sps.width -= (frame_crop_left_offset + frame_crop_right_offset);
+ sps.height -= (frame_crop_top_offset + frame_crop_bottom_offset);
+
+ return OptionalSps(sps);
+}
+
+} // namespace webrtc
diff --git a/webrtc/common_video/h264/sps_parser.h b/webrtc/common_video/h264/sps_parser.h
new file mode 100644
index 0000000..d04d9a0
--- /dev/null
+++ b/webrtc/common_video/h264/sps_parser.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_COMMON_VIDEO_H264_SPS_PARSER_H_
+#define WEBRTC_COMMON_VIDEO_H264_SPS_PARSER_H_
+
+#include "webrtc/base/common.h"
+#include "webrtc/base/optional.h"
+
+namespace rtc {
+class BitBuffer;
+}
+
+namespace webrtc {
+
+// A class for parsing out sequence parameter set (SPS) data from an H264 NALU.
+class SpsParser {
+ public:
+ // The parsed state of the SPS. Only some select values are stored.
+ // Add more as they are actually needed.
+ struct SpsState {
+ SpsState() = default;
+
+ uint32_t width = 0;
+ uint32_t height = 0;
+ uint32_t delta_pic_order_always_zero_flag = 0;
+ uint32_t separate_colour_plane_flag = 0;
+ uint32_t frame_mbs_only_flag = 0;
+ uint32_t log2_max_frame_num_minus4 = 0;
+ uint32_t log2_max_pic_order_cnt_lsb_minus4 = 0;
+ uint32_t pic_order_cnt_type = 0;
+ uint32_t max_num_ref_frames = 0;
+ uint32_t vui_params_present = 0;
+ };
+
+ // Unpack RBSP and parse SPS state from the supplied buffer.
+ static rtc::Optional<SpsState> ParseSps(const uint8_t* data, size_t length);
+
+ protected:
+ // Parse the SPS state, up till the VUI part, for a bit buffer where RBSP
+ // decoding has already been performed.
+ static rtc::Optional<SpsState> ParseSpsUpToVui(rtc::BitBuffer* buffer);
+};
+
+} // namespace webrtc
+#endif // WEBRTC_COMMON_VIDEO_H264_SPS_PARSER_H_
diff --git a/webrtc/modules/rtp_rtcp/source/h264_sps_parser_unittest.cc b/webrtc/common_video/h264/sps_parser_unittest.cc
similarity index 70%
rename from webrtc/modules/rtp_rtcp/source/h264_sps_parser_unittest.cc
rename to webrtc/common_video/h264/sps_parser_unittest.cc
index 7a7e3ed..60e55d2 100644
--- a/webrtc/modules/rtp_rtcp/source/h264_sps_parser_unittest.cc
+++ b/webrtc/common_video/h264/sps_parser_unittest.cc
@@ -8,12 +8,14 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#include "webrtc/modules/rtp_rtcp/source/h264_sps_parser.h"
+#include "webrtc/common_video/h264/sps_parser.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "webrtc/base/arraysize.h"
#include "webrtc/base/bitbuffer.h"
+#include "webrtc/base/buffer.h"
+#include "webrtc/common_video/h264/h264_common.h"
namespace webrtc {
@@ -39,7 +41,7 @@
// The fake SPS that this generates also always has at least one emulation byte
// at offset 2, since the first two bytes are always 0, and has a 0x3 as the
// level_idc, to make sure the parser doesn't eat all 0x3 bytes.
-void GenerateFakeSps(uint16_t width, uint16_t height, uint8_t buffer[]) {
+void GenerateFakeSps(uint16_t width, uint16_t height, rtc::Buffer* out_buffer) {
uint8_t rbsp[kSpsBufferMaxSize] = {0};
rtc::BitBufferWriter writer(rbsp, kSpsBufferMaxSize);
// Profile byte.
@@ -90,6 +92,9 @@
writer.WriteExponentialGolomb(((16 - (height % 16)) % 16) / 2);
writer.WriteExponentialGolomb(0);
+ // vui_parameters_present_flag: u(1)
+ writer.WriteBits(0, 1);
+
// Get the number of bytes written (including the last partial byte).
size_t byte_count, bit_offset;
writer.GetCurrentOffset(&byte_count, &bit_offset);
@@ -97,77 +102,69 @@
byte_count++;
}
- // Now, we need to write the rbsp into bytes. To do that, we'll need to add
- // emulation 0x03 bytes if there's ever a sequence of 00 00 01 or 00 00 00 01.
- // To be simple, just add a 0x03 after every 0x00. Extra emulation doesn't
- // hurt.
- for (size_t i = 0; i < byte_count;) {
- // The -3 is intentional; we never need to write an emulation byte if the 00
- // is at the end.
- if (i < byte_count - 3 && rbsp[i] == 0 && rbsp[i + 1] == 0) {
- *buffer++ = rbsp[i];
- *buffer++ = rbsp[i + 1];
- *buffer++ = 0x3u;
- i += 2;
- } else {
- *buffer++ = rbsp[i];
- ++i;
- }
- }
+ H264::WriteRbsp(rbsp, byte_count, out_buffer);
}
-TEST(H264SpsParserTest, TestSampleSPSHdLandscape) {
+class H264SpsParserTest : public ::testing::Test {
+ public:
+ H264SpsParserTest() {}
+ virtual ~H264SpsParserTest() {}
+
+ rtc::Optional<SpsParser::SpsState> sps_;
+};
+
+TEST_F(H264SpsParserTest, TestSampleSPSHdLandscape) {
// SPS for a 1280x720 camera capture from ffmpeg on osx. Contains
// emulation bytes but no cropping.
const uint8_t buffer[] = {0x7A, 0x00, 0x1F, 0xBC, 0xD9, 0x40, 0x50, 0x05,
0xBA, 0x10, 0x00, 0x00, 0x03, 0x00, 0xC0, 0x00,
0x00, 0x2A, 0xE0, 0xF1, 0x83, 0x19, 0x60};
- H264SpsParser parser = H264SpsParser(buffer, arraysize(buffer));
- EXPECT_TRUE(parser.Parse());
- EXPECT_EQ(1280u, parser.width());
- EXPECT_EQ(720u, parser.height());
+ EXPECT_TRUE(
+ static_cast<bool>(sps_ = SpsParser::ParseSps(buffer, arraysize(buffer))));
+ EXPECT_EQ(1280u, sps_->width);
+ EXPECT_EQ(720u, sps_->height);
}
-TEST(H264SpsParserTest, TestSampleSPSVgaLandscape) {
+TEST_F(H264SpsParserTest, TestSampleSPSVgaLandscape) {
// SPS for a 640x360 camera capture from ffmpeg on osx. Contains emulation
// bytes and cropping (360 isn't divisible by 16).
const uint8_t buffer[] = {0x7A, 0x00, 0x1E, 0xBC, 0xD9, 0x40, 0xA0, 0x2F,
0xF8, 0x98, 0x40, 0x00, 0x00, 0x03, 0x01, 0x80,
0x00, 0x00, 0x56, 0x83, 0xC5, 0x8B, 0x65, 0x80};
- H264SpsParser parser = H264SpsParser(buffer, arraysize(buffer));
- EXPECT_TRUE(parser.Parse());
- EXPECT_EQ(640u, parser.width());
- EXPECT_EQ(360u, parser.height());
+ EXPECT_TRUE(
+ static_cast<bool>(sps_ = SpsParser::ParseSps(buffer, arraysize(buffer))));
+ EXPECT_EQ(640u, sps_->width);
+ EXPECT_EQ(360u, sps_->height);
}
-TEST(H264SpsParserTest, TestSampleSPSWeirdResolution) {
+TEST_F(H264SpsParserTest, TestSampleSPSWeirdResolution) {
// SPS for a 200x400 camera capture from ffmpeg on osx. Horizontal and
// vertical crop (neither dimension is divisible by 16).
const uint8_t buffer[] = {0x7A, 0x00, 0x0D, 0xBC, 0xD9, 0x43, 0x43, 0x3E,
0x5E, 0x10, 0x00, 0x00, 0x03, 0x00, 0x60, 0x00,
0x00, 0x15, 0xA0, 0xF1, 0x42, 0x99, 0x60};
- H264SpsParser parser = H264SpsParser(buffer, arraysize(buffer));
- EXPECT_TRUE(parser.Parse());
- EXPECT_EQ(200u, parser.width());
- EXPECT_EQ(400u, parser.height());
+ EXPECT_TRUE(
+ static_cast<bool>(sps_ = SpsParser::ParseSps(buffer, arraysize(buffer))));
+ EXPECT_EQ(200u, sps_->width);
+ EXPECT_EQ(400u, sps_->height);
}
-TEST(H264SpsParserTest, TestSyntheticSPSQvgaLandscape) {
- uint8_t buffer[kSpsBufferMaxSize] = {0};
- GenerateFakeSps(320u, 180u, buffer);
- H264SpsParser parser = H264SpsParser(buffer, arraysize(buffer));
- EXPECT_TRUE(parser.Parse());
- EXPECT_EQ(320u, parser.width());
- EXPECT_EQ(180u, parser.height());
+TEST_F(H264SpsParserTest, TestSyntheticSPSQvgaLandscape) {
+ rtc::Buffer buffer;
+ GenerateFakeSps(320u, 180u, &buffer);
+ EXPECT_TRUE(static_cast<bool>(
+ sps_ = SpsParser::ParseSps(buffer.data(), buffer.size())));
+ EXPECT_EQ(320u, sps_->width);
+ EXPECT_EQ(180u, sps_->height);
}
-TEST(H264SpsParserTest, TestSyntheticSPSWeirdResolution) {
- uint8_t buffer[kSpsBufferMaxSize] = {0};
- GenerateFakeSps(156u, 122u, buffer);
- H264SpsParser parser = H264SpsParser(buffer, arraysize(buffer));
- EXPECT_TRUE(parser.Parse());
- EXPECT_EQ(156u, parser.width());
- EXPECT_EQ(122u, parser.height());
+TEST_F(H264SpsParserTest, TestSyntheticSPSWeirdResolution) {
+ rtc::Buffer buffer;
+ GenerateFakeSps(156u, 122u, &buffer);
+ EXPECT_TRUE(static_cast<bool>(
+ sps_ = SpsParser::ParseSps(buffer.data(), buffer.size())));
+ EXPECT_EQ(156u, sps_->width);
+ EXPECT_EQ(122u, sps_->height);
}
} // namespace webrtc
diff --git a/webrtc/common_video/h264/sps_vui_rewriter.cc b/webrtc/common_video/h264/sps_vui_rewriter.cc
new file mode 100644
index 0000000..4472213
--- /dev/null
+++ b/webrtc/common_video/h264/sps_vui_rewriter.cc
@@ -0,0 +1,359 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ *
+ */
+
+#include "webrtc/common_video/h264/sps_vui_rewriter.h"
+
+#include <algorithm>
+#include <memory>
+
+#include "webrtc/base/bitbuffer.h"
+#include "webrtc/base/checks.h"
+#include "webrtc/base/logging.h"
+
+#include "webrtc/common_video/h264/h264_common.h"
+#include "webrtc/common_video/h264/sps_parser.h"
+
+namespace webrtc {
+
+// The maximum expected growth from adding a VUI to the SPS. It's actually
+// closer to 24 or so, but better safe than sorry.
+const size_t kMaxVuiSpsIncrease = 64;
+
+#define RETURN_FALSE_ON_FAIL(x) \
+ if (!(x)) { \
+ LOG_F(LS_ERROR) << " (line:" << __LINE__ << ") FAILED: " #x; \
+ return false; \
+ }
+
+#define COPY_UINT8(src, dest, tmp) \
+ do { \
+ RETURN_FALSE_ON_FAIL((src)->ReadUInt8(&tmp)); \
+ if (dest) \
+ RETURN_FALSE_ON_FAIL((dest)->WriteUInt8(tmp)); \
+ } while (0)
+
+#define COPY_EXP_GOLOMB(src, dest, tmp) \
+ do { \
+ RETURN_FALSE_ON_FAIL((src)->ReadExponentialGolomb(&tmp)); \
+ if (dest) \
+ RETURN_FALSE_ON_FAIL((dest)->WriteExponentialGolomb(tmp)); \
+ } while (0)
+
+#define COPY_BITS(src, dest, tmp, bits) \
+ do { \
+ RETURN_FALSE_ON_FAIL((src)->ReadBits(&tmp, bits)); \
+ if (dest) \
+ RETURN_FALSE_ON_FAIL((dest)->WriteBits(tmp, bits)); \
+ } while (0)
+
+typedef const SpsParser::SpsState& Sps;
+
+bool CopyAndRewriteVui(Sps sps,
+ rtc::BitBuffer* source,
+ rtc::BitBufferWriter* destination,
+ SpsVuiRewriter::ParseResult* out_vui_rewritten);
+bool CopyHrdParameters(rtc::BitBuffer* source,
+ rtc::BitBufferWriter* destination);
+bool AddBitstreamRestriction(rtc::BitBufferWriter* destination,
+ uint32_t max_num_ref_frames);
+bool CopyRemainingBits(rtc::BitBuffer* source,
+ rtc::BitBufferWriter* destination);
+
+SpsVuiRewriter::ParseResult SpsVuiRewriter::ParseAndRewriteSps(
+ const uint8_t* buffer,
+ size_t length,
+ rtc::Optional<SpsParser::SpsState>* sps,
+ rtc::Buffer* destination) {
+ rtc::BitBuffer source_buffer(buffer, length);
+ rtc::Optional<SpsParser::SpsState> sps_state =
+ SpsParser::ParseSpsUpToVui(&source_buffer);
+ if (!sps_state)
+ return ParseResult::kFailure;
+
+ *sps = sps_state;
+
+ if (sps_state->pic_order_cnt_type >= 2) {
+ // No need to rewrite VUI in this case.
+ return ParseResult::kPocOk;
+ }
+
+ // We're going to completely muck up alignment, so we need a BitBuffer to
+ // write with.
+ rtc::Buffer out_buffer(length + kMaxVuiSpsIncrease);
+ rtc::BitBufferWriter sps_writer(out_buffer.data(), out_buffer.size());
+
+ // Check how far the SpsParser has read, and copy that data in bulk.
+ size_t byte_offset;
+ size_t bit_offset;
+ source_buffer.GetCurrentOffset(&byte_offset, &bit_offset);
+ memcpy(out_buffer.data(), buffer,
+ byte_offset + (bit_offset > 0 ? 1 : 0)); // OK to copy the last bits.
+
+ // SpsParser will have read the vui_params_present flag, which we want to
+ // modify, so back off a bit.
+ if (bit_offset == 0) {
+ --byte_offset;
+ bit_offset = 7;
+ } else {
+ --bit_offset;
+ }
+ sps_writer.Seek(byte_offset, bit_offset);
+
+ ParseResult vui_updated;
+ if (!CopyAndRewriteVui(*sps_state, &source_buffer, &sps_writer,
+ &vui_updated)) {
+ LOG(LS_ERROR) << "Failed to parse/copy SPS VUI.";
+ return ParseResult::kFailure;
+ }
+
+ if (vui_updated == ParseResult::kVuiOk) {
+ // No update necessary after all, just return.
+ return vui_updated;
+ }
+
+ if (!CopyRemainingBits(&source_buffer, &sps_writer)) {
+ LOG(LS_ERROR) << "Failed to parse/copy SPS VUI.";
+ return ParseResult::kFailure;
+ }
+
+ // Pad up to next byte with zero bits.
+ sps_writer.GetCurrentOffset(&byte_offset, &bit_offset);
+ if (bit_offset > 0) {
+ sps_writer.WriteBits(0, 8 - bit_offset);
+ ++byte_offset;
+ bit_offset = 0;
+ }
+
+ RTC_DCHECK(byte_offset <= length + kMaxVuiSpsIncrease);
+ RTC_CHECK(destination != nullptr);
+
+ out_buffer.SetSize(byte_offset);
+
+ // Write the updated SPS to the destination buffer, with RBSP escaping added.
+ H264::WriteRbsp(out_buffer.data(), out_buffer.size(), destination);
+
+ return ParseResult::kVuiRewritten;
+}
+
+bool CopyAndRewriteVui(Sps sps,
+ rtc::BitBuffer* source,
+ rtc::BitBufferWriter* destination,
+ SpsVuiRewriter::ParseResult* out_vui_rewritten) {
+ uint32_t golomb_tmp;
+ uint32_t bits_tmp;
+
+ //
+ // vui_parameters_present_flag: u(1)
+ //
+ RETURN_FALSE_ON_FAIL(destination->WriteBits(1, 1));
+
+ // ********* IMPORTANT! **********
+ // Now we're at the VUI, so we want to (1) add it if it isn't present, and
+ // (2) rewrite frame reordering values so no reordering is allowed.
+ if (!sps.vui_params_present) {
+ // Write a simple VUI with the parameters we want and 0 for all other flags.
+ // There are 8 flags to be off before the bitstream restriction flag.
+ RETURN_FALSE_ON_FAIL(destination->WriteBits(0, 8));
+ // bitstream_restriction_flag: u(1)
+ RETURN_FALSE_ON_FAIL(destination->WriteBits(1, 1));
+ RETURN_FALSE_ON_FAIL(
+ AddBitstreamRestriction(destination, sps.max_num_ref_frames));
+ } else {
+ // Parse out the full VUI.
+ // aspect_ratio_info_present_flag: u(1)
+ COPY_BITS(source, destination, bits_tmp, 1);
+ if (bits_tmp == 1) {
+ // aspect_ratio_idc: u(8)
+ COPY_BITS(source, destination, bits_tmp, 8);
+ if (bits_tmp == 255u) { // Extended_SAR
+ // sar_width/sar_height: u(16) each.
+ COPY_BITS(source, destination, bits_tmp, 32);
+ }
+ }
+ // overscan_info_present_flag: u(1)
+ COPY_BITS(source, destination, bits_tmp, 1);
+ if (bits_tmp == 1) {
+ // overscan_appropriate_flag: u(1)
+ COPY_BITS(source, destination, bits_tmp, 1);
+ }
+ // video_signal_type_present_flag: u(1)
+ COPY_BITS(source, destination, bits_tmp, 1);
+ if (bits_tmp == 1) {
+ // video_format + video_full_range_flag: u(3) + u(1)
+ COPY_BITS(source, destination, bits_tmp, 4);
+ // colour_description_present_flag: u(1)
+ COPY_BITS(source, destination, bits_tmp, 1);
+ if (bits_tmp == 1) {
+ // colour_primaries, transfer_characteristics, matrix_coefficients:
+ // u(8) each.
+ COPY_BITS(source, destination, bits_tmp, 24);
+ }
+ }
+ // chroma_loc_info_present_flag: u(1)
+ COPY_BITS(source, destination, bits_tmp, 1);
+ if (bits_tmp == 1) {
+ // chroma_sample_loc_type_(top|bottom)_field: ue(v) each.
+ COPY_EXP_GOLOMB(source, destination, golomb_tmp);
+ COPY_EXP_GOLOMB(source, destination, golomb_tmp);
+ }
+ // timing_info_present_flag: u(1)
+ COPY_BITS(source, destination, bits_tmp, 1);
+ if (bits_tmp == 1) {
+ // num_units_in_tick, time_scale: u(32) each
+ COPY_BITS(source, destination, bits_tmp, 32);
+ COPY_BITS(source, destination, bits_tmp, 32);
+ // fixed_frame_rate_flag: u(1)
+ COPY_BITS(source, destination, bits_tmp, 1);
+ }
+ // nal_hrd_parameters_present_flag: u(1)
+ uint32_t nal_hrd_parameters_present_flag;
+ COPY_BITS(source, destination, nal_hrd_parameters_present_flag, 1);
+ if (nal_hrd_parameters_present_flag == 1) {
+ RETURN_FALSE_ON_FAIL(CopyHrdParameters(source, destination));
+ }
+ // vcl_hrd_parameters_present_flag: u(1)
+ uint32_t vcl_hrd_parameters_present_flag;
+ COPY_BITS(source, destination, vcl_hrd_parameters_present_flag, 1);
+ if (vcl_hrd_parameters_present_flag == 1) {
+ RETURN_FALSE_ON_FAIL(CopyHrdParameters(source, destination));
+ }
+ if (nal_hrd_parameters_present_flag == 1 ||
+ vcl_hrd_parameters_present_flag == 1) {
+ // low_delay_hrd_flag: u(1)
+ COPY_BITS(source, destination, bits_tmp, 1);
+ }
+ // pic_struct_present_flag: u(1)
+ COPY_BITS(source, destination, bits_tmp, 1);
+
+ // bitstream_restriction_flag: u(1)
+ uint32_t bitstream_restriction_flag;
+ RETURN_FALSE_ON_FAIL(source->ReadBits(&bitstream_restriction_flag, 1));
+ RETURN_FALSE_ON_FAIL(destination->WriteBits(1, 1));
+ if (bitstream_restriction_flag == 0) {
+ // We're adding one from scratch.
+ RETURN_FALSE_ON_FAIL(
+ AddBitstreamRestriction(destination, sps.max_num_ref_frames));
+ } else {
+ // We're replacing.
+ // motion_vectors_over_pic_boundaries_flag: u(1)
+ COPY_BITS(source, destination, bits_tmp, 1);
+ // max_bytes_per_pic_denom: ue(v)
+ COPY_EXP_GOLOMB(source, destination, golomb_tmp);
+ // max_bits_per_mb_denom: ue(v)
+ COPY_EXP_GOLOMB(source, destination, golomb_tmp);
+ // log2_max_mv_length_horizontal: ue(v)
+ COPY_EXP_GOLOMB(source, destination, golomb_tmp);
+ // log2_max_mv_length_vertical: ue(v)
+ COPY_EXP_GOLOMB(source, destination, golomb_tmp);
+ // ********* IMPORTANT! **********
+ // The next two are the ones we need to set to low numbers:
+ // max_num_reorder_frames: ue(v)
+ // max_dec_frame_buffering: ue(v)
+ // However, if they are already set to no greater than the numbers we
+ // want, then we don't need to be rewriting.
+ uint32_t max_num_reorder_frames, max_dec_frame_buffering;
+ RETURN_FALSE_ON_FAIL(
+ source->ReadExponentialGolomb(&max_num_reorder_frames));
+ RETURN_FALSE_ON_FAIL(
+ source->ReadExponentialGolomb(&max_dec_frame_buffering));
+ if (max_num_reorder_frames == 0 &&
+ max_dec_frame_buffering <= sps.max_num_ref_frames) {
+ LOG(LS_INFO) << "VUI bitstream already contains an optimal VUI.";
+ *out_vui_rewritten = SpsVuiRewriter::ParseResult::kVuiOk;
+ return true;
+ }
+ RETURN_FALSE_ON_FAIL(destination->WriteExponentialGolomb(0));
+ RETURN_FALSE_ON_FAIL(
+ destination->WriteExponentialGolomb(sps.max_num_ref_frames));
+ }
+ }
+ *out_vui_rewritten = SpsVuiRewriter::ParseResult::kVuiRewritten;
+ return true;
+}
+
+// Copies a VUI HRD parameters segment.
+bool CopyHrdParameters(rtc::BitBuffer* source,
+ rtc::BitBufferWriter* destination) {
+ uint32_t golomb_tmp;
+ uint32_t bits_tmp;
+
+ // cpb_cnt_minus1: ue(v)
+ uint32_t cpb_cnt_minus1;
+ COPY_EXP_GOLOMB(source, destination, cpb_cnt_minus1);
+ // bit_rate_scale and cpb_size_scale: u(4) each
+ COPY_BITS(source, destination, bits_tmp, 8);
+ for (size_t i = 0; i <= cpb_cnt_minus1; ++i) {
+ // bit_rate_value_minus1 and cpb_size_value_minus1: ue(v) each
+ COPY_EXP_GOLOMB(source, destination, golomb_tmp);
+ COPY_EXP_GOLOMB(source, destination, golomb_tmp);
+ // cbr_flag: u(1)
+ COPY_BITS(source, destination, bits_tmp, 1);
+ }
+ // initial_cpb_removal_delay_length_minus1: u(5)
+ COPY_BITS(source, destination, bits_tmp, 5);
+ // cpb_removal_delay_length_minus1: u(5)
+ COPY_BITS(source, destination, bits_tmp, 5);
+ // dpb_output_delay_length_minus1: u(5)
+ COPY_BITS(source, destination, bits_tmp, 5);
+ // time_offset_length: u(5)
+ COPY_BITS(source, destination, bits_tmp, 5);
+ return true;
+}
+
+// These functions are similar to webrtc::SpsParser::ParseSps and are based on
+// the same version of the H.264 standard. You can find it here:
+// http://www.itu.int/rec/T-REC-H.264
+
+// Adds a bitstream restriction VUI segment.
+bool AddBitstreamRestriction(rtc::BitBufferWriter* destination,
+ uint32_t max_num_ref_frames) {
+ // motion_vectors_over_pic_boundaries_flag: u(1)
+ // Default is 1 when not present.
+ RETURN_FALSE_ON_FAIL(destination->WriteBits(1, 1));
+ // max_bytes_per_pic_denom: ue(v)
+ // Default is 2 when not present.
+ RETURN_FALSE_ON_FAIL(destination->WriteExponentialGolomb(2));
+ // max_bits_per_mb_denom: ue(v)
+ // Default is 1 when not present.
+ RETURN_FALSE_ON_FAIL(destination->WriteExponentialGolomb(1));
+ // log2_max_mv_length_horizontal: ue(v)
+ // log2_max_mv_length_vertical: ue(v)
+ // Both default to 16 when not present.
+ RETURN_FALSE_ON_FAIL(destination->WriteExponentialGolomb(16));
+ RETURN_FALSE_ON_FAIL(destination->WriteExponentialGolomb(16));
+
+ // ********* IMPORTANT! **********
+ // max_num_reorder_frames: ue(v)
+ RETURN_FALSE_ON_FAIL(destination->WriteExponentialGolomb(0));
+ // max_dec_frame_buffering: ue(v)
+ RETURN_FALSE_ON_FAIL(destination->WriteExponentialGolomb(max_num_ref_frames));
+ return true;
+}
+
+bool CopyRemainingBits(rtc::BitBuffer* source,
+ rtc::BitBufferWriter* destination) {
+ uint32_t bits_tmp;
+ // Try to get at least the destination aligned.
+ if (source->RemainingBitCount() > 0 && source->RemainingBitCount() % 8 != 0) {
+ size_t misaligned_bits = source->RemainingBitCount() % 8;
+ COPY_BITS(source, destination, bits_tmp, misaligned_bits);
+ }
+ while (source->RemainingBitCount() > 0) {
+ size_t count = std::min(static_cast<size_t>(32u),
+ static_cast<size_t>(source->RemainingBitCount()));
+ COPY_BITS(source, destination, bits_tmp, count);
+ }
+ // TODO(noahric): The last byte could be all zeroes now, which we should just
+ // strip.
+ return true;
+}
+
+} // namespace webrtc
diff --git a/webrtc/common_video/h264/sps_vui_rewriter.h b/webrtc/common_video/h264/sps_vui_rewriter.h
new file mode 100644
index 0000000..bf052b3
--- /dev/null
+++ b/webrtc/common_video/h264/sps_vui_rewriter.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ *
+ */
+
+#ifndef WEBRTC_COMMON_VIDEO_H264_SPS_VUI_REWRITER_H_
+#define WEBRTC_COMMON_VIDEO_H264_SPS_VUI_REWRITER_H_
+
+#include "webrtc/base/buffer.h"
+#include "webrtc/base/optional.h"
+#include "webrtc/common_video/h264/sps_parser.h"
+
+namespace rtc {
+class BitBuffer;
+}
+
+namespace webrtc {
+
+// A class that can parse an SPS block of a NAL unit and, if necessary,
+// create a copy with updated settings to allow for faster decoding for streams
+// that use picture order count type 0. Streams in that format incur additional
+// delay because it allows decode order to differ from render order.
+// The mechanism used is to rewrite (edit or add) the SPS's VUI to contain
+// restrictions on the maximum number of reordered pictures. This reduces
+// latency significantly, though it still adds about a frame of latency to
+// decoding.
+class SpsVuiRewriter : private SpsParser {
+ public:
+ enum class ParseResult { kFailure, kPocOk, kVuiOk, kVuiRewritten };
+
+ // Parses an SPS block and if necessary copies it and rewrites the VUI.
+ // Returns kFailure on failure, kPocOk or kVuiOk if parsing succeeded and no
+ // update was necessary, and kVuiRewritten if an updated copy of buffer was
+ // written to destination. destination may be populated with some data even if
+ // no rewrite was necessary, but the end offset should remain unchanged.
+ // Unless parsing fails, the sps parameter will be populated with the parsed
+ // SPS state. This function assumes that any previous headers
+ // (NALU start, type, Stap-A, etc) have already been parsed and that RBSP
+ // decoding has been performed.
+ static ParseResult ParseAndRewriteSps(const uint8_t* buffer,
+ size_t length,
+ rtc::Optional<SpsParser::SpsState>* sps,
+ rtc::Buffer* destination);
+};
+
+} // namespace webrtc
+
+#endif // WEBRTC_COMMON_VIDEO_H264_SPS_VUI_REWRITER_H_
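A minimal usage sketch of this API, patterned on how RtpPacketizerH264::SetPayloadData (later in this CL) drives it; the function name and flow are illustrative only, not part of the patch:

#include <cstddef>
#include <cstdint>
#include <memory>

#include "webrtc/base/buffer.h"
#include "webrtc/base/optional.h"
#include "webrtc/common_video/h264/h264_common.h"
#include "webrtc/common_video/h264/sps_vui_rewriter.h"

// |nalu| is assumed to point at a complete SPS NAL unit, type byte first.
void MaybeRewriteSps(const uint8_t* nalu, size_t length) {
  // RBSP-decode the payload; the rewriter expects the type byte stripped.
  std::unique_ptr<rtc::Buffer> rbsp = webrtc::H264::ParseRbsp(
      nalu + webrtc::H264::kNaluTypeSize, length - webrtc::H264::kNaluTypeSize);
  rtc::Optional<webrtc::SpsParser::SpsState> sps;
  rtc::Buffer out;
  out.AppendData(nalu[0]);  // Keep the original NAL type byte up front.
  webrtc::SpsVuiRewriter::ParseResult result =
      webrtc::SpsVuiRewriter::ParseAndRewriteSps(rbsp->data(), rbsp->size(),
                                                 &sps, &out);
  if (result == webrtc::SpsVuiRewriter::ParseResult::kVuiRewritten) {
    // |out| now holds the type byte plus the rewritten SPS payload; use it in
    // place of the original NAL unit.
  }
}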
diff --git a/webrtc/common_video/h264/sps_vui_rewriter_unittest.cc b/webrtc/common_video/h264/sps_vui_rewriter_unittest.cc
new file mode 100644
index 0000000..6c8baee
--- /dev/null
+++ b/webrtc/common_video/h264/sps_vui_rewriter_unittest.cc
@@ -0,0 +1,195 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <vector>
+
+#include "testing/gtest/include/gtest/gtest.h"
+
+#include "webrtc/base/bitbuffer.h"
+#include "webrtc/base/buffer.h"
+#include "webrtc/base/fileutils.h"
+#include "webrtc/base/logging.h"
+#include "webrtc/base/pathutils.h"
+#include "webrtc/base/stream.h"
+
+#include "webrtc/common_video/h264/sps_vui_rewriter.h"
+#include "webrtc/common_video/h264/h264_common.h"
+
+namespace webrtc {
+
+enum SpsMode {
+ kNoRewriteRequired_PocCorrect,
+ kNoRewriteRequired_VuiOptimal,
+ kRewriteRequired_NoVui,
+ kRewriteRequired_NoBitstreamRestriction,
+ kRewriteRequired_VuiSuboptimal,
+};
+
+static const size_t kSpsBufferMaxSize = 256;
+static const size_t kWidth = 640;
+static const size_t kHeight = 480;
+
+// Generates a fake SPS with basically everything empty and with characteristics
+// based off SpsMode.
+// Pass in a buffer of at least kSpsBufferMaxSize.
+// The fake SPS that this generates also always has at least one emulation byte
+// at offset 2, since the first two bytes are always 0, and has a 0x3 as the
+// level_idc, to make sure the parser doesn't eat all 0x3 bytes.
+void GenerateFakeSps(SpsMode mode, rtc::Buffer* out_buffer) {
+ uint8_t rbsp[kSpsBufferMaxSize] = {0};
+ rtc::BitBufferWriter writer(rbsp, kSpsBufferMaxSize);
+ // Profile byte.
+ writer.WriteUInt8(0);
+ // Constraint sets and reserved zero bits.
+ writer.WriteUInt8(0);
+ // level_idc.
+ writer.WriteUInt8(3);
+ // seq_parameter_set_id.
+ writer.WriteExponentialGolomb(0);
+ // Profile is not special, so we skip all the chroma format settings.
+
+ // Now some bit magic.
+ // log2_max_frame_num_minus4: ue(v). 0 is fine.
+ writer.WriteExponentialGolomb(0);
+ // pic_order_cnt_type: ue(v).
+ // POC type 2 is the one that doesn't need to be rewritten.
+ if (mode == kNoRewriteRequired_PocCorrect) {
+ writer.WriteExponentialGolomb(2);
+ } else {
+ writer.WriteExponentialGolomb(0);
+ // log2_max_pic_order_cnt_lsb_minus4: ue(v). 0 is fine.
+ writer.WriteExponentialGolomb(0);
+ }
+ // max_num_ref_frames: ue(v). Use 1, to make optimal/suboptimal more obvious.
+ writer.WriteExponentialGolomb(1);
+ // gaps_in_frame_num_value_allowed_flag: u(1).
+ writer.WriteBits(0, 1);
+ // Next are width/height. First, calculate the mbs/map_units versions.
+ uint16_t width_in_mbs_minus1 = (kWidth + 15) / 16 - 1;
+
+ // For the height, we're going to define frame_mbs_only_flag, so we need to
+ // divide by 2. See the parser for the full calculation.
+ uint16_t height_in_map_units_minus1 = ((kHeight + 15) / 16 - 1) / 2;
+ // Write each as ue(v).
+ writer.WriteExponentialGolomb(width_in_mbs_minus1);
+ writer.WriteExponentialGolomb(height_in_map_units_minus1);
+ // frame_mbs_only_flag: u(1). Needs to be false.
+ writer.WriteBits(0, 1);
+ // mb_adaptive_frame_field_flag: u(1).
+ writer.WriteBits(0, 1);
+ // direct_8x8_inference_flag: u(1).
+ writer.WriteBits(0, 1);
+ // frame_cropping_flag: u(1). 1, so we can supply crop.
+ writer.WriteBits(1, 1);
+ // Now we write the left/right/top/bottom crop. For simplicity, we'll put all
+ // the crop at the left/top.
+ // We picked a 4:2:0 format, so the crops are 1/2 the pixel crop values.
+ // Left/right.
+ writer.WriteExponentialGolomb(((16 - (kWidth % 16)) % 16) / 2);
+ writer.WriteExponentialGolomb(0);
+ // Top/bottom.
+ writer.WriteExponentialGolomb(((16 - (kHeight % 16)) % 16) / 2);
+ writer.WriteExponentialGolomb(0);
+
+ // Finally! The VUI.
+ // vui_parameters_present_flag: u(1)
+ if (mode == kNoRewriteRequired_PocCorrect || mode == kRewriteRequired_NoVui) {
+ writer.WriteBits(0, 1);
+ } else {
+ writer.WriteBits(1, 1);
+ // VUI time. 8 flags to ignore followed by the bitstream restriction flag.
+ writer.WriteBits(0, 8);
+ if (mode == kRewriteRequired_NoBitstreamRestriction) {
+ writer.WriteBits(0, 1);
+ } else {
+ writer.WriteBits(1, 1);
+ // Write some defaults. Shouldn't matter for parsing, though.
+ // motion_vectors_over_pic_boundaries_flag: u(1)
+ writer.WriteBits(1, 1);
+ // max_bytes_per_pic_denom: ue(v)
+ writer.WriteExponentialGolomb(2);
+ // max_bits_per_mb_denom: ue(v)
+ writer.WriteExponentialGolomb(1);
+ // log2_max_mv_length_horizontal: ue(v)
+ // log2_max_mv_length_vertical: ue(v)
+ writer.WriteExponentialGolomb(16);
+ writer.WriteExponentialGolomb(16);
+
+ // Next are the limits we care about.
+ // max_num_reorder_frames: ue(v)
+ // max_dec_frame_buffering: ue(v)
+ if (mode == kRewriteRequired_VuiSuboptimal) {
+ writer.WriteExponentialGolomb(4);
+ writer.WriteExponentialGolomb(4);
+ } else if (mode == kNoRewriteRequired_VuiOptimal) {
+ writer.WriteExponentialGolomb(0);
+ writer.WriteExponentialGolomb(1);
+ }
+ }
+ }
+
+ // Get the number of bytes written (including the last partial byte).
+ size_t byte_count, bit_offset;
+ writer.GetCurrentOffset(&byte_count, &bit_offset);
+ if (bit_offset > 0) {
+ byte_count++;
+ }
+
+ // Write the NALU header and type; {0 0 0 1} and 7 for the SPS header type.
+ uint8_t header[] = {0, 0, 0, 1, 7};
+ out_buffer->AppendData(header, sizeof(header));
+
+ H264::WriteRbsp(rbsp, byte_count, out_buffer);
+}
+
+void TestSps(SpsMode mode, SpsVuiRewriter::ParseResult expected_parse_result) {
+ rtc::LogMessage::LogToDebug(rtc::LS_VERBOSE);
+ rtc::Buffer buffer;
+ GenerateFakeSps(mode, &buffer);
+ std::vector<H264::NaluIndex> start_offsets =
+ H264::FindNaluIndices(buffer.data(), buffer.size());
+ EXPECT_EQ(1u, start_offsets.size());
+ H264::NaluIndex index = start_offsets[0];
+
+ H264::NaluType nal_type =
+ H264::ParseNaluType(buffer[index.payload_start_offset]);
+ EXPECT_EQ(H264::kSps, nal_type);
+ index.payload_start_offset += H264::kNaluTypeSize;
+ index.payload_size -= H264::kNaluTypeSize;
+
+ std::unique_ptr<rtc::Buffer> rbsp_decoded =
+ H264::ParseRbsp(&buffer[index.payload_start_offset], index.payload_size);
+ rtc::Optional<SpsParser::SpsState> sps;
+ rtc::Buffer out_buffer;
+ SpsVuiRewriter::ParseResult result = SpsVuiRewriter::ParseAndRewriteSps(
+ rbsp_decoded->data(), rbsp_decoded->size(), &sps, &out_buffer);
+ EXPECT_EQ(expected_parse_result, result);
+}
+
+#define REWRITE_TEST(test_name, mode, expected_parse_result) \
+ TEST(SpsVuiRewriterTest, test_name) { TestSps(mode, expected_parse_result); }
+
+REWRITE_TEST(PocCorrect,
+ kNoRewriteRequired_PocCorrect,
+ SpsVuiRewriter::ParseResult::kPocOk);
+REWRITE_TEST(VuiAlreadyOptimal,
+ kNoRewriteRequired_VuiOptimal,
+ SpsVuiRewriter::ParseResult::kVuiOk);
+REWRITE_TEST(RewriteFullVui,
+ kRewriteRequired_NoVui,
+ SpsVuiRewriter::ParseResult::kVuiRewritten);
+REWRITE_TEST(AddBitstreamRestriction,
+ kRewriteRequired_NoBitstreamRestriction,
+ SpsVuiRewriter::ParseResult::kVuiRewritten);
+REWRITE_TEST(RewriteSuboptimalVui,
+ kRewriteRequired_VuiSuboptimal,
+ SpsVuiRewriter::ParseResult::kVuiRewritten);
+
+} // namespace webrtc
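As a sanity check on GenerateFakeSps above: with 640x480 both dimensions are multiples of 16, so all crop offsets come out zero and the macroblock/map-unit math round-trips exactly. A quick standalone check using only the formulas from this CL (not part of the patch):

#include <cstdio>

int main() {
  const int kWidth = 640, kHeight = 480;
  const int frame_mbs_only_flag = 0;  // The fake SPS writes this flag as 0.
  int width_in_mbs_minus1 = (kWidth + 15) / 16 - 1;                // 39
  int height_in_map_units_minus1 = ((kHeight + 15) / 16 - 1) / 2;  // 14
  int width = 16 * (width_in_mbs_minus1 + 1);                      // 640
  int height =
      16 * (2 - frame_mbs_only_flag) * (height_in_map_units_minus1 + 1);  // 480
  printf("%dx%d\n", width, height);  // Prints 640x480.
  return 0;
}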
diff --git a/webrtc/modules/modules.gyp b/webrtc/modules/modules.gyp
index 8adb4cb..b2d62f4 100644
--- a/webrtc/modules/modules.gyp
+++ b/webrtc/modules/modules.gyp
@@ -296,7 +296,6 @@
'rtp_rtcp/source/fec_receiver_unittest.cc',
'rtp_rtcp/source/fec_test_helper.cc',
'rtp_rtcp/source/fec_test_helper.h',
- 'rtp_rtcp/source/h264_sps_parser_unittest.cc',
'rtp_rtcp/source/nack_rtx_unittest.cc',
'rtp_rtcp/source/packet_loss_stats_unittest.cc',
'rtp_rtcp/source/producer_fec_unittest.cc',
diff --git a/webrtc/modules/rtp_rtcp/BUILD.gn b/webrtc/modules/rtp_rtcp/BUILD.gn
index 0f9dd8a..df6f92e 100644
--- a/webrtc/modules/rtp_rtcp/BUILD.gn
+++ b/webrtc/modules/rtp_rtcp/BUILD.gn
@@ -32,8 +32,6 @@
"source/forward_error_correction.h",
"source/forward_error_correction_internal.cc",
"source/forward_error_correction_internal.h",
- "source/h264_sps_parser.cc",
- "source/h264_sps_parser.h",
"source/mock/mock_rtp_payload_strategy.h",
"source/packet_loss_stats.cc",
"source/packet_loss_stats.h",
@@ -167,6 +165,7 @@
deps = [
"../..:webrtc_common",
+ "../../common_video",
"../../system_wrappers",
"../remote_bitrate_estimator",
]
diff --git a/webrtc/modules/rtp_rtcp/rtp_rtcp.gypi b/webrtc/modules/rtp_rtcp/rtp_rtcp.gypi
index cfb3916..1c89e54 100644
--- a/webrtc/modules/rtp_rtcp/rtp_rtcp.gypi
+++ b/webrtc/modules/rtp_rtcp/rtp_rtcp.gypi
@@ -12,8 +12,9 @@
'target_name': 'rtp_rtcp',
'type': 'static_library',
'dependencies': [
- '<(webrtc_root)/system_wrappers/system_wrappers.gyp:system_wrappers',
+ '<(webrtc_root)/common_video/common_video.gyp:common_video',
'<(webrtc_root)/modules/modules.gyp:remote_bitrate_estimator',
+ '<(webrtc_root)/system_wrappers/system_wrappers.gyp:system_wrappers',
],
'sources': [
# Common
@@ -135,8 +136,6 @@
'source/forward_error_correction.h',
'source/forward_error_correction_internal.cc',
'source/forward_error_correction_internal.h',
- 'source/h264_sps_parser.cc',
- 'source/h264_sps_parser.h',
'source/producer_fec.cc',
'source/producer_fec.h',
'source/rtp_packet_history.cc',
diff --git a/webrtc/modules/rtp_rtcp/source/h264_sps_parser.cc b/webrtc/modules/rtp_rtcp/source/h264_sps_parser.cc
deleted file mode 100644
index 904a9e2..0000000
--- a/webrtc/modules/rtp_rtcp/source/h264_sps_parser.cc
+++ /dev/null
@@ -1,232 +0,0 @@
-/*
- * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "webrtc/modules/rtp_rtcp/source/h264_sps_parser.h"
-
-#include "webrtc/base/bitbuffer.h"
-#include "webrtc/base/bytebuffer.h"
-#include "webrtc/base/logging.h"
-
-#define RETURN_FALSE_ON_FAIL(x) \
- if (!(x)) { \
- return false; \
- }
-
-namespace webrtc {
-
-H264SpsParser::H264SpsParser(const uint8_t* sps, size_t byte_length)
- : sps_(sps), byte_length_(byte_length), width_(), height_() {
-}
-
-bool H264SpsParser::Parse() {
- // General note: this is based off the 02/2014 version of the H.264 standard.
- // You can find it on this page:
- // http://www.itu.int/rec/T-REC-H.264
-
- const char* sps_bytes = reinterpret_cast<const char*>(sps_);
- // First, parse out rbsp, which is basically the source buffer minus emulation
- // bytes (the last byte of a 0x00 0x00 0x03 sequence). RBSP is defined in
- // section 7.3.1 of the H.264 standard.
- rtc::ByteBufferWriter rbsp_buffer;
- for (size_t i = 0; i < byte_length_;) {
- // Be careful about over/underflow here. byte_length_ - 3 can underflow, and
- // i + 3 can overflow, but byte_length_ - i can't, because i < byte_length_
- // above, and that expression will produce the number of bytes left in
- // the stream including the byte at i.
- if (byte_length_ - i >= 3 && sps_[i] == 0 && sps_[i + 1] == 0 &&
- sps_[i + 2] == 3) {
- // Two rbsp bytes + the emulation byte.
- rbsp_buffer.WriteBytes(sps_bytes + i, 2);
- i += 3;
- } else {
- // Single rbsp byte.
- rbsp_buffer.WriteBytes(sps_bytes + i, 1);
- i++;
- }
- }
-
- // Now, we need to use a bit buffer to parse through the actual AVC SPS
- // format. See Section 7.3.2.1.1 ("Sequence parameter set data syntax") of the
- // H.264 standard for a complete description.
- // Since we only care about resolution, we ignore the majority of fields, but
- // we still have to actively parse through a lot of the data, since many of
- // the fields have variable size.
- // We're particularly interested in:
- // chroma_format_idc -> affects crop units
- // pic_{width,height}_* -> resolution of the frame in macroblocks (16x16).
- // frame_crop_*_offset -> crop information
- rtc::BitBuffer parser(reinterpret_cast<const uint8_t*>(rbsp_buffer.Data()),
- rbsp_buffer.Length());
-
- // The golomb values we have to read, not just consume.
- uint32_t golomb_ignored;
-
- // separate_colour_plane_flag is optional (assumed 0), but has implications
- // about the ChromaArrayType, which modifies how we treat crop coordinates.
- uint32_t separate_colour_plane_flag = 0;
- // chroma_format_idc will be ChromaArrayType if separate_colour_plane_flag is
- // 0. It defaults to 1, when not specified.
- uint32_t chroma_format_idc = 1;
-
- // profile_idc: u(8). We need it to determine if we need to read/skip chroma
- // formats.
- uint8_t profile_idc;
- RETURN_FALSE_ON_FAIL(parser.ReadUInt8(&profile_idc));
- // constraint_set0_flag through constraint_set5_flag + reserved_zero_2bits
- // 1 bit each for the flags + 2 bits = 8 bits = 1 byte.
- RETURN_FALSE_ON_FAIL(parser.ConsumeBytes(1));
- // level_idc: u(8)
- RETURN_FALSE_ON_FAIL(parser.ConsumeBytes(1));
- // seq_parameter_set_id: ue(v)
- RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));
- // See if profile_idc has chroma format information.
- if (profile_idc == 100 || profile_idc == 110 || profile_idc == 122 ||
- profile_idc == 244 || profile_idc == 44 || profile_idc == 83 ||
- profile_idc == 86 || profile_idc == 118 || profile_idc == 128 ||
- profile_idc == 138 || profile_idc == 139 || profile_idc == 134) {
- // chroma_format_idc: ue(v)
- RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&chroma_format_idc));
- if (chroma_format_idc == 3) {
- // separate_colour_plane_flag: u(1)
- RETURN_FALSE_ON_FAIL(parser.ReadBits(&separate_colour_plane_flag, 1));
- }
- // bit_depth_luma_minus8: ue(v)
- RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));
- // bit_depth_chroma_minus8: ue(v)
- RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));
- // qpprime_y_zero_transform_bypass_flag: u(1)
- RETURN_FALSE_ON_FAIL(parser.ConsumeBits(1));
- // seq_scaling_matrix_present_flag: u(1)
- uint32_t seq_scaling_matrix_present_flag;
- RETURN_FALSE_ON_FAIL(parser.ReadBits(&seq_scaling_matrix_present_flag, 1));
- if (seq_scaling_matrix_present_flag) {
- // seq_scaling_list_present_flags. Either 8 or 12, depending on
- // chroma_format_idc.
- uint32_t seq_scaling_list_present_flags;
- if (chroma_format_idc != 3) {
- RETURN_FALSE_ON_FAIL(
- parser.ReadBits(&seq_scaling_list_present_flags, 8));
- } else {
- RETURN_FALSE_ON_FAIL(
- parser.ReadBits(&seq_scaling_list_present_flags, 12));
- }
- // We don't support reading the sequence scaling list, and we don't really
- // see/use them in practice, so we'll just reject the full sps if we see
- // any provided.
- if (seq_scaling_list_present_flags > 0) {
- LOG(LS_WARNING) << "SPS contains scaling lists, which are unsupported.";
- return false;
- }
- }
- }
- // log2_max_frame_num_minus4: ue(v)
- RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));
- // pic_order_cnt_type: ue(v)
- uint32_t pic_order_cnt_type;
- RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&pic_order_cnt_type));
- if (pic_order_cnt_type == 0) {
- // log2_max_pic_order_cnt_lsb_minus4: ue(v)
- RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));
- } else if (pic_order_cnt_type == 1) {
- // delta_pic_order_always_zero_flag: u(1)
- RETURN_FALSE_ON_FAIL(parser.ConsumeBits(1));
- // offset_for_non_ref_pic: se(v)
- RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));
- // offset_for_top_to_bottom_field: se(v)
- RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));
- // num_ref_frames_in_pic_order_cnt_cycle: ue(v)
- uint32_t num_ref_frames_in_pic_order_cnt_cycle;
- RETURN_FALSE_ON_FAIL(
- parser.ReadExponentialGolomb(&num_ref_frames_in_pic_order_cnt_cycle));
- for (size_t i = 0; i < num_ref_frames_in_pic_order_cnt_cycle; ++i) {
- // offset_for_ref_frame[i]: se(v)
- RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));
- }
- }
- // max_num_ref_frames: ue(v)
- RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));
- // gaps_in_frame_num_value_allowed_flag: u(1)
- RETURN_FALSE_ON_FAIL(parser.ConsumeBits(1));
- //
- // IMPORTANT ONES! Now we're getting to resolution. First we read the pic
- // width/height in macroblocks (16x16), which gives us the base resolution,
- // and then we continue on until we hit the frame crop offsets, which are used
- // to signify resolutions that aren't multiples of 16.
- //
- // pic_width_in_mbs_minus1: ue(v)
- uint32_t pic_width_in_mbs_minus1;
- RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&pic_width_in_mbs_minus1));
- // pic_height_in_map_units_minus1: ue(v)
- uint32_t pic_height_in_map_units_minus1;
- RETURN_FALSE_ON_FAIL(
- parser.ReadExponentialGolomb(&pic_height_in_map_units_minus1));
- // frame_mbs_only_flag: u(1)
- uint32_t frame_mbs_only_flag;
- RETURN_FALSE_ON_FAIL(parser.ReadBits(&frame_mbs_only_flag, 1));
- if (!frame_mbs_only_flag) {
- // mb_adaptive_frame_field_flag: u(1)
- RETURN_FALSE_ON_FAIL(parser.ConsumeBits(1));
- }
- // direct_8x8_inference_flag: u(1)
- RETURN_FALSE_ON_FAIL(parser.ConsumeBits(1));
- //
- // MORE IMPORTANT ONES! Now we're at the frame crop information.
- //
- // frame_cropping_flag: u(1)
- uint32_t frame_cropping_flag;
- uint32_t frame_crop_left_offset = 0;
- uint32_t frame_crop_right_offset = 0;
- uint32_t frame_crop_top_offset = 0;
- uint32_t frame_crop_bottom_offset = 0;
- RETURN_FALSE_ON_FAIL(parser.ReadBits(&frame_cropping_flag, 1));
- if (frame_cropping_flag) {
- // frame_crop_{left, right, top, bottom}_offset: ue(v)
- RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&frame_crop_left_offset));
- RETURN_FALSE_ON_FAIL(
- parser.ReadExponentialGolomb(&frame_crop_right_offset));
- RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&frame_crop_top_offset));
- RETURN_FALSE_ON_FAIL(
- parser.ReadExponentialGolomb(&frame_crop_bottom_offset));
- }
-
- // Far enough! We don't use the rest of the SPS.
-
- // Start with the resolution determined by the pic_width/pic_height fields.
- int width = 16 * (pic_width_in_mbs_minus1 + 1);
- int height =
- 16 * (2 - frame_mbs_only_flag) * (pic_height_in_map_units_minus1 + 1);
-
- // Figure out the crop units in pixels. That's based on the chroma format's
- // sampling, which is indicated by chroma_format_idc.
- if (separate_colour_plane_flag || chroma_format_idc == 0) {
- frame_crop_bottom_offset *= (2 - frame_mbs_only_flag);
- frame_crop_top_offset *= (2 - frame_mbs_only_flag);
- } else if (!separate_colour_plane_flag && chroma_format_idc > 0) {
- // Width multipliers for formats 1 (4:2:0) and 2 (4:2:2).
- if (chroma_format_idc == 1 || chroma_format_idc == 2) {
- frame_crop_left_offset *= 2;
- frame_crop_right_offset *= 2;
- }
- // Height multipliers for format 1 (4:2:0).
- if (chroma_format_idc == 1) {
- frame_crop_top_offset *= 2;
- frame_crop_bottom_offset *= 2;
- }
- }
- // Subtract the crop for each dimension.
- width -= (frame_crop_left_offset + frame_crop_right_offset);
- height -= (frame_crop_top_offset + frame_crop_bottom_offset);
-
- width_ = width;
- height_ = height;
- return true;
-}
-
-} // namespace webrtc
diff --git a/webrtc/modules/rtp_rtcp/source/h264_sps_parser.h b/webrtc/modules/rtp_rtcp/source/h264_sps_parser.h
deleted file mode 100644
index c05ee67..0000000
--- a/webrtc/modules/rtp_rtcp/source/h264_sps_parser.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef WEBRTC_MODULES_RTP_RTCP_SOURCE_H264_SPS_PARSER_H_
-#define WEBRTC_MODULES_RTP_RTCP_SOURCE_H264_SPS_PARSER_H_
-
-#include "webrtc/base/common.h"
-
-namespace webrtc {
-
-// A class for parsing out sequence parameter set (SPS) data from an H264 NALU.
-// Currently, only resolution is read without being ignored.
-class H264SpsParser {
- public:
- H264SpsParser(const uint8_t* sps, size_t byte_length);
- // Parses the SPS to completion. Returns true if the SPS was parsed correctly.
- bool Parse();
- uint16_t width() { return width_; }
- uint16_t height() { return height_; }
-
- private:
- const uint8_t* const sps_;
- const size_t byte_length_;
-
- uint16_t width_;
- uint16_t height_;
-};
-
-} // namespace webrtc
-#endif // WEBRTC_MODULES_RTP_RTCP_SOURCE_H264_SPS_PARSER_H_
diff --git a/webrtc/modules/rtp_rtcp/source/rtp_format.h b/webrtc/modules/rtp_rtcp/source/rtp_format.h
index 3519499..8cad6a9 100644
--- a/webrtc/modules/rtp_rtcp/source/rtp_format.h
+++ b/webrtc/modules/rtp_rtcp/source/rtp_format.h
@@ -50,6 +50,10 @@
virtual std::string ToString() = 0;
};
+// TODO(sprang): Update the depacketizer to return a std::unique_ptr with a copy
+// of the parsed payload, rather than just a pointer into the incoming buffer.
+// This way we can move some parsing out from the jitter buffer into here, and
+// the jitter buffer can just store that pointer rather than doing a copy there.
class RtpDepacketizer {
public:
struct ParsedPayload {
diff --git a/webrtc/modules/rtp_rtcp/source/rtp_format_h264.cc b/webrtc/modules/rtp_rtcp/source/rtp_format_h264.cc
index 86b6f6e..64a2a94 100644
--- a/webrtc/modules/rtp_rtcp/source/rtp_format_h264.cc
+++ b/webrtc/modules/rtp_rtcp/source/rtp_format_h264.cc
@@ -8,235 +8,214 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#include <string.h>
+#include "webrtc/modules/rtp_rtcp/source/rtp_format_h264.h"
+#include <string.h>
+#include <vector>
+
+#include "webrtc/base/checks.h"
#include "webrtc/base/logging.h"
#include "webrtc/modules/include/module_common_types.h"
#include "webrtc/modules/rtp_rtcp/source/byte_io.h"
-#include "webrtc/modules/rtp_rtcp/source/h264_sps_parser.h"
-#include "webrtc/modules/rtp_rtcp/source/rtp_format_h264.h"
+#include "webrtc/common_video/h264/sps_vui_rewriter.h"
+#include "webrtc/common_video/h264/h264_common.h"
+#include "webrtc/common_video/h264/sps_parser.h"
+#include "webrtc/system_wrappers/include/metrics.h"
namespace webrtc {
namespace {
-enum Nalu {
- kSlice = 1,
- kIdr = 5,
- kSei = 6,
- kSps = 7,
- kPps = 8,
- kStapA = 24,
- kFuA = 28
-};
-
static const size_t kNalHeaderSize = 1;
static const size_t kFuAHeaderSize = 2;
static const size_t kLengthFieldSize = 2;
static const size_t kStapAHeaderSize = kNalHeaderSize + kLengthFieldSize;
+static const char* kSpsValidHistogramName = "WebRTC.Video.H264.SpsValid";
+enum SpsValidEvent {
+ kReceivedSpsPocOk = 0,
+ kReceivedSpsVuiOk = 1,
+ kReceivedSpsRewritten = 2,
+ kReceivedSpsParseFailure = 3,
+ kSentSpsPocOk = 4,
+ kSentSpsVuiOk = 5,
+ kSentSpsRewritten = 6,
+ kSentSpsParseFailure = 7,
+ kSpsRewrittenMax = 8
+};
+
// Bit masks for FU (A and B) indicators.
-enum NalDefs { kFBit = 0x80, kNriMask = 0x60, kTypeMask = 0x1F };
+enum NalDefs : uint8_t { kFBit = 0x80, kNriMask = 0x60, kTypeMask = 0x1F };
// Bit masks for FU (A and B) headers.
-enum FuDefs { kSBit = 0x80, kEBit = 0x40, kRBit = 0x20 };
+enum FuDefs : uint8_t { kSBit = 0x80, kEBit = 0x40, kRBit = 0x20 };
// TODO(pbos): Avoid parsing this here as well as inside the jitter buffer.
-bool VerifyStapANaluLengths(const uint8_t* nalu_ptr, size_t length_remaining) {
+bool ParseStapAStartOffsets(const uint8_t* nalu_ptr,
+ size_t length_remaining,
+ std::vector<size_t>* offsets) {
+ size_t offset = 0;
while (length_remaining > 0) {
// Buffer doesn't contain room for additional nalu length.
if (length_remaining < sizeof(uint16_t))
return false;
- uint16_t nalu_size = nalu_ptr[0] << 8 | nalu_ptr[1];
+ uint16_t nalu_size = ByteReader<uint16_t>::ReadBigEndian(nalu_ptr);
nalu_ptr += sizeof(uint16_t);
length_remaining -= sizeof(uint16_t);
if (nalu_size > length_remaining)
return false;
nalu_ptr += nalu_size;
length_remaining -= nalu_size;
+
+ offsets->push_back(offset + kStapAHeaderSize);
+ offset += kLengthFieldSize + nalu_size;
}
return true;
}
-bool ParseSingleNalu(RtpDepacketizer::ParsedPayload* parsed_payload,
- const uint8_t* payload_data,
- size_t payload_data_length) {
- parsed_payload->type.Video.width = 0;
- parsed_payload->type.Video.height = 0;
- parsed_payload->type.Video.codec = kRtpVideoH264;
- parsed_payload->type.Video.isFirstPacket = true;
- RTPVideoHeaderH264* h264_header =
- &parsed_payload->type.Video.codecHeader.H264;
-
- const uint8_t* nalu_start = payload_data + kNalHeaderSize;
- size_t nalu_length = payload_data_length - kNalHeaderSize;
- uint8_t nal_type = payload_data[0] & kTypeMask;
- if (nal_type == kStapA) {
- // Skip the StapA header (StapA nal type + length).
- if (payload_data_length <= kStapAHeaderSize) {
- LOG(LS_ERROR) << "StapA header truncated.";
- return false;
- }
- if (!VerifyStapANaluLengths(nalu_start, nalu_length)) {
- LOG(LS_ERROR) << "StapA packet with incorrect NALU packet lengths.";
- return false;
- }
-
- nal_type = payload_data[kStapAHeaderSize] & kTypeMask;
- nalu_start += kStapAHeaderSize;
- nalu_length -= kStapAHeaderSize;
- h264_header->packetization_type = kH264StapA;
- } else {
- h264_header->packetization_type = kH264SingleNalu;
- }
- h264_header->nalu_type = nal_type;
-
- // We can read resolution out of sps packets.
- if (nal_type == kSps) {
- H264SpsParser parser(nalu_start, nalu_length);
- if (parser.Parse()) {
- parsed_payload->type.Video.width = parser.width();
- parsed_payload->type.Video.height = parser.height();
- }
- }
- switch (nal_type) {
- case kSps:
- case kPps:
- case kSei:
- case kIdr:
- parsed_payload->frame_type = kVideoFrameKey;
- break;
- default:
- parsed_payload->frame_type = kVideoFrameDelta;
- break;
- }
- return true;
-}
-
-bool ParseFuaNalu(RtpDepacketizer::ParsedPayload* parsed_payload,
- const uint8_t* payload_data,
- size_t payload_data_length,
- size_t* offset) {
- if (payload_data_length < kFuAHeaderSize) {
- LOG(LS_ERROR) << "FU-A NAL units truncated.";
- return false;
- }
- uint8_t fnri = payload_data[0] & (kFBit | kNriMask);
- uint8_t original_nal_type = payload_data[1] & kTypeMask;
- bool first_fragment = (payload_data[1] & kSBit) > 0;
-
- uint8_t original_nal_header = fnri | original_nal_type;
- if (first_fragment) {
- *offset = kNalHeaderSize;
- uint8_t* payload = const_cast<uint8_t*>(payload_data + *offset);
- payload[0] = original_nal_header;
- } else {
- *offset = kFuAHeaderSize;
- }
-
- if (original_nal_type == kIdr) {
- parsed_payload->frame_type = kVideoFrameKey;
- } else {
- parsed_payload->frame_type = kVideoFrameDelta;
- }
- parsed_payload->type.Video.width = 0;
- parsed_payload->type.Video.height = 0;
- parsed_payload->type.Video.codec = kRtpVideoH264;
- parsed_payload->type.Video.isFirstPacket = first_fragment;
- RTPVideoHeaderH264* h264_header =
- &parsed_payload->type.Video.codecHeader.H264;
- h264_header->packetization_type = kH264FuA;
- h264_header->nalu_type = original_nal_type;
- return true;
-}
} // namespace
RtpPacketizerH264::RtpPacketizerH264(FrameType frame_type,
size_t max_payload_len)
- : payload_data_(NULL),
- payload_size_(0),
- max_payload_len_(max_payload_len) {
-}
+ : max_payload_len_(max_payload_len) {}
RtpPacketizerH264::~RtpPacketizerH264() {
}
+RtpPacketizerH264::Fragment::Fragment(const uint8_t* buffer, size_t length)
+ : buffer(buffer), length(length) {}
+RtpPacketizerH264::Fragment::Fragment(const Fragment& fragment)
+ : buffer(fragment.buffer), length(fragment.length) {}
+
void RtpPacketizerH264::SetPayloadData(
const uint8_t* payload_data,
size_t payload_size,
const RTPFragmentationHeader* fragmentation) {
- assert(packets_.empty());
- assert(fragmentation);
- payload_data_ = payload_data;
- payload_size_ = payload_size;
- fragmentation_.CopyFrom(*fragmentation);
+ RTC_DCHECK(packets_.empty());
+ RTC_DCHECK(input_fragments_.empty());
+ RTC_DCHECK(fragmentation);
+ for (int i = 0; i < fragmentation->fragmentationVectorSize; ++i) {
+ const uint8_t* buffer =
+ &payload_data[fragmentation->fragmentationOffset[i]];
+ size_t length = fragmentation->fragmentationLength[i];
+
+ bool updated_sps = false;
+ H264::NaluType nalu_type = H264::ParseNaluType(buffer[0]);
+ if (nalu_type == H264::NaluType::kSps) {
+ // Check if stream uses picture order count type 0, and if so rewrite it
+ // to enable faster decoding. Streams in that format incur additional
+ // delay because it allows decode order to differ from render order.
+ // The mechanism used is to rewrite (edit or add) the SPS's VUI to contain
+ // restrictions on the maximum number of reordered pictures. This reduces
+ // latency significantly, though it still adds about a frame of latency to
+ // decoding.
+ // Note that we do this rewriting both here (send side, in order to
+ // protect legacy receive clients) and below in
+ // RtpDepacketizerH264::ProcessStapAOrSingleNalu (receive side, in order to
+ // protect us from unknown or legacy send clients).
+
+ // Create a temporary RBSP decoded buffer of the payload (excluding the
+ // leading NALU type header byte); the SpsParser uses only the payload.
+ std::unique_ptr<rtc::Buffer> rbsp_buffer = H264::ParseRbsp(
+ buffer + H264::kNaluTypeSize, length - H264::kNaluTypeSize);
+ rtc::Optional<SpsParser::SpsState> sps;
+
+ std::unique_ptr<rtc::Buffer> output_buffer(new rtc::Buffer());
+ // Add the type header to the output buffer first, so that the rewriter
+ // can append modified payload on top of that.
+ output_buffer->AppendData(buffer[0]);
+ SpsVuiRewriter::ParseResult result = SpsVuiRewriter::ParseAndRewriteSps(
+ rbsp_buffer->data(), rbsp_buffer->size(), &sps, output_buffer.get());
+
+ switch (result) {
+ case SpsVuiRewriter::ParseResult::kVuiRewritten:
+ input_fragments_.push_back(
+ Fragment(output_buffer->data(), output_buffer->size()));
+ input_fragments_.rbegin()->tmp_buffer = std::move(output_buffer);
+ updated_sps = true;
+ RTC_HISTOGRAM_ENUMERATION(kSpsValidHistogramName,
+ SpsValidEvent::kSentSpsRewritten,
+ SpsValidEvent::kSpsRewrittenMax);
+ break;
+ case SpsVuiRewriter::ParseResult::kPocOk:
+ RTC_HISTOGRAM_ENUMERATION(kSpsValidHistogramName,
+ SpsValidEvent::kSentSpsPocOk,
+ SpsValidEvent::kSpsRewrittenMax);
+ break;
+ case SpsVuiRewriter::ParseResult::kVuiOk:
+ RTC_HISTOGRAM_ENUMERATION(kSpsValidHistogramName,
+ SpsValidEvent::kSentSpsVuiOk,
+ SpsValidEvent::kSpsRewrittenMax);
+ break;
+ case SpsVuiRewriter::ParseResult::kFailure:
+ RTC_HISTOGRAM_ENUMERATION(kSpsValidHistogramName,
+ SpsValidEvent::kSentSpsParseFailure,
+ SpsValidEvent::kSpsRewrittenMax);
+ break;
+ }
+ }
+
+ if (!updated_sps)
+ input_fragments_.push_back(Fragment(buffer, length));
+ }
GeneratePackets();
}
void RtpPacketizerH264::GeneratePackets() {
- for (size_t i = 0; i < fragmentation_.fragmentationVectorSize;) {
- size_t fragment_offset = fragmentation_.fragmentationOffset[i];
- size_t fragment_length = fragmentation_.fragmentationLength[i];
- if (fragment_length > max_payload_len_) {
- PacketizeFuA(fragment_offset, fragment_length);
+ for (size_t i = 0; i < input_fragments_.size();) {
+ if (input_fragments_[i].length > max_payload_len_) {
+ PacketizeFuA(i);
++i;
} else {
- i = PacketizeStapA(i, fragment_offset, fragment_length);
+ i = PacketizeStapA(i);
}
}
}
-void RtpPacketizerH264::PacketizeFuA(size_t fragment_offset,
- size_t fragment_length) {
+void RtpPacketizerH264::PacketizeFuA(size_t fragment_index) {
// Fragment payload into packets (FU-A).
// Strip out the original header and leave room for the FU-A header.
- fragment_length -= kNalHeaderSize;
- size_t offset = fragment_offset + kNalHeaderSize;
+ const Fragment& fragment = input_fragments_[fragment_index];
+
+ size_t fragment_length = fragment.length - kNalHeaderSize;
+ size_t offset = kNalHeaderSize;
size_t bytes_available = max_payload_len_ - kFuAHeaderSize;
- size_t fragments =
+ const size_t num_fragments =
(fragment_length + (bytes_available - 1)) / bytes_available;
- size_t avg_size = (fragment_length + fragments - 1) / fragments;
+
+ const size_t avg_size = (fragment_length + num_fragments - 1) / num_fragments;
while (fragment_length > 0) {
size_t packet_length = avg_size;
if (fragment_length < avg_size)
packet_length = fragment_length;
- uint8_t header = payload_data_[fragment_offset];
- packets_.push(Packet(offset,
- packet_length,
- offset - kNalHeaderSize == fragment_offset,
- fragment_length == packet_length,
- false,
- header));
+ packets_.push(PacketUnit(Fragment(fragment.buffer + offset, packet_length),
+ offset - kNalHeaderSize == 0,
+ fragment_length == packet_length, false,
+ fragment.buffer[0]));
offset += packet_length;
fragment_length -= packet_length;
}
+ RTC_CHECK_EQ(0u, fragment_length);
}
-int RtpPacketizerH264::PacketizeStapA(size_t fragment_index,
- size_t fragment_offset,
- size_t fragment_length) {
+size_t RtpPacketizerH264::PacketizeStapA(size_t fragment_index) {
// Aggregate fragments into one packet (STAP-A).
size_t payload_size_left = max_payload_len_;
int aggregated_fragments = 0;
size_t fragment_headers_length = 0;
- assert(payload_size_left >= fragment_length);
- while (payload_size_left >= fragment_length + fragment_headers_length) {
- assert(fragment_length > 0);
- uint8_t header = payload_data_[fragment_offset];
- packets_.push(Packet(fragment_offset,
- fragment_length,
- aggregated_fragments == 0,
- false,
- true,
- header));
- payload_size_left -= fragment_length;
+ const Fragment* fragment = &input_fragments_[fragment_index];
+ RTC_CHECK_GE(payload_size_left, fragment->length);
+ while (payload_size_left >= fragment->length + fragment_headers_length) {
+ RTC_CHECK_GT(fragment->length, 0u);
+ packets_.push(PacketUnit(*fragment, aggregated_fragments == 0, false, true,
+ fragment->buffer[0]));
+ payload_size_left -= fragment->length;
payload_size_left -= fragment_headers_length;
// Next fragment.
++fragment_index;
- if (fragment_index == fragmentation_.fragmentationVectorSize)
+ if (fragment_index == input_fragments_.size())
break;
- fragment_offset = fragmentation_.fragmentationOffset[fragment_index];
- fragment_length = fragmentation_.fragmentationLength[fragment_index];
+ fragment = &input_fragments_[fragment_index];
fragment_headers_length = kLengthFieldSize;
// If we are going to try to aggregate more fragments into this packet
@@ -260,20 +239,21 @@
return false;
}
- Packet packet = packets_.front();
+ PacketUnit packet = packets_.front();
if (packet.first_fragment && packet.last_fragment) {
// Single NAL unit packet.
- *bytes_to_send = packet.size;
- memcpy(buffer, &payload_data_[packet.offset], packet.size);
+ *bytes_to_send = packet.source_fragment.length;
+ memcpy(buffer, packet.source_fragment.buffer, *bytes_to_send);
packets_.pop();
- assert(*bytes_to_send <= max_payload_len_);
+ input_fragments_.pop_front();
+ RTC_CHECK_LE(*bytes_to_send, max_payload_len_);
} else if (packet.aggregated) {
NextAggregatePacket(buffer, bytes_to_send);
- assert(*bytes_to_send <= max_payload_len_);
+ RTC_CHECK_LE(*bytes_to_send, max_payload_len_);
} else {
NextFragmentPacket(buffer, bytes_to_send);
- assert(*bytes_to_send <= max_payload_len_);
+ RTC_CHECK_LE(*bytes_to_send, max_payload_len_);
}
*last_packet = packets_.empty();
return true;
@@ -281,53 +261,54 @@
void RtpPacketizerH264::NextAggregatePacket(uint8_t* buffer,
size_t* bytes_to_send) {
- Packet packet = packets_.front();
- assert(packet.first_fragment);
+ PacketUnit* packet = &packets_.front();
+ RTC_CHECK(packet->first_fragment);
// STAP-A NALU header.
- buffer[0] = (packet.header & (kFBit | kNriMask)) | kStapA;
+ buffer[0] = (packet->header & (kFBit | kNriMask)) | H264::NaluType::kStapA;
int index = kNalHeaderSize;
*bytes_to_send += kNalHeaderSize;
- while (packet.aggregated) {
+ while (packet->aggregated) {
+ const Fragment& fragment = packet->source_fragment;
// Add NAL unit length field.
- ByteWriter<uint16_t>::WriteBigEndian(&buffer[index], packet.size);
+ ByteWriter<uint16_t>::WriteBigEndian(&buffer[index], fragment.length);
index += kLengthFieldSize;
*bytes_to_send += kLengthFieldSize;
// Add NAL unit.
- memcpy(&buffer[index], &payload_data_[packet.offset], packet.size);
- index += packet.size;
- *bytes_to_send += packet.size;
+ memcpy(&buffer[index], fragment.buffer, fragment.length);
+ index += fragment.length;
+ *bytes_to_send += fragment.length;
packets_.pop();
- if (packet.last_fragment)
+ input_fragments_.pop_front();
+ if (packet->last_fragment)
break;
- packet = packets_.front();
+ packet = &packets_.front();
}
- assert(packet.last_fragment);
+ RTC_CHECK(packet->last_fragment);
}
void RtpPacketizerH264::NextFragmentPacket(uint8_t* buffer,
size_t* bytes_to_send) {
- Packet packet = packets_.front();
+ PacketUnit* packet = &packets_.front();
// NAL unit fragmented over multiple packets (FU-A).
// We do not send original NALU header, so it will be replaced by the
// FU indicator header of the first packet.
- uint8_t fu_indicator = (packet.header & (kFBit | kNriMask)) | kFuA;
+ uint8_t fu_indicator =
+ (packet->header & (kFBit | kNriMask)) | H264::NaluType::kFuA;
uint8_t fu_header = 0;
// S | E | R | 5 bit type.
- fu_header |= (packet.first_fragment ? kSBit : 0);
- fu_header |= (packet.last_fragment ? kEBit : 0);
- uint8_t type = packet.header & kTypeMask;
+ fu_header |= (packet->first_fragment ? kSBit : 0);
+ fu_header |= (packet->last_fragment ? kEBit : 0);
+ uint8_t type = packet->header & kTypeMask;
fu_header |= type;
buffer[0] = fu_indicator;
buffer[1] = fu_header;
- if (packet.last_fragment) {
- *bytes_to_send = packet.size + kFuAHeaderSize;
- memcpy(buffer + kFuAHeaderSize, &payload_data_[packet.offset], packet.size);
- } else {
- *bytes_to_send = packet.size + kFuAHeaderSize;
- memcpy(buffer + kFuAHeaderSize, &payload_data_[packet.offset], packet.size);
- }
+ const Fragment& fragment = packet->source_fragment;
+ *bytes_to_send = fragment.length + kFuAHeaderSize;
+ memcpy(buffer + kFuAHeaderSize, fragment.buffer, fragment.length);
+ if (packet->last_fragment)
+ input_fragments_.pop_front();
packets_.pop();
}
@@ -344,32 +325,202 @@
return "RtpPacketizerH264";
}
+RtpDepacketizerH264::RtpDepacketizerH264() : offset_(0), length_(0) {}
+RtpDepacketizerH264::~RtpDepacketizerH264() {}
+
bool RtpDepacketizerH264::Parse(ParsedPayload* parsed_payload,
const uint8_t* payload_data,
size_t payload_data_length) {
- assert(parsed_payload != NULL);
+ RTC_CHECK(parsed_payload != nullptr);
if (payload_data_length == 0) {
LOG(LS_ERROR) << "Empty payload.";
return false;
}
+ offset_ = 0;
+ length_ = payload_data_length;
+ modified_buffer_.reset();
+
uint8_t nal_type = payload_data[0] & kTypeMask;
- size_t offset = 0;
- if (nal_type == kFuA) {
+ if (nal_type == H264::NaluType::kFuA) {
// Fragmented NAL units (FU-A).
- if (!ParseFuaNalu(
- parsed_payload, payload_data, payload_data_length, &offset)) {
+ if (!ParseFuaNalu(parsed_payload, payload_data))
return false;
- }
} else {
// We handle STAP-A and single NALU's the same way here. The jitter buffer
// will depacketize the STAP-A into NAL units later.
- if (!ParseSingleNalu(parsed_payload, payload_data, payload_data_length))
+ // TODO(sprang): Parse STAP-A offsets here and store in fragmentation vec.
+ if (!ProcessStapAOrSingleNalu(parsed_payload, payload_data))
return false;
}
- parsed_payload->payload = payload_data + offset;
- parsed_payload->payload_length = payload_data_length - offset;
+ const uint8_t* payload =
+ modified_buffer_ ? modified_buffer_->data() : payload_data;
+
+ parsed_payload->payload = payload + offset_;
+ parsed_payload->payload_length = length_;
return true;
}
+
+bool RtpDepacketizerH264::ProcessStapAOrSingleNalu(
+ ParsedPayload* parsed_payload,
+ const uint8_t* payload_data) {
+ parsed_payload->type.Video.width = 0;
+ parsed_payload->type.Video.height = 0;
+ parsed_payload->type.Video.codec = kRtpVideoH264;
+ parsed_payload->type.Video.isFirstPacket = true;
+ RTPVideoHeaderH264* h264_header =
+ &parsed_payload->type.Video.codecHeader.H264;
+
+ const uint8_t* nalu_start = payload_data + kNalHeaderSize;
+ const size_t nalu_length = length_ - kNalHeaderSize;
+ uint8_t nal_type = payload_data[0] & kTypeMask;
+ std::vector<size_t> nalu_start_offsets;
+ if (nal_type == H264::NaluType::kStapA) {
+ // Skip the StapA header (StapA NAL type + length).
+ if (length_ <= kStapAHeaderSize) {
+ LOG(LS_ERROR) << "StapA header truncated.";
+ return false;
+ }
+
+ if (!ParseStapAStartOffsets(nalu_start, nalu_length, &nalu_start_offsets)) {
+ LOG(LS_ERROR) << "StapA packet with incorrect NALU packet lengths.";
+ return false;
+ }
+
+ h264_header->packetization_type = kH264StapA;
+ nal_type = payload_data[kStapAHeaderSize] & kTypeMask;
+ } else {
+ h264_header->packetization_type = kH264SingleNalu;
+ nalu_start_offsets.push_back(0);
+ }
+ h264_header->nalu_type = nal_type;
+ parsed_payload->frame_type = kVideoFrameDelta;
+
+ nalu_start_offsets.push_back(length_ + kLengthFieldSize); // End offset.
+ for (size_t i = 0; i < nalu_start_offsets.size() - 1; ++i) {
+ size_t start_offset = nalu_start_offsets[i];
+ // The end offset is actually the start offset for the next unit, excluding
+ // the length field, so remove that from this unit's length.
+ size_t end_offset = nalu_start_offsets[i + 1] - kLengthFieldSize;
+ nal_type = payload_data[start_offset] & kTypeMask;
+ start_offset += H264::kNaluTypeSize;
+
+ if (nal_type == H264::NaluType::kSps) {
+ // Check if VUI is present in SPS and if it needs to be modified to avoid
+ // excessive decoder latency.
+
+ // Copy any previous data first (likely just the first header).
+ std::unique_ptr<rtc::Buffer> output_buffer(new rtc::Buffer());
+ if (start_offset)
+ output_buffer->AppendData(payload_data, start_offset);
+
+ // RBSP decode of payload data.
+ std::unique_ptr<rtc::Buffer> rbsp_buffer = H264::ParseRbsp(
+ &payload_data[start_offset], end_offset - start_offset);
+ rtc::Optional<SpsParser::SpsState> sps;
+
+ SpsVuiRewriter::ParseResult result = SpsVuiRewriter::ParseAndRewriteSps(
+ rbsp_buffer->data(), rbsp_buffer->size(), &sps, output_buffer.get());
+ switch (result) {
+ case SpsVuiRewriter::ParseResult::kVuiRewritten:
+ if (modified_buffer_) {
+ LOG(LS_WARNING) << "More than one H264 SPS NAL unit needing "
+ "rewriting found within a single STAP-A packet. "
+ "Keeping the first and rewriting the last.";
+ }
+
+ // Rewrite length field to new SPS size.
+ if (h264_header->packetization_type == kH264StapA) {
+ size_t length_field_offset =
+ start_offset - (H264::kNaluTypeSize + kLengthFieldSize);
+ // Stap-A Length includes payload data and type header.
+ size_t rewritten_size =
+ output_buffer->size() - start_offset + H264::kNaluTypeSize;
+ ByteWriter<uint16_t>::WriteBigEndian(
+ &(*output_buffer)[length_field_offset], rewritten_size);
+ }
+
+ // Append rest of packet.
+ output_buffer->AppendData(&payload_data[end_offset],
+ nalu_length + kNalHeaderSize - end_offset);
+
+ modified_buffer_ = std::move(output_buffer);
+ length_ = modified_buffer_->size();
+
+ RTC_HISTOGRAM_ENUMERATION(kSpsValidHistogramName,
+ SpsValidEvent::kReceivedSpsRewritten,
+ SpsValidEvent::kSpsRewrittenMax);
+ break;
+ case SpsVuiRewriter::ParseResult::kPocOk:
+ RTC_HISTOGRAM_ENUMERATION(kSpsValidHistogramName,
+ SpsValidEvent::kReceivedSpsPocOk,
+ SpsValidEvent::kSpsRewrittenMax);
+ break;
+ case SpsVuiRewriter::ParseResult::kVuiOk:
+ RTC_HISTOGRAM_ENUMERATION(kSpsValidHistogramName,
+ SpsValidEvent::kReceivedSpsVuiOk,
+ SpsValidEvent::kSpsRewrittenMax);
+ break;
+ case SpsVuiRewriter::ParseResult::kFailure:
+ RTC_HISTOGRAM_ENUMERATION(kSpsValidHistogramName,
+ SpsValidEvent::kReceivedSpsParseFailure,
+ SpsValidEvent::kSpsRewrittenMax);
+ break;
+ }
+
+ if (sps) {
+ parsed_payload->type.Video.width = sps->width;
+ parsed_payload->type.Video.height = sps->height;
+ }
+ parsed_payload->frame_type = kVideoFrameKey;
+ } else if (nal_type == H264::NaluType::kPps ||
+ nal_type == H264::NaluType::kSei ||
+ nal_type == H264::NaluType::kIdr) {
+ parsed_payload->frame_type = kVideoFrameKey;
+ }
+ }
+
+ return true;
+}
+
+bool RtpDepacketizerH264::ParseFuaNalu(
+ RtpDepacketizer::ParsedPayload* parsed_payload,
+ const uint8_t* payload_data) {
+ if (length_ < kFuAHeaderSize) {
+ LOG(LS_ERROR) << "FU-A NAL units truncated.";
+ return false;
+ }
+ uint8_t fnri = payload_data[0] & (kFBit | kNriMask);
+ uint8_t original_nal_type = payload_data[1] & kTypeMask;
+ bool first_fragment = (payload_data[1] & kSBit) > 0;
+
+ if (first_fragment) {
+ offset_ = 0;
+ length_ -= kNalHeaderSize;
+ uint8_t original_nal_header = fnri | original_nal_type;
+ modified_buffer_.reset(new rtc::Buffer());
+ modified_buffer_->AppendData(payload_data + kNalHeaderSize, length_);
+ (*modified_buffer_)[0] = original_nal_header;
+ } else {
+ offset_ = kFuAHeaderSize;
+ length_ -= kFuAHeaderSize;
+ }
+
+ if (original_nal_type == H264::NaluType::kIdr) {
+ parsed_payload->frame_type = kVideoFrameKey;
+ } else {
+ parsed_payload->frame_type = kVideoFrameDelta;
+ }
+ parsed_payload->type.Video.width = 0;
+ parsed_payload->type.Video.height = 0;
+ parsed_payload->type.Video.codec = kRtpVideoH264;
+ parsed_payload->type.Video.isFirstPacket = first_fragment;
+ RTPVideoHeaderH264* h264_header =
+ &parsed_payload->type.Video.codecHeader.H264;
+ h264_header->packetization_type = kH264FuA;
+ h264_header->nalu_type = original_nal_type;
+ return true;
+}
+
} // namespace webrtc
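The FU-A split in PacketizeFuA above balances packet sizes rather than filling packets greedily and leaving a tiny tail. A small standalone illustration of that arithmetic, with made-up numbers (not part of the patch):

#include <cstddef>
#include <cstdio>

int main() {
  const size_t kFuAHeaderSize = 2;
  size_t max_payload_len = 300;
  size_t fragment_length = 1000;  // NAL unit payload, header already stripped.
  size_t bytes_available = max_payload_len - kFuAHeaderSize;  // 298
  size_t num_fragments =
      (fragment_length + bytes_available - 1) / bytes_available;  // ceil -> 4
  size_t avg_size =
      (fragment_length + num_fragments - 1) / num_fragments;  // ceil -> 250
  while (fragment_length > 0) {
    size_t packet_length =
        fragment_length < avg_size ? fragment_length : avg_size;
    printf("FU-A payload bytes: %zu\n", packet_length);  // 250, four times.
    fragment_length -= packet_length;
  }
  return 0;
}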
diff --git a/webrtc/modules/rtp_rtcp/source/rtp_format_h264.h b/webrtc/modules/rtp_rtcp/source/rtp_format_h264.h
index 88258df..9cf3150 100644
--- a/webrtc/modules/rtp_rtcp/source/rtp_format_h264.h
+++ b/webrtc/modules/rtp_rtcp/source/rtp_format_h264.h
@@ -11,9 +11,11 @@
#ifndef WEBRTC_MODULES_RTP_RTCP_SOURCE_RTP_FORMAT_H264_H_
#define WEBRTC_MODULES_RTP_RTCP_SOURCE_RTP_FORMAT_H264_H_
+#include <deque>
#include <queue>
#include <string>
+#include "webrtc/base/buffer.h"
#include "webrtc/base/constructormagic.h"
#include "webrtc/modules/rtp_rtcp/source/rtp_format.h"
@@ -49,42 +51,50 @@
std::string ToString() override;
private:
- struct Packet {
- Packet(size_t offset,
- size_t size,
- bool first_fragment,
- bool last_fragment,
- bool aggregated,
- uint8_t header)
- : offset(offset),
- size(size),
+ // Input fragments (NAL units), with an optionally owned temporary buffer,
+ // used in case the fragment gets modified.
+ struct Fragment {
+ Fragment(const uint8_t* buffer, size_t length);
+ explicit Fragment(const Fragment& fragment);
+ const uint8_t* buffer = nullptr;
+ size_t length = 0;
+ std::unique_ptr<rtc::Buffer> tmp_buffer;
+ };
+
+ // A packet unit (H264 packet), to be put into an RTP packet:
+ // If a NAL unit is too large for an RTP packet, this packet unit will
+ // represent a FU-A packet of a single fragment of the NAL unit.
+ // If a NAL unit is small enough to fit within a single RTP packet, this
+ // packet unit may represent a single NAL unit or a STAP-A packet, of which
+ // there may be multiple in a single RTP packet (if so, aggregated = true).
+ struct PacketUnit {
+ PacketUnit(const Fragment& source_fragment,
+ bool first_fragment,
+ bool last_fragment,
+ bool aggregated,
+ uint8_t header)
+ : source_fragment(source_fragment),
first_fragment(first_fragment),
last_fragment(last_fragment),
aggregated(aggregated),
header(header) {}
- size_t offset;
- size_t size;
+ const Fragment source_fragment;
bool first_fragment;
bool last_fragment;
bool aggregated;
uint8_t header;
};
- typedef std::queue<Packet> PacketQueue;
void GeneratePackets();
- void PacketizeFuA(size_t fragment_offset, size_t fragment_length);
- int PacketizeStapA(size_t fragment_index,
- size_t fragment_offset,
- size_t fragment_length);
+ void PacketizeFuA(size_t fragment_index);
+ size_t PacketizeStapA(size_t fragment_index);
void NextAggregatePacket(uint8_t* buffer, size_t* bytes_to_send);
void NextFragmentPacket(uint8_t* buffer, size_t* bytes_to_send);
- const uint8_t* payload_data_;
- size_t payload_size_;
const size_t max_payload_len_;
- RTPFragmentationHeader fragmentation_;
- PacketQueue packets_;
+ std::deque<Fragment> input_fragments_;
+ std::queue<PacketUnit> packets_;
RTC_DISALLOW_COPY_AND_ASSIGN(RtpPacketizerH264);
};
@@ -92,11 +102,22 @@
// Depacketizer for H264.
class RtpDepacketizerH264 : public RtpDepacketizer {
public:
- virtual ~RtpDepacketizerH264() {}
+ RtpDepacketizerH264();
+ virtual ~RtpDepacketizerH264();
bool Parse(ParsedPayload* parsed_payload,
const uint8_t* payload_data,
size_t payload_data_length) override;
+
+ private:
+ bool ParseFuaNalu(RtpDepacketizer::ParsedPayload* parsed_payload,
+ const uint8_t* payload_data);
+ bool ProcessStapAOrSingleNalu(RtpDepacketizer::ParsedPayload* parsed_payload,
+ const uint8_t* payload_data);
+
+ size_t offset_;
+ size_t length_;
+ std::unique_ptr<rtc::Buffer> modified_buffer_;
};
} // namespace webrtc
#endif // WEBRTC_MODULES_RTP_RTCP_SOURCE_RTP_FORMAT_H264_H_
diff --git a/webrtc/modules/rtp_rtcp/source/rtp_format_h264_unittest.cc b/webrtc/modules/rtp_rtcp/source/rtp_format_h264_unittest.cc
index 12c2db5..d4cffae 100644
--- a/webrtc/modules/rtp_rtcp/source/rtp_format_h264_unittest.cc
+++ b/webrtc/modules/rtp_rtcp/source/rtp_format_h264_unittest.cc
@@ -15,6 +15,8 @@
#include "testing/gtest/include/gtest/gtest.h"
#include "webrtc/modules/include/module_common_types.h"
#include "webrtc/modules/rtp_rtcp/mocks/mock_rtp_rtcp.h"
+#include "webrtc/modules/rtp_rtcp/source/byte_io.h"
+#include "webrtc/common_video/h264/h264_common.h"
#include "webrtc/modules/rtp_rtcp/source/rtp_format.h"
namespace webrtc {
@@ -379,6 +381,106 @@
sizeof(kExpectedPayloadSizes) / sizeof(size_t)));
}
+namespace {
+const uint8_t kStartSequence[] = {0x00, 0x00, 0x00, 0x01};
+const uint8_t kOriginalSps[] = {kSps, 0x00, 0x00, 0x03, 0x03,
+ 0xF4, 0x05, 0x03, 0xC7, 0xC0};
+const uint8_t kRewrittenSps[] = {kSps, 0x00, 0x00, 0x03, 0x03, 0xF4, 0x05, 0x03,
+ 0xC7, 0xE0, 0x1B, 0x41, 0x10, 0x8D, 0x00};
+const uint8_t kIdrOne[] = {kIdr, 0xFF, 0x00, 0x00, 0x04};
+const uint8_t kIdrTwo[] = {kIdr, 0xFF, 0x00, 0x11};
+}
+
+class RtpPacketizerH264TestSpsRewriting : public ::testing::Test {
+ public:
+ void SetUp() override {
+ fragmentation_header_.VerifyAndAllocateFragmentationHeader(3);
+ fragmentation_header_.fragmentationVectorSize = 3;
+ in_buffer_.AppendData(kStartSequence);
+
+ fragmentation_header_.fragmentationOffset[0] = in_buffer_.size();
+ fragmentation_header_.fragmentationLength[0] = sizeof(kOriginalSps);
+ in_buffer_.AppendData(kOriginalSps);
+
+ fragmentation_header_.fragmentationOffset[1] = in_buffer_.size();
+ fragmentation_header_.fragmentationLength[1] = sizeof(kIdrOne);
+ in_buffer_.AppendData(kIdrOne);
+
+ fragmentation_header_.fragmentationOffset[2] = in_buffer_.size();
+ fragmentation_header_.fragmentationLength[2] = sizeof(kIdrTwo);
+ in_buffer_.AppendData(kIdrTwo);
+ }
+
+ protected:
+ rtc::Buffer in_buffer_;
+ RTPFragmentationHeader fragmentation_header_;
+ std::unique_ptr<RtpPacketizer> packetizer_;
+};
+
+TEST_F(RtpPacketizerH264TestSpsRewriting, FuASps) {
+ const size_t kHeaderOverhead = kFuAHeaderSize + 1;
+
+ // Set size to fragment SPS into two FU-A packets.
+ packetizer_.reset(RtpPacketizer::Create(
+ kRtpVideoH264, sizeof(kOriginalSps) - 2 + kHeaderOverhead, nullptr,
+ kEmptyFrame));
+
+ packetizer_->SetPayloadData(in_buffer_.data(), in_buffer_.size(),
+ &fragmentation_header_);
+
+ bool last_packet = true;
+ uint8_t buffer[sizeof(kOriginalSps) + kHeaderOverhead] = {};
+ size_t num_bytes = 0;
+
+ EXPECT_TRUE(packetizer_->NextPacket(buffer, &num_bytes, &last_packet));
+ size_t offset = H264::kNaluTypeSize;
+ size_t length = num_bytes - kFuAHeaderSize;
+ std::vector<uint8_t> expected_payload(&kRewrittenSps[offset],
+ &kRewrittenSps[offset + length]);
+ EXPECT_THAT(expected_payload,
+ ::testing::ElementsAreArray(&buffer[kFuAHeaderSize], length));
+ offset += length;
+
+ EXPECT_TRUE(packetizer_->NextPacket(buffer, &num_bytes, &last_packet));
+ length = num_bytes - kFuAHeaderSize;
+ expected_payload = std::vector<uint8_t>(&kRewrittenSps[offset],
+ &kRewrittenSps[offset + length]);
+ EXPECT_THAT(expected_payload,
+ ::testing::ElementsAreArray(&buffer[kFuAHeaderSize], length));
+ offset += length;
+
+ EXPECT_EQ(offset, sizeof(kRewrittenSps));
+}
+
+TEST_F(RtpPacketizerH264TestSpsRewriting, StapASps) {
+ const size_t kHeaderOverhead = kFuAHeaderSize + 1;
+ const size_t kExpectedTotalSize = H264::kNaluTypeSize + // Stap-A type.
+ sizeof(kRewrittenSps) + sizeof(kIdrOne) +
+ sizeof(kIdrTwo) + (kLengthFieldLength * 3);
+
+ // Set size to include SPS and the rest of the packets in a Stap-A package.
+ packetizer_.reset(RtpPacketizer::Create(kRtpVideoH264,
+ kExpectedTotalSize + kHeaderOverhead,
+ nullptr, kEmptyFrame));
+
+ packetizer_->SetPayloadData(in_buffer_.data(), in_buffer_.size(),
+ &fragmentation_header_);
+
+ bool last_packet = true;
+ uint8_t buffer[kExpectedTotalSize + kHeaderOverhead] = {};
+ size_t num_bytes = 0;
+
+ EXPECT_TRUE(packetizer_->NextPacket(buffer, &num_bytes, &last_packet));
+ EXPECT_EQ(kExpectedTotalSize, num_bytes);
+
+ std::vector<uint8_t> expected_payload(kRewrittenSps,
+ &kRewrittenSps[sizeof(kRewrittenSps)]);
+ EXPECT_THAT(expected_payload,
+ ::testing::ElementsAreArray(
+ &buffer[H264::kNaluTypeSize + kLengthFieldLength],
+ sizeof(kRewrittenSps)));
+}
+
class RtpDepacketizerH264Test : public ::testing::Test {
protected:
RtpDepacketizerH264Test()
@@ -414,7 +516,7 @@
TEST_F(RtpDepacketizerH264Test, TestSingleNaluSpsWithResolution) {
uint8_t packet[] = {kSps, 0x7A, 0x00, 0x1F, 0xBC, 0xD9, 0x40, 0x50,
0x05, 0xBA, 0x10, 0x00, 0x00, 0x03, 0x00, 0xC0,
- 0x00, 0x00, 0x2A, 0xE0, 0xF1, 0x83, 0x19, 0x60};
+ 0x00, 0x00, 0x03, 0x2A, 0xE0, 0xF1, 0x83, 0x25};
RtpDepacketizer::ParsedPayload payload;
ASSERT_TRUE(depacketizer_->Parse(&payload, packet, sizeof(packet)));
@@ -449,11 +551,12 @@
TEST_F(RtpDepacketizerH264Test, TestStapANaluSpsWithResolution) {
uint8_t packet[] = {kStapA, // F=0, NRI=0, Type=24.
// Length (2 bytes), nal header, payload.
- 0, 24, kSps, 0x7A, 0x00, 0x1F, 0xBC, 0xD9,
- 0x40, 0x50, 0x05, 0xBA, 0x10, 0x00, 0x00, 0x03,
- 0x00, 0xC0, 0x00, 0x00, 0x2A, 0xE0, 0xF1, 0x83,
- 0x19, 0x60, 0, 0x03, kIdr, 0xFF, 0x00, 0,
- 0x04, kIdr, 0xFF, 0x00, 0x11};
+ 0x00, 0x19, kSps, 0x7A, 0x00, 0x1F, 0xBC, 0xD9, 0x40,
+ 0x50, 0x05, 0xBA, 0x10, 0x00, 0x00, 0x03, 0x00, 0xC0,
+ 0x00, 0x00, 0x03, 0x2A, 0xE0, 0xF1, 0x83, 0x25, 0x80,
+ 0x00, 0x03, kIdr, 0xFF, 0x00, 0x00, 0x04, kIdr, 0xFF,
+ 0x00, 0x11};
+
RtpDepacketizer::ParsedPayload payload;
ASSERT_TRUE(depacketizer_->Parse(&payload, packet, sizeof(packet)));
@@ -466,6 +569,92 @@
EXPECT_EQ(720u, payload.type.Video.height);
}
+TEST_F(RtpDepacketizerH264Test, DepacketizeWithRewriting) {
+ rtc::Buffer in_buffer;
+ rtc::Buffer out_buffer;
+
+ uint8_t kHeader[2] = {kStapA};
+ in_buffer.AppendData(kHeader, 1);
+ out_buffer.AppendData(kHeader, 1);
+
+ ByteWriter<uint16_t>::WriteBigEndian(kHeader, sizeof(kOriginalSps));
+ in_buffer.AppendData(kHeader, 2);
+ in_buffer.AppendData(kOriginalSps);
+ ByteWriter<uint16_t>::WriteBigEndian(kHeader, sizeof(kRewrittenSps));
+ out_buffer.AppendData(kHeader, 2);
+ out_buffer.AppendData(kRewrittenSps);
+
+ ByteWriter<uint16_t>::WriteBigEndian(kHeader, sizeof(kIdrOne));
+ in_buffer.AppendData(kHeader, 2);
+ in_buffer.AppendData(kIdrOne);
+ out_buffer.AppendData(kHeader, 2);
+ out_buffer.AppendData(kIdrOne);
+
+ ByteWriter<uint16_t>::WriteBigEndian(kHeader, sizeof(kIdrTwo));
+ in_buffer.AppendData(kHeader, 2);
+ in_buffer.AppendData(kIdrTwo);
+ out_buffer.AppendData(kHeader, 2);
+ out_buffer.AppendData(kIdrTwo);
+
+ RtpDepacketizer::ParsedPayload payload;
+ EXPECT_TRUE(
+ depacketizer_->Parse(&payload, in_buffer.data(), in_buffer.size()));
+
+ std::vector<uint8_t> expected_packet_payload(
+ out_buffer.data(), &out_buffer.data()[out_buffer.size()]);
+
+ EXPECT_THAT(
+ expected_packet_payload,
+ ::testing::ElementsAreArray(payload.payload, payload.payload_length));
+}
+
+TEST_F(RtpDepacketizerH264Test, DepacketizeWithDoubleRewriting) {
+ rtc::Buffer in_buffer;
+ rtc::Buffer out_buffer;
+
+ uint8_t kHeader[2] = {kStapA};
+ in_buffer.AppendData(kHeader, 1);
+ out_buffer.AppendData(kHeader, 1);
+
+ // First SPS will be kept...
+ ByteWriter<uint16_t>::WriteBigEndian(kHeader, sizeof(kOriginalSps));
+ in_buffer.AppendData(kHeader, 2);
+ in_buffer.AppendData(kOriginalSps);
+ out_buffer.AppendData(kHeader, 2);
+ out_buffer.AppendData(kOriginalSps);
+
+ // ...only the second one will be rewritten.
+ ByteWriter<uint16_t>::WriteBigEndian(kHeader, sizeof(kOriginalSps));
+ in_buffer.AppendData(kHeader, 2);
+ in_buffer.AppendData(kOriginalSps);
+ ByteWriter<uint16_t>::WriteBigEndian(kHeader, sizeof(kRewrittenSps));
+ out_buffer.AppendData(kHeader, 2);
+ out_buffer.AppendData(kRewrittenSps);
+
+ ByteWriter<uint16_t>::WriteBigEndian(kHeader, sizeof(kIdrOne));
+ in_buffer.AppendData(kHeader, 2);
+ in_buffer.AppendData(kIdrOne);
+ out_buffer.AppendData(kHeader, 2);
+ out_buffer.AppendData(kIdrOne);
+
+ ByteWriter<uint16_t>::WriteBigEndian(kHeader, sizeof(kIdrTwo));
+ in_buffer.AppendData(kHeader, 2);
+ in_buffer.AppendData(kIdrTwo);
+ out_buffer.AppendData(kHeader, 2);
+ out_buffer.AppendData(kIdrTwo);
+
+ RtpDepacketizer::ParsedPayload payload;
+ EXPECT_TRUE(
+ depacketizer_->Parse(&payload, in_buffer.data(), in_buffer.size()));
+
+ std::vector<uint8_t> expected_packet_payload(
+ out_buffer.data(), &out_buffer.data()[out_buffer.size()]);
+
+ EXPECT_THAT(
+ expected_packet_payload,
+ ::testing::ElementsAreArray(payload.payload, payload.payload_length));
+}
+
TEST_F(RtpDepacketizerH264Test, TestStapADelta) {
uint8_t packet[16] = {kStapA, // F=0, NRI=0, Type=24.
// Length, nal header, payload.
diff --git a/webrtc/modules/video_coding/utility/h264_bitstream_parser.cc b/webrtc/modules/video_coding/utility/h264_bitstream_parser.cc
index 9102a6a..97cd003 100644
--- a/webrtc/modules/video_coding/utility/h264_bitstream_parser.cc
+++ b/webrtc/modules/video_coding/utility/h264_bitstream_parser.cc
@@ -15,74 +15,11 @@
#include "webrtc/base/bitbuffer.h"
#include "webrtc/base/bytebuffer.h"
#include "webrtc/base/checks.h"
+
+#include "webrtc/common_video/h264/h264_common.h"
#include "webrtc/base/logging.h"
namespace webrtc {
-namespace {
-// The size of a NALU header {0 0 0 1}.
-static const size_t kNaluHeaderSize = 4;
-
-// The size of a NALU header plus the type byte.
-static const size_t kNaluHeaderAndTypeSize = kNaluHeaderSize + 1;
-
-// The NALU type.
-static const uint8_t kNaluSps = 0x7;
-static const uint8_t kNaluPps = 0x8;
-static const uint8_t kNaluIdr = 0x5;
-static const uint8_t kNaluTypeMask = 0x1F;
-
-static const uint8_t kSliceTypeP = 0x0;
-static const uint8_t kSliceTypeB = 0x1;
-static const uint8_t kSliceTypeSp = 0x3;
-
-// Returns a vector of the NALU start sequences (0 0 0 1) in the given buffer.
-std::vector<size_t> FindNaluStartSequences(const uint8_t* buffer,
- size_t buffer_size) {
- std::vector<size_t> sequences;
- // This is sorta like Boyer-Moore, but with only the first optimization step:
- // given a 4-byte sequence we're looking at, if the 4th byte isn't 1 or 0,
- // skip ahead to the next 4-byte sequence. 0s and 1s are relatively rare, so
- // this will skip the majority of reads/checks.
- const uint8_t* end = buffer + buffer_size - 4;
- for (const uint8_t* head = buffer; head < end;) {
- if (head[3] > 1) {
- head += 4;
- } else if (head[3] == 1 && head[2] == 0 && head[1] == 0 && head[0] == 0) {
- sequences.push_back(static_cast<size_t>(head - buffer));
- head += 4;
- } else {
- head++;
- }
- }
-
- return sequences;
-}
-} // namespace
-
-// Parses RBSP from source bytes. Removes emulation bytes, but leaves the
-// rbsp_trailing_bits() in the stream, since none of the parsing reads all the
-// way to the end of a parsed RBSP sequence. When writing, that means the
-// rbsp_trailing_bits() should be preserved and don't need to be restored (i.e.
-// the rbsp_stop_one_bit, which is just a 1, then zero padded), and alignment
-// should "just work".
-// TODO(pbos): Make parsing RBSP something that can be integrated into BitBuffer
-// so we don't have to copy the entire frames when only interested in the
-// headers.
-rtc::ByteBufferWriter* ParseRbsp(const uint8_t* bytes, size_t length) {
- // Copied from webrtc::H264SpsParser::Parse.
- rtc::ByteBufferWriter* rbsp_buffer = new rtc::ByteBufferWriter();
- for (size_t i = 0; i < length;) {
- if (length - i >= 3 && bytes[i] == 0 && bytes[i + 1] == 0 &&
- bytes[i + 2] == 3) {
- rbsp_buffer->WriteBytes(reinterpret_cast<const char*>(bytes) + i, 2);
- i += 3;
- } else {
- rbsp_buffer->WriteBytes(reinterpret_cast<const char*>(bytes) + i, 1);
- i++;
- }
- }
- return rbsp_buffer;
-}
#define RETURN_FALSE_ON_FAIL(x) \
if (!(x)) { \
@@ -90,242 +27,23 @@
return false; \
}
-H264BitstreamParser::PpsState::PpsState() {}
-
-H264BitstreamParser::SpsState::SpsState() {}
-
-// These functions are similar to webrtc::H264SpsParser::Parse, and based on the
-// same version of the H.264 standard. You can find it here:
-// http://www.itu.int/rec/T-REC-H.264
-bool H264BitstreamParser::ParseSpsNalu(const uint8_t* sps, size_t length) {
- // Reset SPS state.
- sps_ = SpsState();
- sps_parsed_ = false;
- // Parse out the SPS RBSP. It should be small, so it's ok that we create a
- // copy. We'll eventually write this back.
- std::unique_ptr<rtc::ByteBufferWriter> sps_rbsp(
- ParseRbsp(sps + kNaluHeaderAndTypeSize, length - kNaluHeaderAndTypeSize));
- rtc::BitBuffer sps_parser(reinterpret_cast<const uint8_t*>(sps_rbsp->Data()),
- sps_rbsp->Length());
-
- uint8_t byte_tmp;
- uint32_t golomb_tmp;
- uint32_t bits_tmp;
-
- // profile_idc: u(8).
- uint8_t profile_idc;
- RETURN_FALSE_ON_FAIL(sps_parser.ReadUInt8(&profile_idc));
- // constraint_set0_flag through constraint_set5_flag + reserved_zero_2bits
- // 1 bit each for the flags + 2 bits = 8 bits = 1 byte.
- RETURN_FALSE_ON_FAIL(sps_parser.ReadUInt8(&byte_tmp));
- // level_idc: u(8)
- RETURN_FALSE_ON_FAIL(sps_parser.ReadUInt8(&byte_tmp));
- // seq_parameter_set_id: ue(v)
- RETURN_FALSE_ON_FAIL(sps_parser.ReadExponentialGolomb(&golomb_tmp));
- sps_.separate_colour_plane_flag = 0;
- // See if profile_idc has chroma format information.
- if (profile_idc == 100 || profile_idc == 110 || profile_idc == 122 ||
- profile_idc == 244 || profile_idc == 44 || profile_idc == 83 ||
- profile_idc == 86 || profile_idc == 118 || profile_idc == 128 ||
- profile_idc == 138 || profile_idc == 139 || profile_idc == 134) {
- // chroma_format_idc: ue(v)
- uint32_t chroma_format_idc;
- RETURN_FALSE_ON_FAIL(sps_parser.ReadExponentialGolomb(&chroma_format_idc));
- if (chroma_format_idc == 3) {
- // separate_colour_plane_flag: u(1)
- RETURN_FALSE_ON_FAIL(
- sps_parser.ReadBits(&sps_.separate_colour_plane_flag, 1));
- }
- // bit_depth_luma_minus8: ue(v)
- RETURN_FALSE_ON_FAIL(sps_parser.ReadExponentialGolomb(&golomb_tmp));
- // bit_depth_chroma_minus8: ue(v)
- RETURN_FALSE_ON_FAIL(sps_parser.ReadExponentialGolomb(&golomb_tmp));
- // qpprime_y_zero_transform_bypass_flag: u(1)
- RETURN_FALSE_ON_FAIL(sps_parser.ReadBits(&bits_tmp, 1));
- // seq_scaling_matrix_present_flag: u(1)
- uint32_t seq_scaling_matrix_present_flag;
- RETURN_FALSE_ON_FAIL(
- sps_parser.ReadBits(&seq_scaling_matrix_present_flag, 1));
- if (seq_scaling_matrix_present_flag) {
- // seq_scaling_list_present_flags. Either 8 or 12, depending on
- // chroma_format_idc.
- uint32_t seq_scaling_list_present_flags;
- if (chroma_format_idc != 3) {
- RETURN_FALSE_ON_FAIL(
- sps_parser.ReadBits(&seq_scaling_list_present_flags, 8));
- } else {
- RETURN_FALSE_ON_FAIL(
- sps_parser.ReadBits(&seq_scaling_list_present_flags, 12));
- }
- // TODO(pbos): Support parsing scaling lists if they're seen in practice.
- RTC_CHECK(seq_scaling_list_present_flags == 0)
- << "SPS contains scaling lists, which are unsupported.";
- }
- }
- // log2_max_frame_num_minus4: ue(v)
- RETURN_FALSE_ON_FAIL(
- sps_parser.ReadExponentialGolomb(&sps_.log2_max_frame_num_minus4));
- // pic_order_cnt_type: ue(v)
- RETURN_FALSE_ON_FAIL(
- sps_parser.ReadExponentialGolomb(&sps_.pic_order_cnt_type));
-
- if (sps_.pic_order_cnt_type == 0) {
- // log2_max_pic_order_cnt_lsb_minus4: ue(v)
- RETURN_FALSE_ON_FAIL(sps_parser.ReadExponentialGolomb(
- &sps_.log2_max_pic_order_cnt_lsb_minus4));
- } else if (sps_.pic_order_cnt_type == 1) {
- // delta_pic_order_always_zero_flag: u(1)
- RETURN_FALSE_ON_FAIL(
- sps_parser.ReadBits(&sps_.delta_pic_order_always_zero_flag, 1));
- // offset_for_non_ref_pic: se(v)
- RETURN_FALSE_ON_FAIL(sps_parser.ReadExponentialGolomb(&golomb_tmp));
- // offset_for_top_to_bottom_field: se(v)
- RETURN_FALSE_ON_FAIL(sps_parser.ReadExponentialGolomb(&golomb_tmp));
- uint32_t num_ref_frames_in_pic_order_cnt_cycle;
- // num_ref_frames_in_pic_order_cnt_cycle: ue(v)
- RETURN_FALSE_ON_FAIL(sps_parser.ReadExponentialGolomb(
- &num_ref_frames_in_pic_order_cnt_cycle));
- for (uint32_t i = 0; i < num_ref_frames_in_pic_order_cnt_cycle; i++) {
- // offset_for_ref_frame[i]: se(v)
- RETURN_FALSE_ON_FAIL(sps_parser.ReadExponentialGolomb(&golomb_tmp));
- }
- }
- // max_num_ref_frames: ue(v)
- RETURN_FALSE_ON_FAIL(sps_parser.ReadExponentialGolomb(&golomb_tmp));
- // gaps_in_frame_num_value_allowed_flag: u(1)
- RETURN_FALSE_ON_FAIL(sps_parser.ReadBits(&bits_tmp, 1));
- // pic_width_in_mbs_minus1: ue(v)
- RETURN_FALSE_ON_FAIL(sps_parser.ReadExponentialGolomb(&golomb_tmp));
- // pic_height_in_map_units_minus1: ue(v)
- RETURN_FALSE_ON_FAIL(sps_parser.ReadExponentialGolomb(&golomb_tmp));
- // frame_mbs_only_flag: u(1)
- RETURN_FALSE_ON_FAIL(sps_parser.ReadBits(&sps_.frame_mbs_only_flag, 1));
- sps_parsed_ = true;
- return true;
-}
-
-bool H264BitstreamParser::ParsePpsNalu(const uint8_t* pps, size_t length) {
- RTC_CHECK(sps_parsed_);
- // We're starting a new stream, so reset picture type rewriting values.
- pps_ = PpsState();
- pps_parsed_ = false;
- std::unique_ptr<rtc::ByteBufferWriter> buffer(
- ParseRbsp(pps + kNaluHeaderAndTypeSize, length - kNaluHeaderAndTypeSize));
- rtc::BitBuffer parser(reinterpret_cast<const uint8_t*>(buffer->Data()),
- buffer->Length());
-
- uint32_t bits_tmp;
- uint32_t golomb_ignored;
- // pic_parameter_set_id: ue(v)
- RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));
- // seq_parameter_set_id: ue(v)
- RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));
- // entropy_coding_mode_flag: u(1)
- uint32_t entropy_coding_mode_flag;
- RETURN_FALSE_ON_FAIL(parser.ReadBits(&entropy_coding_mode_flag, 1));
- // TODO(pbos): Implement CABAC support if spotted in the wild.
- RTC_CHECK(entropy_coding_mode_flag == 0)
- << "Don't know how to parse CABAC streams.";
- // bottom_field_pic_order_in_frame_present_flag: u(1)
- uint32_t bottom_field_pic_order_in_frame_present_flag;
- RETURN_FALSE_ON_FAIL(
- parser.ReadBits(&bottom_field_pic_order_in_frame_present_flag, 1));
- pps_.bottom_field_pic_order_in_frame_present_flag =
- bottom_field_pic_order_in_frame_present_flag != 0;
-
- // num_slice_groups_minus1: ue(v)
- uint32_t num_slice_groups_minus1;
- RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&num_slice_groups_minus1));
- if (num_slice_groups_minus1 > 0) {
- uint32_t slice_group_map_type;
- // slice_group_map_type: ue(v)
- RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&slice_group_map_type));
- if (slice_group_map_type == 0) {
- for (uint32_t i_group = 0; i_group <= num_slice_groups_minus1;
- ++i_group) {
- // run_length_minus1[iGroup]: ue(v)
- RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));
- }
- } else if (slice_group_map_type == 2) {
- for (uint32_t i_group = 0; i_group <= num_slice_groups_minus1;
- ++i_group) {
- // top_left[iGroup]: ue(v)
- RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));
- // bottom_right[iGroup]: ue(v)
- RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));
- }
- } else if (slice_group_map_type == 3 || slice_group_map_type == 4 ||
- slice_group_map_type == 5) {
- // slice_group_change_direction_flag: u(1)
- RETURN_FALSE_ON_FAIL(parser.ReadBits(&bits_tmp, 1));
- // slice_group_change_rate_minus1: ue(v)
- RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));
- } else if (slice_group_map_type == 6) {
- // pic_size_in_map_units_minus1: ue(v)
- uint32_t pic_size_in_map_units_minus1;
- RETURN_FALSE_ON_FAIL(
- parser.ReadExponentialGolomb(&pic_size_in_map_units_minus1));
- uint32_t slice_group_id_bits = 0;
- uint32_t num_slice_groups = num_slice_groups_minus1 + 1;
- // If num_slice_groups is not a power of two an additional bit is required
- // to account for the ceil() of log2() below.
- if ((num_slice_groups & (num_slice_groups - 1)) != 0)
- ++slice_group_id_bits;
- while (num_slice_groups > 0) {
- num_slice_groups >>= 1;
- ++slice_group_id_bits;
- }
- for (uint32_t i = 0; i <= pic_size_in_map_units_minus1; i++) {
- // slice_group_id[i]: u(v)
- // Represented by ceil(log2(num_slice_groups_minus1 + 1)) bits.
- RETURN_FALSE_ON_FAIL(parser.ReadBits(&bits_tmp, slice_group_id_bits));
- }
- }
- }
- // num_ref_idx_l0_default_active_minus1: ue(v)
- RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));
- // num_ref_idx_l1_default_active_minus1: ue(v)
- RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));
- // weighted_pred_flag: u(1)
- uint32_t weighted_pred_flag;
- RETURN_FALSE_ON_FAIL(parser.ReadBits(&weighted_pred_flag, 1));
- pps_.weighted_pred_flag = weighted_pred_flag != 0;
- // weighted_bipred_idc: u(2)
- RETURN_FALSE_ON_FAIL(parser.ReadBits(&pps_.weighted_bipred_idc, 2));
-
- // pic_init_qp_minus26: se(v)
- RETURN_FALSE_ON_FAIL(
- parser.ReadSignedExponentialGolomb(&pps_.pic_init_qp_minus26));
- // pic_init_qs_minus26: se(v)
- RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));
- // chroma_qp_index_offset: se(v)
- RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));
- // deblocking_filter_control_present_flag: u(1)
- // constrained_intra_pred_flag: u(1)
- RETURN_FALSE_ON_FAIL(parser.ReadBits(&bits_tmp, 2));
- // redundant_pic_cnt_present_flag: u(1)
- RETURN_FALSE_ON_FAIL(
- parser.ReadBits(&pps_.redundant_pic_cnt_present_flag, 1));
-
- pps_parsed_ = true;
- return true;
-}
+H264BitstreamParser::H264BitstreamParser() {}
+H264BitstreamParser::~H264BitstreamParser() {}
bool H264BitstreamParser::ParseNonParameterSetNalu(const uint8_t* source,
size_t source_length,
uint8_t nalu_type) {
- RTC_CHECK(sps_parsed_);
- RTC_CHECK(pps_parsed_);
- last_slice_qp_delta_parsed_ = false;
- std::unique_ptr<rtc::ByteBufferWriter> slice_rbsp(ParseRbsp(
- source + kNaluHeaderAndTypeSize, source_length - kNaluHeaderAndTypeSize));
- rtc::BitBuffer slice_reader(
- reinterpret_cast<const uint8_t*>(slice_rbsp->Data()),
- slice_rbsp->Length());
+ RTC_CHECK(sps_);
+ RTC_CHECK(pps_);
+ last_slice_qp_delta_ = rtc::Optional<int32_t>();
+ std::unique_ptr<rtc::Buffer> slice_rbsp(
+ H264::ParseRbsp(source, source_length));
+ rtc::BitBuffer slice_reader(slice_rbsp->data() + H264::kNaluTypeSize,
+ slice_rbsp->size() - H264::kNaluTypeSize);
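+ // NALUs passed to this parser no longer include a start code, so the NAL
+ // header byte is at source[0].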
// Check to see if this is an IDR slice, which has an extra field to parse
// out.
- bool is_idr = (source[kNaluHeaderSize] & 0x0F) == kNaluIdr;
- uint8_t nal_ref_idc = (source[kNaluHeaderSize] & 0x60) >> 5;
+ bool is_idr = (source[0] & 0x0F) == H264::NaluType::kIdr;
+ uint8_t nal_ref_idc = (source[0] & 0x60) >> 5;
uint32_t golomb_tmp;
uint32_t bits_tmp;
@@ -340,16 +58,16 @@
slice_type %= 5;
// pic_parameter_set_id: ue(v)
RETURN_FALSE_ON_FAIL(slice_reader.ReadExponentialGolomb(&golomb_tmp));
- if (sps_.separate_colour_plane_flag == 1) {
+ if (sps_->separate_colour_plane_flag == 1) {
// colour_plane_id
RETURN_FALSE_ON_FAIL(slice_reader.ReadBits(&bits_tmp, 2));
}
// frame_num: u(v)
// Represented by log2_max_frame_num_minus4 + 4 bits.
RETURN_FALSE_ON_FAIL(
- slice_reader.ReadBits(&bits_tmp, sps_.log2_max_frame_num_minus4 + 4));
+ slice_reader.ReadBits(&bits_tmp, sps_->log2_max_frame_num_minus4 + 4));
uint32_t field_pic_flag = 0;
- if (sps_.frame_mbs_only_flag == 0) {
+ if (sps_->frame_mbs_only_flag == 0) {
// field_pic_flag: u(1)
RETURN_FALSE_ON_FAIL(slice_reader.ReadBits(&field_pic_flag, 1));
if (field_pic_flag != 0) {
@@ -363,45 +81,51 @@
}
// pic_order_cnt_lsb: u(v)
// Represented by sps_.log2_max_pic_order_cnt_lsb_minus4 + 4 bits.
- if (sps_.pic_order_cnt_type == 0) {
+ if (sps_->pic_order_cnt_type == 0) {
RETURN_FALSE_ON_FAIL(slice_reader.ReadBits(
- &bits_tmp, sps_.log2_max_pic_order_cnt_lsb_minus4 + 4));
- if (pps_.bottom_field_pic_order_in_frame_present_flag &&
+ &bits_tmp, sps_->log2_max_pic_order_cnt_lsb_minus4 + 4));
+ if (pps_->bottom_field_pic_order_in_frame_present_flag &&
field_pic_flag == 0) {
// delta_pic_order_cnt_bottom: se(v)
RETURN_FALSE_ON_FAIL(slice_reader.ReadExponentialGolomb(&golomb_tmp));
}
}
- if (sps_.pic_order_cnt_type == 1 && !sps_.delta_pic_order_always_zero_flag) {
+ if (sps_->pic_order_cnt_type == 1 &&
+ !sps_->delta_pic_order_always_zero_flag) {
// delta_pic_order_cnt[0]: se(v)
RETURN_FALSE_ON_FAIL(slice_reader.ReadExponentialGolomb(&golomb_tmp));
- if (pps_.bottom_field_pic_order_in_frame_present_flag && !field_pic_flag) {
+ if (pps_->bottom_field_pic_order_in_frame_present_flag && !field_pic_flag) {
// delta_pic_order_cnt[1]: se(v)
RETURN_FALSE_ON_FAIL(slice_reader.ReadExponentialGolomb(&golomb_tmp));
}
}
- if (pps_.redundant_pic_cnt_present_flag) {
+ if (pps_->redundant_pic_cnt_present_flag) {
// redundant_pic_cnt: ue(v)
RETURN_FALSE_ON_FAIL(slice_reader.ReadExponentialGolomb(&golomb_tmp));
}
- if (slice_type == kSliceTypeB) {
+ if (slice_type == H264::SliceType::kB) {
// direct_spatial_mv_pred_flag: u(1)
RETURN_FALSE_ON_FAIL(slice_reader.ReadBits(&bits_tmp, 1));
}
- if (slice_type == kSliceTypeP || slice_type == kSliceTypeSp ||
- slice_type == kSliceTypeB) {
- uint32_t num_ref_idx_active_override_flag;
- // num_ref_idx_active_override_flag: u(1)
- RETURN_FALSE_ON_FAIL(
- slice_reader.ReadBits(&num_ref_idx_active_override_flag, 1));
- if (num_ref_idx_active_override_flag != 0) {
- // num_ref_idx_l0_active_minus1: ue(v)
- RETURN_FALSE_ON_FAIL(slice_reader.ReadExponentialGolomb(&golomb_tmp));
- if (slice_type == kSliceTypeB) {
- // num_ref_idx_l1_active_minus1: ue(v)
+ switch (slice_type) {
+ case H264::SliceType::kP:
+ case H264::SliceType::kB:
+ case H264::SliceType::kSp:
+ uint32_t num_ref_idx_active_override_flag;
+ // num_ref_idx_active_override_flag: u(1)
+ RETURN_FALSE_ON_FAIL(
+ slice_reader.ReadBits(&num_ref_idx_active_override_flag, 1));
+ if (num_ref_idx_active_override_flag != 0) {
+ // num_ref_idx_l0_active_minus1: ue(v)
RETURN_FALSE_ON_FAIL(slice_reader.ReadExponentialGolomb(&golomb_tmp));
+ if (slice_type == H264::SliceType::kB) {
+ // num_ref_idx_l1_active_minus1: ue(v)
+ RETURN_FALSE_ON_FAIL(slice_reader.ReadExponentialGolomb(&golomb_tmp));
+ }
}
- }
+ break;
+ default:
+ break;
}
// assume nal_unit_type != 20 && nal_unit_type != 21:
RTC_CHECK_NE(nalu_type, 20);
@@ -464,9 +188,10 @@
}
}
// TODO(pbos): Do we need support for pred_weight_table()?
- RTC_CHECK(!((pps_.weighted_pred_flag &&
- (slice_type == kSliceTypeP || slice_type == kSliceTypeSp)) ||
- (pps_.weighted_bipred_idc != 0 && slice_type == kSliceTypeB)))
+ RTC_CHECK(
+ !((pps_->weighted_pred_flag && (slice_type == H264::SliceType::kP ||
+ slice_type == H264::SliceType::kSp)) ||
+ (pps_->weighted_bipred_idc != 0 && slice_type == H264::SliceType::kB)))
<< "Missing support for pred_weight_table().";
// if ((weighted_pred_flag && (slice_type == P || slice_type == SP)) ||
// (weighted_bipred_idc == 1 && slice_type == B)) {
@@ -518,23 +243,30 @@
// cabac not supported: entropy_coding_mode_flag == 0 asserted above.
// if (entropy_coding_mode_flag && slice_type != I && slice_type != SI)
// cabac_init_idc
+ int32_t last_slice_qp_delta;
RETURN_FALSE_ON_FAIL(
- slice_reader.ReadSignedExponentialGolomb(&last_slice_qp_delta_));
- last_slice_qp_delta_parsed_ = true;
+ slice_reader.ReadSignedExponentialGolomb(&last_slice_qp_delta));
+ last_slice_qp_delta_ = rtc::Optional<int32_t>(last_slice_qp_delta);
return true;
}
void H264BitstreamParser::ParseSlice(const uint8_t* slice, size_t length) {
- uint8_t nalu_type = slice[4] & kNaluTypeMask;
+ H264::NaluType nalu_type = H264::ParseNaluType(slice[0]);
switch (nalu_type) {
- case kNaluSps:
- RTC_CHECK(ParseSpsNalu(slice, length))
- << "Failed to parse bitstream SPS.";
+ case H264::NaluType::kSps: {
+ sps_ = SpsParser::ParseSps(slice + H264::kNaluTypeSize,
+ length - H264::kNaluTypeSize);
+ if (!sps_)
+ FATAL() << "Unable to parse SPS from H264 bitstream.";
break;
- case kNaluPps:
- RTC_CHECK(ParsePpsNalu(slice, length))
- << "Failed to parse bitstream PPS.";
+ }
+ case H264::NaluType::kPps: {
+ pps_ = PpsParser::ParsePps(slice + H264::kNaluTypeSize,
+ length - H264::kNaluTypeSize);
+ if (!pps_)
+ FATAL() << "Unable to parse PPS from H264 bitstream.";
break;
+ }
default:
RTC_CHECK(ParseNonParameterSetNalu(slice, length, nalu_type))
<< "Failed to parse picture slice.";
@@ -544,21 +276,17 @@
void H264BitstreamParser::ParseBitstream(const uint8_t* bitstream,
size_t length) {
- RTC_CHECK_GE(length, 4u);
- std::vector<size_t> slice_markers = FindNaluStartSequences(bitstream, length);
- RTC_CHECK(!slice_markers.empty());
- for (size_t i = 0; i < slice_markers.size() - 1; ++i) {
- ParseSlice(bitstream + slice_markers[i],
- slice_markers[i + 1] - slice_markers[i]);
- }
- // Parse the last slice.
- ParseSlice(bitstream + slice_markers.back(), length - slice_markers.back());
+ std::vector<H264::NaluIndex> nalu_indices =
+ H264::FindNaluIndices(bitstream, length);
+ RTC_CHECK(!nalu_indices.empty());
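+ // Each NaluIndex points at the NALU payload, i.e. past the start code.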
+ for (const H264::NaluIndex& index : nalu_indices)
+ ParseSlice(&bitstream[index.payload_start_offset], index.payload_size);
}
bool H264BitstreamParser::GetLastSliceQp(int* qp) const {
- if (!last_slice_qp_delta_parsed_)
+ if (!last_slice_qp_delta_ || !pps_)
return false;
- *qp = 26 + pps_.pic_init_qp_minus26 + last_slice_qp_delta_;
+ *qp = 26 + pps_->pic_init_qp_minus26 + *last_slice_qp_delta_;
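+ // Per the H.264 slice header semantics:
+ // SliceQP_Y = 26 + pic_init_qp_minus26 + slice_qp_delta.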
return true;
}
diff --git a/webrtc/modules/video_coding/utility/h264_bitstream_parser.h b/webrtc/modules/video_coding/utility/h264_bitstream_parser.h
index b6fe1aa..6a779e1 100644
--- a/webrtc/modules/video_coding/utility/h264_bitstream_parser.h
+++ b/webrtc/modules/video_coding/utility/h264_bitstream_parser.h
@@ -10,12 +10,15 @@
#ifndef WEBRTC_MODULES_VIDEO_CODING_UTILITY_H264_BITSTREAM_PARSER_H_
#define WEBRTC_MODULES_VIDEO_CODING_UTILITY_H264_BITSTREAM_PARSER_H_
-
#include <stddef.h>
#include <stdint.h>
+#include "webrtc/base/optional.h"
+#include "webrtc/common_video/h264/pps_parser.h"
+#include "webrtc/common_video/h264/sps_parser.h"
+
namespace rtc {
-class BitBuffer;
+class BitBufferWriter;
}
namespace webrtc {
@@ -28,51 +31,27 @@
// bitstreams.
class H264BitstreamParser {
public:
+ H264BitstreamParser();
+ virtual ~H264BitstreamParser();
+
// Parse an additional chunk of H264 bitstream.
void ParseBitstream(const uint8_t* bitstream, size_t length);
// Get the last extracted QP value from the parsed bitstream.
bool GetLastSliceQp(int* qp) const;
- private:
- // Captured in SPS and used when parsing slice NALUs.
- struct SpsState {
- SpsState();
-
- uint32_t delta_pic_order_always_zero_flag = 0;
- uint32_t separate_colour_plane_flag = 0;
- uint32_t frame_mbs_only_flag = 0;
- uint32_t log2_max_frame_num_minus4 = 0;
- uint32_t log2_max_pic_order_cnt_lsb_minus4 = 0;
- uint32_t pic_order_cnt_type = 0;
- };
-
- struct PpsState {
- PpsState();
-
- bool bottom_field_pic_order_in_frame_present_flag = false;
- bool weighted_pred_flag = false;
- uint32_t weighted_bipred_idc = false;
- uint32_t redundant_pic_cnt_present_flag = 0;
- int pic_init_qp_minus26 = 0;
- };
-
+ protected:
void ParseSlice(const uint8_t* slice, size_t length);
- bool ParseSpsNalu(const uint8_t* sps_nalu, size_t length);
- bool ParsePpsNalu(const uint8_t* pps_nalu, size_t length);
bool ParseNonParameterSetNalu(const uint8_t* source,
size_t source_length,
uint8_t nalu_type);
// SPS/PPS state, updated when parsing new SPS/PPS, used to parse slices.
- bool sps_parsed_ = false;
- SpsState sps_;
- bool pps_parsed_ = false;
- PpsState pps_;
+ rtc::Optional<SpsParser::SpsState> sps_;
+ rtc::Optional<PpsParser::PpsState> pps_;
// Last parsed slice QP.
- bool last_slice_qp_delta_parsed_ = false;
- int32_t last_slice_qp_delta_ = 0;
+ rtc::Optional<int32_t> last_slice_qp_delta_;
};
} // namespace webrtc