Reland "VP9 decoder: Sets thread count based on resolution, reinit on change."
This is a reland of d5925756980f6e82a55f57532c8d855e954459fb
Patchset 2 is a reland of
https://webrtc-review.googlesource.com/c/src/+/177012
Patchset 3 is a fix for a potential crash when InitDecode() is called from
VideoStreamDecoderImpl::GetDecoder(), where the decoder_settings
parameter is, surprisingly, set to nullptr.
Original change's description:
> VP9 decoder: Sets thread count based on resolution, reinit on change.
>
> Previously, number of decoder threads for VP9 were always set to 8 but
> with a cap at number of cores. This was done since we "can't know" the
> resolution that will be used.
>
> With this change, we now initialize the number of threads based on
> resolution given in InitDecode(). If a resolution change happens in
> flight, it requires a keyframe. We therefore parse the header from
> any key frame and if it has a new resolution, we re-initialize the
> decoder.
>
> The number of threads used is based on pixel count. We set one thread
> as target for 1280x720, and scale up linearly from there. The 8-thread
> cap is gone, but we still limit it to the core count.
>
> This means for instance: 1 <= 720p, 2 for 1080p, 4 for 1440p, 9 for 4K.
>
> Bug: webrtc:11551
> Change-Id: I14c169a6c651c50bd1b870c4b22bc4495c8448fd
> Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/174460
> Commit-Queue: Erik Språng <sprang@webrtc.org>
> Reviewed-by: Ilya Nikolaevskiy <ilnik@webrtc.org>
> Cr-Commit-Position: refs/heads/master@{#31507}
Bug: webrtc:11551
Change-Id: I2b4b146d0b8319f07ce1660202d6aa4b374eb015
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/177246
Reviewed-by: Johannes Kron <kron@webrtc.org>
Commit-Queue: Erik Språng <sprang@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#31527}
diff --git a/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc b/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc
index 3221c55..4d5b849 100644
--- a/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc
+++ b/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc
@@ -1733,4 +1733,12 @@
EXPECT_EQ(encoded_frames[0]._frameType, VideoFrameType::kVideoFrameDelta);
}
+TEST_F(TestVp9Impl, HandlesEmptyInitDecode) {
+ std::unique_ptr<VideoDecoder> decoder = CreateDecoder();
+ // Check that nullptr settings are ok for decoder.
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+ decoder->InitDecode(/*codec_settings=*/nullptr, 1));
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, decoder->Release());
+}
+
} // namespace webrtc
diff --git a/modules/video_coding/codecs/vp9/vp9_impl.cc b/modules/video_coding/codecs/vp9/vp9_impl.cc
index 568f133..f557594 100644
--- a/modules/video_coding/codecs/vp9/vp9_impl.cc
+++ b/modules/video_coding/codecs/vp9/vp9_impl.cc
@@ -25,6 +25,7 @@
#include "common_video/libyuv/include/webrtc_libyuv.h"
#include "modules/rtp_rtcp/include/rtp_rtcp_defines.h"
#include "modules/video_coding/codecs/vp9/svc_rate_allocator.h"
+#include "modules/video_coding/utility/vp9_uncompressed_header_parser.h"
#include "rtc_base/checks.h"
#include "rtc_base/experiments/rate_control_settings.h"
#include "rtc_base/keep_ref_until_done.h"
@@ -45,8 +46,6 @@
uint8_t kRefBufIdx[4] = {0, 0, 0, 1};
uint8_t kUpdBufIdx[4] = {0, 0, 1, 0};
-int kMaxNumTiles4kVideo = 8;
-
// Maximum allowed PID difference for differnet per-layer frame-rate case.
const int kMaxAllowedPidDiff = 30;
@@ -1668,14 +1667,32 @@
// errors earlier than the multi-threads version.
// - Make peak CPU usage under control (not depending on input)
cfg.threads = 1;
- (void)kMaxNumTiles4kVideo; // unused
#else
- // We want to use multithreading when decoding high resolution videos. But,
- // since we don't know resolution of input stream at this stage, we always
- // enable it.
- cfg.threads = std::min(number_of_cores, kMaxNumTiles4kVideo);
+ if (!inst) {
+ // No config provided - don't know resolution to decode yet.
+ // Set thread count to one in the meantime.
+ cfg.threads = 1;
+ } else {
+ // We want to use multithreading when decoding high resolution videos. But
+ // not too many in order to avoid overhead when many streams are decoded
+ // concurrently.
+ // Set 2 threads as target for 1280x720 pixel count, and then scale up
+ // linearly from there - but cap at physical core count.
+ // For common resolutions this results in:
+ // 1 for 360p
+ // 2 for 720p
+ // 4 for 1080p
+ // 8 for 1440p
+ // 18 for 4K
+ int num_threads =
+ std::max(1, 2 * (inst->width * inst->height) / (1280 * 720));
+ cfg.threads = std::min(number_of_cores, num_threads);
+ current_codec_ = *inst;
+ }
#endif
+ num_cores_ = number_of_cores;
+
vpx_codec_flags_t flags = 0;
if (vpx_codec_dec_init(decoder_, vpx_codec_vp9_dx(), &cfg, flags)) {
return WEBRTC_VIDEO_CODEC_MEMORY;
@@ -1705,6 +1722,29 @@
if (decode_complete_callback_ == nullptr) {
return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
}
+
+ if (input_image._frameType == VideoFrameType::kVideoFrameKey) {
+ absl::optional<vp9::FrameInfo> frame_info =
+ vp9::ParseIntraFrameInfo(input_image.data(), input_image.size());
+ if (frame_info) {
+ if (frame_info->frame_width != current_codec_.width ||
+ frame_info->frame_height != current_codec_.height) {
+ // Resolution has changed, tear down and re-init a new decoder in
+ // order to get correct sizing.
+ Release();
+ current_codec_.width = frame_info->frame_width;
+ current_codec_.height = frame_info->frame_height;
+ int reinit_status = InitDecode(&current_codec_, num_cores_);
+ if (reinit_status != WEBRTC_VIDEO_CODEC_OK) {
+ RTC_LOG(LS_WARNING) << "Failed to re-init decoder.";
+ return reinit_status;
+ }
+ }
+ } else {
+ RTC_LOG(LS_WARNING) << "Failed to parse VP9 header from key-frame.";
+ }
+ }
+
// Always start with a complete key frame.
if (key_frame_required_) {
if (input_image._frameType != VideoFrameType::kVideoFrameKey)
diff --git a/modules/video_coding/codecs/vp9/vp9_impl.h b/modules/video_coding/codecs/vp9/vp9_impl.h
index 2126044..066ce20 100644
--- a/modules/video_coding/codecs/vp9/vp9_impl.h
+++ b/modules/video_coding/codecs/vp9/vp9_impl.h
@@ -210,6 +210,8 @@
bool inited_;
vpx_codec_ctx_t* decoder_;
bool key_frame_required_;
+ VideoCodec current_codec_;
+ int num_cores_;
};
} // namespace webrtc
diff --git a/modules/video_coding/utility/vp9_uncompressed_header_parser.cc b/modules/video_coding/utility/vp9_uncompressed_header_parser.cc
index 9c89235..f8ddd4d 100644
--- a/modules/video_coding/utility/vp9_uncompressed_header_parser.cc
+++ b/modules/video_coding/utility/vp9_uncompressed_header_parser.cc
@@ -52,40 +52,65 @@
return true;
}
-bool Vp9ReadColorConfig(rtc::BitBuffer* br, uint8_t profile) {
- if (profile == 2 || profile == 3) {
- // Bitdepth.
- RETURN_FALSE_IF_ERROR(br->ConsumeBits(1));
+bool Vp9ReadColorConfig(rtc::BitBuffer* br,
+ uint8_t profile,
+ FrameInfo* frame_info) {
+ if (profile == 0 || profile == 1) {
+ frame_info->bit_detph = BitDept::k8Bit;
+ } else if (profile == 2 || profile == 3) {
+ uint32_t ten_or_twelve_bits;
+ RETURN_FALSE_IF_ERROR(br->ReadBits(&ten_or_twelve_bits, 1));
+ frame_info->bit_detph =
+ ten_or_twelve_bits ? BitDept::k12Bit : BitDept::k10Bit;
}
uint32_t color_space;
RETURN_FALSE_IF_ERROR(br->ReadBits(&color_space, 3));
+ frame_info->color_space = static_cast<ColorSpace>(color_space);
// SRGB is 7.
if (color_space != 7) {
- // YUV range flag.
- RETURN_FALSE_IF_ERROR(br->ConsumeBits(1));
+ uint32_t color_range;
+ RETURN_FALSE_IF_ERROR(br->ReadBits(&color_range, 1));
+ frame_info->color_range =
+ color_range ? ColorRange::kFull : ColorRange::kStudio;
+
if (profile == 1 || profile == 3) {
- // 1 bit: subsampling x.
- // 1 bit: subsampling y.
- RETURN_FALSE_IF_ERROR(br->ConsumeBits(2));
- uint32_t reserved_bit;
- RETURN_FALSE_IF_ERROR(br->ReadBits(&reserved_bit, 1));
- if (reserved_bit) {
- RTC_LOG(LS_WARNING) << "Failed to get QP. Reserved bit set.";
- return false;
+ uint32_t subsampling_x;
+ uint32_t subsampling_y;
+ RETURN_FALSE_IF_ERROR(br->ReadBits(&subsampling_x, 1));
+ RETURN_FALSE_IF_ERROR(br->ReadBits(&subsampling_y, 1));
+ if (subsampling_x) {
+ frame_info->sub_sampling =
+ subsampling_y ? YuvSubsampling::k420 : YuvSubsampling::k422;
+ } else {
+ frame_info->sub_sampling =
+ subsampling_y ? YuvSubsampling::k440 : YuvSubsampling::k444;
}
- }
- } else {
- if (profile == 1 || profile == 3) {
+
uint32_t reserved_bit;
RETURN_FALSE_IF_ERROR(br->ReadBits(&reserved_bit, 1));
if (reserved_bit) {
- RTC_LOG(LS_WARNING) << "Failed to get QP. Reserved bit set.";
+ RTC_LOG(LS_WARNING) << "Failed to parse header. Reserved bit set.";
return false;
}
} else {
- RTC_LOG(LS_WARNING) << "Failed to get QP. 4:4:4 color not supported in "
- "profile 0 or 2.";
+ // Profile 0 or 2.
+ frame_info->sub_sampling = YuvSubsampling::k420;
+ }
+ } else {
+ // SRGB
+ frame_info->color_range = ColorRange::kFull;
+ if (profile == 1 || profile == 3) {
+ frame_info->sub_sampling = YuvSubsampling::k444;
+ uint32_t reserved_bit;
+ RETURN_FALSE_IF_ERROR(br->ReadBits(&reserved_bit, 1));
+ if (reserved_bit) {
+ RTC_LOG(LS_WARNING) << "Failed to parse header. Reserved bit set.";
+ return false;
+ }
+ } else {
+ RTC_LOG(LS_WARNING) << "Failed to parse header. 4:4:4 color not supported"
+ " in profile 0 or 2.";
return false;
}
}
@@ -93,24 +118,38 @@
return true;
}
-bool Vp9ReadFrameSize(rtc::BitBuffer* br) {
- // 2 bytes: frame width.
- // 2 bytes: frame height.
- return br->ConsumeBytes(4);
+bool Vp9ReadFrameSize(rtc::BitBuffer* br, FrameInfo* frame_info) {
+ // 16 bits: frame width - 1.
+ uint16_t frame_width_minus_one;
+ RETURN_FALSE_IF_ERROR(br->ReadUInt16(&frame_width_minus_one));
+ // 16 bits: frame height - 1.
+ uint16_t frame_height_minus_one;
+ RETURN_FALSE_IF_ERROR(br->ReadUInt16(&frame_height_minus_one));
+ frame_info->frame_width = frame_width_minus_one + 1;
+ frame_info->frame_height = frame_height_minus_one + 1;
+ return true;
}
-bool Vp9ReadRenderSize(rtc::BitBuffer* br) {
- uint32_t bit;
- RETURN_FALSE_IF_ERROR(br->ReadBits(&bit, 1));
- if (bit) {
- // 2 bytes: render width.
- // 2 bytes: render height.
- RETURN_FALSE_IF_ERROR(br->ConsumeBytes(4));
+bool Vp9ReadRenderSize(rtc::BitBuffer* br, FrameInfo* frame_info) {
+ uint32_t render_and_frame_size_different;
+ RETURN_FALSE_IF_ERROR(br->ReadBits(&render_and_frame_size_different, 1));
+ if (render_and_frame_size_different) {
+ // 16 bits: render width - 1.
+ uint16_t render_width_minus_one;
+ RETURN_FALSE_IF_ERROR(br->ReadUInt16(&render_width_minus_one));
+ // 16 bits: render height - 1.
+ uint16_t render_height_minus_one;
+ RETURN_FALSE_IF_ERROR(br->ReadUInt16(&render_height_minus_one));
+ frame_info->render_width = render_width_minus_one + 1;
+ frame_info->render_height = render_height_minus_one + 1;
+ } else {
+ frame_info->render_width = frame_info->frame_width;
+ frame_info->render_height = frame_info->frame_height;
}
return true;
}
-bool Vp9ReadFrameSizeFromRefs(rtc::BitBuffer* br) {
+bool Vp9ReadFrameSizeFromRefs(rtc::BitBuffer* br, FrameInfo* frame_info) {
uint32_t found_ref = 0;
for (size_t i = 0; i < kVp9NumRefsPerFrame; i++) {
// Size in refs.
@@ -120,11 +159,11 @@
}
if (!found_ref) {
- if (!Vp9ReadFrameSize(br)) {
+ if (!Vp9ReadFrameSize(br, frame_info)) {
return false;
}
}
- return Vp9ReadRenderSize(br);
+ return Vp9ReadRenderSize(br, frame_info);
}
bool Vp9ReadInterpolationFilter(rtc::BitBuffer* br) {
@@ -166,14 +205,14 @@
}
} // namespace
-bool GetQp(const uint8_t* buf, size_t length, int* qp) {
+bool Parse(const uint8_t* buf, size_t length, int* qp, FrameInfo* frame_info) {
rtc::BitBuffer br(buf, length);
// Frame marker.
uint32_t frame_marker;
RETURN_FALSE_IF_ERROR(br.ReadBits(&frame_marker, 2));
if (frame_marker != 0x2) {
- RTC_LOG(LS_WARNING) << "Failed to get QP. Frame marker should be 2.";
+ RTC_LOG(LS_WARNING) << "Failed to parse header. Frame marker should be 2.";
return false;
}
@@ -181,6 +220,7 @@
uint8_t profile;
if (!Vp9ReadProfile(&br, &profile))
return false;
+ frame_info->profile = profile;
// Show existing frame.
uint32_t show_existing_frame;
@@ -195,18 +235,21 @@
RETURN_FALSE_IF_ERROR(br.ReadBits(&frame_type, 1));
RETURN_FALSE_IF_ERROR(br.ReadBits(&show_frame, 1));
RETURN_FALSE_IF_ERROR(br.ReadBits(&error_resilient, 1));
+ frame_info->show_frame = show_frame;
+ frame_info->error_resilient = error_resilient;
- if (!frame_type) {
+ if (frame_type == 0) {
+ // Key-frame.
if (!Vp9ReadSyncCode(&br))
return false;
- if (!Vp9ReadColorConfig(&br, profile))
+ if (!Vp9ReadColorConfig(&br, profile, frame_info))
return false;
- if (!Vp9ReadFrameSize(&br))
+ if (!Vp9ReadFrameSize(&br, frame_info))
return false;
- if (!Vp9ReadRenderSize(&br))
+ if (!Vp9ReadRenderSize(&br, frame_info))
return false;
-
} else {
+ // Non-keyframe.
uint32_t intra_only = 0;
if (!show_frame)
RETURN_FALSE_IF_ERROR(br.ReadBits(&intra_only, 1));
@@ -218,14 +261,14 @@
return false;
if (profile > 0) {
- if (!Vp9ReadColorConfig(&br, profile))
+ if (!Vp9ReadColorConfig(&br, profile, frame_info))
return false;
}
// Refresh frame flags.
RETURN_FALSE_IF_ERROR(br.ConsumeBits(8));
- if (!Vp9ReadFrameSize(&br))
+ if (!Vp9ReadFrameSize(&br, frame_info))
return false;
- if (!Vp9ReadRenderSize(&br))
+ if (!Vp9ReadRenderSize(&br, frame_info))
return false;
} else {
// Refresh frame flags.
@@ -237,7 +280,7 @@
RETURN_FALSE_IF_ERROR(br.ConsumeBits(4));
}
- if (!Vp9ReadFrameSizeFromRefs(&br))
+ if (!Vp9ReadFrameSizeFromRefs(&br, frame_info))
return false;
// Allow high precision mv.
@@ -267,6 +310,20 @@
return true;
}
-} // namespace vp9
+bool GetQp(const uint8_t* buf, size_t length, int* qp) {
+ FrameInfo frame_info;
+ return Parse(buf, length, qp, &frame_info);
+}
+absl::optional<FrameInfo> ParseIntraFrameInfo(const uint8_t* buf,
+ size_t length) {
+ int qp = 0;
+ FrameInfo frame_info;
+ if (Parse(buf, length, &qp, &frame_info) && frame_info.frame_width > 0) {
+ return frame_info;
+ }
+ return absl::nullopt;
+}
+
+} // namespace vp9
} // namespace webrtc
diff --git a/modules/video_coding/utility/vp9_uncompressed_header_parser.h b/modules/video_coding/utility/vp9_uncompressed_header_parser.h
index 69e8de8..a7f0467 100644
--- a/modules/video_coding/utility/vp9_uncompressed_header_parser.h
+++ b/modules/video_coding/utility/vp9_uncompressed_header_parser.h
@@ -13,6 +13,7 @@
#include <stddef.h>
#include <stdint.h>
+#include "absl/types/optional.h"
namespace webrtc {
@@ -22,6 +23,65 @@
// Returns true on success, false otherwise.
bool GetQp(const uint8_t* buf, size_t length, int* qp);
+// Bit depth per channel. Support varies by profile.
+enum class BitDept : uint8_t {
+ k8Bit = 8,
+ k10Bit = 10,
+ k12Bit = 12,
+};
+
+enum class ColorSpace : uint8_t {
+ CS_UNKNOWN = 0, // Unknown (in this case the color space must be signaled
+ // outside the VP9 bitstream).
+ CS_BT_601 = 1, // CS_BT_601 Rec. ITU-R BT.601-7
+ CS_BT_709 = 2, // Rec. ITU-R BT.709-6
+ CS_SMPTE_170 = 3, // SMPTE-170
+ CS_SMPTE_240 = 4, // SMPTE-240
+ CS_BT_2020 = 5, // Rec. ITU-R BT.2020-2
+ CS_RESERVED = 6, // Reserved
+ CS_RGB = 7, // sRGB (IEC 61966-2-1)
+};
+
+enum class ColorRange {
+ kStudio, // Studio swing:
+ // For BitDepth equals 8:
+ // Y is between 16 and 235 inclusive.
+ // U and V are between 16 and 240 inclusive.
+ // For BitDepth equals 10:
+ // Y is between 64 and 940 inclusive.
+ // U and V are between 64 and 960 inclusive.
+ // For BitDepth equals 12:
+ // Y is between 256 and 3760.
+ // U and V are between 256 and 3840 inclusive.
+ kFull // Full swing; no restriction on Y, U, V values.
+};
+
+enum class YuvSubsampling {
+ k444,
+ k440,
+ k422,
+ k420,
+};
+
+struct FrameInfo {
+ int profile = 0; // Profile 0-3 are valid.
+ bool show_frame = false;
+ bool error_resilient = false;
+ BitDept bit_detph = BitDept::k8Bit;
+ ColorSpace color_space = ColorSpace::CS_UNKNOWN;
+ ColorRange color_range;
+ YuvSubsampling sub_sampling;
+ int frame_width = 0;
+ int frame_height = 0;
+ int render_width = 0;
+ int render_height = 0;
+};
+
+// Parses frame information for a VP9 key-frame or all-intra frame from a
+// bitstream. Returns nullopt on failure or if not a key-frame.
+absl::optional<FrameInfo> ParseIntraFrameInfo(const uint8_t* buf,
+ size_t length);
+
} // namespace vp9
} // namespace webrtc