Reland "VP9 decoder: Sets thread count based on resolution, reinit on change."

This is a reland of d5925756980f6e82a55f57532c8d855e954459fb

Patchset 2 is a reland of
https://webrtc-review.googlesource.com/c/src/+/177012

Patchset 3 is a fix for a potential crash when InitDecode() is called from
VideoStreamDecoderImpl::GetDecoder(), where the decoder_settings
parameter is a bit surprisingly set to nullptr.

Original change's description:
> VP9 decoder: Sets thread count based on resolution, reinit on change.
>
> Previously, number of decoder threads for VP9 were always set to 8 but
> with a cap at number of cores. This was done since we "can't know" the
> resolution that will be used.
>
> With this change, we now initialize the number of threads based on
> resolution given in InitDecode(). If a resolution change happens in
> flight, it requires a keyframe. We therefore parse the header from
> any key frame and if it has a new resolution, we re-initialize the
> decoder.
>
> The number of threads used is based on pixel count. We set one thread
> as target for 1280x720, and scale up linearly from there. The 8-thread
> cap is gone, but we still limit it to the core count.
>
> This means for instance: 1 <= 720p, 2 for 1080p, 4 for 1440p, 9 for 4K.
>
> Bug: webrtc:11551
> Change-Id: I14c169a6c651c50bd1b870c4b22bc4495c8448fd
> Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/174460
> Commit-Queue: Erik Språng <sprang@webrtc.org>
> Reviewed-by: Ilya Nikolaevskiy <ilnik@webrtc.org>
> Cr-Commit-Position: refs/heads/master@{#31507}

Bug: webrtc:11551
Change-Id: I2b4b146d0b8319f07ce1660202d6aa4b374eb015
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/177246
Reviewed-by: Johannes Kron <kron@webrtc.org>
Commit-Queue: Erik Språng <sprang@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#31527}
diff --git a/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc b/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc
index 3221c55..4d5b849 100644
--- a/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc
+++ b/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc
@@ -1733,4 +1733,12 @@
   EXPECT_EQ(encoded_frames[0]._frameType, VideoFrameType::kVideoFrameDelta);
 }
 
+TEST_F(TestVp9Impl, HandlesEmptyInitDecode) {
+  std::unique_ptr<VideoDecoder> decoder = CreateDecoder();
+  // Check that nullptr settings are ok for decoder.
+  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+            decoder->InitDecode(/*codec_settings=*/nullptr, 1));
+  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, decoder->Release());
+}
+
 }  // namespace webrtc
diff --git a/modules/video_coding/codecs/vp9/vp9_impl.cc b/modules/video_coding/codecs/vp9/vp9_impl.cc
index 568f133..f557594 100644
--- a/modules/video_coding/codecs/vp9/vp9_impl.cc
+++ b/modules/video_coding/codecs/vp9/vp9_impl.cc
@@ -25,6 +25,7 @@
 #include "common_video/libyuv/include/webrtc_libyuv.h"
 #include "modules/rtp_rtcp/include/rtp_rtcp_defines.h"
 #include "modules/video_coding/codecs/vp9/svc_rate_allocator.h"
+#include "modules/video_coding/utility/vp9_uncompressed_header_parser.h"
 #include "rtc_base/checks.h"
 #include "rtc_base/experiments/rate_control_settings.h"
 #include "rtc_base/keep_ref_until_done.h"
@@ -45,8 +46,6 @@
 uint8_t kRefBufIdx[4] = {0, 0, 0, 1};
 uint8_t kUpdBufIdx[4] = {0, 0, 1, 0};
 
-int kMaxNumTiles4kVideo = 8;
-
 // Maximum allowed PID difference for differnet per-layer frame-rate case.
 const int kMaxAllowedPidDiff = 30;
 
@@ -1668,14 +1667,32 @@
   //    errors earlier than the multi-threads version.
   //  - Make peak CPU usage under control (not depending on input)
   cfg.threads = 1;
-  (void)kMaxNumTiles4kVideo;  // unused
 #else
-  // We want to use multithreading when decoding high resolution videos. But,
-  // since we don't know resolution of input stream at this stage, we always
-  // enable it.
-  cfg.threads = std::min(number_of_cores, kMaxNumTiles4kVideo);
+  if (!inst) {
+    // No config provided - don't know resolution to decode yet.
+    // Set thread count to one in the meantime.
+    cfg.threads = 1;
+  } else {
+    // We want to use multithreading when decoding high resolution videos. But
+    // not too many in order to avoid overhead when many streams are decoded
+    // concurrently.
+    // Set 2 threads as target for 1280x720 pixel count, and then scale up
+    // linearly from there - but cap at physical core count.
+    // For common resolutions this results in:
+    // 1 for 360p
+    // 2 for 720p
+    // 4 for 1080p
+    // 8 for 1440p
+    // 18 for 4K
+    int num_threads =
+        std::max(1, 2 * (inst->width * inst->height) / (1280 * 720));
+    cfg.threads = std::min(number_of_cores, num_threads);
+    current_codec_ = *inst;
+  }
 #endif
 
+  num_cores_ = number_of_cores;
+
   vpx_codec_flags_t flags = 0;
   if (vpx_codec_dec_init(decoder_, vpx_codec_vp9_dx(), &cfg, flags)) {
     return WEBRTC_VIDEO_CODEC_MEMORY;
@@ -1705,6 +1722,29 @@
   if (decode_complete_callback_ == nullptr) {
     return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
   }
+
+  if (input_image._frameType == VideoFrameType::kVideoFrameKey) {
+    absl::optional<vp9::FrameInfo> frame_info =
+        vp9::ParseIntraFrameInfo(input_image.data(), input_image.size());
+    if (frame_info) {
+      if (frame_info->frame_width != current_codec_.width ||
+          frame_info->frame_height != current_codec_.height) {
+        // Resolution has changed, tear down and re-init a new decoder in
+        // order to get correct sizing.
+        Release();
+        current_codec_.width = frame_info->frame_width;
+        current_codec_.height = frame_info->frame_height;
+        int reinit_status = InitDecode(&current_codec_, num_cores_);
+        if (reinit_status != WEBRTC_VIDEO_CODEC_OK) {
+          RTC_LOG(LS_WARNING) << "Failed to re-init decoder.";
+          return reinit_status;
+        }
+      }
+    } else {
+      RTC_LOG(LS_WARNING) << "Failed to parse VP9 header from key-frame.";
+    }
+  }
+
   // Always start with a complete key frame.
   if (key_frame_required_) {
     if (input_image._frameType != VideoFrameType::kVideoFrameKey)
diff --git a/modules/video_coding/codecs/vp9/vp9_impl.h b/modules/video_coding/codecs/vp9/vp9_impl.h
index 2126044..066ce20 100644
--- a/modules/video_coding/codecs/vp9/vp9_impl.h
+++ b/modules/video_coding/codecs/vp9/vp9_impl.h
@@ -210,6 +210,8 @@
   bool inited_;
   vpx_codec_ctx_t* decoder_;
   bool key_frame_required_;
+  VideoCodec current_codec_;
+  int num_cores_;
 };
 }  // namespace webrtc
 
diff --git a/modules/video_coding/utility/vp9_uncompressed_header_parser.cc b/modules/video_coding/utility/vp9_uncompressed_header_parser.cc
index 9c89235..f8ddd4d 100644
--- a/modules/video_coding/utility/vp9_uncompressed_header_parser.cc
+++ b/modules/video_coding/utility/vp9_uncompressed_header_parser.cc
@@ -52,40 +52,65 @@
   return true;
 }
 
-bool Vp9ReadColorConfig(rtc::BitBuffer* br, uint8_t profile) {
-  if (profile == 2 || profile == 3) {
-    // Bitdepth.
-    RETURN_FALSE_IF_ERROR(br->ConsumeBits(1));
+bool Vp9ReadColorConfig(rtc::BitBuffer* br,
+                        uint8_t profile,
+                        FrameInfo* frame_info) {
+  if (profile == 0 || profile == 1) {
+    frame_info->bit_detph = BitDept::k8Bit;
+  } else if (profile == 2 || profile == 3) {
+    uint32_t ten_or_twelve_bits;
+    RETURN_FALSE_IF_ERROR(br->ReadBits(&ten_or_twelve_bits, 1));
+    frame_info->bit_detph =
+        ten_or_twelve_bits ? BitDept::k12Bit : BitDept::k10Bit;
   }
   uint32_t color_space;
   RETURN_FALSE_IF_ERROR(br->ReadBits(&color_space, 3));
+  frame_info->color_space = static_cast<ColorSpace>(color_space);
 
   // SRGB is 7.
   if (color_space != 7) {
-    // YUV range flag.
-    RETURN_FALSE_IF_ERROR(br->ConsumeBits(1));
+    uint32_t color_range;
+    RETURN_FALSE_IF_ERROR(br->ReadBits(&color_range, 1));
+    frame_info->color_range =
+        color_range ? ColorRange::kFull : ColorRange::kStudio;
+
     if (profile == 1 || profile == 3) {
-      // 1 bit: subsampling x.
-      // 1 bit: subsampling y.
-      RETURN_FALSE_IF_ERROR(br->ConsumeBits(2));
-      uint32_t reserved_bit;
-      RETURN_FALSE_IF_ERROR(br->ReadBits(&reserved_bit, 1));
-      if (reserved_bit) {
-        RTC_LOG(LS_WARNING) << "Failed to get QP. Reserved bit set.";
-        return false;
+      uint32_t subsampling_x;
+      uint32_t subsampling_y;
+      RETURN_FALSE_IF_ERROR(br->ReadBits(&subsampling_x, 1));
+      RETURN_FALSE_IF_ERROR(br->ReadBits(&subsampling_y, 1));
+      if (subsampling_x) {
+        frame_info->sub_sampling =
+            subsampling_y ? YuvSubsampling::k420 : YuvSubsampling::k422;
+      } else {
+        frame_info->sub_sampling =
+            subsampling_y ? YuvSubsampling::k440 : YuvSubsampling::k444;
       }
-    }
-  } else {
-    if (profile == 1 || profile == 3) {
+
       uint32_t reserved_bit;
       RETURN_FALSE_IF_ERROR(br->ReadBits(&reserved_bit, 1));
       if (reserved_bit) {
-        RTC_LOG(LS_WARNING) << "Failed to get QP. Reserved bit set.";
+        RTC_LOG(LS_WARNING) << "Failed to parse header. Reserved bit set.";
         return false;
       }
     } else {
-      RTC_LOG(LS_WARNING) << "Failed to get QP. 4:4:4 color not supported in "
-                             "profile 0 or 2.";
+      // Profile 0 or 2.
+      frame_info->sub_sampling = YuvSubsampling::k420;
+    }
+  } else {
+    // SRGB
+    frame_info->color_range = ColorRange::kFull;
+    if (profile == 1 || profile == 3) {
+      frame_info->sub_sampling = YuvSubsampling::k444;
+      uint32_t reserved_bit;
+      RETURN_FALSE_IF_ERROR(br->ReadBits(&reserved_bit, 1));
+      if (reserved_bit) {
+        RTC_LOG(LS_WARNING) << "Failed to parse header. Reserved bit set.";
+        return false;
+      }
+    } else {
+      RTC_LOG(LS_WARNING) << "Failed to parse header. 4:4:4 color not supported"
+                             " in profile 0 or 2.";
       return false;
     }
   }
@@ -93,24 +118,38 @@
   return true;
 }
 
-bool Vp9ReadFrameSize(rtc::BitBuffer* br) {
-  // 2 bytes: frame width.
-  // 2 bytes: frame height.
-  return br->ConsumeBytes(4);
+bool Vp9ReadFrameSize(rtc::BitBuffer* br, FrameInfo* frame_info) {
+  // 16 bits: frame width - 1.
+  uint16_t frame_width_minus_one;
+  RETURN_FALSE_IF_ERROR(br->ReadUInt16(&frame_width_minus_one));
+  // 16 bits: frame height - 1.
+  uint16_t frame_height_minus_one;
+  RETURN_FALSE_IF_ERROR(br->ReadUInt16(&frame_height_minus_one));
+  frame_info->frame_width = frame_width_minus_one + 1;
+  frame_info->frame_height = frame_height_minus_one + 1;
+  return true;
 }
 
-bool Vp9ReadRenderSize(rtc::BitBuffer* br) {
-  uint32_t bit;
-  RETURN_FALSE_IF_ERROR(br->ReadBits(&bit, 1));
-  if (bit) {
-    // 2 bytes: render width.
-    // 2 bytes: render height.
-    RETURN_FALSE_IF_ERROR(br->ConsumeBytes(4));
+bool Vp9ReadRenderSize(rtc::BitBuffer* br, FrameInfo* frame_info) {
+  uint32_t render_and_frame_size_different;
+  RETURN_FALSE_IF_ERROR(br->ReadBits(&render_and_frame_size_different, 1));
+  if (render_and_frame_size_different) {
+    // 16 bits: render width - 1.
+    uint16_t render_width_minus_one;
+    RETURN_FALSE_IF_ERROR(br->ReadUInt16(&render_width_minus_one));
+    // 16 bits: render height - 1.
+    uint16_t render_height_minus_one;
+    RETURN_FALSE_IF_ERROR(br->ReadUInt16(&render_height_minus_one));
+    frame_info->render_width = render_width_minus_one + 1;
+    frame_info->render_height = render_height_minus_one + 1;
+  } else {
+    frame_info->render_width = frame_info->frame_width;
+    frame_info->render_height = frame_info->frame_height;
   }
   return true;
 }
 
-bool Vp9ReadFrameSizeFromRefs(rtc::BitBuffer* br) {
+bool Vp9ReadFrameSizeFromRefs(rtc::BitBuffer* br, FrameInfo* frame_info) {
   uint32_t found_ref = 0;
   for (size_t i = 0; i < kVp9NumRefsPerFrame; i++) {
     // Size in refs.
@@ -120,11 +159,11 @@
   }
 
   if (!found_ref) {
-    if (!Vp9ReadFrameSize(br)) {
+    if (!Vp9ReadFrameSize(br, frame_info)) {
       return false;
     }
   }
-  return Vp9ReadRenderSize(br);
+  return Vp9ReadRenderSize(br, frame_info);
 }
 
 bool Vp9ReadInterpolationFilter(rtc::BitBuffer* br) {
@@ -166,14 +205,14 @@
 }
 }  // namespace
 
-bool GetQp(const uint8_t* buf, size_t length, int* qp) {
+bool Parse(const uint8_t* buf, size_t length, int* qp, FrameInfo* frame_info) {
   rtc::BitBuffer br(buf, length);
 
   // Frame marker.
   uint32_t frame_marker;
   RETURN_FALSE_IF_ERROR(br.ReadBits(&frame_marker, 2));
   if (frame_marker != 0x2) {
-    RTC_LOG(LS_WARNING) << "Failed to get QP. Frame marker should be 2.";
+    RTC_LOG(LS_WARNING) << "Failed to parse header. Frame marker should be 2.";
     return false;
   }
 
@@ -181,6 +220,7 @@
   uint8_t profile;
   if (!Vp9ReadProfile(&br, &profile))
     return false;
+  frame_info->profile = profile;
 
   // Show existing frame.
   uint32_t show_existing_frame;
@@ -195,18 +235,21 @@
   RETURN_FALSE_IF_ERROR(br.ReadBits(&frame_type, 1));
   RETURN_FALSE_IF_ERROR(br.ReadBits(&show_frame, 1));
   RETURN_FALSE_IF_ERROR(br.ReadBits(&error_resilient, 1));
+  frame_info->show_frame = show_frame;
+  frame_info->error_resilient = error_resilient;
 
-  if (!frame_type) {
+  if (frame_type == 0) {
+    // Key-frame.
     if (!Vp9ReadSyncCode(&br))
       return false;
-    if (!Vp9ReadColorConfig(&br, profile))
+    if (!Vp9ReadColorConfig(&br, profile, frame_info))
       return false;
-    if (!Vp9ReadFrameSize(&br))
+    if (!Vp9ReadFrameSize(&br, frame_info))
       return false;
-    if (!Vp9ReadRenderSize(&br))
+    if (!Vp9ReadRenderSize(&br, frame_info))
       return false;
-
   } else {
+    // Non-keyframe.
     uint32_t intra_only = 0;
     if (!show_frame)
       RETURN_FALSE_IF_ERROR(br.ReadBits(&intra_only, 1));
@@ -218,14 +261,14 @@
         return false;
 
       if (profile > 0) {
-        if (!Vp9ReadColorConfig(&br, profile))
+        if (!Vp9ReadColorConfig(&br, profile, frame_info))
           return false;
       }
       // Refresh frame flags.
       RETURN_FALSE_IF_ERROR(br.ConsumeBits(8));
-      if (!Vp9ReadFrameSize(&br))
+      if (!Vp9ReadFrameSize(&br, frame_info))
         return false;
-      if (!Vp9ReadRenderSize(&br))
+      if (!Vp9ReadRenderSize(&br, frame_info))
         return false;
     } else {
       // Refresh frame flags.
@@ -237,7 +280,7 @@
         RETURN_FALSE_IF_ERROR(br.ConsumeBits(4));
       }
 
-      if (!Vp9ReadFrameSizeFromRefs(&br))
+      if (!Vp9ReadFrameSizeFromRefs(&br, frame_info))
         return false;
 
       // Allow high precision mv.
@@ -267,6 +310,20 @@
   return true;
 }
 
-}  // namespace vp9
+bool GetQp(const uint8_t* buf, size_t length, int* qp) {
+  FrameInfo frame_info;
+  return Parse(buf, length, qp, &frame_info);
+}
 
+absl::optional<FrameInfo> ParseIntraFrameInfo(const uint8_t* buf,
+                                              size_t length) {
+  int qp = 0;
+  FrameInfo frame_info;
+  if (Parse(buf, length, &qp, &frame_info) && frame_info.frame_width > 0) {
+    return frame_info;
+  }
+  return absl::nullopt;
+}
+
+}  // namespace vp9
 }  // namespace webrtc
diff --git a/modules/video_coding/utility/vp9_uncompressed_header_parser.h b/modules/video_coding/utility/vp9_uncompressed_header_parser.h
index 69e8de8..a7f0467 100644
--- a/modules/video_coding/utility/vp9_uncompressed_header_parser.h
+++ b/modules/video_coding/utility/vp9_uncompressed_header_parser.h
@@ -13,6 +13,7 @@
 
 #include <stddef.h>
 #include <stdint.h>
+#include "absl/types/optional.h"
 
 namespace webrtc {
 
@@ -22,6 +23,65 @@
 // Returns true on success, false otherwise.
 bool GetQp(const uint8_t* buf, size_t length, int* qp);
 
+// Bit depth per channel. Support varies by profile.
+enum class BitDept : uint8_t {
+  k8Bit = 8,
+  k10Bit = 10,
+  k12Bit = 12,
+};
+
+enum class ColorSpace : uint8_t {
+  CS_UNKNOWN = 0,    // Unknown (in this case the color space must be signaled
+                     // outside the VP9 bitstream).
+  CS_BT_601 = 1,     // CS_BT_601 Rec. ITU-R BT.601-7
+  CS_BT_709 = 2,     // Rec. ITU-R BT.709-6
+  CS_SMPTE_170 = 3,  // SMPTE-170
+  CS_SMPTE_240 = 4,  // SMPTE-240
+  CS_BT_2020 = 5,    // Rec. ITU-R BT.2020-2
+  CS_RESERVED = 6,   // Reserved
+  CS_RGB = 7,        // sRGB (IEC 61966-2-1)
+};
+
+enum class ColorRange {
+  kStudio,  // Studio swing:
+            // For BitDepth equals 8:
+            //     Y is between 16 and 235 inclusive.
+            //     U and V are between 16 and 240 inclusive.
+            // For BitDepth equals 10:
+            //     Y is between 64 and 940 inclusive.
+            //     U and V are between 64 and 960 inclusive.
+            // For BitDepth equals 12:
+            //     Y is between 256 and 3760.
+            //     U and V are between 256 and 3840 inclusive.
+  kFull     // Full swing; no restriction on Y, U, V values.
+};
+
+enum class YuvSubsampling {
+  k444,
+  k440,
+  k422,
+  k420,
+};
+
+struct FrameInfo {
+  int profile = 0;  // Profile 0-3 are valid.
+  bool show_frame = false;
+  bool error_resilient = false;
+  BitDept bit_detph = BitDept::k8Bit;
+  ColorSpace color_space = ColorSpace::CS_UNKNOWN;
+  ColorRange color_range;
+  YuvSubsampling sub_sampling;
+  int frame_width = 0;
+  int frame_height = 0;
+  int render_width = 0;
+  int render_height = 0;
+};
+
+// Parses frame information for a VP9 key-frame or all-intra frame from a
+// bitstream. Returns nullopt on failure or if not a key-frame.
+absl::optional<FrameInfo> ParseIntraFrameInfo(const uint8_t* buf,
+                                              size_t length);
+
 }  // namespace vp9
 
 }  // namespace webrtc