Reland "Adds experimental libvpx VP9 speed settings."

This is a reland of 6e7167456b5eba36c7985d6a74f1d191958d4e0f

Patch set 1 is the original.
Later patch sets fix a parsing bug and add a new flag which enables
or disables the ability to set a separate speed per spatial layer
(use_per_layer_speed).

Original change's description:
> Adds experimental libvpx VP9 speed settings.
>
> Using the field trial WebRTC-VP9-PerformanceFlags, this CL allows you to
> configure the libvpx VP9 encoder with a list of flags to affect the
> quality vs speed tradeoff. This CL adds support for:
>
> * Speed (effort), for the temporal base layer frames
> * Speed for higher (non-base) layer frames
> * De-blocking (as part of the loopfilter) enabled for:
>   0 = all frames
>   1 = all but frames from the highest temporal layer
>   2 = no frames
>
> Each entry in the list has a threshold in min number of pixels needed
> for settings in the entry to apply.
>
> Example: Two spatial layers (180p, 360p) with three temporal
> layers are configured. Field trial "WebRTC-VP9-PerformanceFlags" set to:
> "use_per_layer_speed,min_pixel_count:0|129600,base_layer_speed:5|7,high_layer_speed:8|8,deblock_mode:1|2"
> This translates to:
> S0:
>   - TL0: Speed 5, deblocked
>   - TL1: Speed 8, deblocked
>   - TL2: Speed 8, not deblocked
> S1:
>   - TL0: Speed 7, not deblocked
>   - TL1: Speed 8, not deblocked
>   - TL2: Speed 8, not deblocked
>
> Bug: webrtc:11551
> Change-Id: Ieef6816d3e0831ff53348ecc4a90260e2ef10422
> Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/188461
> Reviewed-by: Michael Horowitz <mhoro@webrtc.org>
> Reviewed-by: Sergey Silkin <ssilkin@webrtc.org>
> Commit-Queue: Erik Språng <sprang@webrtc.org>
> Cr-Commit-Position: refs/heads/master@{#32749}

Bug: webrtc:11551
Change-Id: Ie7c703eb122197235d8ce77cb72db7a347382468
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/196345
Reviewed-by: Michael Horowitz <mhoro@webrtc.org>
Reviewed-by: Sergey Silkin <ssilkin@webrtc.org>
Commit-Queue: Erik Språng <sprang@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#32780}
diff --git a/modules/video_coding/codecs/vp9/vp9_impl.cc b/modules/video_coding/codecs/vp9/vp9_impl.cc
index c2b1f50..74fe565 100644
--- a/modules/video_coding/codecs/vp9/vp9_impl.cc
+++ b/modules/video_coding/codecs/vp9/vp9_impl.cc
@@ -32,6 +32,7 @@
 #include "modules/video_coding/svc/scalable_video_controller_no_layering.h"
 #include "modules/video_coding/utility/vp9_uncompressed_header_parser.h"
 #include "rtc_base/checks.h"
+#include "rtc_base/experiments/field_trial_list.h"
 #include "rtc_base/experiments/field_trial_parser.h"
 #include "rtc_base/experiments/rate_control_settings.h"
 #include "rtc_base/keep_ref_until_done.h"
@@ -65,20 +66,6 @@
 constexpr int kLowVp9QpThreshold = 149;
 constexpr int kHighVp9QpThreshold = 205;
 
-// Only positive speeds, range for real-time coding currently is: 5 - 8.
-// Lower means slower/better quality, higher means fastest/lower quality.
-int GetCpuSpeed(int width, int height) {
-#if defined(WEBRTC_ARCH_ARM) || defined(WEBRTC_ARCH_ARM64) || defined(ANDROID)
-  return 8;
-#else
-  // For smaller resolutions, use lower speed setting (get some coding gain at
-  // the cost of increased encoding complexity).
-  if (width * height <= 352 * 288)
-    return 5;
-  else
-    return 7;
-#endif
-}
 // Helper class for extracting VP9 colorspace.
 ColorSpace ExtractVP9ColorSpace(vpx_color_space_t space_t,
                                 vpx_color_range_t range_t,
@@ -281,7 +268,6 @@
           ParseSdpForVP9Profile(codec.params).value_or(VP9Profile::kProfile0)),
       inited_(false),
       timestamp_(0),
-      cpu_speed_(3),
       rc_max_intra_target_(0),
       encoder_(nullptr),
       config_(nullptr),
@@ -318,7 +304,7 @@
       external_ref_ctrl_(
           !absl::StartsWith(trials.Lookup("WebRTC-Vp9ExternalRefCtrl"),
                             "Disabled")),
-      per_layer_speed_(ParsePerLayerSpeed(trials)),
+      performance_flags_(ParsePerformanceFlagsFromTrials(trials)),
       num_steady_state_frames_(0),
       config_changed_(true) {
   codec_ = {};
@@ -455,8 +441,6 @@
   first_active_layer_ = 0;
   bool seen_active_layer = false;
   bool expect_no_more_active_layers = false;
-  int highest_active_width = 0;
-  int highest_active_height = 0;
   for (int i = 0; i < num_spatial_layers_; ++i) {
     if (config_->ss_target_bitrate[i] > 0) {
       RTC_DCHECK(!expect_no_more_active_layers) << "Only middle layer is "
@@ -466,12 +450,6 @@
       }
       num_active_spatial_layers_ = i + 1;
       seen_active_layer = true;
-      highest_active_width =
-          (svc_params_.scaling_factor_num[i] * config_->g_w) /
-          svc_params_.scaling_factor_den[i];
-      highest_active_height =
-          (svc_params_.scaling_factor_num[i] * config_->g_h) /
-          svc_params_.scaling_factor_den[i];
     } else {
       expect_no_more_active_layers = seen_active_layer;
     }
@@ -499,7 +477,6 @@
     svc_controller_->OnRatesUpdated(allocation);
   }
   current_bitrate_allocation_ = bitrate_allocation;
-  cpu_speed_ = GetCpuSpeed(highest_active_width, highest_active_height);
   config_changed_ = true;
   return true;
 }
@@ -570,6 +547,7 @@
   if (&codec_ != inst) {
     codec_ = *inst;
   }
+  memset(&svc_params_, 0, sizeof(vpx_svc_extra_cfg_t));
 
   force_key_frame_ = true;
   pics_since_key_ = 0;
@@ -665,8 +643,6 @@
   config_->g_threads =
       NumberOfThreads(config_->g_w, config_->g_h, settings.number_of_cores);
 
-  cpu_speed_ = GetCpuSpeed(config_->g_w, config_->g_h);
-
   is_flexible_mode_ = inst->VP9().flexibleMode;
 
   inter_layer_pred_ = inst->VP9().interLayerPred;
@@ -827,22 +803,18 @@
     return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
   }
 
-  if (per_layer_speed_.enabled) {
-    for (int i = 0; i < num_spatial_layers_; ++i) {
-      if (codec_.spatialLayers[i].active) {
-        continue;
-      }
-
-      if (per_layer_speed_.layers[i] != -1) {
-        svc_params_.speed_per_layer[i] = per_layer_speed_.layers[i];
-      } else {
-        svc_params_.speed_per_layer[i] = GetCpuSpeed(
-            codec_.spatialLayers[i].width, codec_.spatialLayers[i].height);
-      }
+  UpdatePerformanceFlags();
+  RTC_DCHECK_EQ(performance_flags_by_spatial_index_.size(),
+                static_cast<size_t>(num_spatial_layers_));
+  if (performance_flags_.use_per_layer_speed) {
+    for (int si = 0; si < num_spatial_layers_; ++si) {
+      svc_params_.speed_per_layer[si] =
+          performance_flags_by_spatial_index_[si].base_layer_speed;
+      svc_params_.loopfilter_ctrl[si] =
+          performance_flags_by_spatial_index_[si].deblock_mode;
     }
   }
 
-  vpx_codec_control(encoder_, VP8E_SET_CPUUSED, cpu_speed_);
   vpx_codec_control(encoder_, VP8E_SET_MAX_INTRA_BITRATE_PCT,
                     rc_max_intra_target_);
   vpx_codec_control(encoder_, VP9E_SET_AQ_MODE,
@@ -855,6 +827,11 @@
     vpx_codec_control(encoder_, VP9E_SET_SVC, 1);
     vpx_codec_control(encoder_, VP9E_SET_SVC_PARAMETERS, &svc_params_);
   }
+  if (!performance_flags_.use_per_layer_speed) {
+    vpx_codec_control(
+        encoder_, VP8E_SET_CPUUSED,
+        performance_flags_by_spatial_index_.rbegin()->base_layer_speed);
+  }
 
   if (num_spatial_layers_ > 1) {
     switch (inter_layer_pred_) {
@@ -1068,6 +1045,24 @@
     }
   }
 
+  if (is_svc_ && performance_flags_.use_per_layer_speed) {
+    // Update speed settings that might depend on temporal index.
+    bool speed_updated = false;
+    for (int sl_idx = 0; sl_idx < num_spatial_layers_; ++sl_idx) {
+      const int target_speed =
+          layer_id.temporal_layer_id_per_spatial[sl_idx] == 0
+              ? performance_flags_by_spatial_index_[sl_idx].base_layer_speed
+              : performance_flags_by_spatial_index_[sl_idx].high_layer_speed;
+      if (svc_params_.speed_per_layer[sl_idx] != target_speed) {
+        svc_params_.speed_per_layer[sl_idx] = target_speed;
+        speed_updated = true;
+      }
+    }
+    if (speed_updated) {
+      vpx_codec_control(encoder_, VP9E_SET_SVC_PARAMETERS, &svc_params_);
+    }
+  }
+
   vpx_codec_control(encoder_, VP9E_SET_SVC_LAYER_ID, &layer_id);
 
   if (num_spatial_layers_ > 1) {
@@ -1080,7 +1075,25 @@
     if (vpx_codec_enc_config_set(encoder_, config_)) {
       return WEBRTC_VIDEO_CODEC_ERROR;
     }
-    vpx_codec_control(encoder_, VP8E_SET_CPUUSED, cpu_speed_);
+
+    if (!performance_flags_.use_per_layer_speed) {
+      // Not setting individual speeds per layer, find the highest active
+      // resolution instead and base the speed on that.
+      for (int i = num_spatial_layers_ - 1; i >= 0; --i) {
+        if (config_->ss_target_bitrate[i] > 0) {
+          int width = (svc_params_.scaling_factor_num[i] * config_->g_w) /
+                      svc_params_.scaling_factor_den[i];
+          int height = (svc_params_.scaling_factor_num[i] * config_->g_h) /
+                       svc_params_.scaling_factor_den[i];
+          int speed =
+              std::prev(performance_flags_.settings_by_resolution.lower_bound(
+                            width * height))
+                  ->second.base_layer_speed;
+          vpx_codec_control(encoder_, VP8E_SET_CPUUSED, speed);
+          break;
+        }
+      }
+    }
     config_changed_ = false;
   }
 
@@ -1826,18 +1839,92 @@
   return config;
 }
 
+void VP9EncoderImpl::UpdatePerformanceFlags() {
+  const auto find_speed = [&](int min_pixel_count) {
+    RTC_DCHECK(!performance_flags_.settings_by_resolution.empty());
+    auto it =
+        performance_flags_.settings_by_resolution.upper_bound(min_pixel_count);
+    return std::prev(it)->second;
+  };
+
+  performance_flags_by_spatial_index_.clear();
+  if (is_svc_) {
+    for (int si = 0; si < num_spatial_layers_; ++si) {
+      performance_flags_by_spatial_index_.push_back(find_speed(
+          codec_.spatialLayers[si].width * codec_.spatialLayers[si].height));
+    }
+  } else {
+    performance_flags_by_spatial_index_.push_back(
+        find_speed(codec_.width * codec_.height));
+  }
+}
+
 // static
-VP9EncoderImpl::SpeedSettings VP9EncoderImpl::ParsePerLayerSpeed(
+VP9EncoderImpl::PerformanceFlags
+VP9EncoderImpl::ParsePerformanceFlagsFromTrials(
     const WebRtcKeyValueConfig& trials) {
-  FieldTrialFlag enabled("enabled");
-  FieldTrialParameter<int> speeds[kMaxSpatialLayers]{
-      {"s0", -1}, {"s1", -1}, {"s2", -1}, {"s3", -1}, {"s4", -1}};
-  ParseFieldTrial(
-      {&enabled, &speeds[0], &speeds[1], &speeds[2], &speeds[3], &speeds[4]},
-      trials.Lookup("WebRTC-VP9-PerLayerSpeed"));
-  return SpeedSettings{enabled.Get(),
-                       {speeds[0].Get(), speeds[1].Get(), speeds[2].Get(),
-                        speeds[3].Get(), speeds[4].Get()}};
+  struct Params : public PerformanceFlags::ParameterSet {
+    int min_pixel_count = 0;
+  };
+
+  FieldTrialStructList<Params> trials_list(
+      {FieldTrialStructMember("min_pixel_count",
+                              [](Params* p) { return &p->min_pixel_count; }),
+       FieldTrialStructMember("high_layer_speed",
+                              [](Params* p) { return &p->high_layer_speed; }),
+       FieldTrialStructMember("base_layer_speed",
+                              [](Params* p) { return &p->base_layer_speed; }),
+       FieldTrialStructMember("deblock_mode",
+                              [](Params* p) { return &p->deblock_mode; })},
+      {});
+
+  FieldTrialFlag per_layer_speed("use_per_layer_speed");
+
+  ParseFieldTrial({&trials_list, &per_layer_speed},
+                  trials.Lookup("WebRTC-VP9-PerformanceFlags"));
+
+  PerformanceFlags flags;
+  flags.use_per_layer_speed = per_layer_speed.Get();
+
+  constexpr int kMinSpeed = 1;
+  constexpr int kMaxSpeed = 9;
+  for (auto& f : trials_list.Get()) {
+    if (f.base_layer_speed < kMinSpeed || f.base_layer_speed > kMaxSpeed ||
+        f.high_layer_speed < kMinSpeed || f.high_layer_speed > kMaxSpeed ||
+        f.deblock_mode < 0 || f.deblock_mode > 2) {
+      RTC_LOG(LS_WARNING) << "Ignoring invalid performance flags: "
+                          << "min_pixel_count = " << f.min_pixel_count
+                          << ", high_layer_speed = " << f.high_layer_speed
+                          << ", base_layer_speed = " << f.base_layer_speed
+                          << ", deblock_mode = " << f.deblock_mode;
+      continue;
+    }
+    flags.settings_by_resolution[f.min_pixel_count] = f;
+  }
+
+  if (flags.settings_by_resolution.empty()) {
+    return GetDefaultPerformanceFlags();
+  }
+
+  return flags;
+}
+
+// static
+VP9EncoderImpl::PerformanceFlags VP9EncoderImpl::GetDefaultPerformanceFlags() {
+  PerformanceFlags flags;
+  flags.use_per_layer_speed = false;
+#if defined(WEBRTC_ARCH_ARM) || defined(WEBRTC_ARCH_ARM64) || defined(ANDROID)
+  // Speed 8 on all layers for all resolutions.
+  flags.settings_by_resolution[0] = {8, 8, 0};
+#else
+  // For smaller resolutions, use lower speed setting (get some coding gain at
+  // the cost of increased encoding complexity).
+  flags.settings_by_resolution[0] = {5, 5, 0};
+
+  // Use speed 7 for CIF (352x288) and above.
+  flags.settings_by_resolution[352 * 288] = {7, 7, 0};
+#endif
+  return flags;
 }
 
 void VP9EncoderImpl::MaybeRewrapRawWithFormat(const vpx_img_fmt fmt) {
diff --git a/modules/video_coding/codecs/vp9/vp9_impl.h b/modules/video_coding/codecs/vp9/vp9_impl.h
index 14c3ca8..075a214 100644
--- a/modules/video_coding/codecs/vp9/vp9_impl.h
+++ b/modules/video_coding/codecs/vp9/vp9_impl.h
@@ -112,7 +112,6 @@
   const VP9Profile profile_;
   bool inited_;
   int64_t timestamp_;
-  int cpu_speed_;
   uint32_t rc_max_intra_target_;
   vpx_codec_ctx_t* encoder_;
   vpx_codec_enc_cfg_t* config_;
@@ -194,11 +193,40 @@
       const WebRtcKeyValueConfig& trials);
   const bool external_ref_ctrl_;
 
-  const struct SpeedSettings {
-    bool enabled;
-    int layers[kMaxSpatialLayers];
-  } per_layer_speed_;
-  static SpeedSettings ParsePerLayerSpeed(const WebRtcKeyValueConfig& trials);
+  // Flags that can affect the speed vs quality tradeoff, and are configurable
+  // per resolution range.
+  struct PerformanceFlags {
+    // If false, a lookup will be made in |settings_by_resolution| based on the
+    // highest currently active resolution, and the overall speed is then set
+    // to the |base_layer_speed| matching that entry.
+    // If true, each active resolution will have its speed and deblock_mode set
+    // based on its resolution, and the high layer speed configured for
+    // non-base temporal layer frames.
+    bool use_per_layer_speed = false;
+
+    struct ParameterSet {
+      int base_layer_speed = -1;  // Speed setting for TL0.
+      int high_layer_speed = -1;  // Speed setting for TL1-TL3.
+      //  0 = deblock all temporal layers (TL)
+      //  1 = disable deblock for top-most TL
+      //  2 = disable deblock for all TLs
+      int deblock_mode = 0;
+    };
+    // Map from min pixel count to settings for that resolution and above.
+    // E.g. if you want some settings A if below wvga (640x360) and some other
+    // setting B at wvga and above, you'd use map {{0, A}, {230400, B}}.
+    std::map<int, ParameterSet> settings_by_resolution;
+  };
+  // Performance flags, ordered by |min_pixel_count|.
+  const PerformanceFlags performance_flags_;
+  // Cache of lookups in |performance_flags_|, where index i maps to the
+  // resolution as specified in |codec_.spatialLayers[i]|.
+  std::vector<PerformanceFlags::ParameterSet>
+      performance_flags_by_spatial_index_;
+  void UpdatePerformanceFlags();
+  static PerformanceFlags ParsePerformanceFlagsFromTrials(
+      const WebRtcKeyValueConfig& trials);
+  static PerformanceFlags GetDefaultPerformanceFlags();
 
   int num_steady_state_frames_;
   // Only set config when this flag is set.