Add libaom AV1 integration of dynamic speed controller.

This CL creates a config factory for the libaom-based AV1 encoder and
integrates the use of the speed controller in the encoder wrapper.
A (for now) unused parameter in the API was removed.

Bug: webrtc:443906251
Change-Id: I3c4522ab71446e34d4017b558d961803f26b2e38
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/422800
Reviewed-by: Ilya Nikolaevskiy <ilnik@webrtc.org>
Commit-Queue: Erik Språng <sprang@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#46123}
diff --git a/api/video_codecs/encoder_speed_controller.h b/api/video_codecs/encoder_speed_controller.h
index 69bcd87..87ceaef 100644
--- a/api/video_codecs/encoder_speed_controller.h
+++ b/api/video_codecs/encoder_speed_controller.h
@@ -109,12 +109,8 @@
   // thereafter be configured with requested settings.
   virtual EncodeSettings GetEncodeSettings(FrameEncodingInfo frame_info) = 0;
 
-  // Should be called after each frame has completed encoding. If a baseline
-  // comparison speed was set in the `EncodeSettings`, the `baseline_results`
-  // parameter should be set with the results corresponding to those settings.
-  virtual void OnEncodedFrame(
-      EncodeResults results,
-      std::optional<EncodeResults> baseline_results) = 0;
+  // Should be called after each frame has completed encoding.
+  virtual void OnEncodedFrame(EncodeResults results) = 0;
 };
 
 }  // namespace webrtc
diff --git a/modules/video_coding/codecs/av1/BUILD.gn b/modules/video_coding/codecs/av1/BUILD.gn
index ed40a7b..8b635ef 100644
--- a/modules/video_coding/codecs/av1/BUILD.gn
+++ b/modules/video_coding/codecs/av1/BUILD.gn
@@ -59,18 +59,22 @@
     "libaom_av1_encoder.h",
   ]
   deps = [
+    ":libaom_speed_config_factory",
     "../..:video_codec_interface",
     "../..:video_coding_utility",
     "../../:frame_sampler",
     "../../../../api:field_trials_view",
     "../../../../api:scoped_refptr",
     "../../../../api/environment",
+    "../../../../api/units:time_delta",
+    "../../../../api/units:timestamp",
     "../../../../api/video:encoded_image",
     "../../../../api/video:render_resolution",
     "../../../../api/video:video_codec_constants",
     "../../../../api/video:video_frame",
     "../../../../api/video:video_frame_type",
     "../../../../api/video:video_rtp_headers",
+    "../../../../api/video_codecs:encoder_speed_controller_factory",
     "../../../../api/video_codecs:scalability_mode",
     "../../../../api/video_codecs:video_codecs_api",
     "../../../../common_video",
@@ -80,7 +84,9 @@
     "../../../../rtc_base:logging",
     "../../../../rtc_base:rtc_numerics",
     "../../../../rtc_base/experiments:encoder_info_settings",
+    "../../../../rtc_base/experiments:encoder_speed_experiment",
     "../../../../rtc_base/experiments:psnr_experiment",
+    "../../../../system_wrappers",
     "../../svc:scalability_structures",
     "../../svc:scalable_video_controller",
     "//third_party/abseil-cpp/absl/algorithm:container",
@@ -92,6 +98,21 @@
   ]
 }
 
+rtc_library("libaom_speed_config_factory") {
+  sources = [
+    "libaom_speed_config_factory.cc",
+    "libaom_speed_config_factory.h",
+  ]
+  deps = [
+    "../..:video_codec_interface",
+    "../..:video_coding_utility",
+    "../../../../api:field_trials_view",
+    "../../../../api/video_codecs:video_codecs_api",
+    "../../../../rtc_base:logging",
+    "../../../../rtc_base/experiments:psnr_experiment",
+  ]
+}
+
 if (rtc_include_tests) {
   rtc_library("video_coding_codecs_av1_tests") {
     testonly = true
@@ -128,11 +149,16 @@
       sources += [
         "libaom_av1_encoder_unittest.cc",
         "libaom_av1_unittest.cc",
+        "libaom_speed_config_factory_unittest.cc",
       ]
       deps += [
         ":libaom_av1_encoder",
+        ":libaom_speed_config_factory",
         "../..:encoded_video_frame_producer",
+        "../..:video_coding_utility",
         "../../../../api:create_frame_generator",
+        "../../../../api:field_trials",
+        "../../../../api:field_trials_view",
         "../../../../api:frame_generator_api",
         "../../../../api:mock_video_encoder",
         "../../../../api/units:data_size",
diff --git a/modules/video_coding/codecs/av1/libaom_av1_encoder.cc b/modules/video_coding/codecs/av1/libaom_av1_encoder.cc
index 14a84f9..4c865ec 100644
--- a/modules/video_coding/codecs/av1/libaom_av1_encoder.cc
+++ b/modules/video_coding/codecs/av1/libaom_av1_encoder.cc
@@ -16,7 +16,6 @@
 #include <numeric>
 #include <optional>
 #include <utility>
-#include <variant>
 #include <vector>
 
 #include "absl/algorithm/container.h"
@@ -25,6 +24,8 @@
 #include "api/environment/environment.h"
 #include "api/field_trials_view.h"
 #include "api/scoped_refptr.h"
+#include "api/units/time_delta.h"
+#include "api/units/timestamp.h"
 #include "api/video/encoded_image.h"
 #include "api/video/render_resolution.h"
 #include "api/video/video_codec_constants.h"
@@ -34,11 +35,13 @@
 #include "api/video/video_frame_buffer.h"
 #include "api/video/video_frame_type.h"
 #include "api/video/video_timing.h"
+#include "api/video_codecs/encoder_speed_controller.h"
 #include "api/video_codecs/scalability_mode.h"
 #include "api/video_codecs/video_codec.h"
 #include "api/video_codecs/video_encoder.h"
 #include "common_video/generic_frame_descriptor/generic_frame_info.h"
 #include "modules/rtp_rtcp/include/rtp_rtcp_defines.h"
+#include "modules/video_coding/codecs/av1/libaom_speed_config_factory.h"
 #include "modules/video_coding/include/video_codec_interface.h"
 #include "modules/video_coding/include/video_error_codes.h"
 #include "modules/video_coding/svc/create_scalability_structure.h"
@@ -46,8 +49,10 @@
 #include "modules/video_coding/utility/frame_sampler.h"
 #include "rtc_base/checks.h"
 #include "rtc_base/experiments/encoder_info_settings.h"
+#include "rtc_base/experiments/encoder_speed_experiment.h"
 #include "rtc_base/experiments/psnr_experiment.h"
 #include "rtc_base/logging.h"
+#include "system_wrappers/include/clock.h"
 #include "third_party/libaom/source/libaom/aom/aom_codec.h"
 #include "third_party/libaom/source/libaom/aom/aom_encoder.h"
 #include "third_party/libaom/source/libaom/aom/aom_image.h"
@@ -94,6 +99,26 @@
   encoded_image.rotation_ = frame.rotation();
   encoded_image.SetColorSpace(frame.color_space());
 }
+
+struct EncodeResult {
+  aom_codec_err_t status_code = AOM_CODEC_OK;
+  std::optional<EncodedImage> encoded_image;
+  TimeDelta encode_time = TimeDelta::Zero();
+};
+
+EncoderSpeedController::EncodeResults ToSpeedControllerEncodeResult(
+    const EncodeResult& encode_result,
+    const EncoderSpeedController::FrameEncodingInfo& frame_info,
+    int speed) {
+  RTC_DCHECK(encode_result.encoded_image.has_value());
+  const EncodedImage& image = *encode_result.encoded_image;
+  return EncoderSpeedController::EncodeResults{
+      .speed = speed,
+      .encode_time = encode_result.encode_time,
+      .qp = image.qp_ / 4,  // Use [0, 63] range instead of [0, 255].
+      .frame_info = frame_info};
+}
+
 class LibaomAv1Encoder final : public VideoEncoder {
  public:
   LibaomAv1Encoder(const Environment& env, LibaomAv1EncoderSettings settings);
@@ -131,7 +156,8 @@
   // Configures the encoder with layer for the next frame.
   void SetSvcLayerId(
       const ScalableVideoController::LayerFrameConfig& layer_frame);
-  // Configures the encoder which buffers next frame updates and can reference.
+  // Configures the encoder which buffers next frame updates and can
+  // reference.
   void SetSvcRefFrameConfig(
       const ScalableVideoController::LayerFrameConfig& layer_frame);
   // If pixel format doesn't match, then reallocate.
@@ -143,7 +169,11 @@
   // will be the input resolution.
   void AdjustScalingFactorsForTopActiveLayer();
 
-  using EncodeResult = std::variant<aom_codec_err_t, EncodedImage>;
+  EncoderSpeedController::ReferenceClass AsSpeedControllerFrameType(
+      const ScalableVideoController::LayerFrameConfig& layer_frame) const;
+
+  // Returns frame interval, compensated for relative pixel count allocation.
+  TimeDelta GetFrameInterval(int spatial_index) const;
 
   // Duration is specified in ticks based on aom_codec_enc_cfg_t::g_timebase,
   // in practice that that is kVideoPayloadTypeFrequency (90kHz).
@@ -186,6 +216,13 @@
   FrameSampler psnr_frame_sampler_;
   const bool drop_repeat_frames_on_enhancement_layers_;
   std::map<int, uint32_t> last_encoded_timestamp_by_sid_;
+
+  const EncoderSpeedExperiment encoder_speed_experiment_;
+  // One speed controller per spatial layer.
+  std::vector<std::unique_ptr<webrtc::EncoderSpeedController>>
+      speed_controllers_;
+  // Don't use when setting input frame timestamps!
+  Clock* const realtime_clock_;
 };
 
 int32_t VerifyCodecSettings(const VideoCodec& codec_settings) {
@@ -231,7 +268,9 @@
       psnr_experiment_(env.field_trials()),
       psnr_frame_sampler_(psnr_experiment_.SamplingInterval()),
       drop_repeat_frames_on_enhancement_layers_(env.field_trials().IsEnabled(
-          "WebRTC-LibaomAv1Encoder-DropRepeatFramesOnEnhancementLayers")) {}
+          "WebRTC-LibaomAv1Encoder-DropRepeatFramesOnEnhancementLayers")),
+      encoder_speed_experiment_(env.field_trials()),
+      realtime_clock_(Clock::GetRealTimeClock()) {}
 
 LibaomAv1Encoder::~LibaomAv1Encoder() {
   Release();
@@ -339,6 +378,7 @@
   SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_TPL_MODEL, 0);
   SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_DELTAQ_MODE, 0);
   SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_ORDER_HINT, 0);
+  // AQ_MODE = 3 enables cyclic refresh.
   SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_AQ_MODE, 3);
   SET_ENCODER_PARAM_OR_RETURN_ERROR(AOME_SET_MAX_INTRA_BITRATE_PCT, 300);
   SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_COEFF_COST_UPD_FREQ, 3);
@@ -390,6 +430,31 @@
     SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_POSTENCODE_DROP_RTC, 1);
   }
 
+  if (encoder_speed_experiment_.IsDynamicSpeedEnabled()) {
+    LibaomSpeedConfigFactory speed_config_factory(
+        codec_settings->GetVideoEncoderComplexity(), codec_settings->mode);
+
+    if (SvcEnabled()) {
+      for (int si = 0; si < svc_params_->number_spatial_layers; ++si) {
+        EncoderSpeedController::Config speed_config =
+            speed_config_factory.GetSpeedConfig(
+                encoder_settings_.spatialLayers[si].width,
+                encoder_settings_.spatialLayers[si].height,
+                svc_controller_->StreamConfig().num_temporal_layers);
+
+        speed_controllers_.push_back(
+            EncoderSpeedController::Create(speed_config, GetFrameInterval(si)));
+      }
+    } else {
+      EncoderSpeedController::Config speed_config =
+          speed_config_factory.GetSpeedConfig(encoder_settings_.width,
+                                              encoder_settings_.height,
+                                              /*num_temporal_layers=*/1);
+      speed_controllers_.push_back(EncoderSpeedController::Create(
+          speed_config, GetFrameInterval(/*spatial_index=*/0)));
+    }
+  }
+
   return WEBRTC_VIDEO_CODEC_OK;
 }
 
@@ -408,6 +473,7 @@
 // Only positive speeds, range for real-time coding currently is: 6 - 10.
 // Speed 11 is used for screen sharing.
 // Lower means slower/better quality, higher means fastest/lower quality.
+// Note: not used if dynamic speed controller is enabled.
 int LibaomAv1Encoder::GetCpuSpeed(int width, int height) {
   if (!settings_.max_pixel_count_to_cpu_speed.empty()) {
     if (auto it =
@@ -652,6 +718,49 @@
   }
 }
 
+EncoderSpeedController::ReferenceClass
+LibaomAv1Encoder::AsSpeedControllerFrameType(
+    const ScalableVideoController::LayerFrameConfig& layer_frame) const {
+  if (layer_frame.IsKeyframe()) {
+    return EncoderSpeedController::ReferenceClass::kKey;
+  }
+
+  int tid = layer_frame.TemporalId();
+  if (tid == 0) {
+    return EncoderSpeedController::ReferenceClass::kMain;
+  } else if (svc_params_ && tid == svc_params_->number_temporal_layers - 1) {
+    return EncoderSpeedController::ReferenceClass::kNoneReference;
+  }
+  return EncoderSpeedController::ReferenceClass::kIntermediate;
+}
+
+TimeDelta LibaomAv1Encoder::GetFrameInterval(int spatial_index) const {
+  TimeDelta frame_interval =
+      TimeDelta::Seconds(1) /
+      (framerate_fps_ == 0 ? encoder_settings_.maxFramerate : framerate_fps_);
+
+  if (!SvcEnabled()) {
+    return frame_interval;
+  }
+
+  // Allocate a time slice for each spatial layer, proportional to the
+  // fraction of pixels allocated for that layer.
+  // E.g. if QVGA + VGA is used, 20% of the encoder time will be allocated
+  // for QVGA + 80% for VGA - since VGA has 4x the number of pixels.
+  int pixel_count_sum = 0;
+  for (int si = 0; si < svc_params_->number_spatial_layers; ++si) {
+    pixel_count_sum += encoder_settings_.spatialLayers[si].width *
+                       encoder_settings_.spatialLayers[si].height;
+  }
+
+  double pixel_count_fraction =
+      static_cast<double>(
+          encoder_settings_.spatialLayers[spatial_index].width *
+          encoder_settings_.spatialLayers[spatial_index].height) /
+      pixel_count_sum;
+  return frame_interval * pixel_count_fraction;
+}
+
 int32_t LibaomAv1Encoder::Encode(
     const VideoFrame& frame,
     const std::vector<VideoFrameType>* frame_types) {
@@ -792,7 +901,7 @@
       svc_params_ ? svc_params_->number_spatial_layers : 1;
   auto next_layer_frame = layer_frames.begin();
   std::vector<std::pair<EncodedImage, CodecSpecificInfo>> encoded_images;
-  for (size_t i = 0; i < num_spatial_layers; ++i) {
+  for (size_t sid = 0; sid < num_spatial_layers; ++sid) {
     // The libaom AV1 encoder requires that `aom_codec_encode` is called for
     // every spatial layer, even if the configured bitrate for that layer is
     // zero. For zero bitrate spatial layers no frames will be produced.
@@ -800,18 +909,24 @@
         non_encoded_layer_frame;
     ScalableVideoController::LayerFrameConfig* layer_frame;
     if (next_layer_frame != layer_frames.end() &&
-        next_layer_frame->SpatialId() == static_cast<int>(i)) {
+        next_layer_frame->SpatialId() == static_cast<int>(sid)) {
       layer_frame = &*next_layer_frame;
       ++next_layer_frame;
     } else {
       // For layers that are not encoded only the spatial id matters.
-      non_encoded_layer_frame.emplace().S(i);
+      non_encoded_layer_frame.emplace().S(sid);
       layer_frame = &*non_encoded_layer_frame;
     }
     const bool end_of_picture = (next_layer_frame == layer_frames.end());
 
     aom_enc_frame_flags_t flags =
         layer_frame->IsKeyframe() ? AOM_EFLAG_FORCE_KF : 0;
+
+    if (SvcEnabled()) {
+      SetSvcLayerId(*layer_frame);
+      SetSvcRefFrameConfig(*layer_frame);
+    }
+
 #if defined(WEBRTC_ENCODER_PSNR_STATS) && defined(AOM_EFLAG_CALCULATE_PSNR)
     if (psnr_experiment_.IsEnabled() &&
         psnr_frame_sampler_.ShouldBeSampled(frame)) {
@@ -819,38 +934,73 @@
     }
 #endif
 
-    if (SvcEnabled()) {
-      SetSvcLayerId(*layer_frame);
-      SetSvcRefFrameConfig(*layer_frame);
-    }
+    if (!speed_controllers_.empty()) {
+      RTC_DCHECK_GT(speed_controllers_.size(), sid);
+      EncoderSpeedController& speed_controller = *speed_controllers_[sid];
 
-    EncodeResult result = DoEncode(duration, flags, layer_frame);
-    if (aom_codec_err_t* status = std::get_if<aom_codec_err_t>(&result);
-        status != nullptr) {
-      if (*status == AOM_CODEC_OK) {
-        // AOM_CODEC_OK means success with no image, so do nothing.
-        continue;
-      } else {
-        RTC_LOG(LS_WARNING) << "LibaomAv1Encoder::Encode returned " << status
-                            << " on aom_codec_encode.";
+      EncoderSpeedController::FrameEncodingInfo frame_info{
+          .reference_type = AsSpeedControllerFrameType(*layer_frame),
+          .is_repeat_frame = frame.is_repeat_frame()};
+      EncoderSpeedController::EncodeSettings settings =
+          speed_controller.GetEncodeSettings(frame_info);
+
+      SET_ENCODER_PARAM_OR_RETURN_ERROR(AOME_SET_CPUUSED, settings.speed);
+      EncodeResult output = DoEncode(duration, flags, layer_frame);
+      if (output.status_code != AOM_CODEC_OK) {
+        RTC_LOG(LS_WARNING)
+            << "LibaomAv1Encoder::Encode returned error: '"
+            << aom_codec_err_to_string(output.status_code) << "'.";
         return WEBRTC_VIDEO_CODEC_ERROR;
       }
+
+      if (!output.encoded_image.has_value()) {
+        // Frame dropped, presumably by rate controller. This is not an error.
+        continue;
+      }
+
+      RTC_DCHECK(output.encoded_image.has_value());
+
+      speed_controller.OnEncodedFrame(
+          ToSpeedControllerEncodeResult(output, frame_info, settings.speed));
+
+      RTC_DCHECK_GT(output.encoded_image->size(), 0u);
+      PopulateEncodedImageFromVideoFrame(frame, *output.encoded_image);
+      CodecSpecificInfo codec_specifics = CreateCodecSpecificInfo(
+          *output.encoded_image, *layer_frame, end_of_picture);
+
+      if (non_encoded_layer_frame) {
+        continue;
+      }
+
+      encoded_images.emplace_back(std::move(*output.encoded_image),
+                                  std::move(codec_specifics));
+    } else {
+      // No speed controller used.
+      EncodeResult output = DoEncode(duration, flags, layer_frame);
+      if (output.status_code != AOM_CODEC_OK) {
+        RTC_LOG(LS_WARNING)
+            << "LibaomAv1Encoder::Encode returned error: '"
+            << aom_codec_err_to_string(output.status_code) << "'.";
+        return WEBRTC_VIDEO_CODEC_ERROR;
+      }
+      if (!output.encoded_image.has_value()) {
+        // Status code OK but no image - the encoder dropped the frame,
+        // presumably due to rate control. This is not an error.
+        continue;
+      }
+
+      if (non_encoded_layer_frame) {
+        continue;
+      }
+
+      RTC_DCHECK_GT(output.encoded_image->size(), 0u);
+      PopulateEncodedImageFromVideoFrame(frame, *output.encoded_image);
+      CodecSpecificInfo codec_specifics = CreateCodecSpecificInfo(
+          *output.encoded_image, *layer_frame, end_of_picture);
+
+      encoded_images.emplace_back(std::move(*output.encoded_image),
+                                  std::move(codec_specifics));
     }
-
-    if (non_encoded_layer_frame) {
-      continue;
-    }
-
-    RTC_DCHECK(std::holds_alternative<EncodedImage>(result));
-    EncodedImage encoded_image = std::get<EncodedImage>(std::move(result));
-
-    RTC_DCHECK_GT(encoded_image.size(), 0u);
-    PopulateEncodedImageFromVideoFrame(frame, encoded_image);
-    CodecSpecificInfo codec_specifics =
-        CreateCodecSpecificInfo(encoded_image, *layer_frame, end_of_picture);
-
-    encoded_images.emplace_back(std::move(encoded_image),
-                                std::move(codec_specifics));
   }
 
   if (!encoded_images.empty()) {
@@ -867,23 +1017,29 @@
   return WEBRTC_VIDEO_CODEC_OK;
 }
 
-LibaomAv1Encoder::EncodeResult LibaomAv1Encoder::DoEncode(
+EncodeResult LibaomAv1Encoder::DoEncode(
     uint32_t duration,
     aom_enc_frame_flags_t flags,
     ScalableVideoController::LayerFrameConfig* layer_frame) {
   // Encode a frame. The presentation timestamp `pts` should not use real
   // timestamps from frames or the wall clock, as that can cause the rate
   // controller to misbehave.
-  aom_codec_err_t ret =
+  EncodeResult output;
+
+  Timestamp start_time = realtime_clock_->CurrentTime();
+  output.status_code =
       aom_codec_encode(&ctx_, frame_for_encode_, timestamp_, duration, flags);
-  if (ret != AOM_CODEC_OK) {
-    return ret;
+  output.encode_time = realtime_clock_->CurrentTime() - start_time;
+
+  if (output.status_code != AOM_CODEC_OK) {
+    return output;
   }
 
   // Get encoded image data.
   aom_codec_iter_t iter = nullptr;
   int data_pkt_count = 0;
-  EncodedImage encoded_image;
+  output.encoded_image.emplace();
+  EncodedImage& encoded_image = *output.encoded_image;
   const aom_codec_cx_pkt_t* pkt = nullptr;
   while ((pkt = aom_codec_get_cx_data(&ctx_, &iter)) != nullptr) {
     if (pkt->kind == AOM_CODEC_CX_FRAME_PKT && pkt->data.frame.sz > 0) {
@@ -891,7 +1047,8 @@
         RTC_LOG(LS_WARNING) << "LibaomAv1Encoder::Encoder returned more than "
                                "one data packet for an input video frame.";
         Release();
-        return AOM_CODEC_ERROR;
+        output.status_code = AOM_CODEC_ERROR;
+        return output;
       }
       encoded_image.SetEncodedData(EncodedImageBuffer::Create(
           /*data=*/static_cast<const uint8_t*>(pkt->data.frame.buf),
@@ -924,7 +1081,8 @@
       if (!SetEncoderControlParameters(AOME_GET_LAST_QUANTIZER,
                                        &encoded_image.qp_)) {
         RTC_LOG(LS_WARNING) << "Unable to fetch QP for frame.";
-        return AOM_CODEC_ERROR;
+        output.status_code = AOM_CODEC_ERROR;
+        return output;
       }
 
       ++data_pkt_count;
@@ -937,11 +1095,11 @@
   }
 
   if (encoded_image.size() == 0) {
-    // Encode success, but no image produced.
-    return AOM_CODEC_OK;
+    // Encode success, but no image produced. Frame was just dropped.
+    output.encoded_image.reset();
   }
 
-  return encoded_image;
+  return output;
 }
 
 CodecSpecificInfo LibaomAv1Encoder::CreateCodecSpecificInfo(
@@ -1019,6 +1177,9 @@
   }
 
   framerate_fps_ = parameters.framerate_fps;
+  for (size_t si = 0; si < speed_controllers_.size(); ++si) {
+    speed_controllers_[si]->SetFrameInterval(GetFrameInterval(si));
+  }
 
   rates_configured_ = true;
 }
diff --git a/modules/video_coding/codecs/av1/libaom_speed_config_factory.cc b/modules/video_coding/codecs/av1/libaom_speed_config_factory.cc
new file mode 100644
index 0000000..7194262
--- /dev/null
+++ b/modules/video_coding/codecs/av1/libaom_speed_config_factory.cc
@@ -0,0 +1,134 @@
+// Copyright (c) 2025 The WebRTC project authors. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the LICENSE file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS.  All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+
+#include "modules/video_coding/codecs/av1/libaom_speed_config_factory.h"
+
+#include <algorithm>
+#include <optional>
+
+#include "api/video_codecs/encoder_speed_controller.h"
+#include "api/video_codecs/video_codec.h"
+
+namespace webrtc {
+
+namespace {
+
+constexpr int kNumLevels = 15;
+EncoderSpeedController::Config::SpeedLevel kAllLevels[kNumLevels] = {
+    {.speeds = {5, 5, 6, 6}, .min_qp = 31},
+    {.speeds = {5, 6, 7, 7}, .min_qp = 30},
+    {.speeds = {5, 6, 8, 10}, .min_qp = 30},
+    {.speeds = {5, 6, 9, 11}, .min_qp = 29},
+    {.speeds = {5, 7, 7, 7}, .min_qp = 29},
+    {.speeds = {7, 7, 8, 8}, .min_qp = 28},
+    {.speeds = {7, 7, 8, 9}, .min_qp = 28},
+    {.speeds = {7, 7, 10, 10}, .min_qp = 28},
+    {.speeds = {7, 7, 10, 11}, .min_qp = 27},
+    {.speeds = {7, 7, 11, 11}, .min_qp = 26},
+    {.speeds = {7, 8, 9, 9}, .min_qp = 26},
+    {.speeds = {7, 9, 9, 11}, .min_qp = 25},
+    {.speeds = {8, 9, 10, 11}, .min_qp = 25},
+    {.speeds = {9, 10, 11, 11}, .min_qp = std::nullopt},
+    {.speeds = {10, 11, 11, 11}, .min_qp = std::nullopt}};
+
+bool HasSameSpeeds(const EncoderSpeedController::Config::SpeedLevel& a,
+                   const EncoderSpeedController::Config::SpeedLevel& b,
+                   int num_temporal_layers) {
+  if (a.speeds[0] != b.speeds[0] || a.speeds[1] != b.speeds[1]) {
+    // Keyframe or base layer speed differs.
+    return false;
+  }
+  if (num_temporal_layers > 1 && a.speeds[3] != b.speeds[3]) {
+    // Upper (non-reference) layer speed differs.
+    return false;
+  }
+  // Middle temporal layer (intermediate class).
+  return a.speeds[2] == b.speeds[2];
+}
+
+void AddSpeedLevels(int num_levels,
+                    int num_temporal_layers,
+                    EncoderSpeedController::Config& config) {
+  // Add up to `num_levels` speeds - but ignore levels that have identical
+  // speeds when `num_temporal_layers` is used (e.g. same base-layer speed for
+  // single-layer).
+  config.speed_levels.reserve(num_levels);
+  for (int i = kNumLevels - 1; i >= kNumLevels - num_levels; --i) {
+    if (i == kNumLevels - 1 ||
+        !HasSameSpeeds(kAllLevels[i], config.speed_levels.back(),
+                       num_temporal_layers)) {
+      config.speed_levels.push_back(kAllLevels[i]);
+    }
+  }
+
+  std::reverse(config.speed_levels.begin(), config.speed_levels.end());
+}
+
+}  // namespace
+
+LibaomSpeedConfigFactory::LibaomSpeedConfigFactory(
+    VideoCodecComplexity complexity,
+    VideoCodecMode mode)
+    : complexity_(complexity), mode_(mode) {}
+
+EncoderSpeedController::Config LibaomSpeedConfigFactory::GetSpeedConfig(
+    int width,
+    int height,
+    int num_temporal_layers) const {
+  EncoderSpeedController::Config config;
+  int num_levels = 0;
+  switch (complexity_) {
+    case VideoCodecComplexity::kComplexityLow:
+      // Level 9x10x11x11 and up.
+      num_levels = 2;
+      break;
+    case VideoCodecComplexity::kComplexityNormal:
+      // Level 8x9x10x11 and up.
+      num_levels = 3;
+      break;
+    case VideoCodecComplexity::kComplexityHigh:
+      // Level 7x7x10x10 and up.
+      num_levels = 8;
+      break;
+    case VideoCodecComplexity::kComplexityHigher:
+      // Level 5x6x8x10 and up (< 720p, 5x7x7x7 otherwise)
+      if (width * height < 1280 * 720) {
+        num_levels = 12;
+      } else {
+        num_levels = 10;
+      }
+      break;
+    case VideoCodecComplexity::kComplexityMax:
+      // All levels.
+      num_levels = kNumLevels;
+      break;
+  }
+
+  if (mode_ == VideoCodecMode::kScreensharing) {
+    num_levels = std::max(1, num_levels - 1);
+  }
+
+  AddSpeedLevels(num_levels, num_temporal_layers, config);
+
+  // Don't cap speed based on resolution - only adjust the start value.
+  const int num_pixels = width * height;
+  const int available_speed_levels = config.speed_levels.size();
+  if (num_pixels > 1920 * 1080) {
+    config.start_speed_index = std::max(available_speed_levels - 4, 0);
+  } else if (num_pixels > 1280 * 720) {
+    config.start_speed_index = std::max(available_speed_levels - 3, 0);
+  } else if (num_pixels > 640 * 360) {
+    config.start_speed_index = std::max(available_speed_levels - 2, 0);
+  } else {
+    config.start_speed_index = std::max(available_speed_levels - 1, 0);
+  }
+
+  return config;
+}
+
+}  // namespace webrtc
diff --git a/modules/video_coding/codecs/av1/libaom_speed_config_factory.h b/modules/video_coding/codecs/av1/libaom_speed_config_factory.h
new file mode 100644
index 0000000..6631da7
--- /dev/null
+++ b/modules/video_coding/codecs/av1/libaom_speed_config_factory.h
@@ -0,0 +1,33 @@
+// Copyright (c) 2025 The WebRTC project authors. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the LICENSE file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS.  All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+
+#ifndef MODULES_VIDEO_CODING_CODECS_AV1_LIBAOM_SPEED_CONFIG_FACTORY_H_
+#define MODULES_VIDEO_CODING_CODECS_AV1_LIBAOM_SPEED_CONFIG_FACTORY_H_
+
+#include "api/video_codecs/encoder_speed_controller.h"
+#include "api/video_codecs/video_codec.h"
+
+namespace webrtc {
+
+class LibaomSpeedConfigFactory {
+ public:
+  LibaomSpeedConfigFactory(VideoCodecComplexity complexity,
+                           VideoCodecMode mode);
+
+  EncoderSpeedController::Config GetSpeedConfig(int width,
+                                                int height,
+                                                int num_temporal_layers) const;
+
+ private:
+  const VideoCodecComplexity complexity_;
+  const VideoCodecMode mode_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_VIDEO_CODING_CODECS_AV1_LIBAOM_SPEED_CONFIG_FACTORY_H_
diff --git a/modules/video_coding/codecs/av1/libaom_speed_config_factory_unittest.cc b/modules/video_coding/codecs/av1/libaom_speed_config_factory_unittest.cc
new file mode 100644
index 0000000..ae4305e
--- /dev/null
+++ b/modules/video_coding/codecs/av1/libaom_speed_config_factory_unittest.cc
@@ -0,0 +1,173 @@
+// Copyright (c) 2025 The WebRTC project authors. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the LICENSE file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS.  All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+
+#include "modules/video_coding/codecs/av1/libaom_speed_config_factory.h"
+
+#include <algorithm>
+#include <cstddef>
+#include <set>
+
+#include "api/video_codecs/encoder_speed_controller.h"
+#include "api/video_codecs/video_codec.h"
+#include "rtc_base/checks.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+
+// Helper to allow SpeedLevel to be used in a set.
+bool operator<(const EncoderSpeedController::Config::SpeedLevel& lhs,
+               const EncoderSpeedController::Config::SpeedLevel& rhs) {
+  if (lhs.speeds != rhs.speeds) {
+    return lhs.speeds < rhs.speeds;
+  }
+  return lhs.min_qp < rhs.min_qp;
+}
+
+namespace {
+
+using ::testing::Values;
+
+// Test that the number of speed levels does not decrease with complexity.
+TEST(LibaomSpeedConfigFactoryTest, NumLevelsIncreaseWithComplexity) {
+  LibaomSpeedConfigFactory factory_low(VideoCodecComplexity::kComplexityLow,
+                                       VideoCodecMode::kRealtimeVideo);
+  EncoderSpeedController::Config config_low =
+      factory_low.GetSpeedConfig(640, 360, 3);
+
+  LibaomSpeedConfigFactory factory_normal(
+      VideoCodecComplexity::kComplexityNormal, VideoCodecMode::kRealtimeVideo);
+  EncoderSpeedController::Config config_normal =
+      factory_normal.GetSpeedConfig(640, 360, 3);
+
+  LibaomSpeedConfigFactory factory_high(VideoCodecComplexity::kComplexityHigh,
+                                        VideoCodecMode::kRealtimeVideo);
+  EncoderSpeedController::Config config_high =
+      factory_high.GetSpeedConfig(640, 360, 3);
+
+  LibaomSpeedConfigFactory factory_higher(
+      VideoCodecComplexity::kComplexityHigher, VideoCodecMode::kRealtimeVideo);
+  EncoderSpeedController::Config config_higher =
+      factory_higher.GetSpeedConfig(640, 360, 3);
+
+  LibaomSpeedConfigFactory factory_max(VideoCodecComplexity::kComplexityMax,
+                                       VideoCodecMode::kRealtimeVideo);
+  EncoderSpeedController::Config config_max =
+      factory_max.GetSpeedConfig(640, 360, 3);
+
+  EXPECT_GE(config_normal.speed_levels.size(), config_low.speed_levels.size());
+  EXPECT_GE(config_high.speed_levels.size(), config_normal.speed_levels.size());
+  EXPECT_GE(config_higher.speed_levels.size(), config_high.speed_levels.size());
+  EXPECT_GE(config_max.speed_levels.size(), config_higher.speed_levels.size());
+}
+
+// Test that per-level speeds never decrease from kKey to kNoneReference.
+TEST(LibaomSpeedConfigFactoryTest, SpeedsAreMonotonic) {
+  LibaomSpeedConfigFactory factory(VideoCodecComplexity::kComplexityMax,
+                                   VideoCodecMode::kRealtimeVideo);
+  EncoderSpeedController::Config config = factory.GetSpeedConfig(1280, 720, 3);
+
+  for (const auto& level : config.speed_levels) {
+    // Lower reference class index means more important, so speed should be
+    // lower or equal.
+    EXPECT_LE(level.speeds[static_cast<int>(
+                  EncoderSpeedController::ReferenceClass::kKey)],
+              level.speeds[static_cast<int>(
+                  EncoderSpeedController::ReferenceClass::kMain)]);
+    EXPECT_LE(level.speeds[static_cast<int>(
+                  EncoderSpeedController::ReferenceClass::kMain)],
+              level.speeds[static_cast<int>(
+                  EncoderSpeedController::ReferenceClass::kIntermediate)]);
+    EXPECT_LE(level.speeds[static_cast<int>(
+                  EncoderSpeedController::ReferenceClass::kIntermediate)],
+              level.speeds[static_cast<int>(
+                  EncoderSpeedController::ReferenceClass::kNoneReference)]);
+  }
+}
+
+// Test that keyframe and base layer speeds never decrease between levels.
+TEST(LibaomSpeedConfigFactoryTest, KeyAndMainSpeedsIncreaseBetweenLevels) {
+  LibaomSpeedConfigFactory factory(VideoCodecComplexity::kComplexityMax,
+                                   VideoCodecMode::kRealtimeVideo);
+  EncoderSpeedController::Config config = factory.GetSpeedConfig(1280, 720, 3);
+
+  for (size_t i = 0; i + 1 < config.speed_levels.size(); ++i) {
+    const auto& current_level = config.speed_levels[i];
+    const auto& next_level = config.speed_levels[i + 1];
+    EXPECT_LE(current_level.speeds[static_cast<int>(
+                  EncoderSpeedController::ReferenceClass::kKey)],
+              next_level.speeds[static_cast<int>(
+                  EncoderSpeedController::ReferenceClass::kKey)]);
+    EXPECT_LE(current_level.speeds[static_cast<int>(
+                  EncoderSpeedController::ReferenceClass::kMain)],
+              next_level.speeds[static_cast<int>(
+                  EncoderSpeedController::ReferenceClass::kMain)]);
+  }
+}
+
+struct ResolutionParams {
+  int width;
+  int height;
+  int expected_start_index_offset;  // Subtracted from speed_levels.size().
+};
+
+class LibaomSpeedConfigFactoryResolutionTest
+    : public ::testing::TestWithParam<ResolutionParams> {};
+
+INSTANTIATE_TEST_SUITE_P(All,
+                         LibaomSpeedConfigFactoryResolutionTest,
+                         Values(ResolutionParams{320, 180, 1},
+                                ResolutionParams{640, 360, 1},
+                                ResolutionParams{1280, 720, 2},
+                                ResolutionParams{1920, 1080, 3},
+                                ResolutionParams{2560, 1440, 4}));
+
+TEST_P(LibaomSpeedConfigFactoryResolutionTest, GetSpeedConfigStartSpeedIndex) {
+  const ResolutionParams& params = GetParam();
+  LibaomSpeedConfigFactory factory(VideoCodecComplexity::kComplexityMax,
+                                   VideoCodecMode::kRealtimeVideo);
+  EncoderSpeedController::Config config =
+      factory.GetSpeedConfig(params.width, params.height, 3);
+  int expected_index =
+      std::max(0, static_cast<int>(config.speed_levels.size()) -
+                      params.expected_start_index_offset);
+  EXPECT_EQ(config.start_speed_index, expected_index);
+}
+
+void CheckDistinctConfigs(const LibaomSpeedConfigFactory& factory,
+                          int num_temporal_layers) {
+  RTC_DCHECK_GT(num_temporal_layers, 0);
+  RTC_DCHECK_LE(num_temporal_layers, 3);
+
+  EncoderSpeedController::Config config =
+      factory.GetSpeedConfig(640, 360, num_temporal_layers);
+
+  std::set<EncoderSpeedController::Config::SpeedLevel> unique_configs(
+      config.speed_levels.begin(), config.speed_levels.end());
+  EXPECT_EQ(unique_configs.size(), config.speed_levels.size());
+}
+
+TEST(LibaomSpeedConfigFactoryTest, DistinctConfigs1Tl) {
+  LibaomSpeedConfigFactory factory(VideoCodecComplexity::kComplexityMax,
+                                   VideoCodecMode::kRealtimeVideo);
+  CheckDistinctConfigs(factory, 1);
+}
+
+TEST(LibaomSpeedConfigFactoryTest, DistinctConfigs2Tl) {
+  LibaomSpeedConfigFactory factory(VideoCodecComplexity::kComplexityMax,
+                                   VideoCodecMode::kRealtimeVideo);
+  CheckDistinctConfigs(factory, 2);
+}
+
+TEST(LibaomSpeedConfigFactoryTest, DistinctConfigs3Tl) {
+  LibaomSpeedConfigFactory factory(VideoCodecComplexity::kComplexityMax,
+                                   VideoCodecMode::kRealtimeVideo);
+  CheckDistinctConfigs(factory, 3);
+}
+
+}  // namespace
+}  // namespace webrtc
diff --git a/modules/video_coding/utility/encoder_speed_controller_impl.cc b/modules/video_coding/utility/encoder_speed_controller_impl.cc
index 299a269..987f0ad 100644
--- a/modules/video_coding/utility/encoder_speed_controller_impl.cc
+++ b/modules/video_coding/utility/encoder_speed_controller_impl.cc
@@ -151,8 +151,7 @@
 }
 
 void EncoderSpeedControllerImpl::OnEncodedFrame(
-    EncoderSpeedController::EncodeResults results,
-    std::optional<EncodeResults> baseline_results) {
+    EncoderSpeedController::EncodeResults results) {
   double encode_tims_ms = results.encode_time.us() / 1000.0;
   if (results.frame_info.reference_type == ReferenceClass::kKey) {
     encode_tims_ms /= kKeyframeEncodeTimeCompensator;
diff --git a/modules/video_coding/utility/encoder_speed_controller_impl.h b/modules/video_coding/utility/encoder_speed_controller_impl.h
index e84881e..9c7cbba8 100644
--- a/modules/video_coding/utility/encoder_speed_controller_impl.h
+++ b/modules/video_coding/utility/encoder_speed_controller_impl.h
@@ -12,7 +12,6 @@
 #define MODULES_VIDEO_CODING_UTILITY_ENCODER_SPEED_CONTROLLER_IMPL_H_
 
 #include <memory>
-#include <optional>
 
 #include "api/units/time_delta.h"
 #include "api/video_codecs/encoder_speed_controller.h"
@@ -41,11 +40,8 @@
   // thereafter be configured with requested settings.
   EncodeSettings GetEncodeSettings(FrameEncodingInfo frame_info) override;
 
-  // Should be called after each frame has completed encoding. If a baseline
-  // comparison speed was set in the `EncodeSettings`, the `baseline_results`
-  // parameter should be set with the results corresponding to those settings.
-  void OnEncodedFrame(EncodeResults results,
-                      std::optional<EncodeResults> baseline_results) override;
+  // Should be called after each frame has completed encoding.
+  void OnEncodedFrame(EncodeResults results) override;
 
   const Config& config() const { return config_; }
 
diff --git a/modules/video_coding/utility/encoder_speed_controller_impl_unittest.cc b/modules/video_coding/utility/encoder_speed_controller_impl_unittest.cc
index 8b2bdeb..a887961 100644
--- a/modules/video_coding/utility/encoder_speed_controller_impl_unittest.cc
+++ b/modules/video_coding/utility/encoder_speed_controller_impl_unittest.cc
@@ -11,7 +11,6 @@
 #include <optional>
 
 #include "api/units/time_delta.h"
-#include "api/units/timestamp.h"
 #include "api/video_codecs/encoder_speed_controller.h"
 #include "test/gmock.h"
 #include "test/gtest.h"
@@ -61,6 +60,7 @@
 
 TEST(EncoderSpeedControllerTest, GetEncodeSettingsBaseLayers) {
   EncoderSpeedController::Config config = GetDefaultConfig();
+  config.speed_levels[0].min_qp = 25;  // Prevent dropping to speed 5 easily
   auto controller = EncoderSpeedController::Create(config, kFrameInterval);
   ASSERT_NE(controller, nullptr);
 
@@ -72,22 +72,18 @@
 
   // Simulate high encode time to increase speed
   for (int i = 0; i < 10; ++i) {
-    controller->OnEncodedFrame({.speed = 6,
-                                .encode_time = kFrameInterval * 2,
+    controller->OnEncodedFrame({.encode_time = kFrameInterval * 0.90,
                                 .qp = 30,
-                                .frame_info = frame_info},
-                               std::nullopt);
+                                .frame_info = frame_info});
   }
   // Speed should increase to 7
   EXPECT_EQ(controller->GetEncodeSettings(frame_info).speed, 7);
 
   // Simulate low encode time to decrease speed
   for (int i = 0; i < 20; ++i) {
-    controller->OnEncodedFrame({.speed = 7,
-                                .encode_time = kFrameInterval / 10,
-                                .qp = 30,
-                                .frame_info = frame_info},
-                               std::nullopt);
+    controller->OnEncodedFrame({.encode_time = kFrameInterval * 0.10,
+                                .qp = 20,
+                                .frame_info = frame_info});
   }
   // Speed should decrease to 6
   EXPECT_EQ(controller->GetEncodeSettings(frame_info).speed, 6);
@@ -141,12 +137,9 @@
       .reference_type = ReferenceClass::kMain};
 
   for (int i = 0; i < 20; ++i) {
-    controller->OnEncodedFrame(
-        {.speed = 7,
-         .encode_time = kFrameInterval * 2,  // High encode time
-         .qp = 30,
-         .frame_info = frame_info},
-        std::nullopt);
+    controller->OnEncodedFrame({.encode_time = kFrameInterval * 0.95,
+                                .qp = 30,
+                                .frame_info = frame_info});
   }
 
   EXPECT_EQ(controller->GetEncodeSettings(frame_info).speed,
@@ -163,12 +156,7 @@
       .reference_type = ReferenceClass::kMain};
 
   for (int i = 0; i < 20; ++i) {
-    controller->OnEncodedFrame(
-        {.speed = 5,
-         .encode_time = kFrameInterval / 10,  // Low encode time
-         .qp = 30,
-         .frame_info = frame_info},
-        std::nullopt);
+    controller->OnEncodedFrame({.speed = 5, .frame_info = frame_info});
   }
 
   EXPECT_EQ(controller->GetEncodeSettings(frame_info).speed,
@@ -189,11 +177,9 @@
 
   // Simulate low QP, normal encode time
   for (int i = 0; i < 20; ++i) {
-    controller->OnEncodedFrame({.speed = 6,
-                                .encode_time = kFrameInterval * 0.6,
+    controller->OnEncodedFrame({.encode_time = kFrameInterval * 0.60,
                                 .qp = 10,
-                                .frame_info = frame_info},
-                               std::nullopt);
+                                .frame_info = frame_info});
   }
   // Speed should increase to 7 due to low QP
   EXPECT_EQ(controller->GetEncodeSettings(frame_info).speed, 7);
@@ -213,11 +199,9 @@
 
   // Simulate low encode time but also low QP
   for (int i = 0; i < 20; ++i) {
-    controller->OnEncodedFrame({.speed = 6,
-                                .encode_time = kFrameInterval / 10,
+    controller->OnEncodedFrame({.encode_time = kFrameInterval * 0.10,
                                 .qp = 10,
-                                .frame_info = frame_info},
-                               std::nullopt);
+                                .frame_info = frame_info});
   }
   // Speed should NOT decrease to 5 because QP is below the next level's min_qp
   EXPECT_EQ(controller->GetEncodeSettings(frame_info).speed, 6);