Add libaom AV1 integration of dynamic speed controller.
This CL creates a config factory for the libaom based av1 encoder and
integrates the use of the speed controller in the encoder wrapper.
A (for now) unused parameter in the API was removed.
Bug: webrtc:443906251
Change-Id: I3c4522ab71446e34d4017b558d961803f26b2e38
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/422800
Reviewed-by: Ilya Nikolaevskiy <ilnik@webrtc.org>
Commit-Queue: Erik Språng <sprang@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#46123}
diff --git a/api/video_codecs/encoder_speed_controller.h b/api/video_codecs/encoder_speed_controller.h
index 69bcd87..87ceaef 100644
--- a/api/video_codecs/encoder_speed_controller.h
+++ b/api/video_codecs/encoder_speed_controller.h
@@ -109,12 +109,8 @@
// thereafter be configured with requested settings.
virtual EncodeSettings GetEncodeSettings(FrameEncodingInfo frame_info) = 0;
- // Should be called after each frame has completed encoding. If a baseline
- // comparison speed was set in the `EncodeSettings`, the `baseline_results`
- // parameter should be set with the results corresponding to those settings.
- virtual void OnEncodedFrame(
- EncodeResults results,
- std::optional<EncodeResults> baseline_results) = 0;
+ // Should be called after each frame has completed encoding.
+ virtual void OnEncodedFrame(EncodeResults results) = 0;
};
} // namespace webrtc
diff --git a/modules/video_coding/codecs/av1/BUILD.gn b/modules/video_coding/codecs/av1/BUILD.gn
index ed40a7b..8b635ef 100644
--- a/modules/video_coding/codecs/av1/BUILD.gn
+++ b/modules/video_coding/codecs/av1/BUILD.gn
@@ -59,18 +59,22 @@
"libaom_av1_encoder.h",
]
deps = [
+ ":libaom_speed_config_factory",
"../..:video_codec_interface",
"../..:video_coding_utility",
"../../:frame_sampler",
"../../../../api:field_trials_view",
"../../../../api:scoped_refptr",
"../../../../api/environment",
+ "../../../../api/units:time_delta",
+ "../../../../api/units:timestamp",
"../../../../api/video:encoded_image",
"../../../../api/video:render_resolution",
"../../../../api/video:video_codec_constants",
"../../../../api/video:video_frame",
"../../../../api/video:video_frame_type",
"../../../../api/video:video_rtp_headers",
+ "../../../../api/video_codecs:encoder_speed_controller_factory",
"../../../../api/video_codecs:scalability_mode",
"../../../../api/video_codecs:video_codecs_api",
"../../../../common_video",
@@ -80,7 +84,9 @@
"../../../../rtc_base:logging",
"../../../../rtc_base:rtc_numerics",
"../../../../rtc_base/experiments:encoder_info_settings",
+ "../../../../rtc_base/experiments:encoder_speed_experiment",
"../../../../rtc_base/experiments:psnr_experiment",
+ "../../../../system_wrappers",
"../../svc:scalability_structures",
"../../svc:scalable_video_controller",
"//third_party/abseil-cpp/absl/algorithm:container",
@@ -92,6 +98,21 @@
]
}
+# Factory that produces EncoderSpeedController configs for the libaom based
+# AV1 encoder (libaom_speed_config_factory.{h,cc}).
+rtc_library("libaom_speed_config_factory") {
+ sources = [
+ "libaom_speed_config_factory.cc",
+ "libaom_speed_config_factory.h",
+ ]
+ deps = [
+ "../..:video_codec_interface",
+ "../..:video_coding_utility",
+ "../../../../api:field_trials_view",
+ "../../../../api/video_codecs:video_codecs_api",
+ "../../../../rtc_base:logging",
+ "../../../../rtc_base/experiments:psnr_experiment",
+ ]
+}
+
if (rtc_include_tests) {
rtc_library("video_coding_codecs_av1_tests") {
testonly = true
@@ -128,11 +149,16 @@
sources += [
"libaom_av1_encoder_unittest.cc",
"libaom_av1_unittest.cc",
+ "libaom_speed_config_factory_unittest.cc",
]
deps += [
":libaom_av1_encoder",
+ ":libaom_speed_config_factory",
"../..:encoded_video_frame_producer",
+ "../..:video_coding_utility",
"../../../../api:create_frame_generator",
+ "../../../../api:field_trials",
+ "../../../../api:field_trials_view",
"../../../../api:frame_generator_api",
"../../../../api:mock_video_encoder",
"../../../../api/units:data_size",
diff --git a/modules/video_coding/codecs/av1/libaom_av1_encoder.cc b/modules/video_coding/codecs/av1/libaom_av1_encoder.cc
index 14a84f9..4c865ec 100644
--- a/modules/video_coding/codecs/av1/libaom_av1_encoder.cc
+++ b/modules/video_coding/codecs/av1/libaom_av1_encoder.cc
@@ -16,7 +16,6 @@
#include <numeric>
#include <optional>
#include <utility>
-#include <variant>
#include <vector>
#include "absl/algorithm/container.h"
@@ -25,6 +24,8 @@
#include "api/environment/environment.h"
#include "api/field_trials_view.h"
#include "api/scoped_refptr.h"
+#include "api/units/time_delta.h"
+#include "api/units/timestamp.h"
#include "api/video/encoded_image.h"
#include "api/video/render_resolution.h"
#include "api/video/video_codec_constants.h"
@@ -34,11 +35,13 @@
#include "api/video/video_frame_buffer.h"
#include "api/video/video_frame_type.h"
#include "api/video/video_timing.h"
+#include "api/video_codecs/encoder_speed_controller.h"
#include "api/video_codecs/scalability_mode.h"
#include "api/video_codecs/video_codec.h"
#include "api/video_codecs/video_encoder.h"
#include "common_video/generic_frame_descriptor/generic_frame_info.h"
#include "modules/rtp_rtcp/include/rtp_rtcp_defines.h"
+#include "modules/video_coding/codecs/av1/libaom_speed_config_factory.h"
#include "modules/video_coding/include/video_codec_interface.h"
#include "modules/video_coding/include/video_error_codes.h"
#include "modules/video_coding/svc/create_scalability_structure.h"
@@ -46,8 +49,10 @@
#include "modules/video_coding/utility/frame_sampler.h"
#include "rtc_base/checks.h"
#include "rtc_base/experiments/encoder_info_settings.h"
+#include "rtc_base/experiments/encoder_speed_experiment.h"
#include "rtc_base/experiments/psnr_experiment.h"
#include "rtc_base/logging.h"
+#include "system_wrappers/include/clock.h"
#include "third_party/libaom/source/libaom/aom/aom_codec.h"
#include "third_party/libaom/source/libaom/aom/aom_encoder.h"
#include "third_party/libaom/source/libaom/aom/aom_image.h"
@@ -94,6 +99,26 @@
encoded_image.rotation_ = frame.rotation();
encoded_image.SetColorSpace(frame.color_space());
}
+
+// Outcome of a single encode call into libaom.
+struct EncodeResult {
+ // Status of the encode call; AOM_CODEC_OK unless an error was reported.
+ aom_codec_err_t status_code = AOM_CODEC_OK;
+ // The produced image. Unset when no image was produced for the frame.
+ std::optional<EncodedImage> encoded_image;
+ // Time spent in the encode call; zero until measured.
+ TimeDelta encode_time = TimeDelta::Zero();
+};
+
+// Converts the encoder-internal `encode_result` into the result type consumed
+// by the speed controller. `encode_result` must hold an encoded image.
+EncoderSpeedController::EncodeResults ToSpeedControllerEncodeResult(
+    const EncodeResult& encode_result,
+    const EncoderSpeedController::FrameEncodingInfo& frame_info,
+    int speed) {
+  RTC_DCHECK(encode_result.encoded_image.has_value());
+  // Use [0, 63] range instead of [0, 255].
+  const int qp = encode_result.encoded_image->qp_ / 4;
+  EncoderSpeedController::EncodeResults results{
+      .speed = speed,
+      .encode_time = encode_result.encode_time,
+      .qp = qp,
+      .frame_info = frame_info};
+  return results;
+}
+
class LibaomAv1Encoder final : public VideoEncoder {
public:
LibaomAv1Encoder(const Environment& env, LibaomAv1EncoderSettings settings);
@@ -131,7 +156,8 @@
// Configures the encoder with layer for the next frame.
void SetSvcLayerId(
const ScalableVideoController::LayerFrameConfig& layer_frame);
- // Configures the encoder which buffers next frame updates and can reference.
+ // Configures the encoder which buffers next frame updates and can
+ // reference.
void SetSvcRefFrameConfig(
const ScalableVideoController::LayerFrameConfig& layer_frame);
// If pixel format doesn't match, then reallocate.
@@ -143,7 +169,11 @@
// will be the input resolution.
void AdjustScalingFactorsForTopActiveLayer();
- using EncodeResult = std::variant<aom_codec_err_t, EncodedImage>;
+ EncoderSpeedController::ReferenceClass AsSpeedControllerFrameType(
+ const ScalableVideoController::LayerFrameConfig& layer_frame) const;
+
+ // Returns frame interval, compensated for relative pixel count allocation.
+ TimeDelta GetFrameInterval(int spatial_index) const;
// Duration is specified in ticks based on aom_codec_enc_cfg_t::g_timebase,
// in practice that that is kVideoPayloadTypeFrequency (90kHz).
@@ -186,6 +216,13 @@
FrameSampler psnr_frame_sampler_;
const bool drop_repeat_frames_on_enhancement_layers_;
std::map<int, uint32_t> last_encoded_timestamp_by_sid_;
+
+ const EncoderSpeedExperiment encoder_speed_experiment_;
+ // One speed controller per spatial layer.
+ std::vector<std::unique_ptr<webrtc::EncoderSpeedController>>
+ speed_controllers_;
+ // Don't use when setting input frame timestamps!
+ Clock* const realtime_clock_;
};
int32_t VerifyCodecSettings(const VideoCodec& codec_settings) {
@@ -231,7 +268,9 @@
psnr_experiment_(env.field_trials()),
psnr_frame_sampler_(psnr_experiment_.SamplingInterval()),
drop_repeat_frames_on_enhancement_layers_(env.field_trials().IsEnabled(
- "WebRTC-LibaomAv1Encoder-DropRepeatFramesOnEnhancementLayers")) {}
+ "WebRTC-LibaomAv1Encoder-DropRepeatFramesOnEnhancementLayers")),
+ encoder_speed_experiment_(env.field_trials()),
+ realtime_clock_(Clock::GetRealTimeClock()) {}
LibaomAv1Encoder::~LibaomAv1Encoder() {
Release();
@@ -339,6 +378,7 @@
SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_TPL_MODEL, 0);
SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_DELTAQ_MODE, 0);
SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_ORDER_HINT, 0);
+ // AQ_MODE = 3 enables cyclic refresh.
SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_AQ_MODE, 3);
SET_ENCODER_PARAM_OR_RETURN_ERROR(AOME_SET_MAX_INTRA_BITRATE_PCT, 300);
SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_COEFF_COST_UPD_FREQ, 3);
@@ -390,6 +430,31 @@
SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_POSTENCODE_DROP_RTC, 1);
}
+ if (encoder_speed_experiment_.IsDynamicSpeedEnabled()) {
+ LibaomSpeedConfigFactory speed_config_factory(
+ codec_settings->GetVideoEncoderComplexity(), codec_settings->mode);
+
+ if (SvcEnabled()) {
+ for (int si = 0; si < svc_params_->number_spatial_layers; ++si) {
+ EncoderSpeedController::Config speed_config =
+ speed_config_factory.GetSpeedConfig(
+ encoder_settings_.spatialLayers[si].width,
+ encoder_settings_.spatialLayers[si].height,
+ svc_controller_->StreamConfig().num_temporal_layers);
+
+ speed_controllers_.push_back(
+ EncoderSpeedController::Create(speed_config, GetFrameInterval(si)));
+ }
+ } else {
+ EncoderSpeedController::Config speed_config =
+ speed_config_factory.GetSpeedConfig(encoder_settings_.width,
+ encoder_settings_.height,
+ /*num_temporal_layers=*/1);
+ speed_controllers_.push_back(EncoderSpeedController::Create(
+ speed_config, GetFrameInterval(/*spatial_index=*/0)));
+ }
+ }
+
return WEBRTC_VIDEO_CODEC_OK;
}
@@ -408,6 +473,7 @@
// Only positive speeds, range for real-time coding currently is: 6 - 10.
// Speed 11 is used for screen sharing.
// Lower means slower/better quality, higher means fastest/lower quality.
+// Note: not used if dynamic speed controller is enabled.
int LibaomAv1Encoder::GetCpuSpeed(int width, int height) {
if (!settings_.max_pixel_count_to_cpu_speed.empty()) {
if (auto it =
@@ -652,6 +718,49 @@
}
}
+// Maps `layer_frame` onto the reference class used by the speed controller:
+// keyframes -> kKey, temporal id 0 -> kMain, the top temporal layer (when SVC
+// params are set) -> kNoneReference, anything else -> kIntermediate.
+EncoderSpeedController::ReferenceClass
+LibaomAv1Encoder::AsSpeedControllerFrameType(
+    const ScalableVideoController::LayerFrameConfig& layer_frame) const {
+  using ReferenceClass = EncoderSpeedController::ReferenceClass;
+
+  if (layer_frame.IsKeyframe()) {
+    return ReferenceClass::kKey;
+  }
+
+  const int temporal_id = layer_frame.TemporalId();
+  if (temporal_id == 0) {
+    return ReferenceClass::kMain;
+  }
+
+  const bool is_top_temporal_layer =
+      svc_params_ && temporal_id == svc_params_->number_temporal_layers - 1;
+  return is_top_temporal_layer ? ReferenceClass::kNoneReference
+                               : ReferenceClass::kIntermediate;
+}
+
+// Returns the frame interval available to spatial layer `spatial_index`.
+// Without SVC this is the plain frame interval. With SVC the interval is split
+// between the spatial layers in proportion to their pixel counts.
+TimeDelta LibaomAv1Encoder::GetFrameInterval(int spatial_index) const {
+  // Use the configured max frame rate while `framerate_fps_` is zero.
+  const auto fps =
+      framerate_fps_ == 0 ? encoder_settings_.maxFramerate : framerate_fps_;
+  const TimeDelta frame_interval = TimeDelta::Seconds(1) / fps;
+
+  if (!SvcEnabled()) {
+    return frame_interval;
+  }
+
+  // Each spatial layer gets a time slice proportional to its share of the
+  // total pixel count. E.g. with QVGA + VGA, QVGA is allocated 20% of the
+  // encode time and VGA 80%, since VGA has 4x the number of pixels.
+  const auto pixel_count = [this](int si) {
+    return encoder_settings_.spatialLayers[si].width *
+           encoder_settings_.spatialLayers[si].height;
+  };
+
+  int total_pixel_count = 0;
+  for (int si = 0; si < svc_params_->number_spatial_layers; ++si) {
+    total_pixel_count += pixel_count(si);
+  }
+
+  const double layer_share =
+      static_cast<double>(pixel_count(spatial_index)) / total_pixel_count;
+  return frame_interval * layer_share;
+}
+
int32_t LibaomAv1Encoder::Encode(
const VideoFrame& frame,
const std::vector<VideoFrameType>* frame_types) {
@@ -792,7 +901,7 @@
svc_params_ ? svc_params_->number_spatial_layers : 1;
auto next_layer_frame = layer_frames.begin();
std::vector<std::pair<EncodedImage, CodecSpecificInfo>> encoded_images;
- for (size_t i = 0; i < num_spatial_layers; ++i) {
+ for (size_t sid = 0; sid < num_spatial_layers; ++sid) {
// The libaom AV1 encoder requires that `aom_codec_encode` is called for
// every spatial layer, even if the configured bitrate for that layer is
// zero. For zero bitrate spatial layers no frames will be produced.
@@ -800,18 +909,24 @@
non_encoded_layer_frame;
ScalableVideoController::LayerFrameConfig* layer_frame;
if (next_layer_frame != layer_frames.end() &&
- next_layer_frame->SpatialId() == static_cast<int>(i)) {
+ next_layer_frame->SpatialId() == static_cast<int>(sid)) {
layer_frame = &*next_layer_frame;
++next_layer_frame;
} else {
// For layers that are not encoded only the spatial id matters.
- non_encoded_layer_frame.emplace().S(i);
+ non_encoded_layer_frame.emplace().S(sid);
layer_frame = &*non_encoded_layer_frame;
}
const bool end_of_picture = (next_layer_frame == layer_frames.end());
aom_enc_frame_flags_t flags =
layer_frame->IsKeyframe() ? AOM_EFLAG_FORCE_KF : 0;
+
+ if (SvcEnabled()) {
+ SetSvcLayerId(*layer_frame);
+ SetSvcRefFrameConfig(*layer_frame);
+ }
+
#if defined(WEBRTC_ENCODER_PSNR_STATS) && defined(AOM_EFLAG_CALCULATE_PSNR)
if (psnr_experiment_.IsEnabled() &&
psnr_frame_sampler_.ShouldBeSampled(frame)) {
@@ -819,38 +934,73 @@
}
#endif
- if (SvcEnabled()) {
- SetSvcLayerId(*layer_frame);
- SetSvcRefFrameConfig(*layer_frame);
- }
+ if (!speed_controllers_.empty()) {
+ RTC_DCHECK_GT(speed_controllers_.size(), sid);
+ EncoderSpeedController& speed_controller = *speed_controllers_[sid];
- EncodeResult result = DoEncode(duration, flags, layer_frame);
- if (aom_codec_err_t* status = std::get_if<aom_codec_err_t>(&result);
- status != nullptr) {
- if (*status == AOM_CODEC_OK) {
- // AOM_CODEC_OK means success with no image, so do nothing.
- continue;
- } else {
- RTC_LOG(LS_WARNING) << "LibaomAv1Encoder::Encode returned " << status
- << " on aom_codec_encode.";
+ EncoderSpeedController::FrameEncodingInfo frame_info{
+ .reference_type = AsSpeedControllerFrameType(*layer_frame),
+ .is_repeat_frame = frame.is_repeat_frame()};
+ EncoderSpeedController::EncodeSettings settings =
+ speed_controller.GetEncodeSettings(frame_info);
+
+ SET_ENCODER_PARAM_OR_RETURN_ERROR(AOME_SET_CPUUSED, settings.speed);
+ EncodeResult output = DoEncode(duration, flags, layer_frame);
+ if (output.status_code != AOM_CODEC_OK) {
+ RTC_LOG(LS_WARNING)
+ << "LibaomAv1Encoder::Encode returned error: '"
+ << aom_codec_err_to_string(output.status_code) << "'.";
return WEBRTC_VIDEO_CODEC_ERROR;
}
+
+ if (!output.encoded_image.has_value()) {
+ // Frame dropped, presumably by rate controller. This is not an error.
+ continue;
+ }
+
+ RTC_DCHECK(output.encoded_image.has_value());
+
+ speed_controller.OnEncodedFrame(
+ ToSpeedControllerEncodeResult(output, frame_info, settings.speed));
+
+ RTC_DCHECK_GT(output.encoded_image->size(), 0u);
+ PopulateEncodedImageFromVideoFrame(frame, *output.encoded_image);
+ CodecSpecificInfo codec_specifics = CreateCodecSpecificInfo(
+ *output.encoded_image, *layer_frame, end_of_picture);
+
+ if (non_encoded_layer_frame) {
+ continue;
+ }
+
+ encoded_images.emplace_back(std::move(*output.encoded_image),
+ std::move(codec_specifics));
+ } else {
+ // No speed controller used.
+ EncodeResult output = DoEncode(duration, flags, layer_frame);
+ if (output.status_code != AOM_CODEC_OK) {
+ RTC_LOG(LS_WARNING)
+ << "LibaomAv1Encoder::Encode returned error: '"
+ << aom_codec_err_to_string(output.status_code) << "'.";
+ return WEBRTC_VIDEO_CODEC_ERROR;
+ }
+ if (!output.encoded_image.has_value()) {
+ // Status code OK but no image - the encoder dropped the frame,
+ // presumably due to rate control. This is not an error.
+ continue;
+ }
+
+ if (non_encoded_layer_frame) {
+ continue;
+ }
+
+ RTC_DCHECK_GT(output.encoded_image->size(), 0u);
+ PopulateEncodedImageFromVideoFrame(frame, *output.encoded_image);
+ CodecSpecificInfo codec_specifics = CreateCodecSpecificInfo(
+ *output.encoded_image, *layer_frame, end_of_picture);
+
+ encoded_images.emplace_back(std::move(*output.encoded_image),
+ std::move(codec_specifics));
}
-
- if (non_encoded_layer_frame) {
- continue;
- }
-
- RTC_DCHECK(std::holds_alternative<EncodedImage>(result));
- EncodedImage encoded_image = std::get<EncodedImage>(std::move(result));
-
- RTC_DCHECK_GT(encoded_image.size(), 0u);
- PopulateEncodedImageFromVideoFrame(frame, encoded_image);
- CodecSpecificInfo codec_specifics =
- CreateCodecSpecificInfo(encoded_image, *layer_frame, end_of_picture);
-
- encoded_images.emplace_back(std::move(encoded_image),
- std::move(codec_specifics));
}
if (!encoded_images.empty()) {
@@ -867,23 +1017,29 @@
return WEBRTC_VIDEO_CODEC_OK;
}
-LibaomAv1Encoder::EncodeResult LibaomAv1Encoder::DoEncode(
+EncodeResult LibaomAv1Encoder::DoEncode(
uint32_t duration,
aom_enc_frame_flags_t flags,
ScalableVideoController::LayerFrameConfig* layer_frame) {
// Encode a frame. The presentation timestamp `pts` should not use real
// timestamps from frames or the wall clock, as that can cause the rate
// controller to misbehave.
- aom_codec_err_t ret =
+ EncodeResult output;
+
+ Timestamp start_time = realtime_clock_->CurrentTime();
+ output.status_code =
aom_codec_encode(&ctx_, frame_for_encode_, timestamp_, duration, flags);
- if (ret != AOM_CODEC_OK) {
- return ret;
+ output.encode_time = realtime_clock_->CurrentTime() - start_time;
+
+ if (output.status_code != AOM_CODEC_OK) {
+ return output;
}
// Get encoded image data.
aom_codec_iter_t iter = nullptr;
int data_pkt_count = 0;
- EncodedImage encoded_image;
+ output.encoded_image.emplace();
+ EncodedImage& encoded_image = *output.encoded_image;
const aom_codec_cx_pkt_t* pkt = nullptr;
while ((pkt = aom_codec_get_cx_data(&ctx_, &iter)) != nullptr) {
if (pkt->kind == AOM_CODEC_CX_FRAME_PKT && pkt->data.frame.sz > 0) {
@@ -891,7 +1047,8 @@
RTC_LOG(LS_WARNING) << "LibaomAv1Encoder::Encoder returned more than "
"one data packet for an input video frame.";
Release();
- return AOM_CODEC_ERROR;
+ output.status_code = AOM_CODEC_ERROR;
+ return output;
}
encoded_image.SetEncodedData(EncodedImageBuffer::Create(
/*data=*/static_cast<const uint8_t*>(pkt->data.frame.buf),
@@ -924,7 +1081,8 @@
if (!SetEncoderControlParameters(AOME_GET_LAST_QUANTIZER,
&encoded_image.qp_)) {
RTC_LOG(LS_WARNING) << "Unable to fetch QP for frame.";
- return AOM_CODEC_ERROR;
+ output.status_code = AOM_CODEC_ERROR;
+ return output;
}
++data_pkt_count;
@@ -937,11 +1095,11 @@
}
if (encoded_image.size() == 0) {
- // Encode success, but no image produced.
- return AOM_CODEC_OK;
+ // Encode success, but no image produced. Frame was just dropped.
+ output.encoded_image.reset();
}
- return encoded_image;
+ return output;
}
CodecSpecificInfo LibaomAv1Encoder::CreateCodecSpecificInfo(
@@ -1019,6 +1177,9 @@
}
framerate_fps_ = parameters.framerate_fps;
+ for (size_t si = 0; si < speed_controllers_.size(); ++si) {
+ speed_controllers_[si]->SetFrameInterval(GetFrameInterval(si));
+ }
rates_configured_ = true;
}
diff --git a/modules/video_coding/codecs/av1/libaom_speed_config_factory.cc b/modules/video_coding/codecs/av1/libaom_speed_config_factory.cc
new file mode 100644
index 0000000..7194262
--- /dev/null
+++ b/modules/video_coding/codecs/av1/libaom_speed_config_factory.cc
@@ -0,0 +1,134 @@
+// Copyright (c) 2025 The WebRTC project authors. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the LICENSE file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+
+#include "modules/video_coding/codecs/av1/libaom_speed_config_factory.h"
+
+#include <algorithm>
+#include <optional>
+
+#include "api/video_codecs/encoder_speed_controller.h"
+#include "api/video_codecs/video_codec.h"
+
+namespace webrtc {
+
+namespace {
+
+// Number of entries in `kAllLevels`.
+constexpr int kNumLevels = 15;
+
+// All available speed levels, ordered from the slowest (index 0) to the
+// fastest (index kNumLevels - 1). Each level holds four encoder speeds,
+// indexed by reference class: [0] keyframe, [1] base layer, [2] middle
+// temporal layer (intermediate), [3] upper (non-reference) layer - plus an
+// optional `min_qp`.
+// The table is only read and copied from, so it is declared const.
+const EncoderSpeedController::Config::SpeedLevel kAllLevels[kNumLevels] = {
+    {.speeds = {5, 5, 6, 6}, .min_qp = 31},
+    {.speeds = {5, 6, 7, 7}, .min_qp = 30},
+    {.speeds = {5, 6, 8, 10}, .min_qp = 30},
+    {.speeds = {5, 6, 9, 11}, .min_qp = 29},
+    {.speeds = {5, 7, 7, 7}, .min_qp = 29},
+    {.speeds = {7, 7, 8, 8}, .min_qp = 28},
+    {.speeds = {7, 7, 8, 9}, .min_qp = 28},
+    {.speeds = {7, 7, 10, 10}, .min_qp = 28},
+    {.speeds = {7, 7, 10, 11}, .min_qp = 27},
+    {.speeds = {7, 7, 11, 11}, .min_qp = 26},
+    {.speeds = {7, 8, 9, 9}, .min_qp = 26},
+    {.speeds = {7, 9, 9, 11}, .min_qp = 25},
+    {.speeds = {8, 9, 10, 11}, .min_qp = 25},
+    {.speeds = {9, 10, 11, 11}, .min_qp = std::nullopt},
+    {.speeds = {10, 11, 11, 11}, .min_qp = std::nullopt}};
+
+// Returns true if levels `a` and `b` are considered to have the same speeds
+// for a stream with `num_temporal_layers` temporal layers.
+bool HasSameSpeeds(const EncoderSpeedController::Config::SpeedLevel& a,
+                   const EncoderSpeedController::Config::SpeedLevel& b,
+                   int num_temporal_layers) {
+  // Keyframe and base layer speeds are always compared.
+  const bool same_key_and_base =
+      a.speeds[0] == b.speeds[0] && a.speeds[1] == b.speeds[1];
+  // The upper (non-reference) layer speed is only compared when there is more
+  // than one temporal layer.
+  const bool same_upper =
+      num_temporal_layers <= 1 || a.speeds[3] == b.speeds[3];
+  // Middle temporal layer (intermediate class).
+  const bool same_middle = a.speeds[2] == b.speeds[2];
+
+  return same_key_and_base && same_upper && same_middle;
+}
+
+// Appends up to `num_levels` entries from the fast end of `kAllLevels` to
+// `config.speed_levels`, then reverses `config.speed_levels`. A level is
+// skipped when HasSameSpeeds() reports it equal to the most recently added
+// level for `num_temporal_layers` temporal layers (e.g. same base-layer speed
+// for single-layer).
+void AddSpeedLevels(int num_levels,
+                    int num_temporal_layers,
+                    EncoderSpeedController::Config& config) {
+  config.speed_levels.reserve(num_levels);
+
+  const int last_index = kNumLevels - 1;
+  const int first_index = kNumLevels - num_levels;
+  // Walk from the last table entry down to `first_index`.
+  for (int index = last_index; index >= first_index; --index) {
+    const auto& candidate = kAllLevels[index];
+    // The last table entry is always added; there is nothing to compare to.
+    const bool is_duplicate =
+        index != last_index &&
+        HasSameSpeeds(candidate, config.speed_levels.back(),
+                      num_temporal_layers);
+    if (is_duplicate) {
+      continue;
+    }
+    config.speed_levels.push_back(candidate);
+  }
+
+  // Levels were collected back to front; flip them around.
+  std::reverse(config.speed_levels.begin(), config.speed_levels.end());
+}
+
+} // namespace
+
+LibaomSpeedConfigFactory::LibaomSpeedConfigFactory(
+ VideoCodecComplexity complexity,
+ VideoCodecMode mode)
+ : complexity_(complexity), mode_(mode) {}
+
+// Builds the speed controller config for a stream of `width` x `height`
+// pixels with `num_temporal_layers` temporal layers.
+// The codec complexity (and screenshare mode) decides how many speed levels
+// are made available; the resolution only decides at which of those levels
+// the controller starts.
+EncoderSpeedController::Config LibaomSpeedConfigFactory::GetSpeedConfig(
+    int width,
+    int height,
+    int num_temporal_layers) const {
+  EncoderSpeedController::Config config;
+  const int num_pixels = width * height;
+
+  // Number of levels to use, counted from the fast end of `kAllLevels`: the
+  // slowest level made available is `kAllLevels[kNumLevels - num_levels]`.
+  int num_levels = 0;
+  switch (complexity_) {
+    case VideoCodecComplexity::kComplexityLow:
+      // Level 9x10x11x11 and up.
+      num_levels = 2;
+      break;
+    case VideoCodecComplexity::kComplexityNormal:
+      // Level 8x9x10x11 and up.
+      num_levels = 3;
+      break;
+    case VideoCodecComplexity::kComplexityHigh:
+      // Level 7x7x10x10 and up.
+      num_levels = 8;
+      break;
+    case VideoCodecComplexity::kComplexityHigher:
+      // Level 5x6x8x10 and up below 720p (table index 2 => 13 levels),
+      // level 5x7x7x7 and up otherwise (table index 4 => 11 levels).
+      if (num_pixels < 1280 * 720) {
+        num_levels = 13;
+      } else {
+        num_levels = 11;
+      }
+      break;
+    case VideoCodecComplexity::kComplexityMax:
+      // All levels.
+      num_levels = kNumLevels;
+      break;
+  }
+
+  // Screensharing uses one level less, but always at least one.
+  if (mode_ == VideoCodecMode::kScreensharing) {
+    num_levels = std::max(1, num_levels - 1);
+  }
+
+  AddSpeedLevels(num_levels, num_temporal_layers, config);
+
+  // Don't cap speed based on resolution - only adjust the start value.
+  // Larger resolutions start further from the last (fastest) level.
+  const int available_speed_levels = config.speed_levels.size();
+  if (num_pixels > 1920 * 1080) {
+    config.start_speed_index = std::max(available_speed_levels - 4, 0);
+  } else if (num_pixels > 1280 * 720) {
+    config.start_speed_index = std::max(available_speed_levels - 3, 0);
+  } else if (num_pixels > 640 * 360) {
+    config.start_speed_index = std::max(available_speed_levels - 2, 0);
+  } else {
+    config.start_speed_index = std::max(available_speed_levels - 1, 0);
+  }
+
+  return config;
+}
+
+} // namespace webrtc
diff --git a/modules/video_coding/codecs/av1/libaom_speed_config_factory.h b/modules/video_coding/codecs/av1/libaom_speed_config_factory.h
new file mode 100644
index 0000000..6631da7
--- /dev/null
+++ b/modules/video_coding/codecs/av1/libaom_speed_config_factory.h
@@ -0,0 +1,33 @@
+// Copyright (c) 2025 The WebRTC project authors. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the LICENSE file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+
+#ifndef MODULES_VIDEO_CODING_CODECS_AV1_LIBAOM_SPEED_CONFIG_FACTORY_H_
+#define MODULES_VIDEO_CODING_CODECS_AV1_LIBAOM_SPEED_CONFIG_FACTORY_H_
+
+#include "api/video_codecs/encoder_speed_controller.h"
+#include "api/video_codecs/video_codec.h"
+
+namespace webrtc {
+
+// Creates `EncoderSpeedController::Config` instances for the libaom AV1
+// encoder, based on a codec complexity setting and a codec mode.
+class LibaomSpeedConfigFactory {
+ public:
+ // `complexity` and `mode` are stored and used by every GetSpeedConfig() call.
+ LibaomSpeedConfigFactory(VideoCodecComplexity complexity,
+ VideoCodecMode mode);
+
+ // Returns the speed controller config for a stream with the given resolution
+ // (`width` x `height`, in pixels) and number of temporal layers.
+ EncoderSpeedController::Config GetSpeedConfig(int width,
+ int height,
+ int num_temporal_layers) const;
+
+ private:
+ const VideoCodecComplexity complexity_;
+ const VideoCodecMode mode_;
+};
+
+} // namespace webrtc
+
+#endif // MODULES_VIDEO_CODING_CODECS_AV1_LIBAOM_SPEED_CONFIG_FACTORY_H_
diff --git a/modules/video_coding/codecs/av1/libaom_speed_config_factory_unittest.cc b/modules/video_coding/codecs/av1/libaom_speed_config_factory_unittest.cc
new file mode 100644
index 0000000..ae4305e
--- /dev/null
+++ b/modules/video_coding/codecs/av1/libaom_speed_config_factory_unittest.cc
@@ -0,0 +1,173 @@
+// Copyright (c) 2025 The WebRTC project authors. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the LICENSE file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+
+#include "modules/video_coding/codecs/av1/libaom_speed_config_factory.h"
+
+#include <algorithm>
+#include <cstddef>
+#include <set>
+
+#include "api/video_codecs/encoder_speed_controller.h"
+#include "api/video_codecs/video_codec.h"
+#include "rtc_base/checks.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+
+// Strict ordering for SpeedLevel so it can be stored in a std::set: ordered by
+// `speeds` first, with `min_qp` as the tie-breaker.
+bool operator<(const EncoderSpeedController::Config::SpeedLevel& lhs,
+               const EncoderSpeedController::Config::SpeedLevel& rhs) {
+  const bool same_speeds = !(lhs.speeds != rhs.speeds);
+  return same_speeds ? lhs.min_qp < rhs.min_qp : lhs.speeds < rhs.speeds;
+}
+
+namespace {
+
+using ::testing::Values;
+
+// The number of available speed levels must not shrink as the complexity
+// setting is raised.
+TEST(LibaomSpeedConfigFactoryTest, NumLevelsIncreaseWithComplexity) {
+  // Builds the 640x360, 3 temporal layer realtime config for `complexity`.
+  const auto make_config = [](VideoCodecComplexity complexity) {
+    LibaomSpeedConfigFactory factory(complexity,
+                                     VideoCodecMode::kRealtimeVideo);
+    return factory.GetSpeedConfig(640, 360, 3);
+  };
+
+  EncoderSpeedController::Config config_low =
+      make_config(VideoCodecComplexity::kComplexityLow);
+  EncoderSpeedController::Config config_normal =
+      make_config(VideoCodecComplexity::kComplexityNormal);
+  EncoderSpeedController::Config config_high =
+      make_config(VideoCodecComplexity::kComplexityHigh);
+  EncoderSpeedController::Config config_higher =
+      make_config(VideoCodecComplexity::kComplexityHigher);
+  EncoderSpeedController::Config config_max =
+      make_config(VideoCodecComplexity::kComplexityMax);
+
+  EXPECT_GE(config_normal.speed_levels.size(), config_low.speed_levels.size());
+  EXPECT_GE(config_high.speed_levels.size(), config_normal.speed_levels.size());
+  EXPECT_GE(config_higher.speed_levels.size(), config_high.speed_levels.size());
+  EXPECT_GE(config_max.speed_levels.size(), config_higher.speed_levels.size());
+}
+
+// Within each level, speeds must be non-decreasing from the key class through
+// main and intermediate to the non-reference class.
+TEST(LibaomSpeedConfigFactoryTest, SpeedsAreMonotonic) {
+  using ReferenceClass = EncoderSpeedController::ReferenceClass;
+  constexpr int kKeyIndex = static_cast<int>(ReferenceClass::kKey);
+  constexpr int kMainIndex = static_cast<int>(ReferenceClass::kMain);
+  constexpr int kIntermediateIndex =
+      static_cast<int>(ReferenceClass::kIntermediate);
+  constexpr int kNoneReferenceIndex =
+      static_cast<int>(ReferenceClass::kNoneReference);
+
+  LibaomSpeedConfigFactory factory(VideoCodecComplexity::kComplexityMax,
+                                   VideoCodecMode::kRealtimeVideo);
+  EncoderSpeedController::Config config = factory.GetSpeedConfig(1280, 720, 3);
+
+  for (const auto& level : config.speed_levels) {
+    // Lower reference class index means more important, so speed should be
+    // lower or equal.
+    EXPECT_LE(level.speeds[kKeyIndex], level.speeds[kMainIndex]);
+    EXPECT_LE(level.speeds[kMainIndex], level.speeds[kIntermediateIndex]);
+    EXPECT_LE(level.speeds[kIntermediateIndex],
+              level.speeds[kNoneReferenceIndex]);
+  }
+}
+
+// Test that keyframe and base layer speeds are non-decreasing from one level
+// to the next.
+TEST(LibaomSpeedConfigFactoryTest, KeyAndMainSpeedsIncreaseBetweenLevels) {
+  using ReferenceClass = EncoderSpeedController::ReferenceClass;
+  constexpr int kKeyIndex = static_cast<int>(ReferenceClass::kKey);
+  constexpr int kMainIndex = static_cast<int>(ReferenceClass::kMain);
+
+  LibaomSpeedConfigFactory factory(VideoCodecComplexity::kComplexityMax,
+                                   VideoCodecMode::kRealtimeVideo);
+  EncoderSpeedController::Config config = factory.GetSpeedConfig(1280, 720, 3);
+
+  // `i + 1 < size()` rather than `i < size() - 1`: the latter underflows in
+  // size_t and indexes out of bounds if the level list is ever empty.
+  for (size_t i = 0; i + 1 < config.speed_levels.size(); ++i) {
+    const auto& current_level = config.speed_levels[i];
+    const auto& next_level = config.speed_levels[i + 1];
+    EXPECT_LE(current_level.speeds[kKeyIndex], next_level.speeds[kKeyIndex]);
+    EXPECT_LE(current_level.speeds[kMainIndex], next_level.speeds[kMainIndex]);
+  }
+}
+
+// Parameters for the resolution dependent start-index test.
+struct ResolutionParams {
+ // Input resolution in pixels.
+ int width;
+ int height;
+ // The start index is expected to be `speed_levels.size()` minus this offset
+ // (clamped to 0), i.e. an offset of 1 means the last level.
+ int expected_start_index_offset;
+};
+
+// Value-parameterized fixture; each parameter is one resolution together with
+// the expected start index offset for that resolution.
+class LibaomSpeedConfigFactoryResolutionTest
+ : public ::testing::TestWithParam<ResolutionParams> {};
+
+// Covers 180p through 1440p. The expected offset from the end of the level
+// list grows with resolution.
+INSTANTIATE_TEST_SUITE_P(All,
+ LibaomSpeedConfigFactoryResolutionTest,
+ Values(ResolutionParams{320, 180, 1},
+ ResolutionParams{640, 360, 1},
+ ResolutionParams{1280, 720, 2},
+ ResolutionParams{1920, 1080, 3},
+ ResolutionParams{2560, 1440, 4}));
+
+// The start speed index must sit `expected_start_index_offset` entries from
+// the end of the level list (never below index 0) for each resolution.
+TEST_P(LibaomSpeedConfigFactoryResolutionTest, GetSpeedConfigStartSpeedIndex) {
+  const ResolutionParams& params = GetParam();
+  LibaomSpeedConfigFactory factory(VideoCodecComplexity::kComplexityMax,
+                                   VideoCodecMode::kRealtimeVideo);
+  EncoderSpeedController::Config config =
+      factory.GetSpeedConfig(params.width, params.height, 3);
+
+  const int num_levels = static_cast<int>(config.speed_levels.size());
+  int expected_index =
+      std::max(0, num_levels - params.expected_start_index_offset);
+  EXPECT_EQ(config.start_speed_index, expected_index);
+}
+
+// Expects every speed level that `factory` produces for a 640x360 stream with
+// `num_temporal_layers` temporal layers (1 to 3) to be distinct.
+void CheckDistinctConfigs(const LibaomSpeedConfigFactory& factory,
+                          int num_temporal_layers) {
+  RTC_DCHECK_GT(num_temporal_layers, 0);
+  RTC_DCHECK_LE(num_temporal_layers, 3);
+
+  EncoderSpeedController::Config config =
+      factory.GetSpeedConfig(640, 360, num_temporal_layers);
+
+  // A set drops duplicates, so equal sizes mean all levels are distinct.
+  std::set<EncoderSpeedController::Config::SpeedLevel> unique_configs;
+  for (const auto& level : config.speed_levels) {
+    unique_configs.insert(level);
+  }
+  EXPECT_EQ(unique_configs.size(), config.speed_levels.size());
+}
+
+// All levels must be distinct with a single temporal layer.
+TEST(LibaomSpeedConfigFactoryTest, DistinctConfigs1Tl) {
+  CheckDistinctConfigs(
+      LibaomSpeedConfigFactory(VideoCodecComplexity::kComplexityMax,
+                               VideoCodecMode::kRealtimeVideo),
+      /*num_temporal_layers=*/1);
+}
+
+// All levels must be distinct with two temporal layers.
+TEST(LibaomSpeedConfigFactoryTest, DistinctConfigs2Tl) {
+  CheckDistinctConfigs(
+      LibaomSpeedConfigFactory(VideoCodecComplexity::kComplexityMax,
+                               VideoCodecMode::kRealtimeVideo),
+      /*num_temporal_layers=*/2);
+}
+
+// All levels must be distinct with three temporal layers.
+TEST(LibaomSpeedConfigFactoryTest, DistinctConfigs3Tl) {
+  CheckDistinctConfigs(
+      LibaomSpeedConfigFactory(VideoCodecComplexity::kComplexityMax,
+                               VideoCodecMode::kRealtimeVideo),
+      /*num_temporal_layers=*/3);
+}
+
+} // namespace
+} // namespace webrtc
diff --git a/modules/video_coding/utility/encoder_speed_controller_impl.cc b/modules/video_coding/utility/encoder_speed_controller_impl.cc
index 299a269..987f0ad 100644
--- a/modules/video_coding/utility/encoder_speed_controller_impl.cc
+++ b/modules/video_coding/utility/encoder_speed_controller_impl.cc
@@ -151,8 +151,7 @@
}
void EncoderSpeedControllerImpl::OnEncodedFrame(
- EncoderSpeedController::EncodeResults results,
- std::optional<EncodeResults> baseline_results) {
+ EncoderSpeedController::EncodeResults results) {
double encode_tims_ms = results.encode_time.us() / 1000.0;
if (results.frame_info.reference_type == ReferenceClass::kKey) {
encode_tims_ms /= kKeyframeEncodeTimeCompensator;
diff --git a/modules/video_coding/utility/encoder_speed_controller_impl.h b/modules/video_coding/utility/encoder_speed_controller_impl.h
index e84881e..9c7cbba8 100644
--- a/modules/video_coding/utility/encoder_speed_controller_impl.h
+++ b/modules/video_coding/utility/encoder_speed_controller_impl.h
@@ -12,7 +12,6 @@
#define MODULES_VIDEO_CODING_UTILITY_ENCODER_SPEED_CONTROLLER_IMPL_H_
#include <memory>
-#include <optional>
#include "api/units/time_delta.h"
#include "api/video_codecs/encoder_speed_controller.h"
@@ -41,11 +40,8 @@
// thereafter be configured with requested settings.
EncodeSettings GetEncodeSettings(FrameEncodingInfo frame_info) override;
- // Should be called after each frame has completed encoding. If a baseline
- // comparison speed was set in the `EncodeSettings`, the `baseline_results`
- // parameter should be set with the results corresponding to those settings.
- void OnEncodedFrame(EncodeResults results,
- std::optional<EncodeResults> baseline_results) override;
+ // Should be called after each frame has completed encoding.
+ void OnEncodedFrame(EncodeResults results) override;
const Config& config() const { return config_; }
diff --git a/modules/video_coding/utility/encoder_speed_controller_impl_unittest.cc b/modules/video_coding/utility/encoder_speed_controller_impl_unittest.cc
index 8b2bdeb..a887961 100644
--- a/modules/video_coding/utility/encoder_speed_controller_impl_unittest.cc
+++ b/modules/video_coding/utility/encoder_speed_controller_impl_unittest.cc
@@ -11,7 +11,6 @@
#include <optional>
#include "api/units/time_delta.h"
-#include "api/units/timestamp.h"
#include "api/video_codecs/encoder_speed_controller.h"
#include "test/gmock.h"
#include "test/gtest.h"
@@ -61,6 +60,7 @@
TEST(EncoderSpeedControllerTest, GetEncodeSettingsBaseLayers) {
EncoderSpeedController::Config config = GetDefaultConfig();
+ config.speed_levels[0].min_qp = 25; // Prevent dropping to speed 5 easily
auto controller = EncoderSpeedController::Create(config, kFrameInterval);
ASSERT_NE(controller, nullptr);
@@ -72,22 +72,18 @@
// Simulate high encode time to increase speed
for (int i = 0; i < 10; ++i) {
- controller->OnEncodedFrame({.speed = 6,
- .encode_time = kFrameInterval * 2,
+ controller->OnEncodedFrame({.encode_time = kFrameInterval * 0.90,
.qp = 30,
- .frame_info = frame_info},
- std::nullopt);
+ .frame_info = frame_info});
}
// Speed should increase to 7
EXPECT_EQ(controller->GetEncodeSettings(frame_info).speed, 7);
// Simulate low encode time to decrease speed
for (int i = 0; i < 20; ++i) {
- controller->OnEncodedFrame({.speed = 7,
- .encode_time = kFrameInterval / 10,
- .qp = 30,
- .frame_info = frame_info},
- std::nullopt);
+ controller->OnEncodedFrame({.encode_time = kFrameInterval * 0.10,
+ .qp = 20,
+ .frame_info = frame_info});
}
// Speed should decrease to 6
EXPECT_EQ(controller->GetEncodeSettings(frame_info).speed, 6);
@@ -141,12 +137,9 @@
.reference_type = ReferenceClass::kMain};
for (int i = 0; i < 20; ++i) {
- controller->OnEncodedFrame(
- {.speed = 7,
- .encode_time = kFrameInterval * 2, // High encode time
- .qp = 30,
- .frame_info = frame_info},
- std::nullopt);
+ controller->OnEncodedFrame({.encode_time = kFrameInterval * 0.95,
+ .qp = 30,
+ .frame_info = frame_info});
}
EXPECT_EQ(controller->GetEncodeSettings(frame_info).speed,
@@ -163,12 +156,7 @@
.reference_type = ReferenceClass::kMain};
for (int i = 0; i < 20; ++i) {
- controller->OnEncodedFrame(
- {.speed = 5,
- .encode_time = kFrameInterval / 10, // Low encode time
- .qp = 30,
- .frame_info = frame_info},
- std::nullopt);
+ controller->OnEncodedFrame({.speed = 5, .frame_info = frame_info});
}
EXPECT_EQ(controller->GetEncodeSettings(frame_info).speed,
@@ -189,11 +177,9 @@
// Simulate low QP, normal encode time
for (int i = 0; i < 20; ++i) {
- controller->OnEncodedFrame({.speed = 6,
- .encode_time = kFrameInterval * 0.6,
+ controller->OnEncodedFrame({.encode_time = kFrameInterval * 0.60,
.qp = 10,
- .frame_info = frame_info},
- std::nullopt);
+ .frame_info = frame_info});
}
// Speed should increase to 7 due to low QP
EXPECT_EQ(controller->GetEncodeSettings(frame_info).speed, 7);
@@ -213,11 +199,9 @@
// Simulate low encode time but also low QP
for (int i = 0; i < 20; ++i) {
- controller->OnEncodedFrame({.speed = 6,
- .encode_time = kFrameInterval / 10,
+ controller->OnEncodedFrame({.encode_time = kFrameInterval * 0.10,
.qp = 10,
- .frame_info = frame_info},
- std::nullopt);
+ .frame_info = frame_info});
}
// Speed should NOT decrease to 5 because QP is below the next level's min_qp
EXPECT_EQ(controller->GetEncodeSettings(frame_info).speed, 6);