Add libaom AV1 integration of dynamic speed controller. This CL creates a config factory for the libaom-based AV1 encoder and integrates the use of the speed controller in the encoder wrapper. A (for now) unused parameter in the API was removed. Bug: webrtc:443906251 Change-Id: I3c4522ab71446e34d4017b558d961803f26b2e38 Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/422800 Reviewed-by: Ilya Nikolaevskiy <ilnik@webrtc.org> Commit-Queue: Erik Språng <sprang@webrtc.org> Cr-Commit-Position: refs/heads/main@{#46123}
diff --git a/api/video_codecs/encoder_speed_controller.h b/api/video_codecs/encoder_speed_controller.h index 69bcd87..87ceaef 100644 --- a/api/video_codecs/encoder_speed_controller.h +++ b/api/video_codecs/encoder_speed_controller.h
@@ -109,12 +109,8 @@ // thereafter be configured with requested settings. virtual EncodeSettings GetEncodeSettings(FrameEncodingInfo frame_info) = 0; - // Should be called after each frame has completed encoding. If a baseline - // comparison speed was set in the `EncodeSettings`, the `baseline_results` - // parameter should be set with the results corresponding to those settings. - virtual void OnEncodedFrame( - EncodeResults results, - std::optional<EncodeResults> baseline_results) = 0; + // Should be called after each frame has completed encoding. + virtual void OnEncodedFrame(EncodeResults results) = 0; }; } // namespace webrtc
diff --git a/modules/video_coding/codecs/av1/BUILD.gn b/modules/video_coding/codecs/av1/BUILD.gn index ed40a7b..8b635ef 100644 --- a/modules/video_coding/codecs/av1/BUILD.gn +++ b/modules/video_coding/codecs/av1/BUILD.gn
@@ -59,18 +59,22 @@ "libaom_av1_encoder.h", ] deps = [ + ":libaom_speed_config_factory", "../..:video_codec_interface", "../..:video_coding_utility", "../../:frame_sampler", "../../../../api:field_trials_view", "../../../../api:scoped_refptr", "../../../../api/environment", + "../../../../api/units:time_delta", + "../../../../api/units:timestamp", "../../../../api/video:encoded_image", "../../../../api/video:render_resolution", "../../../../api/video:video_codec_constants", "../../../../api/video:video_frame", "../../../../api/video:video_frame_type", "../../../../api/video:video_rtp_headers", + "../../../../api/video_codecs:encoder_speed_controller_factory", "../../../../api/video_codecs:scalability_mode", "../../../../api/video_codecs:video_codecs_api", "../../../../common_video", @@ -80,7 +84,9 @@ "../../../../rtc_base:logging", "../../../../rtc_base:rtc_numerics", "../../../../rtc_base/experiments:encoder_info_settings", + "../../../../rtc_base/experiments:encoder_speed_experiment", "../../../../rtc_base/experiments:psnr_experiment", + "../../../../system_wrappers", "../../svc:scalability_structures", "../../svc:scalable_video_controller", "//third_party/abseil-cpp/absl/algorithm:container", @@ -92,6 +98,21 @@ ] } +rtc_library("libaom_speed_config_factory") { + sources = [ + "libaom_speed_config_factory.cc", + "libaom_speed_config_factory.h", + ] + deps = [ + "../..:video_codec_interface", + "../..:video_coding_utility", + "../../../../api:field_trials_view", + "../../../../api/video_codecs:video_codecs_api", + "../../../../rtc_base:logging", + "../../../../rtc_base/experiments:psnr_experiment", + ] +} + if (rtc_include_tests) { rtc_library("video_coding_codecs_av1_tests") { testonly = true @@ -128,11 +149,16 @@ sources += [ "libaom_av1_encoder_unittest.cc", "libaom_av1_unittest.cc", + "libaom_speed_config_factory_unittest.cc", ] deps += [ ":libaom_av1_encoder", + ":libaom_speed_config_factory", "../..:encoded_video_frame_producer", + 
"../..:video_coding_utility", "../../../../api:create_frame_generator", + "../../../../api:field_trials", + "../../../../api:field_trials_view", "../../../../api:frame_generator_api", "../../../../api:mock_video_encoder", "../../../../api/units:data_size",
diff --git a/modules/video_coding/codecs/av1/libaom_av1_encoder.cc b/modules/video_coding/codecs/av1/libaom_av1_encoder.cc index 14a84f9..4c865ec 100644 --- a/modules/video_coding/codecs/av1/libaom_av1_encoder.cc +++ b/modules/video_coding/codecs/av1/libaom_av1_encoder.cc
@@ -16,7 +16,6 @@ #include <numeric> #include <optional> #include <utility> -#include <variant> #include <vector> #include "absl/algorithm/container.h" @@ -25,6 +24,8 @@ #include "api/environment/environment.h" #include "api/field_trials_view.h" #include "api/scoped_refptr.h" +#include "api/units/time_delta.h" +#include "api/units/timestamp.h" #include "api/video/encoded_image.h" #include "api/video/render_resolution.h" #include "api/video/video_codec_constants.h" @@ -34,11 +35,13 @@ #include "api/video/video_frame_buffer.h" #include "api/video/video_frame_type.h" #include "api/video/video_timing.h" +#include "api/video_codecs/encoder_speed_controller.h" #include "api/video_codecs/scalability_mode.h" #include "api/video_codecs/video_codec.h" #include "api/video_codecs/video_encoder.h" #include "common_video/generic_frame_descriptor/generic_frame_info.h" #include "modules/rtp_rtcp/include/rtp_rtcp_defines.h" +#include "modules/video_coding/codecs/av1/libaom_speed_config_factory.h" #include "modules/video_coding/include/video_codec_interface.h" #include "modules/video_coding/include/video_error_codes.h" #include "modules/video_coding/svc/create_scalability_structure.h" @@ -46,8 +49,10 @@ #include "modules/video_coding/utility/frame_sampler.h" #include "rtc_base/checks.h" #include "rtc_base/experiments/encoder_info_settings.h" +#include "rtc_base/experiments/encoder_speed_experiment.h" #include "rtc_base/experiments/psnr_experiment.h" #include "rtc_base/logging.h" +#include "system_wrappers/include/clock.h" #include "third_party/libaom/source/libaom/aom/aom_codec.h" #include "third_party/libaom/source/libaom/aom/aom_encoder.h" #include "third_party/libaom/source/libaom/aom/aom_image.h" @@ -94,6 +99,26 @@ encoded_image.rotation_ = frame.rotation(); encoded_image.SetColorSpace(frame.color_space()); } + +struct EncodeResult { + aom_codec_err_t status_code = AOM_CODEC_OK; + std::optional<EncodedImage> encoded_image; + TimeDelta encode_time = TimeDelta::Zero(); +}; + 
+EncoderSpeedController::EncodeResults ToSpeedControllerEncodeResult( + const EncodeResult& encode_result, + const EncoderSpeedController::FrameEncodingInfo& frame_info, + int speed) { + RTC_DCHECK(encode_result.encoded_image.has_value()); + const EncodedImage& image = *encode_result.encoded_image; + return EncoderSpeedController::EncodeResults{ + .speed = speed, + .encode_time = encode_result.encode_time, + .qp = image.qp_ / 4, // Use [0, 63] range instead of [0, 255]. + .frame_info = frame_info}; +} + class LibaomAv1Encoder final : public VideoEncoder { public: LibaomAv1Encoder(const Environment& env, LibaomAv1EncoderSettings settings); @@ -131,7 +156,8 @@ // Configures the encoder with layer for the next frame. void SetSvcLayerId( const ScalableVideoController::LayerFrameConfig& layer_frame); - // Configures the encoder which buffers next frame updates and can reference. + // Configures the encoder which buffers next frame updates and can + // reference. void SetSvcRefFrameConfig( const ScalableVideoController::LayerFrameConfig& layer_frame); // If pixel format doesn't match, then reallocate. @@ -143,7 +169,11 @@ // will be the input resolution. void AdjustScalingFactorsForTopActiveLayer(); - using EncodeResult = std::variant<aom_codec_err_t, EncodedImage>; + EncoderSpeedController::ReferenceClass AsSpeedControllerFrameType( + const ScalableVideoController::LayerFrameConfig& layer_frame) const; + + // Returns frame interval, compensated for relative pixel count allocation. + TimeDelta GetFrameInterval(int spatial_index) const; // Duration is specified in ticks based on aom_codec_enc_cfg_t::g_timebase, // in practice that that is kVideoPayloadTypeFrequency (90kHz). @@ -186,6 +216,13 @@ FrameSampler psnr_frame_sampler_; const bool drop_repeat_frames_on_enhancement_layers_; std::map<int, uint32_t> last_encoded_timestamp_by_sid_; + + const EncoderSpeedExperiment encoder_speed_experiment_; + // One speed controller per spatial layer. 
+ std::vector<std::unique_ptr<webrtc::EncoderSpeedController>> + speed_controllers_; + // Don't use when setting input frame timestamps! + Clock* const realtime_clock_; }; int32_t VerifyCodecSettings(const VideoCodec& codec_settings) { @@ -231,7 +268,9 @@ psnr_experiment_(env.field_trials()), psnr_frame_sampler_(psnr_experiment_.SamplingInterval()), drop_repeat_frames_on_enhancement_layers_(env.field_trials().IsEnabled( - "WebRTC-LibaomAv1Encoder-DropRepeatFramesOnEnhancementLayers")) {} + "WebRTC-LibaomAv1Encoder-DropRepeatFramesOnEnhancementLayers")), + encoder_speed_experiment_(env.field_trials()), + realtime_clock_(Clock::GetRealTimeClock()) {} LibaomAv1Encoder::~LibaomAv1Encoder() { Release(); @@ -339,6 +378,7 @@ SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_TPL_MODEL, 0); SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_DELTAQ_MODE, 0); SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_ORDER_HINT, 0); + // AQ_MODE = 3 enables cyclic refresh. SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_AQ_MODE, 3); SET_ENCODER_PARAM_OR_RETURN_ERROR(AOME_SET_MAX_INTRA_BITRATE_PCT, 300); SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_COEFF_COST_UPD_FREQ, 3); @@ -390,6 +430,31 @@ SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_POSTENCODE_DROP_RTC, 1); } + if (encoder_speed_experiment_.IsDynamicSpeedEnabled()) { + LibaomSpeedConfigFactory speed_config_factory( + codec_settings->GetVideoEncoderComplexity(), codec_settings->mode); + + if (SvcEnabled()) { + for (int si = 0; si < svc_params_->number_spatial_layers; ++si) { + EncoderSpeedController::Config speed_config = + speed_config_factory.GetSpeedConfig( + encoder_settings_.spatialLayers[si].width, + encoder_settings_.spatialLayers[si].height, + svc_controller_->StreamConfig().num_temporal_layers); + + speed_controllers_.push_back( + EncoderSpeedController::Create(speed_config, GetFrameInterval(si))); + } + } else { + EncoderSpeedController::Config speed_config = + speed_config_factory.GetSpeedConfig(encoder_settings_.width, + 
encoder_settings_.height, + /*num_temporal_layers=*/1); + speed_controllers_.push_back(EncoderSpeedController::Create( + speed_config, GetFrameInterval(/*spatial_index=*/0))); + } + } + return WEBRTC_VIDEO_CODEC_OK; } @@ -408,6 +473,7 @@ // Only positive speeds, range for real-time coding currently is: 6 - 10. // Speed 11 is used for screen sharing. // Lower means slower/better quality, higher means fastest/lower quality. +// Note: not used if dynamic speed controller is enabled. int LibaomAv1Encoder::GetCpuSpeed(int width, int height) { if (!settings_.max_pixel_count_to_cpu_speed.empty()) { if (auto it = @@ -652,6 +718,49 @@ } } +EncoderSpeedController::ReferenceClass +LibaomAv1Encoder::AsSpeedControllerFrameType( + const ScalableVideoController::LayerFrameConfig& layer_frame) const { + if (layer_frame.IsKeyframe()) { + return EncoderSpeedController::ReferenceClass::kKey; + } + + int tid = layer_frame.TemporalId(); + if (tid == 0) { + return EncoderSpeedController::ReferenceClass::kMain; + } else if (svc_params_ && tid == svc_params_->number_temporal_layers - 1) { + return EncoderSpeedController::ReferenceClass::kNoneReference; + } + return EncoderSpeedController::ReferenceClass::kIntermediate; +} + +TimeDelta LibaomAv1Encoder::GetFrameInterval(int spatial_index) const { + TimeDelta frame_interval = + TimeDelta::Seconds(1) / + (framerate_fps_ == 0 ? encoder_settings_.maxFramerate : framerate_fps_); + + if (!SvcEnabled()) { + return frame_interval; + } + + // Allocate a time slice for each spatial layer, proportional to the + // fraction of pixels allocated for that layer. + // E.g. if QVGA + VGA is used, 20% of the encoder time will be allocated + // for QVGA + 80% for VGA - since VGA has 4x the number of pixels. 
+ int pixel_count_sum = 0; + for (int si = 0; si < svc_params_->number_spatial_layers; ++si) { + pixel_count_sum += encoder_settings_.spatialLayers[si].width * + encoder_settings_.spatialLayers[si].height; + } + + double pixel_count_fraction = + static_cast<double>( + encoder_settings_.spatialLayers[spatial_index].width * + encoder_settings_.spatialLayers[spatial_index].height) / + pixel_count_sum; + return frame_interval * pixel_count_fraction; +} + int32_t LibaomAv1Encoder::Encode( const VideoFrame& frame, const std::vector<VideoFrameType>* frame_types) { @@ -792,7 +901,7 @@ svc_params_ ? svc_params_->number_spatial_layers : 1; auto next_layer_frame = layer_frames.begin(); std::vector<std::pair<EncodedImage, CodecSpecificInfo>> encoded_images; - for (size_t i = 0; i < num_spatial_layers; ++i) { + for (size_t sid = 0; sid < num_spatial_layers; ++sid) { // The libaom AV1 encoder requires that `aom_codec_encode` is called for // every spatial layer, even if the configured bitrate for that layer is // zero. For zero bitrate spatial layers no frames will be produced. @@ -800,18 +909,24 @@ non_encoded_layer_frame; ScalableVideoController::LayerFrameConfig* layer_frame; if (next_layer_frame != layer_frames.end() && - next_layer_frame->SpatialId() == static_cast<int>(i)) { + next_layer_frame->SpatialId() == static_cast<int>(sid)) { layer_frame = &*next_layer_frame; ++next_layer_frame; } else { // For layers that are not encoded only the spatial id matters. - non_encoded_layer_frame.emplace().S(i); + non_encoded_layer_frame.emplace().S(sid); layer_frame = &*non_encoded_layer_frame; } const bool end_of_picture = (next_layer_frame == layer_frames.end()); aom_enc_frame_flags_t flags = layer_frame->IsKeyframe() ? 
AOM_EFLAG_FORCE_KF : 0; + + if (SvcEnabled()) { + SetSvcLayerId(*layer_frame); + SetSvcRefFrameConfig(*layer_frame); + } + #if defined(WEBRTC_ENCODER_PSNR_STATS) && defined(AOM_EFLAG_CALCULATE_PSNR) if (psnr_experiment_.IsEnabled() && psnr_frame_sampler_.ShouldBeSampled(frame)) { @@ -819,38 +934,73 @@ } #endif - if (SvcEnabled()) { - SetSvcLayerId(*layer_frame); - SetSvcRefFrameConfig(*layer_frame); - } + if (!speed_controllers_.empty()) { + RTC_DCHECK_GT(speed_controllers_.size(), sid); + EncoderSpeedController& speed_controller = *speed_controllers_[sid]; - EncodeResult result = DoEncode(duration, flags, layer_frame); - if (aom_codec_err_t* status = std::get_if<aom_codec_err_t>(&result); - status != nullptr) { - if (*status == AOM_CODEC_OK) { - // AOM_CODEC_OK means success with no image, so do nothing. - continue; - } else { - RTC_LOG(LS_WARNING) << "LibaomAv1Encoder::Encode returned " << status - << " on aom_codec_encode."; + EncoderSpeedController::FrameEncodingInfo frame_info{ + .reference_type = AsSpeedControllerFrameType(*layer_frame), + .is_repeat_frame = frame.is_repeat_frame()}; + EncoderSpeedController::EncodeSettings settings = + speed_controller.GetEncodeSettings(frame_info); + + SET_ENCODER_PARAM_OR_RETURN_ERROR(AOME_SET_CPUUSED, settings.speed); + EncodeResult output = DoEncode(duration, flags, layer_frame); + if (output.status_code != AOM_CODEC_OK) { + RTC_LOG(LS_WARNING) + << "LibaomAv1Encoder::Encode returned error: '" + << aom_codec_err_to_string(output.status_code) << "'."; return WEBRTC_VIDEO_CODEC_ERROR; } + + if (!output.encoded_image.has_value()) { + // Frame dropped, presumably by rate controller. This is not an error. 
+ continue; + } + + RTC_DCHECK(output.encoded_image.has_value()); + + speed_controller.OnEncodedFrame( + ToSpeedControllerEncodeResult(output, frame_info, settings.speed)); + + RTC_DCHECK_GT(output.encoded_image->size(), 0u); + PopulateEncodedImageFromVideoFrame(frame, *output.encoded_image); + CodecSpecificInfo codec_specifics = CreateCodecSpecificInfo( + *output.encoded_image, *layer_frame, end_of_picture); + + if (non_encoded_layer_frame) { + continue; + } + + encoded_images.emplace_back(std::move(*output.encoded_image), + std::move(codec_specifics)); + } else { + // No speed controller used. + EncodeResult output = DoEncode(duration, flags, layer_frame); + if (output.status_code != AOM_CODEC_OK) { + RTC_LOG(LS_WARNING) + << "LibaomAv1Encoder::Encode returned error: '" + << aom_codec_err_to_string(output.status_code) << "'."; + return WEBRTC_VIDEO_CODEC_ERROR; + } + if (!output.encoded_image.has_value()) { + // Status code OK but no image - the encoder dropped the frame, + // presumable due to rate control. This is not an error. 
+ continue; + } + + if (non_encoded_layer_frame) { + continue; + } + + RTC_DCHECK_GT(output.encoded_image->size(), 0u); + PopulateEncodedImageFromVideoFrame(frame, *output.encoded_image); + CodecSpecificInfo codec_specifics = CreateCodecSpecificInfo( + *output.encoded_image, *layer_frame, end_of_picture); + + encoded_images.emplace_back(std::move(*output.encoded_image), + std::move(codec_specifics)); } - - if (non_encoded_layer_frame) { - continue; - } - - RTC_DCHECK(std::holds_alternative<EncodedImage>(result)); - EncodedImage encoded_image = std::get<EncodedImage>(std::move(result)); - - RTC_DCHECK_GT(encoded_image.size(), 0u); - PopulateEncodedImageFromVideoFrame(frame, encoded_image); - CodecSpecificInfo codec_specifics = - CreateCodecSpecificInfo(encoded_image, *layer_frame, end_of_picture); - - encoded_images.emplace_back(std::move(encoded_image), - std::move(codec_specifics)); } if (!encoded_images.empty()) { @@ -867,23 +1017,29 @@ return WEBRTC_VIDEO_CODEC_OK; } -LibaomAv1Encoder::EncodeResult LibaomAv1Encoder::DoEncode( +EncodeResult LibaomAv1Encoder::DoEncode( uint32_t duration, aom_enc_frame_flags_t flags, ScalableVideoController::LayerFrameConfig* layer_frame) { // Encode a frame. The presentation timestamp `pts` should not use real // timestamps from frames or the wall clock, as that can cause the rate // controller to misbehave. - aom_codec_err_t ret = + EncodeResult output; + + Timestamp start_time = realtime_clock_->CurrentTime(); + output.status_code = aom_codec_encode(&ctx_, frame_for_encode_, timestamp_, duration, flags); - if (ret != AOM_CODEC_OK) { - return ret; + output.encode_time = realtime_clock_->CurrentTime() - start_time; + + if (output.status_code != AOM_CODEC_OK) { + return output; } // Get encoded image data. 
aom_codec_iter_t iter = nullptr; int data_pkt_count = 0; - EncodedImage encoded_image; + output.encoded_image.emplace(); + EncodedImage& encoded_image = *output.encoded_image; const aom_codec_cx_pkt_t* pkt = nullptr; while ((pkt = aom_codec_get_cx_data(&ctx_, &iter)) != nullptr) { if (pkt->kind == AOM_CODEC_CX_FRAME_PKT && pkt->data.frame.sz > 0) { @@ -891,7 +1047,8 @@ RTC_LOG(LS_WARNING) << "LibaomAv1Encoder::Encoder returned more than " "one data packet for an input video frame."; Release(); - return AOM_CODEC_ERROR; + output.status_code = AOM_CODEC_ERROR; + return output; } encoded_image.SetEncodedData(EncodedImageBuffer::Create( /*data=*/static_cast<const uint8_t*>(pkt->data.frame.buf), @@ -924,7 +1081,8 @@ if (!SetEncoderControlParameters(AOME_GET_LAST_QUANTIZER, &encoded_image.qp_)) { RTC_LOG(LS_WARNING) << "Unable to fetch QP for frame."; - return AOM_CODEC_ERROR; + output.status_code = AOM_CODEC_ERROR; + return output; } ++data_pkt_count; @@ -937,11 +1095,11 @@ } if (encoded_image.size() == 0) { - // Encode success, but no image produced. - return AOM_CODEC_OK; + // Encode success, but no image produced. Frame as just dropped. + output.encoded_image.reset(); } - return encoded_image; + return output; } CodecSpecificInfo LibaomAv1Encoder::CreateCodecSpecificInfo( @@ -1019,6 +1177,9 @@ } framerate_fps_ = parameters.framerate_fps; + for (size_t si = 0; si < speed_controllers_.size(); ++si) { + speed_controllers_[si]->SetFrameInterval(GetFrameInterval(si)); + } rates_configured_ = true; }
diff --git a/modules/video_coding/codecs/av1/libaom_speed_config_factory.cc b/modules/video_coding/codecs/av1/libaom_speed_config_factory.cc new file mode 100644 index 0000000..7194262 --- /dev/null +++ b/modules/video_coding/codecs/av1/libaom_speed_config_factory.cc
@@ -0,0 +1,134 @@ +// Copyright (c) 2025 The WebRTC project authors. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the LICENSE file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. + +#include "modules/video_coding/codecs/av1/libaom_speed_config_factory.h" + +#include <algorithm> +#include <optional> + +#include "api/video_codecs/encoder_speed_controller.h" +#include "api/video_codecs/video_codec.h" + +namespace webrtc { + +namespace { + +constexpr int kNumLevels = 15; +EncoderSpeedController::Config::SpeedLevel kAllLevels[kNumLevels] = { + {.speeds = {5, 5, 6, 6}, .min_qp = 31}, + {.speeds = {5, 6, 7, 7}, .min_qp = 30}, + {.speeds = {5, 6, 8, 10}, .min_qp = 30}, + {.speeds = {5, 6, 9, 11}, .min_qp = 29}, + {.speeds = {5, 7, 7, 7}, .min_qp = 29}, + {.speeds = {7, 7, 8, 8}, .min_qp = 28}, + {.speeds = {7, 7, 8, 9}, .min_qp = 28}, + {.speeds = {7, 7, 10, 10}, .min_qp = 28}, + {.speeds = {7, 7, 10, 11}, .min_qp = 27}, + {.speeds = {7, 7, 11, 11}, .min_qp = 26}, + {.speeds = {7, 8, 9, 9}, .min_qp = 26}, + {.speeds = {7, 9, 9, 11}, .min_qp = 25}, + {.speeds = {8, 9, 10, 11}, .min_qp = 25}, + {.speeds = {9, 10, 11, 11}, .min_qp = std::nullopt}, + {.speeds = {10, 11, 11, 11}, .min_qp = std::nullopt}}; + +bool HasSameSpeeds(const EncoderSpeedController::Config::SpeedLevel& a, + const EncoderSpeedController::Config::SpeedLevel& b, + int num_temporal_layers) { + if (a.speeds[0] != b.speeds[0] || a.speeds[1] != b.speeds[1]) { + // Keyframe or base layer speed differs. + return false; + } + if (num_temporal_layers > 1 && a.speeds[3] != b.speeds[3]) { + // Upper (non-reference) layer speed differs. + return false; + } + // Middle temporal layer (intermedia class). 
+ return a.speeds[2] == b.speeds[2]; +} + +void AddSpeedLevels(int num_levels, + int num_temporal_layers, + EncoderSpeedController::Config& config) { + // Add up to `num_levels` speeds - but ignore levels that have identical + // speeds when `num_temporal_layers` is used (e.g. same base-layer speed for + // single-layer). + config.speed_levels.reserve(num_levels); + for (int i = kNumLevels - 1; i >= kNumLevels - num_levels; --i) { + if (i == kNumLevels - 1 || + !HasSameSpeeds(kAllLevels[i], config.speed_levels.back(), + num_temporal_layers)) { + config.speed_levels.push_back(kAllLevels[i]); + } + } + + std::reverse(config.speed_levels.begin(), config.speed_levels.end()); +} + +} // namespace + +LibaomSpeedConfigFactory::LibaomSpeedConfigFactory( + VideoCodecComplexity complexity, + VideoCodecMode mode) + : complexity_(complexity), mode_(mode) {} + +EncoderSpeedController::Config LibaomSpeedConfigFactory::GetSpeedConfig( + int width, + int height, + int num_temporal_layers) const { + EncoderSpeedController::Config config; + int num_levels = 0; + switch (complexity_) { + case VideoCodecComplexity::kComplexityLow: + // Level 9x10x11x11 and up. + num_levels = 2; + break; + case VideoCodecComplexity::kComplexityNormal: + // Level 8x9x10x11 and up. + num_levels = 3; + break; + case VideoCodecComplexity::kComplexityHigh: + // Level 7x7x10x10 and up. + num_levels = 8; + break; + case VideoCodecComplexity::kComplexityHigher: + // Level 5x6x8x10 and up (< 720p, 5x7x7x7 otherwise) + if (width * height < 1280 * 720) { // Corrected condition + num_levels = 12; + } else { + num_levels = 10; + } + break; + case VideoCodecComplexity::kComplexityMax: + // All levels. + num_levels = kNumLevels; + break; + } + + if (mode_ == VideoCodecMode::kScreensharing) { + num_levels = std::max(1, num_levels - 1); + } + + AddSpeedLevels(num_levels, num_temporal_layers, config); + + // Don't cap speed based on resolution - only adjust the start value. 
+ const int num_pixels = width * height; + const int available_speed_levels = config.speed_levels.size(); + if (num_pixels > 1920 * 1080) { + config.start_speed_index = std::max(available_speed_levels - 4, 0); + } else if (num_pixels > 1280 * 720) { + config.start_speed_index = std::max(available_speed_levels - 3, 0); + } else if (num_pixels > 640 * 360) { + config.start_speed_index = std::max(available_speed_levels - 2, 0); + } else { + config.start_speed_index = std::max(available_speed_levels - 1, 0); + } + + return config; +} + +} // namespace webrtc
diff --git a/modules/video_coding/codecs/av1/libaom_speed_config_factory.h b/modules/video_coding/codecs/av1/libaom_speed_config_factory.h new file mode 100644 index 0000000..6631da7 --- /dev/null +++ b/modules/video_coding/codecs/av1/libaom_speed_config_factory.h
@@ -0,0 +1,33 @@ +// Copyright (c) 2025 The WebRTC project authors. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the LICENSE file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. + +#ifndef MODULES_VIDEO_CODING_CODECS_AV1_LIBAOM_SPEED_CONFIG_FACTORY_H_ +#define MODULES_VIDEO_CODING_CODECS_AV1_LIBAOM_SPEED_CONFIG_FACTORY_H_ + +#include "api/video_codecs/encoder_speed_controller.h" +#include "api/video_codecs/video_codec.h" + +namespace webrtc { + +class LibaomSpeedConfigFactory { + public: + LibaomSpeedConfigFactory(VideoCodecComplexity complexity, + VideoCodecMode mode); + + EncoderSpeedController::Config GetSpeedConfig(int width, + int height, + int num_temporal_layers) const; + + private: + const VideoCodecComplexity complexity_; + const VideoCodecMode mode_; +}; + +} // namespace webrtc + +#endif // MODULES_VIDEO_CODING_CODECS_AV1_LIBAOM_SPEED_CONFIG_FACTORY_H_
diff --git a/modules/video_coding/codecs/av1/libaom_speed_config_factory_unittest.cc b/modules/video_coding/codecs/av1/libaom_speed_config_factory_unittest.cc new file mode 100644 index 0000000..ae4305e --- /dev/null +++ b/modules/video_coding/codecs/av1/libaom_speed_config_factory_unittest.cc
@@ -0,0 +1,173 @@ +// Copyright (c) 2025 The WebRTC project authors. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the LICENSE file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. + +#include "modules/video_coding/codecs/av1/libaom_speed_config_factory.h" + +#include <algorithm> +#include <cstddef> +#include <set> + +#include "api/video_codecs/encoder_speed_controller.h" +#include "api/video_codecs/video_codec.h" +#include "rtc_base/checks.h" +#include "test/gtest.h" + +namespace webrtc { + +// Helper to allow SpeedLevel to be used in a set. +bool operator<(const EncoderSpeedController::Config::SpeedLevel& lhs, + const EncoderSpeedController::Config::SpeedLevel& rhs) { + if (lhs.speeds != rhs.speeds) { + return lhs.speeds < rhs.speeds; + } + return lhs.min_qp < rhs.min_qp; +} + +namespace { + +using ::testing::Values; + +// Test that the number of speed levels increases with complexity. 
+TEST(LibaomSpeedConfigFactoryTest, NumLevelsIncreaseWithComplexity) { + LibaomSpeedConfigFactory factory_low(VideoCodecComplexity::kComplexityLow, + VideoCodecMode::kRealtimeVideo); + EncoderSpeedController::Config config_low = + factory_low.GetSpeedConfig(640, 360, 3); + + LibaomSpeedConfigFactory factory_normal( + VideoCodecComplexity::kComplexityNormal, VideoCodecMode::kRealtimeVideo); + EncoderSpeedController::Config config_normal = + factory_normal.GetSpeedConfig(640, 360, 3); + + LibaomSpeedConfigFactory factory_high(VideoCodecComplexity::kComplexityHigh, + VideoCodecMode::kRealtimeVideo); + EncoderSpeedController::Config config_high = + factory_high.GetSpeedConfig(640, 360, 3); + + LibaomSpeedConfigFactory factory_higher( + VideoCodecComplexity::kComplexityHigher, VideoCodecMode::kRealtimeVideo); + EncoderSpeedController::Config config_higher = + factory_higher.GetSpeedConfig(640, 360, 3); + + LibaomSpeedConfigFactory factory_max(VideoCodecComplexity::kComplexityMax, + VideoCodecMode::kRealtimeVideo); + EncoderSpeedController::Config config_max = + factory_max.GetSpeedConfig(640, 360, 3); + + EXPECT_GE(config_normal.speed_levels.size(), config_low.speed_levels.size()); + EXPECT_GE(config_high.speed_levels.size(), config_normal.speed_levels.size()); + EXPECT_GE(config_higher.speed_levels.size(), config_high.speed_levels.size()); + EXPECT_GE(config_max.speed_levels.size(), config_higher.speed_levels.size()); +} + +// Test that speeds within each level are monotonic. +TEST(LibaomSpeedConfigFactoryTest, SpeedsAreMonotonic) { + LibaomSpeedConfigFactory factory(VideoCodecComplexity::kComplexityMax, + VideoCodecMode::kRealtimeVideo); + EncoderSpeedController::Config config = factory.GetSpeedConfig(1280, 720, 3); + + for (const auto& level : config.speed_levels) { + // Lower reference class index means more important, so speed should be + // lower or equal. 
+ EXPECT_LE(level.speeds[static_cast<int>( + EncoderSpeedController::ReferenceClass::kKey)], + level.speeds[static_cast<int>( + EncoderSpeedController::ReferenceClass::kMain)]); + EXPECT_LE(level.speeds[static_cast<int>( + EncoderSpeedController::ReferenceClass::kMain)], + level.speeds[static_cast<int>( + EncoderSpeedController::ReferenceClass::kIntermediate)]); + EXPECT_LE(level.speeds[static_cast<int>( + EncoderSpeedController::ReferenceClass::kIntermediate)], + level.speeds[static_cast<int>( + EncoderSpeedController::ReferenceClass::kNoneReference)]); + } +} + +// Test that keyframe and base layer speeds between levels are monotonic. +TEST(LibaomSpeedConfigFactoryTest, KeyAndMainSpeedsIncreaseBetweenLevels) { + LibaomSpeedConfigFactory factory(VideoCodecComplexity::kComplexityMax, + VideoCodecMode::kRealtimeVideo); + EncoderSpeedController::Config config = factory.GetSpeedConfig(1280, 720, 3); + + for (size_t i = 0; i < config.speed_levels.size() - 1; ++i) { + const auto& current_level = config.speed_levels[i]; + const auto& next_level = config.speed_levels[i + 1]; + EXPECT_LE(current_level.speeds[static_cast<int>( + EncoderSpeedController::ReferenceClass::kKey)], + next_level.speeds[static_cast<int>( + EncoderSpeedController::ReferenceClass::kKey)]); + EXPECT_LE(current_level.speeds[static_cast<int>( + EncoderSpeedController::ReferenceClass::kMain)], + next_level.speeds[static_cast<int>( + EncoderSpeedController::ReferenceClass::kMain)]); + } +} + +struct ResolutionParams { + int width; + int height; + int expected_start_index_offset; // Offset from the last index +}; + +class LibaomSpeedConfigFactoryResolutionTest + : public ::testing::TestWithParam<ResolutionParams> {}; + +INSTANTIATE_TEST_SUITE_P(All, + LibaomSpeedConfigFactoryResolutionTest, + Values(ResolutionParams{320, 180, 1}, + ResolutionParams{640, 360, 1}, + ResolutionParams{1280, 720, 2}, + ResolutionParams{1920, 1080, 3}, + ResolutionParams{2560, 1440, 4})); + 
+TEST_P(LibaomSpeedConfigFactoryResolutionTest, GetSpeedConfigStartSpeedIndex) { + const ResolutionParams& params = GetParam(); + LibaomSpeedConfigFactory factory(VideoCodecComplexity::kComplexityMax, + VideoCodecMode::kRealtimeVideo); + EncoderSpeedController::Config config = + factory.GetSpeedConfig(params.width, params.height, 3); + int expected_index = + std::max(0, static_cast<int>(config.speed_levels.size()) - + params.expected_start_index_offset); + EXPECT_EQ(config.start_speed_index, expected_index); +} + +void CheckDistinctConfigs(const LibaomSpeedConfigFactory& factory, + int num_temporal_layers) { + RTC_DCHECK_GT(num_temporal_layers, 0); + RTC_DCHECK_LE(num_temporal_layers, 3); + + EncoderSpeedController::Config config = + factory.GetSpeedConfig(640, 360, num_temporal_layers); + + std::set<EncoderSpeedController::Config::SpeedLevel> unique_configs( + config.speed_levels.begin(), config.speed_levels.end()); + EXPECT_EQ(unique_configs.size(), config.speed_levels.size()); +} + +TEST(LibaomSpeedConfigFactoryTest, DistinctConfigs1Tl) { + LibaomSpeedConfigFactory factory(VideoCodecComplexity::kComplexityMax, + VideoCodecMode::kRealtimeVideo); + CheckDistinctConfigs(factory, 1); +} + +TEST(LibaomSpeedConfigFactoryTest, DistinctConfigs2Tl) { + LibaomSpeedConfigFactory factory(VideoCodecComplexity::kComplexityMax, + VideoCodecMode::kRealtimeVideo); + CheckDistinctConfigs(factory, 2); +} + +TEST(LibaomSpeedConfigFactoryTest, DistinctConfigs3Tl) { + LibaomSpeedConfigFactory factory(VideoCodecComplexity::kComplexityMax, + VideoCodecMode::kRealtimeVideo); + CheckDistinctConfigs(factory, 3); +} + +} // namespace +} // namespace webrtc
diff --git a/modules/video_coding/utility/encoder_speed_controller_impl.cc b/modules/video_coding/utility/encoder_speed_controller_impl.cc index 299a269..987f0ad 100644 --- a/modules/video_coding/utility/encoder_speed_controller_impl.cc +++ b/modules/video_coding/utility/encoder_speed_controller_impl.cc
@@ -151,8 +151,7 @@ } void EncoderSpeedControllerImpl::OnEncodedFrame( - EncoderSpeedController::EncodeResults results, - std::optional<EncodeResults> baseline_results) { + EncoderSpeedController::EncodeResults results) { double encode_tims_ms = results.encode_time.us() / 1000.0; if (results.frame_info.reference_type == ReferenceClass::kKey) { encode_tims_ms /= kKeyframeEncodeTimeCompensator;
diff --git a/modules/video_coding/utility/encoder_speed_controller_impl.h b/modules/video_coding/utility/encoder_speed_controller_impl.h index e84881e..9c7cbba8 100644 --- a/modules/video_coding/utility/encoder_speed_controller_impl.h +++ b/modules/video_coding/utility/encoder_speed_controller_impl.h
@@ -12,7 +12,6 @@ #define MODULES_VIDEO_CODING_UTILITY_ENCODER_SPEED_CONTROLLER_IMPL_H_ #include <memory> -#include <optional> #include "api/units/time_delta.h" #include "api/video_codecs/encoder_speed_controller.h" @@ -41,11 +40,8 @@ // thereafter be configured with requested settings. EncodeSettings GetEncodeSettings(FrameEncodingInfo frame_info) override; - // Should be called after each frame has completed encoding. If a baseline - // comparison speed was set in the `EncodeSettings`, the `baseline_results` - // parameter should be set with the results corresponding to those settings. - void OnEncodedFrame(EncodeResults results, - std::optional<EncodeResults> baseline_results) override; + // Should be called after each frame has completed encoding. + void OnEncodedFrame(EncodeResults results) override; const Config& config() const { return config_; }
diff --git a/modules/video_coding/utility/encoder_speed_controller_impl_unittest.cc b/modules/video_coding/utility/encoder_speed_controller_impl_unittest.cc index 8b2bdeb..a887961 100644 --- a/modules/video_coding/utility/encoder_speed_controller_impl_unittest.cc +++ b/modules/video_coding/utility/encoder_speed_controller_impl_unittest.cc
@@ -11,7 +11,6 @@ #include <optional> #include "api/units/time_delta.h" -#include "api/units/timestamp.h" #include "api/video_codecs/encoder_speed_controller.h" #include "test/gmock.h" #include "test/gtest.h" @@ -61,6 +60,7 @@ TEST(EncoderSpeedControllerTest, GetEncodeSettingsBaseLayers) { EncoderSpeedController::Config config = GetDefaultConfig(); + config.speed_levels[0].min_qp = 25; // Prevent dropping to speed 5 easily auto controller = EncoderSpeedController::Create(config, kFrameInterval); ASSERT_NE(controller, nullptr); @@ -72,22 +72,18 @@ // Simulate high encode time to increase speed for (int i = 0; i < 10; ++i) { - controller->OnEncodedFrame({.speed = 6, - .encode_time = kFrameInterval * 2, + controller->OnEncodedFrame({.encode_time = kFrameInterval * 0.90, .qp = 30, - .frame_info = frame_info}, - std::nullopt); + .frame_info = frame_info}); } // Speed should increase to 7 EXPECT_EQ(controller->GetEncodeSettings(frame_info).speed, 7); // Simulate low encode time to decrease speed for (int i = 0; i < 20; ++i) { - controller->OnEncodedFrame({.speed = 7, - .encode_time = kFrameInterval / 10, - .qp = 30, - .frame_info = frame_info}, - std::nullopt); + controller->OnEncodedFrame({.encode_time = kFrameInterval * 0.10, + .qp = 20, + .frame_info = frame_info}); } // Speed should decrease to 6 EXPECT_EQ(controller->GetEncodeSettings(frame_info).speed, 6); @@ -141,12 +137,9 @@ .reference_type = ReferenceClass::kMain}; for (int i = 0; i < 20; ++i) { - controller->OnEncodedFrame( - {.speed = 7, - .encode_time = kFrameInterval * 2, // High encode time - .qp = 30, - .frame_info = frame_info}, - std::nullopt); + controller->OnEncodedFrame({.encode_time = kFrameInterval * 0.95, + .qp = 30, + .frame_info = frame_info}); } EXPECT_EQ(controller->GetEncodeSettings(frame_info).speed, @@ -163,12 +156,7 @@ .reference_type = ReferenceClass::kMain}; for (int i = 0; i < 20; ++i) { - controller->OnEncodedFrame( - {.speed = 5, - .encode_time = kFrameInterval / 10, // Low encode time 
- .qp = 30, - .frame_info = frame_info}, - std::nullopt); + controller->OnEncodedFrame({.speed = 5, .frame_info = frame_info}); } EXPECT_EQ(controller->GetEncodeSettings(frame_info).speed, @@ -189,11 +177,9 @@ // Simulate low QP, normal encode time for (int i = 0; i < 20; ++i) { - controller->OnEncodedFrame({.speed = 6, - .encode_time = kFrameInterval * 0.6, + controller->OnEncodedFrame({.encode_time = kFrameInterval * 0.60, .qp = 10, - .frame_info = frame_info}, - std::nullopt); + .frame_info = frame_info}); } // Speed should increase to 7 due to low QP EXPECT_EQ(controller->GetEncodeSettings(frame_info).speed, 7); @@ -213,11 +199,9 @@ // Simulate low encode time but also low QP for (int i = 0; i < 20; ++i) { - controller->OnEncodedFrame({.speed = 6, - .encode_time = kFrameInterval / 10, + controller->OnEncodedFrame({.encode_time = kFrameInterval * 0.10, .qp = 10, - .frame_info = frame_info}, - std::nullopt); + .frame_info = frame_info}); } // Speed should NOT decrease to 5 because QP is below the next level's min_qp EXPECT_EQ(controller->GetEncodeSettings(frame_info).speed, 6);