api/video_codecs/libaom_av1_encoder_factory.cc - src - Git at Google

 /*
  *  Copyright (c) 2024 The WebRTC project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
  *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */

 #include "api/video_codecs/libaom_av1_encoder_factory.h"

 #include <array>
 #include <cstddef>
 #include <cstdint>
 #include <cstring>
 #include <map>
 #include <memory>
 #include <optional>
 #include <string>
 #include <type_traits>
 #include <vector>

 #include "absl/algorithm/container.h"
 #include "absl/cleanup/cleanup.h"
 #include "absl/types/variant.h"
 #include "api/array_view.h"
 #include "api/scoped_refptr.h"
 #include "api/units/data_rate.h"
 #include "api/units/data_size.h"
 #include "api/units/time_delta.h"
 #include "api/video/resolution.h"
 #include "api/video/video_frame_buffer.h"
 #include "api/video_codecs/video_codec.h"
 #include "api/video_codecs/video_encoder_factory_interface.h"
 #include "api/video_codecs/video_encoder_interface.h"
 #include "api/video_codecs/video_encoding_general.h"
 #include "rtc_base/checks.h"
 #include "rtc_base/logging.h"
 #include "rtc_base/numerics/rational.h"
 #include "rtc_base/strings/string_builder.h"
 #include "third_party/libaom/source/libaom/aom/aom_codec.h"
 #include "third_party/libaom/source/libaom/aom/aom_encoder.h"
 #include "third_party/libaom/source/libaom/aom/aom_image.h"
 #include "third_party/libaom/source/libaom/aom/aomcx.h"

 #define SET_OR_RETURN(param_id, param_value)                          \
   do {                                                                \
     if (!SetEncoderControlParameters(&ctx_, param_id, param_value)) { \
       return;                                                         \
     }                                                                 \
   } while (0)

 #define SET_OR_RETURN_FALSE(param_id, param_value)                    \
   do {                                                                \
     if (!SetEncoderControlParameters(&ctx_, param_id, param_value)) { \
       return false;                                                   \
     }                                                                 \
   } while (0)

 namespace webrtc {

 using FrameEncodeSettings = VideoEncoderInterface::FrameEncodeSettings;
 using Cbr = FrameEncodeSettings::Cbr;
 using Cqp = FrameEncodeSettings::Cqp;
 using aom_img_ptr = std::unique_ptr<aom_image_t, decltype(&aom_img_free)>;

 namespace {
 // MaxQp defined here:
 // http://google3/third_party/libaom/git_root/av1/av1_cx_iface.c;l=3510;rcl=527067478
 constexpr int kMaxQp = 63;
 constexpr int kNumBuffers = 8;
 constexpr int kMaxReferences = 3;
 constexpr int kMinEffortLevel = -2;
 constexpr int kMaxEffortLevel = 2;
 constexpr int kMaxSpatialLayersWtf = 4;
 constexpr int kMaxTemporalLayers = 4;
 constexpr int kRtpTicksPerSecond = 90000;
 constexpr std::array<VideoFrameBuffer::Type, 2> kSupportedInputFormats = {
     VideoFrameBuffer::Type::kI420, VideoFrameBuffer::Type::kNV12};

 constexpr std::array<Rational, 7> kSupportedScalingFactors = {
     {{8, 1}, {4, 1}, {2, 1}, {1, 1}, {1, 2}, {1, 4}, {1, 8}}};

 std::optional<Rational> GetScalingFactor(const Resolution& from,
                                          const Resolution& to) {
   auto it = absl::c_find_if(kSupportedScalingFactors, [&](const Rational& r) {
     return (from.width * r.numerator / r.denominator) == to.width &&
            (from.height * r.numerator / r.denominator) == to.height;
   });

   if (it != kSupportedScalingFactors.end()) {
     return *it;
   }

   return {};
 }

 class LibaomAv1Encoder : public VideoEncoderInterface {
  public:
   LibaomAv1Encoder() = default;
   ~LibaomAv1Encoder() override;

   bool InitEncode(
       const VideoEncoderFactoryInterface::StaticEncoderSettings& settings,
       const std::map<std::string, std::string>& encoder_specific_settings);

   void Encode(rtc::scoped_refptr<webrtc::VideoFrameBuffer> frame_buffer,
               const TemporalUnitSettings& tu_settings,
               std::vector<FrameEncodeSettings> frame_settings) override;

  private:
   aom_img_ptr image_to_encode_ = aom_img_ptr(nullptr, aom_img_free);
   aom_codec_ctx_t ctx_;
   aom_codec_enc_cfg_t cfg_;

   std::optional<VideoCodecMode> current_content_type_;
   std::array<std::optional<int>, kMaxSpatialLayersWtf> current_effort_level_;
   int max_number_of_threads_;
   std::array<std::optional<Resolution>, 8> last_resolution_in_buffer_;
 };

 template <typename T>
 bool SetEncoderControlParameters(aom_codec_ctx_t* ctx, int id, T value) {
   aom_codec_err_t error_code = aom_codec_control(ctx, id, value);
   if (error_code != AOM_CODEC_OK) {
     RTC_LOG(LS_WARNING) << "aom_codec_control returned " << error_code
                         << " with id:  " << id << ".";
   }
   return error_code == AOM_CODEC_OK;
 }

 LibaomAv1Encoder::~LibaomAv1Encoder() {
   aom_codec_destroy(&ctx_);
 }

 bool LibaomAv1Encoder::InitEncode(
     const VideoEncoderFactoryInterface::StaticEncoderSettings& settings,
     const std::map<std::string, std::string>& encoder_specific_settings) {
   if (!encoder_specific_settings.empty()) {
     RTC_LOG(LS_ERROR)
         << "libaom av1 encoder accepts no encoder specific settings";
     return false;
   }

   if (aom_codec_err_t ret = aom_codec_enc_config_default(
           aom_codec_av1_cx(), &cfg_, AOM_USAGE_REALTIME);
       ret != AOM_CODEC_OK) {
     RTC_LOG(LS_ERROR) << "aom_codec_enc_config_default returned " << ret;
     return false;
   }

   max_number_of_threads_ = settings.max_number_of_threads;

   // The encode resolution is set dynamically for each call to `Encode`, but for
   // `aom_codec_enc_init` to not fail we set it here as well.
   cfg_.g_w = settings.max_encode_dimensions.width;
   cfg_.g_h = settings.max_encode_dimensions.height;
   cfg_.g_timebase.num = 1;
   // TD: does 90khz timebase make sense, use microseconds instead maybe?
   cfg_.g_timebase.den = kRtpTicksPerSecond;
   cfg_.g_input_bit_depth = settings.encoding_format.bit_depth;
   cfg_.kf_mode = AOM_KF_DISABLED;
   // TD: rc_undershoot_pct and rc_overshoot_pct should probably be removed.
   cfg_.rc_undershoot_pct = 50;
   cfg_.rc_overshoot_pct = 50;
   auto* cbr =
       absl::get_if<VideoEncoderFactoryInterface::StaticEncoderSettings::Cbr>(
           &settings.rc_mode);
   cfg_.rc_buf_initial_sz = cbr ? cbr->target_buffer_size.ms() : 600;
   cfg_.rc_buf_optimal_sz = cbr ? cbr->target_buffer_size.ms() : 600;
   cfg_.rc_buf_sz = cbr ? cbr->max_buffer_size.ms() : 1000;
   cfg_.g_usage = AOM_USAGE_REALTIME;
   cfg_.g_pass = AOM_RC_ONE_PASS;
   cfg_.g_lag_in_frames = 0;
   cfg_.g_error_resilient = 0;
   cfg_.rc_end_usage = cbr ? AOM_CBR : AOM_Q;

   if (aom_codec_err_t ret =
           aom_codec_enc_init(&ctx_, aom_codec_av1_cx(), &cfg_, /*flags=*/0);
       ret != AOM_CODEC_OK) {
     RTC_LOG(LS_ERROR) << "aom_codec_enc_init returned " << ret;
     return false;
   }

   SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_CDEF, 1);
   SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_TPL_MODEL, 0);
   SET_OR_RETURN_FALSE(AV1E_SET_DELTAQ_MODE, 0);
   SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_ORDER_HINT, 0);
   SET_OR_RETURN_FALSE(AV1E_SET_AQ_MODE, 3);
   SET_OR_RETURN_FALSE(AOME_SET_MAX_INTRA_BITRATE_PCT, 300);
   SET_OR_RETURN_FALSE(AV1E_SET_COEFF_COST_UPD_FREQ, 3);
   SET_OR_RETURN_FALSE(AV1E_SET_MODE_COST_UPD_FREQ, 3);
   SET_OR_RETURN_FALSE(AV1E_SET_MV_COST_UPD_FREQ, 3);
   SET_OR_RETURN_FALSE(AV1E_SET_ROW_MT, 1);
   SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_OBMC, 0);
   SET_OR_RETURN_FALSE(AV1E_SET_NOISE_SENSITIVITY, 0);
   SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_WARPED_MOTION, 0);
   SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_GLOBAL_MOTION, 0);
   SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_REF_FRAME_MVS, 0);
   SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_CFL_INTRA, 0);
   SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_SMOOTH_INTRA, 0);
   SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_ANGLE_DELTA, 0);
   SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_FILTER_INTRA, 0);
   SET_OR_RETURN_FALSE(AV1E_SET_INTRA_DEFAULT_TX_ONLY, 1);
   SET_OR_RETURN_FALSE(AV1E_SET_DISABLE_TRELLIS_QUANT, 1);
   SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_DIST_WTD_COMP, 0);
   SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_DIFF_WTD_COMP, 0);
   SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_DUAL_FILTER, 0);
   SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_INTERINTRA_COMP, 0);
   SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_INTERINTRA_WEDGE, 0);
   SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_INTRA_EDGE_FILTER, 0);
   SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_INTRABC, 0);
   SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_MASKED_COMP, 0);
   SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_PAETH_INTRA, 0);
   SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_QM, 0);
   SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_RECT_PARTITIONS, 0);
   SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_RESTORATION, 0);
   SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_SMOOTH_INTERINTRA, 0);
   SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_TX64, 0);
   SET_OR_RETURN_FALSE(AV1E_SET_MAX_REFERENCE_FRAMES, 3);

   return true;
 }

 struct ThreadTilesAndSuperblockSizeInfo {
   int num_threads;
   int exp_tile_rows;
   int exp_tile_colums;
   aom_superblock_size_t superblock_size;
 };

 ThreadTilesAndSuperblockSizeInfo GetThreadingTilesAndSuperblockSize(
     int width,
     int height,
     int max_number_of_threads) {
   ThreadTilesAndSuperblockSizeInfo res;
   const int num_pixels = width * height;
   if (num_pixels >= 1920 * 1080 && max_number_of_threads > 8) {
     res.num_threads = 8;
     res.exp_tile_rows = 2;
     res.exp_tile_colums = 1;
   } else if (num_pixels >= 640 * 360 && max_number_of_threads > 4) {
     res.num_threads = 4;
     res.exp_tile_rows = 1;
     res.exp_tile_colums = 1;
   } else if (num_pixels >= 320 * 180 && max_number_of_threads > 2) {
     res.num_threads = 2;
     res.exp_tile_rows = 1;
     res.exp_tile_colums = 0;
   } else {
     res.num_threads = 1;
     res.exp_tile_rows = 0;
     res.exp_tile_colums = 0;
   }

   if (res.num_threads > 4 && num_pixels >= 960 * 540) {
     res.superblock_size = AOM_SUPERBLOCK_SIZE_64X64;
   } else {
     res.superblock_size = AOM_SUPERBLOCK_SIZE_DYNAMIC;
   }

   RTC_LOG(LS_WARNING) << __FUNCTION__ << " res.num_threads=" << res.num_threads
                       << " res.exp_tile_rows=" << res.exp_tile_rows
                       << " res.exp_tile_colums=" << res.exp_tile_colums
                       << " res.superblock_size=" << res.superblock_size;

   return res;
 }

 bool ValidateEncodeParams(
     const webrtc::VideoFrameBuffer& /* frame_buffer */,
     const VideoEncoderInterface::TemporalUnitSettings& /* tu_settings */,
     const std::vector<VideoEncoderInterface::FrameEncodeSettings>&
         frame_settings,
     const std::array<std::optional<Resolution>, 8>& last_resolution_in_buffer,
     aom_rc_mode rc_mode) {
   if (frame_settings.empty()) {
     RTC_LOG(LS_ERROR) << "No frame settings provided.";
     return false;
   }

   auto in_range = [](int low, int high, int val) {
     return low <= val && val < high;
   };

   for (size_t i = 0; i < frame_settings.size(); ++i) {
     const VideoEncoderInterface::FrameEncodeSettings& settings =
         frame_settings[i];

     if (!settings.frame_output) {
       RTC_LOG(LS_ERROR) << "No frame output provided.";
       return false;
     }

     if (!in_range(kMinEffortLevel, kMaxEffortLevel + 1,
                   settings.effort_level)) {
       RTC_LOG(LS_ERROR) << "Unsupported effort level " << settings.effort_level;
       return false;
     }

     if (!in_range(0, kMaxSpatialLayersWtf, settings.spatial_id)) {
       RTC_LOG(LS_ERROR) << "invalid spatial id " << settings.spatial_id;
       return false;
     }

     if (!in_range(0, kMaxTemporalLayers, settings.temporal_id)) {
       RTC_LOG(LS_ERROR) << "invalid temporal id " << settings.temporal_id;
       return false;
     }

     if ((settings.frame_type == FrameType::kKeyframe ||
          settings.frame_type == FrameType::kStartFrame) &&
         !settings.reference_buffers.empty()) {
       RTC_LOG(LS_ERROR) << "Reference buffers can not be used for keyframes.";
       return false;
     }

     if ((settings.frame_type == FrameType::kKeyframe ||
          settings.frame_type == FrameType::kStartFrame) &&
         !settings.update_buffer) {
       RTC_LOG(LS_ERROR)
           << "Buffer to update must be specified for keyframe/startframe";
       return false;
     }

     if (settings.update_buffer &&
         !in_range(0, kNumBuffers, *settings.update_buffer)) {
       RTC_LOG(LS_ERROR) << "Invalid update buffer id.";
       return false;
     }

     if (settings.reference_buffers.size() > kMaxReferences) {
       RTC_LOG(LS_ERROR) << "Too many referenced buffers.";
       return false;
     }

     for (size_t j = 0; j < settings.reference_buffers.size(); ++j) {
       if (!in_range(0, kNumBuffers, settings.reference_buffers[j])) {
         RTC_LOG(LS_ERROR) << "Invalid reference buffer id.";
         return false;
       }

       // Figure out which frame resolution a certain buffer will hold when the
       // frame described by `settings` is encoded.
       std::optional<Resolution> referenced_resolution;
       bool keyframe_on_previous_layer = false;

       // Will some other frame in this temporal unit update the buffer?
       for (size_t k = 0; k < i; ++k) {
         if (frame_settings[k].frame_type == FrameType::kKeyframe) {
           keyframe_on_previous_layer = true;
           referenced_resolution.reset();
         }
         if (frame_settings[k].update_buffer == settings.reference_buffers[j]) {
           referenced_resolution = frame_settings[k].resolution;
         }
       }

       // Not updated by another frame in the temporal unit, what is the
       // resolution of the last frame stored into that buffer?
       if (!referenced_resolution && !keyframe_on_previous_layer) {
         referenced_resolution =
             last_resolution_in_buffer[settings.reference_buffers[j]];
       }

       if (!referenced_resolution) {
         RTC_LOG(LS_ERROR) << "Referenced buffer holds no frame.";
         return false;
       }

       if (!GetScalingFactor(*referenced_resolution, settings.resolution)) {
         RTC_LOG(LS_ERROR)
             << "Required resolution scaling factor not supported.";
         return false;
       }

       for (size_t l = i + 1; l < settings.reference_buffers.size(); ++l) {
         if (settings.reference_buffers[i] == settings.reference_buffers[l]) {
           RTC_LOG(LS_ERROR) << "Duplicate reference buffer specified.";
           return false;
         }
       }
     }

     if ((rc_mode == AOM_CBR &&
          absl::holds_alternative<Cqp>(settings.rate_options)) ||
         (rc_mode == AOM_Q &&
          absl::holds_alternative<Cbr>(settings.rate_options))) {
       RTC_LOG(LS_ERROR) << "Invalid rate options, encoder configured with "
                         << (rc_mode == AOM_CBR ? "AOM_CBR" : "AOM_Q");
       return false;
     }

     for (size_t j = i + 1; j < frame_settings.size(); ++j) {
       if (settings.spatial_id >= frame_settings[j].spatial_id) {
         RTC_LOG(LS_ERROR) << "Frame spatial id specified out of order.";
         return false;
       }
     }
   }

   return true;
 }

 void PrepareInputImage(const VideoFrameBuffer& input_buffer,
                        aom_img_ptr& out_aom_image) {
   aom_img_fmt_t input_format;
   switch (input_buffer.type()) {
     case VideoFrameBuffer::Type::kI420:
       input_format = AOM_IMG_FMT_I420;
       break;
     case VideoFrameBuffer::Type::kNV12:
       input_format = AOM_IMG_FMT_NV12;
       break;
     default:
       RTC_CHECK_NOTREACHED();
       return;
   }

   if (!out_aom_image || out_aom_image->fmt != input_format ||
       static_cast<int>(out_aom_image->w) != input_buffer.width() ||
       static_cast<int>(out_aom_image->h) != input_buffer.height()) {
     out_aom_image.reset(
         aom_img_wrap(/*img=*/nullptr, input_format, input_buffer.width(),
                      input_buffer.height(), /*align=*/1, /*img_data=*/nullptr));

     RTC_LOG(LS_WARNING) << __FUNCTION__ << " input_format=" << input_format
                         << " input_buffer.width()=" << input_buffer.width()
                         << " input_buffer.height()=" << input_buffer.height()
                         << " w=" << out_aom_image->w
                         << " h=" << out_aom_image->h
                         << " d_w=" << out_aom_image->d_w
                         << " d_h=" << out_aom_image->d_h
                         << " r_w=" << out_aom_image->r_w
                         << " r_h=" << out_aom_image->r_h;
   }

   if (input_format == AOM_IMG_FMT_I420) {
     const I420BufferInterface* i420_buffer = input_buffer.GetI420();
     RTC_DCHECK(i420_buffer);
     out_aom_image->planes[AOM_PLANE_Y] =
         const_cast<unsigned char*>(i420_buffer->DataY());
     out_aom_image->planes[AOM_PLANE_U] =
         const_cast<unsigned char*>(i420_buffer->DataU());
     out_aom_image->planes[AOM_PLANE_V] =
         const_cast<unsigned char*>(i420_buffer->DataV());
     out_aom_image->stride[AOM_PLANE_Y] = i420_buffer->StrideY();
     out_aom_image->stride[AOM_PLANE_U] = i420_buffer->StrideU();
     out_aom_image->stride[AOM_PLANE_V] = i420_buffer->StrideV();
   } else {
     const NV12BufferInterface* nv12_buffer = input_buffer.GetNV12();
     RTC_DCHECK(nv12_buffer);
     out_aom_image->planes[AOM_PLANE_Y] =
         const_cast<unsigned char*>(nv12_buffer->DataY());
     out_aom_image->planes[AOM_PLANE_U] =
         const_cast<unsigned char*>(nv12_buffer->DataUV());
     out_aom_image->planes[AOM_PLANE_V] = nullptr;
     out_aom_image->stride[AOM_PLANE_Y] = nv12_buffer->StrideY();
     out_aom_image->stride[AOM_PLANE_U] = nv12_buffer->StrideUV();
     out_aom_image->stride[AOM_PLANE_V] = 0;
   }
 }

 aom_svc_ref_frame_config_t GetSvcRefFrameConfig(
     const VideoEncoderInterface::FrameEncodeSettings& settings) {
   // Buffer alias to use for each position. In particular when there are two
   // buffers being used, prefer to alias them as LAST and GOLDEN, since the AV1
   // bitstream format has dedicated fields for them. See last_frame_idx and
   // golden_frame_idx in the av1 spec
   // https://aomediacodec.github.io/av1-spec/av1-spec.pdf.

   // Libaom is also compiled for RTC, which limits the number of references to
   // at most three, and they must be aliased as LAST, GOLDEN and ALTREF. Also
   // note that libaom favors LAST the most, and GOLDEN second most, so buffers
   // should be specified in order of how useful they are for prediction. Libaom
   // could be updated to make LAST, GOLDEN and ALTREF equivalent, but that is
   // not a priority for now. All aliases can be used to update buffers.
   // TD: Automatically select LAST, GOLDEN and ALTREF depending on previous
   //       buffer usage.
   static constexpr int kPreferedAlias[] = {0,  // LAST
                                            3,  // GOLDEN
                                            6,  // ALTREF
                                            1, 2, 4, 5};

   aom_svc_ref_frame_config_t ref_frame_config = {};

   int alias_index = 0;
   if (!settings.reference_buffers.empty()) {
     for (size_t i = 0; i < settings.reference_buffers.size(); ++i) {
       ref_frame_config.ref_idx[kPreferedAlias[alias_index]] =
           settings.reference_buffers[i];
       ref_frame_config.reference[kPreferedAlias[alias_index]] = 1;
       alias_index++;
     }

     // Delta frames must not alias unused buffers, and since start frames only
     // update some buffers it is not safe to leave unused aliases to simply
     // point to buffer 0.
     for (size_t i = settings.reference_buffers.size();
          i < std::size(ref_frame_config.ref_idx); ++i) {
       ref_frame_config.ref_idx[kPreferedAlias[i]] =
           settings.reference_buffers.back();
     }
   }

   if (settings.update_buffer) {
     if (!absl::c_linear_search(settings.reference_buffers,
                                *settings.update_buffer)) {
       ref_frame_config.ref_idx[kPreferedAlias[alias_index]] =
           *settings.update_buffer;
       alias_index++;
     }
     ref_frame_config.refresh[*settings.update_buffer] = 1;
   }

   char buf[256];
   rtc::SimpleStringBuilder sb(buf);
   sb << " spatial_id=" << settings.spatial_id;
   sb << "  ref_idx=[ ";
   for (auto r : ref_frame_config.ref_idx) {
     sb << r << " ";
   }
   sb << "]  reference=[ ";
   for (auto r : ref_frame_config.reference) {
     sb << r << " ";
   }
   sb << "]  refresh=[ ";
   for (auto r : ref_frame_config.refresh) {
     sb << r << " ";
   }
   sb << "]";

   RTC_LOG(LS_WARNING) << __FUNCTION__ << sb.str();

   return ref_frame_config;
 }

 aom_svc_params_t GetSvcParams(
     const webrtc::VideoFrameBuffer& frame_buffer,
     const std::vector<VideoEncoderInterface::FrameEncodeSettings>&
         frame_settings) {
   aom_svc_params_t svc_params = {};
   svc_params.number_spatial_layers = frame_settings.back().spatial_id + 1;
   svc_params.number_temporal_layers = kMaxTemporalLayers;

   // TD: What about svc_params.framerate_factor?
   // If `framerate_factors` are left at 0 then configured bitrate values will
   // not be picked up by libaom.
   for (int tid = 0; tid < svc_params.number_temporal_layers; ++tid) {
     svc_params.framerate_factor[tid] = 1;
   }

   // If the scaling factor is left at zero for unused layers a division by zero
   // will happen inside libaom, default all layers to one.
   for (int sid = 0; sid < svc_params.number_spatial_layers; ++sid) {
     svc_params.scaling_factor_num[sid] = 1;
     svc_params.scaling_factor_den[sid] = 1;
   }

   for (const VideoEncoderInterface::FrameEncodeSettings& settings :
        frame_settings) {
     std::optional<Rational> scaling_factor = GetScalingFactor(
         {frame_buffer.width(), frame_buffer.height()}, settings.resolution);
     RTC_CHECK(scaling_factor);
     svc_params.scaling_factor_num[settings.spatial_id] =
         scaling_factor->numerator;
     svc_params.scaling_factor_den[settings.spatial_id] =
         scaling_factor->denominator;

     const int flat_layer_id =
         settings.spatial_id * svc_params.number_temporal_layers +
         settings.temporal_id;

     RTC_LOG(LS_WARNING) << __FUNCTION__ << " flat_layer_id=" << flat_layer_id
                         << " num="
                         << svc_params.scaling_factor_num[settings.spatial_id]
                         << " den="
                         << svc_params.scaling_factor_den[settings.spatial_id];

     absl::visit(
         [&](auto&& arg) {
           using T = std::decay_t<decltype(arg)>;
           if constexpr (std::is_same_v<T, Cbr>) {
             // Libaom calculates the total bitrate across all spatial layers by
             // summing the bitrate of the last temporal layer in each spatial
             // layer. This means the bitrate for the top temporal layer always
             // has to be set even if that temporal layer is not being encoded.
             const int last_temporal_layer_in_spatial_layer_id =
                 settings.spatial_id * svc_params.number_temporal_layers +
                 (kMaxTemporalLayers - 1);
             svc_params
                 .layer_target_bitrate[last_temporal_layer_in_spatial_layer_id] =
                 arg.target_bitrate.kbps();

             svc_params.layer_target_bitrate[flat_layer_id] =
                 arg.target_bitrate.kbps();
             // When libaom is configured with `AOM_CBR` it will still limit QP
             // to stay between `min_quantizers` and `max_quantizers'. Set
             // `max_quantizers` to max QP to avoid the encoder overshooting.
             svc_params.max_quantizers[flat_layer_id] = kMaxQp;
             svc_params.min_quantizers[flat_layer_id] = 0;
           } else if constexpr (std::is_same_v<T, Cqp>) {
             // When libaom is configured with `AOM_Q` it will still look at the
             // `layer_target_bitrate` to determine whether the layer is disabled
             // or not. Set `layer_target_bitrate` to 1 so that libaom knows the
             // layer is active.
             svc_params.layer_target_bitrate[flat_layer_id] = 1;
             svc_params.max_quantizers[flat_layer_id] = arg.target_qp;
             svc_params.min_quantizers[flat_layer_id] = arg.target_qp;
             RTC_LOG(LS_WARNING) << __FUNCTION__ << " svc_params.qp["
                                 << flat_layer_id << "]=" << arg.target_qp;
             // TD: Does libaom look at both max and min? Shouldn't it just be
             //       one of them
           }
         },
         settings.rate_options);
   }

   char buf[512];
   rtc::SimpleStringBuilder sb(buf);
   sb << "GetSvcParams" << " layer bitrates kbps";
   for (int s = 0; s < svc_params.number_spatial_layers; ++s) {
     sb << " S" << s << "=[ ";
     for (int t = 0; t < svc_params.number_temporal_layers; ++t) {
       int id = s * svc_params.number_temporal_layers + t;
       sb << "T" << t << "=" << svc_params.layer_target_bitrate[id] << " ";
     }
     sb << "]";
   }

   RTC_LOG(LS_WARNING) << sb.str();

   return svc_params;
 }

 void LibaomAv1Encoder::Encode(
     rtc::scoped_refptr<webrtc::VideoFrameBuffer> frame_buffer,
     const TemporalUnitSettings& tu_settings,
     std::vector<FrameEncodeSettings> frame_settings) {
   absl::Cleanup on_return = [&] {
     // On return call `EncodeComplete` with EncodingError result unless they
     // were already called with an EncodedData result.
     for (FrameEncodeSettings& settings : frame_settings) {
       if (settings.frame_output) {
         settings.frame_output->EncodeComplete(EncodingError());
       }
     }
   };

   if (!ValidateEncodeParams(*frame_buffer, tu_settings, frame_settings,
                             last_resolution_in_buffer_, cfg_.rc_end_usage)) {
     return;
   }

   if (current_content_type_ != tu_settings.content_hint) {
     if (tu_settings.content_hint == VideoCodecMode::kScreensharing) {
       // TD: Set speed 11?
       SET_OR_RETURN(AV1E_SET_TUNE_CONTENT, AOM_CONTENT_SCREEN);
       SET_OR_RETURN(AV1E_SET_ENABLE_PALETTE, 1);
     } else {
       SET_OR_RETURN(AV1E_SET_TUNE_CONTENT, AOM_CONTENT_DEFAULT);
       SET_OR_RETURN(AV1E_SET_ENABLE_PALETTE, 0);
     }
     current_content_type_ = tu_settings.content_hint;
   }

   if (cfg_.rc_end_usage == AOM_CBR) {
     DataRate accum_rate = DataRate::Zero();
     for (const FrameEncodeSettings& settings : frame_settings) {
       accum_rate += absl::get<Cbr>(settings.rate_options).target_bitrate;
     }
     cfg_.rc_target_bitrate = accum_rate.kbps();
     RTC_LOG(LS_WARNING) << __FUNCTION__
                         << " cfg_.rc_target_bitrate=" << cfg_.rc_target_bitrate;
   }

   if (static_cast<int>(cfg_.g_w) != frame_buffer->width() ||
       static_cast<int>(cfg_.g_h) != frame_buffer->height()) {
     RTC_LOG(LS_WARNING) << __FUNCTION__ << " resolution changed from "
                         << cfg_.g_w << "x" << cfg_.g_h << " to "
                         << frame_buffer->width() << "x"
                         << frame_buffer->height();
     ThreadTilesAndSuperblockSizeInfo ttsbi = GetThreadingTilesAndSuperblockSize(
         frame_buffer->width(), frame_buffer->height(), max_number_of_threads_);
     SET_OR_RETURN(AV1E_SET_SUPERBLOCK_SIZE, ttsbi.superblock_size);
     SET_OR_RETURN(AV1E_SET_TILE_ROWS, ttsbi.exp_tile_rows);
     SET_OR_RETURN(AV1E_SET_TILE_COLUMNS, ttsbi.exp_tile_colums);
     cfg_.g_threads = ttsbi.num_threads;
     cfg_.g_w = frame_buffer->width();
     cfg_.g_h = frame_buffer->height();
   }

   PrepareInputImage(*frame_buffer, image_to_encode_);

   // The bitrates caluclated internally in libaom when `AV1E_SET_SVC_PARAMS` is
   // called depends on the currently configured `cfg_.rc_target_bitrate`. If the
   // total target bitrate is not updated first a division by zero could happen.
   if (aom_codec_err_t ret = aom_codec_enc_config_set(&ctx_, &cfg_);
       ret != AOM_CODEC_OK) {
     RTC_LOG(LS_ERROR) << "aom_codec_enc_config_set returned " << ret;
     return;
   }
   aom_svc_params_t svc_params = GetSvcParams(*frame_buffer, frame_settings);
   SET_OR_RETURN(AV1E_SET_SVC_PARAMS, &svc_params);

   // The libaom AV1 encoder requires that `aom_codec_encode` is called for
   // every spatial layer, even if no frame should be encoded for that layer.
   std::array<FrameEncodeSettings*, kMaxSpatialLayersWtf>
       settings_for_spatial_id;
   settings_for_spatial_id.fill(nullptr);
   FrameEncodeSettings settings_for_unused_layer;
   for (FrameEncodeSettings& settings : frame_settings) {
     settings_for_spatial_id[settings.spatial_id] = &settings;
   }

   for (int sid = frame_settings[0].spatial_id;
        sid < svc_params.number_spatial_layers; ++sid) {
     const bool layer_enabled = settings_for_spatial_id[sid] != nullptr;
     FrameEncodeSettings& settings = layer_enabled
                                         ? *settings_for_spatial_id[sid]
                                         : settings_for_unused_layer;

     aom_svc_layer_id_t layer_id = {
         .spatial_layer_id = sid,
         .temporal_layer_id = settings.temporal_id,
     };
     SET_OR_RETURN(AV1E_SET_SVC_LAYER_ID, &layer_id);
     aom_svc_ref_frame_config_t ref_config = GetSvcRefFrameConfig(settings);
     SET_OR_RETURN(AV1E_SET_SVC_REF_FRAME_CONFIG, &ref_config);

     // TD: Duration can't be zero, what does it matter when the layer is
     // not being encoded?
     TimeDelta duration = TimeDelta::Millis(1);
     if (layer_enabled) {
       if (const Cbr* cbr = absl::get_if<Cbr>(&settings.rate_options)) {
         duration = cbr->duration;
       } else {
         // TD: What should duration be when Cqp is used?
         duration = TimeDelta::Millis(1);
       }

       if (settings.effort_level != current_effort_level_[settings.spatial_id]) {
         // For RTC we use speed level 6 to 10, with 8 being the default. Note
         // that low effort means higher speed.
         SET_OR_RETURN(AOME_SET_CPUUSED, 8 - settings.effort_level);
         current_effort_level_[settings.spatial_id] = settings.effort_level;
       }
     }

     RTC_LOG(LS_WARNING)
         << __FUNCTION__ << " timestamp="
         << (tu_settings.presentation_timestamp.ms() * kRtpTicksPerSecond / 1000)
         << "  duration=" << (duration.ms() * kRtpTicksPerSecond / 1000)
         << "  type="
         << (settings.frame_type == FrameType::kKeyframe ? "key" : "delta");
     aom_codec_err_t ret = aom_codec_encode(
         &ctx_, &*image_to_encode_, tu_settings.presentation_timestamp.ms() * 90,
         duration.ms() * 90,
         settings.frame_type == FrameType::kKeyframe ? AOM_EFLAG_FORCE_KF : 0);
     if (ret != AOM_CODEC_OK) {
       RTC_LOG(LS_WARNING) << "aom_codec_encode returned " << ret;
       return;
     }

     if (!layer_enabled) {
       continue;
     }

     if (settings.frame_type == FrameType::kKeyframe) {
       last_resolution_in_buffer_ = {};
     }

     if (settings.update_buffer) {
       last_resolution_in_buffer_[*settings.update_buffer] = settings.resolution;
     }

     EncodedData result;
     aom_codec_iter_t iter = nullptr;
     bool bitstream_produced = false;
     while (const aom_codec_cx_pkt_t* pkt =
                aom_codec_get_cx_data(&ctx_, &iter)) {
       if (pkt->kind == AOM_CODEC_CX_FRAME_PKT && pkt->data.frame.sz > 0) {
         SET_OR_RETURN(AOME_GET_LAST_QUANTIZER_64, &result.encoded_qp);
         result.frame_type = pkt->data.frame.flags & AOM_EFLAG_FORCE_KF
                                 ? FrameType::kKeyframe
                                 : FrameType::kDeltaFrame;
         rtc::ArrayView<uint8_t> output_buffer =
             settings.frame_output->GetBitstreamOutputBuffer(
                 DataSize::Bytes(pkt->data.frame.sz));
         if (output_buffer.size() != pkt->data.frame.sz) {
           return;
         }
         memcpy(output_buffer.data(), pkt->data.frame.buf, pkt->data.frame.sz);
         bitstream_produced = true;
         break;
       }
     }

     if (!bitstream_produced) {
       return;
     } else {
       RTC_CHECK(settings.frame_output);
       settings.frame_output->EncodeComplete(result);
       // To avoid invoking any callback more than once.
       settings.frame_output = nullptr;
     }
   }
 }
 }  // namespace

 std::string LibaomAv1EncoderFactory::CodecName() const {
   return "AV1";
 }

 std::string LibaomAv1EncoderFactory::ImplementationName() const {
   return "Libaom";
 }

 std::map<std::string, std::string> LibaomAv1EncoderFactory::CodecSpecifics()
     const {
   return {};
 }

 // clang-format off
 // The formater and cpplint have conflicting ideas.
 VideoEncoderFactoryInterface::Capabilities
 LibaomAv1EncoderFactory::GetEncoderCapabilities() const {
   return {
       .prediction_constraints = {
            .num_buffers = kNumBuffers,
            .max_references = kMaxReferences,
            .max_temporal_layers = kMaxTemporalLayers,
            .buffer_space_type = VideoEncoderFactoryInterface::Capabilities::
                PredictionConstraints::BufferSpaceType::kSingleKeyframe,
            .max_spatial_layers = kMaxSpatialLayersWtf,
            .scaling_factors = {kSupportedScalingFactors.begin(),
                                kSupportedScalingFactors.end()},
            .supported_frame_types = {FrameType::kKeyframe,
                                      FrameType::kStartFrame,
                                      FrameType::kDeltaFrame}},
       .input_constraints = {
               .min = {.width = 64, .height = 36},
               .max = {.width = 3840, .height = 2160},
               .pixel_alignment = 1,
               .input_formats = {kSupportedInputFormats.begin(),
                                 kSupportedInputFormats.end()},
           },
       .encoding_formats = {{.sub_sampling = EncodingFormat::k420,
                             .bit_depth = 8}},
       .rate_control = {
            .qp_range = {0, kMaxQp},
            .rc_modes = {VideoEncoderFactoryInterface::RateControlMode::kCbr,
                         VideoEncoderFactoryInterface::RateControlMode::kCqp}},
       .performance = {.encode_on_calling_thread = true,
                       .min_max_effort_level = {kMinEffortLevel,
                                                kMaxEffortLevel}},
   };
 }
 // clang-format on

 std::unique_ptr<VideoEncoderInterface> LibaomAv1EncoderFactory::CreateEncoder(
     const StaticEncoderSettings& settings,
     const std::map<std::string, std::string>& encoder_specific_settings) {
   auto encoder = std::make_unique<LibaomAv1Encoder>();
   if (!encoder->InitEncode(settings, encoder_specific_settings)) {
     return nullptr;
   }
   return encoder;
 }

 }  // namespace webrtc