Extract encode method of libaom av1 encoder.
This is a refactoring the extracts the core encode method and the logic
to populate EncodedImage and CodecSpecificInfo in helper methods.
This will make it easier to encode calls from several call sites in
the libaom av1 wrapper.
Bug: webrtc:443906251
Change-Id: If39685e357a0a293eceb0dfe879f1810b017542d
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/408783
Reviewed-by: Danil Chapovalov <danilchap@webrtc.org>
Commit-Queue: Danil Chapovalov <danilchap@webrtc.org>
Auto-Submit: Erik Språng <sprang@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#45637}
diff --git a/modules/video_coding/codecs/av1/libaom_av1_encoder.cc b/modules/video_coding/codecs/av1/libaom_av1_encoder.cc
index 2f48f68..403a781 100644
--- a/modules/video_coding/codecs/av1/libaom_av1_encoder.cc
+++ b/modules/video_coding/codecs/av1/libaom_av1_encoder.cc
@@ -15,6 +15,7 @@
#include <numeric>
#include <optional>
#include <utility>
+#include <variant>
#include <vector>
#include "absl/algorithm/container.h"
@@ -83,6 +84,14 @@
return AOM_SUPERBLOCK_SIZE_DYNAMIC;
}
+void PopulateEncodedImageFromVideoFrame(const VideoFrame& frame,
+ EncodedImage& encoded_image) {
+ encoded_image.SetRtpTimestamp(frame.rtp_timestamp());
+ encoded_image.SetPresentationTimestamp(frame.presentation_timestamp());
+ encoded_image.capture_time_ms_ = frame.render_time_ms();
+ encoded_image.rotation_ = frame.rotation();
+ encoded_image.SetColorSpace(frame.color_space());
+}
class LibaomAv1Encoder final : public VideoEncoder {
public:
LibaomAv1Encoder(const Environment& env, LibaomAv1EncoderSettings settings);
@@ -132,6 +141,19 @@
// will be the input resolution.
void AdjustScalingFactorsForTopActiveLayer();
+ using EncodeResult = std::variant<aom_codec_err_t, EncodedImage>;
+
+ // Duration is specified in ticks based on aom_codec_enc_cfg_t::g_timebase,
+ // in practice that that is kVideoPayloadTypeFrequency (90kHz).
+ EncodeResult DoEncode(uint32_t duration,
+ aom_enc_frame_flags_t flags,
+ ScalableVideoController::LayerFrameConfig* layer_frame);
+
+ CodecSpecificInfo CreateCodecSpecificInfo(
+ const EncodedImage& image,
+ const ScalableVideoController::LayerFrameConfig& layer_frame,
+ bool end_of_picture);
+
std::unique_ptr<ScalableVideoController> svc_controller_;
std::optional<ScalabilityMode> scalability_mode_;
// Original scaling factors for all configured layers active and inactive.
@@ -766,109 +788,35 @@
SetSvcRefFrameConfig(*layer_frame);
}
- // Encode a frame. The presentation timestamp `pts` should not use real
- // timestamps from frames or the wall clock, as that can cause the rate
- // controller to misbehave.
- aom_codec_err_t ret =
- aom_codec_encode(&ctx_, frame_for_encode_, timestamp_, duration, flags);
- if (ret != AOM_CODEC_OK) {
- RTC_LOG(LS_WARNING) << "LibaomAv1Encoder::Encode returned " << ret
- << " on aom_codec_encode.";
- return WEBRTC_VIDEO_CODEC_ERROR;
+ EncodeResult result = DoEncode(duration, flags, layer_frame);
+ if (aom_codec_err_t* status = std::get_if<aom_codec_err_t>(&result);
+ status != nullptr) {
+ if (*status == AOM_CODEC_OK) {
+ // AOM_CODEC_OK means success with no image, so do nothing.
+ continue;
+ } else {
+ RTC_LOG(LS_WARNING) << "LibaomAv1Encoder::Encode returned " << status
+ << " on aom_codec_encode.";
+ return WEBRTC_VIDEO_CODEC_ERROR;
+ }
}
if (non_encoded_layer_frame) {
continue;
}
- // Get encoded image data.
- EncodedImage encoded_image;
- const aom_codec_cx_pkt_t* pkt;
- aom_codec_iter_t iter = nullptr;
- int data_pkt_count = 0;
- while ((pkt = aom_codec_get_cx_data(&ctx_, &iter)) != nullptr) {
- if (pkt->kind == AOM_CODEC_CX_FRAME_PKT && pkt->data.frame.sz > 0) {
- if (data_pkt_count > 0) {
- RTC_LOG(LS_WARNING) << "LibaomAv1Encoder::Encoder returned more than "
- "one data packet for an input video frame.";
- Release();
- }
- encoded_image.SetEncodedData(EncodedImageBuffer::Create(
- /*data=*/static_cast<const uint8_t*>(pkt->data.frame.buf),
- /*size=*/pkt->data.frame.sz));
+ RTC_DCHECK(std::holds_alternative<EncodedImage>(result));
+ EncodedImage encoded_image = std::get<EncodedImage>(std::move(result));
- if ((pkt->data.frame.flags & AOM_FRAME_IS_KEY) != 0) {
- layer_frame->Keyframe();
- }
+ RTC_DCHECK_GT(encoded_image.size(), 0u);
+ PopulateEncodedImageFromVideoFrame(frame, encoded_image);
+ CodecSpecificInfo codec_specifics =
+ CreateCodecSpecificInfo(encoded_image, *layer_frame, end_of_picture);
- encoded_image._frameType = layer_frame->IsKeyframe()
- ? VideoFrameType::kVideoFrameKey
- : VideoFrameType::kVideoFrameDelta;
- encoded_image.SetRtpTimestamp(frame.rtp_timestamp());
- encoded_image.SetPresentationTimestamp(frame.presentation_timestamp());
- encoded_image.capture_time_ms_ = frame.render_time_ms();
- encoded_image.rotation_ = frame.rotation();
- encoded_image.content_type_ = VideoContentType::UNSPECIFIED;
- // If encoded image width/height info are added to aom_codec_cx_pkt_t,
- // use those values in lieu of the values in frame.
- if (svc_params_) {
- int n = scaling_factors_num_[layer_frame->SpatialId()];
- int d = scaling_factors_den_[layer_frame->SpatialId()];
- encoded_image._encodedWidth = encoder_settings_.width * n / d;
- encoded_image._encodedHeight = encoder_settings_.height * n / d;
- encoded_image.SetSpatialIndex(layer_frame->SpatialId());
- encoded_image.SetTemporalIndex(layer_frame->TemporalId());
- } else {
- encoded_image._encodedWidth = cfg_.g_w;
- encoded_image._encodedHeight = cfg_.g_h;
- }
- encoded_image.timing_.flags = VideoSendTiming::kInvalid;
-
- int qp = -1;
- SET_ENCODER_PARAM_OR_RETURN_ERROR(AOME_GET_LAST_QUANTIZER, &qp);
- encoded_image.qp_ = qp;
-
- encoded_image.SetColorSpace(frame.color_space());
- ++data_pkt_count;
- } else if (pkt->kind == AOM_CODEC_PSNR_PKT) {
- // PSNR index: 0: total, 1: Y, 2: U, 3: V
- encoded_image.set_psnr(
- EncodedImage::Psnr({.y = pkt->data.psnr.psnr[1],
- .u = pkt->data.psnr.psnr[2],
- .v = pkt->data.psnr.psnr[3]}));
- }
- }
-
- // Deliver encoded image data.
- if (encoded_image.size() > 0) {
- CodecSpecificInfo codec_specific_info;
- codec_specific_info.codecType = kVideoCodecAV1;
- codec_specific_info.end_of_picture = end_of_picture;
- codec_specific_info.scalability_mode = scalability_mode_;
- bool is_keyframe = layer_frame->IsKeyframe();
- codec_specific_info.generic_frame_info =
- svc_controller_->OnEncodeDone(*layer_frame);
- if (is_keyframe && codec_specific_info.generic_frame_info) {
- codec_specific_info.template_structure =
- svc_controller_->DependencyStructure();
- auto& resolutions = codec_specific_info.template_structure->resolutions;
- if (SvcEnabled()) {
- resolutions.resize(svc_params_->number_spatial_layers);
- for (int sid = 0; sid < svc_params_->number_spatial_layers; ++sid) {
- int n = scaling_factors_num_[sid];
- int d = scaling_factors_den_[sid];
- resolutions[sid] =
- RenderResolution(encoder_settings_.width * n / d,
- encoder_settings_.height * n / d);
- }
- } else {
- resolutions = {RenderResolution(cfg_.g_w, cfg_.g_h)};
- }
- }
- encoded_images.emplace_back(std::move(encoded_image),
- std::move(codec_specific_info));
- }
+ encoded_images.emplace_back(std::move(encoded_image),
+ std::move(codec_specifics));
}
+
if (!encoded_images.empty()) {
encoded_images.back().second.end_of_picture = true;
}
@@ -880,6 +828,113 @@
return WEBRTC_VIDEO_CODEC_OK;
}
+LibaomAv1Encoder::EncodeResult LibaomAv1Encoder::DoEncode(
+ uint32_t duration,
+ aom_enc_frame_flags_t flags,
+ ScalableVideoController::LayerFrameConfig* layer_frame) {
+ // Encode a frame. The presentation timestamp `pts` should not use real
+ // timestamps from frames or the wall clock, as that can cause the rate
+ // controller to misbehave.
+ aom_codec_err_t ret =
+ aom_codec_encode(&ctx_, frame_for_encode_, timestamp_, duration, flags);
+ if (ret != AOM_CODEC_OK) {
+ return ret;
+ }
+
+ // Get encoded image data.
+ aom_codec_iter_t iter = nullptr;
+ int data_pkt_count = 0;
+ EncodedImage encoded_image;
+ const aom_codec_cx_pkt_t* pkt = nullptr;
+ while ((pkt = aom_codec_get_cx_data(&ctx_, &iter)) != nullptr) {
+ if (pkt->kind == AOM_CODEC_CX_FRAME_PKT && pkt->data.frame.sz > 0) {
+ if (data_pkt_count > 0) {
+ RTC_LOG(LS_WARNING) << "LibaomAv1Encoder::Encoder returned more than "
+ "one data packet for an input video frame.";
+ Release();
+ return AOM_CODEC_ERROR;
+ }
+ encoded_image.SetEncodedData(EncodedImageBuffer::Create(
+ /*data=*/static_cast<const uint8_t*>(pkt->data.frame.buf),
+ /*size=*/pkt->data.frame.sz));
+
+ if ((pkt->data.frame.flags & AOM_FRAME_IS_KEY) != 0) {
+ layer_frame->Keyframe();
+ }
+
+ encoded_image._frameType = layer_frame->IsKeyframe()
+ ? VideoFrameType::kVideoFrameKey
+ : VideoFrameType::kVideoFrameDelta;
+
+ encoded_image.content_type_ = VideoContentType::UNSPECIFIED;
+ // If encoded image width/height info are added to aom_codec_cx_pkt_t,
+ // use those values in lieu of the values in frame.
+ if (svc_params_) {
+ int n = scaling_factors_num_[layer_frame->SpatialId()];
+ int d = scaling_factors_den_[layer_frame->SpatialId()];
+ encoded_image._encodedWidth = encoder_settings_.width * n / d;
+ encoded_image._encodedHeight = encoder_settings_.height * n / d;
+ encoded_image.SetSpatialIndex(layer_frame->SpatialId());
+ encoded_image.SetTemporalIndex(layer_frame->TemporalId());
+ } else {
+ encoded_image._encodedWidth = cfg_.g_w;
+ encoded_image._encodedHeight = cfg_.g_h;
+ }
+ encoded_image.timing_.flags = VideoSendTiming::kInvalid;
+
+ if (!SetEncoderControlParameters(AOME_GET_LAST_QUANTIZER,
+ &encoded_image.qp_)) {
+ RTC_LOG(LS_WARNING) << "Unable to fetch QP for frame.";
+ return AOM_CODEC_ERROR;
+ }
+
+ ++data_pkt_count;
+ } else if (pkt->kind == AOM_CODEC_PSNR_PKT) {
+ // PSNR index: 0: total, 1: Y, 2: U, 3: V
+ encoded_image.set_psnr(EncodedImage::Psnr({.y = pkt->data.psnr.psnr[1],
+ .u = pkt->data.psnr.psnr[2],
+ .v = pkt->data.psnr.psnr[3]}));
+ }
+ }
+
+ if (encoded_image.size() == 0) {
+ // Encode success, but no image produced.
+ return AOM_CODEC_OK;
+ }
+
+ return encoded_image;
+}
+
+CodecSpecificInfo LibaomAv1Encoder::CreateCodecSpecificInfo(
+ const EncodedImage& image,
+ const ScalableVideoController::LayerFrameConfig& layer_frame,
+ bool end_of_picture) {
+ CodecSpecificInfo codec_specific_info;
+ codec_specific_info.codecType = kVideoCodecAV1;
+ codec_specific_info.end_of_picture = end_of_picture;
+ codec_specific_info.scalability_mode = scalability_mode_;
+ bool is_keyframe = layer_frame.IsKeyframe();
+ codec_specific_info.generic_frame_info =
+ svc_controller_->OnEncodeDone(layer_frame);
+ if (is_keyframe && codec_specific_info.generic_frame_info) {
+ codec_specific_info.template_structure =
+ svc_controller_->DependencyStructure();
+ auto& resolutions = codec_specific_info.template_structure->resolutions;
+ if (SvcEnabled()) {
+ resolutions.resize(svc_params_->number_spatial_layers);
+ for (int sid = 0; sid < svc_params_->number_spatial_layers; ++sid) {
+ int n = scaling_factors_num_[sid];
+ int d = scaling_factors_den_[sid];
+ resolutions[sid] = RenderResolution(encoder_settings_.width * n / d,
+ encoder_settings_.height * n / d);
+ }
+ } else {
+ resolutions = {RenderResolution(cfg_.g_w, cfg_.g_h)};
+ }
+ }
+ return codec_specific_info;
+}
+
void LibaomAv1Encoder::SetRates(const RateControlParameters& parameters) {
if (!inited_) {
RTC_LOG(LS_WARNING) << "SetRates() while encoder is not initialized";