Introduce layering controller interface for av1 encoder
Add TODOs to the AV1 encoder wrapper where it is supposed to be used.
Bug: webrtc:11404
Change-Id: If049066b84be72829867d5084827a7d275648a7b
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/174806
Reviewed-by: Philip Eliasson <philipel@webrtc.org>
Commit-Queue: Danil Chapovalov <danilchap@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#31278}
diff --git a/modules/video_coding/codecs/av1/BUILD.gn b/modules/video_coding/codecs/av1/BUILD.gn
index b2b82d4..e6b689b 100644
--- a/modules/video_coding/codecs/av1/BUILD.gn
+++ b/modules/video_coding/codecs/av1/BUILD.gn
@@ -36,11 +36,27 @@
}
}
+rtc_source_set("scalable_video_controller") {
+ sources = [
+ "scalable_video_controller.h",  # ScalableVideoController interface.
+ "scalable_video_controller_no_layering.cc",  # Single-layer implementation.
+ "scalable_video_controller_no_layering.h",
+ ]
+ deps = [
+ "../../../../api/transport/rtp:dependency_descriptor",
+ "../../../../common_video/generic_frame_descriptor",
+ "../../../../rtc_base:checks",
+ "//third_party/abseil-cpp/absl/container:inlined_vector",
+ "//third_party/abseil-cpp/absl/types:optional",
+ ]
+}
+
rtc_library("libaom_av1_encoder") {
visibility = [ "*" ]
poisonous = [ "software_video_codecs" ]
public = [ "libaom_av1_encoder.h" ]
deps = [
+ ":scalable_video_controller",
"../../../../api/video_codecs:video_codecs_api",
"//third_party/abseil-cpp/absl/base:core_headers",
]
@@ -79,6 +95,8 @@
"../..:video_codec_interface",
"../../../../api:create_frame_generator",
"../../../../api:frame_generator_api",
+ "../../../../api:mock_video_encoder",
+ "../../../../api/video:video_frame_i420",
"../../../../api/video_codecs:video_codecs_api",
"../../../../test:test_support",
"//third_party/abseil-cpp/absl/types:optional",
diff --git a/modules/video_coding/codecs/av1/libaom_av1_encoder.cc b/modules/video_coding/codecs/av1/libaom_av1_encoder.cc
index 59ad127..6a01165 100644
--- a/modules/video_coding/codecs/av1/libaom_av1_encoder.cc
+++ b/modules/video_coding/codecs/av1/libaom_av1_encoder.cc
@@ -13,6 +13,7 @@
#include <stdint.h>
#include <memory>
+#include <utility>
#include <vector>
#include "absl/algorithm/container.h"
@@ -22,6 +23,8 @@
#include "api/video/video_frame.h"
#include "api/video_codecs/video_codec.h"
#include "api/video_codecs/video_encoder.h"
+#include "modules/video_coding/codecs/av1/scalable_video_controller.h"
+#include "modules/video_coding/codecs/av1/scalable_video_controller_no_layering.h"
#include "modules/video_coding/include/video_codec_interface.h"
#include "modules/video_coding/include/video_error_codes.h"
#include "rtc_base/checks.h"
@@ -47,7 +50,8 @@
class LibaomAv1Encoder final : public VideoEncoder {
public:
- LibaomAv1Encoder();
+ explicit LibaomAv1Encoder(
+ std::unique_ptr<ScalableVideoController> svc_controller);
~LibaomAv1Encoder();
int InitEncode(const VideoCodec* codec_settings,
@@ -66,6 +70,7 @@
EncoderInfo GetEncoderInfo() const override;
private:
+ const std::unique_ptr<ScalableVideoController> svc_controller_;
bool inited_;
bool keyframe_required_;
VideoCodec encoder_settings_;
@@ -100,11 +105,15 @@
return WEBRTC_VIDEO_CODEC_OK;
}
-LibaomAv1Encoder::LibaomAv1Encoder()
- : inited_(false),
+LibaomAv1Encoder::LibaomAv1Encoder(
+ std::unique_ptr<ScalableVideoController> svc_controller)
+ : svc_controller_(std::move(svc_controller)),
+ inited_(false),
keyframe_required_(true),
frame_for_encode_(nullptr),
- encoded_image_callback_(nullptr) {}
+ encoded_image_callback_(nullptr) {
+ RTC_DCHECK(svc_controller_);
+}
LibaomAv1Encoder::~LibaomAv1Encoder() {
Release();
@@ -205,6 +214,11 @@
return WEBRTC_VIDEO_CODEC_ERROR;
}
+ ScalableVideoController::StreamLayersConfig svc_config =
+ svc_controller_->StreamConfig();
+ // TODO(danilchap): Configure SVC.
+ (void)svc_config;
+
return WEBRTC_VIDEO_CODEC_OK;
}
@@ -239,6 +253,14 @@
frame_types != nullptr &&
absl::c_linear_search(*frame_types, VideoFrameType::kVideoFrameKey);
+ std::vector<ScalableVideoController::LayerFrameConfig> layer_frames =
+ svc_controller_->NextFrameConfig(keyframe_required_);
+
+ if (layer_frames.empty()) {
+ RTC_LOG(LS_ERROR) << "SVCController returned no configuration for a frame.";
+ return WEBRTC_VIDEO_CODEC_ERROR;
+ }
+
// Convert input frame to I420, if needed.
VideoFrame prepped_input_frame = frame;
if (prepped_input_frame.video_frame_buffer()->type() !=
@@ -263,75 +285,94 @@
const uint32_t duration =
kRtpTicksPerSecond / static_cast<float>(encoder_settings_.maxFramerate);
- aom_enc_frame_flags_t flags = (keyframe_required_) ? AOM_EFLAG_FORCE_KF : 0;
- // Encode a frame.
- aom_codec_err_t ret = aom_codec_encode(&ctx_, frame_for_encode_,
- frame.timestamp(), duration, flags);
- if (ret != AOM_CODEC_OK) {
- RTC_LOG(LS_WARNING) << "LibaomAv1Encoder::Encode returned " << ret
- << " on aom_codec_encode.";
- return WEBRTC_VIDEO_CODEC_ERROR;
- }
+ // TODO(danilchap): Remove this check when layering is implemented.
+ RTC_DCHECK_EQ(layer_frames.size(), 1);
+ for (ScalableVideoController::LayerFrameConfig& layer_frame : layer_frames) {
+ aom_enc_frame_flags_t flags =
+ layer_frame.is_keyframe ? AOM_EFLAG_FORCE_KF : 0;
- // Get encoded image data.
- EncodedImage encoded_image;
- encoded_image._completeFrame = true;
- aom_codec_iter_t iter = nullptr;
- int data_pkt_count = 0;
- while (const aom_codec_cx_pkt_t* pkt = aom_codec_get_cx_data(&ctx_, &iter)) {
- if (pkt->kind == AOM_CODEC_CX_FRAME_PKT && pkt->data.frame.sz > 0) {
- if (data_pkt_count > 0) {
- RTC_LOG(LS_WARNING) << "LibaomAv1Encoder::Encoder returned more than "
- "one data packet for an input video frame.";
- Release();
- }
- // TODO(bugs.webrtc.org/11174): Remove this hack when
- // webrtc_pc_e2e::SingleProcessEncodedImageDataInjector not used or fixed
- // not to assume that encoded image transfered as is.
- const uint8_t* data = static_cast<const uint8_t*>(pkt->data.frame.buf);
- size_t size = pkt->data.frame.sz;
- if (size > 2 && data[0] == 0b0'0010'010 && data[1] == 0) {
- // Typically frame starts with a Temporal Delimter OBU of size 0 that is
- // not need by any component in webrtc and discarded during rtp
- // packetization. Before discarded it confuses test framework that
- // assumes received encoded frame is exactly same as sent frame.
- data += 2;
- size -= 2;
- }
- encoded_image.SetEncodedData(EncodedImageBuffer::Create(data, size));
+ // TODO(danilchap): configure buffers and layers based on
+ // `layer_frame.buffers` when layering is enabled.
- bool is_key_frame = ((pkt->data.frame.flags & AOM_EFLAG_FORCE_KF) != 0);
- encoded_image._frameType = is_key_frame
- ? VideoFrameType::kVideoFrameKey
- : VideoFrameType::kVideoFrameDelta;
- encoded_image.SetTimestamp(frame.timestamp());
- encoded_image.capture_time_ms_ = frame.render_time_ms();
- encoded_image.rotation_ = frame.rotation();
- encoded_image.content_type_ = VideoContentType::UNSPECIFIED;
- // If encoded image width/height info are added to aom_codec_cx_pkt_t,
- // use those values in lieu of the values in frame.
- encoded_image._encodedHeight = frame.height();
- encoded_image._encodedWidth = frame.width();
- encoded_image.timing_.flags = VideoSendTiming::kInvalid;
- int qp = -1;
- ret = aom_codec_control(&ctx_, AOME_GET_LAST_QUANTIZER, &qp);
- if (ret != AOM_CODEC_OK) {
- RTC_LOG(LS_WARNING) << "LibaomAv1Encoder::Encode returned " << ret
- << " on control AOME_GET_LAST_QUANTIZER.";
- return WEBRTC_VIDEO_CODEC_ERROR;
- }
- encoded_image.qp_ = qp;
- encoded_image.SetColorSpace(frame.color_space());
- ++data_pkt_count;
+ // Encode a frame.
+ aom_codec_err_t ret = aom_codec_encode(&ctx_, frame_for_encode_,
+ frame.timestamp(), duration, flags);
+ if (ret != AOM_CODEC_OK) {
+ RTC_LOG(LS_WARNING) << "LibaomAv1Encoder::Encode returned " << ret
+ << " on aom_codec_encode.";
+ return WEBRTC_VIDEO_CODEC_ERROR;
}
- }
- // Deliver encoded image data.
- if (encoded_image.size() > 0) {
- CodecSpecificInfo codec_specific_info;
- encoded_image_callback_->OnEncodedImage(encoded_image, &codec_specific_info,
- nullptr);
+ // Get encoded image data.
+ EncodedImage encoded_image;
+ encoded_image._completeFrame = true;
+ aom_codec_iter_t iter = nullptr;
+ int data_pkt_count = 0;
+ while (const aom_codec_cx_pkt_t* pkt =
+ aom_codec_get_cx_data(&ctx_, &iter)) {
+ if (pkt->kind == AOM_CODEC_CX_FRAME_PKT && pkt->data.frame.sz > 0) {
+ if (data_pkt_count > 0) {
+ RTC_LOG(LS_WARNING) << "LibaomAv1Encoder::Encoder returned more than "
+ "one data packet for an input video frame.";
+ Release();
+ }
+ // TODO(bugs.webrtc.org/11174): Remove this hack when
+ // webrtc_pc_e2e::SingleProcessEncodedImageDataInjector not used or
+ // fixed not to assume that encoded image is transferred as is.
+ const uint8_t* data = static_cast<const uint8_t*>(pkt->data.frame.buf);
+ size_t size = pkt->data.frame.sz;
+ if (size > 2 && data[0] == 0b0'0010'010 && data[1] == 0) {
+ // Typically frame starts with a Temporal Delimiter OBU of size 0 that
+ // is not needed by any component in webrtc and discarded during rtp
+ // packetization. Before discarded it confuses test framework that
+ // assumes received encoded frame is exactly same as sent frame.
+ data += 2;
+ size -= 2;
+ }
+ encoded_image.SetEncodedData(EncodedImageBuffer::Create(data, size));
+
+ layer_frame.is_keyframe =
+ ((pkt->data.frame.flags & AOM_EFLAG_FORCE_KF) != 0);
+ encoded_image._frameType = layer_frame.is_keyframe
+ ? VideoFrameType::kVideoFrameKey
+ : VideoFrameType::kVideoFrameDelta;
+ encoded_image.SetTimestamp(frame.timestamp());
+ encoded_image.capture_time_ms_ = frame.render_time_ms();
+ encoded_image.rotation_ = frame.rotation();
+ encoded_image.content_type_ = VideoContentType::UNSPECIFIED;
+ // If encoded image width/height info are added to aom_codec_cx_pkt_t,
+ // use those values in lieu of the values in frame.
+ encoded_image._encodedHeight = frame.height();
+ encoded_image._encodedWidth = frame.width();
+ encoded_image.timing_.flags = VideoSendTiming::kInvalid;
+ int qp = -1;
+ ret = aom_codec_control(&ctx_, AOME_GET_LAST_QUANTIZER, &qp);
+ if (ret != AOM_CODEC_OK) {
+ RTC_LOG(LS_WARNING) << "LibaomAv1Encoder::Encode returned " << ret
+ << " on control AOME_GET_LAST_QUANTIZER.";
+ return WEBRTC_VIDEO_CODEC_ERROR;
+ }
+ encoded_image.qp_ = qp;
+ encoded_image.SetColorSpace(frame.color_space());
+ ++data_pkt_count;
+ }
+ }
+
+ // Deliver encoded image data.
+ if (encoded_image.size() > 0) {
+ CodecSpecificInfo codec_specific_info;
+ codec_specific_info.codecType = kVideoCodecAV1;
+ bool is_keyframe = layer_frame.is_keyframe;
+ codec_specific_info.generic_frame_info =
+ svc_controller_->OnEncodeDone(std::move(layer_frame));
+ if (is_keyframe && codec_specific_info.generic_frame_info) {
+ codec_specific_info.template_structure =
+ svc_controller_->DependencyStructure();
+ }
+ encoded_image_callback_->OnEncodedImage(encoded_image,
+ &codec_specific_info, nullptr);
+ }
}
return WEBRTC_VIDEO_CODEC_OK;
@@ -389,7 +430,13 @@
const bool kIsLibaomAv1EncoderSupported = true;
std::unique_ptr<VideoEncoder> CreateLibaomAv1Encoder() {
- return std::make_unique<LibaomAv1Encoder>();
+ return std::make_unique<LibaomAv1Encoder>(
+ std::make_unique<ScalableVideoControllerNoLayering>());
+}
+
+std::unique_ptr<VideoEncoder> CreateLibaomAv1Encoder(
+ std::unique_ptr<ScalableVideoController> svc_controller) {
+ return std::make_unique<LibaomAv1Encoder>(std::move(svc_controller));
}
} // namespace webrtc
diff --git a/modules/video_coding/codecs/av1/libaom_av1_encoder.h b/modules/video_coding/codecs/av1/libaom_av1_encoder.h
index 4b0ee28..c2f04e6 100644
--- a/modules/video_coding/codecs/av1/libaom_av1_encoder.h
+++ b/modules/video_coding/codecs/av1/libaom_av1_encoder.h
@@ -14,12 +14,15 @@
#include "absl/base/attributes.h"
#include "api/video_codecs/video_encoder.h"
+#include "modules/video_coding/codecs/av1/scalable_video_controller.h"
namespace webrtc {
ABSL_CONST_INIT extern const bool kIsLibaomAv1EncoderSupported;
std::unique_ptr<VideoEncoder> CreateLibaomAv1Encoder();
+std::unique_ptr<VideoEncoder> CreateLibaomAv1Encoder(
+ std::unique_ptr<ScalableVideoController> controller);
} // namespace webrtc
diff --git a/modules/video_coding/codecs/av1/scalable_video_controller.h b/modules/video_coding/codecs/av1/scalable_video_controller.h
new file mode 100644
index 0000000..dec985f
--- /dev/null
+++ b/modules/video_coding/codecs/av1/scalable_video_controller.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#ifndef MODULES_VIDEO_CODING_CODECS_AV1_SCALABLE_VIDEO_CONTROLLER_H_
+#define MODULES_VIDEO_CODING_CODECS_AV1_SCALABLE_VIDEO_CONTROLLER_H_
+
+#include <vector>
+
+#include "absl/container/inlined_vector.h"
+#include "absl/types/optional.h"
+#include "api/transport/rtp/dependency_descriptor.h"
+#include "common_video/generic_frame_descriptor/generic_frame_info.h"
+
+namespace webrtc {
+
+// Controls how video should be encoded to be scalable. Outputs results as a
+// buffer usage configuration for the encoder and enough details to describe
+// the scalability structure via dependency descriptor rtp header extension.
+class ScalableVideoController {
+ public:
+ struct StreamLayersConfig {
+ int num_spatial_layers = 1;
+ int num_temporal_layers = 1;
+ };
+ struct LayerFrameConfig {
+ // Id to match configuration returned by NextFrameConfig with
+ // (possibly modified) configuration passed back via OnEncodeDone.
+ // The meaning of the id is an implementation detail of
+ // the ScalableVideoController.
+ int id = 0;
+
+ // Indicates that the frame should be encoded as a key frame. In
+ // particular, when `is_keyframe=true` the property
+ // `CodecBufferUsage::referenced` should be ignored and treated as false.
+ bool is_keyframe = false;
+
+ int spatial_id = 0;
+ int temporal_id = 0;
+ // Describes which buffers the encoder is allowed to reference and
+ // which buffers the encoder should update.
+ absl::InlinedVector<CodecBufferUsage, kMaxEncoderBuffers> buffers;
+ };
+
+ virtual ~ScalableVideoController() = default;
+
+ // Returns video structure description for the encoder to configure itself.
+ virtual StreamLayersConfig StreamConfig() const = 0;
+
+ // Returns video structure description in a format compatible with the
+ // dependency descriptor rtp header extension.
+ virtual FrameDependencyStructure DependencyStructure() const = 0;
+
+ // When `restart` is true, the first returned `LayerFrameConfig` should have
+ // `is_keyframe` set to true.
+ // The returned vector shouldn't be empty.
+ virtual std::vector<LayerFrameConfig> NextFrameConfig(bool restart) = 0;
+
+ // Returns configuration to pass to the encoded image callback.
+ virtual absl::optional<GenericFrameInfo> OnEncodeDone(
+ LayerFrameConfig config) = 0;
+};
+
+} // namespace webrtc
+
+#endif // MODULES_VIDEO_CODING_CODECS_AV1_SCALABLE_VIDEO_CONTROLLER_H_
diff --git a/modules/video_coding/codecs/av1/scalable_video_controller_no_layering.cc b/modules/video_coding/codecs/av1/scalable_video_controller_no_layering.cc
new file mode 100644
index 0000000..6b63ca4
--- /dev/null
+++ b/modules/video_coding/codecs/av1/scalable_video_controller_no_layering.cc
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#include "modules/video_coding/codecs/av1/scalable_video_controller_no_layering.h"
+
+#include <utility>
+#include <vector>
+
+#include "api/transport/rtp/dependency_descriptor.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+ScalableVideoControllerNoLayering::~ScalableVideoControllerNoLayering() =
+ default;  // Defined here because the header only declares the destructor.
+
+ScalableVideoController::StreamLayersConfig
+ScalableVideoControllerNoLayering::StreamConfig() const {
+ StreamLayersConfig result;  // No layering: one spatial, one temporal layer.
+ result.num_spatial_layers = 1;
+ result.num_temporal_layers = 1;
+ return result;
+}
+
+FrameDependencyStructure
+ScalableVideoControllerNoLayering::DependencyStructure() const {
+ FrameDependencyStructure structure;
+ structure.num_decode_targets = 1;  // One indication per template below.
+ FrameDependencyTemplate a_template;  // The structure's only template.
+ a_template.decode_target_indications = {DecodeTargetIndication::kSwitch};
+ structure.templates.push_back(a_template);
+ return structure;
+}
+
+std::vector<ScalableVideoController::LayerFrameConfig>
+ScalableVideoControllerNoLayering::NextFrameConfig(bool restart) {
+ if (restart) {  // Restart requested: treat the next frame as the first.
+ start_ = true;
+ }
+ std::vector<LayerFrameConfig> result(1);  // Always exactly one config.
+ result[0].id = 0;
+ result[0].is_keyframe = start_;  // Keyframe only on first frame or restart.
+ result[0].buffers = {{/*id=*/0, /*references=*/!start_, /*updates=*/true}};
+
+ start_ = false;  // Later configs are non-key until the next restart.
+ return result;
+}
+
+absl::optional<GenericFrameInfo>
+ScalableVideoControllerNoLayering::OnEncodeDone(LayerFrameConfig config) {
+ RTC_DCHECK_EQ(config.id, 0);  // NextFrameConfig only hands out id 0.
+ absl::optional<GenericFrameInfo> frame_info(absl::in_place);
+ frame_info->encoder_buffers = std::move(config.buffers);
+ if (config.is_keyframe) {  // Keyframes are reported as referencing nothing.
+ for (auto& buffer : frame_info->encoder_buffers) {
+ buffer.referenced = false;
+ }
+ }
+ frame_info->decode_target_indications = {DecodeTargetIndication::kSwitch};
+ return frame_info;
+}
+
+} // namespace webrtc
diff --git a/modules/video_coding/codecs/av1/scalable_video_controller_no_layering.h b/modules/video_coding/codecs/av1/scalable_video_controller_no_layering.h
new file mode 100644
index 0000000..ad73098
--- /dev/null
+++ b/modules/video_coding/codecs/av1/scalable_video_controller_no_layering.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#ifndef MODULES_VIDEO_CODING_CODECS_AV1_SCALABLE_VIDEO_CONTROLLER_NO_LAYERING_H_
+#define MODULES_VIDEO_CODING_CODECS_AV1_SCALABLE_VIDEO_CONTROLLER_NO_LAYERING_H_
+
+#include <vector>
+
+#include "api/transport/rtp/dependency_descriptor.h"
+#include "common_video/generic_frame_descriptor/generic_frame_info.h"
+#include "modules/video_coding/codecs/av1/scalable_video_controller.h"
+
+namespace webrtc {
+
+class ScalableVideoControllerNoLayering : public ScalableVideoController {
+ public:
+ ~ScalableVideoControllerNoLayering() override;
+
+ StreamLayersConfig StreamConfig() const override;  // 1 spatial, 1 temporal.
+ FrameDependencyStructure DependencyStructure() const override;
+
+ std::vector<LayerFrameConfig> NextFrameConfig(bool restart) override;
+ absl::optional<GenericFrameInfo> OnEncodeDone(
+ LayerFrameConfig config) override;
+
+ private:
+ bool start_ = true;  // True when the next frame config must be a keyframe.
+};
+
+} // namespace webrtc
+
+#endif // MODULES_VIDEO_CODING_CODECS_AV1_SCALABLE_VIDEO_CONTROLLER_NO_LAYERING_H_