Introduce layering controller interface for av1 encoder

Add TODOs to the AV1 encoder wrapper where it is supposed to be used.

Bug: webrtc:11404
Change-Id: If049066b84be72829867d5084827a7d275648a7b
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/174806
Reviewed-by: Philip Eliasson <philipel@webrtc.org>
Commit-Queue: Danil Chapovalov <danilchap@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#31278}
diff --git a/modules/video_coding/codecs/av1/BUILD.gn b/modules/video_coding/codecs/av1/BUILD.gn
index b2b82d4..e6b689b 100644
--- a/modules/video_coding/codecs/av1/BUILD.gn
+++ b/modules/video_coding/codecs/av1/BUILD.gn
@@ -36,11 +36,27 @@
   }
 }
 
+rtc_source_set("scalable_video_controller") {
+  sources = [
+    "scalable_video_controller.h",
+    "scalable_video_controller_no_layering.cc",
+    "scalable_video_controller_no_layering.h",
+  ]
+  deps = [
+    "../../../../api/transport/rtp:dependency_descriptor",
+    "../../../../common_video/generic_frame_descriptor",
+    "../../../../rtc_base:checks",
+    "//third_party/abseil-cpp/absl/container:inlined_vector",
+    "//third_party/abseil-cpp/absl/types:optional",
+  ]
+}
+
 rtc_library("libaom_av1_encoder") {
   visibility = [ "*" ]
   poisonous = [ "software_video_codecs" ]
   public = [ "libaom_av1_encoder.h" ]
   deps = [
+    ":scalable_video_controller",
     "../../../../api/video_codecs:video_codecs_api",
     "//third_party/abseil-cpp/absl/base:core_headers",
   ]
@@ -79,6 +95,8 @@
         "../..:video_codec_interface",
         "../../../../api:create_frame_generator",
         "../../../../api:frame_generator_api",
+        "../../../../api:mock_video_encoder",
+        "../../../../api/video:video_frame_i420",
         "../../../../api/video_codecs:video_codecs_api",
         "../../../../test:test_support",
         "//third_party/abseil-cpp/absl/types:optional",
diff --git a/modules/video_coding/codecs/av1/libaom_av1_encoder.cc b/modules/video_coding/codecs/av1/libaom_av1_encoder.cc
index 59ad127..6a01165 100644
--- a/modules/video_coding/codecs/av1/libaom_av1_encoder.cc
+++ b/modules/video_coding/codecs/av1/libaom_av1_encoder.cc
@@ -13,6 +13,7 @@
 #include <stdint.h>
 
 #include <memory>
+#include <utility>
 #include <vector>
 
 #include "absl/algorithm/container.h"
@@ -22,6 +23,8 @@
 #include "api/video/video_frame.h"
 #include "api/video_codecs/video_codec.h"
 #include "api/video_codecs/video_encoder.h"
+#include "modules/video_coding/codecs/av1/scalable_video_controller.h"
+#include "modules/video_coding/codecs/av1/scalable_video_controller_no_layering.h"
 #include "modules/video_coding/include/video_codec_interface.h"
 #include "modules/video_coding/include/video_error_codes.h"
 #include "rtc_base/checks.h"
@@ -47,7 +50,8 @@
 
 class LibaomAv1Encoder final : public VideoEncoder {
  public:
-  LibaomAv1Encoder();
+  explicit LibaomAv1Encoder(
+      std::unique_ptr<ScalableVideoController> svc_controller);
   ~LibaomAv1Encoder();
 
   int InitEncode(const VideoCodec* codec_settings,
@@ -66,6 +70,7 @@
   EncoderInfo GetEncoderInfo() const override;
 
  private:
+  const std::unique_ptr<ScalableVideoController> svc_controller_;
   bool inited_;
   bool keyframe_required_;
   VideoCodec encoder_settings_;
@@ -100,11 +105,15 @@
   return WEBRTC_VIDEO_CODEC_OK;
 }
 
-LibaomAv1Encoder::LibaomAv1Encoder()
-    : inited_(false),
+LibaomAv1Encoder::LibaomAv1Encoder(
+    std::unique_ptr<ScalableVideoController> svc_controller)
+    : svc_controller_(std::move(svc_controller)),
+      inited_(false),
       keyframe_required_(true),
       frame_for_encode_(nullptr),
-      encoded_image_callback_(nullptr) {}
+      encoded_image_callback_(nullptr) {
+  RTC_DCHECK(svc_controller_);
+}
 
 LibaomAv1Encoder::~LibaomAv1Encoder() {
   Release();
@@ -205,6 +214,11 @@
     return WEBRTC_VIDEO_CODEC_ERROR;
   }
 
+  ScalableVideoController::StreamLayersConfig svc_config =
+      svc_controller_->StreamConfig();
+  // TODO(danilchap): Configure SVC.
+  (void)svc_config;
+
   return WEBRTC_VIDEO_CODEC_OK;
 }
 
@@ -239,6 +253,14 @@
       frame_types != nullptr &&
       absl::c_linear_search(*frame_types, VideoFrameType::kVideoFrameKey);
 
+  std::vector<ScalableVideoController::LayerFrameConfig> layer_frames =
+      svc_controller_->NextFrameConfig(keyframe_required_);
+
+  if (layer_frames.empty()) {
+    RTC_LOG(LS_ERROR) << "SVCController returned no configuration for a frame.";
+    return WEBRTC_VIDEO_CODEC_ERROR;
+  }
+
   // Convert input frame to I420, if needed.
   VideoFrame prepped_input_frame = frame;
   if (prepped_input_frame.video_frame_buffer()->type() !=
@@ -263,75 +285,94 @@
 
   const uint32_t duration =
       kRtpTicksPerSecond / static_cast<float>(encoder_settings_.maxFramerate);
-  aom_enc_frame_flags_t flags = (keyframe_required_) ? AOM_EFLAG_FORCE_KF : 0;
 
-  // Encode a frame.
-  aom_codec_err_t ret = aom_codec_encode(&ctx_, frame_for_encode_,
-                                         frame.timestamp(), duration, flags);
-  if (ret != AOM_CODEC_OK) {
-    RTC_LOG(LS_WARNING) << "LibaomAv1Encoder::Encode returned " << ret
-                        << " on aom_codec_encode.";
-    return WEBRTC_VIDEO_CODEC_ERROR;
-  }
+  // TODO(danilchap): Remove this check when layering is implemented.
+  RTC_DCHECK_EQ(layer_frames.size(), 1);
+  for (ScalableVideoController::LayerFrameConfig& layer_frame : layer_frames) {
+    aom_enc_frame_flags_t flags =
+        layer_frame.is_keyframe ? AOM_EFLAG_FORCE_KF : 0;
 
-  // Get encoded image data.
-  EncodedImage encoded_image;
-  encoded_image._completeFrame = true;
-  aom_codec_iter_t iter = nullptr;
-  int data_pkt_count = 0;
-  while (const aom_codec_cx_pkt_t* pkt = aom_codec_get_cx_data(&ctx_, &iter)) {
-    if (pkt->kind == AOM_CODEC_CX_FRAME_PKT && pkt->data.frame.sz > 0) {
-      if (data_pkt_count > 0) {
-        RTC_LOG(LS_WARNING) << "LibaomAv1Encoder::Encoder returned more than "
-                               "one data packet for an input video frame.";
-        Release();
-      }
-      // TODO(bugs.webrtc.org/11174): Remove this hack when
-      // webrtc_pc_e2e::SingleProcessEncodedImageDataInjector not used or fixed
-      // not to assume that encoded image transfered as is.
-      const uint8_t* data = static_cast<const uint8_t*>(pkt->data.frame.buf);
-      size_t size = pkt->data.frame.sz;
-      if (size > 2 && data[0] == 0b0'0010'010 && data[1] == 0) {
-        // Typically frame starts with a Temporal Delimter OBU of size 0 that is
-        // not need by any component in webrtc and discarded during rtp
-        // packetization. Before discarded it confuses test framework that
-        // assumes received encoded frame is exactly same as sent frame.
-        data += 2;
-        size -= 2;
-      }
-      encoded_image.SetEncodedData(EncodedImageBuffer::Create(data, size));
+    // TODO(danilchap): configure buffers and layers based on
+    // `layer_frame.buffers` when layering is enabled.
 
-      bool is_key_frame = ((pkt->data.frame.flags & AOM_EFLAG_FORCE_KF) != 0);
-      encoded_image._frameType = is_key_frame
-                                     ? VideoFrameType::kVideoFrameKey
-                                     : VideoFrameType::kVideoFrameDelta;
-      encoded_image.SetTimestamp(frame.timestamp());
-      encoded_image.capture_time_ms_ = frame.render_time_ms();
-      encoded_image.rotation_ = frame.rotation();
-      encoded_image.content_type_ = VideoContentType::UNSPECIFIED;
-      // If encoded image width/height info are added to aom_codec_cx_pkt_t,
-      // use those values in lieu of the values in frame.
-      encoded_image._encodedHeight = frame.height();
-      encoded_image._encodedWidth = frame.width();
-      encoded_image.timing_.flags = VideoSendTiming::kInvalid;
-      int qp = -1;
-      ret = aom_codec_control(&ctx_, AOME_GET_LAST_QUANTIZER, &qp);
-      if (ret != AOM_CODEC_OK) {
-        RTC_LOG(LS_WARNING) << "LibaomAv1Encoder::Encode returned " << ret
-                            << " on control AOME_GET_LAST_QUANTIZER.";
-        return WEBRTC_VIDEO_CODEC_ERROR;
-      }
-      encoded_image.qp_ = qp;
-      encoded_image.SetColorSpace(frame.color_space());
-      ++data_pkt_count;
+    // Encode a frame.
+    aom_codec_err_t ret = aom_codec_encode(&ctx_, frame_for_encode_,
+                                           frame.timestamp(), duration, flags);
+    if (ret != AOM_CODEC_OK) {
+      RTC_LOG(LS_WARNING) << "LibaomAv1Encoder::Encode returned " << ret
+                          << " on aom_codec_encode.";
+      return WEBRTC_VIDEO_CODEC_ERROR;
     }
-  }
 
-  // Deliver encoded image data.
-  if (encoded_image.size() > 0) {
-    CodecSpecificInfo codec_specific_info;
-    encoded_image_callback_->OnEncodedImage(encoded_image, &codec_specific_info,
-                                            nullptr);
+    // Get encoded image data.
+    EncodedImage encoded_image;
+    encoded_image._completeFrame = true;
+    aom_codec_iter_t iter = nullptr;
+    int data_pkt_count = 0;
+    while (const aom_codec_cx_pkt_t* pkt =
+               aom_codec_get_cx_data(&ctx_, &iter)) {
+      if (pkt->kind == AOM_CODEC_CX_FRAME_PKT && pkt->data.frame.sz > 0) {
+        if (data_pkt_count > 0) {
+          RTC_LOG(LS_WARNING) << "LibaomAv1Encoder::Encoder returned more than "
+                                 "one data packet for an input video frame.";
+          Release();
+        }
+        // TODO(bugs.webrtc.org/11174): Remove this hack when
+        // webrtc_pc_e2e::SingleProcessEncodedImageDataInjector is unused or
+        // fixed not to assume that the encoded image is transferred as is.
+        const uint8_t* data = static_cast<const uint8_t*>(pkt->data.frame.buf);
+        size_t size = pkt->data.frame.sz;
+        if (size > 2 && data[0] == 0b0'0010'010 && data[1] == 0) {
+          // Typically frame starts with a Temporal Delimiter OBU of size 0 that
+          // is not needed by any component in webrtc and discarded during rtp
+          // packetization. Until discarded, it confuses the test framework that
+          // assumes received encoded frame is exactly the same as sent frame.
+          data += 2;
+          size -= 2;
+        }
+        encoded_image.SetEncodedData(EncodedImageBuffer::Create(data, size));
+
+        layer_frame.is_keyframe =
+            ((pkt->data.frame.flags & AOM_EFLAG_FORCE_KF) != 0);
+        encoded_image._frameType = layer_frame.is_keyframe
+                                       ? VideoFrameType::kVideoFrameKey
+                                       : VideoFrameType::kVideoFrameDelta;
+        encoded_image.SetTimestamp(frame.timestamp());
+        encoded_image.capture_time_ms_ = frame.render_time_ms();
+        encoded_image.rotation_ = frame.rotation();
+        encoded_image.content_type_ = VideoContentType::UNSPECIFIED;
+        // If encoded image width/height info are added to aom_codec_cx_pkt_t,
+        // use those values in lieu of the values in frame.
+        encoded_image._encodedHeight = frame.height();
+        encoded_image._encodedWidth = frame.width();
+        encoded_image.timing_.flags = VideoSendTiming::kInvalid;
+        int qp = -1;
+        ret = aom_codec_control(&ctx_, AOME_GET_LAST_QUANTIZER, &qp);
+        if (ret != AOM_CODEC_OK) {
+          RTC_LOG(LS_WARNING) << "LibaomAv1Encoder::Encode returned " << ret
+                              << " on control AOME_GET_LAST_QUANTIZER.";
+          return WEBRTC_VIDEO_CODEC_ERROR;
+        }
+        encoded_image.qp_ = qp;
+        encoded_image.SetColorSpace(frame.color_space());
+        ++data_pkt_count;
+      }
+    }
+
+    // Deliver encoded image data.
+    if (encoded_image.size() > 0) {
+      CodecSpecificInfo codec_specific_info;
+      codec_specific_info.codecType = kVideoCodecAV1;
+      bool is_keyframe = layer_frame.is_keyframe;
+      codec_specific_info.generic_frame_info =
+          svc_controller_->OnEncodeDone(std::move(layer_frame));
+      if (is_keyframe && codec_specific_info.generic_frame_info) {
+        codec_specific_info.template_structure =
+            svc_controller_->DependencyStructure();
+      }
+      encoded_image_callback_->OnEncodedImage(encoded_image,
+                                              &codec_specific_info, nullptr);
+    }
   }
 
   return WEBRTC_VIDEO_CODEC_OK;
@@ -389,7 +430,13 @@
 const bool kIsLibaomAv1EncoderSupported = true;
 
 std::unique_ptr<VideoEncoder> CreateLibaomAv1Encoder() {
-  return std::make_unique<LibaomAv1Encoder>();
+  return std::make_unique<LibaomAv1Encoder>(
+      std::make_unique<ScalableVideoControllerNoLayering>());
+}
+
+std::unique_ptr<VideoEncoder> CreateLibaomAv1Encoder(
+    std::unique_ptr<ScalableVideoController> svc_controller) {
+  return std::make_unique<LibaomAv1Encoder>(std::move(svc_controller));
 }
 
 }  // namespace webrtc
diff --git a/modules/video_coding/codecs/av1/libaom_av1_encoder.h b/modules/video_coding/codecs/av1/libaom_av1_encoder.h
index 4b0ee28..c2f04e6 100644
--- a/modules/video_coding/codecs/av1/libaom_av1_encoder.h
+++ b/modules/video_coding/codecs/av1/libaom_av1_encoder.h
@@ -14,12 +14,15 @@
 
 #include "absl/base/attributes.h"
 #include "api/video_codecs/video_encoder.h"
+#include "modules/video_coding/codecs/av1/scalable_video_controller.h"
 
 namespace webrtc {
 
 ABSL_CONST_INIT extern const bool kIsLibaomAv1EncoderSupported;
 
 std::unique_ptr<VideoEncoder> CreateLibaomAv1Encoder();
+std::unique_ptr<VideoEncoder> CreateLibaomAv1Encoder(
+    std::unique_ptr<ScalableVideoController> controller);
 
 }  // namespace webrtc
 
diff --git a/modules/video_coding/codecs/av1/scalable_video_controller.h b/modules/video_coding/codecs/av1/scalable_video_controller.h
new file mode 100644
index 0000000..dec985f
--- /dev/null
+++ b/modules/video_coding/codecs/av1/scalable_video_controller.h
@@ -0,0 +1,71 @@
+/*
+ *  Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+#ifndef MODULES_VIDEO_CODING_CODECS_AV1_SCALABLE_VIDEO_CONTROLLER_H_
+#define MODULES_VIDEO_CODING_CODECS_AV1_SCALABLE_VIDEO_CONTROLLER_H_
+
+#include <vector>
+
+#include "absl/container/inlined_vector.h"
+#include "absl/types/optional.h"
+#include "api/transport/rtp/dependency_descriptor.h"
+#include "common_video/generic_frame_descriptor/generic_frame_info.h"
+
+namespace webrtc {
+
+// Controls how video should be encoded to be scalable. Outputs results as
+// buffer usage configuration for encoder and enough details to communicate the
+// scalability structure via dependency descriptor rtp header extension.
+class ScalableVideoController {
+ public:
+  struct StreamLayersConfig {
+    int num_spatial_layers = 1;
+    int num_temporal_layers = 1;
+  };
+  struct LayerFrameConfig {
+    // Id to match configuration returned by NextFrameConfig with
+    // (possibly modified) configuration passed back via OnEncodeDone.
+    // The meaning of the id is an implementation detail of
+    // the ScalableVideoController.
+    int id = 0;
+
+    // Indicates that the frame should be encoded as a key frame. In
+    // particular, when `is_keyframe=true` the property
+    // `CodecBufferUsage::referenced` should be ignored and treated as false.
+    bool is_keyframe = false;
+
+    int spatial_id = 0;
+    int temporal_id = 0;
+    // Describes which buffers the encoder is allowed to reference and
+    // which buffers the encoder should update.
+    absl::InlinedVector<CodecBufferUsage, kMaxEncoderBuffers> buffers;
+  };
+
+  virtual ~ScalableVideoController() = default;
+
+  // Returns video structure description for encoder to configure itself.
+  virtual StreamLayersConfig StreamConfig() const = 0;
+
+  // Returns video structure description in format compatible with
+  // dependency descriptor rtp header extension.
+  virtual FrameDependencyStructure DependencyStructure() const = 0;
+
+  // When `restart` is true, first `LayerFrameConfig` should have `is_keyframe`
+  // set to true.
+  // Returned vector shouldn't be empty.
+  virtual std::vector<LayerFrameConfig> NextFrameConfig(bool restart) = 0;
+
+  // Returns configuration to pass to EncoderCallback.
+  virtual absl::optional<GenericFrameInfo> OnEncodeDone(
+      LayerFrameConfig config) = 0;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_VIDEO_CODING_CODECS_AV1_SCALABLE_VIDEO_CONTROLLER_H_
diff --git a/modules/video_coding/codecs/av1/scalable_video_controller_no_layering.cc b/modules/video_coding/codecs/av1/scalable_video_controller_no_layering.cc
new file mode 100644
index 0000000..6b63ca4
--- /dev/null
+++ b/modules/video_coding/codecs/av1/scalable_video_controller_no_layering.cc
@@ -0,0 +1,69 @@
+/*
+ *  Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+#include "modules/video_coding/codecs/av1/scalable_video_controller_no_layering.h"
+
+#include <utility>
+#include <vector>
+
+#include "api/transport/rtp/dependency_descriptor.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+ScalableVideoControllerNoLayering::~ScalableVideoControllerNoLayering() =
+    default;
+
+ScalableVideoController::StreamLayersConfig
+ScalableVideoControllerNoLayering::StreamConfig() const {
+  StreamLayersConfig result;
+  result.num_spatial_layers = 1;
+  result.num_temporal_layers = 1;
+  return result;
+}
+
+FrameDependencyStructure
+ScalableVideoControllerNoLayering::DependencyStructure() const {
+  FrameDependencyStructure structure;
+  structure.num_decode_targets = 1;
+  FrameDependencyTemplate a_template;
+  a_template.decode_target_indications = {DecodeTargetIndication::kSwitch};
+  structure.templates.push_back(a_template);
+  return structure;
+}
+
+std::vector<ScalableVideoController::LayerFrameConfig>
+ScalableVideoControllerNoLayering::NextFrameConfig(bool restart) {
+  if (restart) {
+    start_ = true;
+  }
+  std::vector<LayerFrameConfig> result(1);
+  result[0].id = 0;
+  result[0].is_keyframe = start_;
+  result[0].buffers = {{/*id=*/0, /*references=*/!start_, /*updates=*/true}};
+
+  start_ = false;
+  return result;
+}
+
+absl::optional<GenericFrameInfo>
+ScalableVideoControllerNoLayering::OnEncodeDone(LayerFrameConfig config) {
+  RTC_DCHECK_EQ(config.id, 0);
+  absl::optional<GenericFrameInfo> frame_info(absl::in_place);
+  frame_info->encoder_buffers = std::move(config.buffers);
+  if (config.is_keyframe) {
+    for (auto& buffer : frame_info->encoder_buffers) {
+      buffer.referenced = false;
+    }
+  }
+  frame_info->decode_target_indications = {DecodeTargetIndication::kSwitch};
+  return frame_info;
+}
+
+}  // namespace webrtc
diff --git a/modules/video_coding/codecs/av1/scalable_video_controller_no_layering.h b/modules/video_coding/codecs/av1/scalable_video_controller_no_layering.h
new file mode 100644
index 0000000..ad73098
--- /dev/null
+++ b/modules/video_coding/codecs/av1/scalable_video_controller_no_layering.h
@@ -0,0 +1,38 @@
+/*
+ *  Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+#ifndef MODULES_VIDEO_CODING_CODECS_AV1_SCALABLE_VIDEO_CONTROLLER_NO_LAYERING_H_
+#define MODULES_VIDEO_CODING_CODECS_AV1_SCALABLE_VIDEO_CONTROLLER_NO_LAYERING_H_
+
+#include <vector>
+
+#include "api/transport/rtp/dependency_descriptor.h"
+#include "common_video/generic_frame_descriptor/generic_frame_info.h"
+#include "modules/video_coding/codecs/av1/scalable_video_controller.h"
+
+namespace webrtc {
+
+class ScalableVideoControllerNoLayering : public ScalableVideoController {
+ public:
+  ~ScalableVideoControllerNoLayering() override;
+
+  StreamLayersConfig StreamConfig() const override;
+  FrameDependencyStructure DependencyStructure() const override;
+
+  std::vector<LayerFrameConfig> NextFrameConfig(bool restart) override;
+  absl::optional<GenericFrameInfo> OnEncodeDone(
+      LayerFrameConfig config) override;
+
+ private:
+  bool start_ = true;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_VIDEO_CODING_CODECS_AV1_SCALABLE_VIDEO_CONTROLLER_NO_LAYERING_H_