Implement external scaling/premapping for video

A new API allows WebRTC to request that the FrameBuffer prepare for
software readback at the given resolution.
It's done asynchronously because the underlying mechanism in Chromium is
asynchronous and can take a long time. This way there will be no
artificial bottleneck in the pipeline.

This CL is a no-op itself, as no FrameBuffer implementation overrides
`PrepareMappedBufferAsync()` yet.

Bug: chromium:397485312
Change-Id: I9670c17aa3041ef2107d932e0e6eaefeee851e25
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/406260
Reviewed-by: Erik Språng <sprang@webrtc.org>
Commit-Queue: Ilya Nikolaevskiy <ilnik@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#45556}
diff --git a/api/video/video_frame_buffer.cc b/api/video/video_frame_buffer.cc
index b142814..7b45658 100644
--- a/api/video/video_frame_buffer.cc
+++ b/api/video/video_frame_buffer.cc
@@ -10,6 +10,7 @@
 
 #include "api/video/video_frame_buffer.h"
 
+#include <cstddef>
 #include <string>
 
 #include "api/array_view.h"
@@ -36,6 +37,16 @@
   return result;
 }
 
+void VideoFrameBuffer::PrepareMappedBufferAsync(
+    size_t width,
+    size_t height,
+    scoped_refptr<PreparedFrameHandler> handler,
+    size_t frame_identifier) {
+  // Default implementation can't do any preparations,
+  // so it just invokes the callback immediately.
+  handler->OnFramePrepared(frame_identifier);
+}
+
 const I420BufferInterface* VideoFrameBuffer::GetI420() const {
   // Overridden by subclasses that can return an I420 buffer without any
   // conversion, in particular, I420BufferInterface.
diff --git a/api/video/video_frame_buffer.h b/api/video/video_frame_buffer.h
index 1fbaa53..b160ab8 100644
--- a/api/video/video_frame_buffer.h
+++ b/api/video/video_frame_buffer.h
@@ -11,6 +11,7 @@
 #ifndef API_VIDEO_VIDEO_FRAME_BUFFER_H_
 #define API_VIDEO_VIDEO_FRAME_BUFFER_H_
 
+#include <cstddef>
 #include <cstdint>
 #include <string>
 
@@ -47,6 +48,11 @@
 // VideoFrame, and not here.
 class RTC_EXPORT VideoFrameBuffer : public RefCountInterface {
  public:
+  class RTC_EXPORT PreparedFrameHandler : public webrtc::RefCountInterface {
+   public:
+    virtual void OnFramePrepared(size_t frame_identifier) = 0;
+  };
+
   // New frame buffer types will be added conservatively when there is an
   // opportunity to optimize the path between some pair of video source and
   // video sink.
@@ -128,6 +134,15 @@
   // For logging: returns a textual representation of the storage.
   virtual std::string storage_representation() const;
 
+  // Informs the buffer about the maximum resolution for the upcoming
+  // `GetMappedBuffer()` calls. `handler->OnFramePrepared(frame_identifier)`
+  // is invoked once the preparation is done.
+  virtual void PrepareMappedBufferAsync(
+      size_t width,
+      size_t height,
+      scoped_refptr<PreparedFrameHandler> handler,
+      size_t frame_identifier);
+
  protected:
   ~VideoFrameBuffer() override {}
 };
diff --git a/api/video_codecs/video_encoder.cc b/api/video_codecs/video_encoder.cc
index 3180770..0c25a2f 100644
--- a/api/video_codecs/video_encoder.cc
+++ b/api/video_codecs/video_encoder.cc
@@ -189,6 +189,10 @@
   if (is_qp_trusted.has_value()) {
     oss << ", is_qp_trusted = " << is_qp_trusted.value();
   }
+  if (mapped_resolution.has_value()) {
+    oss << ", mapped_resolution = " << mapped_resolution->width << " x "
+        << mapped_resolution->height;
+  }
   oss << "}";
   return oss.str();
 }
diff --git a/api/video_codecs/video_encoder.h b/api/video_codecs/video_encoder.h
index 44a7516..1a8f895 100644
--- a/api/video_codecs/video_encoder.h
+++ b/api/video_codecs/video_encoder.h
@@ -152,6 +152,12 @@
     }
   };
 
+  struct RTC_EXPORT Resolution {
+    Resolution(int width, int height) : width(width), height(height) {}
+    int width = 0;
+    int height = 0;
+  };
+
   // Struct containing metadata about the encoder implementing this interface.
   struct RTC_EXPORT EncoderInfo {
     static constexpr uint8_t kMaxFramerateFraction =
@@ -266,6 +272,14 @@
     // configuration. This may be used to determine if the encoder has reached
     // its target video quality for static screenshare content.
     std::optional<int> min_qp;
+
+    // Maximum resolution accessed by a software encoder,
+    // i.e. the resolution needed for a CPU-readable image.
+    // This has to be set by software encoders.
+    // If it's not set, mapping will happen at encode
+    // time; otherwise a more optimal implementation-
+    // specific path may be used.
+    std::optional<Resolution> mapped_resolution;
   };
 
   struct RTC_EXPORT RateControlParameters {
diff --git a/media/engine/simulcast_encoder_adapter.cc b/media/engine/simulcast_encoder_adapter.cc
index 4120131..d715eb9 100644
--- a/media/engine/simulcast_encoder_adapter.cc
+++ b/media/engine/simulcast_encoder_adapter.cc
@@ -965,6 +965,14 @@
     if (!stream_contexts_[i].is_paused()) {
       encoder_names.push_back(encoder_impl_info.implementation_name);
     }
+
+    if (encoder_impl_info.mapped_resolution.has_value() &&
+        (!encoder_info.mapped_resolution.has_value() ||
+         encoder_info.mapped_resolution->width <
+             encoder_impl_info.mapped_resolution->width)) {
+      encoder_info.mapped_resolution = encoder_impl_info.mapped_resolution;
+    }
+
     if (i == 0) {
       encoder_info.supports_native_handle =
           encoder_impl_info.supports_native_handle;
diff --git a/modules/video_coding/codecs/av1/libaom_av1_encoder.cc b/modules/video_coding/codecs/av1/libaom_av1_encoder.cc
index bd6f52a..2f48f68 100644
--- a/modules/video_coding/codecs/av1/libaom_av1_encoder.cc
+++ b/modules/video_coding/codecs/av1/libaom_av1_encoder.cc
@@ -941,6 +941,9 @@
           : VideoEncoder::ScalingSettings(kLowQindex, kHighQindex);
   info.preferred_pixel_formats = {VideoFrameBuffer::Type::kI420,
                                   VideoFrameBuffer::Type::kNV12};
+  if (inited_) {
+    info.mapped_resolution = VideoEncoder::Resolution(cfg_.g_w, cfg_.g_h);
+  }
   if (SvcEnabled()) {
     for (int sid = 0; sid < svc_params_->number_spatial_layers; ++sid) {
       info.fps_allocation[sid].resize(svc_params_->number_temporal_layers);
diff --git a/modules/video_coding/codecs/h264/h264_encoder_impl.cc b/modules/video_coding/codecs/h264/h264_encoder_impl.cc
index 3cb94ad..0a80bee 100644
--- a/modules/video_coding/codecs/h264/h264_encoder_impl.cc
+++ b/modules/video_coding/codecs/h264/h264_encoder_impl.cc
@@ -756,6 +756,10 @@
   info.implementation_name = "OpenH264";
   info.scaling_settings =
       VideoEncoder::ScalingSettings(kLowH264QpThreshold, kHighH264QpThreshold);
+  if (!configurations_.empty()) {
+    info.mapped_resolution = VideoEncoder::Resolution(
+        configurations_.back().width, configurations_.back().height);
+  }
   info.is_hardware_accelerated = false;
   info.supports_simulcast = true;
   info.preferred_pixel_formats = {VideoFrameBuffer::Type::kI420};
diff --git a/modules/video_coding/codecs/vp8/libvpx_vp8_encoder.cc b/modules/video_coding/codecs/vp8/libvpx_vp8_encoder.cc
index f01142e..5edfecc 100644
--- a/modules/video_coding/codecs/vp8/libvpx_vp8_encoder.cc
+++ b/modules/video_coding/codecs/vp8/libvpx_vp8_encoder.cc
@@ -1421,6 +1421,9 @@
       }
     }
 
+    info.mapped_resolution =
+        VideoEncoder::Resolution(raw_images_[0].d_w, raw_images_[0].d_h);
+
     if (codec_.mode == VideoCodecMode::kScreensharing) {
       info.min_qp = kScreenshareMinQp;
     }
diff --git a/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.cc b/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.cc
index cecb34a..d2c14f7 100644
--- a/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.cc
+++ b/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.cc
@@ -1881,6 +1881,10 @@
         max_fps = codec_.spatialLayers[si].maxFramerate;
       }
     }
+    if (num_active_spatial_layers_ > 0) {
+      info.mapped_resolution =
+          VideoEncoder::Resolution(config_->g_w, config_->g_h);
+    }
 
     for (size_t si = 0; si < num_spatial_layers_; ++si) {
       info.fps_allocation[si].clear();
diff --git a/video/video_stream_encoder.cc b/video/video_stream_encoder.cc
index 4ae9694..876c5cb 100644
--- a/video/video_stream_encoder.cc
+++ b/video/video_stream_encoder.cc
@@ -79,6 +79,7 @@
 #include "rtc_base/logging.h"
 #include "rtc_base/numerics/safe_conversions.h"
 #include "rtc_base/strings/string_builder.h"
+#include "rtc_base/synchronization/mutex.h"
 #include "rtc_base/system/no_unique_address.h"
 #include "rtc_base/thread_annotations.h"
 #include "rtc_base/trace_event.h"
@@ -120,6 +121,8 @@
 
 constexpr int kDefaultMinScreenSharebps = 1200000;
 
+constexpr int kMaxFramesInPreparation = 10;
+
 int GetNumSpatialLayers(const VideoCodec& codec) {
   if (codec.codecType == kVideoCodecVP9) {
     return codec.VP9().numberOfSpatialLayers;
@@ -600,6 +603,23 @@
 
 }  //  namespace
 
+VideoStreamEncoder::PreparedFramesProcessor::PreparedFramesProcessor(
+    VideoStreamEncoder* parent)
+    : parent_(parent) {}
+
+void VideoStreamEncoder::PreparedFramesProcessor::StopCallbacks() {
+  MutexLock lock(&lock_);
+  parent_ = nullptr;
+}
+
+void VideoStreamEncoder::PreparedFramesProcessor::OnFramePrepared(
+    size_t frame_identifier) {
+  MutexLock lock(&lock_);
+  if (parent_) {
+    parent_->OnFramePrepared(frame_identifier);
+  }
+}
+
 VideoStreamEncoder::EncoderRateSettings::EncoderRateSettings()
     : rate_control(), encoder_target(DataRate::Zero()) {}
 
@@ -736,7 +756,9 @@
           ParseVp9LowTierCoreCountThreshold(env_.field_trials())),
       experimental_encoder_thread_limit_(
           ParseEncoderThreadLimit(env_.field_trials())),
-      encoder_queue_(std::move(encoder_queue)) {
+      encoder_queue_(std::move(encoder_queue)),
+      prepared_frames_processor_(
+          make_ref_counted<PreparedFramesProcessor>(this)) {
   TRACE_EVENT0("webrtc", "VideoStreamEncoder::VideoStreamEncoder");
   RTC_DCHECK_RUN_ON(worker_queue_);
   RTC_DCHECK(encoder_stats_observer);
@@ -773,6 +795,10 @@
   RTC_DCHECK(!video_source_sink_controller_.HasSource())
       << "Must call ::Stop() before destruction.";
 
+  // `StopCallbacks` must be called before the queue is destroyed, because
+  // ongoing notifications of prepared frames may post tasks or run on
+  // `encoder_queue_`.
+  prepared_frames_processor_->StopCallbacks();
   // The queue must be destroyed before its pointer is invalidated to avoid race
   // between destructor and running task that check if function is called on the
   // encoder_queue_.
@@ -1561,6 +1587,7 @@
                                  bool queue_overload,
                                  const VideoFrame& video_frame) {
   RTC_DCHECK_RUN_ON(encoder_queue_.get());
+
   VideoFrame incoming_frame = video_frame;
 
   // In some cases, e.g., when the frame from decoder is fed to encoder,
@@ -1617,7 +1644,7 @@
       cwnd_frame_drop_interval_ &&
       (cwnd_frame_counter_++ % cwnd_frame_drop_interval_.value() == 0);
   if (!queue_overload && !cwnd_frame_drop) {
-    MaybeEncodeVideoFrame(incoming_frame, post_time.us());
+    MaybePrepareVideoFrame(incoming_frame, post_time.us());
   } else {
     if (cwnd_frame_drop) {
       // Frame drop by congestion window pushback. Do not encode this
@@ -1804,6 +1831,73 @@
   }
 }
 
+// Queues `video_frame` and asks its buffer to prepare for CPU readback at the
+// encoder's `mapped_resolution` (never larger than the frame). Prepared frames
+// are handed to `MaybeEncodeVideoFrame()` in arrival order. The frame is
+// dropped if too many frames are already waiting.
+void VideoStreamEncoder::MaybePrepareVideoFrame(const VideoFrame& video_frame,
+                                                int64_t time_when_posted_us) {
+  RTC_DCHECK_RUN_ON(encoder_queue_.get());
+  if (pending_mapped_frames_.size() > kMaxFramesInPreparation) {
+    RTC_LOG(LS_ERROR) << "Too many frames are being prepared.";
+    ProcessDroppedFrame(video_frame,
+                        VideoStreamEncoderObserver::DropReason::kEncoderQueue);
+    return;
+  }
+
+  std::optional<VideoEncoder::Resolution> mapped_resolution;
+  if (encoder_) {
+    mapped_resolution = encoder_->GetEncoderInfo().mapped_resolution;
+  }
+
+  // `video_frame` is a const reference, so it is copied into the queue.
+  pending_mapped_frames_.push_back(
+      PreparingFrame{.frame = video_frame,
+                     .can_send = false,
+                     .frame_id = frame_counter_++,
+                     .time_when_posted_us = time_when_posted_us});
+
+  const PreparingFrame& last_frame = pending_mapped_frames_.back();
+  if (mapped_resolution.has_value()) {
+    // If either dimension exceeds the frame, request the frame's own size.
+    if (mapped_resolution->width > video_frame.width() ||
+        mapped_resolution->height > video_frame.height()) {
+      mapped_resolution->width = video_frame.width();
+      mapped_resolution->height = video_frame.height();
+    }
+    last_frame.frame.video_frame_buffer()->PrepareMappedBufferAsync(
+        mapped_resolution->width, mapped_resolution->height,
+        prepared_frames_processor_, last_frame.frame_id);
+  } else {
+    // No mapped resolution is known, so the frame is marked prepared at once.
+    OnFramePrepared(last_frame.frame_id);
+  }
+}
+
+// Marks the frame with `frame_id` as prepared, then encodes all prepared
+// frames at the front of the queue so that arrival order is preserved. If
+// called off `encoder_queue_`, re-posts itself to that queue.
+void VideoStreamEncoder::OnFramePrepared(size_t frame_id) {
+  if (!encoder_queue_->IsCurrent()) {
+    encoder_queue_->PostTask([this, frame_id]() { OnFramePrepared(frame_id); });
+    return;
+  }
+
+  for (auto& frame : pending_mapped_frames_) {
+    if (frame.frame_id == frame_id) {
+      frame.can_send = true;
+      break;
+    }
+  }
+  // A prepared frame waits until every earlier frame has been prepared too.
+  while (!pending_mapped_frames_.empty() &&
+         pending_mapped_frames_.front().can_send) {
+    auto& front = pending_mapped_frames_.front();
+    MaybeEncodeVideoFrame(front.frame, front.time_when_posted_us);
+    pending_mapped_frames_.pop_front();
+  }
+}
+
 void VideoStreamEncoder::MaybeEncodeVideoFrame(const VideoFrame& video_frame,
                                                int64_t time_when_posted_us) {
   RTC_DCHECK_RUN_ON(encoder_queue_.get());
diff --git a/video/video_stream_encoder.h b/video/video_stream_encoder.h
index 8cee4f2..8d4d6b8 100644
--- a/video/video_stream_encoder.h
+++ b/video/video_stream_encoder.h
@@ -14,6 +14,7 @@
 #include <atomic>
 #include <cstddef>
 #include <cstdint>
+#include <deque>
 #include <memory>
 #include <optional>
 #include <vector>
@@ -37,6 +38,7 @@
 #include "api/video/video_bitrate_allocation.h"
 #include "api/video/video_bitrate_allocator.h"
 #include "api/video/video_frame.h"
+#include "api/video/video_frame_buffer.h"
 #include "api/video/video_frame_type.h"
 #include "api/video/video_source_interface.h"
 #include "api/video/video_stream_encoder_settings.h"
@@ -51,6 +53,7 @@
 #include "modules/video_coding/utility/frame_dropper.h"
 #include "modules/video_coding/utility/qp_parser.h"
 #include "rtc_base/experiments/rate_control_settings.h"
+#include "rtc_base/synchronization/mutex.h"
 #include "rtc_base/thread_annotations.h"
 #include "video/adaptation/overuse_frame_detector.h"
 #include "video/adaptation/video_stream_encoder_resource_manager.h"
@@ -139,6 +142,8 @@
   DataRate UpdateTargetBitrate(DataRate target_bitrate,
                                double cwnd_reduce_ratio);
 
+  void OnFramePrepared(size_t frame_identifier);
+
  protected:
   friend class VideoStreamEncoderFrameCadenceRestrictionTest;
 
@@ -214,6 +219,20 @@
     DataRate encoder_target;
   };
 
+  class PreparedFramesProcessor
+      : public VideoFrameBuffer::PreparedFrameHandler {
+   public:
+    explicit PreparedFramesProcessor(VideoStreamEncoder* parent);
+
+    void StopCallbacks();
+
+    void OnFramePrepared(size_t frame_identifier) override;
+
+   private:
+    VideoStreamEncoder* parent_ RTC_GUARDED_BY(lock_);
+    Mutex lock_;
+  };
+
   class DegradationPreferenceManager;
 
   void ReconfigureEncoder() RTC_RUN_ON(encoder_queue_);
@@ -224,11 +243,15 @@
   void OnDiscardedFrame();
   void RequestRefreshFrame();
 
+  void MaybePrepareVideoFrame(const VideoFrame& frame,
+                              int64_t time_when_posted_in_ms);
+
   void MaybeEncodeVideoFrame(const VideoFrame& frame,
                              int64_t time_when_posted_in_ms);
 
   void EncodeVideoFrame(const VideoFrame& frame,
                         int64_t time_when_posted_in_ms);
+
   // Indicates whether frame should be dropped because the pixel count is too
   // large for the current bitrate configuration.
   bool DropDueToSize(uint32_t pixel_count) const RTC_RUN_ON(encoder_queue_);
@@ -459,6 +482,19 @@
   //  Required for automatic corruption detection.
   std::unique_ptr<FrameInstrumentationGenerator>
       frame_instrumentation_generator_;
+
+  scoped_refptr<PreparedFramesProcessor> prepared_frames_processor_;
+
+  size_t frame_counter_ = 0;
+
+  struct PreparingFrame {
+    const VideoFrame frame;
+    bool can_send;
+    size_t frame_id;
+    int64_t time_when_posted_us;
+  };
+
+  std::deque<PreparingFrame> pending_mapped_frames_;
 };
 
 }  // namespace webrtc