Implement external scaling/premapping for video
A new API allows webrtc to request the FrameBuffer to prepare for
software readback at the given resolution.
It's done asynchronously because the underlying mechanism in Chromium is
asynchronous and can take a long time. This way there will be no
artificial bottleneck in the pipeline.
This CL is a no-op itself, as no FrameBuffer implementation overrides
the `PrepareMappedBufferAsync()` yet.
Bug: chromium:397485312
Change-Id: I9670c17aa3041ef2107d932e0e6eaefeee851e25
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/406260
Reviewed-by: Erik Språng <sprang@webrtc.org>
Commit-Queue: Ilya Nikolaevskiy <ilnik@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#45556}
diff --git a/api/video/video_frame_buffer.cc b/api/video/video_frame_buffer.cc
index b142814..7b45658 100644
--- a/api/video/video_frame_buffer.cc
+++ b/api/video/video_frame_buffer.cc
@@ -10,6 +10,7 @@
#include "api/video/video_frame_buffer.h"
+#include <cstddef>
#include <string>
#include "api/array_view.h"
@@ -36,6 +37,16 @@
return result;
}
+void VideoFrameBuffer::PrepareMappedBufferAsync(
+ size_t width,
+ size_t height,
+ scoped_refptr<PreparedFrameHandler> handler,
+ size_t frame_identifier) {
+ // Default implementation can't do any preparations,
+ // so it just invokes the callback immediately.
+ handler->OnFramePrepared(frame_identifier);
+}
+
const I420BufferInterface* VideoFrameBuffer::GetI420() const {
// Overridden by subclasses that can return an I420 buffer without any
// conversion, in particular, I420BufferInterface.
diff --git a/api/video/video_frame_buffer.h b/api/video/video_frame_buffer.h
index 1fbaa53..b160ab8 100644
--- a/api/video/video_frame_buffer.h
+++ b/api/video/video_frame_buffer.h
@@ -11,6 +11,7 @@
#ifndef API_VIDEO_VIDEO_FRAME_BUFFER_H_
#define API_VIDEO_VIDEO_FRAME_BUFFER_H_
+#include <cstddef>
#include <cstdint>
#include <string>
@@ -47,6 +48,11 @@
// VideoFrame, and not here.
class RTC_EXPORT VideoFrameBuffer : public RefCountInterface {
public:
+ class RTC_EXPORT PreparedFrameHandler : public webrtc::RefCountInterface {
+ public:
+ virtual void OnFramePrepared(size_t frame_identifier) = 0;
+ };
+
// New frame buffer types will be added conservatively when there is an
// opportunity to optimize the path between some pair of video source and
// video sink.
@@ -128,6 +134,15 @@
// For logging: returns a textual representation of the storage.
virtual std::string storage_representation() const;
+  // Informs the buffer about the maximum resolution of the upcoming
+  // `GetMappedBuffer()` calls and asynchronously prepares for software
+  // readback; invokes `handler->OnFramePrepared(frame_identifier)` when done.
+ virtual void PrepareMappedBufferAsync(
+ size_t width,
+ size_t height,
+ scoped_refptr<PreparedFrameHandler> handler,
+ size_t frame_identifier);
+
protected:
~VideoFrameBuffer() override {}
};
diff --git a/api/video_codecs/video_encoder.cc b/api/video_codecs/video_encoder.cc
index 3180770..0c25a2f 100644
--- a/api/video_codecs/video_encoder.cc
+++ b/api/video_codecs/video_encoder.cc
@@ -189,6 +189,10 @@
if (is_qp_trusted.has_value()) {
oss << ", is_qp_trusted = " << is_qp_trusted.value();
}
+ if (mapped_resolution.has_value()) {
+ oss << ", mapped_resolution = " << mapped_resolution->width << " x "
+ << mapped_resolution->height;
+ }
oss << "}";
return oss.str();
}
diff --git a/api/video_codecs/video_encoder.h b/api/video_codecs/video_encoder.h
index 44a7516..1a8f895 100644
--- a/api/video_codecs/video_encoder.h
+++ b/api/video_codecs/video_encoder.h
@@ -152,6 +152,12 @@
}
};
+ struct RTC_EXPORT Resolution {
+ Resolution(int width, int height) : width(width), height(height) {}
+ int width = 0;
+ int height = 0;
+ };
+
// Struct containing metadata about the encoder implementing this interface.
struct RTC_EXPORT EncoderInfo {
static constexpr uint8_t kMaxFramerateFraction =
@@ -266,6 +272,14 @@
// configuration. This may be used to determine if the encoder has reached
// its target video quality for static screenshare content.
std::optional<int> min_qp;
+
+ // Maximum resolution accessed by software encoder,
+ // i.e. resolution needed for cpu readable image.
+ // This has to be set by software encoders.
+ // If it's not set, mapping will happen during the
+ // encode time, otherwise more optimal implementation
+ // specific path may be used.
+ std::optional<Resolution> mapped_resolution;
};
struct RTC_EXPORT RateControlParameters {
diff --git a/media/engine/simulcast_encoder_adapter.cc b/media/engine/simulcast_encoder_adapter.cc
index 4120131..d715eb9 100644
--- a/media/engine/simulcast_encoder_adapter.cc
+++ b/media/engine/simulcast_encoder_adapter.cc
@@ -965,6 +965,14 @@
if (!stream_contexts_[i].is_paused()) {
encoder_names.push_back(encoder_impl_info.implementation_name);
}
+
+ if (encoder_impl_info.mapped_resolution.has_value() &&
+ (!encoder_info.mapped_resolution.has_value() ||
+ encoder_info.mapped_resolution->width <
+ encoder_impl_info.mapped_resolution->width)) {
+ encoder_info.mapped_resolution = encoder_impl_info.mapped_resolution;
+ }
+
if (i == 0) {
encoder_info.supports_native_handle =
encoder_impl_info.supports_native_handle;
diff --git a/modules/video_coding/codecs/av1/libaom_av1_encoder.cc b/modules/video_coding/codecs/av1/libaom_av1_encoder.cc
index bd6f52a..2f48f68 100644
--- a/modules/video_coding/codecs/av1/libaom_av1_encoder.cc
+++ b/modules/video_coding/codecs/av1/libaom_av1_encoder.cc
@@ -941,6 +941,9 @@
: VideoEncoder::ScalingSettings(kLowQindex, kHighQindex);
info.preferred_pixel_formats = {VideoFrameBuffer::Type::kI420,
VideoFrameBuffer::Type::kNV12};
+ if (inited_) {
+ info.mapped_resolution = VideoEncoder::Resolution(cfg_.g_w, cfg_.g_h);
+ }
if (SvcEnabled()) {
for (int sid = 0; sid < svc_params_->number_spatial_layers; ++sid) {
info.fps_allocation[sid].resize(svc_params_->number_temporal_layers);
diff --git a/modules/video_coding/codecs/h264/h264_encoder_impl.cc b/modules/video_coding/codecs/h264/h264_encoder_impl.cc
index 3cb94ad..0a80bee 100644
--- a/modules/video_coding/codecs/h264/h264_encoder_impl.cc
+++ b/modules/video_coding/codecs/h264/h264_encoder_impl.cc
@@ -756,6 +756,10 @@
info.implementation_name = "OpenH264";
info.scaling_settings =
VideoEncoder::ScalingSettings(kLowH264QpThreshold, kHighH264QpThreshold);
+ if (!configurations_.empty()) {
+ info.mapped_resolution = VideoEncoder::Resolution(
+ configurations_.back().width, configurations_.back().height);
+ }
info.is_hardware_accelerated = false;
info.supports_simulcast = true;
info.preferred_pixel_formats = {VideoFrameBuffer::Type::kI420};
diff --git a/modules/video_coding/codecs/vp8/libvpx_vp8_encoder.cc b/modules/video_coding/codecs/vp8/libvpx_vp8_encoder.cc
index f01142e..5edfecc 100644
--- a/modules/video_coding/codecs/vp8/libvpx_vp8_encoder.cc
+++ b/modules/video_coding/codecs/vp8/libvpx_vp8_encoder.cc
@@ -1421,6 +1421,9 @@
}
}
+ info.mapped_resolution =
+ VideoEncoder::Resolution(raw_images_[0].d_w, raw_images_[0].d_h);
+
if (codec_.mode == VideoCodecMode::kScreensharing) {
info.min_qp = kScreenshareMinQp;
}
diff --git a/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.cc b/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.cc
index cecb34a..d2c14f7 100644
--- a/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.cc
+++ b/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.cc
@@ -1881,6 +1881,10 @@
max_fps = codec_.spatialLayers[si].maxFramerate;
}
}
+ if (num_active_spatial_layers_ > 0) {
+ info.mapped_resolution =
+ VideoEncoder::Resolution(config_->g_w, config_->g_h);
+ }
for (size_t si = 0; si < num_spatial_layers_; ++si) {
info.fps_allocation[si].clear();
diff --git a/video/video_stream_encoder.cc b/video/video_stream_encoder.cc
index 4ae9694..876c5cb 100644
--- a/video/video_stream_encoder.cc
+++ b/video/video_stream_encoder.cc
@@ -79,6 +79,7 @@
#include "rtc_base/logging.h"
#include "rtc_base/numerics/safe_conversions.h"
#include "rtc_base/strings/string_builder.h"
+#include "rtc_base/synchronization/mutex.h"
#include "rtc_base/system/no_unique_address.h"
#include "rtc_base/thread_annotations.h"
#include "rtc_base/trace_event.h"
@@ -120,6 +121,8 @@
constexpr int kDefaultMinScreenSharebps = 1200000;
+constexpr int kMaxFramesInPreparation = 10;
+
int GetNumSpatialLayers(const VideoCodec& codec) {
if (codec.codecType == kVideoCodecVP9) {
return codec.VP9().numberOfSpatialLayers;
@@ -600,6 +603,23 @@
} // namespace
+VideoStreamEncoder::PreparedFramesProcessor::PreparedFramesProcessor(
+ VideoStreamEncoder* parent)
+ : parent_(parent) {}
+
+void VideoStreamEncoder::PreparedFramesProcessor::StopCallbacks() {
+ MutexLock lock(&lock_);
+ parent_ = nullptr;
+}
+
+void VideoStreamEncoder::PreparedFramesProcessor::OnFramePrepared(
+ size_t frame_identifier) {
+ MutexLock lock(&lock_);
+ if (parent_) {
+ parent_->OnFramePrepared(frame_identifier);
+ }
+}
+
VideoStreamEncoder::EncoderRateSettings::EncoderRateSettings()
: rate_control(), encoder_target(DataRate::Zero()) {}
@@ -736,7 +756,9 @@
ParseVp9LowTierCoreCountThreshold(env_.field_trials())),
experimental_encoder_thread_limit_(
ParseEncoderThreadLimit(env_.field_trials())),
- encoder_queue_(std::move(encoder_queue)) {
+ encoder_queue_(std::move(encoder_queue)),
+ prepared_frames_processor_(
+ make_ref_counted<PreparedFramesProcessor>(this)) {
TRACE_EVENT0("webrtc", "VideoStreamEncoder::VideoStreamEncoder");
RTC_DCHECK_RUN_ON(worker_queue_);
RTC_DCHECK(encoder_stats_observer);
@@ -773,6 +795,10 @@
RTC_DCHECK(!video_source_sink_controller_.HasSource())
<< "Must call ::Stop() before destruction.";
+ // `StopCallbacks` must be called before the queue is destroyed, because
+ // ongoing notifications of prepared frames may post tasks or run on
+ // `encoder_queue_`.
+ prepared_frames_processor_->StopCallbacks();
// The queue must be destroyed before its pointer is invalidated to avoid race
// between destructor and running task that check if function is called on the
// encoder_queue_.
@@ -1561,6 +1587,7 @@
bool queue_overload,
const VideoFrame& video_frame) {
RTC_DCHECK_RUN_ON(encoder_queue_.get());
+
VideoFrame incoming_frame = video_frame;
// In some cases, e.g., when the frame from decoder is fed to encoder,
@@ -1617,7 +1644,7 @@
cwnd_frame_drop_interval_ &&
(cwnd_frame_counter_++ % cwnd_frame_drop_interval_.value() == 0);
if (!queue_overload && !cwnd_frame_drop) {
- MaybeEncodeVideoFrame(incoming_frame, post_time.us());
+ MaybePrepareVideoFrame(incoming_frame, post_time.us());
} else {
if (cwnd_frame_drop) {
// Frame drop by congestion window pushback. Do not encode this
@@ -1804,6 +1831,62 @@
}
}
+void VideoStreamEncoder::MaybePrepareVideoFrame(const VideoFrame& video_frame,
+ int64_t time_when_posted_us) {
+ RTC_DCHECK_RUN_ON(encoder_queue_.get());
+ if (pending_mapped_frames_.size() > kMaxFramesInPreparation) {
+    RTC_LOG(LS_ERROR) << "Too many frames are being prepared.";
+ ProcessDroppedFrame(video_frame,
+ VideoStreamEncoderObserver::DropReason::kEncoderQueue);
+ return;
+ }
+
+ std::optional<VideoEncoder::Resolution> mapped_resolution;
+ if (encoder_) {
+ mapped_resolution = encoder_->GetEncoderInfo().mapped_resolution;
+ }
+
+ pending_mapped_frames_.push_back(
+      PreparingFrame{.frame = video_frame,
+ .can_send = false,
+ .frame_id = frame_counter_++,
+ .time_when_posted_us = time_when_posted_us});
+
+ const PreparingFrame& last_frame = pending_mapped_frames_.back();
+ if (mapped_resolution.has_value()) {
+ if (mapped_resolution->width > video_frame.width() ||
+ mapped_resolution->height > video_frame.height()) {
+ mapped_resolution->width = video_frame.width();
+ mapped_resolution->height = video_frame.height();
+ }
+ last_frame.frame.video_frame_buffer()->PrepareMappedBufferAsync(
+ mapped_resolution->width, mapped_resolution->height,
+ prepared_frames_processor_, last_frame.frame_id);
+ } else {
+ OnFramePrepared(last_frame.frame_id);
+ }
+}
+
+void VideoStreamEncoder::OnFramePrepared(size_t frame_id) {
+ if (!encoder_queue_->IsCurrent()) {
+ encoder_queue_->PostTask([this, frame_id]() { OnFramePrepared(frame_id); });
+ return;
+ }
+
+ for (auto& frame : pending_mapped_frames_) {
+ if (frame.frame_id == frame_id) {
+ frame.can_send = true;
+ break;
+ }
+ }
+ while (!pending_mapped_frames_.empty() &&
+ pending_mapped_frames_.front().can_send) {
+ auto& front = pending_mapped_frames_.front();
+ MaybeEncodeVideoFrame(front.frame, front.time_when_posted_us);
+ pending_mapped_frames_.pop_front();
+ }
+}
+
void VideoStreamEncoder::MaybeEncodeVideoFrame(const VideoFrame& video_frame,
int64_t time_when_posted_us) {
RTC_DCHECK_RUN_ON(encoder_queue_.get());
diff --git a/video/video_stream_encoder.h b/video/video_stream_encoder.h
index 8cee4f2..8d4d6b8 100644
--- a/video/video_stream_encoder.h
+++ b/video/video_stream_encoder.h
@@ -14,6 +14,7 @@
#include <atomic>
#include <cstddef>
#include <cstdint>
+#include <deque>
#include <memory>
#include <optional>
#include <vector>
@@ -37,6 +38,7 @@
#include "api/video/video_bitrate_allocation.h"
#include "api/video/video_bitrate_allocator.h"
#include "api/video/video_frame.h"
+#include "api/video/video_frame_buffer.h"
#include "api/video/video_frame_type.h"
#include "api/video/video_source_interface.h"
#include "api/video/video_stream_encoder_settings.h"
@@ -51,6 +53,7 @@
#include "modules/video_coding/utility/frame_dropper.h"
#include "modules/video_coding/utility/qp_parser.h"
#include "rtc_base/experiments/rate_control_settings.h"
+#include "rtc_base/synchronization/mutex.h"
#include "rtc_base/thread_annotations.h"
#include "video/adaptation/overuse_frame_detector.h"
#include "video/adaptation/video_stream_encoder_resource_manager.h"
@@ -139,6 +142,8 @@
DataRate UpdateTargetBitrate(DataRate target_bitrate,
double cwnd_reduce_ratio);
+ void OnFramePrepared(size_t frame_identifier);
+
protected:
friend class VideoStreamEncoderFrameCadenceRestrictionTest;
@@ -214,6 +219,20 @@
DataRate encoder_target;
};
+ class PreparedFramesProcessor
+ : public VideoFrameBuffer::PreparedFrameHandler {
+ public:
+ explicit PreparedFramesProcessor(VideoStreamEncoder* parent);
+
+ void StopCallbacks();
+
+ void OnFramePrepared(size_t frame_identifier) override;
+
+ private:
+ VideoStreamEncoder* parent_ RTC_GUARDED_BY(lock_);
+ Mutex lock_;
+ };
+
class DegradationPreferenceManager;
void ReconfigureEncoder() RTC_RUN_ON(encoder_queue_);
@@ -224,11 +243,15 @@
void OnDiscardedFrame();
void RequestRefreshFrame();
+ void MaybePrepareVideoFrame(const VideoFrame& frame,
+ int64_t time_when_posted_in_ms);
+
void MaybeEncodeVideoFrame(const VideoFrame& frame,
int64_t time_when_posted_in_ms);
void EncodeVideoFrame(const VideoFrame& frame,
int64_t time_when_posted_in_ms);
+
// Indicates whether frame should be dropped because the pixel count is too
// large for the current bitrate configuration.
bool DropDueToSize(uint32_t pixel_count) const RTC_RUN_ON(encoder_queue_);
@@ -459,6 +482,19 @@
// Required for automatic corruption detection.
std::unique_ptr<FrameInstrumentationGenerator>
frame_instrumentation_generator_;
+
+ scoped_refptr<PreparedFramesProcessor> prepared_frames_processor_;
+
+ size_t frame_counter_ = 0;
+
+ struct PreparingFrame {
+ const VideoFrame frame;
+ bool can_send;
+ size_t frame_id;
+ int64_t time_when_posted_us;
+ };
+
+ std::deque<PreparingFrame> pending_mapped_frames_;
};
} // namespace webrtc