Communicate encoder resolutions via rtc::VideoSinkWants.
This will allow us to optimize the internal buffers of
webrtc::VideoFrame for the resolution(s) that we actually want to
encode.
Bug: webrtc:12469, chromium:1157072
Change-Id: If378b52b5e35aa9a9800c1f7dfe189437ce43253
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/208540
Reviewed-by: Niels Moller <nisse@webrtc.org>
Reviewed-by: Harald Alvestrand <hta@webrtc.org>
Reviewed-by: Ilya Nikolaevskiy <ilnik@webrtc.org>
Commit-Queue: Henrik Boström <hbos@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#33342}
diff --git a/api/video/video_source_interface.h b/api/video/video_source_interface.h
index b03d7c5..8b5823f 100644
--- a/api/video/video_source_interface.h
+++ b/api/video/video_source_interface.h
@@ -12,6 +12,7 @@
#define API_VIDEO_VIDEO_SOURCE_INTERFACE_H_
#include <limits>
+#include <vector>
#include "absl/types/optional.h"
#include "api/video/video_sink_interface.h"
@@ -22,6 +23,15 @@
// VideoSinkWants is used for notifying the source of properties a video frame
// should have when it is delivered to a certain sink.
struct RTC_EXPORT VideoSinkWants {
+ struct FrameSize {
+ FrameSize(int width, int height) : width(width), height(height) {}
+ FrameSize(const FrameSize&) = default;
+ ~FrameSize() = default;
+
+ int width;
+ int height;
+ };
+
VideoSinkWants();
VideoSinkWants(const VideoSinkWants&);
~VideoSinkWants();
@@ -49,8 +59,34 @@
// Note that this field is unrelated to any horizontal or vertical stride
// requirements the encoder has on the incoming video frame buffers.
int resolution_alignment = 1;
+
+  // The resolutions that the sink is configured to consume. If the sink is an
+ // encoder this is what the encoder is configured to encode. In singlecast we
+ // only encode one resolution, but in simulcast and SVC this can mean multiple
+ // resolutions per frame.
+ //
+ // The sink is always configured to consume a subset of the
+ // webrtc::VideoFrame's resolution. In the case of encoding, we usually encode
+ // at webrtc::VideoFrame's resolution but this may not always be the case due
+ // to scaleResolutionDownBy or turning off simulcast or SVC layers.
+ //
+ // For example, we may capture at 720p and due to adaptation (e.g. applying
+ // |max_pixel_count| constraints) create webrtc::VideoFrames of size 480p, but
+ // if we do scaleResolutionDownBy:2 then the only resolution we end up
+ // encoding is 240p. In this case we still need to provide webrtc::VideoFrames
+ // of size 480p but we can optimize internal buffers for 240p, avoiding
+ // downsampling to 480p if possible.
+ //
+ // Note that the |resolutions| can change while frames are in flight and
+ // should only be used as a hint when constructing the webrtc::VideoFrame.
+ std::vector<FrameSize> resolutions;
};
+inline bool operator==(const VideoSinkWants::FrameSize& a,
+ const VideoSinkWants::FrameSize& b) {
+ return a.width == b.width && a.height == b.height;
+}
+
template <typename VideoFrameT>
class VideoSourceInterface {
public:
diff --git a/call/call_perf_tests.cc b/call/call_perf_tests.cc
index 6591ab5..4cb9766 100644
--- a/call/call_perf_tests.cc
+++ b/call/call_perf_tests.cc
@@ -561,6 +561,18 @@
// TODO(sprang): Add integration test for maintain-framerate mode?
void OnSinkWantsChanged(rtc::VideoSinkInterface<VideoFrame>* sink,
const rtc::VideoSinkWants& wants) override {
+ // The sink wants can change either because an adaptation happened (i.e.
+ // the pixels or frame rate changed) or for other reasons, such as encoded
+ // resolutions being communicated (happens whenever we capture a new frame
+ // size). In this test, we only care about adaptations.
+ bool did_adapt =
+ last_wants_.max_pixel_count != wants.max_pixel_count ||
+ last_wants_.target_pixel_count != wants.target_pixel_count ||
+ last_wants_.max_framerate_fps != wants.max_framerate_fps;
+ last_wants_ = wants;
+ if (!did_adapt) {
+ return;
+ }
// At kStart expect CPU overuse. Then expect CPU underuse when the encoder
// delay has been decreased.
switch (test_phase_) {
@@ -625,6 +637,9 @@
kAdaptedDown,
kAdaptedUp
} test_phase_;
+
+ private:
+ rtc::VideoSinkWants last_wants_;
} test;
RunBaseTest(&test);
diff --git a/video/video_source_sink_controller.cc b/video/video_source_sink_controller.cc
index 376eb85..4cd12d8 100644
--- a/video/video_source_sink_controller.cc
+++ b/video/video_source_sink_controller.cc
@@ -29,7 +29,14 @@
<< " max_pixel_count=" << wants.max_pixel_count << " target_pixel_count="
<< (wants.target_pixel_count.has_value()
? std::to_string(wants.target_pixel_count.value())
- : "null");
+ : "null")
+ << " resolutions={";
+ for (size_t i = 0; i < wants.resolutions.size(); ++i) {
+ if (i != 0)
+ ss << ",";
+ ss << wants.resolutions[i].width << "x" << wants.resolutions[i].height;
+ }
+ ss << "}";
return ss.Release();
}
@@ -104,6 +111,12 @@
return resolution_alignment_;
}
+const std::vector<rtc::VideoSinkWants::FrameSize>&
+VideoSourceSinkController::resolutions() const {
+ RTC_DCHECK_RUN_ON(&sequence_checker_);
+ return resolutions_;
+}
+
void VideoSourceSinkController::SetRestrictions(
VideoSourceRestrictions restrictions) {
RTC_DCHECK_RUN_ON(&sequence_checker_);
@@ -133,6 +146,12 @@
resolution_alignment_ = resolution_alignment;
}
+void VideoSourceSinkController::SetResolutions(
+ std::vector<rtc::VideoSinkWants::FrameSize> resolutions) {
+ RTC_DCHECK_RUN_ON(&sequence_checker_);
+ resolutions_ = std::move(resolutions);
+}
+
// RTC_EXCLUSIVE_LOCKS_REQUIRED(sequence_checker_)
rtc::VideoSinkWants VideoSourceSinkController::CurrentSettingsToSinkWants()
const {
@@ -161,6 +180,7 @@
frame_rate_upper_limit_.has_value()
? static_cast<int>(frame_rate_upper_limit_.value())
: std::numeric_limits<int>::max());
+ wants.resolutions = resolutions_;
return wants;
}
diff --git a/video/video_source_sink_controller.h b/video/video_source_sink_controller.h
index 29a9588..c61084f 100644
--- a/video/video_source_sink_controller.h
+++ b/video/video_source_sink_controller.h
@@ -12,6 +12,7 @@
#define VIDEO_VIDEO_SOURCE_SINK_CONTROLLER_H_
#include <string>
+#include <vector>
#include "absl/types/optional.h"
#include "api/sequence_checker.h"
@@ -46,6 +47,7 @@
absl::optional<double> frame_rate_upper_limit() const;
bool rotation_applied() const;
int resolution_alignment() const;
+ const std::vector<rtc::VideoSinkWants::FrameSize>& resolutions() const;
// Updates the settings stored internally. In order for these settings to be
// applied to the sink, PushSourceSinkSettings() must subsequently be called.
@@ -55,6 +57,7 @@
void SetFrameRateUpperLimit(absl::optional<double> frame_rate_upper_limit);
void SetRotationApplied(bool rotation_applied);
void SetResolutionAlignment(int resolution_alignment);
+ void SetResolutions(std::vector<rtc::VideoSinkWants::FrameSize> resolutions);
private:
rtc::VideoSinkWants CurrentSettingsToSinkWants() const
@@ -79,6 +82,8 @@
RTC_GUARDED_BY(&sequence_checker_);
bool rotation_applied_ RTC_GUARDED_BY(&sequence_checker_) = false;
int resolution_alignment_ RTC_GUARDED_BY(&sequence_checker_) = 1;
+ std::vector<rtc::VideoSinkWants::FrameSize> resolutions_
+ RTC_GUARDED_BY(&sequence_checker_);
};
} // namespace webrtc
diff --git a/video/video_stream_encoder.cc b/video/video_stream_encoder.cc
index 63770c4..ae58725 100644
--- a/video/video_stream_encoder.cc
+++ b/video/video_stream_encoder.cc
@@ -991,14 +991,29 @@
max_framerate = std::max(stream.max_framerate, max_framerate);
}
- main_queue_->PostTask(
- ToQueuedTask(task_safety_, [this, max_framerate, alignment]() {
+ // The resolutions that we're actually encoding with.
+ std::vector<rtc::VideoSinkWants::FrameSize> encoder_resolutions;
+ // TODO(hbos): For the case of SVC, also make use of |codec.spatialLayers|.
+ // For now, SVC layers are handled by the VP9 encoder.
+ for (const auto& simulcastStream : codec.simulcastStream) {
+ if (!simulcastStream.active)
+ continue;
+ encoder_resolutions.emplace_back(simulcastStream.width,
+ simulcastStream.height);
+ }
+ main_queue_->PostTask(ToQueuedTask(
+ task_safety_, [this, max_framerate, alignment,
+ encoder_resolutions = std::move(encoder_resolutions)]() {
RTC_DCHECK_RUN_ON(main_queue_);
if (max_framerate !=
video_source_sink_controller_.frame_rate_upper_limit() ||
- alignment != video_source_sink_controller_.resolution_alignment()) {
+ alignment != video_source_sink_controller_.resolution_alignment() ||
+ encoder_resolutions !=
+ video_source_sink_controller_.resolutions()) {
video_source_sink_controller_.SetFrameRateUpperLimit(max_framerate);
video_source_sink_controller_.SetResolutionAlignment(alignment);
+ video_source_sink_controller_.SetResolutions(
+ std::move(encoder_resolutions));
video_source_sink_controller_.PushSourceSinkSettings();
}
}));
diff --git a/video/video_stream_encoder_unittest.cc b/video/video_stream_encoder_unittest.cc
index f7a3621..d74ebe8 100644
--- a/video/video_stream_encoder_unittest.cc
+++ b/video/video_stream_encoder_unittest.cc
@@ -461,6 +461,10 @@
return adaptation_enabled_;
}
+ // The "last wants" is a snapshot of the previous rtc::VideoSinkWants where
+ // the resolution or frame rate was different than it is currently. If
+ // something else is modified, such as encoder resolutions, but the resolution
+ // and frame rate stays the same, last wants is not updated.
rtc::VideoSinkWants last_wants() const {
MutexLock lock(&mutex_);
return last_wants_;
@@ -519,7 +523,14 @@
void AddOrUpdateSink(rtc::VideoSinkInterface<VideoFrame>* sink,
const rtc::VideoSinkWants& wants) override {
MutexLock lock(&mutex_);
- last_wants_ = sink_wants_locked();
+ rtc::VideoSinkWants prev_wants = sink_wants_locked();
+ bool did_adapt =
+ prev_wants.max_pixel_count != wants.max_pixel_count ||
+ prev_wants.target_pixel_count != wants.target_pixel_count ||
+ prev_wants.max_framerate_fps != wants.max_framerate_fps;
+ if (did_adapt) {
+ last_wants_ = prev_wants;
+ }
adapter_.OnSinkWants(wants);
test::FrameForwarder::AddOrUpdateSinkLocked(sink, wants);
}
@@ -7611,4 +7622,105 @@
video_stream_encoder_->Stop();
}
+TEST_F(VideoStreamEncoderTest, EncoderResolutionsExposedInSinglecast) {
+ const int kFrameWidth = 1280;
+ const int kFrameHeight = 720;
+
+ SetUp();
+ video_stream_encoder_->OnBitrateUpdatedAndWaitForManagedResources(
+ DataRate::BitsPerSec(kTargetBitrateBps),
+ DataRate::BitsPerSec(kTargetBitrateBps),
+ DataRate::BitsPerSec(kTargetBitrateBps), 0, 0, 0);
+
+ // Capturing a frame should reconfigure the encoder and expose the encoder
+ // resolution, which is the same as the input frame.
+ int64_t timestamp_ms = kFrameIntervalMs;
+ video_source_.IncomingCapturedFrame(
+ CreateFrame(timestamp_ms, kFrameWidth, kFrameHeight));
+ WaitForEncodedFrame(timestamp_ms);
+ video_stream_encoder_->WaitUntilTaskQueueIsIdle();
+ EXPECT_THAT(video_source_.sink_wants().resolutions,
+ ::testing::ElementsAreArray(
+ {rtc::VideoSinkWants::FrameSize(kFrameWidth, kFrameHeight)}));
+
+ video_stream_encoder_->Stop();
+}
+
+TEST_F(VideoStreamEncoderTest, EncoderResolutionsExposedInSimulcast) {
+ // Pick downscale factors such that we never encode at full resolution - this
+ // is an interesting use case. The frame resolution influences the encoder
+ // resolutions, but if no layer has |scale_resolution_down_by| == 1 then the
+ // encoder should not ask for the frame resolution. This allows video frames
+  // to have the appearance of one resolution but optimize its internal buffers
+ // for what is actually encoded.
+ const size_t kNumSimulcastLayers = 3u;
+ const float kDownscaleFactors[] = {8.0, 4.0, 2.0};
+ const int kFrameWidth = 1280;
+ const int kFrameHeight = 720;
+ const rtc::VideoSinkWants::FrameSize kLayer0Size(
+ kFrameWidth / kDownscaleFactors[0], kFrameHeight / kDownscaleFactors[0]);
+ const rtc::VideoSinkWants::FrameSize kLayer1Size(
+ kFrameWidth / kDownscaleFactors[1], kFrameHeight / kDownscaleFactors[1]);
+ const rtc::VideoSinkWants::FrameSize kLayer2Size(
+ kFrameWidth / kDownscaleFactors[2], kFrameHeight / kDownscaleFactors[2]);
+
+ VideoEncoderConfig config;
+ test::FillEncoderConfiguration(kVideoCodecVP8, kNumSimulcastLayers, &config);
+ for (size_t i = 0; i < kNumSimulcastLayers; ++i) {
+ config.simulcast_layers[i].scale_resolution_down_by = kDownscaleFactors[i];
+ config.simulcast_layers[i].active = true;
+ }
+ config.video_stream_factory =
+ new rtc::RefCountedObject<cricket::EncoderStreamFactory>(
+ "VP8", /*max qp*/ 56, /*screencast*/ false,
+ /*screenshare enabled*/ false);
+ video_stream_encoder_->OnBitrateUpdatedAndWaitForManagedResources(
+ DataRate::BitsPerSec(kSimulcastTargetBitrateBps),
+ DataRate::BitsPerSec(kSimulcastTargetBitrateBps),
+ DataRate::BitsPerSec(kSimulcastTargetBitrateBps), 0, 0, 0);
+
+ // Capture a frame with all layers active.
+ int64_t timestamp_ms = kFrameIntervalMs;
+ sink_.SetNumExpectedLayers(kNumSimulcastLayers);
+ video_stream_encoder_->ConfigureEncoder(config.Copy(), kMaxPayloadLength);
+ video_source_.IncomingCapturedFrame(
+ CreateFrame(timestamp_ms, kFrameWidth, kFrameHeight));
+ WaitForEncodedFrame(timestamp_ms);
+ // Expect encoded resolutions to match the expected simulcast layers.
+ video_stream_encoder_->WaitUntilTaskQueueIsIdle();
+ EXPECT_THAT(
+ video_source_.sink_wants().resolutions,
+ ::testing::ElementsAreArray({kLayer0Size, kLayer1Size, kLayer2Size}));
+
+ // Capture a frame with one of the layers inactive.
+ timestamp_ms += kFrameIntervalMs;
+ config.simulcast_layers[2].active = false;
+ sink_.SetNumExpectedLayers(kNumSimulcastLayers - 1);
+ video_stream_encoder_->ConfigureEncoder(config.Copy(), kMaxPayloadLength);
+ video_source_.IncomingCapturedFrame(
+ CreateFrame(timestamp_ms, kFrameWidth, kFrameHeight));
+ WaitForEncodedFrame(timestamp_ms);
+
+ // Expect encoded resolutions to match the expected simulcast layers.
+ video_stream_encoder_->WaitUntilTaskQueueIsIdle();
+ EXPECT_THAT(video_source_.sink_wants().resolutions,
+ ::testing::ElementsAreArray({kLayer0Size, kLayer1Size}));
+
+ // Capture a frame with all but one layer turned off.
+ timestamp_ms += kFrameIntervalMs;
+ config.simulcast_layers[1].active = false;
+ sink_.SetNumExpectedLayers(kNumSimulcastLayers - 2);
+ video_stream_encoder_->ConfigureEncoder(config.Copy(), kMaxPayloadLength);
+ video_source_.IncomingCapturedFrame(
+ CreateFrame(timestamp_ms, kFrameWidth, kFrameHeight));
+ WaitForEncodedFrame(timestamp_ms);
+
+ // Expect encoded resolutions to match the expected simulcast layers.
+ video_stream_encoder_->WaitUntilTaskQueueIsIdle();
+ EXPECT_THAT(video_source_.sink_wants().resolutions,
+ ::testing::ElementsAreArray({kLayer0Size}));
+
+ video_stream_encoder_->Stop();
+}
+
} // namespace webrtc