[Overuse] Encoding pipeline as input signals in the abstract interface.
This defines the following methods:
- OnFrame(), replaces SetLastFramePixelCount().
- OnFrameDroppedDueToSize(), a rename of FrameDroppedDueToSize() to
match the other methods.
- OnEncodeStarted(), a rename of the incorrectly named FrameCaptured().
- OnEncodeCompleted(), a rename of the poorly named FrameSent().
In order to get rid of SetLastFramePixelCount(), the "we don't know the
frame size" use case - which was previously handled implicitly by
invoking SetLastFramePixelCount() with a made-up value for
last_frame_info_ - is now handled explicitly with ".value_or()" in
LastInputFrameSizeOrDefault(). This does mean that a constant 144p
resolution value is referenced in two places, but the fact that this is
a magic value is at least made explicit. This may help future
improvements.
Bug: webrtc:11222
Change-Id: I3b28daa8c5ecf57c6537957d4759f15e24bb2234
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/166961
Commit-Queue: Henrik Boström <hbos@webrtc.org>
Reviewed-by: Evan Shrubsole <eshr@google.com>
Reviewed-by: Ilya Nikolaevskiy <ilnik@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#30352}
diff --git a/call/adaptation/BUILD.gn b/call/adaptation/BUILD.gn
index 99b3f16..10e8cc6 100644
--- a/call/adaptation/BUILD.gn
+++ b/call/adaptation/BUILD.gn
@@ -25,6 +25,7 @@
]
deps = [
"../../api:rtp_parameters",
+ "../../api/video:video_frame",
"../../api/video_codecs:video_codecs_api",
"../../rtc_base:checks",
"../../rtc_base:rtc_base_approved",
diff --git a/call/adaptation/resource_adaptation_module_interface.h b/call/adaptation/resource_adaptation_module_interface.h
index 61380b9..3a3deb2 100644
--- a/call/adaptation/resource_adaptation_module_interface.h
+++ b/call/adaptation/resource_adaptation_module_interface.h
@@ -13,6 +13,7 @@
#include "absl/types/optional.h"
#include "api/rtp_parameters.h"
+#include "api/video/video_frame.h"
#include "api/video_codecs/video_encoder.h"
#include "api/video_codecs/video_encoder_config.h"
#include "call/adaptation/video_source_restrictions.h"
@@ -91,6 +92,40 @@
// TODO(hbos): It's not clear why anybody should be able to tell the module to
// reset like this; can we get rid of this method?
virtual void ResetVideoSourceRestrictions() = 0;
+
+ // The following methods correspond to the pipeline that a frame goes through.
+ // Note that if the encoder is parallelized, multiple frames may be processed
+ // in parallel and methods may be invoked in unexpected orders.
+ //
+ // The implementation must not retain VideoFrames. Doing so may keep video
+ // frame buffers alive - this may even stall encoding.
+ // TODO(hbos): Can we replace VideoFrame with a different struct, maybe width
+ // and height is enough, and some sort of way to identify it at each step?
+
+ // 1. A frame is delivered to the encoder, e.g. from the camera. Next up: it
+ // may get dropped or it may get encoded, see OnFrameDroppedDueToSize() and
+ // OnEncodeStarted().
+ virtual void OnFrame(const VideoFrame& frame) = 0;
+ // 2.i) An input frame was dropped because its resolution is too big (e.g. for
+ // the target bitrate). This frame will not continue through the rest of the
+ // pipeline. The module should adapt down in resolution to avoid subsequent
+ // frames getting dropped for the same reason.
+ // TODO(hbos): If we take frame rate into account perhaps it would be valid to
+ // adapt down in frame rate as well.
+ virtual void OnFrameDroppedDueToSize() = 0;
+ // 2.ii) An input frame is about to be encoded. It may have been cropped and
+ // have different dimensions than what was observed at OnFrame(). Next
+ // up: encoding completes or fails, see OnEncodeCompleted(). There is
+ // currently no signal for encode failure.
+ virtual void OnEncodeStarted(const VideoFrame& cropped_frame,
+ int64_t time_when_first_seen_us) = 0;
+ // 3. The frame has successfully completed encoding. Next up: The encoded
+ // frame is dropped or packetized and sent over the network. There is
+ // currently no signal what happens beyond this point.
+ virtual void OnEncodeCompleted(uint32_t timestamp,
+ int64_t time_sent_in_us,
+ int64_t capture_time_us,
+ absl::optional<int> encode_duration_us) = 0;
};
} // namespace webrtc
diff --git a/video/overuse_frame_detector_resource_adaptation_module.cc b/video/overuse_frame_detector_resource_adaptation_module.cc
index 1ba33e1..baadb98 100644
--- a/video/overuse_frame_detector_resource_adaptation_module.cc
+++ b/video/overuse_frame_detector_resource_adaptation_module.cc
@@ -352,10 +352,10 @@
adapt_counters_(),
balanced_settings_(),
last_adaptation_request_(absl::nullopt),
- last_frame_pixel_count_(absl::nullopt),
source_restrictor_(std::make_unique<VideoSourceRestrictor>()),
overuse_detector_(std::move(overuse_detector)),
overuse_detector_is_started_(false),
+ last_input_frame_size_(absl::nullopt),
target_frame_rate_(absl::nullopt),
target_bitrate_bps_(absl::nullopt),
is_quality_scaler_enabled_(false),
@@ -439,22 +439,12 @@
MaybeUpdateVideoSourceRestrictions();
}
-void OveruseFrameDetectorResourceAdaptationModule::FrameCaptured(
- const VideoFrame& frame,
- int64_t time_when_first_seen_us) {
- overuse_detector_->FrameCaptured(frame, time_when_first_seen_us);
+void OveruseFrameDetectorResourceAdaptationModule::OnFrame(
+ const VideoFrame& frame) {
+ last_input_frame_size_ = frame.size();
}
-void OveruseFrameDetectorResourceAdaptationModule::FrameSent(
- uint32_t timestamp,
- int64_t time_sent_in_us,
- int64_t capture_time_us,
- absl::optional<int> encode_duration_us) {
- overuse_detector_->FrameSent(timestamp, time_sent_in_us, capture_time_us,
- encode_duration_us);
-}
-
-void OveruseFrameDetectorResourceAdaptationModule::FrameDroppedDueToSize() {
+void OveruseFrameDetectorResourceAdaptationModule::OnFrameDroppedDueToSize() {
int fps_count = GetConstAdaptCounter().FramerateCount(
AdaptationObserverInterface::AdaptReason::kQuality);
int res_count = GetConstAdaptCounter().ResolutionCount(
@@ -472,9 +462,23 @@
}
}
-void OveruseFrameDetectorResourceAdaptationModule::SetLastFramePixelCount(
- absl::optional<int> last_frame_pixel_count) {
- last_frame_pixel_count_ = last_frame_pixel_count;
+void OveruseFrameDetectorResourceAdaptationModule::OnEncodeStarted(
+ const VideoFrame& cropped_frame,
+ int64_t time_when_first_seen_us) {
+ // TODO(hbos): Rename FrameCaptured() to something more appropriate (e.g.
+ // "OnEncodeStarted"?) or revise usage.
+ overuse_detector_->FrameCaptured(cropped_frame, time_when_first_seen_us);
+}
+
+void OveruseFrameDetectorResourceAdaptationModule::OnEncodeCompleted(
+ uint32_t timestamp,
+ int64_t time_sent_in_us,
+ int64_t capture_time_us,
+ absl::optional<int> encode_duration_us) {
+ // TODO(hbos): Rename FrameSent() to something more appropriate (e.g.
+ // "OnEncodeCompleted"?).
+ overuse_detector_->FrameSent(timestamp, time_sent_in_us, capture_time_us,
+ encode_duration_us);
}
void OveruseFrameDetectorResourceAdaptationModule::SetIsQualityScalerEnabled(
@@ -492,7 +496,8 @@
RTC_DCHECK_GT(num_downgrades, 0);
AdaptationRequest adaptation_request = {
- *last_frame_pixel_count_, encoder_stats_observer_->GetInputFrameRate(),
+ LastInputFrameSizeOrDefault(),
+ encoder_stats_observer_->GetInputFrameRate(),
AdaptationRequest::Mode::kAdaptUp};
bool adapt_up_requested =
@@ -515,13 +520,13 @@
// Check if quality should be increased based on bitrate.
if (reason == kQuality &&
!balanced_settings_.CanAdaptUp(GetVideoCodecTypeOrGeneric(),
- *last_frame_pixel_count_,
+ LastInputFrameSizeOrDefault(),
target_bitrate_bps_.value_or(0))) {
return;
}
// Try scale up framerate, if higher.
int fps = balanced_settings_.MaxFps(GetVideoCodecTypeOrGeneric(),
- *last_frame_pixel_count_);
+ LastInputFrameSizeOrDefault());
if (source_restrictor_->IncreaseFramerate(fps)) {
GetAdaptCounter().DecrementFramerate(reason, fps);
// Reset framerate in case of fewer fps steps down than up.
@@ -536,7 +541,7 @@
// Check if resolution should be increased based on bitrate.
if (reason == kQuality &&
!balanced_settings_.CanAdaptUpResolution(
- GetVideoCodecTypeOrGeneric(), *last_frame_pixel_count_,
+ GetVideoCodecTypeOrGeneric(), LastInputFrameSizeOrDefault(),
target_bitrate_bps_.value_or(0))) {
return;
}
@@ -547,7 +552,7 @@
// Check if resolution should be increased based on bitrate and
// limits specified by encoder capabilities.
if (reason == kQuality &&
- !CanAdaptUpResolution(*last_frame_pixel_count_,
+ !CanAdaptUpResolution(LastInputFrameSizeOrDefault(),
target_bitrate_bps_.value_or(0))) {
return;
}
@@ -599,7 +604,8 @@
if (!has_input_video_)
return false;
AdaptationRequest adaptation_request = {
- *last_frame_pixel_count_, encoder_stats_observer_->GetInputFrameRate(),
+ LastInputFrameSizeOrDefault(),
+ encoder_stats_observer_->GetInputFrameRate(),
AdaptationRequest::Mode::kAdaptDown};
bool downgrade_requested =
@@ -641,12 +647,12 @@
case DegradationPreference::BALANCED: {
// Try scale down framerate, if lower.
int fps = balanced_settings_.MinFps(GetVideoCodecTypeOrGeneric(),
- *last_frame_pixel_count_);
+ LastInputFrameSizeOrDefault());
if (source_restrictor_->RestrictFramerate(fps)) {
GetAdaptCounter().IncrementFramerate(reason);
// Check if requested fps is higher (or close to) input fps.
absl::optional<int> min_diff =
- balanced_settings_.MinFpsDiff(*last_frame_pixel_count_);
+ balanced_settings_.MinFpsDiff(LastInputFrameSizeOrDefault());
if (min_diff && adaptation_request.framerate_fps_ > 0) {
int fps_diff = adaptation_request.framerate_fps_ - fps;
if (fps_diff < min_diff.value()) {
@@ -709,6 +715,20 @@
: kVideoCodecGeneric;
}
+int OveruseFrameDetectorResourceAdaptationModule::LastInputFrameSizeOrDefault()
+ const {
+ // The dependency on this hardcoded resolution is inherited from old code,
+ // which used this resolution as a stand-in for not knowing the resolution
+ // yet.
+ // TODO(hbos): Can we simply DCHECK has_value() before usage instead? Having a
+ // DCHECK passed all the tests but adding it does change the requirements of
+ // this class (= not being allowed to call AdaptUp() or AdaptDown() before
+ // OnFrame()) and deserves a standalone CL.
+ return last_input_frame_size_.value_or(
+ VideoStreamEncoder::kDefaultLastFrameInfoWidth *
+ VideoStreamEncoder::kDefaultLastFrameInfoHeight);
+}
+
void OveruseFrameDetectorResourceAdaptationModule::
MaybeUpdateVideoSourceRestrictions() {
VideoSourceRestrictions new_restrictions = ApplyDegradationPreference(
@@ -821,9 +841,8 @@
absl::optional<VideoEncoder::QpThresholds>
OveruseFrameDetectorResourceAdaptationModule::GetQpThresholds() const {
- RTC_DCHECK(last_frame_pixel_count_.has_value());
return balanced_settings_.GetQpThresholds(GetVideoCodecTypeOrGeneric(),
- last_frame_pixel_count_.value());
+ LastInputFrameSizeOrDefault());
}
bool OveruseFrameDetectorResourceAdaptationModule::CanAdaptUpResolution(
diff --git a/video/overuse_frame_detector_resource_adaptation_module.h b/video/overuse_frame_detector_resource_adaptation_module.h
index f4080bd..7c63b80 100644
--- a/video/overuse_frame_detector_resource_adaptation_module.h
+++ b/video/overuse_frame_detector_resource_adaptation_module.h
@@ -77,23 +77,15 @@
absl::optional<uint32_t> target_bitrate_bps) override;
void ResetVideoSourceRestrictions() override;
- // Input to the OveruseFrameDetector, which are required for this module to
- // function. These map to OveruseFrameDetector methods.
- // TODO(hbos): Define virtual methods in ResourceAdaptationModuleInterface
- // for input that are more generic so that this class can be used without
- // assumptions about underlying implementation.
- void FrameCaptured(const VideoFrame& frame, int64_t time_when_first_seen_us);
- void FrameSent(uint32_t timestamp,
- int64_t time_sent_in_us,
- int64_t capture_time_us,
- absl::optional<int> encode_duration_us);
- void FrameDroppedDueToSize();
+ void OnFrame(const VideoFrame& frame) override;
+ void OnFrameDroppedDueToSize() override;
+ void OnEncodeStarted(const VideoFrame& cropped_frame,
+ int64_t time_when_first_seen_us) override;
+ void OnEncodeCompleted(uint32_t timestamp,
+ int64_t time_sent_in_us,
+ int64_t capture_time_us,
+ absl::optional<int> encode_duration_us) override;
- // Various other settings and feedback mechanisms.
- // TODO(hbos): Find a common interface that would make sense for a generic
- // resource adaptation module. Unify code paths where possible. Do we really
- // need this many public methods?
- void SetLastFramePixelCount(absl::optional<int> last_frame_pixel_count);
// Inform the detector whether or not the quality scaler is enabled. This
// helps GetActiveCounts() return absl::nullopt when appropriate.
// TODO(hbos): This feels really hacky, can we report the right values without
@@ -178,6 +170,7 @@
};
VideoCodecType GetVideoCodecTypeOrGeneric() const;
+ int LastInputFrameSizeOrDefault() const;
// Makes |video_source_restrictions_| up-to-date and informs the
// |adaptation_listener_| if restrictions are changed, allowing the listener
@@ -210,11 +203,11 @@
// Stores a snapshot of the last adaptation request triggered by an AdaptUp
// or AdaptDown signal.
absl::optional<AdaptationRequest> last_adaptation_request_;
- absl::optional<int> last_frame_pixel_count_;
// Keeps track of source restrictions that this adaptation module outputs.
const std::unique_ptr<VideoSourceRestrictor> source_restrictor_;
const std::unique_ptr<OveruseFrameDetector> overuse_detector_;
bool overuse_detector_is_started_;
+ absl::optional<int> last_input_frame_size_;
absl::optional<double> target_frame_rate_;
absl::optional<uint32_t> target_bitrate_bps_;
bool is_quality_scaler_enabled_;
diff --git a/video/video_stream_encoder.cc b/video/video_stream_encoder.cc
index 68f24e6..a69eb04 100644
--- a/video/video_stream_encoder.cc
+++ b/video/video_stream_encoder.cc
@@ -226,6 +226,9 @@
return absl::nullopt;
}
+const int VideoStreamEncoder::kDefaultLastFrameInfoWidth = 176;
+const int VideoStreamEncoder::kDefaultLastFrameInfoHeight = 144;
+
VideoStreamEncoder::EncoderRateSettings::EncoderRateSettings()
: rate_control(),
encoder_target(DataRate::Zero()),
@@ -441,9 +444,8 @@
codec_info_ = settings_.encoder_factory->QueryVideoEncoder(
encoder_config_.video_format);
if (HasInternalSource()) {
- last_frame_info_ = VideoFrameInfo(176, 144, false);
- resource_adaptation_module_->SetLastFramePixelCount(
- last_frame_info_->pixel_count());
+ last_frame_info_ = VideoFrameInfo(
+ kDefaultLastFrameInfoWidth, kDefaultLastFrameInfoHeight, false);
ReconfigureEncoder();
}
}
@@ -1063,6 +1065,7 @@
void VideoStreamEncoder::MaybeEncodeVideoFrame(const VideoFrame& video_frame,
int64_t time_when_posted_us) {
RTC_DCHECK_RUN_ON(&encoder_queue_);
+ resource_adaptation_module_->OnFrame(video_frame);
if (!last_frame_info_ || video_frame.width() != last_frame_info_->width ||
video_frame.height() != last_frame_info_->height ||
@@ -1070,8 +1073,6 @@
pending_encoder_reconfiguration_ = true;
last_frame_info_ = VideoFrameInfo(video_frame.width(), video_frame.height(),
video_frame.is_texture());
- resource_adaptation_module_->SetLastFramePixelCount(
- last_frame_info_->pixel_count());
RTC_LOG(LS_INFO) << "Video frame parameters changed: dimensions="
<< last_frame_info_->width << "x"
<< last_frame_info_->height
@@ -1126,7 +1127,7 @@
if (DropDueToSize(video_frame.size())) {
RTC_LOG(LS_INFO) << "Dropping frame. Too large for target bitrate.";
- resource_adaptation_module_->FrameDroppedDueToSize();
+ resource_adaptation_module_->OnFrameDroppedDueToSize();
++initial_framedrop_;
// Storing references to a native buffer risks blocking frame capture.
if (video_frame.video_frame_buffer()->type() !=
@@ -1335,7 +1336,7 @@
TRACE_EVENT_ASYNC_STEP0("webrtc", "Video", video_frame.render_time_ms(),
"Encode");
- resource_adaptation_module_->FrameCaptured(out_frame, time_when_posted_us);
+ resource_adaptation_module_->OnEncodeStarted(out_frame, time_when_posted_us);
RTC_DCHECK_LE(send_codec_.width, out_frame.width());
RTC_DCHECK_LE(send_codec_.height, out_frame.height());
@@ -1803,7 +1804,7 @@
}
}
- resource_adaptation_module_->FrameSent(
+ resource_adaptation_module_->OnEncodeCompleted(
encoded_image.Timestamp(), time_sent_us,
encoded_image.capture_time_ms_ * rtc::kNumMicrosecsPerMillisec,
encode_duration_us);
diff --git a/video/video_stream_encoder.h b/video/video_stream_encoder.h
index a905420..ac73cd5 100644
--- a/video/video_stream_encoder.h
+++ b/video/video_stream_encoder.h
@@ -64,6 +64,14 @@
private EncodedImageCallback,
public ResourceAdaptationModuleListener {
public:
+ // If the encoder is reconfigured with a source, but we've yet to receive any
+ // frames, this 144p resolution is picked as the default value of
+ // |last_frame_info_|.
+ // TODO(hbos): Can we avoid guesses and properly handle the case of
+ // |last_frame_info_| not having a value, deleting these constants?
+ static const int kDefaultLastFrameInfoWidth;
+ static const int kDefaultLastFrameInfoHeight;
+
VideoStreamEncoder(Clock* clock,
uint32_t number_of_cores,
VideoStreamEncoderObserver* encoder_stats_observer,