[Overuse] Encoding pipeline as input signals in the abstract interface.

This defines the following methods on the abstract interface (see the
caller sketch after the list):
- OnFrame(), replaces SetLastFramePixelCount().
- OnFrameDroppedDueToSize(), a rename of FrameDroppedDueToSize() to
  match the other methods.
- OnEncodeStarted(), a rename of the incorrectly named FrameCaptured().
- OnEncodeCompleted(), a rename of the poorly named FrameSent().
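
For orientation, a minimal, self-contained sketch (illustrative only,
not part of this CL) of the order in which an encoder is expected to
feed these signals into the module. The FakeFrame, PipelineSignals and
FeedOneFrame names and the simplified types are made up for the
example; the call order matches the VideoStreamEncoder changes in the
diff below.

  #include <cstdint>
  #include <optional>

  struct FakeFrame {  // Stand-in for webrtc::VideoFrame.
    int width = 0;
    int height = 0;
  };

  // Stand-in for the new methods on ResourceAdaptationModuleInterface.
  class PipelineSignals {
   public:
    virtual ~PipelineSignals() = default;
    virtual void OnFrame(const FakeFrame& frame) = 0;
    virtual void OnFrameDroppedDueToSize() = 0;
    virtual void OnEncodeStarted(const FakeFrame& cropped_frame,
                                 int64_t time_when_first_seen_us) = 0;
    virtual void OnEncodeCompleted(uint32_t timestamp,
                                   int64_t time_sent_in_us,
                                   int64_t capture_time_us,
                                   std::optional<int> encode_duration_us) = 0;
  };

  // 1. Every incoming frame is reported via OnFrame(). 2. It is then either
  // dropped for being too large (OnFrameDroppedDueToSize()) or handed to the
  // encoder (OnEncodeStarted()). 3. On success, OnEncodeCompleted() follows.
  void FeedOneFrame(PipelineSignals& module,
                    const FakeFrame& frame,
                    bool too_large_for_target_bitrate,
                    int64_t now_us) {
    module.OnFrame(frame);
    if (too_large_for_target_bitrate) {
      module.OnFrameDroppedDueToSize();
      return;
    }
    module.OnEncodeStarted(frame, now_us);
    // ... encoding happens; the encoded-image callback then reports:
    module.OnEncodeCompleted(/*timestamp=*/90000, /*time_sent_in_us=*/now_us,
                             /*capture_time_us=*/now_us,
                             /*encode_duration_us=*/5000);
  }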

To get rid of SetLastFramePixelCount(), the "we don't know the frame
size yet" case - previously handled implicitly by invoking
SetLastFramePixelCount() with a made-up value for last_frame_info_ -
is now handled with ".value_or()" in LastInputFrameSizeOrDefault().
This does mean that the constant 144p resolution is referenced in two
places, but the fact that it is a magic value is at least made
explicit, which may help future improvements.
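
As a minimal illustration of the ".value_or()" pattern (simplified,
standalone sketch; the real code uses absl::optional and lives in
LastInputFrameSizeOrDefault() in the diff below):

  #include <optional>

  // 176x144 (144p) is the magic default; in the actual change these live in
  // VideoStreamEncoder::kDefaultLastFrameInfoWidth/Height.
  constexpr int kDefaultLastFrameInfoWidth = 176;
  constexpr int kDefaultLastFrameInfoHeight = 144;

  // |last_input_frame_size| is nullopt until the first OnFrame() arrives.
  int LastInputFrameSizeOrDefault(std::optional<int> last_input_frame_size) {
    return last_input_frame_size.value_or(kDefaultLastFrameInfoWidth *
                                          kDefaultLastFrameInfoHeight);
  }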

Bug: webrtc:11222
Change-Id: I3b28daa8c5ecf57c6537957d4759f15e24bb2234
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/166961
Commit-Queue: Henrik Boström <hbos@webrtc.org>
Reviewed-by: Evan Shrubsole <eshr@google.com>
Reviewed-by: Ilya Nikolaevskiy <ilnik@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#30352}
diff --git a/call/adaptation/BUILD.gn b/call/adaptation/BUILD.gn
index 99b3f16..10e8cc6 100644
--- a/call/adaptation/BUILD.gn
+++ b/call/adaptation/BUILD.gn
@@ -25,6 +25,7 @@
   ]
   deps = [
     "../../api:rtp_parameters",
+    "../../api/video:video_frame",
     "../../api/video_codecs:video_codecs_api",
     "../../rtc_base:checks",
     "../../rtc_base:rtc_base_approved",
diff --git a/call/adaptation/resource_adaptation_module_interface.h b/call/adaptation/resource_adaptation_module_interface.h
index 61380b9..3a3deb2 100644
--- a/call/adaptation/resource_adaptation_module_interface.h
+++ b/call/adaptation/resource_adaptation_module_interface.h
@@ -13,6 +13,7 @@
 
 #include "absl/types/optional.h"
 #include "api/rtp_parameters.h"
+#include "api/video/video_frame.h"
 #include "api/video_codecs/video_encoder.h"
 #include "api/video_codecs/video_encoder_config.h"
 #include "call/adaptation/video_source_restrictions.h"
@@ -91,6 +92,40 @@
   // TODO(hbos): It's not clear why anybody should be able to tell the module to
   // reset like this; can we get rid of this method?
   virtual void ResetVideoSourceRestrictions() = 0;
+
+  // The following methods correspond to the pipeline that a frame goes through.
+  // Note that if the encoder is parallelized, multiple frames may be processed
+  // in parallel and methods may be invoked in unexpected orders.
+  //
+  // The implementation must not retain VideoFrames. Doing so may keep video
+  // frame buffers alive - this may even stall encoding.
+  // TODO(hbos): Can we replace VideoFrame with a different struct, maybe width
+  // and height are enough, and some sort of way to identify it at each step?
+
+  // 1. A frame is delivered to the encoder, e.g. from the camera. Next up: it
+  // may get dropped or it may get encoded, see OnFrameDroppedDueToSize() and
+  // OnEncodeStarted().
+  virtual void OnFrame(const VideoFrame& frame) = 0;
+  // 2.i) An input frame was dropped because its resolution is too big (e.g. for
+  // the target bitrate). This frame will not continue through the rest of the
+  // pipeline. The module should adapt down in resolution to avoid subsequent
+  // frames getting dropped for the same reason.
+  // TODO(hbos): If we take frame rate into account perhaps it would be valid to
+  // adapt down in frame rate as well.
+  virtual void OnFrameDroppedDueToSize() = 0;
+  // 2.ii) An input frame is about to be encoded. It may have been cropped and
+  // have different dimensions than what was observed at OnFrame(). Next
+  // up: encoding completes or fails, see OnEncodeCompleted(). There is
+  // currently no signal for encode failure.
+  virtual void OnEncodeStarted(const VideoFrame& cropped_frame,
+                               int64_t time_when_first_seen_us) = 0;
+  // 3. The frame has successfully completed encoding. Next up: The encoded
+  // frame is dropped or packetized and sent over the network. There is
+  // currently no signal for what happens beyond this point.
+  virtual void OnEncodeCompleted(uint32_t timestamp,
+                                 int64_t time_sent_in_us,
+                                 int64_t capture_time_us,
+                                 absl::optional<int> encode_duration_us) = 0;
 };
 
 }  // namespace webrtc
diff --git a/video/overuse_frame_detector_resource_adaptation_module.cc b/video/overuse_frame_detector_resource_adaptation_module.cc
index 1ba33e1..baadb98 100644
--- a/video/overuse_frame_detector_resource_adaptation_module.cc
+++ b/video/overuse_frame_detector_resource_adaptation_module.cc
@@ -352,10 +352,10 @@
       adapt_counters_(),
       balanced_settings_(),
       last_adaptation_request_(absl::nullopt),
-      last_frame_pixel_count_(absl::nullopt),
       source_restrictor_(std::make_unique<VideoSourceRestrictor>()),
       overuse_detector_(std::move(overuse_detector)),
       overuse_detector_is_started_(false),
+      last_input_frame_size_(absl::nullopt),
       target_frame_rate_(absl::nullopt),
       target_bitrate_bps_(absl::nullopt),
       is_quality_scaler_enabled_(false),
@@ -439,22 +439,12 @@
   MaybeUpdateVideoSourceRestrictions();
 }
 
-void OveruseFrameDetectorResourceAdaptationModule::FrameCaptured(
-    const VideoFrame& frame,
-    int64_t time_when_first_seen_us) {
-  overuse_detector_->FrameCaptured(frame, time_when_first_seen_us);
+void OveruseFrameDetectorResourceAdaptationModule::OnFrame(
+    const VideoFrame& frame) {
+  last_input_frame_size_ = frame.size();
 }
 
-void OveruseFrameDetectorResourceAdaptationModule::FrameSent(
-    uint32_t timestamp,
-    int64_t time_sent_in_us,
-    int64_t capture_time_us,
-    absl::optional<int> encode_duration_us) {
-  overuse_detector_->FrameSent(timestamp, time_sent_in_us, capture_time_us,
-                               encode_duration_us);
-}
-
-void OveruseFrameDetectorResourceAdaptationModule::FrameDroppedDueToSize() {
+void OveruseFrameDetectorResourceAdaptationModule::OnFrameDroppedDueToSize() {
   int fps_count = GetConstAdaptCounter().FramerateCount(
       AdaptationObserverInterface::AdaptReason::kQuality);
   int res_count = GetConstAdaptCounter().ResolutionCount(
@@ -472,9 +462,23 @@
   }
 }
 
-void OveruseFrameDetectorResourceAdaptationModule::SetLastFramePixelCount(
-    absl::optional<int> last_frame_pixel_count) {
-  last_frame_pixel_count_ = last_frame_pixel_count;
+void OveruseFrameDetectorResourceAdaptationModule::OnEncodeStarted(
+    const VideoFrame& cropped_frame,
+    int64_t time_when_first_seen_us) {
+  // TODO(hbos): Rename FrameCaptured() to something more appropriate (e.g.
+  // "OnEncodeStarted"?) or revise usage.
+  overuse_detector_->FrameCaptured(cropped_frame, time_when_first_seen_us);
+}
+
+void OveruseFrameDetectorResourceAdaptationModule::OnEncodeCompleted(
+    uint32_t timestamp,
+    int64_t time_sent_in_us,
+    int64_t capture_time_us,
+    absl::optional<int> encode_duration_us) {
+  // TODO(hbos): Rename FrameSent() to something more appropriate (e.g.
+  // "OnEncodeCompleted"?).
+  overuse_detector_->FrameSent(timestamp, time_sent_in_us, capture_time_us,
+                               encode_duration_us);
 }
 
 void OveruseFrameDetectorResourceAdaptationModule::SetIsQualityScalerEnabled(
@@ -492,7 +496,8 @@
   RTC_DCHECK_GT(num_downgrades, 0);
 
   AdaptationRequest adaptation_request = {
-      *last_frame_pixel_count_, encoder_stats_observer_->GetInputFrameRate(),
+      LastInputFrameSizeOrDefault(),
+      encoder_stats_observer_->GetInputFrameRate(),
       AdaptationRequest::Mode::kAdaptUp};
 
   bool adapt_up_requested =
@@ -515,13 +520,13 @@
       // Check if quality should be increased based on bitrate.
       if (reason == kQuality &&
           !balanced_settings_.CanAdaptUp(GetVideoCodecTypeOrGeneric(),
-                                         *last_frame_pixel_count_,
+                                         LastInputFrameSizeOrDefault(),
                                          target_bitrate_bps_.value_or(0))) {
         return;
       }
       // Try scale up framerate, if higher.
       int fps = balanced_settings_.MaxFps(GetVideoCodecTypeOrGeneric(),
-                                          *last_frame_pixel_count_);
+                                          LastInputFrameSizeOrDefault());
       if (source_restrictor_->IncreaseFramerate(fps)) {
         GetAdaptCounter().DecrementFramerate(reason, fps);
         // Reset framerate in case of fewer fps steps down than up.
@@ -536,7 +541,7 @@
       // Check if resolution should be increased based on bitrate.
       if (reason == kQuality &&
           !balanced_settings_.CanAdaptUpResolution(
-              GetVideoCodecTypeOrGeneric(), *last_frame_pixel_count_,
+              GetVideoCodecTypeOrGeneric(), LastInputFrameSizeOrDefault(),
               target_bitrate_bps_.value_or(0))) {
         return;
       }
@@ -547,7 +552,7 @@
       // Check if resolution should be increased based on bitrate and
       // limits specified by encoder capabilities.
       if (reason == kQuality &&
-          !CanAdaptUpResolution(*last_frame_pixel_count_,
+          !CanAdaptUpResolution(LastInputFrameSizeOrDefault(),
                                 target_bitrate_bps_.value_or(0))) {
         return;
       }
@@ -599,7 +604,8 @@
   if (!has_input_video_)
     return false;
   AdaptationRequest adaptation_request = {
-      *last_frame_pixel_count_, encoder_stats_observer_->GetInputFrameRate(),
+      LastInputFrameSizeOrDefault(),
+      encoder_stats_observer_->GetInputFrameRate(),
       AdaptationRequest::Mode::kAdaptDown};
 
   bool downgrade_requested =
@@ -641,12 +647,12 @@
     case DegradationPreference::BALANCED: {
       // Try scale down framerate, if lower.
       int fps = balanced_settings_.MinFps(GetVideoCodecTypeOrGeneric(),
-                                          *last_frame_pixel_count_);
+                                          LastInputFrameSizeOrDefault());
       if (source_restrictor_->RestrictFramerate(fps)) {
         GetAdaptCounter().IncrementFramerate(reason);
         // Check if requested fps is higher (or close to) input fps.
         absl::optional<int> min_diff =
-            balanced_settings_.MinFpsDiff(*last_frame_pixel_count_);
+            balanced_settings_.MinFpsDiff(LastInputFrameSizeOrDefault());
         if (min_diff && adaptation_request.framerate_fps_ > 0) {
           int fps_diff = adaptation_request.framerate_fps_ - fps;
           if (fps_diff < min_diff.value()) {
@@ -709,6 +715,20 @@
              : kVideoCodecGeneric;
 }
 
+int OveruseFrameDetectorResourceAdaptationModule::LastInputFrameSizeOrDefault()
+    const {
+  // The dependency on this hardcoded resolution is inherited from old code,
+  // which used this resolution as a stand-in for not knowing the resolution
+  // yet.
+  // TODO(hbos): Can we simply DCHECK has_value() before usage instead? Having a
+  // DCHECK passed all the tests but adding it does change the requirements of
+  // this class (= not being allowed to call AdaptUp() or AdaptDown() before
+  // OnFrame()) and deserves a standalone CL.
+  return last_input_frame_size_.value_or(
+      VideoStreamEncoder::kDefaultLastFrameInfoWidth *
+      VideoStreamEncoder::kDefaultLastFrameInfoHeight);
+}
+
 void OveruseFrameDetectorResourceAdaptationModule::
     MaybeUpdateVideoSourceRestrictions() {
   VideoSourceRestrictions new_restrictions = ApplyDegradationPreference(
@@ -821,9 +841,8 @@
 
 absl::optional<VideoEncoder::QpThresholds>
 OveruseFrameDetectorResourceAdaptationModule::GetQpThresholds() const {
-  RTC_DCHECK(last_frame_pixel_count_.has_value());
   return balanced_settings_.GetQpThresholds(GetVideoCodecTypeOrGeneric(),
-                                            last_frame_pixel_count_.value());
+                                            LastInputFrameSizeOrDefault());
 }
 
 bool OveruseFrameDetectorResourceAdaptationModule::CanAdaptUpResolution(
diff --git a/video/overuse_frame_detector_resource_adaptation_module.h b/video/overuse_frame_detector_resource_adaptation_module.h
index f4080bd..7c63b80 100644
--- a/video/overuse_frame_detector_resource_adaptation_module.h
+++ b/video/overuse_frame_detector_resource_adaptation_module.h
@@ -77,23 +77,15 @@
       absl::optional<uint32_t> target_bitrate_bps) override;
   void ResetVideoSourceRestrictions() override;
 
-  // Input to the OveruseFrameDetector, which are required for this module to
-  // function. These map to OveruseFrameDetector methods.
-  // TODO(hbos): Define virtual methods in ResourceAdaptationModuleInterface
-  // for input that are more generic so that this class can be used without
-  // assumptions about underlying implementation.
-  void FrameCaptured(const VideoFrame& frame, int64_t time_when_first_seen_us);
-  void FrameSent(uint32_t timestamp,
-                 int64_t time_sent_in_us,
-                 int64_t capture_time_us,
-                 absl::optional<int> encode_duration_us);
-  void FrameDroppedDueToSize();
+  void OnFrame(const VideoFrame& frame) override;
+  void OnFrameDroppedDueToSize() override;
+  void OnEncodeStarted(const VideoFrame& cropped_frame,
+                       int64_t time_when_first_seen_us) override;
+  void OnEncodeCompleted(uint32_t timestamp,
+                         int64_t time_sent_in_us,
+                         int64_t capture_time_us,
+                         absl::optional<int> encode_duration_us) override;
 
-  // Various other settings and feedback mechanisms.
-  // TODO(hbos): Find a common interface that would make sense for a generic
-  // resource adaptation module. Unify code paths where possible. Do we really
-  // need this many public methods?
-  void SetLastFramePixelCount(absl::optional<int> last_frame_pixel_count);
   // Inform the detector whether or not the quality scaler is enabled. This
   // helps GetActiveCounts() return absl::nullopt when appropriate.
   // TODO(hbos): This feels really hacky, can we report the right values without
@@ -178,6 +170,7 @@
   };
 
   VideoCodecType GetVideoCodecTypeOrGeneric() const;
+  int LastInputFrameSizeOrDefault() const;
 
   // Makes |video_source_restrictions_| up-to-date and informs the
   // |adaptation_listener_| if restrictions are changed, allowing the listener
@@ -210,11 +203,11 @@
   // Stores a snapshot of the last adaptation request triggered by an AdaptUp
   // or AdaptDown signal.
   absl::optional<AdaptationRequest> last_adaptation_request_;
-  absl::optional<int> last_frame_pixel_count_;
   // Keeps track of source restrictions that this adaptation module outputs.
   const std::unique_ptr<VideoSourceRestrictor> source_restrictor_;
   const std::unique_ptr<OveruseFrameDetector> overuse_detector_;
   bool overuse_detector_is_started_;
+  absl::optional<int> last_input_frame_size_;
   absl::optional<double> target_frame_rate_;
   absl::optional<uint32_t> target_bitrate_bps_;
   bool is_quality_scaler_enabled_;
diff --git a/video/video_stream_encoder.cc b/video/video_stream_encoder.cc
index 68f24e6..a69eb04 100644
--- a/video/video_stream_encoder.cc
+++ b/video/video_stream_encoder.cc
@@ -226,6 +226,9 @@
   return absl::nullopt;
 }
 
+const int VideoStreamEncoder::kDefaultLastFrameInfoWidth = 176;
+const int VideoStreamEncoder::kDefaultLastFrameInfoHeight = 144;
+
 VideoStreamEncoder::EncoderRateSettings::EncoderRateSettings()
     : rate_control(),
       encoder_target(DataRate::Zero()),
@@ -441,9 +444,8 @@
           codec_info_ = settings_.encoder_factory->QueryVideoEncoder(
               encoder_config_.video_format);
           if (HasInternalSource()) {
-            last_frame_info_ = VideoFrameInfo(176, 144, false);
-            resource_adaptation_module_->SetLastFramePixelCount(
-                last_frame_info_->pixel_count());
+            last_frame_info_ = VideoFrameInfo(
+                kDefaultLastFrameInfoWidth, kDefaultLastFrameInfoHeight, false);
             ReconfigureEncoder();
           }
         }
@@ -1063,6 +1065,7 @@
 void VideoStreamEncoder::MaybeEncodeVideoFrame(const VideoFrame& video_frame,
                                                int64_t time_when_posted_us) {
   RTC_DCHECK_RUN_ON(&encoder_queue_);
+  resource_adaptation_module_->OnFrame(video_frame);
 
   if (!last_frame_info_ || video_frame.width() != last_frame_info_->width ||
       video_frame.height() != last_frame_info_->height ||
@@ -1070,8 +1073,6 @@
     pending_encoder_reconfiguration_ = true;
     last_frame_info_ = VideoFrameInfo(video_frame.width(), video_frame.height(),
                                       video_frame.is_texture());
-    resource_adaptation_module_->SetLastFramePixelCount(
-        last_frame_info_->pixel_count());
     RTC_LOG(LS_INFO) << "Video frame parameters changed: dimensions="
                      << last_frame_info_->width << "x"
                      << last_frame_info_->height
@@ -1126,7 +1127,7 @@
 
   if (DropDueToSize(video_frame.size())) {
     RTC_LOG(LS_INFO) << "Dropping frame. Too large for target bitrate.";
-    resource_adaptation_module_->FrameDroppedDueToSize();
+    resource_adaptation_module_->OnFrameDroppedDueToSize();
     ++initial_framedrop_;
     // Storing references to a native buffer risks blocking frame capture.
     if (video_frame.video_frame_buffer()->type() !=
@@ -1335,7 +1336,7 @@
   TRACE_EVENT_ASYNC_STEP0("webrtc", "Video", video_frame.render_time_ms(),
                           "Encode");
 
-  resource_adaptation_module_->FrameCaptured(out_frame, time_when_posted_us);
+  resource_adaptation_module_->OnEncodeStarted(out_frame, time_when_posted_us);
 
   RTC_DCHECK_LE(send_codec_.width, out_frame.width());
   RTC_DCHECK_LE(send_codec_.height, out_frame.height());
@@ -1803,7 +1804,7 @@
     }
   }
 
-  resource_adaptation_module_->FrameSent(
+  resource_adaptation_module_->OnEncodeCompleted(
       encoded_image.Timestamp(), time_sent_us,
       encoded_image.capture_time_ms_ * rtc::kNumMicrosecsPerMillisec,
       encode_duration_us);
diff --git a/video/video_stream_encoder.h b/video/video_stream_encoder.h
index a905420..ac73cd5 100644
--- a/video/video_stream_encoder.h
+++ b/video/video_stream_encoder.h
@@ -64,6 +64,14 @@
                            private EncodedImageCallback,
                            public ResourceAdaptationModuleListener {
  public:
+  // If the encoder is reconfigured with a source, but we've yet to receive any
+  // frames, this 144p resolution is picked as the default value of
+  // |last_frame_info_|.
+  // TODO(hbos): Can we avoid guesses and properly handle the case of
+  // |last_frame_info_| not having a value, deleting these constants?
+  static const int kDefaultLastFrameInfoWidth;
+  static const int kDefaultLastFrameInfoHeight;
+
   VideoStreamEncoder(Clock* clock,
                      uint32_t number_of_cores,
                      VideoStreamEncoderObserver* encoder_stats_observer,