Implement automatic animation detection in VideoStreamEncoder

If the WebRTC-AutomaticAnimationDetectionScreenshare experiment is enabled,
the content type is screenshare and the degradation preference is BALANCED,
then the input resolution is restricted if the update_rect of the incoming
frames stays the same for a considerable amount of time and is big enough.

This entails treating the BALANCED degradation preference for screenshare as
MAINTAIN_RESOLUTION in the adaptation logic.

Bug: webrtc:11058
Change-Id: I903dddf53fcbd7c8eac6c5b1447225b15fd8fe5f
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/161097
Reviewed-by: Erik Språng <sprang@webrtc.org>
Commit-Queue: Ilya Nikolaevskiy <ilnik@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#30002}
diff --git a/test/test_video_capturer.cc b/test/test_video_capturer.cc
index 6d6db8d..c0d575d 100644
--- a/test/test_video_capturer.cc
+++ b/test/test_video_capturer.cc
@@ -39,15 +39,24 @@
   if (out_height != frame.height() || out_width != frame.width()) {
     // Video adapter has requested a down-scale. Allocate a new buffer and
     // return scaled version.
+    // For simplicity, only scale here without cropping.
     rtc::scoped_refptr<I420Buffer> scaled_buffer =
         I420Buffer::Create(out_width, out_height);
     scaled_buffer->ScaleFrom(*frame.video_frame_buffer()->ToI420());
-    broadcaster_.OnFrame(VideoFrame::Builder()
-                             .set_video_frame_buffer(scaled_buffer)
-                             .set_rotation(kVideoRotation_0)
-                             .set_timestamp_us(frame.timestamp_us())
-                             .set_id(frame.id())
-                             .build());
+    VideoFrame::Builder new_frame_builder =
+        VideoFrame::Builder()
+            .set_video_frame_buffer(scaled_buffer)
+            .set_rotation(kVideoRotation_0)
+            .set_timestamp_us(frame.timestamp_us())
+            .set_id(frame.id());
+    if (frame.has_update_rect()) {
+      VideoFrame::UpdateRect new_rect = frame.update_rect().ScaleWithFrame(
+          frame.width(), frame.height(), 0, 0, frame.width(), frame.height(),
+          out_width, out_height);
+      new_frame_builder.set_update_rect(new_rect);
+    }
+    broadcaster_.OnFrame(new_frame_builder.build());
+
   } else {
     // No adaptations needed, just return the frame as is.
     broadcaster_.OnFrame(frame);
diff --git a/video/video_stream_encoder.cc b/video/video_stream_encoder.cc
index dc3bc16..7879522 100644
--- a/video/video_stream_encoder.cc
+++ b/video/video_stream_encoder.cc
@@ -68,6 +68,9 @@
 
 const int64_t kParameterUpdateIntervalMs = 1000;
 
+// Animation is capped to 720p.
+constexpr int kMaxAnimationPixels = 1280 * 720;
+
 uint32_t abs_diff(uint32_t a, uint32_t b) {
   return (a < b) ? b - a : a - b;
 }
@@ -219,7 +222,8 @@
       : video_stream_encoder_(video_stream_encoder),
         degradation_preference_(DegradationPreference::DISABLED),
         source_(nullptr),
-        max_framerate_(std::numeric_limits<int>::max()) {}
+        max_framerate_(std::numeric_limits<int>::max()),
+        max_pixels_(std::numeric_limits<int>::max()) {}
 
   void SetSource(rtc::VideoSourceInterface<VideoFrame>* source,
                  const DegradationPreference& degradation_preference) {
@@ -407,6 +411,22 @@
     return true;
   }
 
+  // Caps max_pixel_count in the sink wants (screenshare animation detection).
+  bool RestrictPixels(int max_pixels) {
+    // Called on the encoder task queue. Returns false if no cap was applied.
+    rtc::CritScope lock(&crit_);
+    if (!source_ || !IsResolutionScalingEnabled(degradation_preference_)) {
+      // This can happen since |degradation_preference_| is set on libjingle's
+      // worker thread but the adaptation is done on the encoder task queue.
+      return false;
+    }
+    max_pixels_ = max_pixels;
+    RTC_LOG(LS_INFO) << "Applying max pixel restriction: " << max_pixels;
+    source_->AddOrUpdateSink(video_stream_encoder_,
+                             GetActiveSinkWantsInternal());
+    return true;
+  }
+
  private:
   rtc::VideoSinkWants GetActiveSinkWantsInternal()
       RTC_EXCLUSIVE_LOCKS_REQUIRED(&crit_) {
@@ -430,6 +450,9 @@
     }
     // Limit to configured max framerate.
     wants.max_framerate_fps = std::min(max_framerate_, wants.max_framerate_fps);
+    // Limit resolution due to automatic animation detection for screenshare.
+    wants.max_pixel_count = std::min(max_pixels_, wants.max_pixel_count);
+
     return wants;
   }
 
@@ -440,6 +463,7 @@
   DegradationPreference degradation_preference_ RTC_GUARDED_BY(&crit_);
   rtc::VideoSourceInterface<VideoFrame>* source_ RTC_GUARDED_BY(&crit_);
   int max_framerate_ RTC_GUARDED_BY(&crit_);
+  int max_pixels_ RTC_GUARDED_BY(&crit_);
 
   RTC_DISALLOW_COPY_AND_ASSIGN(VideoSourceProxy);
 };
@@ -519,6 +543,9 @@
       pending_frame_post_time_us_(0),
       accumulated_update_rect_{0, 0, 0, 0},
       accumulated_update_rect_is_valid_(true),
+      animation_start_time_(Timestamp::PlusInfinity()),
+      cap_resolution_due_to_video_content_(false),
+      expect_resize_state_(ExpectResizeState::kNoResize),
       bitrate_observer_(nullptr),
       fec_controller_override_(nullptr),
       force_disable_frame_dropper_(false),
@@ -529,6 +556,8 @@
       experiment_groups_(GetExperimentGroups()),
       next_frame_id_(0),
       encoder_switch_experiment_(ParseEncoderSwitchFieldTrial()),
+      automatic_animation_detection_experiment_(
+          ParseAutomatincAnimationDetectionFieldTrial()),
       encoder_switch_requested_(false),
       encoder_queue_(task_queue_factory->CreateTaskQueue(
           "EncoderQueue",
@@ -1114,6 +1143,7 @@
         const int posted_frames_waiting_for_encode =
             posted_frames_waiting_for_encode_.fetch_sub(1);
         RTC_DCHECK_GT(posted_frames_waiting_for_encode, 0);
+        CheckForAnimatedContent(incoming_frame, post_time_us);
         if (posted_frames_waiting_for_encode == 1) {
           MaybeEncodeVideoFrame(incoming_frame, post_time_us);
         } else {
@@ -1951,7 +1981,7 @@
 
   bool did_adapt = true;
 
-  switch (degradation_preference_) {
+  switch (EffectiveDegradataionPreference()) {
     case DegradationPreference::BALANCED:
       break;
     case DegradationPreference::MAINTAIN_FRAMERATE:
@@ -1980,7 +2010,7 @@
       return true;
   }
 
-  switch (degradation_preference_) {
+  switch (EffectiveDegradataionPreference()) {
     case DegradationPreference::BALANCED: {
       // Try scale down framerate, if lower.
       int fps = balanced_settings_.MinFps(encoder_config_.codec_type,
@@ -2057,7 +2087,8 @@
       last_adaptation_request_ &&
       last_adaptation_request_->mode_ == AdaptationRequest::Mode::kAdaptUp;
 
-  if (degradation_preference_ == DegradationPreference::MAINTAIN_FRAMERATE) {
+  if (EffectiveDegradataionPreference() ==
+      DegradationPreference::MAINTAIN_FRAMERATE) {
     if (adapt_up_requested &&
         adaptation_request.input_pixel_count_ <=
             last_adaptation_request_->input_pixel_count_) {
@@ -2067,7 +2098,7 @@
     }
   }
 
-  switch (degradation_preference_) {
+  switch (EffectiveDegradataionPreference()) {
     case DegradationPreference::BALANCED: {
       // Check if quality should be increased based on bitrate.
       if (reason == kQuality &&
@@ -2494,4 +2525,106 @@
   return result;
 }
 
+VideoStreamEncoder::AutomaticAnimationDetectionExperiment
+VideoStreamEncoder::ParseAutomatincAnimationDetectionFieldTrial() const {
+  AutomaticAnimationDetectionExperiment result;  // Starts from the defaults.
+
+  result.Parser()->Parse(webrtc::field_trial::FindFullName(
+      "WebRTC-AutomaticAnimationDetectionScreenshare"));
+
+  if (!result.enabled) {
+    RTC_LOG(LS_INFO) << "Automatic animation detection experiment is disabled.";
+    return result;
+  }
+
+  RTC_LOG(LS_INFO) << "Automatic animation detection experiment settings:"
+                   << " min_duration_ms=" << result.min_duration_ms
+                   << " min_area_ratio=" << result.min_area_ratio
+                   << " min_fps=" << result.min_fps;
+
+  return result;
+}
+
+void VideoStreamEncoder::CheckForAnimatedContent(
+    const VideoFrame& frame,
+    int64_t time_when_posted_in_us) {
+  if (!automatic_animation_detection_experiment_.enabled ||
+      encoder_config_.content_type !=
+          VideoEncoderConfig::ContentType::kScreen ||
+      degradation_preference_ != DegradationPreference::BALANCED) {
+    return;
+  }
+
+  if (expect_resize_state_ == ExpectResizeState::kResize && last_frame_info_ &&
+      last_frame_info_->width != frame.width() &&
+      last_frame_info_->height != frame.height()) {
+    // After the resolution cap is applied there will be one frame with
+    // no/different update, which should be skipped. The resized frame can
+    // arrive several frames after the cap was requested.
+    expect_resize_state_ = ExpectResizeState::kFirstFrameAfterResize;
+    return;
+  }
+
+  if (expect_resize_state_ == ExpectResizeState::kFirstFrameAfterResize) {
+    // The first frame after the resize should have a new, scaled update_rect.
+    if (frame.has_update_rect()) {
+      last_update_rect_ = frame.update_rect();
+    } else {
+      last_update_rect_ = absl::nullopt;
+    }
+    expect_resize_state_ = ExpectResizeState::kNoResize;
+  }
+
+  bool should_cap_resolution = false;
+  if (!frame.has_update_rect()) {
+    last_update_rect_ = absl::nullopt;
+    animation_start_time_ = Timestamp::PlusInfinity();
+  } else if ((!last_update_rect_ ||
+              frame.update_rect() != *last_update_rect_)) {
+    last_update_rect_ = frame.update_rect();
+    animation_start_time_ = Timestamp::us(time_when_posted_in_us);
+  } else {  // Same update_rect as the previously remembered one.
+    TimeDelta animation_duration =
+        Timestamp::us(time_when_posted_in_us) - animation_start_time_;
+    float area_ratio = static_cast<float>(last_update_rect_->width *
+                                          last_update_rect_->height) /
+                       (frame.width() * frame.height());
+    if (animation_duration.ms() >=
+            automatic_animation_detection_experiment_.min_duration_ms &&
+        area_ratio >=
+            automatic_animation_detection_experiment_.min_area_ratio &&
+        encoder_stats_observer_->GetInputFrameRate() >=
+            automatic_animation_detection_experiment_.min_fps) {
+      should_cap_resolution = true;
+    }
+  }
+  if (cap_resolution_due_to_video_content_ != should_cap_resolution) {
+    expect_resize_state_ = should_cap_resolution ? ExpectResizeState::kResize
+                                                 : ExpectResizeState::kNoResize;
+    cap_resolution_due_to_video_content_ = should_cap_resolution;
+    if (should_cap_resolution) {
+      RTC_LOG(LS_INFO) << "Applying resolution cap due to animation detection.";
+    } else {
+      RTC_LOG(LS_INFO) << "Removing resolution cap due to no consistent "
+                          "animation detection.";
+    }
+    source_proxy_->RestrictPixels(should_cap_resolution
+                                      ? kMaxAnimationPixels
+                                      : std::numeric_limits<int>::max());
+  }
+}
+
+DegradationPreference VideoStreamEncoder::EffectiveDegradataionPreference()
+    const {
+  // Balanced mode for screenshare works via automatic animation detection:
+  // resolution is capped for fullscreen animated content, and
+  // adaptation is done only via framerate downgrade.
+  // Thus the effective degradation preference is MAINTAIN_RESOLUTION.
+  return (encoder_config_.content_type ==
+              VideoEncoderConfig::ContentType::kScreen &&
+          degradation_preference_ == DegradationPreference::BALANCED)
+             ? DegradationPreference::MAINTAIN_RESOLUTION
+             : degradation_preference_;
+}
+
 }  // namespace webrtc
diff --git a/video/video_stream_encoder.h b/video/video_stream_encoder.h
index 12cc689..9517944 100644
--- a/video/video_stream_encoder.h
+++ b/video/video_stream_encoder.h
@@ -234,6 +234,14 @@
   bool HasInternalSource() const RTC_RUN_ON(&encoder_queue_);
   void ReleaseEncoder() RTC_RUN_ON(&encoder_queue_);
 
+  void CheckForAnimatedContent(const VideoFrame& frame,
+                               int64_t time_when_posted_in_us)
+      RTC_RUN_ON(&encoder_queue_);
+
+  // Calculates degradation preference used in adaptation down or up.
+  DegradationPreference EffectiveDegradataionPreference() const
+      RTC_RUN_ON(&encoder_queue_);
+
   rtc::Event shutdown_event_;
 
   const uint32_t number_of_cores_;
@@ -344,6 +352,19 @@
       RTC_GUARDED_BY(&encoder_queue_);
   bool accumulated_update_rect_is_valid_ RTC_GUARDED_BY(&encoder_queue_);
 
+  // Used for automatic animation detection.
+  absl::optional<VideoFrame::UpdateRect> last_update_rect_
+      RTC_GUARDED_BY(&encoder_queue_);
+  Timestamp animation_start_time_ RTC_GUARDED_BY(&encoder_queue_);
+  bool cap_resolution_due_to_video_content_ RTC_GUARDED_BY(&encoder_queue_);
+  // Used to correctly ignore changes in update_rect introduced by
+  // resize triggered by animation detection.
+  enum class ExpectResizeState {
+    kNoResize,              // Normal operation.
+    kResize,                // Resize was triggered by the animation detection.
+    kFirstFrameAfterResize  // Resize observed.
+  } expect_resize_state_ RTC_GUARDED_BY(&encoder_queue_);
+
   VideoBitrateAllocationObserver* bitrate_observer_
       RTC_GUARDED_BY(&encoder_queue_);
   FecControllerOverride* fec_controller_override_
@@ -428,6 +449,26 @@
   EncoderSwitchExperiment encoder_switch_experiment_
       RTC_GUARDED_BY(&encoder_queue_);
 
+  struct AutomaticAnimationDetectionExperiment {
+    bool enabled = false;  // Detection is skipped unless true.
+    int min_duration_ms = 2000;  // Min duration of an unchanged update_rect.
+    double min_area_ratio = 0.8;  // Min update_rect area / frame area.
+    int min_fps = 10;  // Min input frame rate.
+    std::unique_ptr<StructParametersParser> Parser() {
+      return StructParametersParser::Create(
+          "enabled", &enabled,                  //
+          "min_duration_ms", &min_duration_ms,  //
+          "min_area_ratio", &min_area_ratio,    //
+          "min_fps", &min_fps);
+    }
+  };
+
+  AutomaticAnimationDetectionExperiment
+  ParseAutomatincAnimationDetectionFieldTrial() const;
+
+  AutomaticAnimationDetectionExperiment
+      automatic_animation_detection_experiment_ RTC_GUARDED_BY(&encoder_queue_);
+
   // An encoder switch is only requested once, this variable is used to keep
   // track of whether a request has been made or not.
   bool encoder_switch_requested_ RTC_GUARDED_BY(&encoder_queue_);
diff --git a/video/video_stream_encoder_unittest.cc b/video/video_stream_encoder_unittest.cc
index f50afbd..f2e023d 100644
--- a/video/video_stream_encoder_unittest.cc
+++ b/video/video_stream_encoder_unittest.cc
@@ -301,6 +301,13 @@
                 .set_rotation(kVideoRotation_0)
                 .build();
         adapted_frame.set_ntp_time_ms(video_frame.ntp_time_ms());
+        if (video_frame.has_update_rect()) {
+          adapted_frame.set_update_rect(
+              video_frame.update_rect().ScaleWithFrame(
+                  video_frame.width(), video_frame.height(), 0, 0,
+                  video_frame.width(), video_frame.height(), out_width,
+                  out_height));
+        }
         test::FrameForwarder::IncomingCapturedFrame(adapted_frame);
         last_width_.emplace(adapted_frame.width());
         last_height_.emplace(adapted_frame.height());
@@ -5201,4 +5208,61 @@
   video_stream_encoder_->Stop();
 }
 
+TEST_F(VideoStreamEncoderTest, AutomaticAnimationDetection) {
+  test::ScopedFieldTrials field_trials(
+      "WebRTC-AutomaticAnimationDetectionScreenshare/"
+      "enabled:true,min_fps:20,min_duration_ms:1000,min_area_ratio:0.8/");
+  const int kFramerateFps = 30;
+  const int kWidth = 1920;
+  const int kHeight = 1080;
+  const int kNumFrames = 2 * kFramerateFps;  // > 1 second of frames.
+  // Works on screenshare mode.
+  ResetEncoder("VP8", 1, 1, 1, /*screenshare*/ true);
+  // We rely on the automatic resolution adaptation, but we handle framerate
+  // adaptation manually by mocking the stats proxy.
+  video_source_.set_adaptation_enabled(true);
+
+  // BALANCED degradation preference is required for this feature.
+  video_stream_encoder_->OnBitrateUpdated(
+      DataRate::bps(kTargetBitrateBps), DataRate::bps(kTargetBitrateBps),
+      DataRate::bps(kTargetBitrateBps), 0, 0);
+  video_stream_encoder_->SetSource(&video_source_,
+                                   webrtc::DegradationPreference::BALANCED);
+  VerifyNoLimitation(video_source_.sink_wants());
+
+  VideoFrame frame = CreateFrame(1, kWidth, kHeight);
+  frame.set_update_rect(VideoFrame::UpdateRect{0, 0, kWidth, kHeight});
+
+  // Pass enough frames with the full update to trigger animation detection.
+  for (int i = 0; i < kNumFrames; ++i) {
+    int64_t timestamp_ms =
+        fake_clock_.TimeNanos() / rtc::kNumNanosecsPerMillisec;
+    frame.set_ntp_time_ms(timestamp_ms);
+    frame.set_timestamp_us(timestamp_ms * 1000);
+    video_source_.IncomingCapturedFrame(frame);
+    WaitForEncodedFrame(timestamp_ms);
+  }
+
+  // Resolution should be limited.
+  rtc::VideoSinkWants expected;
+  expected.max_framerate_fps = kFramerateFps;
+  expected.max_pixel_count = 1280 * 720 + 1;
+  VerifyFpsEqResolutionLt(video_source_.sink_wants(), expected);
+
+  // Pass one frame with no known update.
+  // Resolution cap should be removed immediately.
+  int64_t timestamp_ms = fake_clock_.TimeNanos() / rtc::kNumNanosecsPerMillisec;
+  frame.set_ntp_time_ms(timestamp_ms);
+  frame.set_timestamp_us(timestamp_ms * 1000);
+  frame.clear_update_rect();
+
+  video_source_.IncomingCapturedFrame(frame);
+  WaitForEncodedFrame(timestamp_ms);
+
+  // Resolution should be unlimited now.
+  VerifyFpsEqResolutionMax(video_source_.sink_wants(), kFramerateFps);
+
+  video_stream_encoder_->Stop();
+}
+
 }  // namespace webrtc