Drop frames until specified bitrate is achieved.

This CL fixes a regression introduced with the new quality scaler
where the video would no longer start in a scaled mode. This CL adds
code that compares incoming captured frames to the target bitrate,
and if they are found to be too large, they are dropped and sinkWants
set to a lower resolution. The number of dropped frames should be low
(0-4 in most cases) and should not introduce a noticeable delay, or
at least should be preferrable to having the first 2-4 seconds of video
have very low quality.

BUG=webrtc:6953

Review-Url: https://codereview.webrtc.org/2630333002
Cr-Original-Commit-Position: refs/heads/master@{#16391}
Cr-Mirrored-From: https://chromium.googlesource.com/external/webrtc
Cr-Mirrored-Commit: 83399caec5762d2dad038b8e9d86163e92c18c9f
diff --git a/api/video/video_frame.cc b/api/video/video_frame.cc
index 0e3efb2..b264746 100644
--- a/api/video/video_frame.cc
+++ b/api/video/video_frame.cc
@@ -51,6 +51,10 @@
   return video_frame_buffer_ ? video_frame_buffer_->height() : 0;
 }
 
+uint32_t VideoFrame::size() const {
+  return width() * height();
+}
+
 rtc::scoped_refptr<VideoFrameBuffer> VideoFrame::video_frame_buffer() const {
   return video_frame_buffer_;
 }
diff --git a/api/video/video_frame.h b/api/video/video_frame.h
index 5c57213..8840782 100644
--- a/api/video/video_frame.h
+++ b/api/video/video_frame.h
@@ -48,9 +48,10 @@
 
   // Get frame width.
   int width() const;
-
   // Get frame height.
   int height() const;
+  // Get frame size in pixels.
+  uint32_t size() const;
 
   // System monotonic clock, same timebase as rtc::TimeMicros().
   int64_t timestamp_us() const { return timestamp_us_; }
diff --git a/media/engine/webrtcvideoengine2_unittest.cc b/media/engine/webrtcvideoengine2_unittest.cc
index ef77e9e..22606a6 100644
--- a/media/engine/webrtcvideoengine2_unittest.cc
+++ b/media/engine/webrtcvideoengine2_unittest.cc
@@ -101,6 +101,12 @@
                 it->second == config.rtp.ulpfec.red_rtx_payload_type);
   }
 }
+
+cricket::MediaConfig GetMediaConfig() {
+  cricket::MediaConfig media_config;
+  media_config.video.enable_cpu_overuse_detection = false;
+  return media_config;
+}
 }  // namespace
 
 namespace cricket {
@@ -329,7 +335,7 @@
 TEST_F(WebRtcVideoEngine2Test, SetSendFailsBeforeSettingCodecs) {
   engine_.Init();
   std::unique_ptr<VideoMediaChannel> channel(
-      engine_.CreateChannel(call_.get(), MediaConfig(), VideoOptions()));
+      engine_.CreateChannel(call_.get(), GetMediaConfig(), VideoOptions()));
 
   EXPECT_TRUE(channel->AddSendStream(StreamParams::CreateLegacy(123)));
 
@@ -342,7 +348,7 @@
 TEST_F(WebRtcVideoEngine2Test, GetStatsWithoutSendCodecsSetDoesNotCrash) {
   engine_.Init();
   std::unique_ptr<VideoMediaChannel> channel(
-      engine_.CreateChannel(call_.get(), MediaConfig(), VideoOptions()));
+      engine_.CreateChannel(call_.get(), GetMediaConfig(), VideoOptions()));
   EXPECT_TRUE(channel->AddSendStream(StreamParams::CreateLegacy(123)));
   VideoMediaInfo info;
   channel->GetStats(&info);
@@ -437,7 +443,7 @@
   } else {
     engine_.Init();
     channel.reset(
-        engine_.CreateChannel(call_.get(), MediaConfig(), VideoOptions()));
+        engine_.CreateChannel(call_.get(), GetMediaConfig(), VideoOptions()));
   }
   ASSERT_TRUE(
       channel->AddSendStream(cricket::StreamParams::CreateLegacy(kSsrc)));
@@ -547,7 +553,7 @@
   engine_.Init();
 
   VideoMediaChannel* channel =
-      engine_.CreateChannel(call_.get(), MediaConfig(), VideoOptions());
+      engine_.CreateChannel(call_.get(), GetMediaConfig(), VideoOptions());
   cricket::VideoSendParameters parameters;
   // We need to look up the codec in the engine to get the correct payload type.
   for (const VideoCodec& codec : encoder_factory->supported_codecs())
@@ -565,7 +571,7 @@
   engine_.Init();
 
   VideoMediaChannel* channel =
-      engine_.CreateChannel(call_.get(), MediaConfig(), VideoOptions());
+      engine_.CreateChannel(call_.get(), GetMediaConfig(), VideoOptions());
   cricket::VideoRecvParameters parameters;
   parameters.codecs = codecs;
   EXPECT_TRUE(channel->SetRecvParameters(parameters));
@@ -638,7 +644,7 @@
   engine_.Init();
 
   std::unique_ptr<VideoMediaChannel> channel(
-      engine_.CreateChannel(call_.get(), MediaConfig(), VideoOptions()));
+      engine_.CreateChannel(call_.get(), GetMediaConfig(), VideoOptions()));
   cricket::VideoSendParameters parameters;
   parameters.codecs.push_back(GetEngineCodec("VP8"));
   EXPECT_TRUE(channel->SetSendParameters(parameters));
@@ -659,7 +665,7 @@
   engine_.Init();
 
   std::unique_ptr<VideoMediaChannel> channel(
-      engine_.CreateChannel(call_.get(), MediaConfig(), VideoOptions()));
+      engine_.CreateChannel(call_.get(), GetMediaConfig(), VideoOptions()));
   cricket::VideoSendParameters parameters;
   parameters.codecs.push_back(GetEngineCodec("VP8"));
   EXPECT_TRUE(channel->SetSendParameters(parameters));
@@ -699,7 +705,7 @@
   engine_.Init();
 
   std::unique_ptr<VideoMediaChannel> channel(
-      engine_.CreateChannel(call_.get(), MediaConfig(), VideoOptions()));
+      engine_.CreateChannel(call_.get(), GetMediaConfig(), VideoOptions()));
   cricket::VideoSendParameters parameters;
   parameters.codecs.push_back(GetEngineCodec("H264"));
   EXPECT_TRUE(channel->SetSendParameters(parameters));
@@ -962,8 +968,8 @@
   void SetUp() override {
     fake_call_.reset(new FakeCall(webrtc::Call::Config(&event_log_)));
     engine_.Init();
-    channel_.reset(
-        engine_.CreateChannel(fake_call_.get(), MediaConfig(), VideoOptions()));
+    channel_.reset(engine_.CreateChannel(fake_call_.get(), GetMediaConfig(),
+                                         VideoOptions()));
     channel_->OnReadyToSend(true);
     last_ssrc_ = 123;
     send_parameters_.codecs = engine_.codecs();
@@ -1771,7 +1777,7 @@
 }
 
 TEST_F(WebRtcVideoChannel2Test, SetMediaConfigSuspendBelowMinBitrate) {
-  MediaConfig media_config = MediaConfig();
+  MediaConfig media_config = GetMediaConfig();
   media_config.video.suspend_below_min_bitrate = true;
 
   channel_.reset(
@@ -2068,7 +2074,7 @@
   cricket::VideoSendParameters parameters;
   parameters.codecs.push_back(codec);
 
-  MediaConfig media_config = MediaConfig();
+  MediaConfig media_config = GetMediaConfig();
   channel_.reset(
       engine_.CreateChannel(fake_call_.get(), media_config, VideoOptions()));
   channel_->OnReadyToSend(true);
@@ -2143,7 +2149,8 @@
   cricket::VideoSendParameters parameters;
   parameters.codecs.push_back(codec);
 
-  MediaConfig media_config = MediaConfig();
+  MediaConfig media_config = GetMediaConfig();
+  media_config.video.enable_cpu_overuse_detection = true;
   channel_.reset(
       engine_.CreateChannel(fake_call_.get(), media_config, VideoOptions()));
   channel_->OnReadyToSend(true);
@@ -2208,9 +2215,9 @@
   cricket::VideoSendParameters parameters;
   parameters.codecs.push_back(codec);
 
-  MediaConfig media_config = MediaConfig();
-  if (!enable_overuse) {
-    media_config.video.enable_cpu_overuse_detection = false;
+  MediaConfig media_config = GetMediaConfig();
+  if (enable_overuse) {
+    media_config.video.enable_cpu_overuse_detection = true;
   }
   channel_.reset(
       engine_.CreateChannel(fake_call_.get(), media_config, VideoOptions()));
@@ -4041,7 +4048,7 @@
   void SetUp() override {
     engine_.Init();
     channel_.reset(
-        engine_.CreateChannel(&fake_call_, MediaConfig(), VideoOptions()));
+        engine_.CreateChannel(&fake_call_, GetMediaConfig(), VideoOptions()));
     channel_->OnReadyToSend(true);
     last_ssrc_ = 123;
   }
diff --git a/modules/video_coding/utility/default_video_bitrate_allocator.cc b/modules/video_coding/utility/default_video_bitrate_allocator.cc
index 482c8ec..a914e8a 100644
--- a/modules/video_coding/utility/default_video_bitrate_allocator.cc
+++ b/modules/video_coding/utility/default_video_bitrate_allocator.cc
@@ -35,7 +35,6 @@
   } else {
     allocation.SetBitrate(0, 0, total_bitrate_bps);
   }
-
   return allocation;
 }
 
diff --git a/video/vie_encoder.cc b/video/vie_encoder.cc
index 01e64fe..5c312bc 100644
--- a/video/vie_encoder.cc
+++ b/video/vie_encoder.cc
@@ -42,6 +42,10 @@
 const int kMinPixelsPerFrame = 120 * 90;
 #endif
 
+// The maximum number of frames to drop at beginning of stream
+// to try and achieve desired bitrate.
+const int kMaxInitialFramedrop = 4;
+
 // TODO(pbos): Lower these thresholds (to closer to 100%) when we handle
 // pipelining encoders better (multiple input frames before something comes
 // out). This should effectively turn off CPU adaptations for systems that
@@ -55,6 +59,17 @@
   return options;
 }
 
+uint32_t MaximumFrameSizeForBitrate(uint32_t kbps) {
+  if (kbps > 0) {
+    if (kbps < 300 /* qvga */) {
+      return 320 * 240;
+    } else if (kbps < 500 /* vga */) {
+      return 640 * 480;
+    }
+  }
+  return std::numeric_limits<uint32_t>::max();
+}
+
 }  //  namespace
 
 class ViEEncoder::ConfigureEncoderTask : public rtc::QueuedTask {
@@ -244,6 +259,7 @@
                        EncodedFrameObserver* encoder_timing)
     : shutdown_event_(true /* manual_reset */, false),
       number_of_cores_(number_of_cores),
+      initial_rampup_(0),
       source_proxy_(new VideoSourceProxy(this)),
       sink_(nullptr),
       settings_(settings),
@@ -340,8 +356,8 @@
     RTC_DCHECK_RUN_ON(&encoder_queue_);
     scaling_enabled_ = (degradation_preference !=
          VideoSendStream::DegradationPreference::kMaintainResolution);
-    stats_proxy_->SetResolutionRestrictionStats(
-        scaling_enabled_, scale_counter_[kCpu] > 0, scale_counter_[kQuality]);
+    initial_rampup_ = scaling_enabled_ ? 0 : kMaxInitialFramedrop;
+    ConfigureQualityScaler();
   });
 }
 
@@ -436,8 +452,14 @@
   sink_->OnEncoderConfigurationChanged(
       std::move(streams), encoder_config_.min_transmit_bitrate_bps);
 
+  ConfigureQualityScaler();
+}
+
+void ViEEncoder::ConfigureQualityScaler() {
+  RTC_DCHECK_RUN_ON(&encoder_queue_);
   const auto scaling_settings = settings_.encoder->GetScalingSettings();
   if (scaling_enabled_ && scaling_settings.enabled) {
+    // Drop frames and scale down until desired quality is achieved.
     if (scaling_settings.thresholds) {
       quality_scaler_.reset(
           new QualityScaler(this, *(scaling_settings.thresholds)));
@@ -446,9 +468,9 @@
     }
   } else {
     quality_scaler_.reset(nullptr);
-    stats_proxy_->SetResolutionRestrictionStats(
-        false, scale_counter_[kCpu] > 0, scale_counter_[kQuality]);
   }
+  stats_proxy_->SetResolutionRestrictionStats(
+      scaling_enabled_, scale_counter_[kCpu] > 0, scale_counter_[kQuality]);
 }
 
 void ViEEncoder::OnFrame(const VideoFrame& video_frame) {
@@ -547,6 +569,16 @@
                  << ", texture=" << last_frame_info_->is_texture;
   }
 
+  if (initial_rampup_ < kMaxInitialFramedrop &&
+      video_frame.size() >
+          MaximumFrameSizeForBitrate(encoder_start_bitrate_bps_ / 1000)) {
+    LOG(LS_INFO) << "Dropping frame. Too large for target bitrate.";
+    ScaleDown(kQuality);
+    ++initial_rampup_;
+    return;
+  }
+  initial_rampup_ = kMaxInitialFramedrop;
+
   int64_t now_ms = clock_->TimeInMilliseconds();
   if (pending_encoder_reconfiguration_) {
     ReconfigureEncoder();
diff --git a/video/vie_encoder.h b/video/vie_encoder.h
index 4a2ca1f..ed514c2 100644
--- a/video/vie_encoder.h
+++ b/video/vie_encoder.h
@@ -150,6 +150,8 @@
                                    bool nack_enabled);
   void ReconfigureEncoder();
 
+  void ConfigureQualityScaler();
+
   // Implements VideoSinkInterface.
   void OnFrame(const VideoFrame& video_frame) override;
 
@@ -175,6 +177,8 @@
   rtc::Event shutdown_event_;
 
   const uint32_t number_of_cores_;
+  // Counts how many frames we've dropped in the initial rampup phase.
+  int initial_rampup_;
 
   const std::unique_ptr<VideoSourceProxy> source_proxy_;
   EncoderSink* sink_;
diff --git a/video/vie_encoder_unittest.cc b/video/vie_encoder_unittest.cc
index dae172d..5dee314 100644
--- a/video/vie_encoder_unittest.cc
+++ b/video/vie_encoder_unittest.cc
@@ -43,7 +43,9 @@
 
 namespace {
 const size_t kMaxPayloadLength = 1440;
-const int kTargetBitrateBps = 100000;
+const int kTargetBitrateBps = 1000000;
+const int kLowTargetBitrateBps = kTargetBitrateBps / 10;
+const int kMaxInitialFramedrop = 4;
 
 class TestBuffer : public webrtc::I420Buffer {
  public:
@@ -149,7 +151,7 @@
     vie_encoder_->SetSink(&sink_, false /* rotation_applied */);
     vie_encoder_->SetSource(&video_source_,
                             VideoSendStream::DegradationPreference::kBalanced);
-    vie_encoder_->SetStartBitrate(10000);
+    vie_encoder_->SetStartBitrate(kTargetBitrateBps);
     vie_encoder_->ConfigureEncoder(std::move(video_encoder_config),
                                    kMaxPayloadLength, nack_enabled);
   }
@@ -162,7 +164,7 @@
 
     VideoEncoderConfig video_encoder_config;
     video_encoder_config.number_of_streams = num_streams;
-    video_encoder_config.max_bitrate_bps = 1000000;
+    video_encoder_config.max_bitrate_bps = kTargetBitrateBps;
     video_encoder_config.video_stream_factory =
         new rtc::RefCountedObject<VideoStreamFactory>(num_temporal_layers);
     ConfigureEncoder(std::move(video_encoder_config), nack_enabled);
@@ -263,6 +265,8 @@
       test_encoder_->CheckLastTimeStampsMatch(expected_ntp_time, timestamp);
     }
 
+    void ExpectDroppedFrame() { EXPECT_FALSE(encoded_frame_event_.Wait(20)); }
+
     void SetExpectNoFrames() {
       rtc::CritScope lock(&crit_);
       expect_frames_ = false;
@@ -679,7 +683,6 @@
 }
 
 TEST_F(ViEEncoderTest, SwitchingSourceKeepsCpuAdaptation) {
-  const int kTargetBitrateBps = 100000;
   vie_encoder_->OnBitrateUpdated(kTargetBitrateBps, 0, 0);
 
   int frame_width = 1280;
@@ -749,7 +752,6 @@
 }
 
 TEST_F(ViEEncoderTest, SwitchingSourceKeepsQualityAdaptation) {
-  const int kTargetBitrateBps = 100000;
   vie_encoder_->OnBitrateUpdated(kTargetBitrateBps, 0, 0);
 
   int frame_width = 1280;
@@ -885,7 +887,6 @@
 }
 
 TEST_F(ViEEncoderTest, ScalingUpAndDownDoesNothingWithMaintainResolution) {
-  const int kTargetBitrateBps = 100000;
   int frame_width = 1280;
   int frame_height = 720;
   vie_encoder_->OnBitrateUpdated(kTargetBitrateBps, 0, 0);
@@ -938,7 +939,6 @@
 }
 
 TEST_F(ViEEncoderTest, DoesNotScaleBelowSetLimit) {
-  const int kTargetBitrateBps = 100000;
   int frame_width = 1280;
   int frame_height = 720;
   vie_encoder_->OnBitrateUpdated(kTargetBitrateBps, 0, 0);
@@ -995,12 +995,12 @@
   const int kDefaultFps = 30;
   const BitrateAllocation expected_bitrate =
       DefaultVideoBitrateAllocator(fake_encoder_.codec_config())
-          .GetAllocation(kTargetBitrateBps, kDefaultFps);
+          .GetAllocation(kLowTargetBitrateBps, kDefaultFps);
 
   // First called on bitrate updated, then again on first frame.
   EXPECT_CALL(bitrate_observer, OnBitrateAllocationUpdated(expected_bitrate))
       .Times(2);
-  vie_encoder_->OnBitrateUpdated(kTargetBitrateBps, 0, 0);
+  vie_encoder_->OnBitrateUpdated(kLowTargetBitrateBps, 0, 0);
 
   const int64_t kStartTimeMs = 1;
   video_source_.IncomingCapturedFrame(
@@ -1029,4 +1029,76 @@
   vie_encoder_->Stop();
 }
 
+TEST_F(ViEEncoderTest, DropsFramesAndScalesWhenBitrateIsTooLow) {
+  vie_encoder_->OnBitrateUpdated(kLowTargetBitrateBps, 0, 0);
+  int frame_width = 640;
+  int frame_height = 360;
+
+  video_source_.IncomingCapturedFrame(
+      CreateFrame(1, frame_width, frame_height));
+
+  // Expect to drop this frame, the wait should time out.
+  sink_.ExpectDroppedFrame();
+
+  // Expect the sink_wants to specify a scaled frame.
+  EXPECT_TRUE(video_source_.sink_wants().max_pixel_count);
+  EXPECT_LT(*video_source_.sink_wants().max_pixel_count, 1000 * 1000);
+
+  int last_pixel_count = *video_source_.sink_wants().max_pixel_count;
+
+  // Next frame is scaled
+  video_source_.IncomingCapturedFrame(
+      CreateFrame(2, frame_width * 3 / 4, frame_height * 3 / 4));
+
+  // Expect to drop this frame, the wait should time out.
+  sink_.ExpectDroppedFrame();
+
+  EXPECT_LT(*video_source_.sink_wants().max_pixel_count, last_pixel_count);
+
+  vie_encoder_->Stop();
+}
+
+TEST_F(ViEEncoderTest, NrOfDroppedFramesLimited) {
+  // 1kbps. This can never be achieved.
+  vie_encoder_->OnBitrateUpdated(1000, 0, 0);
+  int frame_width = 640;
+  int frame_height = 360;
+
+  // We expect the n initial frames to get dropped.
+  int i;
+  for (i = 1; i <= kMaxInitialFramedrop; ++i) {
+    video_source_.IncomingCapturedFrame(
+        CreateFrame(i, frame_width, frame_height));
+    sink_.ExpectDroppedFrame();
+  }
+  // The n+1th frame should not be dropped, even though it's size is too large.
+  video_source_.IncomingCapturedFrame(
+      CreateFrame(i, frame_width, frame_height));
+  sink_.WaitForEncodedFrame(i);
+
+  // Expect the sink_wants to specify a scaled frame.
+  EXPECT_TRUE(video_source_.sink_wants().max_pixel_count);
+  EXPECT_LT(*video_source_.sink_wants().max_pixel_count, 1000 * 1000);
+
+  vie_encoder_->Stop();
+}
+
+TEST_F(ViEEncoderTest, InitialFrameDropOffWithMaintainResolutionPreference) {
+  int frame_width = 640;
+  int frame_height = 360;
+  vie_encoder_->OnBitrateUpdated(kLowTargetBitrateBps, 0, 0);
+
+  // Set degradation preference.
+  vie_encoder_->SetSource(
+      &video_source_,
+      VideoSendStream::DegradationPreference::kMaintainResolution);
+
+  video_source_.IncomingCapturedFrame(
+      CreateFrame(1, frame_width, frame_height));
+  // Frame should not be dropped, even if it's too large.
+  sink_.WaitForEncodedFrame(1);
+
+  vie_encoder_->Stop();
+}
+
 }  // namespace webrtc