Reland "VP9 screenshare: Don't base layers frame-rate on input frame-rate"

Reland with fixes.

If the input frame rate is slightly unstable, using it to cap the layers'
frame rates makes the output frame rate drop even lower for longer periods
of time.

Also fix the screenshare_loopback test for low-fps VP9 testing.

Bug: webrtc:10257
Change-Id: Id40a780d461e6b51cb44d275b8aa5d7b348d3586
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/138215
Reviewed-by: Sergey Silkin <ssilkin@webrtc.org>
Reviewed-by: Ilya Nikolaevskiy <ilnik@webrtc.org>
Commit-Queue: Ilya Nikolaevskiy <ilnik@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#28054}
diff --git a/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc b/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc
index 309dac1..30f61dc 100644
--- a/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc
+++ b/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc
@@ -1288,47 +1288,6 @@
   }
 }
 
-TEST_F(TestVp9ImplFrameDropping, LayerMaxFramerateIsCappedByCodecMaxFramerate) {
-  const float input_framerate_fps = 30.0;
-  const float layer_max_framerate_fps = 30.0;
-  const uint32_t codec_max_framerate_fps = layer_max_framerate_fps / 3;
-  const size_t video_duration_secs = 3;
-  const size_t num_input_frames = video_duration_secs * input_framerate_fps;
-
-  VideoBitrateAllocation bitrate_allocation;
-  codec_settings_.spatialLayers[0].width = codec_settings_.width;
-  codec_settings_.spatialLayers[0].height = codec_settings_.height;
-  codec_settings_.spatialLayers[0].maxFramerate = layer_max_framerate_fps;
-  codec_settings_.spatialLayers[0].minBitrate = codec_settings_.startBitrate;
-  codec_settings_.spatialLayers[0].maxBitrate = codec_settings_.startBitrate;
-  codec_settings_.spatialLayers[0].targetBitrate = codec_settings_.startBitrate;
-  codec_settings_.spatialLayers[0].active = true;
-
-  bitrate_allocation.SetBitrate(
-      0, 0, codec_settings_.spatialLayers[0].targetBitrate * 1000);
-
-  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
-            encoder_->InitEncode(&codec_settings_, 1 /* number of cores */,
-                                 0 /* max payload size (unused) */));
-
-  encoder_->SetRates(VideoEncoder::RateControlParameters(
-      bitrate_allocation, codec_max_framerate_fps));
-
-  VideoFrame* input_frame = NextInputFrame();
-  for (size_t frame_num = 0; frame_num < num_input_frames; ++frame_num) {
-    EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(*input_frame, nullptr));
-    const size_t timestamp = input_frame->timestamp() +
-                             kVideoPayloadTypeFrequency / input_framerate_fps;
-    input_frame->set_timestamp(static_cast<uint32_t>(timestamp));
-  }
-
-  const size_t num_encoded_frames = GetNumEncodedFrames();
-  const float encoded_framerate_fps = num_encoded_frames / video_duration_secs;
-  const float max_framerate_error_fps = codec_max_framerate_fps * 0.1f;
-  EXPECT_NEAR(encoded_framerate_fps, codec_max_framerate_fps,
-              max_framerate_error_fps);
-}
-
 class TestVp9ImplProfile2 : public TestVp9Impl {
  protected:
   void SetUp() override {
diff --git a/modules/video_coding/codecs/vp9/vp9_impl.cc b/modules/video_coding/codecs/vp9/vp9_impl.cc
index f974932..9fedf79 100644
--- a/modules/video_coding/codecs/vp9/vp9_impl.cc
+++ b/modules/video_coding/codecs/vp9/vp9_impl.cc
@@ -316,8 +316,7 @@
       }
 
       framerate_controller_[sl_idx].SetTargetRate(
-          std::min(static_cast<float>(codec_.maxFramerate),
-                   codec_.spatialLayers[sl_idx].maxFramerate));
+          codec_.spatialLayers[sl_idx].maxFramerate);
     }
   } else {
     float rate_ratio[VPX_MAX_LAYERS] = {0};
@@ -535,24 +534,18 @@
 
   inter_layer_pred_ = inst->VP9().interLayerPred;
 
-  different_framerates_used_ = false;
-  for (size_t sl_idx = 1; sl_idx < num_spatial_layers_; ++sl_idx) {
-    if (std::abs(codec_.spatialLayers[sl_idx].maxFramerate -
-                 codec_.spatialLayers[0].maxFramerate) > 1e-9) {
-      different_framerates_used_ = true;
-    }
-  }
-
-  if (different_framerates_used_ && !is_flexible_mode_) {
-    RTC_LOG(LS_ERROR) << "Flexible mode required for different framerates on "
-                         "different spatial layers";
+  if (num_spatial_layers_ > 1 &&
+      codec_.mode == VideoCodecMode::kScreensharing && !is_flexible_mode_) {
+    RTC_LOG(LS_ERROR) << "Flexible mode is required for screenshare with "
+                         "several spatial layers";
     return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
   }
 
   // External reference control is required for different frame rate on spatial
   // layers because libvpx generates rtp incompatible references in this case.
   external_ref_control_ = field_trial::IsEnabled("WebRTC-Vp9ExternalRefCtrl") ||
-                          different_framerates_used_ ||
+                          (num_spatial_layers_ > 1 &&
+                           codec_.mode == VideoCodecMode::kScreensharing) ||
                           inter_layer_pred_ == InterLayerPredMode::kOn;
 
   if (num_temporal_layers_ == 1) {
@@ -589,7 +582,8 @@
 
   if (external_ref_control_) {
     config_->temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_BYPASS;
-    if (num_temporal_layers_ > 1 && different_framerates_used_) {
+    if (num_temporal_layers_ > 1 && num_spatial_layers_ > 1 &&
+        codec_.mode == VideoCodecMode::kScreensharing) {
       // External reference control for several temporal layers with different
       // frame rates on spatial layers is not implemented yet.
       return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
@@ -966,7 +960,8 @@
     if (VideoCodecMode::kScreensharing == codec_.mode) {
       for (uint8_t sl_idx = 0; sl_idx < num_active_spatial_layers_; ++sl_idx) {
         ref_config.duration[sl_idx] = static_cast<int64_t>(
-            90000 / framerate_controller_[sl_idx].GetTargetRate());
+            90000 / (std::min(static_cast<float>(codec_.maxFramerate),
+                              framerate_controller_[sl_idx].GetTargetRate())));
       }
     }
 
@@ -985,8 +980,9 @@
   RTC_DCHECK_GE(framerate_controller_.size(), num_active_spatial_layers_);
   float target_framerate_fps =
       (codec_.mode == VideoCodecMode::kScreensharing)
-          ? framerate_controller_[num_active_spatial_layers_ - 1]
-                .GetTargetRate()
+          ? std::min(static_cast<float>(codec_.maxFramerate),
+                     framerate_controller_[num_active_spatial_layers_ - 1]
+                         .GetTargetRate())
           : codec_.maxFramerate;
   uint32_t duration = static_cast<uint32_t>(90000 / target_framerate_fps);
   const vpx_codec_err_t rv = vpx_codec_encode(encoder_, raw_, timestamp_,
@@ -1201,6 +1197,8 @@
         // It is safe to ignore this requirement if inter-layer prediction is
         // enabled for all frames when all base frames are relayed to receiver.
         RTC_DCHECK_EQ(ref_buf.spatial_layer_id, layer_id.spatial_layer_id);
+      } else {
+        RTC_DCHECK_LE(ref_buf.spatial_layer_id, layer_id.spatial_layer_id);
       }
       RTC_DCHECK_LE(ref_buf.temporal_layer_id, layer_id.temporal_layer_id);
 
@@ -1320,7 +1318,7 @@
       const bool same_spatial_layer =
           ref_buf_[buf_idx].spatial_layer_id == sl_idx;
       bool correct_pid = false;
-      if (different_framerates_used_) {
+      if (is_flexible_mode_) {
         correct_pid = pid_diff < kMaxAllowedPidDIff;
       } else {
         // Below code assumes single temporal referecence.
@@ -1519,7 +1517,8 @@
   const size_t bitrate_bps = current_bitrate_allocation_.GetBitrate(
       sid, tid == kNoTemporalIdx ? 0 : tid);
   const float fps = (codec_.mode == VideoCodecMode::kScreensharing)
-                        ? framerate_controller_[sid].GetTargetRate()
+                        ? std::min(static_cast<float>(codec_.maxFramerate),
+                                   framerate_controller_[sid].GetTargetRate())
                         : codec_.maxFramerate;
   return static_cast<size_t>(
       bitrate_bps / (8 * fps) *
diff --git a/modules/video_coding/codecs/vp9/vp9_impl.h b/modules/video_coding/codecs/vp9/vp9_impl.h
index acc03bf..73bca26 100644
--- a/modules/video_coding/codecs/vp9/vp9_impl.h
+++ b/modules/video_coding/codecs/vp9/vp9_impl.h
@@ -113,7 +113,6 @@
   GofInfoVP9 gof_;  // Contains each frame's temporal information for
                     // non-flexible mode.
   bool force_key_frame_;
-  bool different_framerates_used_;
   size_t pics_since_key_;
   uint8_t num_temporal_layers_;
   uint8_t num_spatial_layers_;         // Number of configured SLs
diff --git a/video/video_quality_test.cc b/video/video_quality_test.cc
index 5415f96..cc30c79 100644
--- a/video/video_quality_test.cc
+++ b/video/video_quality_test.cc
@@ -843,7 +843,7 @@
             params_.ss[video_idx].num_spatial_layers);
         vp9_settings.interLayerPred = params_.ss[video_idx].inter_layer_pred;
         // High FPS vp9 screenshare requires flexible mode.
-        if (params_.video[video_idx].fps > 5) {
+        if (params_.ss[video_idx].num_spatial_layers > 1) {
           vp9_settings.flexibleMode = true;
         }
         video_encoder_configs_[video_idx].encoder_specific_settings =