Add support for VP9 configuration through scalability mode.

Bug: webrtc:13960
Change-Id: Ia930647b15f624a4d10d8d335519b69ffdae6636
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/260983
Commit-Queue: Åsa Persson <asapersson@webrtc.org>
Reviewed-by: Niels Moller <nisse@webrtc.org>
Reviewed-by: Sergey Silkin <ssilkin@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#36919}
diff --git a/modules/video_coding/BUILD.gn b/modules/video_coding/BUILD.gn
index 502485e..798a1be 100644
--- a/modules/video_coding/BUILD.gn
+++ b/modules/video_coding/BUILD.gn
@@ -745,9 +745,13 @@
     "../../api/video:video_codec_constants",
     "../../api/video_codecs:video_codecs_api",
     "../../common_video",
+    "../../media:rtc_media_base",
     "../../rtc_base:checks",
     "../../rtc_base:logging",
     "../../rtc_base/experiments:stable_target_rate_experiment",
+    "svc:scalability_mode_util",
+    "svc:scalability_structures",
+    "svc:scalable_video_controller",
   ]
   absl_deps = [ "//third_party/abseil-cpp/absl/container:inlined_vector" ]
 }
diff --git a/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.cc b/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.cc
index 0c02b34..b1fbf97 100644
--- a/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.cc
+++ b/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.cc
@@ -499,7 +499,7 @@
 
 void LibvpxVp9Encoder::SetRates(const RateControlParameters& parameters) {
   if (!inited_) {
-    RTC_LOG(LS_WARNING) << "SetRates() calll while uninitialzied.";
+    RTC_LOG(LS_WARNING) << "SetRates() called while uninitialized.";
     return;
   }
   if (encoder_->err) {
@@ -570,14 +570,32 @@
   force_key_frame_ = true;
   pics_since_key_ = 0;
 
-  num_spatial_layers_ = inst->VP9().numberOfSpatialLayers;
-  RTC_DCHECK_GT(num_spatial_layers_, 0);
-  num_temporal_layers_ = inst->VP9().numberOfTemporalLayers;
-  if (num_temporal_layers_ == 0) {
-    num_temporal_layers_ = 1;
+  absl::optional<ScalabilityMode> scalability_mode = inst->GetScalabilityMode();
+  if (scalability_mode.has_value()) {
+    // Use settings from `ScalabilityMode` identifier.
+    RTC_LOG(LS_INFO) << "Create scalability structure "
+                     << ScalabilityModeToString(*scalability_mode);
+    svc_controller_ = CreateScalabilityStructure(*scalability_mode);
+    if (!svc_controller_) {
+      RTC_LOG(LS_WARNING) << "Failed to create scalability structure.";
+      return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
+    }
+    ScalableVideoController::StreamLayersConfig info =
+        svc_controller_->StreamConfig();
+    num_spatial_layers_ = info.num_spatial_layers;
+    num_temporal_layers_ = info.num_temporal_layers;
+    inter_layer_pred_ = ScalabilityModeToInterLayerPredMode(*scalability_mode);
+  } else {
+    num_spatial_layers_ = inst->VP9().numberOfSpatialLayers;
+    RTC_DCHECK_GT(num_spatial_layers_, 0);
+    num_temporal_layers_ = inst->VP9().numberOfTemporalLayers;
+    if (num_temporal_layers_ == 0) {
+      num_temporal_layers_ = 1;
+    }
+    inter_layer_pred_ = inst->VP9().interLayerPred;
+    svc_controller_ = CreateVp9ScalabilityStructure(*inst);
   }
 
-  svc_controller_ = CreateVp9ScalabilityStructure(*inst);
   framerate_controller_ = std::vector<FramerateControllerDeprecated>(
       num_spatial_layers_, FramerateControllerDeprecated(codec_.maxFramerate));
 
@@ -661,8 +679,6 @@
 
   is_flexible_mode_ = inst->VP9().flexibleMode;
 
-  inter_layer_pred_ = inst->VP9().interLayerPred;
-
   if (num_spatial_layers_ > 1 &&
       codec_.mode == VideoCodecMode::kScreensharing && !is_flexible_mode_) {
     RTC_LOG(LS_ERROR) << "Flexible mode is required for screenshare with "
diff --git a/modules/video_coding/codecs/vp9/svc_config.cc b/modules/video_coding/codecs/vp9/svc_config.cc
index 92818eb..77eee3d 100644
--- a/modules/video_coding/codecs/vp9/svc_config.cc
+++ b/modules/video_coding/codecs/vp9/svc_config.cc
@@ -12,9 +12,13 @@
 
 #include <algorithm>
 #include <cmath>
+#include <memory>
 #include <vector>
 
+#include "media/base/video_common.h"
 #include "modules/video_coding/codecs/vp9/include/vp9_globals.h"
+#include "modules/video_coding/svc/create_scalability_structure.h"
+#include "modules/video_coding/svc/scalability_mode_util.h"
 #include "rtc_base/checks.h"
 #include "rtc_base/logging.h"
 
@@ -29,6 +33,19 @@
 const size_t kTargetScreenSharingLayerBitrateKbps[] = {150, 350, 950};
 const size_t kMaxScreenSharingLayerBitrateKbps[] = {250, 500, 950};
 
+// Gets limited number of layers for given resolution.
+size_t GetLimitedNumSpatialLayers(size_t width, size_t height) {
+  const bool is_landscape = width >= height;
+  const size_t min_width = is_landscape ? kMinVp9SpatialLayerLongSideLength
+                                        : kMinVp9SpatialLayerShortSideLength;
+  const size_t min_height = is_landscape ? kMinVp9SpatialLayerShortSideLength
+                                         : kMinVp9SpatialLayerLongSideLength;
+  const size_t num_layers_fit_horz = static_cast<size_t>(
+      std::floor(1 + std::max(0.0f, std::log2(1.0f * width / min_width))));
+  const size_t num_layers_fit_vert = static_cast<size_t>(
+      std::floor(1 + std::max(0.0f, std::log2(1.0f * height / min_height))));
+  return std::min(num_layers_fit_horz, num_layers_fit_vert);
+}
 }  // namespace
 
 std::vector<SpatialLayer> ConfigureSvcScreenSharing(size_t input_width,
@@ -59,27 +76,19 @@
   return spatial_layers;
 }
 
-std::vector<SpatialLayer> ConfigureSvcNormalVideo(size_t input_width,
-                                                  size_t input_height,
-                                                  float max_framerate_fps,
-                                                  size_t first_active_layer,
-                                                  size_t num_spatial_layers,
-                                                  size_t num_temporal_layers) {
+std::vector<SpatialLayer> ConfigureSvcNormalVideo(
+    size_t input_width,
+    size_t input_height,
+    float max_framerate_fps,
+    size_t first_active_layer,
+    size_t num_spatial_layers,
+    size_t num_temporal_layers,
+    absl::optional<ScalableVideoController::StreamLayersConfig> config) {
   RTC_DCHECK_LT(first_active_layer, num_spatial_layers);
-  std::vector<SpatialLayer> spatial_layers;
 
   // Limit number of layers for given resolution.
-  const bool is_landscape = input_width >= input_height;
-  const size_t min_width = is_landscape ? kMinVp9SpatialLayerLongSideLength
-                                        : kMinVp9SpatialLayerShortSideLength;
-  const size_t min_height = is_landscape ? kMinVp9SpatialLayerShortSideLength
-                                         : kMinVp9SpatialLayerLongSideLength;
-  const size_t num_layers_fit_horz = static_cast<size_t>(std::floor(
-      1 + std::max(0.0f, std::log2(1.0f * input_width / min_width))));
-  const size_t num_layers_fit_vert = static_cast<size_t>(std::floor(
-      1 + std::max(0.0f, std::log2(1.0f * input_height / min_height))));
-  const size_t limited_num_spatial_layers =
-      std::min(num_layers_fit_horz, num_layers_fit_vert);
+  size_t limited_num_spatial_layers =
+      GetLimitedNumSpatialLayers(input_width, input_height);
   if (limited_num_spatial_layers < num_spatial_layers) {
     RTC_LOG(LS_WARNING) << "Reducing number of spatial layers from "
                         << num_spatial_layers << " to "
@@ -87,14 +96,23 @@
                         << " due to low input resolution.";
     num_spatial_layers = limited_num_spatial_layers;
   }
+
   // First active layer must be configured.
   num_spatial_layers = std::max(num_spatial_layers, first_active_layer + 1);
 
   // Ensure top layer is even enough.
   int required_divisiblity = 1 << (num_spatial_layers - first_active_layer - 1);
+  if (config) {
+    required_divisiblity = 1;
+    for (size_t sl_idx = 0; sl_idx < num_spatial_layers; ++sl_idx) {
+      required_divisiblity = cricket::LeastCommonMultiple(
+          required_divisiblity, config->scaling_factor_den[sl_idx]);
+    }
+  }
   input_width = input_width - input_width % required_divisiblity;
   input_height = input_height - input_height % required_divisiblity;
 
+  std::vector<SpatialLayer> spatial_layers;
   for (size_t sl_idx = first_active_layer; sl_idx < num_spatial_layers;
        ++sl_idx) {
     SpatialLayer spatial_layer = {0};
@@ -104,6 +122,13 @@
     spatial_layer.numberOfTemporalLayers = num_temporal_layers;
     spatial_layer.active = true;
 
+    if (config) {
+      spatial_layer.width = input_width * config->scaling_factor_num[sl_idx] /
+                            config->scaling_factor_den[sl_idx];
+      spatial_layer.height = input_height * config->scaling_factor_num[sl_idx] /
+                             config->scaling_factor_den[sl_idx];
+    }
+
     // minBitrate and maxBitrate formulas were derived from
     // subjective-quality data to determing bit rates below which video
     // quality is unacceptable and above which additional bits do not provide
@@ -124,7 +149,7 @@
     spatial_layers.push_back(spatial_layer);
   }
 
-  // A workaround for sitiation when single HD layer is left with minBitrate
+  // A workaround for situation when single HD layer is left with minBitrate
   // about 500kbps. This would mean that there will always be at least 500kbps
   // allocated to video regardless of how low is the actual BWE.
   // Also, boost maxBitrate for the first layer to account for lost ability to
@@ -140,13 +165,58 @@
   return spatial_layers;
 }
 
-std::vector<SpatialLayer> GetSvcConfig(size_t input_width,
-                                       size_t input_height,
-                                       float max_framerate_fps,
-                                       size_t first_active_layer,
-                                       size_t num_spatial_layers,
-                                       size_t num_temporal_layers,
-                                       bool is_screen_sharing) {
+// Uses scalability mode to configure spatial layers.
+std::vector<SpatialLayer> GetVp9SvcConfig(VideoCodec& codec) {
+  RTC_DCHECK_EQ(codec.codecType, kVideoCodecVP9);
+
+  absl::optional<ScalabilityMode> scalability_mode = codec.GetScalabilityMode();
+  RTC_DCHECK(scalability_mode.has_value());
+
+  absl::optional<ScalableVideoController::StreamLayersConfig> info =
+      ScalabilityStructureConfig(*scalability_mode);
+  if (!info.has_value()) {
+    RTC_LOG(LS_WARNING) << "Failed to create structure "
+                        << ScalabilityModeToString(*scalability_mode);
+    return {};
+  }
+
+  if (static_cast<int>(GetLimitedNumSpatialLayers(codec.width, codec.height)) <
+      info->num_spatial_layers) {
+    // Layers will be reduced, do not use scalability mode for now.
+    // TODO(bugs.webrtc.org/11607): Use a lower scalability mode once all lower
+    // modes are supported.
+    codec.UnsetScalabilityMode();
+    codec.VP9()->interLayerPred =
+        ScalabilityModeToInterLayerPredMode(*scalability_mode);
+  }
+
+  // TODO(bugs.webrtc.org/11607): Add support for screensharing.
+  std::vector<SpatialLayer> spatial_layers =
+      GetSvcConfig(codec.width, codec.height, codec.maxFramerate,
+                   /*first_active_layer=*/0, info->num_spatial_layers,
+                   info->num_temporal_layers, /*is_screen_sharing=*/false,
+                   codec.GetScalabilityMode() ? info : absl::nullopt);
+  RTC_DCHECK(!spatial_layers.empty());
+
+  // Use codec bitrate limits if spatial layering is not requested.
+  if (info->num_spatial_layers == 1) {
+    spatial_layers.back().minBitrate = codec.minBitrate;
+    spatial_layers.back().targetBitrate = codec.maxBitrate;
+    spatial_layers.back().maxBitrate = codec.maxBitrate;
+  }
+
+  return spatial_layers;
+}
+
+std::vector<SpatialLayer> GetSvcConfig(
+    size_t input_width,
+    size_t input_height,
+    float max_framerate_fps,
+    size_t first_active_layer,
+    size_t num_spatial_layers,
+    size_t num_temporal_layers,
+    bool is_screen_sharing,
+    absl::optional<ScalableVideoController::StreamLayersConfig> config) {
   RTC_DCHECK_GT(input_width, 0);
   RTC_DCHECK_GT(input_height, 0);
   RTC_DCHECK_GT(num_spatial_layers, 0);
@@ -158,7 +228,7 @@
   } else {
     return ConfigureSvcNormalVideo(input_width, input_height, max_framerate_fps,
                                    first_active_layer, num_spatial_layers,
-                                   num_temporal_layers);
+                                   num_temporal_layers, config);
   }
 }
 
diff --git a/modules/video_coding/codecs/vp9/svc_config.h b/modules/video_coding/codecs/vp9/svc_config.h
index f6b562e..adeaf0f 100644
--- a/modules/video_coding/codecs/vp9/svc_config.h
+++ b/modules/video_coding/codecs/vp9/svc_config.h
@@ -15,16 +15,24 @@
 #include <vector>
 
 #include "api/video_codecs/spatial_layer.h"
+#include "api/video_codecs/video_codec.h"
+#include "modules/video_coding/svc/scalable_video_controller.h"
 
 namespace webrtc {
 
-std::vector<SpatialLayer> GetSvcConfig(size_t input_width,
-                                       size_t input_height,
-                                       float max_framerate_fps,
-                                       size_t first_active_layer,
-                                       size_t num_spatial_layers,
-                                       size_t num_temporal_layers,
-                                       bool is_screen_sharing);
+// Uses scalability mode to configure spatial layers.
+std::vector<SpatialLayer> GetVp9SvcConfig(VideoCodec& video_codec);
+
+std::vector<SpatialLayer> GetSvcConfig(
+    size_t input_width,
+    size_t input_height,
+    float max_framerate_fps,
+    size_t first_active_layer,
+    size_t num_spatial_layers,
+    size_t num_temporal_layers,
+    bool is_screen_sharing,
+    absl::optional<ScalableVideoController::StreamLayersConfig> config =
+        absl::nullopt);
 
 }  // namespace webrtc
 
diff --git a/modules/video_coding/codecs/vp9/svc_config_unittest.cc b/modules/video_coding/codecs/vp9/svc_config_unittest.cc
index 77d75ee..4de3c5b 100644
--- a/modules/video_coding/codecs/vp9/svc_config_unittest.cc
+++ b/modules/video_coding/codecs/vp9/svc_config_unittest.cc
@@ -14,8 +14,12 @@
 #include <vector>
 
 #include "modules/video_coding/codecs/vp9/include/vp9_globals.h"
+#include "test/gmock.h"
 #include "test/gtest.h"
 
+using ::testing::ElementsAre;
+using ::testing::Field;
+
 namespace webrtc {
 TEST(SvcConfig, NumSpatialLayers) {
   const size_t max_num_spatial_layers = 6;
@@ -43,6 +47,92 @@
   EXPECT_EQ(spatial_layers.size(), num_spatial_layers);
 }
 
+TEST(SvcConfig, NumSpatialLayersWithScalabilityMode) {
+  VideoCodec codec;
+  codec.codecType = kVideoCodecVP9;
+  codec.width = 960;
+  codec.height = 540;
+  codec.SetScalabilityMode(ScalabilityMode::kL3T3_KEY);
+
+  std::vector<SpatialLayer> spatial_layers = GetVp9SvcConfig(codec);
+  EXPECT_THAT(spatial_layers, ElementsAre(Field(&SpatialLayer::height, 135),
+                                          Field(&SpatialLayer::height, 270),
+                                          Field(&SpatialLayer::height, 540)));
+  EXPECT_THAT(spatial_layers,
+              ElementsAre(Field(&SpatialLayer::numberOfTemporalLayers, 3),
+                          Field(&SpatialLayer::numberOfTemporalLayers, 3),
+                          Field(&SpatialLayer::numberOfTemporalLayers, 3)));
+  EXPECT_EQ(codec.GetScalabilityMode(), ScalabilityMode::kL3T3_KEY);
+}
+
+TEST(SvcConfig, NumSpatialLayersLimitedWithScalabilityMode) {
+  VideoCodec codec;
+  codec.codecType = kVideoCodecVP9;
+  codec.width = 480;
+  codec.height = 270;
+  codec.SetScalabilityMode(ScalabilityMode::kL3T3_KEY);
+
+  // Scalability mode is reset; configuration should correspond to L2T3_KEY.
+  std::vector<SpatialLayer> spatial_layers = GetVp9SvcConfig(codec);
+  EXPECT_THAT(spatial_layers, ElementsAre(Field(&SpatialLayer::height, 135),
+                                          Field(&SpatialLayer::height, 270)));
+  EXPECT_THAT(spatial_layers,
+              ElementsAre(Field(&SpatialLayer::numberOfTemporalLayers, 3),
+                          Field(&SpatialLayer::numberOfTemporalLayers, 3)));
+  EXPECT_EQ(codec.VP9()->interLayerPred, InterLayerPredMode::kOnKeyPic);
+  EXPECT_EQ(codec.GetScalabilityMode(), absl::nullopt);
+}
+
+TEST(SvcConfig, NumSpatialLayersLimitedWithScalabilityModePortrait) {
+  VideoCodec codec;
+  codec.codecType = kVideoCodecVP9;
+  codec.width = 270;
+  codec.height = 480;
+  codec.SetScalabilityMode(ScalabilityMode::kL3T1);
+
+  // Scalability mode is reset; configuration should correspond to L2T1.
+  std::vector<SpatialLayer> spatial_layers = GetVp9SvcConfig(codec);
+  EXPECT_THAT(spatial_layers, ElementsAre(Field(&SpatialLayer::width, 135),
+                                          Field(&SpatialLayer::width, 270)));
+  EXPECT_THAT(spatial_layers,
+              ElementsAre(Field(&SpatialLayer::numberOfTemporalLayers, 1),
+                          Field(&SpatialLayer::numberOfTemporalLayers, 1)));
+  EXPECT_EQ(codec.VP9()->interLayerPred, InterLayerPredMode::kOn);
+  EXPECT_EQ(codec.GetScalabilityMode(), absl::nullopt);
+}
+
+TEST(SvcConfig, NumSpatialLayersWithScalabilityModeResolutionRatio1_5) {
+  VideoCodec codec;
+  codec.codecType = kVideoCodecVP9;
+  codec.width = 270;
+  codec.height = 480;
+  codec.SetScalabilityMode(ScalabilityMode::kL2T1h);  // 1.5:1
+
+  std::vector<SpatialLayer> spatial_layers = GetVp9SvcConfig(codec);
+  EXPECT_THAT(spatial_layers, ElementsAre(Field(&SpatialLayer::width, 180),
+                                          Field(&SpatialLayer::width, 270)));
+  EXPECT_THAT(spatial_layers,
+              ElementsAre(Field(&SpatialLayer::numberOfTemporalLayers, 1),
+                          Field(&SpatialLayer::numberOfTemporalLayers, 1)));
+  EXPECT_EQ(codec.GetScalabilityMode(), ScalabilityMode::kL2T1h);
+}
+
+TEST(SvcConfig, NumSpatialLayersLimitedWithScalabilityModeResolutionRatio1_5) {
+  VideoCodec codec;
+  codec.codecType = kVideoCodecVP9;
+  codec.width = 320;
+  codec.height = 180;
+  codec.SetScalabilityMode(ScalabilityMode::kL2T1h);  // 1.5:1
+
+  // Scalability mode is reset; configuration should correspond to L1T1.
+  std::vector<SpatialLayer> spatial_layers = GetVp9SvcConfig(codec);
+  EXPECT_THAT(spatial_layers, ElementsAre(Field(&SpatialLayer::width, 320)));
+  EXPECT_THAT(spatial_layers,
+              ElementsAre(Field(&SpatialLayer::numberOfTemporalLayers, 1)));
+  EXPECT_EQ(codec.VP9()->interLayerPred, InterLayerPredMode::kOn);
+  EXPECT_EQ(codec.GetScalabilityMode(), absl::nullopt);
+}
+
 TEST(SvcConfig, AlwaysSendsAtLeastOneLayer) {
   const size_t max_num_spatial_layers = 6;
   const size_t first_active_layer = 5;
@@ -91,6 +181,44 @@
   EXPECT_EQ(spatial_layers.back().width, kOddSize);
 }
 
+TEST(SvcConfig, EnforcesMinimalRequiredParityWithScalabilityMode) {
+  VideoCodec codec;
+  codec.codecType = kVideoCodecVP9;
+  codec.width = 1023;
+  codec.height = 1023;
+  codec.SetScalabilityMode(ScalabilityMode::kL3T1);
+
+  std::vector<SpatialLayer> spatial_layers = GetVp9SvcConfig(codec);
+  EXPECT_THAT(spatial_layers,  // Divisibility by 4 required.
+              ElementsAre(Field(&SpatialLayer::width, 255),
+                          Field(&SpatialLayer::width, 510),
+                          Field(&SpatialLayer::width, 1020)));
+
+  codec.SetScalabilityMode(ScalabilityMode::kL2T1);
+  spatial_layers = GetVp9SvcConfig(codec);
+  EXPECT_THAT(spatial_layers,  // Divisibility by 2 required.
+              ElementsAre(Field(&SpatialLayer::width, 511),
+                          Field(&SpatialLayer::width, 1022)));
+
+  codec.SetScalabilityMode(ScalabilityMode::kL1T1);
+  spatial_layers = GetVp9SvcConfig(codec);
+  EXPECT_THAT(spatial_layers,  // Divisibility by 1 required.
+              ElementsAre(Field(&SpatialLayer::width, 1023)));
+}
+
+TEST(SvcConfig, EnforcesMinimalRequiredParityWithScalabilityModeResRatio1_5) {
+  VideoCodec codec;
+  codec.codecType = kVideoCodecVP9;
+  codec.width = 1280;
+  codec.height = 1280;
+  codec.SetScalabilityMode(ScalabilityMode::kL2T1h);  // 1.5:1
+
+  std::vector<SpatialLayer> spatial_layers = GetVp9SvcConfig(codec);
+  EXPECT_THAT(spatial_layers,  // Divisibility by 3 required.
+              ElementsAre(Field(&SpatialLayer::width, 852),
+                          Field(&SpatialLayer::width, 1278)));
+}
+
 TEST(SvcConfig, SkipsInactiveLayers) {
   const size_t num_spatial_layers = 4;
   const size_t first_active_layer = 2;
@@ -121,6 +249,25 @@
   }
 }
 
+TEST(SvcConfig, BitrateThresholdsWithScalabilityMode) {
+  VideoCodec codec;
+  codec.codecType = kVideoCodecVP9;
+  codec.width = 960;
+  codec.height = 540;
+  codec.SetScalabilityMode(ScalabilityMode::kS3T3);
+
+  std::vector<SpatialLayer> spatial_layers = GetVp9SvcConfig(codec);
+  EXPECT_THAT(spatial_layers, ElementsAre(Field(&SpatialLayer::height, 135),
+                                          Field(&SpatialLayer::height, 270),
+                                          Field(&SpatialLayer::height, 540)));
+
+  for (const SpatialLayer& layer : spatial_layers) {
+    EXPECT_LE(layer.minBitrate, layer.maxBitrate);
+    EXPECT_LE(layer.minBitrate, layer.targetBitrate);
+    EXPECT_LE(layer.targetBitrate, layer.maxBitrate);
+  }
+}
+
 TEST(SvcConfig, ScreenSharing) {
   std::vector<SpatialLayer> spatial_layers =
       GetSvcConfig(1920, 1080, 30, 1, 3, 3, true);
diff --git a/modules/video_coding/svc/BUILD.gn b/modules/video_coding/svc/BUILD.gn
index f68001a..d82f316 100644
--- a/modules/video_coding/svc/BUILD.gn
+++ b/modules/video_coding/svc/BUILD.gn
@@ -15,6 +15,7 @@
   ]
   deps = [
     "../../../api/video_codecs:scalability_mode",
+    "../../../api/video_codecs:video_codecs_api",
     "../../../rtc_base:checks",
   ]
   absl_deps = [
diff --git a/modules/video_coding/svc/scalability_mode_util.cc b/modules/video_coding/svc/scalability_mode_util.cc
index 1cbdeb6..d0a56af 100644
--- a/modules/video_coding/svc/scalability_mode_util.cc
+++ b/modules/video_coding/svc/scalability_mode_util.cc
@@ -139,6 +139,52 @@
   RTC_CHECK_NOTREACHED();
 }
 
+InterLayerPredMode ScalabilityModeToInterLayerPredMode(
+    ScalabilityMode scalability_mode) {
+  switch (scalability_mode) {
+    case ScalabilityMode::kL1T1:
+    case ScalabilityMode::kL1T2:
+    case ScalabilityMode::kL1T2h:
+    case ScalabilityMode::kL1T3:
+    case ScalabilityMode::kL1T3h:
+    case ScalabilityMode::kL2T1:
+    case ScalabilityMode::kL2T1h:
+      return InterLayerPredMode::kOn;
+    case ScalabilityMode::kL2T1_KEY:
+      return InterLayerPredMode::kOnKeyPic;
+    case ScalabilityMode::kL2T2:
+    case ScalabilityMode::kL2T2h:
+      return InterLayerPredMode::kOn;
+    case ScalabilityMode::kL2T2_KEY:
+    case ScalabilityMode::kL2T2_KEY_SHIFT:
+      return InterLayerPredMode::kOnKeyPic;
+    case ScalabilityMode::kL2T3:
+    case ScalabilityMode::kL2T3h:
+      return InterLayerPredMode::kOn;
+    case ScalabilityMode::kL2T3_KEY:
+      return InterLayerPredMode::kOnKeyPic;
+    case ScalabilityMode::kL3T1:
+    case ScalabilityMode::kL3T1h:
+      return InterLayerPredMode::kOn;
+    case ScalabilityMode::kL3T1_KEY:
+      return InterLayerPredMode::kOnKeyPic;
+    case ScalabilityMode::kL3T2:
+    case ScalabilityMode::kL3T2h:
+      return InterLayerPredMode::kOn;
+    case ScalabilityMode::kL3T2_KEY:
+      return InterLayerPredMode::kOnKeyPic;
+    case ScalabilityMode::kL3T3:
+    case ScalabilityMode::kL3T3h:
+      return InterLayerPredMode::kOn;
+    case ScalabilityMode::kL3T3_KEY:
+      return InterLayerPredMode::kOnKeyPic;
+    case ScalabilityMode::kS2T1:
+    case ScalabilityMode::kS3T3:
+      return InterLayerPredMode::kOff;
+  }
+  RTC_CHECK_NOTREACHED();
+}
+
 int ScalabilityModeToNumSpatialLayers(ScalabilityMode scalability_mode) {
   switch (scalability_mode) {
     case ScalabilityMode::kL1T1:
diff --git a/modules/video_coding/svc/scalability_mode_util.h b/modules/video_coding/svc/scalability_mode_util.h
index faff4cf..c543c0c 100644
--- a/modules/video_coding/svc/scalability_mode_util.h
+++ b/modules/video_coding/svc/scalability_mode_util.h
@@ -14,6 +14,7 @@
 #include "absl/strings/string_view.h"
 #include "absl/types/optional.h"
 #include "api/video_codecs/scalability_mode.h"
+#include "api/video_codecs/video_codec.h"
 
 namespace webrtc {
 
@@ -22,6 +23,9 @@
 
 absl::string_view ScalabilityModeToString(ScalabilityMode scalability_mode);
 
+InterLayerPredMode ScalabilityModeToInterLayerPredMode(
+    ScalabilityMode scalability_mode);
+
 int ScalabilityModeToNumSpatialLayers(ScalabilityMode scalability_mode);
 
 int ScalabilityModeToNumTemporalLayers(ScalabilityMode scalability_mode);
diff --git a/modules/video_coding/video_codec_initializer.cc b/modules/video_coding/video_codec_initializer.cc
index 03f7ffe..99eb67c 100644
--- a/modules/video_coding/video_codec_initializer.cc
+++ b/modules/video_coding/video_codec_initializer.cc
@@ -231,6 +231,11 @@
       if (!config.spatial_layers.empty()) {
         // Layering is set explicitly.
         spatial_layers = config.spatial_layers;
+      } else if (scalability_mode.has_value()) {
+        // Layering is set via scalability mode.
+        spatial_layers = GetVp9SvcConfig(video_codec);
+        if (spatial_layers.empty())
+          break;
       } else {
         size_t first_active_layer = 0;
         for (size_t spatial_idx = 0;
diff --git a/test/encoder_settings.cc b/test/encoder_settings.cc
index c825188..09cefc8 100644
--- a/test/encoder_settings.cc
+++ b/test/encoder_settings.cc
@@ -88,6 +88,7 @@
       stream_settings[i].width = width / stream.scale_resolution_down_by;
       stream_settings[i].height = height / stream.scale_resolution_down_by;
     }
+    stream_settings[i].scalability_mode = stream.scalability_mode;
     stream_settings[i].target_bitrate_bps = target_bitrate_bps;
     stream_settings[i].max_bitrate_bps = max_bitrate_bps;
     stream_settings[i].active =
diff --git a/video/BUILD.gn b/video/BUILD.gn
index 71f4fc9..980e5b5 100644
--- a/video/BUILD.gn
+++ b/video/BUILD.gn
@@ -926,6 +926,8 @@
       "../modules/video_coding:webrtc_vp9_helpers",
       "../modules/video_coding/codecs/av1:libaom_av1_encoder_if_supported",
       "../modules/video_coding/svc:scalability_mode_util",
+      "../modules/video_coding/svc:scalability_structures",
+      "../modules/video_coding/svc:scalable_video_controller",
       "../rtc_base",
       "../rtc_base:byte_buffer",
       "../rtc_base:checks",
diff --git a/video/video_send_stream_tests.cc b/video/video_send_stream_tests.cc
index c364d0b..02bff7d 100644
--- a/video/video_send_stream_tests.cc
+++ b/video/video_send_stream_tests.cc
@@ -39,7 +39,9 @@
 #include "modules/video_coding/codecs/interface/common_constants.h"
 #include "modules/video_coding/codecs/vp8/include/vp8.h"
 #include "modules/video_coding/codecs/vp9/include/vp9.h"
+#include "modules/video_coding/svc/create_scalability_structure.h"
 #include "modules/video_coding/svc/scalability_mode_util.h"
+#include "modules/video_coding/svc/scalable_video_controller.h"
 #include "rtc_base/checks.h"
 #include "rtc_base/event.h"
 #include "rtc_base/experiments/alr_experiment.h"
@@ -2976,14 +2978,11 @@
 #if defined(RTC_ENABLE_VP9)
 class Vp9HeaderObserver : public test::SendTest {
  public:
-  Vp9HeaderObserver()
+  explicit Vp9HeaderObserver(const Vp9TestParams& params)
       : SendTest(VideoSendStreamTest::kLongTimeoutMs),
         encoder_factory_([]() { return VP9Encoder::Create(); }),
-        vp9_settings_(VideoEncoder::GetDefaultVp9Settings()),
-        packets_sent_(0),
-        frames_sent_(0),
-        expected_width_(0),
-        expected_height_(0) {}
+        params_(params),
+        vp9_settings_(VideoEncoder::GetDefaultVp9Settings()) {}
 
   virtual void ModifyVideoConfigsHook(
       VideoSendStream::Config* send_config,
@@ -3008,8 +3007,6 @@
             vp9_settings_);
     EXPECT_EQ(1u, encoder_config->number_of_streams);
     EXPECT_EQ(1u, encoder_config->simulcast_layers.size());
-    encoder_config->simulcast_layers[0].num_temporal_layers =
-        vp9_settings_.numberOfTemporalLayers;
     encoder_config_ = encoder_config->Copy();
   }
 
@@ -3185,17 +3182,17 @@
     EXPECT_NE(kNoPictureId, vp9.picture_id);                   // I:1
     EXPECT_EQ(vp9_settings_.flexibleMode, vp9.flexible_mode);  // F
 
-    if (vp9_settings_.numberOfSpatialLayers > 1) {
-      EXPECT_LT(vp9.spatial_idx, vp9_settings_.numberOfSpatialLayers);
-    } else if (vp9_settings_.numberOfTemporalLayers > 1) {
+    if (params_.num_spatial_layers > 1) {
+      EXPECT_LT(vp9.spatial_idx, params_.num_spatial_layers);
+    } else if (params_.num_temporal_layers > 1) {
       EXPECT_EQ(vp9.spatial_idx, 0);
     } else {
       EXPECT_EQ(vp9.spatial_idx, kNoSpatialIdx);
     }
 
-    if (vp9_settings_.numberOfTemporalLayers > 1) {
-      EXPECT_LT(vp9.temporal_idx, vp9_settings_.numberOfTemporalLayers);
-    } else if (vp9_settings_.numberOfSpatialLayers > 1) {
+    if (params_.num_temporal_layers > 1) {
+      EXPECT_LT(vp9.temporal_idx, params_.num_temporal_layers);
+    } else if (params_.num_spatial_layers > 1) {
       EXPECT_EQ(vp9.temporal_idx, 0);
     } else {
       EXPECT_EQ(vp9.temporal_idx, kNoTemporalIdx);
@@ -3234,16 +3231,24 @@
   //      +-+-+-+-+-+-+-+-+
   void VerifySsData(const RTPVideoHeaderVP9& vp9) const {
     EXPECT_TRUE(vp9.ss_data_available);             // V
-    EXPECT_EQ(vp9_settings_.numberOfSpatialLayers,  // N_S + 1
+    EXPECT_EQ(params_.num_spatial_layers,           // N_S + 1
               vp9.num_spatial_layers);
     EXPECT_TRUE(vp9.spatial_layer_resolution_present);  // Y:1
-    int expected_width = expected_width_;
-    int expected_height = expected_height_;
+
+    absl::optional<ScalableVideoController::StreamLayersConfig> info;
+    absl::optional<ScalabilityMode> scalability_mode =
+        ScalabilityModeFromString(params_.scalability_mode);
+    if (scalability_mode) {
+      info = ScalabilityStructureConfig(*scalability_mode);
+    }
+    double default_ratio = 1.0;
     for (int i = static_cast<int>(vp9.num_spatial_layers) - 1; i >= 0; --i) {
-      EXPECT_EQ(expected_width, vp9.width[i]);    // WIDTH
-      EXPECT_EQ(expected_height, vp9.height[i]);  // HEIGHT
-      expected_width /= 2;
-      expected_height /= 2;
+      double ratio = info ? (static_cast<double>(info->scaling_factor_num[i]) /
+                             info->scaling_factor_den[i])
+                          : default_ratio;
+      EXPECT_EQ(expected_width_ * ratio, vp9.width[i]);    // WIDTH
+      EXPECT_EQ(expected_height_ * ratio, vp9.height[i]);  // HEIGHT
+      default_ratio /= 2.0;
     }
   }
 
@@ -3281,17 +3286,18 @@
   }
 
   test::FunctionVideoEncoderFactory encoder_factory_;
+  const Vp9TestParams params_;
   VideoCodecVP9 vp9_settings_;
   webrtc::VideoEncoderConfig encoder_config_;
   bool last_packet_marker_ = false;
   uint16_t last_packet_sequence_number_ = 0;
   uint32_t last_packet_timestamp_ = 0;
   RTPVideoHeaderVP9 last_vp9_;
-  size_t packets_sent_;
+  size_t packets_sent_ = 0;
   Mutex mutex_;
-  size_t frames_sent_;
-  int expected_width_;
-  int expected_height_;
+  size_t frames_sent_ = 0;
+  int expected_width_ = 0;
+  int expected_height_ = 0;
 };
 
 class Vp9Test : public VideoSendStreamTest,
@@ -3319,13 +3325,27 @@
              {"L2T1_KEY", 2, 1, InterLayerPredMode::kOnKeyPic},
              {"L2T2", 2, 2, InterLayerPredMode::kOn},
              {"L2T2_KEY", 2, 2, InterLayerPredMode::kOnKeyPic},
-             {"L2T3", 2, 3, InterLayerPredMode::kOn},
              {"L2T3_KEY", 2, 3, InterLayerPredMode::kOnKeyPic},
              {"L3T1", 3, 1, InterLayerPredMode::kOn},
              {"L3T3", 3, 3, InterLayerPredMode::kOn},
              {"L3T3_KEY", 3, 3, InterLayerPredMode::kOnKeyPic},
              {"S2T1", 2, 1, InterLayerPredMode::kOff},
              {"S3T3", 3, 3, InterLayerPredMode::kOff}}),
+        ::testing::Values(false, true)),  // use_scalability_mode_identifier
+    [](const ::testing::TestParamInfo<Vp9Test::ParamType>& info) {
+      rtc::StringBuilder sb;
+      sb << std::get<0>(info.param).scalability_mode << "_"
+         << (std::get<1>(info.param) ? "WithIdentifier" : "WithoutIdentifier");
+      return sb.str();
+    });
+
+INSTANTIATE_TEST_SUITE_P(
+    ScalabilityModeOff,
+    Vp9Test,
+    ::testing::Combine(
+        ::testing::ValuesIn<Vp9TestParams>(
+            {{"L2T3", 2, 3, InterLayerPredMode::kOn},
+             {"S2T3", 2, 3, InterLayerPredMode::kOff}}),
         ::testing::Values(false)),  // use_scalability_mode_identifier
     [](const ::testing::TestParamInfo<Vp9Test::ParamType>& info) {
       rtc::StringBuilder sb;
@@ -3334,6 +3354,20 @@
       return sb.str();
     });
 
+INSTANTIATE_TEST_SUITE_P(
+    ScalabilityModeOn,
+    Vp9Test,
+    ::testing::Combine(
+        ::testing::ValuesIn<Vp9TestParams>({{"L2T1h", 2, 1,
+                                             InterLayerPredMode::kOn}}),
+        ::testing::Values(true)),  // use_scalability_mode_identifier
+    [](const ::testing::TestParamInfo<Vp9Test::ParamType>& info) {
+      rtc::StringBuilder sb;
+      sb << std::get<0>(info.param).scalability_mode << "_"
+         << (std::get<1>(info.param) ? "WithIdentifier" : "WithoutIdentifier");
+      return sb.str();
+    });
+
 TEST_P(Vp9Test, NonFlexMode) {
   TestVp9NonFlexMode(params_, use_scalability_mode_identifier_);
 }
@@ -3353,7 +3387,7 @@
    public:
     NonFlexibleMode(const Vp9TestParams& params,
                     bool use_scalability_mode_identifier)
-        : params_(params),
+        : Vp9HeaderObserver(params),
           use_scalability_mode_identifier_(use_scalability_mode_identifier),
           l_field_(params.num_temporal_layers > 1 ||
                    params.num_spatial_layers > 1) {}
@@ -3372,9 +3406,6 @@
       }
       encoder_config->max_bitrate_bps = bitrate_bps * 2;
 
-      EXPECT_EQ(1u, encoder_config->number_of_streams);
-      EXPECT_EQ(1u, encoder_config->simulcast_layers.size());
-
       encoder_config->frame_drop_enabled = false;
 
       vp9_settings_.flexibleMode = false;
@@ -3385,8 +3416,10 @@
         vp9_settings_.numberOfSpatialLayers = params_.num_spatial_layers;
         vp9_settings_.interLayerPred = params_.inter_layer_pred;
       } else {
-        encoder_config->simulcast_layers[0].scalability_mode =
+        absl::optional<ScalabilityMode> mode =
             ScalabilityModeFromString(params_.scalability_mode);
+        encoder_config->simulcast_layers[0].scalability_mode = mode;
+        EXPECT_TRUE(mode.has_value());
       }
     }
 
@@ -3442,7 +3475,6 @@
       if (frames_sent_ > kNumFramesToSend)
         observation_complete_.Set();
     }
-    const Vp9TestParams params_;
     const bool use_scalability_mode_identifier_;
     const bool l_field_;
 
@@ -3463,16 +3495,20 @@
   static const int kWidth = 4;
   static const int kHeight = 4;
   class NonFlexibleModeResolution : public Vp9HeaderObserver {
+   public:
+    explicit NonFlexibleModeResolution(const Vp9TestParams& params)
+        : Vp9HeaderObserver(params) {}
+
+   private:
     void ModifyVideoConfigsHook(
         VideoSendStream::Config* send_config,
         std::vector<VideoReceiveStream::Config>* receive_configs,
         VideoEncoderConfig* encoder_config) override {
       encoder_config->codec_type = kVideoCodecVP9;
       vp9_settings_.flexibleMode = false;
-      vp9_settings_.numberOfTemporalLayers = 1;
-      vp9_settings_.numberOfSpatialLayers = 1;
-
-      EXPECT_EQ(1u, encoder_config->number_of_streams);
+      vp9_settings_.numberOfTemporalLayers = params_.num_temporal_layers;
+      vp9_settings_.numberOfSpatialLayers = params_.num_spatial_layers;
+      vp9_settings_.interLayerPred = params_.inter_layer_pred;
     }
 
     void InspectHeader(const RTPVideoHeaderVP9& vp9_header) override {
@@ -3488,7 +3524,10 @@
       *width = kWidth;
       *height = kHeight;
     }
-  } test;
+  };
+
+  Vp9TestParams params{"L1T1", 1, 1, InterLayerPredMode::kOn};
+  NonFlexibleModeResolution test(params);
 
   RunBaseTest(&test);
 }
@@ -3504,6 +3543,11 @@
 #endif
 TEST_F(VideoSendStreamTest, MAYBE_Vp9FlexModeRefCount) {
   class FlexibleMode : public Vp9HeaderObserver {
+   public:
+    explicit FlexibleMode(const Vp9TestParams& params)
+        : Vp9HeaderObserver(params) {}
+
+   private:
     void ModifyVideoConfigsHook(
         VideoSendStream::Config* send_config,
         std::vector<VideoReceiveStream::Config>* receive_configs,
@@ -3511,8 +3555,9 @@
       encoder_config->codec_type = kVideoCodecVP9;
       encoder_config->content_type = VideoEncoderConfig::ContentType::kScreen;
       vp9_settings_.flexibleMode = true;
-      vp9_settings_.numberOfTemporalLayers = 1;
-      vp9_settings_.numberOfSpatialLayers = 2;
+      vp9_settings_.numberOfTemporalLayers = params_.num_temporal_layers;
+      vp9_settings_.numberOfSpatialLayers = params_.num_spatial_layers;
+      vp9_settings_.interLayerPred = params_.inter_layer_pred;
     }
 
     void InspectHeader(const RTPVideoHeaderVP9& vp9_header) override {
@@ -3523,7 +3568,10 @@
         observation_complete_.Set();
       }
     }
-  } test;
+  };
+
+  Vp9TestParams params{"L2T1", 2, 1, InterLayerPredMode::kOn};
+  FlexibleMode test(params);
 
   RunBaseTest(&test);
 }