Add support for VP9 configuration through scalability mode.
Bug: webrtc:13960
Change-Id: Ia930647b15f624a4d10d8d335519b69ffdae6636
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/260983
Commit-Queue: Åsa Persson <asapersson@webrtc.org>
Reviewed-by: Niels Moller <nisse@webrtc.org>
Reviewed-by: Sergey Silkin <ssilkin@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#36919}
diff --git a/modules/video_coding/BUILD.gn b/modules/video_coding/BUILD.gn
index 502485e..798a1be 100644
--- a/modules/video_coding/BUILD.gn
+++ b/modules/video_coding/BUILD.gn
@@ -745,9 +745,13 @@
"../../api/video:video_codec_constants",
"../../api/video_codecs:video_codecs_api",
"../../common_video",
+ "../../media:rtc_media_base",
"../../rtc_base:checks",
"../../rtc_base:logging",
"../../rtc_base/experiments:stable_target_rate_experiment",
+ "svc:scalability_mode_util",
+ "svc:scalability_structures",
+ "svc:scalable_video_controller",
]
absl_deps = [ "//third_party/abseil-cpp/absl/container:inlined_vector" ]
}
diff --git a/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.cc b/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.cc
index 0c02b34..b1fbf97 100644
--- a/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.cc
+++ b/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.cc
@@ -499,7 +499,7 @@
void LibvpxVp9Encoder::SetRates(const RateControlParameters& parameters) {
if (!inited_) {
- RTC_LOG(LS_WARNING) << "SetRates() calll while uninitialzied.";
+ RTC_LOG(LS_WARNING) << "SetRates() called while uninitialized.";
return;
}
if (encoder_->err) {
@@ -570,14 +570,32 @@
force_key_frame_ = true;
pics_since_key_ = 0;
- num_spatial_layers_ = inst->VP9().numberOfSpatialLayers;
- RTC_DCHECK_GT(num_spatial_layers_, 0);
- num_temporal_layers_ = inst->VP9().numberOfTemporalLayers;
- if (num_temporal_layers_ == 0) {
- num_temporal_layers_ = 1;
+ absl::optional<ScalabilityMode> scalability_mode = inst->GetScalabilityMode();
+ if (scalability_mode.has_value()) {
+ // Use settings from `ScalabilityMode` identifier.
+ RTC_LOG(LS_INFO) << "Create scalability structure "
+ << ScalabilityModeToString(*scalability_mode);
+ svc_controller_ = CreateScalabilityStructure(*scalability_mode);
+ if (!svc_controller_) {
+ RTC_LOG(LS_WARNING) << "Failed to create scalability structure.";
+ return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
+ }
+ ScalableVideoController::StreamLayersConfig info =
+ svc_controller_->StreamConfig();
+ num_spatial_layers_ = info.num_spatial_layers;
+ num_temporal_layers_ = info.num_temporal_layers;
+ inter_layer_pred_ = ScalabilityModeToInterLayerPredMode(*scalability_mode);
+ } else {
+ num_spatial_layers_ = inst->VP9().numberOfSpatialLayers;
+ RTC_DCHECK_GT(num_spatial_layers_, 0);
+ num_temporal_layers_ = inst->VP9().numberOfTemporalLayers;
+ if (num_temporal_layers_ == 0) {
+ num_temporal_layers_ = 1;
+ }
+ inter_layer_pred_ = inst->VP9().interLayerPred;
+ svc_controller_ = CreateVp9ScalabilityStructure(*inst);
}
- svc_controller_ = CreateVp9ScalabilityStructure(*inst);
framerate_controller_ = std::vector<FramerateControllerDeprecated>(
num_spatial_layers_, FramerateControllerDeprecated(codec_.maxFramerate));
@@ -661,8 +679,6 @@
is_flexible_mode_ = inst->VP9().flexibleMode;
- inter_layer_pred_ = inst->VP9().interLayerPred;
-
if (num_spatial_layers_ > 1 &&
codec_.mode == VideoCodecMode::kScreensharing && !is_flexible_mode_) {
RTC_LOG(LS_ERROR) << "Flexible mode is required for screenshare with "
diff --git a/modules/video_coding/codecs/vp9/svc_config.cc b/modules/video_coding/codecs/vp9/svc_config.cc
index 92818eb..77eee3d 100644
--- a/modules/video_coding/codecs/vp9/svc_config.cc
+++ b/modules/video_coding/codecs/vp9/svc_config.cc
@@ -12,9 +12,13 @@
#include <algorithm>
#include <cmath>
+#include <memory>
#include <vector>
+#include "media/base/video_common.h"
#include "modules/video_coding/codecs/vp9/include/vp9_globals.h"
+#include "modules/video_coding/svc/create_scalability_structure.h"
+#include "modules/video_coding/svc/scalability_mode_util.h"
#include "rtc_base/checks.h"
#include "rtc_base/logging.h"
@@ -29,6 +33,19 @@
const size_t kTargetScreenSharingLayerBitrateKbps[] = {150, 350, 950};
const size_t kMaxScreenSharingLayerBitrateKbps[] = {250, 500, 950};
+// Returns how many spatial layers fit the given resolution, counting one layer per halving that stays at or above the minimum VP9 layer size; always >= 1.
+size_t GetLimitedNumSpatialLayers(size_t width, size_t height) {
+ const bool is_landscape = width >= height;  // Square input is treated as landscape.
+ const size_t min_width = is_landscape ? kMinVp9SpatialLayerLongSideLength
+ : kMinVp9SpatialLayerShortSideLength;
+ const size_t min_height = is_landscape ? kMinVp9SpatialLayerShortSideLength
+ : kMinVp9SpatialLayerLongSideLength;
+ const size_t num_layers_fit_horz = static_cast<size_t>(  // max(0, ...) keeps the count at 1 when width < min_width.
+ std::floor(1 + std::max(0.0f, std::log2(1.0f * width / min_width))));
+ const size_t num_layers_fit_vert = static_cast<size_t>(  // Same computation for the vertical dimension.
+ std::floor(1 + std::max(0.0f, std::log2(1.0f * height / min_height))));
+ return std::min(num_layers_fit_horz, num_layers_fit_vert);  // The tighter dimension decides.
+}
} // namespace
std::vector<SpatialLayer> ConfigureSvcScreenSharing(size_t input_width,
@@ -59,27 +76,19 @@
return spatial_layers;
}
-std::vector<SpatialLayer> ConfigureSvcNormalVideo(size_t input_width,
- size_t input_height,
- float max_framerate_fps,
- size_t first_active_layer,
- size_t num_spatial_layers,
- size_t num_temporal_layers) {
+std::vector<SpatialLayer> ConfigureSvcNormalVideo(
+ size_t input_width,
+ size_t input_height,
+ float max_framerate_fps,
+ size_t first_active_layer,
+ size_t num_spatial_layers,
+ size_t num_temporal_layers,
+ absl::optional<ScalableVideoController::StreamLayersConfig> config) {
RTC_DCHECK_LT(first_active_layer, num_spatial_layers);
- std::vector<SpatialLayer> spatial_layers;
// Limit number of layers for given resolution.
- const bool is_landscape = input_width >= input_height;
- const size_t min_width = is_landscape ? kMinVp9SpatialLayerLongSideLength
- : kMinVp9SpatialLayerShortSideLength;
- const size_t min_height = is_landscape ? kMinVp9SpatialLayerShortSideLength
- : kMinVp9SpatialLayerLongSideLength;
- const size_t num_layers_fit_horz = static_cast<size_t>(std::floor(
- 1 + std::max(0.0f, std::log2(1.0f * input_width / min_width))));
- const size_t num_layers_fit_vert = static_cast<size_t>(std::floor(
- 1 + std::max(0.0f, std::log2(1.0f * input_height / min_height))));
- const size_t limited_num_spatial_layers =
- std::min(num_layers_fit_horz, num_layers_fit_vert);
+ size_t limited_num_spatial_layers =
+ GetLimitedNumSpatialLayers(input_width, input_height);
if (limited_num_spatial_layers < num_spatial_layers) {
RTC_LOG(LS_WARNING) << "Reducing number of spatial layers from "
<< num_spatial_layers << " to "
@@ -87,14 +96,23 @@
<< " due to low input resolution.";
num_spatial_layers = limited_num_spatial_layers;
}
+
// First active layer must be configured.
num_spatial_layers = std::max(num_spatial_layers, first_active_layer + 1);
// Ensure top layer is even enough.
int required_divisiblity = 1 << (num_spatial_layers - first_active_layer - 1);
+ if (config) {
+ required_divisiblity = 1;
+ for (size_t sl_idx = 0; sl_idx < num_spatial_layers; ++sl_idx) {
+ required_divisiblity = cricket::LeastCommonMultiple(
+ required_divisiblity, config->scaling_factor_den[sl_idx]);
+ }
+ }
input_width = input_width - input_width % required_divisiblity;
input_height = input_height - input_height % required_divisiblity;
+ std::vector<SpatialLayer> spatial_layers;
for (size_t sl_idx = first_active_layer; sl_idx < num_spatial_layers;
++sl_idx) {
SpatialLayer spatial_layer = {0};
@@ -104,6 +122,13 @@
spatial_layer.numberOfTemporalLayers = num_temporal_layers;
spatial_layer.active = true;
+ if (config) {
+ spatial_layer.width = input_width * config->scaling_factor_num[sl_idx] /
+ config->scaling_factor_den[sl_idx];
+ spatial_layer.height = input_height * config->scaling_factor_num[sl_idx] /
+ config->scaling_factor_den[sl_idx];
+ }
+
// minBitrate and maxBitrate formulas were derived from
// subjective-quality data to determing bit rates below which video
// quality is unacceptable and above which additional bits do not provide
@@ -124,7 +149,7 @@
spatial_layers.push_back(spatial_layer);
}
- // A workaround for sitiation when single HD layer is left with minBitrate
+ // A workaround for situation when single HD layer is left with minBitrate
// about 500kbps. This would mean that there will always be at least 500kbps
// allocated to video regardless of how low is the actual BWE.
// Also, boost maxBitrate for the first layer to account for lost ability to
@@ -140,13 +165,58 @@
return spatial_layers;
}
-std::vector<SpatialLayer> GetSvcConfig(size_t input_width,
- size_t input_height,
- float max_framerate_fps,
- size_t first_active_layer,
- size_t num_spatial_layers,
- size_t num_temporal_layers,
- bool is_screen_sharing) {
+// Uses the scalability mode set on `codec` to configure VP9 spatial layers; returns an empty vector when no structure config exists for the mode.
+std::vector<SpatialLayer> GetVp9SvcConfig(VideoCodec& codec) {  // `codec` may be modified, see below.
+ RTC_DCHECK_EQ(codec.codecType, kVideoCodecVP9);
+
+ absl::optional<ScalabilityMode> scalability_mode = codec.GetScalabilityMode();
+ RTC_DCHECK(scalability_mode.has_value());  // Callers must have set a mode; it is dereferenced below.
+
+ absl::optional<ScalableVideoController::StreamLayersConfig> info =
+ ScalabilityStructureConfig(*scalability_mode);
+ if (!info.has_value()) {
+ RTC_LOG(LS_WARNING) << "Failed to create structure "
+ << ScalabilityModeToString(*scalability_mode);
+ return {};
+ }
+
+ if (static_cast<int>(GetLimitedNumSpatialLayers(codec.width, codec.height)) <
+ info->num_spatial_layers) {
+ // Layers will be reduced, do not use scalability mode for now.
+ // TODO(bugs.webrtc.org/11607): Use a lower scalability mode once all lower
+ // modes are supported.
+ codec.UnsetScalabilityMode();
+ codec.VP9()->interLayerPred =  // Keep the prediction mode the unset scalability mode implied.
+ ScalabilityModeToInterLayerPredMode(*scalability_mode);
+ }
+
+ // TODO(bugs.webrtc.org/11607): Add support for screensharing.
+ std::vector<SpatialLayer> spatial_layers =
+ GetSvcConfig(codec.width, codec.height, codec.maxFramerate,
+ /*first_active_layer=*/0, info->num_spatial_layers,
+ info->num_temporal_layers, /*is_screen_sharing=*/false,
+ codec.GetScalabilityMode() ? info : absl::nullopt);  // Pass the layer config only if the mode was kept.
+ RTC_DCHECK(!spatial_layers.empty());
+
+ // Use codec bitrate limits if spatial layering is not requested.
+ if (info->num_spatial_layers == 1) {
+ spatial_layers.back().minBitrate = codec.minBitrate;
+ spatial_layers.back().targetBitrate = codec.maxBitrate;  // Target is set to the codec's max bitrate.
+ spatial_layers.back().maxBitrate = codec.maxBitrate;
+ }
+
+ return spatial_layers;
+}
+
+std::vector<SpatialLayer> GetSvcConfig(
+ size_t input_width,
+ size_t input_height,
+ float max_framerate_fps,
+ size_t first_active_layer,
+ size_t num_spatial_layers,
+ size_t num_temporal_layers,
+ bool is_screen_sharing,
+ absl::optional<ScalableVideoController::StreamLayersConfig> config) {
RTC_DCHECK_GT(input_width, 0);
RTC_DCHECK_GT(input_height, 0);
RTC_DCHECK_GT(num_spatial_layers, 0);
@@ -158,7 +228,7 @@
} else {
return ConfigureSvcNormalVideo(input_width, input_height, max_framerate_fps,
first_active_layer, num_spatial_layers,
- num_temporal_layers);
+ num_temporal_layers, config);
}
}
diff --git a/modules/video_coding/codecs/vp9/svc_config.h b/modules/video_coding/codecs/vp9/svc_config.h
index f6b562e..adeaf0f 100644
--- a/modules/video_coding/codecs/vp9/svc_config.h
+++ b/modules/video_coding/codecs/vp9/svc_config.h
@@ -15,16 +15,24 @@
#include <vector>
#include "api/video_codecs/spatial_layer.h"
+#include "api/video_codecs/video_codec.h"
+#include "modules/video_coding/svc/scalable_video_controller.h"
namespace webrtc {
-std::vector<SpatialLayer> GetSvcConfig(size_t input_width,
- size_t input_height,
- float max_framerate_fps,
- size_t first_active_layer,
- size_t num_spatial_layers,
- size_t num_temporal_layers,
- bool is_screen_sharing);
+// Configures VP9 spatial layers from the scalability mode set on `video_codec`; may unset that mode, hence the non-const reference.
+std::vector<SpatialLayer> GetVp9SvcConfig(VideoCodec& video_codec);  // An empty result means no structure config exists for the mode.
+
+std::vector<SpatialLayer> GetSvcConfig(
+ size_t input_width,
+ size_t input_height,
+ float max_framerate_fps,
+ size_t first_active_layer,
+ size_t num_spatial_layers,
+ size_t num_temporal_layers,
+ bool is_screen_sharing,
+ absl::optional<ScalableVideoController::StreamLayersConfig> config =
+ absl::nullopt);
} // namespace webrtc
diff --git a/modules/video_coding/codecs/vp9/svc_config_unittest.cc b/modules/video_coding/codecs/vp9/svc_config_unittest.cc
index 77d75ee..4de3c5b 100644
--- a/modules/video_coding/codecs/vp9/svc_config_unittest.cc
+++ b/modules/video_coding/codecs/vp9/svc_config_unittest.cc
@@ -14,8 +14,12 @@
#include <vector>
#include "modules/video_coding/codecs/vp9/include/vp9_globals.h"
+#include "test/gmock.h"
#include "test/gtest.h"
+using ::testing::ElementsAre;
+using ::testing::Field;
+
namespace webrtc {
TEST(SvcConfig, NumSpatialLayers) {
const size_t max_num_spatial_layers = 6;
@@ -43,6 +47,92 @@
EXPECT_EQ(spatial_layers.size(), num_spatial_layers);
}
+TEST(SvcConfig, NumSpatialLayersWithScalabilityMode) {  // All three requested layers are expected at 960x540.
+ VideoCodec codec;
+ codec.codecType = kVideoCodecVP9;
+ codec.width = 960;
+ codec.height = 540;
+ codec.SetScalabilityMode(ScalabilityMode::kL3T3_KEY);
+
+ std::vector<SpatialLayer> spatial_layers = GetVp9SvcConfig(codec);
+ EXPECT_THAT(spatial_layers, ElementsAre(Field(&SpatialLayer::height, 135),  // Layers are ordered lowest resolution first.
+ Field(&SpatialLayer::height, 270),
+ Field(&SpatialLayer::height, 540)));
+ EXPECT_THAT(spatial_layers,
+ ElementsAre(Field(&SpatialLayer::numberOfTemporalLayers, 3),
+ Field(&SpatialLayer::numberOfTemporalLayers, 3),
+ Field(&SpatialLayer::numberOfTemporalLayers, 3)));
+ EXPECT_EQ(codec.GetScalabilityMode(), ScalabilityMode::kL3T3_KEY);  // Mode is kept when no layer is dropped.
+}
+
+TEST(SvcConfig, NumSpatialLayersLimitedWithScalabilityMode) {  // 480x270 is expected to fit only two of the three requested layers.
+ VideoCodec codec;
+ codec.codecType = kVideoCodecVP9;
+ codec.width = 480;
+ codec.height = 270;
+ codec.SetScalabilityMode(ScalabilityMode::kL3T3_KEY);
+
+ // Scalability mode reset, configuration should be in accordance with L2T3_KEY.
+ std::vector<SpatialLayer> spatial_layers = GetVp9SvcConfig(codec);
+ EXPECT_THAT(spatial_layers, ElementsAre(Field(&SpatialLayer::height, 135),
+ Field(&SpatialLayer::height, 270)));
+ EXPECT_THAT(spatial_layers,
+ ElementsAre(Field(&SpatialLayer::numberOfTemporalLayers, 3),
+ Field(&SpatialLayer::numberOfTemporalLayers, 3)));
+ EXPECT_EQ(codec.VP9()->interLayerPred, InterLayerPredMode::kOnKeyPic);  // Carried over from the _KEY mode.
+ EXPECT_EQ(codec.GetScalabilityMode(), absl::nullopt);  // Mode is unset when layers are reduced.
+}
+
+TEST(SvcConfig, NumSpatialLayersLimitedWithScalabilityModePortrait) {  // Portrait counterpart of the limited-layers case.
+ VideoCodec codec;
+ codec.codecType = kVideoCodecVP9;
+ codec.width = 270;
+ codec.height = 480;
+ codec.SetScalabilityMode(ScalabilityMode::kL3T1);
+
+ // Scalability mode reset, configuration should be in accordance with L2T1.
+ std::vector<SpatialLayer> spatial_layers = GetVp9SvcConfig(codec);
+ EXPECT_THAT(spatial_layers, ElementsAre(Field(&SpatialLayer::width, 135),
+ Field(&SpatialLayer::width, 270)));
+ EXPECT_THAT(spatial_layers,
+ ElementsAre(Field(&SpatialLayer::numberOfTemporalLayers, 1),
+ Field(&SpatialLayer::numberOfTemporalLayers, 1)));
+ EXPECT_EQ(codec.VP9()->interLayerPred, InterLayerPredMode::kOn);
+ EXPECT_EQ(codec.GetScalabilityMode(), absl::nullopt);  // Mode is unset when layers are reduced.
+}
+
+TEST(SvcConfig, NumSpatialLayersWithScalabilityModeResolutionRatio1_5) {  // Both layers are expected, so the mode is kept.
+ VideoCodec codec;
+ codec.codecType = kVideoCodecVP9;
+ codec.width = 270;
+ codec.height = 480;
+ codec.SetScalabilityMode(ScalabilityMode::kL2T1h); // 1.5:1
+
+ std::vector<SpatialLayer> spatial_layers = GetVp9SvcConfig(codec);
+ EXPECT_THAT(spatial_layers, ElementsAre(Field(&SpatialLayer::width, 180),  // 270 / 1.5
+ Field(&SpatialLayer::width, 270)));
+ EXPECT_THAT(spatial_layers,
+ ElementsAre(Field(&SpatialLayer::numberOfTemporalLayers, 1),
+ Field(&SpatialLayer::numberOfTemporalLayers, 1)));
+ EXPECT_EQ(codec.GetScalabilityMode(), ScalabilityMode::kL2T1h);
+}
+
+TEST(SvcConfig, NumSpatialLayersLimitedWithScalabilityModeResolutionRatio1_5) {  // 320x180 is expected to fit a single layer only.
+ VideoCodec codec;
+ codec.codecType = kVideoCodecVP9;
+ codec.width = 320;
+ codec.height = 180;
+ codec.SetScalabilityMode(ScalabilityMode::kL2T1h); // 1.5:1
+
+ // Scalability mode reset, configuration should be in accordance with L1T1.
+ std::vector<SpatialLayer> spatial_layers = GetVp9SvcConfig(codec);
+ EXPECT_THAT(spatial_layers, ElementsAre(Field(&SpatialLayer::width, 320)));
+ EXPECT_THAT(spatial_layers,
+ ElementsAre(Field(&SpatialLayer::numberOfTemporalLayers, 1)));
+ EXPECT_EQ(codec.VP9()->interLayerPred, InterLayerPredMode::kOn);
+ EXPECT_EQ(codec.GetScalabilityMode(), absl::nullopt);  // Mode is unset when layers are reduced.
+}
+
TEST(SvcConfig, AlwaysSendsAtLeastOneLayer) {
const size_t max_num_spatial_layers = 6;
const size_t first_active_layer = 5;
@@ -91,6 +181,44 @@
EXPECT_EQ(spatial_layers.back().width, kOddSize);
}
+TEST(SvcConfig, EnforcesMinimalRequiredParityWithScalabilityMode) {  // The odd input size is rounded down per mode.
+ VideoCodec codec;
+ codec.codecType = kVideoCodecVP9;
+ codec.width = 1023;
+ codec.height = 1023;
+ codec.SetScalabilityMode(ScalabilityMode::kL3T1);
+
+ std::vector<SpatialLayer> spatial_layers = GetVp9SvcConfig(codec);
+ EXPECT_THAT(spatial_layers, // Divisibility by 4 required.
+ ElementsAre(Field(&SpatialLayer::width, 255),
+ Field(&SpatialLayer::width, 510),
+ Field(&SpatialLayer::width, 1020)));
+
+ codec.SetScalabilityMode(ScalabilityMode::kL2T1);
+ spatial_layers = GetVp9SvcConfig(codec);
+ EXPECT_THAT(spatial_layers, // Divisibility by 2 required.
+ ElementsAre(Field(&SpatialLayer::width, 511),
+ Field(&SpatialLayer::width, 1022)));
+
+ codec.SetScalabilityMode(ScalabilityMode::kL1T1);
+ spatial_layers = GetVp9SvcConfig(codec);
+ EXPECT_THAT(spatial_layers, // Divisibility by 1 required.
+ ElementsAre(Field(&SpatialLayer::width, 1023)));
+}
+
+TEST(SvcConfig, EnforcesMinimalRequiredParityWithScalabilityModeResRatio1_5) {  // 1280 is rounded down to a multiple of 3.
+ VideoCodec codec;
+ codec.codecType = kVideoCodecVP9;
+ codec.width = 1280;
+ codec.height = 1280;
+ codec.SetScalabilityMode(ScalabilityMode::kL2T1h); // 1.5:1
+
+ std::vector<SpatialLayer> spatial_layers = GetVp9SvcConfig(codec);
+ EXPECT_THAT(spatial_layers, // Divisibility by 3 required.
+ ElementsAre(Field(&SpatialLayer::width, 852),  // 1278 / 1.5
+ Field(&SpatialLayer::width, 1278)));
+}
+
TEST(SvcConfig, SkipsInactiveLayers) {
const size_t num_spatial_layers = 4;
const size_t first_active_layer = 2;
@@ -121,6 +249,25 @@
}
}
+TEST(SvcConfig, BitrateThresholdsWithScalabilityMode) {  // Checks min <= target <= max bitrate on every layer.
+ VideoCodec codec;
+ codec.codecType = kVideoCodecVP9;
+ codec.width = 960;
+ codec.height = 540;
+ codec.SetScalabilityMode(ScalabilityMode::kS3T3);
+
+ std::vector<SpatialLayer> spatial_layers = GetVp9SvcConfig(codec);
+ EXPECT_THAT(spatial_layers, ElementsAre(Field(&SpatialLayer::height, 135),  // Layers are ordered lowest resolution first.
+ Field(&SpatialLayer::height, 270),
+ Field(&SpatialLayer::height, 540)));
+
+ for (const SpatialLayer& layer : spatial_layers) {
+ EXPECT_LE(layer.minBitrate, layer.maxBitrate);
+ EXPECT_LE(layer.minBitrate, layer.targetBitrate);
+ EXPECT_LE(layer.targetBitrate, layer.maxBitrate);
+ }
+}
+
TEST(SvcConfig, ScreenSharing) {
std::vector<SpatialLayer> spatial_layers =
GetSvcConfig(1920, 1080, 30, 1, 3, 3, true);
diff --git a/modules/video_coding/svc/BUILD.gn b/modules/video_coding/svc/BUILD.gn
index f68001a..d82f316 100644
--- a/modules/video_coding/svc/BUILD.gn
+++ b/modules/video_coding/svc/BUILD.gn
@@ -15,6 +15,7 @@
]
deps = [
"../../../api/video_codecs:scalability_mode",
+ "../../../api/video_codecs:video_codecs_api",
"../../../rtc_base:checks",
]
absl_deps = [
diff --git a/modules/video_coding/svc/scalability_mode_util.cc b/modules/video_coding/svc/scalability_mode_util.cc
index 1cbdeb6..d0a56af 100644
--- a/modules/video_coding/svc/scalability_mode_util.cc
+++ b/modules/video_coding/svc/scalability_mode_util.cc
@@ -139,6 +139,52 @@
RTC_CHECK_NOTREACHED();
}
+InterLayerPredMode ScalabilityModeToInterLayerPredMode(  // Maps a scalability mode to its inter-layer prediction mode.
+ ScalabilityMode scalability_mode) {
+ switch (scalability_mode) {  // Every case returns, so the code after the switch is a guard only.
+ case ScalabilityMode::kL1T1:
+ case ScalabilityMode::kL1T2:
+ case ScalabilityMode::kL1T2h:
+ case ScalabilityMode::kL1T3:
+ case ScalabilityMode::kL1T3h:
+ case ScalabilityMode::kL2T1:
+ case ScalabilityMode::kL2T1h:
+ return InterLayerPredMode::kOn;  // kL* modes without a _KEY suffix map to kOn.
+ case ScalabilityMode::kL2T1_KEY:
+ return InterLayerPredMode::kOnKeyPic;  // _KEY (and _KEY_SHIFT) modes map to kOnKeyPic.
+ case ScalabilityMode::kL2T2:
+ case ScalabilityMode::kL2T2h:
+ return InterLayerPredMode::kOn;
+ case ScalabilityMode::kL2T2_KEY:
+ case ScalabilityMode::kL2T2_KEY_SHIFT:
+ return InterLayerPredMode::kOnKeyPic;
+ case ScalabilityMode::kL2T3:
+ case ScalabilityMode::kL2T3h:
+ return InterLayerPredMode::kOn;
+ case ScalabilityMode::kL2T3_KEY:
+ return InterLayerPredMode::kOnKeyPic;
+ case ScalabilityMode::kL3T1:
+ case ScalabilityMode::kL3T1h:
+ return InterLayerPredMode::kOn;
+ case ScalabilityMode::kL3T1_KEY:
+ return InterLayerPredMode::kOnKeyPic;
+ case ScalabilityMode::kL3T2:
+ case ScalabilityMode::kL3T2h:
+ return InterLayerPredMode::kOn;
+ case ScalabilityMode::kL3T2_KEY:
+ return InterLayerPredMode::kOnKeyPic;
+ case ScalabilityMode::kL3T3:
+ case ScalabilityMode::kL3T3h:
+ return InterLayerPredMode::kOn;
+ case ScalabilityMode::kL3T3_KEY:
+ return InterLayerPredMode::kOnKeyPic;
+ case ScalabilityMode::kS2T1:
+ case ScalabilityMode::kS3T3:
+ return InterLayerPredMode::kOff;  // kS* modes map to kOff.
+ }
+ RTC_CHECK_NOTREACHED();  // Only reachable for a value not handled by the cases above.
+}
+
int ScalabilityModeToNumSpatialLayers(ScalabilityMode scalability_mode) {
switch (scalability_mode) {
case ScalabilityMode::kL1T1:
diff --git a/modules/video_coding/svc/scalability_mode_util.h b/modules/video_coding/svc/scalability_mode_util.h
index faff4cf..c543c0c 100644
--- a/modules/video_coding/svc/scalability_mode_util.h
+++ b/modules/video_coding/svc/scalability_mode_util.h
@@ -14,6 +14,7 @@
#include "absl/strings/string_view.h"
#include "absl/types/optional.h"
#include "api/video_codecs/scalability_mode.h"
+#include "api/video_codecs/video_codec.h"
namespace webrtc {
@@ -22,6 +23,9 @@
absl::string_view ScalabilityModeToString(ScalabilityMode scalability_mode);
+InterLayerPredMode ScalabilityModeToInterLayerPredMode(
+ ScalabilityMode scalability_mode);  // Returns kOn, kOnKeyPic or kOff for the given mode.
+
int ScalabilityModeToNumSpatialLayers(ScalabilityMode scalability_mode);
int ScalabilityModeToNumTemporalLayers(ScalabilityMode scalability_mode);
diff --git a/modules/video_coding/video_codec_initializer.cc b/modules/video_coding/video_codec_initializer.cc
index 03f7ffe..99eb67c 100644
--- a/modules/video_coding/video_codec_initializer.cc
+++ b/modules/video_coding/video_codec_initializer.cc
@@ -231,6 +231,11 @@
if (!config.spatial_layers.empty()) {
// Layering is set explicitly.
spatial_layers = config.spatial_layers;
+ } else if (scalability_mode.has_value()) {
+ // Layering is set via scalability mode.
+ spatial_layers = GetVp9SvcConfig(video_codec);
+ if (spatial_layers.empty())
+ break;
} else {
size_t first_active_layer = 0;
for (size_t spatial_idx = 0;
diff --git a/test/encoder_settings.cc b/test/encoder_settings.cc
index c825188..09cefc8 100644
--- a/test/encoder_settings.cc
+++ b/test/encoder_settings.cc
@@ -88,6 +88,7 @@
stream_settings[i].width = width / stream.scale_resolution_down_by;
stream_settings[i].height = height / stream.scale_resolution_down_by;
}
+ stream_settings[i].scalability_mode = stream.scalability_mode;
stream_settings[i].target_bitrate_bps = target_bitrate_bps;
stream_settings[i].max_bitrate_bps = max_bitrate_bps;
stream_settings[i].active =
diff --git a/video/BUILD.gn b/video/BUILD.gn
index 71f4fc9..980e5b5 100644
--- a/video/BUILD.gn
+++ b/video/BUILD.gn
@@ -926,6 +926,8 @@
"../modules/video_coding:webrtc_vp9_helpers",
"../modules/video_coding/codecs/av1:libaom_av1_encoder_if_supported",
"../modules/video_coding/svc:scalability_mode_util",
+ "../modules/video_coding/svc:scalability_structures",
+ "../modules/video_coding/svc:scalable_video_controller",
"../rtc_base",
"../rtc_base:byte_buffer",
"../rtc_base:checks",
diff --git a/video/video_send_stream_tests.cc b/video/video_send_stream_tests.cc
index c364d0b..02bff7d 100644
--- a/video/video_send_stream_tests.cc
+++ b/video/video_send_stream_tests.cc
@@ -39,7 +39,9 @@
#include "modules/video_coding/codecs/interface/common_constants.h"
#include "modules/video_coding/codecs/vp8/include/vp8.h"
#include "modules/video_coding/codecs/vp9/include/vp9.h"
+#include "modules/video_coding/svc/create_scalability_structure.h"
#include "modules/video_coding/svc/scalability_mode_util.h"
+#include "modules/video_coding/svc/scalable_video_controller.h"
#include "rtc_base/checks.h"
#include "rtc_base/event.h"
#include "rtc_base/experiments/alr_experiment.h"
@@ -2976,14 +2978,11 @@
#if defined(RTC_ENABLE_VP9)
class Vp9HeaderObserver : public test::SendTest {
public:
- Vp9HeaderObserver()
+ explicit Vp9HeaderObserver(const Vp9TestParams& params)
: SendTest(VideoSendStreamTest::kLongTimeoutMs),
encoder_factory_([]() { return VP9Encoder::Create(); }),
- vp9_settings_(VideoEncoder::GetDefaultVp9Settings()),
- packets_sent_(0),
- frames_sent_(0),
- expected_width_(0),
- expected_height_(0) {}
+ params_(params),
+ vp9_settings_(VideoEncoder::GetDefaultVp9Settings()) {}
virtual void ModifyVideoConfigsHook(
VideoSendStream::Config* send_config,
@@ -3008,8 +3007,6 @@
vp9_settings_);
EXPECT_EQ(1u, encoder_config->number_of_streams);
EXPECT_EQ(1u, encoder_config->simulcast_layers.size());
- encoder_config->simulcast_layers[0].num_temporal_layers =
- vp9_settings_.numberOfTemporalLayers;
encoder_config_ = encoder_config->Copy();
}
@@ -3185,17 +3182,17 @@
EXPECT_NE(kNoPictureId, vp9.picture_id); // I:1
EXPECT_EQ(vp9_settings_.flexibleMode, vp9.flexible_mode); // F
- if (vp9_settings_.numberOfSpatialLayers > 1) {
- EXPECT_LT(vp9.spatial_idx, vp9_settings_.numberOfSpatialLayers);
- } else if (vp9_settings_.numberOfTemporalLayers > 1) {
+ if (params_.num_spatial_layers > 1) {
+ EXPECT_LT(vp9.spatial_idx, params_.num_spatial_layers);
+ } else if (params_.num_temporal_layers > 1) {
EXPECT_EQ(vp9.spatial_idx, 0);
} else {
EXPECT_EQ(vp9.spatial_idx, kNoSpatialIdx);
}
- if (vp9_settings_.numberOfTemporalLayers > 1) {
- EXPECT_LT(vp9.temporal_idx, vp9_settings_.numberOfTemporalLayers);
- } else if (vp9_settings_.numberOfSpatialLayers > 1) {
+ if (params_.num_temporal_layers > 1) {
+ EXPECT_LT(vp9.temporal_idx, params_.num_temporal_layers);
+ } else if (params_.num_spatial_layers > 1) {
EXPECT_EQ(vp9.temporal_idx, 0);
} else {
EXPECT_EQ(vp9.temporal_idx, kNoTemporalIdx);
@@ -3234,16 +3231,24 @@
// +-+-+-+-+-+-+-+-+
void VerifySsData(const RTPVideoHeaderVP9& vp9) const {
EXPECT_TRUE(vp9.ss_data_available); // V
- EXPECT_EQ(vp9_settings_.numberOfSpatialLayers, // N_S + 1
+ EXPECT_EQ(params_.num_spatial_layers, // N_S + 1
vp9.num_spatial_layers);
EXPECT_TRUE(vp9.spatial_layer_resolution_present); // Y:1
- int expected_width = expected_width_;
- int expected_height = expected_height_;
+
+ absl::optional<ScalableVideoController::StreamLayersConfig> info;
+ absl::optional<ScalabilityMode> scalability_mode =
+ ScalabilityModeFromString(params_.scalability_mode);
+ if (scalability_mode) {
+ info = ScalabilityStructureConfig(*scalability_mode);
+ }
+ double default_ratio = 1.0;
for (int i = static_cast<int>(vp9.num_spatial_layers) - 1; i >= 0; --i) {
- EXPECT_EQ(expected_width, vp9.width[i]); // WIDTH
- EXPECT_EQ(expected_height, vp9.height[i]); // HEIGHT
- expected_width /= 2;
- expected_height /= 2;
+ double ratio = info ? (static_cast<double>(info->scaling_factor_num[i]) /
+ info->scaling_factor_den[i])
+ : default_ratio;
+ EXPECT_EQ(expected_width_ * ratio, vp9.width[i]); // WIDTH
+ EXPECT_EQ(expected_height_ * ratio, vp9.height[i]); // HEIGHT
+ default_ratio /= 2.0;
}
}
@@ -3281,17 +3286,18 @@
}
test::FunctionVideoEncoderFactory encoder_factory_;
+ const Vp9TestParams params_;
VideoCodecVP9 vp9_settings_;
webrtc::VideoEncoderConfig encoder_config_;
bool last_packet_marker_ = false;
uint16_t last_packet_sequence_number_ = 0;
uint32_t last_packet_timestamp_ = 0;
RTPVideoHeaderVP9 last_vp9_;
- size_t packets_sent_;
+ size_t packets_sent_ = 0;
Mutex mutex_;
- size_t frames_sent_;
- int expected_width_;
- int expected_height_;
+ size_t frames_sent_ = 0;
+ int expected_width_ = 0;
+ int expected_height_ = 0;
};
class Vp9Test : public VideoSendStreamTest,
@@ -3319,13 +3325,27 @@
{"L2T1_KEY", 2, 1, InterLayerPredMode::kOnKeyPic},
{"L2T2", 2, 2, InterLayerPredMode::kOn},
{"L2T2_KEY", 2, 2, InterLayerPredMode::kOnKeyPic},
- {"L2T3", 2, 3, InterLayerPredMode::kOn},
{"L2T3_KEY", 2, 3, InterLayerPredMode::kOnKeyPic},
{"L3T1", 3, 1, InterLayerPredMode::kOn},
{"L3T3", 3, 3, InterLayerPredMode::kOn},
{"L3T3_KEY", 3, 3, InterLayerPredMode::kOnKeyPic},
{"S2T1", 2, 1, InterLayerPredMode::kOff},
{"S3T3", 3, 3, InterLayerPredMode::kOff}}),
+ ::testing::Values(false, true)), // use_scalability_mode_identifier
+ [](const ::testing::TestParamInfo<Vp9Test::ParamType>& info) {
+ rtc::StringBuilder sb;
+ sb << std::get<0>(info.param).scalability_mode << "_"
+ << (std::get<1>(info.param) ? "WithIdentifier" : "WithoutIdentifier");
+ return sb.str();
+ });
+
+INSTANTIATE_TEST_SUITE_P(
+ ScalabilityModeOff,
+ Vp9Test,
+ ::testing::Combine(
+ ::testing::ValuesIn<Vp9TestParams>(
+ {{"L2T3", 2, 3, InterLayerPredMode::kOn},
+ {"S2T3", 2, 3, InterLayerPredMode::kOff}}),
::testing::Values(false)), // use_scalability_mode_identifier
[](const ::testing::TestParamInfo<Vp9Test::ParamType>& info) {
rtc::StringBuilder sb;
@@ -3334,6 +3354,20 @@
return sb.str();
});
+INSTANTIATE_TEST_SUITE_P(
+ ScalabilityModeOn,  // Modes run only with the scalability mode identifier enabled.
+ Vp9Test,
+ ::testing::Combine(
+ ::testing::ValuesIn<Vp9TestParams>({{"L2T1h", 2, 1,
+ InterLayerPredMode::kOn}}),
+ ::testing::Values(true)), // use_scalability_mode_identifier
+ [](const ::testing::TestParamInfo<Vp9Test::ParamType>& info) {  // Builds the test name suffix, e.g. "L2T1h_WithIdentifier".
+ rtc::StringBuilder sb;
+ sb << std::get<0>(info.param).scalability_mode << "_"
+ << (std::get<1>(info.param) ? "WithIdentifier" : "WithoutIdentifier");
+ return sb.str();
+ });
+
TEST_P(Vp9Test, NonFlexMode) {
TestVp9NonFlexMode(params_, use_scalability_mode_identifier_);
}
@@ -3353,7 +3387,7 @@
public:
NonFlexibleMode(const Vp9TestParams& params,
bool use_scalability_mode_identifier)
- : params_(params),
+ : Vp9HeaderObserver(params),
use_scalability_mode_identifier_(use_scalability_mode_identifier),
l_field_(params.num_temporal_layers > 1 ||
params.num_spatial_layers > 1) {}
@@ -3372,9 +3406,6 @@
}
encoder_config->max_bitrate_bps = bitrate_bps * 2;
- EXPECT_EQ(1u, encoder_config->number_of_streams);
- EXPECT_EQ(1u, encoder_config->simulcast_layers.size());
-
encoder_config->frame_drop_enabled = false;
vp9_settings_.flexibleMode = false;
@@ -3385,8 +3416,10 @@
vp9_settings_.numberOfSpatialLayers = params_.num_spatial_layers;
vp9_settings_.interLayerPred = params_.inter_layer_pred;
} else {
- encoder_config->simulcast_layers[0].scalability_mode =
+ absl::optional<ScalabilityMode> mode =
ScalabilityModeFromString(params_.scalability_mode);
+ encoder_config->simulcast_layers[0].scalability_mode = mode;
+ EXPECT_TRUE(mode.has_value());
}
}
@@ -3442,7 +3475,6 @@
if (frames_sent_ > kNumFramesToSend)
observation_complete_.Set();
}
- const Vp9TestParams params_;
const bool use_scalability_mode_identifier_;
const bool l_field_;
@@ -3463,16 +3495,20 @@
static const int kWidth = 4;
static const int kHeight = 4;
class NonFlexibleModeResolution : public Vp9HeaderObserver {
+ public:
+ explicit NonFlexibleModeResolution(const Vp9TestParams& params)
+ : Vp9HeaderObserver(params) {}
+
+ private:
void ModifyVideoConfigsHook(
VideoSendStream::Config* send_config,
std::vector<VideoReceiveStream::Config>* receive_configs,
VideoEncoderConfig* encoder_config) override {
encoder_config->codec_type = kVideoCodecVP9;
vp9_settings_.flexibleMode = false;
- vp9_settings_.numberOfTemporalLayers = 1;
- vp9_settings_.numberOfSpatialLayers = 1;
-
- EXPECT_EQ(1u, encoder_config->number_of_streams);
+ vp9_settings_.numberOfTemporalLayers = params_.num_temporal_layers;
+ vp9_settings_.numberOfSpatialLayers = params_.num_spatial_layers;
+ vp9_settings_.interLayerPred = params_.inter_layer_pred;
}
void InspectHeader(const RTPVideoHeaderVP9& vp9_header) override {
@@ -3488,7 +3524,10 @@
*width = kWidth;
*height = kHeight;
}
- } test;
+ };
+
+ Vp9TestParams params{"L1T1", 1, 1, InterLayerPredMode::kOn};
+ NonFlexibleModeResolution test(params);
RunBaseTest(&test);
}
@@ -3504,6 +3543,11 @@
#endif
TEST_F(VideoSendStreamTest, MAYBE_Vp9FlexModeRefCount) {
class FlexibleMode : public Vp9HeaderObserver {
+ public:
+ explicit FlexibleMode(const Vp9TestParams& params)
+ : Vp9HeaderObserver(params) {}
+
+ private:
void ModifyVideoConfigsHook(
VideoSendStream::Config* send_config,
std::vector<VideoReceiveStream::Config>* receive_configs,
@@ -3511,8 +3555,9 @@
encoder_config->codec_type = kVideoCodecVP9;
encoder_config->content_type = VideoEncoderConfig::ContentType::kScreen;
vp9_settings_.flexibleMode = true;
- vp9_settings_.numberOfTemporalLayers = 1;
- vp9_settings_.numberOfSpatialLayers = 2;
+ vp9_settings_.numberOfTemporalLayers = params_.num_temporal_layers;
+ vp9_settings_.numberOfSpatialLayers = params_.num_spatial_layers;
+ vp9_settings_.interLayerPred = params_.inter_layer_pred;
}
void InspectHeader(const RTPVideoHeaderVP9& vp9_header) override {
@@ -3523,7 +3568,10 @@
observation_complete_.Set();
}
}
- } test;
+ };
+
+ Vp9TestParams params{"L2T1", 2, 1, InterLayerPredMode::kOn};
+ FlexibleMode test(params);
RunBaseTest(&test);
}