Report proper VP9 scalability mode with layer activation.
This changes the libvpx VP9 encoder to generate the scalability mode based on the current encoding parameters when using layer activation.
Tested: Ran with L3T3_KEY reduced to L2T3_KEY and L1T3 due to bandwidth or layer activation. Added unit tests.
Bug: webrtc:15892
Change-Id: Iaedca4ea5fc3a692996666ceaf0d6aa03fb058a1
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/344760
Commit-Queue: Evan Shrubsole <eshr@google.com>
Reviewed-by: Ilya Nikolaevskiy <ilnik@webrtc.org>
Reviewed-by: Harald Alvestrand <hta@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#42007}
diff --git a/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.cc b/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.cc
index 055ec67..d7a59ee 100644
--- a/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.cc
+++ b/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.cc
@@ -10,11 +10,11 @@
*/
#include <memory>
+
#ifdef RTC_ENABLE_VP9
#include <algorithm>
#include <limits>
-#include <tuple>
#include <utility>
#include <vector>
@@ -87,17 +87,13 @@
return {0, 0};
}
-using Vp9ScalabilityStructure =
- std::tuple<std::unique_ptr<ScalableVideoController>, ScalabilityMode>;
-absl::optional<Vp9ScalabilityStructure> CreateVp9ScalabilityStructure(
+std::unique_ptr<ScalableVideoController> CreateVp9ScalabilityStructure(
const VideoCodec& codec) {
int num_spatial_layers = codec.VP9().numberOfSpatialLayers;
int num_temporal_layers =
std::max(1, int{codec.VP9().numberOfTemporalLayers});
if (num_spatial_layers == 1 && num_temporal_layers == 1) {
- return absl::make_optional<Vp9ScalabilityStructure>(
- std::make_unique<ScalableVideoControllerNoLayering>(),
- ScalabilityMode::kL1T1);
+ return std::make_unique<ScalableVideoControllerNoLayering>();
}
char name[20];
@@ -105,7 +101,7 @@
if (codec.mode == VideoCodecMode::kScreensharing) {
// TODO(bugs.webrtc.org/11999): Compose names of the structures when they
// are implemented.
- return absl::nullopt;
+ return nullptr;
} else if (codec.VP9().interLayerPred == InterLayerPredMode::kOn ||
num_spatial_layers == 1) {
ss << "L" << num_spatial_layers << "T" << num_temporal_layers;
@@ -122,7 +118,7 @@
codec.height != codec.spatialLayers[num_spatial_layers - 1].height) {
RTC_LOG(LS_WARNING)
<< "Top layer resolution expected to match overall resolution";
- return absl::nullopt;
+ return nullptr;
}
// Check if the ratio is one of the supported.
int numerator;
@@ -140,7 +136,7 @@
RTC_LOG(LS_WARNING) << "Unsupported scalability ratio "
<< codec.spatialLayers[0].width << ":"
<< codec.spatialLayers[1].width;
- return absl::nullopt;
+ return nullptr;
}
// Validate ratio is consistent for all spatial layer transitions.
for (int sid = 1; sid < num_spatial_layers; ++sid) {
@@ -150,7 +146,7 @@
codec.spatialLayers[sid - 1].height * denominator) {
RTC_LOG(LS_WARNING) << "Inconsistent scalability ratio " << numerator
<< ":" << denominator;
- return absl::nullopt;
+ return nullptr;
}
}
}
@@ -159,7 +155,7 @@
ScalabilityModeFromString(name);
if (!scalability_mode.has_value()) {
RTC_LOG(LS_WARNING) << "Invalid scalability mode " << name;
- return absl::nullopt;
+ return nullptr;
}
auto scalability_structure_controller =
CreateScalabilityStructure(*scalability_mode);
@@ -168,8 +164,7 @@
} else {
RTC_LOG(LS_INFO) << "Created scalability structure " << name;
}
- return absl::make_optional<Vp9ScalabilityStructure>(
- std::move(scalability_structure_controller), *scalability_mode);
+ return scalability_structure_controller;
}
vpx_svc_ref_frame_config_t Vp9References(
@@ -602,14 +597,7 @@
num_temporal_layers_ = 1;
}
inter_layer_pred_ = inst->VP9().interLayerPred;
- auto vp9_scalability = CreateVp9ScalabilityStructure(*inst);
- if (vp9_scalability.has_value()) {
- std::tie(svc_controller_, scalability_mode_) =
- std::move(vp9_scalability.value());
- } else {
- svc_controller_ = nullptr;
- scalability_mode_ = absl::nullopt;
- }
+ svc_controller_ = CreateVp9ScalabilityStructure(*inst);
}
framerate_controller_ = std::vector<FramerateControllerDeprecated>(
@@ -1461,7 +1449,19 @@
}
}
}
- codec_specific->scalability_mode = scalability_mode_;
+ // Report the configured scalability mode when running in standard mode;
+ // otherwise derive one from the layers currently active (layer activation).
+ if (scalability_mode_) {
+ codec_specific->scalability_mode = scalability_mode_;
+ } else {
+ codec_specific_.scalability_mode = MakeScalabilityMode(
+ num_active_spatial_layers_, num_temporal_layers_, inter_layer_pred_,
+ num_active_spatial_layers_ > 1
+ ? absl::make_optional(ScalabilityModeResolutionRatio::kTwoToOne)
+ : absl::nullopt,
+ /*shift=*/false);
+ }
+
return true;
}
diff --git a/modules/video_coding/svc/scalability_mode_util.cc b/modules/video_coding/svc/scalability_mode_util.cc
index 35d66df..c6b3b47 100644
--- a/modules/video_coding/svc/scalability_mode_util.cc
+++ b/modules/video_coding/svc/scalability_mode_util.cc
@@ -10,13 +10,115 @@
#include "modules/video_coding/svc/scalability_mode_util.h"
+#include <array>
+#include <utility>
+
#include "absl/strings/string_view.h"
#include "absl/types/optional.h"
#include "api/video_codecs/scalability_mode.h"
+#include "api/video_codecs/video_codec.h"
#include "rtc_base/checks.h"
namespace webrtc {
+namespace {
+struct ScalabilityModeParameters {
+ int num_spatial_layers;
+ int num_temporal_layers;
+ InterLayerPredMode inter_layer_pred;
+ absl::optional<ScalabilityModeResolutionRatio> ratio;
+ bool shift;
+
+ constexpr bool operator==(const ScalabilityModeParameters& other) const {
+ // For all L1Tx modes, ignore inter_layer_pred, ratio and shift.
+ if (this->num_spatial_layers == 1) {
+ return this->num_spatial_layers == other.num_spatial_layers &&
+ this->num_temporal_layers == other.num_temporal_layers;
+ }
+ return this->num_spatial_layers == other.num_spatial_layers &&
+ this->num_temporal_layers == other.num_temporal_layers &&
+ this->inter_layer_pred == other.inter_layer_pred &&
+ this->ratio == other.ratio && this->shift == other.shift;
+ }
+};
+
+struct ScalabilityModeConfiguration {
+ explicit ScalabilityModeConfiguration(ScalabilityMode scalability_mode)
+ : scalability_mode(scalability_mode),
+ params{
+ .num_spatial_layers =
+ (ScalabilityModeToNumSpatialLayers(scalability_mode)),
+ .num_temporal_layers =
+ (ScalabilityModeToNumTemporalLayers(scalability_mode)),
+ .inter_layer_pred =
+ (ScalabilityModeToInterLayerPredMode(scalability_mode)),
+ .ratio = (ScalabilityModeToResolutionRatio(scalability_mode)),
+ .shift = (ScalabilityModeIsShiftMode(scalability_mode)),
+ } {}
+
+ const ScalabilityMode scalability_mode;
+ const ScalabilityModeParameters params;
+};
+
+constexpr size_t kNumScalabilityModes =
+ static_cast<size_t>(ScalabilityMode::kS3T3h) + 1;
+} // namespace
+
+absl::optional<ScalabilityMode> MakeScalabilityMode(
+ int num_spatial_layers,
+ int num_temporal_layers,
+ InterLayerPredMode inter_layer_pred,
+ absl::optional<ScalabilityModeResolutionRatio> ratio,
+ bool shift) {
+ ScalabilityModeParameters params{num_spatial_layers, num_temporal_layers,
+ inter_layer_pred, std::move(ratio), shift};
+
+ static const ScalabilityModeConfiguration kScalabilityModeConfigs[] = {
+ ScalabilityModeConfiguration{ScalabilityMode::kL1T1},
+ ScalabilityModeConfiguration{ScalabilityMode::kL1T2},
+ ScalabilityModeConfiguration{ScalabilityMode::kL1T3},
+ ScalabilityModeConfiguration{ScalabilityMode::kL2T1},
+ ScalabilityModeConfiguration{ScalabilityMode::kL2T1h},
+ ScalabilityModeConfiguration{ScalabilityMode::kL2T1_KEY},
+ ScalabilityModeConfiguration{ScalabilityMode::kL2T2},
+ ScalabilityModeConfiguration{ScalabilityMode::kL2T2h},
+ ScalabilityModeConfiguration{ScalabilityMode::kL2T2_KEY},
+ ScalabilityModeConfiguration{ScalabilityMode::kL2T2_KEY_SHIFT},
+ ScalabilityModeConfiguration{ScalabilityMode::kL2T3},
+ ScalabilityModeConfiguration{ScalabilityMode::kL2T3h},
+ ScalabilityModeConfiguration{ScalabilityMode::kL2T3_KEY},
+ ScalabilityModeConfiguration{ScalabilityMode::kL3T1},
+ ScalabilityModeConfiguration{ScalabilityMode::kL3T1h},
+ ScalabilityModeConfiguration{ScalabilityMode::kL3T1_KEY},
+ ScalabilityModeConfiguration{ScalabilityMode::kL3T2},
+ ScalabilityModeConfiguration{ScalabilityMode::kL3T2h},
+ ScalabilityModeConfiguration{ScalabilityMode::kL3T2_KEY},
+ ScalabilityModeConfiguration{ScalabilityMode::kL3T3},
+ ScalabilityModeConfiguration{ScalabilityMode::kL3T3h},
+ ScalabilityModeConfiguration{ScalabilityMode::kL3T3_KEY},
+ ScalabilityModeConfiguration{ScalabilityMode::kS2T1},
+ ScalabilityModeConfiguration{ScalabilityMode::kS2T1h},
+ ScalabilityModeConfiguration{ScalabilityMode::kS2T2},
+ ScalabilityModeConfiguration{ScalabilityMode::kS2T2h},
+ ScalabilityModeConfiguration{ScalabilityMode::kS2T3},
+ ScalabilityModeConfiguration{ScalabilityMode::kS2T3h},
+ ScalabilityModeConfiguration{ScalabilityMode::kS3T1},
+ ScalabilityModeConfiguration{ScalabilityMode::kS3T1h},
+ ScalabilityModeConfiguration{ScalabilityMode::kS3T2},
+ ScalabilityModeConfiguration{ScalabilityMode::kS3T2h},
+ ScalabilityModeConfiguration{ScalabilityMode::kS3T3},
+ ScalabilityModeConfiguration{ScalabilityMode::kS3T3h},
+ };
+ static_assert(std::size(kScalabilityModeConfigs) == kNumScalabilityModes);
+
+ for (const auto& candidate_mode : kScalabilityModeConfigs) {
+ if (candidate_mode.params == params) {
+ return candidate_mode.scalability_mode;
+ }
+ }
+ return absl::nullopt;
+}
+
absl::optional<ScalabilityMode> ScalabilityModeFromString(
absl::string_view mode_string) {
if (mode_string == "L1T1")
@@ -387,4 +489,8 @@
RTC_CHECK_NOTREACHED();
}
+bool ScalabilityModeIsShiftMode(ScalabilityMode scalability_mode) {
+ return scalability_mode == ScalabilityMode::kL2T2_KEY_SHIFT;
+}
+
} // namespace webrtc
diff --git a/modules/video_coding/svc/scalability_mode_util.h b/modules/video_coding/svc/scalability_mode_util.h
index 9c8193e..1c8a513 100644
--- a/modules/video_coding/svc/scalability_mode_util.h
+++ b/modules/video_coding/svc/scalability_mode_util.h
@@ -25,6 +25,13 @@
static constexpr char kDefaultScalabilityModeStr[] = "L1T2";
+absl::optional<ScalabilityMode> MakeScalabilityMode(
+ int num_spatial_layers,
+ int num_temporal_layers,
+ InterLayerPredMode inter_layer_pred,
+ absl::optional<ScalabilityModeResolutionRatio> ratio,
+ bool shift);
+
absl::optional<ScalabilityMode> ScalabilityModeFromString(
absl::string_view scalability_mode_string);
@@ -38,6 +45,8 @@
absl::optional<ScalabilityModeResolutionRatio> ScalabilityModeToResolutionRatio(
ScalabilityMode scalability_mode);
+bool ScalabilityModeIsShiftMode(ScalabilityMode scalability_mode);
+
ScalabilityMode LimitNumSpatialLayers(ScalabilityMode scalability_mode,
int max_spatial_layers);
diff --git a/modules/video_coding/svc/scalability_mode_util_unittest.cc b/modules/video_coding/svc/scalability_mode_util_unittest.cc
index 448494f..b023a12 100644
--- a/modules/video_coding/svc/scalability_mode_util_unittest.cc
+++ b/modules/video_coding/svc/scalability_mode_util_unittest.cc
@@ -17,9 +17,11 @@
#include "absl/strings/string_view.h"
#include "absl/types/optional.h"
#include "api/video_codecs/scalability_mode.h"
+#include "test/gmock.h"
#include "test/gtest.h"
namespace webrtc {
+
namespace {
TEST(ScalabilityModeUtil, ConvertsL1T2) {
@@ -32,6 +34,26 @@
EXPECT_EQ(ScalabilityModeFromString("not-a-mode"), absl::nullopt);
}
+TEST(ScalabilityModeUtil, MakeScalabilityModeRoundTrip) {
+ const ScalabilityMode kLastEnum = ScalabilityMode::kS3T3h;
+ for (int numerical_enum = 0; numerical_enum <= static_cast<int>(kLastEnum);
+ numerical_enum++) {
+ ScalabilityMode scalability_mode =
+ static_cast<ScalabilityMode>(numerical_enum);
+ absl::optional<ScalabilityMode> created_mode = MakeScalabilityMode(
+ ScalabilityModeToNumSpatialLayers(scalability_mode),
+ ScalabilityModeToNumTemporalLayers(scalability_mode),
+ ScalabilityModeToInterLayerPredMode(scalability_mode),
+ ScalabilityModeToResolutionRatio(scalability_mode),
+ ScalabilityModeIsShiftMode(scalability_mode));
+ EXPECT_THAT(created_mode, ::testing::Optional(scalability_mode))
+ << "Expected "
+ << (created_mode.has_value() ? ScalabilityModeToString(*created_mode)
+ : "(nullopt)")
+ << " to equal " << ScalabilityModeToString(scalability_mode);
+ }
+}
+
// Check roundtrip conversion of all enum values.
TEST(ScalabilityModeUtil, ConvertsAllToAndFromString) {
const ScalabilityMode kLastEnum = ScalabilityMode::kS3T3h;
diff --git a/pc/peer_connection_encodings_integrationtest.cc b/pc/peer_connection_encodings_integrationtest.cc
index d6c4499..fc055b8 100644
--- a/pc/peer_connection_encodings_integrationtest.cc
+++ b/pc/peer_connection_encodings_integrationtest.cc
@@ -25,6 +25,7 @@
#include "api/rtp_transceiver_interface.h"
#include "api/stats/rtcstats_objects.h"
#include "api/units/data_rate.h"
+#include "api/video_codecs/scalability_mode.h"
#include "api/video_codecs/video_decoder_factory_template.h"
#include "api/video_codecs/video_decoder_factory_template_dav1d_adapter.h"
#include "api/video_codecs/video_decoder_factory_template_libvpx_vp8_adapter.h"
@@ -310,6 +311,14 @@
auto* outbound_rtp = FindOutboundRtpByRid(outbound_rtps, rid);
if (!outbound_rtp || !outbound_rtp->scalability_mode.has_value() ||
*outbound_rtp->scalability_mode != expected_scalability_mode) {
+ RTC_LOG(LS_INFO) << "Waiting for scalability mode ("
+ << (outbound_rtp
+ ? outbound_rtp->scalability_mode.value_or(
+ "nullopt")
+ : "not found")
+ << ") to be " << expected_scalability_mode;
+ // Sleep to avoid log spam when this is used in ASSERT_TRUE_WAIT().
+ rtc::Thread::Current()->SleepMs(1000);
return false;
}
if (outbound_rtp->frame_height.has_value()) {
@@ -354,9 +363,8 @@
RTC_LOG(LS_ERROR) << "rid=" << resolution.rid << " is "
<< *outbound_rtp->frame_width << "x"
<< *outbound_rtp->frame_height
- << ", this is greater than the "
- << "expected " << resolution.width << "x"
- << resolution.height;
+ << ", this is greater than the " << "expected "
+ << resolution.width << "x" << resolution.height;
return false;
}
}
@@ -832,6 +840,39 @@
EXPECT_FALSE(parameters.encodings[2].scalability_mode.has_value());
}
+TEST_F(PeerConnectionEncodingsIntegrationTest, VP9_OneLayerActive_LegacySvc) {
+ rtc::scoped_refptr<PeerConnectionTestWrapper> local_pc_wrapper = CreatePc();
+ rtc::scoped_refptr<PeerConnectionTestWrapper> remote_pc_wrapper = CreatePc();
+ ExchangeIceCandidates(local_pc_wrapper, remote_pc_wrapper);
+
+ std::vector<cricket::SimulcastLayer> layers =
+ CreateLayers({"f", "h", "q"}, /*active=*/true);
+ rtc::scoped_refptr<RtpTransceiverInterface> transceiver =
+ AddTransceiverWithSimulcastLayers(local_pc_wrapper, remote_pc_wrapper,
+ layers);
+ std::vector<RtpCodecCapability> codecs =
+ GetCapabilitiesAndRestrictToCodec(remote_pc_wrapper, "VP9");
+ transceiver->SetCodecPreferences(codecs);
+
+ // Sending L1T3 with legacy SVC mode means setting 1 layer active.
+ rtc::scoped_refptr<RtpSenderInterface> sender = transceiver->sender();
+ RtpParameters parameters = sender->GetParameters();
+ ASSERT_THAT(parameters.encodings, SizeIs(3));
+ parameters.encodings[0].active = true;
+ parameters.encodings[1].active = false;
+ parameters.encodings[2].active = false;
+ sender->SetParameters(parameters);
+
+ NegotiateWithSimulcastTweaks(local_pc_wrapper, remote_pc_wrapper);
+ local_pc_wrapper->WaitForConnection();
+ remote_pc_wrapper->WaitForConnection();
+
+ // Ensure that we are getting 180P at L1T3 from the "f" rid.
+ ASSERT_TRUE_WAIT(HasOutboundRtpWithRidAndScalabilityMode(
+ local_pc_wrapper, "f", "L1T3", 720 / 4),
+ kLongTimeoutForRampingUp.ms());
+}
+
TEST_F(PeerConnectionEncodingsIntegrationTest,
VP9_AllLayersInactive_LegacySvc) {
rtc::scoped_refptr<PeerConnectionTestWrapper> local_pc_wrapper = CreatePc();