Report proper VP9 scalability mode with layer activation.

This changes the libvpx VP9 encoder to generate the scalability mode based on the current encoding parameters when using layer activation.

Tested: Ran with L3T3_KEY reduced to L2T3_KEY and L1T3 due to bandwidth or layer activation. Added unit tests.
Bug: webrtc:15892
Change-Id: Iaedca4ea5fc3a692996666ceaf0d6aa03fb058a1
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/344760
Commit-Queue: Evan Shrubsole <eshr@google.com>
Reviewed-by: Ilya Nikolaevskiy <ilnik@webrtc.org>
Reviewed-by: Harald Alvestrand <hta@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#42007}
diff --git a/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.cc b/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.cc
index 055ec67..d7a59ee 100644
--- a/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.cc
+++ b/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.cc
@@ -10,11 +10,11 @@
  */
 
 #include <memory>
+
 #ifdef RTC_ENABLE_VP9
 
 #include <algorithm>
 #include <limits>
-#include <tuple>
 #include <utility>
 #include <vector>
 
@@ -87,17 +87,13 @@
   return {0, 0};
 }
 
-using Vp9ScalabilityStructure =
-    std::tuple<std::unique_ptr<ScalableVideoController>, ScalabilityMode>;
-absl::optional<Vp9ScalabilityStructure> CreateVp9ScalabilityStructure(
+std::unique_ptr<ScalableVideoController> CreateVp9ScalabilityStructure(
     const VideoCodec& codec) {
   int num_spatial_layers = codec.VP9().numberOfSpatialLayers;
   int num_temporal_layers =
       std::max(1, int{codec.VP9().numberOfTemporalLayers});
   if (num_spatial_layers == 1 && num_temporal_layers == 1) {
-    return absl::make_optional<Vp9ScalabilityStructure>(
-        std::make_unique<ScalableVideoControllerNoLayering>(),
-        ScalabilityMode::kL1T1);
+    return std::make_unique<ScalableVideoControllerNoLayering>();
   }
 
   char name[20];
@@ -105,7 +101,7 @@
   if (codec.mode == VideoCodecMode::kScreensharing) {
     // TODO(bugs.webrtc.org/11999): Compose names of the structures when they
     // are implemented.
-    return absl::nullopt;
+    return nullptr;
   } else if (codec.VP9().interLayerPred == InterLayerPredMode::kOn ||
              num_spatial_layers == 1) {
     ss << "L" << num_spatial_layers << "T" << num_temporal_layers;
@@ -122,7 +118,7 @@
         codec.height != codec.spatialLayers[num_spatial_layers - 1].height) {
       RTC_LOG(LS_WARNING)
           << "Top layer resolution expected to match overall resolution";
-      return absl::nullopt;
+      return nullptr;
     }
     // Check if the ratio is one of the supported.
     int numerator;
@@ -140,7 +136,7 @@
       RTC_LOG(LS_WARNING) << "Unsupported scalability ratio "
                           << codec.spatialLayers[0].width << ":"
                           << codec.spatialLayers[1].width;
-      return absl::nullopt;
+      return nullptr;
     }
     // Validate ratio is consistent for all spatial layer transitions.
     for (int sid = 1; sid < num_spatial_layers; ++sid) {
@@ -150,7 +146,7 @@
               codec.spatialLayers[sid - 1].height * denominator) {
         RTC_LOG(LS_WARNING) << "Inconsistent scalability ratio " << numerator
                             << ":" << denominator;
-        return absl::nullopt;
+        return nullptr;
       }
     }
   }
@@ -159,7 +155,7 @@
       ScalabilityModeFromString(name);
   if (!scalability_mode.has_value()) {
     RTC_LOG(LS_WARNING) << "Invalid scalability mode " << name;
-    return absl::nullopt;
+    return nullptr;
   }
   auto scalability_structure_controller =
       CreateScalabilityStructure(*scalability_mode);
@@ -168,8 +164,7 @@
   } else {
     RTC_LOG(LS_INFO) << "Created scalability structure " << name;
   }
-  return absl::make_optional<Vp9ScalabilityStructure>(
-      std::move(scalability_structure_controller), *scalability_mode);
+  return scalability_structure_controller;
 }
 
 vpx_svc_ref_frame_config_t Vp9References(
@@ -602,14 +597,7 @@
       num_temporal_layers_ = 1;
     }
     inter_layer_pred_ = inst->VP9().interLayerPred;
-    auto vp9_scalability = CreateVp9ScalabilityStructure(*inst);
-    if (vp9_scalability.has_value()) {
-      std::tie(svc_controller_, scalability_mode_) =
-          std::move(vp9_scalability.value());
-    } else {
-      svc_controller_ = nullptr;
-      scalability_mode_ = absl::nullopt;
-    }
+    svc_controller_ = CreateVp9ScalabilityStructure(*inst);
   }
 
   framerate_controller_ = std::vector<FramerateControllerDeprecated>(
@@ -1461,7 +1449,19 @@
       }
     }
   }
-  codec_specific->scalability_mode = scalability_mode_;
+  // Report the configured scalability mode when one is set (standard mode);
+  // otherwise derive it from the currently active layers (layer activation).
+  if (scalability_mode_) {
+    codec_specific->scalability_mode = scalability_mode_;
+  } else {
+    codec_specific->scalability_mode = MakeScalabilityMode(
+        num_active_spatial_layers_, num_temporal_layers_, inter_layer_pred_,
+        num_active_spatial_layers_ > 1
+            ? absl::make_optional(ScalabilityModeResolutionRatio::kTwoToOne)
+            : absl::nullopt,
+        /*shift=*/false);
+  }
+
   return true;
 }
 
diff --git a/modules/video_coding/svc/scalability_mode_util.cc b/modules/video_coding/svc/scalability_mode_util.cc
index 35d66df..c6b3b47 100644
--- a/modules/video_coding/svc/scalability_mode_util.cc
+++ b/modules/video_coding/svc/scalability_mode_util.cc
@@ -10,13 +10,115 @@
 
 #include "modules/video_coding/svc/scalability_mode_util.h"
 
+#include <array>
+#include <utility>
+
 #include "absl/strings/string_view.h"
 #include "absl/types/optional.h"
 #include "api/video_codecs/scalability_mode.h"
+#include "api/video_codecs/video_codec.h"
 #include "rtc_base/checks.h"
 
 namespace webrtc {
 
+namespace {
+struct ScalabilityModeParameters {
+  int num_spatial_layers;
+  int num_temporal_layers;
+  InterLayerPredMode inter_layer_pred;
+  absl::optional<ScalabilityModeResolutionRatio> ratio;
+  bool shift;
+
+  constexpr bool operator==(const ScalabilityModeParameters& other) const {
+    // For all L1Tx modes, ignore inter_layer_pred, ratio and shift.
+    if (this->num_spatial_layers == 1) {
+      return this->num_spatial_layers == other.num_spatial_layers &&
+             this->num_temporal_layers == other.num_temporal_layers;
+    }
+    return this->num_spatial_layers == other.num_spatial_layers &&
+           this->num_temporal_layers == other.num_temporal_layers &&
+           this->inter_layer_pred == other.inter_layer_pred &&
+           this->ratio == other.ratio && this->shift == other.shift;
+  }
+};
+
+struct ScalabilityModeConfiguration {
+  explicit ScalabilityModeConfiguration(ScalabilityMode scalability_mode)
+      : scalability_mode(scalability_mode),
+        params{
+            .num_spatial_layers =
+                (ScalabilityModeToNumSpatialLayers(scalability_mode)),
+            .num_temporal_layers =
+                (ScalabilityModeToNumTemporalLayers(scalability_mode)),
+            .inter_layer_pred =
+                (ScalabilityModeToInterLayerPredMode(scalability_mode)),
+            .ratio = (ScalabilityModeToResolutionRatio(scalability_mode)),
+            .shift = (ScalabilityModeIsShiftMode(scalability_mode)),
+        } {}
+
+  const ScalabilityMode scalability_mode;
+  const ScalabilityModeParameters params;
+};
+
+constexpr size_t kNumScalabilityModes =
+    static_cast<size_t>(ScalabilityMode::kS3T3h) + 1;
+}  // namespace
+
+absl::optional<ScalabilityMode> MakeScalabilityMode(
+    int num_spatial_layers,
+    int num_temporal_layers,
+    InterLayerPredMode inter_layer_pred,
+    absl::optional<ScalabilityModeResolutionRatio> ratio,
+    bool shift) {
+  ScalabilityModeParameters params{num_spatial_layers, num_temporal_layers,
+                                   inter_layer_pred, std::move(ratio), shift};
+
+  static const ScalabilityModeConfiguration kScalabilityModeConfigs[] = {
+      ScalabilityModeConfiguration{ScalabilityMode::kL1T1},
+      ScalabilityModeConfiguration{ScalabilityMode::kL1T2},
+      ScalabilityModeConfiguration{ScalabilityMode::kL1T3},
+      ScalabilityModeConfiguration{ScalabilityMode::kL2T1},
+      ScalabilityModeConfiguration{ScalabilityMode::kL2T1h},
+      ScalabilityModeConfiguration{ScalabilityMode::kL2T1_KEY},
+      ScalabilityModeConfiguration{ScalabilityMode::kL2T2},
+      ScalabilityModeConfiguration{ScalabilityMode::kL2T2h},
+      ScalabilityModeConfiguration{ScalabilityMode::kL2T2_KEY},
+      ScalabilityModeConfiguration{ScalabilityMode::kL2T2_KEY_SHIFT},
+      ScalabilityModeConfiguration{ScalabilityMode::kL2T3},
+      ScalabilityModeConfiguration{ScalabilityMode::kL2T3h},
+      ScalabilityModeConfiguration{ScalabilityMode::kL2T3_KEY},
+      ScalabilityModeConfiguration{ScalabilityMode::kL3T1},
+      ScalabilityModeConfiguration{ScalabilityMode::kL3T1h},
+      ScalabilityModeConfiguration{ScalabilityMode::kL3T1_KEY},
+      ScalabilityModeConfiguration{ScalabilityMode::kL3T2},
+      ScalabilityModeConfiguration{ScalabilityMode::kL3T2h},
+      ScalabilityModeConfiguration{ScalabilityMode::kL3T2_KEY},
+      ScalabilityModeConfiguration{ScalabilityMode::kL3T3},
+      ScalabilityModeConfiguration{ScalabilityMode::kL3T3h},
+      ScalabilityModeConfiguration{ScalabilityMode::kL3T3_KEY},
+      ScalabilityModeConfiguration{ScalabilityMode::kS2T1},
+      ScalabilityModeConfiguration{ScalabilityMode::kS2T1h},
+      ScalabilityModeConfiguration{ScalabilityMode::kS2T2},
+      ScalabilityModeConfiguration{ScalabilityMode::kS2T2h},
+      ScalabilityModeConfiguration{ScalabilityMode::kS2T3},
+      ScalabilityModeConfiguration{ScalabilityMode::kS2T3h},
+      ScalabilityModeConfiguration{ScalabilityMode::kS3T1},
+      ScalabilityModeConfiguration{ScalabilityMode::kS3T1h},
+      ScalabilityModeConfiguration{ScalabilityMode::kS3T2},
+      ScalabilityModeConfiguration{ScalabilityMode::kS3T2h},
+      ScalabilityModeConfiguration{ScalabilityMode::kS3T3},
+      ScalabilityModeConfiguration{ScalabilityMode::kS3T3h},
+  };
+  static_assert(std::size(kScalabilityModeConfigs) == kNumScalabilityModes);
+
+  for (const auto& candidate_mode : kScalabilityModeConfigs) {
+    if (candidate_mode.params == params) {
+      return candidate_mode.scalability_mode;
+    }
+  }
+  return absl::nullopt;
+}
+
 absl::optional<ScalabilityMode> ScalabilityModeFromString(
     absl::string_view mode_string) {
   if (mode_string == "L1T1")
@@ -387,4 +489,8 @@
   RTC_CHECK_NOTREACHED();
 }
 
+bool ScalabilityModeIsShiftMode(ScalabilityMode scalability_mode) {
+  return scalability_mode == ScalabilityMode::kL2T2_KEY_SHIFT;
+}
+
 }  // namespace webrtc
diff --git a/modules/video_coding/svc/scalability_mode_util.h b/modules/video_coding/svc/scalability_mode_util.h
index 9c8193e..1c8a513 100644
--- a/modules/video_coding/svc/scalability_mode_util.h
+++ b/modules/video_coding/svc/scalability_mode_util.h
@@ -25,6 +25,13 @@
 
 static constexpr char kDefaultScalabilityModeStr[] = "L1T2";
 
+absl::optional<ScalabilityMode> MakeScalabilityMode(
+    int num_spatial_layers,
+    int num_temporal_layers,
+    InterLayerPredMode inter_layer_pred,
+    absl::optional<ScalabilityModeResolutionRatio> ratio,
+    bool shift);
+
 absl::optional<ScalabilityMode> ScalabilityModeFromString(
     absl::string_view scalability_mode_string);
 
@@ -38,6 +45,8 @@
 absl::optional<ScalabilityModeResolutionRatio> ScalabilityModeToResolutionRatio(
     ScalabilityMode scalability_mode);
 
+bool ScalabilityModeIsShiftMode(ScalabilityMode scalability_mode);
+
 ScalabilityMode LimitNumSpatialLayers(ScalabilityMode scalability_mode,
                                       int max_spatial_layers);
 
diff --git a/modules/video_coding/svc/scalability_mode_util_unittest.cc b/modules/video_coding/svc/scalability_mode_util_unittest.cc
index 448494f..b023a12 100644
--- a/modules/video_coding/svc/scalability_mode_util_unittest.cc
+++ b/modules/video_coding/svc/scalability_mode_util_unittest.cc
@@ -17,9 +17,11 @@
 #include "absl/strings/string_view.h"
 #include "absl/types/optional.h"
 #include "api/video_codecs/scalability_mode.h"
+#include "test/gmock.h"
 #include "test/gtest.h"
 
 namespace webrtc {
+
 namespace {
 
 TEST(ScalabilityModeUtil, ConvertsL1T2) {
@@ -32,6 +34,26 @@
   EXPECT_EQ(ScalabilityModeFromString("not-a-mode"), absl::nullopt);
 }
 
+TEST(ScalabilityModeUtil, MakeScalabilityModeRoundTrip) {
+  const ScalabilityMode kLastEnum = ScalabilityMode::kS3T3h;
+  for (int numerical_enum = 0; numerical_enum <= static_cast<int>(kLastEnum);
+       numerical_enum++) {
+    ScalabilityMode scalability_mode =
+        static_cast<ScalabilityMode>(numerical_enum);
+    absl::optional<ScalabilityMode> created_mode = MakeScalabilityMode(
+        ScalabilityModeToNumSpatialLayers(scalability_mode),
+        ScalabilityModeToNumTemporalLayers(scalability_mode),
+        ScalabilityModeToInterLayerPredMode(scalability_mode),
+        ScalabilityModeToResolutionRatio(scalability_mode),
+        ScalabilityModeIsShiftMode(scalability_mode));
+    EXPECT_THAT(created_mode, ::testing::Optional(scalability_mode))
+        << "Expected "
+        << (created_mode.has_value() ? ScalabilityModeToString(*created_mode)
+                                     : "(nullopt)")
+        << " to equal " << ScalabilityModeToString(scalability_mode);
+  }
+}
+
 // Check roundtrip conversion of all enum values.
 TEST(ScalabilityModeUtil, ConvertsAllToAndFromString) {
   const ScalabilityMode kLastEnum = ScalabilityMode::kS3T3h;
diff --git a/pc/peer_connection_encodings_integrationtest.cc b/pc/peer_connection_encodings_integrationtest.cc
index d6c4499..fc055b8 100644
--- a/pc/peer_connection_encodings_integrationtest.cc
+++ b/pc/peer_connection_encodings_integrationtest.cc
@@ -25,6 +25,7 @@
 #include "api/rtp_transceiver_interface.h"
 #include "api/stats/rtcstats_objects.h"
 #include "api/units/data_rate.h"
+#include "api/video_codecs/scalability_mode.h"
 #include "api/video_codecs/video_decoder_factory_template.h"
 #include "api/video_codecs/video_decoder_factory_template_dav1d_adapter.h"
 #include "api/video_codecs/video_decoder_factory_template_libvpx_vp8_adapter.h"
@@ -310,6 +311,14 @@
     auto* outbound_rtp = FindOutboundRtpByRid(outbound_rtps, rid);
     if (!outbound_rtp || !outbound_rtp->scalability_mode.has_value() ||
         *outbound_rtp->scalability_mode != expected_scalability_mode) {
+      RTC_LOG(LS_INFO) << "Waiting for scalability mode ("
+                       << (outbound_rtp
+                               ? outbound_rtp->scalability_mode.value_or(
+                                     "nullopt")
+                               : "not found")
+                       << ") to be " << expected_scalability_mode;
+      // Sleep to avoid log spam when this is used in ASSERT_TRUE_WAIT().
+      rtc::Thread::Current()->SleepMs(1000);
       return false;
     }
     if (outbound_rtp->frame_height.has_value()) {
@@ -354,9 +363,8 @@
         RTC_LOG(LS_ERROR) << "rid=" << resolution.rid << " is "
                           << *outbound_rtp->frame_width << "x"
                           << *outbound_rtp->frame_height
-                          << ", this is greater than the "
-                          << "expected " << resolution.width << "x"
-                          << resolution.height;
+                          << ", this is greater than the " << "expected "
+                          << resolution.width << "x" << resolution.height;
         return false;
       }
     }
@@ -832,6 +840,39 @@
   EXPECT_FALSE(parameters.encodings[2].scalability_mode.has_value());
 }
 
+TEST_F(PeerConnectionEncodingsIntegrationTest, VP9_OneLayerActive_LegacySvc) {
+  rtc::scoped_refptr<PeerConnectionTestWrapper> local_pc_wrapper = CreatePc();
+  rtc::scoped_refptr<PeerConnectionTestWrapper> remote_pc_wrapper = CreatePc();
+  ExchangeIceCandidates(local_pc_wrapper, remote_pc_wrapper);
+
+  std::vector<cricket::SimulcastLayer> layers =
+      CreateLayers({"f", "h", "q"}, /*active=*/true);
+  rtc::scoped_refptr<RtpTransceiverInterface> transceiver =
+      AddTransceiverWithSimulcastLayers(local_pc_wrapper, remote_pc_wrapper,
+                                        layers);
+  std::vector<RtpCodecCapability> codecs =
+      GetCapabilitiesAndRestrictToCodec(remote_pc_wrapper, "VP9");
+  transceiver->SetCodecPreferences(codecs);
+
+  // Sending L1T3 with legacy SVC mode means setting 1 layer active.
+  rtc::scoped_refptr<RtpSenderInterface> sender = transceiver->sender();
+  RtpParameters parameters = sender->GetParameters();
+  ASSERT_THAT(parameters.encodings, SizeIs(3));
+  parameters.encodings[0].active = true;
+  parameters.encodings[1].active = false;
+  parameters.encodings[2].active = false;
+  sender->SetParameters(parameters);
+
+  NegotiateWithSimulcastTweaks(local_pc_wrapper, remote_pc_wrapper);
+  local_pc_wrapper->WaitForConnection();
+  remote_pc_wrapper->WaitForConnection();
+
+  // Ensure that we are getting 180P at L1T3 from the "f" rid.
+  ASSERT_TRUE_WAIT(HasOutboundRtpWithRidAndScalabilityMode(
+                       local_pc_wrapper, "f", "L1T3", 720 / 4),
+                   kLongTimeoutForRampingUp.ms());
+}
+
 TEST_F(PeerConnectionEncodingsIntegrationTest,
        VP9_AllLayersInactive_LegacySvc) {
   rtc::scoped_refptr<PeerConnectionTestWrapper> local_pc_wrapper = CreatePc();