Add the dependency descriptor for H.264 temporal scalability

Validate the change using svc_e2e_tests.

Bug: webrtc:13961
Change-Id: Ie7edcf5a0684f46e4d26155b77cebbebbd46d21f
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/269541
Reviewed-by: Philip Eliasson <philipel@webrtc.org>
Commit-Queue: Daniel.L (Byoungchan) Lee <daniel.l@hpcnt.com>
Reviewed-by: Florent Castelli <orphis@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#38153}
diff --git a/modules/video_coding/BUILD.gn b/modules/video_coding/BUILD.gn
index 0a3af1b..fd5f43e 100644
--- a/modules/video_coding/BUILD.gn
+++ b/modules/video_coding/BUILD.gn
@@ -524,6 +524,8 @@
     "../../rtc_base/system:rtc_export",
     "../../system_wrappers:field_trial",
     "../../system_wrappers:metrics",
+    "svc:scalability_structures",
+    "svc:scalable_video_controller",
     "//third_party/libyuv",
   ]
   absl_deps = [
diff --git a/modules/video_coding/codecs/h264/h264_encoder_impl.cc b/modules/video_coding/codecs/h264/h264_encoder_impl.cc
index f6d52c6..fc3fd19 100644
--- a/modules/video_coding/codecs/h264/h264_encoder_impl.cc
+++ b/modules/video_coding/codecs/h264/h264_encoder_impl.cc
@@ -22,6 +22,7 @@
 
 #include "absl/strings/match.h"
 #include "common_video/libyuv/include/webrtc_libyuv.h"
+#include "modules/video_coding/svc/create_scalability_structure.h"
 #include "modules/video_coding/utility/simulcast_rate_allocator.h"
 #include "modules/video_coding/utility/simulcast_utility.h"
 #include "rtc_base/checks.h"
@@ -157,6 +158,7 @@
   encoders_.reserve(kMaxSimulcastStreams);
   configurations_.reserve(kMaxSimulcastStreams);
   tl0sync_limit_.reserve(kMaxSimulcastStreams);
+  svc_controllers_.reserve(kMaxSimulcastStreams);
 }
 
 H264EncoderImpl::~H264EncoderImpl() {
@@ -196,6 +198,7 @@
   encoded_images_.resize(number_of_streams);
   encoders_.resize(number_of_streams);
   pictures_.resize(number_of_streams);
+  svc_controllers_.resize(number_of_streams);
   configurations_.resize(number_of_streams);
   tl0sync_limit_.resize(number_of_streams);
 
@@ -281,6 +284,32 @@
     encoded_images_[i].set_size(0);
 
     tl0sync_limit_[i] = configurations_[i].num_temporal_layers;
+    absl::optional<ScalabilityMode> scalability_mode;
+    switch (configurations_[i].num_temporal_layers) {
+      case 0:
+        break;
+      case 1:
+        scalability_mode = ScalabilityMode::kL1T1;
+        break;
+      case 2:
+        scalability_mode = ScalabilityMode::kL1T2;
+        break;
+      case 3:
+        scalability_mode = ScalabilityMode::kL1T3;
+        break;
+      default:
+        RTC_DCHECK_NOTREACHED();
+    }
+    if (scalability_mode.has_value()) {
+      svc_controllers_[i] =
+          CreateScalabilityStructure(scalability_mode.value());
+      if (svc_controllers_[i] == nullptr) {
+        RTC_LOG(LS_ERROR) << "Failed to create scalability structure";
+        Release();
+        ReportError();
+        return WEBRTC_VIDEO_CODEC_ERROR;
+      }
+    }
   }
 
   SimulcastRateAllocator init_allocator(codec_);
@@ -305,6 +334,7 @@
   encoded_images_.clear();
   pictures_.clear();
   tl0sync_limit_.clear();
+  svc_controllers_.clear();
   return WEBRTC_VIDEO_CODEC_OK;
 }
 
@@ -467,6 +497,12 @@
     SFrameBSInfo info;
     memset(&info, 0, sizeof(SFrameBSInfo));
 
+    std::vector<ScalableVideoController::LayerFrameConfig> layer_frames;
+    if (svc_controllers_[i]) {
+      layer_frames = svc_controllers_[i]->NextFrameConfig(send_key_frame);
+      RTC_CHECK_EQ(layer_frames.size(), 1);
+    }
+
     // Encode!
     int enc_ret = encoders_[i]->EncodeFrame(&pictures_[i], &info);
     if (enc_ret != 0) {
@@ -510,6 +546,15 @@
         codec_specific.codecSpecific.H264.temporal_idx = tid;
         codec_specific.codecSpecific.H264.base_layer_sync =
             tid > 0 && tid < tl0sync_limit_[i];
+        if (svc_controllers_[i]) {
+          if (layer_frames[0].TemporalId() != tid) {
+            RTC_LOG(LS_WARNING)
+                << "Encoder produced a frame for layer S" << (i + 1) << "T"
+                << tid + 1 << " that wasn't requested.";
+            continue;
+          }
+          encoded_images_[i].SetTemporalIndex(tid);
+        }
         if (codec_specific.codecSpecific.H264.base_layer_sync) {
           tl0sync_limit_[i] = tid;
         }
@@ -517,6 +562,14 @@
           tl0sync_limit_[i] = configurations_[i].num_temporal_layers;
         }
       }
+      if (svc_controllers_[i]) {
+        codec_specific.generic_frame_info =
+            svc_controllers_[i]->OnEncodeDone(layer_frames[0]);
+        if (send_key_frame && codec_specific.generic_frame_info.has_value()) {
+          codec_specific.template_structure =
+              svc_controllers_[i]->DependencyStructure();
+        }
+      }
       encoded_image_callback_->OnEncodedImage(encoded_images_[i],
                                               &codec_specific);
     }
diff --git a/modules/video_coding/codecs/h264/h264_encoder_impl.h b/modules/video_coding/codecs/h264/h264_encoder_impl.h
index 1163464..780781b 100644
--- a/modules/video_coding/codecs/h264/h264_encoder_impl.h
+++ b/modules/video_coding/codecs/h264/h264_encoder_impl.h
@@ -28,6 +28,7 @@
 #include "api/video_codecs/video_encoder.h"
 #include "common_video/h264/h264_bitstream_parser.h"
 #include "modules/video_coding/codecs/h264/include/h264.h"
+#include "modules/video_coding/svc/scalable_video_controller.h"
 #include "modules/video_coding/utility/quality_scaler.h"
 #include "third_party/openh264/src/codec/api/svc/codec_app_def.h"
 
@@ -97,6 +98,7 @@
   std::vector<rtc::scoped_refptr<I420Buffer>> downscaled_buffers_;
   std::vector<LayerConfig> configurations_;
   std::vector<EncodedImage> encoded_images_;
+  std::vector<std::unique_ptr<ScalableVideoController>> svc_controllers_;
 
   VideoCodec codec_;
   H264PacketizationMode packetization_mode_;
diff --git a/pc/test/svc_e2e_tests.cc b/pc/test/svc_e2e_tests.cc
index 4e15c6c..8482c77 100644
--- a/pc/test/svc_e2e_tests.cc
+++ b/pc/test/svc_e2e_tests.cc
@@ -335,11 +335,12 @@
     SvcTest,
     Combine(ValuesIn({
                 SvcTestParameters::Create(kH264CodecName, "L1T1"),
-                // SSvcTestParameters::Create(kH264CodecName, "L1T2"),
-                // SSvcTestParameters::Create(kH264CodecName, "L1T3"),
+                SvcTestParameters::Create(kH264CodecName, "L1T2"),
+                SvcTestParameters::Create(kH264CodecName, "L1T3"),
             }),
-            Values(UseDependencyDescriptor::Disabled,
-                   UseDependencyDescriptor::Enabled)),
+            // Like AV1, the H.264 RTP format does not include SVC-related
+            // information, so always use the Dependency Descriptor.
+            Values(UseDependencyDescriptor::Enabled)),
     SvcTestNameGenerator);
 #endif