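Enable H.264 temporal scalability in simulcast.

H264EncoderImpl now takes the number of temporal layers from each
simulcast stream's configuration and tracks the TL0 sync limit per
stream, replacing the single encoder-wide num_temporal_layers_ and
tl0sync_limit_ members. SimulcastEncoderAdapter copies each stream's
numberOfTemporalLayers into the per-stream H.264 codec settings.

The simulcast test fixture now records temporal_idx and base_layer_sync
for H.264 frames, uses the default temporal layer profile for H.264, and
the 3-3-3 spatio-temporal pattern test is enabled for the H.264 encoder.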

Bug: webrtc:10651
Change-Id: I58372186930ce33e925f85edb0f308657dbfe273
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/142840
Reviewed-by: Sergey Silkin <ssilkin@webrtc.org>
Reviewed-by: Stefan Holmer <stefan@webrtc.org>
Commit-Queue: Sergey Silkin <ssilkin@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#28381}
diff --git a/media/engine/simulcast_encoder_adapter.cc b/media/engine/simulcast_encoder_adapter.cc
index 6d8bedb..596c975 100644
--- a/media/engine/simulcast_encoder_adapter.cc
+++ b/media/engine/simulcast_encoder_adapter.cc
@@ -585,6 +585,9 @@
       // Turn off denoising for all streams but the highest resolution.
       stream_codec->VP8()->denoisingOn = false;
     }
+  } else if (inst.codecType == webrtc::kVideoCodecH264) {
+    stream_codec->H264()->numberOfTemporalLayers =
+        inst.simulcastStream[stream_index].numberOfTemporalLayers;
   }
   // TODO(ronghuawu): what to do with targetBitrate.
 
diff --git a/modules/video_coding/codecs/h264/h264_encoder_impl.cc b/modules/video_coding/codecs/h264/h264_encoder_impl.cc
index 13565bd..4334dc3 100644
--- a/modules/video_coding/codecs/h264/h264_encoder_impl.cc
+++ b/modules/video_coding/codecs/h264/h264_encoder_impl.cc
@@ -155,9 +155,7 @@
       number_of_cores_(0),
       encoded_image_callback_(nullptr),
       has_reported_init_(false),
-      has_reported_error_(false),
-      num_temporal_layers_(1),
-      tl0sync_limit_(0) {
+      has_reported_error_(false) {
   RTC_CHECK(absl::EqualsIgnoreCase(codec.name, cricket::kH264CodecName));
   std::string packetization_mode_string;
   if (codec.GetParam(cricket::kH264FmtpPacketizationMode,
@@ -169,6 +167,7 @@
   encoded_images_.reserve(kMaxSimulcastStreams);
   encoders_.reserve(kMaxSimulcastStreams);
   configurations_.reserve(kMaxSimulcastStreams);
+  tl0sync_limit_.reserve(kMaxSimulcastStreams);
 }
 
 H264EncoderImpl::~H264EncoderImpl() {
@@ -209,6 +208,7 @@
   encoders_.resize(number_of_streams);
   pictures_.resize(number_of_streams);
   configurations_.resize(number_of_streams);
+  tl0sync_limit_.resize(number_of_streams);
 
   number_of_cores_ = settings.number_of_cores;
   max_payload_size_ = settings.max_payload_size;
@@ -221,8 +221,6 @@
     codec_.simulcastStream[0].height = codec_.height;
   }
 
-  num_temporal_layers_ = codec_.H264()->numberOfTemporalLayers;
-
   for (int i = 0, idx = number_of_streams - 1; i < number_of_streams;
        ++i, --idx) {
     ISVCEncoder* openh264_encoder;
@@ -253,6 +251,8 @@
     configurations_[i].max_frame_rate = static_cast<float>(codec_.maxFramerate);
     configurations_[i].frame_dropping_on = codec_.H264()->frameDroppingOn;
     configurations_[i].key_frame_interval = codec_.H264()->keyFrameInterval;
+    configurations_[i].num_temporal_layers =
+        codec_.simulcastStream[idx].numberOfTemporalLayers;
 
     // Create downscaled image buffers.
     if (i > 0) {
@@ -290,6 +290,8 @@
     encoded_images_[i]._encodedWidth = codec_.simulcastStream[idx].width;
     encoded_images_[i]._encodedHeight = codec_.simulcastStream[idx].height;
     encoded_images_[i].set_size(0);
+
+    tl0sync_limit_[i] = configurations_[i].num_temporal_layers;
   }
 
   SimulcastRateAllocator init_allocator(codec_);
@@ -312,6 +314,7 @@
   configurations_.clear();
   encoded_images_.clear();
   pictures_.clear();
+  tl0sync_limit_.clear();
   return WEBRTC_VIDEO_CODEC_OK;
 }
 
@@ -510,16 +513,16 @@
       codec_specific.codecSpecific.H264.idr_frame =
           info.eFrameType == videoFrameTypeIDR;
       codec_specific.codecSpecific.H264.base_layer_sync = false;
-      if (num_temporal_layers_ > 1) {
+      if (configurations_[i].num_temporal_layers > 1) {
         const uint8_t tid = info.sLayerInfo[0].uiTemporalId;
         codec_specific.codecSpecific.H264.temporal_idx = tid;
         codec_specific.codecSpecific.H264.base_layer_sync =
-            tid > 0 && tid < tl0sync_limit_;
+            tid > 0 && tid < tl0sync_limit_[i];
         if (codec_specific.codecSpecific.H264.base_layer_sync) {
-          tl0sync_limit_ = tid;
+          tl0sync_limit_[i] = tid;
         }
         if (tid == 0) {
-          tl0sync_limit_ = num_temporal_layers_;
+          tl0sync_limit_[i] = configurations_[i].num_temporal_layers;
         }
       }
       encoded_image_callback_->OnEncodedImage(encoded_images_[i],
@@ -573,7 +576,7 @@
       encoder_params.iTargetBitrate;
   encoder_params.sSpatialLayers[0].iMaxSpatialBitrate =
       encoder_params.iMaxBitrate;
-  encoder_params.iTemporalLayerNum = num_temporal_layers_;
+  encoder_params.iTemporalLayerNum = configurations_[i].num_temporal_layers;
   if (encoder_params.iTemporalLayerNum > 1) {
     encoder_params.iNumRefFrame = 1;
   }
diff --git a/modules/video_coding/codecs/h264/h264_encoder_impl.h b/modules/video_coding/codecs/h264/h264_encoder_impl.h
index 6097388..6c0f0bc 100644
--- a/modules/video_coding/codecs/h264/h264_encoder_impl.h
+++ b/modules/video_coding/codecs/h264/h264_encoder_impl.h
@@ -49,6 +49,7 @@
     uint32_t max_bps = 0;
     bool frame_dropping_on = false;
     int key_frame_interval = 0;
+    int num_temporal_layers = 1;
 
     void SetStreamState(bool send_stream);
   };
@@ -107,8 +108,7 @@
   bool has_reported_init_;
   bool has_reported_error_;
 
-  int num_temporal_layers_;
-  uint8_t tl0sync_limit_;
+  std::vector<uint8_t> tl0sync_limit_;
 };
 
 }  // namespace webrtc
diff --git a/modules/video_coding/codecs/h264/h264_simulcast_unittest.cc b/modules/video_coding/codecs/h264/h264_simulcast_unittest.cc
index 3b720b3..c7f3661 100644
--- a/modules/video_coding/codecs/h264/h264_simulcast_unittest.cc
+++ b/modules/video_coding/codecs/h264/h264_simulcast_unittest.cc
@@ -95,5 +95,10 @@
   fixture->TestStrideEncodeDecode();
 }
 
+TEST(TestH264Simulcast, TestSpatioTemporalLayers333PatternEncoder) {
+  auto fixture = CreateSpecificSimulcastTestFixture();
+  fixture->TestSpatioTemporalLayers333PatternEncoder();
+}
+
 }  // namespace test
 }  // namespace webrtc
diff --git a/modules/video_coding/utility/simulcast_test_fixture_impl.cc b/modules/video_coding/utility/simulcast_test_fixture_impl.cc
index d63e67f..79bbdf2 100644
--- a/modules/video_coding/utility/simulcast_test_fixture_impl.cc
+++ b/modules/video_coding/utility/simulcast_test_fixture_impl.cc
@@ -78,6 +78,7 @@
                         const CodecSpecificInfo* codec_specific_info,
                         const RTPFragmentationHeader* fragmentation) override {
     bool is_vp8 = (codec_specific_info->codecType == kVideoCodecVP8);
+    bool is_h264 = (codec_specific_info->codecType == kVideoCodecH264);
     // Only store the base layer.
     if (encoded_image.SpatialIndex().value_or(0) == 0) {
       if (encoded_image._frameType == VideoFrameType::kVideoFrameKey) {
@@ -102,6 +103,11 @@
           codec_specific_info->codecSpecific.VP8.layerSync;
       temporal_layer_[encoded_image.SpatialIndex().value_or(0)] =
           codec_specific_info->codecSpecific.VP8.temporalIdx;
+    } else if (is_h264) {
+      layer_sync_[encoded_image.SpatialIndex().value_or(0)] =
+          codec_specific_info->codecSpecific.H264.base_layer_sync;
+      temporal_layer_[encoded_image.SpatialIndex().value_or(0)] =
+          codec_specific_info->codecSpecific.H264.temporal_idx;
     }
     return Result(Result::OK, encoded_image.Timestamp());
   }
@@ -263,8 +269,8 @@
     : codec_type_(PayloadStringToCodecType(video_format.name)) {
   encoder_ = encoder_factory->CreateVideoEncoder(video_format);
   decoder_ = decoder_factory->CreateVideoDecoder(video_format);
-  SetUpCodec(codec_type_ == kVideoCodecVP8 ? kDefaultTemporalLayerProfile
-    : kNoTemporalLayerProfile);
+  SetUpCodec((codec_type_ == kVideoCodecVP8 || codec_type_ == kVideoCodecH264)
+      ? kDefaultTemporalLayerProfile : kNoTemporalLayerProfile);
 }
 
 SimulcastTestFixtureImpl::~SimulcastTestFixtureImpl() {
@@ -677,7 +683,7 @@
 // 3-3-3 pattern: 3 temporal layers for all spatial streams, so same
 // temporal_layer id and layer_sync is expected for all streams.
 void SimulcastTestFixtureImpl::TestSpatioTemporalLayers333PatternEncoder() {
-  EXPECT_EQ(codec_type_, kVideoCodecVP8);
+  bool is_h264 = codec_type_ == kVideoCodecH264;
   TestEncodedImageCallback encoder_callback;
   encoder_->RegisterEncodeCompleteCallback(&encoder_callback);
   SetRates(kMaxBitrates[2], 30);  // To get all three streams.
@@ -688,7 +694,7 @@
   // First frame: #0.
   EXPECT_EQ(0, encoder_->Encode(*input_frame_, NULL));
   SetExpectedValues3<int>(0, 0, 0, expected_temporal_idx);
-  SetExpectedValues3<bool>(true, true, true, expected_layer_sync);
+  SetExpectedValues3<bool>(!is_h264, !is_h264, !is_h264, expected_layer_sync);
   VerifyTemporalIdxAndSyncForAllSpatialLayers(
       &encoder_callback, expected_temporal_idx, expected_layer_sync, 3);
 
@@ -728,7 +734,7 @@
   input_frame_->set_timestamp(input_frame_->timestamp() + 3000);
   EXPECT_EQ(0, encoder_->Encode(*input_frame_, NULL));
   SetExpectedValues3<int>(2, 2, 2, expected_temporal_idx);
-  SetExpectedValues3<bool>(false, false, false, expected_layer_sync);
+  SetExpectedValues3<bool>(is_h264, is_h264, is_h264, expected_layer_sync);
   VerifyTemporalIdxAndSyncForAllSpatialLayers(
       &encoder_callback, expected_temporal_idx, expected_layer_sync, 3);
 }