Write VP9 RTP scalability structure (SS) on key frames of each independently coded spatial layer.

Bug: webrtc:10565
Change-Id: I186ca043268872bacd1dc4a462b67632b74e6510
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/133621
Reviewed-by: Ilya Nikolaevskiy <ilnik@webrtc.org>
Commit-Queue: Sergey Silkin <ssilkin@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#27703}
diff --git a/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc b/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc
index d956b5d..95ba266 100644
--- a/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc
+++ b/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc
@@ -488,12 +488,15 @@
     EXPECT_FALSE(codec_specific[0].codecSpecific.VP9.inter_layer_predicted);
     EXPECT_EQ(codec_specific[0].codecSpecific.VP9.non_ref_for_inter_layer_pred,
               inter_layer_pred == InterLayerPredMode::kOff);
+    EXPECT_TRUE(codec_specific[0].codecSpecific.VP9.ss_data_available);
 
     ASSERT_EQ(frames[1].SpatialIndex(), 1);
     ASSERT_FALSE(codec_specific[1].codecSpecific.VP9.inter_pic_predicted);
     EXPECT_EQ(codec_specific[1].codecSpecific.VP9.inter_layer_predicted,
               inter_layer_pred == InterLayerPredMode::kOn ||
                   inter_layer_pred == InterLayerPredMode::kOnKeyPic);
+    EXPECT_EQ(codec_specific[1].codecSpecific.VP9.ss_data_available,
+              inter_layer_pred == InterLayerPredMode::kOff);
     EXPECT_TRUE(
         codec_specific[1].codecSpecific.VP9.non_ref_for_inter_layer_pred);
 
@@ -508,6 +511,7 @@
     EXPECT_FALSE(codec_specific[0].codecSpecific.VP9.inter_layer_predicted);
     EXPECT_EQ(codec_specific[0].codecSpecific.VP9.non_ref_for_inter_layer_pred,
               inter_layer_pred != InterLayerPredMode::kOn);
+    EXPECT_FALSE(codec_specific[0].codecSpecific.VP9.ss_data_available);
 
     ASSERT_EQ(frames[1].SpatialIndex(), 1);
     ASSERT_TRUE(codec_specific[1].codecSpecific.VP9.inter_pic_predicted);
@@ -515,6 +519,7 @@
               inter_layer_pred == InterLayerPredMode::kOn);
     EXPECT_TRUE(
         codec_specific[1].codecSpecific.VP9.non_ref_for_inter_layer_pred);
+    EXPECT_FALSE(codec_specific[1].codecSpecific.VP9.ss_data_available);
   }
 }
 
@@ -1008,6 +1013,7 @@
             encoder_->Encode(*NextInputFrame(), nullptr));
   ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific_info));
   EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.temporal_idx, 1u);
+  EXPECT_FALSE(codec_specific_info[0].codecSpecific.VP9.ss_data_available);
 
   // Next is TL0 frame, which should have delayed SS structure.
   SetWaitForEncodedFramesThreshold(num_spatial_layers - 1);
@@ -1018,9 +1024,8 @@
   EXPECT_TRUE(codec_specific_info[0].codecSpecific.VP9.ss_data_available);
   EXPECT_TRUE(codec_specific_info[0]
                   .codecSpecific.VP9.spatial_layer_resolution_present);
-  EXPECT_EQ(
-      codec_specific_info[0].codecSpecific.VP9.width[num_spatial_layers - 1],
-      0u);
+  EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.num_spatial_layers,
+            num_spatial_layers - 1);
 }
 
 TEST_F(TestVp9Impl,
diff --git a/modules/video_coding/codecs/vp9/vp9_impl.cc b/modules/video_coding/codecs/vp9/vp9_impl.cc
index ad2a7eb..abb54b6 100644
--- a/modules/video_coding/codecs/vp9/vp9_impl.cc
+++ b/modules/video_coding/codecs/vp9/vp9_impl.cc
@@ -1016,8 +1016,6 @@
 
   vp9_info->first_frame_in_picture = first_frame_in_picture_;
   vp9_info->flexible_mode = is_flexible_mode_;
-  vp9_info->ss_data_available =
-      (pkt.data.frame.flags & VPX_FRAME_IS_KEY) ? true : false;
 
   if (pkt.data.frame.flags & VPX_FRAME_IS_KEY) {
     pics_since_key_ = 0;
@@ -1031,13 +1029,6 @@
   // Can't have keyframe with non-zero temporal layer.
   RTC_DCHECK(pics_since_key_ != 0 || layer_id.temporal_layer_id == 0);
 
-  if (ss_info_needed_ && layer_id.temporal_layer_id == 0 &&
-      layer_id.spatial_layer_id == 0) {
-    // Force SS info after the layers configuration has changed.
-    vp9_info->ss_data_available = true;
-    ss_info_needed_ = false;
-  }
-
   RTC_CHECK_GT(num_temporal_layers_, 0);
   RTC_CHECK_GT(num_active_spatial_layers_, 0);
   if (num_temporal_layers_ == 1) {
@@ -1052,9 +1043,6 @@
   } else {
     *spatial_idx = layer_id.spatial_layer_id;
   }
-  if (layer_id.spatial_layer_id != 0) {
-    vp9_info->ss_data_available = false;
-  }
 
   // TODO(asapersson): this info has to be obtained from the encoder.
   vp9_info->temporal_up_switch = false;
@@ -1100,7 +1088,13 @@
 
   vp9_info->inter_pic_predicted = (!is_key_pic && vp9_info->num_ref_pics > 0);
 
-  if (vp9_info->ss_data_available) {
+  // Write SS on the key frame of each independently coded spatial layer, and
+  // on the base temporal/spatial layer frame when the number of layers changed
+  // without a key picture being issued (inter-layer prediction is enabled).
+  const bool is_key_frame = is_key_pic && !vp9_info->inter_layer_predicted;
+  if (is_key_frame || (ss_info_needed_ && layer_id.temporal_layer_id == 0 &&
+                       layer_id.spatial_layer_id == 0)) {
+    vp9_info->ss_data_available = true;
     vp9_info->spatial_layer_resolution_present = true;
     for (size_t i = 0; i < num_active_spatial_layers_; ++i) {
       vp9_info->width[i] = codec_.width * svc_params_.scaling_factor_num[i] /
@@ -1113,6 +1107,10 @@
     } else {
       vp9_info->gof.CopyGofInfoVP9(gof_);
     }
+
+    ss_info_needed_ = false;
+  } else {
+    vp9_info->ss_data_available = false;
   }
 
   first_frame_in_picture_ = false;