Update INTER_LAYER_PRED_OFF code paths.

This removes the speculative refresh of all reference buffers on key
frames in the WebRTC VP9 encoder wrapper and updates the relevant unit
tests.

Bug: webrtc:10437
Change-Id: Icd178f9dbbd19af87e3ec782f9a6cecc98d47139
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/128611
Commit-Queue: Sergey Silkin <ssilkin@webrtc.org>
Reviewed-by: Ilya Nikolaevskiy <ilnik@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#27229}
diff --git a/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc b/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc
index cd29915..68124e4 100644
--- a/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc
+++ b/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc
@@ -492,24 +492,36 @@
     ASSERT_TRUE(WaitForEncodedFrames(&frames, &codec_specific));
 
     // Key frame.
-    EXPECT_FALSE(codec_specific[0].codecSpecific.VP9.inter_pic_predicted);
-    EXPECT_EQ(frames[0].SpatialIndex(), 0);
+    ASSERT_EQ(frames[0].SpatialIndex(), 0);
+    ASSERT_FALSE(codec_specific[0].codecSpecific.VP9.inter_pic_predicted);
+    EXPECT_FALSE(codec_specific[0].codecSpecific.VP9.inter_layer_predicted);
     EXPECT_EQ(codec_specific[0].codecSpecific.VP9.non_ref_for_inter_layer_pred,
               inter_layer_pred == InterLayerPredMode::kOff);
+
+    ASSERT_EQ(frames[1].SpatialIndex(), 1);
+    ASSERT_FALSE(codec_specific[1].codecSpecific.VP9.inter_pic_predicted);
+    EXPECT_EQ(codec_specific[1].codecSpecific.VP9.inter_layer_predicted,
+              inter_layer_pred == InterLayerPredMode::kOn ||
+                  inter_layer_pred == InterLayerPredMode::kOnKeyPic);
     EXPECT_TRUE(
         codec_specific[1].codecSpecific.VP9.non_ref_for_inter_layer_pred);
 
+    // Delta frame.
     SetWaitForEncodedFramesThreshold(2);
     EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
               encoder_->Encode(*NextInputFrame(), nullptr));
     ASSERT_TRUE(WaitForEncodedFrames(&frames, &codec_specific));
 
-    // Delta frame.
-    EXPECT_TRUE(codec_specific[0].codecSpecific.VP9.inter_pic_predicted);
-    EXPECT_EQ(frames[0].SpatialIndex(), 0);
+    ASSERT_EQ(frames[0].SpatialIndex(), 0);
+    ASSERT_TRUE(codec_specific[0].codecSpecific.VP9.inter_pic_predicted);
+    EXPECT_FALSE(codec_specific[0].codecSpecific.VP9.inter_layer_predicted);
     EXPECT_EQ(codec_specific[0].codecSpecific.VP9.non_ref_for_inter_layer_pred,
-              inter_layer_pred == InterLayerPredMode::kOff ||
-                  inter_layer_pred == InterLayerPredMode::kOnKeyPic);
+              inter_layer_pred != InterLayerPredMode::kOn);
+
+    ASSERT_EQ(frames[1].SpatialIndex(), 1);
+    ASSERT_TRUE(codec_specific[1].codecSpecific.VP9.inter_pic_predicted);
+    EXPECT_EQ(codec_specific[1].codecSpecific.VP9.inter_layer_predicted,
+              inter_layer_pred == InterLayerPredMode::kOn);
     EXPECT_TRUE(
         codec_specific[1].codecSpecific.VP9.non_ref_for_inter_layer_pred);
   }
diff --git a/modules/video_coding/codecs/vp9/vp9_impl.cc b/modules/video_coding/codecs/vp9/vp9_impl.cc
index e39e9bb..8982416 100644
--- a/modules/video_coding/codecs/vp9/vp9_impl.cc
+++ b/modules/video_coding/codecs/vp9/vp9_impl.cc
@@ -1056,6 +1056,7 @@
   if (is_svc_) {
     vpx_svc_ref_frame_config_t enc_layer_conf = {{0}};
     vpx_codec_control(encoder_, VP9E_GET_SVC_REF_FRAME_CONFIG, &enc_layer_conf);
+    int ref_buf_flags = 0;
 
     if (enc_layer_conf.reference_last[layer_id.spatial_layer_id]) {
       const size_t fb_idx =
@@ -1064,6 +1065,7 @@
       if (std::find(ref_buf_list.begin(), ref_buf_list.end(),
                     ref_buf_.at(fb_idx)) == ref_buf_list.end()) {
         ref_buf_list.push_back(ref_buf_.at(fb_idx));
+        ref_buf_flags |= 1 << fb_idx;
       }
     }
 
@@ -1074,6 +1076,7 @@
       if (std::find(ref_buf_list.begin(), ref_buf_list.end(),
                     ref_buf_.at(fb_idx)) == ref_buf_list.end()) {
         ref_buf_list.push_back(ref_buf_.at(fb_idx));
+        ref_buf_flags |= 1 << fb_idx;
       }
     }
 
@@ -1084,8 +1087,22 @@
       if (std::find(ref_buf_list.begin(), ref_buf_list.end(),
                     ref_buf_.at(fb_idx)) == ref_buf_list.end()) {
         ref_buf_list.push_back(ref_buf_.at(fb_idx));
+        ref_buf_flags |= 1 << fb_idx;
       }
     }
+
+    RTC_LOG(LS_VERBOSE) << "Frame " << pic_num << " sl "
+                        << layer_id.spatial_layer_id << " tl "
+                        << layer_id.temporal_layer_id << " refered buffers "
+                        << (ref_buf_flags & (1 << 0) ? 1 : 0)
+                        << (ref_buf_flags & (1 << 1) ? 1 : 0)
+                        << (ref_buf_flags & (1 << 2) ? 1 : 0)
+                        << (ref_buf_flags & (1 << 3) ? 1 : 0)
+                        << (ref_buf_flags & (1 << 4) ? 1 : 0)
+                        << (ref_buf_flags & (1 << 5) ? 1 : 0)
+                        << (ref_buf_flags & (1 << 6) ? 1 : 0)
+                        << (ref_buf_flags & (1 << 7) ? 1 : 0);
+
   } else if (!is_key_frame) {
     RTC_DCHECK_EQ(num_spatial_layers_, 1);
     RTC_DCHECK_EQ(num_temporal_layers_, 1);
@@ -1146,27 +1163,32 @@
   vpx_svc_layer_id_t layer_id = {0};
   vpx_codec_control(encoder_, VP9E_GET_SVC_LAYER_ID, &layer_id);
 
-  const bool is_key_frame =
-      (pkt.data.frame.flags & VPX_FRAME_IS_KEY) ? true : false;
-
   RefFrameBuffer frame_buf(pic_num, layer_id.spatial_layer_id,
                            layer_id.temporal_layer_id);
 
-  if (is_key_frame && layer_id.spatial_layer_id == 0) {
-    // Key frame updates all ref buffers.
-    for (size_t i = 0; i < kNumVp9Buffers; ++i) {
-      ref_buf_[i] = frame_buf;
-    }
-  } else if (is_svc_) {
+  if (is_svc_) {
     vpx_svc_ref_frame_config_t enc_layer_conf = {{0}};
     vpx_codec_control(encoder_, VP9E_GET_SVC_REF_FRAME_CONFIG, &enc_layer_conf);
+    const int update_buffer_slot =
+        enc_layer_conf.update_buffer_slot[layer_id.spatial_layer_id];
 
     for (size_t i = 0; i < kNumVp9Buffers; ++i) {
-      if (enc_layer_conf.update_buffer_slot[layer_id.spatial_layer_id] &
-          (1 << i)) {
+      if (update_buffer_slot & (1 << i)) {
         ref_buf_[i] = frame_buf;
       }
     }
+
+    RTC_LOG(LS_VERBOSE) << "Frame " << pic_num << " sl "
+                        << layer_id.spatial_layer_id << " tl "
+                        << layer_id.temporal_layer_id << " updated buffers "
+                        << (update_buffer_slot & (1 << 0) ? 1 : 0)
+                        << (update_buffer_slot & (1 << 1) ? 1 : 0)
+                        << (update_buffer_slot & (1 << 2) ? 1 : 0)
+                        << (update_buffer_slot & (1 << 3) ? 1 : 0)
+                        << (update_buffer_slot & (1 << 4) ? 1 : 0)
+                        << (update_buffer_slot & (1 << 5) ? 1 : 0)
+                        << (update_buffer_slot & (1 << 6) ? 1 : 0)
+                        << (update_buffer_slot & (1 << 7) ? 1 : 0);
   } else {
     RTC_DCHECK_EQ(num_spatial_layers_, 1);
     RTC_DCHECK_EQ(num_temporal_layers_, 1);