Make DefaultTemporalLayers explicitly request a key frame

Bug: webrtc:10758
Change-Id: I426bfee7c3cdc2ac058f7e7f44368530a28b02a5
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/143169
Reviewed-by: Erik Språng <sprang@webrtc.org>
Commit-Queue: Elad Alon <eladalon@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#28345}
diff --git a/api/video_codecs/vp8_frame_config.h b/api/video_codecs/vp8_frame_config.h
index d3a4fc0..5369bf5 100644
--- a/api/video_codecs/vp8_frame_config.h
+++ b/api/video_codecs/vp8_frame_config.h
@@ -18,6 +18,13 @@
 // Configuration of a VP8 frame - which buffers are to be referenced
 // by it, which buffers should be updated, etc.
 struct Vp8FrameConfig {
+  static Vp8FrameConfig GetIntraFrameConfig() {
+    Vp8FrameConfig frame_config = Vp8FrameConfig(
+        BufferFlags::kUpdate, BufferFlags::kUpdate, BufferFlags::kUpdate);
+    frame_config.packetizer_temporal_idx = 0;
+    return frame_config;
+  }
+
   enum BufferFlags : int {
     kNone = 0,
     kReference = 1,
diff --git a/modules/video_coding/codecs/vp8/default_temporal_layers.cc b/modules/video_coding/codecs/vp8/default_temporal_layers.cc
index 339e81d..986ea2e 100644
--- a/modules/video_coding/codecs/vp8/default_temporal_layers.cc
+++ b/modules/video_coding/codecs/vp8/default_temporal_layers.cc
@@ -348,6 +348,9 @@
   RTC_DCHECK_GT(num_layers_, 0);
   RTC_DCHECK_GT(temporal_pattern_.size(), 0);
 
+  RTC_DCHECK_GT(kUninitializedPatternIndex, temporal_pattern_.size());
+  const bool first_frame = (pattern_idx_ == kUninitializedPatternIndex);
+
   pattern_idx_ = (pattern_idx_ + 1) % temporal_pattern_.size();
   DependencyInfo dependency_info = temporal_pattern_[pattern_idx_];
   Vp8FrameConfig& tl_config = dependency_info.frame_config;
@@ -363,28 +366,33 @@
     }
   }
 
-  // Last is always ok to reference as it contains the base layer. For other
-  // buffers though, we need to check if the buffer has actually been refreshed
-  // this cycle of the temporal pattern. If the encoder dropped a frame, it
-  // might not have.
-  ValidateReferences(&tl_config.golden_buffer_flags,
-                     Vp8BufferReference::kGolden);
-  ValidateReferences(&tl_config.arf_buffer_flags, Vp8BufferReference::kAltref);
-  // Update search order to let the encoder know which buffers contains the most
-  // recent data.
-  UpdateSearchOrder(&tl_config);
-  // Figure out if this a sync frame (non-base-layer frame with only base-layer
-  // references).
-  tl_config.layer_sync = IsSyncFrame(tl_config);
+  if (first_frame) {
+    tl_config = Vp8FrameConfig::GetIntraFrameConfig();
+  } else {
+    // Last is always ok to reference as it contains the base layer. For other
+    // buffers though, we need to check if the buffer has actually been
+    // refreshed this cycle of the temporal pattern. If the encoder dropped
+    // a frame, it might not have.
+    ValidateReferences(&tl_config.golden_buffer_flags,
+                       Vp8BufferReference::kGolden);
+    ValidateReferences(&tl_config.arf_buffer_flags,
+                       Vp8BufferReference::kAltref);
+    // Update search order to let the encoder know which buffers contains the
+    // most recent data.
+    UpdateSearchOrder(&tl_config);
+    // Figure out if this a sync frame (non-base-layer frame with only
+    // base-layer references).
+    tl_config.layer_sync = IsSyncFrame(tl_config);
 
-  // Increment frame age, this needs to be in sync with |pattern_idx_|, so must
-  // update it here. Resetting age to 0 must be done when encoding is complete
-  // though, and so in the case of pipelining encoder it might lag. To prevent
-  // this data spill over into the next iteration, the |pedning_frames_| map
-  // is reset in loops. If delay is constant, the relative age should still be
-  // OK for the search order.
-  for (Vp8BufferReference buffer : kAllBuffers) {
-    ++frames_since_buffer_refresh_[buffer];
+    // Increment frame age, this needs to be in sync with |pattern_idx_|,
+    // so must update it here. Resetting age to 0 must be done when encoding is
+    // complete though, and so in the case of pipelining encoder it might lag.
+    // To prevent this data spill over into the next iteration,
+    // the |pedning_frames_| map is reset in loops. If delay is constant,
+    // the relative age should still be OK for the search order.
+    for (Vp8BufferReference buffer : kAllBuffers) {
+      ++frames_since_buffer_refresh_[buffer];
+    }
   }
 
   // Add frame to set of pending frames, awaiting completion.
@@ -395,7 +403,7 @@
   // Checker does not yet support encoder frame dropping, so validate flags
   // here before they can be dropped.
   // TODO(sprang): Update checker to support dropping.
-  RTC_DCHECK(checker_->CheckTemporalConfig(false, tl_config));
+  RTC_DCHECK(checker_->CheckTemporalConfig(first_frame, tl_config));
 #endif
 
   return tl_config;
diff --git a/modules/video_coding/codecs/vp8/default_temporal_layers_unittest.cc b/modules/video_coding/codecs/vp8/default_temporal_layers_unittest.cc
index 2c419da..dd123cf 100644
--- a/modules/video_coding/codecs/vp8/default_temporal_layers_unittest.cc
+++ b/modules/video_coding/codecs/vp8/default_temporal_layers_unittest.cc
@@ -67,6 +67,7 @@
 constexpr uint8_t kLast = static_cast<uint8_t>(Vp8BufferReference::kLast);
 constexpr uint8_t kGolden = static_cast<uint8_t>(Vp8BufferReference::kGolden);
 constexpr uint8_t kAltref = static_cast<uint8_t>(Vp8BufferReference::kAltref);
+constexpr uint8_t kAll = kLast | kGolden | kAltref;
 
 constexpr int ToVp8CodecFlags(uint8_t referenced_buffers,
                               uint8_t updated_buffers,
@@ -80,6 +81,8 @@
          (update_entropy ? 0 : VP8_EFLAG_NO_UPD_ENTROPY);
 }
 
+constexpr int kKeyFrameFlags = ToVp8CodecFlags(kNone, kAll, true);
+
 std::vector<uint32_t> GetTemporalLayerRates(int target_bitrate_kbps,
                                             int framerate_fps,
                                             int num_temporal_layers) {
@@ -142,17 +145,19 @@
   uint32_t timestamp = 0;
   for (size_t i = 0; i < kPatternSize * kRepetitions; ++i) {
     const size_t ind = i % kPatternSize;
+    const bool is_keyframe = (i == 0);
     CodecSpecificInfo info;
     Vp8FrameConfig tl_config = tl.NextFrameConfig(0, timestamp);
-    EXPECT_EQ(expected_flags[ind], LibvpxVp8Encoder::EncodeFlags(tl_config))
+    EXPECT_EQ(is_keyframe ? kKeyFrameFlags : expected_flags[ind],
+              LibvpxVp8Encoder::EncodeFlags(tl_config))
         << i;
-    tl.OnEncodeDone(0, timestamp, kDefaultBytesPerFrame, i == 0, kDefaultQp,
-                    &info);
-    EXPECT_TRUE(checker.CheckTemporalConfig(i == 0, tl_config));
+    tl.OnEncodeDone(0, timestamp, kDefaultBytesPerFrame, is_keyframe,
+                    kDefaultQp, &info);
+    EXPECT_TRUE(checker.CheckTemporalConfig(is_keyframe, tl_config));
     EXPECT_EQ(expected_temporal_idx[ind], info.codecSpecific.VP8.temporalIdx);
     EXPECT_EQ(expected_temporal_idx[ind], tl_config.packetizer_temporal_idx);
     EXPECT_EQ(expected_temporal_idx[ind], tl_config.encoder_layer_id);
-    EXPECT_EQ(i == 0 || expected_layer_sync[ind],
+    EXPECT_EQ(is_keyframe || expected_layer_sync[ind],
               info.codecSpecific.VP8.layerSync);
     EXPECT_EQ(expected_layer_sync[ind], tl_config.layer_sync);
     timestamp += 3000;
@@ -196,16 +201,19 @@
 
   unsigned int timestamp = 0;
   for (int i = 0; i < 16; ++i) {
+    const bool is_keyframe = (i == 0);
     CodecSpecificInfo info;
     Vp8FrameConfig tl_config = tl.NextFrameConfig(0, timestamp);
-    EXPECT_EQ(expected_flags[i], LibvpxVp8Encoder::EncodeFlags(tl_config)) << i;
-    tl.OnEncodeDone(0, timestamp, kDefaultBytesPerFrame, i == 0, kDefaultQp,
-                    &info);
-    EXPECT_TRUE(checker.CheckTemporalConfig(i == 0, tl_config));
+    EXPECT_EQ(is_keyframe ? kKeyFrameFlags : expected_flags[i],
+              LibvpxVp8Encoder::EncodeFlags(tl_config))
+        << i;
+    tl.OnEncodeDone(0, timestamp, kDefaultBytesPerFrame, is_keyframe,
+                    kDefaultQp, &info);
+    EXPECT_TRUE(checker.CheckTemporalConfig(is_keyframe, tl_config));
     EXPECT_EQ(expected_temporal_idx[i], info.codecSpecific.VP8.temporalIdx);
     EXPECT_EQ(expected_temporal_idx[i], tl_config.packetizer_temporal_idx);
     EXPECT_EQ(expected_temporal_idx[i], tl_config.encoder_layer_id);
-    EXPECT_EQ(i == 0 || expected_layer_sync[i],
+    EXPECT_EQ(is_keyframe || expected_layer_sync[i],
               info.codecSpecific.VP8.layerSync);
     EXPECT_EQ(expected_layer_sync[i], tl_config.layer_sync);
     timestamp += 3000;
@@ -238,16 +246,19 @@
 
   unsigned int timestamp = 0;
   for (int i = 0; i < 8; ++i) {
+    const bool is_keyframe = (i == 0);
     CodecSpecificInfo info;
     Vp8FrameConfig tl_config = tl.NextFrameConfig(0, timestamp);
-    EXPECT_EQ(expected_flags[i], LibvpxVp8Encoder::EncodeFlags(tl_config)) << i;
-    tl.OnEncodeDone(0, timestamp, kDefaultBytesPerFrame, i == 0, kDefaultQp,
-                    &info);
-    EXPECT_TRUE(checker.CheckTemporalConfig(i == 0, tl_config));
+    EXPECT_EQ(is_keyframe ? kKeyFrameFlags : expected_flags[i],
+              LibvpxVp8Encoder::EncodeFlags(tl_config))
+        << i;
+    tl.OnEncodeDone(0, timestamp, kDefaultBytesPerFrame, is_keyframe,
+                    kDefaultQp, &info);
+    EXPECT_TRUE(checker.CheckTemporalConfig(is_keyframe, tl_config));
     EXPECT_EQ(expected_temporal_idx[i], info.codecSpecific.VP8.temporalIdx);
     EXPECT_EQ(expected_temporal_idx[i], tl_config.packetizer_temporal_idx);
     EXPECT_EQ(expected_temporal_idx[i], tl_config.encoder_layer_id);
-    EXPECT_EQ(i == 0 || expected_layer_sync[i],
+    EXPECT_EQ(is_keyframe || expected_layer_sync[i],
               info.codecSpecific.VP8.layerSync);
     EXPECT_EQ(expected_layer_sync[i], tl_config.layer_sync);
     timestamp += 3000;
@@ -373,16 +384,19 @@
 
   uint32_t timestamp = 0;
   for (int i = 0; i < 16; ++i) {
+    const bool is_keyframe = (i == 0);
     CodecSpecificInfo info;
     Vp8FrameConfig tl_config = tl.NextFrameConfig(0, timestamp);
-    EXPECT_EQ(expected_flags[i], LibvpxVp8Encoder::EncodeFlags(tl_config)) << i;
-    tl.OnEncodeDone(0, timestamp, kDefaultBytesPerFrame, i == 0, kDefaultQp,
-                    &info);
-    EXPECT_TRUE(checker.CheckTemporalConfig(i == 0, tl_config));
+    EXPECT_EQ(is_keyframe ? kKeyFrameFlags : expected_flags[i],
+              LibvpxVp8Encoder::EncodeFlags(tl_config))
+        << i;
+    tl.OnEncodeDone(0, timestamp, kDefaultBytesPerFrame, is_keyframe,
+                    kDefaultQp, &info);
+    EXPECT_TRUE(checker.CheckTemporalConfig(is_keyframe, tl_config));
     EXPECT_EQ(expected_temporal_idx[i], info.codecSpecific.VP8.temporalIdx);
     EXPECT_EQ(expected_temporal_idx[i], tl_config.packetizer_temporal_idx);
     EXPECT_EQ(expected_temporal_idx[i], tl_config.encoder_layer_id);
-    EXPECT_EQ(i == 0 || expected_layer_sync[i],
+    EXPECT_EQ(is_keyframe || expected_layer_sync[i],
               info.codecSpecific.VP8.layerSync);
     EXPECT_EQ(expected_layer_sync[i], tl_config.layer_sync);
     timestamp += 3000;