In SVC controllers add support for frames dropped by encoder

Update the flag indicating that a T1 frame can be referenced when the frame
is encoded rather than when it is sent for encoding.
Otherwise, when the encoder drops a T1 frame, the configuration for the following
T2 frame would still try to reference that absent T1 frame, leading to invalid references.

Bug: None
Change-Id: I6398275971596b0618bcf9c926f0282f74120976
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/202030
Reviewed-by: Philip Eliasson <philipel@webrtc.org>
Commit-Queue: Danil Chapovalov <danilchap@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#33002}
diff --git a/modules/video_coding/svc/scalability_structure_full_svc.cc b/modules/video_coding/svc/scalability_structure_full_svc.cc
index c489b60..5454622 100644
--- a/modules/video_coding/svc/scalability_structure_full_svc.cc
+++ b/modules/video_coding/svc/scalability_structure_full_svc.cc
@@ -188,7 +188,6 @@
         // No frame reference top layer frame, so no need save it into a buffer.
         if (num_temporal_layers_ > 2 || sid < num_spatial_layers_ - 1) {
           config.Update(BufferIndex(sid, /*tid=*/1));
-          can_reference_t1_frame_for_spatial_id_.set(sid);
         }
         spatial_dependency_buffer_id = BufferIndex(sid, /*tid=*/1);
       }
@@ -246,6 +245,10 @@
 
 GenericFrameInfo ScalabilityStructureFullSvc::OnEncodeDone(
     const LayerFrameConfig& config) {
+  if (config.TemporalId() == 1) {
+    can_reference_t1_frame_for_spatial_id_.set(config.SpatialId());
+  }
+
   GenericFrameInfo frame_info;
   frame_info.spatial_id = config.SpatialId();
   frame_info.temporal_id = config.TemporalId();
diff --git a/modules/video_coding/svc/scalability_structure_key_svc.cc b/modules/video_coding/svc/scalability_structure_key_svc.cc
index cfc89a3..9399c0c 100644
--- a/modules/video_coding/svc/scalability_structure_key_svc.cc
+++ b/modules/video_coding/svc/scalability_structure_key_svc.cc
@@ -148,7 +148,6 @@
     config.Id(kDelta).S(sid).T(1).Reference(BufferIndex(sid, /*tid=*/0));
     if (num_temporal_layers_ > 2) {
       config.Update(BufferIndex(sid, /*tid=*/1));
-      can_reference_t1_frame_for_spatial_id_.set(sid);
     }
   }
   return configs;
@@ -223,6 +222,10 @@
 
 GenericFrameInfo ScalabilityStructureKeySvc::OnEncodeDone(
     const LayerFrameConfig& config) {
+  if (config.TemporalId() == 1) {
+    can_reference_t1_frame_for_spatial_id_.set(config.SpatialId());
+  }
+
   GenericFrameInfo frame_info;
   frame_info.spatial_id = config.SpatialId();
   frame_info.temporal_id = config.TemporalId();
diff --git a/modules/video_coding/svc/scalability_structure_key_svc_unittest.cc b/modules/video_coding/svc/scalability_structure_key_svc_unittest.cc
index 752f710..34ec747 100644
--- a/modules/video_coding/svc/scalability_structure_key_svc_unittest.cc
+++ b/modules/video_coding/svc/scalability_structure_key_svc_unittest.cc
@@ -52,6 +52,29 @@
 }
 
 TEST(ScalabilityStructureL3T3KeyTest,
+     SkipT1FrameByEncoderKeepsReferencesValid) {
+  std::vector<GenericFrameInfo> frames;
+  ScalabilityStructureL3T3Key structure;
+  ScalabilityStructureWrapper wrapper(structure);
+
+  // 1st 2 temporal units (T0 and T2)
+  wrapper.GenerateFrames(/*num_temporal_units=*/2, frames);
+  // Simulate T1 frame dropped by the encoder,
+  // i.e. retrieve config, but skip calling OnEncodeDone.
+  structure.NextFrameConfig(/*restart=*/false);
+  // one more temporal unit (T2)
+  wrapper.GenerateFrames(/*num_temporal_units=*/1, frames);
+
+  ASSERT_THAT(frames, SizeIs(9));
+  EXPECT_EQ(frames[0].temporal_id, 0);
+  EXPECT_EQ(frames[3].temporal_id, 2);
+  // T1 frames were dropped by the encoder.
+  EXPECT_EQ(frames[6].temporal_id, 2);
+
+  EXPECT_TRUE(wrapper.FrameReferencesAreValid(frames));
+}
+
+TEST(ScalabilityStructureL3T3KeyTest,
      ReenablingSpatialLayerBeforeMissedT0FrameDoesntTriggerAKeyFrame) {
   ScalabilityStructureL3T3Key structure;
   ScalabilityStructureWrapper wrapper(structure);
diff --git a/modules/video_coding/svc/scalability_structure_l3t3_unittest.cc b/modules/video_coding/svc/scalability_structure_l3t3_unittest.cc
index 1a3dc8b..ca66fa8 100644
--- a/modules/video_coding/svc/scalability_structure_l3t3_unittest.cc
+++ b/modules/video_coding/svc/scalability_structure_l3t3_unittest.cc
@@ -9,6 +9,8 @@
  */
 #include "modules/video_coding/svc/scalability_structure_l3t3.h"
 
+#include <vector>
+
 #include "modules/video_coding/svc/scalability_structure_test_helpers.h"
 #include "test/gmock.h"
 #include "test/gtest.h"
@@ -44,6 +46,28 @@
   EXPECT_EQ(frames[0].temporal_id, 2);
 }
 
+TEST(ScalabilityStructureL3T3Test, SkipT1FrameByEncoderKeepsReferencesValid) {
+  std::vector<GenericFrameInfo> frames;
+  ScalabilityStructureL3T3 structure;
+  ScalabilityStructureWrapper wrapper(structure);
+
+  // 1st 2 temporal units (T0 and T2)
+  wrapper.GenerateFrames(/*num_temporal_units=*/2, frames);
+  // Simulate T1 frame dropped by the encoder,
+  // i.e. retrieve config, but skip calling OnEncodeDone.
+  structure.NextFrameConfig(/*restart=*/false);
+  // one more temporal unit (T2)
+  wrapper.GenerateFrames(/*num_temporal_units=*/1, frames);
+
+  ASSERT_THAT(frames, SizeIs(9));
+  EXPECT_EQ(frames[0].temporal_id, 0);
+  EXPECT_EQ(frames[3].temporal_id, 2);
+  // T1 frame was dropped by the encoder.
+  EXPECT_EQ(frames[6].temporal_id, 2);
+
+  EXPECT_TRUE(wrapper.FrameReferencesAreValid(frames));
+}
+
 TEST(ScalabilityStructureL3T3Test, SwitchSpatialLayerBeforeT1Frame) {
   ScalabilityStructureL3T3 structure;
   ScalabilityStructureWrapper wrapper(structure);