Stop using inter_layer_predicted flag for VP9.

Instead of signaling an inter-layer dependency with the inter_layer_predicted flag, flatten the frame IDs (flattened ID = unwrapped picture ID * kMaxSpatialLayers + spatial index) so that an inter-layer dependency can be signaled as a regular frame reference to the frame with flattened ID - 1.

Bug: webrtc:12206, webrtc:12221
Change-Id: I0390fd3d0f5494cde59eece227db938dbc5d7992
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/196648
Reviewed-by: Ilya Nikolaevskiy <ilnik@webrtc.org>
Commit-Queue: Philip Eliasson <philipel@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#32808}
diff --git a/modules/video_coding/encoded_frame.h b/modules/video_coding/encoded_frame.h
index a77d42e..61054ea 100644
--- a/modules/video_coding/encoded_frame.h
+++ b/modules/video_coding/encoded_frame.h
@@ -21,7 +21,7 @@
 
 namespace webrtc {
 
-class RTC_EXPORT VCMEncodedFrame : protected EncodedImage {
+class RTC_EXPORT VCMEncodedFrame : public EncodedImage {
  public:
   VCMEncodedFrame();
   VCMEncodedFrame(const VCMEncodedFrame&);
diff --git a/modules/video_coding/frame_buffer2.cc b/modules/video_coding/frame_buffer2.cc
index ea5dcb6..be97b64 100644
--- a/modules/video_coding/frame_buffer2.cc
+++ b/modules/video_coding/frame_buffer2.cc
@@ -394,49 +394,6 @@
   callback_checker_.Detach();
 }
 
-bool FrameBuffer::IsCompleteSuperFrame(const EncodedFrame& frame) {
-  if (frame.inter_layer_predicted) {
-    // Check that all previous spatial layers are already inserted.
-    VideoLayerFrameId id = frame.id;
-    RTC_DCHECK_GT(id.spatial_layer, 0);
-    --id.spatial_layer;
-    FrameMap::iterator prev_frame = frames_.find(id);
-    if (prev_frame == frames_.end() || !prev_frame->second.frame)
-      return false;
-    while (prev_frame->second.frame->inter_layer_predicted) {
-      if (prev_frame == frames_.begin())
-        return false;
-      --prev_frame;
-      --id.spatial_layer;
-      if (!prev_frame->second.frame ||
-          prev_frame->first.picture_id != id.picture_id ||
-          prev_frame->first.spatial_layer != id.spatial_layer) {
-        return false;
-      }
-    }
-  }
-
-  if (!frame.is_last_spatial_layer) {
-    // Check that all following spatial layers are already inserted.
-    VideoLayerFrameId id = frame.id;
-    ++id.spatial_layer;
-    FrameMap::iterator next_frame = frames_.find(id);
-    if (next_frame == frames_.end() || !next_frame->second.frame)
-      return false;
-    while (!next_frame->second.frame->is_last_spatial_layer) {
-      ++next_frame;
-      ++id.spatial_layer;
-      if (next_frame == frames_.end() || !next_frame->second.frame ||
-          next_frame->first.picture_id != id.picture_id ||
-          next_frame->first.spatial_layer != id.spatial_layer) {
-        return false;
-      }
-    }
-  }
-
-  return true;
-}
-
 int64_t FrameBuffer::InsertFrame(std::unique_ptr<EncodedFrame> frame) {
   TRACE_EVENT0("webrtc", "FrameBuffer::InsertFrame");
   RTC_DCHECK(frame);
@@ -523,7 +480,9 @@
   if (!frame->delayed_by_retransmission())
     timing_->IncomingTimestamp(frame->Timestamp(), frame->ReceivedTime());
 
-  if (stats_callback_ && IsCompleteSuperFrame(*frame)) {
+  // Only the last spatial layer is checked here, so a frame can be reported as
+  // fully received even if a lower spatial layer frame is missing.
+  if (stats_callback_ && frame->is_last_spatial_layer) {
     stats_callback_->OnCompleteFrame(frame->is_keyframe(), frame->size(),
                                      frame->contentType());
   }
@@ -736,14 +695,14 @@
   }
   auto encoded_image_buffer = EncodedImageBuffer::Create(total_length);
   uint8_t* buffer = encoded_image_buffer->data();
-  first_frame->SetSpatialLayerFrameSize(first_frame->id.spatial_layer,
+  first_frame->SetSpatialLayerFrameSize(first_frame->SpatialIndex().value_or(0),
                                         first_frame->size());
   memcpy(buffer, first_frame->data(), first_frame->size());
   buffer += first_frame->size();
 
   // Spatial index of combined frame is set equal to spatial index of its top
   // spatial layer.
-  first_frame->SetSpatialIndex(last_frame->id.spatial_layer);
+  first_frame->SetSpatialIndex(last_frame->SpatialIndex().value_or(0));
   first_frame->id.spatial_layer = last_frame->id.spatial_layer;
 
   first_frame->video_timing_mutable()->network2_timestamp_ms =
@@ -754,8 +713,8 @@
   // Append all remaining frames to the first one.
   for (size_t i = 1; i < frames.size(); ++i) {
     EncodedFrame* next_frame = frames[i];
-    first_frame->SetSpatialLayerFrameSize(next_frame->id.spatial_layer,
-                                          next_frame->size());
+    first_frame->SetSpatialLayerFrameSize(
+        next_frame->SpatialIndex().value_or(0), next_frame->size());
     memcpy(buffer, next_frame->data(), next_frame->size());
     buffer += next_frame->size();
     delete next_frame;
diff --git a/modules/video_coding/frame_buffer2.h b/modules/video_coding/frame_buffer2.h
index b19b6d3..080ce7c 100644
--- a/modules/video_coding/frame_buffer2.h
+++ b/modules/video_coding/frame_buffer2.h
@@ -148,10 +148,6 @@
 
   void ClearFramesAndHistory() RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
 
-  // Checks if the superframe, which current frame belongs to, is complete.
-  bool IsCompleteSuperFrame(const EncodedFrame& frame)
-      RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
-
   bool HasBadRenderTiming(const EncodedFrame& frame, int64_t now_ms)
       RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
 
diff --git a/modules/video_coding/rtp_vp9_ref_finder.cc b/modules/video_coding/rtp_vp9_ref_finder.cc
index a725a26..e1dba9c 100644
--- a/modules/video_coding/rtp_vp9_ref_finder.cc
+++ b/modules/video_coding/rtp_vp9_ref_finder.cc
@@ -51,8 +51,7 @@
       codec_header.spatial_idx >= kMaxSpatialLayers)
     return kDrop;
 
-  frame->id.spatial_layer = codec_header.spatial_idx;
-  frame->inter_layer_predicted = codec_header.inter_layer_predicted;
+  frame->SetSpatialIndex(codec_header.spatial_idx);
   frame->id.picture_id = codec_header.picture_id & (kFrameIdLength - 1);
 
   if (last_picture_id_ == -1)
@@ -68,7 +67,7 @@
                                                       codec_header.pid_diff[i]);
     }
 
-    UnwrapPictureIds(frame);
+    FlattenFrameIdAndRefs(frame, codec_header.inter_layer_predicted);
     return kHandOff;
   }
 
@@ -120,11 +119,11 @@
     if (frame->frame_type() == VideoFrameType::kVideoFrameKey) {
       frame->num_references = 0;
       FrameReceivedVp9(frame->id.picture_id, info);
-      UnwrapPictureIds(frame);
+      FlattenFrameIdAndRefs(frame, codec_header.inter_layer_predicted);
       return kHandOff;
     }
   } else if (frame->frame_type() == VideoFrameType::kVideoFrameKey) {
-    if (frame->id.spatial_layer == 0) {
+    if (frame->SpatialIndex() == 0) {
       RTC_LOG(LS_WARNING) << "Received keyframe without scalability structure";
       return kDrop;
     }
@@ -134,12 +133,10 @@
 
     info = &gof_info_it->second;
 
-    if (frame->frame_type() == VideoFrameType::kVideoFrameKey) {
-      frame->num_references = 0;
-      FrameReceivedVp9(frame->id.picture_id, info);
-      UnwrapPictureIds(frame);
-      return kHandOff;
-    }
+    frame->num_references = 0;
+    FrameReceivedVp9(frame->id.picture_id, info);
+    FlattenFrameIdAndRefs(frame, codec_header.inter_layer_predicted);
+    return kHandOff;
   } else {
     auto gof_info_it = gof_info_.find(
         (codec_header.temporal_idx == 0) ? unwrapped_tl0 - 1 : unwrapped_tl0);
@@ -204,7 +201,7 @@
     frame->num_references = 0;
   }
 
-  UnwrapPictureIds(frame);
+  FlattenFrameIdAndRefs(frame, codec_header.inter_layer_predicted);
   return kHandOff;
 }
 
@@ -326,10 +323,22 @@
   } while (complete_frame);
 }
 
-void RtpVp9RefFinder::UnwrapPictureIds(RtpFrameObject* frame) {
-  for (size_t i = 0; i < frame->num_references; ++i)
-    frame->references[i] = unwrapper_.Unwrap(frame->references[i]);
-  frame->id.picture_id = unwrapper_.Unwrap(frame->id.picture_id);
+void RtpVp9RefFinder::FlattenFrameIdAndRefs(RtpFrameObject* frame,
+                                            bool inter_layer_predicted) {
+  for (size_t i = 0; i < frame->num_references; ++i) {
+    frame->references[i] =
+        unwrapper_.Unwrap(frame->references[i]) * kMaxSpatialLayers +
+        *frame->SpatialIndex();
+  }
+  frame->id.picture_id =
+      unwrapper_.Unwrap(frame->id.picture_id) * kMaxSpatialLayers +
+      *frame->SpatialIndex();
+
+  if (inter_layer_predicted &&
+      frame->num_references + 1 <= EncodedFrame::kMaxFrameReferences) {
+    frame->references[frame->num_references] = frame->id.picture_id - 1;
+    ++frame->num_references;
+  }
 }
 
 void RtpVp9RefFinder::ClearTo(uint16_t seq_num) {
diff --git a/modules/video_coding/rtp_vp9_ref_finder.h b/modules/video_coding/rtp_vp9_ref_finder.h
index 9990c5d..1ccfa3b 100644
--- a/modules/video_coding/rtp_vp9_ref_finder.h
+++ b/modules/video_coding/rtp_vp9_ref_finder.h
@@ -59,7 +59,7 @@
                              uint8_t temporal_idx,
                              uint16_t pid_ref);
 
-  void UnwrapPictureIds(RtpFrameObject* frame);
+  void FlattenFrameIdAndRefs(RtpFrameObject* frame, bool inter_layer_predicted);
 
   // Save the last picture id in order to detect when there is a gap in frames
   // that have not yet been fully received.
diff --git a/modules/video_coding/rtp_vp9_ref_finder_unittest.cc b/modules/video_coding/rtp_vp9_ref_finder_unittest.cc
index e3e4814..aa883c8 100644
--- a/modules/video_coding/rtp_vp9_ref_finder_unittest.cc
+++ b/modules/video_coding/rtp_vp9_ref_finder_unittest.cc
@@ -8,22 +8,21 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
-#include <cstring>
-#include <map>
-#include <set>
 #include <utility>
 #include <vector>
 
 #include "modules/video_coding/frame_object.h"
 #include "modules/video_coding/rtp_vp9_ref_finder.h"
-#include "rtc_base/random.h"
 #include "test/gmock.h"
 #include "test/gtest.h"
 
+using ::testing::Contains;
 using ::testing::Matcher;
 using ::testing::MatcherInterface;
 using ::testing::Matches;
 using ::testing::MatchResultListener;
+using ::testing::Pointee;
+using ::testing::Property;
 using ::testing::UnorderedElementsAreArray;
 
 namespace webrtc {
@@ -101,6 +100,7 @@
       }
     }
     vp9_header.temporal_up_switch = up_switch;
+    vp9_header.inter_layer_predicted = inter_layer;
     vp9_header.inter_pic_predicted = inter_pic && !keyframe;
     if (scalability_structure != nullptr) {
       vp9_header.ss_data_available = true;
@@ -155,23 +155,19 @@
 class HasFrameMatcher : public MatcherInterface<const FrameVector&> {
  public:
   explicit HasFrameMatcher(int64_t frame_id,
-                           int spatial_id,
                            const std::vector<int64_t>& expected_refs)
       : frame_id_(frame_id),
-        spatial_id_(spatial_id),
         expected_refs_(expected_refs) {}
 
   bool MatchAndExplain(const FrameVector& frames,
                        MatchResultListener* result_listener) const override {
     auto it = std::find_if(frames.begin(), frames.end(),
                            [this](const std::unique_ptr<EncodedFrame>& f) {
-                             return f->id.picture_id == frame_id_ &&
-                                    f->id.spatial_layer == spatial_id_;
+                             return f->id.picture_id == frame_id_;
                            });
     if (it == frames.end()) {
       if (result_listener->IsInterested()) {
-        *result_listener << "No frame with frame_id:" << frame_id_
-                         << " and spatial_id:" << spatial_id_;
+        *result_listener << "No frame with frame_id:" << frame_id_;
       }
       return false;
     }
@@ -180,8 +176,7 @@
                                         (*it)->num_references);
     if (!Matches(UnorderedElementsAreArray(expected_refs_))(actual_refs)) {
       if (result_listener->IsInterested()) {
-        *result_listener << "Frame with frame_id:" << frame_id_
-                         << ", spatial_id:" << spatial_id_ << " and "
+        *result_listener << "Frame with frame_id:" << frame_id_ << " and "
                          << actual_refs.size() << " references { ";
         for (auto r : actual_refs) {
           *result_listener << r << " ";
@@ -195,8 +190,8 @@
   }
 
   void DescribeTo(std::ostream* os) const override {
-    *os << "frame with frame_id:" << frame_id_ << ", spatial_id:" << spatial_id_
-        << " and " << expected_refs_.size() << " references { ";
+    *os << "frame with frame_id:" << frame_id_ << " and "
+        << expected_refs_.size() << " references { ";
     for (auto r : expected_refs_) {
       *os << r << " ";
     }
@@ -205,8 +200,7 @@
 
  private:
   const int64_t frame_id_;
-  const int spatial_id_;
-  std::vector<int64_t> expected_refs_;
+  const std::vector<int64_t> expected_refs_;
 };
 
 }  // namespace
@@ -225,11 +219,9 @@
   FrameVector frames_;
 };
 
-Matcher<const FrameVector&> HasFrameWithIdAndRefs(
-    int64_t frame_id,
-    int spatial_id,
-    const std::vector<int64_t>& refs) {
-  return MakeMatcher(new HasFrameMatcher(frame_id, spatial_id, refs));
+Matcher<const FrameVector&> HasFrameWithIdAndRefs(int64_t frame_id,
+                                                  std::vector<int64_t> refs) {
+  return MakeMatcher(new HasFrameMatcher(frame_id, refs));
 }
 
 TEST_F(RtpVp9RefFinderTest, GofInsertOneFrame) {
@@ -239,7 +231,7 @@
   Insert(Frame().Pid(1).SidAndTid(0, 0).Tl0(0).AsKeyFrame().Gof(&ss));
 
   EXPECT_EQ(frames_.size(), 1UL);
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(1, 0, {}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(5, {}));
 }
 
 TEST_F(RtpVp9RefFinderTest, GofTemporalLayers_0) {
@@ -250,8 +242,8 @@
   Insert(Frame().Pid(2).SidAndTid(0, 0).Tl0(1));
 
   EXPECT_EQ(frames_.size(), 2UL);
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(1, 0, {}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(2, 0, {1}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(5, {}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(10, {5}));
 }
 
 TEST_F(RtpVp9RefFinderTest, GofSpatialLayers_2) {
@@ -265,11 +257,11 @@
   Insert(Frame().Pid(3).SidAndTid(1, 0).Tl0(2));
 
   EXPECT_EQ(frames_.size(), 5UL);
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(1, 0, {}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(2, 0, {1}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(2, 1, {}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(3, 0, {2}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(3, 1, {2}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(5, {}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(10, {5}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(11, {}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(15, {10}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(16, {11}));
 }
 
 TEST_F(RtpVp9RefFinderTest, GofTemporalLayersReordered_0) {
@@ -287,15 +279,15 @@
   Insert(Frame().Pid(5).SidAndTid(0, 0).Tl0(4));
 
   EXPECT_EQ(frames_.size(), 9UL);
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(1, 0, {}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(2, 0, {1}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(2, 1, {}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(3, 0, {2}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(3, 1, {2}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(4, 0, {3}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(4, 1, {3}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(5, 0, {4}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(5, 1, {4}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(5, {}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(10, {5}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(11, {}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(15, {10}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(16, {11}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(20, {15}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(21, {16}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(25, {20}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(26, {21}));
 }
 
 TEST_F(RtpVp9RefFinderTest, GofSkipFramesTemporalLayers_01) {
@@ -314,12 +306,12 @@
   Insert(Frame().Pid(11).SidAndTid(0, 1).Tl0(5));
 
   ASSERT_EQ(6UL, frames_.size());
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(0, 0, {}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(1, 0, {0}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(4, 0, {}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(5, 0, {4}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(10, 0, {8}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(11, 0, {10}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(0, {}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(5, {0}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(20, {}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(25, {20}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(50, {40}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(55, {50}));
 }
 
 TEST_F(RtpVp9RefFinderTest, GofSkipFramesTemporalLayers_0212) {
@@ -333,10 +325,10 @@
   Insert(Frame().Pid(3).SidAndTid(0, 2).Tl0(0));
 
   ASSERT_EQ(4UL, frames_.size());
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(0, 0, {}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(1, 0, {0}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(2, 0, {0}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(3, 0, {2}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(0, {}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(5, {0}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(10, {0}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(15, {10}));
 
   // Skip frames with tl0 = 1
 
@@ -347,25 +339,25 @@
   Insert(Frame().Pid(11).SidAndTid(0, 2).Tl0(2));
 
   ASSERT_EQ(8UL, frames_.size());
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(8, 0, {}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(9, 0, {8}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(10, 0, {8}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(11, 0, {10}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(40, {}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(45, {40}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(50, {40}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(55, {50}));
 
   // Now insert frames with tl0 = 1
   Insert(Frame().Pid(4).SidAndTid(0, 0).Tl0(1).AsKeyFrame().Gof(&ss));
   Insert(Frame().Pid(7).SidAndTid(0, 2).Tl0(1));
 
   ASSERT_EQ(9UL, frames_.size());
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(4, 0, {}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(20, {}));
 
   Insert(Frame().Pid(5).SidAndTid(0, 2).Tl0(1));
   Insert(Frame().Pid(6).SidAndTid(0, 1).Tl0(1));
 
   ASSERT_EQ(12UL, frames_.size());
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(5, 0, {4}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(6, 0, {4}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(7, 0, {6}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(25, {20}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(30, {20}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(35, {30}));
 }
 
 TEST_F(RtpVp9RefFinderTest, GofTemporalLayers_01) {
@@ -379,10 +371,10 @@
   Insert(Frame().Pid(3).SidAndTid(0, 1).Tl0(1));
 
   ASSERT_EQ(4UL, frames_.size());
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(0, 0, {}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(1, 0, {0}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(2, 0, {0}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(3, 0, {2}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(0, {}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(5, {0}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(10, {0}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(15, {10}));
 }
 
 TEST_F(RtpVp9RefFinderTest, GofTemporalLayersReordered_01) {
@@ -402,16 +394,16 @@
   Insert(Frame().Pid(9).SidAndTid(0, 1).Tl0(4));
 
   ASSERT_EQ(10UL, frames_.size());
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(0, 0, {}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(1, 0, {0}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(2, 0, {0}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(3, 0, {2}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(4, 0, {2}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(5, 0, {4}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(6, 0, {4}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(7, 0, {6}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(8, 0, {6}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(9, 0, {8}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(0, {}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(5, {0}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(10, {0}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(15, {10}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(20, {10}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(25, {20}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(30, {20}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(35, {30}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(40, {30}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(45, {40}));
 }
 
 TEST_F(RtpVp9RefFinderTest, GofTemporalLayers_0212) {
@@ -429,14 +421,14 @@
   Insert(Frame().Pid(7).SidAndTid(0, 2).Tl0(1));
 
   ASSERT_EQ(8UL, frames_.size());
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(0, 0, {}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(1, 0, {0}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(2, 0, {0}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(3, 0, {2}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(4, 0, {0}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(5, 0, {4}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(6, 0, {4}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(7, 0, {6}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(0, {}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(5, {0}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(10, {0}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(15, {10}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(20, {0}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(25, {20}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(30, {20}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(35, {30}));
 }
 
 TEST_F(RtpVp9RefFinderTest, GofTemporalLayersReordered_0212) {
@@ -458,18 +450,18 @@
   Insert(Frame().Pid(10).SidAndTid(0, 1).Tl0(2));
 
   ASSERT_EQ(12UL, frames_.size());
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(0, 0, {}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(1, 0, {0}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(2, 0, {0}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(3, 0, {2}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(4, 0, {0}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(5, 0, {4}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(6, 0, {4}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(7, 0, {6}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(8, 0, {4}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(9, 0, {8}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(10, 0, {8}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(11, 0, {10}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(0, {}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(5, {0}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(10, {0}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(15, {10}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(20, {0}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(25, {20}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(30, {20}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(35, {30}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(40, {20}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(45, {40}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(50, {40}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(55, {50}));
 }
 
 TEST_F(RtpVp9RefFinderTest, GofTemporalLayersUpSwitch_02120212) {
@@ -495,22 +487,22 @@
   Insert(Frame().Pid(15).SidAndTid(0, 2).Tl0(3));
 
   ASSERT_EQ(16UL, frames_.size());
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(0, 0, {}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(1, 0, {0}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(2, 0, {0}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(3, 0, {1, 2}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(4, 0, {0}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(5, 0, {3, 4}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(6, 0, {2, 4}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(7, 0, {6}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(8, 0, {4}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(9, 0, {8}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(10, 0, {8}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(11, 0, {9, 10}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(12, 0, {8}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(13, 0, {11, 12}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(14, 0, {10, 12}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(15, 0, {13, 14}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(0, {}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(5, {0}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(10, {0}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(15, {5, 10}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(20, {0}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(25, {15, 20}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(30, {10, 20}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(35, {30}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(40, {20}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(45, {40}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(50, {40}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(55, {45, 50}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(60, {40}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(65, {55, 60}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(70, {50, 60}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(75, {65, 70}));
 }
 
 TEST_F(RtpVp9RefFinderTest, GofTemporalLayersUpSwitchReordered_02120212) {
@@ -536,22 +528,22 @@
   Insert(Frame().Pid(14).SidAndTid(0, 1).Tl0(3));
 
   ASSERT_EQ(16UL, frames_.size());
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(0, 0, {}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(1, 0, {0}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(2, 0, {0}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(3, 0, {1, 2}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(4, 0, {0}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(5, 0, {3, 4}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(6, 0, {2, 4}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(7, 0, {6}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(8, 0, {4}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(9, 0, {8}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(10, 0, {8}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(11, 0, {9, 10}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(12, 0, {8}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(13, 0, {11, 12}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(14, 0, {10, 12}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(15, 0, {13, 14}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(0, {}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(5, {0}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(10, {0}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(15, {5, 10}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(20, {0}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(25, {15, 20}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(30, {10, 20}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(35, {30}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(40, {20}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(45, {40}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(50, {40}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(55, {45, 50}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(60, {40}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(65, {55, 60}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(70, {50, 60}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(75, {65, 70}));
 }
 
 TEST_F(RtpVp9RefFinderTest, GofTemporalLayersReordered_01_0212) {
@@ -574,25 +566,25 @@
   Insert(Frame().Pid(9).SidAndTid(0, 2).Tl0(3));
 
   ASSERT_EQ(12UL, frames_.size());
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(0, 0, {}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(1, 0, {0}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(2, 0, {0}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(3, 0, {2}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(4, 0, {0}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(5, 0, {4}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(6, 0, {4}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(7, 0, {6}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(8, 0, {4}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(9, 0, {8}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(10, 0, {8}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(11, 0, {10}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(0, {}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(5, {0}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(10, {0}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(15, {10}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(20, {0}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(25, {20}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(30, {20}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(35, {30}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(40, {20}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(45, {40}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(50, {40}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(55, {50}));
 }
 
 TEST_F(RtpVp9RefFinderTest, FlexibleModeOneFrame) {
   Insert(Frame().Pid(0).SidAndTid(0, 0).AsKeyFrame());
 
   ASSERT_EQ(1UL, frames_.size());
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(0, 0, {}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(0, {}));
 }
 
 TEST_F(RtpVp9RefFinderTest, FlexibleModeTwoSpatialLayers) {
@@ -606,14 +598,14 @@
   Insert(Frame().Pid(4).SidAndTid(1, 0).FlexRefs({1}));
 
   ASSERT_EQ(8UL, frames_.size());
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(0, 0, {}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(0, 1, {}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(1, 1, {0}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(2, 0, {0}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(2, 1, {1}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(3, 1, {2}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(4, 0, {2}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(4, 1, {3}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(0, {}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(1, {0}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(6, {1}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(10, {0}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(11, {6}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(16, {11}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(20, {10}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(21, {16}));
 }
 
 TEST_F(RtpVp9RefFinderTest, FlexibleModeTwoSpatialLayersReordered) {
@@ -627,14 +619,14 @@
   Insert(Frame().Pid(4).SidAndTid(0, 0).FlexRefs({2}));
 
   ASSERT_EQ(8UL, frames_.size());
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(0, 0, {}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(0, 1, {}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(1, 1, {0}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(2, 0, {0}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(2, 1, {1}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(3, 1, {2}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(4, 0, {2}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(4, 1, {3}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(0, {}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(1, {0}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(6, {1}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(10, {0}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(11, {6}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(16, {11}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(20, {10}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(21, {16}));
 }
 
 TEST_F(RtpVp9RefFinderTest, WrappingFlexReference) {
@@ -642,7 +634,8 @@
 
   ASSERT_EQ(1UL, frames_.size());
   const EncodedFrame& frame = *frames_[0];
-  ASSERT_EQ(frame.id.picture_id - frame.references[0], 1);
+
+  ASSERT_EQ(frame.id.picture_id - frame.references[0], 5);
 }
 
 TEST_F(RtpVp9RefFinderTest, GofPidJump) {
@@ -680,7 +673,7 @@
   Insert(Frame().Pid(1).SidAndTid(0, 0).Tl0(1));
 
   ASSERT_EQ(1UL, frames_.size());
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(0, 0, {}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(0, {}));
 }
 
 TEST_F(RtpVp9RefFinderTest, GofZeroFrames) {
@@ -692,8 +685,22 @@
   Insert(Frame().Pid(1).SidAndTid(0, 0).Tl0(1));
 
   ASSERT_EQ(2UL, frames_.size());
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(0, 0, {}));
-  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(1, 0, {0}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(0, {}));
+  EXPECT_THAT(frames_, HasFrameWithIdAndRefs(5, {0}));
+}
+
+TEST_F(RtpVp9RefFinderTest, SpatialIndex) {
+  Insert(Frame().Pid(0).SidAndTid(0, 0).AsKeyFrame());
+  Insert(Frame().Pid(0).SidAndTid(1, 0).AsKeyFrame());
+  Insert(Frame().Pid(0).SidAndTid(2, 0).AsKeyFrame());
+
+  ASSERT_EQ(3UL, frames_.size());
+  EXPECT_THAT(frames_,
+              Contains(Pointee(Property(&EncodedFrame::SpatialIndex, 0))));
+  EXPECT_THAT(frames_,
+              Contains(Pointee(Property(&EncodedFrame::SpatialIndex, 1))));
+  EXPECT_THAT(frames_,
+              Contains(Pointee(Property(&EncodedFrame::SpatialIndex, 2))));
 }
 
 }  // namespace video_coding