Add support for handling reordered SS data on the receive-side for VP9.

BUG=chromium:500602

Review URL: https://codereview.webrtc.org/1386903002

Cr-Commit-Position: refs/heads/master@{#10383}
diff --git a/webrtc/modules/video_coding/main/source/frame_buffer.cc b/webrtc/modules/video_coding/main/source/frame_buffer.cc
index 192febe..3e4e762 100644
--- a/webrtc/modules/video_coding/main/source/frame_buffer.cc
+++ b/webrtc/modules/video_coding/main/source/frame_buffer.cc
@@ -75,6 +75,15 @@
   return _sessionInfo.NonReference();
 }
 
+void VCMFrameBuffer::SetGofInfo(const GofInfoVP9& gof_info, size_t idx) {
+  _sessionInfo.SetGofInfo(gof_info, idx);
+  // TODO(asapersson): Consider adding hdr->VP9.ref_picture_id for testing.
+  _codecSpecificInfo.codecSpecific.VP9.temporal_idx =
+      gof_info.temporal_idx[idx];
+  _codecSpecificInfo.codecSpecific.VP9.temporal_up_switch =
+      gof_info.temporal_up_switch[idx];
+}
+
 bool
 VCMFrameBuffer::IsSessionComplete() const {
     return _sessionInfo.complete();
diff --git a/webrtc/modules/video_coding/main/source/frame_buffer.h b/webrtc/modules/video_coding/main/source/frame_buffer.h
index d98b024..ab4ff65 100644
--- a/webrtc/modules/video_coding/main/source/frame_buffer.h
+++ b/webrtc/modules/video_coding/main/source/frame_buffer.h
@@ -61,6 +61,8 @@
   int Tl0PicId() const;
   bool NonReference() const;
 
+  void SetGofInfo(const GofInfoVP9& gof_info, size_t idx);
+
   // Increments a counter to keep track of the number of packets of this frame
   // which were NACKed before they arrived.
   void IncrementNackCount();
@@ -73,17 +75,16 @@
   webrtc::FrameType FrameType() const;
   void SetPreviousFrameLoss();
 
-  // The number of packets discarded because the decoder can't make use of
-  // them.
+  // The number of packets discarded because the decoder can't make use of them.
   int NotDecodablePackets() const;
 
  private:
   void SetState(VCMFrameBufferStateEnum state);  // Set state of frame
 
-  VCMFrameBufferStateEnum    _state;         // Current state of the frame
-  VCMSessionInfo             _sessionInfo;
-  uint16_t             _nackCount;
-  int64_t              _latestPacketTimeMs;
+  VCMFrameBufferStateEnum _state;  // Current state of the frame
+  VCMSessionInfo _sessionInfo;
+  uint16_t _nackCount;
+  int64_t _latestPacketTimeMs;
 };
 
 }  // namespace webrtc
diff --git a/webrtc/modules/video_coding/main/source/jitter_buffer.cc b/webrtc/modules/video_coding/main/source/jitter_buffer.cc
index b91cd2e..96c0245 100644
--- a/webrtc/modules/video_coding/main/source/jitter_buffer.cc
+++ b/webrtc/modules/video_coding/main/source/jitter_buffer.cc
@@ -14,7 +14,9 @@
 #include <algorithm>
 #include <utility>
 
+#include "webrtc/base/checks.h"
 #include "webrtc/base/trace_event.h"
+#include "webrtc/modules/rtp_rtcp/interface/rtp_rtcp_defines.h"
 #include "webrtc/modules/video_coding/main/interface/video_coding.h"
 #include "webrtc/modules/video_coding/main/source/frame_buffer.h"
 #include "webrtc/modules/video_coding/main/source/inter_frame_delay.h"
@@ -30,6 +32,9 @@
 
 namespace webrtc {
 
+// Interval, in seconds, between cleanups of old SS data.
+static const uint32_t kSsCleanupIntervalSec = 60;
+
 // Use this rtt if no value has been reported.
 static const int64_t kDefaultRtt = 200;
 
@@ -113,6 +118,97 @@
   }
 }
 
+bool Vp9SsMap::Insert(const VCMPacket& packet) {
+  if (!packet.codecSpecificHeader.codecHeader.VP9.ss_data_available)
+    return false;
+
+  ss_map_[packet.timestamp] = packet.codecSpecificHeader.codecHeader.VP9.gof;
+  return true;
+}
+
+void Vp9SsMap::Reset() {
+  ss_map_.clear();
+}
+
+bool Vp9SsMap::Find(uint32_t timestamp, SsMap::iterator* it_out) {
+  bool found = false;
+  for (SsMap::iterator it = ss_map_.begin(); it != ss_map_.end(); ++it) {
+    if (it->first == timestamp || IsNewerTimestamp(timestamp, it->first)) {
+      *it_out = it;
+      found = true;
+    }
+  }
+  return found;
+}
+
+void Vp9SsMap::RemoveOld(uint32_t timestamp) {
+  if (!TimeForCleanup(timestamp))
+    return;
+
+  SsMap::iterator it;
+  if (!Find(timestamp, &it))
+    return;
+
+  ss_map_.erase(ss_map_.begin(), it);
+  AdvanceFront(timestamp);
+}
+
+bool Vp9SsMap::TimeForCleanup(uint32_t timestamp) const {
+  if (ss_map_.empty() || !IsNewerTimestamp(timestamp, ss_map_.begin()->first))
+    return false;
+
+  uint32_t diff = timestamp - ss_map_.begin()->first;
+  return diff / kVideoPayloadTypeFrequency >= kSsCleanupIntervalSec;
+}
+
+void Vp9SsMap::AdvanceFront(uint32_t timestamp) {
+  RTC_DCHECK(!ss_map_.empty());
+  GofInfoVP9 gof = ss_map_.begin()->second;
+  ss_map_.erase(ss_map_.begin());
+  ss_map_[timestamp] = gof;
+}
+
+bool Vp9SsMap::UpdatePacket(VCMPacket* packet) {
+  uint8_t gof_idx = packet->codecSpecificHeader.codecHeader.VP9.gof_idx;
+  if (gof_idx == kNoGofIdx)
+    return false;  // No update needed.
+
+  SsMap::iterator it;
+  if (!Find(packet->timestamp, &it))
+    return false;  // Corresponding SS not yet received.
+
+  if (gof_idx >= it->second.num_frames_in_gof)
+    return false;  // Assume corresponding SS not yet received.
+
+  RTPVideoHeaderVP9* vp9 = &packet->codecSpecificHeader.codecHeader.VP9;
+  vp9->temporal_idx = it->second.temporal_idx[gof_idx];
+  vp9->temporal_up_switch = it->second.temporal_up_switch[gof_idx];
+
+  // TODO(asapersson): Set vp9.ref_picture_id[i] and add usage.
+  vp9->num_ref_pics = it->second.num_ref_pics[gof_idx];
+  for (size_t i = 0; i < it->second.num_ref_pics[gof_idx]; ++i) {
+    vp9->pid_diff[i] = it->second.pid_diff[gof_idx][i];
+  }
+  return true;
+}
+
+void Vp9SsMap::UpdateFrames(FrameList* frames) {
+  for (const auto& frame_it : *frames) {
+    uint8_t gof_idx =
+        frame_it.second->CodecSpecific()->codecSpecific.VP9.gof_idx;
+    if (gof_idx == kNoGofIdx) {
+      continue;
+    }
+    SsMap::iterator ss_it;
+    if (Find(frame_it.second->TimeStamp(), &ss_it)) {
+      if (gof_idx >= ss_it->second.num_frames_in_gof) {
+        continue;  // Assume corresponding SS not yet received.
+      }
+      frame_it.second->SetGofInfo(ss_it->second, gof_idx);
+    }
+  }
+}
+
 VCMJitterBuffer::VCMJitterBuffer(Clock* clock,
                                  rtc::scoped_ptr<EventWrapper> event)
     : clock_(clock),
@@ -125,8 +221,6 @@
       incomplete_frames_(),
       last_decoded_state_(),
       first_packet_since_reset_(true),
-      last_gof_timestamp_(0),
-      last_gof_valid_(false),
       stats_callback_(NULL),
       incoming_frame_rate_(0),
       incoming_frame_count_(0),
@@ -222,7 +316,7 @@
   first_packet_since_reset_ = true;
   rtt_ms_ = kDefaultRtt;
   last_decoded_state_.Reset();
-  last_gof_valid_ = false;
+  vp9_ss_map_.Reset();
 }
 
 void VCMJitterBuffer::Stop() {
@@ -230,7 +324,7 @@
   UpdateHistograms();
   running_ = false;
   last_decoded_state_.Reset();
-  last_gof_valid_ = false;
+  vp9_ss_map_.Reset();
 
   // Make sure all frames are free and reset.
   for (FrameList::iterator it = decodable_frames_.begin();
@@ -262,7 +356,7 @@
   decodable_frames_.Reset(&free_frames_);
   incomplete_frames_.Reset(&free_frames_);
   last_decoded_state_.Reset();  // TODO(mikhal): sync reset.
-  last_gof_valid_ = false;
+  vp9_ss_map_.Reset();
   num_consecutive_old_packets_ = 0;
   // Also reset the jitter and delay estimates
   jitter_estimate_.Reset();
@@ -592,39 +686,22 @@
     return kOldPacket;
   }
 
+  num_consecutive_old_packets_ = 0;
+
   if (packet.codec == kVideoCodecVP9) {
-    // TODO(asapersson): Move this code to appropriate place.
-    // TODO(asapersson): Handle out of order GOF.
     if (packet.codecSpecificHeader.codecHeader.VP9.flexible_mode) {
       // TODO(asapersson): Add support for flexible mode.
       return kGeneralError;
     }
-    if (packet.codecSpecificHeader.codecHeader.VP9.ss_data_available) {
-      if (!last_gof_valid_ ||
-          IsNewerTimestamp(packet.timestamp, last_gof_timestamp_)) {
-        last_gof_.CopyGofInfoVP9(
-            packet.codecSpecificHeader.codecHeader.VP9.gof);
-        last_gof_timestamp_ = packet.timestamp;
-        last_gof_valid_ = true;
-      }
-    }
-    if (last_gof_valid_ &&
-        !packet.codecSpecificHeader.codecHeader.VP9.flexible_mode) {
-      uint8_t gof_idx = packet.codecSpecificHeader.codecHeader.VP9.gof_idx;
-      if (gof_idx != kNoGofIdx) {
-        if (gof_idx >= last_gof_.num_frames_in_gof) {
-          LOG(LS_WARNING) << "Incorrect gof_idx: " << gof_idx;
-          return kGeneralError;
-        }
-        RTPVideoTypeHeader* hdr = const_cast<RTPVideoTypeHeader*>(
-            &packet.codecSpecificHeader.codecHeader);
-        hdr->VP9.temporal_idx = last_gof_.temporal_idx[gof_idx];
-        hdr->VP9.temporal_up_switch = last_gof_.temporal_up_switch[gof_idx];
-      }
-    }
-  }
+    if (!packet.codecSpecificHeader.codecHeader.VP9.flexible_mode) {
+      if (vp9_ss_map_.Insert(packet))
+        vp9_ss_map_.UpdateFrames(&incomplete_frames_);
 
-  num_consecutive_old_packets_ = 0;
+      vp9_ss_map_.UpdatePacket(const_cast<VCMPacket*>(&packet));
+    }
+    if (!last_decoded_state_.in_initial_state())
+      vp9_ss_map_.RemoveOld(last_decoded_state_.time_stamp());
+  }
 
   VCMFrameBuffer* frame;
   FrameList* frame_list;
diff --git a/webrtc/modules/video_coding/main/source/jitter_buffer.h b/webrtc/modules/video_coding/main/source/jitter_buffer.h
index 8a05f1f..31e3be2 100644
--- a/webrtc/modules/video_coding/main/source/jitter_buffer.h
+++ b/webrtc/modules/video_coding/main/source/jitter_buffer.h
@@ -74,6 +74,37 @@
   void Reset(UnorderedFrameList* free_frames);
 };
 
+class Vp9SsMap {
+ public:
+  typedef std::map<uint32_t, GofInfoVP9, TimestampLessThan> SsMap;
+  bool Insert(const VCMPacket& packet);
+  void Reset();
+
+  // Removes SS data that are older than |timestamp|.
+  // The |timestamp| should be an old timestamp, i.e. packets with older
+  // timestamps should no longer be inserted.
+  void RemoveOld(uint32_t timestamp);
+
+  bool UpdatePacket(VCMPacket* packet);
+  void UpdateFrames(FrameList* frames);
+
+  // Public for testing.
+  // Sets |it| to the latest SS data at or before |timestamp|; false if none.
+  bool Find(uint32_t timestamp, SsMap::iterator* it);
+
+ private:
+  // These two functions are called by RemoveOld.
+  // Checks if it is time to do a clean up (done each kSsCleanupIntervalSec).
+  bool TimeForCleanup(uint32_t timestamp) const;
+
+  // Re-keys the oldest SS data to |timestamp| to handle timestamp wrap when SS
+  // data is received very seldom (e.g. once at the beginning and a second time
+  // only when IsNewerTimestamp is no longer true).
+  void AdvanceFront(uint32_t timestamp);
+
+  SsMap ss_map_;
+};
+
 class VCMJitterBuffer {
  public:
   VCMJitterBuffer(Clock* clock, rtc::scoped_ptr<EventWrapper> event);
@@ -307,10 +338,8 @@
   FrameList incomplete_frames_ GUARDED_BY(crit_sect_);
   VCMDecodingState last_decoded_state_ GUARDED_BY(crit_sect_);
   bool first_packet_since_reset_;
-  // Contains last received frame's temporal information for non-flexible mode.
-  GofInfoVP9 last_gof_;
-  uint32_t last_gof_timestamp_;
-  bool last_gof_valid_;
+  // Contains scalability structure data for VP9.
+  Vp9SsMap vp9_ss_map_ GUARDED_BY(crit_sect_);
 
   // Statistics.
   VCMReceiveStatisticsCallback* stats_callback_ GUARDED_BY(crit_sect_);
diff --git a/webrtc/modules/video_coding/main/source/jitter_buffer_unittest.cc b/webrtc/modules/video_coding/main/source/jitter_buffer_unittest.cc
index ab4d8cd..06946a4 100644
--- a/webrtc/modules/video_coding/main/source/jitter_buffer_unittest.cc
+++ b/webrtc/modules/video_coding/main/source/jitter_buffer_unittest.cc
@@ -25,6 +25,166 @@
 
 namespace webrtc {
 
+namespace {
+  const uint32_t kProcessIntervalSec = 60;
+}  // namespace
+
+class Vp9SsMapTest : public ::testing::Test {
+ protected:
+  Vp9SsMapTest()
+      : packet_(data_, 1400, 1234, 1, true) {}
+
+  virtual void SetUp() {
+    packet_.isFirstPacket = true;
+    packet_.markerBit = true;
+    packet_.frameType = kVideoFrameKey;
+    packet_.codec = kVideoCodecVP9;
+    packet_.codecSpecificHeader.codec = kRtpVideoVp9;
+    packet_.codecSpecificHeader.codecHeader.VP9.flexible_mode = false;
+    packet_.codecSpecificHeader.codecHeader.VP9.gof_idx = 0;
+    packet_.codecSpecificHeader.codecHeader.VP9.temporal_idx = kNoTemporalIdx;
+    packet_.codecSpecificHeader.codecHeader.VP9.temporal_up_switch = false;
+    packet_.codecSpecificHeader.codecHeader.VP9.ss_data_available = true;
+    packet_.codecSpecificHeader.codecHeader.VP9.gof.SetGofInfoVP9(
+        kTemporalStructureMode3);  // kTemporalStructureMode3: 0-2-1-2..
+  }
+
+  Vp9SsMap map_;
+  uint8_t data_[1500];
+  VCMPacket packet_;
+};
+
+TEST_F(Vp9SsMapTest, Insert) {
+  EXPECT_TRUE(map_.Insert(packet_));
+}
+
+TEST_F(Vp9SsMapTest, Insert_NoSsData) {
+  packet_.codecSpecificHeader.codecHeader.VP9.ss_data_available = false;
+  EXPECT_FALSE(map_.Insert(packet_));
+}
+
+TEST_F(Vp9SsMapTest, Find) {
+  EXPECT_TRUE(map_.Insert(packet_));
+  Vp9SsMap::SsMap::iterator it;
+  EXPECT_TRUE(map_.Find(packet_.timestamp, &it));
+  EXPECT_EQ(packet_.timestamp, it->first);
+}
+
+TEST_F(Vp9SsMapTest, Find_WithWrap) {
+  const uint32_t kSsTimestamp1 = 0xFFFFFFFF;
+  const uint32_t kSsTimestamp2 = 100;
+  packet_.timestamp = kSsTimestamp1;
+  EXPECT_TRUE(map_.Insert(packet_));
+  packet_.timestamp = kSsTimestamp2;
+  EXPECT_TRUE(map_.Insert(packet_));
+  Vp9SsMap::SsMap::iterator it;
+  EXPECT_FALSE(map_.Find(kSsTimestamp1 - 1, &it));
+  EXPECT_TRUE(map_.Find(kSsTimestamp1, &it));
+  EXPECT_EQ(kSsTimestamp1, it->first);
+  EXPECT_TRUE(map_.Find(0, &it));
+  EXPECT_EQ(kSsTimestamp1, it->first);
+  EXPECT_TRUE(map_.Find(kSsTimestamp2 - 1, &it));
+  EXPECT_EQ(kSsTimestamp1, it->first);
+  EXPECT_TRUE(map_.Find(kSsTimestamp2, &it));
+  EXPECT_EQ(kSsTimestamp2, it->first);
+  EXPECT_TRUE(map_.Find(kSsTimestamp2 + 1, &it));
+  EXPECT_EQ(kSsTimestamp2, it->first);
+}
+
+TEST_F(Vp9SsMapTest, Reset) {
+  EXPECT_TRUE(map_.Insert(packet_));
+  Vp9SsMap::SsMap::iterator it;
+  EXPECT_TRUE(map_.Find(packet_.timestamp, &it));
+  EXPECT_EQ(packet_.timestamp, it->first);
+
+  map_.Reset();
+  EXPECT_FALSE(map_.Find(packet_.timestamp, &it));
+}
+
+TEST_F(Vp9SsMapTest, RemoveOld) {
+  Vp9SsMap::SsMap::iterator it;
+  const uint32_t kSsTimestamp1 = 10000;
+  packet_.timestamp = kSsTimestamp1;
+  EXPECT_TRUE(map_.Insert(packet_));
+
+  const uint32_t kTimestamp = kSsTimestamp1 + kProcessIntervalSec * 90000;
+  map_.RemoveOld(kTimestamp - 1);              // Interval not passed.
+  EXPECT_TRUE(map_.Find(kSsTimestamp1, &it));  // Should not have been removed.
+
+  map_.RemoveOld(kTimestamp);
+  EXPECT_FALSE(map_.Find(kSsTimestamp1, &it));
+  EXPECT_TRUE(map_.Find(kTimestamp, &it));
+  EXPECT_EQ(kTimestamp, it->first);
+}
+
+TEST_F(Vp9SsMapTest, RemoveOld_WithWrap) {
+  Vp9SsMap::SsMap::iterator it;
+  const uint32_t kSsTimestamp1 = 0xFFFFFFFF - kProcessIntervalSec * 90000;
+  const uint32_t kSsTimestamp2 = 10;
+  const uint32_t kSsTimestamp3 = 1000;
+  packet_.timestamp = kSsTimestamp1;
+  EXPECT_TRUE(map_.Insert(packet_));
+  packet_.timestamp = kSsTimestamp2;
+  EXPECT_TRUE(map_.Insert(packet_));
+  packet_.timestamp = kSsTimestamp3;
+  EXPECT_TRUE(map_.Insert(packet_));
+
+  map_.RemoveOld(kSsTimestamp3);
+  EXPECT_FALSE(map_.Find(kSsTimestamp1, &it));
+  EXPECT_FALSE(map_.Find(kSsTimestamp2, &it));
+  EXPECT_TRUE(map_.Find(kSsTimestamp3, &it));
+}
+
+TEST_F(Vp9SsMapTest, UpdatePacket_NoSsData) {
+  packet_.codecSpecificHeader.codecHeader.VP9.gof_idx = 0;
+  EXPECT_FALSE(map_.UpdatePacket(&packet_));
+}
+
+TEST_F(Vp9SsMapTest, UpdatePacket_NoGofIdx) {
+  EXPECT_TRUE(map_.Insert(packet_));
+  packet_.codecSpecificHeader.codecHeader.VP9.gof_idx = kNoGofIdx;
+  EXPECT_FALSE(map_.UpdatePacket(&packet_));
+}
+
+TEST_F(Vp9SsMapTest, UpdatePacket_InvalidGofIdx) {
+  EXPECT_TRUE(map_.Insert(packet_));
+  packet_.codecSpecificHeader.codecHeader.VP9.gof_idx = 4;
+  EXPECT_FALSE(map_.UpdatePacket(&packet_));
+}
+
+TEST_F(Vp9SsMapTest, UpdatePacket) {
+  EXPECT_TRUE(map_.Insert(packet_));  // kTemporalStructureMode3: 0-2-1-2..
+
+  packet_.codecSpecificHeader.codecHeader.VP9.gof_idx = 0;
+  EXPECT_TRUE(map_.UpdatePacket(&packet_));
+  EXPECT_EQ(0, packet_.codecSpecificHeader.codecHeader.VP9.temporal_idx);
+  EXPECT_FALSE(packet_.codecSpecificHeader.codecHeader.VP9.temporal_up_switch);
+  EXPECT_EQ(1U, packet_.codecSpecificHeader.codecHeader.VP9.num_ref_pics);
+  EXPECT_EQ(4, packet_.codecSpecificHeader.codecHeader.VP9.pid_diff[0]);
+
+  packet_.codecSpecificHeader.codecHeader.VP9.gof_idx = 1;
+  EXPECT_TRUE(map_.UpdatePacket(&packet_));
+  EXPECT_EQ(2, packet_.codecSpecificHeader.codecHeader.VP9.temporal_idx);
+  EXPECT_TRUE(packet_.codecSpecificHeader.codecHeader.VP9.temporal_up_switch);
+  EXPECT_EQ(1U, packet_.codecSpecificHeader.codecHeader.VP9.num_ref_pics);
+  EXPECT_EQ(1, packet_.codecSpecificHeader.codecHeader.VP9.pid_diff[0]);
+
+  packet_.codecSpecificHeader.codecHeader.VP9.gof_idx = 2;
+  EXPECT_TRUE(map_.UpdatePacket(&packet_));
+  EXPECT_EQ(1, packet_.codecSpecificHeader.codecHeader.VP9.temporal_idx);
+  EXPECT_TRUE(packet_.codecSpecificHeader.codecHeader.VP9.temporal_up_switch);
+  EXPECT_EQ(1U, packet_.codecSpecificHeader.codecHeader.VP9.num_ref_pics);
+  EXPECT_EQ(2, packet_.codecSpecificHeader.codecHeader.VP9.pid_diff[0]);
+
+  packet_.codecSpecificHeader.codecHeader.VP9.gof_idx = 3;
+  EXPECT_TRUE(map_.UpdatePacket(&packet_));
+  EXPECT_EQ(2, packet_.codecSpecificHeader.codecHeader.VP9.temporal_idx);
+  EXPECT_FALSE(packet_.codecSpecificHeader.codecHeader.VP9.temporal_up_switch);
+  EXPECT_EQ(2U, packet_.codecSpecificHeader.codecHeader.VP9.num_ref_pics);
+  EXPECT_EQ(1, packet_.codecSpecificHeader.codecHeader.VP9.pid_diff[0]);
+  EXPECT_EQ(2, packet_.codecSpecificHeader.codecHeader.VP9.pid_diff[1]);
+}
+
 class TestBasicJitterBuffer : public ::testing::Test {
  protected:
   virtual void SetUp() {
@@ -706,6 +866,215 @@
   }
 }
 
+TEST_F(TestBasicJitterBuffer, TestSkipForwardVp9) {
+  // Verify that JB skips forward to next base layer frame.
+  //  -------------------------------------------------
+  // | 65485 | 65486 | 65487 | 65488 | 65489 | ...
+  // | pid:5 | pid:6 | pid:7 | pid:8 | pid:9 | ...
+  // | tid:0 | tid:2 | tid:1 | tid:2 | tid:0 | ...
+  // |  ss   |   x   |   x   |   x   |       |
+  //  -------------------------------------------------
+  // |<----------tl0idx:200--------->|<---tl0idx:201---
+
+  bool re = false;
+  packet_->codec = kVideoCodecVP9;
+  packet_->codecSpecificHeader.codec = kRtpVideoVp9;
+  packet_->isFirstPacket = true;
+  packet_->markerBit = true;
+  packet_->codecSpecificHeader.codecHeader.VP9.flexible_mode = false;
+  packet_->codecSpecificHeader.codecHeader.VP9.spatial_idx = 0;
+  packet_->codecSpecificHeader.codecHeader.VP9.beginning_of_frame = true;
+  packet_->codecSpecificHeader.codecHeader.VP9.end_of_frame = true;
+  packet_->codecSpecificHeader.codecHeader.VP9.temporal_idx = kNoTemporalIdx;
+  packet_->codecSpecificHeader.codecHeader.VP9.temporal_up_switch = false;
+
+  packet_->seqNum = 65485;
+  packet_->timestamp = 1000;
+  packet_->frameType = kVideoFrameKey;
+  packet_->codecSpecificHeader.codecHeader.VP9.picture_id = 5;
+  packet_->codecSpecificHeader.codecHeader.VP9.tl0_pic_idx = 200;
+  packet_->codecSpecificHeader.codecHeader.VP9.gof_idx = 0;
+  packet_->codecSpecificHeader.codecHeader.VP9.ss_data_available = true;
+  packet_->codecSpecificHeader.codecHeader.VP9.gof.SetGofInfoVP9(
+      kTemporalStructureMode3);  // kTemporalStructureMode3: 0-2-1-2..
+  EXPECT_EQ(kCompleteSession, jitter_buffer_->InsertPacket(*packet_, &re));
+
+  // Insert next temporal layer 0.
+  packet_->seqNum = 65489;
+  packet_->timestamp = 13000;
+  packet_->frameType = kVideoFrameDelta;
+  packet_->codecSpecificHeader.codecHeader.VP9.picture_id = 9;
+  packet_->codecSpecificHeader.codecHeader.VP9.tl0_pic_idx = 201;
+  packet_->codecSpecificHeader.codecHeader.VP9.gof_idx = 0;
+  packet_->codecSpecificHeader.codecHeader.VP9.ss_data_available = false;
+  EXPECT_EQ(kCompleteSession, jitter_buffer_->InsertPacket(*packet_, &re));
+
+  VCMEncodedFrame* frame_out = DecodeCompleteFrame();
+  EXPECT_EQ(1000U, frame_out->TimeStamp());
+  EXPECT_EQ(kVideoFrameKey, frame_out->FrameType());
+  jitter_buffer_->ReleaseFrame(frame_out);
+
+  frame_out = DecodeCompleteFrame();
+  EXPECT_EQ(13000U, frame_out->TimeStamp());
+  EXPECT_EQ(kVideoFrameDelta, frame_out->FrameType());
+  jitter_buffer_->ReleaseFrame(frame_out);
+}
+
+TEST_F(TestBasicJitterBuffer, ReorderedVp9SsData_3TlLayers) {
+  // Verify that frames are updated with SS data when SS packet is reordered.
+  //  --------------------------------
+  // | 65486 | 65487 | 65485 |...
+  // | pid:6 | pid:7 | pid:5 |...
+  // | tid:2 | tid:1 | tid:0 |...
+  // |       |       |  ss   |
+  //  --------------------------------
+  // |<--------tl0idx:200--------->|
+
+  bool re = false;
+  packet_->codec = kVideoCodecVP9;
+  packet_->codecSpecificHeader.codec = kRtpVideoVp9;
+  packet_->isFirstPacket = true;
+  packet_->markerBit = true;
+  packet_->codecSpecificHeader.codecHeader.VP9.flexible_mode = false;
+  packet_->codecSpecificHeader.codecHeader.VP9.spatial_idx = 0;
+  packet_->codecSpecificHeader.codecHeader.VP9.beginning_of_frame = true;
+  packet_->codecSpecificHeader.codecHeader.VP9.end_of_frame = true;
+  packet_->codecSpecificHeader.codecHeader.VP9.temporal_idx = kNoTemporalIdx;
+  packet_->codecSpecificHeader.codecHeader.VP9.temporal_up_switch = false;
+  packet_->codecSpecificHeader.codecHeader.VP9.tl0_pic_idx = 200;
+
+  packet_->seqNum = 65486;
+  packet_->timestamp = 6000;
+  packet_->frameType = kVideoFrameDelta;
+  packet_->codecSpecificHeader.codecHeader.VP9.picture_id = 6;
+  packet_->codecSpecificHeader.codecHeader.VP9.gof_idx = 1;
+  EXPECT_EQ(kCompleteSession, jitter_buffer_->InsertPacket(*packet_, &re));
+
+  packet_->seqNum = 65487;
+  packet_->timestamp = 9000;
+  packet_->frameType = kVideoFrameDelta;
+  packet_->codecSpecificHeader.codecHeader.VP9.picture_id = 7;
+  packet_->codecSpecificHeader.codecHeader.VP9.gof_idx = 2;
+  EXPECT_EQ(kCompleteSession, jitter_buffer_->InsertPacket(*packet_, &re));
+
+  // Insert first frame with SS data.
+  packet_->seqNum = 65485;
+  packet_->timestamp = 3000;
+  packet_->frameType = kVideoFrameKey;
+  packet_->width = 352;
+  packet_->height = 288;
+  packet_->codecSpecificHeader.codecHeader.VP9.picture_id = 5;
+  packet_->codecSpecificHeader.codecHeader.VP9.gof_idx = 0;
+  packet_->codecSpecificHeader.codecHeader.VP9.ss_data_available = true;
+  packet_->codecSpecificHeader.codecHeader.VP9.gof.SetGofInfoVP9(
+      kTemporalStructureMode3);  // kTemporalStructureMode3: 0-2-1-2..
+  EXPECT_EQ(kCompleteSession, jitter_buffer_->InsertPacket(*packet_, &re));
+
+  VCMEncodedFrame* frame_out = DecodeCompleteFrame();
+  EXPECT_EQ(3000U, frame_out->TimeStamp());
+  EXPECT_EQ(kVideoFrameKey, frame_out->FrameType());
+  EXPECT_EQ(0, frame_out->CodecSpecific()->codecSpecific.VP9.temporal_idx);
+  EXPECT_FALSE(
+      frame_out->CodecSpecific()->codecSpecific.VP9.temporal_up_switch);
+  jitter_buffer_->ReleaseFrame(frame_out);
+
+  frame_out = DecodeCompleteFrame();
+  EXPECT_EQ(6000U, frame_out->TimeStamp());
+  EXPECT_EQ(kVideoFrameDelta, frame_out->FrameType());
+  EXPECT_EQ(2, frame_out->CodecSpecific()->codecSpecific.VP9.temporal_idx);
+  EXPECT_TRUE(frame_out->CodecSpecific()->codecSpecific.VP9.temporal_up_switch);
+  jitter_buffer_->ReleaseFrame(frame_out);
+
+  frame_out = DecodeCompleteFrame();
+  EXPECT_EQ(9000U, frame_out->TimeStamp());
+  EXPECT_EQ(kVideoFrameDelta, frame_out->FrameType());
+  EXPECT_EQ(1, frame_out->CodecSpecific()->codecSpecific.VP9.temporal_idx);
+  EXPECT_TRUE(frame_out->CodecSpecific()->codecSpecific.VP9.temporal_up_switch);
+  jitter_buffer_->ReleaseFrame(frame_out);
+}
+
+TEST_F(TestBasicJitterBuffer, ReorderedVp9SsData_2Tl2SLayers) {
+  // Verify that frames are updated with SS data when SS packet is reordered.
+  //  -----------------------------------------
+  // | 65486  | 65487  | 65485  | 65484  |...
+  // | pid:6  | pid:6  | pid:5  | pid:5  |...
+  // | tid:1  | tid:1  | tid:0  | tid:0  |...
+  // | sid:0  | sid:1  | sid:1  | sid:0  |...
+  // | t:6000 | t:6000 | t:3000 | t:3000 |
+  // |        |        |        |  ss    |
+  //  -----------------------------------------
+  // |<-----------tl0idx:200------------>|
+
+  bool re = false;
+  packet_->codec = kVideoCodecVP9;
+  packet_->codecSpecificHeader.codec = kRtpVideoVp9;
+  packet_->codecSpecificHeader.codecHeader.VP9.flexible_mode = false;
+  packet_->codecSpecificHeader.codecHeader.VP9.beginning_of_frame = true;
+  packet_->codecSpecificHeader.codecHeader.VP9.end_of_frame = true;
+  packet_->codecSpecificHeader.codecHeader.VP9.temporal_idx = kNoTemporalIdx;
+  packet_->codecSpecificHeader.codecHeader.VP9.temporal_up_switch = false;
+  packet_->codecSpecificHeader.codecHeader.VP9.tl0_pic_idx = 200;
+
+  packet_->isFirstPacket = true;
+  packet_->markerBit = false;
+  packet_->seqNum = 65486;
+  packet_->timestamp = 6000;
+  packet_->frameType = kVideoFrameDelta;
+  packet_->codecSpecificHeader.codecHeader.VP9.spatial_idx = 0;
+  packet_->codecSpecificHeader.codecHeader.VP9.picture_id = 6;
+  packet_->codecSpecificHeader.codecHeader.VP9.gof_idx = 1;
+  EXPECT_EQ(kIncomplete, jitter_buffer_->InsertPacket(*packet_, &re));
+
+  packet_->isFirstPacket = false;
+  packet_->markerBit = true;
+  packet_->seqNum = 65487;
+  packet_->frameType = kVideoFrameDelta;
+  packet_->codecSpecificHeader.codecHeader.VP9.spatial_idx = 1;
+  packet_->codecSpecificHeader.codecHeader.VP9.picture_id = 6;
+  packet_->codecSpecificHeader.codecHeader.VP9.gof_idx = 1;
+  EXPECT_EQ(kCompleteSession, jitter_buffer_->InsertPacket(*packet_, &re));
+
+  packet_->isFirstPacket = false;
+  packet_->markerBit = true;
+  packet_->seqNum = 65485;
+  packet_->timestamp = 3000;
+  packet_->frameType = kVideoFrameKey;
+  packet_->codecSpecificHeader.codecHeader.VP9.spatial_idx = 1;
+  packet_->codecSpecificHeader.codecHeader.VP9.picture_id = 5;
+  packet_->codecSpecificHeader.codecHeader.VP9.gof_idx = 0;
+  EXPECT_EQ(kIncomplete, jitter_buffer_->InsertPacket(*packet_, &re));
+
+  // Insert first frame with SS data.
+  packet_->isFirstPacket = true;
+  packet_->markerBit = false;
+  packet_->seqNum = 65484;
+  packet_->frameType = kVideoFrameKey;
+  packet_->width = 352;
+  packet_->height = 288;
+  packet_->codecSpecificHeader.codecHeader.VP9.spatial_idx = 0;
+  packet_->codecSpecificHeader.codecHeader.VP9.picture_id = 5;
+  packet_->codecSpecificHeader.codecHeader.VP9.gof_idx = 0;
+  packet_->codecSpecificHeader.codecHeader.VP9.ss_data_available = true;
+  packet_->codecSpecificHeader.codecHeader.VP9.gof.SetGofInfoVP9(
+      kTemporalStructureMode2);  // kTemporalStructureMode2: 0-1-0-1..
+  EXPECT_EQ(kCompleteSession, jitter_buffer_->InsertPacket(*packet_, &re));
+
+  VCMEncodedFrame* frame_out = DecodeCompleteFrame();
+  EXPECT_EQ(3000U, frame_out->TimeStamp());
+  EXPECT_EQ(kVideoFrameKey, frame_out->FrameType());
+  EXPECT_EQ(0, frame_out->CodecSpecific()->codecSpecific.VP9.temporal_idx);
+  EXPECT_FALSE(
+      frame_out->CodecSpecific()->codecSpecific.VP9.temporal_up_switch);
+  jitter_buffer_->ReleaseFrame(frame_out);
+
+  frame_out = DecodeCompleteFrame();
+  EXPECT_EQ(6000U, frame_out->TimeStamp());
+  EXPECT_EQ(kVideoFrameDelta, frame_out->FrameType());
+  EXPECT_EQ(1, frame_out->CodecSpecific()->codecSpecific.VP9.temporal_idx);
+  EXPECT_TRUE(frame_out->CodecSpecific()->codecSpecific.VP9.temporal_up_switch);
+  jitter_buffer_->ReleaseFrame(frame_out);
+}
+
 TEST_F(TestBasicJitterBuffer, H264InsertStartCode) {
   packet_->frameType = kVideoFrameKey;
   packet_->isFirstPacket = true;
diff --git a/webrtc/modules/video_coding/main/source/session_info.cc b/webrtc/modules/video_coding/main/source/session_info.cc
index 7a32504..88fe69c 100644
--- a/webrtc/modules/video_coding/main/source/session_info.cc
+++ b/webrtc/modules/video_coding/main/source/session_info.cc
@@ -114,6 +114,24 @@
   return packets_.front().codecSpecificHeader.codecHeader.VP8.nonReference;
 }
 
+void VCMSessionInfo::SetGofInfo(const GofInfoVP9& gof_info, size_t idx) {
+  if (packets_.empty() ||
+      packets_.front().codecSpecificHeader.codec != kRtpVideoVp9 ||
+      packets_.front().codecSpecificHeader.codecHeader.VP9.flexible_mode) {
+    return;
+  }
+  packets_.front().codecSpecificHeader.codecHeader.VP9.temporal_idx =
+      gof_info.temporal_idx[idx];
+  packets_.front().codecSpecificHeader.codecHeader.VP9.temporal_up_switch =
+      gof_info.temporal_up_switch[idx];
+  packets_.front().codecSpecificHeader.codecHeader.VP9.num_ref_pics =
+      gof_info.num_ref_pics[idx];
+  for (size_t i = 0; i < gof_info.num_ref_pics[idx]; ++i) {
+    packets_.front().codecSpecificHeader.codecHeader.VP9.pid_diff[i] =
+        gof_info.pid_diff[idx][i];
+  }
+}
+
 void VCMSessionInfo::Reset() {
   session_nack_ = false;
   complete_ = false;
diff --git a/webrtc/modules/video_coding/main/source/session_info.h b/webrtc/modules/video_coding/main/source/session_info.h
index 21f6c43..88071e1 100644
--- a/webrtc/modules/video_coding/main/source/session_info.h
+++ b/webrtc/modules/video_coding/main/source/session_info.h
@@ -88,6 +88,8 @@
   int Tl0PicId() const;
   bool NonReference() const;
 
+  void SetGofInfo(const GofInfoVP9& gof_info, size_t idx);
+
   // The number of packets discarded because the decoder can't make use of
   // them.
   int packets_not_decodable() const;