Reland of Work on flexible mode and screen sharing. (patchset #1 id:1 of https://codereview.webrtc.org/1438543002/ )

Reason for revert:
Failing test is not related to this CL (the test fails on
master at an earlier date); re-landing the original CL.

(This time from my @webrtc account.)

Original issue's description:
> Revert of Work on flexible mode and screen sharing. (patchset #28 id:520001 of https://codereview.webrtc.org/1328113004/ )
>
> Reason for revert:
> Seems to break VideoSendStreamTest.ReconfigureBitratesSetsEncoderBitratesCorrectly on Linux Memcheck buildbot.
>
> Original issue's description:
> > Work on flexible mode and screen sharing.
> >
> > Implement VP8 style screensharing but with spatial layers.
> > Implement flexible mode.
> >
> > Files from other patches:
> > generic_encoder.cc
> > layer_filtering_transport.cc
> >
> > BUG=webrtc:4914
> >
> > Committed: https://crrev.com/77ccfb4d16c148e61a316746bb5d9705e8b39f4a
> > Cr-Commit-Position: refs/heads/master@{#10572}
>
> TBR=sprang@webrtc.org,stefan@webrtc.org,philipel@google.com,asapersson@webrtc.org,mflodman@webrtc.org,philipel@webrtc.org
> NOPRESUBMIT=true
> NOTREECHECKS=true
> NOTRY=true
> BUG=webrtc:4914
>
> Committed: https://crrev.com/0be8f1d347bdb171462df89c2a4c69b3f3eb7519
> Cr-Commit-Position: refs/heads/master@{#10578}

TBR=sprang@webrtc.org,stefan@webrtc.org,philipel@google.com,asapersson@webrtc.org,mflodman@webrtc.org,terelius@webrtc.org
NOPRESUBMIT=true
NOTREECHECKS=true
NOTRY=true
BUG=webrtc:4914

Review URL: https://codereview.webrtc.org/1431283002

Cr-Original-Commit-Position: refs/heads/master@{#10581}
Cr-Mirrored-From: https://chromium.googlesource.com/external/webrtc
Cr-Mirrored-Commit: cfc319be1d6afec77bd41eeb70d3e7886dd524db
diff --git a/modules/include/module_common_types.h b/modules/include/module_common_types.h
index ea43e4f..3a63af0 100644
--- a/modules/include/module_common_types.h
+++ b/modules/include/module_common_types.h
@@ -38,6 +38,7 @@
 const uint8_t kNoTemporalIdx = 0xFF;
 const uint8_t kNoSpatialIdx = 0xFF;
 const uint8_t kNoGofIdx = 0xFF;
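+// VP9 keeps a pool of eight frame buffers that encoded frames can
+// update and later reference.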
+const uint8_t kNumVp9Buffers = 8;
 const size_t kMaxVp9RefPics = 3;
 const size_t kMaxVp9FramesInGof = 0xFF;  // 8 bits
 const size_t kMaxVp9NumberOfSpatialLayers = 8;
diff --git a/modules/modules.gyp b/modules/modules.gyp
index a814376..a32271a 100644
--- a/modules/modules.gyp
+++ b/modules/modules.gyp
@@ -280,6 +280,7 @@
             'video_coding/codecs/vp8/simulcast_encoder_adapter_unittest.cc',
             'video_coding/codecs/vp8/simulcast_unittest.cc',
             'video_coding/codecs/vp8/simulcast_unittest.h',
+            'video_coding/codecs/vp9/screenshare_layers_unittest.cc',
             'video_coding/main/interface/mock/mock_vcm_callbacks.h',
             'video_coding/main/source/decoding_state_unittest.cc',
             'video_coding/main/source/jitter_buffer_unittest.cc',
diff --git a/modules/rtp_rtcp/source/rtp_format_vp9.cc b/modules/rtp_rtcp/source/rtp_format_vp9.cc
index c9d0f32..d2f22d5 100644
--- a/modules/rtp_rtcp/source/rtp_format_vp9.cc
+++ b/modules/rtp_rtcp/source/rtp_format_vp9.cc
@@ -725,7 +725,8 @@
       parsed_payload->type.Video.height = vp9->height[0];
     }
   }
-  parsed_payload->type.Video.isFirstPacket = b_bit && (vp9->spatial_idx == 0);
+  parsed_payload->type.Video.isFirstPacket =
+      b_bit && (!l_bit || !vp9->inter_layer_predicted);
 
   uint64_t rem_bits = parser.RemainingBitCount();
   assert(rem_bits % 8 == 0);
diff --git a/modules/video_coding/BUILD.gn b/modules/video_coding/BUILD.gn
index 9e8cd47..9a4a528 100644
--- a/modules/video_coding/BUILD.gn
+++ b/modules/video_coding/BUILD.gn
@@ -212,6 +212,8 @@
   if (rtc_build_vp9) {
     sources = [
       "codecs/vp9/include/vp9.h",
+      "codecs/vp9/screenshare_layers.cc",
+      "codecs/vp9/screenshare_layers.h",
       "codecs/vp9/vp9_frame_buffer_pool.cc",
       "codecs/vp9/vp9_frame_buffer_pool.h",
       "codecs/vp9/vp9_impl.cc",
diff --git a/modules/video_coding/codecs/interface/video_codec_interface.h b/modules/video_coding/codecs/interface/video_codec_interface.h
index 5115c4b..1e7de1f 100644
--- a/modules/video_coding/codecs/interface/video_codec_interface.h
+++ b/modules/video_coding/codecs/interface/video_codec_interface.h
@@ -68,6 +68,10 @@
   uint16_t width[kMaxVp9NumberOfSpatialLayers];
   uint16_t height[kMaxVp9NumberOfSpatialLayers];
   GofInfoVP9 gof;
+
+  // Frame reference data: the number of reference pictures and the
+  // picture id difference to each of them.
+  uint8_t num_ref_pics;
+  uint8_t p_diff[kMaxVp9RefPics];
 };
 
 struct CodecSpecificInfoGeneric {
diff --git a/modules/video_coding/codecs/vp9/screenshare_layers.cc b/modules/video_coding/codecs/vp9/screenshare_layers.cc
new file mode 100644
index 0000000..c7ed78a
--- /dev/null
+++ b/modules/video_coding/codecs/vp9/screenshare_layers.cc
@@ -0,0 +1,93 @@
+/* Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+*
+*  Use of this source code is governed by a BSD-style license
+*  that can be found in the LICENSE file in the root of the source
+*  tree. An additional intellectual property rights grant can be found
+*  in the file PATENTS.  All contributing project authors may
+*  be found in the AUTHORS file in the root of the source tree.
+*/
+
+#include <algorithm>
+#include "webrtc/modules/video_coding/codecs/vp9/screenshare_layers.h"
+#include "webrtc/base/checks.h"
+
+namespace webrtc {
+
+ScreenshareLayersVP9::ScreenshareLayersVP9(uint8_t num_layers)
+    : num_layers_(num_layers),
+      start_layer_(0),
+      last_timestamp_(0),
+      timestamp_initialized_(false) {
+  RTC_DCHECK_GT(num_layers, 0);
+  RTC_DCHECK_LE(num_layers, kMaxVp9NumberOfSpatialLayers);
+  memset(bits_used_, 0, sizeof(bits_used_));
+  memset(threshold_kbps_, 0, sizeof(threshold_kbps_));
+}
+
+uint8_t ScreenshareLayersVP9::GetStartLayer() const {
+  return start_layer_;
+}
+
+void ScreenshareLayersVP9::ConfigureBitrate(int threshold_kbps,
+                                            uint8_t layer_id) {
+  // The upper layer is always the layer we spill frames
+  // to when the bitrate becomes too high, so setting a max
+  // limit on it is not allowed. The top layer bitrate is
+  // never used either, so configuring it makes no difference.
+  RTC_DCHECK_LT(layer_id, num_layers_ - 1);
+  threshold_kbps_[layer_id] = threshold_kbps;
+}
+
+void ScreenshareLayersVP9::LayerFrameEncoded(unsigned int size_bytes,
+                                             uint8_t layer_id) {
+  RTC_DCHECK_LT(layer_id, num_layers_);
+  bits_used_[layer_id] += size_bytes * 8;
+}
+
+VP9EncoderImpl::SuperFrameRefSettings
+ScreenshareLayersVP9::GetSuperFrameSettings(uint32_t timestamp,
+                                            bool is_keyframe) {
+  VP9EncoderImpl::SuperFrameRefSettings settings;
+  if (!timestamp_initialized_) {
+    last_timestamp_ = timestamp;
+    timestamp_initialized_ = true;
+  }
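+  // The timestamp is in 90 kHz ticks, so dividing the tick delta by 90
+  // gives the elapsed time in milliseconds; below, ms * kbps numerically
+  // equals bits drained from each layer's budget.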
+  float time_diff = (timestamp - last_timestamp_) / 90.f;
+  float total_bits_used = 0;
+  float total_threshold_kbps = 0;
+  start_layer_ = 0;
+
+  // Up to (num_layers - 1) because we only have
+  // (num_layers - 1) thresholds to check.
+  for (int layer_id = 0; layer_id < num_layers_ - 1; ++layer_id) {
+    bits_used_[layer_id] = std::max(
+        0.f, bits_used_[layer_id] - time_diff * threshold_kbps_[layer_id]);
+    total_bits_used += bits_used_[layer_id];
+    total_threshold_kbps += threshold_kbps_[layer_id];
+
+    // If this is a keyframe then there should be no
+    // references to any previous frames.
+    if (!is_keyframe) {
+      settings.layer[layer_id].ref_buf1 = layer_id;
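+      // bits_used_ is in bits while the thresholds are in kbit/s, so
+      // comparing against threshold * 1000 spills a layer whenever more
+      // than one second's worth of its cumulative target rate is pending.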
+      if (total_bits_used > total_threshold_kbps * 1000)
+        start_layer_ = layer_id + 1;
+    }
+
+    settings.layer[layer_id].upd_buf = layer_id;
+  }
+  // Since the above loop does not iterate over the last layer,
+  // the reference of the last layer has to be set after the loop;
+  // if this is a keyframe there should be no references to
+  // any previous frames.
+  if (!is_keyframe)
+    settings.layer[num_layers_ - 1].ref_buf1 = num_layers_ - 1;
+
+  settings.layer[num_layers_ - 1].upd_buf = num_layers_ - 1;
+  settings.is_keyframe = is_keyframe;
+  settings.start_layer = start_layer_;
+  settings.stop_layer = num_layers_ - 1;
+  last_timestamp_ = timestamp;
+  return settings;
+}
+
+}  // namespace webrtc
diff --git a/modules/video_coding/codecs/vp9/screenshare_layers.h b/modules/video_coding/codecs/vp9/screenshare_layers.h
new file mode 100644
index 0000000..5a901ae
--- /dev/null
+++ b/modules/video_coding/codecs/vp9/screenshare_layers.h
@@ -0,0 +1,66 @@
+/* Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+*
+*  Use of this source code is governed by a BSD-style license
+*  that can be found in the LICENSE file in the root of the source
+*  tree. An additional intellectual property rights grant can be found
+*  in the file PATENTS.  All contributing project authors may
+*  be found in the AUTHORS file in the root of the source tree.
+*/
+
+#ifndef WEBRTC_MODULES_VIDEO_CODING_CODECS_VP9_SCREENSHARE_LAYERS_H_
+#define WEBRTC_MODULES_VIDEO_CODING_CODECS_VP9_SCREENSHARE_LAYERS_H_
+
+#include "webrtc/modules/video_coding/codecs/vp9/vp9_impl.h"
+
+namespace webrtc {
+
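+// Selects which spatial layers of a VP9 superframe to encode when
+// screensharing: frames are spilled to higher layers whenever a layer
+// exceeds its configured bitrate threshold.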
+class ScreenshareLayersVP9 {
+ public:
+  explicit ScreenshareLayersVP9(uint8_t num_layers);
+
+  // The target bitrate threshold, in kbps, for the layer with id layer_id.
+  void ConfigureBitrate(int threshold_kbps, uint8_t layer_id);
+
+  // Returns the current start layer.
+  uint8_t GetStartLayer() const;
+
+  // Update the layer state with the size of the encoded layer frame.
+  void LayerFrameEncoded(unsigned int size_bytes, uint8_t layer_id);
+
+  // Get the layer settings for the next superframe.
+  //
+  // In short, each time GetSuperFrameSettings() is called the
+  // bitrate of every layer is calculated, and if the cumulative
+  // bitrate used exceeds the configured cumulative bitrates
+  // (set with ConfigureBitrate()) up to and including that
+  // layer, then the resulting encoding settings for the
+  // superframe will only encode layers above that layer.
+  VP9EncoderImpl::SuperFrameRefSettings GetSuperFrameSettings(
+      uint32_t timestamp,
+      bool is_keyframe);
+
+ private:
+  // The number of layers in use.
+  uint8_t num_layers_;
+
+  // The index of the first layer to encode.
+  uint8_t start_layer_;
+
+  // Cumulative target kbps for the different layers.
+  float threshold_kbps_[kMaxVp9NumberOfSpatialLayers - 1];
+
+  // How many bits have been used for a certain layer. Increased in
+  // LayerFrameEncoded() by the size of the encoded frame and decreased in
+  // GetSuperFrameSettings() depending on the time between frames.
+  float bits_used_[kMaxVp9NumberOfSpatialLayers];
+
+  // Timestamp of last frame.
+  uint32_t last_timestamp_;
+
+  // If the last_timestamp_ has been set.
+  bool timestamp_initialized_;
+};
+
+}  // namespace webrtc
+
+#endif  // WEBRTC_MODULES_VIDEO_CODING_CODECS_VP9_SCREENSHARE_LAYERS_H_
diff --git a/modules/video_coding/codecs/vp9/screenshare_layers_unittest.cc b/modules/video_coding/codecs/vp9/screenshare_layers_unittest.cc
new file mode 100644
index 0000000..5eb7b23
--- /dev/null
+++ b/modules/video_coding/codecs/vp9/screenshare_layers_unittest.cc
@@ -0,0 +1,323 @@
+/*
+ *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <limits>
+
+#include "testing/gtest/include/gtest/gtest.h"
+#include "vpx/vp8cx.h"
+#include "webrtc/base/logging.h"
+#include "webrtc/modules/video_coding/codecs/vp9/screenshare_layers.h"
+#include "webrtc/modules/video_coding/codecs/vp9/vp9_impl.h"
+#include "webrtc/system_wrappers/include/clock.h"
+
+namespace webrtc {
+
+typedef VP9EncoderImpl::SuperFrameRefSettings Settings;
+
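+// RTP video timestamps tick at 90 kHz.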
+const uint32_t kTickFrequency = 90000;
+
+class ScreenshareLayerTestVP9 : public ::testing::Test {
+ protected:
+  ScreenshareLayerTestVP9() : clock_(0) {}
+  virtual ~ScreenshareLayerTestVP9() {}
+
+  void InitScreenshareLayers(int layers) {
+    layers_.reset(new ScreenshareLayersVP9(layers));
+  }
+
+  void ConfigureBitrateForLayer(int kbps, uint8_t layer_id) {
+    layers_->ConfigureBitrate(kbps, layer_id);
+  }
+
+  void AdvanceTime(int64_t milliseconds) {
+    clock_.AdvanceTimeMilliseconds(milliseconds);
+  }
+
+  void AddKilobitsToLayer(int kilobits, uint8_t layer_id) {
+    layers_->LayerFrameEncoded(kilobits * 1000 / 8, layer_id);
+  }
+
+  void EqualRefsForLayer(const Settings& actual, uint8_t layer_id) {
+    EXPECT_EQ(expected_.layer[layer_id].upd_buf,
+              actual.layer[layer_id].upd_buf);
+    EXPECT_EQ(expected_.layer[layer_id].ref_buf1,
+              actual.layer[layer_id].ref_buf1);
+    EXPECT_EQ(expected_.layer[layer_id].ref_buf2,
+              actual.layer[layer_id].ref_buf2);
+    EXPECT_EQ(expected_.layer[layer_id].ref_buf3,
+              actual.layer[layer_id].ref_buf3);
+  }
+
+  void EqualRefs(const Settings& actual) {
+    for (unsigned int layer_id = 0; layer_id < kMaxVp9NumberOfSpatialLayers;
+         ++layer_id) {
+      EqualRefsForLayer(actual, layer_id);
+    }
+  }
+
+  void EqualStartStopKeyframe(const Settings& actual) {
+    EXPECT_EQ(expected_.start_layer, actual.start_layer);
+    EXPECT_EQ(expected_.stop_layer, actual.stop_layer);
+    EXPECT_EQ(expected_.is_keyframe, actual.is_keyframe);
+  }
+
+  // Check that the settings returned by GetSuperFrameSettings() are
+  // equal to the expected_ settings.
+  void EqualToExpected() {
+    uint32_t frame_timestamp =
+        clock_.TimeInMilliseconds() * (kTickFrequency / 1000);
+    Settings actual =
+        layers_->GetSuperFrameSettings(frame_timestamp, expected_.is_keyframe);
+    EqualRefs(actual);
+    EqualStartStopKeyframe(actual);
+  }
+
+  Settings expected_;
+  SimulatedClock clock_;
+  rtc::scoped_ptr<ScreenshareLayersVP9> layers_;
+};
+
+TEST_F(ScreenshareLayerTestVP9, NoRefsOnKeyFrame) {
+  const int kNumLayers = kMaxVp9NumberOfSpatialLayers;
+  InitScreenshareLayers(kNumLayers);
+  expected_.start_layer = 0;
+  expected_.stop_layer = kNumLayers - 1;
+
+  for (int l = 0; l < kNumLayers; ++l) {
+    expected_.layer[l].upd_buf = l;
+  }
+  expected_.is_keyframe = true;
+  EqualToExpected();
+
+  for (int l = 0; l < kNumLayers; ++l) {
+    expected_.layer[l].ref_buf1 = l;
+  }
+  expected_.is_keyframe = false;
+  EqualToExpected();
+}
+
+// Test if it is possible to send at a high bitrate (over the threshold)
+// after a long period of low bitrate. This should not be possible.
+TEST_F(ScreenshareLayerTestVP9, DontAccumulateAvailableBitsOverTime) {
+  InitScreenshareLayers(2);
+  ConfigureBitrateForLayer(100, 0);
+
+  expected_.layer[0].upd_buf = 0;
+  expected_.layer[0].ref_buf1 = 0;
+  expected_.layer[1].upd_buf = 1;
+  expected_.layer[1].ref_buf1 = 1;
+  expected_.start_layer = 0;
+  expected_.stop_layer = 1;
+
+  // Send 10 frames at a low bitrate (50 kbps)
+  for (int i = 0; i < 10; ++i) {
+    AdvanceTime(200);
+    EqualToExpected();
+    AddKilobitsToLayer(10, 0);
+  }
+
+  AdvanceTime(200);
+  EqualToExpected();
+  AddKilobitsToLayer(301, 0);
+
+  // Send 10 frames at a high bitrate (200 kbps)
+  expected_.start_layer = 1;
+  for (int i = 0; i < 10; ++i) {
+    AdvanceTime(200);
+    EqualToExpected();
+    AddKilobitsToLayer(40, 1);
+  }
+}
+
+// Test that used bits are accumulated over layers, as they should be.
+TEST_F(ScreenshareLayerTestVP9, AccumulateUsedBitsOverLayers) {
+  const int kNumLayers = kMaxVp9NumberOfSpatialLayers;
+  InitScreenshareLayers(kNumLayers);
+  for (int l = 0; l < kNumLayers - 1; ++l)
+    ConfigureBitrateForLayer(100, l);
+  for (int l = 0; l < kNumLayers; ++l) {
+    expected_.layer[l].upd_buf = l;
+    expected_.layer[l].ref_buf1 = l;
+  }
+
+  expected_.start_layer = 0;
+  expected_.stop_layer = kNumLayers - 1;
+  EqualToExpected();
+
+  for (int layer = 0; layer < kNumLayers - 1; ++layer) {
+    expected_.start_layer = layer;
+    EqualToExpected();
+    AddKilobitsToLayer(101, layer);
+  }
+}
+
+// General testing of the bitrate controller.
+TEST_F(ScreenshareLayerTestVP9, 2LayerBitrate) {
+  InitScreenshareLayers(2);
+  ConfigureBitrateForLayer(100, 0);
+
+  expected_.layer[0].upd_buf = 0;
+  expected_.layer[1].upd_buf = 1;
+  expected_.layer[0].ref_buf1 = -1;
+  expected_.layer[1].ref_buf1 = -1;
+  expected_.start_layer = 0;
+  expected_.stop_layer = 1;
+
+  expected_.is_keyframe = true;
+  EqualToExpected();
+  AddKilobitsToLayer(100, 0);
+
+  expected_.layer[0].ref_buf1 = 0;
+  expected_.layer[1].ref_buf1 = 1;
+  expected_.is_keyframe = false;
+  AdvanceTime(199);
+  EqualToExpected();
+  AddKilobitsToLayer(100, 0);
+
+  expected_.start_layer = 1;
+  for (int frame = 0; frame < 3; ++frame) {
+    AdvanceTime(200);
+    EqualToExpected();
+    AddKilobitsToLayer(100, 1);
+  }
+
+  // Just before enough bits become available for L0 @0.999 seconds.
+  AdvanceTime(199);
+  EqualToExpected();
+  AddKilobitsToLayer(100, 1);
+
+  // Just after enough bits become available for L0 @1.001 seconds.
+  expected_.start_layer = 0;
+  AdvanceTime(2);
+  EqualToExpected();
+  AddKilobitsToLayer(100, 0);
+
+  // Keyframes always encode all layers, even if they are over budget.
+  expected_.layer[0].ref_buf1 = -1;
+  expected_.layer[1].ref_buf1 = -1;
+  expected_.is_keyframe = true;
+  AdvanceTime(499);
+  EqualToExpected();
+  expected_.layer[0].ref_buf1 = 0;
+  expected_.layer[1].ref_buf1 = 1;
+  expected_.start_layer = 1;
+  expected_.is_keyframe = false;
+  EqualToExpected();
+  AddKilobitsToLayer(100, 0);
+
+  // 400 kb in L0 --> @3 second mark to fall below the threshold.
+  // Just before @2.999 seconds.
+  expected_.is_keyframe = false;
+  AdvanceTime(1499);
+  EqualToExpected();
+  AddKilobitsToLayer(100, 1);
+
+  // Just after @3.001 seconds.
+  expected_.start_layer = 0;
+  AdvanceTime(2);
+  EqualToExpected();
+  AddKilobitsToLayer(100, 0);
+}
+
+// General testing of the bitrate controller.
+TEST_F(ScreenshareLayerTestVP9, 3LayerBitrate) {
+  InitScreenshareLayers(3);
+  ConfigureBitrateForLayer(100, 0);
+  ConfigureBitrateForLayer(100, 1);
+
+  for (int l = 0; l < 3; ++l) {
+    expected_.layer[l].upd_buf = l;
+    expected_.layer[l].ref_buf1 = l;
+  }
+  expected_.start_layer = 0;
+  expected_.stop_layer = 2;
+
+  EqualToExpected();
+  AddKilobitsToLayer(105, 0);
+  AddKilobitsToLayer(30, 1);
+
+  AdvanceTime(199);
+  EqualToExpected();
+  AddKilobitsToLayer(105, 0);
+  AddKilobitsToLayer(30, 1);
+
+  expected_.start_layer = 1;
+  AdvanceTime(200);
+  EqualToExpected();
+  AddKilobitsToLayer(130, 1);
+
+  expected_.start_layer = 2;
+  AdvanceTime(200);
+  EqualToExpected();
+
+  // 400 kb in L1 --> @1.0 second mark to fall below threshold.
+  // 210 kb in L0 --> @1.1 second mark to fall below threshold.
+  // Just before L1 @0.999 seconds.
+  AdvanceTime(399);
+  EqualToExpected();
+
+  // Just after L1 @1.001 seconds.
+  expected_.start_layer = 1;
+  AdvanceTime(2);
+  EqualToExpected();
+
+  // Just before L0 @1.099 seconds.
+  AdvanceTime(99);
+  EqualToExpected();
+
+  // Just after L0 @1.101 seconds.
+  expected_.start_layer = 0;
+  AdvanceTime(2);
+  EqualToExpected();
+
+  // @1.1 seconds
+  AdvanceTime(99);
+  EqualToExpected();
+  AddKilobitsToLayer(200, 1);
+
+  expected_.is_keyframe = true;
+  for (int l = 0; l < 3; ++l)
+    expected_.layer[l].ref_buf1 = -1;
+  AdvanceTime(200);
+  EqualToExpected();
+
+  expected_.is_keyframe = false;
+  expected_.start_layer = 2;
+  for (int l = 0; l < 3; ++l)
+    expected_.layer[l].ref_buf1 = l;
+  AdvanceTime(200);
+  EqualToExpected();
+}
+
+// Test that the bitrate calculations are
+// correct when the timestamp wraps.
+TEST_F(ScreenshareLayerTestVP9, TimestampWrap) {
+  InitScreenshareLayers(2);
+  ConfigureBitrateForLayer(100, 0);
+
+  expected_.layer[0].upd_buf = 0;
+  expected_.layer[0].ref_buf1 = 0;
+  expected_.layer[1].upd_buf = 1;
+  expected_.layer[1].ref_buf1 = 1;
+  expected_.start_layer = 0;
+  expected_.stop_layer = 1;
+
+  // Advance time to just before the timestamp wraps.
+  AdvanceTime(std::numeric_limits<uint32_t>::max() / (kTickFrequency / 1000));
+  EqualToExpected();
+  AddKilobitsToLayer(200, 0);
+
+  // Wrap
+  expected_.start_layer = 1;
+  AdvanceTime(1);
+  EqualToExpected();
+}
+
+}  // namespace webrtc
diff --git a/modules/video_coding/codecs/vp9/vp9.gyp b/modules/video_coding/codecs/vp9/vp9.gyp
index 752521c..9049b63 100644
--- a/modules/video_coding/codecs/vp9/vp9.gyp
+++ b/modules/video_coding/codecs/vp9/vp9.gyp
@@ -28,6 +28,8 @@
         ['build_vp9==1', {
           'sources': [
             'include/vp9.h',
+            'screenshare_layers.cc',
+            'screenshare_layers.h',
             'vp9_frame_buffer_pool.cc',
             'vp9_frame_buffer_pool.h',
             'vp9_impl.cc',
diff --git a/modules/video_coding/codecs/vp9/vp9_impl.cc b/modules/video_coding/codecs/vp9/vp9_impl.cc
index 7ebe1a2..4dd59b2 100644
--- a/modules/video_coding/codecs/vp9/vp9_impl.cc
+++ b/modules/video_coding/codecs/vp9/vp9_impl.cc
@@ -27,6 +27,7 @@
 #include "webrtc/common.h"
 #include "webrtc/common_video/libyuv/include/webrtc_libyuv.h"
 #include "webrtc/modules/include/module_common_types.h"
+#include "webrtc/modules/video_coding/codecs/vp9/screenshare_layers.h"
 #include "webrtc/system_wrappers/include/logging.h"
 #include "webrtc/system_wrappers/include/tick_util.h"
 
@@ -76,9 +77,12 @@
       raw_(NULL),
       input_image_(NULL),
       tl0_pic_idx_(0),
-      gof_idx_(0),
+      frames_since_kf_(0),
       num_temporal_layers_(0),
-      num_spatial_layers_(0) {
+      num_spatial_layers_(0),
+      frames_encoded_(0),
+      // Use two spatial layers when screensharing in flexible mode.
+      spatial_layer_(new ScreenshareLayersVP9(2)) {
   memset(&codec_, 0, sizeof(codec_));
   uint32_t seed = static_cast<uint32_t>(TickTime::MillisecondTimestamp());
   srand(seed);
@@ -208,6 +212,7 @@
   }
   config_->rc_target_bitrate = new_bitrate_kbit;
   codec_.maxFramerate = new_framerate;
+  spatial_layer_->ConfigureBitrate(new_bitrate_kbit, 0);
 
   if (!SetSvcRates()) {
     return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
@@ -246,6 +251,7 @@
   if (inst->codecSpecific.VP9.numberOfSpatialLayers > 2) {
     return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
   }
+
   int retVal = Release();
   if (retVal < 0) {
     return retVal;
@@ -324,7 +330,13 @@
 
   // TODO(asapersson): Check configuration of temporal switch up and increase
   // pattern length.
-  if (num_temporal_layers_ == 1) {
+  is_flexible_mode_ = inst->codecSpecific.VP9.flexibleMode;
+  if (is_flexible_mode_) {
+    config_->temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_BYPASS;
+    config_->ts_number_layers = num_temporal_layers_;
+    if (codec_.mode == kScreensharing)
+      spatial_layer_->ConfigureBitrate(inst->startBitrate, 0);
+  } else if (num_temporal_layers_ == 1) {
     gof_.SetGofInfoVP9(kTemporalStructureMode1);
     config_->temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_NOLAYERING;
     config_->ts_number_layers = 1;
@@ -395,7 +407,8 @@
       // 1:2 scaling in each dimension.
       svc_internal_.svc_params.scaling_factor_num[i] = scaling_factor_num;
       svc_internal_.svc_params.scaling_factor_den[i] = 256;
-      scaling_factor_num /= 2;
+      if (codec_.mode != kScreensharing)
+        scaling_factor_num /= 2;
     }
   }
 
@@ -495,12 +508,35 @@
   raw_->stride[VPX_PLANE_U] = input_image.stride(kUPlane);
   raw_->stride[VPX_PLANE_V] = input_image.stride(kVPlane);
 
-  int flags = 0;
+  vpx_enc_frame_flags_t flags = 0;
   bool send_keyframe = (frame_type == kVideoFrameKey);
   if (send_keyframe) {
     // Key frame request from caller.
     flags = VPX_EFLAG_FORCE_KF;
   }
+
+  if (is_flexible_mode_) {
+    SuperFrameRefSettings settings;
+
+    // These structs are copied when calling vpx_codec_control,
+    // so it is okay for them to go out of scope.
+    vpx_svc_ref_frame_config enc_layer_conf;
+    vpx_svc_layer_id layer_id;
+
+    if (codec_.mode == kRealtimeVideo) {
+      // Real time video not yet implemented in flexible mode.
+      RTC_NOTREACHED();
+    } else {
+      settings = spatial_layer_->GetSuperFrameSettings(input_image.timestamp(),
+                                                       send_keyframe);
+    }
+    enc_layer_conf = GenerateRefsAndFlags(settings);
+    layer_id.temporal_layer_id = 0;
+    layer_id.spatial_layer_id = settings.start_layer;
+    vpx_codec_control(encoder_, VP9E_SET_SVC_LAYER_ID, &layer_id);
+    vpx_codec_control(encoder_, VP9E_SET_SVC_REF_FRAME_CONFIG, &enc_layer_conf);
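+    // Both controls apply to the upcoming vpx_codec_encode() call, so the
+    // superframe below is encoded with the chosen layers and references.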
+  }
+
   assert(codec_.maxFramerate > 0);
   uint32_t duration = 90000 / codec_.maxFramerate;
   if (vpx_codec_encode(encoder_, raw_, timestamp_, duration, flags,
@@ -526,9 +562,8 @@
                                  !codec_.codecSpecific.VP9.flexibleMode)
                                     ? true
                                     : false;
-  if (pkt.data.frame.flags & VPX_FRAME_IS_KEY) {
-    gof_idx_ = 0;
-  }
+  if (pkt.data.frame.flags & VPX_FRAME_IS_KEY)
+    frames_since_kf_ = 0;
 
   vpx_svc_layer_id_t layer_id = {0};
   vpx_codec_control(encoder_, VP9E_GET_SVC_LAYER_ID, &layer_id);
@@ -551,17 +586,18 @@
     vp9_info->ss_data_available = false;
   }
 
-  if (vp9_info->flexible_mode) {
-    vp9_info->gof_idx = kNoGofIdx;
-  } else {
-    vp9_info->gof_idx =
-        static_cast<uint8_t>(gof_idx_++ % gof_.num_frames_in_gof);
-  }
-
   // TODO(asapersson): this info has to be obtained from the encoder.
   vp9_info->temporal_up_switch = true;
 
-  if (layer_id.spatial_layer_id == 0) {
+  bool is_first_frame = false;
+  if (is_flexible_mode_) {
+    is_first_frame =
+        layer_id.spatial_layer_id == spatial_layer_->GetStartLayer();
+  } else {
+    is_first_frame = layer_id.spatial_layer_id == 0;
+  }
+
+  if (is_first_frame) {
     picture_id_ = (picture_id_ + 1) & 0x7FFF;
     // TODO(asapersson): this info has to be obtained from the encoder.
     vp9_info->inter_layer_predicted = false;
@@ -582,6 +618,20 @@
   // Always populate this, so that the packetizer can properly set the marker
   // bit.
   vp9_info->num_spatial_layers = num_spatial_layers_;
+
+  vp9_info->num_ref_pics = 0;
+  if (vp9_info->flexible_mode) {
+    vp9_info->gof_idx = kNoGofIdx;
+    vp9_info->num_ref_pics = num_ref_pics_[layer_id.spatial_layer_id];
+    for (int i = 0; i < num_ref_pics_[layer_id.spatial_layer_id]; ++i) {
+      vp9_info->p_diff[i] = p_diff_[layer_id.spatial_layer_id][i];
+    }
+  } else {
+    vp9_info->gof_idx =
+        static_cast<uint8_t>(frames_since_kf_ % gof_.num_frames_in_gof);
+  }
+  ++frames_since_kf_;
+
   if (vp9_info->ss_data_available) {
     vp9_info->spatial_layer_resolution_present = true;
     for (size_t i = 0; i < vp9_info->num_spatial_layers; ++i) {
@@ -617,6 +667,14 @@
   frag_info.fragmentationPlType[part_idx] = 0;
   frag_info.fragmentationTimeDiff[part_idx] = 0;
   encoded_image_._length += static_cast<uint32_t>(pkt->data.frame.sz);
+
+  vpx_svc_layer_id_t layer_id = {0};
+  vpx_codec_control(encoder_, VP9E_GET_SVC_LAYER_ID, &layer_id);
+  if (is_flexible_mode_ && codec_.mode == kScreensharing)
+    spatial_layer_->LayerFrameEncoded(
+        static_cast<unsigned int>(encoded_image_._length),
+        layer_id.spatial_layer_id);
+
   assert(encoded_image_._length <= encoded_image_._size);
 
   // End of frame.
@@ -638,6 +696,108 @@
   return WEBRTC_VIDEO_CODEC_OK;
 }
 
+vpx_svc_ref_frame_config VP9EncoderImpl::GenerateRefsAndFlags(
+    const SuperFrameRefSettings& settings) {
+  static const vpx_enc_frame_flags_t kAllFlags =
+      VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_LAST |
+      VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_GF;
+  vpx_svc_ref_frame_config sf_conf = {};
+  if (settings.is_keyframe) {
+    // Used later on to make sure we don't make any invalid references.
+    memset(buffer_updated_at_frame_, -1, sizeof(buffer_updated_at_frame_));
+    for (int layer = settings.start_layer; layer <= settings.stop_layer;
+         ++layer) {
+      num_ref_pics_[layer] = 0;
+      buffer_updated_at_frame_[settings.layer[layer].upd_buf] = frames_encoded_;
+      // When encoding a keyframe only the alt_fb_idx is used
+      // to specify which layer ends up in which buffer.
+      sf_conf.alt_fb_idx[layer] = settings.layer[layer].upd_buf;
+    }
+  } else {
+    for (int layer_idx = settings.start_layer; layer_idx <= settings.stop_layer;
+         ++layer_idx) {
+      vpx_enc_frame_flags_t layer_flags = kAllFlags;
+      num_ref_pics_[layer_idx] = 0;
+      int8_t refs[3] = {settings.layer[layer_idx].ref_buf1,
+                        settings.layer[layer_idx].ref_buf2,
+                        settings.layer[layer_idx].ref_buf3};
+
+      for (unsigned int ref_idx = 0; ref_idx < kMaxVp9RefPics; ++ref_idx) {
+        if (refs[ref_idx] == -1)
+          continue;
+
+        RTC_DCHECK_GE(refs[ref_idx], 0);
+        RTC_DCHECK_LE(refs[ref_idx], 7);
+        // Easier to remove flags from all flags rather than having to
+        // build the flags from 0.
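+        // The first, second and third references map onto the LAST,
+        // GOLDEN and ALTREF buffer slots respectively.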
+        switch (num_ref_pics_[layer_idx]) {
+          case 0: {
+            sf_conf.lst_fb_idx[layer_idx] = refs[ref_idx];
+            layer_flags &= ~VP8_EFLAG_NO_REF_LAST;
+            break;
+          }
+          case 1: {
+            sf_conf.gld_fb_idx[layer_idx] = refs[ref_idx];
+            layer_flags &= ~VP8_EFLAG_NO_REF_GF;
+            break;
+          }
+          case 2: {
+            sf_conf.alt_fb_idx[layer_idx] = refs[ref_idx];
+            layer_flags &= ~VP8_EFLAG_NO_REF_ARF;
+            break;
+          }
+        }
+        // Make sure we don't reference a buffer that hasn't been
+        // used at all or hasn't been used since a keyframe.
+        RTC_DCHECK_NE(buffer_updated_at_frame_[refs[ref_idx]], -1);
+
+        p_diff_[layer_idx][num_ref_pics_[layer_idx]] =
+            frames_encoded_ - buffer_updated_at_frame_[refs[ref_idx]];
+        num_ref_pics_[layer_idx]++;
+      }
+
+      bool upd_buf_same_as_a_ref = false;
+      if (settings.layer[layer_idx].upd_buf != -1) {
+        for (unsigned int ref_idx = 0; ref_idx < kMaxVp9RefPics; ++ref_idx) {
+          if (settings.layer[layer_idx].upd_buf == refs[ref_idx]) {
+            switch (ref_idx) {
+              case 0: {
+                layer_flags &= ~VP8_EFLAG_NO_UPD_LAST;
+                break;
+              }
+              case 1: {
+                layer_flags &= ~VP8_EFLAG_NO_UPD_GF;
+                break;
+              }
+              case 2: {
+                layer_flags &= ~VP8_EFLAG_NO_UPD_ARF;
+                break;
+              }
+            }
+            upd_buf_same_as_a_ref = true;
+            break;
+          }
+        }
+        if (!upd_buf_same_as_a_ref) {
+          // If we have three references and a buffer is specified to be
+          // updated, then that buffer must be the same as one of the
+          // three references.
+          RTC_CHECK_LT(num_ref_pics_[layer_idx], kMaxVp9RefPics);
+
+          sf_conf.alt_fb_idx[layer_idx] = settings.layer[layer_idx].upd_buf;
+          layer_flags &= ~VP8_EFLAG_NO_UPD_ARF;
+        }
+
+        int updated_buffer = settings.layer[layer_idx].upd_buf;
+        buffer_updated_at_frame_[updated_buffer] = frames_encoded_;
+        sf_conf.frame_flags[layer_idx] = layer_flags;
+      }
+    }
+  }
+  ++frames_encoded_;
+  return sf_conf;
+}
+
 int VP9EncoderImpl::SetChannelParameters(uint32_t packet_loss, int64_t rtt) {
   return WEBRTC_VIDEO_CODEC_OK;
 }
diff --git a/modules/video_coding/codecs/vp9/vp9_impl.h b/modules/video_coding/codecs/vp9/vp9_impl.h
index ecc0465..9a48e74 100644
--- a/modules/video_coding/codecs/vp9/vp9_impl.h
+++ b/modules/video_coding/codecs/vp9/vp9_impl.h
@@ -21,6 +21,8 @@
 
 namespace webrtc {
 
+class ScreenshareLayersVP9;
+
 class VP9EncoderImpl : public VP9Encoder {
  public:
   VP9EncoderImpl();
@@ -45,6 +47,20 @@
 
   void OnDroppedFrame() override {}
 
+  struct LayerFrameRefSettings {
+    int8_t upd_buf = -1;   // -1 - no update,    0..7 - update buffer 0..7
+    int8_t ref_buf1 = -1;  // -1 - no reference, 0..7 - reference buffer 0..7
+    int8_t ref_buf2 = -1;  // -1 - no reference, 0..7 - reference buffer 0..7
+    int8_t ref_buf3 = -1;  // -1 - no reference, 0..7 - reference buffer 0..7
+  };
+
+  struct SuperFrameRefSettings {
+    LayerFrameRefSettings layer[kMaxVp9NumberOfSpatialLayers];
+    uint8_t start_layer = 0;  // The first spatial layer to be encoded.
+    uint8_t stop_layer = 0;   // The last spatial layer to be encoded.
+    bool is_keyframe = false;
+  };
+
  private:
   // Determine number of encoder threads to use.
   int NumberOfThreads(int width, int height, int number_of_cores);
@@ -59,6 +75,15 @@
   bool ExplicitlyConfiguredSpatialLayers() const;
   bool SetSvcRates();
 
+  // Used for flexible mode to set the flags and buffer references used
+  // by the encoder. Also calculates the references used by the RTP
+  // packetizer.
+  //
+  // Has to be called for every frame (keyframes included) to update the
+  // state used to calculate references.
+  vpx_svc_ref_frame_config GenerateRefsAndFlags(
+      const SuperFrameRefSettings& settings);
+
   virtual int GetEncodedLayerFrame(const vpx_codec_cx_pkt* pkt);
 
   // Callback function for outputting packets per spatial layer.
@@ -89,9 +114,17 @@
   GofInfoVP9 gof_;       // Contains each frame's temporal information for
                          // non-flexible mode.
   uint8_t tl0_pic_idx_;  // Only used in non-flexible mode.
-  size_t gof_idx_;       // Only used in non-flexible mode.
+  size_t frames_since_kf_;
   uint8_t num_temporal_layers_;
   uint8_t num_spatial_layers_;
+
+  // Used for flexible mode.
+  bool is_flexible_mode_;
+  int64_t buffer_updated_at_frame_[kNumVp9Buffers];
+  int64_t frames_encoded_;
+  uint8_t num_ref_pics_[kMaxVp9NumberOfSpatialLayers];
+  uint8_t p_diff_[kMaxVp9NumberOfSpatialLayers][kMaxVp9RefPics];
+  rtc::scoped_ptr<ScreenshareLayersVP9> spatial_layer_;
 };
 
 
diff --git a/modules/video_coding/main/source/decoding_state.cc b/modules/video_coding/main/source/decoding_state.cc
index a3da7c6..bdc6329 100644
--- a/modules/video_coding/main/source/decoding_state.cc
+++ b/modules/video_coding/main/source/decoding_state.cc
@@ -24,7 +24,9 @@
       temporal_id_(kNoTemporalIdx),
       tl0_pic_id_(kNoTl0PicIdx),
       full_sync_(true),
-      in_initial_state_(true) {}
+      in_initial_state_(true) {
+  memset(frame_decoded_, 0, sizeof(frame_decoded_));
+}
 
 VCMDecodingState::~VCMDecodingState() {}
 
@@ -37,6 +39,7 @@
   tl0_pic_id_ = kNoTl0PicIdx;
   full_sync_ = true;
   in_initial_state_ = true;
+  memset(frame_decoded_, 0, sizeof(frame_decoded_));
 }
 
 uint32_t VCMDecodingState::time_stamp() const {
@@ -63,12 +66,33 @@
 
 void VCMDecodingState::SetState(const VCMFrameBuffer* frame) {
   assert(frame != NULL && frame->GetHighSeqNum() >= 0);
-  UpdateSyncState(frame);
+  if (!UsingFlexibleMode(frame))
+    UpdateSyncState(frame);
   sequence_num_ = static_cast<uint16_t>(frame->GetHighSeqNum());
   time_stamp_ = frame->TimeStamp();
   picture_id_ = frame->PictureId();
   temporal_id_ = frame->TemporalId();
   tl0_pic_id_ = frame->Tl0PicId();
+
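+  // In flexible mode, decoded frames are tracked in a ring buffer indexed
+  // by picture id modulo kFrameDecodedLength, so that ContinuousFrameRefs()
+  // can later verify that every referenced frame has been decoded.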
+  if (UsingFlexibleMode(frame)) {
+    uint16_t frame_index = picture_id_ % kFrameDecodedLength;
+    if (in_initial_state_) {
+      frame_decoded_cleared_to_ = frame_index;
+    } else if (frame->FrameType() == kVideoFrameKey) {
+      memset(frame_decoded_, 0, sizeof(frame_decoded_));
+      frame_decoded_cleared_to_ = frame_index;
+    } else {
+      if (AheadOfFramesDecodedClearedTo(frame_index)) {
+        while (frame_decoded_cleared_to_ != frame_index) {
+          frame_decoded_cleared_to_ =
+              (frame_decoded_cleared_to_ + 1) % kFrameDecodedLength;
+          frame_decoded_[frame_decoded_cleared_to_] = false;
+        }
+      }
+    }
+    frame_decoded_[frame_index] = true;
+  }
+
   in_initial_state_ = false;
 }
 
@@ -80,6 +104,8 @@
   tl0_pic_id_ = state.tl0_pic_id_;
   full_sync_ = state.full_sync_;
   in_initial_state_ = state.in_initial_state_;
+  frame_decoded_cleared_to_ = state.frame_decoded_cleared_to_;
+  memcpy(frame_decoded_, state.frame_decoded_, sizeof(frame_decoded_));
 }
 
 bool VCMDecodingState::UpdateEmptyFrame(const VCMFrameBuffer* frame) {
@@ -173,7 +199,11 @@
   if (!full_sync_ && !frame->LayerSync())
     return false;
   if (UsingPictureId(frame)) {
-    return ContinuousPictureId(frame->PictureId());
+    if (UsingFlexibleMode(frame)) {
+      return ContinuousFrameRefs(frame);
+    } else {
+      return ContinuousPictureId(frame->PictureId());
+    }
   } else {
     return ContinuousSeqNum(static_cast<uint16_t>(frame->GetLowSeqNum()));
   }
@@ -216,8 +246,41 @@
   return (static_cast<uint8_t>(tl0_pic_id_ + 1) == tl0_pic_id);
 }
 
+bool VCMDecodingState::ContinuousFrameRefs(const VCMFrameBuffer* frame) const {
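+  // A frame is continuous if every picture it references (its picture id
+  // minus each p_diff) has been decoded since the last keyframe.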
+  uint8_t num_refs = frame->CodecSpecific()->codecSpecific.VP9.num_ref_pics;
+  for (uint8_t r = 0; r < num_refs; ++r) {
+    uint16_t frame_ref = frame->PictureId() -
+                         frame->CodecSpecific()->codecSpecific.VP9.p_diff[r];
+    uint16_t frame_index = frame_ref % kFrameDecodedLength;
+    if (AheadOfFramesDecodedClearedTo(frame_index) ||
+        !frame_decoded_[frame_index]) {
+      return false;
+    }
+  }
+  return true;
+}
+
 bool VCMDecodingState::UsingPictureId(const VCMFrameBuffer* frame) const {
   return (frame->PictureId() != kNoPictureId && picture_id_ != kNoPictureId);
 }
 
+bool VCMDecodingState::UsingFlexibleMode(const VCMFrameBuffer* frame) const {
+  return frame->CodecSpecific()->codecType == kVideoCodecVP9 &&
+         frame->CodecSpecific()->codecSpecific.VP9.flexible_mode;
+}
+
+// TODO(philipel): Change how this check works; it practically
+// limits the max p_diff to 64.
+bool VCMDecodingState::AheadOfFramesDecodedClearedTo(uint16_t index) const {
+  // No way of knowing for sure if we are actually ahead of
+  // frame_decoded_cleared_to_. We just make the assumption
+  // that we are not trying to reference back to a very old
+  // index, but instead are referencing a newer index.
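+  // Example: with kFrameDecodedLength = 128, cleared_to = 10 and index = 20,
+  // diff = 128 - (20 - 10) = 118 > 64, so index 20 is treated as ahead.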
+  uint16_t diff =
+      index > frame_decoded_cleared_to_
+          ? kFrameDecodedLength - (index - frame_decoded_cleared_to_)
+          : frame_decoded_cleared_to_ - index;
+  return diff > kFrameDecodedLength / 2;
+}
+
 }  // namespace webrtc
diff --git a/modules/video_coding/main/source/decoding_state.h b/modules/video_coding/main/source/decoding_state.h
index 99ee335..fe40b24 100644
--- a/modules/video_coding/main/source/decoding_state.h
+++ b/modules/video_coding/main/source/decoding_state.h
@@ -21,6 +21,11 @@
 
 class VCMDecodingState {
  public:
+  // The max number of bits used to reference back
+  // to a previous frame when using flexible mode.
+  static const uint16_t kNumRefBits = 7;
+  static const uint16_t kFrameDecodedLength = 1 << kNumRefBits;
+
   VCMDecodingState();
   ~VCMDecodingState();
   // Check for old frame
@@ -52,7 +57,10 @@
   bool ContinuousPictureId(int picture_id) const;
   bool ContinuousSeqNum(uint16_t seq_num) const;
   bool ContinuousLayer(int temporal_id, int tl0_pic_id) const;
+  bool ContinuousFrameRefs(const VCMFrameBuffer* frame) const;
   bool UsingPictureId(const VCMFrameBuffer* frame) const;
+  bool UsingFlexibleMode(const VCMFrameBuffer* frame) const;
+  bool AheadOfFramesDecodedClearedTo(uint16_t index) const;
 
   // Keep state of last decoded frame.
   // TODO(mikhal/stefan): create designated classes to handle these types.
@@ -63,6 +71,10 @@
   int         tl0_pic_id_;
   bool        full_sync_;  // Sync flag when temporal layers are used.
   bool        in_initial_state_;
+
+  // Used to check references in flexible mode.
+  bool frame_decoded_[kFrameDecodedLength];
+  uint16_t frame_decoded_cleared_to_;
 };
 
 }  // namespace webrtc
diff --git a/modules/video_coding/main/source/decoding_state_unittest.cc b/modules/video_coding/main/source/decoding_state_unittest.cc
index 30b5786..9dfb313 100644
--- a/modules/video_coding/main/source/decoding_state_unittest.cc
+++ b/modules/video_coding/main/source/decoding_state_unittest.cc
@@ -446,4 +446,254 @@
   EXPECT_FALSE(dec_state.ContinuousFrame(&frame));
 }
 
+TEST(TestDecodingState, FrameContinuityFlexibleModeKeyFrame) {
+  VCMDecodingState dec_state;
+  VCMFrameBuffer frame;
+  VCMPacket packet;
+  packet.isFirstPacket = true;
+  packet.timestamp = 1;
+  packet.seqNum = 0xffff;
+  uint8_t data[] = "I need a data pointer for this test!";
+  packet.sizeBytes = sizeof(data);
+  packet.dataPtr = data;
+  packet.codecSpecificHeader.codec = kRtpVideoVp9;
+
+  RTPVideoHeaderVP9& vp9_hdr = packet.codecSpecificHeader.codecHeader.VP9;
+  vp9_hdr.picture_id = 10;
+  vp9_hdr.flexible_mode = true;
+
+  FrameData frame_data;
+  frame_data.rtt_ms = 0;
+  frame_data.rolling_average_packets_per_frame = -1;
+
+  // Key frame as first frame
+  packet.frameType = kVideoFrameKey;
+  EXPECT_LE(0, frame.InsertPacket(packet, 0, kNoErrors, frame_data));
+  EXPECT_TRUE(dec_state.ContinuousFrame(&frame));
+  dec_state.SetState(&frame);
+
+  // Key frame again
+  vp9_hdr.picture_id = 11;
+  frame.Reset();
+  EXPECT_LE(0, frame.InsertPacket(packet, 0, kNoErrors, frame_data));
+  EXPECT_TRUE(dec_state.ContinuousFrame(&frame));
+  dec_state.SetState(&frame);
+
+  // Ref to 11, continuous
+  frame.Reset();
+  packet.frameType = kVideoFrameDelta;
+  vp9_hdr.picture_id = 12;
+  vp9_hdr.num_ref_pics = 1;
+  vp9_hdr.pid_diff[0] = 1;
+  EXPECT_LE(0, frame.InsertPacket(packet, 0, kNoErrors, frame_data));
+  EXPECT_TRUE(dec_state.ContinuousFrame(&frame));
+}
+
+TEST(TestDecodingState, FrameContinuityFlexibleModeOutOfOrderFrames) {
+  VCMDecodingState dec_state;
+  VCMFrameBuffer frame;
+  VCMPacket packet;
+  packet.isFirstPacket = true;
+  packet.timestamp = 1;
+  packet.seqNum = 0xffff;
+  uint8_t data[] = "I need a data pointer for this test!";
+  packet.sizeBytes = sizeof(data);
+  packet.dataPtr = data;
+  packet.codecSpecificHeader.codec = kRtpVideoVp9;
+
+  RTPVideoHeaderVP9& vp9_hdr = packet.codecSpecificHeader.codecHeader.VP9;
+  vp9_hdr.picture_id = 10;
+  vp9_hdr.flexible_mode = true;
+
+  FrameData frame_data;
+  frame_data.rtt_ms = 0;
+  frame_data.rolling_average_packets_per_frame = -1;
+
+  // Key frame as first frame
+  packet.frameType = kVideoFrameKey;
+  EXPECT_LE(0, frame.InsertPacket(packet, 0, kNoErrors, frame_data));
+  EXPECT_TRUE(dec_state.ContinuousFrame(&frame));
+  dec_state.SetState(&frame);
+
+  // Ref to 10, continuous
+  frame.Reset();
+  packet.frameType = kVideoFrameDelta;
+  vp9_hdr.picture_id = 15;
+  vp9_hdr.num_ref_pics = 1;
+  vp9_hdr.pid_diff[0] = 5;
+  EXPECT_LE(0, frame.InsertPacket(packet, 0, kNoErrors, frame_data));
+  EXPECT_TRUE(dec_state.ContinuousFrame(&frame));
+  dec_state.SetState(&frame);
+
+  // Out of order, last id 15, this id 12, ref to 10, continuous
+  frame.Reset();
+  vp9_hdr.picture_id = 12;
+  vp9_hdr.pid_diff[0] = 2;
+  EXPECT_LE(0, frame.InsertPacket(packet, 0, kNoErrors, frame_data));
+  EXPECT_TRUE(dec_state.ContinuousFrame(&frame));
+  dec_state.SetState(&frame);
+
+  // Ref 10, 12, 15, continuous
+  frame.Reset();
+  vp9_hdr.picture_id = 20;
+  vp9_hdr.num_ref_pics = 3;
+  vp9_hdr.pid_diff[0] = 10;
+  vp9_hdr.pid_diff[1] = 8;
+  vp9_hdr.pid_diff[2] = 5;
+  EXPECT_LE(0, frame.InsertPacket(packet, 0, kNoErrors, frame_data));
+  EXPECT_TRUE(dec_state.ContinuousFrame(&frame));
+}
+
+TEST(TestDecodingState, FrameContinuityFlexibleModeGeneral) {
+  VCMDecodingState dec_state;
+  VCMFrameBuffer frame;
+  VCMPacket packet;
+  packet.isFirstPacket = true;
+  packet.timestamp = 1;
+  packet.seqNum = 0xffff;
+  uint8_t data[] = "I need a data pointer for this test!";
+  packet.sizeBytes = sizeof(data);
+  packet.dataPtr = data;
+  packet.codecSpecificHeader.codec = kRtpVideoVp9;
+
+  RTPVideoHeaderVP9& vp9_hdr = packet.codecSpecificHeader.codecHeader.VP9;
+  vp9_hdr.picture_id = 10;
+  vp9_hdr.flexible_mode = true;
+
+  FrameData frame_data;
+  frame_data.rtt_ms = 0;
+  frame_data.rolling_average_packets_per_frame = -1;
+
+  // Key frame as first frame
+  packet.frameType = kVideoFrameKey;
+  EXPECT_LE(0, frame.InsertPacket(packet, 0, kNoErrors, frame_data));
+  EXPECT_TRUE(dec_state.ContinuousFrame(&frame));
+
+  // Delta frame as first frame
+  frame.Reset();
+  packet.frameType = kVideoFrameDelta;
+  EXPECT_LE(0, frame.InsertPacket(packet, 0, kNoErrors, frame_data));
+  EXPECT_FALSE(dec_state.ContinuousFrame(&frame));
+
+  // Key frame then delta frame
+  frame.Reset();
+  packet.frameType = kVideoFrameKey;
+  EXPECT_LE(0, frame.InsertPacket(packet, 0, kNoErrors, frame_data));
+  dec_state.SetState(&frame);
+  frame.Reset();
+  packet.frameType = kVideoFrameDelta;
+  vp9_hdr.num_ref_pics = 1;
+  vp9_hdr.picture_id = 15;
+  vp9_hdr.pid_diff[0] = 5;
+  EXPECT_LE(0, frame.InsertPacket(packet, 0, kNoErrors, frame_data));
+  EXPECT_TRUE(dec_state.ContinuousFrame(&frame));
+  dec_state.SetState(&frame);
+
+  // Ref to 11, not continuous
+  frame.Reset();
+  vp9_hdr.picture_id = 16;
+  EXPECT_LE(0, frame.InsertPacket(packet, 0, kNoErrors, frame_data));
+  EXPECT_FALSE(dec_state.ContinuousFrame(&frame));
+
+  // Ref to 15, continuous
+  frame.Reset();
+  vp9_hdr.picture_id = 16;
+  vp9_hdr.pid_diff[0] = 1;
+  EXPECT_LE(0, frame.InsertPacket(packet, 0, kNoErrors, frame_data));
+  EXPECT_TRUE(dec_state.ContinuousFrame(&frame));
+  dec_state.SetState(&frame);
+
+  // Ref to 11 and 15, not continuous
+  frame.Reset();
+  vp9_hdr.picture_id = 20;
+  vp9_hdr.num_ref_pics = 2;
+  vp9_hdr.pid_diff[0] = 9;
+  vp9_hdr.pid_diff[1] = 5;
+  EXPECT_LE(0, frame.InsertPacket(packet, 0, kNoErrors, frame_data));
+  EXPECT_FALSE(dec_state.ContinuousFrame(&frame));
+
+  // Ref to 10, 15 and 16, continuous
+  frame.Reset();
+  vp9_hdr.picture_id = 22;
+  vp9_hdr.num_ref_pics = 3;
+  vp9_hdr.pid_diff[0] = 12;
+  vp9_hdr.pid_diff[1] = 7;
+  vp9_hdr.pid_diff[2] = 6;
+  EXPECT_LE(0, frame.InsertPacket(packet, 0, kNoErrors, frame_data));
+  EXPECT_TRUE(dec_state.ContinuousFrame(&frame));
+  dec_state.SetState(&frame);
+
+  // Key Frame, continuous
+  frame.Reset();
+  packet.frameType = kVideoFrameKey;
+  vp9_hdr.picture_id = VCMDecodingState::kFrameDecodedLength - 2;
+  vp9_hdr.num_ref_pics = 0;
+  EXPECT_LE(0, frame.InsertPacket(packet, 0, kNoErrors, frame_data));
+  EXPECT_TRUE(dec_state.ContinuousFrame(&frame));
+  dec_state.SetState(&frame);
+
+  // Frame at last index, ref to KF, continuous
+  frame.Reset();
+  packet.frameType = kVideoFrameDelta;
+  vp9_hdr.picture_id = VCMDecodingState::kFrameDecodedLength - 1;
+  vp9_hdr.num_ref_pics = 1;
+  vp9_hdr.pid_diff[0] = 1;
+  EXPECT_LE(0, frame.InsertPacket(packet, 0, kNoErrors, frame_data));
+  EXPECT_TRUE(dec_state.ContinuousFrame(&frame));
+  dec_state.SetState(&frame);
+
+  // Frame after wrapping buffer length, ref to last index, continuous
+  frame.Reset();
+  vp9_hdr.picture_id = 0;
+  vp9_hdr.num_ref_pics = 1;
+  vp9_hdr.pid_diff[0] = 1;
+  EXPECT_LE(0, frame.InsertPacket(packet, 0, kNoErrors, frame_data));
+  EXPECT_TRUE(dec_state.ContinuousFrame(&frame));
+  dec_state.SetState(&frame);
+
+  // Frame after wrapping start frame, ref to 0, continuous
+  frame.Reset();
+  vp9_hdr.picture_id = 20;
+  vp9_hdr.num_ref_pics = 1;
+  vp9_hdr.pid_diff[0] = 20;
+  EXPECT_LE(0, frame.InsertPacket(packet, 0, kNoErrors, frame_data));
+  EXPECT_TRUE(dec_state.ContinuousFrame(&frame));
+  dec_state.SetState(&frame);
+
+  // Frame after wrapping start frame, ref to 10, not continuous
+  frame.Reset();
+  vp9_hdr.picture_id = 23;
+  vp9_hdr.num_ref_pics = 1;
+  vp9_hdr.pid_diff[0] = 13;
+  EXPECT_LE(0, frame.InsertPacket(packet, 0, kNoErrors, frame_data));
+  EXPECT_FALSE(dec_state.ContinuousFrame(&frame));
+
+  // Key frame, continuous
+  frame.Reset();
+  packet.frameType = kVideoFrameKey;
+  vp9_hdr.picture_id = 25;
+  vp9_hdr.num_ref_pics = 0;
+  EXPECT_LE(0, frame.InsertPacket(packet, 0, kNoErrors, frame_data));
+  EXPECT_TRUE(dec_state.ContinuousFrame(&frame));
+  dec_state.SetState(&frame);
+
+  // Ref to KF, continuous
+  frame.Reset();
+  packet.frameType = kVideoFrameDelta;
+  vp9_hdr.picture_id = 26;
+  vp9_hdr.num_ref_pics = 1;
+  vp9_hdr.pid_diff[0] = 1;
+  EXPECT_LE(0, frame.InsertPacket(packet, 0, kNoErrors, frame_data));
+  EXPECT_TRUE(dec_state.ContinuousFrame(&frame));
+  dec_state.SetState(&frame);
+
+  // Ref to frame previous to KF, not continuous
+  frame.Reset();
+  vp9_hdr.picture_id = 30;
+  vp9_hdr.num_ref_pics = 1;
+  vp9_hdr.pid_diff[0] = 30;
+  EXPECT_LE(0, frame.InsertPacket(packet, 0, kNoErrors, frame_data));
+  EXPECT_FALSE(dec_state.ContinuousFrame(&frame));
+}
+
 }  // namespace webrtc
diff --git a/modules/video_coding/main/source/encoded_frame.cc b/modules/video_coding/main/source/encoded_frame.cc
index d86704d..89a8777 100644
--- a/modules/video_coding/main/source/encoded_frame.cc
+++ b/modules/video_coding/main/source/encoded_frame.cc
@@ -147,6 +147,12 @@
             header->codecHeader.VP9.inter_pic_predicted;
         _codecSpecificInfo.codecSpecific.VP9.flexible_mode =
             header->codecHeader.VP9.flexible_mode;
+        _codecSpecificInfo.codecSpecific.VP9.num_ref_pics =
+            header->codecHeader.VP9.num_ref_pics;
+        for (uint8_t r = 0; r < header->codecHeader.VP9.num_ref_pics; ++r) {
+          _codecSpecificInfo.codecSpecific.VP9.p_diff[r] =
+              header->codecHeader.VP9.pid_diff[r];
+        }
         _codecSpecificInfo.codecSpecific.VP9.ss_data_available =
             header->codecHeader.VP9.ss_data_available;
         if (header->codecHeader.VP9.picture_id != kNoPictureId) {
diff --git a/modules/video_coding/main/source/generic_encoder.cc b/modules/video_coding/main/source/generic_encoder.cc
index de19604..c10c3ab 100644
--- a/modules/video_coding/main/source/generic_encoder.cc
+++ b/modules/video_coding/main/source/generic_encoder.cc
@@ -54,11 +54,9 @@
       rtp->codecHeader.VP9.inter_layer_predicted =
           info->codecSpecific.VP9.inter_layer_predicted;
       rtp->codecHeader.VP9.gof_idx = info->codecSpecific.VP9.gof_idx;
-
-      // Packetizer needs to know the number of spatial layers to correctly set
-      // the marker bit, even when the number won't be written in the packet.
       rtp->codecHeader.VP9.num_spatial_layers =
           info->codecSpecific.VP9.num_spatial_layers;
+
       if (info->codecSpecific.VP9.ss_data_available) {
         rtp->codecHeader.VP9.spatial_layer_resolution_present =
             info->codecSpecific.VP9.spatial_layer_resolution_present;
@@ -71,6 +69,10 @@
         }
         rtp->codecHeader.VP9.gof.CopyGofInfoVP9(info->codecSpecific.VP9.gof);
       }
+
+      rtp->codecHeader.VP9.num_ref_pics = info->codecSpecific.VP9.num_ref_pics;
+      for (int i = 0; i < info->codecSpecific.VP9.num_ref_pics; ++i)
+        rtp->codecHeader.VP9.pid_diff[i] = info->codecSpecific.VP9.p_diff[i];
       return;
     }
     case kVideoCodecH264:
diff --git a/modules/video_coding/main/source/jitter_buffer.cc b/modules/video_coding/main/source/jitter_buffer.cc
index a5e774b..23fb3f9 100644
--- a/modules/video_coding/main/source/jitter_buffer.cc
+++ b/modules/video_coding/main/source/jitter_buffer.cc
@@ -686,12 +686,6 @@
 
   num_consecutive_old_packets_ = 0;
 
-  if (packet.codec == kVideoCodecVP9 &&
-      packet.codecSpecificHeader.codecHeader.VP9.flexible_mode) {
-    // TODO(asapersson): Add support for flexible mode.
-    return kGeneralError;
-  }
-
   VCMFrameBuffer* frame;
   FrameList* frame_list;
   const VCMFrameBufferEnum error = GetFrame(packet, &frame, &frame_list);
diff --git a/video/full_stack.cc b/video/full_stack.cc
index 2810cd6..66ec49c 100644
--- a/video/full_stack.cc
+++ b/video/full_stack.cc
@@ -145,12 +145,15 @@
   RunTest(config);
 }
 
-TEST_F(FullStackTest, ScreenshareSlidesVP9_2TL) {
+TEST_F(FullStackTest, ScreenshareSlidesVP9_2SL) {
   VideoQualityTest::Params screenshare = {
-      {1850, 1110, 5, 50000, 200000, 2000000, "VP9", 2, 1, 400000},
+      {1850, 1110, 5, 50000, 200000, 2000000, "VP9", 1, 0, 400000},
       {},
       {true, 10},
-      {"screenshare_slides_vp9_2tl", 0.0, 0.0, kFullStackTestDurationSecs}};
+      {"screenshare_slides_vp9_2tl", 0.0, 0.0, kFullStackTestDurationSecs},
+      {},
+      false,
+      {std::vector<VideoStream>(), 0, 2, 1}};
   RunTest(screenshare);
 }
 }  // namespace webrtc
diff --git a/video/video_send_stream.cc b/video/video_send_stream.cc
index 33e1f57..fd0906d 100644
--- a/video/video_send_stream.cc
+++ b/video/video_send_stream.cc
@@ -345,6 +345,12 @@
     if (config.encoder_specific_settings != nullptr) {
       video_codec.codecSpecific.VP9 = *reinterpret_cast<const VideoCodecVP9*>(
                                           config.encoder_specific_settings);
+      if (video_codec.mode == kScreensharing) {
+        video_codec.codecSpecific.VP9.flexibleMode = true;
+        // For now, VP9 screensharing uses 1 temporal and 2 spatial layers.
+        RTC_DCHECK_EQ(video_codec.codecSpecific.VP9.numberOfTemporalLayers, 1);
+        RTC_DCHECK_EQ(video_codec.codecSpecific.VP9.numberOfSpatialLayers, 2);
+      }
     }
     video_codec.codecSpecific.VP9.numberOfTemporalLayers =
         static_cast<unsigned char>(
diff --git a/video/video_send_stream_tests.cc b/video/video_send_stream_tests.cc
index e19dc48..0f44c6f 100644
--- a/video/video_send_stream_tests.cc
+++ b/video/video_send_stream_tests.cc
@@ -1793,7 +1793,10 @@
   VP9HeaderObeserver()
       : SendTest(VideoSendStreamTest::kDefaultTimeoutMs),
         vp9_encoder_(VP9Encoder::Create()),
-        vp9_settings_(VideoEncoder::GetDefaultVp9Settings()) {}
+        vp9_settings_(VideoEncoder::GetDefaultVp9Settings()) {
+    vp9_settings_.numberOfTemporalLayers = 1;
+    vp9_settings_.numberOfSpatialLayers = 2;
+  }
 
   virtual void ModifyConfigsHook(
       VideoSendStream::Config* send_config,
@@ -1809,6 +1812,7 @@
                      std::vector<VideoReceiveStream::Config>* receive_configs,
                      VideoEncoderConfig* encoder_config) override {
     encoder_config->encoder_specific_settings = &vp9_settings_;
+    encoder_config->content_type = VideoEncoderConfig::ContentType::kScreen;
     send_config->encoder_settings.encoder = vp9_encoder_.get();
     send_config->encoder_settings.payload_name = "VP9";
     send_config->encoder_settings.payload_type = kVp9PayloadType;
@@ -1857,17 +1861,6 @@
   VideoCodecVP9 vp9_settings_;
 };
 
-TEST_F(VideoSendStreamTest, VP9NoFlexMode) {
-  class NoFlexibleMode : public VP9HeaderObeserver {
-    void InspectHeader(RTPVideoHeaderVP9* vp9videoHeader) override {
-      EXPECT_FALSE(vp9videoHeader->flexible_mode);
-      observation_complete_->Set();
-    }
-  } test;
-
-  RunBaseTest(&test, FakeNetworkPipe::Config());
-}
-
 TEST_F(VideoSendStreamTest, DISABLED_VP9FlexMode) {
   class FlexibleMode : public VP9HeaderObeserver {
     void ModifyConfigsHook(
@@ -1881,6 +1874,66 @@
       EXPECT_TRUE(vp9videoHeader->flexible_mode);
       observation_complete_->Set();
     }
+  } test;
+
+  RunBaseTest(&test, FakeNetworkPipe::Config());
+}
+
+TEST_F(VideoSendStreamTest, VP9FlexModeHasPictureId) {
+  class FlexibleMode : public VP9HeaderObeserver {
+    void ModifyConfigsHook(
+        VideoSendStream::Config* send_config,
+        std::vector<VideoReceiveStream::Config>* receive_configs,
+        VideoEncoderConfig* encoder_config) override {
+      vp9_settings_.flexibleMode = true;
+    }
+
+    void InspectHeader(RTPVideoHeaderVP9* vp9videoHeader) override {
+      EXPECT_NE(vp9videoHeader->picture_id, kNoPictureId);
+      observation_complete_->Set();
+    }
+  } test;
+
+  RunBaseTest(&test, FakeNetworkPipe::Config());
+}
+
+TEST_F(VideoSendStreamTest, VP9FlexModeRefCount) {
+  class FlexibleMode : public VP9HeaderObeserver {
+    void ModifyConfigsHook(
+        VideoSendStream::Config* send_config,
+        std::vector<VideoReceiveStream::Config>* receive_configs,
+        VideoEncoderConfig* encoder_config) override {
+      vp9_settings_.flexibleMode = true;
+    }
+
+    void InspectHeader(RTPVideoHeaderVP9* vp9videoHeader) override {
+      EXPECT_TRUE(vp9videoHeader->flexible_mode);
+      if (vp9videoHeader->inter_pic_predicted) {
+        EXPECT_GT(vp9videoHeader->num_ref_pics, 0u);
+        observation_complete_->Set();
+      }
+    }
+  } test;
+
+  RunBaseTest(&test, FakeNetworkPipe::Config());
+}
+
+TEST_F(VideoSendStreamTest, VP9FlexModeRefs) {
+  class FlexibleMode : public VP9HeaderObeserver {
+    void ModifyConfigsHook(
+        VideoSendStream::Config* send_config,
+        std::vector<VideoReceiveStream::Config>* receive_configs,
+        VideoEncoderConfig* encoder_config) override {
+      vp9_settings_.flexibleMode = true;
+    }
+
+    void InspectHeader(RTPVideoHeaderVP9* vp9videoHeader) override {
+      EXPECT_TRUE(vp9videoHeader->flexible_mode);
+      if (vp9videoHeader->inter_pic_predicted) {
+        EXPECT_GT(vp9videoHeader->num_ref_pics, 0u);
+        observation_complete_->Set();
+      }
+    }
 
   } test;