Integration of VP9 packetization.

Supports one spatial layer and 1-3 temporal layers in non-flexible mode.

BUG=webrtc:4148, webrtc:4168, chromium:500602
TBR=mflodman

Review URL: https://codereview.webrtc.org/1211353002

Cr-Commit-Position: refs/heads/master@{#9665}
diff --git a/webrtc/common_types.h b/webrtc/common_types.h
index bda9c7e..ac56eb1 100644
--- a/webrtc/common_types.h
+++ b/webrtc/common_types.h
@@ -618,7 +618,7 @@
   }
 };
 
-// VP9 specific
+// VP9 specific.
 struct VideoCodecVP9 {
   VideoCodecComplexity complexity;
   int                  resilience;
@@ -627,6 +627,8 @@
   bool                 frameDroppingOn;
   int                  keyFrameInterval;
   bool                 adaptiveQpMode;
+  unsigned char        numberOfSpatialLayers;
+  bool                 flexibleMode;
 };
 
 // H264 specific.
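
As a usage sketch, a sender would set the new fields alongside the existing VP9 settings (numberOfTemporalLayers already exists on VideoCodecVP9; only the combination below is supported by this change):

  VideoCodec codec;  // Remaining codec settings assumed filled in elsewhere.
  codec.codecType = kVideoCodecVP9;
  codec.codecSpecific.VP9.numberOfTemporalLayers = 3;  // 1-3 supported.
  codec.codecSpecific.VP9.numberOfSpatialLayers = 1;   // Only 1 for now.
  codec.codecSpecific.VP9.flexibleMode = false;        // Non-flexible only.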
diff --git a/webrtc/modules/interface/module_common_types.h b/webrtc/modules/interface/module_common_types.h
index 62fe694..232e695 100644
--- a/webrtc/modules/interface/module_common_types.h
+++ b/webrtc/modules/interface/module_common_types.h
@@ -69,7 +69,62 @@
                               // in a VP8 partition. Otherwise false
 };
 
+enum TemporalStructureMode {
+  kTemporalStructureMode1,    // 1 temporal layer structure - i.e., IPPP...
+  kTemporalStructureMode2,    // 2 temporal layers 0-1-0-1...
+  kTemporalStructureMode3     // 3 temporal layers 0-2-1-2-0-2-1-2...
+};
+
 struct GofInfoVP9 {
+  void SetGofInfoVP9(TemporalStructureMode tm) {
+    switch (tm) {
+      case kTemporalStructureMode1:
+        num_frames_in_gof = 1;
+        temporal_idx[0] = 0;
+        temporal_up_switch[0] = false;
+        num_ref_pics[0] = 1;
+        pid_diff[0][0] = 1;
+        break;
+      case kTemporalStructureMode2:
+        num_frames_in_gof = 2;
+        temporal_idx[0] = 0;
+        temporal_up_switch[0] = false;
+        num_ref_pics[0] = 1;
+        pid_diff[0][0] = 2;
+
+        temporal_idx[1] = 1;
+        temporal_up_switch[1] = true;
+        num_ref_pics[1] = 1;
+        pid_diff[1][0] = 1;
+        break;
+      case kTemporalStructureMode3:
+        num_frames_in_gof = 4;
+        temporal_idx[0] = 0;
+        temporal_up_switch[0] = false;
+        num_ref_pics[0] = 1;
+        pid_diff[0][0] = 4;
+
+        temporal_idx[1] = 2;
+        temporal_up_switch[1] = true;
+        num_ref_pics[1] = 1;
+        pid_diff[1][0] = 1;
+
+        temporal_idx[2] = 1;
+        temporal_up_switch[2] = true;
+        num_ref_pics[2] = 1;
+        pid_diff[2][0] = 2;
+
+        temporal_idx[3] = 2;
+        temporal_up_switch[3] = false;
+        num_ref_pics[3] = 2;
+        pid_diff[3][0] = 1;
+        pid_diff[3][1] = 2;
+        break;
+      default:
+        assert(false);
+    }
+  }
+
   void CopyGofInfoVP9(const GofInfoVP9& src) {
     num_frames_in_gof = src.num_frames_in_gof;
     for (size_t i = 0; i < num_frames_in_gof; ++i) {
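
As a sketch of how the structure above is read: selecting kTemporalStructureMode3 gives a group of four frames whose temporal layers repeat 0-2-1-2, and pid_diff lists how many pictures back each frame's reference(s) sit:

  GofInfoVP9 gof;
  gof.SetGofInfoVP9(kTemporalStructureMode3);
  // gof.num_frames_in_gof == 4 and gof.temporal_idx == {0, 2, 1, 2}.
  // Frame 0 (TL0) references the previous TL0 picture, 4 pictures back.
  // Frame 3 (the second TL2 frame) has two references: the pictures one
  // and two back, i.e. the TL1 frame and the first TL2 frame.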
diff --git a/webrtc/modules/rtp_rtcp/source/rtp_format_vp9.cc b/webrtc/modules/rtp_rtcp/source/rtp_format_vp9.cc
index 00b2f72..64bd5aa 100644
--- a/webrtc/modules/rtp_rtcp/source/rtp_format_vp9.cc
+++ b/webrtc/modules/rtp_rtcp/source/rtp_format_vp9.cc
@@ -643,8 +643,8 @@
   bool p_bit = hdr_.inter_pic_predicted;
   bool l_bit = LayerInfoPresent(hdr_);
   bool f_bit = hdr_.flexible_mode;
-  bool b_bit = hdr_.beginning_of_frame && packet_info.layer_begin;
-  bool e_bit = hdr_.end_of_frame && packet_info.layer_end;
+  bool b_bit = packet_info.layer_begin;
+  bool e_bit = packet_info.layer_end;
   bool v_bit = hdr_.ss_data_available && b_bit;
 
   rtc::BitBufferWriter writer(buffer, max_payload_length_);
@@ -720,7 +720,6 @@
   vp9->beginning_of_frame = b_bit ? true : false;
   vp9->end_of_frame = e_bit ? true : false;
   vp9->ss_data_available = v_bit ? true : false;
-  vp9->temporal_idx = 0;
   vp9->spatial_idx = 0;
 
   // Parse fields that are present.
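
Since the packetizer is now handed one layer frame at a time, the B and E bits mark the first and last packet of that layer frame directly, rather than being gated on the superframe's begin/end flags. For reference, a minimal sketch of how the required descriptor bits assemble into the first payload octet (bit order I|P|L|F|B|E|V per the VP9 payload draft; the helper itself is hypothetical):

  uint8_t BuildFirstOctet(bool i_bit, bool p_bit, bool l_bit, bool f_bit,
                          bool b_bit, bool e_bit, bool v_bit) {
    // I: picture ID present, P: inter-picture predicted, L: layer indices
    // present, F: flexible mode, B/E: begin/end of a layer frame,
    // V: scalability structure (SS) data present. Lowest bit is reserved.
    return static_cast<uint8_t>((i_bit << 7) | (p_bit << 6) | (l_bit << 5) |
                                (f_bit << 4) | (b_bit << 3) | (e_bit << 2) |
                                (v_bit << 1));
  }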
diff --git a/webrtc/modules/rtp_rtcp/source/rtp_format_vp9_unittest.cc b/webrtc/modules/rtp_rtcp/source/rtp_format_vp9_unittest.cc
index a052e3e..1f57c92 100644
--- a/webrtc/modules/rtp_rtcp/source/rtp_format_vp9_unittest.cc
+++ b/webrtc/modules/rtp_rtcp/source/rtp_format_vp9_unittest.cc
@@ -27,8 +27,7 @@
   EXPECT_EQ(expected.ss_data_available, actual.ss_data_available);
   EXPECT_EQ(expected.picture_id, actual.picture_id);
   EXPECT_EQ(expected.max_picture_id, actual.max_picture_id);
-  EXPECT_EQ(expected.temporal_idx == kNoTemporalIdx ? 0 : expected.temporal_idx,
-      actual.temporal_idx);
+  EXPECT_EQ(expected.temporal_idx, actual.temporal_idx);
   EXPECT_EQ(expected.spatial_idx == kNoSpatialIdx ? 0 : expected.spatial_idx,
       actual.spatial_idx);
   EXPECT_EQ(expected.gof_idx, actual.gof_idx);
@@ -128,9 +127,6 @@
   RtpPacketizerVp9Test() {}
   virtual void SetUp() {
     expected_.InitRTPVideoHeaderVP9();
-    // Always input one layer frame at a time.
-    expected_.beginning_of_frame = true;
-    expected_.end_of_frame = true;
   }
 
   rtc::scoped_ptr<uint8_t[]> packet_;
diff --git a/webrtc/modules/rtp_rtcp/source/rtp_payload_registry.cc b/webrtc/modules/rtp_rtcp/source/rtp_payload_registry.cc
index 8e2ff17..20e650c 100644
--- a/webrtc/modules/rtp_rtcp/source/rtp_payload_registry.cc
+++ b/webrtc/modules/rtp_rtcp/source/rtp_payload_registry.cc
@@ -433,6 +433,8 @@
 
     if (RtpUtility::StringCompare(payloadName, "VP8", 3)) {
       videoType = kRtpVideoVp8;
+    } else if (RtpUtility::StringCompare(payloadName, "VP9", 3)) {
+      videoType = kRtpVideoVp9;
     } else if (RtpUtility::StringCompare(payloadName, "H264", 4)) {
       videoType = kRtpVideoH264;
     } else if (RtpUtility::StringCompare(payloadName, "I420", 4)) {
diff --git a/webrtc/modules/rtp_rtcp/source/rtp_sender_video.cc b/webrtc/modules/rtp_rtcp/source/rtp_sender_video.cc
index 88bb5bb..4c740e8 100644
--- a/webrtc/modules/rtp_rtcp/source/rtp_sender_video.cc
+++ b/webrtc/modules/rtp_rtcp/source/rtp_sender_video.cc
@@ -21,6 +21,7 @@
 #include "webrtc/modules/rtp_rtcp/source/producer_fec.h"
 #include "webrtc/modules/rtp_rtcp/source/rtp_format_video_generic.h"
 #include "webrtc/modules/rtp_rtcp/source/rtp_format_vp8.h"
+#include "webrtc/modules/rtp_rtcp/source/rtp_format_vp9.h"
 #include "webrtc/system_wrappers/interface/critical_section_wrapper.h"
 #include "webrtc/system_wrappers/interface/logging.h"
 #include "webrtc/system_wrappers/interface/trace_event.h"
@@ -76,6 +77,8 @@
   RtpVideoCodecTypes videoType = kRtpVideoGeneric;
   if (RtpUtility::StringCompare(payloadName, "VP8", 3)) {
     videoType = kRtpVideoVp8;
+  } else if (RtpUtility::StringCompare(payloadName, "VP9", 3)) {
+    videoType = kRtpVideoVp9;
   } else if (RtpUtility::StringCompare(payloadName, "H264", 4)) {
     videoType = kRtpVideoH264;
   } else if (RtpUtility::StringCompare(payloadName, "I420", 4)) {
diff --git a/webrtc/modules/video_coding/codecs/interface/video_codec_interface.h b/webrtc/modules/video_coding/codecs/interface/video_codec_interface.h
index 6acd2d4..411fbfd 100644
--- a/webrtc/modules/video_coding/codecs/interface/video_codec_interface.h
+++ b/webrtc/modules/video_coding/codecs/interface/video_codec_interface.h
@@ -43,16 +43,31 @@
 };
 
 struct CodecSpecificInfoVP9 {
-  bool hasReceivedSLI;
-  uint8_t pictureIdSLI;
-  bool hasReceivedRPSI;
-  uint64_t pictureIdRPSI;
-  int16_t pictureId;  // Negative value to skip pictureId.
-  bool nonReference;
-  uint8_t temporalIdx;
-  bool layerSync;
-  int tl0PicIdx;  // Negative value to skip tl0PicIdx.
-  int8_t keyIdx;  // Negative value to skip keyIdx.
+  bool has_received_sli;
+  uint8_t picture_id_sli;
+  bool has_received_rpsi;
+  uint64_t picture_id_rpsi;
+  int16_t picture_id;  // Negative value to skip picture_id.
+
+  bool inter_pic_predicted;  // This layer frame is dependent on previously
+                             // coded frame(s).
+  bool flexible_mode;
+  bool ss_data_available;
+
+  int tl0_pic_idx;  // Negative value to skip tl0_pic_idx.
+  uint8_t temporal_idx;
+  uint8_t spatial_idx;
+  bool temporal_up_switch;
+  bool inter_layer_predicted;  // Frame is dependent on directly lower spatial
+                               // layer frame.
+  uint8_t gof_idx;
+
+  // SS data.
+  size_t num_spatial_layers;
+  bool spatial_layer_resolution_present;
+  uint16_t width[kMaxVp9NumberOfSpatialLayers];
+  uint16_t height[kMaxVp9NumberOfSpatialLayers];
+  GofInfoVP9 gof;
 };
 
 struct CodecSpecificInfoGeneric {
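
On the consuming side, the SS block is only meaningful when ss_data_available is set, and only the first num_spatial_layers entries of the width/height arrays are valid. A minimal sketch (the printing helper is illustrative only):

  #include <cstdio>

  void PrintSpatialLayers(const CodecSpecificInfoVP9& vp9) {
    if (!vp9.ss_data_available || !vp9.spatial_layer_resolution_present)
      return;
    for (size_t i = 0; i < vp9.num_spatial_layers; ++i) {
      std::printf("SL%zu: %ux%u\n", i,
                  static_cast<unsigned>(vp9.width[i]),
                  static_cast<unsigned>(vp9.height[i]));
    }
  }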
diff --git a/webrtc/modules/video_coding/codecs/vp9/vp9_impl.cc b/webrtc/modules/video_coding/codecs/vp9/vp9_impl.cc
index cd91fa3..04bbd16 100644
--- a/webrtc/modules/video_coding/codecs/vp9/vp9_impl.cc
+++ b/webrtc/modules/video_coding/codecs/vp9/vp9_impl.cc
@@ -57,6 +57,12 @@
   return new VP9EncoderImpl();
 }
 
+void VP9EncoderImpl::EncoderOutputCodedPacketCallback(vpx_codec_cx_pkt* pkt,
+                                                      void* user_data) {
+  VP9EncoderImpl* enc = (VP9EncoderImpl*)(user_data);
+  enc->GetEncodedLayerFrame(pkt);
+}
+
 VP9EncoderImpl::VP9EncoderImpl()
     : encoded_image_(),
       encoded_complete_callback_(NULL),
@@ -67,7 +73,12 @@
       rc_max_intra_target_(0),
       encoder_(NULL),
       config_(NULL),
-      raw_(NULL) {
+      raw_(NULL),
+      input_image_(NULL),
+      tl0_pic_idx_(0),
+      gof_idx_(0),
+      num_temporal_layers_(0),
+      num_spatial_layers_(0) {
   memset(&codec_, 0, sizeof(codec_));
   uint32_t seed = static_cast<uint32_t>(TickTime::MillisecondTimestamp());
   srand(seed);
@@ -101,6 +112,55 @@
   return WEBRTC_VIDEO_CODEC_OK;
 }
 
+bool VP9EncoderImpl::SetSvcRates() {
+  float rate_ratio[VPX_MAX_LAYERS] = {0};
+  float total = 0;
+  uint8_t i = 0;
+
+  for (i = 0; i < num_spatial_layers_; ++i) {
+    if (svc_internal_.svc_params.scaling_factor_num[i] <= 0 ||
+        svc_internal_.svc_params.scaling_factor_den[i] <= 0) {
+      return false;
+    }
+    rate_ratio[i] = static_cast<float>(
+        svc_internal_.svc_params.scaling_factor_num[i]) /
+        svc_internal_.svc_params.scaling_factor_den[i];
+    total += rate_ratio[i];
+  }
+
+  for (i = 0; i < num_spatial_layers_; ++i) {
+    config_->ss_target_bitrate[i] = static_cast<unsigned int>(
+        config_->rc_target_bitrate * rate_ratio[i] / total);
+    if (num_temporal_layers_ == 1) {
+      config_->layer_target_bitrate[i] = config_->ss_target_bitrate[i];
+    } else if (num_temporal_layers_ == 2) {
+      config_->layer_target_bitrate[i * num_temporal_layers_] =
+          config_->ss_target_bitrate[i] * 2 / 3;
+      config_->layer_target_bitrate[i * num_temporal_layers_ + 1] =
+          config_->ss_target_bitrate[i];
+    } else if (num_temporal_layers_ == 3) {
+      config_->layer_target_bitrate[i * num_temporal_layers_] =
+          config_->ss_target_bitrate[i] / 2;
+      config_->layer_target_bitrate[i * num_temporal_layers_ + 1] =
+          config_->layer_target_bitrate[i * num_temporal_layers_] +
+          (config_->ss_target_bitrate[i] / 4);
+      config_->layer_target_bitrate[i * num_temporal_layers_ + 2] =
+          config_->ss_target_bitrate[i];
+    } else {
+      return false;
+    }
+  }
+
+  // For now, temporal layers are only supported with a single spatial layer.
+  if (num_spatial_layers_ == 1) {
+    for (i = 0; i < num_temporal_layers_; ++i) {
+      config_->ts_target_bitrate[i] = config_->layer_target_bitrate[i];
+    }
+  }
+
+  return true;
+}
+
 int VP9EncoderImpl::SetRates(uint32_t new_bitrate_kbit,
                              uint32_t new_framerate) {
   if (!inited_) {
@@ -118,6 +178,11 @@
   }
   config_->rc_target_bitrate = new_bitrate_kbit;
   codec_.maxFramerate = new_framerate;
+
+  if (!SetSvcRates()) {
+    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
+  }
+
   // Update encoder context
   if (vpx_codec_enc_config_set(encoder_, config_)) {
     return WEBRTC_VIDEO_CODEC_ERROR;
@@ -144,6 +209,13 @@
   if (number_of_cores < 1) {
     return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
   }
+  if (inst->codecSpecific.VP9.numberOfTemporalLayers > 3) {
+    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
+  }
+  // For now, only support one spatial layer.
+  if (inst->codecSpecific.VP9.numberOfSpatialLayers != 1) {
+    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
+  }
   int retVal = Release();
   if (retVal < 0) {
     return retVal;
@@ -158,6 +230,12 @@
   if (&codec_ != inst) {
     codec_ = *inst;
   }
+
+  num_spatial_layers_ = inst->codecSpecific.VP9.numberOfSpatialLayers;
+  num_temporal_layers_ = inst->codecSpecific.VP9.numberOfTemporalLayers;
+  if (num_temporal_layers_ == 0)
+    num_temporal_layers_ = 1;
+
   // Random start 16 bits is enough.
   picture_id_ = static_cast<uint16_t>(rand()) & 0x7FFF;
   // Allocate memory for encoded image
@@ -209,13 +287,57 @@
   config_->g_threads = NumberOfThreads(config_->g_w,
                                        config_->g_h,
                                        number_of_cores);
+
   cpu_speed_ = GetCpuSpeed(config_->g_w, config_->g_h);
+
+  // TODO(asapersson): Check configuration of temporal switch up and increase
+  // pattern length.
+  if (num_temporal_layers_ == 1) {
+    gof_.SetGofInfoVP9(kTemporalStructureMode1);
+    config_->temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_NOLAYERING;
+    config_->ts_number_layers = 1;
+    config_->ts_rate_decimator[0] = 1;
+    config_->ts_periodicity = 1;
+    config_->ts_layer_id[0] = 0;
+  } else if (num_temporal_layers_ == 2) {
+    gof_.SetGofInfoVP9(kTemporalStructureMode2);
+    config_->temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_0101;
+    config_->ts_number_layers = 2;
+    config_->ts_rate_decimator[0] = 2;
+    config_->ts_rate_decimator[1] = 1;
+    config_->ts_periodicity = 2;
+    config_->ts_layer_id[0] = 0;
+    config_->ts_layer_id[1] = 1;
+  } else if (num_temporal_layers_ == 3) {
+    gof_.SetGofInfoVP9(kTemporalStructureMode3);
+    config_->temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_0212;
+    config_->ts_number_layers = 3;
+    config_->ts_rate_decimator[0] = 4;
+    config_->ts_rate_decimator[1] = 2;
+    config_->ts_rate_decimator[2] = 1;
+    config_->ts_periodicity = 4;
+    config_->ts_layer_id[0] = 0;
+    config_->ts_layer_id[1] = 2;
+    config_->ts_layer_id[2] = 1;
+    config_->ts_layer_id[3] = 2;
+  } else {
+    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
+  }
+
+  tl0_pic_idx_ = static_cast<uint8_t>(rand());
+
   return InitAndSetControlSettings(inst);
 }
 
 int VP9EncoderImpl::NumberOfThreads(int width,
                                     int height,
                                     int number_of_cores) {
+  // For the current libvpx library, only 1 thread is supported when SVC is
+  // turned on.
+  if (num_temporal_layers_ > 1 || num_spatial_layers_ > 1) {
+    return 1;
+  }
+
   // Keep the number of encoder threads equal to the possible number of column
   // tiles, which is (1, 2, 4, 8). See comments below for VP9E_SET_TILE_COLUMNS.
   if (width * height >= 1280 * 720 && number_of_cores > 4) {
@@ -229,6 +351,27 @@
 }
 
 int VP9EncoderImpl::InitAndSetControlSettings(const VideoCodec* inst) {
+
+  config_->ss_number_layers = num_spatial_layers_;
+
+  if (num_spatial_layers_ > 1) {
+    config_->rc_min_quantizer = 0;
+    config_->rc_max_quantizer = 63;
+  }
+  int scaling_factor_num = 256;
+  for (int i = num_spatial_layers_ - 1; i >= 0; --i) {
+    svc_internal_.svc_params.max_quantizers[i] = config_->rc_max_quantizer;
+    svc_internal_.svc_params.min_quantizers[i] = config_->rc_min_quantizer;
+    // 1:2 scaling in each dimension.
+    svc_internal_.svc_params.scaling_factor_num[i] = scaling_factor_num;
+    svc_internal_.svc_params.scaling_factor_den[i] = 256;
+    scaling_factor_num /= 2;
+  }
+
+  if (!SetSvcRates()) {
+    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
+  }
+
   if (vpx_codec_enc_init(encoder_, vpx_codec_vp9_cx(), config_, 0)) {
     return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
   }
@@ -237,6 +380,19 @@
                     rc_max_intra_target_);
   vpx_codec_control(encoder_, VP9E_SET_AQ_MODE,
                     inst->codecSpecific.VP9.adaptiveQpMode ? 3 : 0);
+
+  vpx_codec_control(
+      encoder_, VP9E_SET_SVC,
+      (num_temporal_layers_ > 1 || num_spatial_layers_ > 1) ? 1 : 0);
+  if (num_temporal_layers_ > 1 || num_spatial_layers_ > 1) {
+    vpx_codec_control(encoder_, VP9E_SET_SVC_PARAMETERS,
+                      &svc_internal_.svc_params);
+  }
+  // Register callback for getting each spatial layer.
+  vpx_codec_priv_output_cx_pkt_cb_pair_t cbp = {
+      VP9EncoderImpl::EncoderOutputCodedPacketCallback, (void*)(this)};
+  vpx_codec_control(encoder_, VP9E_REGISTER_CX_CALLBACK, (void*)(&cbp));
+
   // Control function to set the number of column tiles in encoding a frame, in
   // log2 unit: e.g., 0 = 1 tile column, 1 = 2 tile columns, 2 = 4 tile columns.
   // The number tile columns will be capped by the encoder based on image size
@@ -286,6 +442,13 @@
   }
   DCHECK_EQ(input_image.width(), static_cast<int>(raw_->d_w));
   DCHECK_EQ(input_image.height(), static_cast<int>(raw_->d_h));
+
+  // Keep a pointer to the input image, for use in the encoded-layer
+  // callback, which needs its timestamp and render time.
+  // Storing only the required fields (such as the timestamp) instead
+  // would also work.
+  input_image_ = &input_image;
+
   // Image in vpx_image_t format.
   // Input image is const. VPX's raw image is not defined as const.
   raw_->planes[VPX_PLANE_Y] = const_cast<uint8_t*>(input_image.buffer(kYPlane));
@@ -308,7 +471,8 @@
     return WEBRTC_VIDEO_CODEC_ERROR;
   }
   timestamp_ += duration;
-  return GetEncodedPartitions(input_image);
+
+  return WEBRTC_VIDEO_CODEC_OK;
 }
 
 void VP9EncoderImpl::PopulateCodecSpecific(CodecSpecificInfo* codec_specific,
@@ -317,20 +481,83 @@
   assert(codec_specific != NULL);
   codec_specific->codecType = kVideoCodecVP9;
   CodecSpecificInfoVP9 *vp9_info = &(codec_specific->codecSpecific.VP9);
-  vp9_info->pictureId = picture_id_;
-  vp9_info->keyIdx = kNoKeyIdx;
-  vp9_info->nonReference = (pkt.data.frame.flags & VPX_FRAME_IS_DROPPABLE) != 0;
-  // TODO(marpan): Temporal layers are supported in the current VP9 version,
-  // but for now use 1 temporal layer encoding. Will update this when temporal
-  // layer support for VP9 is added in webrtc.
-  vp9_info->temporalIdx = kNoTemporalIdx;
-  vp9_info->layerSync = false;
-  vp9_info->tl0PicIdx = kNoTl0PicIdx;
-  picture_id_ = (picture_id_ + 1) & 0x7FFF;
+  // TODO(asapersson): Set correct values.
+  vp9_info->inter_pic_predicted =
+      (pkt.data.frame.flags & VPX_FRAME_IS_KEY) ? false : true;
+  vp9_info->flexible_mode = codec_.codecSpecific.VP9.flexibleMode;
+  vp9_info->ss_data_available =
+      (pkt.data.frame.flags & VPX_FRAME_IS_KEY) ? true : false;
+  if (pkt.data.frame.flags & VPX_FRAME_IS_KEY) {
+    gof_idx_ = 0;
+  }
+
+  vpx_svc_layer_id_t layer_id = {0};
+  vpx_codec_control(encoder_, VP9E_GET_SVC_LAYER_ID, &layer_id);
+
+  assert(num_temporal_layers_ > 0);
+  assert(num_spatial_layers_ > 0);
+  if (num_temporal_layers_ == 1) {
+    assert(layer_id.temporal_layer_id == 0);
+    vp9_info->temporal_idx = kNoTemporalIdx;
+  } else {
+    vp9_info->temporal_idx = layer_id.temporal_layer_id;
+  }
+  if (num_spatial_layers_ == 1) {
+    assert(layer_id.spatial_layer_id == 0);
+    vp9_info->spatial_idx = kNoSpatialIdx;
+  } else {
+    vp9_info->spatial_idx = layer_id.spatial_layer_id;
+  }
+  if (layer_id.spatial_layer_id != 0) {
+    vp9_info->ss_data_available = false;
+  }
+
+  if (vp9_info->flexible_mode) {
+    vp9_info->gof_idx = kNoGofIdx;
+  } else {
+    vp9_info->gof_idx =
+        static_cast<uint8_t>(gof_idx_++ % gof_.num_frames_in_gof);
+  }
+
+  // TODO(asapersson): this info has to be obtained from the encoder.
+  vp9_info->temporal_up_switch = true;
+
+  if (layer_id.spatial_layer_id == 0) {
+    picture_id_ = (picture_id_ + 1) & 0x7FFF;
+    // TODO(asapersson): this info has to be obtained from the encoder.
+    vp9_info->inter_layer_predicted = false;
+  } else {
+    // TODO(asapersson): this info has to be obtained from the encoder.
+    vp9_info->inter_layer_predicted = true;
+  }
+
+  vp9_info->picture_id = picture_id_;
+
+  if (!vp9_info->flexible_mode) {
+    if (layer_id.temporal_layer_id == 0 && layer_id.spatial_layer_id == 0) {
+      tl0_pic_idx_++;
+    }
+    vp9_info->tl0_pic_idx = tl0_pic_idx_;
+  }
+
+  if (vp9_info->ss_data_available) {
+    vp9_info->num_spatial_layers = num_spatial_layers_;
+    vp9_info->spatial_layer_resolution_present = true;
+    for (size_t i = 0; i < vp9_info->num_spatial_layers; ++i) {
+      vp9_info->width[i] = codec_.width *
+                           svc_internal_.svc_params.scaling_factor_num[i] /
+                           svc_internal_.svc_params.scaling_factor_den[i];
+      vp9_info->height[i] = codec_.height *
+                            svc_internal_.svc_params.scaling_factor_num[i] /
+                            svc_internal_.svc_params.scaling_factor_den[i];
+    }
+    if (!vp9_info->flexible_mode) {
+      vp9_info->gof.CopyGofInfoVP9(gof_);
+    }
+  }
 }
 
-int VP9EncoderImpl::GetEncodedPartitions(const VideoFrame& input_image) {
-  vpx_codec_iter_t iter = NULL;
+int VP9EncoderImpl::GetEncodedLayerFrame(const vpx_codec_cx_pkt* pkt) {
   encoded_image_._length = 0;
   encoded_image_._frameType = kDeltaFrame;
   RTPFragmentationHeader frag_info;
@@ -339,44 +566,33 @@
   frag_info.VerifyAndAllocateFragmentationHeader(1);
   int part_idx = 0;
   CodecSpecificInfo codec_specific;
-  const vpx_codec_cx_pkt_t *pkt = NULL;
-  while ((pkt = vpx_codec_get_cx_data(encoder_, &iter)) != NULL) {
-    switch (pkt->kind) {
-      case VPX_CODEC_CX_FRAME_PKT: {
-        memcpy(&encoded_image_._buffer[encoded_image_._length],
-               pkt->data.frame.buf,
-               pkt->data.frame.sz);
-        frag_info.fragmentationOffset[part_idx] = encoded_image_._length;
-        frag_info.fragmentationLength[part_idx] =
-            static_cast<uint32_t>(pkt->data.frame.sz);
-        frag_info.fragmentationPlType[part_idx] = 0;
-        frag_info.fragmentationTimeDiff[part_idx] = 0;
-        encoded_image_._length += static_cast<uint32_t>(pkt->data.frame.sz);
-        assert(encoded_image_._length <= encoded_image_._size);
-        break;
-      }
-      default: {
-        break;
-      }
-    }
-    // End of frame.
-    if ((pkt->data.frame.flags & VPX_FRAME_IS_FRAGMENT) == 0) {
-      // Check if encoded frame is a key frame.
-      if (pkt->data.frame.flags & VPX_FRAME_IS_KEY) {
-        encoded_image_._frameType = kKeyFrame;
-      }
-      PopulateCodecSpecific(&codec_specific, *pkt, input_image.timestamp());
-      break;
-    }
+
+  assert(pkt->kind == VPX_CODEC_CX_FRAME_PKT);
+  memcpy(&encoded_image_._buffer[encoded_image_._length], pkt->data.frame.buf,
+         pkt->data.frame.sz);
+  frag_info.fragmentationOffset[part_idx] = encoded_image_._length;
+  frag_info.fragmentationLength[part_idx] =
+      static_cast<uint32_t>(pkt->data.frame.sz);
+  frag_info.fragmentationPlType[part_idx] = 0;
+  frag_info.fragmentationTimeDiff[part_idx] = 0;
+  encoded_image_._length += static_cast<uint32_t>(pkt->data.frame.sz);
+  assert(encoded_image_._length <= encoded_image_._size);
+
+  // End of frame.
+  // Check if encoded frame is a key frame.
+  if (pkt->data.frame.flags & VPX_FRAME_IS_KEY) {
+    encoded_image_._frameType = kKeyFrame;
   }
+  PopulateCodecSpecific(&codec_specific, *pkt, input_image_->timestamp());
+
   if (encoded_image_._length > 0) {
     TRACE_COUNTER1("webrtc", "EncodedFrameSize", encoded_image_._length);
-    encoded_image_._timeStamp = input_image.timestamp();
-    encoded_image_.capture_time_ms_ = input_image.render_time_ms();
+    encoded_image_._timeStamp = input_image_->timestamp();
+    encoded_image_.capture_time_ms_ = input_image_->render_time_ms();
     encoded_image_._encodedHeight = raw_->d_h;
     encoded_image_._encodedWidth = raw_->d_w;
     encoded_complete_callback_->Encoded(encoded_image_, &codec_specific,
-                                      &frag_info);
+                                        &frag_info);
   }
   return WEBRTC_VIDEO_CODEC_OK;
 }
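
To make the SetSvcRates() split concrete: libvpx takes cumulative per-temporal-layer targets, so one spatial layer at 1200 kbps with three temporal layers yields 600 / 900 / 1200 kbps (TL0 alone gets half, TL0+TL1 three quarters, all layers the full rate). The same arithmetic in isolation (a sketch, not the class method):

  // Cumulative temporal-layer targets for a single spatial layer,
  // mirroring VP9EncoderImpl::SetSvcRates(). Returns false for
  // unsupported layer counts.
  bool SplitBitrate(unsigned int total_kbps, int num_temporal_layers,
                    unsigned int cumulative_kbps[3]) {
    switch (num_temporal_layers) {
      case 1:
        cumulative_kbps[0] = total_kbps;
        return true;
      case 2:
        cumulative_kbps[0] = total_kbps * 2 / 3;  // TL0
        cumulative_kbps[1] = total_kbps;          // TL0+TL1
        return true;
      case 3:
        cumulative_kbps[0] = total_kbps / 2;                        // TL0
        cumulative_kbps[1] = cumulative_kbps[0] + total_kbps / 4;   // +TL1
        cumulative_kbps[2] = total_kbps;                            // +TL2
        return true;
      default:
        return false;
    }
  }
  // SplitBitrate(1200, 3, out) -> out == {600, 900, 1200}.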
diff --git a/webrtc/modules/video_coding/codecs/vp9/vp9_impl.h b/webrtc/modules/video_coding/codecs/vp9/vp9_impl.h
index 5775952..c164a63 100644
--- a/webrtc/modules/video_coding/codecs/vp9/vp9_impl.h
+++ b/webrtc/modules/video_coding/codecs/vp9/vp9_impl.h
@@ -15,6 +15,7 @@
 #include "webrtc/modules/video_coding/codecs/vp9/include/vp9.h"
 #include "webrtc/modules/video_coding/codecs/vp9/vp9_frame_buffer_pool.h"
 
+#include "vpx/svc_context.h"
 #include "vpx/vpx_decoder.h"
 #include "vpx/vpx_encoder.h"
 
@@ -55,7 +56,13 @@
                              const vpx_codec_cx_pkt& pkt,
                              uint32_t timestamp);
 
-  int GetEncodedPartitions(const VideoFrame& input_image);
+  bool SetSvcRates();
+
+  virtual int GetEncodedLayerFrame(const vpx_codec_cx_pkt* pkt);
+
+  // Callback function for outputting packets per spatial layer.
+  static void EncoderOutputCodedPacketCallback(vpx_codec_cx_pkt* pkt,
+                                               void* user_data);
 
   // Determine maximum target for Intra frames
   //
@@ -76,6 +83,14 @@
   vpx_codec_ctx_t* encoder_;
   vpx_codec_enc_cfg_t* config_;
   vpx_image_t* raw_;
+  SvcInternal_t svc_internal_;
+  const VideoFrame* input_image_;
+  GofInfoVP9 gof_;       // Contains each frame's temporal information for
+                         // non-flexible mode.
+  uint8_t tl0_pic_idx_;  // Only used in non-flexible mode.
+  size_t gof_idx_;       // Only used in non-flexible mode.
+  uint8_t num_temporal_layers_;
+  uint8_t num_spatial_layers_;
 };
 
 
diff --git a/webrtc/modules/video_coding/main/source/codec_database.cc b/webrtc/modules/video_coding/main/source/codec_database.cc
index 5f89936..2e2d91e 100644
--- a/webrtc/modules/video_coding/main/source/codec_database.cc
+++ b/webrtc/modules/video_coding/main/source/codec_database.cc
@@ -61,7 +61,8 @@
   vp9_settings.frameDroppingOn = true;
   vp9_settings.keyFrameInterval = 3000;
   vp9_settings.adaptiveQpMode = true;
-
+  vp9_settings.numberOfSpatialLayers = 1;
+  vp9_settings.flexibleMode = false;
   return vp9_settings;
 }
 
diff --git a/webrtc/modules/video_coding/main/source/encoded_frame.cc b/webrtc/modules/video_coding/main/source/encoded_frame.cc
index 2830399..0fa4425 100644
--- a/webrtc/modules/video_coding/main/source/encoded_frame.cc
+++ b/webrtc/modules/video_coding/main/source/encoded_frame.cc
@@ -132,6 +132,67 @@
         }
         break;
       }
+      case kRtpVideoVp9: {
+        if (_codecSpecificInfo.codecType != kVideoCodecVP9) {
+          // This is the first packet for this frame.
+          _codecSpecificInfo.codecSpecific.VP9.picture_id = -1;
+          _codecSpecificInfo.codecSpecific.VP9.temporal_idx = 0;
+          _codecSpecificInfo.codecSpecific.VP9.spatial_idx = 0;
+          _codecSpecificInfo.codecSpecific.VP9.gof_idx = 0;
+          _codecSpecificInfo.codecSpecific.VP9.inter_layer_predicted = false;
+          _codecSpecificInfo.codecSpecific.VP9.tl0_pic_idx = -1;
+          _codecSpecificInfo.codecType = kVideoCodecVP9;
+        }
+        _codecSpecificInfo.codecSpecific.VP9.inter_pic_predicted =
+            header->codecHeader.VP9.inter_pic_predicted;
+        _codecSpecificInfo.codecSpecific.VP9.flexible_mode =
+            header->codecHeader.VP9.flexible_mode;
+        _codecSpecificInfo.codecSpecific.VP9.ss_data_available =
+            header->codecHeader.VP9.ss_data_available;
+        if (header->codecHeader.VP9.picture_id != kNoPictureId) {
+          _codecSpecificInfo.codecSpecific.VP9.picture_id =
+              header->codecHeader.VP9.picture_id;
+        }
+        if (header->codecHeader.VP9.tl0_pic_idx != kNoTl0PicIdx) {
+          _codecSpecificInfo.codecSpecific.VP9.tl0_pic_idx =
+              header->codecHeader.VP9.tl0_pic_idx;
+        }
+        if (header->codecHeader.VP9.temporal_idx != kNoTemporalIdx) {
+          _codecSpecificInfo.codecSpecific.VP9.temporal_idx =
+              header->codecHeader.VP9.temporal_idx;
+          _codecSpecificInfo.codecSpecific.VP9.temporal_up_switch =
+              header->codecHeader.VP9.temporal_up_switch;
+        }
+        if (header->codecHeader.VP9.spatial_idx != kNoSpatialIdx) {
+          _codecSpecificInfo.codecSpecific.VP9.spatial_idx =
+              header->codecHeader.VP9.spatial_idx;
+          _codecSpecificInfo.codecSpecific.VP9.inter_layer_predicted =
+              header->codecHeader.VP9.inter_layer_predicted;
+        }
+        if (header->codecHeader.VP9.gof_idx != kNoGofIdx) {
+          _codecSpecificInfo.codecSpecific.VP9.gof_idx =
+              header->codecHeader.VP9.gof_idx;
+        }
+        if (header->codecHeader.VP9.ss_data_available) {
+          _codecSpecificInfo.codecSpecific.VP9.num_spatial_layers =
+              header->codecHeader.VP9.num_spatial_layers;
+          _codecSpecificInfo.codecSpecific.VP9
+              .spatial_layer_resolution_present =
+              header->codecHeader.VP9.spatial_layer_resolution_present;
+          if (header->codecHeader.VP9.spatial_layer_resolution_present) {
+            for (size_t i = 0; i < header->codecHeader.VP9.num_spatial_layers;
+                 ++i) {
+              _codecSpecificInfo.codecSpecific.VP9.width[i] =
+                  header->codecHeader.VP9.width[i];
+              _codecSpecificInfo.codecSpecific.VP9.height[i] =
+                  header->codecHeader.VP9.height[i];
+            }
+          }
+          _codecSpecificInfo.codecSpecific.VP9.gof.CopyGofInfoVP9(
+              header->codecHeader.VP9.gof);
+        }
+        break;
+      }
       case kRtpVideoH264: {
         _codecSpecificInfo.codecType = kVideoCodecH264;
         break;
diff --git a/webrtc/modules/video_coding/main/source/generic_encoder.cc b/webrtc/modules/video_coding/main/source/generic_encoder.cc
index 63e3976..c0925b9 100644
--- a/webrtc/modules/video_coding/main/source/generic_encoder.cc
+++ b/webrtc/modules/video_coding/main/source/generic_encoder.cc
@@ -36,6 +36,41 @@
       rtp->simulcastIdx = info->codecSpecific.VP8.simulcastIdx;
       return;
     }
+    case kVideoCodecVP9: {
+      rtp->codec = kRtpVideoVp9;
+      rtp->codecHeader.VP9.InitRTPVideoHeaderVP9();
+      rtp->codecHeader.VP9.inter_pic_predicted =
+          info->codecSpecific.VP9.inter_pic_predicted;
+      rtp->codecHeader.VP9.flexible_mode =
+          info->codecSpecific.VP9.flexible_mode;
+      rtp->codecHeader.VP9.ss_data_available =
+          info->codecSpecific.VP9.ss_data_available;
+      rtp->codecHeader.VP9.picture_id = info->codecSpecific.VP9.picture_id;
+      rtp->codecHeader.VP9.tl0_pic_idx = info->codecSpecific.VP9.tl0_pic_idx;
+      rtp->codecHeader.VP9.temporal_idx = info->codecSpecific.VP9.temporal_idx;
+      rtp->codecHeader.VP9.spatial_idx = info->codecSpecific.VP9.spatial_idx;
+      rtp->codecHeader.VP9.temporal_up_switch =
+          info->codecSpecific.VP9.temporal_up_switch;
+      rtp->codecHeader.VP9.inter_layer_predicted =
+          info->codecSpecific.VP9.inter_layer_predicted;
+      rtp->codecHeader.VP9.gof_idx = info->codecSpecific.VP9.gof_idx;
+
+      if (info->codecSpecific.VP9.ss_data_available) {
+        rtp->codecHeader.VP9.num_spatial_layers =
+            info->codecSpecific.VP9.num_spatial_layers;
+        rtp->codecHeader.VP9.spatial_layer_resolution_present =
+            info->codecSpecific.VP9.spatial_layer_resolution_present;
+        if (info->codecSpecific.VP9.spatial_layer_resolution_present) {
+          for (size_t i = 0; i < info->codecSpecific.VP9.num_spatial_layers;
+               ++i) {
+            rtp->codecHeader.VP9.width[i] = info->codecSpecific.VP9.width[i];
+            rtp->codecHeader.VP9.height[i] = info->codecSpecific.VP9.height[i];
+          }
+        }
+        rtp->codecHeader.VP9.gof.CopyGofInfoVP9(info->codecSpecific.VP9.gof);
+      }
+      return;
+    }
     case kVideoCodecH264:
       rtp->codec = kRtpVideoH264;
       return;
diff --git a/webrtc/modules/video_coding/main/source/jitter_buffer.cc b/webrtc/modules/video_coding/main/source/jitter_buffer.cc
index 9156cc1..49c2325 100644
--- a/webrtc/modules/video_coding/main/source/jitter_buffer.cc
+++ b/webrtc/modules/video_coding/main/source/jitter_buffer.cc
@@ -125,6 +125,8 @@
       incomplete_frames_(),
       last_decoded_state_(),
       first_packet_since_reset_(true),
+      last_gof_timestamp_(0),
+      last_gof_valid_(false),
       stats_callback_(NULL),
       incoming_frame_rate_(0),
       incoming_frame_count_(0),
@@ -220,6 +222,7 @@
   first_packet_since_reset_ = true;
   rtt_ms_ = kDefaultRtt;
   last_decoded_state_.Reset();
+  last_gof_valid_ = false;
 }
 
 void VCMJitterBuffer::Stop() {
@@ -227,6 +230,8 @@
   UpdateHistograms();
   running_ = false;
   last_decoded_state_.Reset();
+  last_gof_valid_ = false;
+
   // Make sure all frames are free and reset.
   for (FrameList::iterator it = decodable_frames_.begin();
        it != decodable_frames_.end(); ++it) {
@@ -257,6 +262,7 @@
   decodable_frames_.Reset(&free_frames_);
   incomplete_frames_.Reset(&free_frames_);
   last_decoded_state_.Reset();  // TODO(mikhal): sync reset.
+  last_gof_valid_ = false;
   num_consecutive_old_packets_ = 0;
   // Also reset the jitter and delay estimates
   jitter_estimate_.Reset();
@@ -586,6 +592,38 @@
     return kOldPacket;
   }
 
+  if (packet.codec == kVideoCodecVP9) {
+    // TODO(asapersson): Move this code to an appropriate place.
+    // TODO(asapersson): Handle out of order GOF.
+    if (packet.codecSpecificHeader.codecHeader.VP9.flexible_mode) {
+      // TODO(asapersson): Add support for flexible mode.
+      return kGeneralError;
+    }
+    if (packet.codecSpecificHeader.codecHeader.VP9.ss_data_available) {
+      if (!last_gof_valid_ ||
+          IsNewerTimestamp(packet.timestamp, last_gof_timestamp_)) {
+        last_gof_.CopyGofInfoVP9(
+            packet.codecSpecificHeader.codecHeader.VP9.gof);
+        last_gof_timestamp_ = packet.timestamp;
+        last_gof_valid_ = true;
+      }
+    }
+    if (last_gof_valid_ &&
+        !packet.codecSpecificHeader.codecHeader.VP9.flexible_mode) {
+      uint8_t gof_idx = packet.codecSpecificHeader.codecHeader.VP9.gof_idx;
+      if (gof_idx != kNoGofIdx) {
+        if (gof_idx >= last_gof_.num_frames_in_gof) {
+          LOG(LS_WARNING) << "Incorrect gof_idx: " << gof_idx;
+          return kGeneralError;
+        }
+        RTPVideoTypeHeader* hdr = const_cast<RTPVideoTypeHeader*>(
+            &packet.codecSpecificHeader.codecHeader);
+        hdr->VP9.temporal_idx = last_gof_.temporal_idx[gof_idx];
+        hdr->VP9.temporal_up_switch = last_gof_.temporal_up_switch[gof_idx];
+      }
+    }
+  }
+
   num_consecutive_old_packets_ = 0;
 
   VCMFrameBuffer* frame;
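
Because non-flexible mode carries temporal information only via the GOF, the jitter buffer caches the most recently received scalability structure and rehydrates each packet's temporal fields from its gof_idx (with kTemporalStructureMode3, indices 0-3 map back to temporal layers 0, 2, 1, 2). The lookup in isolation (a sketch; the field types are assumed to match GofInfoVP9's declarations):

  bool RestoreTemporalInfo(const GofInfoVP9& last_gof, uint8_t gof_idx,
                           uint8_t* temporal_idx, bool* temporal_up_switch) {
    if (gof_idx >= last_gof.num_frames_in_gof)
      return false;  // Stale or corrupt index; the caller drops the packet.
    *temporal_idx = last_gof.temporal_idx[gof_idx];
    *temporal_up_switch = last_gof.temporal_up_switch[gof_idx];
    return true;
  }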
diff --git a/webrtc/modules/video_coding/main/source/jitter_buffer.h b/webrtc/modules/video_coding/main/source/jitter_buffer.h
index 455ac26..3961dff 100644
--- a/webrtc/modules/video_coding/main/source/jitter_buffer.h
+++ b/webrtc/modules/video_coding/main/source/jitter_buffer.h
@@ -307,6 +307,10 @@
   FrameList incomplete_frames_ GUARDED_BY(crit_sect_);
   VCMDecodingState last_decoded_state_ GUARDED_BY(crit_sect_);
   bool first_packet_since_reset_;
+  // Contains last received frame's temporal information for non-flexible mode.
+  GofInfoVP9 last_gof_;
+  uint32_t last_gof_timestamp_;
+  bool last_gof_valid_;
 
   // Statistics.
   VCMReceiveStatisticsCallback* stats_callback_ GUARDED_BY(crit_sect_);
diff --git a/webrtc/modules/video_coding/main/source/session_info.cc b/webrtc/modules/video_coding/main/source/session_info.cc
index 49839e5..bf6bcb3 100644
--- a/webrtc/modules/video_coding/main/source/session_info.cc
+++ b/webrtc/modules/video_coding/main/source/session_info.cc
@@ -59,31 +59,52 @@
 }
 
 int VCMSessionInfo::PictureId() const {
-  if (packets_.empty() ||
-      packets_.front().codecSpecificHeader.codec != kRtpVideoVp8)
+  if (packets_.empty())
     return kNoPictureId;
-  return packets_.front().codecSpecificHeader.codecHeader.VP8.pictureId;
+  if (packets_.front().codecSpecificHeader.codec == kRtpVideoVp8) {
+    return packets_.front().codecSpecificHeader.codecHeader.VP8.pictureId;
+  } else if (packets_.front().codecSpecificHeader.codec == kRtpVideoVp9) {
+    return packets_.front().codecSpecificHeader.codecHeader.VP9.picture_id;
+  } else {
+    return kNoPictureId;
+  }
 }
 
 int VCMSessionInfo::TemporalId() const {
-  if (packets_.empty() ||
-      packets_.front().codecSpecificHeader.codec != kRtpVideoVp8)
+  if (packets_.empty())
     return kNoTemporalIdx;
-  return packets_.front().codecSpecificHeader.codecHeader.VP8.temporalIdx;
+  if (packets_.front().codecSpecificHeader.codec == kRtpVideoVp8) {
+    return packets_.front().codecSpecificHeader.codecHeader.VP8.temporalIdx;
+  } else if (packets_.front().codecSpecificHeader.codec == kRtpVideoVp9) {
+    return packets_.front().codecSpecificHeader.codecHeader.VP9.temporal_idx;
+  } else {
+    return kNoTemporalIdx;
+  }
 }
 
 bool VCMSessionInfo::LayerSync() const {
-  if (packets_.empty() ||
-        packets_.front().codecSpecificHeader.codec != kRtpVideoVp8)
+  if (packets_.empty())
     return false;
-  return packets_.front().codecSpecificHeader.codecHeader.VP8.layerSync;
+  if (packets_.front().codecSpecificHeader.codec == kRtpVideoVp8) {
+    return packets_.front().codecSpecificHeader.codecHeader.VP8.layerSync;
+  } else if (packets_.front().codecSpecificHeader.codec == kRtpVideoVp9) {
+    return
+        packets_.front().codecSpecificHeader.codecHeader.VP9.temporal_up_switch;
+  } else {
+    return false;
+  }
 }
 
 int VCMSessionInfo::Tl0PicId() const {
-  if (packets_.empty() ||
-      packets_.front().codecSpecificHeader.codec != kRtpVideoVp8)
+  if (packets_.empty())
     return kNoTl0PicIdx;
-  return packets_.front().codecSpecificHeader.codecHeader.VP8.tl0PicIdx;
+  if (packets_.front().codecSpecificHeader.codec == kRtpVideoVp8) {
+    return packets_.front().codecSpecificHeader.codecHeader.VP8.tl0PicIdx;
+  } else if (packets_.front().codecSpecificHeader.codec == kRtpVideoVp9) {
+    return packets_.front().codecSpecificHeader.codecHeader.VP9.tl0_pic_idx;
+  } else {
+    return kNoTl0PicIdx;
+  }
 }
 
 bool VCMSessionInfo::NonReference() const {