Integrate VP9 packetization.

Supports one spatial layer and 1-3 temporal layers in non-flexible mode.
BUG=webrtc:4148, webrtc:4168, chromium:500602
TBR=mflodman
Review URL: https://codereview.webrtc.org/1211353002
Cr-Commit-Position: refs/heads/master@{#9665}
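
A minimal usage sketch (illustrative only; numberOfTemporalLayers is a
pre-existing field of VideoCodecVP9, the other two are added in this change):

  VideoCodec codec;  // width, height, bitrate etc. set elsewhere
  codec.codecType = kVideoCodecVP9;
  codec.codecSpecific.VP9.numberOfSpatialLayers = 1;   // only 1 supported
  codec.codecSpecific.VP9.numberOfTemporalLayers = 3;  // 1-3 supported
  codec.codecSpecific.VP9.flexibleMode = false;        // non-flexible mode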
diff --git a/webrtc/common_types.h b/webrtc/common_types.h
index bda9c7e..ac56eb1 100644
--- a/webrtc/common_types.h
+++ b/webrtc/common_types.h
@@ -618,7 +618,7 @@
}
};
-// VP9 specific
+// VP9 specific.
struct VideoCodecVP9 {
VideoCodecComplexity complexity;
int resilience;
@@ -627,6 +627,8 @@
bool frameDroppingOn;
int keyFrameInterval;
bool adaptiveQpMode;
+ unsigned char numberOfSpatialLayers;
+ bool flexibleMode;
};
// H264 specific.
diff --git a/webrtc/modules/interface/module_common_types.h b/webrtc/modules/interface/module_common_types.h
index 62fe694..232e695 100644
--- a/webrtc/modules/interface/module_common_types.h
+++ b/webrtc/modules/interface/module_common_types.h
@@ -69,7 +69,62 @@
// in a VP8 partition. Otherwise false
};
+enum TemporalStructureMode {
+ kTemporalStructureMode1, // 1 temporal layer structure - i.e., IPPP...
+ kTemporalStructureMode2, // 2 temporal layers 0-1-0-1...
+ kTemporalStructureMode3 // 3 temporal layers 0-2-1-2-0-2-1-2...
+};
+
struct GofInfoVP9 {
+ void SetGofInfoVP9(TemporalStructureMode tm) {
+ switch (tm) {
+ case kTemporalStructureMode1:
+ num_frames_in_gof = 1;
+ temporal_idx[0] = 0;
+ temporal_up_switch[0] = false;
+ num_ref_pics[0] = 1;
+ pid_diff[0][0] = 1;
+ break;
+ case kTemporalStructureMode2:
+ num_frames_in_gof = 2;
+ temporal_idx[0] = 0;
+ temporal_up_switch[0] = false;
+ num_ref_pics[0] = 1;
+ pid_diff[0][0] = 2;
+
+ temporal_idx[1] = 1;
+ temporal_up_switch[1] = true;
+ num_ref_pics[1] = 1;
+ pid_diff[1][0] = 1;
+ break;
+ case kTemporalStructureMode3:
+ num_frames_in_gof = 4;
+ temporal_idx[0] = 0;
+ temporal_up_switch[0] = false;
+ num_ref_pics[0] = 1;
+ pid_diff[0][0] = 4;
+
+ temporal_idx[1] = 2;
+ temporal_up_switch[1] = true;
+ num_ref_pics[1] = 1;
+ pid_diff[1][0] = 1;
+
+ temporal_idx[2] = 1;
+ temporal_up_switch[2] = true;
+ num_ref_pics[2] = 1;
+ pid_diff[2][0] = 2;
+
+ temporal_idx[3] = 2;
+ temporal_up_switch[3] = false;
+ num_ref_pics[3] = 2;
+ pid_diff[3][0] = 1;
+ pid_diff[3][1] = 2;
+ break;
+ default:
+ assert(false);
+ }
+ }
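+
+  // Illustration (non-normative): in kTemporalStructureMode3 a group of four
+  // frames with picture IDs N..N+3 resolves, via the pid_diff values above,
+  // to the following references:
+  //   N   (TL0) references N-4,
+  //   N+1 (TL2) references N,
+  //   N+2 (TL1) references N,
+  //   N+3 (TL2) references N+2 and N+1.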
+
void CopyGofInfoVP9(const GofInfoVP9& src) {
num_frames_in_gof = src.num_frames_in_gof;
for (size_t i = 0; i < num_frames_in_gof; ++i) {
diff --git a/webrtc/modules/rtp_rtcp/source/rtp_format_vp9.cc b/webrtc/modules/rtp_rtcp/source/rtp_format_vp9.cc
index 00b2f72..64bd5aa 100644
--- a/webrtc/modules/rtp_rtcp/source/rtp_format_vp9.cc
+++ b/webrtc/modules/rtp_rtcp/source/rtp_format_vp9.cc
@@ -643,8 +643,8 @@
bool p_bit = hdr_.inter_pic_predicted;
bool l_bit = LayerInfoPresent(hdr_);
bool f_bit = hdr_.flexible_mode;
- bool b_bit = hdr_.beginning_of_frame && packet_info.layer_begin;
- bool e_bit = hdr_.end_of_frame && packet_info.layer_end;
+ bool b_bit = packet_info.layer_begin;
+ bool e_bit = packet_info.layer_end;
bool v_bit = hdr_.ss_data_available && b_bit;
rtc::BitBufferWriter writer(buffer, max_payload_length_);
@@ -720,7 +720,6 @@
vp9->beginning_of_frame = b_bit ? true : false;
vp9->end_of_frame = e_bit ? true : false;
vp9->ss_data_available = v_bit ? true : false;
- vp9->temporal_idx = 0;
vp9->spatial_idx = 0;
// Parse fields that are present.
diff --git a/webrtc/modules/rtp_rtcp/source/rtp_format_vp9_unittest.cc b/webrtc/modules/rtp_rtcp/source/rtp_format_vp9_unittest.cc
index a052e3e..1f57c92 100644
--- a/webrtc/modules/rtp_rtcp/source/rtp_format_vp9_unittest.cc
+++ b/webrtc/modules/rtp_rtcp/source/rtp_format_vp9_unittest.cc
@@ -27,8 +27,7 @@
EXPECT_EQ(expected.ss_data_available, actual.ss_data_available);
EXPECT_EQ(expected.picture_id, actual.picture_id);
EXPECT_EQ(expected.max_picture_id, actual.max_picture_id);
- EXPECT_EQ(expected.temporal_idx == kNoTemporalIdx ? 0 : expected.temporal_idx,
- actual.temporal_idx);
+ EXPECT_EQ(expected.temporal_idx, actual.temporal_idx);
EXPECT_EQ(expected.spatial_idx == kNoSpatialIdx ? 0 : expected.spatial_idx,
actual.spatial_idx);
EXPECT_EQ(expected.gof_idx, actual.gof_idx);
@@ -128,9 +127,6 @@
RtpPacketizerVp9Test() {}
virtual void SetUp() {
expected_.InitRTPVideoHeaderVP9();
- // Always input one layer frame at a time.
- expected_.beginning_of_frame = true;
- expected_.end_of_frame = true;
}
rtc::scoped_ptr<uint8_t[]> packet_;
diff --git a/webrtc/modules/rtp_rtcp/source/rtp_payload_registry.cc b/webrtc/modules/rtp_rtcp/source/rtp_payload_registry.cc
index 8e2ff17..20e650c 100644
--- a/webrtc/modules/rtp_rtcp/source/rtp_payload_registry.cc
+++ b/webrtc/modules/rtp_rtcp/source/rtp_payload_registry.cc
@@ -433,6 +433,8 @@
if (RtpUtility::StringCompare(payloadName, "VP8", 3)) {
videoType = kRtpVideoVp8;
+ } else if (RtpUtility::StringCompare(payloadName, "VP9", 3)) {
+ videoType = kRtpVideoVp9;
} else if (RtpUtility::StringCompare(payloadName, "H264", 4)) {
videoType = kRtpVideoH264;
} else if (RtpUtility::StringCompare(payloadName, "I420", 4)) {
diff --git a/webrtc/modules/rtp_rtcp/source/rtp_sender_video.cc b/webrtc/modules/rtp_rtcp/source/rtp_sender_video.cc
index 88bb5bb..4c740e8 100644
--- a/webrtc/modules/rtp_rtcp/source/rtp_sender_video.cc
+++ b/webrtc/modules/rtp_rtcp/source/rtp_sender_video.cc
@@ -21,6 +21,7 @@
#include "webrtc/modules/rtp_rtcp/source/producer_fec.h"
#include "webrtc/modules/rtp_rtcp/source/rtp_format_video_generic.h"
#include "webrtc/modules/rtp_rtcp/source/rtp_format_vp8.h"
+#include "webrtc/modules/rtp_rtcp/source/rtp_format_vp9.h"
#include "webrtc/system_wrappers/interface/critical_section_wrapper.h"
#include "webrtc/system_wrappers/interface/logging.h"
#include "webrtc/system_wrappers/interface/trace_event.h"
@@ -76,6 +77,8 @@
RtpVideoCodecTypes videoType = kRtpVideoGeneric;
if (RtpUtility::StringCompare(payloadName, "VP8", 3)) {
videoType = kRtpVideoVp8;
+ } else if (RtpUtility::StringCompare(payloadName, "VP9", 3)) {
+ videoType = kRtpVideoVp9;
} else if (RtpUtility::StringCompare(payloadName, "H264", 4)) {
videoType = kRtpVideoH264;
} else if (RtpUtility::StringCompare(payloadName, "I420", 4)) {
diff --git a/webrtc/modules/video_coding/codecs/interface/video_codec_interface.h b/webrtc/modules/video_coding/codecs/interface/video_codec_interface.h
index 6acd2d4..411fbfd 100644
--- a/webrtc/modules/video_coding/codecs/interface/video_codec_interface.h
+++ b/webrtc/modules/video_coding/codecs/interface/video_codec_interface.h
@@ -43,16 +43,31 @@
};
struct CodecSpecificInfoVP9 {
- bool hasReceivedSLI;
- uint8_t pictureIdSLI;
- bool hasReceivedRPSI;
- uint64_t pictureIdRPSI;
- int16_t pictureId; // Negative value to skip pictureId.
- bool nonReference;
- uint8_t temporalIdx;
- bool layerSync;
- int tl0PicIdx; // Negative value to skip tl0PicIdx.
- int8_t keyIdx; // Negative value to skip keyIdx.
+ bool has_received_sli;
+ uint8_t picture_id_sli;
+ bool has_received_rpsi;
+ uint64_t picture_id_rpsi;
+ int16_t picture_id; // Negative value to skip pictureId.
+
+ bool inter_pic_predicted; // This layer frame is dependent on previously
+ // coded frame(s).
+ bool flexible_mode;
+ bool ss_data_available;
+
+ int tl0_pic_idx; // Negative value to skip tl0PicIdx.
+ uint8_t temporal_idx;
+ uint8_t spatial_idx;
+ bool temporal_up_switch;
+ bool inter_layer_predicted; // Frame is dependent on directly lower spatial
+ // layer frame.
+ uint8_t gof_idx;
+
+ // SS data.
+ size_t num_spatial_layers;
+ bool spatial_layer_resolution_present;
+ uint16_t width[kMaxVp9NumberOfSpatialLayers];
+ uint16_t height[kMaxVp9NumberOfSpatialLayers];
+ GofInfoVP9 gof;
};
struct CodecSpecificInfoGeneric {
diff --git a/webrtc/modules/video_coding/codecs/vp9/vp9_impl.cc b/webrtc/modules/video_coding/codecs/vp9/vp9_impl.cc
index cd91fa3..04bbd16 100644
--- a/webrtc/modules/video_coding/codecs/vp9/vp9_impl.cc
+++ b/webrtc/modules/video_coding/codecs/vp9/vp9_impl.cc
@@ -57,6 +57,12 @@
return new VP9EncoderImpl();
}
+void VP9EncoderImpl::EncoderOutputCodedPacketCallback(vpx_codec_cx_pkt* pkt,
+ void* user_data) {
+  VP9EncoderImpl* enc = static_cast<VP9EncoderImpl*>(user_data);
+ enc->GetEncodedLayerFrame(pkt);
+}
+
VP9EncoderImpl::VP9EncoderImpl()
: encoded_image_(),
encoded_complete_callback_(NULL),
@@ -67,7 +73,12 @@
rc_max_intra_target_(0),
encoder_(NULL),
config_(NULL),
- raw_(NULL) {
+ raw_(NULL),
+ input_image_(NULL),
+ tl0_pic_idx_(0),
+ gof_idx_(0),
+ num_temporal_layers_(0),
+ num_spatial_layers_(0) {
memset(&codec_, 0, sizeof(codec_));
uint32_t seed = static_cast<uint32_t>(TickTime::MillisecondTimestamp());
srand(seed);
@@ -101,6 +112,55 @@
return WEBRTC_VIDEO_CODEC_OK;
}
+bool VP9EncoderImpl::SetSvcRates() {
+ float rate_ratio[VPX_MAX_LAYERS] = {0};
+ float total = 0;
+ uint8_t i = 0;
+
+ for (i = 0; i < num_spatial_layers_; ++i) {
+ if (svc_internal_.svc_params.scaling_factor_num[i] <= 0 ||
+ svc_internal_.svc_params.scaling_factor_den[i] <= 0) {
+ return false;
+ }
+ rate_ratio[i] = static_cast<float>(
+ svc_internal_.svc_params.scaling_factor_num[i]) /
+ svc_internal_.svc_params.scaling_factor_den[i];
+ total += rate_ratio[i];
+ }
+
+ for (i = 0; i < num_spatial_layers_; ++i) {
+ config_->ss_target_bitrate[i] = static_cast<unsigned int>(
+ config_->rc_target_bitrate * rate_ratio[i] / total);
+ if (num_temporal_layers_ == 1) {
+ config_->layer_target_bitrate[i] = config_->ss_target_bitrate[i];
+ } else if (num_temporal_layers_ == 2) {
+ config_->layer_target_bitrate[i * num_temporal_layers_] =
+ config_->ss_target_bitrate[i] * 2 / 3;
+ config_->layer_target_bitrate[i * num_temporal_layers_ + 1] =
+ config_->ss_target_bitrate[i];
+ } else if (num_temporal_layers_ == 3) {
+ config_->layer_target_bitrate[i * num_temporal_layers_] =
+ config_->ss_target_bitrate[i] / 2;
+ config_->layer_target_bitrate[i * num_temporal_layers_ + 1] =
+ config_->layer_target_bitrate[i * num_temporal_layers_] +
+ (config_->ss_target_bitrate[i] / 4);
+ config_->layer_target_bitrate[i * num_temporal_layers_ + 2] =
+ config_->ss_target_bitrate[i];
+ } else {
+ return false;
+ }
+ }
+
+  // For now, temporal layers are only supported with one spatial layer.
+ if (num_spatial_layers_ == 1) {
+ for (i = 0; i < num_temporal_layers_; ++i) {
+ config_->ts_target_bitrate[i] = config_->layer_target_bitrate[i];
+ }
+ }
+
+ return true;
+}
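+
+// Worked example (illustration only): with rc_target_bitrate = 1200 kbps, one
+// spatial layer and three temporal layers, SetSvcRates() yields the cumulative
+// targets layer_target_bitrate[] = {600, 900, 1200} kbps, i.e. TL0 alone,
+// TL0 plus TL1, and all three layers.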
+
int VP9EncoderImpl::SetRates(uint32_t new_bitrate_kbit,
uint32_t new_framerate) {
if (!inited_) {
@@ -118,6 +178,11 @@
}
config_->rc_target_bitrate = new_bitrate_kbit;
codec_.maxFramerate = new_framerate;
+
+ if (!SetSvcRates()) {
+ return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
+ }
+
// Update encoder context
if (vpx_codec_enc_config_set(encoder_, config_)) {
return WEBRTC_VIDEO_CODEC_ERROR;
@@ -144,6 +209,13 @@
if (number_of_cores < 1) {
return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
}
+ if (inst->codecSpecific.VP9.numberOfTemporalLayers > 3) {
+ return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
+ }
+ // For now, only support one spatial layer.
+ if (inst->codecSpecific.VP9.numberOfSpatialLayers != 1) {
+ return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
+ }
int retVal = Release();
if (retVal < 0) {
return retVal;
@@ -158,6 +230,12 @@
if (&codec_ != inst) {
codec_ = *inst;
}
+
+ num_spatial_layers_ = inst->codecSpecific.VP9.numberOfSpatialLayers;
+ num_temporal_layers_ = inst->codecSpecific.VP9.numberOfTemporalLayers;
+ if (num_temporal_layers_ == 0)
+ num_temporal_layers_ = 1;
+
// Random start 16 bits is enough.
picture_id_ = static_cast<uint16_t>(rand()) & 0x7FFF;
// Allocate memory for encoded image
@@ -209,13 +287,57 @@
config_->g_threads = NumberOfThreads(config_->g_w,
config_->g_h,
number_of_cores);
+
cpu_speed_ = GetCpuSpeed(config_->g_w, config_->g_h);
+
+ // TODO(asapersson): Check configuration of temporal switch up and increase
+ // pattern length.
+ if (num_temporal_layers_ == 1) {
+ gof_.SetGofInfoVP9(kTemporalStructureMode1);
+ config_->temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_NOLAYERING;
+ config_->ts_number_layers = 1;
+ config_->ts_rate_decimator[0] = 1;
+ config_->ts_periodicity = 1;
+ config_->ts_layer_id[0] = 0;
+ } else if (num_temporal_layers_ == 2) {
+ gof_.SetGofInfoVP9(kTemporalStructureMode2);
+ config_->temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_0101;
+ config_->ts_number_layers = 2;
+ config_->ts_rate_decimator[0] = 2;
+ config_->ts_rate_decimator[1] = 1;
+ config_->ts_periodicity = 2;
+ config_->ts_layer_id[0] = 0;
+ config_->ts_layer_id[1] = 1;
+ } else if (num_temporal_layers_ == 3) {
+ gof_.SetGofInfoVP9(kTemporalStructureMode3);
+ config_->temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_0212;
+ config_->ts_number_layers = 3;
+ config_->ts_rate_decimator[0] = 4;
+ config_->ts_rate_decimator[1] = 2;
+ config_->ts_rate_decimator[2] = 1;
+ config_->ts_periodicity = 4;
+ config_->ts_layer_id[0] = 0;
+ config_->ts_layer_id[1] = 2;
+ config_->ts_layer_id[2] = 1;
+ config_->ts_layer_id[3] = 2;
+ } else {
+ return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
+ }
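+
+  // Note (illustration only): ts_rate_decimator expresses each layer's frame
+  // rate as the input rate divided by the decimator; e.g. at 30 fps with
+  // three temporal layers, TL0 alone plays at 7.5 fps, TL0 plus TL1 at
+  // 15 fps, and all layers at 30 fps.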
+
+ tl0_pic_idx_ = static_cast<uint8_t>(rand());
+
return InitAndSetControlSettings(inst);
}
int VP9EncoderImpl::NumberOfThreads(int width,
int height,
int number_of_cores) {
+ // For the current libvpx library, only 1 thread is supported when SVC is
+ // turned on.
+ if (num_temporal_layers_ > 1 || num_spatial_layers_ > 1) {
+ return 1;
+ }
+
// Keep the number of encoder threads equal to the possible number of column
// tiles, which is (1, 2, 4, 8). See comments below for VP9E_SET_TILE_COLUMNS.
if (width * height >= 1280 * 720 && number_of_cores > 4) {
@@ -229,6 +351,27 @@
}
int VP9EncoderImpl::InitAndSetControlSettings(const VideoCodec* inst) {
+ config_->ss_number_layers = num_spatial_layers_;
+
+ if (num_spatial_layers_ > 1) {
+ config_->rc_min_quantizer = 0;
+ config_->rc_max_quantizer = 63;
+ }
+ int scaling_factor_num = 256;
+ for (int i = num_spatial_layers_ - 1; i >= 0; --i) {
+ svc_internal_.svc_params.max_quantizers[i] = config_->rc_max_quantizer;
+ svc_internal_.svc_params.min_quantizers[i] = config_->rc_min_quantizer;
+ // 1:2 scaling in each dimension.
+ svc_internal_.svc_params.scaling_factor_num[i] = scaling_factor_num;
+ svc_internal_.svc_params.scaling_factor_den[i] = 256;
+ scaling_factor_num /= 2;
+ }
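+  // Illustration: with three spatial layers the loop above produces scaling
+  // factors 64/256, 128/256 and 256/256, i.e. quarter, half and full
+  // resolution in each dimension.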
+
+ if (!SetSvcRates()) {
+ return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
+ }
+
if (vpx_codec_enc_init(encoder_, vpx_codec_vp9_cx(), config_, 0)) {
return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
}
@@ -237,6 +380,19 @@
rc_max_intra_target_);
vpx_codec_control(encoder_, VP9E_SET_AQ_MODE,
inst->codecSpecific.VP9.adaptiveQpMode ? 3 : 0);
+
+ vpx_codec_control(
+ encoder_, VP9E_SET_SVC,
+ (num_temporal_layers_ > 1 || num_spatial_layers_ > 1) ? 1 : 0);
+ if (num_temporal_layers_ > 1 || num_spatial_layers_ > 1) {
+ vpx_codec_control(encoder_, VP9E_SET_SVC_PARAMETERS,
+ &svc_internal_.svc_params);
+ }
+ // Register callback for getting each spatial layer.
+ vpx_codec_priv_output_cx_pkt_cb_pair_t cbp = {
+      VP9EncoderImpl::EncoderOutputCodedPacketCallback,
+      static_cast<void*>(this)};
+  vpx_codec_control(encoder_, VP9E_REGISTER_CX_CALLBACK,
+                    static_cast<void*>(&cbp));
+
// Control function to set the number of column tiles in encoding a frame, in
// log2 unit: e.g., 0 = 1 tile column, 1 = 2 tile columns, 2 = 4 tile columns.
// The number of tile columns will be capped by the encoder based on image size
@@ -286,6 +442,13 @@
}
DCHECK_EQ(input_image.width(), static_cast<int>(raw_->d_w));
DCHECK_EQ(input_image.height(), static_cast<int>(raw_->d_h));
+
+  // Keep a pointer to the input image: the encoder output callback needs
+  // information from it (e.g. the timestamp). Saving only the required
+  // fields instead of the whole frame would also work.
+ input_image_ = &input_image;
+
// Image in vpx_image_t format.
// Input image is const. VPX's raw image is not defined as const.
raw_->planes[VPX_PLANE_Y] = const_cast<uint8_t*>(input_image.buffer(kYPlane));
@@ -308,7 +471,8 @@
return WEBRTC_VIDEO_CODEC_ERROR;
}
timestamp_ += duration;
- return GetEncodedPartitions(input_image);
+
+ return WEBRTC_VIDEO_CODEC_OK;
}
void VP9EncoderImpl::PopulateCodecSpecific(CodecSpecificInfo* codec_specific,
@@ -317,20 +481,83 @@
assert(codec_specific != NULL);
codec_specific->codecType = kVideoCodecVP9;
CodecSpecificInfoVP9 *vp9_info = &(codec_specific->codecSpecific.VP9);
- vp9_info->pictureId = picture_id_;
- vp9_info->keyIdx = kNoKeyIdx;
- vp9_info->nonReference = (pkt.data.frame.flags & VPX_FRAME_IS_DROPPABLE) != 0;
- // TODO(marpan): Temporal layers are supported in the current VP9 version,
- // but for now use 1 temporal layer encoding. Will update this when temporal
- // layer support for VP9 is added in webrtc.
- vp9_info->temporalIdx = kNoTemporalIdx;
- vp9_info->layerSync = false;
- vp9_info->tl0PicIdx = kNoTl0PicIdx;
- picture_id_ = (picture_id_ + 1) & 0x7FFF;
+ // TODO(asapersson): Set correct values.
+  vp9_info->inter_pic_predicted =
+      (pkt.data.frame.flags & VPX_FRAME_IS_KEY) == 0;
+  vp9_info->flexible_mode = codec_.codecSpecific.VP9.flexibleMode;
+  vp9_info->ss_data_available =
+      (pkt.data.frame.flags & VPX_FRAME_IS_KEY) != 0;
+ if (pkt.data.frame.flags & VPX_FRAME_IS_KEY) {
+ gof_idx_ = 0;
+ }
+
+ vpx_svc_layer_id_t layer_id = {0};
+ vpx_codec_control(encoder_, VP9E_GET_SVC_LAYER_ID, &layer_id);
+
+ assert(num_temporal_layers_ > 0);
+ assert(num_spatial_layers_ > 0);
+ if (num_temporal_layers_ == 1) {
+ assert(layer_id.temporal_layer_id == 0);
+ vp9_info->temporal_idx = kNoTemporalIdx;
+ } else {
+ vp9_info->temporal_idx = layer_id.temporal_layer_id;
+ }
+ if (num_spatial_layers_ == 1) {
+ assert(layer_id.spatial_layer_id == 0);
+ vp9_info->spatial_idx = kNoSpatialIdx;
+ } else {
+ vp9_info->spatial_idx = layer_id.spatial_layer_id;
+ }
+ if (layer_id.spatial_layer_id != 0) {
+ vp9_info->ss_data_available = false;
+ }
+
+ if (vp9_info->flexible_mode) {
+ vp9_info->gof_idx = kNoGofIdx;
+ } else {
+ vp9_info->gof_idx =
+ static_cast<uint8_t>(gof_idx_++ % gof_.num_frames_in_gof);
+ }
+
+ // TODO(asapersson): this info has to be obtained from the encoder.
+ vp9_info->temporal_up_switch = true;
+
+ if (layer_id.spatial_layer_id == 0) {
+ picture_id_ = (picture_id_ + 1) & 0x7FFF;
+ // TODO(asapersson): this info has to be obtained from the encoder.
+ vp9_info->inter_layer_predicted = false;
+ } else {
+ // TODO(asapersson): this info has to be obtained from the encoder.
+ vp9_info->inter_layer_predicted = true;
+ }
+
+ vp9_info->picture_id = picture_id_;
+
+ if (!vp9_info->flexible_mode) {
+ if (layer_id.temporal_layer_id == 0 && layer_id.spatial_layer_id == 0) {
+ tl0_pic_idx_++;
+ }
+ vp9_info->tl0_pic_idx = tl0_pic_idx_;
+ }
+
+ if (vp9_info->ss_data_available) {
+ vp9_info->num_spatial_layers = num_spatial_layers_;
+ vp9_info->spatial_layer_resolution_present = true;
+ for (size_t i = 0; i < vp9_info->num_spatial_layers; ++i) {
+ vp9_info->width[i] = codec_.width *
+ svc_internal_.svc_params.scaling_factor_num[i] /
+ svc_internal_.svc_params.scaling_factor_den[i];
+ vp9_info->height[i] = codec_.height *
+ svc_internal_.svc_params.scaling_factor_num[i] /
+ svc_internal_.svc_params.scaling_factor_den[i];
+ }
+ if (!vp9_info->flexible_mode) {
+ vp9_info->gof.CopyGofInfoVP9(gof_);
+ }
+ }
}
-int VP9EncoderImpl::GetEncodedPartitions(const VideoFrame& input_image) {
- vpx_codec_iter_t iter = NULL;
+int VP9EncoderImpl::GetEncodedLayerFrame(const vpx_codec_cx_pkt* pkt) {
encoded_image_._length = 0;
encoded_image_._frameType = kDeltaFrame;
RTPFragmentationHeader frag_info;
@@ -339,44 +566,33 @@
frag_info.VerifyAndAllocateFragmentationHeader(1);
int part_idx = 0;
CodecSpecificInfo codec_specific;
- const vpx_codec_cx_pkt_t *pkt = NULL;
- while ((pkt = vpx_codec_get_cx_data(encoder_, &iter)) != NULL) {
- switch (pkt->kind) {
- case VPX_CODEC_CX_FRAME_PKT: {
- memcpy(&encoded_image_._buffer[encoded_image_._length],
- pkt->data.frame.buf,
- pkt->data.frame.sz);
- frag_info.fragmentationOffset[part_idx] = encoded_image_._length;
- frag_info.fragmentationLength[part_idx] =
- static_cast<uint32_t>(pkt->data.frame.sz);
- frag_info.fragmentationPlType[part_idx] = 0;
- frag_info.fragmentationTimeDiff[part_idx] = 0;
- encoded_image_._length += static_cast<uint32_t>(pkt->data.frame.sz);
- assert(encoded_image_._length <= encoded_image_._size);
- break;
- }
- default: {
- break;
- }
- }
- // End of frame.
- if ((pkt->data.frame.flags & VPX_FRAME_IS_FRAGMENT) == 0) {
- // Check if encoded frame is a key frame.
- if (pkt->data.frame.flags & VPX_FRAME_IS_KEY) {
- encoded_image_._frameType = kKeyFrame;
- }
- PopulateCodecSpecific(&codec_specific, *pkt, input_image.timestamp());
- break;
- }
+
+ assert(pkt->kind == VPX_CODEC_CX_FRAME_PKT);
+ memcpy(&encoded_image_._buffer[encoded_image_._length], pkt->data.frame.buf,
+ pkt->data.frame.sz);
+ frag_info.fragmentationOffset[part_idx] = encoded_image_._length;
+ frag_info.fragmentationLength[part_idx] =
+ static_cast<uint32_t>(pkt->data.frame.sz);
+ frag_info.fragmentationPlType[part_idx] = 0;
+ frag_info.fragmentationTimeDiff[part_idx] = 0;
+ encoded_image_._length += static_cast<uint32_t>(pkt->data.frame.sz);
+ assert(encoded_image_._length <= encoded_image_._size);
+
+  // Check if the encoded frame is a key frame.
+ if (pkt->data.frame.flags & VPX_FRAME_IS_KEY) {
+ encoded_image_._frameType = kKeyFrame;
}
+ PopulateCodecSpecific(&codec_specific, *pkt, input_image_->timestamp());
+
if (encoded_image_._length > 0) {
TRACE_COUNTER1("webrtc", "EncodedFrameSize", encoded_image_._length);
- encoded_image_._timeStamp = input_image.timestamp();
- encoded_image_.capture_time_ms_ = input_image.render_time_ms();
+ encoded_image_._timeStamp = input_image_->timestamp();
+ encoded_image_.capture_time_ms_ = input_image_->render_time_ms();
encoded_image_._encodedHeight = raw_->d_h;
encoded_image_._encodedWidth = raw_->d_w;
encoded_complete_callback_->Encoded(encoded_image_, &codec_specific,
- &frag_info);
+ &frag_info);
}
return WEBRTC_VIDEO_CODEC_OK;
}
diff --git a/webrtc/modules/video_coding/codecs/vp9/vp9_impl.h b/webrtc/modules/video_coding/codecs/vp9/vp9_impl.h
index 5775952..c164a63 100644
--- a/webrtc/modules/video_coding/codecs/vp9/vp9_impl.h
+++ b/webrtc/modules/video_coding/codecs/vp9/vp9_impl.h
@@ -15,6 +15,7 @@
#include "webrtc/modules/video_coding/codecs/vp9/include/vp9.h"
#include "webrtc/modules/video_coding/codecs/vp9/vp9_frame_buffer_pool.h"
+#include "vpx/svc_context.h"
#include "vpx/vpx_decoder.h"
#include "vpx/vpx_encoder.h"
@@ -55,7 +56,13 @@
const vpx_codec_cx_pkt& pkt,
uint32_t timestamp);
- int GetEncodedPartitions(const VideoFrame& input_image);
+ bool SetSvcRates();
+
+ virtual int GetEncodedLayerFrame(const vpx_codec_cx_pkt* pkt);
+
+ // Callback function for outputting packets per spatial layer.
+ static void EncoderOutputCodedPacketCallback(vpx_codec_cx_pkt* pkt,
+ void* user_data);
// Determine maximum target for Intra frames
//
@@ -76,6 +83,14 @@
vpx_codec_ctx_t* encoder_;
vpx_codec_enc_cfg_t* config_;
vpx_image_t* raw_;
+ SvcInternal_t svc_internal_;
+ const VideoFrame* input_image_;
+ GofInfoVP9 gof_; // Contains each frame's temporal information for
+ // non-flexible mode.
+ uint8_t tl0_pic_idx_; // Only used in non-flexible mode.
+ size_t gof_idx_; // Only used in non-flexible mode.
+ uint8_t num_temporal_layers_;
+ uint8_t num_spatial_layers_;
};
diff --git a/webrtc/modules/video_coding/main/source/codec_database.cc b/webrtc/modules/video_coding/main/source/codec_database.cc
index 5f89936..2e2d91e 100644
--- a/webrtc/modules/video_coding/main/source/codec_database.cc
+++ b/webrtc/modules/video_coding/main/source/codec_database.cc
@@ -61,7 +61,8 @@
vp9_settings.frameDroppingOn = true;
vp9_settings.keyFrameInterval = 3000;
vp9_settings.adaptiveQpMode = true;
-
+ vp9_settings.numberOfSpatialLayers = 1;
+ vp9_settings.flexibleMode = false;
return vp9_settings;
}
diff --git a/webrtc/modules/video_coding/main/source/encoded_frame.cc b/webrtc/modules/video_coding/main/source/encoded_frame.cc
index 2830399..0fa4425 100644
--- a/webrtc/modules/video_coding/main/source/encoded_frame.cc
+++ b/webrtc/modules/video_coding/main/source/encoded_frame.cc
@@ -132,6 +132,67 @@
}
break;
}
+ case kRtpVideoVp9: {
+ if (_codecSpecificInfo.codecType != kVideoCodecVP9) {
+ // This is the first packet for this frame.
+ _codecSpecificInfo.codecSpecific.VP9.picture_id = -1;
+ _codecSpecificInfo.codecSpecific.VP9.temporal_idx = 0;
+ _codecSpecificInfo.codecSpecific.VP9.spatial_idx = 0;
+ _codecSpecificInfo.codecSpecific.VP9.gof_idx = 0;
+ _codecSpecificInfo.codecSpecific.VP9.inter_layer_predicted = false;
+ _codecSpecificInfo.codecSpecific.VP9.tl0_pic_idx = -1;
+ _codecSpecificInfo.codecType = kVideoCodecVP9;
+ }
+ _codecSpecificInfo.codecSpecific.VP9.inter_pic_predicted =
+ header->codecHeader.VP9.inter_pic_predicted;
+ _codecSpecificInfo.codecSpecific.VP9.flexible_mode =
+ header->codecHeader.VP9.flexible_mode;
+ _codecSpecificInfo.codecSpecific.VP9.ss_data_available =
+ header->codecHeader.VP9.ss_data_available;
+ if (header->codecHeader.VP9.picture_id != kNoPictureId) {
+ _codecSpecificInfo.codecSpecific.VP9.picture_id =
+ header->codecHeader.VP9.picture_id;
+ }
+ if (header->codecHeader.VP9.tl0_pic_idx != kNoTl0PicIdx) {
+ _codecSpecificInfo.codecSpecific.VP9.tl0_pic_idx =
+ header->codecHeader.VP9.tl0_pic_idx;
+ }
+ if (header->codecHeader.VP9.temporal_idx != kNoTemporalIdx) {
+ _codecSpecificInfo.codecSpecific.VP9.temporal_idx =
+ header->codecHeader.VP9.temporal_idx;
+ _codecSpecificInfo.codecSpecific.VP9.temporal_up_switch =
+ header->codecHeader.VP9.temporal_up_switch;
+ }
+ if (header->codecHeader.VP9.spatial_idx != kNoSpatialIdx) {
+ _codecSpecificInfo.codecSpecific.VP9.spatial_idx =
+ header->codecHeader.VP9.spatial_idx;
+ _codecSpecificInfo.codecSpecific.VP9.inter_layer_predicted =
+ header->codecHeader.VP9.inter_layer_predicted;
+ }
+ if (header->codecHeader.VP9.gof_idx != kNoGofIdx) {
+ _codecSpecificInfo.codecSpecific.VP9.gof_idx =
+ header->codecHeader.VP9.gof_idx;
+ }
+ if (header->codecHeader.VP9.ss_data_available) {
+ _codecSpecificInfo.codecSpecific.VP9.num_spatial_layers =
+ header->codecHeader.VP9.num_spatial_layers;
+ _codecSpecificInfo.codecSpecific.VP9
+ .spatial_layer_resolution_present =
+ header->codecHeader.VP9.spatial_layer_resolution_present;
+ if (header->codecHeader.VP9.spatial_layer_resolution_present) {
+ for (size_t i = 0; i < header->codecHeader.VP9.num_spatial_layers;
+ ++i) {
+ _codecSpecificInfo.codecSpecific.VP9.width[i] =
+ header->codecHeader.VP9.width[i];
+ _codecSpecificInfo.codecSpecific.VP9.height[i] =
+ header->codecHeader.VP9.height[i];
+ }
+ }
+ _codecSpecificInfo.codecSpecific.VP9.gof.CopyGofInfoVP9(
+ header->codecHeader.VP9.gof);
+ }
+ break;
+ }
case kRtpVideoH264: {
_codecSpecificInfo.codecType = kVideoCodecH264;
break;
diff --git a/webrtc/modules/video_coding/main/source/generic_encoder.cc b/webrtc/modules/video_coding/main/source/generic_encoder.cc
index 63e3976..c0925b9 100644
--- a/webrtc/modules/video_coding/main/source/generic_encoder.cc
+++ b/webrtc/modules/video_coding/main/source/generic_encoder.cc
@@ -36,6 +36,41 @@
rtp->simulcastIdx = info->codecSpecific.VP8.simulcastIdx;
return;
}
+ case kVideoCodecVP9: {
+ rtp->codec = kRtpVideoVp9;
+ rtp->codecHeader.VP9.InitRTPVideoHeaderVP9();
+ rtp->codecHeader.VP9.inter_pic_predicted =
+ info->codecSpecific.VP9.inter_pic_predicted;
+ rtp->codecHeader.VP9.flexible_mode =
+ info->codecSpecific.VP9.flexible_mode;
+ rtp->codecHeader.VP9.ss_data_available =
+ info->codecSpecific.VP9.ss_data_available;
+ rtp->codecHeader.VP9.picture_id = info->codecSpecific.VP9.picture_id;
+ rtp->codecHeader.VP9.tl0_pic_idx = info->codecSpecific.VP9.tl0_pic_idx;
+ rtp->codecHeader.VP9.temporal_idx = info->codecSpecific.VP9.temporal_idx;
+ rtp->codecHeader.VP9.spatial_idx = info->codecSpecific.VP9.spatial_idx;
+ rtp->codecHeader.VP9.temporal_up_switch =
+ info->codecSpecific.VP9.temporal_up_switch;
+ rtp->codecHeader.VP9.inter_layer_predicted =
+ info->codecSpecific.VP9.inter_layer_predicted;
+ rtp->codecHeader.VP9.gof_idx = info->codecSpecific.VP9.gof_idx;
+
+ if (info->codecSpecific.VP9.ss_data_available) {
+ rtp->codecHeader.VP9.num_spatial_layers =
+ info->codecSpecific.VP9.num_spatial_layers;
+ rtp->codecHeader.VP9.spatial_layer_resolution_present =
+ info->codecSpecific.VP9.spatial_layer_resolution_present;
+ if (info->codecSpecific.VP9.spatial_layer_resolution_present) {
+ for (size_t i = 0; i < info->codecSpecific.VP9.num_spatial_layers;
+ ++i) {
+ rtp->codecHeader.VP9.width[i] = info->codecSpecific.VP9.width[i];
+ rtp->codecHeader.VP9.height[i] = info->codecSpecific.VP9.height[i];
+ }
+ }
+ rtp->codecHeader.VP9.gof.CopyGofInfoVP9(info->codecSpecific.VP9.gof);
+ }
+ return;
+ }
case kVideoCodecH264:
rtp->codec = kRtpVideoH264;
return;
diff --git a/webrtc/modules/video_coding/main/source/jitter_buffer.cc b/webrtc/modules/video_coding/main/source/jitter_buffer.cc
index 9156cc1..49c2325 100644
--- a/webrtc/modules/video_coding/main/source/jitter_buffer.cc
+++ b/webrtc/modules/video_coding/main/source/jitter_buffer.cc
@@ -125,6 +125,8 @@
incomplete_frames_(),
last_decoded_state_(),
first_packet_since_reset_(true),
+ last_gof_timestamp_(0),
+ last_gof_valid_(false),
stats_callback_(NULL),
incoming_frame_rate_(0),
incoming_frame_count_(0),
@@ -220,6 +222,7 @@
first_packet_since_reset_ = true;
rtt_ms_ = kDefaultRtt;
last_decoded_state_.Reset();
+ last_gof_valid_ = false;
}
void VCMJitterBuffer::Stop() {
@@ -227,6 +230,8 @@
UpdateHistograms();
running_ = false;
last_decoded_state_.Reset();
+ last_gof_valid_ = false;
+
// Make sure all frames are free and reset.
for (FrameList::iterator it = decodable_frames_.begin();
it != decodable_frames_.end(); ++it) {
@@ -257,6 +262,7 @@
decodable_frames_.Reset(&free_frames_);
incomplete_frames_.Reset(&free_frames_);
last_decoded_state_.Reset(); // TODO(mikhal): sync reset.
+ last_gof_valid_ = false;
num_consecutive_old_packets_ = 0;
// Also reset the jitter and delay estimates
jitter_estimate_.Reset();
@@ -586,6 +592,38 @@
return kOldPacket;
}
+ if (packet.codec == kVideoCodecVP9) {
+    // TODO(asapersson): Move this code to an appropriate place.
+ // TODO(asapersson): Handle out of order GOF.
+ if (packet.codecSpecificHeader.codecHeader.VP9.flexible_mode) {
+ // TODO(asapersson): Add support for flexible mode.
+ return kGeneralError;
+ }
+ if (packet.codecSpecificHeader.codecHeader.VP9.ss_data_available) {
+ if (!last_gof_valid_ ||
+ IsNewerTimestamp(packet.timestamp, last_gof_timestamp_)) {
+ last_gof_.CopyGofInfoVP9(
+ packet.codecSpecificHeader.codecHeader.VP9.gof);
+ last_gof_timestamp_ = packet.timestamp;
+ last_gof_valid_ = true;
+ }
+ }
+ if (last_gof_valid_ &&
+ !packet.codecSpecificHeader.codecHeader.VP9.flexible_mode) {
+ uint8_t gof_idx = packet.codecSpecificHeader.codecHeader.VP9.gof_idx;
+ if (gof_idx != kNoGofIdx) {
+ if (gof_idx >= last_gof_.num_frames_in_gof) {
+ LOG(LS_WARNING) << "Incorrect gof_idx: " << gof_idx;
+ return kGeneralError;
+ }
+ RTPVideoTypeHeader* hdr = const_cast<RTPVideoTypeHeader*>(
+ &packet.codecSpecificHeader.codecHeader);
+ hdr->VP9.temporal_idx = last_gof_.temporal_idx[gof_idx];
+ hdr->VP9.temporal_up_switch = last_gof_.temporal_up_switch[gof_idx];
+ }
+ }
+ }
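+  // Example (illustration only): with kTemporalStructureMode3, a packet
+  // carrying gof_idx = 1 is assigned temporal_idx = 2 and
+  // temporal_up_switch = true from the stored GOF.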
+
num_consecutive_old_packets_ = 0;
VCMFrameBuffer* frame;
diff --git a/webrtc/modules/video_coding/main/source/jitter_buffer.h b/webrtc/modules/video_coding/main/source/jitter_buffer.h
index 455ac26..3961dff 100644
--- a/webrtc/modules/video_coding/main/source/jitter_buffer.h
+++ b/webrtc/modules/video_coding/main/source/jitter_buffer.h
@@ -307,6 +307,10 @@
FrameList incomplete_frames_ GUARDED_BY(crit_sect_);
VCMDecodingState last_decoded_state_ GUARDED_BY(crit_sect_);
bool first_packet_since_reset_;
+  // Temporal layer structure (GOF) from the most recently received SS data.
+  // Only used in non-flexible mode.
+ GofInfoVP9 last_gof_;
+ uint32_t last_gof_timestamp_;
+ bool last_gof_valid_;
// Statistics.
VCMReceiveStatisticsCallback* stats_callback_ GUARDED_BY(crit_sect_);
diff --git a/webrtc/modules/video_coding/main/source/session_info.cc b/webrtc/modules/video_coding/main/source/session_info.cc
index 49839e5..bf6bcb3 100644
--- a/webrtc/modules/video_coding/main/source/session_info.cc
+++ b/webrtc/modules/video_coding/main/source/session_info.cc
@@ -59,31 +59,52 @@
}
int VCMSessionInfo::PictureId() const {
- if (packets_.empty() ||
- packets_.front().codecSpecificHeader.codec != kRtpVideoVp8)
+ if (packets_.empty())
return kNoPictureId;
- return packets_.front().codecSpecificHeader.codecHeader.VP8.pictureId;
+ if (packets_.front().codecSpecificHeader.codec == kRtpVideoVp8) {
+ return packets_.front().codecSpecificHeader.codecHeader.VP8.pictureId;
+ } else if (packets_.front().codecSpecificHeader.codec == kRtpVideoVp9) {
+ return packets_.front().codecSpecificHeader.codecHeader.VP9.picture_id;
+ } else {
+ return kNoPictureId;
+ }
}
int VCMSessionInfo::TemporalId() const {
- if (packets_.empty() ||
- packets_.front().codecSpecificHeader.codec != kRtpVideoVp8)
+ if (packets_.empty())
return kNoTemporalIdx;
- return packets_.front().codecSpecificHeader.codecHeader.VP8.temporalIdx;
+ if (packets_.front().codecSpecificHeader.codec == kRtpVideoVp8) {
+ return packets_.front().codecSpecificHeader.codecHeader.VP8.temporalIdx;
+ } else if (packets_.front().codecSpecificHeader.codec == kRtpVideoVp9) {
+ return packets_.front().codecSpecificHeader.codecHeader.VP9.temporal_idx;
+ } else {
+ return kNoTemporalIdx;
+ }
}
bool VCMSessionInfo::LayerSync() const {
- if (packets_.empty() ||
- packets_.front().codecSpecificHeader.codec != kRtpVideoVp8)
+ if (packets_.empty())
return false;
- return packets_.front().codecSpecificHeader.codecHeader.VP8.layerSync;
+ if (packets_.front().codecSpecificHeader.codec == kRtpVideoVp8) {
+ return packets_.front().codecSpecificHeader.codecHeader.VP8.layerSync;
+ } else if (packets_.front().codecSpecificHeader.codec == kRtpVideoVp9) {
+ return
+ packets_.front().codecSpecificHeader.codecHeader.VP9.temporal_up_switch;
+ } else {
+ return false;
+ }
}
int VCMSessionInfo::Tl0PicId() const {
- if (packets_.empty() ||
- packets_.front().codecSpecificHeader.codec != kRtpVideoVp8)
+ if (packets_.empty())
return kNoTl0PicIdx;
- return packets_.front().codecSpecificHeader.codecHeader.VP8.tl0PicIdx;
+ if (packets_.front().codecSpecificHeader.codec == kRtpVideoVp8) {
+ return packets_.front().codecSpecificHeader.codecHeader.VP8.tl0PicIdx;
+ } else if (packets_.front().codecSpecificHeader.codec == kRtpVideoVp9) {
+ return packets_.front().codecSpecificHeader.codecHeader.VP9.tl0_pic_idx;
+ } else {
+ return kNoTl0PicIdx;
+ }
}
bool VCMSessionInfo::NonReference() const {