| /* |
| * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. |
| * |
| * Use of this source code is governed by a BSD-style license |
| * that can be found in the LICENSE file in the root of the source |
| * tree. An additional intellectual property rights grant can be found |
| * in the file PATENTS. All contributing project authors may |
| * be found in the AUTHORS file in the root of the source tree. |
| */ |
| |
| #include "call/rtp_payload_params.h" |
| |
| #include <stddef.h> |
| |
| #include <algorithm> |
| #include <cstdint> |
| #include <optional> |
| |
| #include "absl/container/inlined_vector.h" |
| #include "absl/strings/match.h" |
| #include "absl/types/variant.h" |
| #include "api/field_trials_view.h" |
| #include "api/transport/rtp/dependency_descriptor.h" |
| #include "api/video/encoded_image.h" |
| #include "api/video/render_resolution.h" |
| #include "api/video/video_codec_constants.h" |
| #include "api/video/video_codec_type.h" |
| #include "api/video/video_frame_type.h" |
| #include "api/video/video_timing.h" |
| #include "call/rtp_config.h" |
| #include "common_video/generic_frame_descriptor/generic_frame_info.h" |
| #include "modules/rtp_rtcp/source/rtp_generic_frame_descriptor.h" |
| #include "modules/rtp_rtcp/source/rtp_video_header.h" |
| #include "modules/video_coding/codecs/h264/include/h264_globals.h" |
| #include "modules/video_coding/codecs/interface/common_constants.h" |
| #include "modules/video_coding/codecs/vp8/include/vp8_globals.h" |
| #include "modules/video_coding/codecs/vp9/include/vp9_globals.h" |
| #include "modules/video_coding/frame_dependencies_calculator.h" |
| #include "modules/video_coding/include/video_codec_interface.h" |
| #include "rtc_base/arraysize.h" |
| #include "rtc_base/checks.h" |
| #include "rtc_base/logging.h" |
| #include "rtc_base/random.h" |
| #include "rtc_base/time_utils.h" |
| |
| namespace webrtc { |
| namespace { |
| |
// Number of spatial layers assumed when a generic descriptor/structure is
// synthesized for VP9 from its codec specific header.
constexpr int kMaxSimulatedSpatialLayers = 3;
| |
| void PopulateRtpWithCodecSpecifics(const CodecSpecificInfo& info, |
| std::optional<int> spatial_index, |
| RTPVideoHeader* rtp) { |
| rtp->codec = info.codecType; |
| rtp->is_last_frame_in_picture = info.end_of_picture; |
| switch (info.codecType) { |
| case kVideoCodecVP8: { |
| auto& vp8_header = rtp->video_type_header.emplace<RTPVideoHeaderVP8>(); |
| vp8_header.InitRTPVideoHeaderVP8(); |
| vp8_header.nonReference = info.codecSpecific.VP8.nonReference; |
| vp8_header.temporalIdx = info.codecSpecific.VP8.temporalIdx; |
| vp8_header.layerSync = info.codecSpecific.VP8.layerSync; |
| vp8_header.keyIdx = info.codecSpecific.VP8.keyIdx; |
| return; |
| } |
| case kVideoCodecVP9: { |
| auto& vp9_header = rtp->video_type_header.emplace<RTPVideoHeaderVP9>(); |
| vp9_header.InitRTPVideoHeaderVP9(); |
| vp9_header.inter_pic_predicted = |
| info.codecSpecific.VP9.inter_pic_predicted; |
| vp9_header.flexible_mode = info.codecSpecific.VP9.flexible_mode; |
| vp9_header.ss_data_available = info.codecSpecific.VP9.ss_data_available; |
| vp9_header.non_ref_for_inter_layer_pred = |
| info.codecSpecific.VP9.non_ref_for_inter_layer_pred; |
| vp9_header.temporal_idx = info.codecSpecific.VP9.temporal_idx; |
| vp9_header.temporal_up_switch = info.codecSpecific.VP9.temporal_up_switch; |
| vp9_header.inter_layer_predicted = |
| info.codecSpecific.VP9.inter_layer_predicted; |
| vp9_header.gof_idx = info.codecSpecific.VP9.gof_idx; |
| vp9_header.num_spatial_layers = info.codecSpecific.VP9.num_spatial_layers; |
| vp9_header.first_active_layer = info.codecSpecific.VP9.first_active_layer; |
| if (vp9_header.num_spatial_layers > 1) { |
| vp9_header.spatial_idx = spatial_index.value_or(kNoSpatialIdx); |
| } else { |
| vp9_header.spatial_idx = kNoSpatialIdx; |
| } |
| if (info.codecSpecific.VP9.ss_data_available) { |
| vp9_header.spatial_layer_resolution_present = |
| info.codecSpecific.VP9.spatial_layer_resolution_present; |
| if (info.codecSpecific.VP9.spatial_layer_resolution_present) { |
| for (size_t i = 0; i < info.codecSpecific.VP9.num_spatial_layers; |
| ++i) { |
| vp9_header.width[i] = info.codecSpecific.VP9.width[i]; |
| vp9_header.height[i] = info.codecSpecific.VP9.height[i]; |
| } |
| } |
| vp9_header.gof.CopyGofInfoVP9(info.codecSpecific.VP9.gof); |
| } |
| |
| vp9_header.num_ref_pics = info.codecSpecific.VP9.num_ref_pics; |
| for (int i = 0; i < info.codecSpecific.VP9.num_ref_pics; ++i) { |
| vp9_header.pid_diff[i] = info.codecSpecific.VP9.p_diff[i]; |
| } |
| vp9_header.end_of_picture = info.end_of_picture; |
| return; |
| } |
| case kVideoCodecH264: { |
| auto& h264_header = rtp->video_type_header.emplace<RTPVideoHeaderH264>(); |
| h264_header.packetization_mode = |
| info.codecSpecific.H264.packetization_mode; |
| return; |
| } |
| case kVideoCodecGeneric: |
| rtp->codec = kVideoCodecGeneric; |
| return; |
| // TODO(bugs.webrtc.org/13485): Implement H265 codec specific info |
| default: |
| return; |
| } |
| } |
| |
| void SetVideoTiming(const EncodedImage& image, VideoSendTiming* timing) { |
| if (image.timing_.flags == VideoSendTiming::TimingFrameFlags::kInvalid || |
| image.timing_.flags == VideoSendTiming::TimingFrameFlags::kNotTriggered) { |
| timing->flags = VideoSendTiming::TimingFrameFlags::kInvalid; |
| return; |
| } |
| |
| timing->encode_start_delta_ms = VideoSendTiming::GetDeltaCappedMs( |
| image.capture_time_ms_, image.timing_.encode_start_ms); |
| timing->encode_finish_delta_ms = VideoSendTiming::GetDeltaCappedMs( |
| image.capture_time_ms_, image.timing_.encode_finish_ms); |
| timing->packetization_finish_delta_ms = 0; |
| timing->pacer_exit_delta_ms = 0; |
| timing->network_timestamp_delta_ms = 0; |
| timing->network2_timestamp_delta_ms = 0; |
| timing->flags = image.timing_.flags; |
| } |
| |
| // Returns structure that aligns with simulated generic info. The templates |
| // allow to produce valid dependency descriptor for any stream where |
| // `num_spatial_layers` * `num_temporal_layers` <= 32 (limited by |
| // https://aomediacodec.github.io/av1-rtp-spec/#a82-syntax, see |
| // template_fdiffs()). The set of the templates is not tuned for any paricular |
| // structure thus dependency descriptor would use more bytes on the wire than |
| // with tuned templates. |
| FrameDependencyStructure MinimalisticStructure(int num_spatial_layers, |
| int num_temporal_layers) { |
| RTC_DCHECK_LE(num_spatial_layers, DependencyDescriptor::kMaxSpatialIds); |
| RTC_DCHECK_LE(num_temporal_layers, DependencyDescriptor::kMaxTemporalIds); |
| RTC_DCHECK_LE(num_spatial_layers * num_temporal_layers, 32); |
| FrameDependencyStructure structure; |
| structure.num_decode_targets = num_spatial_layers * num_temporal_layers; |
| structure.num_chains = num_spatial_layers; |
| structure.templates.reserve(num_spatial_layers * num_temporal_layers); |
| for (int sid = 0; sid < num_spatial_layers; ++sid) { |
| for (int tid = 0; tid < num_temporal_layers; ++tid) { |
| FrameDependencyTemplate a_template; |
| a_template.spatial_id = sid; |
| a_template.temporal_id = tid; |
| for (int s = 0; s < num_spatial_layers; ++s) { |
| for (int t = 0; t < num_temporal_layers; ++t) { |
| // Prefer kSwitch indication for frames that is part of the decode |
| // target because dependency descriptor information generated in this |
| // class use kSwitch indications more often that kRequired, increasing |
| // the chance of a good (or complete) template match. |
| a_template.decode_target_indications.push_back( |
| sid <= s && tid <= t ? DecodeTargetIndication::kSwitch |
| : DecodeTargetIndication::kNotPresent); |
| } |
| } |
| a_template.frame_diffs.push_back(tid == 0 ? num_spatial_layers * |
| num_temporal_layers |
| : num_spatial_layers); |
| a_template.chain_diffs.assign(structure.num_chains, 1); |
| structure.templates.push_back(a_template); |
| |
| structure.decode_target_protected_by_chain.push_back(sid); |
| } |
| } |
| return structure; |
| } |
| } // namespace |
| |
| RtpPayloadParams::RtpPayloadParams(const uint32_t ssrc, |
| const RtpPayloadState* state, |
| const FieldTrialsView& trials) |
| : ssrc_(ssrc), |
| generic_picture_id_experiment_( |
| absl::StartsWith(trials.Lookup("WebRTC-GenericPictureId"), |
| "Enabled")), |
| simulate_generic_structure_(absl::StartsWith( |
| trials.Lookup("WebRTC-GenericCodecDependencyDescriptor"), |
| "Enabled")) { |
| for (auto& spatial_layer : last_frame_id_) |
| spatial_layer.fill(-1); |
| |
| chain_last_frame_id_.fill(-1); |
| buffer_id_to_frame_id_.fill(-1); |
| |
| Random random(rtc::TimeMicros()); |
| state_.picture_id = |
| state ? state->picture_id : (random.Rand<int16_t>() & 0x7FFF); |
| state_.tl0_pic_idx = state ? state->tl0_pic_idx : (random.Rand<uint8_t>()); |
| state_.frame_id = state ? state->frame_id : random.Rand<uint16_t>(); |
| } |
| |
| RtpPayloadParams::RtpPayloadParams(const RtpPayloadParams& other) = default; |
| |
| RtpPayloadParams::~RtpPayloadParams() {} |
| |
| RTPVideoHeader RtpPayloadParams::GetRtpVideoHeader( |
| const EncodedImage& image, |
| const CodecSpecificInfo* codec_specific_info, |
| std::optional<int64_t> shared_frame_id) { |
| int64_t frame_id; |
| if (shared_frame_id) { |
| frame_id = *shared_frame_id; |
| } else { |
| frame_id = state_.frame_id++; |
| } |
| |
| RTPVideoHeader rtp_video_header; |
| if (codec_specific_info) { |
| PopulateRtpWithCodecSpecifics(*codec_specific_info, image.SpatialIndex(), |
| &rtp_video_header); |
| } |
| rtp_video_header.simulcastIdx = image.SimulcastIndex().value_or(0); |
| rtp_video_header.frame_type = image._frameType; |
| rtp_video_header.rotation = image.rotation_; |
| rtp_video_header.content_type = image.content_type_; |
| rtp_video_header.playout_delay = image.PlayoutDelay(); |
| rtp_video_header.width = image._encodedWidth; |
| rtp_video_header.height = image._encodedHeight; |
| rtp_video_header.color_space = image.ColorSpace() |
| ? std::make_optional(*image.ColorSpace()) |
| : std::nullopt; |
| rtp_video_header.video_frame_tracking_id = image.VideoFrameTrackingId(); |
| SetVideoTiming(image, &rtp_video_header.video_timing); |
| |
| const bool is_keyframe = image._frameType == VideoFrameType::kVideoFrameKey; |
| const bool first_frame_in_picture = |
| (codec_specific_info && codec_specific_info->codecType == kVideoCodecVP9) |
| ? codec_specific_info->codecSpecific.VP9.first_frame_in_picture |
| : true; |
| |
| SetCodecSpecific(&rtp_video_header, first_frame_in_picture); |
| |
| SetGeneric(codec_specific_info, frame_id, is_keyframe, &rtp_video_header); |
| |
| return rtp_video_header; |
| } |
| |
| uint32_t RtpPayloadParams::ssrc() const { |
| return ssrc_; |
| } |
| |
| RtpPayloadState RtpPayloadParams::state() const { |
| return state_; |
| } |
| |
| void RtpPayloadParams::SetCodecSpecific(RTPVideoHeader* rtp_video_header, |
| bool first_frame_in_picture) { |
| // Always set picture id. Set tl0_pic_idx iff temporal index is set. |
| if (first_frame_in_picture) { |
| state_.picture_id = (static_cast<uint16_t>(state_.picture_id) + 1) & 0x7FFF; |
| } |
| if (rtp_video_header->codec == kVideoCodecVP8) { |
| auto& vp8_header = |
| absl::get<RTPVideoHeaderVP8>(rtp_video_header->video_type_header); |
| vp8_header.pictureId = state_.picture_id; |
| |
| if (vp8_header.temporalIdx != kNoTemporalIdx) { |
| if (vp8_header.temporalIdx == 0) { |
| ++state_.tl0_pic_idx; |
| } |
| vp8_header.tl0PicIdx = state_.tl0_pic_idx; |
| } |
| } |
| if (rtp_video_header->codec == kVideoCodecVP9) { |
| auto& vp9_header = |
| absl::get<RTPVideoHeaderVP9>(rtp_video_header->video_type_header); |
| vp9_header.picture_id = state_.picture_id; |
| |
| // Note that in the case that we have no temporal layers but we do have |
| // spatial layers, packets will carry layering info with a temporal_idx of |
| // zero, and we then have to set and increment tl0_pic_idx. |
| if (vp9_header.temporal_idx != kNoTemporalIdx || |
| vp9_header.spatial_idx != kNoSpatialIdx) { |
| if (first_frame_in_picture && |
| (vp9_header.temporal_idx == 0 || |
| vp9_header.temporal_idx == kNoTemporalIdx)) { |
| ++state_.tl0_pic_idx; |
| } |
| vp9_header.tl0_pic_idx = state_.tl0_pic_idx; |
| } |
| } |
| if (generic_picture_id_experiment_ && |
| rtp_video_header->codec == kVideoCodecGeneric) { |
| rtp_video_header->video_type_header.emplace<RTPVideoHeaderLegacyGeneric>() |
| .picture_id = state_.picture_id; |
| } |
| } |
| |
| RTPVideoHeader::GenericDescriptorInfo |
| RtpPayloadParams::GenericDescriptorFromFrameInfo( |
| const GenericFrameInfo& frame_info, |
| int64_t frame_id) { |
| RTPVideoHeader::GenericDescriptorInfo generic; |
| generic.frame_id = frame_id; |
| generic.dependencies = dependencies_calculator_.FromBuffersUsage( |
| frame_id, frame_info.encoder_buffers); |
| generic.chain_diffs = |
| chains_calculator_.From(frame_id, frame_info.part_of_chain); |
| generic.spatial_index = frame_info.spatial_id; |
| generic.temporal_index = frame_info.temporal_id; |
| generic.decode_target_indications = frame_info.decode_target_indications; |
| generic.active_decode_targets = frame_info.active_decode_targets; |
| return generic; |
| } |
| |
| void RtpPayloadParams::SetGeneric(const CodecSpecificInfo* codec_specific_info, |
| int64_t frame_id, |
| bool is_keyframe, |
| RTPVideoHeader* rtp_video_header) { |
| if (codec_specific_info && codec_specific_info->generic_frame_info && |
| !codec_specific_info->generic_frame_info->encoder_buffers.empty()) { |
| if (is_keyframe) { |
| // Key frame resets all chains it is in. |
| chains_calculator_.Reset( |
| codec_specific_info->generic_frame_info->part_of_chain); |
| } |
| rtp_video_header->generic = GenericDescriptorFromFrameInfo( |
| *codec_specific_info->generic_frame_info, frame_id); |
| return; |
| } |
| |
| switch (rtp_video_header->codec) { |
| case VideoCodecType::kVideoCodecGeneric: |
| GenericToGeneric(frame_id, is_keyframe, rtp_video_header); |
| return; |
| case VideoCodecType::kVideoCodecVP8: |
| if (codec_specific_info) { |
| Vp8ToGeneric(codec_specific_info->codecSpecific.VP8, frame_id, |
| is_keyframe, rtp_video_header); |
| } |
| return; |
| case VideoCodecType::kVideoCodecVP9: |
| if (codec_specific_info != nullptr) { |
| Vp9ToGeneric(codec_specific_info->codecSpecific.VP9, frame_id, |
| *rtp_video_header); |
| } |
| return; |
| case VideoCodecType::kVideoCodecAV1: |
| // TODO(philipel): Implement AV1 to generic descriptor. |
| return; |
| case VideoCodecType::kVideoCodecH264: |
| if (codec_specific_info) { |
| H264ToGeneric(codec_specific_info->codecSpecific.H264, frame_id, |
| is_keyframe, rtp_video_header); |
| } |
| return; |
| case VideoCodecType::kVideoCodecH265: |
| // TODO(bugs.webrtc.org/13485): Implement H265 to generic descriptor. |
| return; |
| } |
| RTC_DCHECK_NOTREACHED() << "Unsupported codec."; |
| } |
| |
| std::optional<FrameDependencyStructure> RtpPayloadParams::GenericStructure( |
| const CodecSpecificInfo* codec_specific_info) { |
| if (codec_specific_info == nullptr) { |
| return std::nullopt; |
| } |
| // This helper shouldn't be used when template structure is specified |
| // explicetly. |
| RTC_DCHECK(!codec_specific_info->template_structure.has_value()); |
| switch (codec_specific_info->codecType) { |
| case VideoCodecType::kVideoCodecGeneric: |
| if (simulate_generic_structure_) { |
| return MinimalisticStructure(/*num_spatial_layers=*/1, |
| /*num_temporal_layer=*/1); |
| } |
| return std::nullopt; |
| case VideoCodecType::kVideoCodecVP8: |
| return MinimalisticStructure(/*num_spatial_layers=*/1, |
| /*num_temporal_layer=*/kMaxTemporalStreams); |
| case VideoCodecType::kVideoCodecVP9: { |
| std::optional<FrameDependencyStructure> structure = MinimalisticStructure( |
| /*num_spatial_layers=*/kMaxSimulatedSpatialLayers, |
| /*num_temporal_layer=*/kMaxTemporalStreams); |
| const CodecSpecificInfoVP9& vp9 = codec_specific_info->codecSpecific.VP9; |
| if (vp9.ss_data_available && vp9.spatial_layer_resolution_present) { |
| RenderResolution first_valid; |
| RenderResolution last_valid; |
| for (size_t i = 0; i < vp9.num_spatial_layers; ++i) { |
| RenderResolution r(vp9.width[i], vp9.height[i]); |
| if (r.Valid()) { |
| if (!first_valid.Valid()) { |
| first_valid = r; |
| } |
| last_valid = r; |
| } |
| structure->resolutions.push_back(r); |
| } |
| if (!last_valid.Valid()) { |
| // No valid resolution found. Do not send resolutions. |
| structure->resolutions.clear(); |
| } else { |
| structure->resolutions.resize(kMaxSimulatedSpatialLayers, last_valid); |
| // VP9 encoder wrapper may disable first few spatial layers by |
| // setting invalid resolution (0,0). `structure->resolutions` |
| // doesn't support invalid resolution, so reset them to something |
| // valid. |
| for (RenderResolution& r : structure->resolutions) { |
| if (!r.Valid()) { |
| r = first_valid; |
| } |
| } |
| } |
| } |
| return structure; |
| } |
| case VideoCodecType::kVideoCodecAV1: |
| case VideoCodecType::kVideoCodecH264: |
| case VideoCodecType::kVideoCodecH265: |
| return std::nullopt; |
| } |
| RTC_DCHECK_NOTREACHED() << "Unsupported codec."; |
| } |
| |
| void RtpPayloadParams::GenericToGeneric(int64_t frame_id, |
| bool is_keyframe, |
| RTPVideoHeader* rtp_video_header) { |
| RTPVideoHeader::GenericDescriptorInfo& generic = |
| rtp_video_header->generic.emplace(); |
| |
| generic.frame_id = frame_id; |
| generic.decode_target_indications.push_back(DecodeTargetIndication::kSwitch); |
| |
| if (is_keyframe) { |
| generic.chain_diffs.push_back(0); |
| last_frame_id_[0].fill(-1); |
| } else { |
| int64_t last_frame_id = last_frame_id_[0][0]; |
| RTC_DCHECK_NE(last_frame_id, -1); |
| RTC_DCHECK_LT(last_frame_id, frame_id); |
| generic.chain_diffs.push_back(frame_id - last_frame_id); |
| generic.dependencies.push_back(last_frame_id); |
| } |
| |
| last_frame_id_[0][0] = frame_id; |
| } |
| |
| void RtpPayloadParams::H264ToGeneric(const CodecSpecificInfoH264& h264_info, |
| int64_t frame_id, |
| bool is_keyframe, |
| RTPVideoHeader* rtp_video_header) { |
| const int temporal_index = |
| h264_info.temporal_idx != kNoTemporalIdx ? h264_info.temporal_idx : 0; |
| |
| if (temporal_index >= RtpGenericFrameDescriptor::kMaxTemporalLayers) { |
| RTC_LOG(LS_WARNING) << "Temporal and/or spatial index is too high to be " |
| "used with generic frame descriptor."; |
| return; |
| } |
| |
| RTPVideoHeader::GenericDescriptorInfo& generic = |
| rtp_video_header->generic.emplace(); |
| |
| generic.frame_id = frame_id; |
| generic.temporal_index = temporal_index; |
| |
| if (is_keyframe) { |
| RTC_DCHECK_EQ(temporal_index, 0); |
| last_frame_id_[/*spatial index*/ 0].fill(-1); |
| last_frame_id_[/*spatial index*/ 0][temporal_index] = frame_id; |
| return; |
| } |
| |
| if (h264_info.base_layer_sync) { |
| int64_t tl0_frame_id = last_frame_id_[/*spatial index*/ 0][0]; |
| |
| for (int i = 1; i < RtpGenericFrameDescriptor::kMaxTemporalLayers; ++i) { |
| if (last_frame_id_[/*spatial index*/ 0][i] < tl0_frame_id) { |
| last_frame_id_[/*spatial index*/ 0][i] = -1; |
| } |
| } |
| |
| RTC_DCHECK_GE(tl0_frame_id, 0); |
| RTC_DCHECK_LT(tl0_frame_id, frame_id); |
| generic.dependencies.push_back(tl0_frame_id); |
| } else { |
| for (int i = 0; i <= temporal_index; ++i) { |
| int64_t last_frame_id = last_frame_id_[/*spatial index*/ 0][i]; |
| |
| if (last_frame_id != -1) { |
| RTC_DCHECK_LT(last_frame_id, frame_id); |
| generic.dependencies.push_back(last_frame_id); |
| } |
| } |
| } |
| |
| last_frame_id_[/*spatial_index*/ 0][temporal_index] = frame_id; |
| } |
| |
| void RtpPayloadParams::Vp8ToGeneric(const CodecSpecificInfoVP8& vp8_info, |
| int64_t frame_id, |
| bool is_keyframe, |
| RTPVideoHeader* rtp_video_header) { |
| const auto& vp8_header = |
| absl::get<RTPVideoHeaderVP8>(rtp_video_header->video_type_header); |
| const int spatial_index = 0; |
| const int temporal_index = |
| vp8_header.temporalIdx != kNoTemporalIdx ? vp8_header.temporalIdx : 0; |
| |
| if (temporal_index >= RtpGenericFrameDescriptor::kMaxTemporalLayers || |
| spatial_index >= RtpGenericFrameDescriptor::kMaxSpatialLayers) { |
| RTC_LOG(LS_WARNING) << "Temporal and/or spatial index is too high to be " |
| "used with generic frame descriptor."; |
| return; |
| } |
| |
| RTPVideoHeader::GenericDescriptorInfo& generic = |
| rtp_video_header->generic.emplace(); |
| |
| generic.frame_id = frame_id; |
| generic.spatial_index = spatial_index; |
| generic.temporal_index = temporal_index; |
| |
| // Generate decode target indications. |
| RTC_DCHECK_LT(temporal_index, kMaxTemporalStreams); |
| generic.decode_target_indications.resize(kMaxTemporalStreams); |
| auto it = std::fill_n(generic.decode_target_indications.begin(), |
| temporal_index, DecodeTargetIndication::kNotPresent); |
| std::fill(it, generic.decode_target_indications.end(), |
| DecodeTargetIndication::kSwitch); |
| |
| // Frame dependencies. |
| if (vp8_info.useExplicitDependencies) { |
| SetDependenciesVp8New(vp8_info, frame_id, is_keyframe, vp8_header.layerSync, |
| &generic); |
| } else { |
| SetDependenciesVp8Deprecated(vp8_info, frame_id, is_keyframe, spatial_index, |
| temporal_index, vp8_header.layerSync, |
| &generic); |
| } |
| |
| // Calculate chains. |
| generic.chain_diffs = { |
| (is_keyframe || chain_last_frame_id_[0] < 0) |
| ? 0 |
| : static_cast<int>(frame_id - chain_last_frame_id_[0])}; |
| if (temporal_index == 0) { |
| chain_last_frame_id_[0] = frame_id; |
| } |
| } |
| |
| void RtpPayloadParams::Vp9ToGeneric(const CodecSpecificInfoVP9& vp9_info, |
| int64_t frame_id, |
| RTPVideoHeader& rtp_video_header) { |
| const auto& vp9_header = |
| absl::get<RTPVideoHeaderVP9>(rtp_video_header.video_type_header); |
| const int num_spatial_layers = kMaxSimulatedSpatialLayers; |
| const int first_active_spatial_id = vp9_header.first_active_layer; |
| const int last_active_spatial_id = vp9_header.num_spatial_layers - 1; |
| const int num_temporal_layers = kMaxTemporalStreams; |
| static_assert(num_spatial_layers <= |
| RtpGenericFrameDescriptor::kMaxSpatialLayers); |
| static_assert(num_temporal_layers <= |
| RtpGenericFrameDescriptor::kMaxTemporalLayers); |
| static_assert(num_spatial_layers <= DependencyDescriptor::kMaxSpatialIds); |
| static_assert(num_temporal_layers <= DependencyDescriptor::kMaxTemporalIds); |
| |
| int spatial_index = |
| vp9_header.spatial_idx != kNoSpatialIdx ? vp9_header.spatial_idx : 0; |
| int temporal_index = |
| vp9_header.temporal_idx != kNoTemporalIdx ? vp9_header.temporal_idx : 0; |
| |
| if (!(temporal_index < num_temporal_layers && |
| first_active_spatial_id <= spatial_index && |
| spatial_index <= last_active_spatial_id && |
| last_active_spatial_id < num_spatial_layers)) { |
| // Prefer to generate no generic layering than an inconsistent one. |
| RTC_LOG(LS_ERROR) << "Inconsistent layer id sid=" << spatial_index |
| << ",tid=" << temporal_index |
| << " in VP9 header. Active spatial ids: [" |
| << first_active_spatial_id << "," |
| << last_active_spatial_id << "]"; |
| return; |
| } |
| |
| RTPVideoHeader::GenericDescriptorInfo& result = |
| rtp_video_header.generic.emplace(); |
| |
| result.frame_id = frame_id; |
| result.spatial_index = spatial_index; |
| result.temporal_index = temporal_index; |
| |
| result.decode_target_indications.reserve(num_spatial_layers * |
| num_temporal_layers); |
| for (int sid = 0; sid < num_spatial_layers; ++sid) { |
| for (int tid = 0; tid < num_temporal_layers; ++tid) { |
| DecodeTargetIndication dti; |
| if (sid < spatial_index || tid < temporal_index) { |
| dti = DecodeTargetIndication::kNotPresent; |
| } else if (spatial_index != sid && |
| vp9_header.non_ref_for_inter_layer_pred) { |
| dti = DecodeTargetIndication::kNotPresent; |
| } else if (sid == spatial_index && tid == temporal_index) { |
| // Assume that if frame is decodable, all of its own layer is decodable. |
| dti = DecodeTargetIndication::kSwitch; |
| } else if (sid == spatial_index && vp9_header.temporal_up_switch) { |
| dti = DecodeTargetIndication::kSwitch; |
| } else if (!vp9_header.inter_pic_predicted) { |
| // Key frame or spatial upswitch |
| dti = DecodeTargetIndication::kSwitch; |
| } else { |
| // Make no other assumptions. That should be safe, though suboptimal. |
| // To provide more accurate dti, encoder wrapper should fill in |
| // CodecSpecificInfo::generic_frame_info |
| dti = DecodeTargetIndication::kRequired; |
| } |
| result.decode_target_indications.push_back(dti); |
| } |
| } |
| |
| // Calculate frame dependencies. |
| static constexpr int kPictureDiffLimit = 128; |
| if (last_vp9_frame_id_.empty()) { |
| // Create the array only if it is ever used. |
| last_vp9_frame_id_.resize(kPictureDiffLimit); |
| } |
| |
| if (vp9_header.flexible_mode) { |
| if (vp9_header.inter_layer_predicted && spatial_index > 0) { |
| result.dependencies.push_back( |
| last_vp9_frame_id_[vp9_header.picture_id % kPictureDiffLimit] |
| [spatial_index - 1]); |
| } |
| if (vp9_header.inter_pic_predicted) { |
| for (size_t i = 0; i < vp9_header.num_ref_pics; ++i) { |
| // picture_id is 15 bit number that wraps around. Though undeflow may |
| // produce picture that exceeds 2^15, it is ok because in this |
| // code block only last 7 bits of the picture_id are used. |
| uint16_t depend_on = vp9_header.picture_id - vp9_header.pid_diff[i]; |
| result.dependencies.push_back( |
| last_vp9_frame_id_[depend_on % kPictureDiffLimit][spatial_index]); |
| } |
| } |
| last_vp9_frame_id_[vp9_header.picture_id % kPictureDiffLimit] |
| [spatial_index] = frame_id; |
| } else { |
| // Implementing general conversion logic for non-flexible mode requires some |
| // work and we will almost certainly never need it, so for now support only |
| // non-layerd streams. |
| if (spatial_index > 0 || temporal_index > 0) { |
| // Prefer to generate no generic layering than an inconsistent one. |
| rtp_video_header.generic.reset(); |
| return; |
| } |
| |
| if (vp9_header.inter_pic_predicted) { |
| // Since we only support non-scalable streams we only need to save the |
| // last frame id. |
| result.dependencies.push_back(last_vp9_frame_id_[0][0]); |
| } |
| last_vp9_frame_id_[0][0] = frame_id; |
| } |
| |
| result.active_decode_targets = |
| ((uint32_t{1} << num_temporal_layers * (last_active_spatial_id + 1)) - |
| 1) ^ |
| ((uint32_t{1} << num_temporal_layers * first_active_spatial_id) - 1); |
| |
| // Calculate chains, asuming chain includes all frames with temporal_id = 0 |
| if (!vp9_header.inter_pic_predicted && !vp9_header.inter_layer_predicted) { |
| // Assume frames without dependencies also reset chains. |
| for (int sid = spatial_index; sid <= last_active_spatial_id; ++sid) { |
| chain_last_frame_id_[sid] = -1; |
| } |
| } |
| result.chain_diffs.resize(num_spatial_layers, 0); |
| for (int sid = first_active_spatial_id; sid <= last_active_spatial_id; |
| ++sid) { |
| if (chain_last_frame_id_[sid] == -1) { |
| result.chain_diffs[sid] = 0; |
| continue; |
| } |
| int64_t chain_diff = frame_id - chain_last_frame_id_[sid]; |
| if (chain_diff >= 256) { |
| RTC_LOG(LS_ERROR) |
| << "Too many frames since last VP9 T0 frame for spatial layer #" |
| << sid << " at frame#" << frame_id; |
| chain_last_frame_id_[sid] = -1; |
| chain_diff = 0; |
| } |
| result.chain_diffs[sid] = chain_diff; |
| } |
| |
| if (temporal_index == 0) { |
| chain_last_frame_id_[spatial_index] = frame_id; |
| if (!vp9_header.non_ref_for_inter_layer_pred) { |
| for (int sid = spatial_index + 1; sid <= last_active_spatial_id; ++sid) { |
| chain_last_frame_id_[sid] = frame_id; |
| } |
| } |
| } |
| } |
| |
| void RtpPayloadParams::SetDependenciesVp8Deprecated( |
| const CodecSpecificInfoVP8& vp8_info, |
| int64_t frame_id, |
| bool is_keyframe, |
| int spatial_index, |
| int temporal_index, |
| bool layer_sync, |
| RTPVideoHeader::GenericDescriptorInfo* generic) { |
| RTC_DCHECK(!vp8_info.useExplicitDependencies); |
| RTC_DCHECK(!new_version_used_.has_value() || !new_version_used_.value()); |
| new_version_used_ = false; |
| |
| if (is_keyframe) { |
| RTC_DCHECK_EQ(temporal_index, 0); |
| last_frame_id_[spatial_index].fill(-1); |
| last_frame_id_[spatial_index][temporal_index] = frame_id; |
| return; |
| } |
| |
| if (layer_sync) { |
| int64_t tl0_frame_id = last_frame_id_[spatial_index][0]; |
| |
| for (int i = 1; i < RtpGenericFrameDescriptor::kMaxTemporalLayers; ++i) { |
| if (last_frame_id_[spatial_index][i] < tl0_frame_id) { |
| last_frame_id_[spatial_index][i] = -1; |
| } |
| } |
| |
| RTC_DCHECK_GE(tl0_frame_id, 0); |
| RTC_DCHECK_LT(tl0_frame_id, frame_id); |
| generic->dependencies.push_back(tl0_frame_id); |
| } else { |
| for (int i = 0; i <= temporal_index; ++i) { |
| int64_t last_frame_id = last_frame_id_[spatial_index][i]; |
| |
| if (last_frame_id != -1) { |
| RTC_DCHECK_LT(last_frame_id, frame_id); |
| generic->dependencies.push_back(last_frame_id); |
| } |
| } |
| } |
| |
| last_frame_id_[spatial_index][temporal_index] = frame_id; |
| } |
| |
| void RtpPayloadParams::SetDependenciesVp8New( |
| const CodecSpecificInfoVP8& vp8_info, |
| int64_t frame_id, |
| bool is_keyframe, |
| bool layer_sync, |
| RTPVideoHeader::GenericDescriptorInfo* generic) { |
| RTC_DCHECK(vp8_info.useExplicitDependencies); |
| RTC_DCHECK(!new_version_used_.has_value() || new_version_used_.value()); |
| new_version_used_ = true; |
| |
| if (is_keyframe) { |
| RTC_DCHECK_EQ(vp8_info.referencedBuffersCount, 0u); |
| buffer_id_to_frame_id_.fill(frame_id); |
| return; |
| } |
| |
| constexpr size_t kBuffersCountVp8 = CodecSpecificInfoVP8::kBuffersCount; |
| |
| RTC_DCHECK_GT(vp8_info.referencedBuffersCount, 0u); |
| RTC_DCHECK_LE(vp8_info.referencedBuffersCount, |
| arraysize(vp8_info.referencedBuffers)); |
| |
| for (size_t i = 0; i < vp8_info.referencedBuffersCount; ++i) { |
| const size_t referenced_buffer = vp8_info.referencedBuffers[i]; |
| RTC_DCHECK_LT(referenced_buffer, kBuffersCountVp8); |
| RTC_DCHECK_LT(referenced_buffer, buffer_id_to_frame_id_.size()); |
| |
| const int64_t dependency_frame_id = |
| buffer_id_to_frame_id_[referenced_buffer]; |
| RTC_DCHECK_GE(dependency_frame_id, 0); |
| RTC_DCHECK_LT(dependency_frame_id, frame_id); |
| |
| const bool is_new_dependency = |
| std::find(generic->dependencies.begin(), generic->dependencies.end(), |
| dependency_frame_id) == generic->dependencies.end(); |
| if (is_new_dependency) { |
| generic->dependencies.push_back(dependency_frame_id); |
| } |
| } |
| |
| RTC_DCHECK_LE(vp8_info.updatedBuffersCount, kBuffersCountVp8); |
| for (size_t i = 0; i < vp8_info.updatedBuffersCount; ++i) { |
| const size_t updated_id = vp8_info.updatedBuffers[i]; |
| buffer_id_to_frame_id_[updated_id] = frame_id; |
| } |
| |
| RTC_DCHECK_LE(buffer_id_to_frame_id_.size(), kBuffersCountVp8); |
| } |
| |
| } // namespace webrtc |