/*
 *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "call/rtp_payload_params.h"

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <optional>

#include "absl/container/inlined_vector.h"
#include "api/field_trials_view.h"
#include "api/transport/rtp/dependency_descriptor.h"
#include "api/video/encoded_image.h"
#include "api/video/render_resolution.h"
#include "api/video/video_codec_constants.h"
#include "api/video/video_codec_type.h"
#include "api/video/video_frame_type.h"
#include "api/video/video_timing.h"
#include "call/rtp_config.h"
#include "common_video/generic_frame_descriptor/generic_frame_info.h"
#include "modules/rtp_rtcp/source/rtp_generic_frame_descriptor.h"
#include "modules/rtp_rtcp/source/rtp_video_header.h"
#include "modules/video_coding/codecs/h264/include/h264_globals.h"
#include "modules/video_coding/codecs/interface/common_constants.h"
#include "modules/video_coding/codecs/vp8/include/vp8_globals.h"
#include "modules/video_coding/codecs/vp9/include/vp9_globals.h"
#include "modules/video_coding/frame_dependencies_calculator.h"
#include "modules/video_coding/include/video_codec_interface.h"
#include "rtc_base/checks.h"
#include "rtc_base/logging.h"
#include "rtc_base/random.h"
#include "rtc_base/time_utils.h"

namespace webrtc {
namespace {

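// Upper bound on the number of spatial layers assumed when simulating a
// generic frame dependency structure for VP9 (see Vp9ToGeneric() below).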
constexpr int kMaxSimulatedSpatialLayers = 3;

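// Copies the codec-specific fields of `info` into the codec-agnostic RTP
// video header. Codecs without codec-specifics (generic, H.265, AV1) leave
// the video type header untouched.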
void PopulateRtpWithCodecSpecifics(const CodecSpecificInfo& info,
                                   std::optional<int> spatial_index,
                                   RTPVideoHeader* rtp) {
  rtp->codec = info.codecType;
  rtp->is_last_frame_in_picture = info.end_of_picture;
  rtp->frame_instrumentation_data = info.frame_instrumentation_data;
  switch (info.codecType) {
    case kVideoCodecVP8: {
      auto& vp8_header = rtp->video_type_header.emplace<RTPVideoHeaderVP8>();
      vp8_header.InitRTPVideoHeaderVP8();
      vp8_header.nonReference = info.codecSpecific.VP8.nonReference;
      vp8_header.temporalIdx = info.codecSpecific.VP8.temporalIdx;
      vp8_header.layerSync = info.codecSpecific.VP8.layerSync;
      vp8_header.keyIdx = info.codecSpecific.VP8.keyIdx;
      return;
    }
    case kVideoCodecVP9: {
      auto& vp9_header = rtp->video_type_header.emplace<RTPVideoHeaderVP9>();
      vp9_header.InitRTPVideoHeaderVP9();
      vp9_header.inter_pic_predicted =
          info.codecSpecific.VP9.inter_pic_predicted;
      vp9_header.flexible_mode = info.codecSpecific.VP9.flexible_mode;
      vp9_header.ss_data_available = info.codecSpecific.VP9.ss_data_available;
      vp9_header.non_ref_for_inter_layer_pred =
          info.codecSpecific.VP9.non_ref_for_inter_layer_pred;
      vp9_header.temporal_idx = info.codecSpecific.VP9.temporal_idx;
      vp9_header.temporal_up_switch = info.codecSpecific.VP9.temporal_up_switch;
      vp9_header.inter_layer_predicted =
          info.codecSpecific.VP9.inter_layer_predicted;
      vp9_header.gof_idx = info.codecSpecific.VP9.gof_idx;
      vp9_header.num_spatial_layers = info.codecSpecific.VP9.num_spatial_layers;
      vp9_header.first_active_layer = info.codecSpecific.VP9.first_active_layer;
      if (vp9_header.num_spatial_layers > 1) {
        vp9_header.spatial_idx = spatial_index.value_or(kNoSpatialIdx);
      } else {
        vp9_header.spatial_idx = kNoSpatialIdx;
      }
      if (info.codecSpecific.VP9.ss_data_available) {
        vp9_header.spatial_layer_resolution_present =
            info.codecSpecific.VP9.spatial_layer_resolution_present;
        if (info.codecSpecific.VP9.spatial_layer_resolution_present) {
          for (size_t i = 0; i < info.codecSpecific.VP9.num_spatial_layers;
               ++i) {
            vp9_header.width[i] = info.codecSpecific.VP9.width[i];
            vp9_header.height[i] = info.codecSpecific.VP9.height[i];
          }
        }
        vp9_header.gof.CopyGofInfoVP9(info.codecSpecific.VP9.gof);
      }

      vp9_header.num_ref_pics = info.codecSpecific.VP9.num_ref_pics;
      for (int i = 0; i < info.codecSpecific.VP9.num_ref_pics; ++i) {
        vp9_header.pid_diff[i] = info.codecSpecific.VP9.p_diff[i];
      }
      vp9_header.end_of_picture = info.end_of_picture;
      return;
    }
    case kVideoCodecH264: {
      auto& h264_header = rtp->video_type_header.emplace<RTPVideoHeaderH264>();
      h264_header.packetization_mode =
          info.codecSpecific.H264.packetization_mode;
      return;
    }
    // These codec types do not have codec-specifics.
    case kVideoCodecGeneric:
    case kVideoCodecH265:
    case kVideoCodecAV1:
      return;
  }
}

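// Copies capture-relative timing deltas from the encoded image into the RTP
// timing extension. Deltas that are only known further down the send pipeline
// (packetization, pacer, network) are initialized to zero here.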
void SetVideoTiming(const EncodedImage& image, VideoSendTiming* timing) {
  if (image.timing_.flags == VideoSendTiming::TimingFrameFlags::kInvalid ||
      image.timing_.flags == VideoSendTiming::TimingFrameFlags::kNotTriggered) {
    timing->flags = VideoSendTiming::TimingFrameFlags::kInvalid;
    return;
  }

  timing->encode_start_delta_ms = VideoSendTiming::GetDeltaCappedMs(
      image.capture_time_ms_, image.timing_.encode_start_ms);
  timing->encode_finish_delta_ms = VideoSendTiming::GetDeltaCappedMs(
      image.capture_time_ms_, image.timing_.encode_finish_ms);
  timing->packetization_finish_delta_ms = 0;
  timing->pacer_exit_delta_ms = 0;
  timing->network_timestamp_delta_ms = 0;
  timing->network2_timestamp_delta_ms = 0;
  timing->flags = image.timing_.flags;
}

// Returns a structure that aligns with the simulated generic info. The
// templates make it possible to produce a valid dependency descriptor for any
// stream where `num_spatial_layers` * `num_temporal_layers` <= 32 (limited by
// https://aomediacodec.github.io/av1-rtp-spec/#a82-syntax, see
// template_fdiffs()). The set of templates is not tuned for any particular
// structure, so the dependency descriptor uses more bytes on the wire than it
// would with tuned templates.
FrameDependencyStructure MinimalisticStructure(int num_spatial_layers,
                                               int num_temporal_layers) {
  RTC_DCHECK_LE(num_spatial_layers, DependencyDescriptor::kMaxSpatialIds);
  RTC_DCHECK_LE(num_temporal_layers, DependencyDescriptor::kMaxTemporalIds);
  RTC_DCHECK_LE(num_spatial_layers * num_temporal_layers, 32);
  FrameDependencyStructure structure;
  structure.num_decode_targets = num_spatial_layers * num_temporal_layers;
  structure.num_chains = num_spatial_layers;
  structure.templates.reserve(num_spatial_layers * num_temporal_layers);
  for (int sid = 0; sid < num_spatial_layers; ++sid) {
    for (int tid = 0; tid < num_temporal_layers; ++tid) {
      FrameDependencyTemplate a_template;
      a_template.spatial_id = sid;
      a_template.temporal_id = tid;
      for (int s = 0; s < num_spatial_layers; ++s) {
        for (int t = 0; t < num_temporal_layers; ++t) {
          // Prefer the kSwitch indication for frames that are part of the
          // decode target, because the dependency descriptor information
          // generated in this class uses kSwitch indications more often than
          // kRequired, increasing the chance of a good (or complete) template
          // match.
          a_template.decode_target_indications.push_back(
              sid <= s && tid <= t ? DecodeTargetIndication::kSwitch
                                   : DecodeTargetIndication::kNotPresent);
        }
      }
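      // Assumed frame pattern: each temporal unit carries one frame per
      // spatial layer, and the temporal-layer pattern repeats every
      // `num_temporal_layers` temporal units. A T0 frame thus refers to the
      // previous T0 frame of its own spatial layer
      // (`num_spatial_layers` * `num_temporal_layers` frames back), while
      // frames on higher temporal layers refer to the same spatial layer's
      // frame in the previous temporal unit (`num_spatial_layers` frames
      // back).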
      a_template.frame_diffs.push_back(tid == 0 ? num_spatial_layers *
                                                      num_temporal_layers
                                                : num_spatial_layers);
      a_template.chain_diffs.assign(structure.num_chains, 1);
      structure.templates.push_back(a_template);

      structure.decode_target_protected_by_chain.push_back(sid);
    }
  }
  return structure;
}
}  // namespace

RtpPayloadParams::RtpPayloadParams(const uint32_t ssrc,
                                   const RtpPayloadState* state,
                                   const FieldTrialsView& trials)
    : ssrc_(ssrc),
      generic_picture_id_experiment_(
          trials.IsEnabled("WebRTC-GenericPictureId")),
      simulate_generic_structure_(
          trials.IsEnabled("WebRTC-GenericCodecDependencyDescriptor")) {
  for (auto& spatial_layer : last_frame_id_)
    spatial_layer.fill(-1);

  chain_last_frame_id_.fill(-1);
  buffer_id_to_frame_id_.fill(-1);

  Random random(TimeMicros());
  state_.picture_id =
      state ? state->picture_id : (random.Rand<int16_t>() & 0x7FFF);
  state_.tl0_pic_idx = state ? state->tl0_pic_idx : (random.Rand<uint8_t>());
  state_.frame_id = state ? state->frame_id : random.Rand<uint16_t>();
}

RtpPayloadParams::RtpPayloadParams(const RtpPayloadParams& other) = default;

RtpPayloadParams::~RtpPayloadParams() {}

RTPVideoHeader RtpPayloadParams::GetRtpVideoHeader(
    const EncodedImage& image,
    const CodecSpecificInfo* codec_specific_info,
    std::optional<int64_t> shared_frame_id) {
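  // Prefer the caller-provided frame id, which is shared with the other
  // streams of this encoder; otherwise fall back to this stream's own
  // running counter.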
  int64_t frame_id;
  if (shared_frame_id) {
    frame_id = *shared_frame_id;
  } else {
    frame_id = state_.frame_id++;
  }

  RTPVideoHeader rtp_video_header;
  if (codec_specific_info) {
    PopulateRtpWithCodecSpecifics(*codec_specific_info, image.SpatialIndex(),
                                  &rtp_video_header);
  }
  rtp_video_header.simulcastIdx = image.SimulcastIndex().value_or(0);
  rtp_video_header.frame_type = image._frameType;
  rtp_video_header.rotation = image.rotation_;
  rtp_video_header.content_type = image.content_type_;
  rtp_video_header.playout_delay = image.PlayoutDelay();
  rtp_video_header.width = image._encodedWidth;
  rtp_video_header.height = image._encodedHeight;
  rtp_video_header.color_space = image.ColorSpace()
                                     ? std::make_optional(*image.ColorSpace())
                                     : std::nullopt;
  rtp_video_header.video_frame_tracking_id = image.VideoFrameTrackingId();
  SetVideoTiming(image, &rtp_video_header.video_timing);

  const bool is_keyframe = image._frameType == VideoFrameType::kVideoFrameKey;
  const bool first_frame_in_picture =
      (codec_specific_info && codec_specific_info->codecType == kVideoCodecVP9)
          ? codec_specific_info->codecSpecific.VP9.first_frame_in_picture
          : true;

  SetCodecSpecific(&rtp_video_header, first_frame_in_picture);

  SetGeneric(codec_specific_info, frame_id, is_keyframe, &rtp_video_header);

  return rtp_video_header;
}

uint32_t RtpPayloadParams::ssrc() const {
  return ssrc_;
}

RtpPayloadState RtpPayloadParams::state() const {
  return state_;
}

void RtpPayloadParams::SetCodecSpecific(RTPVideoHeader* rtp_video_header,
                                        bool first_frame_in_picture) {
  // Always set the picture id. Set tl0_pic_idx iff the temporal index is set.
  if (first_frame_in_picture) {
    state_.picture_id = (static_cast<uint16_t>(state_.picture_id) + 1) & 0x7FFF;
  }
  if (rtp_video_header->codec == kVideoCodecVP8) {
    auto& vp8_header =
        std::get<RTPVideoHeaderVP8>(rtp_video_header->video_type_header);
    vp8_header.pictureId = state_.picture_id;

    if (vp8_header.temporalIdx != kNoTemporalIdx) {
      if (vp8_header.temporalIdx == 0) {
        ++state_.tl0_pic_idx;
      }
      vp8_header.tl0PicIdx = state_.tl0_pic_idx;
    }
  }
  if (rtp_video_header->codec == kVideoCodecVP9) {
    auto& vp9_header =
        std::get<RTPVideoHeaderVP9>(rtp_video_header->video_type_header);
    vp9_header.picture_id = state_.picture_id;

    // Note that in the case that we have no temporal layers but we do have
    // spatial layers, packets will carry layering info with a temporal_idx of
    // zero, and we then have to set and increment tl0_pic_idx.
    if (vp9_header.temporal_idx != kNoTemporalIdx ||
        vp9_header.spatial_idx != kNoSpatialIdx) {
      if (first_frame_in_picture &&
          (vp9_header.temporal_idx == 0 ||
           vp9_header.temporal_idx == kNoTemporalIdx)) {
        ++state_.tl0_pic_idx;
      }
      vp9_header.tl0_pic_idx = state_.tl0_pic_idx;
    }
  }
  if (generic_picture_id_experiment_ &&
      rtp_video_header->codec == kVideoCodecGeneric) {
    rtp_video_header->video_type_header.emplace<RTPVideoHeaderLegacyGeneric>()
        .picture_id = state_.picture_id;
  }
}

RTPVideoHeader::GenericDescriptorInfo
RtpPayloadParams::GenericDescriptorFromFrameInfo(
    const GenericFrameInfo& frame_info,
    int64_t frame_id) {
  RTPVideoHeader::GenericDescriptorInfo generic;
  generic.frame_id = frame_id;
  generic.dependencies = dependencies_calculator_.FromBuffersUsage(
      frame_id, frame_info.encoder_buffers);
  generic.chain_diffs =
      chains_calculator_.From(frame_id, frame_info.part_of_chain);
  generic.spatial_index = frame_info.spatial_id;
  generic.temporal_index = frame_info.temporal_id;
  generic.decode_target_indications = frame_info.decode_target_indications;
  generic.active_decode_targets = frame_info.active_decode_targets;
  return generic;
}

void RtpPayloadParams::SetGeneric(const CodecSpecificInfo* codec_specific_info,
                                  int64_t frame_id,
                                  bool is_keyframe,
                                  RTPVideoHeader* rtp_video_header) {
  if (codec_specific_info && codec_specific_info->generic_frame_info &&
      !codec_specific_info->generic_frame_info->encoder_buffers.empty()) {
    if (is_keyframe) {
      // A key frame resets all chains it is part of.
      chains_calculator_.Reset(
          codec_specific_info->generic_frame_info->part_of_chain);
    }
    rtp_video_header->generic = GenericDescriptorFromFrameInfo(
        *codec_specific_info->generic_frame_info, frame_id);
    return;
  }

  switch (rtp_video_header->codec) {
    case VideoCodecType::kVideoCodecGeneric:
      GenericToGeneric(frame_id, is_keyframe, rtp_video_header);
      return;
    case VideoCodecType::kVideoCodecVP8:
      if (codec_specific_info) {
        Vp8ToGeneric(codec_specific_info->codecSpecific.VP8, frame_id,
                     is_keyframe, rtp_video_header);
      }
      return;
    case VideoCodecType::kVideoCodecVP9:
      if (codec_specific_info != nullptr) {
        Vp9ToGeneric(codec_specific_info->codecSpecific.VP9, frame_id,
                     *rtp_video_header);
      }
      return;
    case VideoCodecType::kVideoCodecAV1:
      // Codec-specifics are not supported for AV1. We convert from
      // generic_frame_info.
      return;
    case VideoCodecType::kVideoCodecH264:
      if (codec_specific_info) {
        H264ToGeneric(codec_specific_info->codecSpecific.H264, frame_id,
                      is_keyframe, rtp_video_header);
      }
      return;
    case VideoCodecType::kVideoCodecH265:
      // Codec-specifics are not supported for H.265. We convert from
      // generic_frame_info.
      return;
  }
  RTC_DCHECK_NOTREACHED() << "Unsupported codec.";
}

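// Returns the dependency structure advertised in the dependency descriptor
// for codecs that do not provide one through
// CodecSpecificInfo::template_structure.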
std::optional<FrameDependencyStructure> RtpPayloadParams::GenericStructure(
    const CodecSpecificInfo* codec_specific_info) {
  if (codec_specific_info == nullptr) {
    return std::nullopt;
  }
  // This helper shouldn't be used when the template structure is specified
  // explicitly.
  RTC_DCHECK(!codec_specific_info->template_structure.has_value());
  switch (codec_specific_info->codecType) {
    case VideoCodecType::kVideoCodecGeneric:
      if (simulate_generic_structure_) {
        return MinimalisticStructure(/*num_spatial_layers=*/1,
                                     /*num_temporal_layers=*/1);
      }
      return std::nullopt;
    case VideoCodecType::kVideoCodecVP8:
      return MinimalisticStructure(/*num_spatial_layers=*/1,
                                   /*num_temporal_layers=*/kMaxTemporalStreams);
    case VideoCodecType::kVideoCodecVP9: {
      std::optional<FrameDependencyStructure> structure = MinimalisticStructure(
          /*num_spatial_layers=*/kMaxSimulatedSpatialLayers,
          /*num_temporal_layers=*/kMaxTemporalStreams);
      const CodecSpecificInfoVP9& vp9 = codec_specific_info->codecSpecific.VP9;
      if (vp9.ss_data_available && vp9.spatial_layer_resolution_present) {
        RenderResolution first_valid;
        RenderResolution last_valid;
        for (size_t i = 0; i < vp9.num_spatial_layers; ++i) {
          RenderResolution r(vp9.width[i], vp9.height[i]);
          if (r.Valid()) {
            if (!first_valid.Valid()) {
              first_valid = r;
            }
            last_valid = r;
          }
          structure->resolutions.push_back(r);
        }
        if (!last_valid.Valid()) {
          // No valid resolution found. Do not send resolutions.
          structure->resolutions.clear();
        } else {
          structure->resolutions.resize(kMaxSimulatedSpatialLayers, last_valid);
          // The VP9 encoder wrapper may disable the first few spatial layers
          // by setting an invalid resolution (0,0). `structure->resolutions`
          // doesn't support invalid resolutions, so reset them to something
          // valid.
          for (RenderResolution& r : structure->resolutions) {
            if (!r.Valid()) {
              r = first_valid;
            }
          }
        }
      }
      return structure;
    }
    case VideoCodecType::kVideoCodecH264:
      return MinimalisticStructure(
          /*num_spatial_layers=*/1,
          /*num_temporal_layers=*/kMaxTemporalStreams);
    case VideoCodecType::kVideoCodecAV1:
    case VideoCodecType::kVideoCodecH265:
      return std::nullopt;
  }
  RTC_DCHECK_NOTREACHED() << "Unsupported codec.";
}

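// Fills in generic descriptor info for codecs without layering information:
// a single decode target and a single chain where every delta frame depends
// on the previous frame.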
void RtpPayloadParams::GenericToGeneric(int64_t frame_id,
                                        bool is_keyframe,
                                        RTPVideoHeader* rtp_video_header) {
  RTPVideoHeader::GenericDescriptorInfo& generic =
      rtp_video_header->generic.emplace();

  generic.frame_id = frame_id;
  generic.decode_target_indications.push_back(DecodeTargetIndication::kSwitch);

  if (is_keyframe) {
    generic.chain_diffs.push_back(0);
    last_frame_id_[0].fill(-1);
  } else {
    int64_t last_frame_id = last_frame_id_[0][0];
    RTC_DCHECK_NE(last_frame_id, -1);
    RTC_DCHECK_LT(last_frame_id, frame_id);
    generic.chain_diffs.push_back(frame_id - last_frame_id);
    generic.dependencies.push_back(last_frame_id);
  }

  last_frame_id_[0][0] = frame_id;
}

void RtpPayloadParams::H264ToGeneric(const CodecSpecificInfoH264& h264_info,
                                     int64_t frame_id,
                                     bool is_keyframe,
                                     RTPVideoHeader* rtp_video_header) {
  const int temporal_index =
      h264_info.temporal_idx != kNoTemporalIdx ? h264_info.temporal_idx : 0;

  if (temporal_index >= RtpGenericFrameDescriptor::kMaxTemporalLayers) {
    RTC_LOG(LS_WARNING) << "Temporal index is too high to be "
                           "used with the generic frame descriptor.";
    return;
  }

  RTPVideoHeader::GenericDescriptorInfo& generic =
      rtp_video_header->generic.emplace();

  generic.frame_id = frame_id;
  generic.temporal_index = temporal_index;

  // Generate decode target indications. Decode targets for temporal layers
  // below this frame's are marked as not present; this frame is a switch
  // point for its own and all higher temporal layers.
  RTC_DCHECK_LT(temporal_index, kMaxTemporalStreams);
  generic.decode_target_indications.resize(kMaxTemporalStreams);
  auto it = std::fill_n(generic.decode_target_indications.begin(),
                        temporal_index, DecodeTargetIndication::kNotPresent);
  std::fill(it, generic.decode_target_indications.end(),
            DecodeTargetIndication::kSwitch);
  generic.chain_diffs = {
      (is_keyframe || last_frame_id_[0][0] < 0)
          ? 0
          : static_cast<int>(frame_id - last_frame_id_[0][0])};

  if (is_keyframe) {
    RTC_DCHECK_EQ(temporal_index, 0);
    last_frame_id_[/*spatial index*/ 0].fill(-1);
    last_frame_id_[/*spatial index*/ 0][temporal_index] = frame_id;
    return;
  }

  if (h264_info.base_layer_sync) {
    int64_t tl0_frame_id = last_frame_id_[/*spatial index*/ 0][0];

    for (int i = 1; i < RtpGenericFrameDescriptor::kMaxTemporalLayers; ++i) {
      if (last_frame_id_[/*spatial index*/ 0][i] < tl0_frame_id) {
        last_frame_id_[/*spatial index*/ 0][i] = -1;
      }
    }

    RTC_DCHECK_GE(tl0_frame_id, 0);
    RTC_DCHECK_LT(tl0_frame_id, frame_id);
    generic.dependencies.push_back(tl0_frame_id);
  } else {
    for (int i = 0; i <= temporal_index; ++i) {
      int64_t last_frame_id = last_frame_id_[/*spatial index*/ 0][i];

      if (last_frame_id != -1) {
        RTC_DCHECK_LT(last_frame_id, frame_id);
        generic.dependencies.push_back(last_frame_id);
      }
    }
  }

  last_frame_id_[/*spatial index*/ 0][temporal_index] = frame_id;
}

void RtpPayloadParams::Vp8ToGeneric(const CodecSpecificInfoVP8& vp8_info,
                                    int64_t frame_id,
                                    bool is_keyframe,
                                    RTPVideoHeader* rtp_video_header) {
  const auto& vp8_header =
      std::get<RTPVideoHeaderVP8>(rtp_video_header->video_type_header);
  const int spatial_index = 0;
  const int temporal_index =
      vp8_header.temporalIdx != kNoTemporalIdx ? vp8_header.temporalIdx : 0;

  if (temporal_index >= RtpGenericFrameDescriptor::kMaxTemporalLayers ||
      spatial_index >= RtpGenericFrameDescriptor::kMaxSpatialLayers) {
    RTC_LOG(LS_WARNING) << "Temporal and/or spatial index is too high to be "
                           "used with the generic frame descriptor.";
    return;
  }

  RTPVideoHeader::GenericDescriptorInfo& generic =
      rtp_video_header->generic.emplace();

  generic.frame_id = frame_id;
  generic.spatial_index = spatial_index;
  generic.temporal_index = temporal_index;

  // Generate decode target indications. Decode targets for temporal layers
  // below this frame's are marked as not present; this frame is a switch
  // point for its own and all higher temporal layers.
  RTC_DCHECK_LT(temporal_index, kMaxTemporalStreams);
  generic.decode_target_indications.resize(kMaxTemporalStreams);
  auto it = std::fill_n(generic.decode_target_indications.begin(),
                        temporal_index, DecodeTargetIndication::kNotPresent);
  std::fill(it, generic.decode_target_indications.end(),
            DecodeTargetIndication::kSwitch);

  // Frame dependencies.
  if (vp8_info.useExplicitDependencies) {
    SetDependenciesVp8New(vp8_info, frame_id, is_keyframe, vp8_header.layerSync,
                          &generic);
  } else {
    SetDependenciesVp8Deprecated(vp8_info, frame_id, is_keyframe, spatial_index,
                                 temporal_index, vp8_header.layerSync,
                                 &generic);
  }

  // Calculate chains. The single chain tracks temporal-layer-0 frames, so
  // only T0 frames advance it.
  generic.chain_diffs = {
      (is_keyframe || chain_last_frame_id_[0] < 0)
          ? 0
          : static_cast<int>(frame_id - chain_last_frame_id_[0])};
  if (temporal_index == 0) {
    chain_last_frame_id_[0] = frame_id;
  }
}

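// Synthesizes generic descriptor info from the VP9 RTP header alone. The
// layering is laid out to match the structure returned by
// MinimalisticStructure(kMaxSimulatedSpatialLayers, kMaxTemporalStreams).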
void RtpPayloadParams::Vp9ToGeneric(const CodecSpecificInfoVP9& /* vp9_info */,
                                    int64_t frame_id,
                                    RTPVideoHeader& rtp_video_header) {
  const auto& vp9_header =
      std::get<RTPVideoHeaderVP9>(rtp_video_header.video_type_header);
  const int num_spatial_layers = kMaxSimulatedSpatialLayers;
  const int first_active_spatial_id = vp9_header.first_active_layer;
  const int last_active_spatial_id = vp9_header.num_spatial_layers - 1;
  const int num_temporal_layers = kMaxTemporalStreams;
  static_assert(num_spatial_layers <=
                RtpGenericFrameDescriptor::kMaxSpatialLayers);
  static_assert(num_temporal_layers <=
                RtpGenericFrameDescriptor::kMaxTemporalLayers);
  static_assert(num_spatial_layers <= DependencyDescriptor::kMaxSpatialIds);
  static_assert(num_temporal_layers <= DependencyDescriptor::kMaxTemporalIds);

  int spatial_index =
      vp9_header.spatial_idx != kNoSpatialIdx ? vp9_header.spatial_idx : 0;
  int temporal_index =
      vp9_header.temporal_idx != kNoTemporalIdx ? vp9_header.temporal_idx : 0;

  if (!(temporal_index < num_temporal_layers &&
        first_active_spatial_id <= spatial_index &&
        spatial_index <= last_active_spatial_id &&
        last_active_spatial_id < num_spatial_layers)) {
    // Prefer to generate no generic layering over an inconsistent one.
    RTC_LOG(LS_ERROR) << "Inconsistent layer id sid=" << spatial_index
                      << ",tid=" << temporal_index
                      << " in VP9 header. Active spatial ids: ["
                      << first_active_spatial_id << ","
                      << last_active_spatial_id << "]";
    return;
  }

  RTPVideoHeader::GenericDescriptorInfo& result =
      rtp_video_header.generic.emplace();

  result.frame_id = frame_id;
  result.spatial_index = spatial_index;
  result.temporal_index = temporal_index;

  result.decode_target_indications.reserve(num_spatial_layers *
                                           num_temporal_layers);
  for (int sid = 0; sid < num_spatial_layers; ++sid) {
    for (int tid = 0; tid < num_temporal_layers; ++tid) {
      DecodeTargetIndication dti;
      if (sid < spatial_index || tid < temporal_index) {
        dti = DecodeTargetIndication::kNotPresent;
      } else if (spatial_index != sid &&
                 vp9_header.non_ref_for_inter_layer_pred) {
        dti = DecodeTargetIndication::kNotPresent;
      } else if (sid == spatial_index && tid == temporal_index) {
        // Assume that if the frame is decodable, all of its own layer is
        // decodable.
        dti = DecodeTargetIndication::kSwitch;
      } else if (sid == spatial_index && vp9_header.temporal_up_switch) {
        dti = DecodeTargetIndication::kSwitch;
      } else if (!vp9_header.inter_pic_predicted) {
        // Key frame or spatial upswitch.
        dti = DecodeTargetIndication::kSwitch;
      } else {
        // Make no other assumptions. That should be safe, though suboptimal.
        // To provide a more accurate dti, the encoder wrapper should fill in
        // CodecSpecificInfo::generic_frame_info.
        dti = DecodeTargetIndication::kRequired;
      }
      result.decode_target_indications.push_back(dti);
    }
  }

  // Calculate frame dependencies.
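  // `last_vp9_frame_id_` acts as a ring buffer indexed by the low 7 bits of
  // the VP9 picture id, mapping (picture id, spatial layer) to the frame id
  // used in the generic descriptor.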
  static constexpr int kPictureDiffLimit = 128;
  if (last_vp9_frame_id_.empty()) {
    // Create the array only if it is ever used.
    last_vp9_frame_id_.resize(kPictureDiffLimit);
  }

  if (vp9_header.flexible_mode) {
    if (vp9_header.inter_layer_predicted && spatial_index > 0) {
      result.dependencies.push_back(
          last_vp9_frame_id_[vp9_header.picture_id % kPictureDiffLimit]
                            [spatial_index - 1]);
    }
    if (vp9_header.inter_pic_predicted) {
      for (size_t i = 0; i < vp9_header.num_ref_pics; ++i) {
        // The picture id is a 15-bit number that wraps around. Though
        // underflow may produce a picture id that exceeds 2^15, that is fine
        // because this code block only uses the last 7 bits of the picture id.
        uint16_t depend_on = vp9_header.picture_id - vp9_header.pid_diff[i];
        result.dependencies.push_back(
            last_vp9_frame_id_[depend_on % kPictureDiffLimit][spatial_index]);
      }
    }
    last_vp9_frame_id_[vp9_header.picture_id % kPictureDiffLimit]
                      [spatial_index] = frame_id;
  } else {
    // Implementing general conversion logic for non-flexible mode requires
    // some work, and we will almost certainly never need it, so for now
    // support only non-layered streams.
    if (spatial_index > 0 || temporal_index > 0) {
      // Prefer to generate no generic layering over an inconsistent one.
      rtp_video_header.generic.reset();
      return;
    }

    if (vp9_header.inter_pic_predicted) {
      // Since we only support non-scalable streams, we only need to save the
      // last frame id.
      result.dependencies.push_back(last_vp9_frame_id_[0][0]);
    }
    last_vp9_frame_id_[0][0] = frame_id;
  }

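  // Decode target index = sid * num_temporal_layers + tid, so the mask below
  // covers all decode targets of the active spatial layers. For example, with
  // 4 temporal layers and active spatial ids [0, 1], this yields
  // ((1 << 8) - 1) ^ ((1 << 0) - 1) = 0xff.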
  result.active_decode_targets =
      ((uint32_t{1} << num_temporal_layers * (last_active_spatial_id + 1)) -
       1) ^
      ((uint32_t{1} << num_temporal_layers * first_active_spatial_id) - 1);

  // Calculate chains, assuming a chain includes all frames with
  // temporal_id = 0.
  if (!vp9_header.inter_pic_predicted && !vp9_header.inter_layer_predicted) {
    // Assume frames without dependencies also reset chains.
    for (int sid = spatial_index; sid <= last_active_spatial_id; ++sid) {
      chain_last_frame_id_[sid] = -1;
    }
  }
  result.chain_diffs.resize(num_spatial_layers, 0);
  for (int sid = first_active_spatial_id; sid <= last_active_spatial_id;
       ++sid) {
    if (chain_last_frame_id_[sid] == -1) {
      result.chain_diffs[sid] = 0;
      continue;
    }
    int64_t chain_diff = frame_id - chain_last_frame_id_[sid];
    if (chain_diff >= 256) {
      RTC_LOG(LS_ERROR)
          << "Too many frames since the last VP9 T0 frame for spatial layer #"
          << sid << " at frame#" << frame_id;
      chain_last_frame_id_[sid] = -1;
      chain_diff = 0;
    }
    result.chain_diffs[sid] = chain_diff;
  }

  if (temporal_index == 0) {
    chain_last_frame_id_[spatial_index] = frame_id;
    if (!vp9_header.non_ref_for_inter_layer_pred) {
      for (int sid = spatial_index + 1; sid <= last_active_spatial_id; ++sid) {
        chain_last_frame_id_[sid] = frame_id;
      }
    }
  }
}

void RtpPayloadParams::SetDependenciesVp8Deprecated(
    const CodecSpecificInfoVP8& vp8_info,
    int64_t frame_id,
    bool is_keyframe,
    int spatial_index,
    int temporal_index,
    bool layer_sync,
    RTPVideoHeader::GenericDescriptorInfo* generic) {
  RTC_DCHECK(!vp8_info.useExplicitDependencies);
  RTC_DCHECK(!new_version_used_.has_value() || !new_version_used_.value());
  new_version_used_ = false;

  if (is_keyframe) {
    RTC_DCHECK_EQ(temporal_index, 0);
    last_frame_id_[spatial_index].fill(-1);
    last_frame_id_[spatial_index][temporal_index] = frame_id;
    return;
  }

  if (layer_sync) {
    int64_t tl0_frame_id = last_frame_id_[spatial_index][0];

    for (int i = 1; i < RtpGenericFrameDescriptor::kMaxTemporalLayers; ++i) {
      if (last_frame_id_[spatial_index][i] < tl0_frame_id) {
        last_frame_id_[spatial_index][i] = -1;
      }
    }

    RTC_DCHECK_GE(tl0_frame_id, 0);
    RTC_DCHECK_LT(tl0_frame_id, frame_id);
    generic->dependencies.push_back(tl0_frame_id);
  } else {
    for (int i = 0; i <= temporal_index; ++i) {
      int64_t last_frame_id = last_frame_id_[spatial_index][i];

      if (last_frame_id != -1) {
        RTC_DCHECK_LT(last_frame_id, frame_id);
        generic->dependencies.push_back(last_frame_id);
      }
    }
  }

  last_frame_id_[spatial_index][temporal_index] = frame_id;
}

void RtpPayloadParams::SetDependenciesVp8New(
    const CodecSpecificInfoVP8& vp8_info,
    int64_t frame_id,
    bool is_keyframe,
    bool /* layer_sync */,
    RTPVideoHeader::GenericDescriptorInfo* generic) {
  RTC_DCHECK(vp8_info.useExplicitDependencies);
  RTC_DCHECK(!new_version_used_.has_value() || new_version_used_.value());
  new_version_used_ = true;

  if (is_keyframe) {
    RTC_DCHECK_EQ(vp8_info.referencedBuffersCount, 0u);
    buffer_id_to_frame_id_.fill(frame_id);
    return;
  }

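  // VP8 maintains three reference buffers (last, golden, altref);
  // `buffer_id_to_frame_id_` maps each buffer id to the id of the most recent
  // frame stored in that buffer.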
  constexpr size_t kBuffersCountVp8 = CodecSpecificInfoVP8::kBuffersCount;

  RTC_DCHECK_GT(vp8_info.referencedBuffersCount, 0u);
  RTC_DCHECK_LE(vp8_info.referencedBuffersCount,
                std::size(vp8_info.referencedBuffers));

  for (size_t i = 0; i < vp8_info.referencedBuffersCount; ++i) {
    const size_t referenced_buffer = vp8_info.referencedBuffers[i];
    RTC_DCHECK_LT(referenced_buffer, kBuffersCountVp8);
    RTC_DCHECK_LT(referenced_buffer, buffer_id_to_frame_id_.size());

    const int64_t dependency_frame_id =
        buffer_id_to_frame_id_[referenced_buffer];
    RTC_DCHECK_GE(dependency_frame_id, 0);
    RTC_DCHECK_LT(dependency_frame_id, frame_id);

    const bool is_new_dependency =
        std::find(generic->dependencies.begin(), generic->dependencies.end(),
                  dependency_frame_id) == generic->dependencies.end();
    if (is_new_dependency) {
      generic->dependencies.push_back(dependency_frame_id);
    }
  }

  RTC_DCHECK_LE(vp8_info.updatedBuffersCount, kBuffersCountVp8);
  for (size_t i = 0; i < vp8_info.updatedBuffersCount; ++i) {
    const size_t updated_id = vp8_info.updatedBuffers[i];
    buffer_id_to_frame_id_[updated_id] = frame_id;
  }

  RTC_DCHECK_LE(buffer_id_to_frame_id_.size(), kBuffersCountVp8);
}

}  // namespace webrtc