call/rtp_payload_params.cc - src - Git at Google

 /*
  *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
  *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */

 #include "call/rtp_payload_params.h"

 #include <stddef.h>

 #include <algorithm>
 #include <cstdint>
 #include <optional>

 #include "absl/container/inlined_vector.h"
 #include "absl/strings/match.h"
 #include "absl/types/variant.h"
 #include "api/field_trials_view.h"
 #include "api/transport/rtp/dependency_descriptor.h"
 #include "api/video/encoded_image.h"
 #include "api/video/render_resolution.h"
 #include "api/video/video_codec_constants.h"
 #include "api/video/video_codec_type.h"
 #include "api/video/video_frame_type.h"
 #include "api/video/video_timing.h"
 #include "call/rtp_config.h"
 #include "common_video/generic_frame_descriptor/generic_frame_info.h"
 #include "modules/rtp_rtcp/source/rtp_generic_frame_descriptor.h"
 #include "modules/rtp_rtcp/source/rtp_video_header.h"
 #include "modules/video_coding/codecs/h264/include/h264_globals.h"
 #include "modules/video_coding/codecs/interface/common_constants.h"
 #include "modules/video_coding/codecs/vp8/include/vp8_globals.h"
 #include "modules/video_coding/codecs/vp9/include/vp9_globals.h"
 #include "modules/video_coding/frame_dependencies_calculator.h"
 #include "modules/video_coding/include/video_codec_interface.h"
 #include "rtc_base/arraysize.h"
 #include "rtc_base/checks.h"
 #include "rtc_base/logging.h"
 #include "rtc_base/random.h"
 #include "rtc_base/time_utils.h"

 namespace webrtc {
 namespace {

 // Number of spatial layers assumed whenever a dependency structure is
 // simulated for VP9 (used by GenericStructure() and Vp9ToGeneric() in this
 // file).
 constexpr int kMaxSimulatedSpatialLayers = 3;

 void PopulateRtpWithCodecSpecifics(const CodecSpecificInfo& info,
                                    std::optional<int> spatial_index,
                                    RTPVideoHeader* rtp) {
   rtp->codec = info.codecType;
   rtp->is_last_frame_in_picture = info.end_of_picture;
   rtp->frame_instrumentation_data = info.frame_instrumentation_data;
   switch (info.codecType) {
     case kVideoCodecVP8: {
       auto& vp8_header = rtp->video_type_header.emplace<RTPVideoHeaderVP8>();
       vp8_header.InitRTPVideoHeaderVP8();
       vp8_header.nonReference = info.codecSpecific.VP8.nonReference;
       vp8_header.temporalIdx = info.codecSpecific.VP8.temporalIdx;
       vp8_header.layerSync = info.codecSpecific.VP8.layerSync;
       vp8_header.keyIdx = info.codecSpecific.VP8.keyIdx;
       return;
     }
     case kVideoCodecVP9: {
       auto& vp9_header = rtp->video_type_header.emplace<RTPVideoHeaderVP9>();
       vp9_header.InitRTPVideoHeaderVP9();
       vp9_header.inter_pic_predicted =
           info.codecSpecific.VP9.inter_pic_predicted;
       vp9_header.flexible_mode = info.codecSpecific.VP9.flexible_mode;
       vp9_header.ss_data_available = info.codecSpecific.VP9.ss_data_available;
       vp9_header.non_ref_for_inter_layer_pred =
           info.codecSpecific.VP9.non_ref_for_inter_layer_pred;
       vp9_header.temporal_idx = info.codecSpecific.VP9.temporal_idx;
       vp9_header.temporal_up_switch = info.codecSpecific.VP9.temporal_up_switch;
       vp9_header.inter_layer_predicted =
           info.codecSpecific.VP9.inter_layer_predicted;
       vp9_header.gof_idx = info.codecSpecific.VP9.gof_idx;
       vp9_header.num_spatial_layers = info.codecSpecific.VP9.num_spatial_layers;
       vp9_header.first_active_layer = info.codecSpecific.VP9.first_active_layer;
       if (vp9_header.num_spatial_layers > 1) {
         vp9_header.spatial_idx = spatial_index.value_or(kNoSpatialIdx);
       } else {
         vp9_header.spatial_idx = kNoSpatialIdx;
       }
       if (info.codecSpecific.VP9.ss_data_available) {
         vp9_header.spatial_layer_resolution_present =
             info.codecSpecific.VP9.spatial_layer_resolution_present;
         if (info.codecSpecific.VP9.spatial_layer_resolution_present) {
           for (size_t i = 0; i < info.codecSpecific.VP9.num_spatial_layers;
                ++i) {
             vp9_header.width[i] = info.codecSpecific.VP9.width[i];
             vp9_header.height[i] = info.codecSpecific.VP9.height[i];
           }
         }
         vp9_header.gof.CopyGofInfoVP9(info.codecSpecific.VP9.gof);
       }

       vp9_header.num_ref_pics = info.codecSpecific.VP9.num_ref_pics;
       for (int i = 0; i < info.codecSpecific.VP9.num_ref_pics; ++i) {
         vp9_header.pid_diff[i] = info.codecSpecific.VP9.p_diff[i];
       }
       vp9_header.end_of_picture = info.end_of_picture;
       return;
     }
     case kVideoCodecH264: {
       auto& h264_header = rtp->video_type_header.emplace<RTPVideoHeaderH264>();
       h264_header.packetization_mode =
           info.codecSpecific.H264.packetization_mode;
       return;
     }
     // These codec types do not have codec-specifics.
     case kVideoCodecGeneric:
     case kVideoCodecH265:
     case kVideoCodecAV1:
       return;
   }
 }

 // Fills `timing` from the timing information carried by `image`.
 //
 // When the image flags are kInvalid or kNotTriggered, only `timing->flags` is
 // written (set to kInvalid). Otherwise the encode start/finish deltas are
 // computed relative to the capture time, all later-stage deltas are zeroed
 // and the image flags are propagated.
 void SetVideoTiming(const EncodedImage& image, VideoSendTiming* timing) {
   const auto& source = image.timing_;
   const bool timing_unavailable =
       source.flags == VideoSendTiming::TimingFrameFlags::kInvalid ||
       source.flags == VideoSendTiming::TimingFrameFlags::kNotTriggered;
   if (timing_unavailable) {
     timing->flags = VideoSendTiming::TimingFrameFlags::kInvalid;
     return;
   }

   // Deltas known at this point.
   timing->encode_start_delta_ms = VideoSendTiming::GetDeltaCappedMs(
       image.capture_time_ms_, source.encode_start_ms);
   timing->encode_finish_delta_ms = VideoSendTiming::GetDeltaCappedMs(
       image.capture_time_ms_, source.encode_finish_ms);

   // Deltas that are not set here start out at zero.
   timing->packetization_finish_delta_ms = 0;
   timing->pacer_exit_delta_ms = 0;
   timing->network_timestamp_delta_ms = 0;
   timing->network2_timestamp_delta_ms = 0;

   timing->flags = source.flags;
 }

 // Builds a dependency structure that matches the simulated generic info.
 //
 // One template is produced per (spatial id, temporal id) pair, which makes it
 // possible to emit a valid dependency descriptor for any stream where
 // `num_spatial_layers` * `num_temporal_layers` <= 32 (limited by
 // https://aomediacodec.github.io/av1-rtp-spec/#a82-syntax, see
 // template_fdiffs()). The templates are not tuned for any particular
 // structure, so the dependency descriptor uses more bytes on the wire than it
 // would with tuned templates.
 FrameDependencyStructure MinimalisticStructure(int num_spatial_layers,
                                                int num_temporal_layers) {
   RTC_DCHECK_LE(num_spatial_layers, DependencyDescriptor::kMaxSpatialIds);
   RTC_DCHECK_LE(num_temporal_layers, DependencyDescriptor::kMaxTemporalIds);
   RTC_DCHECK_LE(num_spatial_layers * num_temporal_layers, 32);

   const int num_layers = num_spatial_layers * num_temporal_layers;

   FrameDependencyStructure result;
   result.num_decode_targets = num_layers;
   result.num_chains = num_spatial_layers;
   result.templates.reserve(num_layers);

   for (int frame_sid = 0; frame_sid < num_spatial_layers; ++frame_sid) {
     for (int frame_tid = 0; frame_tid < num_temporal_layers; ++frame_tid) {
       FrameDependencyTemplate layer_template;
       layer_template.spatial_id = frame_sid;
       layer_template.temporal_id = frame_tid;
       layer_template.frame_diffs.push_back(frame_tid == 0 ? num_layers
                                                           : num_spatial_layers);
       layer_template.chain_diffs.assign(result.num_chains, 1);

       // One indication per decode target, ordered by spatial then temporal
       // id. Prefer the kSwitch indication for frames that are part of the
       // decode target: the dependency descriptor information generated in
       // this class uses kSwitch more often than kRequired, which increases
       // the chance of a good (or complete) template match.
       for (int target_sid = 0; target_sid < num_spatial_layers;
            ++target_sid) {
         for (int target_tid = 0; target_tid < num_temporal_layers;
              ++target_tid) {
           const bool part_of_target =
               frame_sid <= target_sid && frame_tid <= target_tid;
           layer_template.decode_target_indications.push_back(
               part_of_target ? DecodeTargetIndication::kSwitch
                              : DecodeTargetIndication::kNotPresent);
         }
       }

       result.templates.push_back(layer_template);
       result.decode_target_protected_by_chain.push_back(frame_sid);
     }
   }
   return result;
 }
 }  // namespace

 // Creates payload params for the stream identified by `ssrc`.
 //
 // The two experiment flags are read from `trials`: each is on when the
 // corresponding field trial string starts with "Enabled". All frame id
 // bookkeeping starts out as -1. The picture id, tl0 picture index and frame id
 // are taken from `state` when one is supplied and are randomized otherwise.
 RtpPayloadParams::RtpPayloadParams(const uint32_t ssrc,
                                    const RtpPayloadState* state,
                                    const FieldTrialsView& trials)
     : ssrc_(ssrc),
       generic_picture_id_experiment_(
           absl::StartsWith(trials.Lookup("WebRTC-GenericPictureId"),
                            "Enabled")),
       simulate_generic_structure_(absl::StartsWith(
           trials.Lookup("WebRTC-GenericCodecDependencyDescriptor"),
           "Enabled")) {
   buffer_id_to_frame_id_.fill(-1);
   chain_last_frame_id_.fill(-1);
   for (auto& temporal_frame_ids : last_frame_id_) {
     temporal_frame_ids.fill(-1);
   }

   Random random(rtc::TimeMicros());
   if (state != nullptr) {
     state_.picture_id = state->picture_id;
     state_.tl0_pic_idx = state->tl0_pic_idx;
     state_.frame_id = state->frame_id;
   } else {
     // The picture id is restricted to 15 bits.
     state_.picture_id = random.Rand<int16_t>() & 0x7FFF;
     state_.tl0_pic_idx = random.Rand<uint8_t>();
     state_.frame_id = random.Rand<uint16_t>();
   }
 }

 // Copy construction uses the compiler-generated member-wise copy.
 RtpPayloadParams::RtpPayloadParams(const RtpPayloadParams& other) = default;

 // Nothing to release explicitly; members clean up after themselves.
 RtpPayloadParams::~RtpPayloadParams() = default;

 // Builds the RTP video header for `image`.
 //
 // `codec_specific_info` may be null, in which case no codec header is
 // populated. When `shared_frame_id` is set it is used as the frame id;
 // otherwise the next id is taken from (and advances) the internal state.
 RTPVideoHeader RtpPayloadParams::GetRtpVideoHeader(
     const EncodedImage& image,
     const CodecSpecificInfo* codec_specific_info,
     std::optional<int64_t> shared_frame_id) {
   // The internal counter only advances when no shared id was supplied.
   const int64_t frame_id =
       shared_frame_id.has_value() ? *shared_frame_id : state_.frame_id++;

   RTPVideoHeader header;
   if (codec_specific_info != nullptr) {
     PopulateRtpWithCodecSpecifics(*codec_specific_info, image.SpatialIndex(),
                                   &header);
   }

   // Properties copied straight from the encoded image.
   header.frame_type = image._frameType;
   header.simulcastIdx = image.SimulcastIndex().value_or(0);
   header.width = image._encodedWidth;
   header.height = image._encodedHeight;
   header.rotation = image.rotation_;
   header.content_type = image.content_type_;
   header.playout_delay = image.PlayoutDelay();
   header.color_space = image.ColorSpace()
                            ? std::make_optional(*image.ColorSpace())
                            : std::nullopt;
   header.video_frame_tracking_id = image.VideoFrameTrackingId();
   SetVideoTiming(image, &header.video_timing);

   // Only VP9 reports whether this is the first frame of a picture; every
   // other case is treated as the first frame.
   bool first_frame_in_picture = true;
   if (codec_specific_info != nullptr &&
       codec_specific_info->codecType == kVideoCodecVP9) {
     first_frame_in_picture =
         codec_specific_info->codecSpecific.VP9.first_frame_in_picture;
   }
   SetCodecSpecific(&header, first_frame_in_picture);

   const bool is_keyframe = image._frameType == VideoFrameType::kVideoFrameKey;
   SetGeneric(codec_specific_info, frame_id, is_keyframe, &header);

   return header;
 }

 // Returns the SSRC this object was constructed with.
 uint32_t RtpPayloadParams::ssrc() const {
   return ssrc_;
 }

 // Returns a copy of the current payload state (picture id, tl0 picture index
 // and frame id).
 RtpPayloadState RtpPayloadParams::state() const {
   return state_;
 }

 // Writes the picture id and, where applicable, the tl0 picture index into the
 // codec header of `rtp_video_header`, advancing the internal counters.
 //
 // The picture id advances once per picture (when `first_frame_in_picture` is
 // true) regardless of codec. VP8 and VP9 headers must already have been
 // emplaced. For the generic codec a legacy generic header carrying the
 // picture id is emplaced only when the generic picture id experiment is on.
 void RtpPayloadParams::SetCodecSpecific(RTPVideoHeader* rtp_video_header,
                                         bool first_frame_in_picture) {
   // Always advance the picture id; it wraps within 15 bits.
   if (first_frame_in_picture) {
     state_.picture_id = (static_cast<uint16_t>(state_.picture_id) + 1) & 0x7FFF;
   }

   const VideoCodecType codec = rtp_video_header->codec;
   if (codec == kVideoCodecVP8) {
     auto& vp8_header =
         absl::get<RTPVideoHeaderVP8>(rtp_video_header->video_type_header);
     vp8_header.pictureId = state_.picture_id;

     // tl0PicIdx is set iff a temporal index is set; it advances on every
     // base-layer frame.
     const bool has_temporal_idx = vp8_header.temporalIdx != kNoTemporalIdx;
     if (has_temporal_idx) {
       if (vp8_header.temporalIdx == 0) {
         ++state_.tl0_pic_idx;
       }
       vp8_header.tl0PicIdx = state_.tl0_pic_idx;
     }
   } else if (codec == kVideoCodecVP9) {
     auto& vp9_header =
         absl::get<RTPVideoHeaderVP9>(rtp_video_header->video_type_header);
     vp9_header.picture_id = state_.picture_id;

     // With spatial layers but no temporal layers, packets still carry
     // layering info with a temporal_idx of zero, so tl0_pic_idx has to be set
     // and incremented in that case as well.
     const bool has_temporal_idx = vp9_header.temporal_idx != kNoTemporalIdx;
     const bool has_spatial_idx = vp9_header.spatial_idx != kNoSpatialIdx;
     if (has_temporal_idx || has_spatial_idx) {
       const bool is_base_temporal_layer =
           !has_temporal_idx || vp9_header.temporal_idx == 0;
       if (first_frame_in_picture && is_base_temporal_layer) {
         ++state_.tl0_pic_idx;
       }
       vp9_header.tl0_pic_idx = state_.tl0_pic_idx;
     }
   } else if (codec == kVideoCodecGeneric && generic_picture_id_experiment_) {
     auto& legacy_header =
         rtp_video_header->video_type_header
             .emplace<RTPVideoHeaderLegacyGeneric>();
     legacy_header.picture_id = state_.picture_id;
   }
 }

 // Converts encoder-provided `frame_info` into a generic descriptor for the
 // frame identified by `frame_id`. Frame dependencies and chain diffs are
 // obtained from the member calculators; everything else is copied over.
 RTPVideoHeader::GenericDescriptorInfo
 RtpPayloadParams::GenericDescriptorFromFrameInfo(
     const GenericFrameInfo& frame_info,
     int64_t frame_id) {
   RTPVideoHeader::GenericDescriptorInfo descriptor;
   descriptor.frame_id = frame_id;

   // Values derived by the calculators.
   descriptor.dependencies = dependencies_calculator_.FromBuffersUsage(
       frame_id, frame_info.encoder_buffers);
   descriptor.chain_diffs =
       chains_calculator_.From(frame_id, frame_info.part_of_chain);

   // Values copied verbatim from the frame info.
   descriptor.spatial_index = frame_info.spatial_id;
   descriptor.temporal_index = frame_info.temporal_id;
   descriptor.decode_target_indications = frame_info.decode_target_indications;
   descriptor.active_decode_targets = frame_info.active_decode_targets;
   return descriptor;
 }

 void RtpPayloadParams::SetGeneric(const CodecSpecificInfo* codec_specific_info,
                                   int64_t frame_id,
                                   bool is_keyframe,
                                   RTPVideoHeader* rtp_video_header) {
   if (codec_specific_info && codec_specific_info->generic_frame_info &&
       !codec_specific_info->generic_frame_info->encoder_buffers.empty()) {
     if (is_keyframe) {
       // Key frame resets all chains it is in.
       chains_calculator_.Reset(
           codec_specific_info->generic_frame_info->part_of_chain);
     }
     rtp_video_header->generic = GenericDescriptorFromFrameInfo(
         *codec_specific_info->generic_frame_info, frame_id);
     return;
   }

   switch (rtp_video_header->codec) {
     case VideoCodecType::kVideoCodecGeneric:
       GenericToGeneric(frame_id, is_keyframe, rtp_video_header);
       return;
     case VideoCodecType::kVideoCodecVP8:
       if (codec_specific_info) {
         Vp8ToGeneric(codec_specific_info->codecSpecific.VP8, frame_id,
                      is_keyframe, rtp_video_header);
       }
       return;
     case VideoCodecType::kVideoCodecVP9:
       if (codec_specific_info != nullptr) {
         Vp9ToGeneric(codec_specific_info->codecSpecific.VP9, frame_id,
                      *rtp_video_header);
       }
       return;
     case VideoCodecType::kVideoCodecAV1:
       // Codec-specifics is not supported for AV1. We convert from the
       // generic_frame_info.
       return;
     case VideoCodecType::kVideoCodecH264:
       if (codec_specific_info) {
         H264ToGeneric(codec_specific_info->codecSpecific.H264, frame_id,
                       is_keyframe, rtp_video_header);
       }
       return;
     case VideoCodecType::kVideoCodecH265:
       // Codec-specifics is not supported for H.265. We convert from the
       // generic_frame_info.
       return;
   }
   RTC_DCHECK_NOTREACHED() << "Unsupported codec.";
 }

 // Returns a simulated dependency structure for a stream whose encoder did not
 // provide an explicit `template_structure`.
 //
 // Returns std::nullopt when nothing is simulated: `codec_specific_info` is
 // null, the codec is AV1 or H.265, or the codec is generic and the
 // "WebRTC-GenericCodecDependencyDescriptor" field trial is not enabled.
 // For VP9, per-layer resolutions are attached when the scalability structure
 // data carries them and at least one of them is valid.
 std::optional<FrameDependencyStructure> RtpPayloadParams::GenericStructure(
     const CodecSpecificInfo* codec_specific_info) {
   if (codec_specific_info == nullptr) {
     return std::nullopt;
   }
   // This helper shouldn't be used when template structure is specified
   // explicitly.
   RTC_DCHECK(!codec_specific_info->template_structure.has_value());
   switch (codec_specific_info->codecType) {
     case VideoCodecType::kVideoCodecGeneric:
       if (simulate_generic_structure_) {
         return MinimalisticStructure(/*num_spatial_layers=*/1,
                                      /*num_temporal_layers=*/1);
       }
       return std::nullopt;
     case VideoCodecType::kVideoCodecVP8:
       return MinimalisticStructure(
           /*num_spatial_layers=*/1,
           /*num_temporal_layers=*/kMaxTemporalStreams);
     case VideoCodecType::kVideoCodecVP9: {
       std::optional<FrameDependencyStructure> structure = MinimalisticStructure(
           /*num_spatial_layers=*/kMaxSimulatedSpatialLayers,
           /*num_temporal_layers=*/kMaxTemporalStreams);
       const CodecSpecificInfoVP9& vp9 = codec_specific_info->codecSpecific.VP9;
       if (vp9.ss_data_available && vp9.spatial_layer_resolution_present) {
         // Collect the reported resolutions, remembering the first and the
         // last valid one so that invalid or missing entries can be patched.
         RenderResolution first_valid;
         RenderResolution last_valid;
         for (size_t i = 0; i < vp9.num_spatial_layers; ++i) {
           RenderResolution r(vp9.width[i], vp9.height[i]);
           if (r.Valid()) {
             if (!first_valid.Valid()) {
               first_valid = r;
             }
             last_valid = r;
           }
           structure->resolutions.push_back(r);
         }
         if (!last_valid.Valid()) {
           // No valid resolution found. Do not send resolutions.
           structure->resolutions.clear();
         } else {
           // The simulated structure always describes
           // kMaxSimulatedSpatialLayers layers; pad with the last valid
           // resolution.
           structure->resolutions.resize(kMaxSimulatedSpatialLayers, last_valid);
           // VP9 encoder wrapper may disable first few spatial layers by
           // setting invalid resolution (0,0). `structure->resolutions`
           // doesn't support invalid resolution, so reset them to something
           // valid.
           for (RenderResolution& r : structure->resolutions) {
             if (!r.Valid()) {
               r = first_valid;
             }
           }
         }
       }
       return structure;
     }
     case VideoCodecType::kVideoCodecH264:
       return MinimalisticStructure(
           /*num_spatial_layers=*/1,
           /*num_temporal_layers=*/kMaxTemporalStreams);
     case VideoCodecType::kVideoCodecAV1:
     case VideoCodecType::kVideoCodecH265:
       return std::nullopt;
   }
   RTC_DCHECK_NOTREACHED() << "Unsupported codec.";
   // Keep the function well-defined when the check above is compiled out and
   // `codecType` holds a value outside the enumerators handled in the switch.
   return std::nullopt;
 }

 // Fills in the generic descriptor for a frame of the generic codec.
 //
 // The stream is modelled as a single layer with one decode target and one
 // chain: a key frame starts the chain (chain diff 0, no dependencies) and
 // every other frame depends on the previously recorded frame.
 void RtpPayloadParams::GenericToGeneric(int64_t frame_id,
                                         bool is_keyframe,
                                         RTPVideoHeader* rtp_video_header) {
   auto& base_layer_frame_ids = last_frame_id_[0];

   RTPVideoHeader::GenericDescriptorInfo& generic =
       rtp_video_header->generic.emplace();
   generic.frame_id = frame_id;
   generic.decode_target_indications.push_back(DecodeTargetIndication::kSwitch);

   if (!is_keyframe) {
     const int64_t last_frame_id = base_layer_frame_ids[0];
     RTC_DCHECK_NE(last_frame_id, -1);
     RTC_DCHECK_LT(last_frame_id, frame_id);
     generic.chain_diffs.push_back(frame_id - last_frame_id);
     generic.dependencies.push_back(last_frame_id);
   } else {
     generic.chain_diffs.push_back(0);
     base_layer_frame_ids.fill(-1);
   }

   // Remember this frame as the reference for the next one.
   base_layer_frame_ids[0] = frame_id;
 }

 // Derives the generic descriptor for an H264 frame from `h264_info`.
 //
 // A missing temporal index is treated as 0. No descriptor is produced (and a
 // warning is logged) when the temporal index cannot be represented. Key
 // frames reset the per-temporal-layer frame id bookkeeping; other frames
 // depend either on the last base-layer frame (base layer sync) or on the last
 // frame of every temporal layer up to and including their own.
 void RtpPayloadParams::H264ToGeneric(const CodecSpecificInfoH264& h264_info,
                                      int64_t frame_id,
                                      bool is_keyframe,
                                      RTPVideoHeader* rtp_video_header) {
   const int temporal_index =
       h264_info.temporal_idx != kNoTemporalIdx ? h264_info.temporal_idx : 0;

   // The index has to fit the generic frame descriptor and also the
   // decode-target vector below, which holds kMaxTemporalStreams entries.
   // Rejecting it here (rather than only DCHECKing later) keeps std::fill_n
   // from running past the end of that vector when DCHECKs are compiled out.
   if (temporal_index >= RtpGenericFrameDescriptor::kMaxTemporalLayers ||
       temporal_index >= kMaxTemporalStreams) {
     RTC_LOG(LS_WARNING) << "Temporal and/or spatial index is too high to be "
                            "used with generic frame descriptor.";
     return;
   }

   RTPVideoHeader::GenericDescriptorInfo& generic =
       rtp_video_header->generic.emplace();

   generic.frame_id = frame_id;
   generic.temporal_index = temporal_index;

   // Generate decode target indications: targets below this frame's temporal
   // layer do not contain it, all others get kSwitch.
   generic.decode_target_indications.resize(kMaxTemporalStreams);
   auto it = std::fill_n(generic.decode_target_indications.begin(),
                         temporal_index, DecodeTargetIndication::kNotPresent);
   std::fill(it, generic.decode_target_indications.end(),
             DecodeTargetIndication::kSwitch);

   if (is_keyframe) {
     RTC_DCHECK_EQ(temporal_index, 0);
     last_frame_id_[/*spatial index*/ 0].fill(-1);
     last_frame_id_[/*spatial index*/ 0][temporal_index] = frame_id;
     return;
   }

   if (h264_info.base_layer_sync) {
     int64_t tl0_frame_id = last_frame_id_[/*spatial index*/ 0][0];

     // Forget higher-layer frames recorded before the last base-layer frame.
     for (int i = 1; i < RtpGenericFrameDescriptor::kMaxTemporalLayers; ++i) {
       if (last_frame_id_[/*spatial index*/ 0][i] < tl0_frame_id) {
         last_frame_id_[/*spatial index*/ 0][i] = -1;
       }
     }

     RTC_DCHECK_GE(tl0_frame_id, 0);
     RTC_DCHECK_LT(tl0_frame_id, frame_id);
     generic.dependencies.push_back(tl0_frame_id);
   } else {
     for (int i = 0; i <= temporal_index; ++i) {
       int64_t last_frame_id = last_frame_id_[/*spatial index*/ 0][i];

       if (last_frame_id != -1) {
         RTC_DCHECK_LT(last_frame_id, frame_id);
         generic.dependencies.push_back(last_frame_id);
       }
     }
   }

   last_frame_id_[/*spatial index*/ 0][temporal_index] = frame_id;
 }

 // Derives the generic descriptor for a VP8 frame.
 //
 // The VP8 header must already be present in `rtp_video_header`. A missing
 // temporal index is treated as 0 and the spatial index is always 0. No
 // descriptor is produced (and a warning is logged) when an index cannot be
 // represented. Dependencies come from the explicit buffer information when
 // `vp8_info.useExplicitDependencies` is set, and from the per-layer frame id
 // bookkeeping otherwise. A single chain follows the temporal layer 0 frames.
 void RtpPayloadParams::Vp8ToGeneric(const CodecSpecificInfoVP8& vp8_info,
                                     int64_t frame_id,
                                     bool is_keyframe,
                                     RTPVideoHeader* rtp_video_header) {
   const auto& vp8_header =
       absl::get<RTPVideoHeaderVP8>(rtp_video_header->video_type_header);
   const int spatial_index = 0;
   const int temporal_index =
       vp8_header.temporalIdx != kNoTemporalIdx ? vp8_header.temporalIdx : 0;

   // The temporal index has to fit the generic frame descriptor and also the
   // decode-target vector below, which holds kMaxTemporalStreams entries.
   // Rejecting it here (rather than only DCHECKing later) keeps std::fill_n
   // from running past the end of that vector when DCHECKs are compiled out.
   if (temporal_index >= RtpGenericFrameDescriptor::kMaxTemporalLayers ||
       temporal_index >= kMaxTemporalStreams ||
       spatial_index >= RtpGenericFrameDescriptor::kMaxSpatialLayers) {
     RTC_LOG(LS_WARNING) << "Temporal and/or spatial index is too high to be "
                            "used with generic frame descriptor.";
     return;
   }

   RTPVideoHeader::GenericDescriptorInfo& generic =
       rtp_video_header->generic.emplace();

   generic.frame_id = frame_id;
   generic.spatial_index = spatial_index;
   generic.temporal_index = temporal_index;

   // Generate decode target indications: targets below this frame's temporal
   // layer do not contain it, all others get kSwitch.
   generic.decode_target_indications.resize(kMaxTemporalStreams);
   auto it = std::fill_n(generic.decode_target_indications.begin(),
                         temporal_index, DecodeTargetIndication::kNotPresent);
   std::fill(it, generic.decode_target_indications.end(),
             DecodeTargetIndication::kSwitch);

   // Frame dependencies.
   if (vp8_info.useExplicitDependencies) {
     SetDependenciesVp8New(vp8_info, frame_id, is_keyframe, vp8_header.layerSync,
                           &generic);
   } else {
     SetDependenciesVp8Deprecated(vp8_info, frame_id, is_keyframe, spatial_index,
                                  temporal_index, vp8_header.layerSync,
                                  &generic);
   }

   // Calculate chains. The diff is 0 for key frames and while no chain frame
   // has been recorded yet; otherwise it is the distance to the last chain
   // frame.
   generic.chain_diffs = {
       (is_keyframe || chain_last_frame_id_[0] < 0)
           ? 0
           : static_cast<int>(frame_id - chain_last_frame_id_[0])};
   if (temporal_index == 0) {
     chain_last_frame_id_[0] = frame_id;
   }
 }

 // Derives the generic descriptor for a VP9 frame from the VP9 header that is
 // already present in `rtp_video_header`.
 //
 // The descriptor always describes kMaxSimulatedSpatialLayers spatial layers
 // and kMaxTemporalStreams temporal layers. No descriptor is produced when the
 // layer ids in the header are inconsistent, or when a layered stream is sent
 // in non-flexible mode.
 void RtpPayloadParams::Vp9ToGeneric(const CodecSpecificInfoVP9& /* vp9_info */,
                                     int64_t frame_id,
                                     RTPVideoHeader& rtp_video_header) {
   const auto& vp9_header =
       absl::get<RTPVideoHeaderVP9>(rtp_video_header.video_type_header);
   const int num_spatial_layers = kMaxSimulatedSpatialLayers;
   const int first_active_spatial_id = vp9_header.first_active_layer;
   const int last_active_spatial_id = vp9_header.num_spatial_layers - 1;
   const int num_temporal_layers = kMaxTemporalStreams;
   static_assert(num_spatial_layers <=
                 RtpGenericFrameDescriptor::kMaxSpatialLayers);
   static_assert(num_temporal_layers <=
                 RtpGenericFrameDescriptor::kMaxTemporalLayers);
   static_assert(num_spatial_layers <= DependencyDescriptor::kMaxSpatialIds);
   static_assert(num_temporal_layers <= DependencyDescriptor::kMaxTemporalIds);

   // Missing layer indices are treated as layer 0.
   int spatial_index =
       vp9_header.spatial_idx != kNoSpatialIdx ? vp9_header.spatial_idx : 0;
   int temporal_index =
       vp9_header.temporal_idx != kNoTemporalIdx ? vp9_header.temporal_idx : 0;

   // The frame's layer must lie within the active spatial range, and that
   // range must fit the simulated structure.
   if (!(temporal_index < num_temporal_layers &&
         first_active_spatial_id <= spatial_index &&
         spatial_index <= last_active_spatial_id &&
         last_active_spatial_id < num_spatial_layers)) {
     // Prefer to generate no generic layering than an inconsistent one.
     RTC_LOG(LS_ERROR) << "Inconsistent layer id sid=" << spatial_index
                       << ",tid=" << temporal_index
                       << " in VP9 header. Active spatial ids: ["
                       << first_active_spatial_id << ","
                       << last_active_spatial_id << "]";
     return;
   }

   RTPVideoHeader::GenericDescriptorInfo& result =
       rtp_video_header.generic.emplace();

   result.frame_id = frame_id;
   result.spatial_index = spatial_index;
   result.temporal_index = temporal_index;

   // One decode target indication per (spatial id, temporal id) pair, ordered
   // by spatial id first and temporal id second.
   result.decode_target_indications.reserve(num_spatial_layers *
                                            num_temporal_layers);
   for (int sid = 0; sid < num_spatial_layers; ++sid) {
     for (int tid = 0; tid < num_temporal_layers; ++tid) {
       DecodeTargetIndication dti;
       if (sid < spatial_index || tid < temporal_index) {
         // Targets below this frame's layer do not include the frame.
         dti = DecodeTargetIndication::kNotPresent;
       } else if (spatial_index != sid &&
                  vp9_header.non_ref_for_inter_layer_pred) {
         // Frame is not used for inter-layer prediction, so other spatial
         // layers do not include it.
         dti = DecodeTargetIndication::kNotPresent;
       } else if (sid == spatial_index && tid == temporal_index) {
         // Assume that if frame is decodable, all of its own layer is decodable.
         dti = DecodeTargetIndication::kSwitch;
       } else if (sid == spatial_index && vp9_header.temporal_up_switch) {
         dti = DecodeTargetIndication::kSwitch;
       } else if (!vp9_header.inter_pic_predicted) {
         // Key frame or spatial upswitch
         dti = DecodeTargetIndication::kSwitch;
       } else {
         // Make no other assumptions. That should be safe, though suboptimal.
         // To provide more accurate dti, encoder wrapper should fill in
         // CodecSpecificInfo::generic_frame_info
         dti = DecodeTargetIndication::kRequired;
       }
       result.decode_target_indications.push_back(dti);
     }
   }

   // Calculate frame dependencies.
   // `last_vp9_frame_id_` maps (picture id modulo kPictureDiffLimit, spatial
   // index) to the frame id recorded for that slot.
   static constexpr int kPictureDiffLimit = 128;
   if (last_vp9_frame_id_.empty()) {
     // Create the array only if it is ever used.
     last_vp9_frame_id_.resize(kPictureDiffLimit);
   }

   if (vp9_header.flexible_mode) {
     if (vp9_header.inter_layer_predicted && spatial_index > 0) {
       // Depends on the lower spatial layer of the same picture.
       result.dependencies.push_back(
           last_vp9_frame_id_[vp9_header.picture_id % kPictureDiffLimit]
                             [spatial_index - 1]);
     }
     if (vp9_header.inter_pic_predicted) {
       for (size_t i = 0; i < vp9_header.num_ref_pics; ++i) {
         // picture_id is a 15 bit number that wraps around. Though underflow
         // may produce a picture id that exceeds 2^15, it is ok because in
         // this code block only the last 7 bits of the picture_id are used.
         uint16_t depend_on = vp9_header.picture_id - vp9_header.pid_diff[i];
         result.dependencies.push_back(
             last_vp9_frame_id_[depend_on % kPictureDiffLimit][spatial_index]);
       }
     }
     last_vp9_frame_id_[vp9_header.picture_id % kPictureDiffLimit]
                       [spatial_index] = frame_id;
   } else {
     // Implementing general conversion logic for non-flexible mode requires some
     // work and we will almost certainly never need it, so for now support only
     // non-layered streams.
     if (spatial_index > 0 || temporal_index > 0) {
       // Prefer to generate no generic layering than an inconsistent one.
       rtp_video_header.generic.reset();
       return;
     }

     if (vp9_header.inter_pic_predicted) {
       // Since we only support non-scalable streams we only need to save the
       // last frame id.
       result.dependencies.push_back(last_vp9_frame_id_[0][0]);
     }
     last_vp9_frame_id_[0][0] = frame_id;
   }

   // Bitmask with `num_temporal_layers` consecutive bits set for every spatial
   // layer in [first_active_spatial_id, last_active_spatial_id].
   result.active_decode_targets =
       ((uint32_t{1} << num_temporal_layers * (last_active_spatial_id + 1)) -
        1) ^
       ((uint32_t{1} << num_temporal_layers * first_active_spatial_id) - 1);

   // Calculate chains, assuming chain includes all frames with temporal_id = 0
   if (!vp9_header.inter_pic_predicted && !vp9_header.inter_layer_predicted) {
     // Assume frames without dependencies also reset chains.
     for (int sid = spatial_index; sid <= last_active_spatial_id; ++sid) {
       chain_last_frame_id_[sid] = -1;
     }
   }
   // Chains of inactive spatial layers keep a diff of 0.
   result.chain_diffs.resize(num_spatial_layers, 0);
   for (int sid = first_active_spatial_id; sid <= last_active_spatial_id;
        ++sid) {
     if (chain_last_frame_id_[sid] == -1) {
       // No frame recorded for this chain yet (or it was just reset).
       result.chain_diffs[sid] = 0;
       continue;
     }
     int64_t chain_diff = frame_id - chain_last_frame_id_[sid];
     if (chain_diff >= 256) {
       // Diffs of 256 or more are not reported; restart the chain instead.
       RTC_LOG(LS_ERROR)
           << "Too many frames since last VP9 T0 frame for spatial layer #"
           << sid << " at frame#" << frame_id;
       chain_last_frame_id_[sid] = -1;
       chain_diff = 0;
     }
     result.chain_diffs[sid] = chain_diff;
   }

   if (temporal_index == 0) {
     // A temporal layer 0 frame becomes the latest frame of its own chain and,
     // unless it is excluded from inter-layer prediction, of the chains of all
     // higher active spatial layers.
     chain_last_frame_id_[spatial_index] = frame_id;
     if (!vp9_header.non_ref_for_inter_layer_pred) {
       for (int sid = spatial_index + 1; sid <= last_active_spatial_id; ++sid) {
         chain_last_frame_id_[sid] = frame_id;
       }
     }
   }
 }

 void RtpPayloadParams::SetDependenciesVp8Deprecated(
     const CodecSpecificInfoVP8& vp8_info,
     int64_t frame_id,
     bool is_keyframe,
     int spatial_index,
     int temporal_index,
     bool layer_sync,
     RTPVideoHeader::GenericDescriptorInfo* generic) {
   RTC_DCHECK(!vp8_info.useExplicitDependencies);
   RTC_DCHECK(!new_version_used_.has_value() || !new_version_used_.value());
   new_version_used_ = false;

   if (is_keyframe) {
     RTC_DCHECK_EQ(temporal_index, 0);
     last_frame_id_[spatial_index].fill(-1);
     last_frame_id_[spatial_index][temporal_index] = frame_id;
     return;
   }

   if (layer_sync) {
     int64_t tl0_frame_id = last_frame_id_[spatial_index][0];

     for (int i = 1; i < RtpGenericFrameDescriptor::kMaxTemporalLayers; ++i) {
       if (last_frame_id_[spatial_index][i] < tl0_frame_id) {
         last_frame_id_[spatial_index][i] = -1;
       }
     }

     RTC_DCHECK_GE(tl0_frame_id, 0);
     RTC_DCHECK_LT(tl0_frame_id, frame_id);
     generic->dependencies.push_back(tl0_frame_id);
   } else {
     for (int i = 0; i <= temporal_index; ++i) {
       int64_t last_frame_id = last_frame_id_[spatial_index][i];

       if (last_frame_id != -1) {
         RTC_DCHECK_LT(last_frame_id, frame_id);
         generic->dependencies.push_back(last_frame_id);
       }
     }
   }

   last_frame_id_[spatial_index][temporal_index] = frame_id;
 }

 // Computes VP8 frame dependencies from the buffers the encoder reports as
 // referenced and updated (used when `vp8_info.useExplicitDependencies` is
 // set).
 //
 // A key frame makes every buffer point at `frame_id` and adds no
 // dependencies. Any other frame depends on the frames currently held by the
 // buffers it references (each frame listed once) and then takes over the
 // buffers it updates. Must not be mixed with SetDependenciesVp8Deprecated()
 // on the same object.
 void RtpPayloadParams::SetDependenciesVp8New(
     const CodecSpecificInfoVP8& vp8_info,
     int64_t frame_id,
     bool is_keyframe,
     bool /* layer_sync */,
     RTPVideoHeader::GenericDescriptorInfo* generic) {
   RTC_DCHECK(vp8_info.useExplicitDependencies);
   RTC_DCHECK(!new_version_used_.has_value() || new_version_used_.value());
   new_version_used_ = true;

   if (is_keyframe) {
     RTC_DCHECK_EQ(vp8_info.referencedBuffersCount, 0u);
     buffer_id_to_frame_id_.fill(frame_id);
     return;
   }

   constexpr size_t kBuffersCountVp8 = CodecSpecificInfoVP8::kBuffersCount;

   RTC_DCHECK_GT(vp8_info.referencedBuffersCount, 0u);
   RTC_DCHECK_LE(vp8_info.referencedBuffersCount,
                 arraysize(vp8_info.referencedBuffers));

   for (size_t i = 0; i < vp8_info.referencedBuffersCount; ++i) {
     const size_t referenced_buffer = vp8_info.referencedBuffers[i];
     RTC_DCHECK_LT(referenced_buffer, kBuffersCountVp8);
     RTC_DCHECK_LT(referenced_buffer, buffer_id_to_frame_id_.size());

     const int64_t dependency_frame_id =
         buffer_id_to_frame_id_[referenced_buffer];
     RTC_DCHECK_GE(dependency_frame_id, 0);
     RTC_DCHECK_LT(dependency_frame_id, frame_id);

     // Several buffers may hold the same frame; list each dependency once.
     const bool is_new_dependency =
         std::find(generic->dependencies.begin(), generic->dependencies.end(),
                   dependency_frame_id) == generic->dependencies.end();
     if (is_new_dependency) {
       generic->dependencies.push_back(dependency_frame_id);
     }
   }

   RTC_DCHECK_LE(vp8_info.updatedBuffersCount, kBuffersCountVp8);
   for (size_t i = 0; i < vp8_info.updatedBuffersCount; ++i) {
     const size_t updated_id = vp8_info.updatedBuffers[i];
     // Same bounds check as for the referenced buffers above: the id is used
     // to index `buffer_id_to_frame_id_`.
     RTC_DCHECK_LT(updated_id, buffer_id_to_frame_id_.size());
     buffer_id_to_frame_id_[updated_id] = frame_id;
   }

   RTC_DCHECK_LE(buffer_id_to_frame_id_.size(), kBuffersCountVp8);
 }

 }  // namespace webrtc