/*
 * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include "modules/video_coding/rtp_vp9_ref_finder.h"

#include <algorithm>
#include <utility>

#include "absl/types/variant.h"
#include "rtc_base/logging.h"

namespace webrtc {
RtpFrameReferenceFinder::ReturnVector RtpVp9RefFinder::ManageFrame(
    std::unique_ptr<RtpFrameObject> frame) {
  const RTPVideoHeaderVP9& codec_header = absl::get<RTPVideoHeaderVP9>(
      frame->GetRtpVideoHeader().video_type_header);

  if (codec_header.temporal_idx != kNoTemporalIdx)
    frame->SetTemporalIndex(codec_header.temporal_idx);
  frame->SetSpatialIndex(codec_header.spatial_idx);
  frame->SetId(codec_header.picture_id & (kFrameIdLength - 1));

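  // Decide how to handle the frame: drop it if the layer indices are out of
  // range, derive references directly from the payload header in flexible
  // mode, or fall back to the GOF description in non-flexible mode.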
  FrameDecision decision;
  if (codec_header.temporal_idx >= kMaxTemporalLayers ||
      codec_header.spatial_idx >= kMaxSpatialLayers) {
    decision = kDrop;
  } else if (codec_header.flexible_mode) {
    decision = ManageFrameFlexible(frame.get(), codec_header);
  } else {
    if (codec_header.tl0_pic_idx == kNoTl0PicIdx) {
      RTC_LOG(LS_WARNING) << "TL0PICIDX is expected to be present in "
                             "non-flexible mode.";
      decision = kDrop;
    } else {
      int64_t unwrapped_tl0 =
          tl0_unwrapper_.Unwrap(codec_header.tl0_pic_idx & 0xFF);
      decision = ManageFrameGof(frame.get(), codec_header, unwrapped_tl0);

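      // Frames that cannot be resolved yet (e.g. their GOF info has not
      // arrived) are stashed and retried later; the oldest stashed frame is
      // evicted once the stash grows beyond kMaxStashedFrames.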
      if (decision == kStash) {
        if (stashed_frames_.size() > kMaxStashedFrames) {
          stashed_frames_.pop_back();
        }

        stashed_frames_.push_front(
            {.unwrapped_tl0 = unwrapped_tl0, .frame = std::move(frame)});
      }
    }
  }

  RtpFrameReferenceFinder::ReturnVector res;
  switch (decision) {
    case kStash:
      return res;
    case kHandOff:
      res.push_back(std::move(frame));
      RetryStashedFrames(res);
      return res;
    case kDrop:
      return res;
  }

  return res;
}

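// In flexible mode the references are signaled explicitly in the VP9 payload
// descriptor as picture id diffs, so the frame can be handed off immediately,
// or dropped if it carries more references than EncodedFrame supports.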
RtpVp9RefFinder::FrameDecision RtpVp9RefFinder::ManageFrameFlexible(
    RtpFrameObject* frame,
    const RTPVideoHeaderVP9& codec_header) {
  if (codec_header.num_ref_pics > EncodedFrame::kMaxFrameReferences) {
    return kDrop;
  }

  frame->num_references = codec_header.num_ref_pics;
  for (size_t i = 0; i < frame->num_references; ++i) {
    frame->references[i] =
        Subtract<kFrameIdLength>(frame->Id(), codec_header.pid_diff[i]);
  }

  FlattenFrameIdAndRefs(frame, codec_header.inter_layer_predicted);
  return kHandOff;
}

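// In non-flexible (GOF) mode the references are derived from the scalability
// structure: the frame's offset from the start of the group of frames, given
// by its picture id, selects an entry in the GOF that lists the picture id
// diffs of its references.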
RtpVp9RefFinder::FrameDecision RtpVp9RefFinder::ManageFrameGof(
    RtpFrameObject* frame,
    const RTPVideoHeaderVP9& codec_header,
    int64_t unwrapped_tl0) {
  GofInfo* info;
  if (codec_header.ss_data_available) {
    if (codec_header.temporal_idx != 0) {
      RTC_LOG(LS_WARNING) << "Received scalability structure on a non-base "
                             "layer frame. Scalability structure ignored.";
    } else {
      if (codec_header.gof.num_frames_in_gof > kMaxVp9FramesInGof) {
        return kDrop;
      }

      for (size_t i = 0; i < codec_header.gof.num_frames_in_gof; ++i) {
        if (codec_header.gof.num_ref_pics[i] > kMaxVp9RefPics) {
          return kDrop;
        }
      }

      GofInfoVP9 gof = codec_header.gof;
      if (gof.num_frames_in_gof == 0) {
        RTC_LOG(LS_WARNING) << "Number of frames in GOF is zero. Assuming "
                               "the stream has only one temporal layer.";
        gof.SetGofInfoVP9(kTemporalStructureMode1);
      }

      current_ss_idx_ = Add<kMaxGofSaved>(current_ss_idx_, 1);
      scalability_structures_[current_ss_idx_] = gof;
      scalability_structures_[current_ss_idx_].pid_start = frame->Id();
      gof_info_.emplace(
          unwrapped_tl0,
          GofInfo(&scalability_structures_[current_ss_idx_], frame->Id()));
    }

    const auto gof_info_it = gof_info_.find(unwrapped_tl0);
    if (gof_info_it == gof_info_.end())
      return kStash;

    info = &gof_info_it->second;

    if (frame->frame_type() == VideoFrameType::kVideoFrameKey) {
      frame->num_references = 0;
      FrameReceivedVp9(frame->Id(), info);
      FlattenFrameIdAndRefs(frame, codec_header.inter_layer_predicted);
      return kHandOff;
    }
  } else {
    if (frame->frame_type() == VideoFrameType::kVideoFrameKey) {
      RTC_LOG(LS_WARNING) << "Received keyframe without scalability structure";
      return kDrop;
    }

    // TL0PICIDX is incremented on temporal_idx=0 frames of the lowest spatial
    // layer (whose spatial_idx is not necessarily zero). Upper spatial layer
    // frames with inter-layer prediction use the GOF info of their base
    // spatial layer frame.
    const bool use_prev_gof =
        codec_header.temporal_idx == 0 && !codec_header.inter_layer_predicted;
    auto gof_info_it =
        gof_info_.find(use_prev_gof ? unwrapped_tl0 - 1 : unwrapped_tl0);

    // GOF info for this frame is not available yet; stash this frame.
    if (gof_info_it == gof_info_.end())
      return kStash;

    if (codec_header.temporal_idx == 0) {
      gof_info_it = gof_info_
                        .emplace(unwrapped_tl0,
                                 GofInfo(gof_info_it->second.gof, frame->Id()))
                        .first;
    }

    info = &gof_info_it->second;
  }

  // Clean up info for base layers that are too old.
  int64_t old_tl0_pic_idx = unwrapped_tl0 - kMaxGofSaved;
  auto clean_gof_info_to = gof_info_.lower_bound(old_tl0_pic_idx);
  gof_info_.erase(gof_info_.begin(), clean_gof_info_to);

  FrameReceivedVp9(frame->Id(), info);

  // Make sure we don't miss any frame that could potentially have the
  // up-switch flag set.
  if (MissingRequiredFrameVp9(frame->Id(), *info))
    return kStash;

  if (codec_header.temporal_up_switch)
    up_switch_.emplace(frame->Id(), codec_header.temporal_idx);

  // Clean out old info about up-switch frames.
  uint16_t old_picture_id = Subtract<kFrameIdLength>(frame->Id(), 50);
  auto up_switch_erase_to = up_switch_.lower_bound(old_picture_id);
  up_switch_.erase(up_switch_.begin(), up_switch_erase_to);

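  // Frames that are not inter-picture predicted carry no references; all other
  // frames take their references from the GOF entry that corresponds to their
  // position within the group.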
  if (codec_header.inter_pic_predicted) {
    size_t diff = ForwardDiff<uint16_t, kFrameIdLength>(info->gof->pid_start,
                                                        frame->Id());
    size_t gof_idx = diff % info->gof->num_frames_in_gof;

    if (info->gof->num_ref_pics[gof_idx] > EncodedFrame::kMaxFrameReferences) {
      return kDrop;
    }

    // Populate references according to the scalability structure.
    frame->num_references = info->gof->num_ref_pics[gof_idx];
    for (size_t i = 0; i < frame->num_references; ++i) {
      frame->references[i] = Subtract<kFrameIdLength>(
          frame->Id(), info->gof->pid_diff[gof_idx][i]);

      // If this is a reference to a frame earlier than the last up-switch
      // point, then ignore this reference.
      if (UpSwitchInIntervalVp9(frame->Id(), codec_header.temporal_idx,
                                frame->references[i])) {
        --frame->num_references;
      }
    }
  } else {
    frame->num_references = 0;
  }

  FlattenFrameIdAndRefs(frame, codec_header.inter_layer_predicted);
  return kHandOff;
}

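// Returns true if any of the frame's references has a missing frame in a
// lower temporal layer between the reference and `picture_id`, in which case
// the frame cannot be handed off yet.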
bool RtpVp9RefFinder::MissingRequiredFrameVp9(uint16_t picture_id,
                                              const GofInfo& info) {
  size_t diff =
      ForwardDiff<uint16_t, kFrameIdLength>(info.gof->pid_start, picture_id);
  size_t gof_idx = diff % info.gof->num_frames_in_gof;
  size_t temporal_idx = info.gof->temporal_idx[gof_idx];

  if (temporal_idx >= kMaxTemporalLayers) {
    RTC_LOG(LS_WARNING) << "At most " << kMaxTemporalLayers
                        << " temporal layers are supported.";
    return true;
  }

  // For every reference this frame has, check if there is a frame missing in
  // the interval (`ref_pid`, `picture_id`) in any of the lower temporal
  // layers. If so, we are missing a required frame.
  uint8_t num_references = info.gof->num_ref_pics[gof_idx];
  for (size_t i = 0; i < num_references; ++i) {
    uint16_t ref_pid =
        Subtract<kFrameIdLength>(picture_id, info.gof->pid_diff[gof_idx][i]);
    for (size_t l = 0; l < temporal_idx; ++l) {
      auto missing_frame_it = missing_frames_for_layer_[l].lower_bound(ref_pid);
      if (missing_frame_it != missing_frames_for_layer_[l].end() &&
          AheadOf<uint16_t, kFrameIdLength>(picture_id, *missing_frame_it)) {
        return true;
      }
    }
  }
  return false;
}

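// Keeps track of which picture ids are still missing. When `picture_id` is
// ahead of the last picture id seen for this GOF, every picture id in the gap
// is recorded as missing for the temporal layer it maps to; when an older
// picture arrives out of order it is erased from the missing set instead.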
void RtpVp9RefFinder::FrameReceivedVp9(uint16_t picture_id, GofInfo* info) {
  int last_picture_id = info->last_picture_id;
  size_t gof_size = std::min(info->gof->num_frames_in_gof, kMaxVp9FramesInGof);

  // If there is a gap, find which temporal layers the missing frames belong to
  // and add them as missing for those temporal layers. Otherwise, remove this
  // frame from the set of missing frames.
  if (AheadOf<uint16_t, kFrameIdLength>(picture_id, last_picture_id)) {
    size_t diff = ForwardDiff<uint16_t, kFrameIdLength>(info->gof->pid_start,
                                                        last_picture_id);
    size_t gof_idx = diff % gof_size;

    last_picture_id = Add<kFrameIdLength>(last_picture_id, 1);
    while (last_picture_id != picture_id) {
      gof_idx = (gof_idx + 1) % gof_size;
      RTC_CHECK(gof_idx < kMaxVp9FramesInGof);

      size_t temporal_idx = info->gof->temporal_idx[gof_idx];
      if (temporal_idx >= kMaxTemporalLayers) {
        RTC_LOG(LS_WARNING) << "At most " << kMaxTemporalLayers
                            << " temporal layers are supported.";
        return;
      }

      missing_frames_for_layer_[temporal_idx].insert(last_picture_id);
      last_picture_id = Add<kFrameIdLength>(last_picture_id, 1);
    }

    info->last_picture_id = last_picture_id;
  } else {
    size_t diff =
        ForwardDiff<uint16_t, kFrameIdLength>(info->gof->pid_start, picture_id);
    size_t gof_idx = diff % gof_size;
    RTC_CHECK(gof_idx < kMaxVp9FramesInGof);

    size_t temporal_idx = info->gof->temporal_idx[gof_idx];
    if (temporal_idx >= kMaxTemporalLayers) {
      RTC_LOG(LS_WARNING) << "At most " << kMaxTemporalLayers
                          << " temporal layers are supported.";
      return;
    }

    missing_frames_for_layer_[temporal_idx].erase(picture_id);
  }
}

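// Returns true if there is a temporal up-switch frame, belonging to a lower
// temporal layer than `temporal_idx`, with a picture id in the interval
// (`pid_ref`, `picture_id`).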
bool RtpVp9RefFinder::UpSwitchInIntervalVp9(uint16_t picture_id,
                                            uint8_t temporal_idx,
                                            uint16_t pid_ref) {
  for (auto up_switch_it = up_switch_.upper_bound(pid_ref);
       up_switch_it != up_switch_.end() &&
       AheadOf<uint16_t, kFrameIdLength>(picture_id, up_switch_it->first);
       ++up_switch_it) {
    if (up_switch_it->second < temporal_idx)
      return true;
  }

  return false;
}

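// Runs the stashed (non-flexible mode) frames through ManageFrameGof again;
// a frame that was just handed off may have supplied the GOF info or filled
// the gap the stashed frames were waiting for. Repeats until a full pass over
// the stash hands off no further frames.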
void RtpVp9RefFinder::RetryStashedFrames(
    RtpFrameReferenceFinder::ReturnVector& res) {
  bool complete_frame = false;
  do {
    complete_frame = false;
    for (auto it = stashed_frames_.begin(); it != stashed_frames_.end();) {
      const RTPVideoHeaderVP9& codec_header = absl::get<RTPVideoHeaderVP9>(
          it->frame->GetRtpVideoHeader().video_type_header);
      RTC_DCHECK(!codec_header.flexible_mode);
      FrameDecision decision =
          ManageFrameGof(it->frame.get(), codec_header, it->unwrapped_tl0);

      switch (decision) {
        case kStash:
          ++it;
          break;
        case kHandOff:
          complete_frame = true;
          res.push_back(std::move(it->frame));
          [[fallthrough]];
        case kDrop:
          it = stashed_frames_.erase(it);
      }
    }
  } while (complete_frame);
}

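// Maps the wrapping picture ids onto a single monotonically increasing frame
// id space, reserving kMaxSpatialLayers ids per picture so that every spatial
// layer of a picture gets a distinct id. For inter-layer predicted frames the
// preceding id (the lower spatial layer of the same picture) is appended as
// an extra reference.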
void RtpVp9RefFinder::FlattenFrameIdAndRefs(RtpFrameObject* frame,
                                            bool inter_layer_predicted) {
  for (size_t i = 0; i < frame->num_references; ++i) {
    frame->references[i] =
        unwrapper_.Unwrap(frame->references[i]) * kMaxSpatialLayers +
        *frame->SpatialIndex();
  }
  frame->SetId(unwrapper_.Unwrap(frame->Id()) * kMaxSpatialLayers +
               *frame->SpatialIndex());

  if (inter_layer_predicted &&
      frame->num_references + 1 <= EncodedFrame::kMaxFrameReferences) {
    frame->references[frame->num_references] = frame->Id() - 1;
    ++frame->num_references;
  }
}

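// Drops all stashed frames whose first sequence number is behind `seq_num`.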
void RtpVp9RefFinder::ClearTo(uint16_t seq_num) {
  auto it = stashed_frames_.begin();
  while (it != stashed_frames_.end()) {
    if (AheadOf<uint16_t>(seq_num, it->frame->first_seq_num())) {
      it = stashed_frames_.erase(it);
    } else {
      ++it;
    }
  }
}

}  // namespace webrtc