ManageFrameH264 for temporal layers (PART 3/3)

Bug: webrtc:10579
Change-Id: Iec54f6b1231d34c2018f22841c3614ddd0b05612
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/86200
Commit-Queue: Sergey Silkin <ssilkin@webrtc.org>
Reviewed-by: Sergey Silkin <ssilkin@webrtc.org>
Reviewed-by: Stefan Holmer <stefan@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#27820}
diff --git a/modules/video_coding/rtp_frame_reference_finder.cc b/modules/video_coding/rtp_frame_reference_finder.cc
index 8191017..af474d5 100644
--- a/modules/video_coding/rtp_frame_reference_finder.cc
+++ b/modules/video_coding/rtp_frame_reference_finder.cc
@@ -95,6 +95,8 @@
       return ManageFrameVp8(frame);
     case kVideoCodecVP9:
       return ManageFrameVp9(frame);
+    case kVideoCodecH264:
+      return ManageFrameH264(frame);
     default: {
       // Use 15 first bits of frame ID as picture ID if available.
       absl::optional<RTPVideoHeader> video_header = frame->GetRtpVideoHeader();
@@ -671,5 +673,190 @@
   frame->id.picture_id = unwrapper_.Unwrap(frame->id.picture_id);
 }
 
+RtpFrameReferenceFinder::FrameDecision RtpFrameReferenceFinder::ManageFrameH264(
+    RtpFrameObject* frame) {
+  absl::optional<FrameMarking> rtp_frame_marking = frame->GetFrameMarking();
+  if (!rtp_frame_marking) {
+    return ManageFramePidOrSeqNum(std::move(frame), kNoPictureId);
+  }
+
+  uint8_t tid = rtp_frame_marking->temporal_id;
+  bool blSync = rtp_frame_marking->base_layer_sync;
+
+  if (tid == kNoTemporalIdx)
+    return ManageFramePidOrSeqNum(std::move(frame), kNoPictureId);
+
+  frame->id.picture_id = frame->last_seq_num();
+
+  if (frame->frame_type() == VideoFrameType::kVideoFrameKey) {
+    // For H264, use last_seq_num_gop_ to simply store last picture id
+    // as a pair of unpadded and padded sequence numbers.
+    if (last_seq_num_gop_.empty()) {
+      last_seq_num_gop_.insert(std::make_pair(
+          0, std::make_pair(frame->id.picture_id, frame->id.picture_id)));
+    }
+  }
+
+  // Stash if we have no keyframe yet.
+  if (last_seq_num_gop_.empty())
+    return kStash;
+
+  // Check for gap in sequence numbers. Store in |not_yet_received_seq_num_|.
+  if (frame->frame_type() == VideoFrameType::kVideoFrameDelta) {
+    uint16_t last_pic_id_padded = last_seq_num_gop_.begin()->second.second;
+    if (AheadOf<uint16_t>(frame->id.picture_id, last_pic_id_padded)) {
+      do {
+        last_pic_id_padded = last_pic_id_padded + 1;
+        not_yet_received_seq_num_.insert(last_pic_id_padded);
+      } while (last_pic_id_padded != frame->id.picture_id);
+    }
+  }
+
+  int64_t unwrapped_tl0 = tl0_unwrapper_.Unwrap(rtp_frame_marking->tl0_pic_idx);
+
+  // Clean up info for base layers that are too old.
+  int64_t old_tl0_pic_idx = unwrapped_tl0 - kMaxLayerInfo;
+  auto clean_layer_info_to = layer_info_.lower_bound(old_tl0_pic_idx);
+  layer_info_.erase(layer_info_.begin(), clean_layer_info_to);
+
+  // Clean up info about not yet received frames that are too old.
+  uint16_t old_picture_id = frame->id.picture_id - kMaxNotYetReceivedFrames * 2;
+  auto clean_frames_to = not_yet_received_seq_num_.lower_bound(old_picture_id);
+  not_yet_received_seq_num_.erase(not_yet_received_seq_num_.begin(),
+                                  clean_frames_to);
+
+  if (frame->frame_type() == VideoFrameType::kVideoFrameKey) {
+    frame->num_references = 0;
+    layer_info_[unwrapped_tl0].fill(-1);
+    UpdateDataH264(frame, unwrapped_tl0, tid);
+    return kHandOff;
+  }
+
+  auto layer_info_it = layer_info_.find(
+      tid == 0 ? unwrapped_tl0 - 1 : unwrapped_tl0);
+
+  // Stash if we have no base layer frame yet.
+  if (layer_info_it == layer_info_.end())
+    return kStash;
+
+  // Base layer frame. Copy layer info from previous base layer frame.
+  if (tid == 0) {
+    layer_info_it = layer_info_.insert(
+        std::make_pair(unwrapped_tl0, layer_info_it->second)).first;
+    frame->num_references = 1;
+    frame->references[0] = layer_info_it->second[0];
+    UpdateDataH264(frame, unwrapped_tl0, tid);
+    return kHandOff;
+  }
+
+  // This frame only references its base layer frame.
+  if (blSync) {
+    frame->num_references = 1;
+    frame->references[0] = layer_info_it->second[0];
+    UpdateDataH264(frame, unwrapped_tl0, tid);
+    return kHandOff;
+  }
+
+  // Find all references for general frame.
+  frame->num_references = 0;
+  for (uint8_t layer = 0; layer <= tid; ++layer) {
+    // Stash if we have not yet received frames on this temporal layer.
+    if (layer_info_it->second[layer] == -1)
+      return kStash;
+
+    // Drop if the last frame on this layer is ahead of this frame. A layer sync
+    // frame was received after this frame for the same base layer frame.
+    uint16_t last_frame_in_layer = layer_info_it->second[layer];
+    if (AheadOf<uint16_t>(last_frame_in_layer, frame->id.picture_id))
+      return kDrop;
+
+    // Stash and wait for missing frame between this frame and the reference
+    auto not_received_seq_num_it =
+        not_yet_received_seq_num_.upper_bound(last_frame_in_layer);
+    if (not_received_seq_num_it != not_yet_received_seq_num_.end() &&
+        AheadOf<uint16_t>(frame->id.picture_id, *not_received_seq_num_it)) {
+      return kStash;
+    }
+
+    if (!(AheadOf<uint16_t>(frame->id.picture_id, last_frame_in_layer))) {
+      RTC_LOG(LS_WARNING) << "Frame with picture id " << frame->id.picture_id
+                          << " and packet range [" << frame->first_seq_num()
+                          << ", " << frame->last_seq_num()
+                          << "] already received, "
+                          << " dropping frame.";
+      return kDrop;
+    }
+
+    ++frame->num_references;
+    frame->references[layer] = last_frame_in_layer;
+  }
+
+  UpdateDataH264(frame, unwrapped_tl0, tid);
+  return kHandOff;
+}
+
+void RtpFrameReferenceFinder::UpdateLastPictureIdWithPaddingH264() {
+  auto seq_num_it = last_seq_num_gop_.begin();
+
+  // Check if next sequence number is in a stashed padding packet.
+  uint16_t next_padded_seq_num = seq_num_it->second.second + 1;
+  auto padding_seq_num_it = stashed_padding_.lower_bound(next_padded_seq_num);
+
+  // Check for more consecutive padding packets to increment
+  // the "last-picture-id-with-padding" and remove the stashed packets.
+  while (padding_seq_num_it != stashed_padding_.end() &&
+         *padding_seq_num_it == next_padded_seq_num) {
+    seq_num_it->second.second = next_padded_seq_num;
+    ++next_padded_seq_num;
+    padding_seq_num_it = stashed_padding_.erase(padding_seq_num_it);
+  }
+}
+
+void RtpFrameReferenceFinder::UpdateLayerInfoH264(RtpFrameObject* frame,
+                                                  int64_t unwrapped_tl0,
+                                                  uint8_t temporal_idx) {
+  auto layer_info_it = layer_info_.find(unwrapped_tl0);
+
+  // Update this layer info and newer.
+  while (layer_info_it != layer_info_.end()) {
+    if (layer_info_it->second[temporal_idx] != -1 &&
+        AheadOf<uint16_t>(layer_info_it->second[temporal_idx],
+                          frame->id.picture_id)) {
+      // Not a newer frame. No subsequent layer info needs update.
+      break;
+    }
+
+    layer_info_it->second[temporal_idx] = frame->id.picture_id;
+    ++unwrapped_tl0;
+    layer_info_it = layer_info_.find(unwrapped_tl0);
+  }
+
+  for (size_t i = 0; i < frame->num_references; ++i)
+    frame->references[i] = rtp_seq_num_unwrapper_.Unwrap(frame->references[i]);
+  frame->id.picture_id = rtp_seq_num_unwrapper_.Unwrap(frame->id.picture_id);
+}
+
+void RtpFrameReferenceFinder::UpdateDataH264(RtpFrameObject* frame,
+                                             int64_t unwrapped_tl0,
+                                             uint8_t temporal_idx) {
+  // Update last_seq_num_gop_ entry for last picture id.
+  auto seq_num_it = last_seq_num_gop_.begin();
+  uint16_t last_pic_id = seq_num_it->second.first;
+  if (AheadOf<uint16_t>(frame->id.picture_id, last_pic_id)) {
+    seq_num_it->second.first = frame->id.picture_id;
+    seq_num_it->second.second = frame->id.picture_id;
+  }
+  UpdateLastPictureIdWithPaddingH264();
+
+  UpdateLayerInfoH264(frame, unwrapped_tl0, temporal_idx);
+
+  // Remove any current packets from |not_yet_received_seq_num_|.
+  uint16_t last_seq_num_padded = seq_num_it->second.second;
+  for (uint16_t n = frame->first_seq_num();
+       AheadOrAt(last_seq_num_padded, n); ++n) {
+    not_yet_received_seq_num_.erase(n);
+  }
+}
+
 }  // namespace video_coding
 }  // namespace webrtc