Don't store RtpPacketInfo in the PacketBuffer.
Historically the PacketBuffer used a callback for assembled frames, and because of that RtpPacketInfos were piped through it even though they didn't have anything to do with the PacketBuffer.
With this CL RtpPacketInfos are stored in the RtpVideoStreamReceiver(2) instead.
Bug: webrtc:12579
Change-Id: Ia6285b59e135910eee7234b89b23425bb0fc0d2b
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/215320
Reviewed-by: Ilya Nikolaevskiy <ilnik@webrtc.org>
Commit-Queue: Philip Eliasson <philipel@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#33980}
diff --git a/modules/video_coding/packet_buffer.cc b/modules/video_coding/packet_buffer.cc
index 99f2891..c98ae00 100644
--- a/modules/video_coding/packet_buffer.cc
+++ b/modules/video_coding/packet_buffer.cc
@@ -35,20 +35,13 @@
namespace video_coding {
PacketBuffer::Packet::Packet(const RtpPacketReceived& rtp_packet,
- const RTPVideoHeader& video_header,
- Timestamp receive_time)
+ const RTPVideoHeader& video_header)
: marker_bit(rtp_packet.Marker()),
payload_type(rtp_packet.PayloadType()),
seq_num(rtp_packet.SequenceNumber()),
timestamp(rtp_packet.Timestamp()),
times_nacked(-1),
- video_header(video_header),
- packet_info(rtp_packet.Ssrc(),
- rtp_packet.Csrcs(),
- rtp_packet.Timestamp(),
- /*audio_level=*/absl::nullopt,
- rtp_packet.GetExtension<AbsoluteCaptureTimeExtension>(),
- receive_time) {}
+ video_header(video_header) {}
PacketBuffer::PacketBuffer(size_t start_buffer_size, size_t max_buffer_size)
: max_size_(max_buffer_size),
diff --git a/modules/video_coding/packet_buffer.h b/modules/video_coding/packet_buffer.h
index e8d2446..f4dbe31 100644
--- a/modules/video_coding/packet_buffer.h
+++ b/modules/video_coding/packet_buffer.h
@@ -34,8 +34,7 @@
struct Packet {
Packet() = default;
Packet(const RtpPacketReceived& rtp_packet,
- const RTPVideoHeader& video_header,
- Timestamp receive_time);
+ const RTPVideoHeader& video_header);
Packet(const Packet&) = delete;
Packet(Packet&&) = delete;
Packet& operator=(const Packet&) = delete;
@@ -64,8 +63,6 @@
rtc::CopyOnWriteBuffer video_payload;
RTPVideoHeader video_header;
-
- RtpPacketInfo packet_info;
};
struct InsertResult {
std::vector<std::unique_ptr<Packet>> packets;
diff --git a/video/rtp_video_stream_receiver.cc b/video/rtp_video_stream_receiver.cc
index b84b7c8..5c877c5 100644
--- a/video/rtp_video_stream_receiver.cc
+++ b/video/rtp_video_stream_receiver.cc
@@ -506,18 +506,9 @@
const RtpPacketReceived& rtp_packet,
const RTPVideoHeader& video) {
RTC_DCHECK_RUN_ON(&worker_task_checker_);
- auto packet = std::make_unique<video_coding::PacketBuffer::Packet>(
- rtp_packet, video, clock_->CurrentTime());
- // Try to extrapolate absolute capture time if it is missing.
- packet->packet_info.set_absolute_capture_time(
- absolute_capture_time_receiver_.OnReceivePacket(
- AbsoluteCaptureTimeReceiver::GetSource(packet->packet_info.ssrc(),
- packet->packet_info.csrcs()),
- packet->packet_info.rtp_timestamp(),
- // Assume frequency is the same one for all video frames.
- kVideoPayloadTypeFrequency,
- packet->packet_info.absolute_capture_time()));
+ auto packet =
+ std::make_unique<video_coding::PacketBuffer::Packet>(rtp_packet, video);
RTPVideoHeader& video_header = packet->video_header;
video_header.rotation = kVideoRotation_0;
@@ -648,6 +639,29 @@
video_coding::PacketBuffer::InsertResult insert_result;
{
MutexLock lock(&packet_buffer_lock_);
+ int64_t unwrapped_rtp_seq_num =
+ rtp_seq_num_unwrapper_.Unwrap(rtp_packet.SequenceNumber());
+ auto& packet_info =
+ packet_infos_
+ .emplace(
+ unwrapped_rtp_seq_num,
+ RtpPacketInfo(
+ rtp_packet.Ssrc(), rtp_packet.Csrcs(),
+ rtp_packet.Timestamp(),
+ /*audio_level=*/absl::nullopt,
+ rtp_packet.GetExtension<AbsoluteCaptureTimeExtension>(),
+ /*receive_time_ms=*/clock_->TimeInMilliseconds()))
+ .first->second;
+
+ // Try to extrapolate absolute capture time if it is missing.
+ packet_info.set_absolute_capture_time(
+ absolute_capture_time_receiver_.OnReceivePacket(
+ AbsoluteCaptureTimeReceiver::GetSource(packet_info.ssrc(),
+ packet_info.csrcs()),
+ packet_info.rtp_timestamp(),
+ // Assume frequency is the same one for all video frames.
+ kVideoPayloadTypeFrequency, packet_info.absolute_capture_time()));
+
insert_result = packet_buffer_.InsertPacket(std::move(packet));
}
OnInsertedPacket(std::move(insert_result));
@@ -737,69 +751,83 @@
void RtpVideoStreamReceiver::OnInsertedPacket(
video_coding::PacketBuffer::InsertResult result) {
- video_coding::PacketBuffer::Packet* first_packet = nullptr;
- int max_nack_count;
- int64_t min_recv_time;
- int64_t max_recv_time;
- std::vector<rtc::ArrayView<const uint8_t>> payloads;
- RtpPacketInfos::vector_type packet_infos;
+ std::vector<std::unique_ptr<RtpFrameObject>> assembled_frames;
+ {
+ MutexLock lock(&packet_buffer_lock_);
+ video_coding::PacketBuffer::Packet* first_packet = nullptr;
+ int max_nack_count;
+ int64_t min_recv_time;
+ int64_t max_recv_time;
+ std::vector<rtc::ArrayView<const uint8_t>> payloads;
+ RtpPacketInfos::vector_type packet_infos;
- bool frame_boundary = true;
- for (auto& packet : result.packets) {
- // PacketBuffer promisses frame boundaries are correctly set on each
- // packet. Document that assumption with the DCHECKs.
- RTC_DCHECK_EQ(frame_boundary, packet->is_first_packet_in_frame());
- if (packet->is_first_packet_in_frame()) {
- first_packet = packet.get();
- max_nack_count = packet->times_nacked;
- min_recv_time = packet->packet_info.receive_time().ms();
- max_recv_time = packet->packet_info.receive_time().ms();
- payloads.clear();
- packet_infos.clear();
- } else {
- max_nack_count = std::max(max_nack_count, packet->times_nacked);
- min_recv_time =
- std::min(min_recv_time, packet->packet_info.receive_time().ms());
- max_recv_time =
- std::max(max_recv_time, packet->packet_info.receive_time().ms());
- }
- payloads.emplace_back(packet->video_payload);
- packet_infos.push_back(packet->packet_info);
-
- frame_boundary = packet->is_last_packet_in_frame();
- if (packet->is_last_packet_in_frame()) {
- auto depacketizer_it = payload_type_map_.find(first_packet->payload_type);
- RTC_CHECK(depacketizer_it != payload_type_map_.end());
-
- rtc::scoped_refptr<EncodedImageBuffer> bitstream =
- depacketizer_it->second->AssembleFrame(payloads);
- if (!bitstream) {
- // Failed to assemble a frame. Discard and continue.
- continue;
+ bool frame_boundary = true;
+ for (auto& packet : result.packets) {
+      // PacketBuffer promises frame boundaries are correctly set on each
+ // packet. Document that assumption with the DCHECKs.
+ RTC_DCHECK_EQ(frame_boundary, packet->is_first_packet_in_frame());
+ int64_t unwrapped_rtp_seq_num =
+ rtp_seq_num_unwrapper_.Unwrap(packet->seq_num);
+ RTC_DCHECK(packet_infos_.count(unwrapped_rtp_seq_num) > 0);
+ RtpPacketInfo& packet_info = packet_infos_[unwrapped_rtp_seq_num];
+ if (packet->is_first_packet_in_frame()) {
+ first_packet = packet.get();
+ max_nack_count = packet->times_nacked;
+ min_recv_time = packet_info.receive_time().ms();
+ max_recv_time = packet_info.receive_time().ms();
+ payloads.clear();
+ packet_infos.clear();
+ } else {
+ max_nack_count = std::max(max_nack_count, packet->times_nacked);
+ min_recv_time =
+ std::min(min_recv_time, packet_info.receive_time().ms());
+ max_recv_time =
+ std::max(max_recv_time, packet_info.receive_time().ms());
}
+ payloads.emplace_back(packet->video_payload);
+ packet_infos.push_back(packet_info);
- const video_coding::PacketBuffer::Packet& last_packet = *packet;
- OnAssembledFrame(std::make_unique<RtpFrameObject>(
- first_packet->seq_num, //
- last_packet.seq_num, //
- last_packet.marker_bit, //
- max_nack_count, //
- min_recv_time, //
- max_recv_time, //
- first_packet->timestamp, //
- ntp_estimator_.Estimate(first_packet->timestamp), //
- last_packet.video_header.video_timing, //
- first_packet->payload_type, //
- first_packet->codec(), //
- last_packet.video_header.rotation, //
- last_packet.video_header.content_type, //
- first_packet->video_header, //
- last_packet.video_header.color_space, //
- RtpPacketInfos(std::move(packet_infos)), //
- std::move(bitstream)));
+ frame_boundary = packet->is_last_packet_in_frame();
+ if (packet->is_last_packet_in_frame()) {
+ auto depacketizer_it =
+ payload_type_map_.find(first_packet->payload_type);
+ RTC_CHECK(depacketizer_it != payload_type_map_.end());
+
+ rtc::scoped_refptr<EncodedImageBuffer> bitstream =
+ depacketizer_it->second->AssembleFrame(payloads);
+ if (!bitstream) {
+ // Failed to assemble a frame. Discard and continue.
+ continue;
+ }
+
+ const video_coding::PacketBuffer::Packet& last_packet = *packet;
+ assembled_frames.push_back(std::make_unique<RtpFrameObject>(
+ first_packet->seq_num, //
+ last_packet.seq_num, //
+ last_packet.marker_bit, //
+ max_nack_count, //
+ min_recv_time, //
+ max_recv_time, //
+ first_packet->timestamp, //
+ ntp_estimator_.Estimate(first_packet->timestamp), //
+ last_packet.video_header.video_timing, //
+ first_packet->payload_type, //
+ first_packet->codec(), //
+ last_packet.video_header.rotation, //
+ last_packet.video_header.content_type, //
+ first_packet->video_header, //
+ last_packet.video_header.color_space, //
+ RtpPacketInfos(std::move(packet_infos)), //
+ std::move(bitstream)));
+ }
}
- }
- RTC_DCHECK(frame_boundary);
+ RTC_DCHECK(frame_boundary);
+
+ if (result.buffer_cleared) {
+ packet_infos_.clear();
+ }
+ } // packet_buffer_lock_
+
if (result.buffer_cleared) {
{
MutexLock lock(&sync_info_lock_);
@@ -809,6 +837,10 @@
}
RequestKeyFrame();
}
+
+ for (auto& frame : assembled_frames) {
+ OnAssembledFrame(std::move(frame));
+ }
}
void RtpVideoStreamReceiver::OnAssembledFrame(
@@ -851,8 +883,8 @@
if (frame_is_newer) {
// When we reset the |reference_finder_| we don't want new picture ids
// to overlap with old picture ids. To ensure that doesn't happen we
- // start from the |last_completed_picture_id_| and add an offset in case
- // of reordering.
+ // start from the |last_completed_picture_id_| and add an offset in
+ // case of reordering.
reference_finder_ = std::make_unique<RtpFrameReferenceFinder>(
this,
last_completed_picture_id_ + std::numeric_limits<uint16_t>::max());
@@ -1028,12 +1060,14 @@
MutexLock lock(&reference_finder_lock_);
reference_finder_->PaddingReceived(seq_num);
}
+
video_coding::PacketBuffer::InsertResult insert_result;
{
MutexLock lock(&packet_buffer_lock_);
insert_result = packet_buffer_.InsertPadding(seq_num);
}
OnInsertedPacket(std::move(insert_result));
+
if (nack_module_) {
nack_module_->OnReceivedPacket(seq_num, /* is_keyframe = */ false,
/* is _recovered = */ false);
@@ -1118,6 +1152,9 @@
{
MutexLock lock(&packet_buffer_lock_);
packet_buffer_.ClearTo(seq_num);
+ int64_t unwrapped_rtp_seq_num = rtp_seq_num_unwrapper_.Unwrap(seq_num);
+ packet_infos_.erase(packet_infos_.begin(),
+ packet_infos_.upper_bound(unwrapped_rtp_seq_num));
}
MutexLock lock(&reference_finder_lock_);
reference_finder_->ClearTo(seq_num);
diff --git a/video/rtp_video_stream_receiver.h b/video/rtp_video_stream_receiver.h
index 090488c..aa86a0c 100644
--- a/video/rtp_video_stream_receiver.h
+++ b/video/rtp_video_stream_receiver.h
@@ -303,7 +303,8 @@
ParseGenericDependenciesResult ParseGenericDependenciesExtension(
const RtpPacketReceived& rtp_packet,
RTPVideoHeader* video_header) RTC_RUN_ON(worker_task_checker_);
- void OnAssembledFrame(std::unique_ptr<RtpFrameObject> frame);
+ void OnAssembledFrame(std::unique_ptr<RtpFrameObject> frame)
+ RTC_LOCKS_EXCLUDED(packet_buffer_lock_);
void UpdatePacketReceiveTimestamps(const RtpPacketReceived& packet,
bool is_keyframe)
RTC_RUN_ON(worker_task_checker_);
@@ -407,6 +408,11 @@
rtc::scoped_refptr<RtpVideoStreamReceiverFrameTransformerDelegate>
frame_transformer_delegate_;
+
+ SeqNumUnwrapper<uint16_t> rtp_seq_num_unwrapper_
+ RTC_GUARDED_BY(packet_buffer_lock_);
+ std::map<int64_t, RtpPacketInfo> packet_infos_
+ RTC_GUARDED_BY(packet_buffer_lock_);
};
} // namespace webrtc
diff --git a/video/rtp_video_stream_receiver2.cc b/video/rtp_video_stream_receiver2.cc
index 9e68fa2..40fe8fe 100644
--- a/video/rtp_video_stream_receiver2.cc
+++ b/video/rtp_video_stream_receiver2.cc
@@ -473,18 +473,31 @@
const RtpPacketReceived& rtp_packet,
const RTPVideoHeader& video) {
RTC_DCHECK_RUN_ON(&worker_task_checker_);
- auto packet = std::make_unique<video_coding::PacketBuffer::Packet>(
- rtp_packet, video, clock_->CurrentTime());
+
+ auto packet =
+ std::make_unique<video_coding::PacketBuffer::Packet>(rtp_packet, video);
+
+ int64_t unwrapped_rtp_seq_num =
+ rtp_seq_num_unwrapper_.Unwrap(rtp_packet.SequenceNumber());
+ auto& packet_info =
+ packet_infos_
+ .emplace(
+ unwrapped_rtp_seq_num,
+ RtpPacketInfo(
+ rtp_packet.Ssrc(), rtp_packet.Csrcs(), rtp_packet.Timestamp(),
+ /*audio_level=*/absl::nullopt,
+ rtp_packet.GetExtension<AbsoluteCaptureTimeExtension>(),
+                  /*receive_time=*/clock_->CurrentTime()))
+ .first->second;
// Try to extrapolate absolute capture time if it is missing.
- packet->packet_info.set_absolute_capture_time(
+ packet_info.set_absolute_capture_time(
absolute_capture_time_receiver_.OnReceivePacket(
- AbsoluteCaptureTimeReceiver::GetSource(packet->packet_info.ssrc(),
- packet->packet_info.csrcs()),
- packet->packet_info.rtp_timestamp(),
+ AbsoluteCaptureTimeReceiver::GetSource(packet_info.ssrc(),
+ packet_info.csrcs()),
+ packet_info.rtp_timestamp(),
// Assume frequency is the same one for all video frames.
- kVideoPayloadTypeFrequency,
- packet->packet_info.absolute_capture_time()));
+ kVideoPayloadTypeFrequency, packet_info.absolute_capture_time()));
RTPVideoHeader& video_header = packet->video_header;
video_header.rotation = kVideoRotation_0;
@@ -715,22 +728,24 @@
// PacketBuffer promisses frame boundaries are correctly set on each
// packet. Document that assumption with the DCHECKs.
RTC_DCHECK_EQ(frame_boundary, packet->is_first_packet_in_frame());
+ int64_t unwrapped_rtp_seq_num =
+ rtp_seq_num_unwrapper_.Unwrap(packet->seq_num);
+ RTC_DCHECK(packet_infos_.count(unwrapped_rtp_seq_num) > 0);
+ RtpPacketInfo& packet_info = packet_infos_[unwrapped_rtp_seq_num];
if (packet->is_first_packet_in_frame()) {
first_packet = packet.get();
max_nack_count = packet->times_nacked;
- min_recv_time = packet->packet_info.receive_time().ms();
- max_recv_time = packet->packet_info.receive_time().ms();
+ min_recv_time = packet_info.receive_time().ms();
+ max_recv_time = packet_info.receive_time().ms();
payloads.clear();
packet_infos.clear();
} else {
max_nack_count = std::max(max_nack_count, packet->times_nacked);
- min_recv_time =
- std::min(min_recv_time, packet->packet_info.receive_time().ms());
- max_recv_time =
- std::max(max_recv_time, packet->packet_info.receive_time().ms());
+ min_recv_time = std::min(min_recv_time, packet_info.receive_time().ms());
+ max_recv_time = std::max(max_recv_time, packet_info.receive_time().ms());
}
payloads.emplace_back(packet->video_payload);
- packet_infos.push_back(packet->packet_info);
+ packet_infos.push_back(packet_info);
frame_boundary = packet->is_last_packet_in_frame();
if (packet->is_last_packet_in_frame()) {
@@ -770,6 +785,7 @@
last_received_rtp_system_time_.reset();
last_received_keyframe_rtp_system_time_.reset();
last_received_keyframe_rtp_timestamp_.reset();
+ packet_infos_.clear();
RequestKeyFrame();
}
}
@@ -1053,6 +1069,9 @@
}
if (seq_num != -1) {
+ int64_t unwrapped_rtp_seq_num = rtp_seq_num_unwrapper_.Unwrap(seq_num);
+ packet_infos_.erase(packet_infos_.begin(),
+ packet_infos_.upper_bound(unwrapped_rtp_seq_num));
packet_buffer_.ClearTo(seq_num);
reference_finder_->ClearTo(seq_num);
}
diff --git a/video/rtp_video_stream_receiver2.h b/video/rtp_video_stream_receiver2.h
index 6649246..a524f4b 100644
--- a/video/rtp_video_stream_receiver2.h
+++ b/video/rtp_video_stream_receiver2.h
@@ -357,6 +357,11 @@
rtc::scoped_refptr<RtpVideoStreamReceiverFrameTransformerDelegate>
frame_transformer_delegate_;
+
+ SeqNumUnwrapper<uint16_t> rtp_seq_num_unwrapper_
+ RTC_GUARDED_BY(worker_task_checker_);
+ std::map<int64_t, RtpPacketInfo> packet_infos_
+ RTC_GUARDED_BY(worker_task_checker_);
};
} // namespace webrtc