Add plumbing of RtpPacketInfos to each VideoFrame as input for SourceTracker.

This change plumbs RtpPacketInfo from RtpVideoStreamReceiver::OnRtpPacket() through to VideoReceiveStream::OnFrame() for video. It is a step towards replacing the non-spec-compliant ContributingSources, which updates itself at packet-receive time, with the spec-compliant SourceTracker, which will update itself at frame-delivery-to-track time.
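
In rough terms, each VCMPacket now carries an RtpPacketInfo captured at
receive time, PacketBuffer bundles the infos of all packets in a frame
into an RtpPacketInfos, and that bundle is moved through RtpFrameObject,
EncodedImage and VideoFrame so that SourceTracker can read it when the
frame reaches the track. A minimal, self-contained sketch of the pattern
(PacketInfo, PacketInfos and Frame below are simplified stand-ins for
illustration, not the real webrtc classes):

  #include <cstddef>
  #include <cstdint>
  #include <utility>
  #include <vector>

  struct PacketInfo {  // stand-in for webrtc::RtpPacketInfo
    uint32_t ssrc = 0;
    int64_t receive_time_ms = 0;
  };

  class PacketInfos {  // stand-in for webrtc::RtpPacketInfos
   public:
    using vector_type = std::vector<PacketInfo>;
    PacketInfos() = default;
    explicit PacketInfos(vector_type entries) : entries_(std::move(entries)) {}
    std::size_t size() const { return entries_.size(); }

   private:
    vector_type entries_;
  };

  struct Frame {  // stand-in for webrtc::VideoFrame
    PacketInfos packet_infos;
  };

  // Frame assembly: collect the per-packet infos of the packets that make
  // up the frame and move them onto the frame as one bundle, so they are
  // available at frame-delivery time rather than only at receive time.
  Frame AssembleFrame(const std::vector<PacketInfo>& packets_in_frame) {
    PacketInfos::vector_type infos(packets_in_frame.begin(),
                                   packets_in_frame.end());
    Frame frame;
    frame.packet_infos = PacketInfos(std::move(infos));
    return frame;
  }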

Bug: webrtc:10668
Change-Id: Ib97d430530c5a8487d3b129936c7c51e118889bd
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/139891
Reviewed-by: Stefan Holmer <stefan@webrtc.org>
Reviewed-by: Niels Moller <nisse@webrtc.org>
Commit-Queue: Chen Xing <chxg@google.com>
Cr-Commit-Position: refs/heads/master@{#28332}
diff --git a/api/video/encoded_image.h b/api/video/encoded_image.h
index 9091c02..754b58b 100644
--- a/api/video/encoded_image.h
+++ b/api/video/encoded_image.h
@@ -13,8 +13,10 @@
 
 #include <stdint.h>
 #include <map>
+#include <utility>
 
 #include "absl/types/optional.h"
+#include "api/rtp_packet_infos.h"
 #include "api/scoped_refptr.h"
 #include "api/video/color_space.h"
 #include "api/video/video_codec_constants.h"
@@ -114,6 +116,11 @@
     color_space_ = color_space;
   }
 
+  const RtpPacketInfos& PacketInfos() const { return packet_infos_; }
+  void SetPacketInfos(RtpPacketInfos packet_infos) {
+    packet_infos_ = std::move(packet_infos);
+  }
+
   bool RetransmissionAllowed() const { return retransmission_allowed_; }
   void SetRetransmissionAllowed(bool retransmission_allowed) {
     retransmission_allowed_ = retransmission_allowed;
@@ -210,6 +217,11 @@
   absl::optional<int> spatial_index_;
   std::map<int, size_t> spatial_layer_frame_size_bytes_;
   absl::optional<webrtc::ColorSpace> color_space_;
+  // Information about packets used to assemble this video frame. This is needed
+  // by |SourceTracker| when the frame is delivered to the RTCRtpReceiver's
+  // MediaStreamTrack, in order to implement getContributingSources(). See:
+  // https://w3c.github.io/webrtc-pc/#dom-rtcrtpreceiver-getcontributingsources
+  RtpPacketInfos packet_infos_;
   bool retransmission_allowed_ = true;
 };
 
diff --git a/api/video/video_frame.cc b/api/video/video_frame.cc
index 8040536..2ef8d8d 100644
--- a/api/video/video_frame.cc
+++ b/api/video/video_frame.cc
@@ -11,6 +11,7 @@
 #include "api/video/video_frame.h"
 
 #include <algorithm>
+#include <utility>
 
 #include "rtc_base/checks.h"
 #include "rtc_base/time_utils.h"
@@ -66,7 +67,8 @@
 VideoFrame VideoFrame::Builder::build() {
   RTC_CHECK(video_frame_buffer_ != nullptr);
   return VideoFrame(id_, video_frame_buffer_, timestamp_us_, timestamp_rtp_,
-                    ntp_time_ms_, rotation_, color_space_, update_rect_);
+                    ntp_time_ms_, rotation_, color_space_, update_rect_,
+                    packet_infos_);
 }
 
 VideoFrame::Builder& VideoFrame::Builder::set_video_frame_buffer(
@@ -127,6 +129,12 @@
   return *this;
 }
 
+VideoFrame::Builder& VideoFrame::Builder::set_packet_infos(
+    RtpPacketInfos packet_infos) {
+  packet_infos_ = std::move(packet_infos);
+  return *this;
+}
+
 VideoFrame::VideoFrame(const rtc::scoped_refptr<VideoFrameBuffer>& buffer,
                        webrtc::VideoRotation rotation,
                        int64_t timestamp_us)
@@ -157,7 +165,8 @@
                        int64_t ntp_time_ms,
                        VideoRotation rotation,
                        const absl::optional<ColorSpace>& color_space,
-                       const absl::optional<UpdateRect>& update_rect)
+                       const absl::optional<UpdateRect>& update_rect,
+                       RtpPacketInfos packet_infos)
     : id_(id),
       video_frame_buffer_(buffer),
       timestamp_rtp_(timestamp_rtp),
@@ -166,7 +175,8 @@
       rotation_(rotation),
       color_space_(color_space),
       update_rect_(update_rect.value_or(UpdateRect{
-          0, 0, video_frame_buffer_->width(), video_frame_buffer_->height()})) {
+          0, 0, video_frame_buffer_->width(), video_frame_buffer_->height()})),
+      packet_infos_(std::move(packet_infos)) {
   RTC_DCHECK_GE(update_rect_.offset_x, 0);
   RTC_DCHECK_GE(update_rect_.offset_y, 0);
   RTC_DCHECK_LE(update_rect_.offset_x + update_rect_.width, width());
diff --git a/api/video/video_frame.h b/api/video/video_frame.h
index 5e04c1b..a84a2a5 100644
--- a/api/video/video_frame.h
+++ b/api/video/video_frame.h
@@ -12,8 +12,10 @@
 #define API_VIDEO_VIDEO_FRAME_H_
 
 #include <stdint.h>
+#include <utility>
 
 #include "absl/types/optional.h"
+#include "api/rtp_packet_infos.h"
 #include "api/scoped_refptr.h"
 #include "api/video/color_space.h"
 #include "api/video/hdr_metadata.h"
@@ -62,6 +64,7 @@
     Builder& set_color_space(const ColorSpace* color_space);
     Builder& set_id(uint16_t id);
     Builder& set_update_rect(const UpdateRect& update_rect);
+    Builder& set_packet_infos(RtpPacketInfos packet_infos);
 
    private:
     uint16_t id_ = 0;
@@ -72,6 +75,7 @@
     VideoRotation rotation_ = kVideoRotation_0;
     absl::optional<ColorSpace> color_space_;
     absl::optional<UpdateRect> update_rect_;
+    RtpPacketInfos packet_infos_;
   };
 
   // To be deprecated. Migrate all use to Builder.
@@ -181,6 +185,13 @@
     update_rect_ = update_rect;
   }
 
+  // Get information about packets used to assemble this video frame. Might be
+  // empty if the information isn't available.
+  const RtpPacketInfos& packet_infos() const { return packet_infos_; }
+  void set_packet_infos(RtpPacketInfos value) {
+    packet_infos_ = std::move(value);
+  }
+
  private:
   VideoFrame(uint16_t id,
              const rtc::scoped_refptr<VideoFrameBuffer>& buffer,
@@ -189,7 +200,8 @@
              int64_t ntp_time_ms,
              VideoRotation rotation,
              const absl::optional<ColorSpace>& color_space,
-             const absl::optional<UpdateRect>& update_rect);
+             const absl::optional<UpdateRect>& update_rect,
+             RtpPacketInfos packet_infos);
 
   uint16_t id_;
   // An opaque reference counted handle that stores the pixel data.
@@ -202,6 +214,11 @@
   // Updated since the last frame area. Unless set explicitly, will always be
   // a full frame rectangle.
   UpdateRect update_rect_;
+  // Information about packets used to assemble this video frame. This is needed
+  // by |SourceTracker| when the frame is delivered to the RTCRtpReceiver's
+  // MediaStreamTrack, in order to implement getContributingSources(). See:
+  // https://w3c.github.io/webrtc-pc/#dom-rtcrtpreceiver-getcontributingsources
+  RtpPacketInfos packet_infos_;
 };
 
 }  // namespace webrtc
diff --git a/common_video/test/utilities.cc b/common_video/test/utilities.cc
index 5769f74..c2a3266 100644
--- a/common_video/test/utilities.cc
+++ b/common_video/test/utilities.cc
@@ -9,6 +9,8 @@
  */
 #include "common_video/test/utilities.h"
 
+#include <utility>
+
 namespace webrtc {
 
 HdrMetadata CreateTestHdrMetadata() {
@@ -39,4 +41,8 @@
       with_hdr_metadata ? &hdr_metadata : nullptr);
 }
 
+RtpPacketInfos CreatePacketInfos(size_t count) {
+  return RtpPacketInfos(RtpPacketInfos::vector_type(count));
+}
+
 }  // namespace webrtc
diff --git a/common_video/test/utilities.h b/common_video/test/utilities.h
index d50df62..7e15cf9 100644
--- a/common_video/test/utilities.h
+++ b/common_video/test/utilities.h
@@ -11,12 +11,16 @@
 #ifndef COMMON_VIDEO_TEST_UTILITIES_H_
 #define COMMON_VIDEO_TEST_UTILITIES_H_
 
+#include <initializer_list>
+
+#include "api/rtp_packet_infos.h"
 #include "api/video/color_space.h"
 
 namespace webrtc {
 
 HdrMetadata CreateTestHdrMetadata();
 ColorSpace CreateTestColorSpace(bool with_hdr_metadata);
+RtpPacketInfos CreatePacketInfos(size_t count);
 
 }  // namespace webrtc
 #endif  // COMMON_VIDEO_TEST_UTILITIES_H_
diff --git a/media/base/fake_video_renderer.cc b/media/base/fake_video_renderer.cc
index 4253ba4..801f81d 100644
--- a/media/base/fake_video_renderer.cc
+++ b/media/base/fake_video_renderer.cc
@@ -30,6 +30,7 @@
   timestamp_us_ = frame.timestamp_us();
   ntp_timestamp_ms_ = frame.ntp_time_ms();
   color_space_ = frame.color_space();
+  packet_infos_ = frame.packet_infos();
   frame_rendered_event_.Set();
 }
 
diff --git a/media/base/fake_video_renderer.h b/media/base/fake_video_renderer.h
index e04bb3e..ba67bf0 100644
--- a/media/base/fake_video_renderer.h
+++ b/media/base/fake_video_renderer.h
@@ -71,6 +71,11 @@
     return color_space_;
   }
 
+  webrtc::RtpPacketInfos packet_infos() const {
+    rtc::CritScope cs(&crit_);
+    return packet_infos_;
+  }
+
   bool WaitForRenderedFrame(int64_t timeout_ms);
 
  private:
@@ -138,6 +143,7 @@
   rtc::CriticalSection crit_;
   rtc::Event frame_rendered_event_;
   absl::optional<webrtc::ColorSpace> color_space_;
+  webrtc::RtpPacketInfos packet_infos_;
 };
 
 }  // namespace cricket
diff --git a/modules/video_coding/codecs/multiplex/multiplex_decoder_adapter.cc b/modules/video_coding/codecs/multiplex/multiplex_decoder_adapter.cc
index d1c8220..fa1d2b9 100644
--- a/modules/video_coding/codecs/multiplex/multiplex_decoder_adapter.cc
+++ b/modules/video_coding/codecs/multiplex/multiplex_decoder_adapter.cc
@@ -262,6 +262,7 @@
                                 .set_timestamp_us(0)
                                 .set_rotation(decoded_image->rotation())
                                 .set_id(decoded_image->id())
+                                .set_packet_infos(decoded_image->packet_infos())
                                 .build();
   decoded_complete_callback_->Decoded(merged_image, decode_time_ms, qp);
 }
diff --git a/modules/video_coding/codecs/multiplex/multiplex_encoder_adapter.cc b/modules/video_coding/codecs/multiplex/multiplex_encoder_adapter.cc
index 59d59c1..bfc03f2 100644
--- a/modules/video_coding/codecs/multiplex/multiplex_encoder_adapter.cc
+++ b/modules/video_coding/codecs/multiplex/multiplex_encoder_adapter.cc
@@ -205,6 +205,7 @@
                                .set_timestamp_ms(input_image.render_time_ms())
                                .set_rotation(input_image.rotation())
                                .set_id(input_image.id())
+                               .set_packet_infos(input_image.packet_infos())
                                .build();
   rv = encoders_[kAXXStream]->Encode(alpha_image, &adjusted_frame_types);
   return rv;
diff --git a/modules/video_coding/encoded_frame.h b/modules/video_coding/encoded_frame.h
index 2ebef31..f8ee6a7 100644
--- a/modules/video_coding/encoded_frame.h
+++ b/modules/video_coding/encoded_frame.h
@@ -54,9 +54,11 @@
 
   using EncodedImage::ColorSpace;
   using EncodedImage::data;
+  using EncodedImage::PacketInfos;
   using EncodedImage::set_size;
   using EncodedImage::SetColorSpace;
   using EncodedImage::SetEncodedData;
+  using EncodedImage::SetPacketInfos;
   using EncodedImage::SetSpatialIndex;
   using EncodedImage::SetSpatialLayerFrameSize;
   using EncodedImage::SetTimestamp;
diff --git a/modules/video_coding/frame_object.cc b/modules/video_coding/frame_object.cc
index fab6066..5a485da 100644
--- a/modules/video_coding/frame_object.cc
+++ b/modules/video_coding/frame_object.cc
@@ -11,6 +11,7 @@
 #include "modules/video_coding/frame_object.h"
 
 #include <string.h>
+#include <utility>
 
 #include "api/video/encoded_image.h"
 #include "api/video/video_timing.h"
@@ -28,7 +29,8 @@
                                size_t frame_size,
                                int times_nacked,
                                int64_t first_packet_received_time,
-                               int64_t last_packet_received_time)
+                               int64_t last_packet_received_time,
+                               RtpPacketInfos packet_infos)
     : packet_buffer_(packet_buffer),
       first_seq_num_(first_seq_num),
       last_seq_num_(last_seq_num),
@@ -63,6 +65,7 @@
 
   // EncodedFrame members
   SetTimestamp(first_packet->timestamp);
+  SetPacketInfos(std::move(packet_infos));
 
   VCMPacket* last_packet = packet_buffer_->GetPacket(last_seq_num);
   RTC_CHECK(last_packet);
diff --git a/modules/video_coding/frame_object.h b/modules/video_coding/frame_object.h
index 1ba99cb..3ad356f 100644
--- a/modules/video_coding/frame_object.h
+++ b/modules/video_coding/frame_object.h
@@ -29,7 +29,8 @@
                  size_t frame_size,
                  int times_nacked,
                  int64_t first_packet_received_time,
-                 int64_t last_packet_received_time);
+                 int64_t last_packet_received_time,
+                 RtpPacketInfos packet_infos);
 
   ~RtpFrameObject() override;
   uint16_t first_seq_num() const;
diff --git a/modules/video_coding/generic_decoder.cc b/modules/video_coding/generic_decoder.cc
index cb6c819..ab83119 100644
--- a/modules/video_coding/generic_decoder.cc
+++ b/modules/video_coding/generic_decoder.cc
@@ -84,6 +84,7 @@
   if (frameInfo->color_space) {
     decodedImage.set_color_space(frameInfo->color_space);
   }
+  decodedImage.set_packet_infos(frameInfo->packet_infos);
   decodedImage.set_rotation(frameInfo->rotation);
 
   const int64_t now_ms = _clock->TimeInMilliseconds();
@@ -211,6 +212,7 @@
   } else {
     _frameInfos[_nextFrameInfoIdx].color_space = absl::nullopt;
   }
+  _frameInfos[_nextFrameInfoIdx].packet_infos = frame.PacketInfos();
 
   // Set correctly only for key frames. Thus, use latest key frame
   // content type. If the corresponding key frame was lost, decode will fail
diff --git a/modules/video_coding/generic_decoder.h b/modules/video_coding/generic_decoder.h
index 97336b1..50d7dba 100644
--- a/modules/video_coding/generic_decoder.h
+++ b/modules/video_coding/generic_decoder.h
@@ -36,6 +36,7 @@
   EncodedImage::Timing timing;
   int64_t ntp_time_ms;
   absl::optional<ColorSpace> color_space;
+  RtpPacketInfos packet_infos;
 };
 
 class VCMDecodedFrameCallback : public DecodedImageCallback {
diff --git a/modules/video_coding/generic_decoder_unittest.cc b/modules/video_coding/generic_decoder_unittest.cc
index adc945c..691561d 100644
--- a/modules/video_coding/generic_decoder_unittest.cc
+++ b/modules/video_coding/generic_decoder_unittest.cc
@@ -122,5 +122,31 @@
   EXPECT_EQ(*decoded_color_space, color_space);
 }
 
+TEST_F(GenericDecoderTest, PassesPacketInfos) {
+  RtpPacketInfos packet_infos = CreatePacketInfos(3);
+  VCMEncodedFrame encoded_frame;
+  encoded_frame.SetPacketInfos(packet_infos);
+  generic_decoder_.Decode(encoded_frame, clock_.TimeInMilliseconds());
+  absl::optional<VideoFrame> decoded_frame = user_callback_.WaitForFrame(10);
+  ASSERT_TRUE(decoded_frame.has_value());
+  EXPECT_EQ(decoded_frame->packet_infos().size(), 3U);
+}
+
+TEST_F(GenericDecoderTest, PassesPacketInfosForDelayedDecoders) {
+  RtpPacketInfos packet_infos = CreatePacketInfos(3);
+  decoder_.SetDelayedDecoding(100);
+
+  {
+    // Ensure the original frame is destroyed before the decoding is completed.
+    VCMEncodedFrame encoded_frame;
+    encoded_frame.SetPacketInfos(packet_infos);
+    generic_decoder_.Decode(encoded_frame, clock_.TimeInMilliseconds());
+  }
+
+  absl::optional<VideoFrame> decoded_frame = user_callback_.WaitForFrame(200);
+  ASSERT_TRUE(decoded_frame.has_value());
+  EXPECT_EQ(decoded_frame->packet_infos().size(), 3U);
+}
+
 }  // namespace video_coding
 }  // namespace webrtc
diff --git a/modules/video_coding/jitter_buffer_unittest.cc b/modules/video_coding/jitter_buffer_unittest.cc
index 53eba63..315e5f9 100644
--- a/modules/video_coding/jitter_buffer_unittest.cc
+++ b/modules/video_coding/jitter_buffer_unittest.cc
@@ -67,7 +67,8 @@
     video_header.is_first_packet_in_frame = true;
     video_header.frame_type = VideoFrameType::kVideoFrameDelta;
     packet_.reset(new VCMPacket(data_, size_, rtp_header, video_header,
-                                /*ntp_time_ms=*/0));
+                                /*ntp_time_ms=*/0,
+                                clock_->TimeInMilliseconds()));
   }
 
   VCMEncodedFrame* DecodeCompleteFrame() {
@@ -542,7 +543,7 @@
   video_header.codec = kVideoCodecGeneric;
   video_header.frame_type = VideoFrameType::kEmptyFrame;
   VCMPacket empty_packet(data_, 0, rtp_header, video_header,
-                         /*ntp_time_ms=*/0);
+                         /*ntp_time_ms=*/0, clock_->TimeInMilliseconds());
   EXPECT_EQ(kOldPacket,
             jitter_buffer_->InsertPacket(empty_packet, &retransmitted));
   empty_packet.seqNum += 1;
diff --git a/modules/video_coding/packet.cc b/modules/video_coding/packet.cc
index 46df82a..0c4a658 100644
--- a/modules/video_coding/packet.cc
+++ b/modules/video_coding/packet.cc
@@ -25,8 +25,7 @@
       timesNacked(-1),
       completeNALU(kNaluUnset),
       insertStartCode(false),
-      video_header(),
-      receive_time_ms(0) {
+      video_header() {
   video_header.playout_delay = {-1, -1};
 }
 
@@ -34,7 +33,8 @@
                      size_t size,
                      const RTPHeader& rtp_header,
                      const RTPVideoHeader& videoHeader,
-                     int64_t ntp_time_ms)
+                     int64_t ntp_time_ms,
+                     int64_t receive_time_ms)
     : payloadType(rtp_header.payloadType),
       timestamp(rtp_header.timestamp),
       ntp_time_ms_(ntp_time_ms),
@@ -46,7 +46,8 @@
       completeNALU(kNaluIncomplete),
       insertStartCode(videoHeader.codec == kVideoCodecH264 &&
                       videoHeader.is_first_packet_in_frame),
-      video_header(videoHeader) {
+      video_header(videoHeader),
+      packet_info(rtp_header, receive_time_ms) {
   if (is_first_packet_in_frame() && markerBit) {
     completeNALU = kNaluComplete;
   } else if (is_first_packet_in_frame()) {
diff --git a/modules/video_coding/packet.h b/modules/video_coding/packet.h
index 3f22845..f157e10 100644
--- a/modules/video_coding/packet.h
+++ b/modules/video_coding/packet.h
@@ -16,6 +16,7 @@
 
 #include "absl/types/optional.h"
 #include "api/rtp_headers.h"
+#include "api/rtp_packet_info.h"
 #include "api/video/video_frame_type.h"
 #include "modules/rtp_rtcp/source/rtp_generic_frame_descriptor.h"
 #include "modules/rtp_rtcp/source/rtp_video_header.h"
@@ -39,7 +40,8 @@
             size_t size,
             const RTPHeader& rtp_header,
             const RTPVideoHeader& video_header,
-            int64_t ntp_time_ms);
+            int64_t ntp_time_ms,
+            int64_t receive_time_ms);
 
   ~VCMPacket();
 
@@ -70,7 +72,7 @@
   RTPVideoHeader video_header;
   absl::optional<RtpGenericFrameDescriptor> generic_descriptor;
 
-  int64_t receive_time_ms;
+  RtpPacketInfo packet_info;
 };
 
 }  // namespace webrtc
diff --git a/modules/video_coding/packet_buffer.cc b/modules/video_coding/packet_buffer.cc
index bd1ab03..e6469b1 100644
--- a/modules/video_coding/packet_buffer.cc
+++ b/modules/video_coding/packet_buffer.cc
@@ -286,8 +286,9 @@
       size_t frame_size = 0;
       int max_nack_count = -1;
       uint16_t start_seq_num = seq_num;
-      int64_t min_recv_time = data_buffer_[index].receive_time_ms;
-      int64_t max_recv_time = data_buffer_[index].receive_time_ms;
+      int64_t min_recv_time = data_buffer_[index].packet_info.receive_time_ms();
+      int64_t max_recv_time = data_buffer_[index].packet_info.receive_time_ms();
+      RtpPacketInfos::vector_type packet_infos;
 
       // Find the start index by searching backward until the packet with
       // the |frame_begin| flag is set.
@@ -310,9 +311,16 @@
         sequence_buffer_[start_index].frame_created = true;
 
         min_recv_time =
-            std::min(min_recv_time, data_buffer_[start_index].receive_time_ms);
+            std::min(min_recv_time,
+                     data_buffer_[start_index].packet_info.receive_time_ms());
         max_recv_time =
-            std::max(max_recv_time, data_buffer_[start_index].receive_time_ms);
+            std::max(max_recv_time,
+                     data_buffer_[start_index].packet_info.receive_time_ms());
+
+        // Should use |push_front()| since the loop traverses backwards. But
+        // it's too inefficient to do so on a vector, so we instead fix the
+        // order afterwards.
+        packet_infos.push_back(data_buffer_[start_index].packet_info);
 
         if (!is_h264 && sequence_buffer_[start_index].frame_begin)
           break;
@@ -359,6 +367,9 @@
         --start_seq_num;
       }
 
+      // Fix the order since the packet-finding loop traverses backwards.
+      std::reverse(packet_infos.begin(), packet_infos.end());
+
       if (is_h264) {
         // Warn if this is an unsafe frame.
         if (has_h264_idr && (!has_h264_sps || !has_h264_pps)) {
@@ -406,7 +417,8 @@
 
       found_frames.emplace_back(
           new RtpFrameObject(this, start_seq_num, seq_num, frame_size,
-                             max_nack_count, min_recv_time, max_recv_time));
+                             max_nack_count, min_recv_time, max_recv_time,
+                             RtpPacketInfos(std::move(packet_infos))));
     }
     ++seq_num;
   }
diff --git a/modules/video_coding/rtp_frame_reference_finder_unittest.cc b/modules/video_coding/rtp_frame_reference_finder_unittest.cc
index aba807e..83945d0 100644
--- a/modules/video_coding/rtp_frame_reference_finder_unittest.cc
+++ b/modules/video_coding/rtp_frame_reference_finder_unittest.cc
@@ -92,7 +92,7 @@
     ref_packet_buffer_->InsertPacket(&packet);
 
     std::unique_ptr<RtpFrameObject> frame(new RtpFrameObject(
-        ref_packet_buffer_, seq_num_start, seq_num_end, 0, 0, 0, 0));
+        ref_packet_buffer_, seq_num_start, seq_num_end, 0, 0, 0, 0, {}));
     reference_finder_->ManageFrame(std::move(frame));
   }
 
@@ -126,7 +126,7 @@
     }
 
     std::unique_ptr<RtpFrameObject> frame(new RtpFrameObject(
-        ref_packet_buffer_, seq_num_start, seq_num_end, 0, 0, 0, 0));
+        ref_packet_buffer_, seq_num_start, seq_num_end, 0, 0, 0, 0, {}));
     reference_finder_->ManageFrame(std::move(frame));
   }
 
@@ -172,7 +172,7 @@
     }
 
     std::unique_ptr<RtpFrameObject> frame(new RtpFrameObject(
-        ref_packet_buffer_, seq_num_start, seq_num_end, 0, 0, 0, 0));
+        ref_packet_buffer_, seq_num_start, seq_num_end, 0, 0, 0, 0, {}));
     reference_finder_->ManageFrame(std::move(frame));
   }
 
@@ -213,7 +213,7 @@
     }
 
     std::unique_ptr<RtpFrameObject> frame(new RtpFrameObject(
-        ref_packet_buffer_, seq_num_start, seq_num_end, 0, 0, 0, 0));
+        ref_packet_buffer_, seq_num_start, seq_num_end, 0, 0, 0, 0, {}));
     reference_finder_->ManageFrame(std::move(frame));
   }
 
@@ -243,7 +243,7 @@
     }
 
     std::unique_ptr<RtpFrameObject> frame(new RtpFrameObject(
-        ref_packet_buffer_, seq_num_start, seq_num_end, 0, 0, 0, 0));
+        ref_packet_buffer_, seq_num_start, seq_num_end, 0, 0, 0, 0, {}));
     reference_finder_->ManageFrame(std::move(frame));
   }
 
diff --git a/modules/video_coding/video_receiver.cc b/modules/video_coding/video_receiver.cc
index 8f9e849..2ef06ff 100644
--- a/modules/video_coding/video_receiver.cc
+++ b/modules/video_coding/video_receiver.cc
@@ -338,7 +338,8 @@
   }
   // Callers don't provide any ntp time.
   const VCMPacket packet(incomingPayload, payloadLength, rtp_header,
-                         video_header, /*ntp_time_ms=*/0);
+                         video_header, /*ntp_time_ms=*/0,
+                         clock_->TimeInMilliseconds());
   int32_t ret = _receiver.InsertPacket(packet);
 
   // TODO(holmer): Investigate if this somehow should use the key frame
diff --git a/test/fuzzers/BUILD.gn b/test/fuzzers/BUILD.gn
index f1d8a05..06b96e8 100644
--- a/test/fuzzers/BUILD.gn
+++ b/test/fuzzers/BUILD.gn
@@ -551,6 +551,7 @@
     "rtp_frame_reference_finder_fuzzer.cc",
   ]
   deps = [
+    "../../api:rtp_packet_info",
     "../../api:scoped_refptr",
     "../../modules/video_coding/",
     "../../system_wrappers",
diff --git a/test/fuzzers/rtp_frame_reference_finder_fuzzer.cc b/test/fuzzers/rtp_frame_reference_finder_fuzzer.cc
index 7eae896..4e1e9ce 100644
--- a/test/fuzzers/rtp_frame_reference_finder_fuzzer.cc
+++ b/test/fuzzers/rtp_frame_reference_finder_fuzzer.cc
@@ -11,6 +11,7 @@
 #include "modules/video_coding/rtp_frame_reference_finder.h"
 
 #include "absl/memory/memory.h"
+#include "api/rtp_packet_infos.h"
 #include "modules/video_coding/frame_object.h"
 #include "modules/video_coding/packet_buffer.h"
 #include "system_wrappers/include/clock.h"
@@ -124,7 +125,8 @@
 
   while (reader.MoreToRead()) {
     auto frame = absl::make_unique<video_coding::RtpFrameObject>(
-        pb, reader.GetNum<uint16_t>(), reader.GetNum<uint16_t>(), 0, 0, 0, 0);
+        pb, reader.GetNum<uint16_t>(), reader.GetNum<uint16_t>(), 0, 0, 0, 0,
+        RtpPacketInfos());
     reference_finder.ManageFrame(std::move(frame));
   }
 }
diff --git a/video/buffered_frame_decryptor_unittest.cc b/video/buffered_frame_decryptor_unittest.cc
index 4506dcf..697bc3d 100644
--- a/video/buffered_frame_decryptor_unittest.cc
+++ b/video/buffered_frame_decryptor_unittest.cc
@@ -107,7 +107,7 @@
 
     return std::unique_ptr<video_coding::RtpFrameObject>(
         new video_coding::RtpFrameObject(fake_packet_buffer_.get(), seq_num_,
-                                         seq_num_, 0, 0, 0, 0));
+                                         seq_num_, 0, 0, 0, 0, {}));
   }
 
  protected:
diff --git a/video/frame_encode_metadata_writer.cc b/video/frame_encode_metadata_writer.cc
index d2cbf7d..4ed90db 100644
--- a/video/frame_encode_metadata_writer.cc
+++ b/video/frame_encode_metadata_writer.cc
@@ -97,6 +97,7 @@
   metadata.timestamp_us = frame.timestamp_us();
   metadata.rotation = frame.rotation();
   metadata.color_space = frame.color_space();
+  metadata.packet_infos = frame.packet_infos();
   for (size_t si = 0; si < num_spatial_layers; ++si) {
     RTC_DCHECK(timing_frames_info_[si].frames.empty() ||
                rtc::TimeDiff(
@@ -278,6 +279,7 @@
       encoded_image->ntp_time_ms_ = metadata_list->front().ntp_time_ms;
       encoded_image->rotation_ = metadata_list->front().rotation;
       encoded_image->SetColorSpace(metadata_list->front().color_space);
+      encoded_image->SetPacketInfos(metadata_list->front().packet_infos);
       metadata_list->pop_front();
     } else {
       ++reordered_frames_logged_messages_;
diff --git a/video/frame_encode_metadata_writer.h b/video/frame_encode_metadata_writer.h
index 467c859..4ee2d7e 100644
--- a/video/frame_encode_metadata_writer.h
+++ b/video/frame_encode_metadata_writer.h
@@ -60,6 +60,7 @@
     int64_t timestamp_us = 0;
     VideoRotation rotation = kVideoRotation_0;
     absl::optional<ColorSpace> color_space;
+    RtpPacketInfos packet_infos;
   };
   struct TimingFramesLayerInfo {
     TimingFramesLayerInfo();
diff --git a/video/frame_encode_metadata_writer_unittest.cc b/video/frame_encode_metadata_writer_unittest.cc
index 96df534..2f74599 100644
--- a/video/frame_encode_metadata_writer_unittest.cc
+++ b/video/frame_encode_metadata_writer_unittest.cc
@@ -435,6 +435,31 @@
   EXPECT_EQ(color_space, *image.ColorSpace());
 }
 
+TEST(FrameEncodeMetadataWriterTest, CopiesPacketInfos) {
+  EncodedImage image;
+  const int64_t kTimestampMs = 123456;
+  FakeEncodedImageCallback sink;
+
+  FrameEncodeMetadataWriter encode_timer(&sink);
+  encode_timer.OnEncoderInit(VideoCodec(), false);
+  // A non-zero bitrate needs to be set before the first frame.
+  VideoBitrateAllocation bitrate_allocation;
+  bitrate_allocation.SetBitrate(0, 0, 500000);
+  encode_timer.OnSetRates(bitrate_allocation, 30);
+
+  RtpPacketInfos packet_infos = CreatePacketInfos(3);
+  image.SetTimestamp(static_cast<uint32_t>(kTimestampMs * 90));
+  VideoFrame frame = VideoFrame::Builder()
+                         .set_timestamp_ms(kTimestampMs)
+                         .set_timestamp_rtp(kTimestampMs * 90)
+                         .set_packet_infos(packet_infos)
+                         .set_video_frame_buffer(kFrameBuffer)
+                         .build();
+  encode_timer.OnEncodeStarted(frame);
+  encode_timer.FillTimingInfo(0, &image);
+  EXPECT_EQ(image.PacketInfos().size(), 3U);
+}
+
 TEST(FrameEncodeMetadataWriterTest, DoesNotRewriteBitstreamWithoutCodecInfo) {
   uint8_t buffer[] = {1, 2, 3};
   EncodedImage image(buffer, sizeof(buffer), sizeof(buffer));
diff --git a/video/rtp_video_stream_receiver.cc b/video/rtp_video_stream_receiver.cc
index aae1f09..ea0f663 100644
--- a/video/rtp_video_stream_receiver.cc
+++ b/video/rtp_video_stream_receiver.cc
@@ -327,7 +327,8 @@
     const absl::optional<RtpGenericFrameDescriptor>& generic_descriptor,
     bool is_recovered) {
   VCMPacket packet(payload_data, payload_size, rtp_header, video_header,
-                   ntp_estimator_.Estimate(rtp_header.timestamp));
+                   ntp_estimator_.Estimate(rtp_header.timestamp),
+                   clock_->TimeInMilliseconds());
   packet.generic_descriptor = generic_descriptor;
 
   if (loss_notification_controller_) {
@@ -350,7 +351,6 @@
   } else {
     packet.timesNacked = -1;
   }
-  packet.receive_time_ms = clock_->TimeInMilliseconds();
 
   if (packet.sizeBytes == 0) {
     NotifyReceiverOfEmptyPacket(packet.seqNum);
diff --git a/video/rtp_video_stream_receiver_unittest.cc b/video/rtp_video_stream_receiver_unittest.cc
index 24a49f0..d30a698 100644
--- a/video/rtp_video_stream_receiver_unittest.cc
+++ b/video/rtp_video_stream_receiver_unittest.cc
@@ -36,6 +36,7 @@
 
 using ::testing::_;
 using ::testing::Invoke;
+using ::testing::SizeIs;
 using ::testing::Values;
 
 namespace webrtc {
@@ -721,6 +722,7 @@
         EXPECT_EQ(frame->references[0], frame->id.picture_id - 90);
         EXPECT_EQ(frame->references[1], frame->id.picture_id - 80);
         EXPECT_EQ(frame->id.spatial_layer, kSpatialIndex);
+        EXPECT_THAT(frame->PacketInfos(), SizeIs(1));
       }));
 
   rtp_video_stream_receiver_->OnRtpPacket(rtp_packet);
@@ -785,6 +787,7 @@
         EXPECT_EQ(frame->id.spatial_layer, kSpatialIndex);
         EXPECT_EQ(frame->EncodedImage()._encodedWidth, 480u);
         EXPECT_EQ(frame->EncodedImage()._encodedHeight, 360u);
+        EXPECT_THAT(frame->PacketInfos(), SizeIs(2));
       }));
 
   rtp_video_stream_receiver_->OnRtpPacket(second_packet);
diff --git a/video/video_receive_stream_unittest.cc b/video/video_receive_stream_unittest.cc
index fe4c292..d8a845e 100644
--- a/video/video_receive_stream_unittest.cc
+++ b/video/video_receive_stream_unittest.cc
@@ -309,4 +309,18 @@
   EXPECT_EQ(color_space, *fake_renderer_.color_space());
 }
 
+TEST_F(VideoReceiveStreamTestWithFakeDecoder, PassesPacketInfos) {
+  auto test_frame = absl::make_unique<FrameObjectFake>();
+  test_frame->SetPayloadType(99);
+  test_frame->id.picture_id = 0;
+  RtpPacketInfos packet_infos = CreatePacketInfos(3);
+  test_frame->SetPacketInfos(packet_infos);
+
+  video_receive_stream_->Start();
+  video_receive_stream_->OnCompleteFrame(std::move(test_frame));
+  EXPECT_TRUE(fake_renderer_.WaitForRenderedFrame(kDefaultTimeOutMs));
+
+  EXPECT_EQ(fake_renderer_.packet_infos().size(), 3U);
+}
+
 }  // namespace webrtc