Propagate Video CSRCs modified by an insertable streams frame transform

Allow CSRCs to be modified per frame in an Encoded Insertable Streams
transform. This supports a web API that lets a JS application modify an
encoded frame's CSRCs to signal that it has changed the source of the
video written into that frame; a usage sketch follows below.

This initially covers only video; audio support is likely to follow
later.

Bug: webrtc:14709
Change-Id: Ib34f35faa9cee56216b30eaae42d7e65c78bb9f2
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/291324
Reviewed-by: Harald Alvestrand <hta@webrtc.org>
Reviewed-by: Tove Petersson <tovep@google.com>
Reviewed-by: Henrik Boström <hbos@webrtc.org>
Commit-Queue: Tony Herre <herre@google.com>
Cr-Commit-Position: refs/heads/main@{#39214}
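
For orientation, a minimal native-side sketch of the intended per-frame
usage, assuming a transform that wants to tag outgoing frames with an
extra contributing source. The helper name TagFrameWithSource and the
contributing_source value are invented for illustration; the types and
accessors (TransformableVideoFrameInterface, VideoFrameMetadata,
GetCsrcs, SetCsrcs) are the ones touched by this change.

    #include <cstdint>
    #include <utility>
    #include <vector>

    #include "api/frame_transformer_interface.h"
    #include "api/video/video_frame_metadata.h"

    // Sketch only: rewrite the CSRC list carried by a frame as it passes
    // through an insertable streams transform.
    void TagFrameWithSource(webrtc::TransformableVideoFrameInterface& frame,
                            uint32_t contributing_source) {
      webrtc::VideoFrameMetadata metadata = frame.GetMetadata();
      std::vector<uint32_t> csrcs = metadata.GetCsrcs();
      csrcs.push_back(contributing_source);
      metadata.SetCsrcs(std::move(csrcs));
      // The updated metadata, CSRCs included, travels with the frame when
      // it is handed back to the sender.
      frame.SetMetadata(metadata);
    }

On the send side, the transformer delegate below forwards the frame's
CSRCs into RTPSenderVideo::SendVideo, which applies them to the outgoing
packets via RTPSender::SetCsrcs.
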
diff --git a/api/video/video_frame_metadata.cc b/api/video/video_frame_metadata.cc
index e1863e9..6efb6ad 100644
--- a/api/video/video_frame_metadata.cc
+++ b/api/video/video_frame_metadata.cc
@@ -136,4 +136,12 @@
   codec_specifics_ = std::move(codec_specifics);
 }
 
+std::vector<uint32_t> VideoFrameMetadata::GetCsrcs() const {
+  return csrcs_;
+}
+
+void VideoFrameMetadata::SetCsrcs(std::vector<uint32_t> csrcs) {
+  csrcs_ = std::move(csrcs);
+}
+
 }  // namespace webrtc
diff --git a/api/video/video_frame_metadata.h b/api/video/video_frame_metadata.h
index 2703f11..f8f144e 100644
--- a/api/video/video_frame_metadata.h
+++ b/api/video/video_frame_metadata.h
@@ -12,6 +12,7 @@
 #define API_VIDEO_VIDEO_FRAME_METADATA_H_
 
 #include <cstdint>
+#include <vector>
 
 #include "absl/container/inlined_vector.h"
 #include "absl/types/optional.h"
@@ -88,6 +89,9 @@
   void SetRTPVideoHeaderCodecSpecifics(
       RTPVideoHeaderCodecSpecifics codec_specifics);
 
+  std::vector<uint32_t> GetCsrcs() const;
+  void SetCsrcs(std::vector<uint32_t> csrcs);
+
  private:
   VideoFrameType frame_type_ = VideoFrameType::kEmptyFrame;
   int16_t width_ = 0;
@@ -106,6 +110,8 @@
   uint8_t simulcast_idx_ = 0;
   VideoCodecType codec_ = VideoCodecType::kVideoCodecGeneric;
   RTPVideoHeaderCodecSpecifics codec_specifics_;
+
+  std::vector<uint32_t> csrcs_;
 };
 }  // namespace webrtc
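
As a minimal sketch of the new accessors in isolation (the CSRC values
here are made up), the stored list round-trips through the metadata
object:

    #include <cstdint>
    #include <vector>

    #include "api/video/video_frame_metadata.h"

    std::vector<uint32_t> RoundTripCsrcs() {
      webrtc::VideoFrameMetadata metadata;
      metadata.SetCsrcs({0x11111111u, 0x22222222u});
      // GetCsrcs() returns a copy of the stored list.
      return metadata.GetCsrcs();
    }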
 
diff --git a/modules/rtp_rtcp/source/frame_transformer_factory_unittest.cc b/modules/rtp_rtcp/source/frame_transformer_factory_unittest.cc
index 65a0e4c..1a78c6d 100644
--- a/modules/rtp_rtcp/source/frame_transformer_factory_unittest.cc
+++ b/modules/rtp_rtcp/source/frame_transformer_factory_unittest.cc
@@ -29,6 +29,7 @@
 
 using testing::NiceMock;
 using testing::Return;
+using testing::ReturnRef;
 
 class MockTransformableVideoFrame
     : public webrtc::TransformableVideoFrameInterface {
@@ -60,9 +61,18 @@
   std::fill_n(data, 10, 5);
   rtc::ArrayView<uint8_t> data_view(data);
   EXPECT_CALL(original_frame, GetData()).WillRepeatedly(Return(data_view));
+  webrtc::VideoFrameMetadata metadata;
+  std::vector<uint32_t> csrcs{123, 321};
+  // Copy csrcs rather than moving so we can compare in an EXPECT_EQ later.
+  metadata.SetCsrcs(csrcs);
+
+  EXPECT_CALL(original_frame, GetMetadata())
+      .WillRepeatedly(ReturnRef(metadata));
   auto cloned_frame = CloneVideoFrame(&original_frame);
+
   EXPECT_EQ(cloned_frame->GetData().size(), 10u);
   EXPECT_THAT(cloned_frame->GetData(), testing::Each(5u));
+  EXPECT_EQ(cloned_frame->GetMetadata().GetCsrcs(), csrcs);
 }
 
 }  // namespace
diff --git a/modules/rtp_rtcp/source/nack_rtx_unittest.cc b/modules/rtp_rtcp/source/nack_rtx_unittest.cc
index d0617f3..87c6e66 100644
--- a/modules/rtp_rtcp/source/nack_rtx_unittest.cc
+++ b/modules/rtp_rtcp/source/nack_rtx_unittest.cc
@@ -210,7 +210,7 @@
       video_header.frame_type = VideoFrameType::kVideoFrameDelta;
       EXPECT_TRUE(rtp_sender_video_->SendVideo(
           kPayloadType, VideoCodecType::kVideoCodecGeneric, timestamp,
-          timestamp / 90, payload_data, video_header, 0));
+          timestamp / 90, payload_data, video_header, 0, {}));
       // Min required delay until retransmit = 5 + RTT ms (RTT = 0).
       fake_clock.AdvanceTimeMilliseconds(5);
       int length = BuildNackList(nack_list);
@@ -260,7 +260,7 @@
     video_header.frame_type = VideoFrameType::kVideoFrameDelta;
     EXPECT_TRUE(rtp_sender_video_->SendVideo(
         kPayloadType, VideoCodecType::kVideoCodecGeneric, timestamp,
-        timestamp / 90, payload_data, video_header, 0));
+        timestamp / 90, payload_data, video_header, 0, {}));
     // Prepare next frame.
     timestamp += 3000;
     fake_clock.AdvanceTimeMilliseconds(33);
diff --git a/modules/rtp_rtcp/source/rtp_rtcp_impl2_unittest.cc b/modules/rtp_rtcp/source/rtp_rtcp_impl2_unittest.cc
index 918e075..b793ba8 100644
--- a/modules/rtp_rtcp/source/rtp_rtcp_impl2_unittest.cc
+++ b/modules/rtp_rtcp/source/rtp_rtcp_impl2_unittest.cc
@@ -358,7 +358,7 @@
 
     success &= sender->SendVideo(kPayloadType, VideoCodecType::kVideoCodecVP8,
                                  rtp_timestamp, capture_time_ms, payload,
-                                 rtp_video_header, 0);
+                                 rtp_video_header, 0, {});
     return success;
   }
 
diff --git a/modules/rtp_rtcp/source/rtp_rtcp_impl_unittest.cc b/modules/rtp_rtcp/source/rtp_rtcp_impl_unittest.cc
index 96bef23a..4c621d7 100644
--- a/modules/rtp_rtcp/source/rtp_rtcp_impl_unittest.cc
+++ b/modules/rtp_rtcp/source/rtp_rtcp_impl_unittest.cc
@@ -228,7 +228,7 @@
     const uint8_t payload[100] = {0};
     EXPECT_TRUE(module->impl_->OnSendingRtpFrame(0, 0, kPayloadType, true));
     EXPECT_TRUE(sender->SendVideo(kPayloadType, VideoCodecType::kVideoCodecVP8,
-                                  0, 0, payload, rtp_video_header, 0));
+                                  0, 0, payload, rtp_video_header, 0, {}));
   }
 
   void IncomingRtcpNack(const RtpRtcpModule* module, uint16_t sequence_number) {
diff --git a/modules/rtp_rtcp/source/rtp_sender.cc b/modules/rtp_rtcp/source/rtp_sender.cc
index ac3bd55..9b977fa 100644
--- a/modules/rtp_rtcp/source/rtp_sender.cc
+++ b/modules/rtp_rtcp/source/rtp_sender.cc
@@ -521,6 +521,7 @@
       &rtp_header_extension_map_, max_packet_size_ + kExtraCapacity);
   packet->SetSsrc(ssrc_);
   packet->SetCsrcs(csrcs_);
+
   // Reserve extensions, if registered, RtpSender set in SendToNetwork.
   packet->ReserveExtension<AbsoluteSendTime>();
   packet->ReserveExtension<TransmissionOffset>();
@@ -582,6 +583,11 @@
   UpdateHeaderSizes();
 }
 
+std::vector<uint32_t> RTPSender::Csrcs() const {
+  MutexLock lock(&send_mutex_);
+  return csrcs_;
+}
+
 void RTPSender::SetCsrcs(const std::vector<uint32_t>& csrcs) {
   RTC_DCHECK_LE(csrcs.size(), kRtpCsrcSize);
   MutexLock lock(&send_mutex_);
diff --git a/modules/rtp_rtcp/source/rtp_sender.h b/modules/rtp_rtcp/source/rtp_sender.h
index 55dee7f..c45f597 100644
--- a/modules/rtp_rtcp/source/rtp_sender.h
+++ b/modules/rtp_rtcp/source/rtp_sender.h
@@ -63,6 +63,7 @@
   uint16_t SequenceNumber() const RTC_LOCKS_EXCLUDED(send_mutex_);
   void SetSequenceNumber(uint16_t seq) RTC_LOCKS_EXCLUDED(send_mutex_);
 
+  std::vector<uint32_t> Csrcs() const;
   void SetCsrcs(const std::vector<uint32_t>& csrcs)
       RTC_LOCKS_EXCLUDED(send_mutex_);
 
diff --git a/modules/rtp_rtcp/source/rtp_sender_unittest.cc b/modules/rtp_rtcp/source/rtp_sender_unittest.cc
index 432c5e7..ff6a372 100644
--- a/modules/rtp_rtcp/source/rtp_sender_unittest.cc
+++ b/modules/rtp_rtcp/source/rtp_sender_unittest.cc
@@ -1348,7 +1348,7 @@
     EXPECT_TRUE(rtp_sender_video.SendVideo(
         kPayloadType, kCodecType,
         capture_time_ms * kCaptureTimeMsToRtpTimestamp, capture_time_ms,
-        kPayloadData, video_header, kDefaultExpectedRetransmissionTimeMs));
+        kPayloadData, video_header, kDefaultExpectedRetransmissionTimeMs, {}));
 
     time_controller_.AdvanceTime(TimeDelta::Millis(33));
   }
@@ -1364,7 +1364,7 @@
     EXPECT_TRUE(rtp_sender_video.SendVideo(
         kPayloadType, kCodecType,
         capture_time_ms * kCaptureTimeMsToRtpTimestamp, capture_time_ms,
-        kPayloadData, video_header, kDefaultExpectedRetransmissionTimeMs));
+        kPayloadData, video_header, kDefaultExpectedRetransmissionTimeMs, {}));
 
     time_controller_.AdvanceTime(TimeDelta::Millis(33));
   }
diff --git a/modules/rtp_rtcp/source/rtp_sender_video.cc b/modules/rtp_rtcp/source/rtp_sender_video.cc
index e1ac4e4..e1f1697 100644
--- a/modules/rtp_rtcp/source/rtp_sender_video.cc
+++ b/modules/rtp_rtcp/source/rtp_sender_video.cc
@@ -171,6 +171,7 @@
                     this,
                     config.frame_transformer,
                     rtp_sender_->SSRC(),
+                    rtp_sender_->Csrcs(),
                     config.task_queue_factory)
               : nullptr),
       include_capture_clock_offset_(!absl::StartsWith(
@@ -474,7 +475,8 @@
     int64_t capture_time_ms,
     rtc::ArrayView<const uint8_t> payload,
     RTPVideoHeader video_header,
-    absl::optional<int64_t> expected_retransmission_time_ms) {
+    absl::optional<int64_t> expected_retransmission_time_ms,
+    std::vector<uint32_t> csrcs) {
   TRACE_EVENT_ASYNC_STEP1("webrtc", "Video", capture_time_ms, "Send", "type",
                           FrameTypeToString(video_header.frame_type));
   RTC_CHECK_RUNS_SERIALIZED(&send_checker_);
@@ -484,6 +486,7 @@
 
   if (payload.empty())
     return false;
+
   if (!rtp_sender_->SendingMedia()) {
     return false;
   }
@@ -530,6 +533,8 @@
     capture_time = Timestamp::Millis(capture_time_ms);
   }
 
+  rtp_sender_->SetCsrcs(std::move(csrcs));
+
   std::unique_ptr<RtpPacketToSend> single_packet =
       rtp_sender_->AllocatePacket();
   RTC_DCHECK_LE(packet_capacity, single_packet->capacity());
@@ -778,7 +783,7 @@
   }
   return SendVideo(payload_type, codec_type, rtp_timestamp,
                    encoded_image.capture_time_ms_, encoded_image, video_header,
-                   expected_retransmission_time_ms);
+                   expected_retransmission_time_ms, rtp_sender_->Csrcs());
 }
 
 uint32_t RTPSenderVideo::PacketizationOverheadBps() const {
diff --git a/modules/rtp_rtcp/source/rtp_sender_video.h b/modules/rtp_rtcp/source/rtp_sender_video.h
index ecff8d4..825209f2 100644
--- a/modules/rtp_rtcp/source/rtp_sender_video.h
+++ b/modules/rtp_rtcp/source/rtp_sender_video.h
@@ -98,7 +98,8 @@
                  int64_t capture_time_ms,
                  rtc::ArrayView<const uint8_t> payload,
                  RTPVideoHeader video_header,
-                 absl::optional<int64_t> expected_retransmission_time_ms);
+                 absl::optional<int64_t> expected_retransmission_time_ms,
+                 std::vector<uint32_t> csrcs = {});
 
   bool SendEncodedImage(
       int payload_type,
diff --git a/modules/rtp_rtcp/source/rtp_sender_video_frame_transformer_delegate.cc b/modules/rtp_rtcp/source/rtp_sender_video_frame_transformer_delegate.cc
index 27b6a17..ecf8aed 100644
--- a/modules/rtp_rtcp/source/rtp_sender_video_frame_transformer_delegate.cc
+++ b/modules/rtp_rtcp/source/rtp_sender_video_frame_transformer_delegate.cc
@@ -31,7 +31,8 @@
       absl::optional<VideoCodecType> codec_type,
       uint32_t rtp_timestamp,
       absl::optional<int64_t> expected_retransmission_time_ms,
-      uint32_t ssrc)
+      uint32_t ssrc,
+      std::vector<uint32_t> csrcs)
       : encoded_data_(encoded_image.GetEncodedData()),
         header_(video_header),
         metadata_(header_.GetAsMetadata()),
@@ -44,6 +45,7 @@
         ssrc_(ssrc) {
     RTC_DCHECK_GE(payload_type_, 0);
     RTC_DCHECK_LE(payload_type_, 127);
+    metadata_.SetCsrcs(std::move(csrcs));
   }
 
   ~TransformableVideoSenderFrame() override = default;
@@ -71,9 +73,12 @@
   const VideoFrameMetadata& GetMetadata() const override { return metadata_; }
   void SetMetadata(const VideoFrameMetadata& metadata) override {
     header_.SetFromMetadata(metadata);
+    std::vector<uint32_t> csrcs = metadata.GetCsrcs();
+
     // We have to keep a local copy because GetMetadata() has to return a
     // reference.
     metadata_ = header_.GetAsMetadata();
+    metadata_.SetCsrcs(std::move(csrcs));
   }
 
   const RTPVideoHeader& GetHeader() const { return header_; }
@@ -109,10 +114,12 @@
     RTPSenderVideo* sender,
     rtc::scoped_refptr<FrameTransformerInterface> frame_transformer,
     uint32_t ssrc,
+    std::vector<uint32_t> csrcs,
     TaskQueueFactory* task_queue_factory)
     : sender_(sender),
       frame_transformer_(std::move(frame_transformer)),
       ssrc_(ssrc),
+      csrcs_(csrcs),
       transformation_queue_(task_queue_factory->CreateTaskQueue(
           "video_frame_transformer",
           TaskQueueFactory::Priority::NORMAL)) {}
@@ -131,7 +138,7 @@
     absl::optional<int64_t> expected_retransmission_time_ms) {
   frame_transformer_->Transform(std::make_unique<TransformableVideoSenderFrame>(
       encoded_image, video_header, payload_type, codec_type, rtp_timestamp,
-      expected_retransmission_time_ms, ssrc_));
+      expected_retransmission_time_ms, ssrc_, csrcs_));
   return true;
 }
 
@@ -160,13 +167,14 @@
     return;
   auto* transformed_video_frame =
       static_cast<TransformableVideoSenderFrame*>(transformed_frame.get());
-  sender_->SendVideo(
-      transformed_video_frame->GetPayloadType(),
-      transformed_video_frame->GetCodecType(),
-      transformed_video_frame->GetTimestamp(),
-      transformed_video_frame->GetCaptureTimeMs(),
-      transformed_video_frame->GetData(), transformed_video_frame->GetHeader(),
-      transformed_video_frame->GetExpectedRetransmissionTimeMs());
+  sender_->SendVideo(transformed_video_frame->GetPayloadType(),
+                     transformed_video_frame->GetCodecType(),
+                     transformed_video_frame->GetTimestamp(),
+                     transformed_video_frame->GetCaptureTimeMs(),
+                     transformed_video_frame->GetData(),
+                     transformed_video_frame->GetHeader(),
+                     transformed_video_frame->GetExpectedRetransmissionTimeMs(),
+                     transformed_video_frame->GetMetadata().GetCsrcs());
 }
 
 void RTPSenderVideoFrameTransformerDelegate::SetVideoStructureUnderLock(
@@ -221,7 +229,7 @@
       encoded_image, new_header, original->GetPayloadType(), new_codec_type,
       original->GetTimestamp(),
       absl::nullopt,  // expected_retransmission_time_ms
-      original->GetSsrc());
+      original->GetSsrc(), original->GetMetadata().GetCsrcs());
 }
 
 }  // namespace webrtc
diff --git a/modules/rtp_rtcp/source/rtp_sender_video_frame_transformer_delegate.h b/modules/rtp_rtcp/source/rtp_sender_video_frame_transformer_delegate.h
index 55f7961..085f29b 100644
--- a/modules/rtp_rtcp/source/rtp_sender_video_frame_transformer_delegate.h
+++ b/modules/rtp_rtcp/source/rtp_sender_video_frame_transformer_delegate.h
@@ -12,6 +12,7 @@
 #define MODULES_RTP_RTCP_SOURCE_RTP_SENDER_VIDEO_FRAME_TRANSFORMER_DELEGATE_H_
 
 #include <memory>
+#include <vector>
 
 #include "api/frame_transformer_interface.h"
 #include "api/scoped_refptr.h"
@@ -34,6 +35,7 @@
       RTPSenderVideo* sender,
       rtc::scoped_refptr<FrameTransformerInterface> frame_transformer,
       uint32_t ssrc,
+      std::vector<uint32_t> csrcs,
       TaskQueueFactory* send_transport_queue);
 
   void Init();
@@ -80,6 +82,7 @@
   RTPSenderVideo* sender_ RTC_GUARDED_BY(sender_lock_);
   rtc::scoped_refptr<FrameTransformerInterface> frame_transformer_;
   const uint32_t ssrc_;
+  std::vector<uint32_t> csrcs_;
   // Used when the encoded frames arrives without a current task queue. This can
   // happen if a hardware encoder was used.
   std::unique_ptr<TaskQueueBase, TaskQueueDeleter> transformation_queue_;
diff --git a/modules/rtp_rtcp/source/rtp_sender_video_unittest.cc b/modules/rtp_rtcp/source/rtp_sender_video_unittest.cc
index 72dfd02..0cfa24c 100644
--- a/modules/rtp_rtcp/source/rtp_sender_video_unittest.cc
+++ b/modules/rtp_rtcp/source/rtp_sender_video_unittest.cc
@@ -215,7 +215,7 @@
   hdr.rotation = kVideoRotation_0;
   hdr.frame_type = VideoFrameType::kVideoFrameKey;
   rtp_sender_video_->SendVideo(kPayload, kType, kTimestamp, 0, kFrame, hdr,
-                               kDefaultExpectedRetransmissionTimeMs);
+                               kDefaultExpectedRetransmissionTimeMs, {});
 
   VideoRotation rotation;
   EXPECT_TRUE(
@@ -242,7 +242,7 @@
   hdr.frame_type = VideoFrameType::kVideoFrameKey;
   rtp_sender_video_->SendVideo(kPayload, kType, kTimestamp, kCaptureTimestamp,
                                kFrame, hdr,
-                               kDefaultExpectedRetransmissionTimeMs);
+                               kDefaultExpectedRetransmissionTimeMs, {});
   VideoSendTiming timing;
   EXPECT_TRUE(transport_.last_sent_packet().GetExtension<VideoTimingExtension>(
       &timing));
@@ -261,13 +261,13 @@
   hdr.frame_type = VideoFrameType::kVideoFrameKey;
   EXPECT_TRUE(
       rtp_sender_video_->SendVideo(kPayload, kType, kTimestamp, 0, kFrame, hdr,
-                                   kDefaultExpectedRetransmissionTimeMs));
+                                   kDefaultExpectedRetransmissionTimeMs, {}));
 
   hdr.rotation = kVideoRotation_0;
   hdr.frame_type = VideoFrameType::kVideoFrameDelta;
-  EXPECT_TRUE(
-      rtp_sender_video_->SendVideo(kPayload, kType, kTimestamp + 1, 0, kFrame,
-                                   hdr, kDefaultExpectedRetransmissionTimeMs));
+  EXPECT_TRUE(rtp_sender_video_->SendVideo(
+      kPayload, kType, kTimestamp + 1, 0, kFrame, hdr,
+      kDefaultExpectedRetransmissionTimeMs, {}));
 
   VideoRotation rotation;
   EXPECT_TRUE(
@@ -285,12 +285,12 @@
   hdr.frame_type = VideoFrameType::kVideoFrameKey;
   EXPECT_TRUE(
       rtp_sender_video_->SendVideo(kPayload, kType, kTimestamp, 0, kFrame, hdr,
-                                   kDefaultExpectedRetransmissionTimeMs));
+                                   kDefaultExpectedRetransmissionTimeMs, {}));
 
   hdr.frame_type = VideoFrameType::kVideoFrameDelta;
-  EXPECT_TRUE(
-      rtp_sender_video_->SendVideo(kPayload, kType, kTimestamp + 1, 0, kFrame,
-                                   hdr, kDefaultExpectedRetransmissionTimeMs));
+  EXPECT_TRUE(rtp_sender_video_->SendVideo(
+      kPayload, kType, kTimestamp + 1, 0, kFrame, hdr,
+      kDefaultExpectedRetransmissionTimeMs, {}));
 
   VideoRotation rotation;
   EXPECT_TRUE(
@@ -529,7 +529,7 @@
                                        DecodeTargetIndication::kSwitch};
   hdr.frame_type = VideoFrameType::kVideoFrameKey;
   rtp_sender_video_->SendVideo(kPayload, kType, kTimestamp, 0, kFrame, hdr,
-                               kDefaultExpectedRetransmissionTimeMs);
+                               kDefaultExpectedRetransmissionTimeMs, {});
 
   ASSERT_EQ(transport_.packets_sent(), 1);
   DependencyDescriptor descriptor_key;
@@ -555,7 +555,7 @@
                                        DecodeTargetIndication::kRequired};
   hdr.frame_type = VideoFrameType::kVideoFrameDelta;
   rtp_sender_video_->SendVideo(kPayload, kType, kTimestamp, 0, kFrame, hdr,
-                               kDefaultExpectedRetransmissionTimeMs);
+                               kDefaultExpectedRetransmissionTimeMs, {});
 
   EXPECT_EQ(transport_.packets_sent(), 2);
   DependencyDescriptor descriptor_delta;
@@ -604,7 +604,7 @@
                                        DecodeTargetIndication::kSwitch};
   hdr.frame_type = VideoFrameType::kVideoFrameKey;
   rtp_sender_video_->SendVideo(kPayload, kType, kTimestamp, 0, kFrame, hdr,
-                               kDefaultExpectedRetransmissionTimeMs);
+                               kDefaultExpectedRetransmissionTimeMs, {});
 
   ASSERT_EQ(transport_.packets_sent(), 1);
   DependencyDescriptor descriptor_key;
@@ -620,7 +620,7 @@
                                        DecodeTargetIndication::kRequired};
   hdr.frame_type = VideoFrameType::kVideoFrameDelta;
   rtp_sender_video_->SendVideo(kPayload, kType, kTimestamp, 0, kFrame, hdr,
-                               kDefaultExpectedRetransmissionTimeMs);
+                               kDefaultExpectedRetransmissionTimeMs, {});
 
   EXPECT_EQ(transport_.packets_sent(), 2);
   EXPECT_FALSE(transport_.last_sent_packet()
@@ -649,7 +649,7 @@
   generic.chain_diffs = {2};
   hdr.frame_type = VideoFrameType::kVideoFrameKey;
   rtp_sender_video_->SendVideo(kPayload, kType, kTimestamp, 0, kFrame, hdr,
-                               kDefaultExpectedRetransmissionTimeMs);
+                               kDefaultExpectedRetransmissionTimeMs, {});
 
   ASSERT_EQ(transport_.packets_sent(), 1);
   DependencyDescriptor descriptor_key;
@@ -684,7 +684,7 @@
   generic.chain_diffs = {1};
   hdr.frame_type = VideoFrameType::kVideoFrameKey;
   rtp_sender_video_->SendVideo(kPayload, kType, kTimestamp, 0, kFrame, hdr,
-                               kDefaultExpectedRetransmissionTimeMs);
+                               kDefaultExpectedRetransmissionTimeMs, {});
 
   ASSERT_EQ(transport_.packets_sent(), 1);
   DependencyDescriptor descriptor_key;
@@ -722,7 +722,7 @@
   hdr.frame_type = VideoFrameType::kVideoFrameKey;
   rtp_sender_video_->SetVideoStructure(&video_structure1);
   rtp_sender_video_->SendVideo(kPayload, kType, kTimestamp, 0, kFrame, hdr,
-                               kDefaultExpectedRetransmissionTimeMs);
+                               kDefaultExpectedRetransmissionTimeMs, {});
   // Parse 1st extension.
   ASSERT_EQ(transport_.packets_sent(), 1);
   DependencyDescriptor descriptor_key1;
@@ -738,7 +738,7 @@
                                        DecodeTargetIndication::kNotPresent};
   hdr.frame_type = VideoFrameType::kVideoFrameDelta;
   rtp_sender_video_->SendVideo(kPayload, kType, kTimestamp, 0, kFrame, hdr,
-                               kDefaultExpectedRetransmissionTimeMs);
+                               kDefaultExpectedRetransmissionTimeMs, {});
 
   ASSERT_EQ(transport_.packets_sent(), 2);
   RtpPacket delta_packet = transport_.last_sent_packet();
@@ -750,7 +750,7 @@
   hdr.frame_type = VideoFrameType::kVideoFrameKey;
   rtp_sender_video_->SetVideoStructure(&video_structure2);
   rtp_sender_video_->SendVideo(kPayload, kType, kTimestamp, 0, kFrame, hdr,
-                               kDefaultExpectedRetransmissionTimeMs);
+                               kDefaultExpectedRetransmissionTimeMs, {});
   // Parse the 2nd key frame.
   ASSERT_EQ(transport_.packets_sent(), 3);
   DependencyDescriptor descriptor_key2;
@@ -804,7 +804,7 @@
   EXPECT_CALL(*encryptor,
               Encrypt(_, _, Not(IsEmpty()), ElementsAreArray(kFrame), _, _));
   rtp_sender_video.SendVideo(kPayload, kType, kTimestamp, 0, kFrame, hdr,
-                             kDefaultExpectedRetransmissionTimeMs);
+                             kDefaultExpectedRetransmissionTimeMs, {});
   // Double check packet with the dependency descriptor is sent.
   ASSERT_EQ(transport_.packets_sent(), 1);
   EXPECT_TRUE(transport_.last_sent_packet()
@@ -826,7 +826,7 @@
   generic.dependencies.push_back(kFrameId - 500);
   hdr.frame_type = VideoFrameType::kVideoFrameDelta;
   rtp_sender_video_->SendVideo(kPayload, kType, kTimestamp, 0, kFrame, hdr,
-                               kDefaultExpectedRetransmissionTimeMs);
+                               kDefaultExpectedRetransmissionTimeMs, {});
 
   RtpGenericFrameDescriptor descriptor_wire;
   EXPECT_EQ(1, transport_.packets_sent());
@@ -861,7 +861,7 @@
   hdr.frame_type = VideoFrameType::kVideoFrameDelta;
   rtp_sender_video_->SendVideo(kPayload, VideoCodecType::kVideoCodecVP8,
                                kTimestamp, 0, kFrame, hdr,
-                               kDefaultExpectedRetransmissionTimeMs);
+                               kDefaultExpectedRetransmissionTimeMs, {});
 
   ASSERT_EQ(transport_.packets_sent(), 1);
   // Expect only minimal 1-byte vp8 descriptor was generated.
@@ -898,7 +898,7 @@
   RTPVideoHeader hdr;
   hdr.frame_type = VideoFrameType::kVideoFrameKey;
   rtp_sender_video_->SendVideo(kPayload, kType, kTimestamp, 0, kFrame, hdr,
-                               kDefaultExpectedRetransmissionTimeMs);
+                               kDefaultExpectedRetransmissionTimeMs, {});
 
   VideoLayersAllocation sent_allocation;
   EXPECT_TRUE(
@@ -908,7 +908,7 @@
 
   // Next key frame also have the allocation.
   rtp_sender_video_->SendVideo(kPayload, kType, kTimestamp, 0, kFrame, hdr,
-                               kDefaultExpectedRetransmissionTimeMs);
+                               kDefaultExpectedRetransmissionTimeMs, {});
   EXPECT_TRUE(
       transport_.last_sent_packet()
           .GetExtension<RtpVideoLayersAllocationExtension>(&sent_allocation));
@@ -935,21 +935,21 @@
   RTPVideoHeader hdr;
   hdr.frame_type = VideoFrameType::kVideoFrameKey;
   rtp_sender_video_->SendVideo(kPayload, kType, kTimestamp, 0, kFrame, hdr,
-                               kDefaultExpectedRetransmissionTimeMs);
+                               kDefaultExpectedRetransmissionTimeMs, {});
   EXPECT_TRUE(transport_.last_sent_packet()
                   .HasExtension<RtpVideoLayersAllocationExtension>());
 
   // No allocation sent on delta frame unless it has been updated.
   hdr.frame_type = VideoFrameType::kVideoFrameDelta;
   rtp_sender_video_->SendVideo(kPayload, kType, kTimestamp, 0, kFrame, hdr,
-                               kDefaultExpectedRetransmissionTimeMs);
+                               kDefaultExpectedRetransmissionTimeMs, {});
   EXPECT_FALSE(transport_.last_sent_packet()
                    .HasExtension<RtpVideoLayersAllocationExtension>());
 
   // Update the allocation.
   rtp_sender_video_->SetVideoLayersAllocation(allocation);
   rtp_sender_video_->SendVideo(kPayload, kType, kTimestamp, 0, kFrame, hdr,
-                               kDefaultExpectedRetransmissionTimeMs);
+                               kDefaultExpectedRetransmissionTimeMs, {});
 
   VideoLayersAllocation sent_allocation;
   EXPECT_TRUE(
@@ -984,7 +984,7 @@
   RTPVideoHeader hdr;
   hdr.frame_type = VideoFrameType::kVideoFrameKey;
   rtp_sender_video_->SendVideo(kPayload, kType, kTimestamp, 0, kFrame, hdr,
-                               kDefaultExpectedRetransmissionTimeMs);
+                               kDefaultExpectedRetransmissionTimeMs, {});
   ASSERT_TRUE(transport_.last_sent_packet()
                   .HasExtension<RtpVideoLayersAllocationExtension>());
 
@@ -998,7 +998,7 @@
   rtp_sender_video_->SetVideoLayersAllocation(allocation);
   hdr.frame_type = VideoFrameType::kVideoFrameDelta;
   rtp_sender_video_->SendVideo(kPayload, kType, kTimestamp, 0, kFrame, hdr,
-                               kDefaultExpectedRetransmissionTimeMs);
+                               kDefaultExpectedRetransmissionTimeMs, {});
 
   VideoLayersAllocation sent_allocation;
   EXPECT_TRUE(
@@ -1031,7 +1031,7 @@
   RTPVideoHeader hdr;
   hdr.frame_type = VideoFrameType::kVideoFrameKey;
   rtp_sender_video_->SendVideo(kPayload, kType, kTimestamp, 0, kFrame, hdr,
-                               kDefaultExpectedRetransmissionTimeMs);
+                               kDefaultExpectedRetransmissionTimeMs, {});
   ASSERT_TRUE(transport_.last_sent_packet()
                   .HasExtension<RtpVideoLayersAllocationExtension>());
 
@@ -1040,7 +1040,7 @@
   rtp_sender_video_->SetVideoLayersAllocation(allocation);
   hdr.frame_type = VideoFrameType::kVideoFrameDelta;
   rtp_sender_video_->SendVideo(kPayload, kType, kTimestamp, 0, kFrame, hdr,
-                               kDefaultExpectedRetransmissionTimeMs);
+                               kDefaultExpectedRetransmissionTimeMs, {});
 
   VideoLayersAllocation sent_allocation;
   EXPECT_TRUE(
@@ -1073,7 +1073,7 @@
   RTPVideoHeader hdr;
   hdr.frame_type = VideoFrameType::kVideoFrameKey;
   rtp_sender_video_->SendVideo(kPayload, kType, kTimestamp, 0, kFrame, hdr,
-                               kDefaultExpectedRetransmissionTimeMs);
+                               kDefaultExpectedRetransmissionTimeMs, {});
   ASSERT_TRUE(transport_.last_sent_packet()
                   .HasExtension<RtpVideoLayersAllocationExtension>());
 
@@ -1082,7 +1082,7 @@
   rtp_sender_video_->SetVideoLayersAllocation(allocation);
   hdr.frame_type = VideoFrameType::kVideoFrameDelta;
   rtp_sender_video_->SendVideo(kPayload, kType, kTimestamp, 0, kFrame, hdr,
-                               kDefaultExpectedRetransmissionTimeMs);
+                               kDefaultExpectedRetransmissionTimeMs, {});
 
   VideoLayersAllocation sent_allocation;
   EXPECT_TRUE(
@@ -1110,7 +1110,7 @@
   RTPVideoHeader hdr;
   hdr.frame_type = VideoFrameType::kVideoFrameDelta;
   rtp_sender_video_->SendVideo(kPayload, kType, kTimestamp, 0, kFrame, hdr,
-                               kDefaultExpectedRetransmissionTimeMs);
+                               kDefaultExpectedRetransmissionTimeMs, {});
 
   VideoLayersAllocation sent_allocation;
   EXPECT_TRUE(
@@ -1120,14 +1120,14 @@
 
   // VideoLayersAllocation not sent on the next delta frame.
   rtp_sender_video_->SendVideo(kPayload, kType, kTimestamp, 0, kFrame, hdr,
-                               kDefaultExpectedRetransmissionTimeMs);
+                               kDefaultExpectedRetransmissionTimeMs, {});
   EXPECT_FALSE(transport_.last_sent_packet()
                    .HasExtension<RtpVideoLayersAllocationExtension>());
 
   // Update allocation. VideoLayesAllocation should be sent on the next frame.
   rtp_sender_video_->SetVideoLayersAllocation(allocation);
   rtp_sender_video_->SendVideo(kPayload, kType, kTimestamp, 0, kFrame, hdr,
-                               kDefaultExpectedRetransmissionTimeMs);
+                               kDefaultExpectedRetransmissionTimeMs, {});
   EXPECT_TRUE(
       transport_.last_sent_packet()
           .GetExtension<RtpVideoLayersAllocationExtension>(&sent_allocation));
@@ -1157,14 +1157,14 @@
   vp8_header.temporalIdx = 1;
 
   rtp_sender_video_->SendVideo(kPayload, kType, kTimestamp, 0, kFrame, hdr,
-                               kDefaultExpectedRetransmissionTimeMs);
+                               kDefaultExpectedRetransmissionTimeMs, {});
   EXPECT_FALSE(transport_.last_sent_packet()
                    .HasExtension<RtpVideoLayersAllocationExtension>());
 
   // Send a delta frame on tl0.
   vp8_header.temporalIdx = 0;
   rtp_sender_video_->SendVideo(kPayload, kType, kTimestamp, 0, kFrame, hdr,
-                               kDefaultExpectedRetransmissionTimeMs);
+                               kDefaultExpectedRetransmissionTimeMs, {});
   EXPECT_TRUE(transport_.last_sent_packet()
                   .HasExtension<RtpVideoLayersAllocationExtension>());
 }
@@ -1179,7 +1179,7 @@
   hdr.frame_type = VideoFrameType::kVideoFrameKey;
   rtp_sender_video_->SendVideo(kPayload, kType, kTimestamp,
                                kAbsoluteCaptureTimestampMs, kFrame, hdr,
-                               kDefaultExpectedRetransmissionTimeMs);
+                               kDefaultExpectedRetransmissionTimeMs, {});
 
   absl::optional<AbsoluteCaptureTime> absolute_capture_time;
 
@@ -1214,7 +1214,7 @@
   hdr.frame_type = VideoFrameType::kVideoFrameKey;
   rtp_sender_video_->SendVideo(kPayload, kType, kTimestamp,
                                /*capture_time_ms=*/0, kFrame, hdr,
-                               kDefaultExpectedRetransmissionTimeMs);
+                               kDefaultExpectedRetransmissionTimeMs, {});
   // No absolute capture time should be set as the capture_time_ms was the
   // default value.
   for (const RtpPacketReceived& packet : transport_.sent_packets()) {
@@ -1238,7 +1238,7 @@
   hdr.frame_type = VideoFrameType::kVideoFrameKey;
   rtp_sender_video_->SendVideo(kPayload, kType, kTimestamp,
                                kAbsoluteCaptureTimestampMs, kFrame, hdr,
-                               kDefaultExpectedRetransmissionTimeMs);
+                               kDefaultExpectedRetransmissionTimeMs, {});
 
   absl::optional<AbsoluteCaptureTime> absolute_capture_time;
 
@@ -1276,7 +1276,7 @@
   hdr.absolute_capture_time = kAbsoluteCaptureTime;
   rtp_sender_video_->SendVideo(kPayload, kType, kTimestamp,
                                /*capture_time_ms=*/789, kFrame, hdr,
-                               kDefaultExpectedRetransmissionTimeMs);
+                               kDefaultExpectedRetransmissionTimeMs, {});
 
   absl::optional<AbsoluteCaptureTime> absolute_capture_time;
 
@@ -1311,7 +1311,7 @@
   vp8_header.temporalIdx = 0;
 
   rtp_sender_video_->SendVideo(kPayload, kType, kTimestamp, 0, kFrame, hdr,
-                               kDefaultExpectedRetransmissionTimeMs);
+                               kDefaultExpectedRetransmissionTimeMs, {});
   EXPECT_FALSE(
       transport_.last_sent_packet().HasExtension<PlayoutDelayLimits>());
 
@@ -1320,7 +1320,7 @@
   hdr.frame_type = VideoFrameType::kVideoFrameDelta;
   vp8_header.temporalIdx = 1;
   rtp_sender_video_->SendVideo(kPayload, kType, kTimestamp, 0, kFrame, hdr,
-                               kDefaultExpectedRetransmissionTimeMs);
+                               kDefaultExpectedRetransmissionTimeMs, {});
   VideoPlayoutDelay received_delay = VideoPlayoutDelay();
   ASSERT_TRUE(transport_.last_sent_packet().GetExtension<PlayoutDelayLimits>(
       &received_delay));
@@ -1331,7 +1331,7 @@
   hdr.playout_delay = VideoPlayoutDelay();  // Indicates "no change".
   vp8_header.temporalIdx = 0;
   rtp_sender_video_->SendVideo(kPayload, kType, kTimestamp, 0, kFrame, hdr,
-                               kDefaultExpectedRetransmissionTimeMs);
+                               kDefaultExpectedRetransmissionTimeMs, {});
   ASSERT_TRUE(transport_.last_sent_packet().GetExtension<PlayoutDelayLimits>(
       &received_delay));
   EXPECT_EQ(received_delay, kExpectedDelay);
@@ -1339,14 +1339,14 @@
   // The next frame does not need the extensions since it's delivery has
   // already been guaranteed.
   rtp_sender_video_->SendVideo(kPayload, kType, kTimestamp, 0, kFrame, hdr,
-                               kDefaultExpectedRetransmissionTimeMs);
+                               kDefaultExpectedRetransmissionTimeMs, {});
   EXPECT_FALSE(
       transport_.last_sent_packet().HasExtension<PlayoutDelayLimits>());
 
   // Insert key-frame, we need to refresh the state here.
   hdr.frame_type = VideoFrameType::kVideoFrameKey;
   rtp_sender_video_->SendVideo(kPayload, kType, kTimestamp, 0, kFrame, hdr,
-                               kDefaultExpectedRetransmissionTimeMs);
+                               kDefaultExpectedRetransmissionTimeMs, {});
   ASSERT_TRUE(transport_.last_sent_packet().GetExtension<PlayoutDelayLimits>(
       &received_delay));
   EXPECT_EQ(received_delay, kExpectedDelay);
@@ -1362,7 +1362,7 @@
   video_header.frame_type = VideoFrameType::kVideoFrameKey;
   ASSERT_TRUE(rtp_sender_video_->SendVideo(kPayloadType, kCodecType, 1234, 4321,
                                            kPayload, video_header,
-                                           absl::nullopt));
+                                           absl::nullopt, {}));
 
   rtc::ArrayView<const uint8_t> sent_payload =
       transport_.last_sent_packet().payload();
@@ -1376,7 +1376,7 @@
   video_header.frame_type = VideoFrameType::kVideoFrameDelta;
   ASSERT_TRUE(rtp_sender_video_->SendVideo(kPayloadType, kCodecType, 1234, 4321,
                                            kDeltaPayload, video_header,
-                                           absl::nullopt));
+                                           absl::nullopt, {}));
 
   sent_payload = sent_payload = transport_.last_sent_packet().payload();
   generic_header = sent_payload[0];
@@ -1394,7 +1394,7 @@
   video_header.frame_type = VideoFrameType::kVideoFrameKey;
   ASSERT_TRUE(rtp_sender_video_->SendVideo(kPayloadType, absl::nullopt, 1234,
                                            4321, kPayload, video_header,
-                                           absl::nullopt));
+                                           absl::nullopt, {}));
 
   rtc::ArrayView<const uint8_t> sent_payload =
       transport_.last_sent_packet().payload();