Update RTP timestamp based on capture timestamp when audio send stream is resumed.

This replaces the previous approach, in which the timestamp kept advancing while the capturer was running but the send stream was stopped, with a more general approach that also works when the capturer is paused.

This change assumes that the input sample rate and frame size are the same before and after the stream is paused.
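
For example, at 48 kHz with 10 ms (480-sample) frames, a 10010 ms gap
between the capture timestamps of the last frame before the pause and the
first frame after it advances the RTP timestamp by
10010 * 48000 / 1000 / 480 - 1 = 1000 whole frames, i.e. 480000 ticks.
A minimal standalone sketch of that catch-up (illustrative only; the
function name and parameters are hypothetical, the arithmetic mirrors the
change in ChannelSend::ProcessAndEncodeAudio below):

  #include <algorithm>
  #include <cstdint>

  // Returns the RTP timestamp advanced by the whole frames that elapsed
  // between two capture timestamps, assuming the sample rate and frame
  // size did not change across the pause.
  uint32_t CatchUpRtpTimestamp(uint32_t timestamp,
                               int64_t last_capture_ms,
                               int64_t capture_ms,
                               int sample_rate_hz,
                               int samples_per_frame) {
    int64_t diff_ms = capture_ms - last_capture_ms;
    // Truncate to whole frames and subtract one, since `timestamp` was
    // already advanced past the last frame sent before the pause.
    int64_t diff_frames =
        diff_ms * sample_rate_hz / 1000 / samples_per_frame - 1;
    return timestamp +
           static_cast<uint32_t>(
               std::max<int64_t>(diff_frames * samples_per_frame, 0));
  }

The new unit test observes this as a 10020 ms gap between the RTP
timestamps of the two sent packets: 10000 ms of catch-up plus the 20 ms of
audio sent before the pause.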

Bug: webrtc:12397
Change-Id: I3b03741cd6d3285cbc9aee3893800729852e6cfa
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/291526
Commit-Queue: Jakob Ivarsson <jakobi@webrtc.org>
Reviewed-by: Henrik Lundin <henrik.lundin@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#39213}
diff --git a/audio/channel_send.cc b/audio/channel_send.cc
index 5b0858a..26b5bf4 100644
--- a/audio/channel_send.cc
+++ b/audio/channel_send.cc
@@ -200,6 +200,8 @@
 
   // This is just an offset, RTP module will add its own random offset.
   uint32_t timestamp_ RTC_GUARDED_BY(audio_thread_race_checker_) = 0;
+  absl::optional<int64_t> last_capture_timestamp_ms_
+      RTC_GUARDED_BY(audio_thread_race_checker_);
 
   RmsLevel rms_level_ RTC_GUARDED_BY(encoder_queue_);
   bool input_mute_ RTC_GUARDED_BY(volume_settings_mutex_) = false;
@@ -218,6 +220,7 @@
 
   std::atomic<bool> include_audio_level_indication_ = false;
   std::atomic<bool> encoder_queue_is_active_ = false;
+  std::atomic<bool> first_frame_ = true;
 
   // E2EE Audio Frame Encryption
   rtc::scoped_refptr<FrameEncryptorInterface> frame_encryptor_
@@ -529,6 +532,7 @@
   RTC_DCHECK_EQ(0, ret);
 
   // It is now OK to start processing on the encoder task queue.
+  first_frame_.store(true);
   encoder_queue_is_active_.store(true);
 }
 
@@ -804,12 +808,32 @@
   RTC_DCHECK_GT(audio_frame->samples_per_channel_, 0);
   RTC_DCHECK_LE(audio_frame->num_channels_, 8);
 
-  audio_frame->timestamp_ = timestamp_;
-  timestamp_ += audio_frame->samples_per_channel_;
   if (!encoder_queue_is_active_.load()) {
     return;
   }
 
+  // Update `timestamp_` based on the capture timestamp for the first frame
+  // after sending is resumed.
+  if (first_frame_.load()) {
+    first_frame_.store(false);
+    if (last_capture_timestamp_ms_ &&
+        audio_frame->absolute_capture_timestamp_ms()) {
+      int64_t diff_ms = *audio_frame->absolute_capture_timestamp_ms() -
+                        *last_capture_timestamp_ms_;
+      // Truncate to whole frames and subtract one since `timestamp_` was
+      // incremented after the last frame.
+      int64_t diff_frames = diff_ms * audio_frame->sample_rate_hz() / 1000 /
+                                audio_frame->samples_per_channel() -
+                            1;
+      timestamp_ += std::max<int64_t>(
+          diff_frames * audio_frame->samples_per_channel(), 0);
+    }
+  }
+
+  audio_frame->timestamp_ = timestamp_;
+  timestamp_ += audio_frame->samples_per_channel_;
+  last_capture_timestamp_ms_ = audio_frame->absolute_capture_timestamp_ms();
+
   // Profile time between when the audio frame is added to the task queue and
   // when the task is actually executed.
   audio_frame->UpdateProfileTimeStamp();
diff --git a/audio/channel_send_unittest.cc b/audio/channel_send_unittest.cc
index 50d8368..97882b9 100644
--- a/audio/channel_send_unittest.cc
+++ b/audio/channel_send_unittest.cc
@@ -28,9 +28,15 @@
 namespace voe {
 namespace {
 
+using ::testing::Invoke;
+using ::testing::NiceMock;
+using ::testing::Return;
+
 constexpr int kRtcpIntervalMs = 1000;
 constexpr int kSsrc = 333;
 constexpr int kPayloadType = 1;
+constexpr int kSampleRateHz = 48000;
+constexpr int kRtpRateHz = 48000;
 
 BitrateConstraints GetBitrateConfig() {
   BitrateConstraints bitrate_config;
@@ -40,14 +46,6 @@
   return bitrate_config;
 }
 
-std::unique_ptr<AudioFrame> CreateAudioFrame() {
-  auto frame = std::make_unique<AudioFrame>();
-  frame->samples_per_channel_ = 480;
-  frame->sample_rate_hz_ = 48000;
-  frame->num_channels_ = 1;
-  return frame;
-}
-
 class ChannelSendTest : public ::testing::Test {
  protected:
   ChannelSendTest()
@@ -60,52 +58,92 @@
                 .task_queue_factory = time_controller_.GetTaskQueueFactory(),
                 .trials = &field_trials_,
             }) {
-    transport_controller_.EnsureStarted();
-  }
-
-  std::unique_ptr<ChannelSendInterface> CreateChannelSend() {
-    return voe::CreateChannelSend(
+    channel_ = voe::CreateChannelSend(
         time_controller_.GetClock(), time_controller_.GetTaskQueueFactory(),
         &transport_, nullptr, &event_log_, nullptr, crypto_options_, false,
         kRtcpIntervalMs, kSsrc, nullptr, nullptr, field_trials_);
+    encoder_factory_ = CreateBuiltinAudioEncoderFactory();
+    std::unique_ptr<AudioEncoder> encoder = encoder_factory_->MakeAudioEncoder(
+        kPayloadType, SdpAudioFormat("opus", kRtpRateHz, 2), {});
+    channel_->SetEncoder(kPayloadType, std::move(encoder));
+    transport_controller_.EnsureStarted();
+    channel_->RegisterSenderCongestionControlObjects(&transport_controller_,
+                                                     nullptr);
+    ON_CALL(transport_, SendRtcp).WillByDefault(Return(true));
+    ON_CALL(transport_, SendRtp).WillByDefault(Return(true));
+  }
+
+  std::unique_ptr<AudioFrame> CreateAudioFrame() {
+    auto frame = std::make_unique<AudioFrame>();
+    frame->sample_rate_hz_ = kSampleRateHz;
+    frame->samples_per_channel_ = kSampleRateHz / 100;
+    frame->num_channels_ = 1;
+    frame->set_absolute_capture_timestamp_ms(
+        time_controller_.GetClock()->TimeInMilliseconds());
+    return frame;
+  }
+
+  void ProcessNextFrame() {
+    channel_->ProcessAndEncodeAudio(CreateAudioFrame());
+    // Advance time to process the task queue.
+    time_controller_.AdvanceTime(TimeDelta::Millis(10));
   }
 
   GlobalSimulatedTimeController time_controller_;
   webrtc::test::ScopedKeyValueConfig field_trials_;
   RtcEventLogNull event_log_;
-  MockTransport transport_;
-  RtpTransportControllerSend transport_controller_;
+  NiceMock<MockTransport> transport_;
   CryptoOptions crypto_options_;
+  RtpTransportControllerSend transport_controller_;
+  std::unique_ptr<ChannelSendInterface> channel_;
+  rtc::scoped_refptr<AudioEncoderFactory> encoder_factory_;
 };
 
 TEST_F(ChannelSendTest, StopSendShouldResetEncoder) {
-  std::unique_ptr<ChannelSendInterface> channel = CreateChannelSend();
-  rtc::scoped_refptr<AudioEncoderFactory> encoder_factory =
-      CreateBuiltinAudioEncoderFactory();
-  std::unique_ptr<AudioEncoder> encoder = encoder_factory->MakeAudioEncoder(
-      kPayloadType, SdpAudioFormat("opus", 48000, 2), {});
-  channel->SetEncoder(kPayloadType, std::move(encoder));
-  channel->RegisterSenderCongestionControlObjects(&transport_controller_,
-                                                  nullptr);
-  channel->StartSend();
-
+  channel_->StartSend();
   // Insert two frames which should trigger a new packet.
   EXPECT_CALL(transport_, SendRtp).Times(1);
-  channel->ProcessAndEncodeAudio(CreateAudioFrame());
-  time_controller_.AdvanceTime(webrtc::TimeDelta::Zero());
-  channel->ProcessAndEncodeAudio(CreateAudioFrame());
-  time_controller_.AdvanceTime(webrtc::TimeDelta::Zero());
+  ProcessNextFrame();
+  ProcessNextFrame();
 
   EXPECT_CALL(transport_, SendRtp).Times(0);
-  channel->ProcessAndEncodeAudio(CreateAudioFrame());
-  time_controller_.AdvanceTime(webrtc::TimeDelta::Zero());
+  ProcessNextFrame();
   // StopSend should clear the previous audio frame stored in the encoder.
-  channel->StopSend();
-  channel->StartSend();
+  channel_->StopSend();
+  channel_->StartSend();
   // The following frame should not trigger a new packet since the encoder
   // needs 20 ms audio.
-  channel->ProcessAndEncodeAudio(CreateAudioFrame());
-  time_controller_.AdvanceTime(webrtc::TimeDelta::Zero());
+  EXPECT_CALL(transport_, SendRtp).Times(0);
+  ProcessNextFrame();
+}
+
+TEST_F(ChannelSendTest, IncreaseRtpTimestampByPauseDuration) {
+  channel_->StartSend();
+  uint32_t timestamp;
+  int sent_packets = 0;
+  auto send_rtp = [&](const uint8_t* data, size_t length,
+                      const PacketOptions& options) {
+    ++sent_packets;
+    RtpPacketReceived packet;
+    packet.Parse(data, length);
+    timestamp = packet.Timestamp();
+    return true;
+  };
+  EXPECT_CALL(transport_, SendRtp).WillRepeatedly(Invoke(send_rtp));
+  ProcessNextFrame();
+  ProcessNextFrame();
+  EXPECT_EQ(sent_packets, 1);
+  uint32_t first_timestamp = timestamp;
+  channel_->StopSend();
+  time_controller_.AdvanceTime(TimeDelta::Seconds(10));
+  channel_->StartSend();
+
+  ProcessNextFrame();
+  ProcessNextFrame();
+  EXPECT_EQ(sent_packets, 2);
+  int64_t timestamp_gap_ms =
+      static_cast<int64_t>(timestamp - first_timestamp) * 1000 / kRtpRateHz;
+  EXPECT_EQ(timestamp_gap_ms, 10020);
 }
 
 }  // namespace