Add absolute capture time to audio sender path.

WebRTC prototype:
https://webrtc-review.googlesource.com/c/src/+/158520

Bug: webrtc:10739
Change-Id: I07b7a60602b41dc04292a91923e878a8d753486f
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/161732
Reviewed-by: Minyue Li <minyue@webrtc.org>
Reviewed-by: Danil Chapovalov <danilchap@webrtc.org>
Commit-Queue: Ruslan Burakov <kuddai@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#30335}
diff --git a/modules/rtp_rtcp/source/rtp_sender_audio.cc b/modules/rtp_rtcp/source/rtp_sender_audio.cc
index 67e98f6..4a47d33 100644
--- a/modules/rtp_rtcp/source/rtp_sender_audio.cc
+++ b/modules/rtp_rtcp/source/rtp_sender_audio.cc
@@ -16,15 +16,21 @@
 #include <utility>
 
 #include "absl/strings/match.h"
+#include "absl/types/optional.h"
 #include "api/audio_codecs/audio_format.h"
+#include "api/rtp_headers.h"
+#include "modules/audio_coding/include/audio_coding_module_typedefs.h"
 #include "modules/rtp_rtcp/include/rtp_rtcp_defines.h"
+#include "modules/rtp_rtcp/source/absolute_capture_time_sender.h"
 #include "modules/rtp_rtcp/source/byte_io.h"
 #include "modules/rtp_rtcp/source/rtp_header_extensions.h"
 #include "modules/rtp_rtcp/source/rtp_packet.h"
 #include "modules/rtp_rtcp/source/rtp_packet_to_send.h"
+#include "modules/rtp_rtcp/source/time_util.h"
 #include "rtc_base/checks.h"
 #include "rtc_base/logging.h"
 #include "rtc_base/trace_event.h"
+#include "system_wrappers/include/ntp_time.h"
 
 namespace webrtc {
 
@@ -46,7 +52,9 @@
 }  // namespace
 
 RTPSenderAudio::RTPSenderAudio(Clock* clock, RTPSender* rtp_sender)
-    : clock_(clock), rtp_sender_(rtp_sender) {
+    : clock_(clock),
+      rtp_sender_(rtp_sender),
+      absolute_capture_time_sender_(clock) {
   RTC_DCHECK(clock_);
 }
 
@@ -83,6 +91,10 @@
     dtmf_payload_type_ = payload_type;
     dtmf_payload_freq_ = frequency;
     return 0;
+  } else if (payload_name == "audio") {
+    rtc::CritScope cs(&send_audio_critsect_);
+    encoder_rtp_timestamp_frequency_ = frequency;
+    return 0;
   }
   return 0;
 }
@@ -135,7 +147,19 @@
                                uint32_t rtp_timestamp,
                                const uint8_t* payload_data,
                                size_t payload_size) {
-  #if RTC_TRACE_EVENTS_ENABLED
+  return SendAudio(frame_type, payload_type, rtp_timestamp, payload_data,
+                   payload_size,
+                   // TODO(bugs.webrtc.org/10739) replace once plumbed.
+                   /*absolute_capture_timestamp_ms=*/0);
+}
+
+bool RTPSenderAudio::SendAudio(AudioFrameType frame_type,
+                               int8_t payload_type,
+                               uint32_t rtp_timestamp,
+                               const uint8_t* payload_data,
+                               size_t payload_size,
+                               int64_t absolute_capture_timestamp_ms) {
+#if RTC_TRACE_EVENTS_ENABLED
   TRACE_EVENT_ASYNC_STEP1("webrtc", "Audio", rtp_timestamp, "Send", "type",
                           FrameTypeToString(frame_type));
   #endif
@@ -148,10 +172,12 @@
   constexpr int kDtmfIntervalTimeMs = 50;
   uint8_t audio_level_dbov = 0;
   uint32_t dtmf_payload_freq = 0;
+  absl::optional<uint32_t> encoder_rtp_timestamp_frequency;
   {
     rtc::CritScope cs(&send_audio_critsect_);
     audio_level_dbov = audio_level_dbov_;
     dtmf_payload_freq = dtmf_payload_freq_;
+    encoder_rtp_timestamp_frequency = encoder_rtp_timestamp_frequency_;
   }
 
   // Check if we have pending DTMFs to send
@@ -244,6 +270,23 @@
   packet->SetExtension<AudioLevel>(
       frame_type == AudioFrameType::kAudioFrameSpeech, audio_level_dbov);
 
+  // Send absolute capture time periodically in order to optimize and save
+  // network traffic. Missing absolute capture times can be interpolated on the
+  // receiving end if sending intervals are small enough.
+  auto absolute_capture_time = absolute_capture_time_sender_.OnSendPacket(
+      AbsoluteCaptureTimeSender::GetSource(packet->Ssrc(), packet->Csrcs()),
+      packet->Timestamp(),
+      // Replace a missing value with 0 (an invalid frequency); this triggers
+      // sending of the absolute capture time.
+      encoder_rtp_timestamp_frequency.value_or(0),
+      Int64MsToUQ32x32(absolute_capture_timestamp_ms + NtpOffsetMs()),
+      /*estimated_capture_clock_offset=*/absl::nullopt);
+  if (absolute_capture_time) {
+    // SetExtension() also checks that the extension was registered during SDP
+    // negotiation; if it was not, the setter does nothing.
+    packet->SetExtension<AbsoluteCaptureTimeExtension>(*absolute_capture_time);
+  }
+
   uint8_t* payload = packet->AllocatePayload(payload_size);
   if (!payload)  // Too large payload buffer.
     return false;
diff --git a/modules/rtp_rtcp/source/rtp_sender_audio.h b/modules/rtp_rtcp/source/rtp_sender_audio.h
index ccc2637..c2d8074 100644
--- a/modules/rtp_rtcp/source/rtp_sender_audio.h
+++ b/modules/rtp_rtcp/source/rtp_sender_audio.h
@@ -18,6 +18,7 @@
 
 #include "absl/strings/string_view.h"
 #include "modules/audio_coding/include/audio_coding_module_typedefs.h"
+#include "modules/rtp_rtcp/source/absolute_capture_time_sender.h"
 #include "modules/rtp_rtcp/source/dtmf_queue.h"
 #include "modules/rtp_rtcp/source/rtp_sender.h"
 #include "rtc_base/constructor_magic.h"
@@ -41,10 +42,17 @@
 
   bool SendAudio(AudioFrameType frame_type,
                  int8_t payload_type,
-                 uint32_t capture_timestamp,
+                 uint32_t rtp_timestamp,
                  const uint8_t* payload_data,
                  size_t payload_size);
 
+  bool SendAudio(AudioFrameType frame_type,
+                 int8_t payload_type,
+                 uint32_t rtp_timestamp,
+                 const uint8_t* payload_data,
+                 size_t payload_size,
+                 int64_t absolute_capture_timestamp_ms);
+
   // Store the audio level in dBov for
   // header-extension-for-audio-level-indication.
   // Valid range is [0,100]. Actual value is negative.
@@ -93,6 +101,11 @@
   uint8_t audio_level_dbov_ RTC_GUARDED_BY(send_audio_critsect_) = 0;
   OneTimeEvent first_packet_sent_;
 
+  absl::optional<uint32_t> encoder_rtp_timestamp_frequency_
+      RTC_GUARDED_BY(send_audio_critsect_);
+
+  AbsoluteCaptureTimeSender absolute_capture_time_sender_;
+
   RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(RTPSenderAudio);
 };
 
diff --git a/modules/rtp_rtcp/source/rtp_sender_audio_unittest.cc b/modules/rtp_rtcp/source/rtp_sender_audio_unittest.cc
index f4840d1..3e35f42 100644
--- a/modules/rtp_rtcp/source/rtp_sender_audio_unittest.cc
+++ b/modules/rtp_rtcp/source/rtp_sender_audio_unittest.cc
@@ -18,6 +18,7 @@
 #include "modules/rtp_rtcp/include/rtp_rtcp_defines.h"
 #include "modules/rtp_rtcp/source/rtp_header_extensions.h"
 #include "modules/rtp_rtcp/source/rtp_packet_received.h"
+#include "modules/rtp_rtcp/source/time_util.h"
 #include "test/gmock.h"
 #include "test/gtest.h"
 
@@ -26,6 +27,7 @@
 namespace {
 enum : int {  // The first valid value is 1.
   kAudioLevelExtensionId = 1,
+  kAbsoluteCaptureTimeExtensionId = 2,
 };
 
 const uint16_t kSeqNum = 33;
@@ -39,6 +41,8 @@
  public:
   LoopbackTransportTest() {
     receivers_extensions_.Register<AudioLevel>(kAudioLevelExtensionId);
+    receivers_extensions_.Register<AbsoluteCaptureTimeExtension>(
+        kAbsoluteCaptureTimeExtensionId);
   }
 
   bool SendRtp(const uint8_t* data,
@@ -90,7 +94,8 @@
 
   ASSERT_TRUE(rtp_sender_audio_.SendAudio(AudioFrameType::kAudioFrameCN,
                                           payload_type, 4321, payload,
-                                          sizeof(payload)));
+                                          sizeof(payload),
+                                          /*absolute_capture_timestamp_ms=*/0));
 
   auto sent_payload = transport_.last_sent_packet().payload();
   EXPECT_THAT(sent_payload, ElementsAreArray(payload));
@@ -110,7 +115,8 @@
 
   ASSERT_TRUE(rtp_sender_audio_.SendAudio(AudioFrameType::kAudioFrameCN,
                                           payload_type, 4321, payload,
-                                          sizeof(payload)));
+                                          sizeof(payload),
+                                          /*absolute_capture_timestamp_ms=*/0));
 
   auto sent_payload = transport_.last_sent_packet().payload();
   EXPECT_THAT(sent_payload, ElementsAreArray(payload));
@@ -123,6 +129,44 @@
   EXPECT_FALSE(voice_activity);
 }
 
+TEST_F(RtpSenderAudioTest, SendAudioWithoutAbsoluteCaptureTime) {
+  // The sender-side extension is not registered here, so it must not be sent.
+  constexpr uint32_t kCaptureTimeMs = 521;
+  constexpr uint8_t kAudioPayloadType = 127;
+  ASSERT_EQ(0, rtp_sender_audio_.RegisterAudioPayload(
+                   "audio", kAudioPayloadType, 48000, 0, 1500));
+  uint8_t frame[] = {47, 11, 32, 93, 89};
+
+  ASSERT_TRUE(rtp_sender_audio_.SendAudio(AudioFrameType::kAudioFrameCN,
+                                          kAudioPayloadType, 4321, frame,
+                                          sizeof(frame), kCaptureTimeMs));
+
+  const auto& sent_packet = transport_.last_sent_packet();
+  EXPECT_FALSE(sent_packet.HasExtension<AbsoluteCaptureTimeExtension>());
+}
+
+TEST_F(RtpSenderAudioTest, SendAudioWithAbsoluteCaptureTime) {
+  rtp_module_->RegisterRtpHeaderExtension(AbsoluteCaptureTimeExtension::kUri,
+                                          kAbsoluteCaptureTimeExtensionId);
+  constexpr uint32_t kAbsoluteCaptureTimestampMs = 521;
+  const uint8_t payload_type = 127;
+  ASSERT_EQ(0, rtp_sender_audio_.RegisterAudioPayload(
+                   "audio", payload_type, 48000, 0, 1500));
+  uint8_t payload[] = {47, 11, 32, 93, 89};
+
+  ASSERT_TRUE(rtp_sender_audio_.SendAudio(
+      AudioFrameType::kAudioFrameCN, payload_type, 4321, payload,
+      sizeof(payload), kAbsoluteCaptureTimestampMs));
+
+  auto absolute_capture_time =
+      transport_.last_sent_packet()
+          .GetExtension<AbsoluteCaptureTimeExtension>();
+  // Fatal assertion: the optional is dereferenced by the check right below.
+  ASSERT_TRUE(absolute_capture_time);
+  EXPECT_EQ(absolute_capture_time->absolute_capture_timestamp,
+            Int64MsToUQ32x32(kAbsoluteCaptureTimestampMs + NtpOffsetMs()));
+}
+
 // As RFC4733, named telephone events are carried as part of the audio stream
 // and must use the same sequence number and timestamp base as the regular
 // audio channel.
@@ -148,22 +192,25 @@
   // During start, it takes the starting timestamp as last sent timestamp.
   // The duration is calculated as the difference of current and last sent
   // timestamp. So for first call it will skip since the duration is zero.
-  ASSERT_TRUE(rtp_sender_audio_.SendAudio(AudioFrameType::kEmptyFrame,
-                                          kPayloadType, capture_timestamp,
-                                          nullptr, 0));
+  ASSERT_TRUE(rtp_sender_audio_.SendAudio(
+      AudioFrameType::kEmptyFrame, kPayloadType, capture_timestamp, nullptr, 0,
+      /*absolute_capture_timestamp_ms=*/0));
+
   // DTMF Sample Length is (Frequency/1000) * Duration.
   // So in this case, it is (8000/1000) * 500 = 4000.
   // Sending it as two packets.
-  ASSERT_TRUE(
-      rtp_sender_audio_.SendAudio(AudioFrameType::kEmptyFrame, kPayloadType,
-                                  capture_timestamp + 2000, nullptr, 0));
+  ASSERT_TRUE(rtp_sender_audio_.SendAudio(AudioFrameType::kEmptyFrame,
+                                          kPayloadType,
+                                          capture_timestamp + 2000, nullptr, 0,
+                                          /*absolute_capture_timestamp_ms=*/0));
 
   // Marker Bit should be set to 1 for first packet.
   EXPECT_TRUE(transport_.last_sent_packet().Marker());
 
-  ASSERT_TRUE(
-      rtp_sender_audio_.SendAudio(AudioFrameType::kEmptyFrame, kPayloadType,
-                                  capture_timestamp + 4000, nullptr, 0));
+  ASSERT_TRUE(rtp_sender_audio_.SendAudio(AudioFrameType::kEmptyFrame,
+                                          kPayloadType,
+                                          capture_timestamp + 4000, nullptr, 0,
+                                          /*absolute_capture_timestamp_ms=*/0));
   // Marker Bit should be set to 0 for rest of the packets.
   EXPECT_FALSE(transport_.last_sent_packet().Marker());
 }