Add latency to remote source api.

Latency corresponds to base minimum delay on NetEq.

Bug: webrtc:10287
Change-Id: I538d202e3e4fe07b779c46bf560e2fde38e0468e
Reviewed-on: https://webrtc-review.googlesource.com/c/121704
Commit-Queue: Ruslan Burakov <kuddai@google.com>
Reviewed-by: Steve Anton <steveanton@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#26724}
diff --git a/api/media_stream_interface.cc b/api/media_stream_interface.cc
index 73566c4..b55a840 100644
--- a/api/media_stream_interface.cc
+++ b/api/media_stream_interface.cc
@@ -32,4 +32,8 @@
   return {};
 }
 
+double AudioSourceInterface::GetLatency() const {
+  return 0.0;
+}
+
 }  // namespace webrtc
diff --git a/api/media_stream_interface.h b/api/media_stream_interface.h
index b077480..e520361 100644
--- a/api/media_stream_interface.h
+++ b/api/media_stream_interface.h
@@ -201,6 +201,12 @@
   // be applied in the track in a way that does not affect clones of the track.
   virtual void SetVolume(double volume) {}
 
+  // Sets the minimum latency of the remote source until audio playout. Actual
+  // observed latency may differ depending on the source. |latency| is in the
+  // range of [0.0, 10.0] seconds.
+  virtual void SetLatency(double latency) {}
+  virtual double GetLatency() const;
+
   // Registers/unregisters observers to the audio source.
   virtual void RegisterAudioObserver(AudioObserver* observer) {}
   virtual void UnregisterAudioObserver(AudioObserver* observer) {}
diff --git a/media/base/fake_media_engine.cc b/media/base/fake_media_engine.cc
index 70fabc5..8297902 100644
--- a/media/base/fake_media_engine.cc
+++ b/media/base/fake_media_engine.cc
@@ -120,12 +120,14 @@
   if (!RtpHelper<VoiceMediaChannel>::AddRecvStream(sp))
     return false;
   output_scalings_[sp.first_ssrc()] = 1.0;
+  output_delays_[sp.first_ssrc()] = 0;
   return true;
 }
 bool FakeVoiceMediaChannel::RemoveRecvStream(uint32_t ssrc) {
   if (!RtpHelper<VoiceMediaChannel>::RemoveRecvStream(ssrc))
     return false;
   output_scalings_.erase(ssrc);
+  output_delays_.erase(ssrc);
   return true;
 }
 bool FakeVoiceMediaChannel::CanInsertDtmf() {
@@ -163,6 +165,23 @@
   *volume = output_scalings_[ssrc];
   return true;
 }
+bool FakeVoiceMediaChannel::SetBaseMinimumPlayoutDelayMs(uint32_t ssrc,
+                                                         int delay_ms) {
+  // Only streams registered through AddRecvStream() have a delay entry.
+  const auto it = output_delays_.find(ssrc);
+  if (it == output_delays_.end())
+    return false;
+  it->second = delay_ms;
+  return true;
+}
+absl::optional<int> FakeVoiceMediaChannel::GetBaseMinimumPlayoutDelayMs(
+    uint32_t ssrc) const {
+  // Unknown ssrcs yield nullopt rather than a default delay.
+  const auto it = output_delays_.find(ssrc);
+  if (it == output_delays_.end())
+    return absl::nullopt;
+  return it->second;
+}
 bool FakeVoiceMediaChannel::GetStats(VoiceMediaInfo* info) {
   return false;
 }
diff --git a/media/base/fake_media_engine.h b/media/base/fake_media_engine.h
index a41d4e4..f586580 100644
--- a/media/base/fake_media_engine.h
+++ b/media/base/fake_media_engine.h
@@ -349,6 +349,10 @@
   bool SetOutputVolume(uint32_t ssrc, double volume) override;
   bool GetOutputVolume(uint32_t ssrc, double* volume);
 
+  bool SetBaseMinimumPlayoutDelayMs(uint32_t ssrc, int delay_ms) override;
+  absl::optional<int> GetBaseMinimumPlayoutDelayMs(
+      uint32_t ssrc) const override;
+
   bool GetStats(VoiceMediaInfo* info) override;
 
   void SetRawAudioSink(
@@ -384,6 +388,7 @@
   std::vector<AudioCodec> recv_codecs_;
   std::vector<AudioCodec> send_codecs_;
   std::map<uint32_t, double> output_scalings_;
+  std::map<uint32_t, int> output_delays_;
   std::vector<DtmfInfo> dtmf_info_queue_;
   AudioOptions options_;
   std::map<uint32_t, std::unique_ptr<VoiceChannelAudioSink>> local_sinks_;
diff --git a/media/base/media_channel.h b/media/base/media_channel.h
index d20de75..5f5f32d 100644
--- a/media/base/media_channel.h
+++ b/media/base/media_channel.h
@@ -737,6 +737,13 @@
                             AudioSource* source) = 0;
   // Set speaker output volume of the specified ssrc.
   virtual bool SetOutputVolume(uint32_t ssrc, double volume) = 0;
+  // Set base minimum delay of the receive stream with specified ssrc.
+  // Base minimum delay sets lower bound on minimum delay value which
+  // determines minimum delay until audio playout.
+  // Returns false if there is no stream with given ssrc.
+  virtual bool SetBaseMinimumPlayoutDelayMs(uint32_t ssrc, int delay_ms) = 0;
+  virtual absl::optional<int> GetBaseMinimumPlayoutDelayMs(
+      uint32_t ssrc) const = 0;
   // Returns if the telephone-event has been negotiated.
   virtual bool CanInsertDtmf() = 0;
   // Send a DTMF |event|. The DTMF out-of-band signal will be used.
diff --git a/media/engine/fake_webrtc_call.h b/media/engine/fake_webrtc_call.h
index a0147e9..7df6b52 100644
--- a/media/engine/fake_webrtc_call.h
+++ b/media/engine/fake_webrtc_call.h
@@ -94,6 +94,9 @@
   float gain() const { return gain_; }
   bool DeliverRtp(const uint8_t* packet, size_t length, int64_t packet_time_us);
   bool started() const { return started_; }
+  int base_mininum_playout_delay_ms() const {
+    return base_mininum_playout_delay_ms_;
+  }
 
  private:
   // webrtc::AudioReceiveStream implementation.
@@ -105,11 +108,11 @@
   void SetSink(webrtc::AudioSinkInterface* sink) override;
   void SetGain(float gain) override;
   bool SetBaseMinimumPlayoutDelayMs(int delay_ms) override {
-    base_minimum_playout_delay_ms_ = delay_ms;
+    base_mininum_playout_delay_ms_ = delay_ms;
     return true;
   }
   int GetBaseMinimumPlayoutDelayMs() const override {
-    return base_minimum_playout_delay_ms_;
+    return base_mininum_playout_delay_ms_;
   }
   std::vector<webrtc::RtpSource> GetSources() const override {
     return std::vector<webrtc::RtpSource>();
@@ -123,7 +126,7 @@
   float gain_ = 1.0f;
   rtc::Buffer last_packet_;
   bool started_ = false;
-  int base_minimum_playout_delay_ms_ = 0;
+  int base_mininum_playout_delay_ms_ = 0;
 };
 
 class FakeVideoSendStream final
diff --git a/media/engine/webrtc_voice_engine.cc b/media/engine/webrtc_voice_engine.cc
index a055ae9..1f1b585 100644
--- a/media/engine/webrtc_voice_engine.cc
+++ b/media/engine/webrtc_voice_engine.cc
@@ -1173,6 +1173,29 @@
     playout_ = playout;
   }
 
+  bool SetBaseMinimumPlayoutDelayMs(int delay_ms) {
+    RTC_DCHECK(worker_thread_checker_.CalledOnValidThread());
+    RTC_DCHECK(stream_);
+    if (stream_->SetBaseMinimumPlayoutDelayMs(delay_ms)) {
+      // Memorize only a valid delay because during stream recreation it will
+      // be passed to the constructor and it must be a valid value.
+      config_.jitter_buffer_min_delay_ms = delay_ms;
+      return true;
+    } else {
+      RTC_LOG(LS_ERROR) << "Failed to SetBaseMinimumPlayoutDelayMs"
+                        << " on AudioReceiveStream on SSRC="
+                        << config_.rtp.remote_ssrc
+                        << " with delay_ms=" << delay_ms;
+      return false;
+    }
+  }
+
+  int GetBaseMinimumPlayoutDelayMs() const {
+    RTC_DCHECK(worker_thread_checker_.CalledOnValidThread());
+    RTC_DCHECK(stream_);
+    return stream_->GetBaseMinimumPlayoutDelayMs();
+  }
+
   std::vector<webrtc::RtpSource> GetSources() {
     RTC_DCHECK(worker_thread_checker_.CalledOnValidThread());
     RTC_DCHECK(stream_);
@@ -1952,6 +1975,44 @@
   return true;
 }
 
+bool WebRtcVoiceMediaChannel::SetBaseMinimumPlayoutDelayMs(uint32_t ssrc,
+                                                           int delay_ms) {
+  RTC_DCHECK(worker_thread_checker_.CalledOnValidThread());
+  std::vector<uint32_t> ssrcs(1, ssrc);
+  if (ssrc == 0) {  // SSRC of 0 represents the default receive stream.
+    default_recv_base_minimum_delay_ms_ = delay_ms;
+    ssrcs = unsignaled_recv_ssrcs_;
+  }
+  for (uint32_t recv_ssrc : ssrcs) {
+    const auto it = recv_streams_.find(recv_ssrc);
+    if (it == recv_streams_.end()) {
+      RTC_LOG(LS_WARNING) << "SetBaseMinimumPlayoutDelayMs: no recv stream "
+                          << recv_ssrc;
+      return false;
+    }
+    if (!it->second->SetBaseMinimumPlayoutDelayMs(delay_ms))
+      continue;  // Rejected; the stream logs the failure itself.
+    RTC_LOG(LS_INFO) << "SetBaseMinimumPlayoutDelayMs() to " << delay_ms
+                     << " for recv stream with ssrc " << recv_ssrc;
+  }
+  return true;
+}
+
+absl::optional<int> WebRtcVoiceMediaChannel::GetBaseMinimumPlayoutDelayMs(
+    uint32_t ssrc) const {
+  // SSRC of 0 represents the default receive stream.
+  if (ssrc == 0) {
+    return default_recv_base_minimum_delay_ms_;
+  }
+
+  const auto it = recv_streams_.find(ssrc);
+
+  if (it != recv_streams_.end()) {
+    return it->second->GetBaseMinimumPlayoutDelayMs();
+  }
+  return absl::nullopt;
+}
+
 bool WebRtcVoiceMediaChannel::CanInsertDtmf() {
   return dtmf_payload_type_.has_value() && send_;
 }
@@ -2047,6 +2108,7 @@
   RTC_DCHECK_GE(kMaxUnsignaledRecvStreams, unsignaled_recv_ssrcs_.size());
 
   SetOutputVolume(ssrc, default_recv_volume_);
+  SetBaseMinimumPlayoutDelayMs(ssrc, default_recv_base_minimum_delay_ms_);
 
   // The default sink can only be attached to one stream at a time, so we hook
   // it up to the *latest* unsignaled stream we've seen, in order to support the
diff --git a/media/engine/webrtc_voice_engine.h b/media/engine/webrtc_voice_engine.h
index 4ee61a1..830887d 100644
--- a/media/engine/webrtc_voice_engine.h
+++ b/media/engine/webrtc_voice_engine.h
@@ -196,6 +196,10 @@
   // SSRC=0 will apply the new volume to current and future unsignaled streams.
   bool SetOutputVolume(uint32_t ssrc, double volume) override;
 
+  bool SetBaseMinimumPlayoutDelayMs(uint32_t ssrc, int delay_ms) override;
+  absl::optional<int> GetBaseMinimumPlayoutDelayMs(
+      uint32_t ssrc) const override;
+
   bool CanInsertDtmf() override;
   bool InsertDtmf(uint32_t ssrc, int event, int duration) override;
 
@@ -295,6 +299,10 @@
 
   // Volume for unsignaled streams, which may be set before the stream exists.
   double default_recv_volume_ = 1.0;
+
+  // Delay for unsignaled streams, which may be set before the stream exists.
+  int default_recv_base_minimum_delay_ms_ = 0;
+
   // Sink for latest unsignaled stream - may be set before the stream exists.
   std::unique_ptr<webrtc::AudioSinkInterface> default_sink_;
   // Default SSRC to use for RTCP receiver reports in case of no signaled
diff --git a/media/engine/webrtc_voice_engine_unittest.cc b/media/engine/webrtc_voice_engine_unittest.cc
index a3ccee94..4b1a528 100644
--- a/media/engine/webrtc_voice_engine_unittest.cc
+++ b/media/engine/webrtc_voice_engine_unittest.cc
@@ -3168,6 +3168,65 @@
   EXPECT_DOUBLE_EQ(4, GetRecvStream(kSsrcX).gain());
 }
 
+TEST_F(WebRtcVoiceEngineTestFake, BaseMinimumPlayoutDelayMs) {
+  EXPECT_TRUE(SetupChannel());
+  EXPECT_FALSE(channel_->SetBaseMinimumPlayoutDelayMs(kSsrcY, 200));
+  EXPECT_FALSE(channel_->GetBaseMinimumPlayoutDelayMs(kSsrcY).has_value());
+
+  cricket::StreamParams stream;
+  stream.ssrcs.push_back(kSsrcY);
+  EXPECT_TRUE(channel_->AddRecvStream(stream));
+  EXPECT_EQ(0, GetRecvStream(kSsrcY).base_mininum_playout_delay_ms());
+  EXPECT_TRUE(channel_->SetBaseMinimumPlayoutDelayMs(kSsrcY, 300));
+  EXPECT_EQ(300, GetRecvStream(kSsrcY).base_mininum_playout_delay_ms());
+}
+
+TEST_F(WebRtcVoiceEngineTestFake,
+       BaseMinimumPlayoutDelayMsUnsignaledRecvStream) {
+  // Here base minimum delay is abbreviated to delay in comments for shortness.
+  EXPECT_TRUE(SetupChannel());
+
+  // Spawn an unsignaled stream by sending a packet - delay should be 0.
+  DeliverPacket(kPcmuFrame, sizeof(kPcmuFrame));
+  EXPECT_EQ(0, channel_->GetBaseMinimumPlayoutDelayMs(kSsrc1).value_or(-1));
+  // Check that it doesn't provide default values for unknown ssrc.
+  EXPECT_FALSE(channel_->GetBaseMinimumPlayoutDelayMs(kSsrcY).has_value());
+
+  // Check that default value for unsignaled streams is 0.
+  EXPECT_EQ(0, channel_->GetBaseMinimumPlayoutDelayMs(kSsrc0).value_or(-1));
+
+  // Should remember the delay 100 which will be set on new unsignaled streams,
+  // and also set the delay to 100 on existing unsignaled streams.
+  EXPECT_TRUE(channel_->SetBaseMinimumPlayoutDelayMs(kSsrc0, 100));
+  EXPECT_EQ(100, channel_->GetBaseMinimumPlayoutDelayMs(kSsrc0).value_or(-1));
+  // Check that it doesn't provide default values for unknown ssrc.
+  EXPECT_FALSE(channel_->GetBaseMinimumPlayoutDelayMs(kSsrcY).has_value());
+
+  // Spawn an unsignaled stream by sending a packet - delay should be 100.
+  unsigned char pcmuFrame2[sizeof(kPcmuFrame)];
+  memcpy(pcmuFrame2, kPcmuFrame, sizeof(kPcmuFrame));
+  rtc::SetBE32(&pcmuFrame2[8], kSsrcX);
+  DeliverPacket(pcmuFrame2, sizeof(pcmuFrame2));
+  EXPECT_EQ(100, channel_->GetBaseMinimumPlayoutDelayMs(kSsrcX).value_or(-1));
+
+  // Setting delay with SSRC=0 should affect all unsignaled streams.
+  EXPECT_TRUE(channel_->SetBaseMinimumPlayoutDelayMs(kSsrc0, 300));
+  if (kMaxUnsignaledRecvStreams > 1) {
+    EXPECT_EQ(300, channel_->GetBaseMinimumPlayoutDelayMs(kSsrc1).value_or(-1));
+  }
+  EXPECT_EQ(300, channel_->GetBaseMinimumPlayoutDelayMs(kSsrcX).value_or(-1));
+
+  // Setting delay on an individual stream affects only that.
+  EXPECT_TRUE(channel_->SetBaseMinimumPlayoutDelayMs(kSsrcX, 400));
+  if (kMaxUnsignaledRecvStreams > 1) {
+    EXPECT_EQ(300, channel_->GetBaseMinimumPlayoutDelayMs(kSsrc1).value_or(-1));
+  }
+  EXPECT_EQ(400, channel_->GetBaseMinimumPlayoutDelayMs(kSsrcX).value_or(-1));
+  EXPECT_EQ(300, channel_->GetBaseMinimumPlayoutDelayMs(kSsrc0).value_or(-1));
+  // Check that it doesn't provide default values for unknown ssrc.
+  EXPECT_FALSE(channel_->GetBaseMinimumPlayoutDelayMs(kSsrcY).has_value());
+}
+
 TEST_F(WebRtcVoiceEngineTestFake, SetsSyncGroupFromStreamId) {
   const uint32_t kAudioSsrc = 123;
   const std::string kStreamId = "AvSyncLabel";
diff --git a/pc/peer_connection.cc b/pc/peer_connection.cc
index af2e554..385c3d0 100644
--- a/pc/peer_connection.cc
+++ b/pc/peer_connection.cc
@@ -4670,7 +4670,7 @@
         FindSenderInfo(*current_senders, kDefaultStreamId, default_sender_id);
     if (!default_sender_info) {
       current_senders->push_back(
-          RtpSenderInfo(kDefaultStreamId, default_sender_id, 0));
+          RtpSenderInfo(kDefaultStreamId, default_sender_id, /*ssrc=*/0));
       OnRemoteSenderAdded(current_senders->back(), media_type);
     }
   }
diff --git a/pc/remote_audio_source.cc b/pc/remote_audio_source.cc
index 0155ec6..63944c6 100644
--- a/pc/remote_audio_source.cc
+++ b/pc/remote_audio_source.cc
@@ -20,11 +20,17 @@
 #include "rtc_base/constructor_magic.h"
 #include "rtc_base/location.h"
 #include "rtc_base/logging.h"
+#include "rtc_base/numerics/safe_conversions.h"
 #include "rtc_base/thread.h"
 #include "rtc_base/thread_checker.h"
 
 namespace webrtc {
 
+namespace {
+constexpr int kDefaultLatency = 0;
+constexpr int kRoundToZeroThresholdMs = 10;
+}  // namespace
+
 // This proxy is passed to the underlying media engine to receive audio data as
 // they come in. The data will then be passed back up to the RemoteAudioSource
 // which will fan it out to all the sinks that have been added to it.
@@ -64,6 +70,13 @@
                               uint32_t ssrc) {
   RTC_DCHECK_RUN_ON(main_thread_);
   RTC_DCHECK(media_channel);
+  // Check that there are no consecutive start calls.
+  RTC_DCHECK(!media_channel_ && !ssrc_);
+
+  // Remember media channel ssrc pair for latency calls.
+  media_channel_ = media_channel;
+  ssrc_ = ssrc;
+
   // Register for callbacks immediately before AddSink so that we always get
   // notified when a channel goes out of scope (signaled when "AudioDataProxy"
   // is destroyed).
@@ -71,12 +84,22 @@
     media_channel->SetRawAudioSink(ssrc,
                                    absl::make_unique<AudioDataProxy>(this));
   });
+
+  // Trying to apply cached latency for the audio stream.
+  if (cached_latency_) {
+    SetLatency(cached_latency_.value());
+  }
 }
 
 void RemoteAudioSource::Stop(cricket::VoiceMediaChannel* media_channel,
                              uint32_t ssrc) {
   RTC_DCHECK_RUN_ON(main_thread_);
   RTC_DCHECK(media_channel);
+
+  // Assume that audio stream is no longer present for latency calls.
+  media_channel_ = nullptr;
+  ssrc_ = absl::nullopt;
+
   worker_thread_->Invoke<void>(
       RTC_FROM_HERE, [&] { media_channel->SetRawAudioSink(ssrc, nullptr); });
 }
@@ -99,6 +122,53 @@
   }
 }
 
+void RemoteAudioSource::SetLatency(double latency) {
+  RTC_DCHECK_GE(latency, 0);
+  RTC_DCHECK_LE(latency, 10);
+
+  int delay_ms = rtc::dchecked_cast<int>(latency * 1000);
+  // In NetEq 0 delay has special meaning of being unconstrained value that is
+  // why we round delay to 0 if it is small enough during conversion from
+  // latency.
+  if (delay_ms <= kRoundToZeroThresholdMs) {
+    delay_ms = 0;
+  }
+
+  cached_latency_ = latency;
+  SetDelayMs(delay_ms);
+}
+
+// Reports the channel's current delay in seconds; if none is available, falls
+// back to the last cached latency or, failing that, the default.
+double RemoteAudioSource::GetLatency() const {
+  const absl::optional<int> delay_ms = GetDelayMs();
+  if (!delay_ms) {
+    return cached_latency_.value_or(kDefaultLatency);
+  }
+  return *delay_ms / 1000.0;
+}
+
+// Applies |delay_ms| on the worker thread; returns false if there is no
+// attached stream or the media channel does not know the ssrc.
+bool RemoteAudioSource::SetDelayMs(int delay_ms) {
+  if (!media_channel_ || !ssrc_)
+    return false;
+  return worker_thread_->Invoke<bool>(RTC_FROM_HERE, [&] {
+    return media_channel_->SetBaseMinimumPlayoutDelayMs(ssrc_.value(),
+                                                        delay_ms);
+  });
+}
+
+absl::optional<int> RemoteAudioSource::GetDelayMs() const {
+  if (!media_channel_ || !ssrc_) {
+    return absl::nullopt;
+  }
+
+  return worker_thread_->Invoke<absl::optional<int>>(RTC_FROM_HERE, [&] {
+    return media_channel_->GetBaseMinimumPlayoutDelayMs(ssrc_.value());
+  });
+}
+
 void RemoteAudioSource::RegisterAudioObserver(AudioObserver* observer) {
   RTC_DCHECK(observer != NULL);
   RTC_DCHECK(!absl::c_linear_search(audio_observers_, observer));
diff --git a/pc/remote_audio_source.h b/pc/remote_audio_source.h
index 399e7e3..0773c38 100644
--- a/pc/remote_audio_source.h
+++ b/pc/remote_audio_source.h
@@ -46,6 +46,8 @@
 
   // AudioSourceInterface implementation.
   void SetVolume(double volume) override;
+  void SetLatency(double latency) override;
+  double GetLatency() const override;
   void RegisterAudioObserver(AudioObserver* observer) override;
   void UnregisterAudioObserver(AudioObserver* observer) override;
 
@@ -63,12 +65,19 @@
 
   void OnMessage(rtc::Message* msg) override;
 
+  bool SetDelayMs(int delay_ms);
+  absl::optional<int> GetDelayMs() const;
+
   rtc::Thread* const main_thread_;
   rtc::Thread* const worker_thread_;
   std::list<AudioObserver*> audio_observers_;
   rtc::CriticalSection sink_lock_;
   std::list<AudioTrackSinkInterface*> sinks_;
   SourceState state_;
+  // Media channel and ssrc together uniquely identify the audio stream.
+  cricket::VoiceMediaChannel* media_channel_ = nullptr;
+  absl::optional<uint32_t> ssrc_;
+  absl::optional<double> cached_latency_;
 };
 
 }  // namespace webrtc
diff --git a/pc/rtp_sender_receiver_unittest.cc b/pc/rtp_sender_receiver_unittest.cc
index b172460..1f015f6 100644
--- a/pc/rtp_sender_receiver_unittest.cc
+++ b/pc/rtp_sender_receiver_unittest.cc
@@ -48,6 +48,7 @@
 #include "pc/dtls_srtp_transport.h"
 #include "pc/local_audio_source.h"
 #include "pc/media_stream.h"
+#include "pc/remote_audio_source.h"
 #include "pc/rtp_receiver.h"
 #include "pc/rtp_sender.h"
 #include "pc/rtp_transport_internal.h"
@@ -522,6 +523,103 @@
   DestroyAudioRtpReceiver();
 }
 
+TEST_F(RtpSenderReceiverTest, RemoteAudioSourceLatencyCaching) {
+  absl::optional<int> delay_ms;  // In milliseconds.
+  double latency_s = 0.5;        // In seconds.
+  rtc::scoped_refptr<RemoteAudioSource> source =
+      new rtc::RefCountedObject<RemoteAudioSource>(rtc::Thread::Current());
+
+  // Check default value.
+  EXPECT_DOUBLE_EQ(source->GetLatency(), 0.0);
+
+  // Check caching behaviour.
+  source->SetLatency(latency_s);
+  EXPECT_DOUBLE_EQ(source->GetLatency(), latency_s);
+
+  // Check that cached value applied on the start.
+  source->Start(voice_media_channel_, kAudioSsrc);
+  delay_ms = voice_media_channel_->GetBaseMinimumPlayoutDelayMs(kAudioSsrc);
+  EXPECT_DOUBLE_EQ(latency_s, delay_ms.value_or(0) / 1000.0);
+
+  // Check that setting latency changes delay.
+  latency_s = 0.8;
+  source->SetLatency(latency_s);
+  delay_ms = voice_media_channel_->GetBaseMinimumPlayoutDelayMs(kAudioSsrc);
+  EXPECT_DOUBLE_EQ(latency_s, delay_ms.value_or(0) / 1000.0);
+  EXPECT_DOUBLE_EQ(latency_s, source->GetLatency());
+
+  // Check that if underlying delay is changed then remote source will reflect
+  // it.
+  delay_ms = 300;
+  voice_media_channel_->SetBaseMinimumPlayoutDelayMs(kAudioSsrc,
+                                                     delay_ms.value());
+  EXPECT_DOUBLE_EQ(source->GetLatency(), delay_ms.value() / 1000.0);
+
+  // Check that after stop we get last cached value.
+  source->Stop(voice_media_channel_, kAudioSsrc);
+  EXPECT_DOUBLE_EQ(latency_s, source->GetLatency());
+
+  // Check that if we start source again with new ssrc then cached value is
+  // applied.
+  source->Start(voice_media_channel_, kAudioSsrc2);
+  delay_ms = voice_media_channel_->GetBaseMinimumPlayoutDelayMs(kAudioSsrc2);
+  EXPECT_DOUBLE_EQ(latency_s, delay_ms.value_or(0) / 1000.0);
+
+  // Check rounding behavior.
+  source->SetLatency(2 / 1000.0);
+  delay_ms = voice_media_channel_->GetBaseMinimumPlayoutDelayMs(kAudioSsrc2);
+  EXPECT_EQ(0, delay_ms.value_or(-1));
+  EXPECT_DOUBLE_EQ(0, source->GetLatency());
+}
+
+TEST_F(RtpSenderReceiverTest, RemoteAudioSourceLatencyNoCaching) {
+  int delay_ms = 300;  // In milliseconds.
+  rtc::scoped_refptr<RemoteAudioSource> source =
+      new rtc::RefCountedObject<RemoteAudioSource>(rtc::Thread::Current());
+
+  // Set it to value different from default zero.
+  voice_media_channel_->SetBaseMinimumPlayoutDelayMs(kAudioSsrc, delay_ms);
+
+  // Check that calling GetLatency on the source that hasn't been started yet
+  // won't trigger caching.
+  EXPECT_DOUBLE_EQ(source->GetLatency(), 0);
+  source->Start(voice_media_channel_, kAudioSsrc);
+  EXPECT_DOUBLE_EQ(source->GetLatency(), delay_ms / 1000.0);
+}
+
+TEST_F(RtpSenderReceiverTest, RemoteAudioTrackSetLatency) {
+  CreateAudioRtpReceiver();
+
+  absl::optional<int> delay_ms;  // In milliseconds.
+  double latency_s = 0.5;        // In seconds.
+  audio_track_->GetSource()->SetLatency(latency_s);
+  delay_ms = voice_media_channel_->GetBaseMinimumPlayoutDelayMs(kAudioSsrc);
+  EXPECT_DOUBLE_EQ(latency_s, delay_ms.value_or(0) / 1000.0);
+
+  // Disabling the track should take no effect on previously set value.
+  audio_track_->set_enabled(false);
+  delay_ms = voice_media_channel_->GetBaseMinimumPlayoutDelayMs(kAudioSsrc);
+  EXPECT_DOUBLE_EQ(latency_s, delay_ms.value_or(0) / 1000.0);
+
+  // When the track is disabled, we still should be able to set latency.
+  latency_s = 0.3;
+  audio_track_->GetSource()->SetLatency(latency_s);
+  delay_ms = voice_media_channel_->GetBaseMinimumPlayoutDelayMs(kAudioSsrc);
+  EXPECT_DOUBLE_EQ(latency_s, delay_ms.value_or(0) / 1000.0);
+
+  // Enabling the track should take no effect on previously set value.
+  audio_track_->set_enabled(true);
+  delay_ms = voice_media_channel_->GetBaseMinimumPlayoutDelayMs(kAudioSsrc);
+  EXPECT_DOUBLE_EQ(latency_s, delay_ms.value_or(0) / 1000.0);
+
+  // We still should be able to change latency.
+  latency_s = 0.0;
+  audio_track_->GetSource()->SetLatency(latency_s);
+  delay_ms = voice_media_channel_->GetBaseMinimumPlayoutDelayMs(kAudioSsrc);
+  EXPECT_EQ(0, delay_ms.value_or(-1));
+  EXPECT_DOUBLE_EQ(latency_s, delay_ms.value_or(0) / 1000.0);
+}
+
 // Test that the media channel isn't enabled for sending if the audio sender
 // doesn't have both a track and SSRC.
 TEST_F(RtpSenderReceiverTest, AudioSenderWithoutTrackAndSsrc) {