Fix the chain that propagates the audio frame's rtp and ntp timestamp including:
* In AudioCodingModuleImpl::PlayoutData10Ms, don't reset the timestamp got from GetAudio.
* When there is more than one participant, set AudioFrame's RTP timestamp to 0.
* Copy ntp_time_ms_ in AudioFrame::CopyFrom method.
* In RemixAndResample, pass src frame's timestamp_ and ntp_time_ms_ to the dst frame.
* Fix how |elapsed_time_ms| is computed in channel.cc by adding GetPlayoutFrequency.
Tweaks on ntp_time_ms_:
* Init ntp_time_ms_ to -1 in AudioFrame ctor.
* When there is more than one participant, set AudioFrame's ntp_time_ms_ to an invalid value, i.e. we don't support ntp_time_ms_ in the multiple-participants case before the mixing is moved to Chrome.
Added elapsed_time_ms to AudioFrame and pass it to chrome, where we don't have the information about the RTP timestamp's sample rate, i.e. can't convert the RTP timestamp to ms.
BUG=3111
R=henrik.lundin@webrtc.org, turaj@webrtc.org, xians@webrtc.org
TBR=andrew
andrew to take another look on audio_conference_mixer_impl.cc
Review URL: https://webrtc-codereview.appspot.com/14559004
git-svn-id: http://webrtc.googlecode.com/svn/trunk@6346 4adac7df-926f-26a2-2b94-8c16560cd09d
diff --git a/webrtc/modules/audio_coding/main/acm2/acm_receiver.cc b/webrtc/modules/audio_coding/main/acm2/acm_receiver.cc
index 613491a..052bd4f 100644
--- a/webrtc/modules/audio_coding/main/acm2/acm_receiver.cc
+++ b/webrtc/modules/audio_coding/main/acm2/acm_receiver.cc
@@ -475,10 +475,17 @@
call_stats_.DecodedByNetEq(audio_frame->speech_type_);
// Computes the RTP timestamp of the first sample in |audio_frame| from
- // |PlayoutTimestamp|, which is the timestamp of the last sample of
+ // |GetPlayoutTimestamp|, which is the timestamp of the last sample of
// |audio_frame|.
- audio_frame->timestamp_ =
- PlayoutTimestamp() - audio_frame->samples_per_channel_;
+ uint32_t playout_timestamp = 0;
+ if (GetPlayoutTimestamp(&playout_timestamp)) {
+ audio_frame->timestamp_ =
+ playout_timestamp - audio_frame->samples_per_channel_;
+ } else {
+ // Remain 0 until we have a valid |playout_timestamp|.
+ audio_frame->timestamp_ = 0;
+ }
+
return 0;
}
@@ -596,13 +603,14 @@
id_ = id;
}
-uint32_t AcmReceiver::PlayoutTimestamp() {
+bool AcmReceiver::GetPlayoutTimestamp(uint32_t* timestamp) {
if (av_sync_) {
assert(initial_delay_manager_.get());
- if (initial_delay_manager_->buffering())
- return initial_delay_manager_->playout_timestamp();
+ if (initial_delay_manager_->buffering()) {
+ return initial_delay_manager_->GetPlayoutTimestamp(timestamp);
+ }
}
- return neteq_->PlayoutTimestamp();
+ return neteq_->GetPlayoutTimestamp(timestamp);
}
int AcmReceiver::last_audio_codec_id() const {
diff --git a/webrtc/modules/audio_coding/main/acm2/acm_receiver.h b/webrtc/modules/audio_coding/main/acm2/acm_receiver.h
index 7a238ae..748744a 100644
--- a/webrtc/modules/audio_coding/main/acm2/acm_receiver.h
+++ b/webrtc/modules/audio_coding/main/acm2/acm_receiver.h
@@ -242,9 +242,10 @@
void set_id(int id); // TODO(turajs): can be inline.
//
- // Returns the RTP timestamp of the last sample delivered by GetAudio().
+ // Gets the RTP timestamp of the last sample delivered by GetAudio().
+ // Returns true if the RTP timestamp is valid, otherwise false.
//
- uint32_t PlayoutTimestamp();
+ bool GetPlayoutTimestamp(uint32_t* timestamp);
//
// Return the index of the codec associated with the last non-CNG/non-DTMF
diff --git a/webrtc/modules/audio_coding/main/acm2/audio_coding_module_impl.cc b/webrtc/modules/audio_coding/main/acm2/audio_coding_module_impl.cc
index 7f6c8405..a07e854 100644
--- a/webrtc/modules/audio_coding/main/acm2/audio_coding_module_impl.cc
+++ b/webrtc/modules/audio_coding/main/acm2/audio_coding_module_impl.cc
@@ -1776,7 +1776,6 @@
}
audio_frame->id_ = id_;
- audio_frame->timestamp_ = 0;
return 0;
}
@@ -1917,8 +1916,7 @@
}
int AudioCodingModuleImpl::PlayoutTimestamp(uint32_t* timestamp) {
- *timestamp = receiver_.PlayoutTimestamp();
- return 0;
+ return receiver_.GetPlayoutTimestamp(timestamp) ? 0 : -1;
}
bool AudioCodingModuleImpl::HaveValidEncoder(const char* caller_name) const {
diff --git a/webrtc/modules/audio_coding/main/acm2/initial_delay_manager.cc b/webrtc/modules/audio_coding/main/acm2/initial_delay_manager.cc
index c2b218c..786fb2e 100644
--- a/webrtc/modules/audio_coding/main/acm2/initial_delay_manager.cc
+++ b/webrtc/modules/audio_coding/main/acm2/initial_delay_manager.cc
@@ -219,6 +219,14 @@
return;
}
+bool InitialDelayManager::GetPlayoutTimestamp(uint32_t* playout_timestamp) {
+ if (!buffering_) {
+ return false;
+ }
+ *playout_timestamp = playout_timestamp_;
+ return true;
+}
+
void InitialDelayManager::DisableBuffering() {
buffering_ = false;
}
diff --git a/webrtc/modules/audio_coding/main/acm2/initial_delay_manager.h b/webrtc/modules/audio_coding/main/acm2/initial_delay_manager.h
index 3c5ba3c..6edc115 100644
--- a/webrtc/modules/audio_coding/main/acm2/initial_delay_manager.h
+++ b/webrtc/modules/audio_coding/main/acm2/initial_delay_manager.h
@@ -65,8 +65,9 @@
// sequence of late (or perhaps missing) packets is computed.
void LatePackets(uint32_t timestamp_now, SyncStream* sync_stream);
- // Playout timestamp, valid when buffering.
- uint32_t playout_timestamp() { return playout_timestamp_; }
+ // Get playout timestamp.
+ // Returns true if the timestamp is valid (when buffering), otherwise false.
+ bool GetPlayoutTimestamp(uint32_t* playout_timestamp);
// True if buffered audio is less than the given initial delay (specified at
// the constructor). Buffering might be disabled by the client of this class.
diff --git a/webrtc/modules/audio_coding/main/acm2/initial_delay_manager_unittest.cc b/webrtc/modules/audio_coding/main/acm2/initial_delay_manager_unittest.cc
index 15e88a5..38b7cfc 100644
--- a/webrtc/modules/audio_coding/main/acm2/initial_delay_manager_unittest.cc
+++ b/webrtc/modules/audio_coding/main/acm2/initial_delay_manager_unittest.cc
@@ -359,7 +359,9 @@
EXPECT_TRUE(manager_->buffering());
const uint32_t expected_playout_timestamp = rtp_info_.header.timestamp -
kInitDelayMs * kSamplingRateHz / 1000;
- EXPECT_EQ(expected_playout_timestamp, manager_->playout_timestamp());
+ uint32_t actual_playout_timestamp = 0;
+ EXPECT_TRUE(manager_->GetPlayoutTimestamp(&actual_playout_timestamp));
+ EXPECT_EQ(expected_playout_timestamp, actual_playout_timestamp);
NextRtpHeader(&rtp_info_, &rtp_receive_timestamp_);
}
diff --git a/webrtc/modules/audio_coding/neteq4/interface/neteq.h b/webrtc/modules/audio_coding/neteq4/interface/neteq.h
index 763da31..79a5dfb 100644
--- a/webrtc/modules/audio_coding/neteq4/interface/neteq.h
+++ b/webrtc/modules/audio_coding/neteq4/interface/neteq.h
@@ -228,8 +228,9 @@
// Disables post-decode VAD.
virtual void DisableVad() = 0;
- // Returns the RTP timestamp for the last sample delivered by GetAudio().
- virtual uint32_t PlayoutTimestamp() = 0;
+ // Gets the RTP timestamp for the last sample delivered by GetAudio().
+ // Returns true if the RTP timestamp is valid, otherwise false.
+ virtual bool GetPlayoutTimestamp(uint32_t* timestamp) = 0;
// Not implemented.
virtual int SetTargetNumberOfChannels() = 0;
diff --git a/webrtc/modules/audio_coding/neteq4/neteq_impl.cc b/webrtc/modules/audio_coding/neteq4/neteq_impl.cc
index 963a820..f860766 100644
--- a/webrtc/modules/audio_coding/neteq4/neteq_impl.cc
+++ b/webrtc/modules/audio_coding/neteq4/neteq_impl.cc
@@ -335,9 +335,15 @@
vad_->Disable();
}
-uint32_t NetEqImpl::PlayoutTimestamp() {
+bool NetEqImpl::GetPlayoutTimestamp(uint32_t* timestamp) {
CriticalSectionScoped lock(crit_sect_.get());
- return timestamp_scaler_->ToExternal(playout_timestamp_);
+ if (first_packet_) {
+ // We don't have a valid RTP timestamp until we have decoded our first
+ // RTP packet.
+ return false;
+ }
+ *timestamp = timestamp_scaler_->ToExternal(playout_timestamp_);
+ return true;
}
int NetEqImpl::LastError() {
diff --git a/webrtc/modules/audio_coding/neteq4/neteq_impl.h b/webrtc/modules/audio_coding/neteq4/neteq_impl.h
index 751de66..822a523 100644
--- a/webrtc/modules/audio_coding/neteq4/neteq_impl.h
+++ b/webrtc/modules/audio_coding/neteq4/neteq_impl.h
@@ -166,8 +166,7 @@
// Disables post-decode VAD.
virtual void DisableVad();
- // Returns the RTP timestamp for the last sample delivered by GetAudio().
- virtual uint32_t PlayoutTimestamp();
+ virtual bool GetPlayoutTimestamp(uint32_t* timestamp);
virtual int SetTargetNumberOfChannels() { return kNotImplemented; }
diff --git a/webrtc/modules/audio_coding/neteq4/neteq_impl_unittest.cc b/webrtc/modules/audio_coding/neteq4/neteq_impl_unittest.cc
index aedd8d5..26279aa 100644
--- a/webrtc/modules/audio_coding/neteq4/neteq_impl_unittest.cc
+++ b/webrtc/modules/audio_coding/neteq4/neteq_impl_unittest.cc
@@ -477,8 +477,10 @@
// The value of the last of the output samples is the same as the number of
// samples played from the decoded packet. Thus, this number + the RTP
// timestamp should match the playout timestamp.
+ uint32_t timestamp = 0;
+  EXPECT_TRUE(neteq_->GetPlayoutTimestamp(&timestamp));
EXPECT_EQ(rtp_header.header.timestamp + output[samples_per_channel - 1],
- neteq_->PlayoutTimestamp());
+ timestamp);
// Check the timestamp for the last value in the sync buffer. This should
// be one full frame length ahead of the RTP timestamp.
diff --git a/webrtc/modules/audio_coding/neteq4/neteq_unittest.cc b/webrtc/modules/audio_coding/neteq4/neteq_unittest.cc
index f66a3cf..c1a7e16 100644
--- a/webrtc/modules/audio_coding/neteq4/neteq_unittest.cc
+++ b/webrtc/modules/audio_coding/neteq4/neteq_unittest.cc
@@ -228,6 +228,8 @@
void DuplicateCng();
+ uint32_t PlayoutTimestamp();
+
NetEq* neteq_;
FILE* rtp_fp_;
unsigned int sim_clock_;
@@ -736,7 +738,7 @@
}
EXPECT_EQ(kOutputNormal, type);
- int32_t delay_before = timestamp - neteq_->PlayoutTimestamp();
+ int32_t delay_before = timestamp - PlayoutTimestamp();
// Insert CNG for 1 minute (= 60000 ms).
const int kCngPeriodMs = 100;
@@ -829,7 +831,7 @@
// Check that the speech starts again within reasonable time.
double time_until_speech_returns_ms = t_ms - speech_restart_time_ms;
EXPECT_LT(time_until_speech_returns_ms, max_time_to_speech_ms);
- int32_t delay_after = timestamp - neteq_->PlayoutTimestamp();
+ int32_t delay_after = timestamp - PlayoutTimestamp();
// Compare delay before and after, and make sure it differs less than 20 ms.
EXPECT_LE(delay_after, delay_before + delay_tolerance_ms * 16);
EXPECT_GE(delay_after, delay_before - delay_tolerance_ms * 16);
@@ -1310,7 +1312,7 @@
ASSERT_EQ(1, num_channels);
// Expect delay (in samples) to be less than 2 packets.
- EXPECT_LE(timestamp - neteq_->PlayoutTimestamp(),
+ EXPECT_LE(timestamp - PlayoutTimestamp(),
static_cast<uint32_t>(kSamples * 2));
}
// Make sure we have actually tested wrap-around.
@@ -1391,7 +1393,7 @@
kMaxBlockSize, out_data_, &out_len, &num_channels, &type));
ASSERT_EQ(kBlockSize16kHz, out_len);
EXPECT_EQ(kOutputCNG, type);
- EXPECT_EQ(timestamp - algorithmic_delay_samples, neteq_->PlayoutTimestamp());
+ EXPECT_EQ(timestamp - algorithmic_delay_samples, PlayoutTimestamp());
// Insert the same CNG packet again. Note that at this point it is old, since
// we have already decoded the first copy of it.
@@ -1406,7 +1408,7 @@
ASSERT_EQ(kBlockSize16kHz, out_len);
EXPECT_EQ(kOutputCNG, type);
EXPECT_EQ(timestamp - algorithmic_delay_samples,
- neteq_->PlayoutTimestamp());
+ PlayoutTimestamp());
}
// Insert speech again.
@@ -1422,7 +1424,13 @@
ASSERT_EQ(kBlockSize16kHz, out_len);
EXPECT_EQ(kOutputNormal, type);
EXPECT_EQ(timestamp + kSamples - algorithmic_delay_samples,
- neteq_->PlayoutTimestamp());
+ PlayoutTimestamp());
+}
+
+uint32_t NetEqDecodingTest::PlayoutTimestamp() {
+ uint32_t playout_timestamp = 0;
+ EXPECT_TRUE(neteq_->GetPlayoutTimestamp(&playout_timestamp));
+ return playout_timestamp;
}
TEST_F(NetEqDecodingTest, DiscardDuplicateCng) { DuplicateCng(); }
diff --git a/webrtc/modules/audio_conference_mixer/source/audio_conference_mixer_impl.cc b/webrtc/modules/audio_conference_mixer/source/audio_conference_mixer_impl.cc
index f3883c0..26ef3e8 100644
--- a/webrtc/modules/audio_conference_mixer/source/audio_conference_mixer_impl.cc
+++ b/webrtc/modules/audio_conference_mixer/source/audio_conference_mixer_impl.cc
@@ -651,6 +651,11 @@
_audioFramePool->PushMemory(audioFrame);
continue;
}
+ if (_participantList.size() != 1) {
+ // TODO(wu): Issue 3390, add support for multiple participants case.
+ audioFrame->ntp_time_ms_ = -1;
+ }
+
// TODO(henrike): this assert triggers in some test cases where SRTP is
// used which prevents NetEQ from making a VAD. Temporarily disable this
// assert until the problem is fixed on a higher level.
@@ -950,6 +955,16 @@
return 0;
}
+ if (audioFrameList->size() == 1) {
+ mixedAudio.timestamp_ = audioFrameList->front()->timestamp_;
+ mixedAudio.elapsed_time_ms_ = audioFrameList->front()->elapsed_time_ms_;
+ } else {
+ // TODO(wu): Issue 3390.
+ // Audio frame timestamp is only supported in one channel case.
+ mixedAudio.timestamp_ = 0;
+ mixedAudio.elapsed_time_ms_ = -1;
+ }
+
for (AudioFrameList::const_iterator iter = audioFrameList->begin();
iter != audioFrameList->end();
++iter) {
diff --git a/webrtc/modules/audio_device/audio_device_buffer.cc b/webrtc/modules/audio_device/audio_device_buffer.cc
index ed1bf20..42fdaad 100644
--- a/webrtc/modules/audio_device/audio_device_buffer.cc
+++ b/webrtc/modules/audio_device/audio_device_buffer.cc
@@ -548,15 +548,15 @@
if (_ptrCbAudioTransport)
{
uint32_t res(0);
- uint32_t rtp_timestamp = 0;
- int64_t ntp_time_ms = 0;
+ int64_t elapsed_time_ms = -1;
+ int64_t ntp_time_ms = -1;
res = _ptrCbAudioTransport->NeedMorePlayData(_playSamples,
playBytesPerSample,
playChannels,
playSampleRate,
&_playBuffer[0],
nSamplesOut,
- &rtp_timestamp,
+ &elapsed_time_ms,
&ntp_time_ms);
if (res != 0)
{
diff --git a/webrtc/modules/audio_device/include/audio_device_defines.h b/webrtc/modules/audio_device/include/audio_device_defines.h
index f65e3a8..56a584e 100644
--- a/webrtc/modules/audio_device/include/audio_device_defines.h
+++ b/webrtc/modules/audio_device/include/audio_device_defines.h
@@ -71,7 +71,7 @@
const uint32_t samplesPerSec,
void* audioSamples,
uint32_t& nSamplesOut,
- uint32_t* rtp_timestamp,
+ int64_t* elapsed_time_ms,
int64_t* ntp_time_ms) = 0;
// Method to pass captured data directly and unmixed to network channels.
@@ -128,7 +128,7 @@
virtual void PullRenderData(int bits_per_sample, int sample_rate,
int number_of_channels, int number_of_frames,
void* audio_data,
- uint32_t* rtp_timestamp,
+ int64_t* elapsed_time_ms,
int64_t* ntp_time_ms) {}
protected:
diff --git a/webrtc/modules/audio_device/test/audio_device_test_api.cc b/webrtc/modules/audio_device/test/audio_device_test_api.cc
index b10accb..011fc10 100644
--- a/webrtc/modules/audio_device/test/audio_device_test_api.cc
+++ b/webrtc/modules/audio_device/test/audio_device_test_api.cc
@@ -117,7 +117,7 @@
const uint32_t sampleRate,
void* audioSamples,
uint32_t& nSamplesOut,
- uint32_t* rtp_timestamp,
+ int64_t* elapsed_time_ms,
int64_t* ntp_time_ms) {
play_count_++;
if (play_count_ % 100 == 0) {
@@ -152,7 +152,7 @@
virtual void PullRenderData(int bits_per_sample, int sample_rate,
int number_of_channels, int number_of_frames,
void* audio_data,
- uint32_t* rtp_timestamp,
+ int64_t* elapsed_time_ms,
int64_t* ntp_time_ms) {}
private:
uint32_t rec_count_;
diff --git a/webrtc/modules/audio_device/test/func_test_manager.cc b/webrtc/modules/audio_device/test/func_test_manager.cc
index a51ebfb..2a19287 100644
--- a/webrtc/modules/audio_device/test/func_test_manager.cc
+++ b/webrtc/modules/audio_device/test/func_test_manager.cc
@@ -293,7 +293,7 @@
const uint32_t samplesPerSec,
void* audioSamples,
uint32_t& nSamplesOut,
- uint32_t* rtp_timestamp,
+ int64_t* elapsed_time_ms,
int64_t* ntp_time_ms)
{
if (_fullDuplex)
@@ -554,7 +554,7 @@
int number_of_channels,
int number_of_frames,
void* audio_data,
- uint32_t* rtp_timestamp,
+ int64_t* elapsed_time_ms,
int64_t* ntp_time_ms) {}
FuncTestManager::FuncTestManager() :
diff --git a/webrtc/modules/audio_device/test/func_test_manager.h b/webrtc/modules/audio_device/test/func_test_manager.h
index 1a1c2a5..5cb4f46 100644
--- a/webrtc/modules/audio_device/test/func_test_manager.h
+++ b/webrtc/modules/audio_device/test/func_test_manager.h
@@ -119,7 +119,7 @@
const uint32_t samplesPerSec,
void* audioSamples,
uint32_t& nSamplesOut,
- uint32_t* rtp_timestamp,
+ int64_t* elapsed_time_ms,
int64_t* ntp_time_ms);
virtual int OnDataAvailable(const int voe_channels[],
@@ -141,7 +141,7 @@
virtual void PullRenderData(int bits_per_sample, int sample_rate,
int number_of_channels, int number_of_frames,
void* audio_data,
- uint32_t* rtp_timestamp,
+ int64_t* elapsed_time_ms,
int64_t* ntp_time_ms);
AudioTransportImpl(AudioDeviceModule* audioDevice);
diff --git a/webrtc/modules/interface/module_common_types.h b/webrtc/modules/interface/module_common_types.h
index 9d00de7..2c94707 100644
--- a/webrtc/modules/interface/module_common_types.h
+++ b/webrtc/modules/interface/module_common_types.h
@@ -690,6 +690,9 @@
int id_;
// RTP timestamp of the first sample in the AudioFrame.
uint32_t timestamp_;
+ // Time since the first frame in milliseconds.
+ // -1 represents an uninitialized value.
+ int64_t elapsed_time_ms_;
// NTP time of the estimated capture time in local timebase in milliseconds.
// -1 represents an uninitialized value.
int64_t ntp_time_ms_;
@@ -720,6 +723,7 @@
// TODO(wu): Zero is a valid value for |timestamp_|. We should initialize
// to an invalid value, or add a new member to indicate invalidity.
timestamp_ = 0;
+ elapsed_time_ms_ = -1;
ntp_time_ms_ = -1;
samples_per_channel_ = 0;
sample_rate_hz_ = 0;
@@ -759,6 +763,8 @@
id_ = src.id_;
timestamp_ = src.timestamp_;
+ elapsed_time_ms_ = src.elapsed_time_ms_;
+ ntp_time_ms_ = src.ntp_time_ms_;
samples_per_channel_ = src.samples_per_channel_;
sample_rate_hz_ = src.sample_rate_hz_;
speech_type_ = src.speech_type_;