Fix the chain that propagates the audio frame's rtp and ntp timestamp including: * In AudioCodingModuleImpl::PlayoutData10Ms, don't reset the timestamp got from GetAudio. * When there are more than one participant, set AudioFrame's RTP timestamp to 0. * Copy ntp_time_ms_ in AudioFrame::CopyFrom method. * In RemixAndResample, pass src frame's timestamp_ and ntp_time_ms_ to the dst frame. * Fix how |elapsed_time_ms| is computed in channel.cc by adding GetPlayoutFrequency. Tweaks on ntp_time_ms_: * Init ntp_time_ms_ to -1 in AudioFrame ctor. * When there are more than one participant, set AudioFrame's ntp_time_ms_ to an invalid value. I.e. we don't support ntp_time_ms_ in multiple participants case before the mixing is moved to chrome. Added elapsed_time_ms to AudioFrame and pass it to chrome, where we don't have the information about the rtp timestamp's sample rate, i.e. can't convert rtp timestamp to ms. BUG=3111 R=henrik.lundin@webrtc.org, turaj@webrtc.org, xians@webrtc.org TBR=andrew andrew to take another look on audio_conference_mixer_impl.cc Review URL: https://webrtc-codereview.appspot.com/14559004 git-svn-id: http://webrtc.googlecode.com/svn/trunk@6346 4adac7df-926f-26a2-2b94-8c16560cd09d
diff --git a/webrtc/modules/audio_coding/main/acm2/acm_receiver.cc b/webrtc/modules/audio_coding/main/acm2/acm_receiver.cc index 613491a..052bd4f 100644 --- a/webrtc/modules/audio_coding/main/acm2/acm_receiver.cc +++ b/webrtc/modules/audio_coding/main/acm2/acm_receiver.cc
@@ -475,10 +475,17 @@ call_stats_.DecodedByNetEq(audio_frame->speech_type_); // Computes the RTP timestamp of the first sample in |audio_frame| from - // |PlayoutTimestamp|, which is the timestamp of the last sample of + // |GetPlayoutTimestamp|, which is the timestamp of the last sample of // |audio_frame|. - audio_frame->timestamp_ = - PlayoutTimestamp() - audio_frame->samples_per_channel_; + uint32_t playout_timestamp = 0; + if (GetPlayoutTimestamp(&playout_timestamp)) { + audio_frame->timestamp_ = + playout_timestamp - audio_frame->samples_per_channel_; + } else { + // Remain 0 until we have a valid |playout_timestamp|. + audio_frame->timestamp_ = 0; + } + return 0; } @@ -596,13 +603,14 @@ id_ = id; } -uint32_t AcmReceiver::PlayoutTimestamp() { +bool AcmReceiver::GetPlayoutTimestamp(uint32_t* timestamp) { if (av_sync_) { assert(initial_delay_manager_.get()); - if (initial_delay_manager_->buffering()) - return initial_delay_manager_->playout_timestamp(); + if (initial_delay_manager_->buffering()) { + return initial_delay_manager_->GetPlayoutTimestamp(timestamp); + } } - return neteq_->PlayoutTimestamp(); + return neteq_->GetPlayoutTimestamp(timestamp); } int AcmReceiver::last_audio_codec_id() const {
diff --git a/webrtc/modules/audio_coding/main/acm2/acm_receiver.h b/webrtc/modules/audio_coding/main/acm2/acm_receiver.h index 7a238ae..748744a 100644 --- a/webrtc/modules/audio_coding/main/acm2/acm_receiver.h +++ b/webrtc/modules/audio_coding/main/acm2/acm_receiver.h
@@ -242,9 +242,10 @@ void set_id(int id); // TODO(turajs): can be inline. // - // Returns the RTP timestamp of the last sample delivered by GetAudio(). + // Gets the RTP timestamp of the last sample delivered by GetAudio(). + // Returns true if the RTP timestamp is valid, otherwise false. // - uint32_t PlayoutTimestamp(); + bool GetPlayoutTimestamp(uint32_t* timestamp); // // Return the index of the codec associated with the last non-CNG/non-DTMF
diff --git a/webrtc/modules/audio_coding/main/acm2/audio_coding_module_impl.cc b/webrtc/modules/audio_coding/main/acm2/audio_coding_module_impl.cc index 7f6c8405..a07e854 100644 --- a/webrtc/modules/audio_coding/main/acm2/audio_coding_module_impl.cc +++ b/webrtc/modules/audio_coding/main/acm2/audio_coding_module_impl.cc
@@ -1776,7 +1776,6 @@ } audio_frame->id_ = id_; - audio_frame->timestamp_ = 0; return 0; } @@ -1917,8 +1916,7 @@ } int AudioCodingModuleImpl::PlayoutTimestamp(uint32_t* timestamp) { - *timestamp = receiver_.PlayoutTimestamp(); - return 0; + return receiver_.GetPlayoutTimestamp(timestamp) ? 0 : -1; } bool AudioCodingModuleImpl::HaveValidEncoder(const char* caller_name) const {
diff --git a/webrtc/modules/audio_coding/main/acm2/initial_delay_manager.cc b/webrtc/modules/audio_coding/main/acm2/initial_delay_manager.cc index c2b218c..786fb2e 100644 --- a/webrtc/modules/audio_coding/main/acm2/initial_delay_manager.cc +++ b/webrtc/modules/audio_coding/main/acm2/initial_delay_manager.cc
@@ -219,6 +219,14 @@ return; } +bool InitialDelayManager::GetPlayoutTimestamp(uint32_t* playout_timestamp) { + if (!buffering_) { + return false; + } + *playout_timestamp = playout_timestamp_; + return true; +} + void InitialDelayManager::DisableBuffering() { buffering_ = false; }
diff --git a/webrtc/modules/audio_coding/main/acm2/initial_delay_manager.h b/webrtc/modules/audio_coding/main/acm2/initial_delay_manager.h index 3c5ba3c..6edc115 100644 --- a/webrtc/modules/audio_coding/main/acm2/initial_delay_manager.h +++ b/webrtc/modules/audio_coding/main/acm2/initial_delay_manager.h
@@ -65,8 +65,9 @@ // sequence of late (or perhaps missing) packets is computed. void LatePackets(uint32_t timestamp_now, SyncStream* sync_stream); - // Playout timestamp, valid when buffering. - uint32_t playout_timestamp() { return playout_timestamp_; } + // Get playout timestamp. + // Returns true if the timestamp is valid (when buffering), otherwise false. + bool GetPlayoutTimestamp(uint32_t* playout_timestamp); // True if buffered audio is less than the given initial delay (specified at // the constructor). Buffering might be disabled by the client of this class.
diff --git a/webrtc/modules/audio_coding/main/acm2/initial_delay_manager_unittest.cc b/webrtc/modules/audio_coding/main/acm2/initial_delay_manager_unittest.cc index 15e88a5..38b7cfc 100644 --- a/webrtc/modules/audio_coding/main/acm2/initial_delay_manager_unittest.cc +++ b/webrtc/modules/audio_coding/main/acm2/initial_delay_manager_unittest.cc
@@ -359,7 +359,9 @@ EXPECT_TRUE(manager_->buffering()); const uint32_t expected_playout_timestamp = rtp_info_.header.timestamp - kInitDelayMs * kSamplingRateHz / 1000; - EXPECT_EQ(expected_playout_timestamp, manager_->playout_timestamp()); + uint32_t actual_playout_timestamp = 0; + EXPECT_TRUE(manager_->GetPlayoutTimestamp(&actual_playout_timestamp)); + EXPECT_EQ(expected_playout_timestamp, actual_playout_timestamp); NextRtpHeader(&rtp_info_, &rtp_receive_timestamp_); }
diff --git a/webrtc/modules/audio_coding/neteq4/interface/neteq.h b/webrtc/modules/audio_coding/neteq4/interface/neteq.h index 763da31..79a5dfb 100644 --- a/webrtc/modules/audio_coding/neteq4/interface/neteq.h +++ b/webrtc/modules/audio_coding/neteq4/interface/neteq.h
@@ -228,8 +228,9 @@ // Disables post-decode VAD. virtual void DisableVad() = 0; - // Returns the RTP timestamp for the last sample delivered by GetAudio(). - virtual uint32_t PlayoutTimestamp() = 0; + // Gets the RTP timestamp for the last sample delivered by GetAudio(). + // Returns true if the RTP timestamp is valid, otherwise false. + virtual bool GetPlayoutTimestamp(uint32_t* timestamp) = 0; // Not implemented. virtual int SetTargetNumberOfChannels() = 0;
diff --git a/webrtc/modules/audio_coding/neteq4/neteq_impl.cc b/webrtc/modules/audio_coding/neteq4/neteq_impl.cc index 963a820..f860766 100644 --- a/webrtc/modules/audio_coding/neteq4/neteq_impl.cc +++ b/webrtc/modules/audio_coding/neteq4/neteq_impl.cc
@@ -335,9 +335,15 @@ vad_->Disable(); } -uint32_t NetEqImpl::PlayoutTimestamp() { +bool NetEqImpl::GetPlayoutTimestamp(uint32_t* timestamp) { CriticalSectionScoped lock(crit_sect_.get()); - return timestamp_scaler_->ToExternal(playout_timestamp_); + if (first_packet_) { + // We don't have a valid RTP timestamp until we have decoded our first + // RTP packet. + return false; + } + *timestamp = timestamp_scaler_->ToExternal(playout_timestamp_); + return true; } int NetEqImpl::LastError() {
diff --git a/webrtc/modules/audio_coding/neteq4/neteq_impl.h b/webrtc/modules/audio_coding/neteq4/neteq_impl.h index 751de66..822a523 100644 --- a/webrtc/modules/audio_coding/neteq4/neteq_impl.h +++ b/webrtc/modules/audio_coding/neteq4/neteq_impl.h
@@ -166,8 +166,7 @@ // Disables post-decode VAD. virtual void DisableVad(); - // Returns the RTP timestamp for the last sample delivered by GetAudio(). - virtual uint32_t PlayoutTimestamp(); + virtual bool GetPlayoutTimestamp(uint32_t* timestamp); virtual int SetTargetNumberOfChannels() { return kNotImplemented; }
diff --git a/webrtc/modules/audio_coding/neteq4/neteq_impl_unittest.cc b/webrtc/modules/audio_coding/neteq4/neteq_impl_unittest.cc index aedd8d5..26279aa 100644 --- a/webrtc/modules/audio_coding/neteq4/neteq_impl_unittest.cc +++ b/webrtc/modules/audio_coding/neteq4/neteq_impl_unittest.cc
@@ -477,8 +477,10 @@ // The value of the last of the output samples is the same as the number of // samples played from the decoded packet. Thus, this number + the RTP // timestamp should match the playout timestamp. + uint32_t timestamp = 0; + EXPECT_TRUE(neteq_->GetPlayoutTimestamp(×tamp)); EXPECT_EQ(rtp_header.header.timestamp + output[samples_per_channel - 1], - neteq_->PlayoutTimestamp()); + timestamp); // Check the timestamp for the last value in the sync buffer. This should // be one full frame length ahead of the RTP timestamp.
diff --git a/webrtc/modules/audio_coding/neteq4/neteq_unittest.cc b/webrtc/modules/audio_coding/neteq4/neteq_unittest.cc index f66a3cf..c1a7e16 100644 --- a/webrtc/modules/audio_coding/neteq4/neteq_unittest.cc +++ b/webrtc/modules/audio_coding/neteq4/neteq_unittest.cc
@@ -228,6 +228,8 @@ void DuplicateCng(); + uint32_t PlayoutTimestamp(); + NetEq* neteq_; FILE* rtp_fp_; unsigned int sim_clock_; @@ -736,7 +738,7 @@ } EXPECT_EQ(kOutputNormal, type); - int32_t delay_before = timestamp - neteq_->PlayoutTimestamp(); + int32_t delay_before = timestamp - PlayoutTimestamp(); // Insert CNG for 1 minute (= 60000 ms). const int kCngPeriodMs = 100; @@ -829,7 +831,7 @@ // Check that the speech starts again within reasonable time. double time_until_speech_returns_ms = t_ms - speech_restart_time_ms; EXPECT_LT(time_until_speech_returns_ms, max_time_to_speech_ms); - int32_t delay_after = timestamp - neteq_->PlayoutTimestamp(); + int32_t delay_after = timestamp - PlayoutTimestamp(); // Compare delay before and after, and make sure it differs less than 20 ms. EXPECT_LE(delay_after, delay_before + delay_tolerance_ms * 16); EXPECT_GE(delay_after, delay_before - delay_tolerance_ms * 16); @@ -1310,7 +1312,7 @@ ASSERT_EQ(1, num_channels); // Expect delay (in samples) to be less than 2 packets. - EXPECT_LE(timestamp - neteq_->PlayoutTimestamp(), + EXPECT_LE(timestamp - PlayoutTimestamp(), static_cast<uint32_t>(kSamples * 2)); } // Make sure we have actually tested wrap-around. @@ -1391,7 +1393,7 @@ kMaxBlockSize, out_data_, &out_len, &num_channels, &type)); ASSERT_EQ(kBlockSize16kHz, out_len); EXPECT_EQ(kOutputCNG, type); - EXPECT_EQ(timestamp - algorithmic_delay_samples, neteq_->PlayoutTimestamp()); + EXPECT_EQ(timestamp - algorithmic_delay_samples, PlayoutTimestamp()); // Insert the same CNG packet again. Note that at this point it is old, since // we have already decoded the first copy of it. @@ -1406,7 +1408,7 @@ ASSERT_EQ(kBlockSize16kHz, out_len); EXPECT_EQ(kOutputCNG, type); EXPECT_EQ(timestamp - algorithmic_delay_samples, - neteq_->PlayoutTimestamp()); + PlayoutTimestamp()); } // Insert speech again. 
@@ -1422,7 +1424,13 @@ ASSERT_EQ(kBlockSize16kHz, out_len); EXPECT_EQ(kOutputNormal, type); EXPECT_EQ(timestamp + kSamples - algorithmic_delay_samples, - neteq_->PlayoutTimestamp()); + PlayoutTimestamp()); +} + +uint32_t NetEqDecodingTest::PlayoutTimestamp() { + uint32_t playout_timestamp = 0; + EXPECT_TRUE(neteq_->GetPlayoutTimestamp(&playout_timestamp)); + return playout_timestamp; } TEST_F(NetEqDecodingTest, DiscardDuplicateCng) { DuplicateCng(); }
diff --git a/webrtc/modules/audio_conference_mixer/source/audio_conference_mixer_impl.cc b/webrtc/modules/audio_conference_mixer/source/audio_conference_mixer_impl.cc index f3883c0..26ef3e8 100644 --- a/webrtc/modules/audio_conference_mixer/source/audio_conference_mixer_impl.cc +++ b/webrtc/modules/audio_conference_mixer/source/audio_conference_mixer_impl.cc
@@ -651,6 +651,11 @@ _audioFramePool->PushMemory(audioFrame); continue; } + if (_participantList.size() != 1) { + // TODO(wu): Issue 3390, add support for multiple participants case. + audioFrame->ntp_time_ms_ = -1; + } + // TODO(henrike): this assert triggers in some test cases where SRTP is // used which prevents NetEQ from making a VAD. Temporarily disable this // assert until the problem is fixed on a higher level. @@ -950,6 +955,16 @@ return 0; } + if (audioFrameList->size() == 1) { + mixedAudio.timestamp_ = audioFrameList->front()->timestamp_; + mixedAudio.elapsed_time_ms_ = audioFrameList->front()->elapsed_time_ms_; + } else { + // TODO(wu): Issue 3390. + // Audio frame timestamp is only supported in one channel case. + mixedAudio.timestamp_ = 0; + mixedAudio.elapsed_time_ms_ = -1; + } + for (AudioFrameList::const_iterator iter = audioFrameList->begin(); iter != audioFrameList->end(); ++iter) {
diff --git a/webrtc/modules/audio_device/audio_device_buffer.cc b/webrtc/modules/audio_device/audio_device_buffer.cc index ed1bf20..42fdaad 100644 --- a/webrtc/modules/audio_device/audio_device_buffer.cc +++ b/webrtc/modules/audio_device/audio_device_buffer.cc
@@ -548,15 +548,15 @@ if (_ptrCbAudioTransport) { uint32_t res(0); - uint32_t rtp_timestamp = 0; - int64_t ntp_time_ms = 0; + int64_t elapsed_time_ms = -1; + int64_t ntp_time_ms = -1; res = _ptrCbAudioTransport->NeedMorePlayData(_playSamples, playBytesPerSample, playChannels, playSampleRate, &_playBuffer[0], nSamplesOut, - &rtp_timestamp, + &elapsed_time_ms, &ntp_time_ms); if (res != 0) {
diff --git a/webrtc/modules/audio_device/include/audio_device_defines.h b/webrtc/modules/audio_device/include/audio_device_defines.h index f65e3a8..56a584e 100644 --- a/webrtc/modules/audio_device/include/audio_device_defines.h +++ b/webrtc/modules/audio_device/include/audio_device_defines.h
@@ -71,7 +71,7 @@ const uint32_t samplesPerSec, void* audioSamples, uint32_t& nSamplesOut, - uint32_t* rtp_timestamp, + int64_t* elapsed_time_ms, int64_t* ntp_time_ms) = 0; // Method to pass captured data directly and unmixed to network channels. @@ -128,7 +128,7 @@ virtual void PullRenderData(int bits_per_sample, int sample_rate, int number_of_channels, int number_of_frames, void* audio_data, - uint32_t* rtp_timestamp, + int64_t* elapsed_time_ms, int64_t* ntp_time_ms) {} protected:
diff --git a/webrtc/modules/audio_device/test/audio_device_test_api.cc b/webrtc/modules/audio_device/test/audio_device_test_api.cc index b10accb..011fc10 100644 --- a/webrtc/modules/audio_device/test/audio_device_test_api.cc +++ b/webrtc/modules/audio_device/test/audio_device_test_api.cc
@@ -117,7 +117,7 @@ const uint32_t sampleRate, void* audioSamples, uint32_t& nSamplesOut, - uint32_t* rtp_timestamp, + int64_t* elapsed_time_ms, int64_t* ntp_time_ms) { play_count_++; if (play_count_ % 100 == 0) { @@ -152,7 +152,7 @@ virtual void PullRenderData(int bits_per_sample, int sample_rate, int number_of_channels, int number_of_frames, void* audio_data, - uint32_t* rtp_timestamp, + int64_t* elapsed_time_ms, int64_t* ntp_time_ms) {} private: uint32_t rec_count_;
diff --git a/webrtc/modules/audio_device/test/func_test_manager.cc b/webrtc/modules/audio_device/test/func_test_manager.cc index a51ebfb..2a19287 100644 --- a/webrtc/modules/audio_device/test/func_test_manager.cc +++ b/webrtc/modules/audio_device/test/func_test_manager.cc
@@ -293,7 +293,7 @@ const uint32_t samplesPerSec, void* audioSamples, uint32_t& nSamplesOut, - uint32_t* rtp_timestamp, + int64_t* elapsed_time_ms, int64_t* ntp_time_ms) { if (_fullDuplex) @@ -554,7 +554,7 @@ int number_of_channels, int number_of_frames, void* audio_data, - uint32_t* rtp_timestamp, + int64_t* elapsed_time_ms, int64_t* ntp_time_ms) {} FuncTestManager::FuncTestManager() :
diff --git a/webrtc/modules/audio_device/test/func_test_manager.h b/webrtc/modules/audio_device/test/func_test_manager.h index 1a1c2a5..5cb4f46 100644 --- a/webrtc/modules/audio_device/test/func_test_manager.h +++ b/webrtc/modules/audio_device/test/func_test_manager.h
@@ -119,7 +119,7 @@ const uint32_t samplesPerSec, void* audioSamples, uint32_t& nSamplesOut, - uint32_t* rtp_timestamp, + int64_t* elapsed_time_ms, int64_t* ntp_time_ms); virtual int OnDataAvailable(const int voe_channels[], @@ -141,7 +141,7 @@ virtual void PullRenderData(int bits_per_sample, int sample_rate, int number_of_channels, int number_of_frames, void* audio_data, - uint32_t* rtp_timestamp, + int64_t* elapsed_time_ms, int64_t* ntp_time_ms); AudioTransportImpl(AudioDeviceModule* audioDevice);
diff --git a/webrtc/modules/interface/module_common_types.h b/webrtc/modules/interface/module_common_types.h index 9d00de7..2c94707 100644 --- a/webrtc/modules/interface/module_common_types.h +++ b/webrtc/modules/interface/module_common_types.h
@@ -690,6 +690,9 @@ int id_; // RTP timestamp of the first sample in the AudioFrame. uint32_t timestamp_; + // Time since the first frame in milliseconds. + // -1 represents an uninitialized value. + int64_t elapsed_time_ms_; // NTP time of the estimated capture time in local timebase in milliseconds. // -1 represents an uninitialized value. int64_t ntp_time_ms_; @@ -720,6 +723,7 @@ // TODO(wu): Zero is a valid value for |timestamp_|. We should initialize // to an invalid value, or add a new member to indicate invalidity. timestamp_ = 0; + elapsed_time_ms_ = -1; ntp_time_ms_ = -1; samples_per_channel_ = 0; sample_rate_hz_ = 0; @@ -759,6 +763,8 @@ id_ = src.id_; timestamp_ = src.timestamp_; + elapsed_time_ms_ = src.elapsed_time_ms_; + ntp_time_ms_ = src.ntp_time_ms_; samples_per_channel_ = src.samples_per_channel_; sample_rate_hz_ = src.sample_rate_hz_; speech_type_ = src.speech_type_;