Delete VAD methods from AcmReceiver and move functionality inside NetEq
This change essentially does two things:
1. Remove the VAD-related methods from AcmReceiver. These are
EnableVad(), DisableVad(), and vad_enabled(). None of them were used
outside of unit tests.
2. Move the functionality to set AudioFrame::speech_type_ and
AudioFrame::vad_activity_ inside NetEq. This was previously done in
AcmReceiver, but it was based on information inherently owned by NetEq.
With the change in item 2, NetEq's GetAudio interface can be simplified
by removing the output type parameter (NetEqOutputType* type). This
will be done in a follow-up CL.
BUG=webrtc:5607
Review URL: https://codereview.webrtc.org/1772583002
Cr-Commit-Position: refs/heads/master@{#11902}
diff --git a/webrtc/modules/audio_coding/acm2/acm_receiver.cc b/webrtc/modules/audio_coding/acm2/acm_receiver.cc
index 02d165a..1990768 100644
--- a/webrtc/modules/audio_coding/acm2/acm_receiver.cc
+++ b/webrtc/modules/audio_coding/acm2/acm_receiver.cc
@@ -35,77 +35,6 @@
namespace {
-// |vad_activity_| field of |audio_frame| is set to |previous_audio_activity_|
-// before the call to this function.
-void SetAudioFrameActivityAndType(bool vad_enabled,
- NetEqOutputType type,
- AudioFrame* audio_frame) {
- if (vad_enabled) {
- switch (type) {
- case kOutputNormal: {
- audio_frame->vad_activity_ = AudioFrame::kVadActive;
- audio_frame->speech_type_ = AudioFrame::kNormalSpeech;
- break;
- }
- case kOutputVADPassive: {
- audio_frame->vad_activity_ = AudioFrame::kVadPassive;
- audio_frame->speech_type_ = AudioFrame::kNormalSpeech;
- break;
- }
- case kOutputCNG: {
- audio_frame->vad_activity_ = AudioFrame::kVadPassive;
- audio_frame->speech_type_ = AudioFrame::kCNG;
- break;
- }
- case kOutputPLC: {
- // Don't change |audio_frame->vad_activity_|, it should be the same as
- // |previous_audio_activity_|.
- audio_frame->speech_type_ = AudioFrame::kPLC;
- break;
- }
- case kOutputPLCtoCNG: {
- audio_frame->vad_activity_ = AudioFrame::kVadPassive;
- audio_frame->speech_type_ = AudioFrame::kPLCCNG;
- break;
- }
- default:
- assert(false);
- }
- } else {
- // Always return kVadUnknown when receive VAD is inactive
- audio_frame->vad_activity_ = AudioFrame::kVadUnknown;
- switch (type) {
- case kOutputNormal: {
- audio_frame->speech_type_ = AudioFrame::kNormalSpeech;
- break;
- }
- case kOutputCNG: {
- audio_frame->speech_type_ = AudioFrame::kCNG;
- break;
- }
- case kOutputPLC: {
- audio_frame->speech_type_ = AudioFrame::kPLC;
- break;
- }
- case kOutputPLCtoCNG: {
- audio_frame->speech_type_ = AudioFrame::kPLCCNG;
- break;
- }
- case kOutputVADPassive: {
- // Normally, we should no get any VAD decision if post-decoding VAD is
- // not active. However, if post-decoding VAD has been active then
- // disabled, we might be here for couple of frames.
- audio_frame->speech_type_ = AudioFrame::kNormalSpeech;
- LOG(WARNING) << "Post-decoding VAD is disabled but output is "
- << "labeled VAD-passive";
- break;
- }
- default:
- assert(false);
- }
- }
-}
-
// Is the given codec a CNG codec?
// TODO(kwiberg): Move to RentACodec.
bool IsCng(int codec_id) {
@@ -120,10 +49,8 @@
AcmReceiver::AcmReceiver(const AudioCodingModule::Config& config)
: last_audio_decoder_(nullptr),
- previous_audio_activity_(AudioFrame::kVadPassive),
last_audio_buffer_(new int16_t[AudioFrame::kMaxDataSizeSamples]),
neteq_(NetEq::Create(config.neteq_config)),
- vad_enabled_(config.neteq_config.enable_post_decode_vad),
clock_(config.clock),
resampled_last_output_frame_(true) {
assert(clock_);
@@ -264,10 +191,6 @@
sizeof(int16_t) * audio_frame->samples_per_channel_ *
audio_frame->num_channels_);
- // Should set |vad_activity| before calling SetAudioFrameActivityAndType().
- audio_frame->vad_activity_ = previous_audio_activity_;
- SetAudioFrameActivityAndType(vad_enabled_, type, audio_frame);
- previous_audio_activity_ = audio_frame->vad_activity_;
call_stats_.DecodedByNetEq(audio_frame->speech_type_);
// Computes the RTP timestamp of the first sample in |audio_frame| from
@@ -351,18 +274,6 @@
return 0;
}
-void AcmReceiver::EnableVad() {
- neteq_->EnableVad();
- rtc::CritScope lock(&crit_sect_);
- vad_enabled_ = true;
-}
-
-void AcmReceiver::DisableVad() {
- neteq_->DisableVad();
- rtc::CritScope lock(&crit_sect_);
- vad_enabled_ = false;
-}
-
void AcmReceiver::FlushBuffers() {
neteq_->FlushBuffers();
}
diff --git a/webrtc/modules/audio_coding/acm2/acm_receiver.h b/webrtc/modules/audio_coding/acm2/acm_receiver.h
index ae3969b..77eb563 100644
--- a/webrtc/modules/audio_coding/acm2/acm_receiver.h
+++ b/webrtc/modules/audio_coding/acm2/acm_receiver.h
@@ -175,21 +175,6 @@
void GetNetworkStatistics(NetworkStatistics* statistics);
//
- // Enable post-decoding VAD.
- //
- void EnableVad();
-
- //
- // Disable post-decoding VAD.
- //
- void DisableVad();
-
- //
- // Returns whether post-decoding VAD is enabled (true) or disabled (false).
- //
- bool vad_enabled() const { return vad_enabled_; }
-
- //
// Flushes the NetEq packet and speech buffers.
//
void FlushBuffers();
@@ -278,14 +263,12 @@
rtc::CriticalSection crit_sect_;
const Decoder* last_audio_decoder_ GUARDED_BY(crit_sect_);
- AudioFrame::VADActivity previous_audio_activity_ GUARDED_BY(crit_sect_);
ACMResampler resampler_ GUARDED_BY(crit_sect_);
std::unique_ptr<int16_t[]> last_audio_buffer_ GUARDED_BY(crit_sect_);
CallStatistics call_stats_ GUARDED_BY(crit_sect_);
NetEq* neteq_;
// Decoders map is keyed by payload type
std::map<uint8_t, Decoder> decoders_ GUARDED_BY(crit_sect_);
- bool vad_enabled_;
Clock* clock_; // TODO(henrik.lundin) Make const if possible.
bool resampled_last_output_frame_ GUARDED_BY(crit_sect_);
rtc::Optional<int> last_packet_sample_rate_hz_ GUARDED_BY(crit_sect_);
diff --git a/webrtc/modules/audio_coding/acm2/acm_receiver_unittest_oldapi.cc b/webrtc/modules/audio_coding/acm2/acm_receiver_unittest_oldapi.cc
index a0f4e0e..a26b2e2 100644
--- a/webrtc/modules/audio_coding/acm2/acm_receiver_unittest_oldapi.cc
+++ b/webrtc/modules/audio_coding/acm2/acm_receiver_unittest_oldapi.cc
@@ -58,14 +58,13 @@
packet_sent_(false),
last_packet_send_timestamp_(timestamp_),
last_frame_type_(kEmptyFrame) {
- AudioCodingModule::Config config;
- acm_.reset(new AudioCodingModuleImpl(config));
- receiver_.reset(new AcmReceiver(config));
}
~AcmReceiverTestOldApi() {}
void SetUp() override {
+ acm_.reset(new AudioCodingModuleImpl(config_));
+ receiver_.reset(new AcmReceiver(config_));
ASSERT_TRUE(receiver_.get() != NULL);
ASSERT_TRUE(acm_.get() != NULL);
codecs_ = RentACodec::Database();
@@ -153,6 +152,7 @@
return 0;
}
+ AudioCodingModule::Config config_;
std::unique_ptr<AcmReceiver> receiver_;
rtc::ArrayView<const CodecInst> codecs_;
std::unique_ptr<AudioCodingModule> acm_;
@@ -295,8 +295,7 @@
#define MAYBE_PostdecodingVad PostdecodingVad
#endif
TEST_F(AcmReceiverTestOldApi, MAYBE_PostdecodingVad) {
- receiver_->EnableVad();
- EXPECT_TRUE(receiver_->vad_enabled());
+ EXPECT_TRUE(config_.neteq_config.enable_post_decode_vad);
const CodecIdInst codec(RentACodec::CodecId::kPCM16Bwb);
ASSERT_EQ(
0, receiver_->AddCodec(codec.id, codec.inst.pltype, codec.inst.channels,
@@ -310,10 +309,29 @@
ASSERT_EQ(0, receiver_->GetAudio(codec.inst.plfreq, &frame));
}
EXPECT_EQ(AudioFrame::kVadPassive, frame.vad_activity_);
+}
- receiver_->DisableVad();
- EXPECT_FALSE(receiver_->vad_enabled());
+class AcmReceiverTestPostDecodeVadPassiveOldApi : public AcmReceiverTestOldApi {
+ protected:
+ AcmReceiverTestPostDecodeVadPassiveOldApi() {
+ config_.neteq_config.enable_post_decode_vad = false;
+ }
+};
+#if defined(WEBRTC_ANDROID)
+#define MAYBE_PostdecodingVad DISABLED_PostdecodingVad
+#else
+#define MAYBE_PostdecodingVad PostdecodingVad
+#endif
+TEST_F(AcmReceiverTestPostDecodeVadPassiveOldApi, MAYBE_PostdecodingVad) {
+ EXPECT_FALSE(config_.neteq_config.enable_post_decode_vad);
+ const CodecIdInst codec(RentACodec::CodecId::kPCM16Bwb);
+ ASSERT_EQ(
+ 0, receiver_->AddCodec(codec.id, codec.inst.pltype, codec.inst.channels,
+ codec.inst.plfreq, nullptr, ""));
+ const int kNumPackets = 5;
+ const int num_10ms_frames = codec.inst.pacsize / (codec.inst.plfreq / 100);
+ AudioFrame frame;
for (int n = 0; n < kNumPackets; ++n) {
InsertOnePacketOfSilence(codec.id);
for (int k = 0; k < num_10ms_frames; ++k)
diff --git a/webrtc/modules/audio_coding/neteq/neteq_impl.cc b/webrtc/modules/audio_coding/neteq/neteq_impl.cc
index f899d07..fc74f2d 100644
--- a/webrtc/modules/audio_coding/neteq/neteq_impl.cc
+++ b/webrtc/modules/audio_coding/neteq/neteq_impl.cc
@@ -148,6 +148,49 @@
return kOK;
}
+namespace {
+void SetAudioFrameActivityAndType(bool vad_enabled,
+ NetEqOutputType type,
+ AudioFrame::VADActivity last_vad_activity,
+ AudioFrame* audio_frame) {
+ switch (type) {
+ case kOutputNormal: {
+ audio_frame->speech_type_ = AudioFrame::kNormalSpeech;
+ audio_frame->vad_activity_ = AudioFrame::kVadActive;
+ break;
+ }
+ case kOutputVADPassive: {
+ // This should only be reached if the VAD is enabled.
+ RTC_DCHECK(vad_enabled);
+ audio_frame->speech_type_ = AudioFrame::kNormalSpeech;
+ audio_frame->vad_activity_ = AudioFrame::kVadPassive;
+ break;
+ }
+ case kOutputCNG: {
+ audio_frame->speech_type_ = AudioFrame::kCNG;
+ audio_frame->vad_activity_ = AudioFrame::kVadPassive;
+ break;
+ }
+ case kOutputPLC: {
+ audio_frame->speech_type_ = AudioFrame::kPLC;
+ audio_frame->vad_activity_ = last_vad_activity;
+ break;
+ }
+ case kOutputPLCtoCNG: {
+ audio_frame->speech_type_ = AudioFrame::kPLCCNG;
+ audio_frame->vad_activity_ = AudioFrame::kVadPassive;
+ break;
+ }
+ default:
+ RTC_NOTREACHED();
+ }
+ if (!vad_enabled) {
+ // Always set kVadUnknown when receive VAD is inactive.
+ audio_frame->vad_activity_ = AudioFrame::kVadUnknown;
+ }
+}
+}
+
int NetEqImpl::GetAudio(AudioFrame* audio_frame, NetEqOutputType* type) {
TRACE_EVENT0("webrtc", "NetEqImpl::GetAudio");
rtc::CritScope lock(&crit_sect_);
@@ -162,6 +205,9 @@
if (type) {
*type = LastOutputType();
}
+ SetAudioFrameActivityAndType(vad_->enabled(), LastOutputType(),
+ last_vad_activity_, audio_frame);
+ last_vad_activity_ = audio_frame->vad_activity_;
last_output_sample_rate_hz_ = audio_frame->sample_rate_hz_;
RTC_DCHECK(last_output_sample_rate_hz_ == 8000 ||
last_output_sample_rate_hz_ == 16000 ||
diff --git a/webrtc/modules/audio_coding/neteq/neteq_impl.h b/webrtc/modules/audio_coding/neteq/neteq_impl.h
index 4575864..12cb6f4 100644
--- a/webrtc/modules/audio_coding/neteq/neteq_impl.h
+++ b/webrtc/modules/audio_coding/neteq/neteq_impl.h
@@ -379,6 +379,8 @@
bool enable_fast_accelerate_ GUARDED_BY(crit_sect_);
std::unique_ptr<Nack> nack_ GUARDED_BY(crit_sect_);
bool nack_enabled_ GUARDED_BY(crit_sect_);
+ AudioFrame::VADActivity last_vad_activity_ GUARDED_BY(crit_sect_) =
+ AudioFrame::kVadPassive;
private:
RTC_DISALLOW_COPY_AND_ASSIGN(NetEqImpl);