Avoid flagging Opus DTX frames as speech.
Background: After 20 consecutive DTX frames, Opus encodes the background
noise in a normal frame and then goes back to outputting DTX frames.
Currently all Opus frames are flagged as containing speech.
This CL is has two effects on outgoing Opus packets:
1. DTX frames are flagged as non-speech.
2. A non-DTX frame that follows 20 consecutive DTX frames is flagged as
non-speech.
Bug: webrtc:8088
Change-Id: Ic36cf8c9d0a34f55ed4e57858362ad91e3897dda
Reviewed-on: https://webrtc-review.googlesource.com/23760
Commit-Queue: Gustaf Ullberg <gustaf@webrtc.org>
Reviewed-by: Henrik Lundin <henrik.lundin@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#20794}
diff --git a/modules/audio_coding/codecs/opus/audio_encoder_opus.cc b/modules/audio_coding/codecs/opus/audio_encoder_opus.cc
index 5a5ac34..f07cd42 100644
--- a/modules/audio_coding/codecs/opus/audio_encoder_opus.cc
+++ b/modules/audio_coding/codecs/opus/audio_encoder_opus.cc
@@ -380,7 +380,8 @@
inst_(nullptr),
packet_loss_fraction_smoother_(new PacketLossFractionSmoother()),
audio_network_adaptor_creator_(audio_network_adaptor_creator),
- bitrate_smoother_(std::move(bitrate_smoother)) {
+ bitrate_smoother_(std::move(bitrate_smoother)),
+ consecutive_dtx_frames_(0) {
RTC_DCHECK(0 <= payload_type && payload_type <= 127);
// Sanity check of the redundant payload type field that we want to get rid
@@ -603,14 +604,23 @@
});
input_buffer_.clear();
+ bool dtx_frame = (info.encoded_bytes <= 2);
+
// Will use new packet size for next encoding.
config_.frame_size_ms = next_frame_length_ms_;
info.encoded_timestamp = first_timestamp_in_buffer_;
info.payload_type = payload_type_;
info.send_even_if_empty = true; // Allows Opus to send empty packets.
- info.speech = (info.encoded_bytes > 0);
+ // After 20 DTX frames (MAX_CONSECUTIVE_DTX) Opus will send a frame
+ // coding the background noise. Avoid flagging this frame as speech
+ // (even though there is a probability of the frame being speech).
+ info.speech = !dtx_frame && (consecutive_dtx_frames_ != 20);
info.encoder_type = CodecType::kOpus;
+
+ // Increase or reset DTX counter.
+ consecutive_dtx_frames_ = (dtx_frame) ? (consecutive_dtx_frames_ + 1) : (0);
+
return info;
}
diff --git a/modules/audio_coding/codecs/opus/audio_encoder_opus.h b/modules/audio_coding/codecs/opus/audio_encoder_opus.h
index 8e51dbd..22967c4 100644
--- a/modules/audio_coding/codecs/opus/audio_encoder_opus.h
+++ b/modules/audio_coding/codecs/opus/audio_encoder_opus.h
@@ -161,6 +161,7 @@
rtc::Optional<size_t> overhead_bytes_per_packet_;
const std::unique_ptr<SmoothingFilter> bitrate_smoother_;
rtc::Optional<int64_t> bitrate_smoother_last_update_time_;
+ int consecutive_dtx_frames_;
friend struct AudioEncoderOpus;
RTC_DISALLOW_COPY_AND_ASSIGN(AudioEncoderOpusImpl);
diff --git a/modules/audio_coding/codecs/opus/audio_encoder_opus_unittest.cc b/modules/audio_coding/codecs/opus/audio_encoder_opus_unittest.cc
index 868de8c..c3ad488 100644
--- a/modules/audio_coding/codecs/opus/audio_encoder_opus_unittest.cc
+++ b/modules/audio_coding/codecs/opus/audio_encoder_opus_unittest.cc
@@ -753,4 +753,66 @@
EXPECT_EQ(64000, config.bitrate_bps);
}
+TEST(AudioEncoderOpusTest, OpusFlagDtxAsNonSpeech) {
+ // Create encoder with DTX enabled.
+ AudioEncoderOpusConfig config;
+ config.dtx_enabled = true;
+ constexpr int payload_type = 17;
+ const auto encoder = AudioEncoderOpus::MakeAudioEncoder(config, payload_type);
+
+ // Open file containing speech and silence.
+ const std::string kInputFileName =
+ webrtc::test::ResourcePath("audio_coding/testfile32kHz", "pcm");
+ test::AudioLoop audio_loop;
+ // Use the file as if it were sampled at 48 kHz.
+ constexpr int kSampleRateHz = 48000;
+ EXPECT_EQ(kSampleRateHz, encoder->SampleRateHz());
+ constexpr size_t kMaxLoopLengthSamples =
+ kSampleRateHz * 10; // Max 10 second loop.
+ constexpr size_t kInputBlockSizeSamples =
+ 10 * kSampleRateHz / 1000; // 10 ms.
+ EXPECT_TRUE(audio_loop.Init(kInputFileName, kMaxLoopLengthSamples,
+ kInputBlockSizeSamples));
+
+ // Encode.
+ AudioEncoder::EncodedInfo info;
+ rtc::Buffer encoded(500);
+ int nonspeech_frames = 0;
+ int max_nonspeech_frames = 0;
+ int dtx_frames = 0;
+ int max_dtx_frames = 0;
+ uint32_t rtp_timestamp = 0u;
+ for (size_t i = 0; i < 500; ++i) {
+ encoded.Clear();
+
+ // Every second call to the encoder will generate an Opus packet.
+ for (int j = 0; j < 2; j++) {
+ info =
+ encoder->Encode(rtp_timestamp, audio_loop.GetNextBlock(), &encoded);
+ rtp_timestamp += kInputBlockSizeSamples;
+ }
+
+ // Bookkeeping of number of DTX frames.
+ if (info.encoded_bytes <= 2) {
+ ++dtx_frames;
+ } else {
+ if (dtx_frames > max_dtx_frames)
+ max_dtx_frames = dtx_frames;
+ dtx_frames = 0;
+ }
+
+ // Bookkeeping of number of non-speech frames.
+ if (info.speech == 0) {
+ ++nonspeech_frames;
+ } else {
+ if (nonspeech_frames > max_nonspeech_frames)
+ max_nonspeech_frames = nonspeech_frames;
+ nonspeech_frames = 0;
+ }
+ }
+
+ // Maximum number of consecutive non-speech packets should exceed 20.
+ EXPECT_GT(max_nonspeech_frames, 20);
+}
+
} // namespace webrtc
diff --git a/modules/audio_coding/test/TestVADDTX.cc b/modules/audio_coding/test/TestVADDTX.cc
index 628582d..8064448 100644
--- a/modules/audio_coding/test/TestVADDTX.cc
+++ b/modules/audio_coding/test/TestVADDTX.cc
@@ -257,6 +257,7 @@
EXPECT_EQ(0, acm_send_->EnableOpusDtx());
expects[kEmptyFrame] = 1;
+ expects[kAudioFrameCN] = 1;
Run(webrtc::test::ResourcePath("audio_coding/testfile32kHz", "pcm"),
32000, 1, out_filename, true, expects);
@@ -265,12 +266,14 @@
RegisterCodec(kOpusStereo);
EXPECT_EQ(0, acm_send_->DisableOpusDtx());
expects[kEmptyFrame] = 0;
+ expects[kAudioFrameCN] = 0;
Run(webrtc::test::ResourcePath("audio_coding/teststereo32kHz", "pcm"),
32000, 2, out_filename, false, expects);
EXPECT_EQ(0, acm_send_->EnableOpusDtx());
expects[kEmptyFrame] = 1;
+ expects[kAudioFrameCN] = 1;
Run(webrtc::test::ResourcePath("audio_coding/teststereo32kHz", "pcm"),
32000, 2, out_filename, true, expects);
#endif