Revert "opus: take SILK vad result into account for voice detection"
This reverts commit 686a3709acfedcf0a4c798dd1c5902787c4a266b.
Reason for revert: crbug.com/1144220
Original change's description:
> opus: take SILK vad result into account for voice detection
>
> BUG=webrtc:11643
>
> Change-Id: Idc3a9b6bb7bd1a33f905843e5d6067ae19d5172c
> Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/176508
> Commit-Queue: Minyue Li <minyue@webrtc.org>
> Reviewed-by: Minyue Li <minyue@webrtc.org>
> Cr-Commit-Position: refs/heads/master@{#31743}
TBR=devicentepena@webrtc.org,minyue@webrtc.org,fippo@sip-communicator.org
Bug: webrtc:11643
Change-Id: I9c77e4f6e919c4b648a5783edf4188e1f8114602
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/191485
Commit-Queue: Minyue Li <minyue@webrtc.org>
Reviewed-by: Minyue Li <minyue@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#32542}
diff --git a/modules/audio_coding/codecs/opus/audio_encoder_opus.cc b/modules/audio_coding/codecs/opus/audio_encoder_opus.cc
index 8d1a734..203cb5ae 100644
--- a/modules/audio_coding/codecs/opus/audio_encoder_opus.cc
+++ b/modules/audio_coding/codecs/opus/audio_encoder_opus.cc
@@ -367,7 +367,8 @@
inst_(nullptr),
packet_loss_fraction_smoother_(new PacketLossFractionSmoother()),
audio_network_adaptor_creator_(audio_network_adaptor_creator),
- bitrate_smoother_(std::move(bitrate_smoother)) {
+ bitrate_smoother_(std::move(bitrate_smoother)),
+ consecutive_dtx_frames_(0) {
RTC_DCHECK(0 <= payload_type && payload_type <= 127);
// Sanity check of the redundant payload type field that we want to get rid
@@ -589,7 +590,6 @@
Num10msFramesPerPacket() * SamplesPer10msFrame());
const size_t max_encoded_bytes = SufficientOutputBufferSize();
- const size_t start_offset_bytes = encoded->size();
EncodedInfo info;
info.encoded_bytes = encoded->AppendData(
max_encoded_bytes, [&](rtc::ArrayView<uint8_t> encoded) {
@@ -604,6 +604,8 @@
});
input_buffer_.clear();
+ bool dtx_frame = (info.encoded_bytes <= 2);
+
// Will use new packet size for next encoding.
config_.frame_size_ms = next_frame_length_ms_;
@@ -618,18 +620,14 @@
info.encoded_timestamp = first_timestamp_in_buffer_;
info.payload_type = payload_type_;
info.send_even_if_empty = true; // Allows Opus to send empty packets.
+ // After 20 DTX frames (MAX_CONSECUTIVE_DTX) Opus will send a frame
+ // coding the background noise. Avoid flagging this frame as speech
+ // (even though there is a probability of the frame being speech).
+ info.speech = !dtx_frame && (consecutive_dtx_frames_ != 20);
info.encoder_type = CodecType::kOpus;
- // Extract the VAD result from the encoded packet.
- int has_voice = WebRtcOpus_PacketHasVoiceActivity(
- &encoded->data()[start_offset_bytes], info.encoded_bytes);
- if (has_voice == -1) {
- // CELT mode packet or there was an error. This had set the speech flag to
- // true historically.
- info.speech = true;
- } else {
- info.speech = has_voice;
- }
+ // Increase or reset DTX counter.
+ consecutive_dtx_frames_ = (dtx_frame) ? (consecutive_dtx_frames_ + 1) : (0);
return info;
}
diff --git a/modules/audio_coding/codecs/opus/audio_encoder_opus.h b/modules/audio_coding/codecs/opus/audio_encoder_opus.h
index dc955ce..ab954fe 100644
--- a/modules/audio_coding/codecs/opus/audio_encoder_opus.h
+++ b/modules/audio_coding/codecs/opus/audio_encoder_opus.h
@@ -172,6 +172,7 @@
absl::optional<size_t> overhead_bytes_per_packet_;
const std::unique_ptr<SmoothingFilter> bitrate_smoother_;
absl::optional<int64_t> bitrate_smoother_last_update_time_;
+ int consecutive_dtx_frames_;
friend struct AudioEncoderOpus;
RTC_DISALLOW_COPY_AND_ASSIGN(AudioEncoderOpusImpl);
diff --git a/modules/audio_coding/codecs/opus/opus_interface.cc b/modules/audio_coding/codecs/opus/opus_interface.cc
index 455f175..ca39ed8 100644
--- a/modules/audio_coding/codecs/opus/opus_interface.cc
+++ b/modules/audio_coding/codecs/opus/opus_interface.cc
@@ -767,7 +767,7 @@
int silk_frames = WebRtcOpus_NumSilkFrames(payload);
if (silk_frames == 0)
- return 0;
+ return -1;
const int channels = opus_packet_get_nb_channels(payload);
RTC_DCHECK(channels == 1 || channels == 2);
diff --git a/modules/audio_coding/codecs/opus/opus_unittest.cc b/modules/audio_coding/codecs/opus/opus_unittest.cc
index 66ac5e7..80cab50 100644
--- a/modules/audio_coding/codecs/opus/opus_unittest.cc
+++ b/modules/audio_coding/codecs/opus/opus_unittest.cc
@@ -975,21 +975,4 @@
EXPECT_TRUE(WebRtcOpus_PacketHasVoiceActivity(twoMonoFrames, 3));
}
-TEST(OpusVadTest, DtxEmptyPacket) {
- const uint8_t dtx[] = {0x78};
- EXPECT_FALSE(WebRtcOpus_PacketHasVoiceActivity(dtx, 1));
-}
-
-TEST(OpusVadTest, DtxBackgroundNoisePacket) {
- // DTX sends a frame coding background noise every 20 packets:
- // https://tools.ietf.org/html/rfc6716#section-2.1.9
- // The packet below represents such a frame and was captured using
- // Wireshark while disabling encryption.
- const uint8_t dtx[] = {0x78, 0x07, 0xc9, 0x79, 0xc8, 0xc9, 0x57, 0xc0, 0xa2,
- 0x12, 0x23, 0xfa, 0xef, 0x67, 0xf3, 0x2e, 0xe3, 0xd3,
- 0xd5, 0xe9, 0xec, 0xdb, 0x3e, 0xbc, 0x80, 0xb6, 0x6e,
- 0x2a, 0xb7, 0x8c, 0x83, 0xcd, 0x83, 0xcd, 0x00};
- EXPECT_FALSE(WebRtcOpus_PacketHasVoiceActivity(dtx, 35));
-}
-
} // namespace webrtc
diff --git a/modules/audio_coding/test/TestVADDTX.cc b/modules/audio_coding/test/TestVADDTX.cc
index dce5433..17baef6 100644
--- a/modules/audio_coding/test/TestVADDTX.cc
+++ b/modules/audio_coding/test/TestVADDTX.cc
@@ -166,13 +166,11 @@
int i = &st - stats; // Calculate the current position in stats.
switch (expects[i]) {
case 0: {
- EXPECT_EQ(0u, st) << "stats[" << i << "] error. Output file "
- << out_filename;
+ EXPECT_EQ(0u, st) << "stats[" << i << "] error.";
break;
}
case 1: {
- EXPECT_GT(st, 0u) << "stats[" << i << "] error. Output file "
- << out_filename;
+ EXPECT_GT(st, 0u) << "stats[" << i << "] error.";
break;
}
}
@@ -191,29 +189,25 @@
// Test various configurations on VAD/DTX.
void TestWebRtcVadDtx::RunTestCases(const SdpAudioFormat& codec_format) {
- RegisterCodec(codec_format, absl::nullopt);
Test(/*new_outfile=*/true,
- /*expect_vad_packets=*/codec_format.name == "opus");
+ /*expect_dtx_enabled=*/RegisterCodec(codec_format, absl::nullopt));
- RegisterCodec(codec_format, Vad::kVadAggressive);
Test(/*new_outfile=*/false,
- /*expect_vad_packets=*/true);
+ /*expect_dtx_enabled=*/RegisterCodec(codec_format, Vad::kVadAggressive));
- RegisterCodec(codec_format, Vad::kVadLowBitrate);
Test(/*new_outfile=*/false,
- /*expect_vad_packets=*/true);
+ /*expect_dtx_enabled=*/RegisterCodec(codec_format, Vad::kVadLowBitrate));
- RegisterCodec(codec_format, Vad::kVadVeryAggressive);
- Test(/*new_outfile=*/false, /*expect_vad_packets=*/true);
+ Test(/*new_outfile=*/false, /*expect_dtx_enabled=*/RegisterCodec(
+ codec_format, Vad::kVadVeryAggressive));
- RegisterCodec(codec_format, Vad::kVadNormal);
Test(/*new_outfile=*/false,
- /*expect_vad_packets=*/true);
+ /*expect_dtx_enabled=*/RegisterCodec(codec_format, Vad::kVadNormal));
}
// Set the expectation and run the test.
-void TestWebRtcVadDtx::Test(bool new_outfile, bool expect_vad_packets) {
- int expects[] = {-1, 1, expect_vad_packets ? 1 : -1, 0, 0};
+void TestWebRtcVadDtx::Test(bool new_outfile, bool expect_dtx_enabled) {
+ int expects[] = {-1, 1, expect_dtx_enabled, 0, 0};
if (new_outfile) {
output_file_num_++;
}
@@ -226,20 +220,16 @@
// Following is the implementation of TestOpusDtx.
void TestOpusDtx::Perform() {
- int expects[] = {0, 0, 0, 0, 0};
+ int expects[] = {0, 1, 0, 0, 0};
// Register Opus as send codec
std::string out_filename =
webrtc::test::OutputPath() + "testOpusDtx_outFile_mono.pcm";
RegisterCodec({"opus", 48000, 2}, absl::nullopt);
-
acm_send_->ModifyEncoder([](std::unique_ptr<AudioEncoder>* encoder_ptr) {
(*encoder_ptr)->SetDtx(false);
});
- expects[static_cast<int>(AudioFrameType::kEmptyFrame)] = 0;
- expects[static_cast<int>(AudioFrameType::kAudioFrameSpeech)] = 1;
- expects[static_cast<int>(AudioFrameType::kAudioFrameCN)] = 1;
Run(webrtc::test::ResourcePath("audio_coding/testfile32kHz", "pcm"), 32000, 1,
out_filename, false, expects);
@@ -247,7 +237,6 @@
(*encoder_ptr)->SetDtx(true);
});
expects[static_cast<int>(AudioFrameType::kEmptyFrame)] = 1;
- expects[static_cast<int>(AudioFrameType::kAudioFrameSpeech)] = 1;
expects[static_cast<int>(AudioFrameType::kAudioFrameCN)] = 1;
Run(webrtc::test::ResourcePath("audio_coding/testfile32kHz", "pcm"), 32000, 1,
out_filename, true, expects);
@@ -255,12 +244,10 @@
// Register stereo Opus as send codec
out_filename = webrtc::test::OutputPath() + "testOpusDtx_outFile_stereo.pcm";
RegisterCodec({"opus", 48000, 2, {{"stereo", "1"}}}, absl::nullopt);
-
acm_send_->ModifyEncoder([](std::unique_ptr<AudioEncoder>* encoder_ptr) {
(*encoder_ptr)->SetDtx(false);
});
expects[static_cast<int>(AudioFrameType::kEmptyFrame)] = 0;
- expects[static_cast<int>(AudioFrameType::kAudioFrameSpeech)] = 1;
expects[static_cast<int>(AudioFrameType::kAudioFrameCN)] = 0;
Run(webrtc::test::ResourcePath("audio_coding/teststereo32kHz", "pcm"), 32000,
2, out_filename, false, expects);
@@ -274,7 +261,6 @@
});
expects[static_cast<int>(AudioFrameType::kEmptyFrame)] = 1;
- expects[static_cast<int>(AudioFrameType::kAudioFrameSpeech)] = 1;
expects[static_cast<int>(AudioFrameType::kAudioFrameCN)] = 1;
Run(webrtc::test::ResourcePath("audio_coding/teststereo32kHz", "pcm"), 32000,
2, out_filename, true, expects);