NetEq: Create an audio interruption metric
This CL adds a new metric to NetEq, which logs whenever a loss
concealment event has lasted longer than 150 ms (an "interruption").
The number of such events, as well as the sum length of them, is kept
in a SampleCounter, which can be queried at any time.
Any initial PLC at the beginning of a call, before the first packet is
decoded, is ignored.
Unit tests and piping to neteq_rtpplay are included.
Bug: webrtc:10549
Change-Id: I8a224a34254c47c74317617f420f6de997232d88
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/132796
Reviewed-by: Minyue Li <minyue@webrtc.org>
Commit-Queue: Henrik Lundin <henrik.lundin@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#27781}
diff --git a/modules/audio_coding/neteq/expand.cc b/modules/audio_coding/neteq/expand.cc
index acf7f28..bf9cfbf 100644
--- a/modules/audio_coding/neteq/expand.cc
+++ b/modules/audio_coding/neteq/expand.cc
@@ -324,12 +324,14 @@
lag_index_direction_ = 0;
stop_muting_ = true; // Do not mute signal any more.
statistics_->LogDelayedPacketOutageEvent(expand_duration_samples_, fs_hz_);
+ statistics_->EndExpandEvent(fs_hz_);
}
void Expand::SetParametersForMergeAfterExpand() {
current_lag_index_ = -1; /* out of the 3 possible ones */
lag_index_direction_ = 1; /* make sure we get the "optimal" lag */
stop_muting_ = true;
+ statistics_->EndExpandEvent(fs_hz_);
}
bool Expand::Muted() const {
diff --git a/modules/audio_coding/neteq/include/neteq.h b/modules/audio_coding/neteq/include/neteq.h
index 8721663..c0c836f 100644
--- a/modules/audio_coding/neteq/include/neteq.h
+++ b/modules/audio_coding/neteq/include/neteq.h
@@ -87,6 +87,11 @@
// packet can be made dynamic.
uint64_t relative_packet_arrival_delay_ms = 0;
uint64_t jitter_buffer_packets_received = 0;
+ // An interruption is a loss-concealment event lasting at least 150 ms. The
+ // two stats below count the number os such events and the total duration of
+ // these events.
+ uint64_t interruption_count = 0;
+ uint64_t total_interruption_duration_ms = 0;
};
// Metrics that describe the operations performed in NetEq, and the internal
diff --git a/modules/audio_coding/neteq/neteq_impl.cc b/modules/audio_coding/neteq/neteq_impl.cc
index fcb09be..ad6becc 100644
--- a/modules/audio_coding/neteq/neteq_impl.cc
+++ b/modules/audio_coding/neteq/neteq_impl.cc
@@ -822,6 +822,9 @@
switch (operation) {
case kNormal: {
DoNormal(decoded_buffer_.get(), length, speech_type, play_dtmf);
+ if (length > 0) {
+ stats_->DecodedOutputPlayed();
+ }
break;
}
case kMerge: {
diff --git a/modules/audio_coding/neteq/neteq_impl_unittest.cc b/modules/audio_coding/neteq/neteq_impl_unittest.cc
index 00b3174..025ed69 100644
--- a/modules/audio_coding/neteq/neteq_impl_unittest.cc
+++ b/modules/audio_coding/neteq/neteq_impl_unittest.cc
@@ -689,6 +689,65 @@
}
}
+// This test verifies that audio interruption is not logged for the initial
+// PLC period before the first packet is deocoded.
+// TODO(henrik.lundin) Maybe move this test to neteq_network_stats_unittest.cc.
+TEST_F(NetEqImplTest, NoAudioInterruptionLoggedBeforeFirstDecode) {
+ UseNoMocks();
+ CreateInstance();
+
+ const uint8_t kPayloadType = 17; // Just an arbitrary number.
+ const uint32_t kReceiveTime = 17; // Value doesn't matter for this test.
+ const int kSampleRateHz = 8000;
+ const size_t kPayloadLengthSamples =
+ static_cast<size_t>(10 * kSampleRateHz / 1000); // 10 ms.
+ const size_t kPayloadLengthBytes = kPayloadLengthSamples * 2;
+ uint8_t payload[kPayloadLengthBytes] = {0};
+ RTPHeader rtp_header;
+ rtp_header.payloadType = kPayloadType;
+ rtp_header.sequenceNumber = 0x1234;
+ rtp_header.timestamp = 0x12345678;
+ rtp_header.ssrc = 0x87654321;
+
+ // Register the payload type.
+ EXPECT_TRUE(neteq_->RegisterPayloadType(kPayloadType,
+ SdpAudioFormat("l16", 8000, 1)));
+
+ // Pull audio several times. No packets have been inserted yet.
+ const size_t kMaxOutputSize = static_cast<size_t>(10 * kSampleRateHz / 1000);
+ AudioFrame output;
+ bool muted;
+ for (int i = 0; i < 100; ++i) {
+ EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output, &muted));
+ ASSERT_LE(output.samples_per_channel_, kMaxOutputSize);
+ EXPECT_EQ(kMaxOutputSize, output.samples_per_channel_);
+ EXPECT_EQ(1u, output.num_channels_);
+ EXPECT_NE(AudioFrame::kNormalSpeech, output.speech_type_);
+ }
+
+ // Insert 10 packets.
+ for (size_t i = 0; i < 10; ++i) {
+ rtp_header.sequenceNumber++;
+ rtp_header.timestamp += kPayloadLengthSamples;
+ EXPECT_EQ(NetEq::kOK,
+ neteq_->InsertPacket(rtp_header, payload, kReceiveTime));
+ EXPECT_EQ(i + 1, packet_buffer_->NumPacketsInBuffer());
+ }
+
+ // Pull audio repeatedly and make sure we get normal output, that is not PLC.
+ for (size_t i = 0; i < 3; ++i) {
+ EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output, &muted));
+ ASSERT_LE(output.samples_per_channel_, kMaxOutputSize);
+ EXPECT_EQ(kMaxOutputSize, output.samples_per_channel_);
+ EXPECT_EQ(1u, output.num_channels_);
+ EXPECT_EQ(AudioFrame::kNormalSpeech, output.speech_type_)
+ << "NetEq did not decode the packets as expected.";
+ }
+
+ auto lifetime_stats = neteq_->GetLifetimeStatistics();
+ EXPECT_EQ(0u, lifetime_stats.interruption_count);
+}
+
// This test verifies that NetEq can handle comfort noise and enters/quits codec
// internal CNG mode properly.
TEST_F(NetEqImplTest, CodecInternalCng) {
diff --git a/modules/audio_coding/neteq/statistics_calculator.cc b/modules/audio_coding/neteq/statistics_calculator.cc
index d0f37bc..4ef239a 100644
--- a/modules/audio_coding/neteq/statistics_calculator.cc
+++ b/modules/audio_coding/neteq/statistics_calculator.cc
@@ -29,6 +29,8 @@
"int must not be wider than size_t for this to work");
return (a < 0 && ret > b) ? 0 : ret;
}
+
+constexpr int kInterruptionLenMs = 150;
} // namespace
// Allocating the static const so that it can be passed by reference to
@@ -176,6 +178,24 @@
ConcealedSamplesCorrection(num_samples, false);
}
+void StatisticsCalculator::DecodedOutputPlayed() {
+ decoded_output_played_ = true;
+}
+
+void StatisticsCalculator::EndExpandEvent(int fs_hz) {
+ RTC_DCHECK_GE(lifetime_stats_.concealed_samples,
+ concealed_samples_at_event_end_);
+ const int event_duration_ms =
+ 1000 *
+ (lifetime_stats_.concealed_samples - concealed_samples_at_event_end_) /
+ fs_hz;
+ if (event_duration_ms >= kInterruptionLenMs && decoded_output_played_) {
+ lifetime_stats_.interruption_count++;
+ lifetime_stats_.total_interruption_duration_ms += event_duration_ms;
+ }
+ concealed_samples_at_event_end_ = lifetime_stats_.concealed_samples;
+}
+
void StatisticsCalculator::ConcealedSamplesCorrection(int num_samples,
bool is_voice) {
if (num_samples < 0) {
diff --git a/modules/audio_coding/neteq/statistics_calculator.h b/modules/audio_coding/neteq/statistics_calculator.h
index 4482812..a56d0f5 100644
--- a/modules/audio_coding/neteq/statistics_calculator.h
+++ b/modules/audio_coding/neteq/statistics_calculator.h
@@ -50,6 +50,11 @@
// Same as ExpandedVoiceSamplesCorrection but for noise samples.
void ExpandedNoiseSamplesCorrection(int num_samples);
+ void DecodedOutputPlayed();
+
+ // Mark end of expand event; triggers some stats to be reported.
+ void EndExpandEvent(int fs_hz);
+
// Reports that |num_samples| samples were produced through preemptive
// expansion.
void PreemptiveExpandedSamples(size_t num_samples);
@@ -200,6 +205,7 @@
size_t added_zero_samples_;
size_t expanded_speech_samples_;
size_t expanded_noise_samples_;
+ size_t concealed_samples_at_event_end_ = 0;
size_t discarded_packets_;
size_t lost_timestamps_;
uint32_t timestamps_since_last_report_;
@@ -209,6 +215,7 @@
PeriodicUmaCount delayed_packet_outage_counter_;
PeriodicUmaAverage excess_buffer_delay_;
PeriodicUmaCount buffer_full_counter_;
+ bool decoded_output_played_ = false;
RTC_DISALLOW_COPY_AND_ASSIGN(StatisticsCalculator);
};
diff --git a/modules/audio_coding/neteq/statistics_calculator_unittest.cc b/modules/audio_coding/neteq/statistics_calculator_unittest.cc
index 1fb8e1c..a851074 100644
--- a/modules/audio_coding/neteq/statistics_calculator_unittest.cc
+++ b/modules/audio_coding/neteq/statistics_calculator_unittest.cc
@@ -128,4 +128,60 @@
EXPECT_EQ(2u, stats_output.jitter_buffer_packets_received);
}
+TEST(StatisticsCalculator, InterruptionCounter) {
+ constexpr int fs_khz = 48;
+ constexpr int fs_hz = fs_khz * 1000;
+ StatisticsCalculator stats;
+ stats.DecodedOutputPlayed();
+ stats.EndExpandEvent(fs_hz);
+ auto lts = stats.GetLifetimeStatistics();
+ EXPECT_EQ(0u, lts.interruption_count);
+ EXPECT_EQ(0u, lts.total_interruption_duration_ms);
+
+ // Add an event that is shorter than 150 ms. Should not be logged.
+ stats.ExpandedVoiceSamples(10 * fs_khz, false); // 10 ms.
+ stats.ExpandedNoiseSamples(139 * fs_khz, false); // 139 ms.
+ stats.EndExpandEvent(fs_hz);
+ lts = stats.GetLifetimeStatistics();
+ EXPECT_EQ(0u, lts.interruption_count);
+
+ // Add an event that is longer than 150 ms. Should be logged.
+ stats.ExpandedVoiceSamples(140 * fs_khz, false); // 140 ms.
+ stats.ExpandedNoiseSamples(11 * fs_khz, false); // 11 ms.
+ stats.EndExpandEvent(fs_hz);
+ lts = stats.GetLifetimeStatistics();
+ EXPECT_EQ(1u, lts.interruption_count);
+ EXPECT_EQ(151u, lts.total_interruption_duration_ms);
+
+ // Add one more long event.
+ stats.ExpandedVoiceSamples(100 * fs_khz, false); // 100 ms.
+ stats.ExpandedNoiseSamples(5000 * fs_khz, false); // 5000 ms.
+ stats.EndExpandEvent(fs_hz);
+ lts = stats.GetLifetimeStatistics();
+ EXPECT_EQ(2u, lts.interruption_count);
+ EXPECT_EQ(5100u + 151u, lts.total_interruption_duration_ms);
+}
+
+TEST(StatisticsCalculator, InterruptionCounterDoNotLogBeforeDecoding) {
+ constexpr int fs_khz = 48;
+ constexpr int fs_hz = fs_khz * 1000;
+ StatisticsCalculator stats;
+
+ // Add an event that is longer than 150 ms. Should normally be logged, but we
+ // have not called DecodedOutputPlayed() yet, so it shouldn't this time.
+ stats.ExpandedVoiceSamples(151 * fs_khz, false); // 151 ms.
+ stats.EndExpandEvent(fs_hz);
+ auto lts = stats.GetLifetimeStatistics();
+ EXPECT_EQ(0u, lts.interruption_count);
+
+ // Call DecodedOutputPlayed(). Logging should happen after this.
+ stats.DecodedOutputPlayed();
+
+ // Add one more long event.
+ stats.ExpandedVoiceSamples(151 * fs_khz, false); // 151 ms.
+ stats.EndExpandEvent(fs_hz);
+ lts = stats.GetLifetimeStatistics();
+ EXPECT_EQ(1u, lts.interruption_count);
+}
+
} // namespace webrtc
diff --git a/modules/audio_coding/neteq/tools/neteq_stats_plotter.cc b/modules/audio_coding/neteq/tools/neteq_stats_plotter.cc
index 3f86cfd..c7bec9c 100644
--- a/modules/audio_coding/neteq/tools/neteq_stats_plotter.cc
+++ b/modules/audio_coding/neteq/tools/neteq_stats_plotter.cc
@@ -75,6 +75,18 @@
printf("%s\n", concealment_event.ToString().c_str());
printf(" end of concealment_events_ms\n");
}
+
+ const auto lifetime_stats_vector = stats_getter_->lifetime_stats();
+ if (!lifetime_stats_vector->empty()) {
+ auto lifetime_stats = lifetime_stats_vector->back().second;
+ printf(" num_interruptions: %" PRId64 "\n",
+ lifetime_stats.interruption_count);
+ printf(" sum_interruption_length_ms: %" PRId64 " ms\n",
+ lifetime_stats.total_interruption_duration_ms);
+ printf(" interruption ratio: %f%%\n",
+ 100.0 * lifetime_stats.total_interruption_duration_ms /
+ simulation_time_ms);
+ }
}
} // namespace test