NetEq: Create an audio interruption metric

This CL adds a new metric to NetEq, which logs whenever a loss
concealment event has lasted longer than 150 ms (an "interruption").
The number of such events, as well as the sum length of them, is kept
in a SampleCounter, which can be queried at any time.

Any initial PLC at the beginning of a call, before the first packet is
decoded, is ignored.

Unit tests and piping to neteq_rtpplay are included.

Bug: webrtc:10549
Change-Id: I8a224a34254c47c74317617f420f6de997232d88
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/132796
Reviewed-by: Minyue Li <minyue@webrtc.org>
Commit-Queue: Henrik Lundin <henrik.lundin@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#27781}
diff --git a/modules/audio_coding/neteq/expand.cc b/modules/audio_coding/neteq/expand.cc
index acf7f28..bf9cfbf 100644
--- a/modules/audio_coding/neteq/expand.cc
+++ b/modules/audio_coding/neteq/expand.cc
@@ -324,12 +324,14 @@
   lag_index_direction_ = 0;
   stop_muting_ = true;  // Do not mute signal any more.
   statistics_->LogDelayedPacketOutageEvent(expand_duration_samples_, fs_hz_);
+  statistics_->EndExpandEvent(fs_hz_);
 }
 
 void Expand::SetParametersForMergeAfterExpand() {
   current_lag_index_ = -1;  /* out of the 3 possible ones */
   lag_index_direction_ = 1; /* make sure we get the "optimal" lag */
   stop_muting_ = true;
+  statistics_->EndExpandEvent(fs_hz_);
 }
 
 bool Expand::Muted() const {
diff --git a/modules/audio_coding/neteq/include/neteq.h b/modules/audio_coding/neteq/include/neteq.h
index 8721663..c0c836f 100644
--- a/modules/audio_coding/neteq/include/neteq.h
+++ b/modules/audio_coding/neteq/include/neteq.h
@@ -87,6 +87,11 @@
   // packet can be made dynamic.
   uint64_t relative_packet_arrival_delay_ms = 0;
   uint64_t jitter_buffer_packets_received = 0;
+  // An interruption is a loss-concealment event lasting at least 150 ms. The
+  // two stats below count the number os such events and the total duration of
+  // these events.
+  uint64_t interruption_count = 0;
+  uint64_t total_interruption_duration_ms = 0;
 };
 
 // Metrics that describe the operations performed in NetEq, and the internal
diff --git a/modules/audio_coding/neteq/neteq_impl.cc b/modules/audio_coding/neteq/neteq_impl.cc
index fcb09be..ad6becc 100644
--- a/modules/audio_coding/neteq/neteq_impl.cc
+++ b/modules/audio_coding/neteq/neteq_impl.cc
@@ -822,6 +822,9 @@
   switch (operation) {
     case kNormal: {
       DoNormal(decoded_buffer_.get(), length, speech_type, play_dtmf);
+      if (length > 0) {
+        stats_->DecodedOutputPlayed();
+      }
       break;
     }
     case kMerge: {
diff --git a/modules/audio_coding/neteq/neteq_impl_unittest.cc b/modules/audio_coding/neteq/neteq_impl_unittest.cc
index 00b3174..025ed69 100644
--- a/modules/audio_coding/neteq/neteq_impl_unittest.cc
+++ b/modules/audio_coding/neteq/neteq_impl_unittest.cc
@@ -689,6 +689,65 @@
   }
 }
 
+// This test verifies that audio interruption is not logged for the initial
+// PLC period before the first packet is deocoded.
+// TODO(henrik.lundin) Maybe move this test to neteq_network_stats_unittest.cc.
+TEST_F(NetEqImplTest, NoAudioInterruptionLoggedBeforeFirstDecode) {
+  UseNoMocks();
+  CreateInstance();
+
+  const uint8_t kPayloadType = 17;   // Just an arbitrary number.
+  const uint32_t kReceiveTime = 17;  // Value doesn't matter for this test.
+  const int kSampleRateHz = 8000;
+  const size_t kPayloadLengthSamples =
+      static_cast<size_t>(10 * kSampleRateHz / 1000);  // 10 ms.
+  const size_t kPayloadLengthBytes = kPayloadLengthSamples * 2;
+  uint8_t payload[kPayloadLengthBytes] = {0};
+  RTPHeader rtp_header;
+  rtp_header.payloadType = kPayloadType;
+  rtp_header.sequenceNumber = 0x1234;
+  rtp_header.timestamp = 0x12345678;
+  rtp_header.ssrc = 0x87654321;
+
+  // Register the payload type.
+  EXPECT_TRUE(neteq_->RegisterPayloadType(kPayloadType,
+                                          SdpAudioFormat("l16", 8000, 1)));
+
+  // Pull audio several times. No packets have been inserted yet.
+  const size_t kMaxOutputSize = static_cast<size_t>(10 * kSampleRateHz / 1000);
+  AudioFrame output;
+  bool muted;
+  for (int i = 0; i < 100; ++i) {
+    EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output, &muted));
+    ASSERT_LE(output.samples_per_channel_, kMaxOutputSize);
+    EXPECT_EQ(kMaxOutputSize, output.samples_per_channel_);
+    EXPECT_EQ(1u, output.num_channels_);
+    EXPECT_NE(AudioFrame::kNormalSpeech, output.speech_type_);
+  }
+
+  // Insert 10 packets.
+  for (size_t i = 0; i < 10; ++i) {
+    rtp_header.sequenceNumber++;
+    rtp_header.timestamp += kPayloadLengthSamples;
+    EXPECT_EQ(NetEq::kOK,
+              neteq_->InsertPacket(rtp_header, payload, kReceiveTime));
+    EXPECT_EQ(i + 1, packet_buffer_->NumPacketsInBuffer());
+  }
+
+  // Pull audio repeatedly and make sure we get normal output, that is not PLC.
+  for (size_t i = 0; i < 3; ++i) {
+    EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output, &muted));
+    ASSERT_LE(output.samples_per_channel_, kMaxOutputSize);
+    EXPECT_EQ(kMaxOutputSize, output.samples_per_channel_);
+    EXPECT_EQ(1u, output.num_channels_);
+    EXPECT_EQ(AudioFrame::kNormalSpeech, output.speech_type_)
+        << "NetEq did not decode the packets as expected.";
+  }
+
+  auto lifetime_stats = neteq_->GetLifetimeStatistics();
+  EXPECT_EQ(0u, lifetime_stats.interruption_count);
+}
+
 // This test verifies that NetEq can handle comfort noise and enters/quits codec
 // internal CNG mode properly.
 TEST_F(NetEqImplTest, CodecInternalCng) {
diff --git a/modules/audio_coding/neteq/statistics_calculator.cc b/modules/audio_coding/neteq/statistics_calculator.cc
index d0f37bc..4ef239a 100644
--- a/modules/audio_coding/neteq/statistics_calculator.cc
+++ b/modules/audio_coding/neteq/statistics_calculator.cc
@@ -29,6 +29,8 @@
                 "int must not be wider than size_t for this to work");
   return (a < 0 && ret > b) ? 0 : ret;
 }
+
+constexpr int kInterruptionLenMs = 150;
 }  // namespace
 
 // Allocating the static const so that it can be passed by reference to
@@ -176,6 +178,24 @@
   ConcealedSamplesCorrection(num_samples, false);
 }
 
+void StatisticsCalculator::DecodedOutputPlayed() {
+  decoded_output_played_ = true;
+}
+
+void StatisticsCalculator::EndExpandEvent(int fs_hz) {
+  RTC_DCHECK_GE(lifetime_stats_.concealed_samples,
+                concealed_samples_at_event_end_);
+  const int event_duration_ms =
+      1000 *
+      (lifetime_stats_.concealed_samples - concealed_samples_at_event_end_) /
+      fs_hz;
+  if (event_duration_ms >= kInterruptionLenMs && decoded_output_played_) {
+    lifetime_stats_.interruption_count++;
+    lifetime_stats_.total_interruption_duration_ms += event_duration_ms;
+  }
+  concealed_samples_at_event_end_ = lifetime_stats_.concealed_samples;
+}
+
 void StatisticsCalculator::ConcealedSamplesCorrection(int num_samples,
                                                       bool is_voice) {
   if (num_samples < 0) {
diff --git a/modules/audio_coding/neteq/statistics_calculator.h b/modules/audio_coding/neteq/statistics_calculator.h
index 4482812..a56d0f5 100644
--- a/modules/audio_coding/neteq/statistics_calculator.h
+++ b/modules/audio_coding/neteq/statistics_calculator.h
@@ -50,6 +50,11 @@
   // Same as ExpandedVoiceSamplesCorrection but for noise samples.
   void ExpandedNoiseSamplesCorrection(int num_samples);
 
+  void DecodedOutputPlayed();
+
+  // Mark end of expand event; triggers some stats to be reported.
+  void EndExpandEvent(int fs_hz);
+
   // Reports that |num_samples| samples were produced through preemptive
   // expansion.
   void PreemptiveExpandedSamples(size_t num_samples);
@@ -200,6 +205,7 @@
   size_t added_zero_samples_;
   size_t expanded_speech_samples_;
   size_t expanded_noise_samples_;
+  size_t concealed_samples_at_event_end_ = 0;
   size_t discarded_packets_;
   size_t lost_timestamps_;
   uint32_t timestamps_since_last_report_;
@@ -209,6 +215,7 @@
   PeriodicUmaCount delayed_packet_outage_counter_;
   PeriodicUmaAverage excess_buffer_delay_;
   PeriodicUmaCount buffer_full_counter_;
+  bool decoded_output_played_ = false;
 
   RTC_DISALLOW_COPY_AND_ASSIGN(StatisticsCalculator);
 };
diff --git a/modules/audio_coding/neteq/statistics_calculator_unittest.cc b/modules/audio_coding/neteq/statistics_calculator_unittest.cc
index 1fb8e1c..a851074 100644
--- a/modules/audio_coding/neteq/statistics_calculator_unittest.cc
+++ b/modules/audio_coding/neteq/statistics_calculator_unittest.cc
@@ -128,4 +128,60 @@
   EXPECT_EQ(2u, stats_output.jitter_buffer_packets_received);
 }
 
+TEST(StatisticsCalculator, InterruptionCounter) {
+  constexpr int fs_khz = 48;
+  constexpr int fs_hz = fs_khz * 1000;
+  StatisticsCalculator stats;
+  stats.DecodedOutputPlayed();
+  stats.EndExpandEvent(fs_hz);
+  auto lts = stats.GetLifetimeStatistics();
+  EXPECT_EQ(0u, lts.interruption_count);
+  EXPECT_EQ(0u, lts.total_interruption_duration_ms);
+
+  // Add an event that is shorter than 150 ms. Should not be logged.
+  stats.ExpandedVoiceSamples(10 * fs_khz, false);   // 10 ms.
+  stats.ExpandedNoiseSamples(139 * fs_khz, false);  // 139 ms.
+  stats.EndExpandEvent(fs_hz);
+  lts = stats.GetLifetimeStatistics();
+  EXPECT_EQ(0u, lts.interruption_count);
+
+  // Add an event that is longer than 150 ms. Should be logged.
+  stats.ExpandedVoiceSamples(140 * fs_khz, false);  // 140 ms.
+  stats.ExpandedNoiseSamples(11 * fs_khz, false);   // 11 ms.
+  stats.EndExpandEvent(fs_hz);
+  lts = stats.GetLifetimeStatistics();
+  EXPECT_EQ(1u, lts.interruption_count);
+  EXPECT_EQ(151u, lts.total_interruption_duration_ms);
+
+  // Add one more long event.
+  stats.ExpandedVoiceSamples(100 * fs_khz, false);   // 100 ms.
+  stats.ExpandedNoiseSamples(5000 * fs_khz, false);  // 5000 ms.
+  stats.EndExpandEvent(fs_hz);
+  lts = stats.GetLifetimeStatistics();
+  EXPECT_EQ(2u, lts.interruption_count);
+  EXPECT_EQ(5100u + 151u, lts.total_interruption_duration_ms);
+}
+
+TEST(StatisticsCalculator, InterruptionCounterDoNotLogBeforeDecoding) {
+  constexpr int fs_khz = 48;
+  constexpr int fs_hz = fs_khz * 1000;
+  StatisticsCalculator stats;
+
+  // Add an event that is longer than 150 ms. Should normally be logged, but we
+  // have not called DecodedOutputPlayed() yet, so it shouldn't this time.
+  stats.ExpandedVoiceSamples(151 * fs_khz, false);  // 151 ms.
+  stats.EndExpandEvent(fs_hz);
+  auto lts = stats.GetLifetimeStatistics();
+  EXPECT_EQ(0u, lts.interruption_count);
+
+  // Call DecodedOutputPlayed(). Logging should happen after this.
+  stats.DecodedOutputPlayed();
+
+  // Add one more long event.
+  stats.ExpandedVoiceSamples(151 * fs_khz, false);  // 151 ms.
+  stats.EndExpandEvent(fs_hz);
+  lts = stats.GetLifetimeStatistics();
+  EXPECT_EQ(1u, lts.interruption_count);
+}
+
 }  // namespace webrtc
diff --git a/modules/audio_coding/neteq/tools/neteq_stats_plotter.cc b/modules/audio_coding/neteq/tools/neteq_stats_plotter.cc
index 3f86cfd..c7bec9c 100644
--- a/modules/audio_coding/neteq/tools/neteq_stats_plotter.cc
+++ b/modules/audio_coding/neteq/tools/neteq_stats_plotter.cc
@@ -75,6 +75,18 @@
       printf("%s\n", concealment_event.ToString().c_str());
     printf(" end of concealment_events_ms\n");
   }
+
+  const auto lifetime_stats_vector = stats_getter_->lifetime_stats();
+  if (!lifetime_stats_vector->empty()) {
+    auto lifetime_stats = lifetime_stats_vector->back().second;
+    printf("  num_interruptions: %" PRId64 "\n",
+           lifetime_stats.interruption_count);
+    printf("  sum_interruption_length_ms: %" PRId64 " ms\n",
+           lifetime_stats.total_interruption_duration_ms);
+    printf("  interruption ratio: %f%%\n",
+           100.0 * lifetime_stats.total_interruption_duration_ms /
+               simulation_time_ms);
+  }
 }
 
 }  // namespace test