stats: implement inbound-rtp totalProcessingDelay for video

https://w3c.github.io/webrtc-stats/#dom-rtcinboundrtpstreamstats-totalprocessingdelay

BUG=webrtc:13984

Change-Id: Ifd821bd8553add46218f09a11366096d62f5d09f
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/259768
Reviewed-by: Harald Alvestrand <hta@webrtc.org>
Commit-Queue: Henrik Boström <hbos@webrtc.org>
Reviewed-by: Henrik Boström <hbos@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#36732}
diff --git a/api/stats/rtcstats_objects.h b/api/stats/rtcstats_objects.h
index a6d78f8..b22de2b 100644
--- a/api/stats/rtcstats_objects.h
+++ b/api/stats/rtcstats_objects.h
@@ -487,7 +487,6 @@
   RTCStatsMember<double> audio_level;
   RTCStatsMember<double> total_audio_energy;
   RTCStatsMember<double> total_samples_duration;
-  RTCStatsMember<int32_t> frames_received;
   // TODO(hbos): Collect and populate this value. https://bugs.webrtc.org/7065
   RTCStatsMember<double> round_trip_time;
   // TODO(hbos): Collect and populate this value. https://bugs.webrtc.org/7065
@@ -508,6 +507,8 @@
   RTCStatsMember<double> gap_loss_rate;
   // TODO(hbos): Collect and populate this value. https://bugs.webrtc.org/7065
   RTCStatsMember<double> gap_discard_rate;
+  // Stats below are only implemented or defined for video.
+  RTCStatsMember<int32_t> frames_received;
   RTCStatsMember<uint32_t> frame_width;
   RTCStatsMember<uint32_t> frame_height;
   RTCStatsMember<uint32_t> frame_bit_depth;
@@ -516,6 +517,7 @@
   RTCStatsMember<uint32_t> key_frames_decoded;
   RTCStatsMember<uint32_t> frames_dropped;
   RTCStatsMember<double> total_decode_time;
+  RTCStatsMember<double> total_processing_delay;
   RTCStatsMember<double> total_inter_frame_delay;
   RTCStatsMember<double> total_squared_inter_frame_delay;
   // https://henbos.github.io/webrtc-provisional-stats/#dom-rtcinboundrtpstreamstats-contenttype
diff --git a/call/video_receive_stream.h b/call/video_receive_stream.h
index 614d5db..7c66a4e 100644
--- a/call/video_receive_stream.h
+++ b/call/video_receive_stream.h
@@ -106,6 +106,8 @@
     uint32_t frames_decoded = 0;
     // https://w3c.github.io/webrtc-stats/#dom-rtcinboundrtpstreamstats-totaldecodetime
     uint64_t total_decode_time_ms = 0;
+    // https://w3c.github.io/webrtc-stats/#dom-rtcinboundrtpstreamstats-totalprocessingdelay
+    webrtc::TimeDelta total_processing_delay = webrtc::TimeDelta::Millis(0);
     // Total inter frame delay in seconds.
     // https://w3c.github.io/webrtc-stats/#dom-rtcinboundrtpstreamstats-totalinterframedelay
     double total_inter_frame_delay = 0;
diff --git a/media/base/media_channel.h b/media/base/media_channel.h
index a3ab18a..3673169 100644
--- a/media/base/media_channel.h
+++ b/media/base/media_channel.h
@@ -613,6 +613,8 @@
   absl::optional<uint64_t> qp_sum;
   // https://w3c.github.io/webrtc-stats/#dom-rtcinboundrtpstreamstats-totaldecodetime
   uint64_t total_decode_time_ms = 0;
+  // https://w3c.github.io/webrtc-stats/#dom-rtcinboundrtpstreamstats-totalprocessingdelay
+  webrtc::TimeDelta total_processing_delay = webrtc::TimeDelta::Millis(0);
   double total_inter_frame_delay = 0;
   double total_squared_inter_frame_delay = 0;
   int64_t interframe_delay_max_ms = -1;
diff --git a/media/engine/webrtc_video_engine.cc b/media/engine/webrtc_video_engine.cc
index f3ada60..e29d1ea 100644
--- a/media/engine/webrtc_video_engine.cc
+++ b/media/engine/webrtc_video_engine.cc
@@ -3166,6 +3166,7 @@
   info.frames_rendered = stats.frames_rendered;
   info.qp_sum = stats.qp_sum;
   info.total_decode_time_ms = stats.total_decode_time_ms;
+  info.total_processing_delay = stats.total_processing_delay;
   info.last_packet_received_timestamp_ms =
       stats.rtp_stats.last_packet_received_timestamp_ms;
   info.estimated_playout_ntp_timestamp_ms =
diff --git a/pc/rtc_stats_collector.cc b/pc/rtc_stats_collector.cc
index 5ceeba8..488b7a7 100644
--- a/pc/rtc_stats_collector.cc
+++ b/pc/rtc_stats_collector.cc
@@ -521,6 +521,9 @@
   inbound_video->total_decode_time =
       static_cast<double>(video_receiver_info.total_decode_time_ms) /
       rtc::kNumMillisecsPerSec;
+  inbound_video->total_processing_delay =
+      static_cast<double>(video_receiver_info.total_processing_delay.ms()) /
+      rtc::kNumMillisecsPerSec;
   inbound_video->total_inter_frame_delay =
       video_receiver_info.total_inter_frame_delay;
   inbound_video->total_squared_inter_frame_delay =
diff --git a/pc/rtc_stats_collector_unittest.cc b/pc/rtc_stats_collector_unittest.cc
index 942d308..b5a5704 100644
--- a/pc/rtc_stats_collector_unittest.cc
+++ b/pc/rtc_stats_collector_unittest.cc
@@ -2139,6 +2139,8 @@
   video_media_info.receivers[0].frames_dropped = 13;
   video_media_info.receivers[0].qp_sum = absl::nullopt;
   video_media_info.receivers[0].total_decode_time_ms = 9000;
+  video_media_info.receivers[0].total_processing_delay =
+      webrtc::TimeDelta::Millis(600);
   video_media_info.receivers[0].total_inter_frame_delay = 0.123;
   video_media_info.receivers[0].total_squared_inter_frame_delay = 0.00456;
   video_media_info.receivers[0].jitter_ms = 1199;
@@ -2188,6 +2190,7 @@
   expected_video.frames_dropped = 13;
   // `expected_video.qp_sum` should be undefined.
   expected_video.total_decode_time = 9.0;
+  expected_video.total_processing_delay = 0.6;
   expected_video.total_inter_frame_delay = 0.123;
   expected_video.total_squared_inter_frame_delay = 0.00456;
   expected_video.jitter = 1.199;
diff --git a/pc/rtc_stats_integrationtest.cc b/pc/rtc_stats_integrationtest.cc
index 07a8bdc..d94e4b6 100644
--- a/pc/rtc_stats_integrationtest.cc
+++ b/pc/rtc_stats_integrationtest.cc
@@ -915,6 +915,8 @@
       verifier.TestMemberIsNonNegative<double>(
           inbound_stream.total_decode_time);
       verifier.TestMemberIsNonNegative<double>(
+          inbound_stream.total_processing_delay);
+      verifier.TestMemberIsNonNegative<double>(
           inbound_stream.total_inter_frame_delay);
       verifier.TestMemberIsNonNegative<double>(
           inbound_stream.total_squared_inter_frame_delay);
@@ -926,6 +928,7 @@
       verifier.TestMemberIsUndefined(inbound_stream.key_frames_decoded);
       verifier.TestMemberIsUndefined(inbound_stream.frames_dropped);
       verifier.TestMemberIsUndefined(inbound_stream.total_decode_time);
+      verifier.TestMemberIsUndefined(inbound_stream.total_processing_delay);
       verifier.TestMemberIsUndefined(inbound_stream.total_inter_frame_delay);
       verifier.TestMemberIsUndefined(
           inbound_stream.total_squared_inter_frame_delay);
diff --git a/stats/rtcstats_objects.cc b/stats/rtcstats_objects.cc
index ae90bf3..f45ea35 100644
--- a/stats/rtcstats_objects.cc
+++ b/stats/rtcstats_objects.cc
@@ -674,7 +674,6 @@
     &audio_level,
     &total_audio_energy,
     &total_samples_duration,
-    &frames_received,
     &round_trip_time,
     &packets_repaired,
     &burst_packets_lost,
@@ -685,6 +684,7 @@
     &burst_discard_rate,
     &gap_loss_rate,
     &gap_discard_rate,
+    &frames_received,
     &frame_width,
     &frame_height,
     &frame_bit_depth,
@@ -693,6 +693,7 @@
     &key_frames_decoded,
     &frames_dropped,
     &total_decode_time,
+    &total_processing_delay,
     &total_inter_frame_delay,
     &total_squared_inter_frame_delay,
     &content_type,
@@ -729,7 +730,6 @@
       audio_level("audioLevel"),
       total_audio_energy("totalAudioEnergy"),
       total_samples_duration("totalSamplesDuration"),
-      frames_received("framesReceived"),
       round_trip_time("roundTripTime"),
       packets_repaired("packetsRepaired"),
       burst_packets_lost("burstPacketsLost"),
@@ -740,6 +740,7 @@
       burst_discard_rate("burstDiscardRate"),
       gap_loss_rate("gapLossRate"),
       gap_discard_rate("gapDiscardRate"),
+      frames_received("framesReceived"),
       frame_width("frameWidth"),
       frame_height("frameHeight"),
       frame_bit_depth("frameBitDepth"),
@@ -748,6 +749,7 @@
       key_frames_decoded("keyFramesDecoded"),
       frames_dropped("framesDropped"),
       total_decode_time("totalDecodeTime"),
+      total_processing_delay("totalProcessingDelay"),
       total_inter_frame_delay("totalInterFrameDelay"),
       total_squared_inter_frame_delay("totalSquaredInterFrameDelay"),
       content_type("contentType"),
@@ -780,7 +782,6 @@
       audio_level(other.audio_level),
       total_audio_energy(other.total_audio_energy),
       total_samples_duration(other.total_samples_duration),
-      frames_received(other.frames_received),
       round_trip_time(other.round_trip_time),
       packets_repaired(other.packets_repaired),
       burst_packets_lost(other.burst_packets_lost),
@@ -791,6 +792,7 @@
       burst_discard_rate(other.burst_discard_rate),
       gap_loss_rate(other.gap_loss_rate),
       gap_discard_rate(other.gap_discard_rate),
+      frames_received(other.frames_received),
       frame_width(other.frame_width),
       frame_height(other.frame_height),
       frame_bit_depth(other.frame_bit_depth),
@@ -799,6 +801,7 @@
       key_frames_decoded(other.key_frames_decoded),
       frames_dropped(other.frames_dropped),
       total_decode_time(other.total_decode_time),
+      total_processing_delay(other.total_processing_delay),
       total_inter_frame_delay(other.total_inter_frame_delay),
       total_squared_inter_frame_delay(other.total_squared_inter_frame_delay),
       content_type(other.content_type),
diff --git a/video/receive_statistics_proxy2.cc b/video/receive_statistics_proxy2.cc
index 5f12026..c369509 100644
--- a/video/receive_statistics_proxy2.cc
+++ b/video/receive_statistics_proxy2.cc
@@ -830,20 +830,33 @@
                                             absl::optional<uint8_t> qp,
                                             int32_t decode_time_ms,
                                             VideoContentType content_type) {
+  webrtc::TimeDelta processing_delay = webrtc::TimeDelta::Millis(0);
+  webrtc::Timestamp current_time = clock_->CurrentTime();
+  // TODO(bugs.webrtc.org/13984): some tests do not fill packet_infos().
+  if (frame.packet_infos().size() > 0) {
+    auto first_packet = std::min_element(
+        frame.packet_infos().cbegin(), frame.packet_infos().cend(),
+        [](const webrtc::RtpPacketInfo& a, const webrtc::RtpPacketInfo& b) {
+          return a.receive_time() < b.receive_time();
+        });
+    processing_delay = current_time - first_packet->receive_time();
+  }
   // See VCMDecodedFrameCallback::Decoded for more info on what thread/queue we
   // may be on. E.g. on iOS this gets called on
   // "com.apple.coremedia.decompressionsession.clientcallback"
-  VideoFrameMetaData meta(frame, clock_->CurrentTime());
-  worker_thread_->PostTask(ToQueuedTask(
-      task_safety_, [meta, qp, decode_time_ms, content_type, this]() {
-        OnDecodedFrame(meta, qp, decode_time_ms, content_type);
-      }));
+  VideoFrameMetaData meta(frame, current_time);
+  worker_thread_->PostTask(ToQueuedTask(task_safety_, [meta, qp, decode_time_ms,
+                                                       processing_delay,
+                                                       content_type, this]() {
+    OnDecodedFrame(meta, qp, decode_time_ms, processing_delay, content_type);
+  }));
 }
 
 void ReceiveStatisticsProxy::OnDecodedFrame(
     const VideoFrameMetaData& frame_meta,
     absl::optional<uint8_t> qp,
     int32_t decode_time_ms,
+    webrtc::TimeDelta processing_delay,
     VideoContentType content_type) {
   RTC_DCHECK_RUN_ON(&main_thread_);
 
@@ -884,6 +897,7 @@
   decode_time_counter_.Add(decode_time_ms);
   stats_.decode_ms = decode_time_ms;
   stats_.total_decode_time_ms += decode_time_ms;
+  stats_.total_processing_delay += processing_delay;
   if (enable_decode_time_histograms_) {
     UpdateDecodeTimeHistograms(frame_meta.width, frame_meta.height,
                                decode_time_ms);
diff --git a/video/receive_statistics_proxy2.h b/video/receive_statistics_proxy2.h
index 328c92c..28b31c7 100644
--- a/video/receive_statistics_proxy2.h
+++ b/video/receive_statistics_proxy2.h
@@ -68,6 +68,7 @@
   void OnDecodedFrame(const VideoFrameMetaData& frame_meta,
                       absl::optional<uint8_t> qp,
                       int32_t decode_time_ms,
+                      webrtc::TimeDelta processing_delay,
                       VideoContentType content_type);
 
   void OnSyncOffsetUpdated(int64_t video_playout_ntp_ms,
diff --git a/video/receive_statistics_proxy2_unittest.cc b/video/receive_statistics_proxy2_unittest.cc
index 2bee54c..53453a2 100644
--- a/video/receive_statistics_proxy2_unittest.cc
+++ b/video/receive_statistics_proxy2_unittest.cc
@@ -177,6 +177,42 @@
             statistics_proxy_->GetStats().total_decode_time_ms);
 }
 
+TEST_F(ReceiveStatisticsProxy2Test, OnDecodedFrameIncreasesProcessingDelay) {
+  const TimeDelta kProcessingDelay = TimeDelta::Millis(10);
+  EXPECT_EQ(0u, statistics_proxy_->GetStats().frames_decoded);
+  webrtc::VideoFrame frame = CreateFrame(kWidth, kHeight);
+  TimeDelta expected_total_processing_delay = TimeDelta::Millis(0);
+  unsigned int expected_frames_decoded = 0;
+  // The receive time is fixed while the clock advances by 10ms before each
+  // decoded frame, so the per-frame processing delay is 10/20/30ms in the loop
+  // (40ms for the frame after it) and the total is the running sum of those.
+  RtpPacketInfos::vector_type packet_infos = {
+      RtpPacketInfo({}, {}, {}, {}, {}, Now())};
+  frame.set_packet_infos(RtpPacketInfos(packet_infos));
+  for (int i = 1; i <= 3; ++i) {
+    fake_clock_.AdvanceTime(kProcessingDelay);
+    statistics_proxy_->OnDecodedFrame(frame, absl::nullopt, 1,
+                                      VideoContentType::UNSPECIFIED);
+    expected_total_processing_delay += i * kProcessingDelay;
+    ++expected_frames_decoded;
+    loop_.Flush();
+    EXPECT_EQ(expected_frames_decoded,
+              statistics_proxy_->GetStats().frames_decoded);
+    EXPECT_EQ(expected_total_processing_delay,
+              statistics_proxy_->GetStats().total_processing_delay);
+  }
+  fake_clock_.AdvanceTime(kProcessingDelay);
+  statistics_proxy_->OnDecodedFrame(frame, 1u, 3,
+                                    VideoContentType::UNSPECIFIED);
+  ++expected_frames_decoded;
+  expected_total_processing_delay += 4 * kProcessingDelay;
+  loop_.Flush();
+  EXPECT_EQ(expected_frames_decoded,
+            statistics_proxy_->GetStats().frames_decoded);
+  EXPECT_EQ(expected_total_processing_delay,
+            statistics_proxy_->GetStats().total_processing_delay);
+}
+
 TEST_F(ReceiveStatisticsProxy2Test, OnDecodedFrameIncreasesQpSum) {
   EXPECT_EQ(absl::nullopt, statistics_proxy_->GetStats().qp_sum);
   webrtc::VideoFrame frame = CreateFrame(kWidth, kHeight);
@@ -1351,14 +1387,16 @@
 
   for (int i = 0; i < kMinRequiredSamples; ++i) {
     VideoFrameMetaData meta = MetaData(frame);
-    statistics_proxy_->OnDecodedFrame(meta, absl::nullopt, 0, content_type_);
+    statistics_proxy_->OnDecodedFrame(
+        meta, absl::nullopt, 0, webrtc::TimeDelta::Millis(0), content_type_);
     statistics_proxy_->OnRenderedFrame(meta);
     fake_clock_.AdvanceTimeMilliseconds(kInterFrameDelayMs);
   }
   // Add extra freeze.
   fake_clock_.AdvanceTimeMilliseconds(kFreezeDelayMs);
   VideoFrameMetaData meta = MetaData(frame);
-  statistics_proxy_->OnDecodedFrame(meta, absl::nullopt, 0, content_type_);
+  statistics_proxy_->OnDecodedFrame(
+      meta, absl::nullopt, 0, webrtc::TimeDelta::Millis(0), content_type_);
   statistics_proxy_->OnRenderedFrame(meta);
 
   FlushAndUpdateHistograms(absl::nullopt, StreamDataCounters(), nullptr);
@@ -1441,7 +1479,8 @@
 
   for (int i = 0; i <= kMinRequiredSamples; ++i) {
     VideoFrameMetaData meta = MetaData(frame);
-    statistics_proxy_->OnDecodedFrame(meta, absl::nullopt, 0, content_type_);
+    statistics_proxy_->OnDecodedFrame(
+        meta, absl::nullopt, 0, webrtc::TimeDelta::Millis(0), content_type_);
     statistics_proxy_->OnRenderedFrame(meta);
     fake_clock_.AdvanceTimeMilliseconds(kInterFrameDelayMs);
   }
@@ -1451,7 +1490,8 @@
   // Second playback interval with triple the length.
   for (int i = 0; i <= kMinRequiredSamples * 3; ++i) {
     VideoFrameMetaData meta = MetaData(frame);
-    statistics_proxy_->OnDecodedFrame(meta, absl::nullopt, 0, content_type_);
+    statistics_proxy_->OnDecodedFrame(
+        meta, absl::nullopt, 0, webrtc::TimeDelta::Millis(0), content_type_);
     statistics_proxy_->OnRenderedFrame(meta);
     fake_clock_.AdvanceTimeMilliseconds(kInterFrameDelayMs);
   }
@@ -1512,14 +1552,16 @@
   // HD frames.
   for (int i = 0; i < kMinRequiredSamples; ++i) {
     VideoFrameMetaData meta = MetaData(frame_hd);
-    statistics_proxy_->OnDecodedFrame(meta, absl::nullopt, 0, content_type_);
+    statistics_proxy_->OnDecodedFrame(
+        meta, absl::nullopt, 0, webrtc::TimeDelta::Millis(0), content_type_);
     statistics_proxy_->OnRenderedFrame(meta);
     fake_clock_.AdvanceTimeMilliseconds(kInterFrameDelayMs);
   }
   // SD frames.
   for (int i = 0; i < 2 * kMinRequiredSamples; ++i) {
     VideoFrameMetaData meta = MetaData(frame_sd);
-    statistics_proxy_->OnDecodedFrame(meta, absl::nullopt, 0, content_type_);
+    statistics_proxy_->OnDecodedFrame(
+        meta, absl::nullopt, 0, webrtc::TimeDelta::Millis(0), content_type_);
     statistics_proxy_->OnRenderedFrame(meta);
     fake_clock_.AdvanceTimeMilliseconds(kInterFrameDelayMs);
   }
@@ -1548,14 +1590,16 @@
   // High quality frames.
   for (int i = 0; i < kMinRequiredSamples; ++i) {
     VideoFrameMetaData meta = MetaData(frame);
-    statistics_proxy_->OnDecodedFrame(meta, kLowQp, 0, content_type_);
+    statistics_proxy_->OnDecodedFrame(
+        meta, kLowQp, 0, webrtc::TimeDelta::Millis(0), content_type_);
     statistics_proxy_->OnRenderedFrame(meta);
     fake_clock_.AdvanceTimeMilliseconds(kInterFrameDelayMs);
   }
   // Blocky frames.
   for (int i = 0; i < 2 * kMinRequiredSamples; ++i) {
     VideoFrameMetaData meta = MetaData(frame);
-    statistics_proxy_->OnDecodedFrame(meta, kHighQp, 0, content_type_);
+    statistics_proxy_->OnDecodedFrame(
+        meta, kHighQp, 0, webrtc::TimeDelta::Millis(0), content_type_);
     statistics_proxy_->OnRenderedFrame(meta);
     fake_clock_.AdvanceTimeMilliseconds(kInterFrameDelayMs);
   }