New statistic added to VideoReceiveStream to determine latency to first decode.

This change introduces a new measurement into the VideoReceiveStream::Stats
structure to measure the latency between the first frame being received and
the first frame being decoded in WebRTC. The goal here is to measure the latency
difference when a FrameEncryptor is attached and not attached.

Change-Id: I0f0178aff73b66f25dbc6617098033e226da2958
Bug: webrtc:10105
Reviewed-on: https://webrtc-review.googlesource.com/c/113328
Commit-Queue: Benjamin Wright <benwright@webrtc.org>
Reviewed-by: Steve Anton <steveanton@webrtc.org>
Reviewed-by: Niels Moller <nisse@webrtc.org>
Reviewed-by: Ilya Nikolaevskiy <ilnik@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#25956}
diff --git a/api/statstypes.cc b/api/statstypes.cc
index ad68894..dc37abc 100644
--- a/api/statstypes.cc
+++ b/api/statstypes.cc
@@ -516,6 +516,8 @@
       return "googFirsReceived";
     case kStatsValueNameFirsSent:
       return "googFirsSent";
+    case kStatsValueNameFirstFrameReceivedToDecodedMs:
+      return "googFirstFrameReceivedToDecodedMs";
     case kStatsValueNameFrameHeightInput:
       return "googFrameHeightInput";
     case kStatsValueNameFrameHeightReceived:
diff --git a/api/statstypes.h b/api/statstypes.h
index 857b1af..e653d15 100644
--- a/api/statstypes.h
+++ b/api/statstypes.h
@@ -176,6 +176,7 @@
     kStatsValueNameFingerprintAlgorithm,
     kStatsValueNameFirsReceived,
     kStatsValueNameFirsSent,
+    kStatsValueNameFirstFrameReceivedToDecodedMs,
     kStatsValueNameFrameHeightInput,
     kStatsValueNameFrameHeightReceived,
     kStatsValueNameFrameHeightSent,
diff --git a/call/video_receive_stream.cc b/call/video_receive_stream.cc
index 672abd8..4984523 100644
--- a/call/video_receive_stream.cc
+++ b/call/video_receive_stream.cc
@@ -35,7 +35,7 @@
 VideoReceiveStream::Stats::~Stats() = default;
 
 std::string VideoReceiveStream::Stats::ToString(int64_t time_ms) const {
-  char buf[1024];
+  char buf[2048];
   rtc::SimpleStringBuilder ss(buf);
   ss << "VideoReceiveStream stats: " << time_ms << ", {ssrc: " << ssrc << ", ";
   ss << "total_bps: " << total_bitrate_bps << ", ";
@@ -48,6 +48,8 @@
   ss << "render_fps: " << render_frame_rate << ", ";
   ss << "decode_ms: " << decode_ms << ", ";
   ss << "max_decode_ms: " << max_decode_ms << ", ";
+  ss << "first_frame_received_to_decoded_ms: "
+     << first_frame_received_to_decoded_ms << ", ";
   ss << "cur_delay_ms: " << current_delay_ms << ", ";
   ss << "targ_delay_ms: " << target_delay_ms << ", ";
   ss << "jb_delay_ms: " << jitter_buffer_ms << ", ";
diff --git a/call/video_receive_stream.h b/call/video_receive_stream.h
index 2cd5f16..8388356 100644
--- a/call/video_receive_stream.h
+++ b/call/video_receive_stream.h
@@ -78,6 +78,7 @@
     int render_delay_ms = 10;
     int64_t interframe_delay_max_ms = -1;
     uint32_t frames_decoded = 0;
+    int64_t first_frame_received_to_decoded_ms = -1;
     absl::optional<uint64_t> qp_sum;
 
     int current_payload_type = -1;
diff --git a/media/base/mediachannel.h b/media/base/mediachannel.h
index fca6ee4..09e752a 100644
--- a/media/base/mediachannel.h
+++ b/media/base/mediachannel.h
@@ -558,6 +558,9 @@
   // Estimated capture start time in NTP time in ms.
   int64_t capture_start_ntp_time_ms = -1;
 
+  // First frame received to first frame decoded latency.
+  int64_t first_frame_received_to_decoded_ms = -1;
+
   // Timing frame info: all important timestamps for a full lifetime of a
   // single 'timing frame'.
   absl::optional<webrtc::TimingFrameInfo> timing_frame_info;
diff --git a/media/engine/webrtcvideoengine.cc b/media/engine/webrtcvideoengine.cc
index c5e4cdf..e429231 100644
--- a/media/engine/webrtcvideoengine.cc
+++ b/media/engine/webrtcvideoengine.cc
@@ -2532,7 +2532,8 @@
   info.frames_decoded = stats.frames_decoded;
   info.frames_rendered = stats.frames_rendered;
   info.qp_sum = stats.qp_sum;
-
+  info.first_frame_received_to_decoded_ms =
+      stats.first_frame_received_to_decoded_ms;
   info.interframe_delay_max_ms = stats.interframe_delay_max_ms;
 
   info.content_type = stats.content_type;
diff --git a/pc/statscollector.cc b/pc/statscollector.cc
index 3cd85e6..c08633b 100644
--- a/pc/statscollector.cc
+++ b/pc/statscollector.cc
@@ -252,6 +252,10 @@
     report->AddInt64(StatsReport::kStatsValueNameCaptureStartNtpTimeMs,
                      info.capture_start_ntp_time_ms);
   }
+  if (info.first_frame_received_to_decoded_ms >= 0) {
+    report->AddInt64(StatsReport::kStatsValueNameFirstFrameReceivedToDecodedMs,
+                     info.first_frame_received_to_decoded_ms);
+  }
   if (info.qp_sum)
     report->AddInt64(StatsReport::kStatsValueNameQpSum, *info.qp_sum);
 
diff --git a/video/receive_statistics_proxy.cc b/video/receive_statistics_proxy.cc
index 8d39887..3cdbc2d 100644
--- a/video/receive_statistics_proxy.cc
+++ b/video/receive_statistics_proxy.cc
@@ -642,6 +642,17 @@
     int64_t now_ms = clock_->TimeInMilliseconds();
     timing_frame_info_counter_.Add(info, now_ms);
   }
+
+  // Measure initial decoding latency between the first frame arriving and the
+  // first frame being decoded.
+  if (!first_frame_received_time_ms_.has_value()) {
+    first_frame_received_time_ms_ = info.receive_finish_ms;
+  }
+  if (stats_.first_frame_received_to_decoded_ms == -1 &&
+      first_decoded_frame_time_ms_) {
+    stats_.first_frame_received_to_decoded_ms =
+        *first_decoded_frame_time_ms_ - *first_frame_received_time_ms_;
+  }
 }
 
 void ReceiveStatisticsProxy::RtcpPacketTypesCounterUpdated(
@@ -749,8 +760,9 @@
         interframe_delay_ms);
     content_specific_stats->flow_duration_ms += interframe_delay_ms;
   }
-  if (stats_.frames_decoded == 1)
+  if (stats_.frames_decoded == 1) {
     first_decoded_frame_time_ms_.emplace(now);
+  }
   last_decoded_frame_time_ms_.emplace(now);
 }
 
diff --git a/video/receive_statistics_proxy.h b/video/receive_statistics_proxy.h
index 100e751..84c4e01 100644
--- a/video/receive_statistics_proxy.h
+++ b/video/receive_statistics_proxy.h
@@ -181,6 +181,7 @@
   mutable std::map<int64_t, size_t> frame_window_ RTC_GUARDED_BY(&crit_);
   VideoContentType last_content_type_ RTC_GUARDED_BY(&crit_);
   VideoCodecType last_codec_type_ RTC_GUARDED_BY(&crit_);
+  absl::optional<int64_t> first_frame_received_time_ms_ RTC_GUARDED_BY(&crit_);
   absl::optional<int64_t> first_decoded_frame_time_ms_ RTC_GUARDED_BY(&crit_);
   absl::optional<int64_t> last_decoded_frame_time_ms_ RTC_GUARDED_BY(&crit_);
   size_t num_delayed_frames_rendered_ RTC_GUARDED_BY(&crit_);