Improvements to video stats for scenario tests.

* Adds capture-to-decode time stat.
* Calculates PSNR only for delivered frames, keeping the old PSNR
  value that includes freezes as a separate field (see the sketch
  below).
* Calculates end-to-end delay only for delivered frames.
* Adds a Count member to the stats collectors.
* Minor cleanups.
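
For illustration, here is a minimal sketch of how the split stats might
be read in a scenario test. The setup is elided; |analyzer| is assumed
to be a VideoLayerAnalyzer attached to a video stream, as in the
existing scenario unit tests:

  auto stats = analyzer.stats();
  // PSNR over delivered frames only. This can increase on a worse
  // connection, since dropped frames are excluded from the average.
  double psnr_delivered = stats.psnr.Mean();
  // PSNR including freezes: dropped or lost frames are compared to the
  // last successfully delivered frame, so freezes pull this value down.
  double psnr_all = stats.psnr_with_freeze.Mean();
  // New capture-to-decode delay, sampled only for delivered frames.
  TimeDelta decode_delay = stats.capture_to_decoded_delay.Mean();
  // The new Count() member reports how many samples back each mean.
  int psnr_samples = stats.psnr.Count();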

Bug: webrtc:10365
Change-Id: Iaa7b1f0666a10764a513eecd1a08b9b6e76f3bc6
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/142812
Commit-Queue: Sebastian Jansson <srte@webrtc.org>
Reviewed-by: Rasmus Brandt <brandtr@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#28355}
diff --git a/test/scenario/performance_stats.cc b/test/scenario/performance_stats.cc
index 0b2ae9f..0bbf86d 100644
--- a/test/scenario/performance_stats.cc
+++ b/test/scenario/performance_stats.cc
@@ -78,6 +78,10 @@
   return sqrt(Variance());
 }
 
+int SampleStats<double>::Count() {
+  return static_cast<int>(GetSamples().size());
+}
+
 void SampleStats<TimeDelta>::AddSample(TimeDelta delta) {
   RTC_DCHECK(delta.IsFinite());
   stats_.AddSample(delta.seconds<double>());
@@ -122,6 +126,10 @@
   return TimeDelta::seconds(stats_.StandardDeviation());
 }
 
+int SampleStats<TimeDelta>::Count() {
+  return stats_.Count();
+}
+
 void SampleStats<DataRate>::AddSample(DataRate sample) {
   stats_.AddSample(sample.bps<double>());
 }
@@ -166,6 +174,10 @@
   return DataRate::bps(stats_.StandardDeviation());
 }
 
+int SampleStats<DataRate>::Count() {
+  return stats_.Count();
+}
+
 void VideoFramesStats::AddFrameInfo(const VideoFrameBuffer& frame,
                                     Timestamp at_time) {
   ++count;
@@ -187,8 +199,10 @@
   render.AddStats(other.render);
   lost_count += other.lost_count;
   freeze_count += other.freeze_count;
+  capture_to_decoded_delay.AddSamples(other.capture_to_decoded_delay);
   end_to_end_delay.AddSamples(other.end_to_end_delay);
   psnr.AddSamples(other.psnr);
+  psnr_with_freeze.AddSamples(other.psnr_with_freeze);
   skipped_between_rendered.AddSamples(other.skipped_between_rendered);
   freeze_duration.AddSamples(other.freeze_duration);
   time_between_freezes.AddSamples(other.time_between_freezes);
diff --git a/test/scenario/performance_stats.h b/test/scenario/performance_stats.h
index 5a17654..b1ff398 100644
--- a/test/scenario/performance_stats.h
+++ b/test/scenario/performance_stats.h
@@ -23,6 +23,7 @@
   rtc::scoped_refptr<VideoFrameBuffer> captured;
   rtc::scoped_refptr<VideoFrameBuffer> decoded;
   Timestamp capture_time = Timestamp::MinusInfinity();
+  Timestamp decoded_time = Timestamp::PlusInfinity();
   Timestamp render_time = Timestamp::PlusInfinity();
   // A unique identifier for the spatial/temporal layer the decoded frame
   // belongs to. Note that this does not reflect the id as defined by the
@@ -48,6 +49,7 @@
   double Min();
   double Variance();
   double StandardDeviation();
+  int Count();
 };
 
 template <>
@@ -64,6 +66,7 @@
   TimeDelta Min();
   TimeDelta Variance();
   TimeDelta StandardDeviation();
+  int Count();
 
  private:
   SampleStats<double> stats_;
@@ -83,6 +86,7 @@
   DataRate Min();
   DataRate Variance();
   DataRate StandardDeviation();
+  int Count();
 
  private:
   SampleStats<double> stats_;
@@ -95,6 +99,7 @@
   bool IsEmpty() const;
   double Rate() const;
   SampleStats<TimeDelta>& interval() { return interval_; }
+  int Count() const { return event_count_; }
 
  private:
   Timestamp first_time_ = Timestamp::PlusInfinity();
@@ -117,10 +122,18 @@
   int freeze_count = 0;
   VideoFramesStats capture;
   VideoFramesStats render;
+  // Time from when the frame was captured on the device until it was
+  // delivered from the decoder.
+  SampleStats<TimeDelta> capture_to_decoded_delay;
   // Time from frame was captured on device to time frame was displayed on
   // device.
   SampleStats<TimeDelta> end_to_end_delay;
+  // PSNR for delivered frames only. Note that this can increase on a
+  // worse connection, since dropped frames are excluded from the average.
   SampleStats<double> psnr;
+  // PSNR for all frames; dropped or lost frames are compared to the last
+  // successfully delivered frame.
+  SampleStats<double> psnr_with_freeze;
   // Frames skipped between two nearest.
   SampleStats<double> skipped_between_rendered;
   // In the next 2 metrics freeze is a pause that is longer, than maximum:
diff --git a/test/scenario/scenario_unittest.cc b/test/scenario/scenario_unittest.cc
index 1a0e1ff..bed3b1d 100644
--- a/test/scenario/scenario_unittest.cc
+++ b/test/scenario/scenario_unittest.cc
@@ -104,7 +104,7 @@
   }
   // Regression tests based on previous runs.
   EXPECT_EQ(analyzer.stats().lost_count, 0);
-  EXPECT_NEAR(analyzer.stats().psnr.Mean(), 38, 2);
+  EXPECT_NEAR(analyzer.stats().psnr_with_freeze.Mean(), 38, 2);
 }
 
 // TODO(bugs.webrtc.org/10515): Remove this when performance has been improved.
@@ -124,7 +124,7 @@
   }
   // Regression tests based on previous runs.
   EXPECT_LT(analyzer.stats().lost_count, 2);
-  EXPECT_NEAR(analyzer.stats().psnr.Mean(), 38, 10);
+  EXPECT_NEAR(analyzer.stats().psnr_with_freeze.Mean(), 38, 10);
 }
 
 TEST(ScenarioTest, SimTimeFakeing) {
diff --git a/test/scenario/stats_collection.cc b/test/scenario/stats_collection.cc
index 817d947..6c61112 100644
--- a/test/scenario/stats_collection.cc
+++ b/test/scenario/stats_collection.cc
@@ -63,16 +63,18 @@
   RTC_CHECK(sample.captured);
   HandleCapturedFrame(sample);
   if (!sample.decoded) {
+    // This can only happen at the beginning of a call or if the
+    // resolution is reduced. Otherwise we would detect a freeze instead.
     ++stats_.lost_count;
     ++skip_count_;
   } else {
     psnr = I420PSNR(*sample.captured->ToI420(), *sample.decoded->ToI420());
-    stats_.end_to_end_delay.AddSample(sample.render_time - sample.capture_time);
-    stats_.psnr.AddSample(psnr);
+    stats_.psnr_with_freeze.AddSample(psnr);
     if (sample.repeated) {
       ++stats_.freeze_count;
       ++skip_count_;
     } else {
+      stats_.psnr.AddSample(psnr);
       HandleRenderedFrame(sample);
     }
   }
@@ -92,6 +94,9 @@
 }
 
 void VideoLayerAnalyzer::HandleRenderedFrame(const VideoFramePair& sample) {
+  stats_.capture_to_decoded_delay.AddSample(sample.decoded_time -
+                                            sample.capture_time);
+  stats_.end_to_end_delay.AddSample(sample.render_time - sample.capture_time);
   stats_.render.AddFrameInfo(*sample.decoded, sample.render_time);
   stats_.skipped_between_rendered.AddSample(skip_count_);
   skip_count_ = 0;
diff --git a/test/scenario/stats_collection_unittest.cc b/test/scenario/stats_collection_unittest.cc
index 2d8fa39..d39ce58 100644
--- a/test/scenario/stats_collection_unittest.cc
+++ b/test/scenario/stats_collection_unittest.cc
@@ -53,7 +53,7 @@
   // might change due to changes in configuration and encoder etc. The main
   // purpose is to show how the stats can be used. To avoid being overly
  // sensitive to change, the ranges are chosen to be quite large.
-  EXPECT_NEAR(analyzer.stats().psnr.Mean(), 43, 10);
+  EXPECT_NEAR(analyzer.stats().psnr_with_freeze.Mean(), 43, 10);
   EXPECT_NEAR(stats.call.stats().target_rate.Mean().kbps(), 700, 300);
   EXPECT_NEAR(stats.video_send.stats().media_bitrate.Mean().kbps(), 500, 200);
   EXPECT_NEAR(stats.video_receive.stats().resolution.Mean(), 180, 10);
@@ -73,7 +73,7 @@
   }
   // This is a change detecting test, the targets are based on previous runs and
   // might change due to changes in configuration and encoder etc.
-  EXPECT_NEAR(analyzer.stats().psnr.Mean(), 16, 10);
+  EXPECT_NEAR(analyzer.stats().psnr_with_freeze.Mean(), 16, 10);
   EXPECT_NEAR(stats.call.stats().target_rate.Mean().kbps(), 75, 50);
   EXPECT_NEAR(stats.video_send.stats().media_bitrate.Mean().kbps(), 100, 50);
   EXPECT_NEAR(stats.video_receive.stats().resolution.Mean(), 180, 10);
diff --git a/test/scenario/video_frame_matcher.cc b/test/scenario/video_frame_matcher.cc
index 98e276c..a1faa81 100644
--- a/test/scenario/video_frame_matcher.cc
+++ b/test/scenario/video_frame_matcher.cc
@@ -25,7 +25,8 @@
 VideoFrameMatcher::VideoFrameMatcher(
     std::vector<std::function<void(const VideoFramePair&)> >
         frame_pair_handlers)
-    : frame_pair_handlers_(frame_pair_handlers), task_queue_("VideoAnalyzer") {}
+    : frame_pair_handlers_(std::move(frame_pair_handlers)),
+      task_queue_("VideoAnalyzer") {}
 
 VideoFrameMatcher::~VideoFrameMatcher() {
   task_queue_.SendTask([this] { Finalize(); });
@@ -58,14 +59,15 @@
 }
 
 void VideoFrameMatcher::OnDecodedFrame(const VideoFrame& frame,
+                                       int layer_id,
                                        Timestamp render_time,
-                                       int layer_id) {
+                                       Timestamp at_time) {
   rtc::scoped_refptr<DecodedFrame> decoded(new DecodedFrame{});
+  decoded->decoded_time = at_time;
   decoded->render_time = render_time;
   decoded->frame = frame.video_frame_buffer();
   decoded->thumb = ScaleVideoFrameBuffer(*frame.video_frame_buffer()->ToI420(),
                                          kThumbWidth, kThumbHeight);
-  decoded->render_time = render_time;
 
   task_queue_.PostTask([this, decoded, layer_id] {
     auto& layer = layers_[layer_id];
@@ -109,6 +111,7 @@
   if (captured.best_decode) {
     frame_pair.decode_id = captured.best_decode->id;
     frame_pair.decoded = captured.best_decode->frame;
+    frame_pair.decoded_time = captured.best_decode->decoded_time;
     frame_pair.render_time = captured.best_decode->render_time;
     frame_pair.repeated = captured.best_decode->repeat_count++;
   }
@@ -131,8 +134,6 @@
     rtc::VideoSourceInterface<VideoFrame>* source)
     : clock_(clock), matcher_(matcher), source_(source) {}
 
-ForwardingCapturedFrameTap::~ForwardingCapturedFrameTap() {}
-
 void ForwardingCapturedFrameTap::OnFrame(const VideoFrame& frame) {
   RTC_CHECK(sink_);
   matcher_->OnCapturedFrame(frame, clock_->CurrentTime());
@@ -156,14 +157,17 @@
   sink_ = nullptr;
 }
 
-DecodedFrameTap::DecodedFrameTap(VideoFrameMatcher* matcher, int layer_id)
-    : matcher_(matcher), layer_id_(layer_id) {
+DecodedFrameTap::DecodedFrameTap(Clock* clock,
+                                 VideoFrameMatcher* matcher,
+                                 int layer_id)
+    : clock_(clock), matcher_(matcher), layer_id_(layer_id) {
   matcher_->RegisterLayer(layer_id_);
 }
 
 void DecodedFrameTap::OnFrame(const VideoFrame& frame) {
-  matcher_->OnDecodedFrame(frame, Timestamp::ms(frame.render_time_ms()),
-                           layer_id_);
+  matcher_->OnDecodedFrame(frame, layer_id_,
+                           Timestamp::ms(frame.render_time_ms()),
+                           clock_->CurrentTime());
 }
 
 }  // namespace test
diff --git a/test/scenario/video_frame_matcher.h b/test/scenario/video_frame_matcher.h
index 1a4a60a..d27ed8f 100644
--- a/test/scenario/video_frame_matcher.h
+++ b/test/scenario/video_frame_matcher.h
@@ -38,14 +38,15 @@
   void RegisterLayer(int layer_id);
   void OnCapturedFrame(const VideoFrame& frame, Timestamp at_time);
   void OnDecodedFrame(const VideoFrame& frame,
+                      int layer_id,
                       Timestamp render_time,
-                      int layer_id);
+                      Timestamp at_time);
   bool Active() const;
-  Clock* clock();
 
  private:
   struct DecodedFrameBase {
     int id;
+    Timestamp decoded_time = Timestamp::PlusInfinity();
     Timestamp render_time = Timestamp::PlusInfinity();
     rtc::scoped_refptr<VideoFrameBuffer> frame;
     rtc::scoped_refptr<VideoFrameBuffer> thumb;
@@ -84,7 +85,6 @@
                              rtc::VideoSourceInterface<VideoFrame>* source);
   ForwardingCapturedFrameTap(ForwardingCapturedFrameTap&) = delete;
   ForwardingCapturedFrameTap& operator=(ForwardingCapturedFrameTap&) = delete;
-  ~ForwardingCapturedFrameTap();
 
   // VideoSinkInterface interface
   void OnFrame(const VideoFrame& frame) override;
@@ -94,7 +94,6 @@
   void AddOrUpdateSink(VideoSinkInterface<VideoFrame>* sink,
                        const rtc::VideoSinkWants& wants) override;
   void RemoveSink(VideoSinkInterface<VideoFrame>* sink) override;
-  VideoFrame PopFrame();
 
  private:
   Clock* const clock_;
@@ -106,11 +105,12 @@
 
 class DecodedFrameTap : public rtc::VideoSinkInterface<VideoFrame> {
  public:
-  explicit DecodedFrameTap(VideoFrameMatcher* matcher, int layer_id);
+  DecodedFrameTap(Clock* clock, VideoFrameMatcher* matcher, int layer_id);
   // VideoSinkInterface interface
   void OnFrame(const VideoFrame& frame) override;
 
  private:
+  Clock* const clock_;
   VideoFrameMatcher* const matcher_;
   int layer_id_;
 };
diff --git a/test/scenario/video_stream.cc b/test/scenario/video_stream.cc
index b7324bd..94a2438 100644
--- a/test/scenario/video_stream.cc
+++ b/test/scenario/video_stream.cc
@@ -525,7 +525,8 @@
   for (size_t i = 0; i < num_streams; ++i) {
     rtc::VideoSinkInterface<VideoFrame>* renderer = &fake_renderer_;
     if (matcher->Active()) {
-      render_taps_.emplace_back(absl::make_unique<DecodedFrameTap>(matcher, i));
+      render_taps_.emplace_back(
+          absl::make_unique<DecodedFrameTap>(receiver_->clock_, matcher, i));
       renderer = render_taps_.back().get();
     }
     auto recv_config = CreateVideoReceiveStreamConfig(