Reland "Reland "Update video_quality_analysis to align videos instead of using barcodes""

This is a reland of 9bb55fc09b6bfa00cba7779c37ad6c39b4206f7a

Original change's description:
> Reland "Update video_quality_analysis to align videos instead of using barcodes"
>
> This is a reland of d65e143801a7aaa9affdb939ea836aec1955cdcc
>
> The binary for frame_analyzer.cpp is precompiled and stored in the cloud, so it
> won't automatically pick up changes to the source file. Therefore, restore all
> old code to be backwards compatible.
>
> Original change's description:
> > Update video_quality_analysis to align videos instead of using barcodes
> >
> > This CL is a follow-up to the previous CL
> > https://webrtc-review.googlesource.com/c/src/+/94773 that added generic
> > logic for aligning videos. This will allow us to easily extend
> > video_quality_analysis with new sophisticated video quality metrics.
> > Also, we can use any kind of video that does not necessarily need to
> > contain bar codes. Removing the need to decode barcodes also leads to a
> > big speedup for the tests.
> >
> > Bug: webrtc:9642
> > Change-Id: I74b0d630b3e1ed44781ad024115ded3143e28f50
> > Reviewed-on: https://webrtc-review.googlesource.com/94845
> > Reviewed-by: Paulina Hensman <phensman@webrtc.org>
> > Reviewed-by: Patrik Höglund <phoglund@webrtc.org>
> > Commit-Queue: Magnus Jedvert <magjed@webrtc.org>
> > Cr-Commit-Position: refs/heads/master@{#24423}
>
> TBR=phensman@webrtc.org,phoglund@webrtc.org
>
> Bug: webrtc:9642
> Change-Id: Id8d129ce103284504c67690f8363c03eaae3eee7
> Reviewed-on: https://webrtc-review.googlesource.com/96000
> Reviewed-by: Magnus Jedvert <magjed@webrtc.org>
> Reviewed-by: Patrik Höglund <phoglund@webrtc.org>
> Commit-Queue: Magnus Jedvert <magjed@webrtc.org>
> Cr-Commit-Position: refs/heads/master@{#24429}

TBR=phensman,phoglund

Bug: webrtc:9642
Change-Id: Ic248b7831ae148251a1a4ebeec5d154286f91a0a
Reviewed-on: https://webrtc-review.googlesource.com/98080
Commit-Queue: Magnus Jedvert <magjed@webrtc.org>
Reviewed-by: Patrik Höglund <phoglund@webrtc.org>
Reviewed-by: Magnus Jedvert <magjed@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#24583}
diff --git a/rtc_tools/BUILD.gn b/rtc_tools/BUILD.gn
index 294cc5a..e84d003 100644
--- a/rtc_tools/BUILD.gn
+++ b/rtc_tools/BUILD.gn
@@ -102,6 +102,7 @@
     ":command_line_parser",
     ":video_file_reader",
     ":video_quality_analysis",
+    "../rtc_base:stringutils",
     "../test:perf_test",
   ]
 }
diff --git a/rtc_tools/frame_analyzer/frame_analyzer.cc b/rtc_tools/frame_analyzer/frame_analyzer.cc
index fd0d488..aab2758 100644
--- a/rtc_tools/frame_analyzer/frame_analyzer.cc
+++ b/rtc_tools/frame_analyzer/frame_analyzer.cc
@@ -15,7 +15,9 @@
 #include <string>
 #include <vector>
 
+#include "rtc_base/stringutils.h"
 #include "rtc_tools/frame_analyzer/video_quality_analysis.h"
+#include "rtc_tools/frame_analyzer/video_temporal_aligner.h"
 #include "rtc_tools/simple_command_line_parser.h"
 #include "rtc_tools/video_file_reader.h"
 #include "test/testsupport/perf_test.h"
@@ -24,22 +26,16 @@
  * A command line tool running PSNR and SSIM on a reference video and a test
  * video. The test video is a record of the reference video which can start at
  * an arbitrary point. It is possible that there will be repeated frames or
- * skipped frames as well. In order to have a way to compare corresponding
- * frames from the two videos, two stats files should be provided. One for the
- * reference video and one for the test video. The stats file
- * is a text file assumed to be in the format:
- * frame_xxxx yyyy where xxxx is the frame number in and yyyy is the
- * corresponding barcode. The video files should be 1420 YUV videos.
- * The tool prints the result to standard output in the Chromium perf format:
+ * skipped frames as well. The video files should be I420 .y4m or .yuv videos.
+ * If both files are .y4m, width/height need not be specified. The tool
+ * prints the result to standard output in the Chromium perf format:
  * RESULT <metric>:<label>= <values>
  *
  * The max value for PSNR is 48.0 (between equal frames), as for SSIM it is 1.0.
  *
  * Usage:
  * frame_analyzer --label=<test_label> --reference_file=<name_of_file>
- * --test_file_ref=<name_of_file> --stats_file_test=<name_of_file>
- * --stats_file=<name_of_file> --width=<frame_width>
- * --height=<frame_height>
+ * --test_file=<name_of_file> --width=<frame_width> --height=<frame_height>
  */
 int main(int argc, char* argv[]) {
   std::string program_name = argv[0];
@@ -55,13 +51,6 @@
       " Default: -1\n"
       "  - label(string): The label to use for the perf output."
       " Default: MY_TEST\n"
-      "  - stats_file_ref(string): The path to the stats file that will be"
-      " produced for the reference video file."
-      " Default: stats_ref.txt\n"
-      "  - stats_file_test(string): The path to the stats file that will be"
-      " produced for the test video file."
-      " Default: stats_test.txt\n"
-      "  - reference_file(string): The reference YUV file to compare against."
       " Default: ref.yuv\n"
       "  - test_file(string): The test YUV file to run the analysis for."
       " Default: test_file.yuv\n"
@@ -78,8 +67,6 @@
   parser.SetFlag("width", "-1");
   parser.SetFlag("height", "-1");
   parser.SetFlag("label", "MY_TEST");
-  parser.SetFlag("stats_file_ref", "stats_ref.txt");
-  parser.SetFlag("stats_file_test", "stats_test.txt");
   parser.SetFlag("reference_file", "ref.yuv");
   parser.SetFlag("test_file", "test.yuv");
   parser.SetFlag("chartjson_result_file", "");
@@ -92,34 +79,48 @@
   }
   parser.PrintEnteredFlags();
 
-  int width = strtol((parser.GetFlag("width")).c_str(), NULL, 10);
-  int height = strtol((parser.GetFlag("height")).c_str(), NULL, 10);
+  int width = strtol((parser.GetFlag("width")).c_str(), nullptr, 10);
+  int height = strtol((parser.GetFlag("height")).c_str(), nullptr, 10);
 
-  if (width <= 0 || height <= 0) {
-    fprintf(stderr, "Error: width or height cannot be <= 0!\n");
+  const std::string reference_file_name = parser.GetFlag("reference_file");
+  const std::string test_file_name = parser.GetFlag("test_file");
+
+  // .yuv files require explicit resolution.
+  if ((rtc::ends_with(reference_file_name.c_str(), ".yuv") ||
+       rtc::ends_with(test_file_name.c_str(), ".yuv")) &&
+      (width <= 0 || height <= 0)) {
+    fprintf(stderr,
+            "Error: You need to specify width and height when using .yuv "
+            "files\n");
     return -1;
   }
 
   webrtc::test::ResultsContainer results;
 
   rtc::scoped_refptr<webrtc::test::Video> reference_video =
-      webrtc::test::OpenYuvOrY4mFile(parser.GetFlag("reference_file"), width,
-                                     height);
+      webrtc::test::OpenYuvOrY4mFile(reference_file_name, width, height);
   rtc::scoped_refptr<webrtc::test::Video> test_video =
-      webrtc::test::OpenYuvOrY4mFile(parser.GetFlag("test_file"), width,
-                                     height);
+      webrtc::test::OpenYuvOrY4mFile(test_file_name, width, height);
 
   if (!reference_video || !test_video) {
     fprintf(stderr, "Error opening video files\n");
     return 0;
   }
 
-  webrtc::test::RunAnalysis(
-      reference_video, test_video, parser.GetFlag("stats_file_ref").c_str(),
-      parser.GetFlag("stats_file_test").c_str(), width, height, &results);
-  webrtc::test::GetMaxRepeatedAndSkippedFrames(
-      parser.GetFlag("stats_file_ref"), parser.GetFlag("stats_file_test"),
-      &results);
+  const std::vector<size_t> matching_indices =
+      webrtc::test::FindMatchingFrameIndices(reference_video, test_video);
+
+  results.frames =
+      webrtc::test::RunAnalysis(reference_video, test_video, matching_indices);
+
+  const std::vector<webrtc::test::Cluster> clusters =
+      webrtc::test::CalculateFrameClusters(matching_indices);
+  results.max_repeated_frames = webrtc::test::GetMaxRepeatedFrames(clusters);
+  results.max_skipped_frames = webrtc::test::GetMaxSkippedFrames(clusters);
+  results.total_skipped_frames =
+      webrtc::test::GetTotalNumberOfSkippedFrames(clusters);
+  results.decode_errors_ref = 0;
+  results.decode_errors_test = 0;
 
   webrtc::test::PrintAnalysisResults(parser.GetFlag("label"), &results);
 
diff --git a/rtc_tools/frame_analyzer/video_quality_analysis.cc b/rtc_tools/frame_analyzer/video_quality_analysis.cc
index dda55b8..1c30d08 100644
--- a/rtc_tools/frame_analyzer/video_quality_analysis.cc
+++ b/rtc_tools/frame_analyzer/video_quality_analysis.cc
@@ -10,87 +10,19 @@
 
 #include "rtc_tools/frame_analyzer/video_quality_analysis.h"
 
-#include <assert.h>
-#include <stdio.h>
-#include <stdlib.h>
 #include <algorithm>
-#include <map>
-#include <string>
-#include <utility>
+#include <numeric>
 
 #include "test/testsupport/perf_test.h"
 #include "third_party/libyuv/include/libyuv/compare.h"
 #include "third_party/libyuv/include/libyuv/convert.h"
 
-#define STATS_LINE_LENGTH 32
-
 namespace webrtc {
 namespace test {
 
 ResultsContainer::ResultsContainer() {}
 ResultsContainer::~ResultsContainer() {}
 
-int GetI420FrameSize(int width, int height) {
-  int half_width = (width + 1) >> 1;
-  int half_height = (height + 1) >> 1;
-
-  int y_plane = width * height;            // I420 Y plane.
-  int u_plane = half_width * half_height;  // I420 U plane.
-  int v_plane = half_width * half_height;  // I420 V plane.
-
-  return y_plane + u_plane + v_plane;
-}
-
-int ExtractFrameSequenceNumber(std::string line) {
-  size_t space_position = line.find(' ');
-  if (space_position == std::string::npos) {
-    return -1;
-  }
-  std::string frame = line.substr(0, space_position);
-
-  size_t underscore_position = frame.find('_');
-  if (underscore_position == std::string::npos) {
-    return -1;
-  }
-  std::string frame_number = frame.substr(underscore_position + 1);
-
-  return strtol(frame_number.c_str(), NULL, 10);
-}
-
-int ExtractDecodedFrameNumber(std::string line) {
-  size_t space_position = line.find(' ');
-  if (space_position == std::string::npos) {
-    return -1;
-  }
-  std::string decoded_number = line.substr(space_position + 1);
-
-  return strtol(decoded_number.c_str(), NULL, 10);
-}
-
-bool IsThereBarcodeError(std::string line) {
-  size_t barcode_error_position = line.find("Barcode error");
-  if (barcode_error_position != std::string::npos) {
-    return true;
-  }
-  return false;
-}
-
-bool GetNextStatsLine(FILE* stats_file, char* line) {
-  int chars = 0;
-  char buf = 0;
-
-  while (buf != '\n') {
-    size_t chars_read = fread(&buf, 1, 1, stats_file);
-    if (chars_read != 1 || feof(stats_file)) {
-      return false;
-    }
-    line[chars] = buf;
-    ++chars;
-  }
-  line[chars - 1] = '\0';  // Strip the trailing \n and put end of string.
-  return true;
-}
-
 template <typename FrameMetricFunction>
 static double CalculateMetric(
     const FrameMetricFunction& frame_metric_function,
@@ -119,225 +51,73 @@
   return CalculateMetric(&libyuv::I420Ssim, ref_buffer, test_buffer);
 }
 
-void RunAnalysis(const rtc::scoped_refptr<webrtc::test::Video>& reference_video,
-                 const rtc::scoped_refptr<webrtc::test::Video>& test_video,
-                 const char* stats_file_reference_name,
-                 const char* stats_file_test_name,
-                 int width,
-                 int height,
-                 ResultsContainer* results) {
-  FILE* stats_file_ref = fopen(stats_file_reference_name, "r");
-  FILE* stats_file_test = fopen(stats_file_test_name, "r");
-
-  // String buffer for the lines in the stats file.
-  char line[STATS_LINE_LENGTH];
-
-  int previous_frame_number = -1;
-
-  // Maps barcode id to the frame id for the reference video.
-  // In case two frames have same id, then we only save the first one.
-  std::map<int, int> ref_barcode_to_frame;
-  // While there are entries in the stats file.
-  while (GetNextStatsLine(stats_file_ref, line)) {
-    int extracted_ref_frame = ExtractFrameSequenceNumber(line);
-    int decoded_frame_number = ExtractDecodedFrameNumber(line);
-
-    // Insert will only add if it is not in map already.
-    ref_barcode_to_frame.insert(
-        std::make_pair(decoded_frame_number, extracted_ref_frame));
-  }
-
-  while (GetNextStatsLine(stats_file_test, line)) {
-    int extracted_test_frame = ExtractFrameSequenceNumber(line);
-    int decoded_frame_number = ExtractDecodedFrameNumber(line);
-    auto it = ref_barcode_to_frame.find(decoded_frame_number);
-    if (it == ref_barcode_to_frame.end()) {
-      // Not found in the reference video.
-      // TODO(mandermo) print
+std::vector<AnalysisResult> RunAnalysis(
+    const rtc::scoped_refptr<webrtc::test::Video>& reference_video,
+    const rtc::scoped_refptr<webrtc::test::Video>& test_video,
+    const std::vector<size_t>& test_frame_indices) {
+  std::vector<AnalysisResult> results;
+  for (size_t i = 0; i < test_frame_indices.size(); ++i) {
+    // Ignore duplicated frames in the test video.
+    if (i > 0 && test_frame_indices[i] == test_frame_indices[i - 1])
       continue;
-    }
-    int extracted_ref_frame = it->second;
 
-    // If there was problem decoding the barcode in this frame or the frame has
-    // been duplicated, continue.
-    if (IsThereBarcodeError(line) ||
-        decoded_frame_number == previous_frame_number) {
-      continue;
-    }
-
-    assert(extracted_test_frame != -1);
-    assert(decoded_frame_number != -1);
-
-    const rtc::scoped_refptr<webrtc::I420BufferInterface> test_frame =
-        test_video->GetFrame(extracted_test_frame);
-    const rtc::scoped_refptr<webrtc::I420BufferInterface> reference_frame =
-        reference_video->GetFrame(extracted_ref_frame);
-
-    // Calculate the PSNR and SSIM.
-    double result_psnr = Psnr(reference_frame, test_frame);
-    double result_ssim = Ssim(reference_frame, test_frame);
-
-    previous_frame_number = decoded_frame_number;
+    const rtc::scoped_refptr<I420BufferInterface>& test_frame =
+        test_video->GetFrame(i);
+    const rtc::scoped_refptr<I420BufferInterface>& reference_frame =
+        reference_video->GetFrame(test_frame_indices[i] %
+                                  reference_video->number_of_frames());
 
     // Fill in the result struct.
     AnalysisResult result;
-    result.frame_number = decoded_frame_number;
-    result.psnr_value = result_psnr;
-    result.ssim_value = result_ssim;
-
-    results->frames.push_back(result);
+    result.frame_number = test_frame_indices[i];
+    result.psnr_value = Psnr(reference_frame, test_frame);
+    result.ssim_value = Ssim(reference_frame, test_frame);
+    results.push_back(result);
   }
 
-  // Cleanup.
-  fclose(stats_file_ref);
-  fclose(stats_file_test);
+  return results;
 }
 
-std::vector<std::pair<int, int> > CalculateFrameClusters(
-    FILE* file,
-    int* num_decode_errors) {
-  if (num_decode_errors) {
-    *num_decode_errors = 0;
-  }
-  std::vector<std::pair<int, int> > frame_cnt;
-  char line[STATS_LINE_LENGTH];
-  while (GetNextStatsLine(file, line)) {
-    int decoded_frame_number;
-    if (IsThereBarcodeError(line)) {
-      decoded_frame_number = DECODE_ERROR;
-      if (num_decode_errors) {
-        ++*num_decode_errors;
-      }
+std::vector<Cluster> CalculateFrameClusters(
+    const std::vector<size_t>& indices) {
+  std::vector<Cluster> clusters;
+
+  for (size_t index : indices) {
+    if (!clusters.empty() && clusters.back().index == index) {
+      // This frame belongs to the previous cluster.
+      ++clusters.back().number_of_repeated_frames;
     } else {
-      decoded_frame_number = ExtractDecodedFrameNumber(line);
-    }
-    if (frame_cnt.size() >= 2 && decoded_frame_number != DECODE_ERROR &&
-        frame_cnt.back().first == DECODE_ERROR &&
-        frame_cnt[frame_cnt.size() - 2].first == decoded_frame_number) {
-      // Handle when there is a decoding error inside a cluster of frames.
-      frame_cnt[frame_cnt.size() - 2].second += frame_cnt.back().second + 1;
-      frame_cnt.pop_back();
-    } else if (frame_cnt.empty() ||
-               frame_cnt.back().first != decoded_frame_number) {
-      frame_cnt.push_back(std::make_pair(decoded_frame_number, 1));
-    } else {
-      ++frame_cnt.back().second;
+      // Start a new cluster.
+      clusters.push_back({index, /* number_of_repeated_frames= */ 1});
     }
   }
-  return frame_cnt;
+
+  return clusters;
 }
 
-void GetMaxRepeatedAndSkippedFrames(const std::string& stats_file_ref_name,
-                                    const std::string& stats_file_test_name,
-                                    ResultsContainer* results) {
-  FILE* stats_file_ref = fopen(stats_file_ref_name.c_str(), "r");
-  FILE* stats_file_test = fopen(stats_file_test_name.c_str(), "r");
-  if (stats_file_ref == NULL) {
-    fprintf(stderr, "Couldn't open reference stats file for reading: %s\n",
-            stats_file_ref_name.c_str());
-    return;
+int GetMaxRepeatedFrames(const std::vector<Cluster>& clusters) {
+  int max_number_of_repeated_frames = 0;
+  for (const Cluster& cluster : clusters) {
+    max_number_of_repeated_frames = std::max(max_number_of_repeated_frames,
+                                             cluster.number_of_repeated_frames);
   }
-  if (stats_file_test == NULL) {
-    fprintf(stderr, "Couldn't open test stats file for reading: %s\n",
-            stats_file_test_name.c_str());
-    fclose(stats_file_ref);
-    return;
+  return max_number_of_repeated_frames;
+}
+
+int GetMaxSkippedFrames(const std::vector<Cluster>& clusters) {
+  size_t max_skipped_frames = 0;
+  for (size_t i = 1; i < clusters.size(); ++i) {
+    const size_t skipped_frames = clusters[i].index - clusters[i - 1].index - 1;
+    max_skipped_frames = std::max(max_skipped_frames, skipped_frames);
   }
+  return static_cast<int>(max_skipped_frames);
+}
 
-  int max_repeated_frames = 1;
-  int max_skipped_frames = 0;
-
-  int decode_errors_ref = 0;
-  int decode_errors_test = 0;
-
-  std::vector<std::pair<int, int> > frame_cnt_ref =
-      CalculateFrameClusters(stats_file_ref, &decode_errors_ref);
-
-  std::vector<std::pair<int, int> > frame_cnt_test =
-      CalculateFrameClusters(stats_file_test, &decode_errors_test);
-
-  fclose(stats_file_ref);
-  fclose(stats_file_test);
-
-  auto it_ref = frame_cnt_ref.begin();
-  auto it_test = frame_cnt_test.begin();
-  auto end_ref = frame_cnt_ref.end();
-  auto end_test = frame_cnt_test.end();
-
-  if (it_test == end_test || it_ref == end_ref) {
-    fprintf(stderr, "Either test or ref file is empty, nothing to print\n");
-    return;
-  }
-
-  while (it_test != end_test && it_test->first == DECODE_ERROR) {
-    ++it_test;
-  }
-
-  if (it_test == end_test) {
-    fprintf(stderr, "Test video only has barcode decode errors\n");
-    return;
-  }
-
-  // Find the first frame in the reference video that match the first frame in
-  // the test video.
-  while (it_ref != end_ref &&
-         (it_ref->first == DECODE_ERROR || it_ref->first != it_test->first)) {
-    ++it_ref;
-  }
-  if (it_ref == end_ref) {
-    fprintf(stderr,
-            "The barcode in the test video's first frame is not in the "
-            "reference video.\n");
-    return;
-  }
-
-  int total_skipped_frames = 0;
-  for (;;) {
-    max_repeated_frames =
-        std::max(max_repeated_frames, it_test->second - it_ref->second + 1);
-
-    bool passed_error = false;
-
-    ++it_test;
-    while (it_test != end_test && it_test->first == DECODE_ERROR) {
-      ++it_test;
-      passed_error = true;
-    }
-    if (it_test == end_test) {
-      break;
-    }
-
-    int skipped_frames = 0;
-    ++it_ref;
-    for (; it_ref != end_ref; ++it_ref) {
-      if (it_ref->first != DECODE_ERROR && it_ref->first >= it_test->first) {
-        break;
-      }
-      ++skipped_frames;
-    }
-    if (passed_error) {
-      // If we pass an error in the test video, then we are conservative
-      // and will not calculate skipped frames for that part.
-      skipped_frames = 0;
-    }
-    if (it_ref != end_ref && it_ref->first == it_test->first) {
-      total_skipped_frames += skipped_frames;
-      if (skipped_frames > max_skipped_frames) {
-        max_skipped_frames = skipped_frames;
-      }
-      continue;
-    }
-    fprintf(stdout,
-            "Found barcode %d in test video, which is not in reference video\n",
-            it_test->first);
-    break;
-  }
-
-  results->max_repeated_frames = max_repeated_frames;
-  results->max_skipped_frames = max_skipped_frames;
-  results->total_skipped_frames = total_skipped_frames;
-  results->decode_errors_ref = decode_errors_ref;
-  results->decode_errors_test = decode_errors_test;
+int GetTotalNumberOfSkippedFrames(const std::vector<Cluster>& clusters) {
+  // The number of reference frames the test video spans.
+  const size_t number_ref_frames =
+      clusters.empty() ? 0 : 1 + clusters.back().index - clusters.front().index;
+  return static_cast<int>(number_ref_frames - clusters.size());
 }
 
 void PrintAnalysisResults(const std::string& label, ResultsContainer* results) {
diff --git a/rtc_tools/frame_analyzer/video_quality_analysis.h b/rtc_tools/frame_analyzer/video_quality_analysis.h
index ae66700..ede2954 100644
--- a/rtc_tools/frame_analyzer/video_quality_analysis.h
+++ b/rtc_tools/frame_analyzer/video_quality_analysis.h
@@ -47,25 +47,12 @@
 // A function to run the PSNR and SSIM analysis on the test file. The test file
 // comprises the frames that were captured during the quality measurement test.
 // There may be missing or duplicate frames. Also the frames start at a random
-// position in the original video. We should provide a statistics file along
-// with the test video. The stats file contains the connection between the
-// actual frames in the test file and their bar code number. There is one file
-// for the reference video and one for the test video. The stats file should
-// be in the form 'frame_xxxx yyyy', where xxxx is the consecutive
-// number of the frame in the test video, and yyyy is the barcode number.
-// The stats file could be produced by
-// tools/barcode_tools/barcode_decoder.py. This script decodes the barcodes
-// integrated in every video and generates the stats file. If three was some
-// problem with the decoding there would be 'Barcode error' instead of yyyy.
-// The stat files are used to compare the right frames with each other and
-// to calculate statistics.
-void RunAnalysis(const rtc::scoped_refptr<webrtc::test::Video>& reference_video,
-                 const rtc::scoped_refptr<webrtc::test::Video>& test_video,
-                 const char* stats_file_reference_name,
-                 const char* stats_file_test_name,
-                 int width,
-                 int height,
-                 ResultsContainer* results);
+// position in the original video. We also need to provide a map from test frame
+// indices to reference frame indices.
+std::vector<AnalysisResult> RunAnalysis(
+    const rtc::scoped_refptr<webrtc::test::Video>& reference_video,
+    const rtc::scoped_refptr<webrtc::test::Video>& test_video,
+    const std::vector<size_t>& test_frame_indices);
 
 // Compute PSNR for an I420 buffer (all planes). The max return value (in the
 // case where the test and reference frames are exactly the same) will be 48.
@@ -87,45 +74,28 @@
                           const std::string& label,
                           ResultsContainer* results);
 
-// The barcode number that means that the barcode could not be decoded.
-const int DECODE_ERROR = -1;
+struct Cluster {
+  // Corresponding reference frame index for this cluster.
+  size_t index;
+  // The number of sequential frames that mapped to the same reference frame
+  // index.
+  int number_of_repeated_frames;
+};
 
-// Clusters the frames in the file. First in the pair is the frame number and
-// second is the number of frames in that cluster. So if first frame in video
-// has number 100 and it is repeated 3 after each other, then the first entry
-// in the returned vector has first set to 100 and second set to 3.
-// Decode errors between two frames with same barcode, then it interprets
-// the frame with the decode error as having the same id as the two frames
-// around it. Eg. [400, DECODE_ERROR, DECODE_ERROR, 400] is becomes an entry
-// in return vector with first==400 and second==4. In other cases with decode
-// errors like [400, DECODE_ERROR, 401] becomes three entries, each with
-// second==1 and the middle has first==DECODE_ERROR.
-std::vector<std::pair<int, int> > CalculateFrameClusters(
-    FILE* file,
-    int* num_decode_errors);
+// Clusters sequentially repeated frames. For example, the sequence {100, 102,
+// 102, 103} will be mapped to {{100, 1}, {102, 2}, {103, 1}}.
+std::vector<Cluster> CalculateFrameClusters(const std::vector<size_t>& indices);
 
-// Calculates max repeated and skipped frames and prints them to stdout in a
-// format that is compatible with Chromium performance numbers.
-void GetMaxRepeatedAndSkippedFrames(const std::string& stats_file_ref_name,
-                                    const std::string& stats_file_test_name,
-                                    ResultsContainer* results);
+// Get the maximum number of sequentially repeated frames in the test video.
+// This number will be one if we only store unique frames in the test video.
+int GetMaxRepeatedFrames(const std::vector<Cluster>& clusters);
 
-// Gets the next line from an open stats file.
-bool GetNextStatsLine(FILE* stats_file, char* line);
+// Get the longest sequence of skipped reference frames. This corresponds to the
+// longest freeze in the test video.
+int GetMaxSkippedFrames(const std::vector<Cluster>& clusters);
 
-// Calculates the size of a I420 frame if given the width and height.
-int GetI420FrameSize(int width, int height);
-
-// Extract the sequence of the frame in the video. I.e. if line is
-// frame_0023 0284, we will get 23.
-int ExtractFrameSequenceNumber(std::string line);
-
-// Checks if there is 'Barcode error' for the given line.
-bool IsThereBarcodeError(std::string line);
-
-// Extract the frame number in the reference video. I.e. if line is
-// frame_0023 0284, we will get 284.
-int ExtractDecodedFrameNumber(std::string line);
+// Get total number of skipped frames in the test video.
+int GetTotalNumberOfSkippedFrames(const std::vector<Cluster>& clusters);
 
 }  // namespace test
 }  // namespace webrtc
diff --git a/rtc_tools/frame_analyzer/video_quality_analysis_unittest.cc b/rtc_tools/frame_analyzer/video_quality_analysis_unittest.cc
index d9565b2..8980d15 100644
--- a/rtc_tools/frame_analyzer/video_quality_analysis_unittest.cc
+++ b/rtc_tools/frame_analyzer/video_quality_analysis_unittest.cc
@@ -22,6 +22,22 @@
 namespace webrtc {
 namespace test {
 
+namespace {
+
+void VerifyLogOutput(const std::string& log_filename,
+                     const std::vector<std::string>& expected_out) {
+  std::ifstream logf(log_filename);
+  std::string line;
+
+  std::size_t i;
+  for (i = 0; i < expected_out.size() && getline(logf, line); ++i) {
+    ASSERT_EQ(expected_out.at(i), line);
+  }
+  ASSERT_TRUE(i == expected_out.size()) << "Not enough input data";
+}
+
+}  // namespace
+
 // Setup a log file to write the output to instead of stdout because we don't
 // want those numbers to be picked up as perf numbers.
 class VideoQualityAnalysisTest : public ::testing::Test {
@@ -31,14 +47,9 @@
                                             "VideoQualityAnalysisTest.log");
     logfile_ = fopen(log_filename.c_str(), "w");
     ASSERT_TRUE(logfile_ != NULL);
-
-    stats_filename_ref_ = TempFilename(OutputPath(), "stats-1.txt");
-    stats_filename_ = TempFilename(OutputPath(), "stats-2.txt");
   }
   void TearDown() { ASSERT_EQ(0, fclose(logfile_)); }
   FILE* logfile_;
-  std::string stats_filename_ref_;
-  std::string stats_filename_;
 };
 
 TEST_F(VideoQualityAnalysisTest, PrintAnalysisResultsEmpty) {
@@ -60,92 +71,21 @@
   PrintAnalysisResults(logfile_, "ThreeFrames", &result);
 }
 
-TEST_F(VideoQualityAnalysisTest, GetMaxRepeatedAndSkippedFramesInvalidFile) {
-  ResultsContainer result;
-  remove(stats_filename_.c_str());
-  GetMaxRepeatedAndSkippedFrames(stats_filename_ref_, stats_filename_, &result);
-}
-
-TEST_F(VideoQualityAnalysisTest, GetMaxRepeatedAndSkippedFramesEmptyStatsFile) {
-  ResultsContainer result;
-  std::ofstream stats_file;
-  stats_file.open(stats_filename_ref_.c_str());
-  stats_file.close();
-  stats_file.open(stats_filename_.c_str());
-  stats_file.close();
-  GetMaxRepeatedAndSkippedFrames(stats_filename_ref_, stats_filename_, &result);
-}
-
-TEST_F(VideoQualityAnalysisTest, GetMaxRepeatedAndSkippedFramesNormalFile) {
-  ResultsContainer result;
-  std::ofstream stats_file;
-
-  stats_file.open(stats_filename_ref_.c_str());
-  stats_file << "frame_0001 0100\n";
-  stats_file << "frame_0002 0101\n";
-  stats_file << "frame_0003 0102\n";
-  stats_file << "frame_0004 0103\n";
-  stats_file << "frame_0005 0106\n";
-  stats_file << "frame_0006 0107\n";
-  stats_file << "frame_0007 0108\n";
-  stats_file.close();
-
-  stats_file.open(stats_filename_.c_str());
-  stats_file << "frame_0001 0100\n";
-  stats_file << "frame_0002 0101\n";
-  stats_file << "frame_0003 0101\n";
-  stats_file << "frame_0004 0106\n";
-  stats_file.close();
-
-  GetMaxRepeatedAndSkippedFrames(stats_filename_ref_, stats_filename_, &result);
-}
-
-namespace {
-void VerifyLogOutput(const std::string& log_filename,
-                     const std::vector<std::string>& expected_out) {
-  std::ifstream logf(log_filename);
-  std::string line;
-
-  std::size_t i;
-  for (i = 0; i < expected_out.size() && getline(logf, line); ++i) {
-    ASSERT_EQ(expected_out.at(i), line);
-  }
-  ASSERT_TRUE(i == expected_out.size()) << "Not enough input data";
-}
-}  // unnamed namespace
-
 TEST_F(VideoQualityAnalysisTest,
        PrintMaxRepeatedAndSkippedFramesSkippedFrames) {
   ResultsContainer result;
-  std::ofstream stats_file;
 
   std::string log_filename =
       TempFilename(webrtc::test::OutputPath(), "log.log");
   FILE* logfile = fopen(log_filename.c_str(), "w");
   ASSERT_TRUE(logfile != NULL);
-  stats_file.open(stats_filename_ref_.c_str());
-  stats_file << "frame_0001 0100\n";
-  stats_file << "frame_0002 0101\n";
-  stats_file << "frame_0002 0101\n";
-  stats_file << "frame_0003 0103\n";
-  stats_file << "frame_0004 0103\n";
-  stats_file << "frame_0005 0106\n";
-  stats_file << "frame_0006 0106\n";
-  stats_file << "frame_0007 0108\n";
-  stats_file << "frame_0008 0110\n";
-  stats_file << "frame_0009 0112\n";
-  stats_file.close();
 
-  stats_file.open(stats_filename_.c_str());
-  stats_file << "frame_0001 0101\n";
-  stats_file << "frame_0002 0101\n";
-  stats_file << "frame_0003 0101\n";
-  stats_file << "frame_0004 0108\n";
-  stats_file << "frame_0005 0108\n";
-  stats_file << "frame_0006 0112\n";
-  stats_file.close();
+  result.max_repeated_frames = 2;
+  result.max_skipped_frames = 2;
+  result.total_skipped_frames = 3;
+  result.decode_errors_ref = 0;
+  result.decode_errors_test = 0;
 
-  GetMaxRepeatedAndSkippedFrames(stats_filename_ref_, stats_filename_, &result);
   PrintAnalysisResults(logfile, "NormalStatsFile", &result);
   ASSERT_EQ(0, fclose(logfile));
 
@@ -161,35 +101,17 @@
 TEST_F(VideoQualityAnalysisTest,
        PrintMaxRepeatedAndSkippedFramesDecodeErrorInTest) {
   ResultsContainer result;
-  std::ofstream stats_file;
 
   std::string log_filename =
       TempFilename(webrtc::test::OutputPath(), "log.log");
   FILE* logfile = fopen(log_filename.c_str(), "w");
   ASSERT_TRUE(logfile != NULL);
-  stats_file.open(stats_filename_ref_.c_str());
-  stats_file << "frame_0001 0100\n";
-  stats_file << "frame_0002 0100\n";
-  stats_file << "frame_0002 0101\n";
-  stats_file << "frame_0003 0103\n";
-  stats_file << "frame_0004 0103\n";
-  stats_file << "frame_0005 0106\n";
-  stats_file << "frame_0006 0107\n";
-  stats_file << "frame_0007 0107\n";
-  stats_file << "frame_0008 0110\n";
-  stats_file << "frame_0009 0112\n";
-  stats_file.close();
 
-  stats_file.open(stats_filename_.c_str());
-  stats_file << "frame_0001 0101\n";
-  stats_file << "frame_0002 Barcode error\n";
-  stats_file << "frame_0003 Barcode error\n";
-  stats_file << "frame_0004 Barcode error\n";
-  stats_file << "frame_0005 0107\n";
-  stats_file << "frame_0006 0110\n";
-  stats_file.close();
-
-  GetMaxRepeatedAndSkippedFrames(stats_filename_ref_, stats_filename_, &result);
+  result.max_repeated_frames = 1;
+  result.max_skipped_frames = 0;
+  result.total_skipped_frames = 0;
+  result.decode_errors_ref = 0;
+  result.decode_errors_test = 3;
   PrintAnalysisResults(logfile, "NormalStatsFile", &result);
   ASSERT_EQ(0, fclose(logfile));
 
@@ -203,113 +125,61 @@
 }
 
 TEST_F(VideoQualityAnalysisTest, CalculateFrameClustersOneValue) {
-  std::ofstream stats_file;
+  const std::vector<Cluster> result = CalculateFrameClusters({1});
+  EXPECT_EQ(1u, result.size());
+  EXPECT_EQ(1u, result[0].index);
+  EXPECT_EQ(1, result[0].number_of_repeated_frames);
+}
 
-  stats_file.open(stats_filename_.c_str());
-  stats_file << "frame_0001 0101\n";
-  stats_file.close();
+TEST_F(VideoQualityAnalysisTest, GetMaxRepeatedFramesOneValue) {
+  EXPECT_EQ(1, GetMaxRepeatedFrames(CalculateFrameClusters({1})));
+}
 
-  FILE* stats_filef = fopen(stats_filename_.c_str(), "r");
-  ASSERT_TRUE(stats_filef != NULL);
+TEST_F(VideoQualityAnalysisTest, GetMaxSkippedFramesOneValue) {
+  EXPECT_EQ(0, GetMaxSkippedFrames(CalculateFrameClusters({1})));
+}
 
-  auto clusters = CalculateFrameClusters(stats_filef, nullptr);
-  ASSERT_EQ(0, fclose(stats_filef));
-  decltype(clusters) expected = {std::make_pair(101, 1)};
-  ASSERT_EQ(expected, clusters);
+TEST_F(VideoQualityAnalysisTest, GetTotalNumberOfSkippedFramesOneValue) {
+  EXPECT_EQ(0, GetTotalNumberOfSkippedFrames(CalculateFrameClusters({1})));
 }
 
 TEST_F(VideoQualityAnalysisTest, CalculateFrameClustersOneOneTwo) {
-  std::ofstream stats_file;
-
-  stats_file.open(stats_filename_.c_str());
-  stats_file << "frame_0001 0101\n";
-  stats_file << "frame_0002 0101\n";
-  stats_file << "frame_0003 0102\n";
-  stats_file.close();
-
-  FILE* stats_filef = fopen(stats_filename_.c_str(), "r");
-  ASSERT_TRUE(stats_filef != NULL);
-
-  auto clusters = CalculateFrameClusters(stats_filef, nullptr);
-  ASSERT_EQ(0, fclose(stats_filef));
-  decltype(clusters) expected = {std::make_pair(101, 2),
-                                 std::make_pair(102, 1)};
-  ASSERT_EQ(expected, clusters);
+  const std::vector<Cluster> result = CalculateFrameClusters({1, 1, 2});
+  EXPECT_EQ(2u, result.size());
+  EXPECT_EQ(1u, result[0].index);
+  EXPECT_EQ(2, result[0].number_of_repeated_frames);
+  EXPECT_EQ(2u, result[1].index);
+  EXPECT_EQ(1, result[1].number_of_repeated_frames);
 }
 
-TEST_F(VideoQualityAnalysisTest, CalculateFrameClustersOneOneErrErrThree) {
-  std::ofstream stats_file;
-
-  stats_file.open(stats_filename_.c_str());
-  stats_file << "frame_0001 0101\n";
-  stats_file << "frame_0002 0101\n";
-  stats_file << "frame_0003 Barcode error\n";
-  stats_file << "frame_0004 Barcode error\n";
-  stats_file << "frame_0005 0103\n";
-  stats_file.close();
-
-  FILE* stats_filef = fopen(stats_filename_.c_str(), "r");
-  ASSERT_TRUE(stats_filef != NULL);
-
-  auto clusters = CalculateFrameClusters(stats_filef, nullptr);
-  ASSERT_EQ(0, fclose(stats_filef));
-  decltype(clusters) expected = {std::make_pair(101, 2),
-                                 std::make_pair(DECODE_ERROR, 2),
-                                 std::make_pair(103, 1)};
-  ASSERT_EQ(expected, clusters);
+TEST_F(VideoQualityAnalysisTest, GetMaxRepeatedFramesOneOneTwo) {
+  EXPECT_EQ(2, GetMaxRepeatedFrames(CalculateFrameClusters({1, 1, 2})));
 }
 
-TEST_F(VideoQualityAnalysisTest, CalculateFrameClustersErrErr) {
-  std::ofstream stats_file;
-
-  stats_file.open(stats_filename_.c_str());
-  stats_file << "frame_0001 Barcode error\n";
-  stats_file << "frame_0002 Barcode error\n";
-  stats_file.close();
-
-  FILE* stats_filef = fopen(stats_filename_.c_str(), "r");
-  ASSERT_TRUE(stats_filef != NULL);
-
-  auto clusters = CalculateFrameClusters(stats_filef, nullptr);
-  ASSERT_EQ(0, fclose(stats_filef));
-  decltype(clusters) expected = {std::make_pair(DECODE_ERROR, 2)};
-  ASSERT_EQ(expected, clusters);
+TEST_F(VideoQualityAnalysisTest, GetMaxSkippedFramesOneOneTwo) {
+  EXPECT_EQ(0, GetMaxSkippedFrames(CalculateFrameClusters({1, 1, 2})));
 }
 
-TEST_F(VideoQualityAnalysisTest, CalculateFrameClustersOneOneErrErrOneOne) {
-  std::ofstream stats_file;
-
-  stats_file.open(stats_filename_.c_str());
-  stats_file << "frame_0001 0101\n";
-  stats_file << "frame_0002 0101\n";
-  stats_file << "frame_0003 Barcode error\n";
-  stats_file << "frame_0004 Barcode error\n";
-  stats_file << "frame_0005 0101\n";
-  stats_file << "frame_0006 0101\n";
-  stats_file.close();
-
-  FILE* stats_filef = fopen(stats_filename_.c_str(), "r");
-  ASSERT_TRUE(stats_filef != NULL);
-
-  auto clusters = CalculateFrameClusters(stats_filef, nullptr);
-  ASSERT_EQ(0, fclose(stats_filef));
-  decltype(clusters) expected = {std::make_pair(101, 6)};
-  ASSERT_EQ(expected, clusters);
+TEST_F(VideoQualityAnalysisTest, GetTotalNumberOfSkippedFramesOneOneTwo) {
+  EXPECT_EQ(0,
+            GetTotalNumberOfSkippedFrames(CalculateFrameClusters({1, 1, 2})));
 }
 
 TEST_F(VideoQualityAnalysisTest, CalculateFrameClustersEmpty) {
-  std::ofstream stats_file;
-
-  stats_file.open(stats_filename_.c_str());
-  stats_file.close();
-
-  FILE* stats_filef = fopen(stats_filename_.c_str(), "r");
-  ASSERT_TRUE(stats_filef != NULL);
-
-  auto clusters = CalculateFrameClusters(stats_filef, nullptr);
-  ASSERT_EQ(0, fclose(stats_filef));
-  decltype(clusters) expected;
-  ASSERT_EQ(expected, clusters);
+  EXPECT_TRUE(CalculateFrameClusters({}).empty());
 }
+
+TEST_F(VideoQualityAnalysisTest, GetMaxRepeatedFramesEmpty) {
+  EXPECT_EQ(0, GetMaxRepeatedFrames({}));
+}
+
+TEST_F(VideoQualityAnalysisTest, GetMaxSkippedFramesEmpty) {
+  EXPECT_EQ(0, GetMaxSkippedFrames({}));
+}
+
+TEST_F(VideoQualityAnalysisTest, GetTotalNumberOfSkippedFramesEmpty) {
+  EXPECT_EQ(0, GetTotalNumberOfSkippedFrames({}));
+}
+
 }  // namespace test
 }  // namespace webrtc
diff --git a/rtc_tools/video_file_reader.cc b/rtc_tools/video_file_reader.cc
index 67b0e24..e648031 100644
--- a/rtc_tools/video_file_reader.cc
+++ b/rtc_tools/video_file_reader.cc
@@ -273,12 +273,8 @@
                                            int height) {
   if (rtc::ends_with(file_name.c_str(), ".yuv"))
     return OpenYuvFile(file_name, width, height);
-  if (rtc::ends_with(file_name.c_str(), ".y4m")) {
-    rtc::scoped_refptr<Video> video = OpenY4mFile(file_name);
-    RTC_DCHECK_EQ(width, video->width());
-    RTC_DCHECK_EQ(height, video->height());
-    return video;
-  }
+  if (rtc::ends_with(file_name.c_str(), ".y4m"))
+    return OpenY4mFile(file_name);
 
   RTC_LOG(LS_ERROR) << "Video file does not end in either .yuv or .y4m: "
                     << file_name;