[rtc_tools/video_encoder] Output ivf for all SVC decode targets

This CL extracts the ivf file writer from `TestEncodedImageCallback`
into separate .cc|.h files. It also improves `EncodedImageFileWriter`
to support SVC by writing one ivf output for every decode target.

EXAMPLE: Encoding with VP9 L3T3_KEY produces the following outputs:
output-VP9-L3T3_KEY-L0T0.ivf
output-VP9-L3T3_KEY-L0T1.ivf
output-VP9-L3T3_KEY-L0T2.ivf
output-VP9-L3T3_KEY-L1T0.ivf
output-VP9-L3T3_KEY-L1T1.ivf
output-VP9-L3T3_KEY-L1T2.ivf
output-VP9-L3T3_KEY-L2T0.ivf
output-VP9-L3T3_KEY-L2T1.ivf
output-VP9-L3T3_KEY-L2T2.ivf

Bug: webrtc:15210
Change-Id: Iba46c897a7b783bb4b79ec18715e901476cb9f55
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/309280
Reviewed-by: Erik Språng <sprang@webrtc.org>
Reviewed-by: Mirko Bonadei <mbonadei@webrtc.org>
Commit-Queue: Jianhui J Dai <jianhui.j.dai@intel.com>
Cr-Commit-Position: refs/heads/main@{#40363}
diff --git a/rtc_tools/BUILD.gn b/rtc_tools/BUILD.gn
index b324438..517a739 100644
--- a/rtc_tools/BUILD.gn
+++ b/rtc_tools/BUILD.gn
@@ -428,7 +428,11 @@
   rtc_executable("video_encoder") {
     visibility = [ "*" ]
     testonly = true
-    sources = [ "video_encoder/video_encoder.cc" ]
+    sources = [
+      "video_encoder/encoded_image_file_writer.cc",
+      "video_encoder/encoded_image_file_writer.h",
+      "video_encoder/video_encoder.cc",
+    ]
     deps = [
       "//api:create_frame_generator",
       "//api:frame_generator_api",
diff --git a/rtc_tools/DEPS b/rtc_tools/DEPS
index 2a06bf0..f62653d 100644
--- a/rtc_tools/DEPS
+++ b/rtc_tools/DEPS
@@ -37,6 +37,10 @@
     "+modules/video_coding/codecs/av1/av1_svc_config.h",
     "+modules/video_coding/include/video_codec_interface.h",
     "+modules/video_coding/svc/scalability_mode_util.h",
+  ],
+  ".*encoded_image_file_writer\.(cc|h)": [
+    "+modules/video_coding/include/video_codec_interface.h",
+    "+modules/video_coding/svc/scalability_mode_util.h",
     "+modules/video_coding/utility/ivf_file_writer.h",
   ],
 }
diff --git a/rtc_tools/video_encoder/encoded_image_file_writer.cc b/rtc_tools/video_encoder/encoded_image_file_writer.cc
new file mode 100644
index 0000000..624bce3
--- /dev/null
+++ b/rtc_tools/video_encoder/encoded_image_file_writer.cc
@@ -0,0 +1,120 @@
+/*
+ *  Copyright (c) 2023 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+#include "rtc_tools/video_encoder/encoded_image_file_writer.h"
+
+#include "modules/video_coding/svc/scalability_mode_util.h"
+#include "rtc_base/logging.h"
+
+namespace webrtc {
+namespace test {
+
+EncodedImageFileWriter::EncodedImageFileWriter(
+    const VideoCodec& video_codec_setting)
+    : video_codec_setting_(video_codec_setting) {
+  const char* codec_string =
+      CodecTypeToPayloadString(video_codec_setting.codecType);
+
+  // Retrieve scalability mode information.
+  absl::optional<ScalabilityMode> scalability_mode =
+      video_codec_setting.GetScalabilityMode();
+  RTC_CHECK(scalability_mode);
+  spatial_layers_ = ScalabilityModeToNumSpatialLayers(*scalability_mode);
+  temporal_layers_ = ScalabilityModeToNumTemporalLayers(*scalability_mode);
+  inter_layer_pred_mode_ =
+      ScalabilityModeToInterLayerPredMode(*scalability_mode);
+
+  RTC_CHECK_GT(spatial_layers_, 0);
+  RTC_CHECK_GT(temporal_layers_, 0);
+  // Create writer for every decode target.
+  for (int i = 0; i < spatial_layers_; ++i) {
+    for (int j = 0; j < temporal_layers_; ++j) {
+      char buffer[256];
+      rtc::SimpleStringBuilder name(buffer);
+      name << "output-" << codec_string << "-"
+           << ScalabilityModeToString(*scalability_mode) << "-L" << i << "T"
+           << j << ".ivf";
+
+      decode_target_writers_.emplace_back(std::make_pair(
+          IvfFileWriter::Wrap(FileWrapper::OpenWriteOnly(name.str()), 0),
+          name.str()));
+    }
+  }
+}
+
+EncodedImageFileWriter::~EncodedImageFileWriter() {
+  for (const IvfWriterPair& writer_and_name : decode_target_writers_) {
+    writer_and_name.first->Close();
+    RTC_LOG(LS_INFO) << "Written: " << writer_and_name.second;
+  }
+}
+
+int EncodedImageFileWriter::Write(const EncodedImage& encoded_image) {
+  // L1T1 does not set `SpatialIndex` and `TemporalIndex` in `EncodedImage`.
+  const int spatial_index = encoded_image.SpatialIndex().value_or(0);
+  const int temporal_index = encoded_image.TemporalIndex().value_or(0);
+  RTC_CHECK_LT(spatial_index, spatial_layers_);
+  RTC_CHECK_LT(temporal_index, temporal_layers_);
+
+  if (spatial_index == 0) {
+    is_base_layer_key_frame =
+        (encoded_image._frameType == VideoFrameType::kVideoFrameKey);
+  }
+
+  switch (inter_layer_pred_mode_) {
+    case InterLayerPredMode::kOff: {
+      // Write to this spatial layer.
+      for (int j = temporal_index; j < temporal_layers_; ++j) {
+        const int index = spatial_index * temporal_layers_ + j;
+        RTC_CHECK_LT(index, decode_target_writers_.size());
+
+        decode_target_writers_[index].first->WriteFrame(
+            encoded_image, video_codec_setting_.codecType);
+      }
+      break;
+    }
+
+    case InterLayerPredMode::kOn: {
+      // Write to this and higher spatial layers.
+      for (int i = spatial_index; i < spatial_layers_; ++i) {
+        for (int j = temporal_index; j < temporal_layers_; ++j) {
+          const int index = i * temporal_layers_ + j;
+          RTC_CHECK_LT(index, decode_target_writers_.size());
+
+          decode_target_writers_[index].first->WriteFrame(
+              encoded_image, video_codec_setting_.codecType);
+        }
+      }
+      break;
+    }
+
+    case InterLayerPredMode::kOnKeyPic: {
+      for (int i = spatial_index; i < spatial_layers_; ++i) {
+        for (int j = temporal_index; j < temporal_layers_; ++j) {
+          const int index = i * temporal_layers_ + j;
+          RTC_CHECK_LT(index, decode_target_writers_.size());
+
+          decode_target_writers_[index].first->WriteFrame(
+              encoded_image, video_codec_setting_.codecType);
+        }
+
+        // Write to higher spatial layers only if key frame.
+        if (!is_base_layer_key_frame) {
+          break;
+        }
+      }
+      break;
+    }
+  }
+
+  return 0;
+}
+
+}  // namespace test
+}  // namespace webrtc
diff --git a/rtc_tools/video_encoder/encoded_image_file_writer.h b/rtc_tools/video_encoder/encoded_image_file_writer.h
new file mode 100644
index 0000000..abe01b6
--- /dev/null
+++ b/rtc_tools/video_encoder/encoded_image_file_writer.h
@@ -0,0 +1,51 @@
+/*
+ *  Copyright (c) 2023 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+#ifndef RTC_TOOLS_VIDEO_ENCODER_ENCODED_IMAGE_FILE_WRITER_H_
+#define RTC_TOOLS_VIDEO_ENCODER_ENCODED_IMAGE_FILE_WRITER_H_
+
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "modules/video_coding/include/video_codec_interface.h"
+#include "modules/video_coding/utility/ivf_file_writer.h"
+
+namespace webrtc {
+namespace test {
+
+// The `EncodedImageFileWriter` writes the `EncodedImage` into ivf output. It
+// supports SVC to output ivf for all decode targets.
+class EncodedImageFileWriter final {
+  // The pair of writer and output file name.
+  using IvfWriterPair = std::pair<std::unique_ptr<IvfFileWriter>, std::string>;
+
+ public:
+  explicit EncodedImageFileWriter(const VideoCodec& video_codec_setting);
+
+  ~EncodedImageFileWriter();
+
+  int Write(const EncodedImage& encoded_image);
+
+ private:
+  VideoCodec video_codec_setting_;
+
+  int spatial_layers_ = 0;
+  int temporal_layers_ = 0;
+  InterLayerPredMode inter_layer_pred_mode_ = InterLayerPredMode::kOff;
+
+  bool is_base_layer_key_frame = false;
+  std::vector<IvfWriterPair> decode_target_writers_;
+};
+
+}  // namespace test
+}  // namespace webrtc
+
+#endif  // RTC_TOOLS_VIDEO_ENCODER_ENCODED_IMAGE_FILE_WRITER_H_
diff --git a/rtc_tools/video_encoder/video_encoder.cc b/rtc_tools/video_encoder/video_encoder.cc
index 4c8835c..fe2c5b1 100644
--- a/rtc_tools/video_encoder/video_encoder.cc
+++ b/rtc_tools/video_encoder/video_encoder.cc
@@ -7,9 +7,6 @@
  *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
-
-#include <stdlib.h>
-
 #include <string>
 
 #include "absl/flags/flag.h"
@@ -22,8 +19,8 @@
 #include "modules/video_coding/codecs/av1/av1_svc_config.h"
 #include "modules/video_coding/include/video_codec_interface.h"
 #include "modules/video_coding/svc/scalability_mode_util.h"
-#include "modules/video_coding/utility/ivf_file_writer.h"
 #include "rtc_base/logging.h"
+#include "rtc_tools/video_encoder/encoded_image_file_writer.h"
 
 ABSL_FLAG(std::string,
           video_codec,
@@ -153,110 +150,42 @@
 }
 
 // Wrapper of `EncodedImageCallback` that writes all encoded images into ivf
-// output. Each spatial layer has separated output including all its dependant
-// layers.
-class EncodedImageFileWriter : public EncodedImageCallback {
-  using TestIvfWriter = std::pair<std::unique_ptr<IvfFileWriter>, std::string>;
-
+// files through `test::EncodedImageFileWriter`.
+class TestEncodedImageCallback final : public EncodedImageCallback {
  public:
-  explicit EncodedImageFileWriter(const VideoCodec& video_codec_setting)
+  explicit TestEncodedImageCallback(const VideoCodec& video_codec_setting)
       : video_codec_setting_(video_codec_setting) {
-    const char* codec_string =
-        CodecTypeToPayloadString(video_codec_setting.codecType);
-
-    // Retrieve scalability mode information.
-    absl::optional<ScalabilityMode> scalability_mode =
-        video_codec_setting.GetScalabilityMode();
-    RTC_CHECK(scalability_mode);
-    spatial_layers_ = ScalabilityModeToNumSpatialLayers(*scalability_mode);
-    inter_layer_pred_mode_ =
-        ScalabilityModeToInterLayerPredMode(*scalability_mode);
-
-    RTC_CHECK_GT(spatial_layers_, 0);
-    // Create writer for every spatial layer with the "-Lx" postfix.
-    for (int i = 0; i < spatial_layers_; ++i) {
-      char buffer[256];
-      rtc::SimpleStringBuilder name(buffer);
-      name << "output-" << codec_string << "-"
-           << ScalabilityModeToString(*scalability_mode) << "-L" << i << ".ivf";
-
-      writers_.emplace_back(std::make_pair(
-          IvfFileWriter::Wrap(FileWrapper::OpenWriteOnly(name.str()), 0),
-          name.str()));
-    }
+    writer_ =
+        std::make_unique<test::EncodedImageFileWriter>(video_codec_setting);
   }
 
-  ~EncodedImageFileWriter() override {
-    for (size_t i = 0; i < writers_.size(); ++i) {
-      writers_[i].first->Close();
-      RTC_LOG(LS_INFO) << "Written: " << writers_[i].second;
-    }
-  }
+  ~TestEncodedImageCallback() override = default;
 
  private:
   Result OnEncodedImage(const EncodedImage& encoded_image,
                         const CodecSpecificInfo* codec_specific_info) override {
     RTC_CHECK(codec_specific_info);
-
-    ++frames_;
     RTC_LOG(LS_VERBOSE) << "frame " << frames_ << ": {"
                         << ToString(encoded_image)
                         << "}, codec_specific_info: {"
                         << ToString(*codec_specific_info) << "}";
 
-    if (spatial_layers_ == 1) {
-      // Single spatial layer stream.
-      RTC_CHECK_EQ(writers_.size(), 1);
-      RTC_CHECK(!encoded_image.SpatialIndex() ||
-                *encoded_image.SpatialIndex() == 0);
-      writers_[0].first->WriteFrame(encoded_image,
-                                    video_codec_setting_.codecType);
-    } else {
-      // Multiple spatial layers stream.
-      RTC_CHECK_GT(spatial_layers_, 1);
-      RTC_CHECK_GT(writers_.size(), 1);
-      RTC_CHECK(encoded_image.SpatialIndex());
-      int index = *encoded_image.SpatialIndex();
+    RTC_CHECK(writer_);
+    writer_->Write(encoded_image);
 
-      RTC_CHECK_LT(index, writers_.size());
-      switch (inter_layer_pred_mode_) {
-        case InterLayerPredMode::kOff:
-          writers_[index].first->WriteFrame(encoded_image,
-                                            video_codec_setting_.codecType);
-          break;
-
-        case InterLayerPredMode::kOn:
-          // Write the encoded image into this layer and higher spatial layers.
-          for (size_t i = index; i < writers_.size(); ++i) {
-            writers_[i].first->WriteFrame(encoded_image,
-                                          video_codec_setting_.codecType);
-          }
-          break;
-
-        case InterLayerPredMode::kOnKeyPic:
-          // Write the encoded image into this layer.
-          writers_[index].first->WriteFrame(encoded_image,
-                                            video_codec_setting_.codecType);
-          // If this is key frame, write to higher spatial layers as well.
-          if (encoded_image._frameType == VideoFrameType::kVideoFrameKey) {
-            for (size_t i = index + 1; i < writers_.size(); ++i) {
-              writers_[i].first->WriteFrame(encoded_image,
-                                            video_codec_setting_.codecType);
-            }
-          }
-          break;
-      }
+    // For SVC, every picture generates multiple encoded images of different
+    // spatial layers.
+    if (codec_specific_info->end_of_picture) {
+      ++frames_;
     }
 
     return Result(Result::Error::OK);
   }
 
-  VideoCodec video_codec_setting_ = {};
-  int spatial_layers_ = 0;
-  InterLayerPredMode inter_layer_pred_mode_ = InterLayerPredMode::kOff;
-
-  std::vector<TestIvfWriter> writers_;
+  VideoCodec video_codec_setting_;
   int32_t frames_ = 0;
+
+  std::unique_ptr<test::EncodedImageFileWriter> writer_;
 };
 
 // Wrapper of `BuiltinVideoEncoderFactory`.
@@ -267,7 +196,7 @@
     RTC_CHECK(builtin_video_encoder_factory_);
   }
 
-  ~TestVideoEncoderFactoryWrapper() {}
+  ~TestVideoEncoderFactoryWrapper() = default;
 
   void ListSupportedFormats() const {
     // Log all supported formats.
@@ -570,12 +499,14 @@
           video_codec_setting);
   RTC_CHECK(video_encoder);
 
-  // Create `EncodedImageFileWriter`.
-  std::unique_ptr<webrtc::EncodedImageFileWriter> encoded_image_file_writer =
-      std::make_unique<webrtc::EncodedImageFileWriter>(video_codec_setting);
-  RTC_CHECK(encoded_image_file_writer);
+  // Create `TestEncodedImageCallback`.
+  std::unique_ptr<webrtc::TestEncodedImageCallback>
+      test_encoded_image_callback =
+          std::make_unique<webrtc::TestEncodedImageCallback>(
+              video_codec_setting);
+  RTC_CHECK(test_encoded_image_callback);
   int ret = video_encoder->RegisterEncodeCompleteCallback(
-      encoded_image_file_writer.get());
+      test_encoded_image_callback.get());
   RTC_CHECK_EQ(ret, WEBRTC_VIDEO_CODEC_OK);
 
   // Start to encode frames.