[rtc_tools/video_encoder] Output ivf for all SVC decode targets
This CL extracts the IVF file writer from `TestEncodedImageCallback`
into separate .cc/.h files. It also extends `EncodedImageFileWriter` to
support SVC by writing one IVF file for every decode target.
Example: encoding with VP9 L3T3_KEY produces the following outputs:
output-VP9-L3T3_KEY-L0T0.ivf
output-VP9-L3T3_KEY-L0T1.ivf
output-VP9-L3T3_KEY-L0T2.ivf
output-VP9-L3T3_KEY-L1T0.ivf
output-VP9-L3T3_KEY-L1T1.ivf
output-VP9-L3T3_KEY-L1T2.ivf
output-VP9-L3T3_KEY-L2T0.ivf
output-VP9-L3T3_KEY-L2T1.ivf
output-VP9-L3T3_KEY-L2T2.ivf
Bug: webrtc:15210
Change-Id: Iba46c897a7b783bb4b79ec18715e901476cb9f55
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/309280
Reviewed-by: Erik Språng <sprang@webrtc.org>
Reviewed-by: Mirko Bonadei <mbonadei@webrtc.org>
Commit-Queue: Jianhui J Dai <jianhui.j.dai@intel.com>
Cr-Commit-Position: refs/heads/main@{#40363}
diff --git a/rtc_tools/BUILD.gn b/rtc_tools/BUILD.gn
index b324438..517a739 100644
--- a/rtc_tools/BUILD.gn
+++ b/rtc_tools/BUILD.gn
@@ -428,7 +428,11 @@
rtc_executable("video_encoder") {
visibility = [ "*" ]
testonly = true
- sources = [ "video_encoder/video_encoder.cc" ]
+ sources = [
+ "video_encoder/encoded_image_file_writer.cc",
+ "video_encoder/encoded_image_file_writer.h",
+ "video_encoder/video_encoder.cc",
+ ]
deps = [
"//api:create_frame_generator",
"//api:frame_generator_api",
diff --git a/rtc_tools/DEPS b/rtc_tools/DEPS
index 2a06bf0..f62653d 100644
--- a/rtc_tools/DEPS
+++ b/rtc_tools/DEPS
@@ -37,6 +37,10 @@
"+modules/video_coding/codecs/av1/av1_svc_config.h",
"+modules/video_coding/include/video_codec_interface.h",
"+modules/video_coding/svc/scalability_mode_util.h",
+ ],
+ ".*encoded_image_file_writer\.(cc|h)": [
+ "+modules/video_coding/include/video_codec_interface.h",
+ "+modules/video_coding/svc/scalability_mode_util.h",
"+modules/video_coding/utility/ivf_file_writer.h",
],
}
diff --git a/rtc_tools/video_encoder/encoded_image_file_writer.cc b/rtc_tools/video_encoder/encoded_image_file_writer.cc
new file mode 100644
index 0000000..624bce3
--- /dev/null
+++ b/rtc_tools/video_encoder/encoded_image_file_writer.cc
@@ -0,0 +1,120 @@
+/*
+ * Copyright (c) 2023 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#include "rtc_tools/video_encoder/encoded_image_file_writer.h"
+
+#include "modules/video_coding/svc/scalability_mode_util.h"
+#include "rtc_base/logging.h"
+
+namespace webrtc {
+namespace test {
+
+EncodedImageFileWriter::EncodedImageFileWriter(
+ const VideoCodec& video_codec_setting)
+ : video_codec_setting_(video_codec_setting) {
+ const char* codec_string =
+ CodecTypeToPayloadString(video_codec_setting.codecType);
+
+ // Retrieve scalability mode information.
+ absl::optional<ScalabilityMode> scalability_mode =
+ video_codec_setting.GetScalabilityMode();
+ RTC_CHECK(scalability_mode);
+ spatial_layers_ = ScalabilityModeToNumSpatialLayers(*scalability_mode);
+ temporal_layers_ = ScalabilityModeToNumTemporalLayers(*scalability_mode);
+ inter_layer_pred_mode_ =
+ ScalabilityModeToInterLayerPredMode(*scalability_mode);
+
+ RTC_CHECK_GT(spatial_layers_, 0);
+ RTC_CHECK_GT(temporal_layers_, 0);
+ // Create writer for every decode target.
+ for (int i = 0; i < spatial_layers_; ++i) {
+ for (int j = 0; j < temporal_layers_; ++j) {
+ char buffer[256];
+ rtc::SimpleStringBuilder name(buffer);
+ name << "output-" << codec_string << "-"
+ << ScalabilityModeToString(*scalability_mode) << "-L" << i << "T"
+ << j << ".ivf";
+
+ decode_target_writers_.emplace_back(std::make_pair(
+ IvfFileWriter::Wrap(FileWrapper::OpenWriteOnly(name.str()), 0),
+ name.str()));
+ }
+ }
+}
+
+EncodedImageFileWriter::~EncodedImageFileWriter() {
+ for (size_t i = 0; i < decode_target_writers_.size(); ++i) {
+ decode_target_writers_[i].first->Close();
+ RTC_LOG(LS_INFO) << "Written: " << decode_target_writers_[i].second;
+ }
+}
+
+int EncodedImageFileWriter::Write(const EncodedImage& encoded_image) {
+ // L1T1 does not set `SpatialIndex` and `TemporalIndex` in `EncodedImage`.
+ const int spatial_index = encoded_image.SpatialIndex().value_or(0);
+ const int temporal_index = encoded_image.TemporalIndex().value_or(0);
+ RTC_CHECK_LT(spatial_index, spatial_layers_);
+ RTC_CHECK_LT(temporal_index, temporal_layers_);
+
+ if (spatial_index == 0) {
+ is_base_layer_key_frame =
+ (encoded_image._frameType == VideoFrameType::kVideoFrameKey);
+ }
+
+ switch (inter_layer_pred_mode_) {
+ case InterLayerPredMode::kOff: {
+ // Write to this spatial layer.
+ for (int j = temporal_index; j < temporal_layers_; ++j) {
+ const int index = spatial_index * temporal_layers_ + j;
+ RTC_CHECK_LT(index, decode_target_writers_.size());
+
+ decode_target_writers_[index].first->WriteFrame(
+ encoded_image, video_codec_setting_.codecType);
+ }
+ break;
+ }
+
+ case InterLayerPredMode::kOn: {
+ // Write to this and higher spatial layers.
+ for (int i = spatial_index; i < spatial_layers_; ++i) {
+ for (int j = temporal_index; j < temporal_layers_; ++j) {
+ const int index = i * temporal_layers_ + j;
+ RTC_CHECK_LT(index, decode_target_writers_.size());
+
+ decode_target_writers_[index].first->WriteFrame(
+ encoded_image, video_codec_setting_.codecType);
+ }
+ }
+ break;
+ }
+
+ case InterLayerPredMode::kOnKeyPic: {
+ for (int i = spatial_index; i < spatial_layers_; ++i) {
+ for (int j = temporal_index; j < temporal_layers_; ++j) {
+ const int index = i * temporal_layers_ + j;
+ RTC_CHECK_LT(index, decode_target_writers_.size());
+
+ decode_target_writers_[index].first->WriteFrame(
+ encoded_image, video_codec_setting_.codecType);
+ }
+
+ // Write to higher spatial layers only if key frame.
+ if (!is_base_layer_key_frame) {
+ break;
+ }
+ }
+ break;
+ }
+ }
+
+ return 0;
+}
+
+} // namespace test
+} // namespace webrtc
diff --git a/rtc_tools/video_encoder/encoded_image_file_writer.h b/rtc_tools/video_encoder/encoded_image_file_writer.h
new file mode 100644
index 0000000..abe01b6
--- /dev/null
+++ b/rtc_tools/video_encoder/encoded_image_file_writer.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2023 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#ifndef RTC_TOOLS_VIDEO_ENCODER_ENCODED_IMAGE_FILE_WRITER_H_
+#define RTC_TOOLS_VIDEO_ENCODER_ENCODED_IMAGE_FILE_WRITER_H_
+
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "modules/video_coding/include/video_codec_interface.h"
+#include "modules/video_coding/utility/ivf_file_writer.h"
+
+namespace webrtc {
+namespace test {
+
+// The `EncodedImageFileWriter` writes the `EncodedImage` into ivf output. It
+// supports SVC to output ivf for all decode targets.
+class EncodedImageFileWriter final {
+ // The pair of writer and output file name.
+ using IvfWriterPair = std::pair<std::unique_ptr<IvfFileWriter>, std::string>;
+
+ public:
+ explicit EncodedImageFileWriter(const VideoCodec& video_codec_setting);
+
+ ~EncodedImageFileWriter();
+
+ int Write(const EncodedImage& encoded_image);
+
+ private:
+ VideoCodec video_codec_setting_;
+
+ int spatial_layers_ = 0;
+ int temporal_layers_ = 0;
+ InterLayerPredMode inter_layer_pred_mode_ = InterLayerPredMode::kOff;
+
+ bool is_base_layer_key_frame = false;
+ std::vector<IvfWriterPair> decode_target_writers_;
+};
+
+} // namespace test
+} // namespace webrtc
+
+#endif // RTC_TOOLS_VIDEO_ENCODER_ENCODED_IMAGE_FILE_WRITER_H_
diff --git a/rtc_tools/video_encoder/video_encoder.cc b/rtc_tools/video_encoder/video_encoder.cc
index 4c8835c..fe2c5b1 100644
--- a/rtc_tools/video_encoder/video_encoder.cc
+++ b/rtc_tools/video_encoder/video_encoder.cc
@@ -7,9 +7,6 @@
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
-
-#include <stdlib.h>
-
#include <string>
#include "absl/flags/flag.h"
@@ -22,8 +19,8 @@
#include "modules/video_coding/codecs/av1/av1_svc_config.h"
#include "modules/video_coding/include/video_codec_interface.h"
#include "modules/video_coding/svc/scalability_mode_util.h"
-#include "modules/video_coding/utility/ivf_file_writer.h"
#include "rtc_base/logging.h"
+#include "rtc_tools/video_encoder/encoded_image_file_writer.h"
ABSL_FLAG(std::string,
video_codec,
@@ -153,110 +150,42 @@
}
// Wrapper of `EncodedImageCallback` that writes all encoded images into ivf
-// output. Each spatial layer has separated output including all its dependant
-// layers.
-class EncodedImageFileWriter : public EncodedImageCallback {
- using TestIvfWriter = std::pair<std::unique_ptr<IvfFileWriter>, std::string>;
-
+// files through `test::EncodedImageFileWriter`.
+class TestEncodedImageCallback final : public EncodedImageCallback {
public:
- explicit EncodedImageFileWriter(const VideoCodec& video_codec_setting)
+ explicit TestEncodedImageCallback(const VideoCodec& video_codec_setting)
: video_codec_setting_(video_codec_setting) {
- const char* codec_string =
- CodecTypeToPayloadString(video_codec_setting.codecType);
-
- // Retrieve scalability mode information.
- absl::optional<ScalabilityMode> scalability_mode =
- video_codec_setting.GetScalabilityMode();
- RTC_CHECK(scalability_mode);
- spatial_layers_ = ScalabilityModeToNumSpatialLayers(*scalability_mode);
- inter_layer_pred_mode_ =
- ScalabilityModeToInterLayerPredMode(*scalability_mode);
-
- RTC_CHECK_GT(spatial_layers_, 0);
- // Create writer for every spatial layer with the "-Lx" postfix.
- for (int i = 0; i < spatial_layers_; ++i) {
- char buffer[256];
- rtc::SimpleStringBuilder name(buffer);
- name << "output-" << codec_string << "-"
- << ScalabilityModeToString(*scalability_mode) << "-L" << i << ".ivf";
-
- writers_.emplace_back(std::make_pair(
- IvfFileWriter::Wrap(FileWrapper::OpenWriteOnly(name.str()), 0),
- name.str()));
- }
+ writer_ =
+ std::make_unique<test::EncodedImageFileWriter>(video_codec_setting);
}
- ~EncodedImageFileWriter() override {
- for (size_t i = 0; i < writers_.size(); ++i) {
- writers_[i].first->Close();
- RTC_LOG(LS_INFO) << "Written: " << writers_[i].second;
- }
- }
+ ~TestEncodedImageCallback() override = default;
private:
Result OnEncodedImage(const EncodedImage& encoded_image,
const CodecSpecificInfo* codec_specific_info) override {
- RTC_CHECK(codec_specific_info);
-
- ++frames_;
+ RTC_CHECK(codec_specific_info);  // Must precede the dereference below.
RTC_LOG(LS_VERBOSE) << "frame " << frames_ << ": {"
<< ToString(encoded_image)
<< "}, codec_specific_info: {"
<< ToString(*codec_specific_info) << "}";
- if (spatial_layers_ == 1) {
- // Single spatial layer stream.
- RTC_CHECK_EQ(writers_.size(), 1);
- RTC_CHECK(!encoded_image.SpatialIndex() ||
- *encoded_image.SpatialIndex() == 0);
- writers_[0].first->WriteFrame(encoded_image,
- video_codec_setting_.codecType);
- } else {
- // Multiple spatial layers stream.
- RTC_CHECK_GT(spatial_layers_, 1);
- RTC_CHECK_GT(writers_.size(), 1);
- RTC_CHECK(encoded_image.SpatialIndex());
- int index = *encoded_image.SpatialIndex();
+ RTC_CHECK(writer_);
+ writer_->Write(encoded_image);
- RTC_CHECK_LT(index, writers_.size());
- switch (inter_layer_pred_mode_) {
- case InterLayerPredMode::kOff:
- writers_[index].first->WriteFrame(encoded_image,
- video_codec_setting_.codecType);
- break;
-
- case InterLayerPredMode::kOn:
- // Write the encoded image into this layer and higher spatial layers.
- for (size_t i = index; i < writers_.size(); ++i) {
- writers_[i].first->WriteFrame(encoded_image,
- video_codec_setting_.codecType);
- }
- break;
-
- case InterLayerPredMode::kOnKeyPic:
- // Write the encoded image into this layer.
- writers_[index].first->WriteFrame(encoded_image,
- video_codec_setting_.codecType);
- // If this is key frame, write to higher spatial layers as well.
- if (encoded_image._frameType == VideoFrameType::kVideoFrameKey) {
- for (size_t i = index + 1; i < writers_.size(); ++i) {
- writers_[i].first->WriteFrame(encoded_image,
- video_codec_setting_.codecType);
- }
- }
- break;
- }
+ // For SVC, every picture generates multiple encoded images of different
+ // spatial layers.
+ if (codec_specific_info->end_of_picture) {
+ ++frames_;
}
return Result(Result::Error::OK);
}
- VideoCodec video_codec_setting_ = {};
- int spatial_layers_ = 0;
- InterLayerPredMode inter_layer_pred_mode_ = InterLayerPredMode::kOff;
-
- std::vector<TestIvfWriter> writers_;
+ VideoCodec video_codec_setting_;
int32_t frames_ = 0;
+
+ std::unique_ptr<test::EncodedImageFileWriter> writer_;
};
// Wrapper of `BuiltinVideoEncoderFactory`.
@@ -267,7 +196,7 @@
RTC_CHECK(builtin_video_encoder_factory_);
}
- ~TestVideoEncoderFactoryWrapper() {}
+ ~TestVideoEncoderFactoryWrapper() = default;
void ListSupportedFormats() const {
// Log all supported formats.
@@ -570,12 +499,14 @@
video_codec_setting);
RTC_CHECK(video_encoder);
- // Create `EncodedImageFileWriter`.
- std::unique_ptr<webrtc::EncodedImageFileWriter> encoded_image_file_writer =
- std::make_unique<webrtc::EncodedImageFileWriter>(video_codec_setting);
- RTC_CHECK(encoded_image_file_writer);
+ // Create `TestEncodedImageCallback`.
+ std::unique_ptr<webrtc::TestEncodedImageCallback>
+ test_encoded_image_callback =
+ std::make_unique<webrtc::TestEncodedImageCallback>(
+ video_codec_setting);
+ RTC_CHECK(test_encoded_image_callback);
int ret = video_encoder->RegisterEncodeCompleteCallback(
- encoded_image_file_writer.get());
+ test_encoded_image_callback.get());
RTC_CHECK_EQ(ret, WEBRTC_VIDEO_CODEC_OK);
// Start to encode frames.