api/video_codecs/simple_encoder_wrapper.cc - src/ - Git at Google

 /*
  *  Copyright (c) 2024 The WebRTC project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
  *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */

 #include "api/video_codecs/simple_encoder_wrapper.h"

 #include <algorithm>
 #include <memory>
 #include <string>
 #include <utility>
 #include <vector>

 #include "absl/algorithm/container.h"
 #include "absl/types/variant.h"
 #include "api/video_codecs/scalability_mode.h"
 #include "api/video_codecs/scalability_mode_helper.h"
 #include "api/video_codecs/video_encoder_factory_interface.h"
 #include "api/video_codecs/video_encoder_interface.h"
 #include "modules/video_coding/svc/create_scalability_structure.h"

 namespace webrtc {
 using PredictionConstraints =
     VideoEncoderFactoryInterface::Capabilities::PredictionConstraints;
 using FrameEncodeSettings = VideoEncoderInterface::FrameEncodeSettings;

 namespace {
 enum class Inter { kS, kL, kKey };
 enum class Scaling { k1_2, k2_3 };
 std::string SvcToString(int spatial_layers,
                         int temporal_layers,
                         Inter inter,
                         Scaling scaling) {
   RTC_CHECK(spatial_layers > 1 || inter == Inter::kL);
   std::string res;
   res += inter == Inter::kS ? "S" : "L";
   res += std::to_string(spatial_layers);
   res += "T";
   res += std::to_string(temporal_layers);
   if (scaling == Scaling::k2_3) {
     res += "h";
   }
   if (inter == Inter::kKey) {
     res += "_KEY";
   }

   return res;
 }
 }  // namespace

 // static
 std::vector<std::string> SimpleEncoderWrapper::SupportedWebrtcSvcModes(
     const PredictionConstraints& prediction_constraints) {
   std::vector<std::string> res;

   const int max_spatial_layers =
       std::min(3, prediction_constraints.max_spatial_layers);
   const int max_temporal_layers =
       std::min(3, prediction_constraints.max_temporal_layers);
   const bool scale_by_half = absl::c_linear_search(
       prediction_constraints.scaling_factors, Rational{1, 2});
   const bool scale_by_two_thirds = absl::c_linear_search(
       prediction_constraints.scaling_factors, Rational{2, 3});
   const bool inter_layer =
       prediction_constraints.max_references > 1 &&
       prediction_constraints.buffer_space_type !=
           PredictionConstraints::BufferSpaceType::kMultiInstance;

   for (int s = 1; s <= max_spatial_layers; ++s) {
     for (int t = 1; t <= max_temporal_layers; ++t) {
       if (prediction_constraints.num_buffers > ((std::max(1, t - 1) * s) - 1)) {
         if (s == 1 || inter_layer) {
           res.push_back(SvcToString(s, t, Inter::kL, Scaling::k1_2));
           if (s == 1) {
             continue;
           }
         }
         if (scale_by_half) {
           res.push_back(SvcToString(s, t, Inter::kS, Scaling::k1_2));
           if (inter_layer) {
             res.push_back(SvcToString(s, t, Inter::kKey, Scaling::k1_2));
           }
         }
         if (scale_by_two_thirds) {
           res.push_back(SvcToString(s, t, Inter::kS, Scaling::k2_3));
           if (inter_layer) {
             res.push_back(SvcToString(s, t, Inter::kKey, Scaling::k2_3));
             res.push_back(SvcToString(s, t, Inter::kL, Scaling::k2_3));
           }
         }
       }
     }
   }

   return res;
 }

 // static
 std::unique_ptr<SimpleEncoderWrapper> SimpleEncoderWrapper::Create(
     std::unique_ptr<VideoEncoderInterface> encoder,
     absl::string_view scalability_mode) {
   if (!encoder) {
     return nullptr;
   }

   absl::optional<ScalabilityMode> sm =
       ScalabilityModeStringToEnum(scalability_mode);
   if (!sm) {
     return nullptr;
   }

   std::unique_ptr<ScalableVideoController> svc_controller =
       CreateScalabilityStructure(*sm);
   if (!svc_controller) {
     return nullptr;
   }

   return std::make_unique<SimpleEncoderWrapper>(std::move(encoder),
                                                 std::move(svc_controller));
 }

 SimpleEncoderWrapper::SimpleEncoderWrapper(
     std::unique_ptr<VideoEncoderInterface> encoder,
     std::unique_ptr<ScalableVideoController> svc_controller)
     : encoder_(std::move(encoder)),
       svc_controller_(std::move(svc_controller)),
       layer_configs_(svc_controller_->StreamConfig()) {}

 void SimpleEncoderWrapper::SetEncodeQp(int qp) {
   target_qp_ = qp;
 }

 void SimpleEncoderWrapper::SetEncodeFps(int fps) {
   fps_ = fps;
 }

 void SimpleEncoderWrapper::Encode(
     rtc::scoped_refptr<webrtc::VideoFrameBuffer> frame_buffer,
     bool force_keyframe,
     EncodeResultCallback callback) {
   std::vector<ScalableVideoController::LayerFrameConfig> configs =
       svc_controller_->NextFrameConfig(force_keyframe);
   std::vector<FrameEncodeSettings> encode_settings;
   std::vector<GenericFrameInfo> frame_infos;
   bool include_dependency_structure = false;

   for (size_t s = 0; s < configs.size(); ++s) {
     const ScalableVideoController::LayerFrameConfig& config = configs[s];
     frame_infos.push_back(svc_controller_->OnEncodeDone(config));
     FrameEncodeSettings& settings = encode_settings.emplace_back();
     settings.rate_options = VideoEncoderInterface::FrameEncodeSettings::Cqp{
         .target_qp = target_qp_};
     settings.spatial_id = config.SpatialId();
     settings.temporal_id = config.TemporalId();
     const int num = layer_configs_.scaling_factor_num[s];
     const int den = layer_configs_.scaling_factor_den[s];
     settings.resolution = {(frame_buffer->width() * num / den),
                            (frame_buffer->height() * num / den)};

     bool buffer_updated = false;
     for (const CodecBufferUsage& buffer : config.Buffers()) {
       if (buffer.referenced) {
         settings.reference_buffers.push_back(buffer.id);
       }
       if (buffer.updated) {
         RTC_CHECK(!buffer_updated);
         settings.update_buffer = buffer.id;
         buffer_updated = true;
       }
     }

     if (settings.reference_buffers.empty()) {
       settings.frame_type = FrameType::kKeyframe;
       include_dependency_structure = true;
     }

     absl::optional<FrameDependencyStructure> dependency_structure;
     if (include_dependency_structure) {
       dependency_structure = svc_controller_->DependencyStructure();
     }

     settings.result_callback =
         [cb = callback, ds = std::move(dependency_structure),
          info = std::move(frame_infos[settings.spatial_id])](
             const VideoEncoderInterface::EncodeResult& result) mutable {
           auto* data =
               absl::get_if<VideoEncoderInterface::EncodedData>(&result);

           EncodeResult res;
           if (!data) {
             res.oh_no = true;
             cb(res);
             return;
           }

           res.frame_type = data->frame_type;
           res.bitstream_data = std::move(data->bitstream_data);
           res.generic_frame_info = info;
           if (res.frame_type == FrameType::kKeyframe) {
             res.dependency_structure = ds;
           }
           cb(res);
         };
   }

   encoder_->Encode(std::move(frame_buffer),
                    {.presentation_timestamp = presentation_timestamp_},
                    std::move(encode_settings));
   presentation_timestamp_ += 1 / Frequency::Hertz(fps_);
 }

 }  // namespace webrtc
	/*
	* Copyright (c) 2024 The WebRTC project authors. All Rights Reserved.
	*
	* Use of this source code is governed by a BSD-style license
	* that can be found in the LICENSE file in the root of the source
	* tree. An additional intellectual property rights grant can be found
	* in the file PATENTS. All contributing project authors may
	* be found in the AUTHORS file in the root of the source tree.
	*/

	#include "api/video_codecs/simple_encoder_wrapper.h"

	#include <algorithm>
	#include <memory>
	#include <string>
	#include <utility>
	#include <vector>

	#include "absl/algorithm/container.h"
	#include "absl/types/variant.h"
	#include "api/video_codecs/scalability_mode.h"
	#include "api/video_codecs/scalability_mode_helper.h"
	#include "api/video_codecs/video_encoder_factory_interface.h"
	#include "api/video_codecs/video_encoder_interface.h"
	#include "modules/video_coding/svc/create_scalability_structure.h"

	namespace webrtc {
	using PredictionConstraints =
	VideoEncoderFactoryInterface::Capabilities::PredictionConstraints;
	using FrameEncodeSettings = VideoEncoderInterface::FrameEncodeSettings;

	namespace {
	enum class Inter { kS, kL, kKey };
	enum class Scaling { k1_2, k2_3 };
	std::string SvcToString(int spatial_layers,
	int temporal_layers,
	Inter inter,
	Scaling scaling) {
	RTC_CHECK(spatial_layers > 1 \|\| inter == Inter::kL);
	std::string res;
	res += inter == Inter::kS ? "S" : "L";
	res += std::to_string(spatial_layers);
	res += "T";
	res += std::to_string(temporal_layers);
	if (scaling == Scaling::k2_3) {
	res += "h";
	}
	if (inter == Inter::kKey) {
	res += "_KEY";
	}

	return res;
	}
	} // namespace

	// static
	std::vector<std::string> SimpleEncoderWrapper::SupportedWebrtcSvcModes(
	const PredictionConstraints& prediction_constraints) {
	std::vector<std::string> res;

	const int max_spatial_layers =
	std::min(3, prediction_constraints.max_spatial_layers);
	const int max_temporal_layers =
	std::min(3, prediction_constraints.max_temporal_layers);
	const bool scale_by_half = absl::c_linear_search(
	prediction_constraints.scaling_factors, Rational{1, 2});
	const bool scale_by_two_thirds = absl::c_linear_search(
	prediction_constraints.scaling_factors, Rational{2, 3});
	const bool inter_layer =
	prediction_constraints.max_references > 1 &&
	prediction_constraints.buffer_space_type !=
	PredictionConstraints::BufferSpaceType::kMultiInstance;

	for (int s = 1; s <= max_spatial_layers; ++s) {
	for (int t = 1; t <= max_temporal_layers; ++t) {
	if (prediction_constraints.num_buffers > ((std::max(1, t - 1) * s) - 1)) {
	if (s == 1 \|\| inter_layer) {
	res.push_back(SvcToString(s, t, Inter::kL, Scaling::k1_2));
	if (s == 1) {
	continue;
	}
	}
	if (scale_by_half) {
	res.push_back(SvcToString(s, t, Inter::kS, Scaling::k1_2));
	if (inter_layer) {
	res.push_back(SvcToString(s, t, Inter::kKey, Scaling::k1_2));
	}
	}
	if (scale_by_two_thirds) {
	res.push_back(SvcToString(s, t, Inter::kS, Scaling::k2_3));
	if (inter_layer) {
	res.push_back(SvcToString(s, t, Inter::kKey, Scaling::k2_3));
	res.push_back(SvcToString(s, t, Inter::kL, Scaling::k2_3));
	}
	}
	}
	}
	}

	return res;
	}

	// static
	std::unique_ptr<SimpleEncoderWrapper> SimpleEncoderWrapper::Create(
	std::unique_ptr<VideoEncoderInterface> encoder,
	absl::string_view scalability_mode) {
	if (!encoder) {
	return nullptr;
	}

	absl::optional<ScalabilityMode> sm =
	ScalabilityModeStringToEnum(scalability_mode);
	if (!sm) {
	return nullptr;
	}

	std::unique_ptr<ScalableVideoController> svc_controller =
	CreateScalabilityStructure(*sm);
	if (!svc_controller) {
	return nullptr;
	}

	return std::make_unique<SimpleEncoderWrapper>(std::move(encoder),
	std::move(svc_controller));
	}

	SimpleEncoderWrapper::SimpleEncoderWrapper(
	std::unique_ptr<VideoEncoderInterface> encoder,
	std::unique_ptr<ScalableVideoController> svc_controller)
	: encoder_(std::move(encoder)),
	svc_controller_(std::move(svc_controller)),
	layer_configs_(svc_controller_->StreamConfig()) {}

	void SimpleEncoderWrapper::SetEncodeQp(int qp) {
	target_qp_ = qp;
	}

	void SimpleEncoderWrapper::SetEncodeFps(int fps) {
	fps_ = fps;
	}

	void SimpleEncoderWrapper::Encode(
	rtc::scoped_refptr<webrtc::VideoFrameBuffer> frame_buffer,
	bool force_keyframe,
	EncodeResultCallback callback) {
	std::vector<ScalableVideoController::LayerFrameConfig> configs =
	svc_controller_->NextFrameConfig(force_keyframe);
	std::vector<FrameEncodeSettings> encode_settings;
	std::vector<GenericFrameInfo> frame_infos;
	bool include_dependency_structure = false;

	for (size_t s = 0; s < configs.size(); ++s) {
	const ScalableVideoController::LayerFrameConfig& config = configs[s];
	frame_infos.push_back(svc_controller_->OnEncodeDone(config));
	FrameEncodeSettings& settings = encode_settings.emplace_back();
	settings.rate_options = VideoEncoderInterface::FrameEncodeSettings::Cqp{
	.target_qp = target_qp_};
	settings.spatial_id = config.SpatialId();
	settings.temporal_id = config.TemporalId();
	const int num = layer_configs_.scaling_factor_num[s];
	const int den = layer_configs_.scaling_factor_den[s];
	settings.resolution = {(frame_buffer->width() * num / den),
	(frame_buffer->height() * num / den)};

	bool buffer_updated = false;
	for (const CodecBufferUsage& buffer : config.Buffers()) {
	if (buffer.referenced) {
	settings.reference_buffers.push_back(buffer.id);
	}
	if (buffer.updated) {
	RTC_CHECK(!buffer_updated);
	settings.update_buffer = buffer.id;
	buffer_updated = true;
	}
	}

	if (settings.reference_buffers.empty()) {
	settings.frame_type = FrameType::kKeyframe;
	include_dependency_structure = true;
	}

	absl::optional<FrameDependencyStructure> dependency_structure;
	if (include_dependency_structure) {
	dependency_structure = svc_controller_->DependencyStructure();
	}

	settings.result_callback =
	[cb = callback, ds = std::move(dependency_structure),
	info = std::move(frame_infos[settings.spatial_id])](
	const VideoEncoderInterface::EncodeResult& result) mutable {
	auto* data =
	absl::get_if<VideoEncoderInterface::EncodedData>(&result);

	EncodeResult res;
	if (!data) {
	res.oh_no = true;
	cb(res);
	return;
	}

	res.frame_type = data->frame_type;
	res.bitstream_data = std::move(data->bitstream_data);
	res.generic_frame_info = info;
	if (res.frame_type == FrameType::kKeyframe) {
	res.dependency_structure = ds;
	}
	cb(res);
	};
	}

	encoder_->Encode(std::move(frame_buffer),
	{.presentation_timestamp = presentation_timestamp_},
	std::move(encode_settings));
	presentation_timestamp_ += 1 / Frequency::Hertz(fps_);
	}

	} // namespace webrtc