// blob: 086970afd67252eeafa316a5ca9113ae09e35f9f [file] [log] [blame]
/*
* Copyright (c) 2010 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "media/base/video_adapter.h"
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdlib>
#include <limits>
#include <numeric>
#include <optional>
#include <string>
#include <utility>
#include "api/video/resolution.h"
#include "api/video/video_source_interface.h"
#include "media/base/video_common.h"
#include "rtc_base/checks.h"
#include "rtc_base/logging.h"
#include "rtc_base/strings/string_builder.h"
#include "rtc_base/synchronization/mutex.h"
#include "rtc_base/time_utils.h"
namespace {
// A rational scale factor `numerator / denominator` that is applied to both
// the width and the height of a frame.
struct Fraction {
  int numerator;
  int denominator;

  // Reduces the fraction to lowest terms. Nothing is done when the greatest
  // common divisor is 0 (both members are 0) or 1, so a degenerate 0/0
  // fraction cannot trigger a division by zero.
  void DivideByGcd() {
    const int g = std::gcd(numerator, denominator);
    if (g <= 1)
      return;
    numerator /= g;
    denominator /= g;
  }

  // Determines number of output pixels if both width and height of an input of
  // `input_pixels` pixels is scaled with the fraction numerator / denominator.
  // Both terms are widened to 64 bits before being squared; squaring them as
  // `int` would overflow once a term exceeds 46340.
  int scale_pixel_count(int input_pixels) const {
    const int64_t wide_numerator = numerator;
    const int64_t wide_denominator = denominator;
    return static_cast<int>((wide_numerator * wide_numerator * input_pixels) /
                            (wide_denominator * wide_denominator));
  }
};
// Snaps `value_to_round` onto the grid of multiples of `multiple`. The next
// multiple at or above the value is preferred; when that would exceed
// `max_value`, the largest multiple not exceeding `max_value` is used instead.
int roundUp(int value_to_round, int multiple, int max_value) {
  const int steps_rounded_up = (value_to_round + multiple - 1) / multiple;
  const int rounded_up = steps_rounded_up * multiple;
  if (rounded_up <= max_value) {
    return rounded_up;
  }
  const int steps_within_max = max_value / multiple;
  return steps_within_max * multiple;
}
// Picks the scale factor, out of a fixed ladder of down-scaling steps, whose
// resulting pixel count is closest to `target_pixels` without exceeding
// `max_pixels`. The input is never scaled up: 1/1 is returned when the input
// already has at most `target_pixels` pixels. The result is reduced to lowest
// terms.
Fraction FindScale(int input_width,
                   int input_height,
                   int target_pixels,
                   int max_pixels) {
  // A non-positive target or a max below the target is a caller error.
  RTC_DCHECK_GT(target_pixels, 0);
  RTC_DCHECK_GT(max_pixels, 0);
  RTC_DCHECK_GE(max_pixels, target_pixels);

  const int input_pixels = input_width * input_height;
  if (input_pixels <= target_pixels) {
    return Fraction{1, 1};
  }

  // Seed the ladder. An unreduced 6/6 lets the first step be 2/3 and an
  // unreduced 36/36 lets the first two steps be 2/3; afterwards the steps
  // alternate 3/4, 2/3, 3/4, ... Divisibility by 9 implies divisibility by 3,
  // so the stronger condition is tested first.
  const bool dims_divisible_by_3 =
      input_width % 3 == 0 && input_height % 3 == 0;
  const bool dims_divisible_by_9 =
      input_width % 9 == 0 && input_height % 9 == 0;
  Fraction candidate = Fraction{1, 1};
  if (dims_divisible_by_9) {
    candidate = Fraction{36, 36};
  } else if (dims_divisible_by_3) {
    candidate = Fraction{6, 6};
  }

  // Closest candidate seen so far and its absolute distance to the target.
  // The unscaled input only takes part when it respects `max_pixels`.
  Fraction closest = Fraction{1, 1};
  int closest_distance = input_pixels <= max_pixels
                             ? std::abs(input_pixels - target_pixels)
                             : std::numeric_limits<int>::max();

  // Walk down the ladder until the candidate no longer exceeds the target.
  // Starting at 1280x720 the ladder is (3/4) => 960x540, (1/2) => 640x360,
  // (3/8) => 480x270, (1/4) => 320x180, (3/16) => 240x135, (1/8) => 160x90.
  int candidate_pixels = candidate.scale_pixel_count(input_pixels);
  while (candidate_pixels > target_pixels) {
    const bool two_thirds_step_possible =
        candidate.numerator % 3 == 0 && candidate.denominator % 2 == 0;
    if (two_thirds_step_possible) {
      // Multiply by 2/3.
      candidate.numerator /= 3;
      candidate.denominator /= 2;
    } else {
      // Multiply by 3/4.
      candidate.numerator *= 3;
      candidate.denominator *= 4;
    }

    candidate_pixels = candidate.scale_pixel_count(input_pixels);
    if (candidate_pixels > max_pixels) {
      continue;
    }
    const int distance = std::abs(target_pixels - candidate_pixels);
    if (distance < closest_distance) {
      closest_distance = distance;
      closest = candidate;
    }
  }

  closest.DivideByGcd();
  return closest;
}
// Returns `in` with the two elements of the pair exchanged, or an empty
// optional when `in` is empty.
std::optional<std::pair<int, int>> Swap(
    const std::optional<std::pair<int, int>>& in) {
  std::optional<std::pair<int, int>> swapped;
  if (in.has_value()) {
    swapped.emplace(in->second, in->first);
  }
  return swapped;
}
} // namespace
namespace cricket {
// Constructs an adapter that initially aligns its output to
// `source_resolution_alignment`; the value is stored both as the fixed source
// alignment and as the current effective alignment.
// All frame statistics counters and the previous output size start at zero.
// The pixel-count and framerate requests start at
// std::numeric_limits<int>::max(), which leaves every std::min() they take
// part in unaffected and which GetMaxFramerate() reports as infinity.
VideoAdapter::VideoAdapter(int source_resolution_alignment)
: frames_in_(0),
frames_out_(0),
frames_scaled_(0),
adaption_changes_(0),
previous_width_(0),
previous_height_(0),
source_resolution_alignment_(source_resolution_alignment),
resolution_alignment_(source_resolution_alignment),
resolution_request_target_pixel_count_(std::numeric_limits<int>::max()),
resolution_request_max_pixel_count_(std::numeric_limits<int>::max()),
max_framerate_request_(std::numeric_limits<int>::max()) {}
// Default constructor: delegates to the constructor above with a source
// resolution alignment of 1.
VideoAdapter::VideoAdapter() : VideoAdapter(1) {}
// Nothing to release explicitly; members clean up after themselves.
VideoAdapter::~VideoAdapter() = default;
// Returns true when the frame captured at `in_timestamp_ns` should be dropped
// to honour the framerate limit. The limit handed to the framerate controller
// is the lower of `max_framerate_request_` and, when set,
// `output_format_request_.max_fps`.
bool VideoAdapter::DropFrame(int64_t in_timestamp_ns) {
  const int effective_max_fps =
      std::min(max_framerate_request_,
               output_format_request_.max_fps.value_or(max_framerate_request_));
  framerate_controller_.SetMaxFramerate(effective_max_fps);
  return framerate_controller_.ShouldDropFrame(in_timestamp_ns);
}
// Decides whether an input frame of `in_width` x `in_height` captured at
// `in_timestamp_ns` should be forwarded and, if so, how it should be cropped
// and scaled.
//
// Returns false when the frame should be dropped, either because the effective
// max pixel count is not positive or because DropFrame() asks for it; none of
// the output parameters are written in that case. Returns true otherwise,
// after writing the crop size to `cropped_width`/`cropped_height` and the
// final output size to `out_width`/`out_height`.
//
// `mutex_` is held for the whole call, and the frame statistics counters and
// the remembered previous output size are updated along the way.
bool VideoAdapter::AdaptFrameResolution(int in_width,
int in_height,
int64_t in_timestamp_ns,
int* cropped_width,
int* cropped_height,
int* out_width,
int* out_height) {
webrtc::MutexLock lock(&mutex_);
++frames_in_;
// The max output pixel count is the minimum of the requests from
// OnOutputFormatRequest and OnResolutionFramerateRequest.
int max_pixel_count = resolution_request_max_pixel_count_;
// Select target aspect ratio and max pixel count depending on input frame
// orientation. Square input (width == height) uses the portrait settings.
std::optional<std::pair<int, int>> target_aspect_ratio;
if (in_width > in_height) {
target_aspect_ratio = output_format_request_.target_landscape_aspect_ratio;
if (output_format_request_.max_landscape_pixel_count)
max_pixel_count = std::min(
max_pixel_count, *output_format_request_.max_landscape_pixel_count);
} else {
target_aspect_ratio = output_format_request_.target_portrait_aspect_ratio;
if (output_format_request_.max_portrait_pixel_count)
max_pixel_count = std::min(
max_pixel_count, *output_format_request_.max_portrait_pixel_count);
}
// The target can never exceed the max, which FindScale() relies on.
int target_pixel_count =
std::min(resolution_request_target_pixel_count_, max_pixel_count);
// Drop the input frame if necessary.
if (max_pixel_count <= 0 || DropFrame(in_timestamp_ns)) {
// Show VAdapt log every 90 frames dropped. (3 seconds)
if ((frames_in_ - frames_out_) % 90 == 0) {
// TODO(fbarchard): Reduce to LS_VERBOSE when adapter info is not needed
// in default calls.
RTC_LOG(LS_INFO) << "VAdapt Drop Frame: scaled " << frames_scaled_
<< " / out " << frames_out_ << " / in " << frames_in_
<< " Changes: " << adaption_changes_
<< " Input: " << in_width << "x" << in_height
<< " timestamp: " << in_timestamp_ns
<< " Output fps: " << max_framerate_request_ << "/"
<< output_format_request_.max_fps.value_or(-1)
<< " alignment: " << resolution_alignment_;
}
// Drop frame.
return false;
}
// Calculate how the input should be cropped. Without a valid (positive)
// target aspect ratio the whole input is kept.
if (!target_aspect_ratio || target_aspect_ratio->first <= 0 ||
target_aspect_ratio->second <= 0) {
*cropped_width = in_width;
*cropped_height = in_height;
} else {
const float requested_aspect =
target_aspect_ratio->first /
static_cast<float>(target_aspect_ratio->second);
// std::min() caps each cropped dimension at the input dimension, so the
// crop never extends beyond the input frame.
*cropped_width =
std::min(in_width, static_cast<int>(in_height * requested_aspect));
*cropped_height =
std::min(in_height, static_cast<int>(in_width / requested_aspect));
}
const Fraction scale =
FindScale(*cropped_width, *cropped_height, target_pixel_count,
max_pixel_count);
// Adjust cropping slightly to get correctly aligned output size and a perfect
// scale factor. Rounding to a multiple of denominator * alignment makes the
// divisions below exact and keeps the scaled size aligned.
*cropped_width = roundUp(*cropped_width,
scale.denominator * resolution_alignment_, in_width);
*cropped_height = roundUp(
*cropped_height, scale.denominator * resolution_alignment_, in_height);
RTC_DCHECK_EQ(0, *cropped_width % scale.denominator);
RTC_DCHECK_EQ(0, *cropped_height % scale.denominator);
// Calculate output size.
*out_width = *cropped_width / scale.denominator * scale.numerator;
*out_height = *cropped_height / scale.denominator * scale.numerator;
RTC_DCHECK_EQ(0, *out_width % resolution_alignment_);
RTC_DCHECK_EQ(0, *out_height % resolution_alignment_);
// Lastly, make the output size fit within the resolution restrictions as
// specified by `scale_resolution_down_to_`. This does not modify aspect ratio
// or cropping, only `out_width` and `out_height`.
if (scale_resolution_down_to_.has_value()) {
// Make frame and "scale to" have matching orientation.
webrtc::Resolution scale_resolution_down_to =
scale_resolution_down_to_.value();
if ((*out_width < *out_height) != (scale_resolution_down_to_->width <
scale_resolution_down_to_->height)) {
scale_resolution_down_to = {.width = scale_resolution_down_to_->height,
.height = scale_resolution_down_to_->width};
}
// Downscale by smallest scaling factor, if necessary.
if (*out_width > 0 && *out_height > 0 &&
(scale_resolution_down_to.width < *out_width ||
scale_resolution_down_to.height < *out_height)) {
double scale_factor = std::min(
scale_resolution_down_to.width / static_cast<double>(*out_width),
scale_resolution_down_to.height / static_cast<double>(*out_height));
// Re-align after scaling; roundUp() keeps each dimension within the
// requested restriction.
*out_width =
roundUp(std::round(*out_width * scale_factor), resolution_alignment_,
scale_resolution_down_to.width);
*out_height =
roundUp(std::round(*out_height * scale_factor), resolution_alignment_,
scale_resolution_down_to.height);
RTC_DCHECK_EQ(0, *out_width % resolution_alignment_);
RTC_DCHECK_EQ(0, *out_height % resolution_alignment_);
}
}
++frames_out_;
// Only the FindScale() factor counts towards `frames_scaled_`; the additional
// `scale_resolution_down_to_` downscale above is not reflected here.
if (scale.numerator != scale.denominator)
++frames_scaled_;
// A zero `previous_width_` (its initial value) suppresses the change log, so
// the first output frame is not reported as a size change.
if (previous_width_ &&
(previous_width_ != *out_width || previous_height_ != *out_height)) {
++adaption_changes_;
RTC_LOG(LS_INFO) << "Frame size changed: scaled " << frames_scaled_
<< " / out " << frames_out_ << " / in " << frames_in_
<< " Changes: " << adaption_changes_
<< " Input: " << in_width << "x" << in_height
<< " Scale: " << scale.numerator << "/"
<< scale.denominator << " Output: " << *out_width << "x"
<< *out_height << " fps: " << max_framerate_request_ << "/"
<< output_format_request_.max_fps.value_or(-1)
<< " alignment: " << resolution_alignment_;
}
previous_width_ = *out_width;
previous_height_ = *out_height;
return true;
}
void VideoAdapter::OnOutputFormatRequest(
const std::optional<VideoFormat>& format) {
std::optional<std::pair<int, int>> target_aspect_ratio;
std::optional<int> max_pixel_count;
std::optional<int> max_fps;
if (format) {
target_aspect_ratio = std::make_pair(format->width, format->height);
max_pixel_count = format->width * format->height;
if (format->interval > 0)
max_fps = rtc::kNumNanosecsPerSec / format->interval;
}
OnOutputFormatRequest(target_aspect_ratio, max_pixel_count, max_fps);
}
// Expands an orientation-agnostic request into a landscape and a portrait
// variant and forwards both. A valid aspect ratio (both sides positive) is
// given long-side-first for landscape and short-side-first for portrait;
// otherwise both variants stay unset. `max_pixel_count` is applied to both
// orientations.
void VideoAdapter::OnOutputFormatRequest(
    const std::optional<std::pair<int, int>>& target_aspect_ratio,
    const std::optional<int>& max_pixel_count,
    const std::optional<int>& max_fps) {
  std::optional<std::pair<int, int>> landscape_ratio;
  std::optional<std::pair<int, int>> portrait_ratio;

  const bool has_valid_ratio = target_aspect_ratio.has_value() &&
                               target_aspect_ratio->first > 0 &&
                               target_aspect_ratio->second > 0;
  if (has_valid_ratio) {
    // The frame orientation decides which variant is used later, so both are
    // prepared here.
    const auto [short_side, long_side] =
        std::minmax(target_aspect_ratio->first, target_aspect_ratio->second);
    landscape_ratio.emplace(long_side, short_side);
    portrait_ratio.emplace(short_side, long_side);
  }

  OnOutputFormatRequest(landscape_ratio, max_pixel_count, portrait_ratio,
                        max_pixel_count, max_fps);
}
void VideoAdapter::OnOutputFormatRequest(
const std::optional<std::pair<int, int>>& target_landscape_aspect_ratio,
const std::optional<int>& max_landscape_pixel_count,
const std::optional<std::pair<int, int>>& target_portrait_aspect_ratio,
const std::optional<int>& max_portrait_pixel_count,
const std::optional<int>& max_fps) {
webrtc::MutexLock lock(&mutex_);
OutputFormatRequest request = {
.target_landscape_aspect_ratio = target_landscape_aspect_ratio,
.max_landscape_pixel_count = max_landscape_pixel_count,
.target_portrait_aspect_ratio = target_portrait_aspect_ratio,
.max_portrait_pixel_count = max_portrait_pixel_count,
.max_fps = max_fps};
if (stashed_output_format_request_) {
// Save the output format request for later use in case the encoder making
// this call would become active, because currently all active encoders use
// scale_resolution_down_to instead.
stashed_output_format_request_ = request;
RTC_LOG(LS_INFO) << "Stashing OnOutputFormatRequest: "
<< stashed_output_format_request_->ToString();
} else {
output_format_request_ = request;
RTC_LOG(LS_INFO) << "Setting output_format_request_: "
<< output_format_request_.ToString();
}
framerate_controller_.Reset();
}
// Applies the restrictions carried by `sink_wants` under `mutex_`.
//
// The pixel-count, framerate and alignment requests are always taken over.
// The requested resolution additionally controls whether the output format
// request is active or stashed:
//  - no requested resolution: a stashed request, if any, is restored;
//  - requested resolution while an encoder without one is reported active:
//    the output format request is left as is;
//  - otherwise: the active request is stashed (unless one is stashed already)
//    and cleared, so only `scale_resolution_down_to_` applies.
void VideoAdapter::OnSinkWants(const rtc::VideoSinkWants& sink_wants) {
  webrtc::MutexLock lock(&mutex_);

  resolution_request_max_pixel_count_ = sink_wants.max_pixel_count;
  resolution_request_target_pixel_count_ =
      sink_wants.target_pixel_count.value_or(
          resolution_request_max_pixel_count_);
  max_framerate_request_ = sink_wants.max_framerate_fps;
  resolution_alignment_ =
      std::lcm(source_resolution_alignment_, sink_wants.resolution_alignment);

  if (!sink_wants.requested_resolution.has_value()) {
    scale_resolution_down_to_ = std::nullopt;
    // The active request may have been cleared by the
    // scale_resolution_down_to logic; the current encoder(s) do not want that,
    // so bring the stashed request back.
    if (stashed_output_format_request_.has_value()) {
      RTC_LOG(LS_INFO) << "Unstashing OnOutputFormatRequest: "
                       << stashed_output_format_request_->ToString();
      output_format_request_ = *stashed_output_format_request_;
      stashed_output_format_request_.reset();
    }
    return;
  }

  // Convert from std::optional<rtc::VideoSinkWants::FrameSize> to
  // std::optional<webrtc::Resolution>. Both are {int,int}.
  scale_resolution_down_to_ = {
      .width = sink_wants.requested_resolution->width,
      .height = sink_wants.requested_resolution->height};

  // Everything below is only needed when `scale_resolution_down_to` is
  // signalled back to the video source, which only happens if
  // `VideoStreamEncoderSettings::use_standard_scale_resolution_down_to` is
  // false.
  // TODO(https://crbug.com/webrtc/366284861): Delete the code below as part of
  // deleting this flag and only supporting the standard behavior.
  const bool encoder_without_requested_resolution_active =
      sink_wants.aggregates.has_value() &&
      sink_wants.aggregates->any_active_without_requested_resolution;
  if (encoder_without_requested_resolution_active) {
    return;
  }

  // The active output format request is about to be cleared. Keep a copy so it
  // can be restored if an encoder that does not use the requested-resolution
  // logic becomes active in the future.
  if (!stashed_output_format_request_.has_value()) {
    stashed_output_format_request_ = output_format_request_;
    RTC_LOG(LS_INFO) << "Stashing OnOutputFormatRequest: "
                     << stashed_output_format_request_->ToString();
  }

  // With the request cleared, `scale_resolution_down_to_` is applied instead,
  // inside AdaptFrameResolution().
  output_format_request_ = {};
}
// Returns the target pixel count most recently stored by OnSinkWants(), read
// under `mutex_`.
int VideoAdapter::GetTargetPixels() const {
  webrtc::MutexLock lock(&mutex_);
  const int target_pixels = resolution_request_target_pixel_count_;
  return target_pixels;
}
// Returns the framerate limit that is actually used to throttle frames: the
// lower of `max_framerate_request_` and, when set,
// `output_format_request_.max_fps` (the same combination DropFrame() uses).
// Infinity is returned when neither imposes a limit, i.e. the combined value
// is still std::numeric_limits<int>::max().
float VideoAdapter::GetMaxFramerate() const {
  webrtc::MutexLock lock(&mutex_);
  const int framerate =
      std::min(max_framerate_request_,
               output_format_request_.max_fps.value_or(max_framerate_request_));
  if (framerate == std::numeric_limits<int>::max()) {
    return std::numeric_limits<float>::infinity();
  }
  // Report the combined limit. Returning `max_framerate_request_` here would
  // ignore a lower `output_format_request_.max_fps`, and would report
  // INT_MAX as a float when only the output format request sets a limit.
  return static_cast<float>(framerate);
}
std::string VideoAdapter::OutputFormatRequest::ToString() const {
rtc::StringBuilder oss;
oss << "[ ";
if (target_landscape_aspect_ratio == Swap(target_portrait_aspect_ratio) &&
max_landscape_pixel_count == max_portrait_pixel_count) {
if (target_landscape_aspect_ratio) {
oss << target_landscape_aspect_ratio->first << "x"
<< target_landscape_aspect_ratio->second;
} else {
oss << "unset-resolution";
}
if (max_landscape_pixel_count) {
oss << " max_pixel_count: " << *max_landscape_pixel_count;
}
} else {
oss << "[ landscape: ";
if (target_landscape_aspect_ratio) {
oss << target_landscape_aspect_ratio->first << "x"
<< target_landscape_aspect_ratio->second;
} else {
oss << "unset";
}
if (max_landscape_pixel_count) {
oss << " max_pixel_count: " << *max_landscape_pixel_count;
}
oss << " ] [ portrait: ";
if (target_portrait_aspect_ratio) {
oss << target_portrait_aspect_ratio->first << "x"
<< target_portrait_aspect_ratio->second;
}
if (max_portrait_pixel_count) {
oss << " max_pixel_count: " << *max_portrait_pixel_count;
}
oss << " ]";
}
oss << " max_fps: ";
if (max_fps) {
oss << *max_fps;
} else {
oss << "unset";
}
oss << " ]";
return oss.Release();
}
} // namespace cricket