blob: f110e1c9cb5b5d75b2fd6bda97a5865b4b7bd4f7 [file] [log] [blame]
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "video/overuse_frame_detector.h"
#include <assert.h>
#include <math.h>
#include <algorithm>
#include <list>
#include <map>
#include <string>
#include <utility>
#include "api/video/video_frame.h"
#include "common_video/include/frame_callback.h"
#include "rtc_base/checks.h"
#include "rtc_base/logging.h"
#include "rtc_base/numerics/exp_filter.h"
#include "rtc_base/timeutils.h"
#include "system_wrappers/include/field_trial.h"
#if defined(WEBRTC_MAC) && !defined(WEBRTC_IOS)
#include <mach/mach.h>
#endif // defined(WEBRTC_MAC) && !defined(WEBRTC_IOS)
namespace webrtc {
namespace {
const int64_t kCheckForOveruseIntervalMs = 5000;
const int64_t kTimeToFirstCheckForOveruseMs = 100;
// Delay between consecutive rampups. (Used for quick recovery.)
const int kQuickRampUpDelayMs = 10 * 1000;
// Delay between rampup attempts. Initially uses standard, scales up to max.
const int kStandardRampUpDelayMs = 40 * 1000;
const int kMaxRampUpDelayMs = 240 * 1000;
// Expontential back-off factor, to prevent annoying up-down behaviour.
const double kRampUpBackoffFactor = 2.0;
// Max number of overuses detected before always applying the rampup delay.
const int kMaxOverusesBeforeApplyRampupDelay = 4;
// The maximum exponent to use in VCMExpFilter.
const float kMaxExp = 7.0f;
// Default value used before first reconfiguration.
const int kDefaultFrameRate = 30;
// Default sample diff, default frame rate.
const float kDefaultSampleDiffMs = 1000.0f / kDefaultFrameRate;
// A factor applied to the sample diff on OnTargetFramerateUpdated to determine
// a max limit for the sample diff. For instance, with a framerate of 30fps,
// the sample diff is capped to (1000 / 30) * 1.35 = 45ms. This prevents
// triggering too soon if there are individual very large outliers.
const float kMaxSampleDiffMarginFactor = 1.35f;
// Minimum framerate allowed for usage calculation. This prevents crazy long
// encode times from being accepted if the frame rate happens to be low.
const int kMinFramerate = 7;
const int kMaxFramerate = 30;
const auto kScaleReasonCpu = AdaptationObserverInterface::AdaptReason::kCpu;
} // namespace
CpuOveruseOptions::CpuOveruseOptions()
: high_encode_usage_threshold_percent(85),
frame_timeout_interval_ms(1500),
min_frame_samples(120),
min_process_count(3),
high_threshold_consecutive_count(2) {
#if defined(WEBRTC_MAC) && !defined(WEBRTC_IOS)
// This is proof-of-concept code for letting the physical core count affect
// the interval into which we attempt to scale. For now, the code is Mac OS
// specific, since that's the platform were we saw most problems.
// TODO(torbjorng): Enhance SystemInfo to return this metric.
mach_port_t mach_host = mach_host_self();
host_basic_info hbi = {};
mach_msg_type_number_t info_count = HOST_BASIC_INFO_COUNT;
kern_return_t kr =
host_info(mach_host, HOST_BASIC_INFO, reinterpret_cast<host_info_t>(&hbi),
&info_count);
mach_port_deallocate(mach_task_self(), mach_host);
int n_physical_cores;
if (kr != KERN_SUCCESS) {
// If we couldn't get # of physical CPUs, don't panic. Assume we have 1.
n_physical_cores = 1;
LOG(LS_ERROR) << "Failed to determine number of physical cores, assuming 1";
} else {
n_physical_cores = hbi.physical_cpu;
LOG(LS_INFO) << "Number of physical cores:" << n_physical_cores;
}
// Change init list default for few core systems. The assumption here is that
// encoding, which we measure here, takes about 1/4 of the processing of a
// two-way call. This is roughly true for x86 using both vp8 and vp9 without
// hardware encoding. Since we don't affect the incoming stream here, we only
// control about 1/2 of the total processing needs, but this is not taken into
// account.
if (n_physical_cores == 1)
high_encode_usage_threshold_percent = 20; // Roughly 1/4 of 100%.
else if (n_physical_cores == 2)
high_encode_usage_threshold_percent = 40; // Roughly 1/4 of 200%.
#endif // defined(WEBRTC_MAC) && !defined(WEBRTC_IOS)
// Note that we make the interval 2x+epsilon wide, since libyuv scaling steps
// are close to that (when squared). This wide interval makes sure that
// scaling up or down does not jump all the way across the interval.
low_encode_usage_threshold_percent =
(high_encode_usage_threshold_percent - 1) / 2;
}
// Class for calculating the processing usage on the send-side (the average
// processing time of a frame divided by the average time difference between
// captured frames).
class OveruseFrameDetector::SendProcessingUsage {
public:
explicit SendProcessingUsage(const CpuOveruseOptions& options)
: kWeightFactorFrameDiff(0.998f),
kWeightFactorProcessing(0.995f),
kInitialSampleDiffMs(40.0f),
count_(0),
options_(options),
max_sample_diff_ms_(kDefaultSampleDiffMs * kMaxSampleDiffMarginFactor),
filtered_processing_ms_(new rtc::ExpFilter(kWeightFactorProcessing)),
filtered_frame_diff_ms_(new rtc::ExpFilter(kWeightFactorFrameDiff)) {
Reset();
}
virtual ~SendProcessingUsage() {}
void Reset() {
count_ = 0;
max_sample_diff_ms_ = kDefaultSampleDiffMs * kMaxSampleDiffMarginFactor;
filtered_frame_diff_ms_->Reset(kWeightFactorFrameDiff);
filtered_frame_diff_ms_->Apply(1.0f, kInitialSampleDiffMs);
filtered_processing_ms_->Reset(kWeightFactorProcessing);
filtered_processing_ms_->Apply(1.0f, InitialProcessingMs());
}
void SetMaxSampleDiffMs(float diff_ms) { max_sample_diff_ms_ = diff_ms; }
void AddCaptureSample(float sample_ms) {
float exp = sample_ms / kDefaultSampleDiffMs;
exp = std::min(exp, kMaxExp);
filtered_frame_diff_ms_->Apply(exp, sample_ms);
}
void AddSample(float processing_ms, int64_t diff_last_sample_ms) {
++count_;
float exp = diff_last_sample_ms / kDefaultSampleDiffMs;
exp = std::min(exp, kMaxExp);
filtered_processing_ms_->Apply(exp, processing_ms);
}
virtual int Value() {
if (count_ < static_cast<uint32_t>(options_.min_frame_samples)) {
return static_cast<int>(InitialUsageInPercent() + 0.5f);
}
float frame_diff_ms = std::max(filtered_frame_diff_ms_->filtered(), 1.0f);
frame_diff_ms = std::min(frame_diff_ms, max_sample_diff_ms_);
float encode_usage_percent =
100.0f * filtered_processing_ms_->filtered() / frame_diff_ms;
return static_cast<int>(encode_usage_percent + 0.5);
}
private:
float InitialUsageInPercent() const {
// Start in between the underuse and overuse threshold.
return (options_.low_encode_usage_threshold_percent +
options_.high_encode_usage_threshold_percent) / 2.0f;
}
float InitialProcessingMs() const {
return InitialUsageInPercent() * kInitialSampleDiffMs / 100;
}
const float kWeightFactorFrameDiff;
const float kWeightFactorProcessing;
const float kInitialSampleDiffMs;
uint64_t count_;
const CpuOveruseOptions options_;
float max_sample_diff_ms_;
std::unique_ptr<rtc::ExpFilter> filtered_processing_ms_;
std::unique_ptr<rtc::ExpFilter> filtered_frame_diff_ms_;
};
// Class used for manual testing of overuse, enabled via field trial flag.
class OveruseFrameDetector::OverdoseInjector
: public OveruseFrameDetector::SendProcessingUsage {
public:
OverdoseInjector(const CpuOveruseOptions& options,
int64_t normal_period_ms,
int64_t overuse_period_ms,
int64_t underuse_period_ms)
: OveruseFrameDetector::SendProcessingUsage(options),
normal_period_ms_(normal_period_ms),
overuse_period_ms_(overuse_period_ms),
underuse_period_ms_(underuse_period_ms),
state_(State::kNormal),
last_toggling_ms_(-1) {
RTC_DCHECK_GT(overuse_period_ms, 0);
RTC_DCHECK_GT(normal_period_ms, 0);
LOG(LS_INFO) << "Simulating overuse with intervals " << normal_period_ms
<< "ms normal mode, " << overuse_period_ms
<< "ms overuse mode.";
}
~OverdoseInjector() override {}
int Value() override {
int64_t now_ms = rtc::TimeMillis();
if (last_toggling_ms_ == -1) {
last_toggling_ms_ = now_ms;
} else {
switch (state_) {
case State::kNormal:
if (now_ms > last_toggling_ms_ + normal_period_ms_) {
state_ = State::kOveruse;
last_toggling_ms_ = now_ms;
LOG(LS_INFO) << "Simulating CPU overuse.";
}
break;
case State::kOveruse:
if (now_ms > last_toggling_ms_ + overuse_period_ms_) {
state_ = State::kUnderuse;
last_toggling_ms_ = now_ms;
LOG(LS_INFO) << "Simulating CPU underuse.";
}
break;
case State::kUnderuse:
if (now_ms > last_toggling_ms_ + underuse_period_ms_) {
state_ = State::kNormal;
last_toggling_ms_ = now_ms;
LOG(LS_INFO) << "Actual CPU overuse measurements in effect.";
}
break;
}
}
rtc::Optional<int> overried_usage_value;
switch (state_) {
case State::kNormal:
break;
case State::kOveruse:
overried_usage_value.emplace(250);
break;
case State::kUnderuse:
overried_usage_value.emplace(5);
break;
}
return overried_usage_value.value_or(SendProcessingUsage::Value());
}
private:
const int64_t normal_period_ms_;
const int64_t overuse_period_ms_;
const int64_t underuse_period_ms_;
enum class State { kNormal, kOveruse, kUnderuse } state_;
int64_t last_toggling_ms_;
};
std::unique_ptr<OveruseFrameDetector::SendProcessingUsage>
OveruseFrameDetector::CreateSendProcessingUsage(
const CpuOveruseOptions& options) {
std::unique_ptr<SendProcessingUsage> instance;
std::string toggling_interval =
field_trial::FindFullName("WebRTC-ForceSimulatedOveruseIntervalMs");
if (!toggling_interval.empty()) {
int normal_period_ms = 0;
int overuse_period_ms = 0;
int underuse_period_ms = 0;
if (sscanf(toggling_interval.c_str(), "%d-%d-%d", &normal_period_ms,
&overuse_period_ms, &underuse_period_ms) == 3) {
if (normal_period_ms > 0 && overuse_period_ms > 0 &&
underuse_period_ms > 0) {
instance.reset(new OverdoseInjector(
options, normal_period_ms, overuse_period_ms, underuse_period_ms));
} else {
LOG(LS_WARNING)
<< "Invalid (non-positive) normal/overuse/underuse periods: "
<< normal_period_ms << " / " << overuse_period_ms << " / "
<< underuse_period_ms;
}
} else {
LOG(LS_WARNING) << "Malformed toggling interval: " << toggling_interval;
}
}
if (!instance) {
// No valid overuse simulation parameters set, use normal usage class.
instance.reset(new SendProcessingUsage(options));
}
return instance;
}
class OveruseFrameDetector::CheckOveruseTask : public rtc::QueuedTask {
public:
explicit CheckOveruseTask(OveruseFrameDetector* overuse_detector)
: overuse_detector_(overuse_detector) {
rtc::TaskQueue::Current()->PostDelayedTask(
std::unique_ptr<rtc::QueuedTask>(this), kTimeToFirstCheckForOveruseMs);
}
void Stop() {
RTC_CHECK(task_checker_.CalledSequentially());
overuse_detector_ = nullptr;
}
private:
bool Run() override {
RTC_CHECK(task_checker_.CalledSequentially());
if (!overuse_detector_)
return true; // This will make the task queue delete this task.
overuse_detector_->CheckForOveruse();
rtc::TaskQueue::Current()->PostDelayedTask(
std::unique_ptr<rtc::QueuedTask>(this), kCheckForOveruseIntervalMs);
// Return false to prevent this task from being deleted. Ownership has been
// transferred to the task queue when PostDelayedTask was called.
return false;
}
rtc::SequencedTaskChecker task_checker_;
OveruseFrameDetector* overuse_detector_;
};
OveruseFrameDetector::OveruseFrameDetector(
const CpuOveruseOptions& options,
AdaptationObserverInterface* observer,
EncodedFrameObserver* encoder_timing,
CpuOveruseMetricsObserver* metrics_observer)
: check_overuse_task_(nullptr),
options_(options),
observer_(observer),
encoder_timing_(encoder_timing),
metrics_observer_(metrics_observer),
num_process_times_(0),
// TODO(nisse): Use rtc::Optional
last_capture_time_us_(-1),
last_processed_capture_time_us_(-1),
num_pixels_(0),
max_framerate_(kDefaultFrameRate),
last_overuse_time_ms_(-1),
checks_above_threshold_(0),
num_overuse_detections_(0),
last_rampup_time_ms_(-1),
in_quick_rampup_(false),
current_rampup_delay_ms_(kStandardRampUpDelayMs),
usage_(CreateSendProcessingUsage(options)) {
task_checker_.Detach();
}
OveruseFrameDetector::~OveruseFrameDetector() {
RTC_DCHECK(!check_overuse_task_) << "StopCheckForOverUse must be called.";
}
void OveruseFrameDetector::StartCheckForOveruse() {
RTC_DCHECK_CALLED_SEQUENTIALLY(&task_checker_);
RTC_DCHECK(!check_overuse_task_);
check_overuse_task_ = new CheckOveruseTask(this);
}
void OveruseFrameDetector::StopCheckForOveruse() {
RTC_DCHECK_CALLED_SEQUENTIALLY(&task_checker_);
check_overuse_task_->Stop();
check_overuse_task_ = nullptr;
}
void OveruseFrameDetector::EncodedFrameTimeMeasured(int encode_duration_ms) {
RTC_DCHECK_CALLED_SEQUENTIALLY(&task_checker_);
if (!metrics_)
metrics_ = rtc::Optional<CpuOveruseMetrics>(CpuOveruseMetrics());
metrics_->encode_usage_percent = usage_->Value();
metrics_observer_->OnEncodedFrameTimeMeasured(encode_duration_ms, *metrics_);
}
bool OveruseFrameDetector::FrameSizeChanged(int num_pixels) const {
RTC_DCHECK_CALLED_SEQUENTIALLY(&task_checker_);
if (num_pixels != num_pixels_) {
return true;
}
return false;
}
bool OveruseFrameDetector::FrameTimeoutDetected(int64_t now_us) const {
RTC_DCHECK_CALLED_SEQUENTIALLY(&task_checker_);
if (last_capture_time_us_ == -1)
return false;
return (now_us - last_capture_time_us_) >
options_.frame_timeout_interval_ms * rtc::kNumMicrosecsPerMillisec;
}
void OveruseFrameDetector::ResetAll(int num_pixels) {
// Reset state, as a result resolution being changed. Do not however change
// the current frame rate back to the default.
RTC_DCHECK_CALLED_SEQUENTIALLY(&task_checker_);
num_pixels_ = num_pixels;
usage_->Reset();
frame_timing_.clear();
last_capture_time_us_ = -1;
last_processed_capture_time_us_ = -1;
num_process_times_ = 0;
metrics_ = rtc::Optional<CpuOveruseMetrics>();
OnTargetFramerateUpdated(max_framerate_);
}
void OveruseFrameDetector::OnTargetFramerateUpdated(int framerate_fps) {
RTC_DCHECK_CALLED_SEQUENTIALLY(&task_checker_);
RTC_DCHECK_GE(framerate_fps, 0);
max_framerate_ = std::min(kMaxFramerate, framerate_fps);
usage_->SetMaxSampleDiffMs((1000 / std::max(kMinFramerate, max_framerate_)) *
kMaxSampleDiffMarginFactor);
}
void OveruseFrameDetector::FrameCaptured(const VideoFrame& frame,
int64_t time_when_first_seen_us) {
RTC_DCHECK_CALLED_SEQUENTIALLY(&task_checker_);
if (FrameSizeChanged(frame.width() * frame.height()) ||
FrameTimeoutDetected(time_when_first_seen_us)) {
ResetAll(frame.width() * frame.height());
}
if (last_capture_time_us_ != -1)
usage_->AddCaptureSample(
1e-3 * (time_when_first_seen_us - last_capture_time_us_));
last_capture_time_us_ = time_when_first_seen_us;
frame_timing_.push_back(FrameTiming(frame.timestamp_us(), frame.timestamp(),
time_when_first_seen_us));
}
void OveruseFrameDetector::FrameSent(uint32_t timestamp,
int64_t time_sent_in_us) {
RTC_DCHECK_CALLED_SEQUENTIALLY(&task_checker_);
// Delay before reporting actual encoding time, used to have the ability to
// detect total encoding time when encoding more than one layer. Encoding is
// here assumed to finish within a second (or that we get enough long-time
// samples before one second to trigger an overuse even when this is not the
// case).
static const int64_t kEncodingTimeMeasureWindowMs = 1000;
for (auto& it : frame_timing_) {
if (it.timestamp == timestamp) {
it.last_send_us = time_sent_in_us;
break;
}
}
// TODO(pbos): Handle the case/log errors when not finding the corresponding
// frame (either very slow encoding or incorrect wrong timestamps returned
// from the encoder).
// This is currently the case for all frames on ChromeOS, so logging them
// would be spammy, and triggering overuse would be wrong.
// https://crbug.com/350106
while (!frame_timing_.empty()) {
FrameTiming timing = frame_timing_.front();
if (time_sent_in_us - timing.capture_us <
kEncodingTimeMeasureWindowMs * rtc::kNumMicrosecsPerMillisec) {
break;
}
if (timing.last_send_us != -1) {
int encode_duration_us =
static_cast<int>(timing.last_send_us - timing.capture_us);
if (encoder_timing_) {
// TODO(nisse): Update encoder_timing_ to also use us units.
encoder_timing_->OnEncodeTiming(timing.capture_time_us /
rtc::kNumMicrosecsPerMillisec,
encode_duration_us /
rtc::kNumMicrosecsPerMillisec);
}
if (last_processed_capture_time_us_ != -1) {
int64_t diff_us = timing.capture_us - last_processed_capture_time_us_;
usage_->AddSample(1e-3 * encode_duration_us, 1e-3 * diff_us);
}
last_processed_capture_time_us_ = timing.capture_us;
EncodedFrameTimeMeasured(encode_duration_us /
rtc::kNumMicrosecsPerMillisec);
}
frame_timing_.pop_front();
}
}
void OveruseFrameDetector::CheckForOveruse() {
RTC_DCHECK_CALLED_SEQUENTIALLY(&task_checker_);
++num_process_times_;
if (num_process_times_ <= options_.min_process_count || !metrics_)
return;
int64_t now_ms = rtc::TimeMillis();
if (IsOverusing(*metrics_)) {
// If the last thing we did was going up, and now have to back down, we need
// to check if this peak was short. If so we should back off to avoid going
// back and forth between this load, the system doesn't seem to handle it.
bool check_for_backoff = last_rampup_time_ms_ > last_overuse_time_ms_;
if (check_for_backoff) {
if (now_ms - last_rampup_time_ms_ < kStandardRampUpDelayMs ||
num_overuse_detections_ > kMaxOverusesBeforeApplyRampupDelay) {
// Going up was not ok for very long, back off.
current_rampup_delay_ms_ *= kRampUpBackoffFactor;
if (current_rampup_delay_ms_ > kMaxRampUpDelayMs)
current_rampup_delay_ms_ = kMaxRampUpDelayMs;
} else {
// Not currently backing off, reset rampup delay.
current_rampup_delay_ms_ = kStandardRampUpDelayMs;
}
}
last_overuse_time_ms_ = now_ms;
in_quick_rampup_ = false;
checks_above_threshold_ = 0;
++num_overuse_detections_;
if (observer_)
observer_->AdaptDown(kScaleReasonCpu);
} else if (IsUnderusing(*metrics_, now_ms)) {
last_rampup_time_ms_ = now_ms;
in_quick_rampup_ = true;
if (observer_)
observer_->AdaptUp(kScaleReasonCpu);
}
int rampup_delay =
in_quick_rampup_ ? kQuickRampUpDelayMs : current_rampup_delay_ms_;
LOG(LS_VERBOSE) << " Frame stats: "
<< " encode usage " << metrics_->encode_usage_percent
<< " overuse detections " << num_overuse_detections_
<< " rampup delay " << rampup_delay;
}
bool OveruseFrameDetector::IsOverusing(const CpuOveruseMetrics& metrics) {
RTC_DCHECK_CALLED_SEQUENTIALLY(&task_checker_);
if (metrics.encode_usage_percent >=
options_.high_encode_usage_threshold_percent) {
++checks_above_threshold_;
} else {
checks_above_threshold_ = 0;
}
return checks_above_threshold_ >= options_.high_threshold_consecutive_count;
}
bool OveruseFrameDetector::IsUnderusing(const CpuOveruseMetrics& metrics,
int64_t time_now) {
RTC_DCHECK_CALLED_SEQUENTIALLY(&task_checker_);
int delay = in_quick_rampup_ ? kQuickRampUpDelayMs : current_rampup_delay_ms_;
if (time_now < last_rampup_time_ms_ + delay)
return false;
return metrics.encode_usage_percent <
options_.low_encode_usage_threshold_percent;
}
} // namespace webrtc