| /* |
| * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. |
| * |
| * Use of this source code is governed by a BSD-style license |
| * that can be found in the LICENSE file in the root of the source |
| * tree. An additional intellectual property rights grant can be found |
| * in the file PATENTS. All contributing project authors may |
| * be found in the AUTHORS file in the root of the source tree. |
| */ |
| |
| #include "webrtc/video/overuse_frame_detector.h" |
| |
| #include <assert.h> |
| #include <math.h> |
| |
| #include <algorithm> |
| #include <list> |
| #include <map> |
| |
| #include "webrtc/base/checks.h" |
| #include "webrtc/base/exp_filter.h" |
| #include "webrtc/base/logging.h" |
| #include "webrtc/common_video/include/frame_callback.h" |
| #include "webrtc/system_wrappers/include/clock.h" |
| #include "webrtc/video_frame.h" |
| |
| #if defined(WEBRTC_MAC) && !defined(WEBRTC_IOS) |
| #include <mach/mach.h> |
| #endif // defined(WEBRTC_MAC) && !defined(WEBRTC_IOS) |
| |
| namespace webrtc { |
| |
namespace {
// Minimum time between two overuse evaluations performed by Process().
constexpr int64_t kProcessIntervalMs = 5000;

// Ramp-up delay used while in quick-recovery mode, i.e. between consecutive
// ramp-ups.
constexpr int kQuickRampUpDelayMs = 10 * 1000;
// Ramp-up delay used otherwise. Starts at the standard value and is scaled up
// to at most the max value.
constexpr int kStandardRampUpDelayMs = 40 * 1000;
constexpr int kMaxRampUpDelayMs = 240 * 1000;
// Exponential back-off factor, to prevent annoying up-down behaviour.
constexpr double kRampUpBackoffFactor = 2.0;

// Max number of overuses detected before always applying the rampup delay.
constexpr int kMaxOverusesBeforeApplyRampupDelay = 4;

// Elapsed times are divided by this interval to obtain the exponent that is
// passed to the exponential filters.
constexpr float kSampleDiffMs = 33.0f;
// The maximum exponent passed to the exponential filters.
constexpr float kMaxExp = 7.0f;

}  // namespace
| |
| CpuOveruseOptions::CpuOveruseOptions() |
| : high_encode_usage_threshold_percent(85), |
| frame_timeout_interval_ms(1500), |
| min_frame_samples(120), |
| min_process_count(3), |
| high_threshold_consecutive_count(2) { |
| #if defined(WEBRTC_MAC) && !defined(WEBRTC_IOS) |
| // This is proof-of-concept code for letting the physical core count affect |
| // the interval into which we attempt to scale. For now, the code is Mac OS |
| // specific, since that's the platform were we saw most problems. |
| // TODO(torbjorng): Enhance SystemInfo to return this metric. |
| |
| mach_port_t mach_host = mach_host_self(); |
| host_basic_info hbi = {}; |
| mach_msg_type_number_t info_count = HOST_BASIC_INFO_COUNT; |
| kern_return_t kr = |
| host_info(mach_host, HOST_BASIC_INFO, reinterpret_cast<host_info_t>(&hbi), |
| &info_count); |
| mach_port_deallocate(mach_task_self(), mach_host); |
| |
| int n_physical_cores; |
| if (kr != KERN_SUCCESS) { |
| // If we couldn't get # of physical CPUs, don't panic. Assume we have 1. |
| n_physical_cores = 1; |
| LOG(LS_ERROR) << "Failed to determine number of physical cores, assuming 1"; |
| } else { |
| n_physical_cores = hbi.physical_cpu; |
| LOG(LS_INFO) << "Number of physical cores:" << n_physical_cores; |
| } |
| |
| // Change init list default for few core systems. The assumption here is that |
| // encoding, which we measure here, takes about 1/4 of the processing of a |
| // two-way call. This is roughly true for x86 using both vp8 and vp9 without |
| // hardware encoding. Since we don't affect the incoming stream here, we only |
| // control about 1/2 of the total processing needs, but this is not taken into |
| // account. |
| if (n_physical_cores == 1) |
| high_encode_usage_threshold_percent = 20; // Roughly 1/4 of 100%. |
| else if (n_physical_cores == 2) |
| high_encode_usage_threshold_percent = 40; // Roughly 1/4 of 200%. |
| #endif // defined(WEBRTC_MAC) && !defined(WEBRTC_IOS) |
| |
| // Note that we make the interval 2x+epsilon wide, since libyuv scaling steps |
| // are close to that (when squared). This wide interval makes sure that |
| // scaling up or down does not jump all the way across the interval. |
| low_encode_usage_threshold_percent = |
| (high_encode_usage_threshold_percent - 1) / 2; |
| } |
| |
| // Class for calculating the processing usage on the send-side (the average |
| // processing time of a frame divided by the average time difference between |
| // captured frames). |
| class OveruseFrameDetector::SendProcessingUsage { |
| public: |
| explicit SendProcessingUsage(const CpuOveruseOptions& options) |
| : kWeightFactorFrameDiff(0.998f), |
| kWeightFactorProcessing(0.995f), |
| kInitialSampleDiffMs(40.0f), |
| kMaxSampleDiffMs(45.0f), |
| count_(0), |
| options_(options), |
| filtered_processing_ms_(new rtc::ExpFilter(kWeightFactorProcessing)), |
| filtered_frame_diff_ms_(new rtc::ExpFilter(kWeightFactorFrameDiff)) { |
| Reset(); |
| } |
| ~SendProcessingUsage() {} |
| |
| void Reset() { |
| count_ = 0; |
| filtered_frame_diff_ms_->Reset(kWeightFactorFrameDiff); |
| filtered_frame_diff_ms_->Apply(1.0f, kInitialSampleDiffMs); |
| filtered_processing_ms_->Reset(kWeightFactorProcessing); |
| filtered_processing_ms_->Apply(1.0f, InitialProcessingMs()); |
| } |
| |
| void AddCaptureSample(float sample_ms) { |
| float exp = sample_ms / kSampleDiffMs; |
| exp = std::min(exp, kMaxExp); |
| filtered_frame_diff_ms_->Apply(exp, sample_ms); |
| } |
| |
| void AddSample(float processing_ms, int64_t diff_last_sample_ms) { |
| ++count_; |
| float exp = diff_last_sample_ms / kSampleDiffMs; |
| exp = std::min(exp, kMaxExp); |
| filtered_processing_ms_->Apply(exp, processing_ms); |
| } |
| |
| int Value() const { |
| if (count_ < static_cast<uint32_t>(options_.min_frame_samples)) { |
| return static_cast<int>(InitialUsageInPercent() + 0.5f); |
| } |
| float frame_diff_ms = std::max(filtered_frame_diff_ms_->filtered(), 1.0f); |
| frame_diff_ms = std::min(frame_diff_ms, kMaxSampleDiffMs); |
| float encode_usage_percent = |
| 100.0f * filtered_processing_ms_->filtered() / frame_diff_ms; |
| return static_cast<int>(encode_usage_percent + 0.5); |
| } |
| |
| private: |
| float InitialUsageInPercent() const { |
| // Start in between the underuse and overuse threshold. |
| return (options_.low_encode_usage_threshold_percent + |
| options_.high_encode_usage_threshold_percent) / 2.0f; |
| } |
| |
| float InitialProcessingMs() const { |
| return InitialUsageInPercent() * kInitialSampleDiffMs / 100; |
| } |
| |
| const float kWeightFactorFrameDiff; |
| const float kWeightFactorProcessing; |
| const float kInitialSampleDiffMs; |
| const float kMaxSampleDiffMs; |
| uint64_t count_; |
| const CpuOveruseOptions options_; |
| std::unique_ptr<rtc::ExpFilter> filtered_processing_ms_; |
| std::unique_ptr<rtc::ExpFilter> filtered_frame_diff_ms_; |
| }; |
| |
| OveruseFrameDetector::OveruseFrameDetector( |
| Clock* clock, |
| const CpuOveruseOptions& options, |
| CpuOveruseObserver* observer, |
| EncodedFrameObserver* encoder_timing, |
| CpuOveruseMetricsObserver* metrics_observer) |
| : options_(options), |
| observer_(observer), |
| encoder_timing_(encoder_timing), |
| metrics_observer_(metrics_observer), |
| clock_(clock), |
| num_process_times_(0), |
| last_capture_time_ms_(-1), |
| last_processed_capture_time_ms_(-1), |
| num_pixels_(0), |
| next_process_time_ms_(clock_->TimeInMilliseconds()), |
| last_overuse_time_ms_(-1), |
| checks_above_threshold_(0), |
| num_overuse_detections_(0), |
| last_rampup_time_ms_(-1), |
| in_quick_rampup_(false), |
| current_rampup_delay_ms_(kStandardRampUpDelayMs), |
| usage_(new SendProcessingUsage(options)) { |
| RTC_DCHECK(metrics_observer); |
| processing_thread_.DetachFromThread(); |
| } |
| |
| OveruseFrameDetector::~OveruseFrameDetector() { |
| } |
| |
| void OveruseFrameDetector::EncodedFrameTimeMeasured(int encode_duration_ms) { |
| if (!metrics_) |
| metrics_ = rtc::Optional<CpuOveruseMetrics>(CpuOveruseMetrics()); |
| metrics_->encode_usage_percent = usage_->Value(); |
| |
| metrics_observer_->OnEncodedFrameTimeMeasured(encode_duration_ms, *metrics_); |
| } |
| |
| int64_t OveruseFrameDetector::TimeUntilNextProcess() { |
| RTC_DCHECK(processing_thread_.CalledOnValidThread()); |
| return next_process_time_ms_ - clock_->TimeInMilliseconds(); |
| } |
| |
| bool OveruseFrameDetector::FrameSizeChanged(int num_pixels) const { |
| if (num_pixels != num_pixels_) { |
| return true; |
| } |
| return false; |
| } |
| |
| bool OveruseFrameDetector::FrameTimeoutDetected(int64_t now) const { |
| if (last_capture_time_ms_ == -1) |
| return false; |
| return (now - last_capture_time_ms_) > options_.frame_timeout_interval_ms; |
| } |
| |
| void OveruseFrameDetector::ResetAll(int num_pixels) { |
| num_pixels_ = num_pixels; |
| usage_->Reset(); |
| frame_timing_.clear(); |
| last_capture_time_ms_ = -1; |
| last_processed_capture_time_ms_ = -1; |
| num_process_times_ = 0; |
| metrics_ = rtc::Optional<CpuOveruseMetrics>(); |
| } |
| |
| void OveruseFrameDetector::FrameCaptured(const VideoFrame& frame) { |
| rtc::CritScope cs(&crit_); |
| |
| int64_t now = clock_->TimeInMilliseconds(); |
| if (FrameSizeChanged(frame.width() * frame.height()) || |
| FrameTimeoutDetected(now)) { |
| ResetAll(frame.width() * frame.height()); |
| } |
| |
| if (last_capture_time_ms_ != -1) |
| usage_->AddCaptureSample(now - last_capture_time_ms_); |
| |
| last_capture_time_ms_ = now; |
| |
| frame_timing_.push_back( |
| FrameTiming(frame.ntp_time_ms(), frame.timestamp(), now)); |
| } |
| |
| void OveruseFrameDetector::FrameSent(uint32_t timestamp) { |
| rtc::CritScope cs(&crit_); |
| // Delay before reporting actual encoding time, used to have the ability to |
| // detect total encoding time when encoding more than one layer. Encoding is |
| // here assumed to finish within a second (or that we get enough long-time |
| // samples before one second to trigger an overuse even when this is not the |
| // case). |
| static const int64_t kEncodingTimeMeasureWindowMs = 1000; |
| int64_t now = clock_->TimeInMilliseconds(); |
| for (auto& it : frame_timing_) { |
| if (it.timestamp == timestamp) { |
| it.last_send_ms = now; |
| break; |
| } |
| } |
| // TODO(pbos): Handle the case/log errors when not finding the corresponding |
| // frame (either very slow encoding or incorrect wrong timestamps returned |
| // from the encoder). |
| // This is currently the case for all frames on ChromeOS, so logging them |
| // would be spammy, and triggering overuse would be wrong. |
| // https://crbug.com/350106 |
| while (!frame_timing_.empty()) { |
| FrameTiming timing = frame_timing_.front(); |
| if (now - timing.capture_ms < kEncodingTimeMeasureWindowMs) |
| break; |
| if (timing.last_send_ms != -1) { |
| int encode_duration_ms = |
| static_cast<int>(timing.last_send_ms - timing.capture_ms); |
| if (encoder_timing_) { |
| encoder_timing_->OnEncodeTiming(timing.capture_ntp_ms, |
| encode_duration_ms); |
| } |
| if (last_processed_capture_time_ms_ != -1) { |
| int64_t diff_ms = timing.capture_ms - last_processed_capture_time_ms_; |
| usage_->AddSample(encode_duration_ms, diff_ms); |
| } |
| last_processed_capture_time_ms_ = timing.capture_ms; |
| EncodedFrameTimeMeasured(encode_duration_ms); |
| } |
| frame_timing_.pop_front(); |
| } |
| } |
| |
| void OveruseFrameDetector::Process() { |
| RTC_DCHECK(processing_thread_.CalledOnValidThread()); |
| |
| int64_t now = clock_->TimeInMilliseconds(); |
| |
| // Used to protect against Process() being called too often. |
| if (now < next_process_time_ms_) |
| return; |
| |
| next_process_time_ms_ = now + kProcessIntervalMs; |
| |
| CpuOveruseMetrics current_metrics; |
| { |
| rtc::CritScope cs(&crit_); |
| ++num_process_times_; |
| if (num_process_times_ <= options_.min_process_count || !metrics_) |
| return; |
| |
| current_metrics = *metrics_; |
| } |
| |
| if (IsOverusing(current_metrics)) { |
| // If the last thing we did was going up, and now have to back down, we need |
| // to check if this peak was short. If so we should back off to avoid going |
| // back and forth between this load, the system doesn't seem to handle it. |
| bool check_for_backoff = last_rampup_time_ms_ > last_overuse_time_ms_; |
| if (check_for_backoff) { |
| if (now - last_rampup_time_ms_ < kStandardRampUpDelayMs || |
| num_overuse_detections_ > kMaxOverusesBeforeApplyRampupDelay) { |
| // Going up was not ok for very long, back off. |
| current_rampup_delay_ms_ *= kRampUpBackoffFactor; |
| if (current_rampup_delay_ms_ > kMaxRampUpDelayMs) |
| current_rampup_delay_ms_ = kMaxRampUpDelayMs; |
| } else { |
| // Not currently backing off, reset rampup delay. |
| current_rampup_delay_ms_ = kStandardRampUpDelayMs; |
| } |
| } |
| |
| last_overuse_time_ms_ = now; |
| in_quick_rampup_ = false; |
| checks_above_threshold_ = 0; |
| ++num_overuse_detections_; |
| |
| if (observer_) |
| observer_->OveruseDetected(); |
| } else if (IsUnderusing(current_metrics, now)) { |
| last_rampup_time_ms_ = now; |
| in_quick_rampup_ = true; |
| |
| if (observer_) |
| observer_->NormalUsage(); |
| } |
| |
| int rampup_delay = |
| in_quick_rampup_ ? kQuickRampUpDelayMs : current_rampup_delay_ms_; |
| |
| LOG(LS_VERBOSE) << " Frame stats: " |
| << " encode usage " << current_metrics.encode_usage_percent |
| << " overuse detections " << num_overuse_detections_ |
| << " rampup delay " << rampup_delay; |
| } |
| |
| bool OveruseFrameDetector::IsOverusing(const CpuOveruseMetrics& metrics) { |
| if (metrics.encode_usage_percent >= |
| options_.high_encode_usage_threshold_percent) { |
| ++checks_above_threshold_; |
| } else { |
| checks_above_threshold_ = 0; |
| } |
| return checks_above_threshold_ >= options_.high_threshold_consecutive_count; |
| } |
| |
| bool OveruseFrameDetector::IsUnderusing(const CpuOveruseMetrics& metrics, |
| int64_t time_now) { |
| int delay = in_quick_rampup_ ? kQuickRampUpDelayMs : current_rampup_delay_ms_; |
| if (time_now < last_rampup_time_ms_ + delay) |
| return false; |
| |
| return metrics.encode_usage_percent < |
| options_.low_encode_usage_threshold_percent; |
| } |
| } // namespace webrtc |