383 lines
13 KiB
C++
383 lines
13 KiB
C++
/*
|
|
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
|
*
|
|
* Use of this source code is governed by a BSD-style license
|
|
* that can be found in the LICENSE file in the root of the source
|
|
* tree. An additional intellectual property rights grant can be found
|
|
* in the file PATENTS. All contributing project authors may
|
|
* be found in the AUTHORS file in the root of the source tree.
|
|
*/
|
|
|
|
#include "webrtc/video/overuse_frame_detector.h"
|
|
|
|
#include <assert.h>
|
|
#include <math.h>
|
|
|
|
#include <algorithm>
|
|
#include <list>
|
|
#include <map>
|
|
|
|
#include "webrtc/base/checks.h"
|
|
#include "webrtc/base/exp_filter.h"
|
|
#include "webrtc/base/logging.h"
|
|
#include "webrtc/common_video/include/frame_callback.h"
|
|
#include "webrtc/system_wrappers/include/clock.h"
|
|
#include "webrtc/video_frame.h"
|
|
|
|
#if defined(WEBRTC_MAC) && !defined(WEBRTC_IOS)
|
|
#include <mach/mach.h>
|
|
#endif // defined(WEBRTC_MAC) && !defined(WEBRTC_IOS)
|
|
|
|
namespace webrtc {
|
|
|
|
namespace {

// Minimum time, in milliseconds, between two evaluations done by Process().
constexpr int64_t kProcessIntervalMs = 5000;

// Delay between consecutive rampups. (Used for quick recovery.)
constexpr int kQuickRampUpDelayMs = 10 * 1000;
// Delay between rampup attempts. Starts at the standard value and is scaled
// up (by kRampUpBackoffFactor) to at most the max value.
constexpr int kStandardRampUpDelayMs = 40 * 1000;
constexpr int kMaxRampUpDelayMs = 240 * 1000;
// Exponential back-off factor, to prevent annoying up-down behaviour.
constexpr double kRampUpBackoffFactor = 2.0;

// Max number of overuses detected before always applying the rampup delay.
constexpr int kMaxOverusesBeforeApplyRampupDelay = 4;

// Time differences (in ms) are divided by this value to obtain the exponent
// passed to rtc::ExpFilter.
constexpr float kSampleDiffMs = 33.0f;
// The maximum exponent to use in rtc::ExpFilter.
constexpr float kMaxExp = 7.0f;

}  // namespace
|
|
|
|
// Sets the default overuse-detection options. On desktop Mac the high
// threshold is lowered for machines with one or two physical cores; the low
// threshold is always derived from the (possibly adjusted) high threshold.
CpuOveruseOptions::CpuOveruseOptions()
    : high_encode_usage_threshold_percent(85),
      frame_timeout_interval_ms(1500),
      min_frame_samples(120),
      min_process_count(3),
      high_threshold_consecutive_count(2) {
#if defined(WEBRTC_MAC) && !defined(WEBRTC_IOS)
  // This is proof-of-concept code for letting the physical core count affect
  // the interval into which we attempt to scale. For now, the code is Mac OS
  // specific, since that's the platform where we saw most problems.
  // TODO(torbjorng): Enhance SystemInfo to return this metric.

  mach_port_t host_port = mach_host_self();
  host_basic_info basic_info = {};
  mach_msg_type_number_t basic_info_count = HOST_BASIC_INFO_COUNT;
  const kern_return_t status =
      host_info(host_port, HOST_BASIC_INFO,
                reinterpret_cast<host_info_t>(&basic_info), &basic_info_count);
  mach_port_deallocate(mach_task_self(), host_port);

  // If we couldn't get # of physical CPUs, don't panic. Assume we have 1.
  int physical_core_count = 1;
  if (status == KERN_SUCCESS) {
    physical_core_count = basic_info.physical_cpu;
    LOG(LS_INFO) << "Number of physical cores:" << physical_core_count;
  } else {
    LOG(LS_ERROR) << "Failed to determine number of physical cores, assuming 1";
  }

  // Change init list default for few core systems. The assumption here is that
  // encoding, which we measure here, takes about 1/4 of the processing of a
  // two-way call. This is roughly true for x86 using both vp8 and vp9 without
  // hardware encoding. Since we don't affect the incoming stream here, we only
  // control about 1/2 of the total processing needs, but this is not taken into
  // account.
  switch (physical_core_count) {
    case 1:
      high_encode_usage_threshold_percent = 20;  // Roughly 1/4 of 100%.
      break;
    case 2:
      high_encode_usage_threshold_percent = 40;  // Roughly 1/4 of 200%.
      break;
    default:
      break;  // Keep the default from the initializer list.
  }
#endif  // defined(WEBRTC_MAC) && !defined(WEBRTC_IOS)

  // Note that we make the interval 2x+epsilon wide, since libyuv scaling steps
  // are close to that (when squared). This wide interval makes sure that
  // scaling up or down does not jump all the way across the interval.
  low_encode_usage_threshold_percent =
      (high_encode_usage_threshold_percent - 1) / 2;
}
|
|
|
|
// Class for calculating the processing usage on the send-side (the average
|
|
// processing time of a frame divided by the average time difference between
|
|
// captured frames).
|
|
class OveruseFrameDetector::SendProcessingUsage {
|
|
public:
|
|
explicit SendProcessingUsage(const CpuOveruseOptions& options)
|
|
: kWeightFactorFrameDiff(0.998f),
|
|
kWeightFactorProcessing(0.995f),
|
|
kInitialSampleDiffMs(40.0f),
|
|
kMaxSampleDiffMs(45.0f),
|
|
count_(0),
|
|
options_(options),
|
|
filtered_processing_ms_(new rtc::ExpFilter(kWeightFactorProcessing)),
|
|
filtered_frame_diff_ms_(new rtc::ExpFilter(kWeightFactorFrameDiff)) {
|
|
Reset();
|
|
}
|
|
~SendProcessingUsage() {}
|
|
|
|
void Reset() {
|
|
count_ = 0;
|
|
filtered_frame_diff_ms_->Reset(kWeightFactorFrameDiff);
|
|
filtered_frame_diff_ms_->Apply(1.0f, kInitialSampleDiffMs);
|
|
filtered_processing_ms_->Reset(kWeightFactorProcessing);
|
|
filtered_processing_ms_->Apply(1.0f, InitialProcessingMs());
|
|
}
|
|
|
|
void AddCaptureSample(float sample_ms) {
|
|
float exp = sample_ms / kSampleDiffMs;
|
|
exp = std::min(exp, kMaxExp);
|
|
filtered_frame_diff_ms_->Apply(exp, sample_ms);
|
|
}
|
|
|
|
void AddSample(float processing_ms, int64_t diff_last_sample_ms) {
|
|
++count_;
|
|
float exp = diff_last_sample_ms / kSampleDiffMs;
|
|
exp = std::min(exp, kMaxExp);
|
|
filtered_processing_ms_->Apply(exp, processing_ms);
|
|
}
|
|
|
|
int Value() const {
|
|
if (count_ < static_cast<uint32_t>(options_.min_frame_samples)) {
|
|
return static_cast<int>(InitialUsageInPercent() + 0.5f);
|
|
}
|
|
float frame_diff_ms = std::max(filtered_frame_diff_ms_->filtered(), 1.0f);
|
|
frame_diff_ms = std::min(frame_diff_ms, kMaxSampleDiffMs);
|
|
float encode_usage_percent =
|
|
100.0f * filtered_processing_ms_->filtered() / frame_diff_ms;
|
|
return static_cast<int>(encode_usage_percent + 0.5);
|
|
}
|
|
|
|
private:
|
|
float InitialUsageInPercent() const {
|
|
// Start in between the underuse and overuse threshold.
|
|
return (options_.low_encode_usage_threshold_percent +
|
|
options_.high_encode_usage_threshold_percent) / 2.0f;
|
|
}
|
|
|
|
float InitialProcessingMs() const {
|
|
return InitialUsageInPercent() * kInitialSampleDiffMs / 100;
|
|
}
|
|
|
|
const float kWeightFactorFrameDiff;
|
|
const float kWeightFactorProcessing;
|
|
const float kInitialSampleDiffMs;
|
|
const float kMaxSampleDiffMs;
|
|
uint64_t count_;
|
|
const CpuOveruseOptions options_;
|
|
std::unique_ptr<rtc::ExpFilter> filtered_processing_ms_;
|
|
std::unique_ptr<rtc::ExpFilter> filtered_frame_diff_ms_;
|
|
};
|
|
|
|
// Creates a detector with no frames seen yet. The first Process() evaluation
// is due immediately, and the rampup delay starts at the standard value.
// |metrics_observer| must not be null (DCHECKed).
OveruseFrameDetector::OveruseFrameDetector(
    Clock* clock,
    const CpuOveruseOptions& options,
    CpuOveruseObserver* observer,
    EncodedFrameObserver* encoder_timing,
    CpuOveruseMetricsObserver* metrics_observer)
    : options_(options),
      observer_(observer),
      encoder_timing_(encoder_timing),
      metrics_observer_(metrics_observer),
      clock_(clock),
      num_process_times_(0),
      last_capture_time_ms_(-1),            // -1: no frame captured yet.
      last_processed_capture_time_ms_(-1),  // -1: no frame processed yet.
      num_pixels_(0),
      // Read the time through the |clock| argument; it is the same object
      // that |clock_| is initialized with above.
      next_process_time_ms_(clock->TimeInMilliseconds()),
      last_overuse_time_ms_(-1),
      checks_above_threshold_(0),
      num_overuse_detections_(0),
      last_rampup_time_ms_(-1),
      in_quick_rampup_(false),
      current_rampup_delay_ms_(kStandardRampUpDelayMs),
      usage_(new SendProcessingUsage(options)) {
  RTC_DCHECK(metrics_observer);
  processing_thread_.DetachFromThread();
}
|
|
|
|
// Nothing to release by hand; the members clean up after themselves.
OveruseFrameDetector::~OveruseFrameDetector() = default;
|
|
|
|
void OveruseFrameDetector::EncodedFrameTimeMeasured(int encode_duration_ms) {
|
|
if (!metrics_)
|
|
metrics_ = rtc::Optional<CpuOveruseMetrics>(CpuOveruseMetrics());
|
|
metrics_->encode_usage_percent = usage_->Value();
|
|
|
|
metrics_observer_->OnEncodedFrameTimeMeasured(encode_duration_ms, *metrics_);
|
|
}
|
|
|
|
int64_t OveruseFrameDetector::TimeUntilNextProcess() {
|
|
RTC_DCHECK(processing_thread_.CalledOnValidThread());
|
|
return next_process_time_ms_ - clock_->TimeInMilliseconds();
|
|
}
|
|
|
|
bool OveruseFrameDetector::FrameSizeChanged(int num_pixels) const {
|
|
if (num_pixels != num_pixels_) {
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
bool OveruseFrameDetector::FrameTimeoutDetected(int64_t now) const {
|
|
if (last_capture_time_ms_ == -1)
|
|
return false;
|
|
return (now - last_capture_time_ms_) > options_.frame_timeout_interval_ms;
|
|
}
|
|
|
|
// Restarts the measurements for frames of |num_pixels| pixels: clears the
// usage estimate, pending frame timings, capture timestamps, process counter
// and stored metrics.
void OveruseFrameDetector::ResetAll(int num_pixels) {
  num_pixels_ = num_pixels;

  // Measurement state.
  usage_->Reset();
  frame_timing_.clear();
  metrics_ = rtc::Optional<CpuOveruseMetrics>();

  // Timestamps and counters; -1 means "no frame seen yet".
  last_capture_time_ms_ = -1;
  last_processed_capture_time_ms_ = -1;
  num_process_times_ = 0;
}
|
|
|
|
void OveruseFrameDetector::FrameCaptured(const VideoFrame& frame) {
|
|
rtc::CritScope cs(&crit_);
|
|
|
|
int64_t now = clock_->TimeInMilliseconds();
|
|
if (FrameSizeChanged(frame.width() * frame.height()) ||
|
|
FrameTimeoutDetected(now)) {
|
|
ResetAll(frame.width() * frame.height());
|
|
}
|
|
|
|
if (last_capture_time_ms_ != -1)
|
|
usage_->AddCaptureSample(now - last_capture_time_ms_);
|
|
|
|
last_capture_time_ms_ = now;
|
|
|
|
frame_timing_.push_back(
|
|
FrameTiming(frame.ntp_time_ms(), frame.timestamp(), now));
|
|
}
|
|
|
|
void OveruseFrameDetector::FrameSent(uint32_t timestamp) {
|
|
rtc::CritScope cs(&crit_);
|
|
// Delay before reporting actual encoding time, used to have the ability to
|
|
// detect total encoding time when encoding more than one layer. Encoding is
|
|
// here assumed to finish within a second (or that we get enough long-time
|
|
// samples before one second to trigger an overuse even when this is not the
|
|
// case).
|
|
static const int64_t kEncodingTimeMeasureWindowMs = 1000;
|
|
int64_t now = clock_->TimeInMilliseconds();
|
|
for (auto& it : frame_timing_) {
|
|
if (it.timestamp == timestamp) {
|
|
it.last_send_ms = now;
|
|
break;
|
|
}
|
|
}
|
|
// TODO(pbos): Handle the case/log errors when not finding the corresponding
|
|
// frame (either very slow encoding or incorrect wrong timestamps returned
|
|
// from the encoder).
|
|
// This is currently the case for all frames on ChromeOS, so logging them
|
|
// would be spammy, and triggering overuse would be wrong.
|
|
// https://crbug.com/350106
|
|
while (!frame_timing_.empty()) {
|
|
FrameTiming timing = frame_timing_.front();
|
|
if (now - timing.capture_ms < kEncodingTimeMeasureWindowMs)
|
|
break;
|
|
if (timing.last_send_ms != -1) {
|
|
int encode_duration_ms =
|
|
static_cast<int>(timing.last_send_ms - timing.capture_ms);
|
|
if (encoder_timing_) {
|
|
encoder_timing_->OnEncodeTiming(timing.capture_ntp_ms,
|
|
encode_duration_ms);
|
|
}
|
|
if (last_processed_capture_time_ms_ != -1) {
|
|
int64_t diff_ms = timing.capture_ms - last_processed_capture_time_ms_;
|
|
usage_->AddSample(encode_duration_ms, diff_ms);
|
|
}
|
|
last_processed_capture_time_ms_ = timing.capture_ms;
|
|
EncodedFrameTimeMeasured(encode_duration_ms);
|
|
}
|
|
frame_timing_.pop_front();
|
|
}
|
|
}
|
|
|
|
void OveruseFrameDetector::Process() {
|
|
RTC_DCHECK(processing_thread_.CalledOnValidThread());
|
|
|
|
int64_t now = clock_->TimeInMilliseconds();
|
|
|
|
// Used to protect against Process() being called too often.
|
|
if (now < next_process_time_ms_)
|
|
return;
|
|
|
|
next_process_time_ms_ = now + kProcessIntervalMs;
|
|
|
|
CpuOveruseMetrics current_metrics;
|
|
{
|
|
rtc::CritScope cs(&crit_);
|
|
++num_process_times_;
|
|
if (num_process_times_ <= options_.min_process_count || !metrics_)
|
|
return;
|
|
|
|
current_metrics = *metrics_;
|
|
}
|
|
|
|
if (IsOverusing(current_metrics)) {
|
|
// If the last thing we did was going up, and now have to back down, we need
|
|
// to check if this peak was short. If so we should back off to avoid going
|
|
// back and forth between this load, the system doesn't seem to handle it.
|
|
bool check_for_backoff = last_rampup_time_ms_ > last_overuse_time_ms_;
|
|
if (check_for_backoff) {
|
|
if (now - last_rampup_time_ms_ < kStandardRampUpDelayMs ||
|
|
num_overuse_detections_ > kMaxOverusesBeforeApplyRampupDelay) {
|
|
// Going up was not ok for very long, back off.
|
|
current_rampup_delay_ms_ *= kRampUpBackoffFactor;
|
|
if (current_rampup_delay_ms_ > kMaxRampUpDelayMs)
|
|
current_rampup_delay_ms_ = kMaxRampUpDelayMs;
|
|
} else {
|
|
// Not currently backing off, reset rampup delay.
|
|
current_rampup_delay_ms_ = kStandardRampUpDelayMs;
|
|
}
|
|
}
|
|
|
|
last_overuse_time_ms_ = now;
|
|
in_quick_rampup_ = false;
|
|
checks_above_threshold_ = 0;
|
|
++num_overuse_detections_;
|
|
|
|
if (observer_)
|
|
observer_->OveruseDetected();
|
|
} else if (IsUnderusing(current_metrics, now)) {
|
|
last_rampup_time_ms_ = now;
|
|
in_quick_rampup_ = true;
|
|
|
|
if (observer_)
|
|
observer_->NormalUsage();
|
|
}
|
|
|
|
int rampup_delay =
|
|
in_quick_rampup_ ? kQuickRampUpDelayMs : current_rampup_delay_ms_;
|
|
|
|
LOG(LS_VERBOSE) << " Frame stats: "
|
|
<< " encode usage " << current_metrics.encode_usage_percent
|
|
<< " overuse detections " << num_overuse_detections_
|
|
<< " rampup delay " << rampup_delay;
|
|
}
|
|
|
|
bool OveruseFrameDetector::IsOverusing(const CpuOveruseMetrics& metrics) {
|
|
if (metrics.encode_usage_percent >=
|
|
options_.high_encode_usage_threshold_percent) {
|
|
++checks_above_threshold_;
|
|
} else {
|
|
checks_above_threshold_ = 0;
|
|
}
|
|
return checks_above_threshold_ >= options_.high_threshold_consecutive_count;
|
|
}
|
|
|
|
bool OveruseFrameDetector::IsUnderusing(const CpuOveruseMetrics& metrics,
|
|
int64_t time_now) {
|
|
int delay = in_quick_rampup_ ? kQuickRampUpDelayMs : current_rampup_delay_ms_;
|
|
if (time_now < last_rampup_time_ms_ + delay)
|
|
return false;
|
|
|
|
return metrics.encode_usage_percent <
|
|
options_.low_encode_usage_threshold_percent;
|
|
}
|
|
} // namespace webrtc
|