blob: 9cd0c257300f3f5651ded0391d63c5e960dd80c8 [file] [log] [blame]
// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "ui/latency/frame_metrics.h"
#include <cmath>
#include <limits>
#include <vector>
#include "base/bit_cast.h"
#include "base/trace_event/trace_event.h"
#include "base/trace_event/traced_value.h"
namespace ui {
namespace {
// How often to report results.
// This needs to be short enough to avoid overflow in the accumulators.
constexpr base::TimeDelta kDefaultReportPeriod =
base::TimeDelta::FromSeconds(1);
// Gives the histogram for skips the highest precision just above a
// skipped:produced ratio of 1.
constexpr int64_t kFixedPointMultiplierSkips =
frame_metrics::kFixedPointMultiplier;
// Gives latency a precision of 1 microsecond in both the histogram and
// the fixed point values.
constexpr int64_t kFixedPointMultiplierLatency = 1;
// This is used to weigh each latency sample by a constant value since
// we don't weigh it by the frame duration like other metrics.
// A larger weight improves precision in the fixed point accumulators, but we
// don't want to make it so big that it causes overflow before we start a new
// reporting period.
constexpr uint32_t kLatencySampleWeight = 1u << 10;
constexpr uint32_t kMaxFramesBeforeOverflowPossible =
std::numeric_limits<uint32_t>::max() / kLatencySampleWeight;
// Gives the histogram for latency speed the highest precision just above a
// (latency delta : frame delta) ratio of 1.
constexpr int64_t kFixedPointMultiplierLatencySpeed =
frame_metrics::kFixedPointMultiplier;
// Gives the histogram for latency acceleration the highest precision just
// above a (latency speed delta : frame delta) of 1/1024.
// A value ~1k was chosen since frame deltas are on the order of microseconds.
// Use 1024 instead of 1000 since powers of 2 let the compiler optimize integer
// multiplies with shifts if it wants.
// TODO(brianderson): Fine tune these values. http://crbug.com/837434
constexpr int64_t kFixedPointMultiplierLatencyAcceleration =
frame_metrics::kFixedPointMultiplier * 1024;
// Converts a ratio to a fixed point value.
// Each threshold is offset by 0.5 to filter out jitter/inaccuracies.
constexpr uint32_t RatioThreshold(double fraction) {
return static_cast<uint32_t>((fraction + 0.5) *
frame_metrics::kFixedPointMultiplier);
}
// Converts frequency as a floating point value into a fixed point value
// representing microseconds of latency.
// The result is scaled by 110% to allow for slack in cases the actual refresh
// period is slightly longer (common) or if there is some jitter in the
// timestamp sampling.
constexpr uint32_t LatencyThreshold(double Hz) {
return static_cast<uint32_t>((1.1 / Hz) *
base::TimeTicks::kMicrosecondsPerSecond);
}
// The skip thresholds are selected to track each time more than 0, 1, 2, or 4
// frames were skipped at once.
constexpr std::initializer_list<uint32_t> kSkipThresholds = {
RatioThreshold(0), RatioThreshold(1), RatioThreshold(2), RatioThreshold(4),
};
// The latency thresholds are selected based on common display frequencies.
// We often begin a frames on a vsync which would result in whole vsync periods
// of latency. However, in case begin frames are offset slightly from the vsync,
// which is common on Android, the frequency goes all the way to 240Hz.
constexpr std::initializer_list<uint32_t> kLatencyThresholds = {
LatencyThreshold(240), // 4.17 ms * 110% = 4.58 ms
LatencyThreshold(120), // 8.33 ms * 110% = 9.17 ms
LatencyThreshold(60), // 16.67 ms * 110% = 18.33 ms
LatencyThreshold(30), // 33.33 ms * 110% = 36.67 ms
};
// The latency speed thresholds are chosen to track each frame where the
// latency was constant (0) or when there was a jump of 1, 2, or 4 frame
// periods.
constexpr std::initializer_list<uint32_t> kLatencySpeedThresholds = {
RatioThreshold(0), RatioThreshold(1), RatioThreshold(2), RatioThreshold(4),
};
// The latency acceleration thresholds here are tentative.
// TODO(brianderson): Fine tune these values. http://crbug.com/837434
constexpr std::initializer_list<uint32_t> kLatencyAccelerationThresholds = {
RatioThreshold(0), RatioThreshold(1), RatioThreshold(2), RatioThreshold(4),
};
constexpr const char kTraceCategories[] = "gpu,benchmark";
// uint32_t should be plenty of range for real world values, but clip individual
// entries to make sure no single value dominates and also to avoid overflow
// in the accumulators and the fixed point math.
// This also makes sure overflowing values saturate instead of wrapping around
// and skewing our results.
// TODO(brianderson): Report warning if clipping occurred.
uint32_t CapValue(int64_t value) {
return static_cast<uint32_t>(std::min<int64_t>(
std::llabs(value), std::numeric_limits<uint32_t>::max()));
}
uint32_t CapDuration(const base::TimeDelta duration) {
constexpr base::TimeDelta kDurationCap = base::TimeDelta::FromMinutes(1);
return std::min(duration, kDurationCap).InMicroseconds();
}
const char* ToString(FrameMetricsSource source) {
switch (source) {
case FrameMetricsSource::UnitTest:
return "UnitTest";
case FrameMetricsSource::RendererCompositor:
return "RendererCompositor";
case FrameMetricsSource::UiCompositor:
return "UiCompositor";
case FrameMetricsSource::Unknown:
break;
};
return "Unknown";
}
const char* ToString(FrameMetricsSourceThread thread) {
switch (thread) {
case FrameMetricsSourceThread::Blink:
return "Blink";
case FrameMetricsSourceThread::RendererCompositor:
return "RendererCompositor";
case FrameMetricsSourceThread::Ui:
return "Ui";
case FrameMetricsSourceThread::UiCompositor:
return "UiCompositor";
case FrameMetricsSourceThread::VizCompositor:
return "VizCompositor";
case FrameMetricsSourceThread::Unknown:
break;
}
return "Unknown";
}
const char* ToString(FrameMetricsCompileTarget target) {
switch (target) {
case FrameMetricsCompileTarget::Chromium:
return "Chromium";
case FrameMetricsCompileTarget::SynchronousCompositor:
return "SynchronousCompositor";
case FrameMetricsCompileTarget::Headless:
return "Headless";
case FrameMetricsCompileTarget::Unknown:
break;
}
return "Unknown";
}
} // namespace
void FrameMetricsSettings::AsValueInto(
base::trace_event::TracedValue* state) const {
state->SetString("source", ToString(source));
state->SetString("thread", ToString(source_thread));
state->SetString("compile_target", ToString(compile_target));
}
namespace frame_metrics {
// Converts result to fraction of frames skipped.
// The internal skip values are (skipped:produced). This transform converts
// the result to (skipped:total), which is:
// a) Easier to interpret as a human, and
// b) In the same units as latency speed, which may help us create a unified
// smoothness metric in the future.
// The internal representation uses (skipped:produced) to:
// a) Allow RMS, SMR, StdDev, etc to be performed on values that increase
// linearly (rather than asymptotically to 1) with the amount of jank, and
// b) Give us better precision where it's important when stored as a fixed
// point number and in histogram buckets.
double SkipClient::TransformResult(double result) const {
// Avoid divide by zero.
if (result < 1e-32)
return 0;
return 1.0 / (1.0 + (kFixedPointMultiplierSkips / result));
}
// Converts result to seconds.
double LatencyClient::TransformResult(double result) const {
return result / (base::TimeTicks::kMicrosecondsPerSecond *
kFixedPointMultiplierLatency);
}
// Converts result to s/s. ie: fraction of frames traveled.
double LatencySpeedClient::TransformResult(double result) const {
return result / kFixedPointMultiplierLatencySpeed;
}
// Converts result to (s/s^2).
// ie: change in fraction of frames traveled per second.
double LatencyAccelerationClient::TransformResult(double result) const {
return (result * base::TimeTicks::kMicrosecondsPerSecond) /
kFixedPointMultiplierLatencyAcceleration;
}
} // namespace frame_metrics
FrameMetrics::FrameMetrics(FrameMetricsSettings settings)
: settings_(settings),
shared_skip_client_(settings_.max_window_size),
shared_latency_client_(settings_.max_window_size),
frame_skips_analyzer_(&skip_client_,
&shared_skip_client_,
kSkipThresholds,
std::make_unique<frame_metrics::RatioHistogram>()),
latency_analyzer_(&latency_client_,
&shared_latency_client_,
kLatencyThresholds,
std::make_unique<frame_metrics::VSyncHistogram>()),
latency_speed_analyzer_(
&latency_speed_client_,
&shared_latency_client_,
kLatencySpeedThresholds,
std::make_unique<frame_metrics::RatioHistogram>()),
latency_acceleration_analyzer_(
&latency_acceleration_client_,
&shared_latency_client_,
kLatencyAccelerationThresholds,
std::make_unique<frame_metrics::RatioHistogram>()) {}
FrameMetrics::~FrameMetrics() = default;
base::TimeDelta FrameMetrics::ReportPeriod() {
return kDefaultReportPeriod;
}
void FrameMetrics::AddFrameProduced(base::TimeTicks source_timestamp,
base::TimeDelta amount_produced,
base::TimeDelta amount_skipped) {
DCHECK_GE(amount_skipped, base::TimeDelta());
DCHECK_GT(amount_produced, base::TimeDelta());
base::TimeDelta source_timestamp_delta;
if (!skip_timestamp_queue_.empty()) {
source_timestamp_delta = source_timestamp - skip_timestamp_queue_.back();
DCHECK_GT(source_timestamp_delta, base::TimeDelta());
}
// Periodically report all metrics and reset the accumulators.
// Do this before adding any samples to avoid overflow before it might happen.
time_since_start_of_report_period_ += source_timestamp_delta;
frames_produced_since_start_of_report_period_++;
if (time_since_start_of_report_period_ > ReportPeriod() ||
frames_produced_since_start_of_report_period_ >
kMaxFramesBeforeOverflowPossible) {
StartNewReportPeriod();
}
if (skip_timestamp_queue_.size() >= settings_.max_window_size) {
skip_timestamp_queue_.pop_front();
}
skip_timestamp_queue_.push_back(source_timestamp);
shared_skip_client_.window_begin = skip_timestamp_queue_.front();
shared_skip_client_.window_end = source_timestamp;
int64_t skipped_to_produced_ratio =
(amount_skipped * kFixedPointMultiplierSkips) / amount_produced;
DCHECK_GE(skipped_to_produced_ratio, 0);
frame_skips_analyzer_.AddSample(CapValue(skipped_to_produced_ratio),
CapDuration(amount_produced));
}
void FrameMetrics::AddFrameDisplayed(base::TimeTicks source_timestamp,
base::TimeTicks display_timestamp) {
TRACE_EVENT0(kTraceCategories, "AddFrameDisplayed");
// Frame timestamps shouldn't go back in time, but check and drop them just
// in case. Much of the code assumes a positive and non-zero delta.
if (source_timestamp <= source_timestamp_prev_) {
// TODO(brianderson): Flag a warning.
return;
}
base::TimeDelta latency = display_timestamp - source_timestamp;
if (latency_timestamp_queue_.size() >= settings_.max_window_size) {
latency_timestamp_queue_.pop_front();
}
latency_timestamp_queue_.push_back(source_timestamp);
shared_latency_client_.window_begin = latency_timestamp_queue_.front();
shared_latency_client_.window_end = source_timestamp;
// TODO(brianderson): Handle negative latency better.
// For now, reporting the magnitude of the latency will reflect
// how far off the ideal display time the frame was, but it won't indicate
// in which direction. This might be important for sources like video, where
// a frame might be displayed a little bit earlier than its ideal display
// time.
int64_t latency_value =
latency.InMicroseconds() * kFixedPointMultiplierLatency;
latency_analyzer_.AddSample(CapValue(latency_value), kLatencySampleWeight);
base::TimeDelta latency_delta = latency - latency_prev_;
base::TimeDelta source_duration = source_timestamp - source_timestamp_prev_;
// Only calculate speed if there's enough history.
if (latencies_added_ >= 1 && settings_.is_frame_latency_speed_on()) {
int64_t latency_velocity =
(latency_delta * kFixedPointMultiplierLatencySpeed) / source_duration;
// This should be plenty of range for real world values, but clip
// entries to avoid overflow in the accumulators just in case.
latency_speed_analyzer_.AddSample(CapValue(latency_velocity),
CapDuration(source_duration));
}
// Only calculate acceleration if there's enough history.
if (latencies_added_ >= 2 && settings_.is_frame_latency_acceleration_on()) {
base::TimeDelta source_duration_average =
(source_duration + source_duration_prev_) / 2;
int64_t latency_acceleration =
(((latency_delta * kFixedPointMultiplierLatencyAcceleration) /
source_duration) -
((latency_delta_prev_ * kFixedPointMultiplierLatencyAcceleration) /
source_duration_prev_)) /
source_duration_average.InMicroseconds();
latency_acceleration_analyzer_.AddSample(
CapValue(latency_acceleration), CapDuration(source_duration_average));
}
// Update history.
if (latencies_added_ >= 1) {
source_duration_prev_ = source_duration;
latency_delta_prev_ = latency_delta;
}
source_timestamp_prev_ = source_timestamp;
latency_prev_ = latency;
latencies_added_++;
}
void FrameMetrics::Reset() {
TRACE_EVENT0(kTraceCategories, "FrameMetrics::Reset");
skip_timestamp_queue_.clear();
latency_timestamp_queue_.clear();
time_since_start_of_report_period_ = base::TimeDelta();
latencies_added_ = 0;
source_timestamp_prev_ = base::TimeTicks();
latency_prev_ = base::TimeDelta();
source_duration_prev_ = base::TimeDelta();
latency_delta_prev_ = base::TimeDelta();
frame_skips_analyzer_.Reset();
latency_analyzer_.Reset();
if (settings_.is_frame_latency_speed_on())
latency_speed_analyzer_.Reset();
if (settings_.is_frame_latency_acceleration_on())
latency_acceleration_analyzer_.Reset();
}
// Reset analyzers, but don't reset resent latency history so we can get
// latency speed and acceleration values immediately.
// TODO(brianderson): Once we support UKM reporting, store the frame skips
// result and defer it's reporting until the latency numbers are also
// available. Reporting everything at this point would put some frames in
// different reporting periods, which could skew the results.
void FrameMetrics::StartNewReportPeriod() {
TRACE_EVENT0(kTraceCategories, "FrameMetrics::StartNewReportPeriod");
bool tracing_enabled = 0;
TRACE_EVENT_CATEGORY_GROUP_ENABLED(kTraceCategories, &tracing_enabled);
if (tracing_enabled)
TraceStats();
time_since_start_of_report_period_ = base::TimeDelta();
frames_produced_since_start_of_report_period_ = 0;
frame_skips_analyzer_.StartNewReportPeriod();
latency_analyzer_.StartNewReportPeriod();
latency_speed_analyzer_.StartNewReportPeriod();
latency_acceleration_analyzer_.StartNewReportPeriod();
}
double FrameMetrics::FastApproximateSqrt(double x) {
if (x <= 0)
return 0;
// Basically performs x*fastinvSqrt(x) - using high precision (3 steps)
double y = x;
double xhalf = 0.5f * x;
float xf = static_cast<float>(x);
int32_t i = bit_cast<int32_t>(xf);
// Magic Number for initial guess. Reference:
// http://www.lomont.org/Math/Papers/2003/InvSqrt.pdf.
i = 0x5f3759df - (i >> 1);
x = static_cast<double>(bit_cast<float>(i));
// Newton step.
x = x * (1.5 - xhalf * x * x);
// Newton step.
x = x * (1.5 - xhalf * x * x);
// Newton step.
x = x * (1.5 - xhalf * x * x);
return y * x;
}
namespace {
// FrameMetricsTraceData delegates tracing logic to TracedValue in a deferred
// manner. Rather that making copies of keys, values, and strings when the
// trace is emitted (as using TracedValue directly would), it implements
// ConvertableToTraceFormat so we do the copying after the capture period.
// i.e. when we are producing the trace output. On a Linux z840, deferring
// decreases the cost of emitting a trace from ~25us to ~7us.
class FrameMetricsTraceData
: public base::trace_event::ConvertableToTraceFormat {
public:
FrameMetricsTraceData() = default;
~FrameMetricsTraceData() override = default;
void ToTracedValue(base::trace_event::TracedValue* state) const {
state->BeginDictionary("Source");
settings.AsValueInto(state);
state->EndDictionary();
state->BeginDictionary("Skips");
skips.AsValueInto(state);
state->EndDictionary();
state->BeginDictionary("Latency");
latency.AsValueInto(state);
state->EndDictionary();
if (settings.is_frame_latency_speed_on()) {
state->BeginDictionary("Speed");
speed.AsValueInto(state);
state->EndDictionary();
}
if (settings.is_frame_latency_acceleration_on()) {
state->BeginDictionary("Acceleration");
acceleration.AsValueInto(state);
state->EndDictionary();
}
}
void AppendAsTraceFormat(std::string* out) const override {
base::trace_event::TracedValue state;
ToTracedValue(&state);
state.AppendAsTraceFormat(out);
}
bool AppendToProto(ProtoAppender* appender) override {
base::trace_event::TracedValue state;
ToTracedValue(&state);
return state.AppendToProto(appender);
}
void EstimateTraceMemoryOverhead(
base::trace_event::TraceEventMemoryOverhead* overhead) override {
overhead->Add(base::trace_event::TraceEventMemoryOverhead::kFrameMetrics,
sizeof(FrameMetricsTraceData));
}
FrameMetricsSettings settings;
StreamAnalysis skips, latency, speed, acceleration;
};
} // namespace
void FrameMetrics::TraceStats() const {
auto trace_data = std::make_unique<FrameMetricsTraceData>();
{
TRACE_EVENT0(kTraceCategories, "CalculateFrameDisplayed");
trace_data->settings = settings_;
frame_skips_analyzer_.ComputeSummary(&trace_data->skips);
latency_analyzer_.ComputeSummary(&trace_data->latency);
if (settings_.is_frame_latency_speed_on())
latency_speed_analyzer_.ComputeSummary(&trace_data->speed);
if (settings_.is_frame_latency_acceleration_on())
latency_acceleration_analyzer_.ComputeSummary(&trace_data->acceleration);
}
TRACE_EVENT_INSTANT1(kTraceCategories, "FrameMetrics",
TRACE_EVENT_SCOPE_THREAD, "Info", std::move(trace_data));
}
} // namespace ui