blob: 0dd7502f10950ed1870a2d981a29389baad72ee3 [file] [log] [blame]
// Copyright 2018 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef ML_REQUEST_METRICS_H_
#define ML_REQUEST_METRICS_H_
#include <algorithm>
#include <memory>
#include <string>
#include <vector>
#include <base/bind.h>
#include <base/files/file_path.h>
#include <base/logging.h>
#include <base/macros.h>
#include <base/process/process_metrics.h>
#include <base/sys_info.h>
#include <base/time/time.h>
#include <metrics/metrics_library.h>
#include "metrics/timer.h"
#include "ml/util.h"
namespace ml {
// Performs UMA metrics logging for LoadModel, CreateGraphExecutor and Execute.
// Metrics includes events(enumerators defined by RequestEventEnum),
// memory_usage, elapsed_time and cpu_time.
// RequestEventEnum is an enum class which defines different events for some
// specific actions, currently we reuse the enum classes defined in mojoms.
// The enum class generally contains an OK and several different Errors,
// besides, there should be a kMax which shares the value of the highest
// enumerator.
template <class RequestEventEnum>
class RequestMetrics {
public:
// Creates a RequestMetrics with the specified metrics base name.
// Records UMA metrics named with the prefix
// "MachineLearningService.|name_base|."
explicit RequestMetrics(const std::string& name_base);
// Logs (to UMA) the specified |event| associated with this request.
void RecordRequestEvent(RequestEventEnum event);
// When you want to record metrics of some action, call Start func at the
// begining of it.
void StartRecordingPerformanceMetrics();
// Send performance metrics(memory_usage, elapsed_time, cpu_time) to UMA
// This would usually be called only if the action completes successfully.
void FinishRecordingPerformanceMetrics();
private:
MetricsLibrary metrics_library_;
const std::string name_base_;
std::unique_ptr<base::ProcessMetrics> process_metrics_;
chromeos_metrics::Timer timer_;
int64_t initial_memory_;
DISALLOW_COPY_AND_ASSIGN(RequestMetrics);
};
// UMA metric names:
constexpr char kGlobalMetricsPrefix[] = "MachineLearningService.";
constexpr char kEventSuffix[] = ".Event";
constexpr char kTotalMemoryDeltaSuffix[] = ".TotalMemoryDeltaKb";
constexpr char kElapsedTimeSuffix[] = ".ElapsedTimeMicrosec";
constexpr char kCpuTimeSuffix[] = ".CpuTimeMicrosec";
// UMA histogram ranges:
constexpr int kMemoryDeltaMinKb = 1; // 1 KB
constexpr int kMemoryDeltaMaxKb = 10000000; // 10 GB
constexpr int kMemoryDeltaBuckets = 100;
constexpr int kElapsedTimeMinMicrosec = 1; // 1 μs
constexpr int kElapsedTimeMaxMicrosec = 1800000000; // 30 min
constexpr int kElapsedTimeBuckets = 100;
constexpr int kCpuTimeMinMicrosec = 1; // 1 μs
constexpr int kCpuTimeMaxMicrosec = 1800000000; // 30 min
constexpr int kCpuTimeBuckets = 100;
template <class RequestEventEnum>
RequestMetrics<RequestEventEnum>::RequestMetrics(const std::string& name_base)
: name_base_(std::string(kGlobalMetricsPrefix) + name_base),
process_metrics_(nullptr) {}
template <class RequestEventEnum>
void RequestMetrics<RequestEventEnum>::RecordRequestEvent(
RequestEventEnum event) {
metrics_library_.SendEnumToUMA(name_base_ + kEventSuffix,
static_cast<int>(event),
static_cast<int>(RequestEventEnum::kMax));
process_metrics_.reset(nullptr);
}
template <class RequestEventEnum>
void RequestMetrics<RequestEventEnum>::StartRecordingPerformanceMetrics() {
DCHECK(process_metrics_ == nullptr);
process_metrics_ = base::ProcessMetrics::CreateCurrentProcessMetrics();
// Call GetCPUUsage in order to set the "zero" point of the CPU usage counter
// of process_metrics_.
process_metrics_->GetCPUUsage();
timer_.Start();
// Query memory usage.
size_t usage = 0;
if (!GetTotalProcessMemoryUsage(&usage)) {
LOG(DFATAL) << "Getting process memory usage failed.";
return;
}
initial_memory_ = static_cast<int64_t>(usage);
}
template <class RequestEventEnum>
void RequestMetrics<RequestEventEnum>::FinishRecordingPerformanceMetrics() {
DCHECK(process_metrics_ != nullptr);
// Elapsed time
timer_.Stop();
base::TimeDelta elapsed_time;
DCHECK(timer_.GetElapsedTime(&elapsed_time));
const int64_t elapsed_time_microsec = elapsed_time.InMicroseconds();
// CPU usage, 12.34 means 12.34%, and the range is 0 to 100 * numCPUCores.
// That's to say it can exceed 100 when there're multi CPUs.
// For example, if the device has 4 CPUs and the process fully uses 2 of
// them, the percent will be 200%.
const double cpu_usage_percent = process_metrics_->GetCPUUsage();
// CPU time, as mentioned above, "100 microseconds" means "1 CPU core fully
// utilized for 100 microseconds".
const int64_t cpu_time_microsec =
static_cast<int64_t>(cpu_usage_percent * elapsed_time_microsec / 100.);
// Memory usage
size_t usage = 0;
if (!GetTotalProcessMemoryUsage(&usage)) {
LOG(DFATAL) << "Getting process memory usage failed.";
return;
}
const int64_t memory_usage_kb =
static_cast<int64_t>(usage) - initial_memory_;
metrics_library_.SendToUMA(name_base_ + kTotalMemoryDeltaSuffix,
memory_usage_kb,
kMemoryDeltaMinKb,
kMemoryDeltaMaxKb,
kMemoryDeltaBuckets);
metrics_library_.SendToUMA(name_base_ + kElapsedTimeSuffix,
elapsed_time_microsec,
kElapsedTimeMinMicrosec,
kElapsedTimeMaxMicrosec,
kElapsedTimeBuckets);
metrics_library_.SendToUMA(name_base_ + kCpuTimeSuffix,
cpu_time_microsec,
kCpuTimeMinMicrosec,
kCpuTimeMaxMicrosec,
kCpuTimeBuckets);
}
} // namespace ml
#endif // ML_REQUEST_METRICS_H_