blob: e62e32c43c01e49d1390c0ea2188a7e5bce5d44b [file] [log] [blame]
# Copyright 2016 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
# Copyright (c) 2015 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""System metrics."""
from __future__ import print_function
import errno
import os
import platform
import sys
import time
import psutil
from chromite.lib import cros_logging as logging
from infra_libs import ts_mon
cpu_count = ts_mon.GaugeMetric('dev/cpu/count',
description='Number of CPU cores.')
cpu_time = ts_mon.FloatMetric('dev/cpu/time',
description='percentage of time spent by the CPU'
' in different states.')
disk_free = ts_mon.GaugeMetric(
'dev/disk/free',
description='Available bytes on disk partition.',
units=ts_mon.MetricsDataUnits.BYTES)
disk_total = ts_mon.GaugeMetric('dev/disk/total',
description='Total bytes on disk partition.',
units=ts_mon.MetricsDataUnits.BYTES)
# inode counts are only available on Unix.
if os.name == 'posix': # pragma: no cover
inodes_free = ts_mon.GaugeMetric(
'dev/inodes/free',
description='Number of available inodes on '
'disk partition (unix only).')
inodes_total = ts_mon.GaugeMetric(
'dev/inodes/total',
description='Number of possible inodes on '
'disk partition (unix only)')
mem_free = ts_mon.GaugeMetric('dev/mem/free',
description='Amount of memory available to a '
'process (in Bytes). Buffers are considered '
'free memory.',
units=ts_mon.MetricsDataUnits.BYTES)
mem_total = ts_mon.GaugeMetric('dev/mem/total',
description='Total physical memory in Bytes.',
units=ts_mon.MetricsDataUnits.BYTES)
START_TIME = psutil.boot_time()
net_up = ts_mon.CounterMetric('dev/net/bytes/up', start_time=START_TIME,
description='Number of bytes sent on interface.',
units=ts_mon.MetricsDataUnits.BYTES)
net_down = ts_mon.CounterMetric('dev/net/bytes/down', start_time=START_TIME,
description='Number of Bytes received on '
'interface.',
units=ts_mon.MetricsDataUnits.BYTES)
net_err_up = ts_mon.CounterMetric('dev/net/err/up', start_time=START_TIME,
description='Total number of errors when '
'sending (per interface).')
net_err_down = ts_mon.CounterMetric('dev/net/err/down', start_time=START_TIME,
description='Total number of errors when '
'receiving (per interface).')
net_drop_up = ts_mon.CounterMetric('dev/net/drop/up', start_time=START_TIME,
description='Total number of outgoing '
'packets that have been dropped.')
net_drop_down = ts_mon.CounterMetric(
'dev/net/drop/down', start_time=START_TIME,
description='Total number of incoming '
'packets that have been dropped.')
disk_read = ts_mon.CounterMetric('dev/disk/read', start_time=START_TIME,
description='Number of Bytes read on disk.',
units=ts_mon.MetricsDataUnits.BYTES)
disk_write = ts_mon.CounterMetric('dev/disk/write', start_time=START_TIME,
description='Number of Bytes written on '
'disk.',
units=ts_mon.MetricsDataUnits.BYTES)
uptime = ts_mon.GaugeMetric('dev/uptime',
description='Machine uptime, in seconds.',
units=ts_mon.MetricsDataUnits.SECONDS)
proc_count = ts_mon.GaugeMetric('dev/proc/count',
description='Number of processes currently '
'running.')
load_average = ts_mon.FloatMetric('dev/proc/load_average',
description='Number of processes currently '
'in the system run queue.')
# tsmon pipeline uses backend clocks when assigning timestamps to metric points.
# By comparing point timestamp to the point value (i.e. time by machine's local
# clock), we can potentially detect some anomalies (clock drift, unusually high
# metrics pipeline delay, completely wrong clocks, etc).
#
# It is important to gather this metric right before the flush.
unix_time = ts_mon.GaugeMetric(
'dev/unix_time',
description='Number of milliseconds since epoch '
'based on local machine clock.')
os_name = ts_mon.StringMetric('proc/os/name',
description='OS name on the machine ')
os_version = ts_mon.StringMetric('proc/os/version',
description='OS version on the machine ')
os_arch = ts_mon.StringMetric('proc/os/arch',
description='OS architecture on this machine')
python_arch = ts_mon.StringMetric('proc/python/arch',
description='python userland '
'architecture on this machine')
def get_uptime():
uptime.set(int(time.time() - START_TIME))
def get_cpu_info():
cpu_count.set(psutil.cpu_count())
times = psutil.cpu_times_percent()
for mode in ('user', 'system', 'idle'):
cpu_time.set(getattr(times, mode), {'mode': mode})
def get_disk_info(mountpoints=None):
if mountpoints is None:
mountpoints = [disk.mountpoint for disk in psutil.disk_partitions()]
for mountpoint in mountpoints:
fields = {'path': mountpoint}
try:
usage = psutil.disk_usage(mountpoint)
except OSError as ex:
if ex.errno == errno.ENOENT:
# This happens on Windows when querying a removable drive that
# doesn't have any media inserted right now.
continue
raise # pragma: no cover
disk_free.set(usage.free, fields=fields)
disk_total.set(usage.total, fields=fields)
# inode counts are only available on Unix.
if os.name == 'posix': # pragma: no cover
stats = os.statvfs(mountpoint)
inodes_free.set(stats.f_favail, fields=fields)
inodes_total.set(stats.f_files, fields=fields)
try:
for disk, counters in psutil.disk_io_counters(perdisk=True).iteritems():
fields = {'disk': disk}
disk_read.set(counters.read_bytes, fields=fields)
disk_write.set(counters.write_bytes, fields=fields)
except RuntimeError as ex:
if "couldn't find any physical disk" in str(ex):
# Disk performance counters aren't enabled on Windows.
pass
else:
raise
def get_mem_info():
# We don't report mem.used because (due to virtual memory) it is not
# useful.
mem = psutil.virtual_memory()
mem_free.set(mem.available)
mem_total.set(mem.total)
def get_net_info():
metric_counter_names = [
(net_up, 'bytes_sent'),
(net_down, 'bytes_recv'),
(net_err_up, 'errout'),
(net_err_down, 'errin'),
(net_drop_up, 'dropout'),
(net_drop_down, 'dropin'),
]
nics = psutil.net_io_counters(pernic=True)
for nic, counters in nics.iteritems():
# TODO(ayatane): Use a different way of identifying virtual interfaces
if nic.startswith('veth'):
# Skip virtual interfaces
continue
fields = {'interface': nic}
for metric, counter_name in metric_counter_names:
try:
metric.set(getattr(counters, counter_name), fields=fields)
except ts_mon.MonitoringDecreasingValueError as ex:
# This normally shouldn't happen, but might if the network
# driver module is reloaded, so log an error and continue
# instead of raising an exception.
logging.error(str(ex))
def get_os_info():
os_name_data = ''
os_version_data = ''
os_name_data = platform.system().lower()
if 'windows' in os_name_data:
os_name_data = 'windows'
# os_release will be something like '7', 'vista', or 'xp'
os_version_data = platform.release()
elif 'linux' in os_name_data:
# will return something like ('Ubuntu', '14.04', 'trusty')
dist_info_data = platform.dist()
os_name_data = dist_info_data[0]
os_version_data = dist_info_data[1]
# on mac platform.system() reports 'darwin'
else:
# this tuple is only populated on mac systems
mac_ver_data = platform.mac_ver()
# [0] will be '10.11.5' or similar on a valid mac or will be '' on a
# non-mac
os_version_data = mac_ver_data[0]
if os_version_data:
# we found a valid mac
os_name_data = 'mac'
else:
# not a mac, unable to find platform information, reset
os_name_data = ''
os_version_data = ''
# normalize to lower case
os_name_data = os_name_data.lower()
os_version_data = os_version_data.lower()
python_arch_data = '32'
if sys.maxsize > 2**32:
python_arch_data = '64'
# construct metrics
os_name.set(os_name_data)
os_version.set(os_version_data)
os_arch.set(platform.machine())
python_arch.set(python_arch_data)
def clear_os_info():
os_name.reset()
os_version.reset()
os_arch.reset()
python_arch.reset()
def get_proc_info():
procs = psutil.pids()
proc_count.set(len(procs))
if os.name == 'posix': # pragma: no cover
try:
avg1, avg5, avg15 = os.getloadavg()
except OSError: # pragma: no cover
pass
else:
load_average.set(avg1, fields={'minutes': 1})
load_average.set(avg5, fields={'minutes': 5})
load_average.set(avg15, fields={'minutes': 15})
def get_unix_time():
unix_time.set(int(time.time() * 1000))