blob: 8c78c0a76c2191202718e8e0e300117cb9d22cf7 [file] [log] [blame]
#!/usr/bin/python
# Copyright 2008 The Native Client Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""
Filter service runtime logging output and compute system call statistics.
To use this script, define the BENCHMARK symbol to be zero (default)
in nacl_syscall_hook.c. Next, run the service runtime with NACLLOG
set to an output file name. When the run is complete, run this script
with that file as input.
"""
from __future__ import print_function
import math
import re
import sys
class Stats:
"""
Compute basic statistics.
"""
def __init__(self):
self._sum_x = 0.0
self._sum_x_squared = 0.0
self._n = 0
def Enter(self, val):
"""Enter a new value.
Args:
val: the new (floating point) value
"""
self._sum_x += val
self._sum_x_squared += val * val
self._n += 1
def Mean(self):
"""Returns the mean of entered values.
"""
return self._sum_x / self._n
def Variance(self):
"""Returns the variance of entered values.
"""
mean = self.Mean()
return self._sum_x_squared / self._n - mean * mean
def Stddev(self):
"""Returns the standard deviation of entered values.
"""
return math.sqrt(self.Variance())
def NumEntries(self):
"""Returns the number of data points entered.
"""
return self._n
class PeakStats:
"""Compute min and max for a data set. While far less efficient
than using a reduce, this class makes streaming data handling
easier.
"""
def __init__(self):
self._min = 1L << 64
self._max = -1
def Enter(self, val):
"""Enter a new datum.
Args:
val: the new datum to be entered.
"""
if val > self._max:
self._max = val
if val < self._min:
self._min = val
def Max(self):
"""Returns the maximum value found so far.
"""
return self._max
def Min(self):
"""Returns the minimum value found so far.
"""
return self._min
class WindowedRate:
"""Class for computing statistics on events based on counting the
number of occurrences in a time interval. Statistcs on these
bucketed counts are then available.
"""
def __init__(self, duration):
self._t_start = -1
self._t_duration = duration
self._t_end = -1
self._event_count = 0
self._rate_stats = Stats()
self._peak_stats = PeakStats()
def Enter(self, t):
"""Enter in a new event that occurred at time t.
Args:
t: the time at which an event occurred.
"""
if self._t_start == -1:
self._t_start = t
self._t_end = t + self._t_duration
return
# [ t_start, t_start + duration )
if t < self._t_end:
self._event_count += 1
return
self.Compute()
self._event_count = 1
next_end = self._t_end
while next_end < t:
next_end += self._t_duration
self._t_end = next_end
self._t_start = next_end - self._t_duration
def Compute(self):
"""Finalize the last bucket.
"""
self._rate_stats.Enter(self._event_count)
self._peak_stats.Enter(self._event_count)
self._event_count = 0
def RateStats(self):
"""Returns the event rate statistics object.
"""
return self._rate_stats
def PeakStats(self):
"""Returns the peak event rate statistics object.
"""
return self._peak_stats
class TimestampParser:
"""
A class to parse timestamp strings. This is needed because there is
implicit state: the timestamp string is HH:MM:SS.fract and may cross
a 24 hour boundary -- we do not log the date since that would make
the log file much larger and generally it is not needed (implicit in
file modification time) -- so we convert to a numeric representation
that is relative to an arbitrary epoch start, and the state enables
us to correctly handle midnight.
This code assumes that the timestamps are monotonically
non-decreasing.
"""
def __init__(self):
self._min_time = -1
def Convert(self, timestamp):
"""Converts a timestamp string into a numeric timestamp value.
Args:
timestamp: A timestamp string in HH:MM:SS.fraction format.
Returns:
a numeric timestamp (arbitrary epoch)
"""
(hh, mm, ss) = map(float,timestamp.split(':'))
t = ((hh * 60) + mm) * 60 + ss
if self._min_time == -1:
self._min_time = t
while t < self._min_time:
t += 24 * 60 * 60
self._min_time = t
return t
def ReadFileHandle(fh, duration):
"""Reads log data from the provided file handle, and compute and
print various statistics on the system call rate based on the log
data.
"""
# log format "[pid:timestamp] msg" where the timestamp is
log_re = re.compile(r'\[[0-9,]+:([:.0-9]+)\] system call [0-9]+')
parser = TimestampParser()
inter_stats = Stats()
rate_stats = Stats()
windowed = WindowedRate(duration)
prev_time = -1
start_time = 0
for line in fh: # generator
m = log_re.search(line)
if m is not None:
timestamp = m.group(1)
t = parser.Convert(timestamp)
windowed.Enter(t)
if prev_time != -1:
elapsed = t - prev_time
inter_stats.Enter(elapsed)
rate_stats.Enter(1.0/elapsed)
else:
start_time = t
prev_time = t
print('\nInter-syscall time')
print('Mean: %g' % inter_stats.Mean())
print('Stddev: %g' % inter_stats.Stddev())
print('\nInstantaneous Syscall Rate (unweighted!)')
print('Mean : %g' % rate_stats.Mean())
print('Stddev: %g' % rate_stats.Stddev())
print('\nAvg Syscall Rate: %g' % (rate_stats.NumEntries() /
(prev_time - start_time)))
print('\nSyscalls in %f interval' % duration)
print('Mean: %g' % windowed.RateStats().Mean())
print('Stddev: %g' % windowed.RateStats().Stddev())
print('Min: %g' % windowed.PeakStats().Min())
print('Max: %g' % windowed.PeakStats().Max())
def main(argv):
if len(argv) > 1:
print('no arguments expected\n', file=sys.stderr)
return 1
ReadFileHandle(sys.stdin, 0.010)
return 0
if __name__ == '__main__':
sys.exit(main(sys.argv))