mtstat/mtstat.py - chromiumos/platform/mttools - Git at Google

 #! /usr/bin/env python
 # Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.
 from collections import Counter, defaultdict, namedtuple, OrderedDict
 from itertools import chain
 from mtreplay import MTReplay
 from queryengine import Query, QueryEngine, QueryMatch, QueryResult
 import re


 class MTStatQuery(Query):
   """ Searches for MTStat lines.

   An MTStat line looks like 'MTStat:124.321:Key=Value' and is used
   for generating statistics and searching of numeric values.
   """
   line_regex = re.compile('MTStat:([0-9]+\.[0-9]+):(\w+)([=:])(\w+)')
   line_match = namedtuple("LineMatch", "timestamp key operator value")

   @classmethod
   def MatchLine(cls, line):
     match = cls.line_regex.match(line)
     if match:
       return cls.line_match(
         timestamp=match.group(1),
         key=match.group(2),
         operator=match.group(3),
         value=match.group(4)
       )
     return None

   # supported operators for queries
   search_operators = {
     ">": lambda a, b: float(a) > float(b),
     ">=": lambda a, b: float(a) >= float(b),
     "<": lambda a, b: float(a) < float(b),
     "<=": lambda a, b: float(a) <= float(b),
     "=": lambda a, b: str(a) == str(b),
     "==": lambda a, b: str(a) == str(b),
     "!=": lambda a, b: str(a) != str(b)
   }

   def __init__(self, query=None, capture_logs=False):
     self.query = query
     self.capture_logs = capture_logs
     # parse search query
     if self.query:
       # example: Key >= 32
       query_regex = re.compile('\s*(\w+)\s*([<>=!]+)\s*([0-9a-zA-Z]+)\s*')
       match = query_regex.match(query)
       if not match:
         raise ValueError(query, " is not a valid query")
       self.key = match.group(1)
       self.op = match.group(2)
       self.value = match.group(3)

   def FindMatches(self, replay_results, filename):
     # find MTStat lines in log
     lines = replay_results.gestures_log.splitlines()
     lines = filter(lambda line: MTStatQuery.MatchLine(line), lines)
     all_matches = map(lambda line: MTStatMatch(line, filename), lines)

     # samples are denoted by the : operator, all of them are returned.
     samples = filter(lambda match: match.operator == ":", all_matches)

     # updates are denoted by the = operator and force only the last
     # update to be returned.
     updates = filter(lambda match: match.operator == "=", all_matches)
     last_updates = dict([(update.key, update) for update in updates])

     matches = samples + last_updates.values()

     # filter by search query if requested
     if self.query:
       matches = filter(self.Match, matches)
     return matches

   def Match(self, match):
     op = MTStatQuery.search_operators[self.op]
     return (match.key == self.key and
             op(match.value, self.value))


 class MTStatMatch(QueryMatch):
   def __init__(self, line, file):
     match = MTStatQuery.MatchLine(line)
     QueryMatch.__init__(self, file, float(match.timestamp), line)
     self.key = match.key
     self.operator = match.operator
     self.value = match.value


 class RegexQuery(Query):
   """ Searches the raw gestures log with a regular expression """
   def __init__(self, query):
     self.regex = re.compile(query)
     self.capture_logs = True

   def FindMatches(self, replay_results, filename):
     lines = replay_results.gestures_log.splitlines()
     lines = filter(lambda line: self.regex.match(line), lines)
     matches = map(lambda line: QueryMatch(filename, None, line), lines)
     return matches


 class GesturesQuery(Query):
   """ Searches for gestures with matching type """
   def __init__(self, type, capture_logs):
     self.type = type
     self.capture_logs = capture_logs

   def FindMatches(self, replay_results, filename):
     gestures = replay_results.gestures.gestures
     if self.type:
       gestures = filter(lambda g: g.type == self.type, gestures)
     return map(lambda g: GesturesMatch(filename, g), gestures)


 class GesturesMatch(QueryMatch):
   def __init__(self, filename, gesture):
     QueryMatch.__init__(self, filename, gesture.start, str(gesture))
     self.gesture = gesture


 class GesturesDiffQuery(GesturesQuery):
   """ Compare gestures to 'original' QueryResult and search for changes """
   def __init__(self, type, original):
     self.type = type
     self.original = original
     self.capture_logs = True

   def FindMatches(self, replay_results, filename):
     self_matches = GesturesQuery.FindMatches(self, replay_results, filename)
     original_matches = self.original.matches

     size = min(len(original_matches), len(self_matches))
     matches = []
     for i in range(size):
       if str(self_matches[i].gesture) != str(original_matches[i].gesture):
         match = GesturesDiffMatch(filename, self_matches[i].gesture,
                                   original_matches[i].gesture)
         matches.append(match)
     return matches


 class GesturesDiffMatch(GesturesMatch):
   def __init__(self, filename, new, original):
     GesturesMatch.__init__(self, filename, new)
     self.original = original

   def __str__(self):
     return "%f: %s != %s" % (self.timestamp, str(self.gesture),
                              str(self.original))


 class MTStat(object):
   def SearchChanges(self, type=None, number=None, platform=None, parallel=True):
     """ Search for changed gestures.

     This command will compare the gestures output of the HEAD version of
     the gestures library with the local version.
     Optionally the type of gestures to look at can be specified by 'type'
     """
     engine = QueryEngine()
     files = engine.SelectFiles(number, platform)

     MTReplay().Recompile(head=True)
     gestures_query = GesturesQuery(type, False)
     original_results = engine.Execute(files, gestures_query, parallel=parallel)

     MTReplay().Recompile()
     diff_queries =dict([
       (file, GesturesDiffQuery(type, original_results[file]))
       for file in files if file in original_results])
     return engine.Execute(files, diff_queries, parallel=parallel)

   def Search(self, search=None, gestures=None, regex=None,
              number=None, platform=None, parallel=True):
     """ Search for occurences of a specific tag or regex.

     Specify either a 'search' or a 'regex'. Search queries are formatted
     in a simple "key operator value" format. For example:
     "MyKey > 5" will return all matches where MyKey has a value
     of greater than 5.
     Supported operators are: >, <, >=, <=, =, !=

     number: optional number of random reports to use
     parallel: use parallel processing

     returns a dictionary of lists containing the matches
     for each file.
     """
     engine = QueryEngine()
     files = engine.SelectFiles(number, platform)

     if search:
       query = MTStatQuery(search, True)
     elif regex:
       query = RegexQuery(regex)
     elif gestures:
       query = GesturesQuery(gestures, True)
     else:
       return None

     return engine.Execute(files, query, parallel=parallel)

   def GatherStats(self, number=None, platform=None, parallel=True, num_bins=10):
     """ Gathers stats on feedback reports.

     Returns a dictionary with a histogram for each recorded key.
     """
     engine = QueryEngine()
     files = engine.SelectFiles(number, platform)
     results = engine.Execute(files, MTStatQuery(), parallel=parallel)

     # gather values for each key in a list
     all_matches = chain(*[result.matches for result in results.values()])
     value_collection = defaultdict(list)
     for match in all_matches:
       value_collection[match.key].append(match.value)

     # build histograms
     histograms = {}
     for key, values in value_collection.items():
       histograms[key] = self._Histogram(values, num_bins)
     return OrderedDict(sorted(histograms.items(), key=lambda t: t[0]))

   def Download(self, num, offset, parallel=True):
     QueryEngine().Download(num, offset, parallel)

   def _Histogram(self, values, num_bins):
     def RawHistogram(values):
       return OrderedDict(Counter(values))

     # convert all items to integers.
     integers = []
     for value in values:
       try:
         integers.append(int(value))
       except:
         # not an integer.
         return RawHistogram(values)

     # don't condense lists that are already small enough
     if len(set(integers)) <= num_bins:
       return RawHistogram(integers)

     # all integer values, use bins for histogram
     histogram = OrderedDict()
     integers = sorted(integers)

     # calculate bin size (append one at the end to include last value)
     begin = integers[0]
     end = integers[-1] + 1
     bin_size = float(end - begin) / float(num_bins)

     # remove each bins integers from the list and count them.
     for i in range(num_bins):
       high_v = round((i + 1) * bin_size) + begin

       filtered = filter(lambda i: i >= high_v, integers)
       histogram["<%d" % high_v] = len(integers) - len(filtered)
       integers = filtered
     return histogram
	#! /usr/bin/env python
	# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
	# Use of this source code is governed by a BSD-style license that can be
	# found in the LICENSE file.
	from collections import Counter, defaultdict, namedtuple, OrderedDict
	from itertools import chain
	from mtreplay import MTReplay
	from queryengine import Query, QueryEngine, QueryMatch, QueryResult
	import re


	class MTStatQuery(Query):
	""" Searches for MTStat lines.

	An MTStat line looks like 'MTStat:124.321:Key=Value' and is used
	for generating statistics and searching of numeric values.
	"""
	line_regex = re.compile('MTStat:([0-9]+\.[0-9]+):(\w+)([=:])(\w+)')
	line_match = namedtuple("LineMatch", "timestamp key operator value")

	@classmethod
	def MatchLine(cls, line):
	match = cls.line_regex.match(line)
	if match:
	return cls.line_match(
	timestamp=match.group(1),
	key=match.group(2),
	operator=match.group(3),
	value=match.group(4)
	)
	return None

	# supported operators for queries
	search_operators = {
	">": lambda a, b: float(a) > float(b),
	">=": lambda a, b: float(a) >= float(b),
	"<": lambda a, b: float(a) < float(b),
	"<=": lambda a, b: float(a) <= float(b),
	"=": lambda a, b: str(a) == str(b),
	"==": lambda a, b: str(a) == str(b),
	"!=": lambda a, b: str(a) != str(b)
	}

	def __init__(self, query=None, capture_logs=False):
	self.query = query
	self.capture_logs = capture_logs
	# parse search query
	if self.query:
	# example: Key >= 32
	query_regex = re.compile('\s(\w+)\s([<>=!]+)\s([0-9a-zA-Z]+)\s')
	match = query_regex.match(query)
	if not match:
	raise ValueError(query, " is not a valid query")
	self.key = match.group(1)
	self.op = match.group(2)
	self.value = match.group(3)

	def FindMatches(self, replay_results, filename):
	# find MTStat lines in log
	lines = replay_results.gestures_log.splitlines()
	lines = filter(lambda line: MTStatQuery.MatchLine(line), lines)
	all_matches = map(lambda line: MTStatMatch(line, filename), lines)

	# samples are denoted by the : operator, all of them are returned.
	samples = filter(lambda match: match.operator == ":", all_matches)

	# updates are denoted by the = operator and force only the last
	# update to be returned.
	updates = filter(lambda match: match.operator == "=", all_matches)
	last_updates = dict([(update.key, update) for update in updates])

	matches = samples + last_updates.values()

	# filter by search query if requested
	if self.query:
	matches = filter(self.Match, matches)
	return matches

	def Match(self, match):
	op = MTStatQuery.search_operators[self.op]
	return (match.key == self.key and
	op(match.value, self.value))


	class MTStatMatch(QueryMatch):
	def __init__(self, line, file):
	match = MTStatQuery.MatchLine(line)
	QueryMatch.__init__(self, file, float(match.timestamp), line)
	self.key = match.key
	self.operator = match.operator
	self.value = match.value


	class RegexQuery(Query):
	""" Searches the raw gestures log with a regular expression """
	def __init__(self, query):
	self.regex = re.compile(query)
	self.capture_logs = True

	def FindMatches(self, replay_results, filename):
	lines = replay_results.gestures_log.splitlines()
	lines = filter(lambda line: self.regex.match(line), lines)
	matches = map(lambda line: QueryMatch(filename, None, line), lines)
	return matches


	class GesturesQuery(Query):
	""" Searches for gestures with matching type """
	def __init__(self, type, capture_logs):
	self.type = type
	self.capture_logs = capture_logs

	def FindMatches(self, replay_results, filename):
	gestures = replay_results.gestures.gestures
	if self.type:
	gestures = filter(lambda g: g.type == self.type, gestures)
	return map(lambda g: GesturesMatch(filename, g), gestures)


	class GesturesMatch(QueryMatch):
	def __init__(self, filename, gesture):
	QueryMatch.__init__(self, filename, gesture.start, str(gesture))
	self.gesture = gesture


	class GesturesDiffQuery(GesturesQuery):
	""" Compare gestures to 'original' QueryResult and search for changes """
	def __init__(self, type, original):
	self.type = type
	self.original = original
	self.capture_logs = True

	def FindMatches(self, replay_results, filename):
	self_matches = GesturesQuery.FindMatches(self, replay_results, filename)
	original_matches = self.original.matches

	size = min(len(original_matches), len(self_matches))
	matches = []
	for i in range(size):
	if str(self_matches[i].gesture) != str(original_matches[i].gesture):
	match = GesturesDiffMatch(filename, self_matches[i].gesture,
	original_matches[i].gesture)
	matches.append(match)
	return matches


	class GesturesDiffMatch(GesturesMatch):
	def __init__(self, filename, new, original):
	GesturesMatch.__init__(self, filename, new)
	self.original = original

	def __str__(self):
	return "%f: %s != %s" % (self.timestamp, str(self.gesture),
	str(self.original))


	class MTStat(object):
	def SearchChanges(self, type=None, number=None, platform=None, parallel=True):
	""" Search for changed gestures.

	This command will compare the gestures output of the HEAD version of
	the gestures library with the local version.
	Optionally the type of gestures to look at can be specified by 'type'
	"""
	engine = QueryEngine()
	files = engine.SelectFiles(number, platform)

	MTReplay().Recompile(head=True)
	gestures_query = GesturesQuery(type, False)
	original_results = engine.Execute(files, gestures_query, parallel=parallel)

	MTReplay().Recompile()
	diff_queries =dict([
	(file, GesturesDiffQuery(type, original_results[file]))
	for file in files if file in original_results])
	return engine.Execute(files, diff_queries, parallel=parallel)

	def Search(self, search=None, gestures=None, regex=None,
	number=None, platform=None, parallel=True):
	""" Search for occurences of a specific tag or regex.

	Specify either a 'search' or a 'regex'. Search queries are formatted
	in a simple "key operator value" format. For example:
	"MyKey > 5" will return all matches where MyKey has a value
	of greater than 5.
	Supported operators are: >, <, >=, <=, =, !=

	number: optional number of random reports to use
	parallel: use parallel processing

	returns a dictionary of lists containing the matches
	for each file.
	"""
	engine = QueryEngine()
	files = engine.SelectFiles(number, platform)

	if search:
	query = MTStatQuery(search, True)
	elif regex:
	query = RegexQuery(regex)
	elif gestures:
	query = GesturesQuery(gestures, True)
	else:
	return None

	return engine.Execute(files, query, parallel=parallel)

	def GatherStats(self, number=None, platform=None, parallel=True, num_bins=10):
	""" Gathers stats on feedback reports.

	Returns a dictionary with a histogram for each recorded key.
	"""
	engine = QueryEngine()
	files = engine.SelectFiles(number, platform)
	results = engine.Execute(files, MTStatQuery(), parallel=parallel)

	# gather values for each key in a list
	all_matches = chain(*[result.matches for result in results.values()])
	value_collection = defaultdict(list)
	for match in all_matches:
	value_collection[match.key].append(match.value)

	# build histograms
	histograms = {}
	for key, values in value_collection.items():
	histograms[key] = self._Histogram(values, num_bins)
	return OrderedDict(sorted(histograms.items(), key=lambda t: t[0]))

	def Download(self, num, offset, parallel=True):
	QueryEngine().Download(num, offset, parallel)

	def _Histogram(self, values, num_bins):
	def RawHistogram(values):
	return OrderedDict(Counter(values))

	# convert all items to integers.
	integers = []
	for value in values:
	try:
	integers.append(int(value))
	except:
	# not an integer.
	return RawHistogram(values)

	# don't condense lists that are already small enough
	if len(set(integers)) <= num_bins:
	return RawHistogram(integers)

	# all integer values, use bins for histogram
	histogram = OrderedDict()
	integers = sorted(integers)

	# calculate bin size (append one at the end to include last value)
	begin = integers[0]
	end = integers[-1] + 1
	bin_size = float(end - begin) / float(num_bins)

	# remove each bins integers from the list and count them.
	for i in range(num_bins):
	high_v = round((i + 1) * bin_size) + begin

	filtered = filter(lambda i: i >= high_v, integers)
	histogram["<%d" % high_v] = len(integers) - len(filtered)
	integers = filtered
	return histogram