#!/usr/bin/env python
# Copyright (c) 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import argparse
import errno
import json
import os
import re
import sys
import urllib
import urllib2
# Where all the data lives.
# NOTE(review): the ROOT_URL definition was missing from this copy of the file
# even though main() passes it to Waterfall. The value below is the
# chromium.memory.fyi builders page this tool was written against - confirm
# before relying on it.
ROOT_URL = "http://build.chromium.org/p/chromium.memory.fyi/builders"

# TODO(groby) - support multi-line search from the command line. Useful when
# scanning for classes of failures, see below.
# NOTE(review): the closing lines of this literal were missing; the "</p>"
# terminator is reconstructed from the opening tag - confirm.
SEARCH_STRING = """<p class=\"failure result\">
Failed memory test: content
</p>"""

# Location of the log cache, relative to the directory containing this script.
CACHE_DIR = "buildlogs.tmp"

# If we don't find anything after searching |CUTOFF| logs, we're probably done.
CUTOFF = 200
def EnsurePath(path):
  """Makes sure |path| does exist, tries to create it if it doesn't.

  Args:
    path: Directory to create, including any missing parent directories.

  Raises:
    OSError: If the directory cannot be created, or if |path| already exists
        but is not a directory.
  """
  try:
    os.makedirs(path)
  except OSError as exception:
    # An already existing directory is the success case. Anything else
    # (permissions, or a regular file squatting on the name) is a real error.
    if exception.errno != errno.EEXIST or not os.path.isdir(path):
      raise
class Cache(object):
  """Disk cache for remote resources, rooted at a single directory.

  Entries are addressed by a local name (a path relative to the cache root, or
  an absolute path inside it) and are filled from a remote URL on a miss.
  """

  def __init__(self, root_dir):
    """Args:
      root_dir: Directory holding the cache; stored as an absolute path.
    """
    self._root_dir = os.path.abspath(root_dir)

  def _LocalName(self, name):
    """If name is a relative path, treat it as relative to cache root.

    If it is absolute and under cache root, pass it through.
    Otherwise, raise error.

    Args:
      name: Relative or absolute path of a cache entry.

    Returns:
      The absolute path of the entry.

    Raises:
      AssertionError: If |name| is absolute but not under the cache root.
    """
    if not os.path.isabs(name):
      return os.path.join(self._root_dir, name)
    # Compare on a path-component boundary: a plain character prefix check
    # would wrongly accept sibling directories such as "<root>2/...".
    root = self._root_dir.rstrip(os.sep)
    assert name == root or name.startswith(root + os.sep), (
        "%s is not under cache root %s" % (name, self._root_dir))
    return name

  def _FetchLocal(self, local_name):
    """Returns the cached entry as a list of lines, or None if not cached.

    Lines keep their trailing newline, as produced by file.readlines().
    """
    local_name = self._LocalName(local_name)
    if not os.path.exists(local_name):
      return None
    with open(local_name, 'r') as f:
      return f.readlines()

  def _FetchRemote(self, remote_name):
    """Downloads |remote_name| and returns the response body.

    Raises:
      Whatever urllib2.urlopen raises; the failure is reported on stdout
      first and then re-raised unchanged.
    """
    try:
      response = urllib2.urlopen(remote_name)
    except Exception:
      print("Could not fetch %s" % remote_name)
      raise
    try:
      return response.read()
    finally:
      response.close()

  def Update(self, local_name, remote_name):
    """Fetches |remote_name|, stores it under |local_name|, returns its lines.

    Args:
      local_name: Cache entry to (over)write.
      remote_name: URL to download.

    Returns:
      The downloaded body split into lines (without line terminators).
    """
    local_name = self._LocalName(local_name)
    # Download first so that a failed fetch never clobbers an existing entry.
    blob = self._FetchRemote(remote_name)
    # Entries may live in subdirectories (e.g. "builds/<builder>/<n>.log").
    EnsurePath(os.path.dirname(local_name))
    with open(local_name, "w") as f:
      f.write(blob)
    return blob.splitlines()

  def FetchData(self, local_name, remote_name):
    """Returns the lines of |local_name|, downloading it on a cache miss.

    An empty cached file is treated like a miss and fetched again.
    """
    result = self._FetchLocal(local_name)
    if result:
      return result
    # If we get here, the local cache does not exist yet. Fetch, and store.
    return self.Update(local_name, remote_name)
class Builder(object):
  """A single builder on a waterfall; gives access to its build logs."""

  def __init__(self, waterfall, name):
    """Args:
      waterfall: The owning Waterfall; supplies the root URL, the cache and
          the latest build number.
      name: Builder name as shown on the waterfall (unquoted).
    """
    self._name = name
    self._waterfall = waterfall

  def Name(self):
    """Returns the (unquoted) builder name."""
    return self._name

  def LatestBuild(self):
    """Returns the latest build number the waterfall reports for this builder."""
    return self._waterfall.GetLatestBuild(self._name)

  def GetBuildPath(self, build_num):
    """Returns the URL of build |build_num| of this builder."""
    return "%s/%s/builds/%d" % (
        self._waterfall._root_url, urllib.quote(self._name), build_num)

  def _FetchBuildLog(self, build_num):
    """Returns the lines of build |build_num|, going through the cache."""
    local_build_path = "builds/%s" % self._name
    local_build_file = os.path.join(local_build_path, "%d.log" % build_num)
    return self._waterfall._cache.FetchData(local_build_file,
                                            self.GetBuildPath(build_num))

  def _CheckLog(self, build_num, tester):
    """Returns True if |tester| accepts any line of build |build_num|."""
    log_lines = self._FetchBuildLog(build_num)
    return any(tester(line) for line in log_lines)

  def ScanLogs(self, tester, cutoff=None):
    """Walks builds from the latest one downwards, collecting matches.

    Args:
      tester: Callable taking one log line and returning True on a match.
      cutoff: Stop after this many non-matching builds in total (hits do not
          reset the count). Defaults to the module-level CUTOFF.

    Returns:
      List of matching build numbers, newest first.
    """
    if cutoff is None:
      cutoff = CUTOFF
    occurrences = []
    misses = 0
    build = self.LatestBuild()
    # Build numbers count down to 1; "> 0" also terminates on bogus input.
    while build > 0 and misses < cutoff:
      if self._CheckLog(build, tester):
        occurrences.append(build)
      else:
        misses += 1
      build -= 1
    return occurrences
class Waterfall(object):
  """The builders page of a waterfall, parsed into builders and build numbers.

  The page is read through a Cache; parsing happens lazily on first use and
  again whenever Update() refreshes the cached copy.
  """

  # Search for both builders and latest build number in HTML
  # <td class="box"><a href="builders/<builder-name>"> identifies a builder
  # <a href="builders/<builder-name>/builds/<build-num>"> is the latest build.
  _BOX_MATCHER = re.compile(r'.*a href[^>]*>([^<]*)\<')
  _BUILD_MATCHER = re.compile(r'.*a href="builders/(.*)/builds/([0-9]+)".*')

  def __init__(self, root_url, cache_dir):
    """Args:
      root_url: URL of the waterfall's builders page.
      cache_dir: Directory used for the on-disk cache.
    """
    self._root_url = root_url
    self._builders = {}
    self._top_revision = {}
    self._cache = Cache(cache_dir)

  def Builders(self):
    """Returns the Builder objects found on the waterfall page."""
    self.FetchInfo()
    return self._builders.values()

  def Update(self):
    """Re-downloads the builders page and re-parses it."""
    html = self._cache.Update("builders", self._root_url)
    # Drop stale state so builders that disappeared are not kept around.
    self._builders = {}
    self._top_revision = {}
    self._ParseBuilders(html)

  def FetchInfo(self):
    """Parses the (cached) builders page, unless that already happened."""
    if self._top_revision:
      return
    html = self._cache.FetchData("builders", self._root_url)
    self._ParseBuilders(html)

  def _ParseBuilders(self, html):
    """Fills _builders and _top_revision from the page's lines."""
    last_builder = ""
    for line in html:
      if 'a href="builders/' not in line:
        continue
      if 'td class="box"' in line:
        match = self._BOX_MATCHER.match(line)
        if match is None:
          continue
        last_builder = match.group(1)
        self._builders[last_builder] = Builder(self, last_builder)
      else:
        match = self._BUILD_MATCHER.match(line)
        if match is None:
          # A builder link without a build number.
          continue
        builder = match.group(1)
        # A build link is expected to belong to the builder box seen last.
        assert builder == urllib.quote(last_builder)
        self._top_revision[last_builder] = int(match.group(2))

  def GetLatestBuild(self, name):
    """Returns the latest build number of builder |name|.

    Raises:
      AssertionError: If no build numbers could be parsed from the page.
      KeyError: If |name| has no known latest build.
    """
    self.FetchInfo()
    assert self._top_revision
    return self._top_revision[name]
class MultiLineChange(object):
  """Stateful line tester that matches a run of consecutive lines.

  Instances are callables meant to be fed the lines of a log one at a time.
  A match is reported on the line that completes the tracked sequence.
  """

  def __init__(self, lines):
    """Args:
      lines: Substrings that must occur on consecutive lines, in order.
    """
    self._tracked_lines = lines
    self._current = 0

  def __call__(self, line):
    """Test a single line against multi-line change.

    If it matches the currently active line, advance one line.
    If the current line is the last line, report a match.

    Args:
      line: The next line of the text being scanned.

    Returns:
      True if |line| completes the tracked sequence, False otherwise. An
      empty tracked sequence never matches.
    """
    if not self._tracked_lines:
      return False
    if self._tracked_lines[self._current] not in line:
      if self._current == 0:
        return False
      # A partial match just broke. The breaking line may itself start a new
      # match, so re-test it against the first tracked line instead of
      # discarding it.
      self._current = 0
      if self._tracked_lines[0] not in line:
        return False
    self._current += 1
    if self._current == len(self._tracked_lines):
      self._current = 0
      return True
    return False
def main(argv):
  """Command-line entry point.

  --update refreshes the cached builders page and pulls the build logs of
  every builder into the cache. --find scans the build logs of every builder
  for the given search term (which may span several lines) and reports the
  affected builds, as text or, with --json, as a JSON list on stdout.

  Args:
    argv: Full argument vector; argv[0] locates the cache directory.

  Returns:
    0 on completion.
  """
  # Create argument parser.
  parser = argparse.ArgumentParser()
  commands = parser.add_mutually_exclusive_group(required=True)
  commands.add_argument("--update", action='store_true')
  commands.add_argument("--find", metavar='search term')
  parser.add_argument("--json", action='store_true',
                      help="Output in JSON format")
  args = parser.parse_args(argv[1:])

  # The cache lives next to the script, not in the current directory.
  script_dir = os.path.abspath(os.path.dirname(argv[0]))
  cache_path = os.path.join(script_dir, CACHE_DIR)
  fyi = Waterfall(ROOT_URL, cache_path)

  if args.update:
    fyi.Update()
    fyi.FetchInfo()
    for builder in fyi.Builders():
      print("Updating %s" % builder.Name())
      # A tester that never matches makes ScanLogs pull logs into the cache.
      builder.ScanLogs(lambda x: False)

  if args.find:
    result = []
    search_lines = args.find.splitlines()
    fyi.FetchInfo()
    if not args.json:
      print("SCANNING FOR  %s" % args.find)
    for builder in fyi.Builders():
      if not args.json:
        print("Scanning %s" % builder.Name())
      # Fresh tester per builder so a partial match cannot leak across them.
      occurrences = builder.ScanLogs(MultiLineChange(search_lines))
      if not occurrences:
        continue
      min_build = min(occurrences)
      max_build = max(occurrences)
      latest_build = builder.LatestBuild()
      build_url = builder.GetBuildPath(min_build)
      if args.json:
        data = {}
        data['builder'] = builder.Name()
        data['first_affected'] = min_build
        data['last_affected'] = max_build
        data['last_build'] = latest_build
        # Average number of builds per occurrence since the first one;
        # "//" keeps Python 2's integer division explicit.
        data['frequency'] = ((int(latest_build) - int(min_build)) //
                             len(occurrences))
        data['total'] = len(occurrences)
        data['first_url'] = build_url
        result.append(data)
      else:
        print("Earliest occurrence in build %d" % min_build)
        print("Latest occurrence in build %d" % max_build)
        print("Latest build: %d" % latest_build)
        print(build_url)
        print("%d total" % len(occurrences))
    # Only --find produces a result; dumping outside this branch would raise
    # NameError for "--update --json".
    if args.json:
      json.dump(result, sys.stdout, indent=2, sort_keys=True)
  return 0
# Run as a script: hand the full argument vector to main() and use its return
# value as the process exit status.
if __name__ == "__main__":
  sys.exit(main(sys.argv))