#!/usr/bin/env python
## Copyright (c) 2012 The WebM project authors. All Rights Reserved.
##
## Use of this source code is governed by a BSD-style license
## that can be found in the LICENSE file in the root of the source
## tree. An additional intellectual property rights grant can be found
## in the file PATENTS. All contributing project authors may
## be found in the AUTHORS file in the root of the source tree.
##
# Set up Django, pinning version 1.2 to silence the deprecation warning
# about the 0.96 default
import os
os.environ['DJANGO_SETTINGS_MODULE'] = 'settings'
from google.appengine.dist import use_library
use_library('django', '1.2')
from google.appengine.ext import webapp
from google.appengine.ext.webapp import template
from google.appengine.ext.webapp import util as webapp_util
from google.appengine.ext import db
from google.appengine.api import oauth
from google.appengine.api import users
# Standard libraries
import datetime
import hashlib
from django.utils import simplejson as json
import pickle
import StringIO
import urllib
import logging
import re
# App libraries
from drilldown import drilldown
from cache import cache_result, CachedDataView
import curve_compare
import model
import util

GERRIT_LINK_HTML = ("<a target='_blank' href=\"https://gerrit.chromium.org/gerrit/"
                    "#q,%s,n,z\">%s</a>")
GERRIT_LINK_PATTERN = "(I[a-f0-9]{40})"
OAUTH_SCOPE = 'https://www.googleapis.com/auth/userinfo.email'
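
# A Gerrit Change-Id is the letter "I" followed by 40 hex digits; e.g. the
# (made-up) id I0123456789abcdef0123456789abcdef01234567 would be matched by
# GERRIT_LINK_PATTERN and rewritten via GERRIT_LINK_HTML into a link to that
# change.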

# We give metrics their own handler for convenience
class ImportMetricHandler(webapp.RequestHandler):
  def post(self):
    assert util.development() or oauth.is_current_user_admin()
    util.log_upload_data(self.request.path, self.request.get("data"))

    data = StringIO.StringIO(self.request.get("data"))
    for line in data:
      # Each line is a JSON object describing one metric to load into the
      # database.
      metric = json.loads(line)
      m = model.Metric(key_name=metric["name"],
                       display_name=metric["display name"],
                       distortion=metric["distortion"],
                       yaxis=metric.get("yaxis", None))
      m.put()
    model.metrics().invalidate()
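
# A hypothetical request body line for /import-metrics (the field names match
# the handler above; the values are invented for illustration):
#
#   {"name": "psnr", "display name": "Average PSNR", "distortion": true,
#    "yaxis": "dB"}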

class ImportFileSetHandler(webapp.RequestHandler):
  def post(self):
    assert util.development() or oauth.is_current_user_admin()
    util.log_upload_data(self.request.path, self.request.get("data"))

    files_added = {}
    data = StringIO.StringIO(self.request.get("data"))
    for line in data:
      fileset = json.loads(line)
      # We first load the fileset into the database
      # For use later, we also add a list of filenames in the fileset
      f = model.FileSet(key_name=fileset["name"],
                        display_name=fileset["name"],
                        files=fileset["setfiles"])
      f.put()

      for filename in fileset["setfiles"]:
        if filename not in files_added:
          files_added[filename] = [fileset["name"]]
        else:
          files_added[filename].append(fileset["name"])

    # We now update the database with the elements in files_added
    for filename in files_added:
      # TODO: Is there a better way of assigning display names?
      split_index = filename.rfind("_")
      model.File(key_name=filename,
                 display_name=filename[:split_index],
                 file_sets=files_added[filename]).put()

    model.filesets().invalidate()
    model.files().invalidate()
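
# A hypothetical request body line for /import-filesets (field names from the
# handler above; the set and file names are invented):
#
#   {"name": "std-hd", "setfiles": ["park_joy_1080p", "old_town_cross_1080p"]}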

class ImportCodecMetricHandler(webapp.RequestHandler):
  def put_metric_index(self, parent, metrics, files):
    if metrics and files:
      metric_list = list(metrics)
      file_list = list(files)

      h = hashlib.sha1()
      h.update(parent.key().name())
      h.update(parent.commit)
      h.update(parent.config_name)
      map(h.update, metric_list)
      map(h.update, file_list)
      model.CodecMetricIndex(key_name=h.hexdigest(),
                             parent=parent,
                             commit=parent.commit,
                             config_name=parent.config_name,
                             metrics=metric_list,
                             files=file_list).put()

  def update_drilldown(self, parent, metrics, files):
    # TODO(jkoleszar): if drilldown is moved to a backend, maybe post this
    # through a task queue.
    commit = set([parent.commit])
    config = set([parent.config_name])
    drilldown.insert(metrics, config, files, commit)

  def post(self):
    assert util.development() or oauth.is_current_user_admin()
    util.log_upload_data(self.request.path, self.request.get("data"))

    for line in StringIO.StringIO(self.request.get("data")):
      # Key off a hash of the input line to make the import idempotent
      key = hashlib.sha1(line).hexdigest()

      # Deserialize data, populating optional fields
      data = {"config_flags": None,
              "runtime_flags": None}
      data.update(json.loads(line))

      # Put the data
      m = model.CodecMetric(key_name=key,
                            commit=data["commit"],
                            config_flags=data["config_flags"],
                            runtime_flags=data["runtime_flags"],
                            config_name=data["config"],
                            data=data["data"])
      m.put()

      # Build indexes: group files that share the same set of metrics, so
      # one CodecMetricIndex covers the whole group.
      metrics = set()
      files = set()
      for filename, metric_entries in data["data"].iteritems():
        for metric_entry in metric_entries:
          this_metrics = set(metric_entry.keys())
          if this_metrics != metrics:
            # The metric set changed: flush the group accumulated so far.
            self.put_metric_index(m, metrics, files)
            self.update_drilldown(m, metrics, files)
            metrics = this_metrics
            files = set()
          files.add(filename)
      self.put_metric_index(m, metrics, files)
      self.update_drilldown(m, metrics, files)
    drilldown.save()
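
# A hypothetical request body line for /import-codec-metrics (field names
# from the handler above; all values, including the data shape, are invented
# for illustration):
#
#   {"commit": "<commit id>", "config": "realtime",
#    "config_flags": null,
#    "runtime_flags": "--target-bitrate=${target_bitrate}",
#    "data": {"some_file": [{"Bitrate": 512.3, "target_bitrate": 500,
#                            "psnr": 38.1}]}}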

def pretty_json(x):
  return json.dumps(x, indent=2, sort_keys=True)

@cache_result()
def fetch_codec_metric(metric, config, filename, commit):
  '''This function fetches the data for a given metric, config, filename,
  commit tuple. This functionality is used multiple places, such as
  CodecMetricHandler and AverageImprovementHandler.'''
  indexes = model.CodecMetricIndex.all(keys_only=True)
  indexes = indexes.filter('metrics =', metric)
  indexes = indexes.filter('config_name =', config)
  indexes = indexes.filter('files =', filename)
  indexes = indexes.filter('commit =', commit)
  keys = [k.parent() for k in indexes]
  if len(keys) == 0:
    return None

  metric_data = model.metrics()[metric]
  result = []
  for cm in db.get(keys):
    for run in cm.data[filename]:
      this_run_data = []
      # For distortion metrics the x-axis is the measured bitrate; the
      # target bitrate is kept alongside so the UI can reproduce the run.
      if metric_data.distortion:
        this_run_data.append(run["Bitrate"])
        this_run_data.append(run["target_bitrate"])
      this_run_data.append(run[metric])
      result.append(this_run_data)

  # Sanity checks
  for r in result[1:]:
    assert len(r) == len(result[0])

  # Result is a list of lists. Sort by the first element of the nested
  # list.
  result = sorted(result, key=lambda x: x[0])
  return result
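
# For a distortion metric, fetch_codec_metric() thus returns rows of the form
# [measured bitrate, target bitrate, metric value], sorted by measured
# bitrate; for other metrics each row holds just the metric value.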

@cache_result()
def fetch_metric_for_fileset(metric, config, files, commit):
  """This function is a bulk version of fetch_codec_metric()"""
  indexes = model.CodecMetricIndex.all(keys_only=True)
  indexes = indexes.filter('metrics =', metric)
  indexes = indexes.filter('config_name =', config)
  indexes = indexes.filter('commit =', commit)
  keys = [k.parent() for k in indexes]
  if len(keys) == 0:
    return None

  metric_data = model.metrics()[metric]
  results_by_file = {}
  for cm in db.get(keys):
    for filename, runs in cm.data.iteritems():
      if filename not in files:
        continue
      result = results_by_file.get(filename, [])
      for run in runs:
        this_run_data = []
        if metric_data.distortion:
          this_run_data.append(run["Bitrate"])
        this_run_data.append(run[metric])
        result.append(this_run_data)
      results_by_file[filename] = result

  # Sanity checks
  for filename, result in results_by_file.iteritems():
    for r in result[1:]:
      assert len(r) == len(result[0])

    # Result is a list of lists. Sort by the first element of the nested
    # list.
    results_by_file[filename] = sorted(result, key=lambda x: x[0])
  return results_by_file

def fetch_time_series(metric, config, files, commit):
  branch = commit[1:]  # strip the leading "~" branch marker
  q = model.CodecMetricTimeSeries.all()
  q = q.filter('metric =', metric)
  q = q.filter('config_name =', config)
  q = q.filter('branch =', branch)
  result = {}
  for data in q:
    if data.file_or_set_name in files:
      result[data.file_or_set_name] = zip(
          # Months are converted to zero-based, as consumed by a JavaScript
          # Date on the client.
          [(x.year, x.month - 1, x.day, x.hour, x.minute, x.second)
           for x in data.times],
          data.commits,
          # Convert the raw ratios to percent improvement.
          [(x - 1.0) * 100.0 for x in data.values])
  return result

class CodecMetricHandler(webapp.RequestHandler):
  def get(self, metric, config, filename, commit):
    """Fetches the requested metric data as JSON"""
    if not metric or not config or not filename or not commit:
      self.error(404)
      return

    filename = urllib.unquote(filename)
    commit = urllib.unquote(commit)
    if commit[0] == "~":
      # A leading "~" selects a branch time series rather than one commit.
      result = {'yaxis': "Percent Improvement",
                'data': fetch_time_series(metric, config, [filename],
                                          commit)[filename],
               }
    else:
      result = {'yaxis': model.metrics()[metric].yaxis,
                'data': fetch_codec_metric(metric, config, filename,
                                           commit),
               }

    # Return the result
    if result['data']:
      self.response.headers['Content-Type'] = 'application/json'
      self.response.out.write(pretty_json(result))
    else:
      self.error(404)

@cache_result()
def find_baseline(metric, config, filename, commits):
  def find_first_parent(commit, data, candidates):
    while True:
      parents = data[commit].parents
      if not parents:
        # root node
        return None
      commit = parents[0]
      if commit in candidates:
        return commit

  # Avoid errors when no commits are selected
  if len(commits) == 0:
    return None

  candidates = drilldown.query(metric, config, filename, commits)[3]
  commit_data = model.commits()
  commits = util.field_list(commits)
  parentage = {}
  for commit in commits:
    parentage[commit] = []

  root_nodes_seen = 0
  while root_nodes_seen < len(commits):
    for commit1 in commits:
      parents = parentage[commit1]
      if parents:
        this_commit = parents[-1]
      else:
        this_commit = commit1

      # already hit the root for this commit?
      if this_commit is None:
        continue

      parent = find_first_parent(this_commit, commit_data, candidates)
      parents.append(parent)
      if parent is None:
        root_nodes_seen += 1
        continue

      n = 0
      for commit2 in commits:
        if parent in parentage[commit2]:
          n += 1
      if n == len(commits):
        # parent is found in all lineages
        return parent
  return None
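
# Worked example (hypothetical linear history A <- B <- C, where every commit
# has data): selecting B and C builds the first-parent lineages B: [A] and
# C: [B, A]; A is the first commit present in both, so A is the baseline.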

def rd_improvement(base_data, data):
  return curve_compare.DataBetter(base_data, data) * 100


def mean_improvement(base_data, data):
  def sum0(vals):
    return sum([x[0] for x in vals])
  base_mean = sum0(base_data) / len(base_data)
  mean = sum0(data) / len(data)
  return (mean / base_mean - 1) * 100
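
# A quick arithmetic check of mean_improvement() with invented data:
# base_data = [[10.0], [20.0]] and data = [[12.0], [24.0]] give means 15.0
# and 18.0, so the result is (18.0 / 15.0 - 1) * 100 = 20.0 (percent).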

@cache_result()
def calculate_improvement(m, cfg, fs, cm, base_data, composite_fn):
  '''Calculates the average improvement given the setup and the parent
  commit, caching the result'''
  data = fetch_metric_for_fileset(m, cfg, fs, cm)
  result = {}
  sum_overall = 0
  count_overall = 0
  for f in fs:
    if f not in base_data or f not in data:
      continue
    composite = composite_fn(base_data[f], data[f])
    sum_overall += composite
    count_overall += 1
    result[f] = composite
  if result:
    return sum_overall / count_overall, result
  return None, result

class AverageImprovementHandler(webapp.RequestHandler):
  @cache_result()
  def get_adhoc_improvement(self, metrics, configs, filenames, commits):
    """Calculates the requested composite metrics and outputs as JSON"""
    # Find the baseline based on the raw URL variables
    parent = find_baseline(metrics, configs, filenames, commits)

    # We format the end of the table with extra info
    if parent:
      parent_str = parent[:9]
    else:
      parent_str = "None found"

    result = []
    metrics = util.field_list(metrics)
    configs = util.field_list(configs)
    filenames = util.filename_list(filenames)
    commits = util.field_list(commits)

    # Fix for the case that a commit in commits has no parent. In this case
    # we choose the oldest commit as the parent, ie the one without a parent.
    if not parent:
      parent = commits[-1]

    metrics_cache = model.metrics()
    for m in metrics:
      if metrics_cache[m].distortion:
        improvement = rd_improvement
      else:
        improvement = mean_improvement
      for cfg in configs:
        baseline_data = fetch_metric_for_fileset(m, cfg, filenames,
                                                 parent)
        for cm in commits:
          col = []  # Each m, cfg, cm combination will be a column in
                    # the table
          average, results = calculate_improvement(
              m, cfg, filenames, cm, baseline_data, improvement)
          for f, composite in results.iteritems():
            col.append([f, composite])

          # Build the column name
          col_name = []
          if len(metrics) > 1:
            col_name.append(m)
          if len(configs) > 1:
            col_name.append(cfg)
          if len(col_name) == 0 or len(commits) > 1:
            col_name.append(cm[:9])
          col_name = "/".join(col_name)

          col.append(['OVERALL: (' + parent_str + ')', average])
          result.append({'col': col_name,
                         'data': col})

    # Return the results
    result = {'baseline': parent,
              'data': result,
              'commits': ','.join(commits)}
    self.response.headers['Content-Type'] = 'application/json'
    self.response.out.write(pretty_json(result))

  def get_time_series(self, metrics, configs, filenames, commits):
    metrics = util.field_list(metrics)
    configs = util.field_list(configs)
    filesets = util.field_list(filenames)
    branches = util.field_list(commits)
    result = []
    for m in metrics:
      for c in configs:
        for f in filesets:
          for b in branches:
            # Get all the data for all files in the set
            files_and_set = util.filename_list(f)
            files_and_set.append(f)
            data = fetch_time_series(m, c, files_and_set, b)

            # Strip the commit ids; only time and value are needed here
            formatted_data = {}
            for key in data:
              data_list = data[key]
              formatted_data_list = []
              for e in data_list:
                formatted_data_list.append([e[0], e[2]])
              formatted_data[key] = formatted_data_list
            data = formatted_data

            # Build the column name
            col_name = []
            if len(metrics) > 1:
              col_name.append(m)
            if len(configs) > 1:
              col_name.append(c)
            if len(filesets) > 1:
              col_name.append(f)
            if len(col_name) == 0 or len(branches) > 1:
              col_name.append(b[1:])
            col_name = "/".join(col_name)

            # Build the rows for this column, using the most recent value
            # in each series
            col = []
            for filename, filedata in data.iteritems():
              improvement = filedata[-1][1]
              col.append([filename, improvement])
            result.append({'col': col_name,
                           'data': col})

    # Return the results
    result = {'data': result,
              'commits': ','.join(branches)}
    self.response.headers['Content-Type'] = 'application/json'
    self.response.out.write(pretty_json(result))

  def get(self, metrics, configs, filenames, commits):
    if urllib.unquote(commits)[0] == "~":
      self.get_time_series(metrics, configs, filenames, commits)
    else:
      self.get_adhoc_improvement(metrics, configs, filenames, commits)

class MainHandler(webapp.RequestHandler):
  def get(self):
    devel = util.development()
    values = {
        "user": users.get_current_user(),
        "login_url": users.create_login_url("/"),
        "logout_url": users.create_logout_url("/")
    }
    if devel:
      values["development"] = True
    self.response.out.write(template.render("home.html", values))


class ExploreHandler(webapp.RequestHandler):
  def get(self):
    devel = util.development()
    values = {
        "user": users.get_current_user(),
        "login_url": users.create_login_url("/"),
        "logout_url": users.create_logout_url("/")
    }
    if devel:
      values["development"] = True
    self.response.out.write(template.render("index.html", values))


class SharedMainHandler(webapp.RequestHandler):
  '''This Handler provides a way of linking to specific dashboard views for
  sharing with others.'''
  def get(self, metrics, configs, filesets, commits, filenames, opentrees):
    # Note that we also must keep track of which trees are open
    devel = util.development()
    values = {
        "metrics": urllib.unquote(metrics),
        "configs": urllib.unquote(configs),
        "filesets": urllib.unquote(filesets),
        "commits": urllib.unquote(commits),
        "filenames": urllib.unquote(filenames),
        "opentrees": urllib.unquote(opentrees),
        "user": users.get_current_user(),
        "login_url": users.create_login_url("/"),
        "logout_url": users.create_logout_url("/")
    }
    if devel:
      values["development"] = True
    self.response.out.write(template.render("index.html", values))


class ChartHandler(webapp.RequestHandler):
  def get(self):
    self.response.out.write(template.render("graph.html", {}))

@cache_result()
def build_history(commit, visited=None):
  # Default to None rather than a mutable default argument: a shared set()
  # would leak state between calls.
  if visited is None:
    visited = set()
  to_visit = [commit]
  history = []
  commit_cache = model.commits()
  while to_visit:
    commit = to_visit.pop(0)
    if commit not in visited:
      visited.add(commit)
      history.insert(0, commit)
      commit = commit_cache[commit]
      to_visit.extend(commit.parents)
  return history

@cache_result()
def initial_visited(c1):
  visited = set()
  commit_cache = model.commits()
  while c1:
    c1 = commit_cache[c1]
    visited.update(c1.parents)
    if c1.parents:
      c1 = c1.parents[0]
    else:
      break
  return visited
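
# Example with an invented history A <- B <- C: initial_visited("C") walks
# the first-parent chain and returns {"B", "A"}, i.e. the ancestors of C;
# build_history() then skips those commits when expanding another commit.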

class CommitInfoHandler(webapp.RequestHandler):
  '''This handler is used to get all the information regarding a set of
  commits and their baseline commit.'''
  def get(self, commits, baseline):
    def gerrit_link(m):
      return GERRIT_LINK_HTML % (m.group(0), m.group(0))

    if baseline == '':
      # we will handle this case specially
      baseline = False

    commits = util.field_list(commits)

    # Look up the commit data for these commits
    selected_commits = {}
    commit_cache = model.commits()
    for commit in commits:
      if commit not in selected_commits:
        selected_commits[commit] = commit_cache[commit]

    # Sort in topological order
    commits = sorted(selected_commits.keys(),
                     key=lambda x: selected_commits[x].depth, reverse=True)
    visited = initial_visited(commits[-1])
    history = build_history(commits[0], visited)
    history.reverse()

    formatted = []
    for commit in history:
      commit_data = commit_cache[commit]
      message = commit_data.message.split("\n")
      data = {'commit': commit_data.key().name()[:9],
              'author': commit_data.author,
              'subject': message[0],
              'body': message[1:],
              'date': commit_data.author_time}
      formatted.append(data)

    # We also get the baseline
    if baseline:
      commit_data = commit_cache[baseline]
      message = commit_data.message.split("\n")
      baseline = {'commit': commit_data.key().name()[:9],
                  'author': commit_data.author,
                  'subject': message[0],
                  'body': message[1:],
                  'date': commit_data.author_time}

    html = template.render("commitinfo.html",
                           {"commits": formatted, "baseline": baseline})
    html = re.sub(GERRIT_LINK_PATTERN, gerrit_link, html)
    self.response.out.write(html)

@cache_result()
def fetch_config_info(metric, config, filename, commit):
  '''Fetches the commit id and the config/runtime flags used to produce the
  data point identified by the given metric, config, filename, commit tuple.
  Used by ConfigInfoHandler.'''
  indexes = model.CodecMetricIndex.all(keys_only=True)
  indexes = indexes.filter('metrics =', metric)
  indexes = indexes.filter('config_name =', config)
  indexes = indexes.filter('files =', filename)
  indexes = indexes.filter('commit =', commit)
  keys = [k.parent() for k in indexes]
  if len(keys) == 0:
    return None

  result = []
  for cm in db.get(keys):  # cm = codec metric
    # we get the commit plus the runtime and config flags
    result.append((cm.commit, cm.config_flags, cm.runtime_flags))

  # Sanity check - we only want one runtime configuration
  assert len(result) == 1
  return result[0]

class ConfigInfoHandler(webapp.RequestHandler):
  '''This handler is used to get all the information regarding the config
  required to reproduce a data point'''
  def get(self, metric, config, filename, commit, bitrate):
    config_info = fetch_config_info(metric, config, filename, commit)
    commit, config_flags, runtime_flags = config_info
    if bitrate != '':
      bitrate = float(bitrate)
      # Substitute the requested bitrate for the ${target_bitrate}
      # placeholder in the runtime flags
      runtime_flags = runtime_flags.replace('${target_bitrate}',
                                            str(bitrate))

    # We see if this commit is in gerrit
    commit_data = model.commits()[commit]
    if commit_data.gerrit_url is not None:
      commit = {'commitid': commit,
                'commit_in_gerrit': True,
                'commit_url': commit_data.gerrit_url,
                'commit_ref': commit_data.gerrit_patchset_ref}
    else:
      commit = {'commitid': commit,
                'commit_in_gerrit': False}

    response = {'commit': commit,
                'config_flags': config_flags,
                'runtime_flags': runtime_flags}
    html = template.render("configinfo.html", response)
    self.response.out.write(html)

class HistoryHandler(webapp.RequestHandler):
  def get(self, commits):
    def gerrit_link(m):
      return GERRIT_LINK_HTML % (m.group(0), m.group(0))

    def commit_group(commits, rollup):
      return {'commits': commits, 'count': len(commits),
              'rollup': rollup, 'id': commits[0]['commit']}

    commits = util.field_list(commits)

    # Don't print history for the whole branch
    for commit in commits:
      if commit[0] == '~':
        return

    # Find the oldest commit
    visited = set(commits[:1])
    for commit in commits:
      if commit in visited:
        visited = initial_visited(commit)

    history = [build_history(c, set(visited)) for c in commits]
    history = sorted(history, key=lambda x: len(x))
    collapsed_history = history[0]
    collapsed_history_set = set(collapsed_history)
    for h in history[1:]:
      for c in h:
        if c not in collapsed_history_set:
          collapsed_history_set.add(c)
          collapsed_history.append(c)

    formatted = []
    rollup = []
    commit_cache = model.commits()
    for commit in collapsed_history:
      commit_data = commit_cache[commit]
      message = commit_data.message.split("\n")
      nonempty_lines = sum(map(bool, message))
      data = {'commit': commit_data.key().name()[:9],
              'author': commit_data.author,
              'subject': message[0],
              'body': message[1:],
              'selected': False,
              'expandable': nonempty_lines > 1}
      if commit in commits:
        if rollup:
          # Flush the unselected commits into a collapsed group
          formatted.append(commit_group(rollup, rollup=True))
          rollup = []
        data['selected'] = True
        formatted.append(commit_group([data], rollup=False))
      else:
        rollup.append(data)

    html = template.render("history.html", {"commit_groups": formatted})
    html = re.sub(GERRIT_LINK_PATTERN, gerrit_link, html)
    self.response.out.write(html)

def main():
  application = webapp.WSGIApplication([
      ('/', MainHandler),
      ('/explore', ExploreHandler),
      ('/import-metrics', ImportMetricHandler),
      ('/import-filesets', ImportFileSetHandler),
      ('/import-codec-metrics', ImportCodecMetricHandler),
      (r'/history/(.*)', HistoryHandler),
      (r'/commit-info/(.*)/(.*)', CommitInfoHandler),
      (r'/config-info/(.*)/(.*)/(.*)/(.*)/(.*)', ConfigInfoHandler),
      (r'/metric-data/(.*)/(.*)/(.*)/(.*)', CodecMetricHandler),
      (r'/average-improvement/(.*)/(.*)/(.*)/(.*)', AverageImprovementHandler),
      (r'/explore/(.*)/(.*)/(.*)/(.*)/(.*)/(.*)', SharedMainHandler),
      ('/graph', ChartHandler)
  ], debug=True)
  webapp_util.run_wsgi_app(application)


if __name__ == '__main__':
  main()