# Copyright 2017 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import datetime
import json
import math
import subprocess
import time
import common
from common import TestDriver
from common import IntegrationTest
from common import NotAndroid
# The maximum number of data points that will be saved.
MAX_DATA_POINTS = 365
# The persistent storage for compression data is kept in Google Storage with
# this bucket name.
BUCKET = 'chrome_proxy_compression'
# The data file name in the Google Storage bucket, above. The data file is also
# saved locally under the same name.
DATA_FILE = 'compression_data.json'
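# Together these constants name the Google Storage object
# gs://chrome_proxy_compression/compression_data.json.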
class CompressionRegression(IntegrationTest):
"""This class is responsible for alerting the Chrome Proxy team to regression
in the compression metrics of the proxy. At present, this class will simply
gather data and save it to a Google Storage bucket. Once enough data has been
gathered to form a reasonable model, alerting will be added to check for
regression.
Before running the test, this class will fetch the JSON data file from Google
Storage in a subprocess and store it on the local disk with the same file
name. The data is then read from that file. After running the test, if the
  data has changed, the file will be uploaded back to Google Storage.
  The JSON data object and data dict object used widely in this class have the
following structure:
{
"2017-02-29": {
"html": 0.314,
"jpg": 0.1337,
"png": 0.1234,
"mp4": 0.9876
}
}
  where keys are date stamps in the form "YYYY-MM-DD", and each child object
  maps a resource type to its compression value.
Also frequently referenced is the compression_average dict object, which
contains the compression data just now gathered from Chrome in
getCurrentCompressionMetrics(). That object has the following structure:
  {
    "html": 0.314,
    "jpg": 0.1337,
    "png": 0.1234,
    "mp4": 0.9876
  }
  where keys are the resource type (derived from the content type or the file
  extension) with its compression value.
Due to the complexity of several methods in this class, a number of local
unit tests can be found at the bottom of this file.
Please note that while this test uses the IntegrationTest framework, it is
classified as a regression test.
"""
@NotAndroid
def testCompression(self):
"""This function is the main test function for regression compression
checking and facilitates the test with all of the helper functions'
behavior.
"""
compression_average = self.getCurrentCompressionMetricsWithRetry()
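    # Pull the persisted history down from Google Storage before merging in
    # today's numbers; the upload below only happens when the data changed.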
self.fetchFromGoogleStorage()
data = {}
with open(DATA_FILE, 'r') as data_fp:
data = json.load(data_fp)
if self.updateDataObject(compression_average, data):
with open(DATA_FILE, 'w') as data_fp:
json.dump(data, data_fp)
self.uploadToGoogleStorage()
def getCurrentCompressionMetricsWithRetry(self, max_attempts=10):
"""This function allows some number of attempts to be tried to fetch
compressed responses. Sometimes, the proxy will not have compressed results
available immediately, especially for video resources.
Args:
max_attempts: the maximum number of attempts to try to fetch compressed
resources.
Returns:
a dict object mapping resource type to compression
"""
    attempts = 0
    last_error = None
    while attempts < max_attempts:
      try:
        return self.getCurrentCompressionMetrics()
      except Exception as e:
        last_error = e
        attempts += 1
        time.sleep(2)
    raise Exception("Didn't get a good response after %d attempts; "
        "last error: %s" % (attempts, last_error))
def getCurrentCompressionMetrics(self):
"""This function uses the ChromeDriver framework to open Chrome and navigate
to a number of static resources of different types, like jpg, png, mp4, gif,
html. Multiple resources of a single type are supported. This function will
check that each resource was fetched via the Chrome Proxy, and then compute
the compression as a percentage from the Content-Length and
X-Original-Content-Length headers where compression = 1 - (cl / xocl). The
function will then return the average compression for each of the resource
types.
Returns:
a dict object mapping resource type to compression
"""
def AddToCompression(compression, key, value):
if key in compression:
compression[key].append(value)
else:
compression[key] = [value]
with TestDriver() as t:
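      # --enable-spdy-proxy-auth turns on the Data Reduction Proxy; disabling
      # server-side experiments keeps the measurements comparable across runs.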
t.AddChromeArg('--enable-spdy-proxy-auth')
t.AddChromeArg('--data-reduction-proxy-server-experiments-disabled')
t.LoadURL('http://check.googlezip.net/metrics/local.gif')
t.LoadURL('http://check.googlezip.net/metrics/local.png')
t.LoadURL('http://check.googlezip.net/metrics/local.jpg')
t.LoadURL(
'http://check.googlezip.net/cacheable/video/buck_bunny_tiny.html')
compression = {}
for response in t.GetHTTPResponses():
# Check that the response was proxied.
self.assertHasChromeProxyViaHeader(response)
# Compute compression metrics.
cl = response.response_headers['content-length']
ocl = response.response_headers['x-original-content-length']
content_type = response.response_headers['content-type']
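        # For example, Content-Length 700 with X-Original-Content-Length 1000
        # gives 1 - 700/1000 = 0.3, i.e. a 30% byte savings.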
compression_rate = 1.0 - (float(cl) / float(ocl))
        if 'html' in content_type:
AddToCompression(compression, 'html', compression_rate)
else:
resource = response.url[response.url.rfind('/'):]
AddToCompression(compression, resource[resource.rfind('.') + 1:],
compression_rate)
# Compute the average compression for each resource type.
compression_average = {}
for resource_type in compression:
compression_average[resource_type] = (sum(compression[resource_type]) /
float(len(compression[resource_type])))
return compression_average
  def updateDataObject(self, compression_average, data, today=None):
"""This function handles the updating of the data object when new data is
available. Given the existing data object, the results of the
    getCurrentCompressionMetrics() function, and a date object, it will check if
data exists for today. If it does, the method will do nothing and return
False. Otherwise, it will update the data object with the compression data.
If needed, it will also find the least recent entry in the data object and
remove it.
Args:
compression_average: the compression data from
getCurrentCompressionMetrics()
data: the current data object, a dict
      today: a date object; defaults to the current date and is specifiable
        here for testing purposes.
Returns:
True iff the data object was changed
"""
    if today is None:
      today = datetime.date.today()
    datestamp = today.strftime('%Y-%m-%d')
# Check if this data has already been recorded.
if datestamp in data:
return False
# Append new data, removing the least recent if needed.
data[datestamp] = compression_average
if len(data) > MAX_DATA_POINTS:
min_date = None
for date_str in data:
date = datetime.date(*[int(d) for d in date_str.split('-')])
        if min_date is None or date < min_date:
min_date = date
del data[min_date.strftime('%Y-%m-%d')]
return True
def uploadToGoogleStorage(self):
"""This function uses the gsutil command to upload the local data file to
Google Storage.
"""
gs_location = 'gs://%s/%s' % (BUCKET, DATA_FILE)
cmd = ['gsutil', 'cp', DATA_FILE, gs_location]
proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
stdout, stderr = proc.communicate()
if proc.returncode:
raise Exception('Uploading to Google Storage failed! output: %s %s' %
(stdout, stderr))
def fetchFromGoogleStorage(self):
"""This function uses the gsutil command to fetch the local data file from
Google Storage.
"""
gs_location = 'gs://%s/%s' % (BUCKET, DATA_FILE)
cmd = ['gsutil', 'cp', gs_location, DATA_FILE]
proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
stdout, stderr = proc.communicate()
if proc.returncode:
      raise Exception('Fetching from Google Storage failed! output: %s %s' %
(stdout, stderr))
def test0UpdateDataObject_NoUpdate(self):
"""This unit test asserts that the updateDataObject() function doesn't
update the data object when today is already contained in the data object.
"""
    data = {'2017-02-06': {'hello': 'world'}}
new_data = {'Benoit': 'Mandelbrot'}
    test_day = datetime.date(2017, 2, 6)
changed = self.updateDataObject(new_data, data, today=test_day)
self.assertFalse(changed, "No data should have been recorded!")
def test0UpdateDataObject_Update(self):
"""This unit test asserts that the updateDataObject() function updates the
data object when there is new data available, also removing the least recent
data point.
"""
start_date = datetime.date(2017, 2, 6)
data = {}
for i in range(MAX_DATA_POINTS):
date_obj = start_date + datetime.timedelta(days=i)
datestamp = date_obj.strftime('%Y-%m-%d')
data[datestamp] = {'hello': 'world'}
new_data = {'Benoit': 'Mandelbrot'}
    test_day = start_date + datetime.timedelta(days=MAX_DATA_POINTS)
changed = self.updateDataObject(new_data, data, today=test_day)
self.assertTrue(changed, "Data should have been recorded!")
self.assertNotIn('2017-02-06', data)
self.assertIn(test_day.strftime('%Y-%m-%d'), data)
if __name__ == '__main__':
IntegrationTest.RunAllTests()