Nuke tools/android/loading
In an attempt to avoid breaking customtabs_benchmark, also move
chrome_setup.py to tools/android/customtabs_benchmark/scripts.
Reason: the directory is currently unused, does not comply with presubmit
checks, and there are no plans to develop it further.
Change-Id: I01486f7fcbd663281e43c6db95bd621969a5f7d2
Reviewed-on: https://chromium-review.googlesource.com/c/1366275
Reviewed-by: Matthew Cary <mattcary@chromium.org>
Reviewed-by: Benoit L <lizeb@chromium.org>
Commit-Queue: Egor Pasko <pasko@chromium.org>
Cr-Original-Commit-Position: refs/heads/master@{#614691}
Cr-Mirrored-From: https://chromium.googlesource.com/chromium/src
Cr-Mirrored-Commit: 17f636f29fef89d40edce0727806d72eaf4baba8
diff --git a/loading/chrome_setup.py b/customtabs_benchmark/scripts/chrome_setup.py
similarity index 100%
rename from loading/chrome_setup.py
rename to customtabs_benchmark/scripts/chrome_setup.py
diff --git a/customtabs_benchmark/scripts/customtabs_benchmark.py b/customtabs_benchmark/scripts/customtabs_benchmark.py
index a907ce7..b5a3afe 100755
--- a/customtabs_benchmark/scripts/customtabs_benchmark.py
+++ b/customtabs_benchmark/scripts/customtabs_benchmark.py
@@ -29,7 +29,6 @@
sys.path.append(os.path.join(_SRC_PATH, 'build', 'android'))
import devil_chromium
-sys.path.append(os.path.join(_SRC_PATH, 'tools', 'android', 'loading'))
import chrome_setup
diff --git a/loading/OWNERS b/loading/OWNERS
deleted file mode 100644
index 82a4803..0000000
--- a/loading/OWNERS
+++ /dev/null
@@ -1,4 +0,0 @@
-droger@chromium.org
-lizeb@chromium.org
-mattcary@chromium.org
-pasko@chromium.org
diff --git a/loading/PRESUBMIT.py b/loading/PRESUBMIT.py
deleted file mode 100644
index cd740ef..0000000
--- a/loading/PRESUBMIT.py
+++ /dev/null
@@ -1,34 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-"""Top-level presubmit script for loading.
-
-See http://dev.chromium.org/developers/how-tos/depottools/presubmit-scripts
-for more details on the presubmit API built into depot_tools.
-"""
-
-
-def CommonChecks(input_api, output_api):
- output = []
- blacklist = [r'cloud/frontend/lib/*']
- output.extend(input_api.canned_checks.RunPylint(
- input_api, output_api, black_list=blacklist))
- output.extend(input_api.canned_checks.RunUnitTests(
- input_api,
- output_api,
- [input_api.os_path.join(input_api.PresubmitLocalPath(), 'run_tests')]))
-
- if input_api.is_committing:
- output.extend(input_api.canned_checks.PanProjectChecks(input_api,
- output_api,
- owners_check=False))
- return output
-
-
-def CheckChangeOnUpload(input_api, output_api):
- return CommonChecks(input_api, output_api)
-
-
-def CheckChangeOnCommit(input_api, output_api):
- return CommonChecks(input_api, output_api)
diff --git a/loading/activity_lens.py b/loading/activity_lens.py
deleted file mode 100644
index 3e3a98e..0000000
--- a/loading/activity_lens.py
+++ /dev/null
@@ -1,303 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-"""Gives a picture of the CPU activity between timestamps.
-
-When executed as a script, takes a loading trace, and prints the activity
-breakdown for the request dependencies.
-"""
-
-import collections
-import logging
-import operator
-
-import request_track
-
-
-class ActivityLens(object):
- """Reconstructs the activity of the main renderer thread between requests."""
- _SCRIPT_EVENT_NAMES = ('EvaluateScript', 'FunctionCall')
- _PARSING_EVENT_NAMES = ('ParseHTML', 'ParseAuthorStyleSheet')
-
- def __init__(self, trace):
- """Initializes an instance of ActivityLens.
-
- Args:
- trace: (LoadingTrace) loading trace.
- """
- self._trace = trace
- events = trace.tracing_track.GetEvents()
- self._renderer_main_pid_tid = self._GetRendererMainThreadId(events)
- self._tracing = self._trace.tracing_track.Filter(
- *self._renderer_main_pid_tid)
-
- @classmethod
- def _GetRendererMainThreadId(cls, events):
- """Returns the most active main renderer thread.
-
- Several renderers may be running concurrently, but we assume that only one
- of them is busy during the time covered by the loading trace.. It can be
- selected by looking at the number of trace events generated.
-
- Args:
- events: [tracing.Event] List of trace events.
-
- Returns:
- (PID (int), TID (int)) of the busiest renderer main thread.
- """
- events_count_per_pid_tid = collections.defaultdict(int)
- main_renderer_thread_ids = set()
- for event in events:
- tracing_event = event.tracing_event
- pid = event.tracing_event['pid']
- tid = event.tracing_event['tid']
- events_count_per_pid_tid[(pid, tid)] += 1
- if (tracing_event['cat'] == '__metadata'
- and tracing_event['name'] == 'thread_name'
- and event.args['name'] == 'CrRendererMain'):
- main_renderer_thread_ids.add((pid, tid))
- events_count_per_pid_tid = {
- pid_tid: count for (pid_tid, count) in events_count_per_pid_tid.items()
- if pid_tid in main_renderer_thread_ids}
- pid_tid_events_counts = sorted(events_count_per_pid_tid.items(),
- key=operator.itemgetter(1), reverse=True)
- if (len(pid_tid_events_counts) > 1
- and pid_tid_events_counts[0][1] < 2 * pid_tid_events_counts[1][1]):
- logging.warning(
- 'Several active renderers (%d and %d with %d and %d events).'
- % (pid_tid_events_counts[0][0][0], pid_tid_events_counts[1][0][0],
- pid_tid_events_counts[0][1], pid_tid_events_counts[1][1]))
- return pid_tid_events_counts[0][0]
-
- def _OverlappingMainRendererThreadEvents(self, start_msec, end_msec):
- return self._tracing.OverlappingEvents(start_msec, end_msec)
-
- @classmethod
- def _ClampedDuration(cls, event, start_msec, end_msec):
- return max(0, (min(end_msec, event.end_msec)
- - max(start_msec, event.start_msec)))
-
- @classmethod
- def _ThreadBusyness(cls, events, start_msec, end_msec):
- """Amount of time a thread spent executing from the message loop."""
- busy_duration = 0
- message_loop_events = [
- e for e in events
- if (e.tracing_event['cat'] == 'toplevel'
- and e.tracing_event['name'] == 'MessageLoop::RunTask')]
- for event in message_loop_events:
- clamped_duration = cls._ClampedDuration(event, start_msec, end_msec)
- busy_duration += clamped_duration
- interval_msec = end_msec - start_msec
- assert busy_duration <= interval_msec
- return busy_duration
-
- @classmethod
- def _ScriptsExecuting(cls, events, start_msec, end_msec):
- """Returns the time during which scripts executed within an interval.
-
- Args:
- events: ([tracing.Event]) list of tracing events.
- start_msec: (float) start time in ms, inclusive.
- end_msec: (float) end time in ms, inclusive.
-
- Returns:
- A dict {URL (str) -> duration_msec (float)}. The dict may have a None key
- for scripts that aren't associated with a URL.
- """
- script_to_duration = collections.defaultdict(float)
- script_events = [e for e in events
- if ('devtools.timeline' in e.tracing_event['cat']
- and (e.tracing_event['name']
- in cls._SCRIPT_EVENT_NAMES))]
- for event in script_events:
- clamped_duration = cls._ClampedDuration(event, start_msec, end_msec)
- script_url = event.args['data'].get('scriptName', None)
- script_to_duration[script_url] += clamped_duration
- return dict(script_to_duration)
-
- @classmethod
- def _FullyIncludedEvents(cls, events, event):
- """Return a list of events wholly included in the |event| span."""
- (start, end) = (event.start_msec, event.end_msec)
- result = []
- for event in events:
- if start <= event.start_msec < end and start <= event.end_msec < end:
- result.append(event)
- return result
-
- @classmethod
- def _Parsing(cls, events, start_msec, end_msec):
- """Returns the HTML/CSS parsing time within an interval.
-
- Args:
- events: ([tracing.Event]) list of events.
- start_msec: (float) start time in ms, inclusive.
- end_msec: (float) end time in ms, inclusive.
-
- Returns:
- A dict {URL (str) -> duration_msec (float)}. The dict may have a None key
- for tasks that aren't associated with a URL.
- """
- url_to_duration = collections.defaultdict(float)
- parsing_events = [e for e in events
- if ('devtools.timeline' in e.tracing_event['cat']
- and (e.tracing_event['name']
- in cls._PARSING_EVENT_NAMES))]
- for event in parsing_events:
- # Parsing events can contain nested script execution events, avoid
- # double-counting by discounting these.
- nested_events = cls._FullyIncludedEvents(events, event)
- events_tree = _EventsTree(event, nested_events)
- js_events = events_tree.DominatingEventsWithNames(cls._SCRIPT_EVENT_NAMES)
- duration_to_subtract = sum(
- cls._ClampedDuration(e, start_msec, end_msec) for e in js_events)
- tracing_event = event.tracing_event
- clamped_duration = cls._ClampedDuration(event, start_msec, end_msec)
- if tracing_event['name'] == 'ParseAuthorStyleSheet':
- url = tracing_event['args']['data']['styleSheetUrl']
- else:
- url = tracing_event['args']['beginData']['url']
- parsing_duration = clamped_duration - duration_to_subtract
- assert parsing_duration >= 0
- url_to_duration[url] += parsing_duration
- return dict(url_to_duration)
-
- def GenerateEdgeActivity(self, dep):
- """For a dependency between two requests, returns the renderer activity
- breakdown.
-
- Args:
- dep: (Request, Request, str) As returned from
- RequestDependencyLens.GetRequestDependencies().
-
- Returns:
- {'edge_cost': (float) ms, 'busy': (float) ms,
- 'parsing': {'url' -> time_ms}, 'script' -> {'url' -> time_ms}}
- """
- (first, second, reason) = dep
- (start_msec, end_msec) = request_track.IntervalBetween(
- first, second, reason)
- assert end_msec - start_msec >= 0.
- events = self._OverlappingMainRendererThreadEvents(start_msec, end_msec)
- result = {'edge_cost': end_msec - start_msec,
- 'busy': self._ThreadBusyness(events, start_msec, end_msec)}
- result.update(self.ComputeActivity(start_msec, end_msec))
- return result
-
- def ComputeActivity(self, start_msec, end_msec):
- """Returns a breakdown of the main renderer thread activity between two
- timestamps.
-
- Args:
- start_msec: (float)
- end_msec: (float)
-
- Returns:
- {'parsing': {'url' -> time_ms}, 'script': {'url' -> time_ms}}.
- """
- assert end_msec - start_msec >= 0.
- events = self._OverlappingMainRendererThreadEvents(start_msec, end_msec)
- result = {'parsing': self._Parsing(events, start_msec, end_msec),
- 'script': self._ScriptsExecuting(events, start_msec, end_msec)}
- return result
-
- def BreakdownEdgeActivityByInitiator(self, dep):
- """For a dependency between two requests, categorizes the renderer activity.
-
- Args:
- dep: (Request, Request, str) As returned from
- RequestDependencyLens.GetRequestDependencies().
-
- Returns:
- {'script': float, 'parsing': float, 'other_url': float,
- 'unknown_url': float, 'unrelated_work': float}
- where the values are durations in ms:
- - idle: The renderer main thread was idle.
- - script: The initiating file was executing.
- - parsing: The initiating file was being parsed.
- - other_url: Other scripts and/or parsing activities.
- - unknown_url: Activity which is not associated with a URL.
- - unrelated_work: Activity unrelated to scripts or parsing.
- """
- activity = self.GenerateEdgeActivity(dep)
- breakdown = {'unrelated_work': activity['busy'],
- 'idle': activity['edge_cost'] - activity['busy'],
- 'script': 0, 'parsing': 0,
- 'other_url': 0, 'unknown_url': 0}
- for kind in ('script', 'parsing'):
- for (script_name, duration_ms) in activity[kind].items():
- if not script_name:
- breakdown['unknown_url'] += duration_ms
- elif script_name == dep[0].url:
- breakdown[kind] += duration_ms
- else:
- breakdown['other_url'] += duration_ms
- breakdown['unrelated_work'] -= sum(
- breakdown[x] for x in ('script', 'parsing', 'other_url', 'unknown_url'))
- return breakdown
-
- def MainRendererThreadBusyness(self, start_msec, end_msec):
- """Returns the amount of time the main renderer thread was busy.
-
- Args:
- start_msec: (float) Start of the interval.
- end_msec: (float) End of the interval.
- """
- events = self._OverlappingMainRendererThreadEvents(start_msec, end_msec)
- return self._ThreadBusyness(events, start_msec, end_msec)
-
-
-class _EventsTree(object):
- """Builds the hierarchy of events from a list of fully nested events."""
- def __init__(self, root_event, events):
- """Creates the tree.
-
- Args:
- root_event: (Event) Event held by the tree root.
- events: ([Event]) List of events that are fully included in |root_event|.
- """
- self.event = root_event
- self.start_msec = root_event.start_msec
- self.end_msec = root_event.end_msec
- self.children = []
- events.sort(key=operator.attrgetter('start_msec'))
- if not events:
- return
- current_child = (events[0], [])
- for event in events[1:]:
- if event.end_msec < current_child[0].end_msec:
- current_child[1].append(event)
- else:
- self.children.append(_EventsTree(current_child[0], current_child[1]))
- current_child = (event, [])
- self.children.append(_EventsTree(current_child[0], current_child[1]))
-
- def DominatingEventsWithNames(self, names):
- """Returns a list of the top-most events in the tree with a matching name.
- """
- if self.event.name in names:
- return [self.event]
- else:
- result = []
- for child in self.children:
- result += child.DominatingEventsWithNames(names)
- return result
-
-
-if __name__ == '__main__':
- import sys
- import json
- import loading_trace
- import request_dependencies_lens
-
- filename = sys.argv[1]
- json_dict = json.load(open(filename))
- loading_trace = loading_trace.LoadingTrace.FromJsonDict(json_dict)
- activity_lens = ActivityLens(loading_trace)
- dependencies_lens = request_dependencies_lens.RequestDependencyLens(
- loading_trace)
- deps = dependencies_lens.GetRequestDependencies()
- for requests_dep in deps:
- print activity_lens.GenerateEdgeActivity(requests_dep)
diff --git a/loading/activity_lens_unittest.py b/loading/activity_lens_unittest.py
deleted file mode 100644
index cfa4689..0000000
--- a/loading/activity_lens_unittest.py
+++ /dev/null
@@ -1,348 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-import collections
-import copy
-import unittest
-
-from activity_lens import (ActivityLens, _EventsTree)
-import clovis_constants
-import test_utils
-import tracing_track
-
-
-class ActivityLensTestCase(unittest.TestCase):
- @classmethod
- def _EventsFromRawEvents(cls, raw_events):
- track = tracing_track.TracingTrack(None,
- clovis_constants.DEFAULT_CATEGORIES)
- track.Handle(
- 'Tracing.dataCollected', {'params': {'value': raw_events}})
- return track.GetEvents()
-
- def setUp(self):
- self.track = tracing_track.TracingTrack(None,
- clovis_constants.DEFAULT_CATEGORIES)
-
- def testGetRendererMainThread(self):
- first_renderer_tid = 12345
- second_renderer_tid = 123456
- raw_events = [
- {u'args': {u'name': u'CrBrowserMain'},
- u'cat': u'__metadata',
- u'name': u'thread_name',
- u'ph': u'M',
- u'pid': 1,
- u'tid': 123,
- u'ts': 0},
- {u'args': {u'name': u'CrRendererMain'},
- u'cat': u'__metadata',
- u'name': u'thread_name',
- u'ph': u'M',
- u'pid': 1,
- u'tid': first_renderer_tid,
- u'ts': 0},
- {u'args': {u'name': u'CrRendererMain'},
- u'cat': u'__metadata',
- u'name': u'thread_name',
- u'ph': u'M',
- u'pid': 1,
- u'tid': second_renderer_tid,
- u'ts': 0}]
- raw_events += [
- {u'args': {u'data': {}},
- u'cat': u'devtools.timeline,v8',
- u'name': u'FunctionCall',
- u'ph': u'X',
- u'pid': 1,
- u'tdur': 0,
- u'tid': first_renderer_tid,
- u'ts': 251427174674,
- u'tts': 5107725}] * 100
- raw_events += [
- {u'args': {u'data': {}},
- u'cat': u'devtools.timeline,v8',
- u'name': u'FunctionCall',
- u'ph': u'X',
- u'pid': 1,
- u'tdur': 0,
- u'tid': second_renderer_tid,
- u'ts': 251427174674,
- u'tts': 5107725}] * 150
- # There are more events from first_renderer_tid when (incorrectly) ignoring
- # the PID.
- raw_events += [
- {u'args': {u'data': {}},
- u'cat': u'devtools.timeline,v8',
- u'name': u'FunctionCall',
- u'ph': u'X',
- u'pid': 12,
- u'tdur': 0,
- u'tid': first_renderer_tid,
- u'ts': 251427174674,
- u'tts': 5107725}] * 100
- events = self._EventsFromRawEvents(raw_events)
- self.assertEquals((1, second_renderer_tid),
- ActivityLens._GetRendererMainThreadId(events))
-
- def testThreadBusyness(self):
- raw_events = [
- {u'args': {},
- u'cat': u'toplevel',
- u'dur': 200 * 1000,
- u'name': u'MessageLoop::RunTask',
- u'ph': u'X',
- u'pid': 123,
- u'tid': 123,
- u'ts': 0,
- u'tts': 56485},
- {u'args': {},
- u'cat': u'toplevel',
- u'dur': 8 * 200,
- u'name': u'MessageLoop::NestedSomething',
- u'ph': u'X',
- u'pid': 123,
- u'tid': 123,
- u'ts': 0,
- u'tts': 0}]
- events = self._EventsFromRawEvents(raw_events)
- self.assertEquals(200, ActivityLens._ThreadBusyness(events, 0, 1000))
- # Clamping duration.
- self.assertEquals(100, ActivityLens._ThreadBusyness(events, 0, 100))
- self.assertEquals(50, ActivityLens._ThreadBusyness(events, 25, 75))
-
- def testScriptExecuting(self):
- url = u'http://example.com/script.js'
- raw_events = [
- {u'args': {u'data': {u'scriptName': url}},
- u'cat': u'devtools.timeline,v8',
- u'dur': 250 * 1000,
- u'name': u'FunctionCall',
- u'ph': u'X',
- u'pid': 123,
- u'tdur': 247,
- u'tid': 123,
- u'ts': 0,
- u'tts': 0},
- {u'args': {u'data': {}},
- u'cat': u'devtools.timeline,v8',
- u'dur': 350 * 1000,
- u'name': u'EvaluateScript',
- u'ph': u'X',
- u'pid': 123,
- u'tdur': 247,
- u'tid': 123,
- u'ts': 0,
- u'tts': 0}]
- events = self._EventsFromRawEvents(raw_events)
- self.assertEquals(2, len(ActivityLens._ScriptsExecuting(events, 0, 1000)))
- self.assertTrue(None in ActivityLens._ScriptsExecuting(events, 0, 1000))
- self.assertEquals(
- 350, ActivityLens._ScriptsExecuting(events, 0, 1000)[None])
- self.assertTrue(url in ActivityLens._ScriptsExecuting(events, 0, 1000))
- self.assertEquals(250, ActivityLens._ScriptsExecuting(events, 0, 1000)[url])
- # Aggreagates events.
- raw_events.append({u'args': {u'data': {}},
- u'cat': u'devtools.timeline,v8',
- u'dur': 50 * 1000,
- u'name': u'EvaluateScript',
- u'ph': u'X',
- u'pid': 123,
- u'tdur': 247,
- u'tid': 123,
- u'ts': 0,
- u'tts': 0})
- events = self._EventsFromRawEvents(raw_events)
- self.assertEquals(
- 350 + 50, ActivityLens._ScriptsExecuting(events, 0, 1000)[None])
-
- def testParsing(self):
- css_url = u'http://example.com/style.css'
- html_url = u'http://example.com/yeah.htnl'
- raw_events = [
- {u'args': {u'data': {u'styleSheetUrl': css_url}},
- u'cat': u'blink,devtools.timeline',
- u'dur': 400 * 1000,
- u'name': u'ParseAuthorStyleSheet',
- u'ph': u'X',
- u'pid': 32723,
- u'tdur': 49721,
- u'tid': 32738,
- u'ts': 0,
- u'tts': 216148},
- {u'args': {u'beginData': {u'url': html_url}},
- u'cat': u'devtools.timeline',
- u'dur': 42 * 1000,
- u'name': u'ParseHTML',
- u'ph': u'X',
- u'pid': 32723,
- u'tdur': 49721,
- u'tid': 32738,
- u'ts': 0,
- u'tts': 5032310},]
- events = self._EventsFromRawEvents(raw_events)
- self.assertEquals(2, len(ActivityLens._Parsing(events, 0, 1000)))
- self.assertTrue(css_url in ActivityLens._Parsing(events, 0, 1000))
- self.assertEquals(400, ActivityLens._Parsing(events, 0, 1000)[css_url])
- self.assertTrue(html_url in ActivityLens._Parsing(events, 0, 1000))
- self.assertEquals(42, ActivityLens._Parsing(events, 0, 1000)[html_url])
-
- def testBreakdownEdgeActivityByInitiator(self):
- requests = [test_utils.MakeRequest(0, 1, 10, 20, 30),
- test_utils.MakeRequest(0, 1, 50, 60, 70)]
- raw_events = [
- {u'args': {u'beginData': {u'url': requests[0].url}},
- u'cat': u'devtools.timeline',
- u'dur': 12 * 1000,
- u'name': u'ParseHTML',
- u'ph': u'X',
- u'pid': 1,
- u'tid': 1,
- u'ts': 25 * 1000},
- {u'args': {u'data': {'scriptName': requests[0].url}},
- u'cat': u'devtools.timeline,v8',
- u'dur': 0,
- u'name': u'EvaluateScript',
- u'ph': u'X',
- u'pid': 1,
- u'tid': 1,
- u'ts': 0},
- {u'cat': u'toplevel',
- u'dur': 100 * 1000,
- u'name': u'MessageLoop::RunTask',
- u'ph': u'X',
- u'pid': 1,
- u'tid': 1,
- u'ts': 0},
- {u'args': {u'name': u'CrRendererMain'},
- u'cat': u'__metadata',
- u'name': u'thread_name',
- u'ph': u'M',
- u'pid': 1,
- u'tid': 1,
- u'ts': 0}]
- activity = self._ActivityLens(requests, raw_events)
- dep = (requests[0], requests[1], 'parser')
- self.assertEquals(
- {'unrelated_work': 18, 'idle': 0, 'script': 0, 'parsing': 12,
- 'other_url': 0, 'unknown_url': 0},
- activity.BreakdownEdgeActivityByInitiator(dep))
- dep = (requests[0], requests[1], 'other')
- # Truncating the event from the parent request end.
- self.assertEquals(
- {'unrelated_work': 13, 'idle': 0, 'script': 0, 'parsing': 7,
- 'other_url': 0, 'unknown_url': 0},
- activity.BreakdownEdgeActivityByInitiator(dep))
- # Unknown URL
- raw_events[0]['args']['beginData']['url'] = None
- activity = self._ActivityLens(requests, raw_events)
- dep = (requests[0], requests[1], 'parser')
- self.assertEquals(
- {'unrelated_work': 18, 'idle': 0, 'script': 0, 'parsing': 0,
- 'other_url': 0, 'unknown_url': 12},
- activity.BreakdownEdgeActivityByInitiator(dep))
- # Script
- raw_events[1]['ts'] = 40 * 1000
- raw_events[1]['dur'] = 6 * 1000
- activity = self._ActivityLens(requests, raw_events)
- dep = (requests[0], requests[1], 'script')
- self.assertEquals(
- {'unrelated_work': 7, 'idle': 0, 'script': 6, 'parsing': 0,
- 'other_url': 0, 'unknown_url': 7},
- activity.BreakdownEdgeActivityByInitiator(dep))
- # Other URL
- raw_events[1]['args']['data']['scriptName'] = 'http://other.com/url'
- activity = self._ActivityLens(requests, raw_events)
- self.assertEquals(
- {'unrelated_work': 7, 'idle': 0, 'script': 0., 'parsing': 0.,
- 'other_url': 6., 'unknown_url': 7.},
- activity.BreakdownEdgeActivityByInitiator(dep))
-
- def testMainRendererThreadBusyness(self):
- raw_events = [
- {u'args': {u'name': u'CrRendererMain'},
- u'cat': u'__metadata',
- u'name': u'thread_name',
- u'ph': u'M',
- u'pid': 1,
- u'tid': 12,
- u'ts': 0},
- {u'args': {},
- u'cat': u'toplevel',
- u'dur': 200 * 1000,
- u'name': u'MessageLoop::RunTask',
- u'ph': u'X',
- u'pid': 1,
- u'tid': 12,
- u'ts': 0,
- u'tts': 56485},
- {u'args': {},
- u'cat': u'toplevel',
- u'dur': 8 * 200,
- u'name': u'MessageLoop::NestedSomething',
- u'ph': u'X',
- u'pid': 1,
- u'tid': 12,
- u'ts': 0,
- u'tts': 0},
- {u'args': {},
- u'cat': u'toplevel',
- u'dur': 500 * 1000,
- u'name': u'MessageLoop::RunTask',
- u'ph': u'X',
- u'pid': 12,
- u'tid': 12,
- u'ts': 0,
- u'tts': 56485}]
- lens = self._ActivityLens([], raw_events)
- # Ignore events from another PID.
- self.assertEquals(200, lens.MainRendererThreadBusyness(0, 1000))
- # Clamping duration.
- self.assertEquals(100, lens.MainRendererThreadBusyness(0, 100))
- self.assertEquals(50, lens.MainRendererThreadBusyness(25, 75))
- # Other PID.
- raw_events[0]['pid'] = 12
- lens = self._ActivityLens([], raw_events)
- self.assertEquals(500, lens.MainRendererThreadBusyness(0, 1000))
-
- def _ActivityLens(self, requests, raw_events):
- loading_trace = test_utils.LoadingTraceFromEvents(
- requests, None, raw_events)
- return ActivityLens(loading_trace)
-
-
-class EventsTreeTestCase(unittest.TestCase):
- FakeEvent = collections.namedtuple(
- 'FakeEvent', ('name', 'start_msec', 'end_msec'))
- _ROOT_EVENT = FakeEvent('-1', 0, 20)
- _EVENTS = [
- FakeEvent('0', 2, 4), FakeEvent('1', 1, 5),
- FakeEvent('2', 6, 9),
- FakeEvent('3', 13, 14), FakeEvent('4', 14, 17), FakeEvent('5', 12, 18)]
-
- def setUp(self):
- self.tree = _EventsTree(self._ROOT_EVENT, copy.deepcopy(self._EVENTS))
-
- def testEventsTreeConstruction(self):
- self.assertEquals(self._ROOT_EVENT, self.tree.event)
- self.assertEquals(3, len(self.tree.children))
- self.assertEquals(self._EVENTS[1], self.tree.children[0].event)
- self.assertEquals(self._EVENTS[0], self.tree.children[0].children[0].event)
- self.assertEquals(self._EVENTS[2], self.tree.children[1].event)
- self.assertEquals([], self.tree.children[1].children)
- self.assertEquals(self._EVENTS[5], self.tree.children[2].event)
- self.assertEquals(2, len(self.tree.children[2].children))
-
- def testDominatingEventsWithNames(self):
- self.assertListEqual(
- [self._ROOT_EVENT], self.tree.DominatingEventsWithNames(('-1')))
- self.assertListEqual(
- [self._ROOT_EVENT], self.tree.DominatingEventsWithNames(('-1', '0')))
- self.assertListEqual(
- [self._EVENTS[1], self._EVENTS[5]],
- self.tree.DominatingEventsWithNames(('1', '5')))
-
-
-if __name__ == '__main__':
- unittest.main()
diff --git a/loading/analyze.py b/loading/analyze.py
deleted file mode 100755
index 91ed008..0000000
--- a/loading/analyze.py
+++ /dev/null
@@ -1,339 +0,0 @@
-#! /usr/bin/python
-# Copyright 2015 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-import argparse
-import cgi
-import json
-import logging
-import os
-import subprocess
-import sys
-import tempfile
-import time
-
-_SRC_DIR = os.path.abspath(os.path.join(
- os.path.dirname(__file__), '..', '..', '..'))
-
-sys.path.append(os.path.join(_SRC_DIR, 'third_party', 'catapult', 'devil'))
-from devil.android import device_utils
-from devil.android.sdk import intent
-
-sys.path.append(os.path.join(_SRC_DIR, 'build', 'android'))
-import devil_chromium
-from pylib import constants
-
-import activity_lens
-import clovis_constants
-import content_classification_lens
-import controller
-import device_setup
-import frame_load_lens
-import loading_graph_view
-import loading_graph_view_visualization
-import loading_trace
-import options
-import request_dependencies_lens
-import request_track
-import xvfb_helper
-
-# TODO(mattcary): logging.info isn't that useful, as the whole (tools) world
-# uses logging info; we need to introduce logging modules to get finer-grained
-# output. For now we just do logging.warning.
-
-
-OPTIONS = options.OPTIONS
-
-
-def _LoadPage(device, url):
- """Load a page on chrome on our device.
-
- Args:
- device: an AdbWrapper for the device on which to load the page.
- url: url as a string to load.
- """
- load_intent = intent.Intent(
- package=OPTIONS.ChromePackage().package,
- activity=OPTIONS.ChromePackage().activity,
- data=url)
- logging.warning('Loading ' + url)
- device.StartActivity(load_intent, blocking=True)
-
-
-def _GetPrefetchHtml(graph_view, name=None):
- """Generate prefetch page for the resources in resource graph.
-
- Args:
- graph_view: (LoadingGraphView)
- name: optional string used in the generated page.
-
- Returns:
- HTML as a string containing all the link rel=prefetch directives necessary
- for prefetching the given ResourceGraph.
- """
- if name:
- title = 'Prefetch for ' + cgi.escape(name)
- else:
- title = 'Generated prefetch page'
- output = []
- output.append("""<!DOCTYPE html>
-<html>
-<head>
-<title>%s</title>
-""" % title)
- for node in graph_view.deps_graph.graph.Nodes():
- output.append('<link rel="prefetch" href="%s">\n' % node.request.url)
- output.append("""</head>
-<body>%s</body>
-</html>
- """ % title)
- return '\n'.join(output)
-
-
-def _LogRequests(url, clear_cache_override=None):
- """Logs requests for a web page.
-
- Args:
- url: url to log as string.
- clear_cache_override: if not None, set clear_cache different from OPTIONS.
-
- Returns:
- JSON dict of logged information (ie, a dict that describes JSON).
- """
- xvfb_process = None
- if OPTIONS.local:
- chrome_ctl = controller.LocalChromeController()
- if OPTIONS.headless:
- xvfb_process = xvfb_helper.LaunchXvfb()
- chrome_ctl.SetChromeEnvOverride(xvfb_helper.GetChromeEnvironment())
- else:
- chrome_ctl = controller.RemoteChromeController(
- device_setup.GetFirstDevice())
-
- clear_cache = (clear_cache_override if clear_cache_override is not None
- else OPTIONS.clear_cache)
- if OPTIONS.emulate_device:
- chrome_ctl.SetDeviceEmulation(OPTIONS.emulate_device)
- if OPTIONS.emulate_network:
- chrome_ctl.SetNetworkEmulation(OPTIONS.emulate_network)
- try:
- with chrome_ctl.Open() as connection:
- if clear_cache:
- connection.ClearCache()
- trace = loading_trace.LoadingTrace.RecordUrlNavigation(
- url, connection, chrome_ctl.ChromeMetadata(),
- categories=clovis_constants.DEFAULT_CATEGORIES)
- except controller.ChromeControllerError as e:
- e.Dump(sys.stderr)
- raise
-
- if xvfb_process:
- xvfb_process.terminate()
-
- return trace.ToJsonDict()
-
-
-def _FullFetch(url, json_output, prefetch):
- """Do a full fetch with optional prefetching."""
- if not url.startswith('http') and not url.startswith('file'):
- url = 'http://' + url
- logging.warning('Cold fetch')
- cold_data = _LogRequests(url)
- assert cold_data, 'Cold fetch failed to produce data. Check your phone.'
- if prefetch:
- assert not OPTIONS.local
- logging.warning('Generating prefetch')
- prefetch_html = _GetPrefetchHtml(_ProcessJsonTrace(cold_data), name=url)
- tmp = tempfile.NamedTemporaryFile()
- tmp.write(prefetch_html)
- tmp.flush()
- # We hope that the tmpfile name is unique enough for the device.
- target = os.path.join('/sdcard/Download', os.path.basename(tmp.name))
- device = device_setup.GetFirstDevice()
- device.adb.Push(tmp.name, target)
- logging.warning('Pushed prefetch %s to device at %s' % (tmp.name, target))
- _LoadPage(device, 'file://' + target)
- time.sleep(OPTIONS.prefetch_delay_seconds)
- logging.warning('Warm fetch')
- warm_data = _LogRequests(url, clear_cache_override=False)
- with open(json_output, 'w') as f:
- json.dump(warm_data, f)
- logging.warning('Wrote ' + json_output)
- with open(json_output + '.cold', 'w') as f:
- json.dump(cold_data, f)
- logging.warning('Wrote ' + json_output + '.cold')
- else:
- with open(json_output, 'w') as f:
- json.dump(cold_data, f)
- logging.warning('Wrote ' + json_output)
-
-
-def _ProcessTraceFile(filename):
- with open(filename) as f:
- return _ProcessJsonTrace(json.load(f))
-
-
-def _ProcessJsonTrace(json_dict):
- trace = loading_trace.LoadingTrace.FromJsonDict(json_dict)
- content_lens = (
- content_classification_lens.ContentClassificationLens.WithRulesFiles(
- trace, OPTIONS.ad_rules, OPTIONS.tracking_rules))
- frame_lens = frame_load_lens.FrameLoadLens(trace)
- activity = activity_lens.ActivityLens(trace)
- deps_lens = request_dependencies_lens.RequestDependencyLens(trace)
- graph_view = loading_graph_view.LoadingGraphView(
- trace, deps_lens, content_lens, frame_lens, activity)
- if OPTIONS.noads:
- graph_view.RemoveAds()
- return graph_view
-
-
-def InvalidCommand(cmd):
- sys.exit('Invalid command "%s"\nChoices are: %s' %
- (cmd, ' '.join(COMMAND_MAP.keys())))
-
-
-def DoPng(arg_str):
- OPTIONS.ParseArgs(arg_str, description='Generates a PNG from a trace',
- extra=['request_json', ('--png_output', ''),
- ('--eog', False)])
- graph_view = _ProcessTraceFile(OPTIONS.request_json)
- visualization = (
- loading_graph_view_visualization.LoadingGraphViewVisualization(
- graph_view))
- tmp = tempfile.NamedTemporaryFile()
- visualization.OutputDot(tmp)
- tmp.flush()
- png_output = OPTIONS.png_output
- if not png_output:
- if OPTIONS.request_json.endswith('.json'):
- png_output = OPTIONS.request_json[
- :OPTIONS.request_json.rfind('.json')] + '.png'
- else:
- png_output = OPTIONS.request_json + '.png'
- subprocess.check_call(['dot', '-Tpng', tmp.name, '-o', png_output])
- logging.warning('Wrote ' + png_output)
- if OPTIONS.eog:
- subprocess.Popen(['eog', png_output])
- tmp.close()
-
-
-def DoPrefetchSetup(arg_str):
- OPTIONS.ParseArgs(arg_str, description='Sets up prefetch',
- extra=['request_json', 'target_html', ('--upload', False)])
- graph_view = _ProcessTraceFile(OPTIONS.request_json)
- with open(OPTIONS.target_html, 'w') as html:
- html.write(_GetPrefetchHtml(
- graph_view, name=os.path.basename(OPTIONS.request_json)))
- if OPTIONS.upload:
- device = device_setup.GetFirstDevice()
- destination = os.path.join('/sdcard/Download',
- os.path.basename(OPTIONS.target_html))
- device.adb.Push(OPTIONS.target_html, destination)
-
- logging.warning(
- 'Pushed %s to device at %s' % (OPTIONS.target_html, destination))
-
-
-def DoLogRequests(arg_str):
- OPTIONS.ParseArgs(arg_str, description='Logs requests of a load',
- extra=['--url', '--output', ('--prefetch', False)])
- _FullFetch(url=OPTIONS.url,
- json_output=OPTIONS.output,
- prefetch=OPTIONS.prefetch)
-
-
-def DoFetch(arg_str):
- OPTIONS.ParseArgs(arg_str,
- description=('Fetches SITE into DIR with '
- 'standard naming that can be processed by '
- './cost_to_csv.py. Both warm and cold '
- 'fetches are done. SITE can be a full url '
- 'but the filename may be strange so better '
- 'to just use a site (ie, domain).'),
- extra=['--site', '--dir'])
- if not os.path.exists(OPTIONS.dir):
- os.makedirs(OPTIONS.dir)
- _FullFetch(url=OPTIONS.site,
- json_output=os.path.join(OPTIONS.dir, OPTIONS.site + '.json'),
- prefetch=True)
-
-
-def DoLongPole(arg_str):
- OPTIONS.ParseArgs(arg_str, description='Calculates long pole',
- extra='request_json')
- graph_view = _ProcessTraceFile(OPTIONS.request_json)
- path_list = []
- cost = graph_view.deps_graph.Cost(path_list=path_list)
- print '%s (%s)' % (path_list[-1].request.url, cost)
-
-
-def DoNodeCost(arg_str):
- OPTIONS.ParseArgs(arg_str,
- description='Calculates node cost',
- extra='request_json')
- graph_view = _ProcessTraceFile(OPTIONS.request_json)
- print sum((n.cost for n in graph_view.deps_graph.graph.Nodes()))
-
-
-def DoCost(arg_str):
- OPTIONS.ParseArgs(arg_str,
- description='Calculates total cost',
- extra=['request_json', ('--path', False)])
- graph_view = _ProcessTraceFile(OPTIONS.request_json)
- path_list = []
- print 'Graph cost: %s' % graph_view.deps_graph.Cost(path_list=path_list)
- if OPTIONS.path:
- for n in path_list:
- print ' ' + request_track.ShortName(n.request.url)
-
-
-COMMAND_MAP = {
- 'png': DoPng,
- 'prefetch_setup': DoPrefetchSetup,
- 'log_requests': DoLogRequests,
- 'longpole': DoLongPole,
- 'nodecost': DoNodeCost,
- 'cost': DoCost,
- 'fetch': DoFetch,
-}
-
-def main():
- logging.basicConfig(level=logging.WARNING)
- OPTIONS.AddGlobalArgument(
- 'clear_cache', True, 'clear browser cache before loading')
- OPTIONS.AddGlobalArgument(
- 'emulate_device', '',
- 'Name of the device to emulate. Must be present '
- 'in --devices_file, or empty for no emulation.')
- OPTIONS.AddGlobalArgument('emulate_network', '',
- 'Type of network emulation. Empty for no emulation.')
- OPTIONS.AddGlobalArgument(
- 'local', False,
- 'run against local desktop chrome rather than device '
- '(see also --local_binary and local_profile_dir)')
- OPTIONS.AddGlobalArgument(
- 'noads', False, 'ignore ad resources in modeling')
- OPTIONS.AddGlobalArgument(
- 'ad_rules', '', 'AdBlocker+ ad rules file.')
- OPTIONS.AddGlobalArgument(
- 'tracking_rules', '', 'AdBlocker+ tracking rules file.')
- OPTIONS.AddGlobalArgument(
- 'prefetch_delay_seconds', 5,
- 'delay after requesting load of prefetch page '
- '(only when running full fetch)')
- OPTIONS.AddGlobalArgument(
- 'headless', False, 'Do not display Chrome UI (only works in local mode).')
-
- parser = argparse.ArgumentParser(description='Analyzes loading')
- parser.add_argument('command', help=' '.join(COMMAND_MAP.keys()))
- parser.add_argument('rest', nargs=argparse.REMAINDER)
- args = parser.parse_args()
- devil_chromium.Initialize()
- COMMAND_MAP.get(args.command,
- lambda _: InvalidCommand(args.command))(args.rest)
-
-
-if __name__ == '__main__':
- main()
diff --git a/loading/chrome_cache.py b/loading/chrome_cache.py
deleted file mode 100644
index 24d3f75..0000000
--- a/loading/chrome_cache.py
+++ /dev/null
@@ -1,423 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-"""Takes care of manipulating the chrome's HTTP cache.
-"""
-
-from datetime import datetime
-import json
-import os
-import re
-import shutil
-import subprocess
-import sys
-import tempfile
-import zipfile
-
-_SRC_DIR = os.path.abspath(os.path.join(
- os.path.dirname(__file__), '..', '..', '..'))
-
-sys.path.append(os.path.join(_SRC_DIR, 'build', 'android'))
-from pylib import constants
-
-import device_setup
-import options
-
-
-OPTIONS = options.OPTIONS
-
-
-# Cache back-end types supported by cachetool.
-BACKEND_TYPES = {'simple', 'blockfile'}
-
-# Regex used to parse HTTP headers line by line.
-HEADER_PARSING_REGEX = re.compile(r'^(?P<header>\S+):(?P<value>.*)$')
-
-
-def _EnsureCleanCacheDirectory(directory_dest_path):
- """Ensure that a cache directory is created and clean.
-
- Args:
- directory_dest_path: Path of the cache directory to ensure cleanliness.
- """
- if os.path.isdir(directory_dest_path):
- shutil.rmtree(directory_dest_path)
- elif not os.path.isdir(os.path.dirname(directory_dest_path)):
- os.makedirs(os.path.dirname(directory_dest_path))
- assert not os.path.exists(directory_dest_path)
-
-
-def _RemoteCacheDirectory():
- """Returns the path of the cache directory's on the remote device."""
- return '/data/data/{}/cache/Cache'.format(
- constants.PACKAGE_INFO[OPTIONS.chrome_package_name].package)
-
-
-def _AdbShell(adb, cmd):
- adb.Shell(subprocess.list2cmdline(cmd))
-
-
-def PullBrowserCache(device):
- """Pulls the browser cache from the device and saves it locally.
-
- Cache is saved with the same file structure as on the device. Timestamps are
- important to preserve because indexing and eviction depends on them.
-
- Returns:
- Temporary directory containing all the browser cache.
- """
- _INDEX_DIRECTORY_NAME = 'index-dir'
- _REAL_INDEX_FILE_NAME = 'the-real-index'
-
- remote_cache_directory = _RemoteCacheDirectory()
- save_target = tempfile.mkdtemp(suffix='.cache')
-
- # Pull the cache recursively.
- device.adb.Pull(remote_cache_directory, save_target)
-
- # Update the modification time stamp on the local cache copy.
- def _UpdateTimestampFromAdbStat(filename, stat):
- assert os.path.exists(filename)
- os.utime(filename, (stat.st_time, stat.st_time))
-
- for filename, stat in device.adb.Ls(remote_cache_directory):
- if filename == '..':
- continue
- if filename == '.':
- cache_directory_stat = stat
- continue
- original_file = os.path.join(remote_cache_directory, filename)
- saved_file = os.path.join(save_target, filename)
- _UpdateTimestampFromAdbStat(saved_file, stat)
- if filename == _INDEX_DIRECTORY_NAME:
- # The directory containing the index was pulled recursively, update the
- # timestamps for known files. They are ignored by cache backend, but may
- # be useful for debugging.
- index_dir_stat = stat
- saved_index_dir = os.path.join(save_target, _INDEX_DIRECTORY_NAME)
- saved_index_file = os.path.join(saved_index_dir, _REAL_INDEX_FILE_NAME)
- for sub_file, sub_stat in device.adb.Ls(original_file):
- if sub_file == _REAL_INDEX_FILE_NAME:
- _UpdateTimestampFromAdbStat(saved_index_file, sub_stat)
- break
- _UpdateTimestampFromAdbStat(saved_index_dir, index_dir_stat)
-
- # Store the cache directory modification time. It is important to update it
- # after all files in it have been written. The timestamp is compared with
- # the contents of the index file when freshness is determined.
- _UpdateTimestampFromAdbStat(save_target, cache_directory_stat)
- return save_target
-
-
-def PushBrowserCache(device, local_cache_path):
- """Pushes the browser cache saved locally to the device.
-
- Args:
- device: Android device.
- local_cache_path: The directory's path containing the cache locally.
- """
- remote_cache_directory = _RemoteCacheDirectory()
-
- # Clear previous cache.
- _AdbShell(device.adb, ['rm', '-rf', remote_cache_directory])
- _AdbShell(device.adb, ['mkdir', '-p', remote_cache_directory])
-
- # Push cache content.
- device.adb.Push(local_cache_path, remote_cache_directory)
-
- # Command queue to touch all files with correct timestamp.
- command_queue = []
-
- # Walk through the local cache to update mtime on the device.
- def MirrorMtime(local_path):
- cache_relative_path = os.path.relpath(local_path, start=local_cache_path)
- remote_path = os.path.join(remote_cache_directory, cache_relative_path)
- timestamp = os.stat(local_path).st_mtime
- touch_stamp = datetime.fromtimestamp(timestamp).strftime('%Y%m%d.%H%M%S')
- command_queue.append(['touch', '-t', touch_stamp, remote_path])
-
- for local_directory_path, dirnames, filenames in os.walk(
- local_cache_path, topdown=False):
- for filename in filenames:
- MirrorMtime(os.path.join(local_directory_path, filename))
- for dirname in dirnames:
- MirrorMtime(os.path.join(local_directory_path, dirname))
- MirrorMtime(local_cache_path)
-
- device_setup.DeviceSubmitShellCommandQueue(device, command_queue)
-
-
-def ZipDirectoryContent(root_directory_path, archive_dest_path):
- """Zip a directory's content recursively with all the directories'
- timestamps preserved.
-
- Args:
- root_directory_path: The directory's path to archive.
- archive_dest_path: Archive destination's path.
- """
- with zipfile.ZipFile(archive_dest_path, 'w') as zip_output:
- timestamps = {}
- root_directory_stats = os.stat(root_directory_path)
- timestamps['.'] = {
- 'atime': root_directory_stats.st_atime,
- 'mtime': root_directory_stats.st_mtime}
- for directory_path, dirnames, filenames in os.walk(root_directory_path):
- for dirname in dirnames:
- subdirectory_path = os.path.join(directory_path, dirname)
- subdirectory_relative_path = os.path.relpath(subdirectory_path,
- root_directory_path)
- subdirectory_stats = os.stat(subdirectory_path)
- timestamps[subdirectory_relative_path] = {
- 'atime': subdirectory_stats.st_atime,
- 'mtime': subdirectory_stats.st_mtime}
- for filename in filenames:
- file_path = os.path.join(directory_path, filename)
- file_archive_name = os.path.join('content',
- os.path.relpath(file_path, root_directory_path))
- file_stats = os.stat(file_path)
- timestamps[file_archive_name[8:]] = {
- 'atime': file_stats.st_atime,
- 'mtime': file_stats.st_mtime}
- zip_output.write(file_path, arcname=file_archive_name)
- zip_output.writestr('timestamps.json',
- json.dumps(timestamps, indent=2))
-
-
-def UnzipDirectoryContent(archive_path, directory_dest_path):
- """Unzip a directory's content recursively with all the directories'
- timestamps preserved.
-
- Args:
- archive_path: Archive's path to unzip.
- directory_dest_path: Directory destination path.
- """
- _EnsureCleanCacheDirectory(directory_dest_path)
- with zipfile.ZipFile(archive_path) as zip_input:
- timestamps = None
- for file_archive_name in zip_input.namelist():
- if file_archive_name == 'timestamps.json':
- timestamps = json.loads(zip_input.read(file_archive_name))
- elif file_archive_name.startswith('content/'):
- file_relative_path = file_archive_name[8:]
- file_output_path = os.path.join(directory_dest_path, file_relative_path)
- file_parent_directory_path = os.path.dirname(file_output_path)
- if not os.path.exists(file_parent_directory_path):
- os.makedirs(file_parent_directory_path)
- with open(file_output_path, 'w') as f:
- f.write(zip_input.read(file_archive_name))
-
- assert timestamps
- # os.utime(file_path, ...) modifies modification time of file_path's parent
- # directories. Therefore we call os.utime on files and directories that have
- # longer relative paths first.
- for relative_path in sorted(timestamps.keys(), key=len, reverse=True):
- stats = timestamps[relative_path]
- output_path = os.path.join(directory_dest_path, relative_path)
- if not os.path.exists(output_path):
- os.makedirs(output_path)
- os.utime(output_path, (stats['atime'], stats['mtime']))
-
-
-def CopyCacheDirectory(directory_src_path, directory_dest_path):
- """Copies a cache directory recursively with all the directories'
- timestamps preserved.
-
- Args:
- directory_src_path: Path of the cache directory source.
- directory_dest_path: Path of the cache directory destination.
- """
- assert os.path.isdir(directory_src_path)
- _EnsureCleanCacheDirectory(directory_dest_path)
- shutil.copytree(directory_src_path, directory_dest_path)
-
-
-class CacheBackend(object):
- """Takes care of reading and deleting cached keys.
- """
-
- def __init__(self, cache_directory_path, cache_backend_type):
- """Chrome cache back-end constructor.
-
- Args:
- cache_directory_path: The directory path where the cache is locally
- stored.
- cache_backend_type: A cache back-end type in BACKEND_TYPES.
- """
- assert os.path.isdir(cache_directory_path)
- assert cache_backend_type in BACKEND_TYPES
- self._cache_directory_path = cache_directory_path
- self._cache_backend_type = cache_backend_type
- # Make sure cache_directory_path is a valid cache.
- self._CachetoolCmd('validate')
-
- def GetSize(self):
- """Gets total size of cache entries in bytes."""
- size = self._CachetoolCmd('get_size')
- return int(size.strip())
-
- def ListKeys(self):
- """Lists cache's keys.
-
- Returns:
- A list of all keys stored in the cache.
- """
- return [k.strip() for k in self._CachetoolCmd('list_keys').split('\n')[:-1]]
-
- def GetStreamForKey(self, key, index):
- """Gets a key's stream.
-
- Args:
- key: The key to access the stream.
- index: The stream index:
- index=0 is the HTTP response header;
- index=1 is the transport encoded content;
- index=2 is the compiled content.
-
- Returns:
- String holding stream binary content.
- """
- return self._CachetoolCmd('get_stream', [key, str(index)])
-
- def DeleteStreamForKey(self, key, index):
- """Delete a key's stream.
-
- Args:
- key: The key to access the stream.
- index: The stream index
- """
- self._CachetoolCmd('delete_stream', [key, str(index)])
-
- def DeleteKey(self, key):
- """Deletes a key from the cache.
-
- Args:
- key: The key delete.
- """
- self._CachetoolCmd('delete_key', [key])
-
- def _CachetoolCmd(self, operation, args=None, stdin=''):
- """Runs the cache editor tool and return the stdout.
-
- Args:
- operation: Cachetool operation.
- args: Additional operation argument to append to the command line.
- stdin: String to pipe to the Cachetool's stdin.
-
- Returns:
- Cachetool's stdout string.
- """
- editor_tool_cmd = [
- OPTIONS.LocalBinary('cachetool'),
- self._cache_directory_path,
- self._cache_backend_type,
- operation]
- editor_tool_cmd.extend(args or [])
- process = subprocess.Popen(
- editor_tool_cmd, stdout=subprocess.PIPE, stdin=subprocess.PIPE)
- stdout_data, _ = process.communicate(input=stdin)
- assert process.returncode == 0
- return stdout_data
-
- def UpdateRawResponseHeaders(self, key, raw_headers):
- """Updates a key's raw response headers.
-
- Args:
- key: The key to modify.
- raw_headers: Raw response headers to set.
- """
- self._CachetoolCmd('update_raw_headers', [key], stdin=raw_headers)
-
- def GetDecodedContentForKey(self, key):
- """Gets a key's decoded content.
-
- HTTP cache is storing into key's index stream 1 the transport layer resource
- binary. However, the resources might be encoded using a compression
- algorithm specified in the Content-Encoding response header. This method
- takes care of returning decoded binary content of the resource.
-
- Args:
- key: The key to access the decoded content.
-
- Returns:
- String holding binary content.
- """
- response_headers = self.GetStreamForKey(key, 0)
- content_encoding = None
- for response_header_line in response_headers.split('\n'):
- match = HEADER_PARSING_REGEX.match(response_header_line)
- if not match:
- continue
- if match.group('header').lower() == 'content-encoding':
- content_encoding = match.group('value')
- break
- encoded_content = self.GetStreamForKey(key, 1)
- if content_encoding == None:
- return encoded_content
-
- cmd = [OPTIONS.LocalBinary('content_decoder_tool')]
- cmd.extend([s.strip() for s in content_encoding.split(',')])
- process = subprocess.Popen(cmd,
- stdin=subprocess.PIPE,
- stdout=subprocess.PIPE)
- decoded_content, _ = process.communicate(input=encoded_content)
- assert process.returncode == 0
- return decoded_content
-
-
-def ApplyUrlWhitelistToCacheArchive(cache_archive_path,
- whitelisted_urls,
- output_cache_archive_path):
- """Generate a new cache archive containing only whitelisted urls.
-
- Args:
- cache_archive_path: Path of the cache archive to apply the white listing.
- whitelisted_urls: Set of url to keep in cache.
- output_cache_archive_path: Destination path of cache archive containing only
- white-listed urls.
- """
- cache_temp_directory = tempfile.mkdtemp(suffix='.cache')
- try:
- UnzipDirectoryContent(cache_archive_path, cache_temp_directory)
- backend = CacheBackend(cache_temp_directory, 'simple')
- cached_urls = backend.ListKeys()
- for cached_url in cached_urls:
- if cached_url not in whitelisted_urls:
- backend.DeleteKey(cached_url)
- for cached_url in backend.ListKeys():
- assert cached_url in whitelisted_urls
- ZipDirectoryContent(cache_temp_directory, output_cache_archive_path)
- finally:
- shutil.rmtree(cache_temp_directory)
-
-
-def ManualTestMain():
- import argparse
- parser = argparse.ArgumentParser(description='Tests cache back-end.')
- parser.add_argument('cache_archive_path', type=str)
- parser.add_argument('backend_type', type=str, choices=BACKEND_TYPES)
- command_line_args = parser.parse_args()
-
- cache_path = tempfile.mkdtemp()
- UnzipDirectoryContent(command_line_args.cache_archive_path, cache_path)
-
- cache_backend = CacheBackend(
- cache_directory_path=cache_path,
- cache_backend_type=command_line_args.backend_type)
- keys = sorted(cache_backend.ListKeys())
- selected_key = None
- for key in keys:
- if key.endswith('.js'):
- selected_key = key
- break
- assert selected_key
- print '{}\'s HTTP response header:'.format(selected_key)
- print cache_backend.GetStreamForKey(selected_key, 0)
- print cache_backend.GetDecodedContentForKey(selected_key)
- cache_backend.DeleteKey(keys[1])
- assert keys[1] not in cache_backend.ListKeys()
- shutil.rmtree(cache_path)
-
-
-if __name__ == '__main__':
- ManualTestMain()
diff --git a/loading/chrome_cache_unittest.py b/loading/chrome_cache_unittest.py
deleted file mode 100644
index 051a938..0000000
--- a/loading/chrome_cache_unittest.py
+++ /dev/null
@@ -1,121 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-import os
-import shutil
-import tempfile
-import unittest
-
-import chrome_cache
-
-
-LOADING_DIR = os.path.dirname(os.path.abspath(__file__))
-THIS_BASEMAME = os.path.basename(__file__)
-
-
-class CacheDirectoryTest(unittest.TestCase):
- def setUp(self):
- self._temp_dir = tempfile.mkdtemp()
-
- def tearDown(self):
- shutil.rmtree(self._temp_dir)
-
- def GetTempPath(self, temp_name):
- return os.path.join(self._temp_dir, temp_name)
-
- def CreateNewGarbageFile(self, file_path):
- assert not os.path.exists(file_path)
- with open(file_path, 'w') as f:
- f.write('garbage content')
- assert os.path.isfile(file_path)
-
- @classmethod
- def CompareDirectories(cls, reference_path, generated_path):
- def CompareNode(relative_path):
- reference_stat = os.stat(os.path.join(reference_path, relative_path))
- generated_stat = os.stat(os.path.join(generated_path, relative_path))
- assert int(reference_stat.st_mtime) == int(generated_stat.st_mtime), \
- "{}: invalid mtime.".format(relative_path)
- for reference_parent_path, dir_names, file_names in os.walk(reference_path):
- parent_path = os.path.relpath(reference_parent_path, reference_path)
- reference_nodes = sorted(dir_names + file_names)
- generated_nodes = sorted(os.listdir(
- os.path.join(generated_path, parent_path)))
- assert reference_nodes == generated_nodes, \
- '{}: directory entries don\'t match.'.format(parent_path)
- for node in file_names:
- CompareNode(os.path.join(parent_path, node))
- CompareNode(parent_path)
-
- def testCompareDirectories(self):
- generated_path = self.GetTempPath('dir0')
- shutil.copytree(LOADING_DIR, generated_path)
- self.CompareDirectories(LOADING_DIR, generated_path)
-
- generated_path = self.GetTempPath('dir1')
- shutil.copytree(LOADING_DIR, generated_path)
- self.CreateNewGarbageFile(os.path.join(generated_path, 'garbage'))
- assert 'garbage' in os.listdir(generated_path)
- with self.assertRaisesRegexp(AssertionError, r'^.* match\.$'):
- self.CompareDirectories(LOADING_DIR, generated_path)
-
- generated_path = self.GetTempPath('dir2')
- shutil.copytree(LOADING_DIR, generated_path)
- self.CreateNewGarbageFile(os.path.join(generated_path, 'testdata/garbage'))
- with self.assertRaisesRegexp(AssertionError, r'^.* match\.$'):
- self.CompareDirectories(LOADING_DIR, generated_path)
-
- generated_path = self.GetTempPath('dir3')
- shutil.copytree(LOADING_DIR, generated_path)
- os.remove(os.path.join(generated_path, THIS_BASEMAME))
- with self.assertRaisesRegexp(AssertionError, r'^.* match\.$'):
- self.CompareDirectories(LOADING_DIR, generated_path)
- self.CreateNewGarbageFile(os.path.join(generated_path, 'garbage'))
- with self.assertRaisesRegexp(AssertionError, r'^.* match\.$'):
- self.CompareDirectories(LOADING_DIR, generated_path)
-
- def TouchHelper(temp_name, relative_name, timestamps):
- generated_path = self.GetTempPath(temp_name)
- shutil.copytree(LOADING_DIR, generated_path)
- os.utime(os.path.join(generated_path, relative_name), timestamps)
- with self.assertRaisesRegexp(AssertionError, r'^.* invalid mtime\.$'):
- self.CompareDirectories(LOADING_DIR, generated_path)
-
- TouchHelper('dir4', THIS_BASEMAME, (1256925858, 1256463122))
- TouchHelper('dir5', 'testdata', (1256918318, 1256568641))
- TouchHelper('dir6', 'trace_test/test_server.py', (1255116211, 1256156632))
- TouchHelper('dir7', './', (1255115332, 1256251864))
-
- def testCacheArchive(self):
- zip_dest = self.GetTempPath('cache.zip')
- chrome_cache.ZipDirectoryContent(LOADING_DIR, zip_dest)
-
- unzip_dest = self.GetTempPath('cache')
- chrome_cache.UnzipDirectoryContent(zip_dest, unzip_dest)
- self.CompareDirectories(LOADING_DIR, unzip_dest)
-
- self.CreateNewGarbageFile(os.path.join(unzip_dest, 'garbage'))
- chrome_cache.UnzipDirectoryContent(zip_dest, unzip_dest)
- self.CompareDirectories(LOADING_DIR, unzip_dest)
-
- unzip_dest = self.GetTempPath('foo/bar/cache')
- chrome_cache.UnzipDirectoryContent(zip_dest, unzip_dest)
- self.CompareDirectories(LOADING_DIR, unzip_dest)
-
- def testCopyCacheDirectory(self):
- copy_dest = self.GetTempPath('cache')
- chrome_cache.CopyCacheDirectory(LOADING_DIR, copy_dest)
- self.CompareDirectories(LOADING_DIR, copy_dest)
-
- self.CreateNewGarbageFile(os.path.join(copy_dest, 'garbage'))
- chrome_cache.CopyCacheDirectory(LOADING_DIR, copy_dest)
- self.CompareDirectories(LOADING_DIR, copy_dest)
-
- copy_dest = self.GetTempPath('foo/bar/cache')
- chrome_cache.CopyCacheDirectory(LOADING_DIR, copy_dest)
- self.CompareDirectories(LOADING_DIR, copy_dest)
-
-
-if __name__ == '__main__':
- unittest.main()
diff --git a/loading/cloud/__init__.py b/loading/cloud/__init__.py
deleted file mode 100644
index e69de29..0000000
--- a/loading/cloud/__init__.py
+++ /dev/null
diff --git a/loading/cloud/backend/README.md b/loading/cloud/backend/README.md
deleted file mode 100644
index 3376457..0000000
--- a/loading/cloud/backend/README.md
+++ /dev/null
@@ -1,197 +0,0 @@
-# Clovis in the Cloud: Developer Guide
-
-This document describes the backend-side of the trace collection, using Google
-Compute Engine.
-
-When the [frontend][3] spawns new tasks, it pushes them into a [TaskQueue][4]
-called `clovis-queue` with a unique tag.
-Then it creates backend instances (as an instance group) and passes them the
-TaskQueue tag.
-
-The backend instances then pull tasks from the TaskQueue and process them until
-it is empty. When there is no task left in the queue, the backend instances
-kill themselves.
-
-The main files for the backend are:
-
-- `startup-script.sh`: initializes an instance (installs the dependencies,
- downloads the code and the configuration).
-- `worker.py`: the main worker script.
-- Task handlers have a `Run()` method taking a `ClovisTask` parameter.
- - `clovis_task_handler.py`: Main entry point, dispatches the tasks to the
- more specialized handlers below.
- - `trace_task_handler.py`: Handles `trace` tasks.
- - `report_task_handler.py`: Handles `report` tasks.
-
-[TOC]
-
-## Initial setup for development
-
-Install the [gcloud command line tool][1].
-
-## Deploy the code
-
-This step deploys all the source code needed by the backend workers, as well as
-the Chromium binaries required for trace collection.
-
-```shell
-# Build Chrome (do not use the component build).
-BUILD_DIR=out/Release
-ninja -C $BUILD_DIR -j1000 -l60 chrome chrome_sandbox
-
-# Deploy to GCE
-# CLOUD_STORAGE_PATH is the path in Google Cloud Storage under which the
-# Clovis deployment will be uploaded.
-
-./tools/android/loading/cloud/backend/deploy.sh $BUILD_DIR $CLOUD_STORAGE_PATH
-```
-
-## Start the app in the cloud
-
-The application is automatically started by the frontend, and should not need to
-be started manually.
-
-If you really want to create an instance manually (when debugging for example),
-this can be done like this:
-
-```shell
-gcloud compute instances create $INSTANCE_NAME \
- --machine-type n1-standard-1 \
- --image ubuntu-14-04 \
- --zone europe-west1-c \
- --scopes cloud-platform,https://www.googleapis.com/auth/cloud-taskqueue \
- --metadata \
- cloud-storage-path=$CLOUD_STORAGE_PATH,task-dir=dir,taskqueue-tag=tag \
- --metadata-from-file \
- startup-script=$CHROMIUM_SRC/tools/android/loading/cloud/backend/startup-script.sh
-```
-
-If you are debbugging, you probably want to set additional metadata:
-
-- `auto-start=false`: to start an instance without automatically starting the
- app on it. This can be useful when doing iterative development on the
- instance using ssh, to be able to stop and restart the app manually.
-- `self-destruct=false`: to prevent the instance from self-destructing when
- the queue is empty.
-
-**Notes:**
-
-- If you use `auto-start=false`, and then try to ssh on the instance and
- launch `worker.py`, it will not work because of various issues, such as:
- - Environment variables defined by the startup script are not available
- to your user and you will need to redefine them.
- - You will not have permissions to access the files, and need to run
- `sudo chown` to give yourself permissions.
- - You need to activate `virtualenv`.
- Get in touch with *droger@* if you need this or want to improve it.
-- It can take a few minutes for the instance to start. You can follow the
- progress of the startup script on the gcloud console web interface (menu
- "Compute Engine" > "VM instances" then click on your instance and scroll
- down to see the "Serial console output") or from the command line using:
-
-```shell
-gcloud compute instances get-serial-port-output $INSTANCE_NAME
-```
-
-## `worker.py` configuration file
-
-`worker.py` takes a configuration file as command line parameter. This is a JSON
-dictionary with the keys:
-
-- `project_name` (string): Name of the Google Cloud project
-- `task_storage_path` (string): Path in Google Storage where task output is
- generated.
-- `binaries_path` (string): Path to the executables (Containing chrome).
-- `src_path` (string): Path to the Chromium source directory.
-- `taskqueue_tag` (string): Tag used by the worker when pulling tasks from
- `clovis-queue`.
-- `ad_rules_filename` and `tracking_rules_filename` (string): Path to the ad
- and tracking filtering rules.
-- `instance_name` (string, optional): Name of the Compute Engine instance this
- script is running on.
-- `worker_log_path` (string, optional): Path to the log file capturing the
- output of `worker.py`, to be uploaded to Cloud Storage.
-- `self_destruct` (boolean, optional): Whether the worker will destroy the
- Compute Engine instance when there are no remaining tasks to process. This
- is only relevant when running in the cloud, and requires `instance_name` to
- be defined.
-
-## Use the app
-
-Create tasks from the associated AppEngine application, see [documentation][3].
-
-If you want the frontend to send tasks to a particular instance that you created
-manually, make sure the `tag` and `storage_bucket` of the AppEngine request
-match the ones of your ComputeEngine instance, and set `instance_count` to `0`.
-
-## Stop the app in the cloud
-
-To stop a single instance that you started manually, do:
-
-```shell
-gcloud compute instances delete $INSTANCE_NAME
-```
-
-To stop instances that were created by the frontend, you must delete the
-instance group, not the individual instances. Otherwise the instance group will
-just recreate the deleted instances. You can do this from the Google Cloud
-console web interface, or using the `gcloud compute groups` commands.
-
-## Connect to the instance with SSH
-
-```shell
-gcloud compute ssh $INSTANCE_NAME
-```
-
-## Run the app locally
-
-From a new directory, set up a local environment:
-
-```shell
-virtualenv env
-source env/bin/activate
-pip install -r \
- $CHROMIUM_SRC/tools/android/loading/cloud/backend/pip_requirements.txt
-```
-
-The first time, you may need to get more access tokens:
-
-```shell
-gcloud beta auth application-default login --scopes \
- https://www.googleapis.com/auth/cloud-taskqueue \
- https://www.googleapis.com/auth/cloud-platform
-```
-
-Create a local configuration file for `worker.py`. Example:
-
-```shell
-cat >$CONFIG_FILE << EOF
-{
- "project_name" : "$PROJECT_NAME",
- "cloud_storage_path" : "$CLOUD_STORAGE_PATH",
- "binaries_path" : "$BUILD_DIR",
- "src_path" : "$CHROMIUM_SRC",
- "taskqueue_tag" : "some-tag"
-}
-EOF
-```
-
-Launch the app, passing the path to the deployment configuration file:
-
-```shell
-python $CHROMIUM_SRC/tools/android/loading/cloud/backend/worker.py \
- --config $CONFIG_FILE
-```
-
-You can now [use the app][2].
-
-Tear down the local environment:
-
-```shell
-deactivate
-```
-
-[1]: https://cloud.google.com/sdk
-[2]: #Use-the-app
-[3]: ../frontend/README.md
-[4]: https://cloud.google.com/appengine/docs/python/taskqueue
diff --git a/loading/cloud/backend/clovis_task_handler.py b/loading/cloud/backend/clovis_task_handler.py
deleted file mode 100644
index 6e05ee9..0000000
--- a/loading/cloud/backend/clovis_task_handler.py
+++ /dev/null
@@ -1,60 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-import os
-
-from common.clovis_task import ClovisTask
-from failure_database import FailureDatabase
-from report_task_handler import ReportTaskHandler
-from trace_task_handler import TraceTaskHandler
-
-
-class ClovisTaskHandler(object):
- """Handles all the supported clovis tasks."""
-
- def __init__(self, project_name, base_path, failure_database,
- google_storage_accessor, bigquery_service, binaries_path,
- ad_rules_filename, tracking_rules_filename, logger,
- instance_name=None):
- """Creates a ClovisTaskHandler.
-
- Args:
- project_name (str): Name of the project.
- base_path(str): Base path where results are written.
- failure_database (FailureDatabase): Failure Database.
- google_storage_accessor (GoogleStorageAccessor): Cloud storage accessor.
- bigquery_service (googleapiclient.discovery.Resource): Bigquery service.
- binaries_path(str): Path to the directory where Chrome executables are.
- ad_rules_filename (str): Path to the ad filtering rules.
- tracking_rules_filename (str): Path to the tracking filtering rules.
- instance_name(str, optional): Name of the ComputeEngine instance.
- """
- self._failure_database = failure_database
- self._handlers = {
- 'trace': TraceTaskHandler(
- base_path, failure_database, google_storage_accessor, binaries_path,
- logger, instance_name),
- 'report': ReportTaskHandler(
- project_name, failure_database, google_storage_accessor,
- bigquery_service, logger, ad_rules_filename,
- tracking_rules_filename)}
-
- def Run(self, clovis_task):
- """Runs a clovis_task.
-
- Args:
- clovis_task(ClovisTask): The task to run.
- """
- handler = self._handlers.get(clovis_task.Action())
- if not handler:
- self._logger.error('Unsupported task action: %s' % clovis_task.Action())
- self._failure_database.AddFailure('unsupported_action',
- clovis_task.Action())
- return
- handler.Run(clovis_task)
-
- def Finalize(self):
- """Called once before the handler is destroyed."""
- for handler in self._handlers.values():
- handler.Finalize()
diff --git a/loading/cloud/backend/deploy.sh b/loading/cloud/backend/deploy.sh
deleted file mode 100755
index 75f0703..0000000
--- a/loading/cloud/backend/deploy.sh
+++ /dev/null
@@ -1,71 +0,0 @@
-#!/bin/bash
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-# This script copies all dependencies required for trace collection.
-# Usage:
-# deploy.sh builddir gcs_path
-#
-# Where:
-# builddir is the build directory for Chrome
-# gcs_path is the Google Storage bucket under which the deployment is
-# installed
-
-builddir=$1
-tmpdir=`mktemp -d`
-deployment_gcs_path=$2/deployment
-
-# Extract needed sources.
-src_suffix=src
-tmp_src_dir=$tmpdir/$src_suffix
-
-# Copy files from tools/android/loading.
-mkdir -p $tmp_src_dir/tools/android/loading/cloud
-cp -r tools/android/loading/cloud/backend \
- $tmp_src_dir/tools/android/loading/cloud/
-cp -r tools/android/loading/cloud/common \
- $tmp_src_dir/tools/android/loading/cloud/
-cp tools/android/loading/*.py $tmp_src_dir/tools/android/loading
-cp tools/android/loading/cloud/*.py $tmp_src_dir/tools/android/loading/cloud
-
-# Copy other dependencies.
-mkdir $tmp_src_dir/third_party
-rsync -av --exclude=".*" --exclude "*.pyc" --exclude "*.html" --exclude "*.md" \
- third_party/catapult $tmp_src_dir/third_party
-mkdir $tmp_src_dir/tools/perf
-cp -r tools/perf/chrome_telemetry_build $tmp_src_dir/tools/perf
-mkdir -p $tmp_src_dir/build/android
-cp build/android/devil_chromium.py $tmp_src_dir/build/android/
-cp build/android/video_recorder.py $tmp_src_dir/build/android/
-cp build/android/devil_chromium.json $tmp_src_dir/build/android/
-cp -r build/android/pylib $tmp_src_dir/build/android/
-mkdir -p \
- $tmp_src_dir/third_party/blink/renderer/devtools/front_end/emulated_devices
-cp third_party/blink/renderer/devtools/front_end/emulated_devices/module.json \
- $tmp_src_dir/third_party/blink/renderer/devtools/front_end/emulated_devices/
-
-# Tar up the source and copy it to Google Cloud Storage.
-source_tarball=$tmpdir/source.tgz
-tar -cvzf $source_tarball -C $tmpdir $src_suffix
-gsutil cp $source_tarball gs://$deployment_gcs_path/source/
-
-# Copy the chrome executable to Google Cloud Storage.
-chrome/tools/build/make_zip.py $builddir chrome/tools/build/linux/FILES.cfg \
- $tmpdir/linux.zip
-gsutil cp $tmpdir/linux.zip gs://$deployment_gcs_path/binaries/linux.zip
-
-# Copy the startup script uncompressed so that it can be executed.
-gsutil cp tools/android/loading/cloud/backend/startup-script.sh \
- gs://$deployment_gcs_path/
-
-# Generate and upload metadata about this deployment.
-CHROMIUM_REV=$(git merge-base HEAD origin/master)
-cat >$tmpdir/build_metadata.json << EOF
-{
- "chromium_rev": "$CHROMIUM_REV"
-}
-EOF
-gsutil cp $tmpdir/build_metadata.json \
- gs://$deployment_gcs_path/deployment_metadata.json
-rm -rf $tmpdir
diff --git a/loading/cloud/backend/failure_database.py b/loading/cloud/backend/failure_database.py
deleted file mode 100644
index a044f04..0000000
--- a/loading/cloud/backend/failure_database.py
+++ /dev/null
@@ -1,44 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-import json
-
-class FailureDatabase(object):
- """Logs the failures happening in the Clovis backend."""
- DIRTY_STATE_ERROR = 'startup_with_dirty_state'
- CRITICAL_ERROR = 'critical_error'
-
- def __init__(self, json_string=None):
- """Loads a FailureDatabase from a string returned by ToJsonString()."""
- self.is_dirty = False
- if json_string:
- self._failures_dict = json.loads(json_string)
- else:
- self._failures_dict = {}
-
- def ToJsonDict(self):
- """Returns a dict representing this instance."""
- return self._failures_dict
-
- def ToJsonString(self):
- """Returns a string representing this instance."""
- return json.dumps(self.ToJsonDict(), indent=2)
-
- def AddFailure(self, failure_name, failure_content=None):
- """Adds a failure with the given name and content. If the failure already
- exists, it will increment the associated count.
- Sets the 'is_dirty' bit to True.
-
- Args:
- failure_name (str): name of the failure.
- failure_content (str): content of the failure (e.g. the URL or task that
- is failing).
- """
- self.is_dirty = True
- content = failure_content if failure_content else 'error_count'
- if failure_name not in self._failures_dict:
- self._failures_dict[failure_name] = {}
- error_count = self._failures_dict[failure_name].get(content, 0)
- self._failures_dict[failure_name][content] = error_count + 1
-
diff --git a/loading/cloud/backend/google_storage_accessor.py b/loading/cloud/backend/google_storage_accessor.py
deleted file mode 100644
index c95d742..0000000
--- a/loading/cloud/backend/google_storage_accessor.py
+++ /dev/null
@@ -1,78 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-import gcloud.exceptions
-import gcloud.storage
-
-
-class GoogleStorageAccessor(object):
- """Utility class providing helpers for Google Cloud Storage.
- """
- def __init__(self, credentials, project_name, bucket_name):
- """project_name is the name of the Google Cloud project.
- bucket_name is the name of the bucket that is used for Cloud Storage calls.
- """
- self._credentials = credentials
- self._project_name = project_name
- self._bucket_name = bucket_name
-
- def _GetStorageClient(self):
- """Returns the storage client associated with the project."""
- return gcloud.storage.Client(project = self._project_name,
- credentials = self._credentials)
-
- def _GetStorageBucket(self, storage_client):
- return storage_client.get_bucket(self._bucket_name)
-
- def BucketName(self):
- """Returns the name of the bucket associated with this instance."""
- return self._bucket_name
-
- def DownloadAsString(self, remote_filename):
- """Returns the content of a remote file as a string, or None if the file
- does not exist.
- """
- client = self._GetStorageClient()
- bucket = self._GetStorageBucket(client)
- blob = bucket.get_blob(remote_filename)
- if not blob:
- return None
- try:
- return blob.download_as_string()
- except gcloud.exceptions.NotFound:
- return None
-
- def UploadFile(self, filename_src, filename_dest):
- """Uploads a file to Google Cloud Storage.
-
- Args:
- filename_src: name of the local file.
- filename_dest: name of the file in Google Cloud Storage.
-
- Returns:
- The URL of the file in Google Cloud Storage.
- """
- client = self._GetStorageClient()
- bucket = self._GetStorageBucket(client)
- blob = bucket.blob(filename_dest)
- with open(filename_src) as file_src:
- blob.upload_from_file(file_src)
- return blob.public_url
-
- def UploadString(self, data_string, filename_dest):
- """Uploads a string to Google Cloud Storage.
-
- Args:
- data_string: the contents of the file to be uploaded.
- filename_dest: name of the file in Google Cloud Storage.
-
- Returns:
- The URL of the file in Google Cloud Storage.
- """
- client = self._GetStorageClient()
- bucket = self._GetStorageBucket(client)
- blob = bucket.blob(filename_dest)
- blob.upload_from_string(data_string)
- return blob.public_url
-
diff --git a/loading/cloud/backend/multiprocessing_helper.py b/loading/cloud/backend/multiprocessing_helper.py
deleted file mode 100644
index 3bd7879..0000000
--- a/loading/cloud/backend/multiprocessing_helper.py
+++ /dev/null
@@ -1,88 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-import multiprocessing
-import os
-import Queue
-import resource
-import signal
-
-import psutil
-
-
-def _LimitMemory(memory_share):
- """Limits the memory available to this process, to avoid OOM issues.
-
- Args:
- memory_share: (float) Share coefficient of the total physical memory that
- the process can use.
- """
- total_memory = psutil.virtual_memory().total
- memory_limit = memory_share * total_memory
- resource.setrlimit(resource.RLIMIT_AS, (memory_limit, -1L))
-
-
-def _MultiprocessingWrapper(queue, memory_share, function, args):
- """Helper function that sets a memory limit on the current process, then
- calls |function| on |args| and writes the results to |queue|.
-
- Args:
- queue: (multiprocessing.Queue) Queue where the results of the wrapped
- function are written.
- memory_share: (float) Share coefficient of the total physical memory that
- the process can use.
- function: The wrapped function.
- args: (list) Arguments for the wrapped function.
- """
- try:
- if memory_share:
- _LimitMemory(memory_share)
-
- queue.put(function(*args))
- except Exception:
- queue.put(None)
-
-
-def RunInSeparateProcess(function, args, logger, timeout_seconds,
- memory_share=None):
- """Runs a function in a separate process, and kills it after the timeout is
- reached.
-
- Args:
- function: The function to run.
- args: (list) Arguments for the wrapped function.
- timeout_seconds: (float) Timeout in seconds after which the subprocess is
- terminated.
- memory_share: (float) Set this parameter to limit the memory available to
- the spawned subprocess. This is a ratio of the total system
- memory (between 0 and 1).
- Returns:
- The result of the wrapped function, or None if the call failed.
- """
- queue = multiprocessing.Queue()
- process = multiprocessing.Process(target=_MultiprocessingWrapper,
- args=(queue, memory_share, function, args))
- process.daemon = True
- process.start()
-
- result = None
-
- try:
- logger.info('Wait for result.')
- # Note: If the subprocess somehow crashes (e.g. Python crashing), this
- # process will wait the full timeout. Could be avoided but probably not
- # worth the extra complexity.
- result = queue.get(block=True, timeout=timeout_seconds)
- except Queue.Empty:
- logger.warning('Subprocess timeout.')
- process.terminate()
-
- logger.info('Wait for process to terminate.')
- process.join(timeout=5)
-
- if process.is_alive():
- logger.warning('Process still alive, hard killing now.')
- os.kill(process.pid, signal.SIGKILL)
-
- return result
diff --git a/loading/cloud/backend/pip_requirements.txt b/loading/cloud/backend/pip_requirements.txt
deleted file mode 100644
index f2eb36b..0000000
--- a/loading/cloud/backend/pip_requirements.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-gcloud==0.10.1
-google-api-python-client==1.5.0
-psutil==4.1.0
-adblockparser==0.5
diff --git a/loading/cloud/backend/report_task_handler.py b/loading/cloud/backend/report_task_handler.py
deleted file mode 100644
index e59d65e..0000000
--- a/loading/cloud/backend/report_task_handler.py
+++ /dev/null
@@ -1,156 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-import json
-import math
-
-from googleapiclient import errors
-
-import common.google_bigquery_helper
-from common.loading_trace_database import LoadingTraceDatabase
-import common.google_error_helper as google_error_helper
-from failure_database import FailureDatabase
-from loading_trace import LoadingTrace
-from report import LoadingReport
-
-
-def LoadRemoteTrace(storage_accessor, remote_trace_path, logger):
- """Loads and returns the LoadingTrace located at the remote trace path.
-
- Args:
- storage_accessor: (GoogleStorageAccessor) Used to download the trace from
- CloudStorage.
- remote_trace_path: (str) Path to the trace file.
- """
-
- # Cut the gs://<bucket_name> prefix from trace paths if needed.
- prefix = 'gs://%s/' % storage_accessor.BucketName()
- prefix_length = len(prefix)
- if remote_trace_path.startswith(prefix):
- remote_trace_path = remote_trace_path[prefix_length:]
-
- trace_string = storage_accessor.DownloadAsString(
- remote_trace_path)
- if not trace_string:
- logger.error('Failed to download: ' + remote_trace_path)
- return None
-
- trace_dict = json.loads(trace_string)
- if not trace_dict:
- logger.error('Failed to parse: ' + remote_trace_path)
- return None
-
- trace = LoadingTrace.FromJsonDict(trace_dict)
- if not trace:
- logger.error('Invalid format for: ' + remote_trace_path)
- return None
-
- return trace
-
-
-class ReportTaskHandler(object):
- """Handles 'report' tasks.
-
- This handler loads the traces given in the task parameters, generates a report
- from them, and add them to a BigQuery table.
- The BigQuery table is implicitly created from a template (using the stream
- mode), and identified by the task tag.
- """
-
- def __init__(self, project_name, failure_database, google_storage_accessor,
- bigquery_service, logger, ad_rules_filename,
- tracking_rules_filename):
- self._project_name = project_name
- self._failure_database = failure_database
- self._google_storage_accessor = google_storage_accessor
- self._bigquery_service = bigquery_service
- self._logger = logger
- self._ad_rules_filename = ad_rules_filename
- self._tracking_rules_filename = tracking_rules_filename
-
- def _IsBigQueryValueValid(self, value):
- """Returns whether a value is valid and can be uploaded to BigQuery."""
- if value is None:
- return False
- # BigQuery rejects NaN.
- if type(value) is float and (math.isnan(value) or math.isinf(value)):
- return False
- return True
-
- def _StreamRowsToBigQuery(self, rows, table_id):
- """Uploads a list of rows to the BigQuery table associated with the given
- table_id.
-
- Args:
- rows: (list of dict) Each dictionary is a row to add to the table.
- table_id: (str) Identifier of the BigQuery table to update.
- """
- try:
- response = common.google_bigquery_helper.InsertInTemplatedBigQueryTable(
- self._bigquery_service, self._project_name, table_id, rows,
- self._logger)
- except errors.HttpError as http_error:
- # Handles HTTP error response codes (such as 404), typically indicating a
- # problem in parameters other than 'body'.
- error_content = google_error_helper.GetErrorContent(http_error)
- error_reason = google_error_helper.GetErrorReason(error_content)
- self._logger.error('BigQuery API error (reason: "%s"):\n%s' % (
- error_reason, http_error))
- self._failure_database.AddFailure('big_query_error', error_reason)
- if error_content:
- self._logger.error('Error details:\n%s' % error_content)
- return
-
- # Handles other errors, typically when the body is ill-formatted.
- insert_errors = response.get('insertErrors')
- if insert_errors:
- self._logger.error('BigQuery API error:\n' + str(insert_errors))
- for insert_error in insert_errors:
- self._failure_database.AddFailure('big_query_insert_error',
- str(insert_error.get('errors')))
-
- def Finalize(self):
- """Called once before the handler is destroyed."""
- pass
-
- def Run(self, clovis_task):
- """Runs a 'report' clovis_task.
-
- Args:
- clovis_task: (ClovisTask) The task to run.
- """
- if clovis_task.Action() != 'report':
- self._logger.error('Unsupported task action: %s' % clovis_task.Action())
- self._failure_database.AddFailure(FailureDatabase.CRITICAL_ERROR,
- 'report_task_handler_run')
- return
-
- ad_rules = open(self._ad_rules_filename).readlines()
- tracking_rules = open(self._tracking_rules_filename).readlines()
-
- rows = []
- for path in clovis_task.ActionParams()['traces']:
- self._logger.info('Generating report for: ' + path)
- trace = LoadRemoteTrace(self._google_storage_accessor, path, self._logger)
- if not trace:
- self._logger.error('Failed loading trace at: ' + path)
- self._failure_database.AddFailure('missing_trace_for_report', path)
- continue
- report = LoadingReport(trace, ad_rules, tracking_rules).GenerateReport()
- if not report:
- self._logger.error('Failed generating report for: ' + path)
- self._failure_database.AddFailure('report_generation_failed', path)
- continue
- # Filter out bad values.
- for key, value in report.items():
- if not self._IsBigQueryValueValid(value):
- url = report.get('url')
- self._logger.error('Invalid %s for URL:%s' % (key, url))
- self._failure_database.AddFailure('invalid_bigquery_value', url)
- del report[key]
- rows.append(report)
-
- if rows:
- table_id = common.google_bigquery_helper.GetBigQueryTableID(clovis_task)
- self._StreamRowsToBigQuery(rows, table_id)
diff --git a/loading/cloud/backend/startup-script.sh b/loading/cloud/backend/startup-script.sh
deleted file mode 100644
index 66ec2a4..0000000
--- a/loading/cloud/backend/startup-script.sh
+++ /dev/null
@@ -1,133 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-# Script executed at instance startup. It installs the required dependencies,
-# downloads the source code, and starts a web server.
-
-set -v
-
-get_instance_metadata() {
- curl -fs http://metadata/computeMetadata/v1/instance/attributes/$1 \
- -H "Metadata-Flavor: Google"
-}
-
-# Talk to the metadata server to get the project id and the instance id
-PROJECTID=$(curl -s \
- "http://metadata.google.internal/computeMetadata/v1/project/project-id" \
- -H "Metadata-Flavor: Google")
-
-INSTANCE_NAME=$(curl -s \
- "http://metadata.google.internal/computeMetadata/v1/instance/hostname" \
- -H "Metadata-Flavor: Google")
-
-# Install dependencies from apt
-apt-get update
-# Basic dependencies
-apt-get install -yq git supervisor python-pip python-dev unzip
-# Web server dependencies
-apt-get install -yq libffi-dev libssl-dev
-# Chrome dependencies
-apt-get install -yq libpangocairo-1.0-0 libXcomposite1 libXcursor1 libXdamage1 \
- libXi6 libXtst6 libnss3 libcups2 libgconf2-4 libXss1 libXrandr2 \
- libatk1.0-0 libasound2 libgtk-3-0
-# Trace collection dependencies
-apt-get install -yq xvfb
-
-# Create a pythonapp user. The application will run as this user.
-useradd -m -d /home/pythonapp pythonapp
-
-# pip from apt is out of date, so make it update itself and install virtualenv.
-pip install --upgrade pip virtualenv
-
-# Download the Clovis deployment from Google Cloud Storage and unzip it.
-# It is expected that the contents of the deployment have been generated using
-# the tools/android/loading/cloud/backend/deploy.sh script.
-CLOUD_STORAGE_PATH=`get_instance_metadata cloud-storage-path`
-DEPLOYMENT_PATH=$CLOUD_STORAGE_PATH/deployment
-
-mkdir -p /opt/app/clovis
-gsutil cp gs://$DEPLOYMENT_PATH/source/source.tgz /opt/app/clovis/source.tgz
-tar xvf /opt/app/clovis/source.tgz -C /opt/app/clovis
-rm /opt/app/clovis/source.tgz
-
-# Install app dependencies
-virtualenv /opt/app/clovis/env
-/opt/app/clovis/env/bin/pip install -r \
- /opt/app/clovis/src/tools/android/loading/cloud/backend/pip_requirements.txt
-
-mkdir /opt/app/clovis/binaries
-gsutil cp gs://$DEPLOYMENT_PATH/binaries/* /opt/app/clovis/binaries/
-unzip /opt/app/clovis/binaries/linux.zip -d /opt/app/clovis/binaries/
-
-# Ad and tracking filtering rules.
-# Made by the EasyList authors (https://easylist.github.io/).
-DATA_DIR=/opt/app/clovis/data
-mkdir $DATA_DIR && cd $DATA_DIR
-curl https://easylist.github.io/easylist/easylist.txt > easylist.txt
-curl https://easylist.github.io/easylist/easyprivacy.txt > easyprivacy.txt
-
-# Install the Chrome sandbox
-cp /opt/app/clovis/binaries/chrome_sandbox /usr/local/sbin/chrome-devel-sandbox
-chown root:root /usr/local/sbin/chrome-devel-sandbox
-chmod 4755 /usr/local/sbin/chrome-devel-sandbox
-
-# Make sure the pythonapp user owns the application code.
-chown -R pythonapp:pythonapp /opt/app
-
-# Create the configuration file for this deployment.
-DEPLOYMENT_CONFIG_PATH=/opt/app/clovis/deployment_config.json
-TASKQUEUE_TAG=`get_instance_metadata taskqueue-tag`
-TASK_DIR=`get_instance_metadata task-dir`
-TASK_STORAGE_PATH=$CLOUD_STORAGE_PATH/$TASK_DIR
-if [ "$(get_instance_metadata self-destruct)" == "false" ]; then
- SELF_DESTRUCT="False"
-else
- SELF_DESTRUCT="True"
-fi
-WORKER_LOG_PATH=/opt/app/clovis/worker.log
-
-cat >$DEPLOYMENT_CONFIG_PATH << EOF
-{
- "instance_name" : "$INSTANCE_NAME",
- "project_name" : "$PROJECTID",
- "task_storage_path" : "$TASK_STORAGE_PATH",
- "binaries_path" : "/opt/app/clovis/binaries",
- "src_path" : "/opt/app/clovis/src",
- "taskqueue_tag" : "$TASKQUEUE_TAG",
- "worker_log_path" : "$WORKER_LOG_PATH",
- "self_destruct" : "$SELF_DESTRUCT",
- "ad_rules_filename": "$DATA_DIR/easylist.txt",
- "tracking_rules_filename": "$DATA_DIR/easyprivacy.txt"
-}
-EOF
-
-# Check if auto-start is enabled
-AUTO_START=`get_instance_metadata auto-start`
-
-# Exit early if auto start is not enabled.
-if [ "$AUTO_START" == "false" ]; then
- exit 1
-fi
-
-# Configure supervisor to start the worker inside of our virtualenv.
-cat >/etc/supervisor/conf.d/python-app.conf << EOF
-[program:pythonapp]
-directory=/opt/app/clovis/src/tools/android/loading/cloud/backend
-command=python -u worker.py --config $DEPLOYMENT_CONFIG_PATH
-autostart=true
-autorestart=unexpected
-user=pythonapp
-# Environment variables ensure that the application runs inside of the
-# configured virtualenv.
-environment=VIRTUAL_ENV="/opt/app/clovis/env", \
- PATH="/opt/app/clovis/env/bin:/usr/bin", \
- HOME="/home/pythonapp",USER="pythonapp", \
- CHROME_DEVEL_SANDBOX="/usr/local/sbin/chrome-devel-sandbox"
-stdout_logfile=$WORKER_LOG_PATH
-stderr_logfile=$WORKER_LOG_PATH
-EOF
-
-supervisorctl reread
-supervisorctl update
-
diff --git a/loading/cloud/backend/trace_task_handler.py b/loading/cloud/backend/trace_task_handler.py
deleted file mode 100644
index e031636..0000000
--- a/loading/cloud/backend/trace_task_handler.py
+++ /dev/null
@@ -1,272 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-import os
-import re
-import sys
-import traceback
-
-import clovis_constants
-import common.clovis_paths
-from common.clovis_task import ClovisTask
-from common.loading_trace_database import LoadingTraceDatabase
-import controller
-from failure_database import FailureDatabase
-import loading_trace
-import multiprocessing_helper
-import options
-import xvfb_helper
-
-
-def GenerateTrace(url, emulate_device, emulate_network, filename, log_filename):
- """ Generates a trace.
-
- Args:
- url: URL as a string.
- emulate_device: Name of the device to emulate. Empty for no emulation.
- emulate_network: Type of network emulation. Empty for no emulation.
- filename: Name of the file where the trace is saved.
- log_filename: Name of the file where standard output and errors are
- logged.
-
- Returns:
- A dictionary of metadata about the trace, including a 'succeeded' field
- indicating whether the trace was successfully generated.
- """
- try:
- os.remove(filename) # Remove any existing trace for this URL.
- except OSError:
- pass # Nothing to remove.
-
- old_stdout = sys.stdout
- old_stderr = sys.stderr
-
- trace_metadata = { 'succeeded' : False, 'url' : url }
- trace = None
- if not url.startswith('http') and not url.startswith('file'):
- url = 'http://' + url
- with open(log_filename, 'w') as sys.stdout:
- try:
- sys.stderr = sys.stdout
-
- sys.stdout.write('Starting trace generation for: %s.\n' % url)
-
- # Set up the controller.
- chrome_ctl = controller.LocalChromeController()
- chrome_ctl.SetChromeEnvOverride(xvfb_helper.GetChromeEnvironment())
- if emulate_device:
- chrome_ctl.SetDeviceEmulation(emulate_device)
- if emulate_network:
- chrome_ctl.SetNetworkEmulation(emulate_network)
-
- # Record and write the trace.
- with chrome_ctl.Open() as connection:
- connection.ClearCache()
- trace = loading_trace.LoadingTrace.RecordUrlNavigation(
- url, connection, chrome_ctl.ChromeMetadata(),
- clovis_constants.DEFAULT_CATEGORIES)
- trace_metadata['succeeded'] = True
- trace_metadata.update(trace.ToJsonDict()[trace._METADATA_KEY])
- sys.stdout.write('Trace generation success.\n')
- except controller.ChromeControllerError as e:
- e.Dump(sys.stderr)
- except Exception as e:
- sys.stderr.write('Unknown exception:\n' + str(e))
- traceback.print_exc(file=sys.stderr)
-
- if trace:
- sys.stdout.write('Dumping trace to file.\n')
- trace.ToJsonFile(filename)
- else:
- sys.stderr.write('No trace generated.\n')
-
- sys.stdout.write('Trace generation finished.\n')
-
- sys.stdout = old_stdout
- sys.stderr = old_stderr
-
- return trace_metadata
-
-
-class TraceTaskHandler(object):
- """Handles 'trace' tasks."""
-
- def __init__(self, base_path, failure_database,
- google_storage_accessor, binaries_path, logger,
- instance_name=None):
- """Args:
- base_path(str): Base path where results are written.
- binaries_path(str): Path to the directory where Chrome executables are.
- instance_name(str, optional): Name of the ComputeEngine instance.
- """
- self._failure_database = failure_database
- self._logger = logger
- self._google_storage_accessor = google_storage_accessor
- self._base_path = base_path
- self._is_initialized = False
- self._trace_database = None
- self._xvfb_process = None
- trace_database_filename = common.clovis_paths.TRACE_DATABASE_PREFIX
- if instance_name:
- trace_database_filename += '_%s.json' % instance_name
- else:
- trace_database_filename += '.json'
- self._trace_database_path = os.path.join(base_path, trace_database_filename)
-
- # Initialize the global options that will be used during trace generation.
- options.OPTIONS.ParseArgs(['--local_build_dir', binaries_path])
-
- def _Initialize(self):
- """Initializes the trace task handler. Can be called multiple times."""
- if self._is_initialized:
- return
- self._is_initialized = True
-
- self._xvfb_process = xvfb_helper.LaunchXvfb()
-
- # Recover any existing traces in case the worker died.
- self._DownloadTraceDatabase()
- if self._trace_database.ToJsonDict():
- # There are already files from a previous run in the directory, likely
- # because the script is restarting after a crash.
- self._failure_database.AddFailure(FailureDatabase.DIRTY_STATE_ERROR,
- 'trace_database')
-
- def _DownloadTraceDatabase(self):
- """Downloads the trace database from CloudStorage."""
- self._logger.info('Downloading trace database')
- trace_database_string = self._google_storage_accessor.DownloadAsString(
- self._trace_database_path) or '{}'
- self._trace_database = LoadingTraceDatabase.FromJsonString(
- trace_database_string)
-
- def _UploadTraceDatabase(self):
- """Uploads the trace database to CloudStorage."""
- self._logger.info('Uploading trace database')
- assert self._is_initialized
- self._google_storage_accessor.UploadString(
- self._trace_database.ToJsonString(),
- self._trace_database_path)
-
- def _GenerateTraceOutOfProcess(self, url, emulate_device, emulate_network,
- filename, log_filename):
- """ Generates a trace in a separate process by calling GenerateTrace().
-
- The generation is done out of process to avoid issues where the system would
- run out of memory when the trace is very large. This ensures that the system
- can reclaim all the memory when the trace generation is done.
-
- See the GenerateTrace() documentation for a description of the parameters
- and return values.
- """
- self._logger.info('Starting external process for trace generation.')
- result = multiprocessing_helper.RunInSeparateProcess(
- GenerateTrace,
- (url, emulate_device, emulate_network, filename, log_filename),
- self._logger, timeout_seconds=180, memory_share=0.9)
-
- self._logger.info('Cleaning up Chrome processes.')
- controller.LocalChromeController.KillChromeProcesses()
-
- if not result:
- self._failure_database.AddFailure('trace_process_timeout', url)
- return {'succeeded':False, 'url':url}
- return result
-
-
- def _HandleTraceGenerationResults(self, local_filename, log_filename,
- remote_filename, trace_metadata):
- """Updates the trace database and the failure database after a trace
- generation. Uploads the trace and the log.
- Results related to successful traces are uploaded in the 'traces' directory,
- and failures are uploaded in the 'failures' directory.
-
- Args:
- local_filename (str): Path to the local file containing the trace.
- log_filename (str): Path to the local file containing the log.
- remote_filename (str): Name of the target remote file where the trace and
- the log (with a .log extension added) are uploaded.
- trace_metadata (dict): Metadata associated with the trace generation.
- """
- assert self._is_initialized
- if trace_metadata['succeeded']:
- traces_dir = os.path.join(self._base_path, 'traces')
- remote_trace_location = os.path.join(traces_dir, remote_filename)
- full_cloud_storage_path = os.path.join(
- 'gs://' + self._google_storage_accessor.BucketName(),
- remote_trace_location)
- self._trace_database.SetTrace(full_cloud_storage_path, trace_metadata)
- else:
- url = trace_metadata['url']
- self._logger.warning('Trace generation failed for URL: %s' % url)
- failures_dir = os.path.join(self._base_path, 'failures')
- remote_trace_location = os.path.join(failures_dir, remote_filename)
- self._failure_database.AddFailure('trace_collection', url)
-
- if os.path.isfile(local_filename):
- self._logger.debug('Uploading: %s' % remote_trace_location)
- self._google_storage_accessor.UploadFile(local_filename,
- remote_trace_location)
- os.remove(local_filename) # The trace may be very large.
- else:
- self._logger.warning('No trace found at: ' + local_filename)
-
- if os.path.isfile(log_filename):
- self._logger.debug('Uploading analyze log')
- remote_log_location = remote_trace_location + '.log'
- self._google_storage_accessor.UploadFile(
- log_filename, remote_log_location)
- else:
- self._logger.warning('No log file found at: {}'.format(log_filename))
-
- def Finalize(self):
- """Called once before the handler is destroyed."""
- if self._xvfb_process:
- try:
- self._xvfb_process.terminate()
- except OSError:
- self._logger.error('Could not terminate Xvfb.')
-
- def Run(self, clovis_task):
- """Runs a 'trace' clovis_task.
-
- Args:
- clovis_task(ClovisTask): The task to run.
- """
- if clovis_task.Action() != 'trace':
- self._logger.error('Unsupported task action: %s' % clovis_task.Action())
- self._failure_database.AddFailure(FailureDatabase.CRITICAL_ERROR,
- 'trace_task_handler_run')
- return
-
- self._Initialize()
-
- # Extract the task parameters.
- params = clovis_task.ActionParams()
- urls = params['urls']
- repeat_count = params.get('repeat_count', 1)
- emulate_device = params.get('emulate_device')
- emulate_network = params.get('emulate_network')
-
- log_filename = 'analyze.log'
- # Avoid special characters in storage object names
- pattern = re.compile(r"[#\?\[\]\*/]")
-
- success_happened = False
-
- while len(urls) > 0:
- url = urls.pop()
- local_filename = pattern.sub('_', url)
- for repeat in range(repeat_count):
- self._logger.debug('Generating trace for URL: %s' % url)
- trace_metadata = self._GenerateTraceOutOfProcess(
- url, emulate_device, emulate_network, local_filename, log_filename)
- if trace_metadata['succeeded']:
- success_happened = True
- remote_filename = os.path.join(local_filename, str(repeat))
- self._HandleTraceGenerationResults(
- local_filename, log_filename, remote_filename, trace_metadata)
-
- if success_happened:
- self._UploadTraceDatabase()
diff --git a/loading/cloud/backend/worker.py b/loading/cloud/backend/worker.py
deleted file mode 100644
index 6b3a369..0000000
--- a/loading/cloud/backend/worker.py
+++ /dev/null
@@ -1,226 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-import argparse
-import json
-import logging
-import os
-import random
-import sys
-import time
-
-from googleapiclient import discovery
-from oauth2client.client import GoogleCredentials
-
-# NOTE: The parent directory needs to be first in sys.path to avoid conflicts
-# with catapult modules that have colliding names, as catapult inserts itself
-# into the path as the second element. This is an ugly and fragile hack.
-_CLOUD_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)),
- os.pardir)
-sys.path.insert(0, os.path.join(_CLOUD_DIR, os.pardir))
-# Add _CLOUD_DIR to the path to access common code through the same path as the
-# frontend.
-sys.path.append(_CLOUD_DIR)
-
-from common.clovis_task import ClovisTask
-import common.google_bigquery_helper
-from common.google_instance_helper import GoogleInstanceHelper
-from clovis_task_handler import ClovisTaskHandler
-from failure_database import FailureDatabase
-from google_storage_accessor import GoogleStorageAccessor
-
-
-class Worker(object):
- def __init__(self, config, logger):
- """See README.md for the config format."""
- self._project_name = config['project_name']
- self._taskqueue_tag = config['taskqueue_tag']
- self._src_path = config['src_path']
- self._instance_name = config.get('instance_name')
- self._worker_log_path = config.get('worker_log_path')
- self._credentials = GoogleCredentials.get_application_default()
- self._logger = logger
- self._self_destruct = config.get('self_destruct')
- if self._self_destruct and not self._instance_name:
- self._logger.error('Self destruction requires an instance name.')
-
- # Separate the task storage path into the bucket and the base path under
- # the bucket.
- storage_path_components = config['task_storage_path'].split('/')
- self._bucket_name = storage_path_components[0]
- self._base_path_in_bucket = ''
- if len(storage_path_components) > 1:
- self._base_path_in_bucket = '/'.join(storage_path_components[1:])
- if not self._base_path_in_bucket.endswith('/'):
- self._base_path_in_bucket += '/'
-
- self._google_storage_accessor = GoogleStorageAccessor(
- credentials=self._credentials, project_name=self._project_name,
- bucket_name=self._bucket_name)
-
- if self._instance_name:
- failure_database_filename = \
- 'failure_database_%s.json' % self._instance_name
- else:
- failure_database_filename = 'failure_dabatase.json'
- self._failure_database_path = os.path.join(self._base_path_in_bucket,
- failure_database_filename)
-
- # Recover any existing failures in case the worker died.
- self._failure_database = self._GetFailureDatabase()
-
- if self._failure_database.ToJsonDict():
- # Script is restarting after a crash, or there are already files from a
- # previous run in the directory.
- self._failure_database.AddFailure(FailureDatabase.DIRTY_STATE_ERROR,
- 'failure_database')
-
- bigquery_service = common.google_bigquery_helper.GetBigQueryService(
- self._credentials)
- self._clovis_task_handler = ClovisTaskHandler(
- self._project_name, self._base_path_in_bucket, self._failure_database,
- self._google_storage_accessor, bigquery_service,
- config['binaries_path'], config['ad_rules_filename'],
- config['tracking_rules_filename'], self._logger, self._instance_name)
-
- self._UploadFailureDatabase()
-
- def Start(self):
- """Main worker loop.
-
- Repeatedly pulls tasks from the task queue and processes them. Returns when
- the queue is empty.
- """
- task_api = discovery.build('taskqueue', 'v1beta2',
- credentials=self._credentials)
- queue_name = 'clovis-queue'
- # Workaround for
- # https://code.google.com/p/googleappengine/issues/detail?id=10199
- project = 's~' + self._project_name
-
- while True:
- self._logger.debug('Fetching new task.')
- (clovis_task, task_id) = self._FetchClovisTask(project, task_api,
- queue_name)
- if not clovis_task:
- break
-
- self._logger.info('Processing task %s' % task_id)
- self._clovis_task_handler.Run(clovis_task)
- self._UploadFailureDatabase()
- self._logger.debug('Deleting task %s' % task_id)
- task_api.tasks().delete(project=project, taskqueue=queue_name,
- task=task_id).execute()
- self._logger.info('Finished task %s' % task_id)
- self._Finalize()
-
- def _GetFailureDatabase(self):
- """Downloads the failure database from CloudStorage."""
- self._logger.info('Downloading failure database')
- failure_database_string = self._google_storage_accessor.DownloadAsString(
- self._failure_database_path)
- return FailureDatabase(failure_database_string)
-
- def _UploadFailureDatabase(self):
- """Uploads the failure database to CloudStorage."""
- if not self._failure_database.is_dirty:
- return
- self._logger.info('Uploading failure database')
- self._google_storage_accessor.UploadString(
- self._failure_database.ToJsonString(),
- self._failure_database_path)
- self._failure_database.is_dirty = False
-
- def _FetchClovisTask(self, project_name, task_api, queue_name):
- """Fetches a ClovisTask from the task queue.
-
- Params:
- project_name(str): The name of the Google Cloud project.
- task_api: The TaskQueue service.
- queue_name(str): The name of the task queue.
-
- Returns:
- (ClovisTask, str): The fetched ClovisTask and its task ID, or (None, None)
- if no tasks are found.
- """
- response = task_api.tasks().lease(
- project=project_name, taskqueue=queue_name, numTasks=1, leaseSecs=600,
- groupByTag=True, tag=self._taskqueue_tag).execute()
- if (not response.get('items')) or (len(response['items']) < 1):
- return (None, None) # The task queue is empty.
-
- google_task = response['items'][0]
- task_id = google_task['id']
-
- # Delete the task without processing if it already failed multiple times.
- # TODO(droger): This is a workaround for internal bug b/28442122, revisit
- # once it is fixed.
- retry_count = google_task['retry_count']
- max_retry_count = 3
- skip_task = retry_count >= max_retry_count
- if skip_task:
- task_api.tasks().delete(project=project_name, taskqueue=queue_name,
- task=task_id).execute()
-
- clovis_task = ClovisTask.FromBase64(google_task['payloadBase64'])
-
- if retry_count > 0:
- self._failure_database.AddFailure('task_queue_retry',
- clovis_task.ToJsonString())
- self._UploadFailureDatabase()
-
- if skip_task:
- return self._FetchClovisTask(project_name, task_api, queue_name)
-
- return (clovis_task, task_id)
-
- def _Finalize(self):
- """Called before exiting."""
- self._logger.info('Done')
- self._clovis_task_handler.Finalize()
- # Upload the worker log.
- if self._worker_log_path:
- self._logger.info('Uploading worker log.')
- remote_log_path = os.path.join(self._base_path_in_bucket, 'worker_log')
- if self._instance_name:
- remote_log_path += '_' + self._instance_name
- self._google_storage_accessor.UploadFile(self._worker_log_path,
- remote_log_path)
- # Self destruct.
- if self._self_destruct:
- # Workaround for ComputeEngine internal bug b/28760288.
- random_delay = random.random() * 600.0 # Up to 10 minutes.
- self._logger.info(
- 'Wait %.0fs to avoid load spikes on compute engine.' % random_delay)
- time.sleep(random_delay)
-
- self._logger.info('Starting instance destruction: ' + self._instance_name)
- google_instance_helper = GoogleInstanceHelper(
- self._credentials, self._project_name, self._logger)
- success = google_instance_helper.DeleteInstance(self._taskqueue_tag,
- self._instance_name)
- if not success:
- self._logger.error('Self destruction failed.')
- # Do not add anything after this line, as the instance might be killed at
- # any time.
-
-if __name__ == '__main__':
- parser = argparse.ArgumentParser(
- description='ComputeEngine Worker for Clovis')
- parser.add_argument('--config', required=True,
- help='Path to the configuration file.')
- args = parser.parse_args()
-
- # Configure logging.
- logging.basicConfig(level=logging.WARNING,
- format='[%(asctime)s][%(levelname)s] %(message)s',
- datefmt='%y-%m-%d %H:%M:%S')
- logging.Formatter.converter = time.gmtime
- worker_logger = logging.getLogger('worker')
- worker_logger.setLevel(logging.INFO)
-
- worker_logger.info('Reading configuration')
- with open(args.config) as config_json:
- worker = Worker(json.load(config_json), worker_logger)
- worker.Start()
diff --git a/loading/cloud/common/__init__.py b/loading/cloud/common/__init__.py
deleted file mode 100644
index e69de29..0000000
--- a/loading/cloud/common/__init__.py
+++ /dev/null
diff --git a/loading/cloud/common/clovis_paths.py b/loading/cloud/common/clovis_paths.py
deleted file mode 100644
index 1ee10e4..0000000
--- a/loading/cloud/common/clovis_paths.py
+++ /dev/null
@@ -1,8 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-# Trace path constants.
-
-# Prefix for the loading trace database files.
-TRACE_DATABASE_PREFIX = 'trace_database'
diff --git a/loading/cloud/common/clovis_task.py b/loading/cloud/common/clovis_task.py
deleted file mode 100644
index 97c88a5..0000000
--- a/loading/cloud/common/clovis_task.py
+++ /dev/null
@@ -1,91 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-import base64
-import json
-import uuid
-
-class ClovisTask(object):
- """Generic task, generated by the AppEngine frontend and consumed by the
- ComputeEngine backend.
- """
-
- def __init__(self, action, action_params, backend_params):
- """ See tools/android/loading/cloud/frontend/README.md for a specification
- of the parameters.
-
- Args:
- action(str): Action accomplished by this task.
- action_params(dict): Parameters of task.
- backend_params(dict): Parameters of the instances running the task.
- If this is None, no instances are created. If this dictionary has no
- 'tag' key, a unique tag will be generated.
- """
- self._action = action
- self._action_params = action_params or {}
- self._backend_params = backend_params or {}
- # If no tag is specified, generate a unique tag.
- if not self._backend_params.get('tag'):
- self._backend_params.update({'tag': str(uuid.uuid1())})
-
- @classmethod
- def FromJsonDict(cls, json_dict):
- """Loads a ClovisTask from a JSON dictionary.
-
- Returns:
- ClovisTask: The task, or None if the string is invalid.
- """
- try:
- action = json_dict['action']
- action_params = json_dict['action_params']
- # Vaidate the format.
- if action == 'trace':
- urls = action_params['urls']
- if (type(urls) is not list) or (len(urls) == 0):
- return None
- elif action == 'report':
- if not action_params.get('trace_bucket'):
- return None
- else:
- # When more actions are supported, check that they are valid here.
- return None
- return cls(action, action_params, json_dict.get('backend_params'))
- except Exception:
- return None
-
- @classmethod
- def FromJsonString(cls, json_string):
- """Loads a ClovisTask from a JSON string.
-
- Returns:
- ClovisTask: The task, or None if the string is invalid.
- """
- try:
- return cls.FromJsonDict(json.loads(json_string))
- except Exception:
- return None
-
- @classmethod
- def FromBase64(cls, base64_string):
- """Loads a ClovisTask from a base 64 string."""
- return ClovisTask.FromJsonString(base64.b64decode(base64_string))
-
- def ToJsonDict(self):
- """Returns the JSON representation of the task as a dictionary."""
- return {'action': self._action, 'action_params': self._action_params,
- 'backend_params': self._backend_params}
-
- def ToJsonString(self):
- """Returns the JSON representation of the task as a string."""
- return json.dumps(self.ToJsonDict())
-
- def Action(self):
- return self._action
-
- def ActionParams(self):
- return self._action_params
-
- def BackendParams(self):
- return self._backend_params
-
diff --git a/loading/cloud/common/google_bigquery_helper.py b/loading/cloud/common/google_bigquery_helper.py
deleted file mode 100644
index 547c434..0000000
--- a/loading/cloud/common/google_bigquery_helper.py
+++ /dev/null
@@ -1,109 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-import os
-import uuid
-
-from googleapiclient import (discovery, errors)
-
-import common.google_error_helper as google_error_helper
-
-# Name of the dataset.
-BIGQUERY_DATASET = 'clovis_dataset'
-# Name of the table used as a template for new tables.
-BIGQUERY_TABLE_TEMPLATE = 'report'
-
-
-def GetBigQueryService(credentials):
- """Returns the BigQuery service."""
- return discovery.build('bigquery', 'v2', credentials=credentials)
-
-
-def GetBigQueryTableID(clovis_report_task):
- """Returns the ID of the BigQuery table associated with the task.
- This ID is appended at the end of the table name.
-
- Args:
- clovis_report_task: (ClovisTask) The task, must be a 'report' task.
-
- Returns:
- str: The table ID.
- """
- assert (clovis_report_task.Action() == 'report')
- # Name the table after the last path component of the trace bucket.
- trace_bucket = clovis_report_task.ActionParams()['trace_bucket']
- table_id = os.path.basename(os.path.normpath(trace_bucket))
- task_name = clovis_report_task.BackendParams().get('task_name')
- if task_name is not None:
- table_id += '_' + task_name
- # BigQuery table names can contain only alpha numeric characters and
- # underscores.
- return ''.join(c for c in table_id if c.isalnum() or c == '_')
-
-
-def GetBigQueryTableURL(project_name, table_id):
- """Returns the full URL for the BigQuery table associated with table_id."""
- return 'https://bigquery.cloud.google.com/table/%s:%s.%s_%s' % (
- project_name, BIGQUERY_DATASET, BIGQUERY_TABLE_TEMPLATE, table_id)
-
-
-def InsertInTemplatedBigQueryTable(bigquery_service, project_name, table_id,
- rows, logger):
- """Inserts rows in the BigQuery table corresponding to table_id.
- Assumes that the BigQuery dataset and table template already exist.
-
- Args:
- bigquery_service: The BigQuery service.
- project_name: (str) Name of the Google Cloud project.
- table_id: (str) table_id as returned by GetBigQueryTableID().
- rows: (list) Rows to insert in the table.
- logger: (logging.Logger) The logger.
-
- Returns:
- dict: The BigQuery service response.
- """
- rows_data = [{'json': row, 'insertId': str(uuid.uuid4())} for row in rows]
- body = {'rows': rows_data, 'templateSuffix':'_'+table_id}
- logger.info('BigQuery API request:\n' + str(body))
- response = bigquery_service.tabledata().insertAll(
- projectId=project_name, datasetId=BIGQUERY_DATASET,
- tableId=BIGQUERY_TABLE_TEMPLATE, body=body).execute()
- logger.info('BigQuery API response:\n' + str(response))
- return response
-
-
-def DoesBigQueryTableExist(bigquery_service, project_name, table_id, logger):
- """Returns wether the BigQuery table identified by table_id exists.
-
- Raises a HttpError exception if the call to BigQuery API fails.
-
- Args:
- bigquery_service: The BigQuery service.
- project_name: (str) Name of the Google Cloud project.
- table_id: (str) table_id as returned by GetBigQueryTableID().
-
- Returns:
- bool: True if the table exists.
- """
- table_name = BIGQUERY_TABLE_TEMPLATE + '_' + table_id
- logger.info('Getting table information for %s.' % table_name)
- try:
- table = bigquery_service.tables().get(projectId=project_name,
- datasetId=BIGQUERY_DATASET,
- tableId=table_name).execute()
- return bool(table)
-
- except errors.HttpError as http_error:
- error_content = google_error_helper.GetErrorContent(http_error)
- error_reason = google_error_helper.GetErrorReason(error_content)
- if error_reason == google_error_helper.REASON_NOT_FOUND:
- return False
- else:
- logger.error('BigQuery API error (reason: "%s"):\n%s' % (
- error_reason, http_error))
- if error_content:
- logger.error('Error details:\n%s' % error_content)
- raise # Re-raise the exception.
-
- return False
diff --git a/loading/cloud/common/google_error_helper.py b/loading/cloud/common/google_error_helper.py
deleted file mode 100644
index eac968a..0000000
--- a/loading/cloud/common/google_error_helper.py
+++ /dev/null
@@ -1,33 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-"""Helper functions to manage errors returned by Google Compute APIs."""
-
-import json
-
-
-# Error reason returned by GetErrorReason() when a resource is not found.
-REASON_NOT_FOUND = 'notFound'
-
-
-def GetErrorContent(error):
- """Returns the contents of an error returned by Google Compute APIs as a
- dictionary or None.
- """
- if not error.resp.get('content-type', '').startswith('application/json'):
- return None
- return json.loads(error.content)
-
-
-def GetErrorReason(error_content):
- """Returns the error reason as a string."""
- if not error_content:
- return None
- if (not error_content.get('error') or
- not error_content['error'].get('errors')):
- return None
- error_list = error_content['error']['errors']
- if not error_list:
- return None
- return error_list[0].get('reason')
diff --git a/loading/cloud/common/google_instance_helper.py b/loading/cloud/common/google_instance_helper.py
deleted file mode 100644
index e4199b5..0000000
--- a/loading/cloud/common/google_instance_helper.py
+++ /dev/null
@@ -1,210 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-import json
-import time
-
-from googleapiclient import (discovery, errors)
-
-import common.google_error_helper as google_error_helper
-
-
-class GoogleInstanceHelper(object):
- """Helper class for the Google Compute API, allowing to manage groups of
- instances more easily. Groups of instances are identified by a tag."""
- _COMPUTE_API_ROOT = 'https://www.googleapis.com/compute/v1/projects/'
-
- def __init__(self, credentials, project, logger):
- self._compute_api = discovery.build('compute','v1', credentials=credentials)
- self._project = project
- self._project_api_url = self._COMPUTE_API_ROOT + project
- self._region = 'europe-west1'
- self._zone = 'europe-west1-c'
- self._logger = logger
-
- def _ExecuteApiRequest(self, request, retry_count=3):
- """ Executes a Compute API request and returns True on success.
-
- Returns:
- (True, Response) in case of success, or (False, error_content) otherwise.
- """
- self._logger.info('Compute API request:\n' + request.to_json())
- try:
- response = request.execute()
- self._logger.info('Compute API response:\n' + str(response))
- return (True, response)
- except errors.HttpError as err:
- error_content = google_error_helper.GetErrorContent(err)
- error_reason = google_error_helper.GetErrorReason(error_content)
- if error_reason == 'resourceNotReady' and retry_count > 0:
- # Retry after a delay
- delay_seconds = 1
- self._logger.info(
- 'Resource not ready, retrying in %i seconds.' % delay_seconds)
- time.sleep(delay_seconds)
- return self._ExecuteApiRequest(request, retry_count - 1)
- else:
- self._logger.error('Compute API error (reason: "%s"):\n%s' % (
- error_reason, err))
- if error_content:
- self._logger.error('Error details:\n%s' % error_content)
- return (False, error_content)
-
- def _GetTemplateName(self, tag):
- """Returns the name of the instance template associated with tag."""
- return 'template-' + tag
-
- def _GetInstanceGroupName(self, tag):
- """Returns the name of the instance group associated with tag."""
- return 'group-' + tag
-
- def CreateTemplate(self, tag, bucket, task_dir):
- """Creates an instance template for instances identified by tag.
-
- Args:
- tag: (string) Tag associated to a task.
- bucket: (string) Root bucket where the deployment is located.
- task_dir: (string) Subdirectory of |bucket| where task data is read and
- written.
-
- Returns:
- boolean: True if successful.
- """
- image_url = self._COMPUTE_API_ROOT + \
- 'ubuntu-os-cloud/global/images/ubuntu-1404-trusty-v20160406'
- request_body = {
- 'name': self._GetTemplateName(tag),
- 'properties': {
- 'machineType': 'n1-standard-1',
- 'networkInterfaces': [{
- 'network': self._project_api_url + '/global/networks/default',
- 'accessConfigs': [{
- 'name': 'external-IP',
- 'type': 'ONE_TO_ONE_NAT'
- }]}],
- 'disks': [{
- 'type': 'PERSISTENT',
- 'boot': True,
- 'autoDelete': True,
- 'mode': 'READ_WRITE',
- 'initializeParams': {'sourceImage': image_url}}],
- 'canIpForward': False,
- 'scheduling': {
- 'automaticRestart': True,
- 'onHostMaintenance': 'MIGRATE',
- 'preemptible': False},
- 'serviceAccounts': [{
- 'scopes': [
- 'https://www.googleapis.com/auth/cloud-platform',
- 'https://www.googleapis.com/auth/cloud-taskqueue'],
- 'email': 'default'}],
- 'metadata': { 'items': [
- {'key': 'cloud-storage-path',
- 'value': bucket},
- {'key': 'task-dir',
- 'value': task_dir},
- {'key': 'startup-script-url',
- 'value': 'gs://%s/deployment/startup-script.sh' % bucket},
- {'key': 'taskqueue-tag', 'value': tag}]}}}
- request = self._compute_api.instanceTemplates().insert(
- project=self._project, body=request_body)
- return self._ExecuteApiRequest(request)[0]
-
- def DeleteTemplate(self, tag):
- """Deletes the instance template associated with tag. Returns True if
- successful.
- """
- template_name = self._GetTemplateName(tag)
- request = self._compute_api.instanceTemplates().delete(
- project=self._project, instanceTemplate=template_name)
- (success, result) = self._ExecuteApiRequest(request)
- if success:
- return True
- if google_error_helper.GetErrorReason(result) == \
- google_error_helper.REASON_NOT_FOUND:
- # The template does not exist, nothing to do.
- self._logger.warning('Template not found: ' + template_name)
- return True
- return False
-
- def CreateInstances(self, tag, instance_count):
- """Creates an instance group associated with tag. The instance template must
- exist for this to succeed. Returns True if successful.
- """
- template_url = '%s/global/instanceTemplates/%s' % (
- self._project_api_url, self._GetTemplateName(tag))
- request_body = {
- 'zone': self._zone, 'targetSize': instance_count,
- 'baseInstanceName': 'instance-' + tag,
- 'instanceTemplate': template_url,
- 'name': self._GetInstanceGroupName(tag)}
- request = self._compute_api.instanceGroupManagers().insert(
- project=self._project, zone=self._zone,
- body=request_body)
- return self._ExecuteApiRequest(request)[0]
-
- def DeleteInstance(self, tag, instance_hostname):
- """Deletes one instance from the instance group identified by tag. Returns
- True if successful.
- """
- # The instance hostname may be of the form <name>.c.<project>.internal but
- # only the <name> part should be passed to the compute API.
- name = instance_hostname.split('.')[0]
- instance_url = self._project_api_url + (
- "/zones/%s/instances/%s" % (self._zone, name))
- request = self._compute_api.instanceGroupManagers().deleteInstances(
- project=self._project, zone=self._zone,
- instanceGroupManager=self._GetInstanceGroupName(tag),
- body={'instances': [instance_url]})
- return self._ExecuteApiRequest(request)[0]
-
- def DeleteInstanceGroup(self, tag):
- """Deletes the instance group identified by tag. If instances are still
- running in this group, they are deleted as well.
- """
- group_name = self._GetInstanceGroupName(tag)
- request = self._compute_api.instanceGroupManagers().delete(
- project=self._project, zone=self._zone,
- instanceGroupManager=group_name)
- (success, result) = self._ExecuteApiRequest(request)
- if success:
- return True
- if google_error_helper.GetErrorReason(result) == \
- google_error_helper.REASON_NOT_FOUND:
- # The group does not exist, nothing to do.
- self._logger.warning('Instance group not found: ' + group_name)
- return True
- return False
-
- def GetInstanceCount(self, tag):
- """Returns the number of instances in the instance group identified by
- tag, or -1 in case of failure.
- """
- request = self._compute_api.instanceGroupManagers().listManagedInstances(
- project=self._project, zone=self._zone,
- instanceGroupManager=self._GetInstanceGroupName(tag))
- (success, response) = self._ExecuteApiRequest(request)
- if not success:
- return -1
- return len(response.get('managedInstances', []))
-
-
- def GetAvailableInstanceCount(self):
- """Returns the number of instances that can be created, according to the
- ComputeEngine quotas, or -1 on failure.
- """
- request = self._compute_api.regions().get(project=self._project,
- region=self._region)
- (success, response) = self._ExecuteApiRequest(request)
- if not success:
- self._logger.error('Could not get ComputeEngine region information.')
- return -1
- metric_name = 'IN_USE_ADDRESSES'
- for quota in response.get('quotas', []):
- if quota['metric'] == metric_name:
- return quota['limit'] - quota['usage']
- self._logger.error(
- metric_name + ' quota not found in ComputeEngine response.')
- return -1
-
diff --git a/loading/cloud/common/loading_trace_database.py b/loading/cloud/common/loading_trace_database.py
deleted file mode 100644
index f6e946a..0000000
--- a/loading/cloud/common/loading_trace_database.py
+++ /dev/null
@@ -1,57 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-"""Represents a database of on-disk traces."""
-
-import json
-
-
-class LoadingTraceDatabase(object):
- def __init__(self, traces_dict):
- """traces_dict is a dictionary mapping filenames of traces to metadata
- about those traces."""
- self._traces_dict = traces_dict
-
- def SetTrace(self, filename, trace_dict):
- """Sets a mapping from |filename| to |trace_dict| into the database.
- If there is an existing mapping for filename, it is replaced.
- """
- self._traces_dict[filename] = trace_dict
-
- def GetTraceFilesForURL(self, url):
- """Given a URL, returns the set of filenames of traces that were generated
- for this URL."""
- trace_files = [f for f in self._traces_dict.keys()
- if self._traces_dict[f]["url"] == url]
- return trace_files
-
- def ToJsonDict(self):
- """Returns a dict representing this instance."""
- return self._traces_dict
-
- def ToJsonString(self):
- """Returns a string representing this instance."""
- return json.dumps(self._traces_dict, indent=2)
-
- def ToJsonFile(self, json_path):
- """Saves a json file representing this instance."""
- json_dict = self.ToJsonDict()
- with open(json_path, 'w') as output_file:
- json.dump(json_dict, output_file, indent=2)
-
- @classmethod
- def FromJsonDict(cls, json_dict):
- """Returns an instance from a dict returned by ToJsonDict()."""
- return LoadingTraceDatabase(json_dict)
-
- @classmethod
- def FromJsonString(cls, json_string):
- """Returns an instance from a string returned by ToJsonString()."""
- return LoadingTraceDatabase(json.loads(json_string))
-
- @classmethod
- def FromJsonFile(cls, json_path):
- """Returns an instance from a json file saved by ToJsonFile()."""
- with open(json_path) as input_file:
- return cls.FromJsonDict(json.load(input_file))
diff --git a/loading/cloud/common/loading_trace_database_unittest.py b/loading/cloud/common/loading_trace_database_unittest.py
deleted file mode 100644
index 72ffcba..0000000
--- a/loading/cloud/common/loading_trace_database_unittest.py
+++ /dev/null
@@ -1,45 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-import unittest
-
-from cloud.common.loading_trace_database import LoadingTraceDatabase
-
-
-class LoadingTraceDatabaseUnittest(unittest.TestCase):
- _JSON_DATABASE = {
- "traces/trace1.json" : { "url" : "http://bar.html", },
- "traces/trace2.json" : { "url" : "http://bar.html", },
- "traces/trace3.json" : { "url" : "http://qux.html", },
- }
-
- def setUp(self):
- self.database = LoadingTraceDatabase.FromJsonDict(self._JSON_DATABASE)
-
- def testGetTraceFilesForURL(self):
- # Test a URL with no matching traces.
- self.assertEqual(
- self.database.GetTraceFilesForURL("http://foo.html"),
- [])
-
- # Test a URL with matching traces.
- self.assertEqual(
- set(self.database.GetTraceFilesForURL("http://bar.html")),
- set(["traces/trace1.json", "traces/trace2.json"]))
-
- def testSerialization(self):
- self.assertEqual(
- self._JSON_DATABASE, self.database.ToJsonDict())
-
- def testSetTrace(self):
- dummy_url = "http://dummy.com"
- new_trace_file = "traces/new_trace.json"
- self.assertEqual(self.database.GetTraceFilesForURL(dummy_url), [])
- self.database.SetTrace(new_trace_file, {"url" : dummy_url})
- self.assertEqual(self.database.GetTraceFilesForURL(dummy_url),
- [new_trace_file])
-
-
-if __name__ == '__main__':
- unittest.main()
diff --git a/loading/cloud/frontend/.gitignore b/loading/cloud/frontend/.gitignore
deleted file mode 100644
index c3af857..0000000
--- a/loading/cloud/frontend/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-lib/
diff --git a/loading/cloud/frontend/README.md b/loading/cloud/frontend/README.md
deleted file mode 100644
index ec9b8d9..0000000
--- a/loading/cloud/frontend/README.md
+++ /dev/null
@@ -1,171 +0,0 @@
-# Appengine Frontend for Clovis
-
-[TOC]
-
-## Usage
-
-Visit the application URL in your browser, and upload a JSON dictionary with the
-following keys:
-
-- `action` (string): the action to perform. Only `trace` and `report` are
- supported.
-- `action_params` (dictionary): the parameters associated to the action.
- See below for more details.
-- `backend_params` (dictionary): the parameters configuring the backend for
- this task. See below for more details.
-
-### Parameters for `backend_params`
-
-- `storage_bucket` (string): Name of the storage bucket used by the backend
- instances. Backend code and data must have been previously deployed to this
- bucket using the `deploy.sh` [script][4].
-- `instance_count` (int, optional): Number of Compute Engine instances that
- will be started for this task. If not specified, the number of instances is
- determined automatically depending on the size of the task and the number
- of available instances.
-- `task_name` (string, opitonal): Name of the task, used to build the name of
- the output directory.
-- `tag` (string, optional): tag internally used to associate tasks to backend
- ComputeEngine instances. This parameter should not be set in general, as it
- is mostly exposed for development purposes. If this parameter is not
- specified, a unique tag will be generated.
-- `timeout_hours` (int, optional): if workers are still alive after this
- delay, they will be forcibly killed, to avoid wasting Compute Engine
- resources. If not specified, the timeout is determined automatically.
-
-### Parameters for the `trace` action
-
-The trace action takes a list of URLs as input and generates a list of traces by
-running Chrome.
-
-- `urls` (list of strings): the list of URLs to process.
-- `repeat_count` (integer, optional): the number of traces to be generated
- for each URL. Defaults to 1.
-- `emulate_device` (string, optional): the device to emulate (e.g. `Nexus 4`).
-- `emulate_network` (string, optional): the network to emulate.
-
-### Parameters for the `report` action
-
-Finds all the traces in the specified bucket and generates a report in BigQuery.
-
-- `trace_bucket` (string): Name of the storage bucket where trace databases can
- be found. It can be either absolute or relative to the `storage_bucket`
- specified in the backend parameters.
-
-This requires an existing `clovis_dataset.report` BigQuery table that will be
-used as a template. The schema of this template is not updated automatically and
-must match the format of the report (as generated by `report.py`).
-See [how to update the schema manually][7].
-
-## Development
-
-### Design overview
-
-This is a [python AppEngine][5] application using [Flask][6].
-
-- Appengine configuration:
- - `app.yaml` defines the handlers. There is a static handler for all URLs
- in the `static/` directory, and all other URLs are handled by the
- `clovis_frontend.py` script.
- - `queue.yaml` defines the task queues associated with the application. In
- particular, the `clovis-queue` is a pull-queue where tasks are added by
- the AppEngine frontend and consummed by the ComputeEngine backend.
- See the [TaskQueue documentation][2] for more details.
-- `templates/form.html` is a static HTML document allowing the user to upload
- a JSON file. `clovis_frontend.py` is then invoked with the contents of the
- file (see the `/form_sent` handler).
-- `clovis_task.py` defines a task to be run by the backend. It is sent through
- the `clovis-queue` task queue.
-- `clovis_frontend.py` is the script that processes the file uploaded by the
- form, creates the tasks and enqueues them in `clovis-queue`.
-
-### Prerequisites
-
-- Install the gcloud [tool][1]
-- Add a `queue.yaml` file in the application directory (i.e. next to
- `app.yaml`) defining a `clovis-queue` pull queue that can be accessed by the
- ComputeEngine service worker associated to the project. Add your email too
- if you want to run the application locally. See the [TaskQueue configuration
- documentation][3] for more details. Example:
-
-```
-# queue.yaml
-- name: clovis-queue
- mode: pull
- acl:
- - user_email: me@address.com # For local development.
- - user_email: 123456789-compute@developer.gserviceaccount.com
-```
-
-### Run Locally
-
-```shell
-# Install dependencies in the lib/ directory. Note that this will pollute your
-# Chromium checkout, see the cleanup intructions below.
-pip install -r requirements.txt -t lib
-# Start the local server.
-dev_appserver.py -A $PROJECT_NAME .
-```
-
-Visit the application [http://localhost:8080](http://localhost:8080).
-
-After you are done, cleanup your Chromium checkout:
-
-```shell
-rm -rf $CHROMIUM_SRC/tools/android/loading/frontend/lib
-```
-
-### Deploy
-
-```shell
-# Install dependencies in the lib/ directory.
-pip install -r requirements.txt -t lib
-# Deploy.
-gcloud preview app deploy app.yaml
-```
-
-To deploy to a staging/test version of the server, you can do:
-
-```shell
-gcloud preview app deploy --no-promote --version $MY_VERSION
-```
-
-where `MY_VERSION` can be something like `staging` or something more unique to
-ensure there is no name collision. You can then access the application live on
-the web by prefixing the URL of the service with `$MY_VERSION-dot-`.
-
-### Updating the Database Schema
-
-When a change is made to the dictionary returned by report.py, the BigQuery
-database schema must be updated accordingly.
-
-To update the schema, run:
-
-```shell
-bq update \
- --schema \
- $CHROMIUM_SRC/tools/android/loading/cloud/frontend/bigquery_schema.json \
- -t clovis_dataset.report
-```
-
-Adding a new field is harmless, but don't modify existing ones.
-
-If the above command does not work, this is probably because you are doing more
-than adding fields.
-In this case, you can delete and recreate the `clovis_dataset.report` table from
-the [BigQuery web interface][8]:
-- Expand `clovis_dataset` from the left menu, and delete the `report` table.
-- Create a new table in `clovis_dataset`, and call it `report`.
-- Set `Location` to `None` in order to create an empty table.
-- Click `Edit as Text` in the `Schema` section , and paste the contents of
- `bigquery_schema.json` there.
-
-
-[1]: https://cloud.google.com/sdk
-[2]: https://cloud.google.com/appengine/docs/python/taskqueue
-[3]: https://cloud.google.com/appengine/docs/python/config/queue
-[4]: ../backend/README.md#Deploy-the-code
-[5]: https://cloud.google.com/appengine/docs/python
-[6]: http://flask.pocoo.org
-[7]: #Updating-the-Database-Schema
-[8]: https://bigquery.cloud.google.com
diff --git a/loading/cloud/frontend/app.yaml b/loading/cloud/frontend/app.yaml
deleted file mode 100644
index 48bf775..0000000
--- a/loading/cloud/frontend/app.yaml
+++ /dev/null
@@ -1,25 +0,0 @@
-runtime: python27
-api_version: 1
-threadsafe: yes
-instance_class: F4_1G
-
-builtins:
-- deferred: on
-
-handlers:
-
-- url: /_ah/queue/deferred
- # For the deferred API (https://cloud.google.com/appengine/articles/deferred).
- script: google.appengine.ext.deferred.deferred.application
- login: admin
-
-- url: /static
- # Static content.
- static_dir: static
-
-- url: .*
- script: clovis_frontend.app
-
-libraries:
-- name: ssl
- version: latest
diff --git a/loading/cloud/frontend/appengine_config.py b/loading/cloud/frontend/appengine_config.py
deleted file mode 100644
index 608cd73..0000000
--- a/loading/cloud/frontend/appengine_config.py
+++ /dev/null
@@ -1,6 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-from google.appengine.ext import vendor
-vendor.add('lib')
diff --git a/loading/cloud/frontend/bigquery_schema.json b/loading/cloud/frontend/bigquery_schema.json
deleted file mode 100644
index 254e475..0000000
--- a/loading/cloud/frontend/bigquery_schema.json
+++ /dev/null
@@ -1,275 +0,0 @@
-[
- {
- "name": "url",
- "type": "STRING",
- "mode": "REQUIRED"
- },
- {
- "name": "ad_or_tracking_initiated_requests",
- "type": "INTEGER"
- },
- {
- "name": "ad_or_tracking_initiated_transfer_size",
- "type": "INTEGER"
- },
- {
- "name": "ad_or_tracking_parsing_frac",
- "type": "FLOAT"
- },
- {
- "name": "ad_or_tracking_requests",
- "type": "INTEGER"
- },
- {
- "name": "ad_or_tracking_script_frac",
- "type": "FLOAT"
- },
- {
- "name": "ad_requests",
- "type": "INTEGER"
- },
- {
- "name": "connection_cost_ms",
- "type": "FLOAT"
- },
- {
- "name": "connections",
- "type": "INTEGER"
- },
- {
- "name": "contentful_activity_frac",
- "type": "FLOAT"
- },
- {
- "name": "contentful_byte_frac",
- "type": "FLOAT"
- },
- {
- "name": "contentful_inversion",
- "type": "STRING"
- },
- {
- "name": "contentful_ms",
- "type": "FLOAT"
- },
- {
- "name": "contentful_parsing_frac",
- "type": "FLOAT"
- },
- {
- "name": "contentful_predicted_no_state_prefetch_ms",
- "type": "FLOAT"
- },
- {
- "name": "contentful_preloaded_requests",
- "type": "INTEGER"
- },
- {
- "name": "contentful_preloaded_requests_cost",
- "type": "FLOAT"
- },
- {
- "name": "contentful_requests",
- "type": "INTEGER"
- },
- {
- "name": "contentful_requests_cost",
- "type": "FLOAT"
- },
- {
- "name": "contentful_script_frac",
- "type": "FLOAT"
- },
- {
- "name": "data_requests",
- "type": "INTEGER"
- },
- {
- "name": "dns_cost_ms",
- "type": "FLOAT"
- },
- {
- "name": "dns_requests",
- "type": "INTEGER"
- },
- {
- "name": "domains",
- "type": "INTEGER"
- },
- {
- "name": "first_text_activity_frac",
- "type": "FLOAT"
- },
- {
- "name": "first_text_byte_frac",
- "type": "FLOAT"
- },
- {
- "name": "first_text_inversion",
- "type": "STRING"
- },
- {
- "name": "first_text_ms",
- "type": "FLOAT"
- },
- {
- "name": "first_text_parsing_frac",
- "type": "FLOAT"
- },
- {
- "name": "first_text_predicted_no_state_prefetch_ms",
- "type": "FLOAT"
- },
- {
- "name": "first_text_preloaded_requests",
- "type": "INTEGER"
- },
- {
- "name": "first_text_preloaded_requests_cost",
- "type": "FLOAT"
- },
- {
- "name": "first_text_requests",
- "type": "INTEGER"
- },
- {
- "name": "first_text_requests_cost",
- "type": "FLOAT"
- },
- {
- "name": "first_text_script_frac",
- "type": "FLOAT"
- },
- {
- "name": "h2_requests",
- "type": "INTEGER"
- },
- {
- "name": "http11_requests",
- "type": "INTEGER"
- },
- {
- "name": "plt_activity_frac",
- "type": "FLOAT"
- },
- {
- "name": "plt_byte_frac",
- "type": "FLOAT"
- },
- {
- "name": "plt_inversion",
- "type": "STRING"
- },
- {
- "name": "plt_ms",
- "type": "FLOAT"
- },
- {
- "name": "plt_parsing_frac",
- "type": "FLOAT"
- },
- {
- "name": "plt_predicted_no_state_prefetch_ms",
- "type": "FLOAT"
- },
- {
- "name": "plt_preloaded_requests",
- "type": "INTEGER"
- },
- {
- "name": "plt_preloaded_requests_cost",
- "type": "FLOAT"
- },
- {
- "name": "plt_requests",
- "type": "INTEGER"
- },
- {
- "name": "plt_requests_cost",
- "type": "FLOAT"
- },
- {
- "name": "plt_script_frac",
- "type": "FLOAT"
- },
- {
- "name": "significant_activity_frac",
- "type": "FLOAT"
- },
- {
- "name": "significant_byte_frac",
- "type": "FLOAT"
- },
- {
- "name": "significant_inversion",
- "type": "STRING"
- },
- {
- "name": "significant_ms",
- "type": "FLOAT"
- },
- {
- "name": "significant_parsing_frac",
- "type": "FLOAT"
- },
- {
- "name": "significant_predicted_no_state_prefetch_ms",
- "type": "FLOAT"
- },
- {
- "name": "significant_preloaded_requests",
- "type": "INTEGER"
- },
- {
- "name": "significant_preloaded_requests_cost",
- "type": "FLOAT"
- },
- {
- "name": "significant_requests",
- "type": "INTEGER"
- },
- {
- "name": "significant_requests_cost",
- "type": "FLOAT"
- },
- {
- "name": "significant_script_frac",
- "type": "FLOAT"
- },
- {
- "name": "ssl_connections",
- "type": "INTEGER"
- },
- {
- "name": "ssl_cost_ms",
- "type": "FLOAT"
- },
- {
- "name": "tracking_requests",
- "type": "INTEGER"
- },
- {
- "name": "transfer_size",
- "type": "INTEGER"
- },
- {
- "name": "total_queuing_blocked_msec",
- "type": "FLOAT"
- },
- {
- "name": "total_queuing_load_msec",
- "type": "FLOAT"
- },
- {
- "name": "average_blocking_request_count",
- "type": "FLOAT"
- },
- {
- "name": "median_blocking_request_count",
- "type": "FLOAT"
- },
- {
- "name": "total_requests",
- "type": "INTEGER"
- }
-]
diff --git a/loading/cloud/frontend/clovis_frontend.py b/loading/cloud/frontend/clovis_frontend.py
deleted file mode 100644
index 675e2af..0000000
--- a/loading/cloud/frontend/clovis_frontend.py
+++ /dev/null
@@ -1,604 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-import logging
-import datetime
-import math
-import os
-import sys
-import time
-import traceback
-
-import cloudstorage
-import flask
-from google.appengine.api import (app_identity, taskqueue)
-from google.appengine.ext import deferred
-from oauth2client.client import GoogleCredentials
-
-import common.clovis_paths
-from common.clovis_task import ClovisTask
-import common.google_bigquery_helper
-import common.google_instance_helper
-from common.loading_trace_database import LoadingTraceDatabase
-import email_helper
-from frontend_job import FrontendJob
-from memory_logs import MemoryLogs
-
-
-# Global variables.
-logging.Formatter.converter = time.gmtime
-clovis_logger = logging.getLogger('clovis_frontend')
-clovis_logger.setLevel(logging.DEBUG)
-project_name = app_identity.get_application_id()
-instance_helper = common.google_instance_helper.GoogleInstanceHelper(
- credentials=GoogleCredentials.get_application_default(),
- project=project_name,
- logger=clovis_logger)
-app = flask.Flask(__name__)
-
-
-def RenderJobCreationPage(message, memory_logs=None):
- """Renders the log.html template.
-
- Args:
- message (str): Main content of the page.
- memory_logs (MemoryLogs): Optional logs.
- """
- log = None
- if memory_logs:
- log = memory_logs.Flush().split('\n')
- return flask.render_template('log.html', body=message, log=log,
- title='Job Creation Status')
-
-
-def PollWorkers(tag, start_time, timeout_hours, email_address, task_url):
- """Checks if there are workers associated with tag, by polling the instance
- group. When all workers are finished, the instance group and the instance
- template are destroyed.
- After some timeout delay, the instance group is destroyed even if there are
- still workers associated to it, which has the effect of killing all these
- workers.
-
- Args:
- tag (string): Tag of the task that is polled.
- start_time (float): Time when the polling started, as returned by
- time.time().
- timeout_hours (int): Timeout after which workers are terminated.
- email_address (str): Email address to notify when the task is complete.
- task_url (str): URL where the results of the task can be found.
- """
- if (time.time() - start_time) > (3600 * timeout_hours):
- clovis_logger.error('Worker timeout for tag %s, shuting down.' % tag)
- Finalize(tag, email_address, 'TIMEOUT', task_url)
- return
-
- clovis_logger.info('Polling workers for tag: ' + tag)
- live_instance_count = instance_helper.GetInstanceCount(tag)
- clovis_logger.info('%i live instances for tag %s.' % (
- live_instance_count, tag))
-
- if live_instance_count > 0 or live_instance_count == -1:
- clovis_logger.info('Retry later, instances still alive for tag: ' + tag)
- poll_interval_minutes = 10
- deferred.defer(PollWorkers, tag, start_time, timeout_hours, email_address,
- task_url, _countdown=(60 * poll_interval_minutes))
- return
-
- Finalize(tag, email_address, 'SUCCESS', task_url)
-
-
-def Finalize(tag, email_address, status, task_url):
- """Cleans up the remaining ComputeEngine resources and notifies the user.
-
- Args:
- tag (str): Tag of the task to finalize.
- email_address (str): Email address of the user to be notified.
- status (str): Status of the task, indicating the success or the cause of
- failure.
- task_url (str): URL where the results of the task can be found.
- """
- email_helper.SendEmailTaskComplete(
- to_address=email_address, tag=tag, status=status, task_url=task_url,
- logger=clovis_logger)
- clovis_logger.info('Scheduling instance group destruction for tag: ' + tag)
- deferred.defer(DeleteInstanceGroup, tag)
- FrontendJob.DeleteForTag(tag)
-
-
-def GetEstimatedTaskDurationInSeconds(task):
- """Returns an estimation of the time required to run the task.
-
- Args:
- task: (ClovisTask) The task.
-
- Returns:
- float: Time estimation in seconds, or -1 in case of failure.
- """
- action_params = task.ActionParams()
- if task.Action() == 'trace':
- estimated_trace_time_s = 40.0
- return (len(action_params['urls']) * action_params.get('repeat_count', 1) *
- estimated_trace_time_s)
- elif task.Action() == 'report':
- estimated_report_time_s = 20.0
- return len(action_params['traces']) * estimated_report_time_s
- else:
- clovis_logger.error('Unexpected action.')
- return -1
-
-
-def CreateInstanceTemplate(task, task_dir):
- """Create the Compute Engine instance template that will be used to create the
- instances.
- """
- backend_params = task.BackendParams()
- instance_count = backend_params.get('instance_count', 0)
- if instance_count <= 0:
- clovis_logger.info('No template required.')
- return True
- bucket = backend_params.get('storage_bucket')
- if not bucket:
- clovis_logger.error('Missing bucket in backend_params.')
- return False
- return instance_helper.CreateTemplate(task.BackendParams()['tag'], bucket,
- task_dir)
-
-
-def CreateInstances(task):
- """Creates the Compute engine requested by the task."""
- backend_params = task.BackendParams()
- instance_count = backend_params.get('instance_count', 0)
- if instance_count <= 0:
- clovis_logger.info('No instances to create.')
- return True
- return instance_helper.CreateInstances(backend_params['tag'], instance_count)
-
-
-def DeleteInstanceGroup(tag, try_count=0):
- """Deletes the instance group associated with tag, and schedules the deletion
- of the instance template."""
- clovis_logger.info('Instance group destruction for tag: ' + tag)
- if not instance_helper.DeleteInstanceGroup(tag):
- clovis_logger.info('Instance group destruction failed for: ' + tag)
- if try_count <= 5:
- deferred.defer(DeleteInstanceGroup, tag, try_count + 1, _countdown=60)
- return
- clovis_logger.error('Giving up group destruction for: ' + tag)
- clovis_logger.info('Scheduling instance template destruction for tag: ' + tag)
- # Wait a little before deleting the instance template, because it may still be
- # considered in use, causing failures.
- deferred.defer(DeleteInstanceTemplate, tag, _countdown=30)
-
-
-def DeleteInstanceTemplate(tag, try_count=0):
- """Deletes the instance template associated with tag."""
- clovis_logger.info('Instance template destruction for tag: ' + tag)
- if not instance_helper.DeleteTemplate(tag):
- clovis_logger.info('Instance template destruction failed for: ' + tag)
- if try_count <= 5:
- deferred.defer(DeleteInstanceTemplate, tag, try_count + 1, _countdown=60)
- return
- clovis_logger.error('Giving up template destruction for: ' + tag)
- clovis_logger.info('Cleanup complete for tag: ' + tag)
-
-
-def SplitClovisTask(task):
- """Splits a ClovisTask in smaller ClovisTasks.
-
- Args:
- task: (ClovisTask) The task to split.
-
- Returns:
- list: The list of ClovisTasks.
- """
- # For report task, need to find the traces first.
- if task.Action() == 'report':
- trace_bucket = task.ActionParams().get('trace_bucket')
- if not trace_bucket:
- clovis_logger.error('Missing trace bucket for report task.')
- return None
-
- # Allow passing the trace bucket as absolute or relative to the base bucket.
- base_bucket = task.BackendParams().get('storage_bucket', '')
- if not trace_bucket.startswith(base_bucket):
- trace_bucket = os.path.join(base_bucket, trace_bucket)
-
- traces = GetTracePaths(trace_bucket)
- if not traces:
- clovis_logger.error('No traces found in bucket: ' + trace_bucket)
- return None
- task.ActionParams()['traces'] = traces
-
- # Compute the split key.
- # Keep the tasks small, but large enough to avoid "rate exceeded" errors when
- # pulling them from the TaskQueue.
- split_params_for_action = {'trace': ('urls', 3), 'report': ('traces', 5)}
- (split_key, slice_size) = split_params_for_action.get(task.Action(),
- (None, 0))
- if not split_key:
- clovis_logger.error('Cannot split task with action: ' + task.Action())
- return None
-
- # Split the task using the split key.
- clovis_logger.debug('Splitting task by: ' + split_key)
- action_params = task.ActionParams()
- values = action_params[split_key]
- sub_tasks = []
- for i in range(0, len(values), slice_size):
- sub_task_params = action_params.copy()
- sub_task_params[split_key] = [v for v in values[i:i+slice_size]]
- sub_tasks.append(ClovisTask(task.Action(), sub_task_params,
- task.BackendParams()))
- return sub_tasks
-
-
-def GetTracePaths(bucket):
- """Returns a list of trace files in a bucket.
-
- Finds and loads the trace databases, and returns their content as a list of
- paths.
-
- This function assumes a specific structure for the files in the bucket. These
- assumptions must match the behavior of the backend:
- - The trace databases are located in the bucket.
- - The trace databases files are the only objects with the
- TRACE_DATABASE_PREFIX prefix in their name.
-
- Returns:
- list: The list of paths to traces, as strings.
- """
- traces = []
- prefix = os.path.join('/', bucket, common.clovis_paths.TRACE_DATABASE_PREFIX)
- file_stats = cloudstorage.listbucket(prefix)
-
- for file_stat in file_stats:
- database_file = file_stat.filename
- clovis_logger.info('Loading trace database: ' + database_file)
-
- with cloudstorage.open(database_file) as remote_file:
- json_string = remote_file.read()
- if not json_string:
- clovis_logger.warning('Failed to download: ' + database_file)
- continue
-
- database = LoadingTraceDatabase.FromJsonString(json_string)
- if not database:
- clovis_logger.warning('Failed to parse: ' + database_file)
- continue
-
- for path in database.ToJsonDict():
- traces.append(path)
-
- return traces
-
-
-def GetTaskURL(task, task_dir):
- """Returns the URL where the task output are generated, or None.
-
- Args:
- task: (ClovisTask) The task.
- task_dir: (str) Working directory for the backend, it is a subdirectory of
- the deployment bucket.
-
- Returns:
- str: The URL.
- """
- clovis_logger.info('Building task result URL.')
-
- if task.Action() == 'trace':
- storage_bucket = task.BackendParams().get('storage_bucket')
- if not storage_bucket:
- clovis_logger.error('Missing storage_bucket for trace action.')
- return None
- return 'https://console.cloud.google.com/storage/%s/%s' % (storage_bucket,
- task_dir)
-
- elif task.Action() == 'report':
- table_id = common.google_bigquery_helper.GetBigQueryTableID(task)
- task_url = common.google_bigquery_helper.GetBigQueryTableURL(project_name,
- table_id)
- # Abort if the table already exists.
- bigquery_service = common.google_bigquery_helper.GetBigQueryService(
- GoogleCredentials.get_application_default())
- try:
- table_exists = common.google_bigquery_helper.DoesBigQueryTableExist(
- bigquery_service, project_name, table_id, clovis_logger)
- except Exception:
- return None
- if table_exists:
- clovis_logger.error('BigQuery table %s already exists.' % task_url)
- return None
- return task_url
-
- else:
- clovis_logger.error('Unsupported action: %s.' % task.Action())
- return None
-
-
-def StartFromJsonString(http_body_str):
- """Main function handling a JSON task posted by the user."""
- # Set up logging.
- memory_logs = MemoryLogs(clovis_logger)
- memory_logs.Start()
-
- # Load the task from JSON.
- task = ClovisTask.FromJsonString(http_body_str)
- if not task:
- clovis_logger.error('Invalid JSON task.')
- return RenderJobCreationPage(
- 'Invalid JSON task:\n' + http_body_str, memory_logs)
-
- task_tag = task.BackendParams()['tag']
- clovis_logger.info('Start processing %s task with tag %s.' % (task.Action(),
- task_tag))
- user_email = email_helper.GetUserEmail()
-
- # Write the job to the datastore.
- frontend_job = FrontendJob.CreateForTag(task_tag)
- frontend_job.email = user_email
- frontend_job.status = 'not_started'
- frontend_job.clovis_task = task.ToJsonString()
- frontend_job.put()
-
- # Process the job on the queue, to avoid timeout issues.
- deferred.defer(SpawnTasksOnBackgroundQueue, task_tag)
-
- return RenderJobCreationPage(
- flask.Markup(
- '<a href="%s">See progress.</a>' % FrontendJob.GetJobURL(task_tag)),
- memory_logs)
-
-
-def SpawnTasksOnBackgroundQueue(task_tag):
- """Spawns Clovis tasks associated with task_tag from the backgound queue.
-
- This function is mostly a wrapper around SpawnTasks() that catches exceptions.
- It is assumed that a FrontendJob for task_tag exists.
- """
- memory_logs = MemoryLogs(clovis_logger)
- memory_logs.Start()
- clovis_logger.info('Spawning tasks on background queue.')
-
- try:
- frontend_job = FrontendJob.GetFromTag(task_tag)
- frontend_job.status = 'will_start'
- SpawnTasks(frontend_job)
- except Exception as e:
- clovis_logger.error('Exception spawning tasks: ' + str(e))
- clovis_logger.error(traceback.print_exc())
-
- # Update the task.
- if frontend_job:
- frontend_job.log = memory_logs.Flush()
- frontend_job.put()
-
-
-def SpawnTasks(frontend_job):
- """ Spawns Clovis tasks associated with the frontend job."""
- user_email = frontend_job.email
- task = ClovisTask.FromJsonString(frontend_job.clovis_task)
- task_tag = task.BackendParams()['tag']
-
- # Delete the clovis task from the FrontendJob because it can make the object
- # very heavy and it is no longer needed.
- frontend_job.clovis_task = None
-
- # Compute the task directory.
- frontend_job.status = 'building_task_dir'
- task_dir_components = []
- user_name = None
- if user_email:
- user_name = user_email[:user_email.find('@')]
- if user_name:
- task_dir_components.append(user_name)
- task_name = task.BackendParams().get('task_name')
- if task_name:
- task_dir_components.append(task_name)
- task_dir_components.append(task_tag)
- task_dir = os.path.join(task.Action(), '_'.join(task_dir_components))
-
- # Build the URL where the result will live.
- frontend_job.status = 'building_task_url'
- task_url = GetTaskURL(task, task_dir)
- if task_url:
- clovis_logger.info('Task result URL: ' + task_url)
- frontend_job.task_url = task_url
- else:
- frontend_job.status = 'task_url_error'
- return
-
- # Split the task in smaller tasks.
- frontend_job.status = 'splitting_task'
- frontend_job.put()
- sub_tasks = SplitClovisTask(task)
- if not sub_tasks:
- frontend_job.status = 'task_split_error'
- return
-
- # Compute estimates for the work duration, in order to compute the instance
- # count and the timeout.
- frontend_job.status = 'estimating_duration'
- sequential_duration_s = \
- GetEstimatedTaskDurationInSeconds(sub_tasks[0]) * len(sub_tasks)
- if sequential_duration_s <= 0:
- frontend_job.status = 'time_estimation_error'
- return
-
- # Compute the number of required instances if not specified.
- if task.BackendParams().get('instance_count') is None:
- frontend_job.status = 'estimating_instance_count'
- target_parallel_duration_s = 1800.0 # 30 minutes.
- task.BackendParams()['instance_count'] = math.ceil(
- sequential_duration_s / target_parallel_duration_s)
-
- # Check the instance quotas.
- clovis_logger.info(
- 'Requesting %i instances.' % task.BackendParams()['instance_count'])
- frontend_job.status = 'checking_instance_quotas'
- max_instances = instance_helper.GetAvailableInstanceCount()
- if max_instances == -1:
- frontend_job.status = 'instance_count_error'
- return
- elif max_instances == 0 and task.BackendParams()['instance_count'] > 0:
- frontend_job.status = 'no_instance_available_error'
- return
- elif max_instances < task.BackendParams()['instance_count']:
- clovis_logger.warning(
- 'Instance count limited by quota: %i available / %i requested.' % (
- max_instances, task.BackendParams()['instance_count']))
- task.BackendParams()['instance_count'] = max_instances
-
- # Compute the timeout if there is none specified.
- expected_duration_s = sequential_duration_s / (
- task.BackendParams()['instance_count'])
- frontend_job.eta = datetime.datetime.now() + datetime.timedelta(
- seconds=expected_duration_s)
- if not task.BackendParams().get('timeout_hours'):
- # Timeout is at least 1 hour.
- task.BackendParams()['timeout_hours'] = max(
- 1, 5 * expected_duration_s / 3600.0)
- clovis_logger.info(
- 'Timeout delay: %.1f hours. ' % task.BackendParams()['timeout_hours'])
-
- frontend_job.status = 'queueing_tasks'
- if not EnqueueTasks(sub_tasks, task_tag):
- frontend_job.status = 'task_creation_error'
- return
-
- # Start polling the progress.
- clovis_logger.info('Creating worker polling task.')
- first_poll_delay_minutes = 10
- deferred.defer(PollWorkers, task_tag, time.time(),
- task.BackendParams()['timeout_hours'], user_email,
- task_url, _countdown=(60 * first_poll_delay_minutes))
-
- # Start the instances if required.
- frontend_job.status = 'creating_instances'
- frontend_job.put()
- if not CreateInstanceTemplate(task, task_dir):
- frontend_job.status = 'instance_template_error'
- return
- if not CreateInstances(task):
- frontend_job.status = 'instance_creation_error'
- return
-
- frontend_job.status = 'started'
-
-def EnqueueTasks(tasks, task_tag):
- """Enqueues a list of tasks in the Google Cloud task queue, for consumption by
- Google Compute Engine.
- """
- q = taskqueue.Queue('clovis-queue')
- # Add tasks to the queue by groups.
- # TODO(droger): This supports thousands of tasks, but maybe not millions.
- # Defer the enqueuing if it times out.
- group_size = 100
- callbacks = []
- try:
- for i in range(0, len(tasks), group_size):
- group = tasks[i:i+group_size]
- taskqueue_tasks = [
- taskqueue.Task(payload=task.ToJsonString(), method='PULL',
- tag=task_tag)
- for task in group]
- rpc = taskqueue.create_rpc()
- q.add_async(task=taskqueue_tasks, rpc=rpc)
- callbacks.append(rpc)
- for callback in callbacks:
- callback.get_result()
- except Exception as e:
- clovis_logger.error('Exception:' + type(e).__name__ + ' ' + str(e.args))
- return False
- clovis_logger.info('Pushed %i tasks with tag: %s.' % (len(tasks), task_tag))
- return True
-
-
-@app.route('/')
-def Root():
- """Home page: show the new task form."""
- return flask.render_template('form.html')
-
-
-@app.route('/form_sent', methods=['POST'])
-def StartFromForm():
- """HTML form endpoint."""
- data_stream = flask.request.files.get('json_task')
- if not data_stream:
- return RenderJobCreationPage('Failed, no content.')
- http_body_str = data_stream.read()
- return StartFromJsonString(http_body_str)
-
-
-@app.route('/kill_job')
-def KillJob():
- tag = flask.request.args.get('tag')
- page_title = 'Kill Job'
- if not tag:
- return flask.render_template('log.html', body='Failed: Invalid tag.',
- title=page_title)
-
- frontend_job = FrontendJob.GetFromTag(tag)
-
- if not frontend_job:
- return flask.render_template('log.html', body='Job not found.',
- title=page_title)
- Finalize(tag, frontend_job.email, 'CANCELED', frontend_job.task_url)
-
- body = 'Killed job %s.' % tag
- return flask.render_template('log.html', body=body, title=page_title)
-
-
-@app.route('/list_jobs')
-def ShowJobList():
- """Shows a list of all active jobs."""
- tags = FrontendJob.ListJobs()
- page_title = 'Active Jobs'
-
- if not tags:
- return flask.render_template('log.html', body='No active job.',
- title=page_title)
-
- html = ''
- for tag in tags:
- html += flask.Markup(
- '<li><a href="%s">%s</a></li>') % (FrontendJob.GetJobURL(tag), tag)
- html += flask.Markup('</ul>')
- return flask.render_template('log.html', body=html, title=page_title)
-
-
-@app.route(FrontendJob.SHOW_JOB_URL)
-def ShowJob():
- """Shows basic information abour a job."""
- tag = flask.request.args.get('tag')
- page_title = 'Job Information'
- if not tag:
- return flask.render_template('log.html', body='Invalid tag.',
- title=page_title)
-
- frontend_job = FrontendJob.GetFromTag(tag)
-
- if not frontend_job:
- return flask.render_template('log.html', body='Job not found.',
- title=page_title)
-
- log = None
- if frontend_job.log:
- log = frontend_job.log.split('\n')
-
- body = flask.Markup(frontend_job.RenderAsHtml())
- body += flask.Markup('<a href="/kill_job?tag=%s">Kill</a>' % tag)
- return flask.render_template('log.html', log=log, title=page_title,
- body=body)
-
-
-@app.errorhandler(404)
-def PageNotFound(e): # pylint: disable=unused-argument
- """Return a custom 404 error."""
- return 'Sorry, Nothing at this URL.', 404
-
-
-@app.errorhandler(500)
-def ApplicationError(e):
- """Return a custom 500 error."""
- return 'Sorry, unexpected error: {}'.format(e), 499
diff --git a/loading/cloud/frontend/common b/loading/cloud/frontend/common
deleted file mode 120000
index 60d3b0a..0000000
--- a/loading/cloud/frontend/common
+++ /dev/null
@@ -1 +0,0 @@
-../common
\ No newline at end of file
diff --git a/loading/cloud/frontend/email_helper.py b/loading/cloud/frontend/email_helper.py
deleted file mode 100644
index dd82ee7..0000000
--- a/loading/cloud/frontend/email_helper.py
+++ /dev/null
@@ -1,42 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-from google.appengine.api import (mail, users)
-
-
-def GetUserEmail():
- """Returns the email address of the user currently making the request or None.
- """
- user = users.get_current_user()
- if user:
- return user.email()
- return None
-
-
-def SendEmailTaskComplete(to_address, tag, status, task_url, logger):
- """Sends an email to to_address notifying that the task identified by tag is
- complete.
-
- Args:
- to_address (str): The email address to notify.
- tag (str): The tag of the task.
- status (str): Status of the task.
- task_url (str): URL where the results of the task can be found.
- logger (logging.logger): Used for logging.
- """
- if not to_address:
- logger.error('No email address to notify for task ' + tag)
- return
-
- logger.info('Notify task %s complete to %s.' % (tag, to_address))
- # The sender address must be in the "Email API authorized senders", configured
- # in the Application Settings of AppEngine.
- sender_address = 'clovis-noreply@google.com'
- subject = 'Task %s complete' % tag
- body = 'Your Clovis task %s is now complete with status: %s.' % (tag, status)
- if task_url:
- body += '\nCheck the results at ' + task_url
- mail.send_mail(sender=sender_address, to=to_address, subject=subject,
- body=body)
-
diff --git a/loading/cloud/frontend/frontend_job.py b/loading/cloud/frontend/frontend_job.py
deleted file mode 100644
index 0c5783e..0000000
--- a/loading/cloud/frontend/frontend_job.py
+++ /dev/null
@@ -1,90 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-from google.appengine.ext import ndb
-
-
-class FrontendJob(ndb.Model):
- """Class representing a frontend job.
-
- A frontend job is a Clovis task sent by the user, and associated metadata
- (such as the username, the start time...).
- It is persisted in the Google Cloud datastore.
-
- All frontend jobs are ancestors of a single entity called 'FrontendJobList'.
- This allows to benefit from strong consistency when querying the job
- associated to a tag.
- """
- # Base URL path to get information about a job.
- SHOW_JOB_URL = '/show_job'
-
- # ndb properties persisted in the datastore. Indexing is not needed.
- email = ndb.StringProperty(indexed=False)
- status = ndb.StringProperty(indexed=False)
- task_url = ndb.StringProperty(indexed=False)
- eta = ndb.DateTimeProperty(indexed=False)
- start_time = ndb.DateTimeProperty(auto_now_add=True, indexed=False)
- # Not indexed by default.
- clovis_task = ndb.TextProperty(compressed=True, indexed=False)
- log = ndb.TextProperty(indexed=False)
-
- @classmethod
- def _GetParentKeyFromTag(cls, tag):
- """Gets the key that can be used to retrieve a frontend job from the job
- list.
- """
- return ndb.Key('FrontendJobList', tag)
-
- @classmethod
- def CreateForTag(cls, tag):
- """Creates a frontend job associated with tag."""
- parent_key = cls._GetParentKeyFromTag(tag)
- return cls(parent=parent_key)
-
- @classmethod
- def GetFromTag(cls, tag):
- """Gets the frontend job associated with tag."""
- parent_key = cls._GetParentKeyFromTag(tag)
- return cls.query(ancestor=parent_key).get()
-
- @classmethod
- def DeleteForTag(cls, tag):
- """Deletes the frontend job assowiated with tag."""
- parent_key = cls._GetParentKeyFromTag(tag)
- frontend_job = cls.query(ancestor=parent_key).get(keys_only=True)
- if frontend_job:
- frontend_job.delete()
-
- @classmethod
- def ListJobs(cls):
- """Lists all the frontend jobs.
-
- Returns:
- list of strings: The list of tags corresponding to existing frontend jobs.
- """
- return [key.parent().string_id() for key in cls.query().fetch(
- 100, keys_only=True)]
-
- @classmethod
- def GetJobURL(cls, tag):
- """Gets the URL that can be used to get information about a specific job."""
- return cls.SHOW_JOB_URL + '?tag=' + tag
-
- def RenderAsHtml(self):
- """Render a short job description as a HTML table.
-
- The log and ClovisTask are not included, because they are potentially very
- large.
- """
- html = '<table>'
-
- for p in FrontendJob._properties:
- if p == 'log' or p == 'clovis_task':
- continue
- value = getattr(self, p)
- if value:
- html += '<tr><td>' + p + '</td><td>' + str(value) + '</td></tr>'
-
- html += '</table>'
- return html
diff --git a/loading/cloud/frontend/lib/common b/loading/cloud/frontend/lib/common
deleted file mode 120000
index dc879ab..0000000
--- a/loading/cloud/frontend/lib/common
+++ /dev/null
@@ -1 +0,0 @@
-../../common
\ No newline at end of file
diff --git a/loading/cloud/frontend/memory_logs.py b/loading/cloud/frontend/memory_logs.py
deleted file mode 100644
index afd1897..0000000
--- a/loading/cloud/frontend/memory_logs.py
+++ /dev/null
@@ -1,34 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-import logging
-from StringIO import StringIO
-
-
-class MemoryLogs(object):
- """Collects logs in memory."""
-
- def __init__(self, logger):
- self._logger = logger
- self._log_buffer = StringIO()
- self._log_handler = logging.StreamHandler(self._log_buffer)
- formatter = logging.Formatter("[%(asctime)s][%(levelname)s] %(message)s",
- "%y-%m-%d %H:%M:%S")
- self._log_handler.setFormatter(formatter)
-
- def Start(self):
- """Starts collecting the logs."""
- self._logger.addHandler(self._log_handler)
-
- def Flush(self):
- """Stops collecting the logs and returns the logs collected since Start()
- was called.
- """
- self._logger.removeHandler(self._log_handler)
- self._log_handler.flush()
- self._log_buffer.flush()
- result = self._log_buffer.getvalue()
- self._log_buffer.truncate(0)
- return result
-
diff --git a/loading/cloud/frontend/requirements.txt b/loading/cloud/frontend/requirements.txt
deleted file mode 100644
index 4d9f882..0000000
--- a/loading/cloud/frontend/requirements.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-Flask==0.10
-google-api-python-client
-GoogleAppEngineCloudStorageClient
diff --git a/loading/cloud/frontend/static/base.css b/loading/cloud/frontend/static/base.css
deleted file mode 100644
index 3a5dfbf..0000000
--- a/loading/cloud/frontend/static/base.css
+++ /dev/null
@@ -1,46 +0,0 @@
-/* Copyright 2016 The Chromium Authors. All rights reserved.
- Use of this source code is governed by a BSD-style license that can be
- found in the LICENSE file. */
-
-body {
- font-family:'Arial', sans-serif;
-}
-
-.menu {
- margin-bottom: 1em;
- --padding: 14px 16px;
- border: 1px solid #e7e7e7;
-}
-
-.menu div {
- padding: var(--padding);
- font-weight:bold;
- color: #ffca00;
- background-color: #cc0000;
- cursor: default;
-}
-
-.menu ul {
- list-style-type: none;
- margin: 0;
- padding: 0;
- overflow: hidden;
-}
-
-.menu li {
- float: left;
- text-align: center;
-}
-
-.menu li a {
- color: #666;
- background-color: #f3f3f3;
- text-decoration: none;
- display: block;
- padding: var(--padding);
-}
-
-.menu li a:hover {
- background-color: #ddd;
-}
-
diff --git a/loading/cloud/frontend/static/crown_icon.png b/loading/cloud/frontend/static/crown_icon.png
deleted file mode 100644
index 2d5ac45..0000000
--- a/loading/cloud/frontend/static/crown_icon.png
+++ /dev/null
Binary files differ
diff --git a/loading/cloud/frontend/templates/base.html b/loading/cloud/frontend/templates/base.html
deleted file mode 100644
index 72d732c..0000000
--- a/loading/cloud/frontend/templates/base.html
+++ /dev/null
@@ -1,35 +0,0 @@
-{# Copyright 2016 The Chromium Authors. All rights reserved.
- Use of this source code is governed by a BSD-style license that can be
- found in the LICENSE file.
-#}
-
-<!DOCTYPE html>
-<html>
-
-<head>
- <meta charset="utf-8">
- <title>Clovis</title>
- <link rel="stylesheet" type="text/css" href="/static/base.css"/>
- <link rel="icon" href="/static/crown_icon.png"/>
-</head>
-
-<body>
-<header>
- <div class="menu">
- <ul style="border: 1px solid #e7e7e7; background-color: #f3f3f3;">
- <li> <div> Clovis </div>
- <li> <a href="/">New Job</a>
- <li> <a href="/list_jobs">Active Jobs</a>
- <li> <a href="https://chromium.googlesource.com/chromium/src/+/master/tools/android/loading/cloud/frontend/README.md">
- Documentation
- </a>
- </ul>
- </div>
-</header>
-
-{# The main content of the page goes here #}
-{% block content %}
-{% endblock %}
-
-</body>
-</html>
diff --git a/loading/cloud/frontend/templates/form.html b/loading/cloud/frontend/templates/form.html
deleted file mode 100644
index 9003259..0000000
--- a/loading/cloud/frontend/templates/form.html
+++ /dev/null
@@ -1,15 +0,0 @@
-{# Copyright 2016 The Chromium Authors. All rights reserved.
- Use of this source code is governed by a BSD-style license that can be
- found in the LICENSE file.
-#}
-
-{% extends "base.html" %}
-
-{% block content %}
-<h2>Submit New Task</h2>
-<p> Select JSON file </p>
-<form action="/form_sent" method="POST" enctype="multipart/form-data">
- <input type="file" name="json_task"/>
- <input type="submit" name="submit" value="Upload"/>
-</form>
-{% endblock %}
diff --git a/loading/cloud/frontend/templates/log.html b/loading/cloud/frontend/templates/log.html
deleted file mode 100644
index 9b43f87..0000000
--- a/loading/cloud/frontend/templates/log.html
+++ /dev/null
@@ -1,42 +0,0 @@
-{# Copyright 2016 The Chromium Authors. All rights reserved.
- Use of this source code is governed by a BSD-style license that can be
- found in the LICENSE file.
-#}
-
-{# Template for a page displaying a body and logs (optional) under a collapsible
- section.
-#}
-{% extends "base.html" %}
-
-{% block content %}
-
-<h2> {{ title }} </h2>
-
-{{ body }}
-
-{% if log %}
-
-<p><a onclick="javascript:ShowHide('HiddenDiv'); return false;" href="#">
- Show/hide details
-</a></p>
-
-<div id="HiddenDiv"
- style="display:none; font: 0.8em 'Droid Sans Mono', monospace;">
-
-{# Loop over the lines of the log to add linebreaks. #}
-{%- for line in log -%}
- {{ line }}<br/>
-{%- endfor -%}
-
-</div>
-
-<script type="text/javascript">
-function ShowHide(divId) {
- element = document.getElementById(divId)
- element.style.display = (element.style.display == 'none') ? 'block' : 'none';
-}
-</script>
-
-{% endif %}
-
-{% endblock %}
diff --git a/loading/clovis_constants.py b/loading/clovis_constants.py
deleted file mode 100644
index e64ddf0..0000000
--- a/loading/clovis_constants.py
+++ /dev/null
@@ -1,21 +0,0 @@
-# Copyright (c) 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-"""Provides common constants for Clovis."""
-
-QUEUING_CATEGORY = 'disabled-by-default-loading.resource'
-
-# Categories to enable or disable for all traces collected. Disabled categories
-# are prefixed with '-'.
-DEFAULT_CATEGORIES = [
- QUEUING_CATEGORY,
- 'blink',
- 'blink.net',
- 'blink.user_timing',
- 'devtools.timeline',
- 'disabled-by-default-blink.debug.layout',
- 'toplevel',
- 'v8',
- '-cc', # Contains a lot of events, none of which we use.
-]
diff --git a/loading/common_util.py b/loading/common_util.py
deleted file mode 100644
index 899062c..0000000
--- a/loading/common_util.py
+++ /dev/null
@@ -1,150 +0,0 @@
-# Copyright (c) 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-import contextlib
-import json
-import logging
-import os
-import re
-import shutil
-import signal
-import subprocess
-import sys
-import tempfile
-import time
-
-
-def VerboseCompileRegexOrAbort(regex):
- """Compiles a user-provided regular expression, exits the program on error."""
- try:
- return re.compile(regex)
- except re.error as e:
- sys.stderr.write('invalid regex: {}\n{}\n'.format(regex, e))
- sys.exit(2)
-
-
-def PollFor(condition, condition_name, interval=5):
- """Polls for a function to return true.
-
- Args:
- condition: Function to wait its return to be True.
- condition_name: The condition's name used for logging.
- interval: Periods to wait between tries in seconds.
-
- Returns:
- What condition has returned to stop waiting.
- """
- while True:
- result = condition()
- logging.info('Polling condition %s is %s' % (
- condition_name, 'met' if result else 'not met'))
- if result:
- return result
- time.sleep(interval)
-
-
-def SerializeAttributesToJsonDict(json_dict, instance, attributes):
- """Adds the |attributes| from |instance| to a |json_dict|.
-
- Args:
- json_dict: (dict) Dict to update.
- instance: (object) instance to take the values from.
- attributes: ([str]) List of attributes to serialize.
-
- Returns:
- json_dict
- """
- json_dict.update({attr: getattr(instance, attr) for attr in attributes})
- return json_dict
-
-
-def DeserializeAttributesFromJsonDict(json_dict, instance, attributes):
- """Sets a list of |attributes| in |instance| according to their value in
- |json_dict|.
-
- Args:
- json_dict: (dict) Dict containing values dumped by
- SerializeAttributesToJsonDict.
- instance: (object) instance to modify.
- attributes: ([str]) List of attributes to set.
-
- Raises:
- AttributeError if one of the attribute doesn't exist in |instance|.
-
- Returns:
- instance
- """
- for attr in attributes:
- getattr(instance, attr) # To raise AttributeError if attr doesn't exist.
- setattr(instance, attr, json_dict[attr])
- return instance
-
-
-@contextlib.contextmanager
-def TemporaryDirectory(suffix='', prefix='tmp'):
- """Returns a freshly-created directory that gets automatically deleted after
- usage.
- """
- name = tempfile.mkdtemp(suffix=suffix, prefix=prefix)
- try:
- yield name
- finally:
- shutil.rmtree(name)
-
-
-def EnsureParentDirectoryExists(path):
- """Verifies that the parent directory exists or creates it if missing."""
- parent_directory_path = os.path.abspath(os.path.dirname(path))
- if not os.path.isdir(parent_directory_path):
- os.makedirs(parent_directory_path)
-
-
-def GetCommandLineForLogging(cmd, env_diff=None):
- """Get command line string.
-
- Args:
- cmd: Command line argument
- env_diff: Environment modification for the command line.
-
- Returns:
- Command line string.
- """
- cmd_str = ''
- if env_diff:
- for key, value in env_diff.iteritems():
- cmd_str += '{}={} '.format(key, value)
- return cmd_str + subprocess.list2cmdline(cmd)
-
-
-# TimeoutError inherit from BaseException to pass through DevUtils' retries
-# decorator that catches only exceptions inheriting from Exception.
-class TimeoutError(BaseException):
- pass
-
-
-# If this exception is ever raised, then might be better to replace this
-# implementation with Thread.join(timeout=XXX).
-class TimeoutCollisionError(Exception):
- pass
-
-
-@contextlib.contextmanager
-def TimeoutScope(seconds, error_name):
- """Raises TimeoutError if the with statement is finished within |seconds|."""
- assert seconds > 0
- def _signal_callback(signum, frame):
- del signum, frame # unused.
- raise TimeoutError(error_name)
-
- try:
- signal.signal(signal.SIGALRM, _signal_callback)
- if signal.alarm(seconds) != 0:
- raise TimeoutCollisionError(
- 'Discarding an alarm that was scheduled before.')
- yield
- finally:
- signal.alarm(0)
- if signal.getsignal(signal.SIGALRM) != _signal_callback:
- raise TimeoutCollisionError('Looks like there is a signal.signal(signal.'
- 'SIGALRM) made within the with statement.')
diff --git a/loading/common_util_unittest.py b/loading/common_util_unittest.py
deleted file mode 100644
index 62ddaee..0000000
--- a/loading/common_util_unittest.py
+++ /dev/null
@@ -1,98 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-import signal
-import time
-import unittest
-
-import common_util
-
-
-class SerializeAttributesTestCase(unittest.TestCase):
- class Foo(object):
- def __init__(self, foo_fighters, whisky_bar):
- # Pylint doesn't like foo and bar, but I guess musical references are OK.
- self.foo_fighters = foo_fighters
- self.whisky_bar = whisky_bar
-
- def testSerialization(self):
- foo_fighters = self.Foo('1', 2)
- json_dict = common_util.SerializeAttributesToJsonDict(
- {}, foo_fighters, ['foo_fighters', 'whisky_bar'])
- self.assertDictEqual({'foo_fighters': '1', 'whisky_bar': 2}, json_dict)
- # Partial update
- json_dict = common_util.SerializeAttributesToJsonDict(
- {'baz': 42}, foo_fighters, ['whisky_bar'])
- self.assertDictEqual({'baz': 42, 'whisky_bar': 2}, json_dict)
- # Non-existing attribute.
- with self.assertRaises(AttributeError):
- json_dict = common_util.SerializeAttributesToJsonDict(
- {}, foo_fighters, ['foo_fighters', 'whisky_bar', 'baz'])
-
- def testDeserialization(self):
- foo_fighters = self.Foo('hello', 'world')
- json_dict = {'foo_fighters': 12, 'whisky_bar': 42}
- # Partial.
- foo_fighters = common_util.DeserializeAttributesFromJsonDict(
- json_dict, foo_fighters, ['foo_fighters'])
- self.assertEqual(12, foo_fighters.foo_fighters)
- self.assertEqual('world', foo_fighters.whisky_bar)
- # Complete.
- foo_fighters = common_util.DeserializeAttributesFromJsonDict(
- json_dict, foo_fighters, ['foo_fighters', 'whisky_bar'])
- self.assertEqual(42, foo_fighters.whisky_bar)
- # Non-existing attribute.
- with self.assertRaises(AttributeError):
- json_dict['baz'] = 'bad'
- foo_fighters = common_util.DeserializeAttributesFromJsonDict(
- json_dict, foo_fighters, ['foo_fighters', 'whisky_bar', 'baz'])
-
-
-class TimeoutScopeTestCase(unittest.TestCase):
- def testTimeoutRaise(self):
- self.assertEquals(0, signal.alarm(0))
-
- with self.assertRaisesRegexp(common_util.TimeoutError, 'hello'):
- with common_util.TimeoutScope(seconds=1, error_name='hello'):
- signal.pause()
- self.fail()
- self.assertEquals(0, signal.alarm(0))
-
- with self.assertRaisesRegexp(common_util.TimeoutError, 'world'):
- with common_util.TimeoutScope(seconds=1, error_name='world'):
- time.sleep(2)
- self.assertEquals(0, signal.alarm(0))
-
- def testCollisionDetection(self):
- ONE_YEAR = 365 * 24 * 60 * 60
-
- def _mock_callback(signum, frame):
- del signum, frame # unused.
-
- flag = False
- with self.assertRaises(common_util.TimeoutCollisionError):
- with common_util.TimeoutScope(seconds=ONE_YEAR, error_name=''):
- flag = True
- signal.signal(signal.SIGALRM, _mock_callback)
- self.assertTrue(flag)
- self.assertEquals(0, signal.alarm(0))
-
- flag = False
- with self.assertRaises(common_util.TimeoutCollisionError):
- with common_util.TimeoutScope(seconds=ONE_YEAR, error_name=''):
- flag = True
- with common_util.TimeoutScope(seconds=ONE_YEAR, error_name=''):
- self.fail()
- self.assertTrue(flag)
- self.assertEquals(0, signal.alarm(0))
-
- signal.alarm(ONE_YEAR)
- with self.assertRaises(common_util.TimeoutCollisionError):
- with common_util.TimeoutScope(seconds=ONE_YEAR, error_name=''):
- self.fail()
- self.assertEquals(0, signal.alarm(0))
-
-
-if __name__ == '__main__':
- unittest.main()
diff --git a/loading/content_classification_lens.py b/loading/content_classification_lens.py
deleted file mode 100644
index 89f40e2..0000000
--- a/loading/content_classification_lens.py
+++ /dev/null
@@ -1,192 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-"""Labels requests according to the type of content they represent."""
-
-import collections
-import logging
-import operator
-import os
-import urlparse
-
-import loading_trace
-import request_track
-
-
-class ContentClassificationLens(object):
- """Associates requests and frames with the type of content they represent."""
- def __init__(self, trace, ad_rules, tracking_rules):
- """Initializes an instance of ContentClassificationLens.
-
- Args:
- trace: (LoadingTrace) loading trace.
- ad_rules: ([str]) List of Adblock+ compatible rules used to classify ads.
- tracking_rules: ([str]) List of Adblock+ compatible rules used to
- classify tracking and analytics.
- """
- self._trace = trace
- self._requests = trace.request_track.GetEvents()
- self._requests_by_id = {r.request_id: r for r in self._requests}
- self._main_frame_id = trace.page_track.GetEvents()[0]['frame_id']
- self._frame_to_requests = collections.defaultdict(list)
- self._ad_requests = set()
- self._tracking_requests = set()
- self._ad_matcher = _RulesMatcher(ad_rules, True)
- self._tracking_matcher = _RulesMatcher(tracking_rules, True)
- self._document_url = self._GetDocumentUrl()
- self._GroupRequestsByFrameId()
- self._LabelRequests()
-
- def IsAdRequest(self, request):
- """Returns True iff the request matches one of the ad_rules."""
- return request.request_id in self._ad_requests
-
- def IsTrackingRequest(self, request):
- """Returns True iff the request matches one of the tracking_rules."""
- return request.request_id in self._tracking_requests
-
- def IsAdOrTrackingFrame(self, frame_id):
- """A Frame is an Ad frame if it's not the main frame and its main resource
- is ad or tracking-related.
- """
- if (frame_id not in self._frame_to_requests
- or frame_id == self._main_frame_id):
- return False
- frame_requests = [self._requests_by_id[request_id]
- for request_id in self._frame_to_requests[frame_id]]
- sorted_frame_resources = sorted(
- frame_requests, key=operator.attrgetter('start_msec'))
- frame_main_resource = sorted_frame_resources[0]
- return (frame_main_resource.request_id in self._ad_requests
- or frame_main_resource.request_id in self._tracking_requests)
-
- def AdAndTrackingRequests(self):
- """Returns a list of requests linked to ads and tracking.
-
- Returns the union of:
- - Requests tagged as ad or tracking.
- - Requests originating from an ad frame.
- """
- frame_ids = {r.frame_id for r in self._requests}
- ad_frame_ids = filter(self.IsAdOrTrackingFrame, frame_ids)
- return filter(lambda r: self.IsAdRequest(r) or self.IsTrackingRequest(r)
- or r.frame_id in ad_frame_ids, self._requests)
-
- @classmethod
- def WithRulesFiles(cls, trace, ad_rules_filename, tracking_rules_filename):
- """Returns an instance of ContentClassificationLens with the rules read
- from files.
- """
- ad_rules = []
- tracking_rules = []
- if os.path.exists(ad_rules_filename):
- ad_rules = open(ad_rules_filename, 'r').readlines()
- if os.path.exists(tracking_rules_filename):
- tracking_rules = open(tracking_rules_filename, 'r').readlines()
- return ContentClassificationLens(trace, ad_rules, tracking_rules)
-
- def _GroupRequestsByFrameId(self):
- for request in self._requests:
- frame_id = request.frame_id
- self._frame_to_requests[frame_id].append(request.request_id)
-
- def _LabelRequests(self):
- for request in self._requests:
- request_id = request.request_id
- if self._ad_matcher.Matches(request, self._document_url):
- self._ad_requests.add(request_id)
- if self._tracking_matcher.Matches(request, self._document_url):
- self._tracking_requests.add(request_id)
-
- def _GetDocumentUrl(self):
- main_frame_id = self._trace.page_track.GetMainFrameId()
- # Take the last one as JS redirects can change the document URL.
- document_url = None
- for r in self._requests:
- # 304: not modified.
- if r.frame_id == main_frame_id and r.status in (200, 304):
- document_url = r.document_url
- return document_url
-
-
-class _RulesMatcher(object):
- """Matches requests with rules in Adblock+ format."""
- _WHITELIST_PREFIX = '@@'
- _RESOURCE_TYPE_TO_OPTIONS_KEY = {
- 'Script': 'script', 'Stylesheet': 'stylesheet', 'Image': 'image',
- 'XHR': 'xmlhttprequest'}
- def __init__(self, rules, no_whitelist):
- """Initializes an instance of _RulesMatcher.
-
- Args:
- rules: ([str]) list of rules.
- no_whitelist: (bool) Whether the whitelisting rules should be ignored.
- """
- self._rules = self._FilterRules(rules, no_whitelist)
- if self._rules:
- try:
- import adblockparser
- self._matcher = adblockparser.AdblockRules(self._rules)
- except ImportError:
- logging.critical('Likely you need to install adblockparser. Try:\n'
- ' pip install --user adblockparser\n'
- 'For 10-100x better performance, also try:\n'
- " pip install --user 're2 >= 0.2.21'")
- raise
- else:
- self._matcher = None
-
- def Matches(self, request, document_url):
- """Returns whether a request matches one of the rules."""
- if self._matcher is None:
- return False
- url = request.url
- return self._matcher.should_block(
- url, self._GetOptions(request, document_url))
-
- @classmethod
- def _GetOptions(cls, request, document_url):
- options = {}
- resource_type = request.resource_type
- option = cls._RESOURCE_TYPE_TO_OPTIONS_KEY.get(resource_type)
- if option:
- options[option] = True
- if cls._IsThirdParty(request.url, document_url):
- options['third-party'] = True
- return options
-
- @classmethod
- def _FilterRules(cls, rules, no_whitelist):
- if not no_whitelist:
- return rules
- else:
- return [rule for rule in rules
- if not rule.startswith(cls._WHITELIST_PREFIX)]
-
- @classmethod
- def _IsThirdParty(cls, url, document_url):
- # Common definition of "third-party" is "not from the same TLD+1".
- # Unfortunately, knowing what is a TLD is not trivial. To do it without a
- # database, we use the following simple (and incorrect) rules:
- # - co.{in,uk,jp,hk} is a TLD
- # - com.{au,hk} is a TLD
- # Otherwise, this is the part after the last dot.
- return cls._GetTldPlusOne(url) != cls._GetTldPlusOne(document_url)
-
- @classmethod
- def _GetTldPlusOne(cls, url):
- hostname = urlparse.urlparse(url).hostname
- if not hostname:
- return hostname
- parts = hostname.split('.')
- if len(parts) <= 2:
- return hostname
- tld_parts_count = 1
- may_be_tld = parts[-2:]
- if may_be_tld[0] == 'co' and may_be_tld[1] in ('in', 'uk', 'jp'):
- tld_parts_count = 2
- elif may_be_tld[0] == 'com' and may_be_tld[1] in ('au', 'hk'):
- tld_parts_count = 2
- tld_plus_one = '.'.join(parts[-(tld_parts_count + 1):])
- return tld_plus_one
diff --git a/loading/content_classification_lens_unittest.py b/loading/content_classification_lens_unittest.py
deleted file mode 100644
index 18025b0..0000000
--- a/loading/content_classification_lens_unittest.py
+++ /dev/null
@@ -1,165 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-import copy
-import unittest
-
-from content_classification_lens import (ContentClassificationLens,
- _RulesMatcher)
-from request_track import Request
-import test_utils
-
-
-class ContentClassificationLensTestCase(unittest.TestCase):
- _DOCUMENT_URL = 'http://bla.com'
- _MAIN_FRAME_ID = '123.1'
- _REQUEST = Request.FromJsonDict({'url': _DOCUMENT_URL,
- 'document_url': _DOCUMENT_URL,
- 'request_id': '1234.1',
- 'frame_id': _MAIN_FRAME_ID,
- 'initiator': {'type': 'other'},
- 'timestamp': 2,
- 'status': 200,
- 'timing': {},
- 'resource_type': 'Document'})
- _PAGE_EVENTS = [{'method': 'Page.frameStartedLoading',
- 'frame_id': _MAIN_FRAME_ID},
- {'method': 'Page.frameAttached',
- 'frame_id': '123.13', 'parent_frame_id': _MAIN_FRAME_ID}]
- _RULES = ['bla.com']
-
- def testGetDocumentUrl(self):
- trace = test_utils.LoadingTraceFromEvents(
- [self._REQUEST], self._PAGE_EVENTS)
- lens = ContentClassificationLens(trace, [], [])
- self.assertEquals(self._DOCUMENT_URL, lens._GetDocumentUrl())
- # Don't be fooled by redirects.
- request = copy.deepcopy(self._REQUEST)
- request.status = 302
- request.document_url = 'http://www.bla.com'
- trace = test_utils.LoadingTraceFromEvents(
- [request, self._REQUEST], self._PAGE_EVENTS)
- lens = ContentClassificationLens(trace, [], [])
- self.assertEquals(self._DOCUMENT_URL, lens._GetDocumentUrl())
-
- def testGetDocumentUrlSeveralChanges(self):
- request = copy.deepcopy(self._REQUEST)
- request.status = 200
- request.document_url = 'http://www.blabla.com'
- request2 = copy.deepcopy(request)
- request2.document_url = 'http://www.blablabla.com'
- trace = test_utils.LoadingTraceFromEvents(
- [self._REQUEST, request, request2], self._PAGE_EVENTS)
- lens = ContentClassificationLens(trace, [], [])
- self.assertEquals(request2.document_url, lens._GetDocumentUrl())
-
- def testNoRules(self):
- trace = test_utils.LoadingTraceFromEvents(
- [self._REQUEST], self._PAGE_EVENTS)
- lens = ContentClassificationLens(trace, [], [])
- self.assertFalse(lens.IsAdRequest(self._REQUEST))
- self.assertFalse(lens.IsTrackingRequest(self._REQUEST))
-
- def testAdRequest(self):
- trace = test_utils.LoadingTraceFromEvents(
- [self._REQUEST], self._PAGE_EVENTS)
- lens = ContentClassificationLens(trace, self._RULES, [])
- self.assertTrue(lens.IsAdRequest(self._REQUEST))
- self.assertFalse(lens.IsTrackingRequest(self._REQUEST))
-
- def testTrackingRequest(self):
- trace = test_utils.LoadingTraceFromEvents(
- [self._REQUEST], self._PAGE_EVENTS)
- lens = ContentClassificationLens(trace, [], self._RULES)
- self.assertFalse(lens.IsAdRequest(self._REQUEST))
- self.assertTrue(lens.IsTrackingRequest(self._REQUEST))
-
- def testMainFrameIsNotAnAdFrame(self):
- trace = test_utils.LoadingTraceFromEvents(
- [self._REQUEST], self._PAGE_EVENTS)
- lens = ContentClassificationLens(trace, self._RULES, [])
- self.assertFalse(lens.IsAdOrTrackingFrame(self._MAIN_FRAME_ID))
-
- def testAdFrame(self):
- request = copy.deepcopy(self._REQUEST)
- request.request_id = '1234.2'
- request.frame_id = '123.123'
- trace = test_utils.LoadingTraceFromEvents(
- [self._REQUEST, request], self._PAGE_EVENTS)
- lens = ContentClassificationLens(trace, self._RULES, [])
- self.assertTrue(lens.IsAdOrTrackingFrame(request.frame_id))
-
- def testAdAndTrackingRequests(self):
- ad_request = copy.deepcopy(self._REQUEST)
- ad_request.request_id = '1234.2'
- ad_request.frame_id = '123.123'
- non_ad_request_non_ad_frame = copy.deepcopy(self._REQUEST)
- non_ad_request_non_ad_frame.request_id = '1234.3'
- non_ad_request_non_ad_frame.url = 'http://www.example.com'
- non_ad_request_non_ad_frame.frame_id = '123.456'
- non_ad_request_ad_frame = copy.deepcopy(self._REQUEST)
- non_ad_request_ad_frame.request_id = '1234.4'
- non_ad_request_ad_frame.url = 'http://www.example.com'
- non_ad_request_ad_frame.frame_id = ad_request.frame_id
-
- trace = test_utils.LoadingTraceFromEvents(
- [self._REQUEST, ad_request, non_ad_request_non_ad_frame,
- non_ad_request_ad_frame], self._PAGE_EVENTS)
- lens = ContentClassificationLens(trace, self._RULES, [])
- self.assertSetEqual(
- set([self._REQUEST, ad_request, non_ad_request_ad_frame]),
- set(lens.AdAndTrackingRequests()))
-
-
-class _MatcherTestCase(unittest.TestCase):
- _RULES_WITH_WHITELIST = ['/thisisanad.', '@@myadvertisingdomain.com/*',
- '@@||www.mydomain.com/ads/$elemhide']
- _SCRIPT_RULE = 'domainwithscripts.com/*$script'
- _THIRD_PARTY_RULE = 'domainwithscripts.com/*$third-party'
- _SCRIPT_REQUEST = Request.FromJsonDict(
- {'url': 'http://domainwithscripts.com/bla.js',
- 'resource_type': 'Script',
- 'request_id': '1234.1',
- 'frame_id': '123.1',
- 'initiator': {'type': 'other'},
- 'timestamp': 2,
- 'timing': {}})
-
- def testRemovesWhitelistRules(self):
- matcher = _RulesMatcher(self._RULES_WITH_WHITELIST, False)
- self.assertEquals(3, len(matcher._rules))
- matcher = _RulesMatcher(self._RULES_WITH_WHITELIST, True)
- self.assertEquals(1, len(matcher._rules))
-
- def testScriptRule(self):
- matcher = _RulesMatcher([self._SCRIPT_RULE], False)
- request = copy.deepcopy(self._SCRIPT_REQUEST)
- request.resource_type = 'Stylesheet'
- self.assertFalse(matcher.Matches(
- request, ContentClassificationLensTestCase._DOCUMENT_URL))
- self.assertTrue(matcher.Matches(
- self._SCRIPT_REQUEST, ContentClassificationLensTestCase._DOCUMENT_URL))
-
- def testGetTldPlusOne(self):
- self.assertEquals(
- 'easy.com',
- _RulesMatcher._GetTldPlusOne('http://www.easy.com/hello/you'))
- self.assertEquals(
- 'not-so-easy.co.uk',
- _RulesMatcher._GetTldPlusOne('http://www.not-so-easy.co.uk/hello/you'))
- self.assertEquals(
- 'hard.co.uk',
- _RulesMatcher._GetTldPlusOne('http://hard.co.uk/'))
-
- def testThirdPartyRule(self):
- matcher = _RulesMatcher([self._THIRD_PARTY_RULE], False)
- request = copy.deepcopy(self._SCRIPT_REQUEST)
- document_url = 'http://www.domainwithscripts.com/good-morning'
- self.assertFalse(matcher.Matches(request, document_url))
- document_url = 'http://anotherdomain.com/good-morning'
- self.assertTrue(matcher.Matches(request, document_url))
-
-
-if __name__ == '__main__':
- unittest.main()
diff --git a/loading/controller.py b/loading/controller.py
deleted file mode 100644
index 0cad7f1..0000000
--- a/loading/controller.py
+++ /dev/null
@@ -1,630 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-"""Controller objects that control the context in which chrome runs.
-
-This is responsible for the setup necessary for launching chrome, and for
-creating a DevToolsConnection. There are remote device and local
-desktop-specific versions.
-"""
-
-import contextlib
-import copy
-import datetime
-import errno
-import logging
-import os
-import platform
-import shutil
-import socket
-import subprocess
-import sys
-import tempfile
-import time
-import traceback
-
-import psutil
-
-import chrome_cache
-import chrome_setup
-import common_util
-import device_setup
-import devtools_monitor
-import emulation
-from options import OPTIONS
-
-_SRC_DIR = os.path.abspath(os.path.join(
- os.path.dirname(__file__), '..', '..', '..'))
-_CATAPULT_DIR = os.path.join(_SRC_DIR, 'third_party', 'catapult')
-
-sys.path.append(os.path.join(_CATAPULT_DIR, 'devil'))
-from devil.android import device_errors
-from devil.android import flag_changer
-from devil.android.sdk import intent
-
-sys.path.append(
- os.path.join(_CATAPULT_DIR, 'telemetry', 'third_party', 'websocket-client'))
-import websocket
-
-
-class ChromeControllerMetadataGatherer(object):
- """Gather metadata for the ChromeControllerBase."""
-
- def __init__(self):
- self._chromium_commit = None
-
- def GetMetadata(self):
- """Gets metadata to update in the ChromeControllerBase"""
- if self._chromium_commit is None:
- def _GitCommand(subcmd):
- return subprocess.check_output(['git', '-C', _SRC_DIR] + subcmd).strip()
- try:
- self._chromium_commit = _GitCommand(['merge-base', 'master', 'HEAD'])
- if self._chromium_commit != _GitCommand(['rev-parse', 'HEAD']):
- self._chromium_commit = 'unknown'
- except subprocess.CalledProcessError:
- self._chromium_commit = 'git_error'
- return {
- 'chromium_commit': self._chromium_commit,
- 'date': datetime.datetime.utcnow().isoformat(),
- 'seconds_since_epoch': time.time()
- }
-
-
-class ChromeControllerInternalError(Exception):
- pass
-
-
-def _AllocateTcpListeningPort():
- """Allocates a TCP listening port.
-
- Note: The use of this function is inherently OS level racy because the
- port returned by this function might be re-used by another running process.
- """
- temp_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
- try:
- temp_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
- temp_socket.bind(('', 0))
- return temp_socket.getsockname()[1]
- finally:
- temp_socket.close()
-
-
-class ChromeControllerError(Exception):
- """Chrome error with detailed log.
-
- Note:
- Some of these errors might be known intermittent errors that can usually be
- retried by the caller after re-doing any specific setup again.
- """
- _INTERMITTENT_WHITE_LIST = {websocket.WebSocketTimeoutException,
- devtools_monitor.DevToolsConnectionTargetCrashed}
- _PASSTHROUGH_WHITE_LIST = (MemoryError, SyntaxError)
-
- def __init__(self, log):
- """Constructor
-
- Args:
- log: String containing the log of the running Chrome instance that was
- running. It will be interleaved with any other running Android
- package.
- """
- self.error_type, self.error_value, self.error_traceback = sys.exc_info()
- super(ChromeControllerError, self).__init__(repr(self.error_value))
- self.parent_stack = traceback.extract_stack()
- self.log = log
-
- def Dump(self, output):
- """Dumps the entire error's infos into file-like object."""
- output.write('-' * 60 + ' {}:\n'.format(self.__class__.__name__))
- output.write(repr(self) + '\n')
- output.write('{} is {}known as intermittent.\n'.format(
- self.error_type.__name__, '' if self.IsIntermittent() else 'NOT '))
- output.write(
- '-' * 60 + ' {}\'s full traceback:\n'.format(self.error_type.__name__))
- output.write(''.join(traceback.format_list(self.parent_stack)))
- traceback.print_tb(self.error_traceback, file=output)
- output.write('-' * 60 + ' Begin log\n')
- output.write(self.log)
- output.write('-' * 60 + ' End log\n')
-
- def IsIntermittent(self):
- """Returns whether the error is an known intermittent error."""
- return self.error_type in self._INTERMITTENT_WHITE_LIST
-
- def RaiseOriginal(self):
- """Raises the original exception that has caused <self>."""
- raise self.error_type, self.error_value, self.error_traceback
-
-
-class ChromeControllerBase(object):
- """Base class for all controllers.
-
- Defines common operations but should not be created directly.
- """
- METADATA_GATHERER = ChromeControllerMetadataGatherer()
- DEVTOOLS_CONNECTION_ATTEMPTS = 10
- DEVTOOLS_CONNECTION_ATTEMPT_INTERVAL_SECONDS = 1
-
- def __init__(self):
- self._chrome_args = chrome_setup.CHROME_ARGS + [
- # Tests & dev-tools related stuff.
- '--enable-test-events',
- '--remote-debugging-port=%d' % OPTIONS.devtools_port,
-
- # Detailed log.
- '--enable-logging=stderr',
- '--v=1',
- ]
- self._wpr_attributes = None
- self._metadata = {}
- self._emulated_device = None
- self._network_name = None
- self._slow_death = False
-
- def AddChromeArguments(self, args):
- """Add command-line arguments to the chrome execution."""
- self._chrome_args.extend(args)
-
- @contextlib.contextmanager
- def Open(self):
- """Context that returns a connection/chrome instance.
-
- Returns:
- DevToolsConnection instance for which monitoring has been set up but not
- started.
- """
- raise NotImplementedError
-
- def ChromeMetadata(self):
- """Return metadata such as emulation information.
-
- Returns:
- Metadata as JSON dictionary.
- """
- return self._metadata
-
- def GetDevice(self):
- """Returns an android device, or None if chrome is local."""
- return None
-
- def SetDeviceEmulation(self, device_name):
- """Set device emulation.
-
- Args:
- device_name: (str) Key from --devices_file.
- """
- devices = emulation.LoadEmulatedDevices(file(OPTIONS.devices_file))
- self._emulated_device = devices[device_name]
-
- def SetNetworkEmulation(self, network_name):
- """Set network emulation.
-
- Args:
- network_name: (str) Key from emulation.NETWORK_CONDITIONS or None to
- disable network emulation.
- """
- assert network_name in emulation.NETWORK_CONDITIONS or network_name is None
- self._network_name = network_name
-
- def ResetBrowserState(self):
- """Resets the chrome's browser state."""
- raise NotImplementedError
-
- def PushBrowserCache(self, cache_path):
- """Pushes the HTTP chrome cache to the profile directory.
-
- Caution:
- The chrome cache backend type differ according to the platform. On
- desktop, the cache backend type is `blockfile` versus `simple` on Android.
- This method assumes that your are pushing a cache with the correct backend
- type, and will NOT verify for you.
-
- Args:
- cache_path: The directory's path containing the cache locally.
- """
- raise NotImplementedError
-
- def PullBrowserCache(self):
- """Pulls the HTTP chrome cache from the profile directory.
-
- Returns:
- Temporary directory containing all the browser cache. Caller will need to
- remove this directory manually.
- """
- raise NotImplementedError
-
- def SetSlowDeath(self, slow_death=True):
- """Set to pause before final kill of chrome.
-
- Gives time for caches to write.
-
- Args:
- slow_death: (bool) True if you want that which comes to all who live, to
- be slow.
- """
- self._slow_death = slow_death
-
- @contextlib.contextmanager
- def OpenWprHost(self, wpr_archive_path, record=False,
- network_condition_name=None,
- disable_script_injection=False,
- out_log_path=None):
- """Opens a Web Page Replay host context.
-
- Args:
- wpr_archive_path: host sided WPR archive's path.
- record: Enables or disables WPR archive recording.
- network_condition_name: Network condition name available in
- emulation.NETWORK_CONDITIONS.
- disable_script_injection: Disable JavaScript file injections that is
- fighting against resources name entropy.
- out_log_path: Path of the WPR host's log.
- """
- raise NotImplementedError
-
- def _StartConnection(self, connection):
- """This should be called after opening an appropriate connection."""
- if self._emulated_device:
- self._metadata.update(emulation.SetUpDeviceEmulationAndReturnMetadata(
- connection, self._emulated_device))
- if self._network_name:
- network_condition = emulation.NETWORK_CONDITIONS[self._network_name]
- logging.info('Set up network emulation %s (latency=%dms, down=%d, up=%d)'
- % (self._network_name, network_condition['latency'],
- network_condition['download'], network_condition['upload']))
- emulation.SetUpNetworkEmulation(connection, **network_condition)
- self._metadata['network_emulation'] = copy.copy(network_condition)
- self._metadata['network_emulation']['name'] = self._network_name
- else:
- self._metadata['network_emulation'] = \
- {k: 'disabled' for k in ['name', 'download', 'upload', 'latency']}
- self._metadata.update(self.METADATA_GATHERER.GetMetadata())
- logging.info('Devtools connection success')
-
- def _GetChromeArguments(self):
- """Get command-line arguments for the chrome execution."""
- chrome_args = self._chrome_args[:]
- if self._wpr_attributes:
- chrome_args.extend(self._wpr_attributes.chrome_args)
- return chrome_args
-
-
-class RemoteChromeController(ChromeControllerBase):
- """A controller for an android device, aka remote chrome instance."""
- # An estimate of time to wait for the device to become idle after expensive
- # operations, such as opening the launcher activity.
- TIME_TO_IDLE_SECONDS = 2
-
- def __init__(self, device):
- """Initialize the controller.
-
- Caution: The browser state might need to be manually reseted.
-
- Args:
- device: (device_utils.DeviceUtils) an android device.
- """
- assert device is not None, 'Should you be using LocalController instead?'
- super(RemoteChromeController, self).__init__()
- self._device = device
- self._metadata['platform'] = {
- 'os': 'A-' + device.build_id,
- 'product_model': device.product_model
- }
- self._InitDevice()
-
- def GetDevice(self):
- """Overridden android device."""
- return self._device
-
- @contextlib.contextmanager
- def Open(self):
- """Overridden connection creation."""
- if self._wpr_attributes:
- assert self._wpr_attributes.chrome_env_override == {}, \
- 'Remote controller doesn\'t support chrome environment variables.'
- package_info = OPTIONS.ChromePackage()
- self._device.ForceStop(package_info.package)
- with flag_changer.CustomCommandLineFlags(
- self._device, package_info.cmdline_file, self._GetChromeArguments()):
- self._DismissCrashDialogIfNeeded()
- start_intent = intent.Intent(
- package=package_info.package, activity=package_info.activity,
- data='about:blank')
- self._device.adb.Logcat(clear=True, dump=True)
- self._device.StartActivity(start_intent, blocking=True)
- try:
- for attempt_id in xrange(self.DEVTOOLS_CONNECTION_ATTEMPTS):
- logging.info('Devtools connection attempt %d' % attempt_id)
- # Adb forwarding does not provide a way to print the port number if
- # it is allocated atomically by the OS by passing port=0, but we need
- # dynamically allocated listening port here to handle parallel run on
- # different devices.
- host_side_port = _AllocateTcpListeningPort()
- logging.info('Allocated host sided listening port for devtools '
- 'connection: %d', host_side_port)
- try:
- with device_setup.ForwardPort(
- self._device, 'tcp:%d' % host_side_port,
- 'localabstract:chrome_devtools_remote'):
- try:
- connection = devtools_monitor.DevToolsConnection(
- OPTIONS.devtools_hostname, host_side_port)
- self._StartConnection(connection)
- except socket.error as e:
- if e.errno != errno.ECONNRESET:
- raise
- time.sleep(self.DEVTOOLS_CONNECTION_ATTEMPT_INTERVAL_SECONDS)
- continue
- yield connection
- if self._slow_death:
- self._device.adb.Shell('am start com.google.android.launcher')
- time.sleep(self.TIME_TO_IDLE_SECONDS)
- break
- except device_errors.AdbCommandFailedError as error:
- _KNOWN_ADB_FORWARDER_FAILURES = [
- 'cannot bind to socket: Address already in use',
- 'cannot rebind existing socket: Resource temporarily unavailable']
- for message in _KNOWN_ADB_FORWARDER_FAILURES:
- if message in error.message:
- break
- else:
- raise
- continue
- else:
- raise ChromeControllerInternalError(
- 'Failed to connect to Chrome devtools after {} '
- 'attempts.'.format(self.DEVTOOLS_CONNECTION_ATTEMPTS))
- except ChromeControllerError._PASSTHROUGH_WHITE_LIST:
- raise
- except Exception:
- logcat = ''.join([l + '\n' for l in self._device.adb.Logcat(dump=True)])
- raise ChromeControllerError(log=logcat)
- finally:
- self._device.ForceStop(package_info.package)
- self._DismissCrashDialogIfNeeded()
-
- def ResetBrowserState(self):
- """Override resetting Chrome local state."""
- logging.info('Resetting Chrome local state')
- chrome_setup.ResetChromeLocalState(self._device,
- OPTIONS.ChromePackage().package)
-
-
- def RebootDevice(self):
- """Reboot the remote device."""
- assert self._wpr_attributes is None, 'WPR should be closed before rebooting'
- logging.warning('Rebooting the device')
- device_setup.Reboot(self._device)
- self._InitDevice()
-
- def PushBrowserCache(self, cache_path):
- """Override for chrome cache pushing."""
- logging.info('Push cache from %s' % cache_path)
- chrome_cache.PushBrowserCache(self._device, cache_path)
-
- def PullBrowserCache(self):
- """Override for chrome cache pulling."""
- assert self._slow_death, 'Must do SetSlowDeath() before opening chrome.'
- logging.info('Pull cache from device')
- return chrome_cache.PullBrowserCache(self._device)
-
- @contextlib.contextmanager
- def OpenWprHost(self, wpr_archive_path, record=False,
- network_condition_name=None,
- disable_script_injection=False,
- out_log_path=None):
- """Starts a WPR host, overrides Chrome flags until contextmanager exit."""
- assert not self._wpr_attributes, 'WPR is already running.'
- with device_setup.RemoteWprHost(self._device, wpr_archive_path,
- record=record,
- network_condition_name=network_condition_name,
- disable_script_injection=disable_script_injection,
- out_log_path=out_log_path) as wpr_attributes:
- self._wpr_attributes = wpr_attributes
- yield
- self._wpr_attributes = None
-
- def _DismissCrashDialogIfNeeded(self):
- for _ in xrange(10):
- if not self._device.DismissCrashDialogIfNeeded():
- break
-
- def _InitDevice(self):
- self._device.EnableRoot()
-
-
-class LocalChromeController(ChromeControllerBase):
- """Controller for a local (desktop) chrome instance."""
-
- def __init__(self):
- """Initialize the controller.
-
- Caution: The browser state might need to be manually reseted.
- """
- super(LocalChromeController, self).__init__()
- if OPTIONS.no_sandbox:
- self.AddChromeArguments(['--no-sandbox'])
- self._profile_dir = OPTIONS.local_profile_dir
- self._using_temp_profile_dir = self._profile_dir is None
- if self._using_temp_profile_dir:
- self._profile_dir = tempfile.mkdtemp(suffix='.profile')
- self._chrome_env_override = {}
- self._metadata['platform'] = {
- 'os': platform.system()[0] + '-' + platform.release(),
- 'product_model': 'unknown'
- }
-
- def __del__(self):
- if self._using_temp_profile_dir:
- shutil.rmtree(self._profile_dir)
-
- @staticmethod
- def KillChromeProcesses():
- """Kills all the running instances of Chrome.
-
- Returns: (int) The number of processes that were killed.
- """
- killed_count = 0
- chrome_path = OPTIONS.LocalBinary('chrome')
- for process in psutil.process_iter():
- try:
- process_bin_path = None
- # In old versions of psutil, process.exe is a member, in newer ones it's
- # a method.
- if type(process.exe) == str:
- process_bin_path = process.exe
- else:
- process_bin_path = process.exe()
- if os.path.abspath(process_bin_path) == os.path.abspath(chrome_path):
- process.terminate()
- killed_count += 1
- try:
- process.wait(timeout=10)
- except psutil.TimeoutExpired:
- process.kill()
- except psutil.AccessDenied:
- pass
- except psutil.NoSuchProcess:
- pass
- return killed_count
-
- def SetChromeEnvOverride(self, env):
- """Set the environment for Chrome.
-
- Args:
- env: (dict) Environment.
- """
- self._chrome_env_override = env
-
- @contextlib.contextmanager
- def Open(self):
- """Overridden connection creation."""
- # Kill all existing Chrome instances.
- killed_count = LocalChromeController.KillChromeProcesses()
- if killed_count > 0:
- logging.warning('Killed existing Chrome instance.')
-
- chrome_cmd = [OPTIONS.LocalBinary('chrome')]
- chrome_cmd.extend(self._GetChromeArguments())
- # Force use of simple cache.
- chrome_cmd.append('--use-simple-cache-backend=on')
- chrome_cmd.append('--user-data-dir=%s' % self._profile_dir)
- # Navigates to about:blank for couples of reasons:
- # - To find the correct target descriptor at devtool connection;
- # - To avoid cache and WPR pollution by the NTP.
- chrome_cmd.append('about:blank')
-
- tmp_log = \
- tempfile.NamedTemporaryFile(prefix="chrome_controller_", suffix='.log')
- chrome_process = None
- try:
- chrome_env_override = self._chrome_env_override.copy()
- if self._wpr_attributes:
- chrome_env_override.update(self._wpr_attributes.chrome_env_override)
-
- chrome_env = os.environ.copy()
- chrome_env.update(chrome_env_override)
-
- # Launch Chrome.
- logging.info(common_util.GetCommandLineForLogging(chrome_cmd,
- chrome_env_override))
- chrome_process = subprocess.Popen(chrome_cmd, stdout=tmp_log.file,
- stderr=tmp_log.file, env=chrome_env)
- # Attempt to connect to Chrome's devtools
- for attempt_id in xrange(self.DEVTOOLS_CONNECTION_ATTEMPTS):
- logging.info('Devtools connection attempt %d' % attempt_id)
- process_result = chrome_process.poll()
- if process_result is not None:
- raise ChromeControllerInternalError(
- 'Unexpected Chrome exit: {}'.format(process_result))
- try:
- connection = devtools_monitor.DevToolsConnection(
- OPTIONS.devtools_hostname, OPTIONS.devtools_port)
- break
- except socket.error as e:
- if e.errno != errno.ECONNREFUSED:
- raise
- time.sleep(self.DEVTOOLS_CONNECTION_ATTEMPT_INTERVAL_SECONDS)
- else:
- raise ChromeControllerInternalError(
- 'Failed to connect to Chrome devtools after {} '
- 'attempts.'.format(self.DEVTOOLS_CONNECTION_ATTEMPTS))
- # Start and yield the devtool connection.
- self._StartConnection(connection)
- yield connection
- if self._slow_death:
- connection.Close()
- chrome_process.wait()
- chrome_process = None
- except ChromeControllerError._PASSTHROUGH_WHITE_LIST:
- raise
- except Exception:
- raise ChromeControllerError(log=open(tmp_log.name).read())
- finally:
- if OPTIONS.local_noisy:
- sys.stderr.write(open(tmp_log.name).read())
- del tmp_log
- if chrome_process:
- try:
- chrome_process.kill()
- except OSError:
- pass # Chrome is already dead.
-
- def ResetBrowserState(self):
- """Override for chrome state reseting."""
- assert os.path.isdir(self._profile_dir)
- logging.info('Reset chrome\'s profile')
- # Don't do a rmtree(self._profile_dir) because it might be a temp directory.
- for filename in os.listdir(self._profile_dir):
- path = os.path.join(self._profile_dir, filename)
- if os.path.isdir(path):
- shutil.rmtree(path)
- else:
- os.remove(path)
-
- def PushBrowserCache(self, cache_path):
- """Override for chrome cache pushing."""
- self._EnsureProfileDirectory()
- profile_cache_path = self._GetCacheDirectoryPath()
- logging.info('Copy cache directory from %s to %s.' % (
- cache_path, profile_cache_path))
- chrome_cache.CopyCacheDirectory(cache_path, profile_cache_path)
-
- def PullBrowserCache(self):
- """Override for chrome cache pulling."""
- cache_path = tempfile.mkdtemp()
- profile_cache_path = self._GetCacheDirectoryPath()
- logging.info('Copy cache directory from %s to %s.' % (
- profile_cache_path, cache_path))
- chrome_cache.CopyCacheDirectory(profile_cache_path, cache_path)
- return cache_path
-
- @contextlib.contextmanager
- def OpenWprHost(self, wpr_archive_path, record=False,
- network_condition_name=None,
- disable_script_injection=False,
- out_log_path=None):
- """Override for WPR context."""
- assert not self._wpr_attributes, 'WPR is already running.'
- with device_setup.LocalWprHost(wpr_archive_path,
- record=record,
- network_condition_name=network_condition_name,
- disable_script_injection=disable_script_injection,
- out_log_path=out_log_path) as wpr_attributes:
- self._wpr_attributes = wpr_attributes
- yield
- self._wpr_attributes = None
-
- def _EnsureProfileDirectory(self):
- if (not os.path.isdir(self._profile_dir) or
- os.listdir(self._profile_dir) == []):
- # Launch chrome so that it populates the profile directory.
- with self.Open():
- pass
- assert os.path.isdir(self._profile_dir)
- assert os.path.isdir(os.path.dirname(self._GetCacheDirectoryPath()))
-
- def _GetCacheDirectoryPath(self):
- return os.path.join(self._profile_dir, 'Default', 'Cache')
diff --git a/loading/core_set.py b/loading/core_set.py
deleted file mode 100644
index 910c280..0000000
--- a/loading/core_set.py
+++ /dev/null
@@ -1,169 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-"""Compute core set for a page.
-
-This script is a collection of utilities for working with core sets.
-"""
-
-import argparse
-import glob
-import json
-import logging
-import multiprocessing
-import os
-import sys
-
-import dependency_graph
-import loading_trace
-import request_dependencies_lens
-import resource_sack
-
-
-def _Progress(x):
- sys.stderr.write(x + '\n')
-
-
-def _PageCore(prefix, graph_set_names, output):
- """Compute the page core over sets defined by graph_set_names."""
- assert graph_set_names
- graph_sets = []
- sack = resource_sack.GraphSack()
- for name in graph_set_names:
- name_graphs = []
- _Progress('Processing %s' % name)
- for filename in glob.iglob('-'.join([prefix, name, '*.trace'])):
- _Progress('Reading %s' % filename)
- trace = loading_trace.LoadingTrace.FromJsonFile(filename)
- graph = dependency_graph.RequestDependencyGraph(
- trace.request_track.GetEvents(),
- request_dependencies_lens.RequestDependencyLens(trace))
- sack.ConsumeGraph(graph)
- name_graphs.append(graph)
- graph_sets.append(name_graphs)
- core = sack.CoreSet(*graph_sets)
- json.dump({'page_core': [{'label': b.label,
- 'name': b.name,
- 'count': b.num_nodes}
- for b in core],
- 'non_core': [{'label': b.label,
- 'name': b.name,
- 'count': b.num_nodes}
- for b in sack.bags if b not in core],
- 'threshold': sack.CORE_THRESHOLD},
- output, sort_keys=True, indent=2)
- output.write('\n')
-
-
-def _DoSite(site, graph_sets, input_dir, output_dir):
- """Compute the appropriate page core for a site.
-
- Used by _Spawn.
- """
- _Progress('Doing %s on %s' % (site, '/'.join(graph_sets)))
- prefix = os.path.join(input_dir, site)
- with file(os.path.join(output_dir,
- '%s-%s.json' % (site, '.'.join(graph_sets))),
- 'w') as output:
- _PageCore(prefix, graph_sets, output)
-
-
-def _DoSiteRedirect(t):
- """Unpack arguments for map call.
-
- Note that multiprocessing.Pool.map cannot use a lambda (as it needs to be
- serialized into the executing process).
- """
- _DoSite(*t)
-
-
-def _Spawn(site_list_file, graph_sets, input_dir, output_dir, workers):
- """Spool site computation out to a multiprocessing pool."""
- with file(site_list_file) as site_file:
- sites = [l.strip() for l in site_file.readlines()]
- _Progress('Using sites:\n %s' % '\n '.join(sites))
- pool = multiprocessing.Pool(workers, maxtasksperchild=1)
- pool.map(_DoSiteRedirect, [(s, graph_sets, input_dir, output_dir)
- for s in sites])
-
-
-def _ReadCoreSet(filename):
- data = json.load(open(filename))
- return set(page['name'] for page in data['page_core'])
-
-
-def _Compare(a_name, b_name, csv):
- """Compare two core sets."""
- a = _ReadCoreSet(a_name)
- b = _ReadCoreSet(b_name)
- result = (resource_sack.GraphSack.CoreSimilarity(a, b),
- ' Equal' if a == b else 'UnEqual',
- 'a<=b' if a <= b else 'a!<b',
- 'a>=b' if b <= a else 'a!>b')
- if csv:
- print '%s,%s,%s,%s' % result
- else:
- print '%.2f %s %s %s' % result
-
-
-if __name__ == '__main__':
- logging.basicConfig(level=logging.ERROR)
- parser = argparse.ArgumentParser()
- subparsers = parser.add_subparsers()
-
- spawn = subparsers.add_parser(
- 'spawn', help=('spawn page core set computation from a sites list.\n'
- 'A core set will be computed for each site by '
- 'combining all run indicies from site traces for each '
- '--set, then computing the page core over the sets. '
- 'Assumes trace file names in form {input-dir}/'
- '{site}-{set}-{run index}.trace'))
- spawn.add_argument('--sets', required=True,
- help='sets to combine, comma-separated')
- spawn.add_argument('--sites', required=True, help='file containing sites')
- spawn.add_argument('--workers', default=8, type=int,
- help=('number of parallel workers. Each worker seems to '
- 'use about 0.5-1G/trace when processing. Total '
- 'memory usage should be kept less than physical '
- 'memory for the job to run in a reasonable time'))
- spawn.add_argument('--input_dir', required=True,
- help='trace input directory')
- spawn.add_argument('--output_dir', required=True,
- help=('core set output directory. Each site will have one '
- 'JSON file generated listing the core set as well '
- 'as some metadata like the threshold used'))
- spawn.set_defaults(executor=lambda args:
- _Spawn(site_list_file=args.sites,
- graph_sets=args.sets.split(','),
- input_dir=args.input_dir,
- output_dir=args.output_dir,
- workers=args.workers))
-
- page_core = subparsers.add_parser(
- 'page_core',
- help=('compute page core set for a group of files of form '
- '{--prefix}{set}*.trace over each set in --sets'))
- page_core.add_argument('--sets', required=True,
- help='sets to combine, comma-separated')
- page_core.add_argument('--prefix', required=True,
- help='trace file prefix')
- page_core.add_argument('--output', required=True,
- help='JSON output file name')
- page_core.set_defaults(executor=lambda args:
- _PageCore(args.prefix, args.sets.split(','),
- file(args.output, 'w')))
-
- compare = subparsers.add_parser(
- 'compare',
- help=('compare two core sets (as output by spawn, page_core or '
- 'all_cores) using Jaccard index. Outputs on stdout'))
- compare.add_argument('--a', required=True, help='the first core set JSON')
- compare.add_argument('--b', required=True, help='the second core set JSON')
- compare.add_argument('--csv', action='store_true', help='output as CSV')
- compare.set_defaults(
- executor=lambda args:
- _Compare(args.a, args.b, args.csv))
-
- args = parser.parse_args()
- args.executor(args)
diff --git a/loading/cost_to_csv.py b/loading/cost_to_csv.py
deleted file mode 100755
index d271d06..0000000
--- a/loading/cost_to_csv.py
+++ /dev/null
@@ -1,43 +0,0 @@
-#! /usr/bin/python
-# Copyright 2015 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-import argparse
-import logging
-import os
-import sys
-
-from processing import (SitesFromDir, WarmGraph, ColdGraph)
-
-
-def main():
- logging.basicConfig(level=logging.ERROR)
- parser = argparse.ArgumentParser(
- description=('Convert a directory created by ./analyze.py fetch '
- 'to a CSV.'))
- parser.add_argument('--datadir', required=True)
- parser.add_argument('--csv', required=True)
- parser.add_argument('--noads', action='store_true')
- args = parser.parse_args()
- sites = SitesFromDir(args.datadir)
- with open(args.csv, 'w') as output:
- output.write('site,kind,cost\n')
- for site in sites:
- print site
- warm = WarmGraph(args.datadir, site)
- if args.noads:
- warm.Set(node_filter=warm.FilterAds)
- cold = ColdGraph(args.datadir, site)
- if args.noads:
- cold.Set(node_filter=cold.FilterAds)
- output.write('%s,%s,%s\n' % (site, 'warm', warm.Cost()))
- warm.Set(cache_all=True)
- output.write('%s,%s,%s\n' % (site, 'warm-cache', warm.Cost()))
- output.write('%s,%s,%s\n' % (site, 'cold', cold.Cost()))
- cold.Set(cache_all=True)
- output.write('%s,%s,%s\n' % (site, 'cold-cache', cold.Cost()))
-
-
-if __name__ == '__main__':
- main()
diff --git a/loading/csv_util.py b/loading/csv_util.py
deleted file mode 100644
index ccdfb92..0000000
--- a/loading/csv_util.py
+++ /dev/null
@@ -1,53 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-import csv
-import logging
-import os
-
-
-def CollectCSVsFromDirectory(directory_path, file_output):
- """Collects recursively all .csv files from directory into one.
-
- Note: The list of CSV columns must be identical across all files.
-
- Args:
- directory_path: Path of the directory to collect from.
- file_output: File-like object to dump the CSV to.
- """
- # List CSVs.
- csv_list = []
- for root, _, files in os.walk(directory_path):
- for file_name in files:
- file_path = os.path.join(root, file_name)
- if os.path.abspath(file_path) == os.path.abspath(file_output.name):
- continue
- if file_name.endswith('.csv'):
- csv_list.append(os.path.join(root, file_name))
- if not csv_list:
- logging.error('No CSV files found in %s' % directory_path)
- return False
-
- # List rows.
- csv_list.sort()
- csv_field_names = None
- csv_rows = []
- for csv_file in csv_list:
- logging.info('collecting %s' % csv_file)
- with open(csv_file) as csvfile:
- reader = csv.DictReader(csvfile)
- if csv_field_names is None:
- csv_field_names = reader.fieldnames
- else:
- assert reader.fieldnames == csv_field_names, (
- 'Different field names in: {}'.format(csv_file))
- for row in reader:
- csv_rows.append(row)
-
- # Export rows.
- writer = csv.DictWriter(file_output, fieldnames=csv_field_names)
- writer.writeheader()
- for row in csv_rows:
- writer.writerow(row)
- return True
diff --git a/loading/dag.py b/loading/dag.py
deleted file mode 100644
index 2eacd3a..0000000
--- a/loading/dag.py
+++ /dev/null
@@ -1,116 +0,0 @@
-# Copyright 2015 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-"""Support for directed acyclic graphs.
-
-Used in the ResourceGraph model for chrome loading.
-"""
-
-class Node(object):
- """A node in a DAG.
-
- We do not enforce at a node level that a graph is a DAG. Methods like
- TopologicalSort will assume a DAG and may fail if that's not the case.
-
- Nodes are identified with an index that must be unique for a particular graph
- (it is used for hashing and equality). A graph is represented as a list of
- nodes, for example in the TopologicalSort class method. By convention a node's
- index is its position in this list, making it easy to store auxillary
- information.
- """
- def __init__(self, index):
- """Create a new node.
-
- Args:
- index: index of the node. We assume these indicies uniquely identify a
- node (and so use it for hashing and equality).
- """
- self._predecessors = set()
- self._successors = set()
- self._index = index
-
- def Predecessors(self):
- return self._predecessors
-
- def Successors(self):
- return self._successors
-
- def AddSuccessor(self, s):
- """Add a successor.
-
- Updates appropriate links. Any existing parents of s are unchanged; to move
- a node you must do a combination of RemoveSuccessor and AddSuccessor.
-
- Args:
- s: the node to add as a successor.
- """
- self._successors.add(s)
- s._predecessors.add(self)
-
- def RemoveSuccessor(self, s):
- """Removes a successor.
-
- Updates appropriate links.
-
- Args:
- s: the node to remove as a successor. Will raise a set exception if s is
- not an existing successor.
- """
- self._successors.remove(s)
- s._predecessors.remove(self)
-
- def SortedSuccessors(self):
- children = [c for c in self.Successors()]
- children.sort(key=lambda c: c.Index())
- return children
-
- def Index(self):
- return self._index
-
- def __eq__(self, o):
- return self.Index() == o.Index()
-
- def __hash__(self):
- return hash(self.Index())
-
-
-def TopologicalSort(nodes, node_filter=None):
- """Topological sort.
-
- We use a BFS-like walk which ensures that sibling are always grouped
- together in the output. This is more convenient for some later analyses.
-
- Args:
- nodes: [Node, ...] Nodes to sort.
- node_filter: a filter Node->boolean to restrict the graph. A node passes the
- filter on a return value of True. Only the subgraph reachable from a root
- passing the filter is considered.
-
- Returns:
- A list of Nodes in topological order. Note that node indicies are
- unchanged; the original list nodes is not modified.
- """
- if node_filter is None:
- node_filter = lambda _: True
- sorted_nodes = []
- sources = []
- remaining_in_edges = {}
- for n in nodes:
- if n.Predecessors():
- remaining_in_edges[n] = len(n.Predecessors())
- elif node_filter(n):
- sources.append(n)
- while sources:
- n = sources.pop(0)
- assert node_filter(n)
- sorted_nodes.append(n)
- # We sort by index to get consistent sorts across runs/machines.
- for c in n.SortedSuccessors():
- assert remaining_in_edges[c] > 0
- if not node_filter(c):
- continue
- remaining_in_edges[c] -= 1
- if not remaining_in_edges[c]:
- sources.append(c)
- return sorted_nodes
diff --git a/loading/dag_unittest.py b/loading/dag_unittest.py
deleted file mode 100644
index 6701c9e..0000000
--- a/loading/dag_unittest.py
+++ /dev/null
@@ -1,92 +0,0 @@
-# Copyright 2015 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-import os
-import sys
-import unittest
-
-import dag
-
-class DagTestCase(unittest.TestCase):
-
- def MakeDag(self, links):
- """Make a graph from a description of links.
-
- Args:
- links: A list of (index, (successor index...)) tuples. Index must equal
- the location of the tuple in the list and are provided to make it easier
- to read.
-
- Returns:
- A list of Nodes.
- """
- nodes = []
- for i in xrange(len(links)):
- assert i == links[i][0]
- nodes.append(dag.Node(i))
- for l in links:
- for s in l[1]:
- nodes[l[0]].AddSuccessor(nodes[s])
- return nodes
-
- def SortedIndicies(self, graph, node_filter=None):
- return [n.Index() for n in dag.TopologicalSort(graph, node_filter)]
-
- def SuccessorIndicies(self, node):
- return [c.Index() for c in node.SortedSuccessors()]
-
- def test_SimpleSorting(self):
- graph = self.MakeDag([(0, (1,2)),
- (1, (3,)),
- (2, ()),
- (3, (4,)),
- (4, ()),
- (5, (6,)),
- (6, ())])
- self.assertEqual(self.SuccessorIndicies(graph[0]), [1, 2])
- self.assertEqual(self.SuccessorIndicies(graph[1]), [3])
- self.assertEqual(self.SuccessorIndicies(graph[2]), [])
- self.assertEqual(self.SuccessorIndicies(graph[3]), [4])
- self.assertEqual(self.SuccessorIndicies(graph[4]), [])
- self.assertEqual(self.SuccessorIndicies(graph[5]), [6])
- self.assertEqual(self.SuccessorIndicies(graph[6]), [])
- self.assertEqual(self.SortedIndicies(graph), [0, 5, 1, 2, 6, 3, 4])
-
- def test_SortSiblingsAreGrouped(self):
- graph = self.MakeDag([(0, (1, 2, 3)),
- (1, (4,)),
- (2, (5, 6)),
- (3, (7, 8)),
- (4, ()),
- (5, ()),
- (6, ()),
- (7, ()),
- (8, ())])
- self.assertEqual(self.SortedIndicies(graph), [0, 1, 2, 3, 4, 5, 6, 7, 8])
-
- def test_FilteredSorting(self):
- # 0 is a filtered-out root, which means the subgraph containing 1, 2, 3 and
- # 4 should be ignored. 5 is an unfiltered root, and the subgraph containing
- # 6, 7, 8 and 10 should be sorted. 9 and 11 are filtered out, and should
- # exclude the unfiltred node 12.
- graph = self.MakeDag([(0, (1,)),
- (1, (2, 3)),
- (2, ()),
- (3, (4,)),
- (4, ()),
- (5, (6, 7)),
- (6, (11,)),
- (7, (8,)),
- (8, (9, 10)),
- (9, ()),
- (10, ()),
- (11, (12,)),
- (12, ())])
- self.assertEqual(self.SortedIndicies(
- graph, lambda n: n.Index() not in (0, 3, 9, 11)),
- [5, 6, 7, 8, 10])
-
-
-if __name__ == '__main__':
- unittest.main()
diff --git a/loading/dependency_graph.py b/loading/dependency_graph.py
deleted file mode 100644
index a2c1f17..0000000
--- a/loading/dependency_graph.py
+++ /dev/null
@@ -1,243 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-"""Request dependency graph."""
-
-import logging
-import sys
-
-import common_util
-import graph
-import request_track
-
-
-class RequestNode(graph.Node):
- def __init__(self, request=None):
- super(RequestNode, self).__init__()
- self.request = request
- self.cost = request.Cost() if request else None # Deserialization.
-
- def ToJsonDict(self):
- json_dict = super(RequestNode, self).ToJsonDict()
- json_dict.update({'request': self.request.ToJsonDict()})
- return json_dict
-
- @classmethod
- def FromJsonDict(cls, json_dict):
- result = super(RequestNode, cls).FromJsonDict(json_dict)
- result.request = request_track.Request.FromJsonDict(json_dict['request'])
- return common_util.DeserializeAttributesFromJsonDict(
- json_dict, result, ['cost'])
-
-
-class Edge(graph.Edge):
- def __init__(self, from_node, to_node, reason=None):
- super(Edge, self).__init__(from_node, to_node)
- self.reason = reason
- self.cost = None
- self.is_timing = None
- if from_node is None: # Deserialization.
- return
- self.reason = reason
- self.cost = request_track.TimeBetween(
- self.from_node.request, self.to_node.request, self.reason)
- self.is_timing = False
-
- def ToJsonDict(self):
- result = {}
- return common_util.SerializeAttributesToJsonDict(
- result, self, ['reason', 'cost', 'is_timing'])
-
- @classmethod
- def FromJsonDict(cls, json_dict):
- result = cls(None, None, None)
- return common_util.DeserializeAttributesFromJsonDict(
- json_dict, result, ['reason', 'cost', 'is_timing'])
-
-
-class RequestDependencyGraph(object):
- """Request dependency graph."""
- # This resource type may induce a timing dependency. See _SplitChildrenByTime
- # for details.
- # TODO(lizeb,mattcary): are these right?
- _CAN_BE_TIMING_PARENT = set(['script', 'magic-debug-content'])
- _CAN_MAKE_TIMING_DEPENDENCE = set(['json', 'other', 'magic-debug-content'])
-
- def __init__(self, requests, dependencies_lens,
- node_class=RequestNode, edge_class=Edge):
- """Creates a request dependency graph.
-
- Args:
- requests: ([Request]) a list of requests.
- dependencies_lens: (RequestDependencyLens)
- node_class: (subclass of RequestNode)
- edge_class: (subclass of Edge)
- """
- self._requests = None
- self._first_request_node = None
- self._deps_graph = None
- self._nodes_by_id = None
- if requests is None: # Deserialization.
- return
- assert issubclass(node_class, RequestNode)
- assert issubclass(edge_class, Edge)
- self._requests = requests
- deps = dependencies_lens.GetRequestDependencies()
- self._nodes_by_id = {r.request_id : node_class(r) for r in self._requests}
- edges = []
- for (parent_request, child_request, reason) in deps:
- if (parent_request.request_id not in self._nodes_by_id
- or child_request.request_id not in self._nodes_by_id):
- continue
- parent_node = self._nodes_by_id[parent_request.request_id]
- child_node = self._nodes_by_id[child_request.request_id]
- edges.append(edge_class(parent_node, child_node, reason))
- self._first_request_node = self._nodes_by_id[self._requests[0].request_id]
- self._deps_graph = graph.DirectedGraph(self._nodes_by_id.values(), edges)
- self._HandleTimingDependencies()
-
- @property
- def graph(self):
- """Return the Graph we're based on."""
- return self._deps_graph
-
- def UpdateRequestsCost(self, request_id_to_cost):
- """Updates the cost of the nodes identified by their request ID.
-
- Args:
- request_id_to_cost: ({request_id: new_cost}) Can be a superset of the
- requests actually present in the graph.
- """
- for node in self._deps_graph.Nodes():
- request_id = node.request.request_id
- if request_id in request_id_to_cost:
- node.cost = request_id_to_cost[request_id]
-
- def Cost(self, from_first_request=True, path_list=None, costs_out=None):
- """Returns the cost of the graph, that is the costliest path.
-
- Args:
- from_first_request: (boolean) If True, only considers paths that originate
- from the first request node.
- path_list: (list) See graph.Cost().
- costs_out: (list) See graph.Cost().
- """
- if from_first_request:
- return self._deps_graph.Cost(
- [self._first_request_node], path_list, costs_out)
- else:
- return self._deps_graph.Cost(path_list=path_list, costs_out=costs_out)
-
- def AncestorRequests(self, descendants):
- """Return requests that are ancestors of a set of requests.
-
- Args:
- descendants: ([Request]) List of requests.
-
- Returns:
- List of Requests that are ancestors of descendants.
- """
- return [n.request for n in self.graph.AncestorNodes(
- self._nodes_by_id[r.request_id] for r in descendants)]
-
- def _HandleTimingDependencies(self):
- try:
- for n in self._deps_graph.TopologicalSort():
- self._SplitChildrenByTime(n)
- except AssertionError as exc:
- sys.stderr.write('Bad topological sort: %s\n'
- 'Skipping child split\n' % str(exc))
-
- def _SplitChildrenByTime(self, parent):
- """Splits children of a node by request times.
-
- The initiator of a request may not be the true dependency of a request. For
- example, a script may appear to load several resources independently, but in
- fact one of them may be a JSON data file, and the remaining resources assets
- described in the JSON. The assets should be dependent upon the JSON data
- file, and not the original script.
-
- This function approximates that by rearranging the children of a node
- according to their request times. The predecessor of each child is made to
- be the node with the greatest finishing time, that is before the start time
- of the child.
-
- We do this by sorting the nodes twice, once by start time and once by end
- time. We mark the earliest end time, and then we walk the start time list,
- advancing the end time mark when it is less than our current start time.
-
- This is refined by only considering assets which we believe actually create
- a dependency. We only split if the original parent is a script, and the new
- parent a data file.
- We incorporate this heuristic by skipping over any non-script/json resources
- when moving the end mark.
-
- TODO(mattcary): More heuristics, like incorporating cachability somehow, and
- not just picking arbitrarily if there are two nodes with the same end time
- (does that ever really happen?)
-
- Args:
- parent: (_RequestNode) The children of this node are processed by this
- function.
- """
- if parent.request.GetContentType() not in self._CAN_BE_TIMING_PARENT:
- return
- edges = self._deps_graph.OutEdges(parent)
- edges_by_start_time = sorted(
- edges, key=lambda e: e.to_node.request.start_msec)
- edges_by_end_time = sorted(
- edges, key=lambda e: e.to_node.request.end_msec)
- end_mark = 0
- for current in edges_by_start_time:
- assert current.from_node is parent
- if current.to_node.request.start_msec < parent.request.end_msec - 1e-5:
- parent_url = parent.request.url
- child_url = current.to_node.request.url
- logging.warning('Child loaded before parent finished: %s -> %s',
- request_track.ShortName(parent_url),
- request_track.ShortName(child_url))
- go_to_next_child = False
- while end_mark < len(edges_by_end_time):
- if edges_by_end_time[end_mark] == current:
- go_to_next_child = True
- break
- elif (edges_by_end_time[end_mark].to_node.request.GetContentType()
- not in self._CAN_MAKE_TIMING_DEPENDENCE):
- end_mark += 1
- elif (end_mark < len(edges_by_end_time) - 1 and
- edges_by_end_time[end_mark + 1].to_node.request.end_msec
- < current.to_node.request.start_msec):
- end_mark += 1
- else:
- break
- if end_mark >= len(edges_by_end_time):
- break # It's not possible to rearrange any more children.
- if go_to_next_child:
- continue # We can't rearrange this child, but the next child may be
- # eligible.
- if (edges_by_end_time[end_mark].to_node.request.end_msec
- <= current.to_node.request.start_msec):
- current.is_timing = True
- self._deps_graph.UpdateEdge(
- current, edges_by_end_time[end_mark].to_node,
- current.to_node)
-
- def ToJsonDict(self):
- result = {'graph': self.graph.ToJsonDict()}
- result['requests'] = [r.ToJsonDict() for r in self._requests]
- return result
-
- @classmethod
- def FromJsonDict(cls, json_dict, node_class, edge_class):
- result = cls(None, None)
- graph_dict = json_dict['graph']
- g = graph.DirectedGraph.FromJsonDict(graph_dict, node_class, edge_class)
- result._requests = [request_track.Request.FromJsonDict(r)
- for r in json_dict['requests']]
- result._nodes_by_id = {node.request.request_id: node
- for node in g.Nodes()}
- result._first_request_node = result._nodes_by_id[
- result._requests[0].request_id]
- result._deps_graph = g
- return result
diff --git a/loading/dependency_graph_unittest.py b/loading/dependency_graph_unittest.py
deleted file mode 100644
index 23441f4..0000000
--- a/loading/dependency_graph_unittest.py
+++ /dev/null
@@ -1,164 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-import unittest
-
-import dependency_graph
-import request_dependencies_lens
-from request_dependencies_lens_unittest import TestRequests
-import request_track
-import test_utils
-
-
-class RequestDependencyGraphTestCase(unittest.TestCase):
- def setUp(self):
- super(RequestDependencyGraphTestCase, self).setUp()
- self.trace = TestRequests.CreateLoadingTrace()
-
- def testUpdateRequestCost(self, serialize=False):
- requests = self.trace.request_track.GetEvents()
- requests[0].timing = request_track.Timing.FromDevToolsDict(
- {'requestTime': 12, 'loadingFinished': 10})
- dependencies_lens = request_dependencies_lens.RequestDependencyLens(
- self.trace)
- g = dependency_graph.RequestDependencyGraph(requests, dependencies_lens)
- if serialize:
- g = self._SerializeDeserialize(g)
- self.assertEqual(10, g.Cost())
- request_id = requests[0].request_id
- g.UpdateRequestsCost({request_id: 100})
- self.assertEqual(100, g.Cost())
- g.UpdateRequestsCost({'unrelated_id': 1000})
- self.assertEqual(100, g.Cost())
-
- def testCost(self, serialize=False):
- requests = self.trace.request_track.GetEvents()
- for (index, request) in enumerate(requests):
- request.timing = request_track.Timing.FromDevToolsDict(
- {'requestTime': index, 'receiveHeadersEnd': 10,
- 'loadingFinished': 10})
- dependencies_lens = request_dependencies_lens.RequestDependencyLens(
- self.trace)
- g = dependency_graph.RequestDependencyGraph(requests, dependencies_lens)
- if serialize:
- g = self._SerializeDeserialize(g)
- # First redirect -> Second redirect -> Redirected Request -> Request ->
- # JS Request 2
- self.assertEqual(7010, g.Cost())
- # Not on the critical path
- g.UpdateRequestsCost({TestRequests.JS_REQUEST.request_id: 0})
- self.assertEqual(7010, g.Cost())
- g.UpdateRequestsCost({TestRequests.FIRST_REDIRECT_REQUEST.request_id: 0})
- self.assertEqual(7000, g.Cost())
- g.UpdateRequestsCost({TestRequests.SECOND_REDIRECT_REQUEST.request_id: 0})
- self.assertEqual(6990, g.Cost())
-
- def testHandleTimingDependencies(self, serialize=False):
- # Timing adds node 1 as a parent to 2 but not 3.
- requests = [
- test_utils.MakeRequest(0, 'null', 100, 110, 110,
- magic_content_type=True),
- test_utils.MakeRequest(1, 0, 115, 120, 120,
- magic_content_type=True),
- test_utils.MakeRequest(2, 0, 121, 122, 122,
- magic_content_type=True),
- test_utils.MakeRequest(3, 0, 112, 119, 119,
- magic_content_type=True),
- test_utils.MakeRequest(4, 2, 122, 126, 126),
- test_utils.MakeRequest(5, 2, 122, 126, 126)]
-
- g = self._GraphFromRequests(requests)
- if serialize:
- g = self._SerializeDeserialize(g)
- self.assertSetEqual(
- self._Successors(g, requests[0]), set([requests[1], requests[3]]))
- self.assertSetEqual(
- self._Successors(g, requests[1]), set([requests[2]]))
- self.assertSetEqual(
- self._Successors(g, requests[2]), set([requests[4], requests[5]]))
- self.assertSetEqual(self._Successors(g, requests[3]), set())
- self.assertSetEqual(self._Successors(g, requests[4]), set())
- self.assertSetEqual(self._Successors(g, requests[5]), set())
-
- # Change node 1 so it is a parent of 3, which becomes the parent of 2.
- requests[1] = test_utils.MakeRequest(
- 1, 0, 110, 111, 111, magic_content_type=True)
- g = self._GraphFromRequests(requests)
- self.assertSetEqual(self._Successors(g, requests[0]), set([requests[1]]))
- self.assertSetEqual(self._Successors(g, requests[1]), set([requests[3]]))
- self.assertSetEqual(self._Successors(g, requests[2]),
- set([requests[4], requests[5]]))
- self.assertSetEqual(self._Successors(g, requests[3]), set([requests[2]]))
- self.assertSetEqual(self._Successors(g, requests[4]), set())
- self.assertSetEqual(self._Successors(g, requests[5]), set())
-
- # Add an initiator dependence to 1 that will become the parent of 3.
- requests[1] = test_utils.MakeRequest(
- 1, 0, 110, 111, 111, magic_content_type=True)
- requests.append(test_utils.MakeRequest(6, 1, 111, 112, 112))
- g = self._GraphFromRequests(requests)
- # Check it doesn't change until we change the content type of 6.
- self.assertEqual(self._Successors(g, requests[6]), set())
- requests[6] = test_utils.MakeRequest(6, 1, 111, 112, 112,
- magic_content_type=True)
- g = self._GraphFromRequests(requests)
- self.assertSetEqual(self._Successors(g, requests[0]), set([requests[1]]))
- self.assertSetEqual(self._Successors(g, requests[1]), set([requests[6]]))
- self.assertSetEqual(self._Successors(g, requests[2]),
- set([requests[4], requests[5]]))
- self.assertSetEqual(self._Successors(g, requests[3]), set([requests[2]]))
- self.assertSetEqual(self._Successors(g, requests[4]), set())
- self.assertSetEqual(self._Successors(g, requests[5]), set())
- self.assertSetEqual(self._Successors(g, requests[6]), set([requests[3]]))
-
- def testHandleTimingDependenciesImages(self, serialize=False):
- # If we're all image types, then we shouldn't split by timing.
- requests = [test_utils.MakeRequest(0, 'null', 100, 110, 110),
- test_utils.MakeRequest(1, 0, 115, 120, 120),
- test_utils.MakeRequest(2, 0, 121, 122, 122),
- test_utils.MakeRequest(3, 0, 112, 119, 119),
- test_utils.MakeRequest(4, 2, 122, 126, 126),
- test_utils.MakeRequest(5, 2, 122, 126, 126)]
- for r in requests:
- r.response_headers['Content-Type'] = 'image/gif'
- g = self._GraphFromRequests(requests)
- if serialize:
- g = self._SerializeDeserialize(g)
- self.assertSetEqual(self._Successors(g, requests[0]),
- set([requests[1], requests[2], requests[3]]))
- self.assertSetEqual(self._Successors(g, requests[1]), set())
- self.assertSetEqual(self._Successors(g, requests[2]),
- set([requests[4], requests[5]]))
- self.assertSetEqual(self._Successors(g, requests[3]), set())
- self.assertSetEqual(self._Successors(g, requests[4]), set())
- self.assertSetEqual(self._Successors(g, requests[5]), set())
-
- def testSerializeDeserialize(self):
- # Redo the tests, with a graph that has been serialized / deserialized.
- self.testUpdateRequestCost(True)
- self.testCost(True)
- self.testHandleTimingDependencies(True)
- self.testHandleTimingDependenciesImages(True)
-
- @classmethod
- def _SerializeDeserialize(cls, g):
- json_dict = g.ToJsonDict()
- return dependency_graph.RequestDependencyGraph.FromJsonDict(
- json_dict, dependency_graph.RequestNode, dependency_graph.Edge)
-
- @classmethod
- def _GraphFromRequests(cls, requests):
- trace = test_utils.LoadingTraceFromEvents(requests)
- deps_lens = test_utils.SimpleLens(trace)
- return dependency_graph.RequestDependencyGraph(requests, deps_lens)
-
- @classmethod
- def _Successors(cls, g, parent_request):
- parent_node = g._nodes_by_id[parent_request.request_id]
- edges = g._deps_graph.OutEdges(parent_node)
- return set(e.to_node.request for e in edges)
-
-
-if __name__ == '__main__':
- unittest.main()
diff --git a/loading/device_setup.py b/loading/device_setup.py
deleted file mode 100644
index cfa19bf..0000000
--- a/loading/device_setup.py
+++ /dev/null
@@ -1,411 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-import collections
-import contextlib
-import logging
-import os
-import shutil
-import subprocess
-import sys
-import tempfile
-import time
-
-_SRC_DIR = os.path.abspath(os.path.join(
- os.path.dirname(__file__), '..', '..', '..'))
-
-_CATAPULT_DIR = os.path.join(_SRC_DIR, 'third_party', 'catapult')
-sys.path.append(os.path.join(_CATAPULT_DIR, 'devil'))
-from devil.android import device_utils
-from devil.android import forwarder
-from devil.android.sdk import adb_wrapper
-from devil.android.sdk import intent
-from devil.android.sdk import keyevent
-
-sys.path.append(os.path.join(_SRC_DIR, 'build', 'android'))
-from pylib import constants
-from video_recorder import video_recorder
-
-sys.path.append(os.path.join(_SRC_DIR, 'tools', 'perf'))
-from core import path_util
-sys.path.append(path_util.GetTelemetryDir())
-
-from telemetry.internal.image_processing import video
-from telemetry.internal.util import wpr_server
-
-sys.path.append(os.path.join(
- _CATAPULT_DIR, 'telemetry', 'third_party', 'webpagereplay'))
-import adb_install_cert
-import certutils
-
-import common_util
-import devtools_monitor
-import emulation
-import options
-
-
-OPTIONS = options.OPTIONS
-
-# The speed index's video recording's bit rate in Mb/s.
-_SPEED_INDEX_VIDEO_BITRATE = 4
-
-
-class DeviceSetupException(Exception):
- def __init__(self, msg):
- super(DeviceSetupException, self).__init__(msg)
- logging.error(msg)
-
-
-def GetFirstDevice():
- """Returns the first connected device.
-
- Raises:
- DeviceSetupException if there is no such device.
- """
- devices = device_utils.DeviceUtils.HealthyDevices()
- if not devices:
- raise DeviceSetupException('No devices found')
- return devices[0]
-
-
-def GetDeviceFromSerial(android_device_serial):
- """Returns the DeviceUtils instance."""
- devices = device_utils.DeviceUtils.HealthyDevices()
- for device in devices:
- if device.adb._device_serial == android_device_serial:
- return device
- raise DeviceSetupException(
- 'Device {} not found'.format(android_device_serial))
-
-
-def Reboot(device):
- """Reboot the device.
-
- Args:
- device: Device to reboot, from DeviceUtils.
- """
- # Kills the device -> host forwarder running on the device so that
- # forwarder.Forwarder have correct state tracking after having rebooted.
- forwarder.Forwarder.UnmapAllDevicePorts(device)
- # Reboot the device.
- device.Reboot()
- # Pass through the lock screen.
- time.sleep(3)
- device.SendKeyEvent(keyevent.KEYCODE_MENU)
-
-
-def DeviceSubmitShellCommandQueue(device, command_queue):
- """Executes on the device a command queue.
-
- Args:
- device: The device to execute the shell commands to.
- command_queue: a list of commands to be executed in that order.
- """
- REMOTE_COMMAND_FILE_PATH = '/data/local/tmp/adb_command_file.sh'
- if not command_queue:
- return
- with tempfile.NamedTemporaryFile(prefix='adb_command_file_',
- suffix='.sh') as command_file:
- command_file.write('#!/bin/sh\n')
- command_file.write('# Shell file generated by {}\'s {}\n'.format(
- __file__, DeviceSubmitShellCommandQueue.__name__))
- command_file.write('set -e\n')
- for command in command_queue:
- command_file.write(subprocess.list2cmdline(command) + ' ;\n')
- command_file.write('exit 0;\n'.format(
- REMOTE_COMMAND_FILE_PATH))
- command_file.flush()
- device.adb.Push(command_file.name, REMOTE_COMMAND_FILE_PATH)
- device.adb.Shell('sh {p} && rm {p}'.format(p=REMOTE_COMMAND_FILE_PATH))
-
-
-@contextlib.contextmanager
-def ForwardPort(device, local, remote):
- """Forwards a local port to a remote one on a device in a context."""
- # If we're logging requests from a local desktop chrome instance there is no
- # device.
- if not device:
- yield
- return
- device.adb.Forward(local, remote)
- try:
- yield
- finally:
- device.adb.ForwardRemove(local)
-
-
-# WPR specific attributes to set up chrome.
-#
-# Members:
-# chrome_args: Additional flags list that may be used for chromium to load web
-# page through the running web page replay host.
-# chrome_env_override: Dictionary of environment variables to override at
-# Chrome's launch time.
-WprAttribute = collections.namedtuple('WprAttribute',
- ['chrome_args', 'chrome_env_override'])
-
-
-@contextlib.contextmanager
-def _WprHost(wpr_archive_path, record=False,
- network_condition_name=None,
- disable_script_injection=False,
- wpr_ca_cert_path=None,
- out_log_path=None):
- assert wpr_archive_path
-
- def PathWorkaround(path):
- # webpagereplay.ReplayServer is doing a os.path.exist(os.path.dirname(p))
- # that fails if p = 'my_file.txt' because os.path.dirname(p) = '' != '.'.
- # This workaround just sends absolute path to work around this bug.
- return os.path.abspath(path)
-
- wpr_server_args = ['--use_closest_match']
- if record:
- wpr_server_args.append('--record')
- if os.path.exists(wpr_archive_path):
- os.remove(wpr_archive_path)
- else:
- assert os.path.exists(wpr_archive_path)
- if network_condition_name:
- condition = emulation.NETWORK_CONDITIONS[network_condition_name]
- if record:
- logging.warning('WPR network condition is ignored when recording.')
- else:
- wpr_server_args.extend([
- '--down', emulation.BandwidthToString(condition['download']),
- '--up', emulation.BandwidthToString(condition['upload']),
- '--delay_ms', str(condition['latency']),
- '--shaping_type', 'proxy'])
-
- if disable_script_injection:
- # Remove default WPR injected scripts like deterministic.js which
- # overrides Math.random.
- wpr_server_args.extend(['--inject_scripts', ''])
- if wpr_ca_cert_path:
- wpr_server_args.extend(['--should_generate_certs',
- '--https_root_ca_cert_path=' + PathWorkaround(wpr_ca_cert_path)])
- if out_log_path:
- # --log_level debug to extract the served URLs requests from the log.
- wpr_server_args.extend(['--log_level', 'debug',
- '--log_file', PathWorkaround(out_log_path)])
- # Don't append to previously existing log.
- if os.path.exists(out_log_path):
- os.remove(out_log_path)
-
- # Set up WPR server and device forwarder.
- server = wpr_server.ReplayServer(PathWorkaround(wpr_archive_path),
- '127.0.0.1', 0, 0, None, wpr_server_args)
- http_port, https_port = server.StartServer()[:-1]
-
- logging.info('WPR server listening on HTTP=%s, HTTPS=%s (options=%s)' % (
- http_port, https_port, wpr_server_args))
- try:
- yield http_port, https_port
- finally:
- server.StopServer()
-
-
-def _VerifySilentWprHost(record, network_condition_name):
- assert not record, 'WPR cannot record without a specified archive.'
- assert not network_condition_name, ('WPR cannot emulate network condition' +
- ' without a specified archive.')
-
-
-def _FormatWPRRelatedChromeArgumentFor(http_port, https_port):
- HOST_RULES='MAP * 127.0.0.1,EXCLUDE localhost'
- return [
- '--testing-fixed-http-port={}'.format(http_port),
- '--testing-fixed-https-port={}'.format(https_port),
- '--host-resolver-rules={}'.format(HOST_RULES)]
-
-
-@contextlib.contextmanager
-def LocalWprHost(wpr_archive_path, record=False,
- network_condition_name=None,
- disable_script_injection=False,
- out_log_path=None):
- """Launches web page replay host.
-
- Args:
- wpr_archive_path: host sided WPR archive's path.
- record: Enables or disables WPR archive recording.
- network_condition_name: Network condition name available in
- emulation.NETWORK_CONDITIONS.
- disable_script_injection: Disable JavaScript file injections that is
- fighting against resources name entropy.
- out_log_path: Path of the WPR host's log.
-
- Returns:
- WprAttribute
- """
- if wpr_archive_path == None:
- _VerifySilentWprHost(record, network_condition_name)
- yield []
- return
-
- with common_util.TemporaryDirectory() as temp_home_dir:
- # Generate a root certification authority certificate for WPR.
- private_ca_cert_path = os.path.join(temp_home_dir, 'wpr.pem')
- ca_cert_path = os.path.join(temp_home_dir, 'wpr-cert.pem')
- certutils.write_dummy_ca_cert(*certutils.generate_dummy_ca_cert(),
- cert_path=private_ca_cert_path)
- assert os.path.isfile(ca_cert_path)
- certutils.install_cert_in_nssdb(temp_home_dir, ca_cert_path)
-
- with _WprHost(
- wpr_archive_path,
- record=record,
- network_condition_name=network_condition_name,
- disable_script_injection=disable_script_injection,
- wpr_ca_cert_path=private_ca_cert_path,
- out_log_path=out_log_path) as (http_port, https_port):
- chrome_args = _FormatWPRRelatedChromeArgumentFor(http_port, https_port)
- yield WprAttribute(chrome_args=chrome_args,
- chrome_env_override={'HOME': temp_home_dir})
-
-
-@contextlib.contextmanager
-def RemoteWprHost(device, wpr_archive_path, record=False,
- network_condition_name=None,
- disable_script_injection=False,
- out_log_path=None):
- """Launches web page replay host.
-
- Args:
- device: Android device.
- wpr_archive_path: host sided WPR archive's path.
- record: Enables or disables WPR archive recording.
- network_condition_name: Network condition name available in
- emulation.NETWORK_CONDITIONS.
- disable_script_injection: Disable JavaScript file injections that is
- fighting against resources name entropy.
- out_log_path: Path of the WPR host's log.
-
- Returns:
- WprAttribute
- """
- assert device
- if wpr_archive_path == None:
- _VerifySilentWprHost(record, network_condition_name)
- yield []
- return
- # Deploy certification authority to the device.
- temp_certificate_dir = tempfile.mkdtemp()
- wpr_ca_cert_path = os.path.join(temp_certificate_dir, 'testca.pem')
- certutils.write_dummy_ca_cert(*certutils.generate_dummy_ca_cert(),
- cert_path=wpr_ca_cert_path)
- device_cert_util = adb_install_cert.AndroidCertInstaller(
- device.adb.GetDeviceSerial(), None, wpr_ca_cert_path,
- adb_wrapper.AdbWrapper.GetAdbPath())
- device_cert_util.install_cert(overwrite_cert=True)
- try:
- # Set up WPR server
- with _WprHost(
- wpr_archive_path,
- record=record,
- network_condition_name=network_condition_name,
- disable_script_injection=disable_script_injection,
- wpr_ca_cert_path=wpr_ca_cert_path,
- out_log_path=out_log_path) as (http_port, https_port):
- # Set up the forwarder.
- forwarder.Forwarder.Map([(0, http_port), (0, https_port)], device)
- device_http_port = forwarder.Forwarder.DevicePortForHostPort(http_port)
- device_https_port = forwarder.Forwarder.DevicePortForHostPort(https_port)
- try:
- chrome_args = _FormatWPRRelatedChromeArgumentFor(device_http_port,
- device_https_port)
- yield WprAttribute(chrome_args=chrome_args, chrome_env_override={})
- finally:
- # Tear down the forwarder.
- forwarder.Forwarder.UnmapDevicePort(device_http_port, device)
- forwarder.Forwarder.UnmapDevicePort(device_https_port, device)
- finally:
- # Remove certification authority from the device.
- device_cert_util.remove_cert()
- shutil.rmtree(temp_certificate_dir)
-
-
-# Deprecated
-@contextlib.contextmanager
-def _RemoteVideoRecorder(device, local_output_path, megabits_per_second):
- """Record a video on Device.
-
- Args:
- device: (device_utils.DeviceUtils) Android device to connect to.
- local_output_path: Output path were to save the video locally.
- megabits_per_second: Video recorder Mb/s.
-
- Yields:
- None
- """
- assert device
- if megabits_per_second > 100:
- raise ValueError('Android video capture cannot capture at %dmbps. '
- 'Max capture rate is 100mbps.' % megabits_per_second)
- assert local_output_path.endswith('.mp4')
- recorder = video_recorder.VideoRecorder(device, megabits_per_second)
- recorder.Start()
- try:
- yield
- recorder.Stop()
- recorder.Pull(host_file=local_output_path)
- recorder = None
- finally:
- if recorder:
- recorder.Stop()
-
-
-@contextlib.contextmanager
-def RemoteSpeedIndexRecorder(device, connection, local_output_path):
- """Records on a device a video compatible for speed-index computation.
-
- Note:
- Chrome should be opened with the --disable-infobars command line argument to
- avoid web page viewport size to be changed, that can change speed-index
- value.
-
- Args:
- device: (device_utils.DeviceUtils) Android device to connect to.
- connection: devtools connection.
- local_output_path: Output path were to save the video locally.
-
- Yields:
- None
- """
- # Paint the current HTML document with the ORANGE that video is detecting with
- # the view-port position and size.
- color = video.HIGHLIGHT_ORANGE_FRAME
- connection.ExecuteJavaScript("""
- (function() {
- var screen = document.createElement('div');
- screen.style.background = 'rgb(%d, %d, %d)';
- screen.style.position = 'fixed';
- screen.style.top = '0';
- screen.style.left = '0';
- screen.style.width = '100%%';
- screen.style.height = '100%%';
- screen.style.zIndex = '2147483638';
- document.body.appendChild(screen);
- requestAnimationFrame(function() {
- requestAnimationFrame(function() {
- window.__speedindex_screen = screen;
- });
- });
- })();
- """ % (color.r, color.g, color.b))
- connection.PollForJavaScriptExpression('!!window.__speedindex_screen', 1)
-
- with _RemoteVideoRecorder(device, local_output_path,
- megabits_per_second=_SPEED_INDEX_VIDEO_BITRATE):
- # Paint the current HTML document with white so that it is not troubling the
- # speed index measurement.
- connection.ExecuteJavaScript("""
- (function() {
- requestAnimationFrame(function() {
- var screen = window.__speedindex_screen;
- screen.style.background = 'rgb(255, 255, 255)';
- });
- })();
- """)
- yield
diff --git a/loading/devtools_monitor.py b/loading/devtools_monitor.py
deleted file mode 100644
index 6a5ed10..0000000
--- a/loading/devtools_monitor.py
+++ /dev/null
@@ -1,494 +0,0 @@
-# Copyright (c) 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-"""Library handling DevTools websocket interaction.
-"""
-
-import datetime
-import httplib
-import json
-import logging
-import os
-import sys
-import time
-
-file_dir = os.path.dirname(__file__)
-sys.path.append(os.path.join(file_dir, '..', '..', 'perf'))
-from core import path_util
-sys.path.append(path_util.GetTelemetryDir())
-
-from telemetry.internal.backends.chrome_inspector import inspector_websocket
-from telemetry.internal.backends.chrome_inspector import websocket
-
-import common_util
-
-
-DEFAULT_TIMEOUT_SECONDS = 10
-
-_WEBSOCKET_TIMEOUT_SECONDS = 10
-
-
-class DevToolsConnectionException(Exception):
- def __init__(self, message):
- super(DevToolsConnectionException, self).__init__(message)
- logging.warning("DevToolsConnectionException: " + message)
-
-class DevToolsConnectionTargetCrashed(DevToolsConnectionException):
- pass
-
-
-# Taken from telemetry.internal.backends.chrome_inspector.tracing_backend.
-# TODO(mattcary): combine this with the above and export?
-class _StreamReader(object):
- def __init__(self, inspector, stream_handle):
- self._inspector_websocket = inspector
- self._handle = stream_handle
- self._callback = None
- self._data = None
-
- def Read(self, callback):
- # Do not allow the instance of this class to be reused, as
- # we only read data sequentially at the moment, so a stream
- # can only be read once.
- assert not self._callback
- self._data = []
- self._callback = callback
- self._ReadChunkFromStream()
- # Queue one extra read ahead to avoid latency.
- self._ReadChunkFromStream()
-
- def _ReadChunkFromStream(self):
- # Limit max block size to avoid fragmenting memory in sock.recv(),
- # (see https://github.com/liris/websocket-client/issues/163 for details)
- req = {'method': 'IO.read', 'params': {
- 'handle': self._handle, 'size': 32768}}
- self._inspector_websocket.AsyncRequest(req, self._GotChunkFromStream)
-
- def _GotChunkFromStream(self, response):
- # Quietly discard responses from reads queued ahead after EOF.
- if self._data is None:
- return
- if 'error' in response:
- raise DevToolsConnectionException(
- 'Reading trace failed: %s' % response['error']['message'])
- result = response['result']
- self._data.append(result['data'])
- if not result.get('eof', False):
- self._ReadChunkFromStream()
- return
- req = {'method': 'IO.close', 'params': {'handle': self._handle}}
- self._inspector_websocket.SendAndIgnoreResponse(req)
- trace_string = ''.join(self._data)
- self._data = None
- self._callback(trace_string)
-
-
-class DevToolsConnection(object):
- """Handles the communication with a DevTools server.
- """
- TRACING_DOMAIN = 'Tracing'
- TRACING_END_METHOD = 'Tracing.end'
- TRACING_DATA_METHOD = 'Tracing.dataCollected'
- TRACING_DONE_EVENT = 'Tracing.tracingComplete'
- TRACING_STREAM_EVENT = 'Tracing.tracingComplete' # Same as TRACING_DONE.
- TRACING_TIMEOUT = 300
- HTTP_ATTEMPTS = 10
- HTTP_ATTEMPT_INTERVAL_SECONDS = 0.1
-
- def __init__(self, hostname, port):
- """Initializes the connection with a DevTools server.
-
- Args:
- hostname: server hostname.
- port: port number.
- """
- self._http_hostname = hostname
- self._http_port = port
- self._event_listeners = {}
- self._domain_listeners = {}
- self._scoped_states = {}
- self._domains_to_enable = set()
- self._tearing_down_tracing = False
- self._ws = None
- self._target_descriptor = None
- self._stop_delay_multiplier = 0
- self._monitoring_start_timestamp = None
- self._monitoring_stop_timestamp = None
-
- self._Connect()
- self.RegisterListener('Inspector.targetCrashed', self)
-
- def RegisterListener(self, name, listener):
- """Registers a listener for an event.
-
- Also takes care of enabling the relevant domain before starting monitoring.
-
- Args:
- name: (str) Domain or event the listener wants to listen to, e.g.
- "Network.requestWillBeSent" or "Tracing".
- listener: (Listener) listener instance.
- """
- if '.' in name:
- domain = name[:name.index('.')]
- self._event_listeners[name] = listener
- else:
- domain = name
- self._domain_listeners[domain] = listener
- self._domains_to_enable.add(domain)
-
- def UnregisterListener(self, listener):
- """Unregisters a listener.
-
- Args:
- listener: (Listener) listener to unregister.
- """
- keys = ([k for k, l in self._event_listeners if l is listener] +
- [k for k, l in self._domain_listeners if l is listener])
- assert keys, "Removing non-existent listener"
- for key in keys:
- if key in self._event_listeners:
- del(self._event_listeners[key])
- if key in self._domain_listeners:
- del(self._domain_listeners[key])
-
- def SetScopedState(self, method, params, default_params, enable_domain):
- """Changes state at the beginning the monitoring and resets it at the end.
-
- |method| is called with |params| at the beginning of the monitoring. After
- the monitoring completes, the state is reset by calling |method| with
- |default_params|.
-
- Args:
- method: (str) Method.
- params: (dict) Parameters to set when the monitoring starts.
- default_params: (dict) Parameters to reset the state at the end.
- enable_domain: (bool) True if enabling the domain is required.
- """
- if enable_domain:
- if '.' in method:
- domain = method[:method.index('.')]
- assert domain, 'No valid domain'
- self._domains_to_enable.add(domain)
- scoped_state_value = (params, default_params)
- if self._scoped_states.has_key(method):
- assert self._scoped_states[method] == scoped_state_value
- else:
- self._scoped_states[method] = scoped_state_value
-
- def SyncRequest(self, method, params=None):
- """Issues a synchronous request to the DevTools server.
-
- Args:
- method: (str) Method.
- params: (dict) Optional parameters to the request.
-
- Returns:
- The answer.
- """
- request = {'method': method}
- if params:
- request['params'] = params
- return self._ws.SyncRequest(request, timeout=_WEBSOCKET_TIMEOUT_SECONDS)
-
- def SendAndIgnoreResponse(self, method, params=None):
- """Issues a request to the DevTools server, do not wait for the response.
-
- Args:
- method: (str) Method.
- params: (dict) Optional parameters to the request.
- """
- request = {'method': method}
- if params:
- request['params'] = params
- self._ws.SendAndIgnoreResponse(request)
-
- def SyncRequestNoResponse(self, method, params=None):
- """As SyncRequest, but asserts that no meaningful response was received.
-
- Args:
- method: (str) Method.
- params: (dict) Optional parameters to the request.
- """
- result = self.SyncRequest(method, params)
- if 'error' in result or ('result' in result and
- result['result']):
- raise DevToolsConnectionException(
- 'Unexpected response for %s: %s' % (method, result))
-
- def ClearCache(self):
- """Clears buffer cache.
-
- Will assert that the browser supports cache clearing.
- """
- res = self.SyncRequest('Network.canClearBrowserCache')
- assert res['result'], 'Cache clearing is not supported by this browser.'
- self.SyncRequest('Network.clearBrowserCache')
-
- def MonitorUrl(self, url, timeout_seconds=DEFAULT_TIMEOUT_SECONDS,
- stop_delay_multiplier=0):
- """Navigate to url and dispatch monitoring loop.
-
- Unless you have registered a listener that will call StopMonitoring, this
- will run until timeout from chrome.
-
- Args:
- url: (str) a URL to navigate to before starting monitoring loop.
- timeout_seconds: timeout in seconds for monitoring loop.
- stop_delay_multiplier: (float) How long to wait after page load completed
- before tearing down, relative to the time it took to reach the page load
- to complete.
- """
- for domain in self._domains_to_enable:
- self._ws.RegisterDomain(domain, self._OnDataReceived)
- if domain != self.TRACING_DOMAIN:
- self.SyncRequestNoResponse('%s.enable' % domain)
- # Tracing setup must be done by the tracing track to control filtering
- # and output.
- for scoped_state in self._scoped_states:
- self.SyncRequestNoResponse(scoped_state,
- self._scoped_states[scoped_state][0])
- self._tearing_down_tracing = False
-
- logging.info('Navigate to %s' % url)
- self.SendAndIgnoreResponse('Page.navigate', {'url': url})
- self._monitoring_start_timestamp = datetime.datetime.now()
- self._Dispatch(timeout=timeout_seconds,
- stop_delay_multiplier=stop_delay_multiplier)
- self._monitoring_start_timestamp = None
- logging.info('Tearing down monitoring.')
- self._TearDownMonitoring()
-
- def StopMonitoring(self):
- """Sets the timestamp when to stop monitoring.
-
- Args:
- address_delayed_stop: Whether the MonitorUrl()'s stop_delay_multiplier
- should be addressed or not.
- """
- if self._stop_delay_multiplier == 0:
- self._StopMonitoringImmediately()
- elif self._monitoring_stop_timestamp is None:
- assert self._monitoring_start_timestamp is not None
- current_time = datetime.datetime.now()
- stop_delay_duration_seconds = self._stop_delay_multiplier * (
- current_time - self._monitoring_start_timestamp).seconds
- logging.info('Delaying monitoring stop for %.1fs',
- stop_delay_duration_seconds)
- self._monitoring_stop_timestamp = (
- current_time + datetime.timedelta(
- seconds=stop_delay_duration_seconds))
-
- def ExecuteJavaScript(self, expression):
- """Run JavaScript expression.
-
- Args:
- expression: JavaScript expression to run.
-
- Returns:
- The return value from the JavaScript expression.
- """
- # Note: Clients may be tempted to do naive string interpolation to inject
- # Python values into the JavaScript expression, which could lead to syntax
- # errors during evaluation (e.g. injecting strings with special characters).
- # If this becomes an issue, consider extending the interface of this method
- # as in: https://github.com/catapult-project/catapult/issues/3028
- response = self.SyncRequest('Runtime.evaluate', {
- 'expression': expression,
- 'returnByValue': True})
- if 'error' in response:
- raise Exception(response['error']['message'])
- if 'wasThrown' in response['result'] and response['result']['wasThrown']:
- raise Exception(response['error']['result']['description'])
- if response['result']['result']['type'] == 'undefined':
- return None
- return response['result']['result']['value']
-
- def PollForJavaScriptExpression(self, expression, interval):
- """Wait until JavaScript expression is true.
-
- Args:
- expression: JavaScript expression to run.
- interval: Period between expression evaluation in seconds.
- """
- common_util.PollFor(lambda: bool(self.ExecuteJavaScript(expression)),
- 'JavaScript: {}'.format(expression),
- interval)
-
- def Close(self):
- """Cleanly close chrome by closing the only tab."""
- assert self._ws
- response = self._HttpRequest('/close/' + self._target_descriptor['id'])
- assert response == 'Target is closing'
- self._ws = None
-
- def _StopMonitoringImmediately(self):
- self._monitoring_stop_timestamp = datetime.datetime.now()
-
- def _Dispatch(self, timeout, kind='Monitoring', stop_delay_multiplier=0):
- self._monitoring_stop_timestamp = None
- self._stop_delay_multiplier = stop_delay_multiplier
- while True:
- try:
- self._ws.DispatchNotifications(timeout=timeout)
- except websocket.WebSocketTimeoutException:
- if self._monitoring_stop_timestamp is None:
- logging.warning('%s stopped on a timeout.' % kind)
- break
- if self._monitoring_stop_timestamp:
- # After the first timeout reduce the timeout to check when to stop
- # monitoring more often, because the page at this moment can already be
- # loaded and not many events would be arriving from it.
- timeout = 1
- if datetime.datetime.now() >= self._monitoring_stop_timestamp:
- break
-
- def Handle(self, method, event):
- del event # unused
- if method == 'Inspector.targetCrashed':
- raise DevToolsConnectionTargetCrashed('Renderer crashed.')
-
- def _TearDownMonitoring(self):
- if self.TRACING_DOMAIN in self._domains_to_enable:
- logging.info('Fetching tracing')
- self.SyncRequestNoResponse(self.TRACING_END_METHOD)
- self._tearing_down_tracing = True
- self._Dispatch(timeout=self.TRACING_TIMEOUT, kind='Tracing')
- for scoped_state in self._scoped_states:
- self.SyncRequestNoResponse(scoped_state,
- self._scoped_states[scoped_state][1])
- for domain in self._domains_to_enable:
- if domain != self.TRACING_DOMAIN:
- self.SyncRequest('%s.disable' % domain)
- self._ws.UnregisterDomain(domain)
- self._domains_to_enable.clear()
- self._domain_listeners.clear()
- self._event_listeners.clear()
- self._scoped_states.clear()
-
- def _OnDataReceived(self, msg):
- if 'method' not in msg:
- raise DevToolsConnectionException('Malformed message: %s' % msg)
- method = msg['method']
- domain = method[:method.index('.')]
-
- if self._tearing_down_tracing and method == self.TRACING_STREAM_EVENT:
- stream_handle = msg.get('params', {}).get('stream')
- if not stream_handle:
- self._tearing_down_tracing = False
- self._StopMonitoringImmediately()
- # Fall through to regular dispatching.
- else:
- _StreamReader(self._ws, stream_handle).Read(self._TracingStreamDone)
- # Skip regular dispatching.
- return
-
- if (method not in self._event_listeners and
- domain not in self._domain_listeners):
- return
- if method in self._event_listeners:
- self._event_listeners[method].Handle(method, msg)
- if domain in self._domain_listeners:
- self._domain_listeners[domain].Handle(method, msg)
- if self._tearing_down_tracing and method == self.TRACING_DONE_EVENT:
- self._tearing_down_tracing = False
- self._StopMonitoringImmediately()
-
- def _TracingStreamDone(self, data):
- tracing_events = json.loads(data)
- for evt in tracing_events:
- self._OnDataReceived({'method': self.TRACING_DATA_METHOD,
- 'params': {'value': [evt]}})
- if self._please_stop:
- break
- self._tearing_down_tracing = False
- self._StopMonitoringImmediately()
-
- def _HttpRequest(self, path):
- assert path[0] == '/'
- for _ in xrange(self.HTTP_ATTEMPTS):
- r = httplib.HTTPConnection(self._http_hostname, self._http_port)
- try:
- r.request('GET', '/json' + path)
- response = r.getresponse()
- if response.status != 200:
- raise DevToolsConnectionException(
- 'Cannot connect to DevTools, reponse code %d' % response.status)
- return response.read()
- except httplib.BadStatusLine as exception:
- logging.warning('Devtools HTTP connection failed: %s' % repr(exception))
- time.sleep(self.HTTP_ATTEMPT_INTERVAL_SECONDS)
- finally:
- r.close()
- # Raise the exception that has failed the last attempt.
- raise
-
- def _Connect(self):
- assert not self._ws
- assert not self._target_descriptor
- for target_descriptor in json.loads(self._HttpRequest('/list')):
- if target_descriptor['type'] == 'page':
- self._target_descriptor = target_descriptor
- break
- if not self._target_descriptor:
- raise DevToolsConnectionException(
- 'No pages are open, connected to a wrong instance?')
- if self._target_descriptor['url'] != 'about:blank':
- raise DevToolsConnectionException(
- 'Looks like devtools connection was made to a different instance.')
- self._ws = inspector_websocket.InspectorWebsocket()
- self._ws.Connect(self._target_descriptor['webSocketDebuggerUrl'],
- timeout=_WEBSOCKET_TIMEOUT_SECONDS)
-
-
-class Listener(object):
- """Listens to events forwarded by a DevToolsConnection instance."""
- def __init__(self, connection):
- """Initializes a Listener instance.
-
- Args:
- connection: (DevToolsConnection).
- """
- pass
-
- def Handle(self, method, msg):
- """Handles an event this instance listens for.
-
- Args:
- event_name: (str) Event name, as registered.
- event: (dict) complete event.
- """
- raise NotImplementedError
-
-
-class Track(Listener):
- """Collects data from a DevTools server."""
- def GetEvents(self):
- """Returns a list of collected events, finalizing the state if necessary."""
- raise NotImplementedError
-
- def ToJsonDict(self):
- """Serializes to a dictionary, to be dumped as JSON.
-
- Returns:
- A dict that can be dumped by the json module, and loaded by
- FromJsonDict().
- """
- raise NotImplementedError
-
- @classmethod
- def FromJsonDict(cls, _json_dict):
- """Returns a Track instance constructed from data dumped by
- Track.ToJsonDict().
-
- Args:
- json_data: (dict) Parsed from a JSON file using the json module.
-
- Returns:
- a Track instance.
- """
- # There is no sensible way to deserialize this abstract class, but
- # subclasses are not required to define a deserialization method. For
- # example, for testing we have a FakeRequestTrack which is never
- # deserialized; instead fake instances are deserialized as RequestTracks.
- assert False
diff --git a/loading/emulation.py b/loading/emulation.py
deleted file mode 100644
index d7f05c6..0000000
--- a/loading/emulation.py
+++ /dev/null
@@ -1,124 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-"""Device and network emulation utilities via devtools."""
-
-import json
-
-# Copied from
-# WebKit/Source/devtools/front_end/network/NetworkConditionsSelector.js
-# Units:
-# download/upload: byte/s
-# latency: ms
-NETWORK_CONDITIONS = {
- 'GPRS': {
- 'download': 50 * 1024 / 8, 'upload': 20 * 1024 / 8, 'latency': 500},
- 'Regular2G': {
- 'download': 250 * 1024 / 8, 'upload': 50 * 1024 / 8, 'latency': 300},
- 'Good2G': {
- 'download': 450 * 1024 / 8, 'upload': 150 * 1024 / 8, 'latency': 150},
- 'Regular3G': {
- 'download': 750 * 1024 / 8, 'upload': 250 * 1024 / 8, 'latency': 100},
- 'Good3G': {
- 'download': 1.5 * 1024 * 1024 / 8, 'upload': 750 * 1024 / 8,
- 'latency': 40},
- 'Regular4G': {
- 'download': 4 * 1024 * 1024 / 8, 'upload': 3 * 1024 * 1024 / 8,
- 'latency': 20},
- 'DSL': {
- 'download': 2 * 1024 * 1024 / 8, 'upload': 1 * 1024 * 1024 / 8,
- 'latency': 5},
- 'WiFi': {
- 'download': 30 * 1024 * 1024 / 8, 'upload': 15 * 1024 * 1024 / 8,
- 'latency': 2}
-}
-
-
-def LoadEmulatedDevices(registry):
- """Loads a list of emulated devices from the DevTools JSON registry.
-
- See, for example, third_party/WebKit/Source/devtools/front_end
- /emulated_devices/module.json.
-
- Args:
- registry: A file-like object for the device registry (should be JSON).
-
- Returns:
- {'device_name': device}
- """
- json_dict = json.load(registry)
- devices = {}
- for device in json_dict['extensions']:
- device = device['device']
- devices[device['title']] = device
- return devices
-
-
-def SetUpDeviceEmulationAndReturnMetadata(connection, device):
- """Configures an instance of Chrome for device emulation.
-
- Args:
- connection: (DevToolsConnection)
- device: (dict) An entry from LoadEmulatedDevices().
-
- Returns:
- A dict containing the device emulation metadata.
- """
- res = connection.SyncRequest('Emulation.canEmulate')
- assert res['result'], 'Cannot set device emulation.'
- data = _GetDeviceEmulationMetadata(device)
- connection.SyncRequestNoResponse(
- 'Emulation.setDeviceMetricsOverride',
- {'width': data['width'],
- 'height': data['height'],
- 'deviceScaleFactor': data['deviceScaleFactor'],
- 'mobile': data['mobile'],
- 'fitWindow': True})
- connection.SyncRequestNoResponse('Network.setUserAgentOverride',
- {'userAgent': data['userAgent']})
- return data
-
-
-def SetUpNetworkEmulation(connection, latency, download, upload):
- """Configures an instance of Chrome for network emulation.
-
- See NETWORK_CONDITIONS for example (or valid?) emulation options.
-
- Args:
- connection: (DevToolsConnection)
- latency: (float) Latency in ms.
- download: (float) Download speed (Bytes / s).
- upload: (float) Upload speed (Bytes / s).
- """
- res = connection.SyncRequest('Network.canEmulateNetworkConditions')
- assert res['result'], 'Cannot set network emulation.'
- connection.SyncRequestNoResponse(
- 'Network.emulateNetworkConditions',
- {'offline': False, 'latency': latency, 'downloadThroughput': download,
- 'uploadThroughput': upload})
-
-
-def BandwidthToString(bandwidth):
- """Converts a bandwidth to string.
-
- Args:
- bandwidth: The bandwidth to convert in byte/s. Must be a multiple of 1024/8.
-
- Returns:
- A string compatible with wpr --{up,down} command line flags.
- """
- assert bandwidth % (1024/8) == 0
- bandwidth_kbps = (int(bandwidth) * 8) / 1024
- if bandwidth_kbps % 1024:
- return '{}Kbit/s'.format(bandwidth_kbps)
- return '{}Mbit/s'.format(bandwidth_kbps / 1024)
-
-
-def _GetDeviceEmulationMetadata(device):
- """Returns the metadata associated with a given device."""
- return {'width': device['screen']['vertical']['width'],
- 'height': device['screen']['vertical']['height'],
- 'deviceScaleFactor': device['screen']['device-pixel-ratio'],
- 'mobile': 'mobile' in device['capabilities'],
- 'userAgent': device['user-agent']}
diff --git a/loading/emulation_unittest.py b/loading/emulation_unittest.py
deleted file mode 100644
index 44bd5ff..0000000
--- a/loading/emulation_unittest.py
+++ /dev/null
@@ -1,81 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-import os
-from StringIO import StringIO
-import unittest
-
-import emulation
-import test_utils
-
-
-_SRC_DIR = os.path.abspath(os.path.join(
- os.path.dirname(__file__), '..', '..', '..'))
-
-
-class EmulationTestCase(unittest.TestCase):
- def testLoadDevices(self):
- devices = emulation.LoadEmulatedDevices(file(os.path.join(
- _SRC_DIR, 'third_party/blink/renderer/devtools/front_end',
- 'emulated_devices/module.json')))
- # Just check we have something. We'll assume that if we were able to read
- # the file without dying we must be ok.
- self.assertTrue(devices)
-
- def testSetUpDevice(self):
- registry = StringIO("""{
- "extensions": [
- {
- "type": "emulated-device",
- "device": {
- "show-by-default": false,
- "title": "mattPhone" ,
- "screen": {
- "horizontal": {
- "width": 480,
- "height": 320
- },
- "device-pixel-ratio": 2,
- "vertical": {
- "width": 320,
- "height": 480
- }
- },
- "capabilities": [
- "touch",
- "mobile"
- ],
- "user-agent": "James Bond"
- }
- } ]}""")
- devices = emulation.LoadEmulatedDevices(registry)
- connection = test_utils.MockConnection(self)
- connection.ExpectSyncRequest({'result': True}, 'Emulation.canEmulate')
- metadata = emulation.SetUpDeviceEmulationAndReturnMetadata(
- connection, devices['mattPhone'])
- self.assertEqual(320, metadata['width'])
- self.assertEqual('James Bond', metadata['userAgent'])
- self.assertTrue(connection.AllExpectationsUsed())
- self.assertEqual('Emulation.setDeviceMetricsOverride',
- connection.no_response_requests_seen[0][0])
-
- def testSetUpNetwork(self):
- connection = test_utils.MockConnection(self)
- connection.ExpectSyncRequest({'result': True},
- 'Network.canEmulateNetworkConditions')
- emulation.SetUpNetworkEmulation(connection, 120, 2048, 1024)
- self.assertTrue(connection.AllExpectationsUsed())
- self.assertEqual('Network.emulateNetworkConditions',
- connection.no_response_requests_seen[0][0])
- self.assertEqual(
- 1024,
- connection.no_response_requests_seen[0][1]['uploadThroughput'])
-
- def testBandwidthToString(self):
- self.assertEqual('16Kbit/s', emulation.BandwidthToString(2048))
- self.assertEqual('8Mbit/s', emulation.BandwidthToString(1024 * 1024))
-
-
-if __name__ == '__main__':
- unittest.main()
diff --git a/loading/frame_load_lens.py b/loading/frame_load_lens.py
deleted file mode 100644
index e0479ab..0000000
--- a/loading/frame_load_lens.py
+++ /dev/null
@@ -1,109 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-"""Gathers and infers dependencies between requests.
-
-When executed as a script, loads a trace and outputs synthetic frame load nodes
-and the new introduced dependencies.
-"""
-
-import bisect
-import collections
-import logging
-import operator
-
-import loading_trace
-
-
-class FrameLoadLens(object):
- """Analyses and creates request dependencies for inferred frame events."""
- _FRAME_EVENT = 'RenderFrameImpl::didFinishLoad'
- _REQUEST_TO_LOAD_GAP_MSEC = 100
- _LOAD_TO_REQUEST_GAP_MSEC = 100
- def __init__(self, trace):
- """Instance initialization.
-
- Args:
- trace: (LoadingTrace) Loading trace.
- """
- self._frame_load_events = self._GetFrameLoadEvents(trace.tracing_track)
- self._request_track = trace.request_track
- self._tracing_track = trace.tracing_track
- self._load_dependencies = []
- self._request_dependencies = []
- for i, load in enumerate(self._frame_load_events):
- self._load_dependencies.extend(
- [(i, r) for r in self._GetLoadDependencies(load)])
- self._request_dependencies.extend(
- [(r, i) for r in self._GetRequestDependencies(load)])
-
- def GetFrameLoadInfo(self):
- """Returns [(index, msec)]."""
- return [collections.namedtuple('LoadInfo', ['index', 'msec'])._make(
- (i, self._frame_load_events[i].start_msec))
- for i in xrange(len(self._frame_load_events))]
-
- def GetFrameResourceComplete(self, request_track):
- """Returns [(frame id, msec)]."""
- frame_to_end_msec = collections.defaultdict(int)
- for r in request_track.GetEvents():
- if r.end_msec > frame_to_end_msec[r.frame_id]:
- frame_to_end_msec[r.frame_id] = r.end_msec
- loads = []
- for f in sorted(frame_to_end_msec.keys()):
- loads.append((f, frame_to_end_msec[f]))
- return loads
-
- def GetFrameLoadDependencies(self):
- """Returns a list of frame load dependencies.
-
- Returns:
- ([(frame load index, request), ...],
- [(request, frame load index), ...]), where request are instances of
- request_trace.Request, and frame load index is an integer. The first list
- in the tuple gives the requests that are dependent on the given frame
- load, and the second list gives the frame loads that are dependent on the
- given request.
- """
- return (self._load_dependencies, self._request_dependencies)
-
- def _GetFrameLoadEvents(self, tracing_track):
- events = []
- for e in tracing_track.GetEvents():
- if e.tracing_event['name'] == self._FRAME_EVENT:
- events.append(e)
- return events
-
- def _GetLoadDependencies(self, load):
- for r in self._request_track.GetEventsStartingBetween(
- load.start_msec, load.start_msec + self._LOAD_TO_REQUEST_GAP_MSEC):
- yield r
-
- def _GetRequestDependencies(self, load):
- for r in self._request_track.GetEventsEndingBetween(
- load.start_msec - self._REQUEST_TO_LOAD_GAP_MSEC, load.start_msec):
- yield r
-
-
-if __name__ == '__main__':
- import loading_trace
- import json
- import sys
- lens = FrameLoadLens(loading_trace.LoadingTrace.FromJsonDict(
- json.load(open(sys.argv[1]))))
- load_times = lens.GetFrameLoadInfo()
- for t in load_times:
- print t
- print (lens._request_track.GetFirstRequestMillis(),
- lens._request_track.GetLastRequestMillis())
- load_dep, request_dep = lens.GetFrameLoadDependencies()
- rq_str = lambda r: '%s (%d-%d)' % (
- r.request_id,
- r.start_msec - lens._request_track.GetFirstRequestMillis(),
- r.end_msec - lens._request_track.GetFirstRequestMillis())
- load_str = lambda i: '%s (%d)' % (i, load_times[i][1])
- for load_idx, request in load_dep:
- print '%s -> %s' % (load_str(load_idx), rq_str(request))
- for request, load_idx in request_dep:
- print '%s -> %s' % (rq_str(request), load_str(load_idx))
diff --git a/loading/graph.py b/loading/graph.py
deleted file mode 100644
index 684fee6..0000000
--- a/loading/graph.py
+++ /dev/null
@@ -1,277 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-"""Support for graphs."""
-
-import collections
-
-import common_util
-
-
-class Node(object):
- """A node in a Graph.
-
- Nodes are identified within a graph using object identity.
- """
- def __init__(self):
- """Create a new node."""
- self.cost = 0
-
- def ToJsonDict(self):
- return common_util.SerializeAttributesToJsonDict({}, self, ['cost'])
-
- @classmethod
- def FromJsonDict(cls, json_dict):
- return common_util.DeserializeAttributesFromJsonDict(
- json_dict, cls(), ['cost'])
-
-
-class Edge(object):
- """Represents an edge in a graph."""
- def __init__(self, from_node, to_node):
- """Creates an Edge.
-
- Args:
- from_node: (Node) Start node.
- to_node: (Node) End node.
- """
- self.from_node = from_node
- self.to_node = to_node
- self.cost = 0
-
- def ToJsonDict(self):
- return common_util.SerializeAttributesToJsonDict(
- {}, self, ['from_node', 'to_node', 'cost'])
-
- @classmethod
- def FromJsonDict(cls, json_dict):
- result = cls(None, None)
- return common_util.DeserializeAttributesFromJsonDict(
- json_dict, result, ['from_node', 'to_node', 'cost'])
-
-
-class DirectedGraph(object):
- """Directed graph.
-
- A graph is identified by a list of nodes and a list of edges. It does not need
- to be acyclic, but then some methods will fail.
- """
- __GRAPH_NODE_INDEX = '__graph_node_index'
- __TO_NODE_INDEX = '__to_node_index'
- __FROM_NODE_INDEX = '__from_node_index'
-
- def __init__(self, nodes, edges):
- """Builds a graph from a set of node and edges.
-
- Note that the edges referencing a node not in the provided list are dropped.
-
- Args:
- nodes: ([Node]) Sequence of Nodes.
- edges: ([Edge]) Sequence of Edges.
- """
- self._nodes = set(nodes)
- self._edges = set(filter(
- lambda e: e.from_node in self._nodes and e.to_node in self._nodes,
- edges))
- assert all(isinstance(node, Node) for node in self._nodes)
- assert all(isinstance(edge, Edge) for edge in self._edges)
- self._in_edges = {n: [] for n in self._nodes}
- self._out_edges = {n: [] for n in self._nodes}
- for edge in self._edges:
- self._out_edges[edge.from_node].append(edge)
- self._in_edges[edge.to_node].append(edge)
-
- def OutEdges(self, node):
- """Returns a list of edges starting from a node.
- """
- return self._out_edges[node]
-
- def InEdges(self, node):
- """Returns a list of edges ending at a node."""
- return self._in_edges[node]
-
- def Nodes(self):
- """Returns the set of nodes of this graph."""
- return self._nodes
-
- def Edges(self):
- """Returns the set of edges of this graph."""
- return self._edges
-
- def RootNodes(self):
- """Returns an iterable of nodes that have no incoming edges."""
- return filter(lambda n: not self.InEdges(n), self._nodes)
-
- def UpdateEdge(self, edge, new_from_node, new_to_node):
- """Updates an edge.
-
- Args:
- edge:
- new_from_node:
- new_to_node:
- """
- assert edge in self._edges
- assert new_from_node in self._nodes
- assert new_to_node in self._nodes
- self._in_edges[edge.to_node].remove(edge)
- self._out_edges[edge.from_node].remove(edge)
- edge.from_node = new_from_node
- edge.to_node = new_to_node
- # TODO(lizeb): Check for duplicate edges?
- self._in_edges[edge.to_node].append(edge)
- self._out_edges[edge.from_node].append(edge)
-
- def TopologicalSort(self, roots=None):
- """Returns a list of nodes, in topological order.
-
- Args:
- roots: ([Node]) If set, the topological sort will only consider nodes
- reachable from this list of sources.
- """
- sorted_nodes = []
- if roots is None:
- nodes_subset = self._nodes
- else:
- nodes_subset = self.ReachableNodes(roots)
- remaining_in_edges = {n: 0 for n in nodes_subset}
- for edge in self._edges:
- if edge.from_node in nodes_subset and edge.to_node in nodes_subset:
- remaining_in_edges[edge.to_node] += 1
- sources = [node for (node, count) in remaining_in_edges.items()
- if count == 0]
- while sources:
- node = sources.pop(0)
- sorted_nodes.append(node)
- for e in self.OutEdges(node):
- successor = e.to_node
- if successor not in nodes_subset:
- continue
- assert remaining_in_edges[successor] > 0
- remaining_in_edges[successor] -= 1
- if remaining_in_edges[successor] == 0:
- sources.append(successor)
- return sorted_nodes
-
- def ReachableNodes(self, roots, should_stop=lambda n: False):
- """Returns a list of nodes from a set of root nodes.
-
- Args:
- roots: ([Node]) List of roots to start from.
- should_stop: (callable) Returns True when a node should stop the
- exploration and be skipped.
- """
- return self._ExploreFrom(
- roots, lambda n: (e.to_node for e in self.OutEdges(n)),
- should_stop=should_stop)
-
- def AncestorNodes(self, descendants):
- """Returns a set of nodes that are ancestors of a set of nodes.
-
- This is not quite the opposite of ReachableNodes, because (in a tree) it
- will not include |descendants|.
-
- Args:
- descendants: ([Node]) List of nodes to start from.
-
- """
- return set(self._ExploreFrom(
- descendants,
- lambda n: (e.from_node for e in self.InEdges(n)))) - set(descendants)
-
- def Cost(self, roots=None, path_list=None, costs_out=None):
- """Compute the cost of the graph.
-
- Args:
- roots: ([Node]) If set, only compute the cost of the paths reachable
- from this list of nodes.
- path_list: if not None, gets a list of nodes in the longest path.
- costs_out: if not None, gets a vector of node costs by node.
-
- Returns:
- Cost of the longest path.
- """
- if not self._nodes:
- return 0
- costs = {n: 0 for n in self._nodes}
- for node in self.TopologicalSort(roots):
- cost = 0
- if self.InEdges(node):
- cost = max([costs[e.from_node] + e.cost for e in self.InEdges(node)])
- costs[node] = cost + node.cost
- max_cost = max(costs.values())
- if costs_out is not None:
- del costs_out[:]
- costs_out.extend(costs)
- if path_list is not None:
- del path_list[:]
- node = (i for i in self._nodes if costs[i] == max_cost).next()
- path_list.append(node)
- while self.InEdges(node):
- predecessors = [e.from_node for e in self.InEdges(node)]
- node = reduce(
- lambda costliest_node, next_node:
- next_node if costs[next_node] > costs[costliest_node]
- else costliest_node, predecessors)
- path_list.insert(0, node)
- return max_cost
-
- def ToJsonDict(self):
- node_dicts = []
- node_to_index = {node: index for (index, node) in enumerate(self._nodes)}
- for (node, index) in node_to_index.items():
- node_dict = node.ToJsonDict()
- assert self.__GRAPH_NODE_INDEX not in node_dict
- node_dict.update({self.__GRAPH_NODE_INDEX: index})
- node_dicts.append(node_dict)
- edge_dicts = []
- for edge in self._edges:
- edge_dict = edge.ToJsonDict()
- assert self.__TO_NODE_INDEX not in edge_dict
- assert self.__FROM_NODE_INDEX not in edge_dict
- edge_dict.update({self.__TO_NODE_INDEX: node_to_index[edge.to_node],
- self.__FROM_NODE_INDEX: node_to_index[edge.from_node]})
- edge_dicts.append(edge_dict)
- return {'nodes': node_dicts, 'edges': edge_dicts}
-
- @classmethod
- def FromJsonDict(cls, json_dict, node_class, edge_class):
- """Returns an instance from a dict.
-
- Note that the classes of the nodes and edges need to be specified here.
- This is done to reduce the likelihood of error.
- """
- index_to_node = {
- node_dict[cls.__GRAPH_NODE_INDEX]: node_class.FromJsonDict(node_dict)
- for node_dict in json_dict['nodes']}
- edges = []
- for edge_dict in json_dict['edges']:
- edge = edge_class.FromJsonDict(edge_dict)
- edge.from_node = index_to_node[edge_dict[cls.__FROM_NODE_INDEX]]
- edge.to_node = index_to_node[edge_dict[cls.__TO_NODE_INDEX]]
- edges.append(edge)
- result = DirectedGraph(index_to_node.values(), edges)
- return result
-
- def _ExploreFrom(self, initial, expand, should_stop=lambda n: False):
- """Explore from a set of nodes.
-
- Args:
- initial: ([Node]) List of nodes to start from.
- expand: (callable) Given a node, return an iterator of nodes to explore
- from that node.
- should_stop: (callable) Returns True when a node should stop the
- exploration and be skipped.
- """
- visited = set()
- fifo = collections.deque([n for n in initial if not should_stop(n)])
- while fifo:
- node = fifo.pop()
- if should_stop(node):
- continue
- visited.add(node)
- for n in expand(node):
- if n not in visited and not should_stop(n):
- visited.add(n)
- fifo.appendleft(n)
- return list(visited)
diff --git a/loading/graph_unittest.py b/loading/graph_unittest.py
deleted file mode 100644
index 28e9e21..0000000
--- a/loading/graph_unittest.py
+++ /dev/null
@@ -1,228 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-import operator
-import os
-import sys
-import unittest
-
-import common_util
-import graph
-
-
-class _IndexedNode(graph.Node):
- def __init__(self, index=None):
- super(_IndexedNode, self).__init__()
- self.index = index
-
- def ToJsonDict(self):
- return common_util.SerializeAttributesToJsonDict(
- super(_IndexedNode, self).ToJsonDict(), self, ['index'])
-
- @classmethod
- def FromJsonDict(cls, json_dict):
- result = super(_IndexedNode, cls).FromJsonDict(json_dict)
- return common_util.DeserializeAttributesFromJsonDict(
- json_dict, result, ['index'])
-
-
-class GraphTestCase(unittest.TestCase):
- @classmethod
- def MakeGraph(cls, count, edge_tuples, serialize=False):
- """Makes a graph from a list of edges.
-
- Args:
- count: Number of nodes.
- edge_tuples: (from_index, to_index). Both indices must be in [0, count),
- and uniquely identify a node. Must be sorted
- lexicographically by node indices.
- """
- nodes = [_IndexedNode(i) for i in xrange(count)]
- edges = [graph.Edge(nodes[from_index], nodes[to_index])
- for (from_index, to_index) in edge_tuples]
- g = graph.DirectedGraph(nodes, edges)
- if serialize:
- g = graph.DirectedGraph.FromJsonDict(
- g.ToJsonDict(), _IndexedNode, graph.Edge)
- nodes = sorted(g.Nodes(), key=operator.attrgetter('index'))
- edges = sorted(g.Edges(), key=operator.attrgetter(
- 'from_node.index', 'to_node.index'))
- return (nodes, edges, g)
-
- @classmethod
- def _NodesIndices(cls, g):
- return map(operator.attrgetter('index'), g.Nodes())
-
- def testBuildGraph(self, serialize=False):
- (nodes, edges, g) = self.MakeGraph(
- 7,
- [(0, 1),
- (0, 2),
- (1, 3),
- (3, 4),
- (5, 6)], serialize)
- self.assertListEqual(range(7), sorted(self._NodesIndices(g)))
- self.assertSetEqual(set(edges), set(g.Edges()))
-
- self.assertSetEqual(set([edges[0], edges[1]]), set(g.OutEdges(nodes[0])))
- self.assertFalse(g.InEdges(nodes[0]))
- self.assertSetEqual(set([edges[2]]), set(g.OutEdges(nodes[1])))
- self.assertSetEqual(set([edges[0]]), set(g.InEdges(nodes[1])))
- self.assertFalse(g.OutEdges(nodes[2]))
- self.assertSetEqual(set([edges[1]]), set(g.InEdges(nodes[2])))
- self.assertSetEqual(set([edges[3]]), set(g.OutEdges(nodes[3])))
- self.assertSetEqual(set([edges[2]]), set(g.InEdges(nodes[3])))
- self.assertFalse(g.OutEdges(nodes[4]))
- self.assertSetEqual(set([edges[3]]), set(g.InEdges(nodes[4])))
- self.assertSetEqual(set([edges[4]]), set(g.OutEdges(nodes[5])))
- self.assertFalse(g.InEdges(nodes[5]))
- self.assertFalse(g.OutEdges(nodes[6]))
- self.assertSetEqual(set([edges[4]]), set(g.InEdges(nodes[6])))
-
- def testIgnoresUnknownEdges(self):
- nodes = [_IndexedNode(i) for i in xrange(7)]
- edges = [graph.Edge(nodes[from_index], nodes[to_index])
- for (from_index, to_index) in [
- (0, 1), (0, 2), (1, 3), (3, 4), (5, 6)]]
- edges.append(graph.Edge(nodes[4], _IndexedNode(42)))
- edges.append(graph.Edge(_IndexedNode(42), nodes[5]))
- g = graph.DirectedGraph(nodes, edges)
- self.assertListEqual(range(7), sorted(self._NodesIndices(g)))
- self.assertEqual(5, len(g.Edges()))
-
- def testUpdateEdge(self, serialize=False):
- (nodes, edges, g) = self.MakeGraph(
- 7,
- [(0, 1),
- (0, 2),
- (1, 3),
- (3, 4),
- (5, 6)], serialize)
- edge = edges[1]
- self.assertTrue(edge in g.OutEdges(nodes[0]))
- self.assertTrue(edge in g.InEdges(nodes[2]))
- g.UpdateEdge(edge, nodes[2], nodes[3])
- self.assertFalse(edge in g.OutEdges(nodes[0]))
- self.assertFalse(edge in g.InEdges(nodes[2]))
- self.assertTrue(edge in g.OutEdges(nodes[2]))
- self.assertTrue(edge in g.InEdges(nodes[3]))
-
- def testTopologicalSort(self, serialize=False):
- (_, edges, g) = self.MakeGraph(
- 7,
- [(0, 1),
- (0, 2),
- (1, 3),
- (3, 4),
- (5, 6)], serialize)
- sorted_nodes = g.TopologicalSort()
- node_to_sorted_index = dict(zip(sorted_nodes, xrange(len(sorted_nodes))))
- for e in edges:
- self.assertTrue(
- node_to_sorted_index[e.from_node] < node_to_sorted_index[e.to_node])
-
- def testReachableNodes(self, serialize=False):
- (nodes, _, g) = self.MakeGraph(
- 7,
- [(0, 1),
- (0, 2),
- (1, 3),
- (3, 4),
- (5, 6)], serialize)
- self.assertSetEqual(
- set([0, 1, 2, 3, 4]),
- set(n.index for n in g.ReachableNodes([nodes[0]])))
- self.assertSetEqual(
- set([0, 1, 2, 3, 4]),
- set(n.index for n in g.ReachableNodes([nodes[0], nodes[1]])))
- self.assertSetEqual(
- set([5, 6]),
- set(n.index for n in g.ReachableNodes([nodes[5]])))
- self.assertSetEqual(
- set([6]),
- set(n.index for n in g.ReachableNodes([nodes[6]])))
-
- def testAncestorNodes(self, serialize=False):
- (nodes, _, g) = self.MakeGraph(
- 7,
- [(0, 1),
- (0, 2),
- (1, 3),
- (3, 4),
- (5, 6)], serialize)
- self.assertSetEqual(
- set([0, 1, 3]),
- set(n.index for n in g.AncestorNodes([nodes[4]])))
- self.assertSetEqual(
- set([0, 1]),
- set(n.index for n in g.AncestorNodes([nodes[3]])))
- self.assertSetEqual(
- set([0]),
- set(n.index for n in g.AncestorNodes([nodes[1]])))
- self.assertSetEqual(
- set(),
- set(n.index for n in g.AncestorNodes([nodes[0]])))
- self.assertSetEqual(
- set([0]),
- set(n.index for n in g.AncestorNodes([nodes[2]])))
- self.assertSetEqual(
- set([5]),
- set(n.index for n in g.AncestorNodes([nodes[6]])))
- self.assertSetEqual(
- set(),
- set(n.index for n in g.AncestorNodes([nodes[5]])))
-
- def testCost(self, serialize=False):
- (nodes, edges, g) = self.MakeGraph(
- 7,
- [(0, 1),
- (0, 2),
- (1, 3),
- (3, 4),
- (5, 6)], serialize)
- for (i, node) in enumerate(nodes):
- node.cost = i + 1
- nodes[6].cost = 6
- for edge in edges:
- edge.cost = 1
- self.assertEqual(15, g.Cost())
- path_list = []
- g.Cost(path_list=path_list)
- self.assertListEqual([nodes[i] for i in (0, 1, 3, 4)], path_list)
- nodes[6].cost = 9
- self.assertEqual(16, g.Cost())
- g.Cost(path_list=path_list)
- self.assertListEqual([nodes[i] for i in (5, 6)], path_list)
-
- def testCostWithRoots(self, serialize=False):
- (nodes, edges, g) = self.MakeGraph(
- 7,
- [(0, 1),
- (0, 2),
- (1, 3),
- (3, 4),
- (5, 6)], serialize)
- for (i, node) in enumerate(nodes):
- node.cost = i + 1
- nodes[6].cost = 9
- for edge in edges:
- edge.cost = 1
- path_list = []
- self.assertEqual(16, g.Cost(path_list=path_list))
- self.assertListEqual([nodes[i] for i in (5, 6)], path_list)
- self.assertEqual(15, g.Cost(roots=[nodes[0]], path_list=path_list))
- self.assertListEqual([nodes[i] for i in (0, 1, 3, 4)], path_list)
-
- def testSerialize(self):
- # Re-do tests with a deserialized graph.
- self.testBuildGraph(True)
- self.testUpdateEdge(True)
- self.testTopologicalSort(True)
- self.testReachableNodes(True)
- self.testCost(True)
- self.testCostWithRoots(True)
-
-
-if __name__ == '__main__':
- unittest.main()
diff --git a/loading/loading_graph_view.py b/loading/loading_graph_view.py
deleted file mode 100644
index 67c993c..0000000
--- a/loading/loading_graph_view.py
+++ /dev/null
@@ -1,122 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-"""Views a trace as an annotated request dependency graph."""
-
-import dependency_graph
-import request_dependencies_lens
-
-
-class RequestNode(dependency_graph.RequestNode):
- """Represents a request in the graph.
-
- is_ad and is_tracking are set according to the ContentClassificationLens
- passed to LoadingGraphView.
- """
- def __init__(self, request):
- super(RequestNode, self).__init__(request)
- self.is_ad = False
- self.is_tracking = False
-
-
-class Edge(dependency_graph.Edge):
- """Represents a dependency between two nodes.
-
- activity is set according to the ActivityLens passed to LoadingGraphView.
- """
- def __init__(self, from_node, to_node, reason):
- super(Edge, self).__init__(from_node, to_node, reason)
- self.activity = {}
-
-
-class LoadingGraphView(object):
- """Represents a trace as a dependency graph. The graph is annotated using
- optional lenses passed to it.
- """
- def __init__(self, trace, dependencies_lens, content_lens=None,
- frame_lens=None, activity=None):
- """Initalizes a LoadingGraphView instance.
-
- Args:
- trace: (LoadingTrace) a loading trace.
- dependencies_lens: (RequestDependencyLens)
- content_lens: (ContentClassificationLens)
- frame_lens: (FrameLoadLens)
- activity: (ActivityLens)
- """
- self._requests = trace.request_track.GetEvents()
- self._deps_lens = dependencies_lens
- self._content_lens = content_lens
- self._frame_lens = frame_lens
- self._activity_lens = activity
- self._graph = None
- self._BuildGraph()
-
- @classmethod
- def FromTrace(cls, trace):
- """Create a graph from a trace with no additional annotation."""
- return cls(trace, request_dependencies_lens.RequestDependencyLens(trace))
-
- def RemoveAds(self):
- """Updates the graph to remove the Ads.
-
- Nodes that are only reachable through ad nodes are excluded as well.
- """
- roots = self._graph.graph.RootNodes()
- self._requests = [n.request for n in self._graph.graph.ReachableNodes(
- roots, should_stop=lambda n: n.is_ad or n.is_tracking)]
- self._BuildGraph()
-
- def GetInversionsAtTime(self, msec):
- """Return the inversions, if any for an event.
-
- An inversion is when a node is finished before an event, but an ancestor is
- not finished. For example, an image is loaded before a first paint, but the
- HTML which requested the image has not finished loading at the time of the
- paint due to incremental parsing.
-
- Args:
- msec: the time of the event, from the same base as requests.
-
- Returns:
- The inverted Requests, ordered by start time, or None if there is no
- inversion.
- """
- completed_requests = []
- for rq in self._requests:
- if rq.end_msec <= msec:
- completed_requests.append(rq)
- inversions = []
- for rq in self._graph.AncestorRequests(completed_requests):
- if rq.end_msec > msec:
- inversions.append(rq)
- if inversions:
- inversions.sort(key=lambda rq: rq.start_msec)
- return inversions
- return None
-
- @property
- def deps_graph(self):
- return self._graph
-
- def _BuildGraph(self):
- self._graph = dependency_graph.RequestDependencyGraph(
- self._requests, self._deps_lens, RequestNode, Edge)
- self._AnnotateNodes()
- self._AnnotateEdges()
-
- def _AnnotateNodes(self):
- if self._content_lens is None:
- return
- for node in self._graph.graph.Nodes():
- node.is_ad = self._content_lens.IsAdRequest(node.request)
- node.is_tracking = self._content_lens.IsTrackingRequest(node.request)
-
- def _AnnotateEdges(self):
- if self._activity_lens is None:
- return
- for edge in self._graph.graph.Edges():
- dep = (edge.from_node.request, edge.to_node.request, edge.reason)
- activity = self._activity_lens.BreakdownEdgeActivityByInitiator(dep)
- edge.activity = activity
diff --git a/loading/loading_graph_view_unittest.py b/loading/loading_graph_view_unittest.py
deleted file mode 100644
index d134d8e..0000000
--- a/loading/loading_graph_view_unittest.py
+++ /dev/null
@@ -1,107 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-import unittest
-
-import loading_graph_view
-import request_dependencies_lens
-from request_dependencies_lens_unittest import TestRequests
-
-
-class MockContentClassificationLens(object):
- def __init__(self, ad_request_ids, tracking_request_ids):
- self._ad_requests_ids = ad_request_ids
- self._tracking_request_ids = tracking_request_ids
-
- def IsAdRequest(self, request):
- return request.request_id in self._ad_requests_ids
-
- def IsTrackingRequest(self, request):
- return request.request_id in self._tracking_request_ids
-
-
-class LoadingGraphViewTestCase(unittest.TestCase):
- def setUp(self):
- super(LoadingGraphViewTestCase, self).setUp()
- self.trace = TestRequests.CreateLoadingTrace()
- self.deps_lens = request_dependencies_lens.RequestDependencyLens(self.trace)
-
- def testAnnotateNodesNoLenses(self):
- graph_view = loading_graph_view.LoadingGraphView(self.trace, self.deps_lens)
- for node in graph_view.deps_graph.graph.Nodes():
- self.assertFalse(node.is_ad)
- self.assertFalse(node.is_tracking)
- for edge in graph_view.deps_graph.graph.Edges():
- self.assertFalse(edge.is_timing)
-
- def testAnnotateNodesContentLens(self):
- ad_request_ids = set([TestRequests.JS_REQUEST_UNRELATED_FRAME.request_id])
- tracking_request_ids = set([TestRequests.JS_REQUEST.request_id])
- content_lens = MockContentClassificationLens(
- ad_request_ids, tracking_request_ids)
- graph_view = loading_graph_view.LoadingGraphView(self.trace, self.deps_lens,
- content_lens)
- for node in graph_view.deps_graph.graph.Nodes():
- request_id = node.request.request_id
- self.assertEqual(request_id in ad_request_ids, node.is_ad)
- self.assertEqual(request_id in tracking_request_ids, node.is_tracking)
-
- def testRemoveAds(self):
- ad_request_ids = set([TestRequests.JS_REQUEST_UNRELATED_FRAME.request_id])
- tracking_request_ids = set([TestRequests.JS_REQUEST.request_id])
- content_lens = MockContentClassificationLens(
- ad_request_ids, tracking_request_ids)
- graph_view = loading_graph_view.LoadingGraphView(self.trace, self.deps_lens,
- content_lens)
- graph_view.RemoveAds()
- request_ids = set([n.request.request_id
- for n in graph_view.deps_graph.graph.Nodes()])
- expected_request_ids = set([r.request_id for r in [
- TestRequests.FIRST_REDIRECT_REQUEST,
- TestRequests.SECOND_REDIRECT_REQUEST,
- TestRequests.REDIRECTED_REQUEST,
- TestRequests.REQUEST,
- TestRequests.JS_REQUEST_OTHER_FRAME]])
- self.assertSetEqual(expected_request_ids, request_ids)
-
- def testRemoveAdsPruneGraph(self):
- ad_request_ids = set([TestRequests.SECOND_REDIRECT_REQUEST.request_id])
- tracking_request_ids = set([])
- content_lens = MockContentClassificationLens(
- ad_request_ids, tracking_request_ids)
- graph_view = loading_graph_view.LoadingGraphView(
- self.trace, self.deps_lens, content_lens)
- graph_view.RemoveAds()
- request_ids = set([n.request.request_id
- for n in graph_view.deps_graph.graph.Nodes()])
- expected_request_ids = set(
- [TestRequests.FIRST_REDIRECT_REQUEST.request_id])
- self.assertSetEqual(expected_request_ids, request_ids)
-
- def testEventInversion(self):
- self._UpdateRequestTiming({
- '1234.redirect.1': (0, 0),
- '1234.redirect.2': (0, 0),
- '1234.1': (10, 100),
- '1234.12': (20, 50),
- '1234.42': (40, 70),
- '1234.56': (40, 150)})
- graph_view = loading_graph_view.LoadingGraphView(
- self.trace, self.deps_lens)
- self.assertEqual(None, graph_view.GetInversionsAtTime(40))
- self.assertEqual('1234.1', graph_view.GetInversionsAtTime(60)[0].request_id)
- self.assertEqual('1234.1', graph_view.GetInversionsAtTime(80)[0].request_id)
- self.assertEqual(None, graph_view.GetInversionsAtTime(110))
- self.assertEqual(None, graph_view.GetInversionsAtTime(160))
-
- def _UpdateRequestTiming(self, changes):
- for rq in self.trace.request_track.GetEvents():
- if rq.request_id in changes:
- start_msec, end_msec = changes[rq.request_id]
- rq.timing.request_time = float(start_msec) / 1000
- rq.timing.loading_finished = end_msec - start_msec
-
-
-if __name__ == '__main__':
- unittest.main()
diff --git a/loading/loading_graph_view_visualization.py b/loading/loading_graph_view_visualization.py
deleted file mode 100644
index 7a89f6b..0000000
--- a/loading/loading_graph_view_visualization.py
+++ /dev/null
@@ -1,198 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-"""Visualize a loading_graph_view.LoadingGraphView.
-
-When executed as a script, takes a loading trace and generates a png of the
-loading graph."""
-
-import activity_lens
-import request_track
-
-
-class LoadingGraphViewVisualization(object):
- """Manipulate visual representations of a request graph.
-
- Currently only DOT output is supported.
- """
- _LONG_EDGE_THRESHOLD_MS = 2000 # Time in milliseconds.
-
- _CONTENT_KIND_TO_COLOR = {
- 'application': 'blue', # Scripts.
- 'font': 'grey70',
- 'image': 'orange', # This probably catches gifs?
- 'video': 'hotpink1',
- 'audio': 'hotpink2',
- }
-
- _CONTENT_TYPE_TO_COLOR = {
- 'html': 'red',
- 'css': 'green',
- 'script': 'blue',
- 'javascript': 'blue',
- 'json': 'purple',
- 'gif': 'grey',
- 'image': 'orange',
- 'jpeg': 'orange',
- 'ping': 'cyan', # Empty response
- 'redirect': 'forestgreen',
- 'png': 'orange',
- 'plain': 'brown3',
- 'octet-stream': 'brown3',
- 'other': 'white',
- }
-
- _EDGE_REASON_TO_COLOR = {
- 'redirect': 'black',
- 'parser': 'red',
- 'script': 'blue',
- 'script_inferred': 'purple',
- }
-
- _ACTIVITY_TYPE_LABEL = (
- ('idle', 'I'), ('unrelated_work', 'W'), ('script', 'S'),
- ('parsing', 'P'), ('other_url', 'O'), ('unknown_url', 'U'))
-
- def __init__(self, graph_view):
- """Initialize.
-
- Args:
- graph_view: (loading_graph_view.LoadingGraphView) the graph to visualize.
- """
- self._graph_view = graph_view
- self._global_start = None
-
- def OutputDot(self, output):
- """Output DOT (graphviz) representation.
-
- Args:
- output: a file-like output stream to receive the dot file.
- """
- nodes = self._graph_view.deps_graph.graph.Nodes()
- self._global_start = min(n.request.start_msec for n in nodes)
- g = self._graph_view.deps_graph.graph
-
- output.write("""digraph dependencies {
- rankdir = LR;
- """)
-
- isolated_nodes = [
- n for n in nodes if (
- len(g.InEdges(n)) == 0 and len(g.OutEdges(n)) == 0)]
- if isolated_nodes:
- output.write("""subgraph cluster_isolated {
- color=black;
- label="Isolated Nodes";
- """)
- for n in isolated_nodes:
- output.write(self._DotNode(n))
- output.write('}\n')
-
- output.write("""subgraph cluster_nodes {
- color=invis;
- """)
- for n in nodes:
- if n in isolated_nodes:
- continue
- output.write(self._DotNode(n))
-
- edges = g.Edges()
- for edge in edges:
- output.write(self._DotEdge(edge))
-
- output.write('}\n')
- output.write('}\n')
-
- def _ContentTypeToColor(self, content_type):
- if not content_type:
- type_str = 'other'
- elif '/' in content_type:
- kind, type_str = content_type.split('/', 1)
- if kind in self._CONTENT_KIND_TO_COLOR:
- return self._CONTENT_KIND_TO_COLOR[kind]
- else:
- type_str = content_type
- return self._CONTENT_TYPE_TO_COLOR[type_str]
-
- def _DotNode(self, node):
- """Returns a graphviz node description for a given node.
-
- Args:
- node: (RequestNode)
-
- Returns:
- A string describing the resource in graphviz format.
- The resource is color-coded according to its content type, and its shape
- is oval if its max-age is less than 300s (or if it's not cacheable).
- """
- color = self._ContentTypeToColor(node.request.GetContentType())
- request = node.request
- max_age = request.MaxAge()
- shape = 'polygon' if max_age > 300 else 'oval'
- styles = ['filled']
- if node.is_ad or node.is_tracking:
- styles += ['bold', 'diagonals']
- return ('"%s" [label = "%s\\n%.2f->%.2f (%.2f)"; style = "%s"; '
- 'fillcolor = %s; shape = %s];\n'
- % (request.request_id, request_track.ShortName(request.url),
- request.start_msec - self._global_start,
- request.end_msec - self._global_start,
- request.end_msec - request.start_msec,
- ','.join(styles), color, shape))
-
- def _DotEdge(self, edge):
- """Returns a graphviz edge description for a given edge.
-
- Args:
- edge: (Edge)
-
- Returns:
- A string encoding the graphviz representation of the edge.
- """
- style = {'color': 'orange'}
- label = '%.02f' % edge.cost
- if edge.is_timing:
- style['style'] = 'dashed'
- style['color'] = self._EDGE_REASON_TO_COLOR[edge.reason]
- if edge.cost > self._LONG_EDGE_THRESHOLD_MS:
- style['penwidth'] = '5'
- style['weight'] = '2'
- style_str = '; '.join('%s=%s' % (k, v) for (k, v) in style.items())
-
- label = '%.02f' % edge.cost
- if edge.activity:
- separator = ' - '
- for activity_type, activity_label in self._ACTIVITY_TYPE_LABEL:
- label += '%s%s:%.02f ' % (
- separator, activity_label, edge.activity[activity_type])
- separator = ' '
- arrow = '[%s; label="%s"]' % (style_str, label)
- from_request_id = edge.from_node.request.request_id
- to_request_id = edge.to_node.request.request_id
- return '"%s" -> "%s" %s;\n' % (from_request_id, to_request_id, arrow)
-
-def main(trace_file):
- import subprocess
-
- import loading_graph_view
- import loading_trace
- import request_dependencies_lens
-
- trace = loading_trace.LoadingTrace.FromJsonFile(trace_file)
- dependencies_lens = request_dependencies_lens.RequestDependencyLens(trace)
- activity = activity_lens.ActivityLens(trace)
- graph_view = loading_graph_view.LoadingGraphView(trace, dependencies_lens,
- activity=activity)
- visualization = LoadingGraphViewVisualization(graph_view)
-
- dotfile = trace_file + '.dot'
- pngfile = trace_file + '.png'
- with file(dotfile, 'w') as output:
- visualization.OutputDot(output)
- subprocess.check_call(['dot', '-Tpng', dotfile, '-o', pngfile])
-
-
-if __name__ == '__main__':
- import sys
- main(sys.argv[1])
diff --git a/loading/loading_trace.py b/loading/loading_trace.py
deleted file mode 100644
index cad3f30..0000000
--- a/loading/loading_trace.py
+++ /dev/null
@@ -1,132 +0,0 @@
-# Copyright (c) 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-"""Represents the trace of a page load."""
-
-import datetime
-try:
- import ujson as json
-except ImportError:
- import json
-import time
-
-import devtools_monitor
-import page_track
-import request_track
-import tracing_track
-
-
-class LoadingTrace(object):
- """Represents the trace of a page load."""
- _URL_KEY = 'url'
- _METADATA_KEY = 'metadata'
- _PAGE_KEY = 'page_track'
- _REQUEST_KEY = 'request_track'
- _TRACING_KEY = 'tracing_track'
-
- def __init__(self, url, metadata, page, request, track):
- """Initializes a loading trace instance.
-
- Args:
- url: (str) URL that has been loaded
- metadata: (dict) Metadata associated with the load.
- page: (PageTrack) instance of PageTrack.
- request: (RequestTrack) instance of RequestTrack.
- track: (TracingTrack) instance of TracingTrack.
- """
- self.url = url
- self.metadata = metadata
- self.page_track = page
- self.request_track = request
- self._tracing_track = track
- self._tracing_json_str = None
-
- def ToJsonDict(self):
- """Returns a dictionary representing this instance."""
- result = {self._URL_KEY: self.url, self._METADATA_KEY: self.metadata,
- self._PAGE_KEY: self.page_track.ToJsonDict(),
- self._REQUEST_KEY: self.request_track.ToJsonDict(),
- self._TRACING_KEY: (self.tracing_track.ToJsonDict()
- if self.tracing_track else None)}
- return result
-
- def ToJsonFile(self, json_path):
- """Save a json file representing this instance."""
- json_dict = self.ToJsonDict()
- with open(json_path, 'w') as output_file:
- json.dump(json_dict, output_file)
-
- @classmethod
- def FromJsonDict(cls, json_dict):
- """Returns an instance from a dictionary returned by ToJsonDict()."""
- keys = (cls._URL_KEY, cls._METADATA_KEY, cls._PAGE_KEY, cls._REQUEST_KEY,
- cls._TRACING_KEY)
- assert all(key in json_dict for key in keys)
- page = page_track.PageTrack.FromJsonDict(json_dict[cls._PAGE_KEY])
- request = request_track.RequestTrack.FromJsonDict(
- json_dict[cls._REQUEST_KEY])
- track = tracing_track.TracingTrack.FromJsonDict(
- json_dict[cls._TRACING_KEY])
- return LoadingTrace(json_dict[cls._URL_KEY], json_dict[cls._METADATA_KEY],
- page, request, track)
-
- @classmethod
- def FromJsonFile(cls, json_path):
- """Returns an instance from a json file saved by ToJsonFile()."""
- with open(json_path) as input_file:
- return cls.FromJsonDict(json.load(input_file))
-
- @classmethod
- def RecordUrlNavigation(
- cls, url, connection, chrome_metadata, categories,
- timeout_seconds=devtools_monitor.DEFAULT_TIMEOUT_SECONDS,
- stop_delay_multiplier=0):
- """Create a loading trace by using controller to fetch url.
-
- Args:
- url: (str) url to fetch.
- connection: An opened devtools connection.
- chrome_metadata: Dictionary of chrome metadata.
- categories: as in tracing_track.TracingTrack
- timeout_seconds: monitoring connection timeout in seconds.
- stop_delay_multiplier: How long to wait after page load completed before
- tearing down, relative to the time it took to reach the page load to
- complete.
-
- Returns:
- LoadingTrace instance.
- """
- page = page_track.PageTrack(connection)
- request = request_track.RequestTrack(connection)
- trace = tracing_track.TracingTrack(connection, categories)
- start_date_str = datetime.datetime.utcnow().isoformat()
- seconds_since_epoch=time.time()
- connection.MonitorUrl(url,
- timeout_seconds=timeout_seconds,
- stop_delay_multiplier=stop_delay_multiplier)
- trace = cls(url, chrome_metadata, page, request, trace)
- trace.metadata.update(date=start_date_str,
- seconds_since_epoch=seconds_since_epoch)
- return trace
-
- @property
- def tracing_track(self):
- if not self._tracing_track:
- self._RestoreTracingTrack()
- return self._tracing_track
-
- def Slim(self):
- """Slims the memory usage of a trace by dropping the TraceEvents from it.
-
- The tracing track is restored on-demand when accessed.
- """
- self._tracing_json_str = json.dumps(self._tracing_track.ToJsonDict())
- self._tracing_track = None
-
- def _RestoreTracingTrack(self):
- if not self._tracing_json_str:
- return None
- self._tracing_track = tracing_track.TracingTrack.FromJsonDict(
- json.loads(self._tracing_json_str))
- self._tracing_json_str = None
diff --git a/loading/loading_trace_analyzer.py b/loading/loading_trace_analyzer.py
deleted file mode 100755
index 3c7882d..0000000
--- a/loading/loading_trace_analyzer.py
+++ /dev/null
@@ -1,178 +0,0 @@
-#! /usr/bin/env python
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-import argparse
-import json
-import re
-import sys
-
-from loading_trace import LoadingTrace
-import request_track
-
-
-def _ArgumentParser():
- """Builds a command line argument's parser.
- """
- parser = argparse.ArgumentParser()
- subparsers = parser.add_subparsers(dest='subcommand', help='subcommand line')
-
- # requests listing subcommand.
- requests_parser = subparsers.add_parser('requests',
- help='Lists all request from the loading trace.')
- requests_parser.add_argument('loading_trace', type=str,
- help='Input loading trace to see the cache usage from.')
- requests_parser.add_argument('--output',
- type=argparse.FileType('w'),
- default=sys.stdout,
- help='Output destination path if different from stdout.')
- requests_parser.add_argument('--output-format', type=str, default='{url}',
- help='Output line format (Default to "{url}")')
- requests_parser.add_argument('--where',
- dest='where_statement', type=str,
- nargs=2, metavar=('FORMAT', 'REGEX'), default=[],
- help='Where statement to filter such as: --where "{protocol}" "https?"')
-
- # requests listing subcommand.
- prune_parser = subparsers.add_parser('prune',
- help='Prunes some stuff from traces to make them small.')
- prune_parser.add_argument('loading_trace', type=file,
- help='Input path of the loading trace.')
- prune_parser.add_argument('-t', '--trace-filters',
- type=str, nargs='+', metavar='REGEX', default=[],
- help='Regex filters to whitelist trace events.')
- prune_parser.add_argument('-r', '--request-member-filter',
- type=str, nargs='+', metavar='REGEX', default=[],
- help='Regex filters to whitelist requests\' members.')
- prune_parser.add_argument('-i', '--indent', type=int, default=2,
- help='Number of space to indent the output.')
- prune_parser.add_argument('-o', '--output',
- type=argparse.FileType('w'), default=sys.stdout,
- help='Output destination path if different from stdout.')
- return parser
-
-
-def ListRequests(loading_trace_path,
- output_format='{url}',
- where_format='{url}',
- where_statement=None):
- """`loading_trace_analyzer.py requests` Command line tool entry point.
-
- Args:
- loading_trace_path: Path of the loading trace.
- output_format: Output format of the generated strings.
- where_format: String formated to be regex tested with <where_statement>
- where_statement: Regex for selecting request event.
-
- Yields:
- Formated string of the selected request event.
-
- Example:
- Lists all request with timing:
- ... requests --output-format "{timing} {url}"
-
- Lists HTTP/HTTPS requests that have used the cache:
- ... requests --where "{protocol} {from_disk_cache}" "https?\S* True"
- """
- if where_statement:
- where_statement = re.compile(where_statement)
- loading_trace = LoadingTrace.FromJsonFile(loading_trace_path)
- for request_event in loading_trace.request_track.GetEvents():
- request_event_json = request_event.ToJsonDict()
- if where_statement != None:
- where_in = where_format.format(**request_event_json)
- if not where_statement.match(where_in):
- continue
- yield output_format.format(**request_event_json)
-
-
-def _PruneMain(args):
- """`loading_trace_analyzer.py requests` Command line tool entry point.
-
- Args:
- args: Command line parsed arguments.
-
- Example:
- Keep only blink.net trace event category:
- ... prune -t "blink.net"
-
- Keep only requestStart trace events:
- ... prune -t "requestStart"
-
- Keep only requestStart trace events of the blink.user_timing category:
- ... prune -t "blink.user_timing:requestStart"
-
- Keep only all blink trace event categories:
- ... prune -t "^blink\.*"
-
- Keep only requests' url member:
- ... prune -r "^url$"
-
- Keep only requests' url and document_url members:
- ... prune -r "^./url$"
-
- Keep only requests' url, document_url and initiator members:
- ... prune -r "^./url$" "initiator"
- """
- trace_json = json.load(args.loading_trace)
-
- # Filter trace events.
- regexes = [re.compile(f) for f in args.trace_filters]
- events = []
- for event in trace_json['tracing_track']['events']:
- prune = True
- for cat in event['cat'].split(','):
- event_name = cat + ':' + event['name']
- for regex in regexes:
- if regex.search(event_name):
- prune = False
- break
- if not prune:
- events.append(event)
- break
- trace_json['tracing_track']['events'] = events
-
- # Filter members of requests.
- regexes = [re.compile(f) for f in args.request_member_filter]
- for request in trace_json['request_track']['events']:
- for key in request.keys():
- prune = True
- for regex in regexes:
- if regex.search(key):
- prune = False
- break
- if prune:
- del request[key]
-
- json.dump(trace_json, args.output, indent=args.indent)
- return 0
-
-
-def main(command_line_args):
- """Command line tool entry point."""
- args = _ArgumentParser().parse_args(command_line_args)
- if args.subcommand == 'requests':
- try:
- where_format = None
- where_statement = None
- if args.where_statement:
- where_format = args.where_statement[0]
- where_statement = args.where_statement[1]
- for output_line in ListRequests(loading_trace_path=args.loading_trace,
- output_format=args.output_format,
- where_format=where_format,
- where_statement=where_statement):
- args.output.write(output_line + '\n')
- return 0
- except re.error as e:
- sys.stderr.write("Invalid where statement REGEX: {}\n{}\n".format(
- where_statement[1], str(e)))
- return 1
- elif args.subcommand == 'prune':
- return _PruneMain(args)
- assert False
-
-
-if __name__ == '__main__':
- sys.exit(main(sys.argv[1:]))
diff --git a/loading/metrics.py b/loading/metrics.py
deleted file mode 100644
index 3a82d1e..0000000
--- a/loading/metrics.py
+++ /dev/null
@@ -1,204 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-"""Descriptive metrics for Clovis.
-
-When executed as a script, prints the amount of data attributed to Ads, and
-shows a graph of the amount of data to download for a new visit to the same
-page, with a given time interval.
-"""
-
-import collections
-import urlparse
-
-import content_classification_lens
-from request_track import CachingPolicy
-
-HTTP_OK_LENGTH = len("HTTP/1.1 200 OK\r\n")
-
-def _RequestTransferSize(request):
- def HeadersSize(headers):
- # 4: ':', ' ', '\r', '\n'
- return sum(len(k) + len(v) + 4 for (k, v) in headers.items())
- if request.protocol == 'data':
- return {'get': 0, 'request_headers': 0, 'response_headers': 0, 'body': 0}
- return {'get': len('GET ') + len(request.url) + 2,
- 'request_headers': HeadersSize(request.request_headers or {}),
- 'response_headers': HeadersSize(request.response_headers or {}),
- 'body': request.encoded_data_length}
-
-
-def TransferSize(requests):
- """Returns the total transfer size (uploaded, downloaded) of requests.
-
- This is an estimate as we assume:
- - 200s (for the size computation)
- - GET only.
-
- Args:
- requests: ([Request]) List of requests.
-
- Returns:
- (uploaded_bytes (int), downloaded_bytes (int))
- """
- uploaded_bytes = 0
- downloaded_bytes = 0
- for request in requests:
- request_bytes = _RequestTransferSize(request)
- uploaded_bytes += request_bytes['get'] + request_bytes['request_headers']
- downloaded_bytes += (HTTP_OK_LENGTH
- + request_bytes['response_headers']
- + request_bytes['body'])
- return (uploaded_bytes, downloaded_bytes)
-
-
-def TotalTransferSize(trace):
- """Returns the total transfer size (uploaded, downloaded) from a trace."""
- return TransferSize(trace.request_track.GetEvents())
-
-
-def TransferredDataRevisit(trace, after_time_s, assume_validation_ok=False):
- """Returns the amount of data transferred for a revisit.
-
- Args:
- trace: (LoadingTrace) loading trace.
- after_time_s: (float) Time in s after which the site is revisited.
- assume_validation_ok: (bool) Assumes that the resources to validate return
- 304s.
-
- Returns:
- (uploaded_bytes, downloaded_bytes)
- """
- uploaded_bytes = 0
- downloaded_bytes = 0
- for request in trace.request_track.GetEvents():
- caching_policy = CachingPolicy(request)
- policy = caching_policy.PolicyAtDate(request.wall_time + after_time_s)
- request_bytes = _RequestTransferSize(request)
- if policy == CachingPolicy.VALIDATION_NONE:
- continue
- uploaded_bytes += request_bytes['get'] + request_bytes['request_headers']
- if (policy in (CachingPolicy.VALIDATION_SYNC,
- CachingPolicy.VALIDATION_ASYNC)
- and caching_policy.HasValidators() and assume_validation_ok):
- downloaded_bytes += len('HTTP/1.1 304 NOT MODIFIED\r\n')
- continue
- downloaded_bytes += (HTTP_OK_LENGTH
- + request_bytes['response_headers']
- + request_bytes['body'])
- return (uploaded_bytes, downloaded_bytes)
-
-
-def AdsAndTrackingTransferSize(trace, ad_rules_filename,
- tracking_rules_filename):
- """Returns the transfer size attributed to ads and tracking.
-
- Args:
- trace: (LoadingTrace) a loading trace.
- ad_rules_filename: (str) Path to an ad rules file.
- tracking_rules_filename: (str) Path to a tracking rules file.
-
- Returns:
- (uploaded_bytes (int), downloaded_bytes (int))
- """
- content_lens = (
- content_classification_lens.ContentClassificationLens.WithRulesFiles(
- trace, ad_rules_filename, tracking_rules_filename))
- requests = content_lens.AdAndTrackingRequests()
- return TransferSize(requests)
-
-
-def DnsRequestsAndCost(trace):
- """Returns the number and cost of DNS requests for a trace."""
- requests = trace.request_track.GetEvents()
- requests_with_dns = [r for r in requests if r.timing.dns_start != -1]
- dns_requests_count = len(requests_with_dns)
- dns_cost = sum(r.timing.dns_end - r.timing.dns_start
- for r in requests_with_dns)
- return (dns_requests_count, dns_cost)
-
-
-def ConnectionMetrics(trace):
- """Returns the connection metrics for a given trace.
-
- Returns:
- {
- 'connections': int,
- 'connection_cost_ms': float,
- 'ssl_connections': int,
- 'ssl_cost_ms': float,
- 'http11_requests': int,
- 'h2_requests': int,
- 'data_requests': int,
- 'domains': int
- }
- """
- requests = trace.request_track.GetEvents()
- requests_with_connect = [r for r in requests if r.timing.connect_start != -1]
- requests_with_connect_count = len(requests_with_connect)
- connection_cost = sum(r.timing.connect_end - r.timing.connect_start
- for r in requests_with_connect)
- ssl_requests = [r for r in requests if r.timing.ssl_start != -1]
- ssl_requests_count = len(ssl_requests)
- ssl_cost = sum(r.timing.ssl_end - r.timing.ssl_start for r in ssl_requests)
- requests_per_protocol = collections.defaultdict(int)
- for r in requests:
- requests_per_protocol[r.protocol] += 1
-
- domains = set()
- for r in requests:
- if r.protocol == 'data':
- continue
- domain = urlparse.urlparse(r.url).hostname
- domains.add(domain)
-
- return {
- 'connections': requests_with_connect_count,
- 'connection_cost_ms': connection_cost,
- 'ssl_connections': ssl_requests_count,
- 'ssl_cost_ms': ssl_cost,
- 'http11_requests': requests_per_protocol['http/1.1'],
- 'h2_requests': requests_per_protocol['h2'],
- 'data_requests': requests_per_protocol['data'],
- 'domains': len(domains)
- }
-
-
-def PlotTransferSizeVsTimeBetweenVisits(trace):
- times = [10, 60, 300, 600, 3600, 4 * 3600, 12 * 3600, 24 * 3600]
- labels = ['10s', '1m', '10m', '1h', '4h', '12h', '1d']
- (_, total_downloaded) = TotalTransferSize(trace)
- downloaded = [TransferredDataRevisit(trace, delta_t)[1] for delta_t in times]
- plt.figure()
- plt.title('Amount of data to download for a revisit - %s' % trace.url)
- plt.xlabel('Time between visits (log)')
- plt.ylabel('Amount of data (bytes)')
- plt.plot(times, downloaded, 'k+--')
- plt.axhline(total_downloaded, color='k', linewidth=2)
- plt.xscale('log')
- plt.xticks(times, labels)
- plt.show()
-
-
-def main(trace_filename, ad_rules_filename, tracking_rules_filename):
- trace = loading_trace.LoadingTrace.FromJsonFile(trace_filename)
- (_, ads_downloaded_bytes) = AdsAndTrackingTransferSize(
- trace, ad_rules_filename, tracking_rules_filename)
- (_, total_downloaded_bytes) = TotalTransferSize(trace)
- print '%e bytes linked to Ads/Tracking (%.02f%%)' % (
- ads_downloaded_bytes,
- (100. * ads_downloaded_bytes) / total_downloaded_bytes)
- PlotTransferSizeVsTimeBetweenVisits(trace)
-
-
-if __name__ == '__main__':
- import sys
- from matplotlib import pylab as plt
- import loading_trace
- if len(sys.argv) != 4:
- print (
- 'Usage: %s trace_filename ad_rules_filename tracking_rules_filename'
- % sys.argv[0])
- sys.exit(0)
- main(*sys.argv[1:])
diff --git a/loading/metrics_unittest.py b/loading/metrics_unittest.py
deleted file mode 100644
index b10a529..0000000
--- a/loading/metrics_unittest.py
+++ /dev/null
@@ -1,120 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-import copy
-import unittest
-
-import metrics
-import request_track
-import test_utils
-
-
-class MetricsTestCase(unittest.TestCase):
- _BODY_SIZE = 14187
- _URL = 'http://www.example.com/'
- _REQUEST_HEADERS_SIZE = (len(_URL) + len('GET ') + 2
- + len('Accept: Everything\r\n'))
- _RESPONSE_HEADERS_SIZE = 124
- _REQUEST = {
- 'encoded_data_length': _BODY_SIZE,
- 'request_id': '2291.1',
- 'request_headers': {
- 'Accept': 'Everything',
- },
- 'response_headers': {
- 'Age': '866',
- 'Content-Length': str(_BODY_SIZE),
- 'Etag': 'ABCD',
- 'Date': 'Fri, 22 Apr 2016 08:56:19 -0200',
- 'Vary': 'Accept-Encoding',
- },
- 'timestamp': 5535648.730768,
- 'timing': {
- 'receive_headers_end': 47.0650000497699,
- 'request_time': 5535648.73264,
- },
- 'url': _URL,
- 'status': 200,
- 'wall_time': 1461322579.59422}
-
- def testTransferredDataRevisitNoCache(self):
- trace = self._MakeTrace()
- (uploaded, downloaded) = metrics.TransferredDataRevisit(trace, 10)
- self.assertEqual(self._REQUEST_HEADERS_SIZE, uploaded)
- self.assertEqual(self._BODY_SIZE + self._RESPONSE_HEADERS_SIZE, downloaded)
-
- def testTransferredDataRevisitNoCacheAssumeValidates(self):
- trace = self._MakeTrace()
- (uploaded, downloaded) = metrics.TransferredDataRevisit(trace, 10, True)
- self.assertEqual(self._REQUEST_HEADERS_SIZE, uploaded)
- not_modified_length = len('HTTP/1.1 304 NOT MODIFIED\r\n')
- self.assertEqual(not_modified_length, downloaded)
-
- def testTransferredDataRevisitCacheable(self):
- trace = self._MakeTrace()
- r = trace.request_track.GetEvents()[0]
- r.response_headers['Cache-Control'] = 'max-age=1000'
- (uploaded, downloaded) = metrics.TransferredDataRevisit(trace, 10)
- self.assertEqual(0, uploaded)
- self.assertEqual(0, downloaded)
- (uploaded, downloaded) = metrics.TransferredDataRevisit(trace, 1000)
- self.assertEqual(self._REQUEST_HEADERS_SIZE, uploaded)
- cache_control_length = len('Cache-Control: max-age=1000\r\n')
- self.assertEqual(
- self._BODY_SIZE + self._RESPONSE_HEADERS_SIZE + cache_control_length,
- downloaded)
-
- def testTransferSize(self):
- trace = self._MakeTrace()
- r = trace.request_track.GetEvents()[0]
- (_, downloaded) = metrics.TransferSize([r])
- self.assertEqual(self._BODY_SIZE + self._RESPONSE_HEADERS_SIZE,
- downloaded)
-
- def testDnsRequestsAndCost(self):
- trace = self._MakeTrace()
- (count, cost) = metrics.DnsRequestsAndCost(trace)
- self.assertEqual(0, count)
- self.assertEqual(0, cost)
- r = trace.request_track.GetEvents()[0]
- r.timing.dns_end = 12
- r.timing.dns_start = 4
- (count, cost) = metrics.DnsRequestsAndCost(trace)
- self.assertEqual(1, count)
- self.assertEqual(8, cost)
-
- def testConnectionMetrics(self):
- requests = [request_track.Request.FromJsonDict(copy.deepcopy(self._REQUEST))
- for _ in xrange(3)]
- requests[0].url = 'http://chromium.org/'
- requests[0].protocol = 'http/1.1'
- requests[0].timing.connect_start = 12
- requests[0].timing.connect_end = 42
- requests[0].timing.ssl_start = 50
- requests[0].timing.ssl_end = 70
- requests[1].url = 'https://chromium.org/where-am-i/'
- requests[1].protocol = 'h2'
- requests[1].timing.connect_start = 22
- requests[1].timing.connect_end = 73
- requests[2].url = 'http://www.chromium.org/here/'
- requests[2].protocol = 'http/42'
- trace = test_utils.LoadingTraceFromEvents(requests)
- stats = metrics.ConnectionMetrics(trace)
- self.assertEqual(2, stats['connections'])
- self.assertEqual(81, stats['connection_cost_ms'])
- self.assertEqual(1, stats['ssl_connections'])
- self.assertEqual(20, stats['ssl_cost_ms'])
- self.assertEqual(1, stats['http11_requests'])
- self.assertEqual(1, stats['h2_requests'])
- self.assertEqual(0, stats['data_requests'])
- self.assertEqual(2, stats['domains'])
-
- @classmethod
- def _MakeTrace(cls):
- request = request_track.Request.FromJsonDict(copy.deepcopy(cls._REQUEST))
- return test_utils.LoadingTraceFromEvents([request])
-
-
-if __name__ == '__main__':
- unittest.main()
diff --git a/loading/network_activity_lens.py b/loading/network_activity_lens.py
deleted file mode 100644
index 7e087e4..0000000
--- a/loading/network_activity_lens.py
+++ /dev/null
@@ -1,240 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-"""Gives a picture of the network activity between timestamps."""
-
-import bisect
-import collections
-import itertools
-import operator
-
-
-class NetworkActivityLens(object):
- """Reconstructs the network activity during a trace.
-
- The {uploaded,downloaded}_bytes_timeline timelines are:
- ([timestamp_msec], [value_at_timestamp]). Bytes are counted when a
- network event completes.
-
- The rate timelines are:
- ([timestamp_msec], [rate]), where the rate is computed over the time
- period ending at timestamp_msec.
-
- For all the timelines, the list of timestamps are identical.
- """
- def __init__(self, trace):
- """Initializes a NetworkActivityLens instance.
-
- Args:
- trace: (LoadingTrace)
- """
- self._trace = trace
- self._start_end_times = []
- self._active_events_list = []
- self._uploaded_bytes_timeline = []
- self._downloaded_bytes_timeline = []
- self._upload_rate_timeline = []
- self._download_rate_timeline = []
- self._total_downloaded_bytes = 0
- requests = trace.request_track.GetEvents()
- self._network_events = list(itertools.chain.from_iterable(
- NetworkEvent.EventsFromRequest(request) for request in requests))
- self._IndexEvents()
- self._CreateTimelines()
-
- @property
- def uploaded_bytes_timeline(self): # (timestamps, data)
- return (self._start_end_times, self._uploaded_bytes_timeline)
-
- @property
- def downloaded_bytes_timeline(self):
- return (self._start_end_times, self._downloaded_bytes_timeline)
-
- @property
- def upload_rate_timeline(self):
- return (self._start_end_times, self._upload_rate_timeline)
-
- @property
- def download_rate_timeline(self):
- return (self._start_end_times, self._download_rate_timeline)
-
- @property
- def total_download_bytes(self):
- return self._total_downloaded_bytes
-
- def DownloadedBytesAt(self, time_msec):
- """Return the the downloaded bytes at a given timestamp.
-
- Args:
- time_msec: a timestamp, in the same scale as the timelines.
-
- Returns:
- The total bytes downloaded up until the time period ending at time_msec.
- """
- # We just do a linear cumulative sum. Currently this is only called a couple
- # of times, so making an indexed cumulative sum does not seem to be worth
- # the bother.
- total_bytes = 0
- previous_msec = self.downloaded_bytes_timeline[0][0]
- for msec, nbytes in zip(*self.downloaded_bytes_timeline):
- if msec < time_msec:
- total_bytes += nbytes
- previous_msec = msec
- else:
- if time_msec > previous_msec:
- fraction_of_chunk = ((time_msec - previous_msec)
- / (msec - previous_msec))
- total_bytes += float(nbytes) * fraction_of_chunk
- break
- return total_bytes
-
- def _IndexEvents(self):
- start_end_times_set = set()
- for event in self._network_events:
- start_end_times_set.add(event.start_msec)
- start_end_times_set.add(event.end_msec)
- self._start_end_times = sorted(list(start_end_times_set))
- self._active_events_list = [[] for _ in self._start_end_times]
- for event in self._network_events:
- start_index = bisect.bisect_right(
- self._start_end_times, event.start_msec) - 1
- end_index = bisect.bisect_right(
- self._start_end_times, event.end_msec)
- for index in range(start_index, end_index):
- self._active_events_list[index].append(event)
-
- def _CreateTimelines(self):
- for (index, timestamp) in enumerate(self._start_end_times):
- upload_rate = sum(
- e.UploadRate() for e in self._active_events_list[index]
- if timestamp != e.end_msec)
- download_rate = sum(
- e.DownloadRate() for e in self._active_events_list[index]
- if timestamp != e.end_msec)
- uploaded_bytes = sum(
- e.UploadedBytes() for e in self._active_events_list[index]
- if timestamp == e.end_msec)
- downloaded_bytes = sum(
- e.DownloadedBytes() for e in self._active_events_list[index]
- if timestamp == e.end_msec)
- self._total_downloaded_bytes += downloaded_bytes
- self._uploaded_bytes_timeline.append(uploaded_bytes)
- self._downloaded_bytes_timeline.append(downloaded_bytes)
- self._upload_rate_timeline.append(upload_rate)
- self._download_rate_timeline.append(download_rate)
-
-
-class NetworkEvent(object):
- """Represents a network event."""
- KINDS = set(
- ('dns', 'connect', 'send', 'receive_headers', 'receive_body'))
- def __init__(self, request, kind, start_msec, end_msec, chunk_index=None):
- """Creates a NetworkEvent."""
- self._request = request
- self._kind = kind
- self.start_msec = start_msec
- self.end_msec = end_msec
- self._chunk_index = chunk_index
-
- @classmethod
- def _GetStartEndOffsetsMsec(cls, request, kind, index=None):
- start_offset, end_offset = (0, 0)
- r = request
- if kind == 'dns':
- start_offset = r.timing.dns_start
- end_offset = r.timing.dns_end
- elif kind == 'connect':
- start_offset = r.timing.connect_start
- end_offset = r.timing.connect_end
- elif kind == 'send':
- start_offset = r.timing.send_start
- end_offset = r.timing.send_end
- elif kind == 'receive_headers': # There is no responseReceived timing.
- start_offset = r.timing.send_end
- end_offset = r.timing.receive_headers_end
- elif kind == 'receive_body':
- if index is None:
- start_offset = r.timing.receive_headers_end
- end_offset = r.timing.loading_finished
- else:
- # Some chunks can correspond to no data.
- i = index - 1
- while i >= 0:
- (offset, size) = r.data_chunks[i]
- if size != 0:
- previous_chunk_start = offset
- break
- i -= 1
- else:
- previous_chunk_start = r.timing.receive_headers_end
- start_offset = previous_chunk_start
- end_offset = r.data_chunks[index][0]
- return (start_offset, end_offset)
-
- @classmethod
- def EventsFromRequest(cls, request):
- # TODO(lizeb): This ignore forced revalidations.
- if (request.from_disk_cache or request.served_from_cache
- or request.IsDataRequest()):
- return []
- events = []
- for kind in cls.KINDS - set(['receive_body']):
- event = cls._EventWithKindFromRequest(request, kind)
- if event:
- events.append(event)
- kind = 'receive_body'
- if request.data_chunks:
- for (index, chunk) in enumerate(request.data_chunks):
- if chunk[0] != 0:
- event = cls._EventWithKindFromRequest(request, kind, index)
- if event:
- events.append(event)
- else:
- event = cls._EventWithKindFromRequest(request, kind, None)
- if event:
- events.append(event)
- return events
-
- @classmethod
- def _EventWithKindFromRequest(cls, request, kind, index=None):
- (start_offset, end_offset) = cls._GetStartEndOffsetsMsec(
- request, kind, index)
- event = cls(request, kind, request.start_msec + start_offset,
- request.start_msec + end_offset, index)
- if start_offset == -1 or end_offset == -1:
- return None
- return event
-
- def UploadedBytes(self):
- """Returns the number of bytes uploaded during this event."""
- if self._kind not in ('send'):
- return 0
- # Headers are not compressed (ignoring SPDY / HTTP/2)
- if not self._request.request_headers:
- return 0
- return sum(len(k) + len(str(v)) for (k, v)
- in self._request.request_headers.items())
-
- def DownloadedBytes(self):
- """Returns the number of bytes downloaded during this event."""
- if self._kind not in ('receive_headers', 'receive_body'):
- return 0
- if self._kind == 'receive_headers':
- return sum(len(k) + len(str(v)) for (k, v)
- in self._request.response_headers.items())
- else:
- if self._chunk_index is None:
- return self._request.encoded_data_length
- else:
- return self._request.data_chunks[self._chunk_index][1]
-
- def UploadRate(self):
- """Returns the upload rate of this event in Bytes / s."""
- return 1000 * self.UploadedBytes() / float(self.end_msec - self.start_msec)
-
- def DownloadRate(self):
- """Returns the download rate of this event in Bytes / s."""
- downloaded_bytes = self.DownloadedBytes()
- value = 1000 * downloaded_bytes / float(self.end_msec - self.start_msec)
- return value
diff --git a/loading/network_activity_lens_unittest.py b/loading/network_activity_lens_unittest.py
deleted file mode 100644
index 7b2031e..0000000
--- a/loading/network_activity_lens_unittest.py
+++ /dev/null
@@ -1,138 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-import copy
-import unittest
-
-from network_activity_lens import NetworkActivityLens
-import test_utils
-
-
-class NetworkActivityLensTestCase(unittest.TestCase):
- def testTimeline(self):
- timing_dict = {
- 'requestTime': 1.2,
- 'dnsStart': 20, 'dnsEnd': 30,
- 'connectStart': 50, 'connectEnd': 60,
- 'sendStart': 70, 'sendEnd': 80,
- 'receiveHeadersEnd': 90,
- 'loadingFinished': 100}
- request = test_utils.MakeRequestWithTiming(1, 2, timing_dict)
- lens = self._NetworkActivityLens([request])
- start_end_times = lens.uploaded_bytes_timeline[0]
- expected_start_times = [
- 1220., 1230., 1250., 1260., 1270., 1280., 1290., 1300.]
- self.assertListEqual(expected_start_times, start_end_times)
- timing_dict = copy.copy(timing_dict)
- timing_dict['requestTime'] += .005
- second_request = test_utils.MakeRequestWithTiming(1, 2, timing_dict)
- lens = self._NetworkActivityLens([request, second_request])
- start_end_times = lens.uploaded_bytes_timeline[0]
- expected_start_times = sorted(
- expected_start_times + [x + 5. for x in expected_start_times])
- for (expected, actual) in zip(expected_start_times, start_end_times):
- self.assertAlmostEquals(expected, actual)
-
- def testTransferredBytes(self):
- timing_dict = {
- 'requestTime': 1.2,
- 'dnsStart': 20, 'dnsEnd': 30,
- 'connectStart': 50, 'connectEnd': 60,
- 'sendStart': 70, 'sendEnd': 80,
- 'receiveHeadersEnd': 90,
- 'loadingFinished': 100}
- request = test_utils.MakeRequestWithTiming(1, 2, timing_dict)
- request.request_headers = {'a': 'b'}
- request.response_headers = {'c': 'def'}
- lens = self._NetworkActivityLens([request])
- # Upload
- upload_timeline = lens.uploaded_bytes_timeline
- self.assertEquals(1270, upload_timeline[0][4])
- self.assertEquals(1280, upload_timeline[0][5])
- self.assertEquals(0, upload_timeline[1][4])
- self.assertEquals(2, upload_timeline[1][5])
- self.assertEquals(0, upload_timeline[1][6])
- upload_rate = lens.upload_rate_timeline
- self.assertEquals(2 / 10e-3, upload_rate[1][4])
- self.assertEquals(0, upload_rate[1][5])
- # Download
- download_timeline = lens.downloaded_bytes_timeline
- download_rate = lens.download_rate_timeline
- self.assertEquals(1280, download_timeline[0][5])
- self.assertEquals(1290, download_timeline[0][6])
- self.assertEquals(0, download_timeline[1][5])
- self.assertEquals(4, download_timeline[1][6])
- self.assertEquals(0, download_timeline[1][7])
- download_rate = lens.download_rate_timeline
- self.assertEquals(4 / 10e-3, download_rate[1][5])
- self.assertEquals(0, download_rate[1][6])
- self.assertAlmostEquals(4, lens.total_download_bytes)
-
- def testLongRequest(self):
- timing_dict = {
- 'requestTime': 1200,
- 'dnsStart': 20, 'dnsEnd': 30,
- 'connectStart': 50, 'connectEnd': 60,
- 'sendStart': 70, 'sendEnd': 80,
- 'receiveHeadersEnd': 90,
- 'loadingFinished': 100}
- request = test_utils.MakeRequestWithTiming(1, 2, timing_dict)
- request.response_headers = {}
- timing_dict = {
- 'requestTime': 1200,
- 'dnsStart': 2, 'dnsEnd': 3,
- 'connectStart': 5, 'connectEnd': 6,
- 'sendStart': 7, 'sendEnd': 8,
- 'receiveHeadersEnd': 10,
- 'loadingFinished': 1000}
- long_request = test_utils.MakeRequestWithTiming(1, 2, timing_dict)
- long_request.response_headers = {}
- long_request.encoded_data_length = 1000
- lens = self._NetworkActivityLens([request, long_request])
- (timestamps, downloaded_bytes) = lens.downloaded_bytes_timeline
- (_, download_rate) = lens.download_rate_timeline
- start_receive = (long_request.start_msec
- + long_request.timing.receive_headers_end)
- end_receive = (long_request.start_msec
- + long_request.timing.loading_finished)
- self.assertEquals(1000, downloaded_bytes[-1])
- for (index, timestamp) in enumerate(timestamps):
- if start_receive < timestamp < end_receive:
- self.assertAlmostEqual(1000 / 990e-3, download_rate[index])
- self.assertEquals(0, downloaded_bytes[index])
- self.assertEquals(1000, downloaded_bytes[-1])
-
- def testDownloadedBytesAt(self):
- timing_dict = {
- 'requestTime': 1.2,
- 'dnsStart': 20, 'dnsEnd': 30,
- 'connectStart': 50, 'connectEnd': 60,
- 'sendStart': 70, 'sendEnd': 80,
- 'receiveHeadersEnd': 90,
- 'loadingFinished': 100}
- request = test_utils.MakeRequestWithTiming(1, 2, timing_dict)
- lens = self._NetworkActivityLens([request])
- # See testTransferredBytes for key events times. We test around events at
- # the start, middle and end of the data transfer as well as for the
- # interpolation.
- self.assertEquals(0, lens.DownloadedBytesAt(1219))
- self.assertEquals(0, lens.DownloadedBytesAt(1220))
- self.assertEquals(0, lens.DownloadedBytesAt(1225))
- self.assertEquals(0, lens.DownloadedBytesAt(1280))
- self.assertEquals(1.6, lens.DownloadedBytesAt(1281))
- self.assertEquals(8, lens.DownloadedBytesAt(1285))
- self.assertEquals(14.4, lens.DownloadedBytesAt(1289))
- self.assertEquals(16, lens.DownloadedBytesAt(1290))
- self.assertEquals(16, lens.DownloadedBytesAt(1291))
- self.assertEquals(16, lens.DownloadedBytesAt(1295))
- self.assertEquals(16, lens.DownloadedBytesAt(1300))
- self.assertEquals(16, lens.DownloadedBytesAt(1400))
-
- def _NetworkActivityLens(self, requests):
- trace = test_utils.LoadingTraceFromEvents(requests)
- return NetworkActivityLens(trace)
-
-
-if __name__ == '__main__':
- unittest.main()
diff --git a/loading/network_cpu_activity_view.py b/loading/network_cpu_activity_view.py
deleted file mode 100755
index a118823..0000000
--- a/loading/network_cpu_activity_view.py
+++ /dev/null
@@ -1,73 +0,0 @@
-#!/usr/bin/python
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-"""Graphs the CPU and network activity during a load."""
-
-import numpy as np
-import matplotlib
-from matplotlib import pylab as plt
-import sys
-
-import activity_lens
-import loading_trace
-import network_activity_lens
-
-
-def _CpuActivityTimeline(cpu_lens, start_msec, end_msec, granularity):
- cpu_timestamps = np.arange(start_msec, end_msec, granularity)
- busy_percentage = []
- print len(cpu_timestamps)
- for i in range(len(cpu_timestamps) - 1):
- (start, end) = (cpu_timestamps[i], cpu_timestamps[i + 1])
- duration = end - start
- busy_ms = cpu_lens.MainRendererThreadBusyness(start, end)
- busy_percentage.append(100 * busy_ms / float(duration))
- return (cpu_timestamps[:-1], np.array(busy_percentage))
-
-
-def GraphTimelines(trace):
- """Creates a figure of Network and CPU activity for a trace.
-
- Args:
- trace: (LoadingTrace)
-
- Returns:
- A matplotlib.pylab.figure.
- """
- cpu_lens = activity_lens.ActivityLens(trace)
- network_lens = network_activity_lens.NetworkActivityLens(trace)
- matplotlib.rc('font', size=14)
- figure, (network, cpu) = plt.subplots(2, sharex = True, figsize=(14, 10))
- figure.suptitle('Network and CPU Activity - %s' % trace.url)
- upload_timeline = network_lens.uploaded_bytes_timeline
- download_timeline = network_lens.downloaded_bytes_timeline
- start_time = upload_timeline[0][0]
- end_time = upload_timeline[0][-1]
- times = np.array(upload_timeline[0]) - start_time
- network.step(times, np.cumsum(download_timeline[1]) / 1e6, label='Download')
- network.step(times, np.cumsum(upload_timeline[1]) / 1e6, label='Upload')
- network.legend(loc='lower right')
- network.set_xlabel('Time (ms)')
- network.set_ylabel('Total Data Transferred (MB)')
-
- (cpu_timestamps, cpu_busyness) = _CpuActivityTimeline(
- cpu_lens, start_time, end_time, 100)
- cpu.step(cpu_timestamps - start_time, cpu_busyness)
- cpu.set_ylim(ymin=0, ymax=100)
- cpu.set_xlabel('Time (ms)')
- cpu.set_ylabel('Main Renderer Thread Busyness (%)')
- return figure
-
-
-def main():
- filename = sys.argv[1]
- trace = loading_trace.LoadingTrace.FromJsonFile(filename)
- figure = GraphTimelines(trace, filename + '.pdf')
- output_filename = filename + '.pdf'
- figure.savefig(output_filename, dpi=300)
-
-
-if __name__ == '__main__':
- main()
diff --git a/loading/node_cost_csv.py b/loading/node_cost_csv.py
deleted file mode 100755
index 79d3b06..0000000
--- a/loading/node_cost_csv.py
+++ /dev/null
@@ -1,61 +0,0 @@
-#! /usr/bin/python
-# Copyright 2015 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-import argparse
-import logging
-import os
-import sys
-
-from processing import (SitesFromDir, WarmGraph, ColdGraph)
-
-
-def main():
- logging.basicConfig(level=logging.ERROR)
- parser = argparse.ArgumentParser(
- description=('Convert a directory created by ./analyze.py fetch '
- 'to a node cost CSV which compares cold and warm total '
- 'node costs.'))
- parser.add_argument('--datadir', required=True)
- parser.add_argument('--csv', required=True)
- parser.add_argument('--noads', action='store_true')
- args = parser.parse_args()
- sites = SitesFromDir(args.datadir)
- with open(args.csv, 'w') as output:
- output.write('site,cold.total,warm.total,cold.common,warm.common,'
- 'cold.node.count,common.cold.node.count,'
- 'cold.all.edges,warm.all.edges,'
- 'cold.common.edges,warm.common.edges,'
- 'cold.edge.fraction,common.cold.edge.fraction\n')
- for site in sites:
- print site
- warm = WarmGraph(args.datadir, site)
- if args.noads:
- warm.Set(node_filter=warm.FilterAds)
- cold = ColdGraph(args.datadir, site)
- if args.noads:
- cold.Set(node_filter=cold.FilterAds)
- common = [p for p in cold.Intersect(warm.Nodes())]
- common_cold = set([c.Node() for c, w in common])
- common_warm = set([w.Node() for c, w in common])
- output.write(','.join([str(s) for s in [
- site,
- sum((n.NodeCost() for n in cold.Nodes())),
- sum((n.NodeCost() for n in warm.Nodes())),
- sum((c.NodeCost() for c, w in common)),
- sum((w.NodeCost() for c, w in common)),
- sum((1 for n in cold.Nodes())),
- len(common),
- cold.EdgeCosts(), warm.EdgeCosts(),
- cold.EdgeCosts(lambda n: n in common_cold),
- warm.EdgeCosts(lambda n: n in common_warm),
- (cold.EdgeCosts() /
- sum((n.NodeCost() for n in cold.Nodes()))),
- (cold.EdgeCosts(lambda n: n in common_cold) /
- sum((c.NodeCost() for c, w in common)))
- ]]) + '\n')
-
-
-if __name__ == '__main__':
- main()
diff --git a/loading/options.py b/loading/options.py
deleted file mode 100644
index 5c6234c..0000000
--- a/loading/options.py
+++ /dev/null
@@ -1,174 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-import argparse
-import os.path
-import sys
-
-_SRC_DIR = os.path.abspath(os.path.join(
- os.path.dirname(__file__), '..', '..', '..'))
-sys.path.append(os.path.join(_SRC_DIR, 'build', 'android'))
-from pylib import constants
-
-class Options(object):
- """Global options repository.
-
- ParseArgs must be called before use. See _ARGS for common members, these will
- be available as instance attributes (eg, OPTIONS.clear_cache).
- """
- # Tuples of (argument name, default value, help string).
- _ARGS = [ ('chrome_package_name', 'chrome',
- 'build/android/pylib/constants package description'),
- ('devtools_hostname', 'localhost',
- 'hostname for devtools websocket connection'),
- ('devtools_port', 9222,
- 'port for devtools websocket connection'),
- ('local_build_dir', None,
- 'Build directory for local binary files such as chrome'),
- ('local_noisy', False,
- 'Enable local chrome console output'),
- ('local_profile_dir', None,
- 'profile directory to use for local runs'),
- ('no_sandbox', False,
- 'pass --no-sandbox to browser (local run only; see also '
- 'https://chromium.googlesource.com/chromium/src/+/master/'
- 'docs/linux_suid_sandbox_development.md)'),
- ('devices_file', _SRC_DIR + '/third_party/blink/renderer/devtools'
- '/front_end/emulated_devices/module.json', 'File containing a'
- ' list of emulated devices characteristics.')
- ]
-
-
- def __init__(self):
- self._arg_set = set()
- self._parsed_args = None
-
- def AddGlobalArgument(self, arg_name, default, help_str):
- """Add a global argument.
-
- Args:
- arg_name: the name of the argument. This will be used as an optional --
- argument.
- default: the default value for the argument. The type of this default will
- be used as the type of the argument.
- help_str: the argument help string.
- """
- self._ARGS.append((arg_name, default, help_str))
-
- def ParseArgs(self, arg_list, description=None, extra=None):
- """Parse command line arguments.
-
- Args:
- arg_list: command line argument list.
- description: description to use in argument parser.
- extra: additional required arguments to add. These will be exposed as
- instance attributes. This is either a list of extra arguments, or a
- single string or tuple. If a tuple, the first item is the argument and
- the second a default, otherwise the argument is required. Arguments are
- used as in argparse, ie those beginning with -- are named, and those
- without a dash are positional. Don't use a single dash.
- """
- parser = self._MakeParser(description, extra)
- self._parsed_args = parser.parse_args(arg_list)
-
- def ExtractArgs(self, arg_list):
- """Extract arguments from arg_str.
-
- Args:
- arg_list: command line argument list. It will be changed so that arguments
- used by this options instance are removed.
- """
- parser = self._MakeParser()
- (self._parsed_args, unused) = parser.parse_known_args(arg_list)
- del arg_list[:]
- arg_list.extend(unused)
-
- def GetParentParser(self, group_name='Global'):
- """Returns a parser suitable for passing in as a parent to argparse.
-
- Args:
- group_name: A group name for the parser (see argparse's
- add_argument_group).
-
- Returns:
- An argparse parser instance.
- """
- return self._MakeParser(group=group_name)
-
- def SetParsedArgs(self, parsed_args):
- """Set parsed args. Used with GetParentParser.
-
- Args:
- parsed_args: the result of argparse.parse_args or similar.
- """
- self._parsed_args = parsed_args
-
- def _MakeParser(self, description=None, extra=None, group=None):
- self._arg_set = set()
- add_help = True if group is None else False
- parser = argparse.ArgumentParser(
- description=description, add_help=add_help)
- container = parser if group is None else parser.add_argument_group(group)
- for arg, default, help_str in self._ARGS:
- # All global options are named.
- arg = '--' + arg
- self._AddArg(container, arg, default, help_str=help_str)
- if extra is not None:
- if type(extra) is not list:
- extra = [extra]
- for arg in extra:
- if type(arg) is tuple:
- argname, default = arg
- self._AddArg(container, argname, default)
- else:
- self._AddArg(container, arg, None, required=True)
- return parser
-
- def _AddArg(self, container, arg, default, required=False, help_str=None):
- assert not arg.startswith('-') or arg.startswith('--'), \
- "Single dash arguments aren't supported: %s" % arg
- arg_name = arg
- if arg.startswith('--'):
- arg_name = arg[2:]
- assert arg_name not in self._arg_set, \
- '%s extra arg is a duplicate' % arg_name
- self._arg_set.add(arg_name)
-
- kwargs = {}
- if required and arg.startswith('--'):
- kwargs['required'] = required
- if help_str is not None:
- kwargs['help'] = help_str
- if default is not None:
- if type(default) is bool:
- # If the default of a switch is true, setting the flag stores false.
- if default:
- kwargs['action'] = 'store_false'
- else:
- kwargs['action'] = 'store_true'
- else:
- kwargs['default'] = default
- kwargs['type'] = type(default)
-
- container.add_argument(arg, **kwargs)
-
- def __getattr__(self, name):
- if name in self._arg_set:
- assert self._parsed_args, 'Option requested before ParseArgs called'
- return getattr(self._parsed_args, name)
- raise AttributeError(name)
-
- def ChromePackage(self):
- return constants.PACKAGE_INFO[self.chrome_package_name]
-
- def LocalBinary(self, binary_name):
- """Get local binary path from its name."""
- assert self.local_build_dir, '--local_build_dir needs to be set.'
- path = os.path.join(self.local_build_dir, binary_name)
- assert os.path.isfile(path), \
- 'Missing binary file {} (wrong --local_build_dir?).'.format(path)
- return path
-
-
-OPTIONS = Options()
diff --git a/loading/options_unittest.py b/loading/options_unittest.py
deleted file mode 100644
index 9ecca66..0000000
--- a/loading/options_unittest.py
+++ /dev/null
@@ -1,32 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-import argparse
-import unittest
-
-import options
-
-
-class OptionsTestCase(unittest.TestCase):
- def testExtract(self):
- args = ['--A', 'foo', '--devtools_port', '2000', '--B=20',
- '--no_sandbox', '--C', '30', 'baz']
- opts = options.Options()
- opts.ExtractArgs(args)
- self.assertEqual(['--A', 'foo', '--B=20', '--C', '30', 'baz'], args)
- self.assertEqual(2000, opts.devtools_port)
- self.assertTrue(opts.no_sandbox)
-
- def testParent(self):
- opts = options.Options()
- parser = argparse.ArgumentParser(parents=[opts.GetParentParser()])
- parser.add_argument('--foo', type=int)
- parsed_args = parser.parse_args(['--foo=4', '--devtools_port', '2000'])
- self.assertEqual(4, parsed_args.foo)
- opts.SetParsedArgs(parsed_args)
- self.assertEqual(2000, opts.devtools_port)
-
-
-if __name__ == '__main__':
- unittest.main()
diff --git a/loading/page_track.py b/loading/page_track.py
deleted file mode 100644
index bc904ba..0000000
--- a/loading/page_track.py
+++ /dev/null
@@ -1,71 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-import devtools_monitor
-
-
-class PageTrack(devtools_monitor.Track):
- """Records the events from the page track."""
- _METHODS = ('Page.frameStartedLoading', 'Page.frameStoppedLoading',
- 'Page.frameAttached')
- FRAME_STARTED_LOADING = 'Page.frameStartedLoading'
- def __init__(self, connection):
- super(PageTrack, self).__init__(connection)
- self._connection = connection
- self._events = []
- self._pending_frames = set()
- self._known_frames = set()
- self._main_frame_id = None
- if self._connection:
- for method in PageTrack._METHODS:
- self._connection.RegisterListener(method, self)
-
- def Handle(self, method, msg):
- assert method in PageTrack._METHODS
- params = msg['params']
- frame_id = params['frameId']
- should_stop = False
- event = {'method': method, 'frame_id': frame_id}
- if method == self.FRAME_STARTED_LOADING:
- if self._main_frame_id is None:
- self._main_frame_id = params['frameId']
- self._pending_frames.add(frame_id)
- self._known_frames.add(frame_id)
- elif method == 'Page.frameStoppedLoading':
- assert frame_id in self._pending_frames
- self._pending_frames.remove(frame_id)
- if frame_id == self._main_frame_id:
- should_stop = True
- elif method == 'Page.frameAttached':
- self._known_frames.add(frame_id)
- parent_frame = params['parentFrameId']
- assert parent_frame in self._known_frames
- event['parent_frame_id'] = parent_frame
- self._events.append(event)
- if should_stop and self._connection:
- self._connection.StopMonitoring()
-
- def GetEvents(self):
- #TODO(lizeb): Add more checks here (child frame stops loading before parent,
- #for instance).
- return self._events
-
- def ToJsonDict(self):
- return {'events': [event for event in self._events]}
-
- def GetMainFrameId(self):
- """Returns the Id (str) of the main frame, or raises a ValueError."""
- for event in self._events:
- if event['method'] == self.FRAME_STARTED_LOADING:
- return event['frame_id']
- else:
- raise ValueError('No frame loads in the track.')
-
- @classmethod
- def FromJsonDict(cls, json_dict):
- assert 'events' in json_dict
- result = PageTrack(None)
- events = [event for event in json_dict['events']]
- result._events = events
- return result
diff --git a/loading/page_track_unittest.py b/loading/page_track_unittest.py
deleted file mode 100644
index 3056d99..0000000
--- a/loading/page_track_unittest.py
+++ /dev/null
@@ -1,65 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-import unittest
-
-import devtools_monitor
-from page_track import PageTrack
-
-class MockDevToolsConnection(object):
- def __init__(self):
- self.stop_has_been_called = False
-
- def RegisterListener(self, name, listener):
- pass
-
- def StopMonitoring(self):
- self.stop_has_been_called = True
-
-
-class PageTrackTest(unittest.TestCase):
- _EVENTS = [{'method': 'Page.frameStartedLoading',
- 'params': {'frameId': '1234.1'}},
- {'method': 'Page.frameAttached',
- 'params': {'frameId': '1234.12', 'parentFrameId': '1234.1'}},
- {'method': 'Page.frameStartedLoading',
- 'params': {'frameId': '1234.12'}},
- {'method': 'Page.frameStoppedLoading',
- 'params': {'frameId': '1234.12'}},
- {'method': 'Page.frameStoppedLoading',
- 'params': {'frameId': '1234.1'}}]
- def testAsksMonitoringToStop(self):
- devtools_connection = MockDevToolsConnection()
- page_track = PageTrack(devtools_connection)
- for msg in PageTrackTest._EVENTS[:-1]:
- page_track.Handle(msg['method'], msg)
- self.assertFalse(devtools_connection.stop_has_been_called)
- msg = PageTrackTest._EVENTS[-1]
- page_track.Handle(msg['method'], msg)
- self.assertTrue(devtools_connection.stop_has_been_called)
-
- def testUnknownParent(self):
- page_track = PageTrack(None)
- msg = {'method': 'Page.frameAttached',
- 'params': {'frameId': '1234.12', 'parentFrameId': '1234.1'}}
- with self.assertRaises(AssertionError):
- page_track.Handle(msg['method'], msg)
-
- def testStopsLoadingUnknownFrame(self):
- page_track = PageTrack(None)
- msg = {'method': 'Page.frameStoppedLoading',
- 'params': {'frameId': '1234.12'}}
- with self.assertRaises(AssertionError):
- page_track.Handle(msg['method'], msg)
-
- def testGetMainFrameId(self):
- devtools_connection = MockDevToolsConnection()
- page_track = PageTrack(devtools_connection)
- for msg in PageTrackTest._EVENTS:
- page_track.Handle(msg['method'], msg)
- self.assertEquals('1234.1', page_track.GetMainFrameId())
-
-
-if __name__ == '__main__':
- unittest.main()
diff --git a/loading/prefetch_view.py b/loading/prefetch_view.py
deleted file mode 100644
index cbd882a..0000000
--- a/loading/prefetch_view.py
+++ /dev/null
@@ -1,191 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-"""Models the effect of prefetching resources from a loading trace.
-
-For example, this can be used to evaluate NoState Prefetch
-(https://goo.gl/B3nRUR).
-
-When executed as a script, takes a trace as a command-line arguments and shows
-statistics about it.
-"""
-
-import itertools
-import operator
-
-import common_util
-import dependency_graph
-import graph
-import loading_trace
-import user_satisfied_lens
-import request_dependencies_lens
-import request_track
-
-
-class RequestNode(dependency_graph.RequestNode):
- """Simulates the effect of prefetching resources discoverable by the preload
- scanner.
- """
- _ATTRS = ['preloaded', 'before']
- def __init__(self, request=None):
- super(RequestNode, self).__init__(request)
- self.preloaded = False
- self.before = False
-
- def ToJsonDict(self):
- result = super(RequestNode, self).ToJsonDict()
- return common_util.SerializeAttributesToJsonDict(result, self, self._ATTRS)
-
- @classmethod
- def FromJsonDict(cls, json_dict):
- result = super(RequestNode, cls).FromJsonDict(json_dict)
- return common_util.DeserializeAttributesFromJsonDict(
- json_dict, result, cls._ATTRS)
-
-
-class PrefetchSimulationView(object):
- """Simulates the effect of prefetch."""
- def __init__(self, trace, dependencies_lens, user_lens):
- self.postload_msec = None
- self.graph = None
- if trace is None:
- return
- requests = trace.request_track.GetEvents()
- critical_requests_ids = user_lens.CriticalRequestIds()
- self.postload_msec = user_lens.PostloadTimeMsec()
- self.graph = dependency_graph.RequestDependencyGraph(
- requests, dependencies_lens, node_class=RequestNode)
- preloaded_requests = [r.request_id for r in self.PreloadedRequests(
- requests[0], dependencies_lens, trace)]
- self._AnnotateNodes(self.graph.graph.Nodes(), preloaded_requests,
- critical_requests_ids)
-
- def Cost(self):
- """Returns the cost of the graph, restricted to the critical requests."""
- pruned_graph = self._PrunedGraph()
- return pruned_graph.Cost() + self.postload_msec
-
- def UpdateNodeCosts(self, node_to_cost):
- """Updates the cost of nodes, according to |node_to_cost|.
-
- Args:
- node_to_cost: (Callable) RequestNode -> float. Callable returning the cost
- of a node.
- """
- pruned_graph = self._PrunedGraph()
- for node in pruned_graph.Nodes():
- node.cost = node_to_cost(node)
-
- def ToJsonDict(self):
- """Returns a dict representing this instance."""
- result = {'graph': self.graph.ToJsonDict()}
- return common_util.SerializeAttributesToJsonDict(
- result, self, ['postload_msec'])
-
- @classmethod
- def FromJsonDict(cls, json_dict):
- """Returns an instance of PrefetchSimulationView from a dict dumped by
- ToJSonDict().
- """
- result = cls(None, None, None)
- result.graph = dependency_graph.RequestDependencyGraph.FromJsonDict(
- json_dict['graph'], RequestNode, dependency_graph.Edge)
- return common_util.DeserializeAttributesFromJsonDict(
- json_dict, result, ['postload_msec'])
-
- @classmethod
- def _AnnotateNodes(cls, nodes, preloaded_requests_ids,
- critical_requests_ids,):
- for node in nodes:
- node.preloaded = node.request.request_id in preloaded_requests_ids
- node.before = node.request.request_id in critical_requests_ids
-
- @classmethod
- def ParserDiscoverableRequests(
- cls, request, dependencies_lens, recurse=False):
- """Returns a list of requests IDs dicovered by the parser.
-
- Args:
- request: (Request) Root request.
-
- Returns:
- [Request]
- """
- # TODO(lizeb): handle the recursive case.
- assert not recurse
- discoverable_requests = [request]
- first_request = dependencies_lens.GetRedirectChain(request)[-1]
- deps = dependencies_lens.GetRequestDependencies()
- for (first, second, reason) in deps:
- if first.request_id == first_request.request_id and reason == 'parser':
- discoverable_requests.append(second)
- return discoverable_requests
-
- @classmethod
- def _ExpandRedirectChains(cls, requests, dependencies_lens):
- return list(itertools.chain.from_iterable(
- [dependencies_lens.GetRedirectChain(r) for r in requests]))
-
- @classmethod
- def PreloadedRequests(cls, request, dependencies_lens, trace):
- """Returns the requests that have been preloaded from a given request.
-
- This list is the set of request that are:
- - Discoverable by the parser
- - Found in the trace log.
-
- Before looking for dependencies, this follows the redirect chain.
-
- Args:
- request: (Request) Root request.
-
- Returns:
- A list of Request. Does not include the root request. This list is a
- subset of the one returned by ParserDiscoverableRequests().
- """
- # Preload step events are emitted in ResourceFetcher::preloadStarted().
- resource_events = trace.tracing_track.Filter(
- categories=set([u'blink.net']))
- preload_step_events = filter(
- lambda e: e.args.get('step') == 'Preload',
- resource_events.GetEvents())
- preloaded_urls = set()
- for preload_step_event in preload_step_events:
- preload_event = resource_events.EventFromStep(preload_step_event)
- if preload_event:
- preloaded_urls.add(preload_event.args['data']['url'])
- parser_requests = cls.ParserDiscoverableRequests(
- request, dependencies_lens)
- preloaded_root_requests = filter(
- lambda r: r.url in preloaded_urls, parser_requests)
- # We can actually fetch the whole redirect chain.
- return [request] + list(itertools.chain.from_iterable(
- [dependencies_lens.GetRedirectChain(r)
- for r in preloaded_root_requests]))
-
- def _PrunedGraph(self):
- roots = self.graph.graph.RootNodes()
- nodes = self.graph.graph.ReachableNodes(
- roots, should_stop=lambda n: not n.before)
- return graph.DirectedGraph(nodes, self.graph.graph.Edges())
-
-
-def _PrintSumamry(trace, dependencies_lens, user_lens):
- prefetch_view = PrefetchSimulationView(trace, dependencies_lens, user_lens)
- print 'Time to First Contentful Paint = %.02fms' % prefetch_view.Cost()
- print 'Set costs of prefetched requests to 0.'
- prefetch_view.UpdateNodeCosts(lambda n: 0 if n.preloaded else n.cost)
- print 'Time to First Contentful Paint = %.02fms' % prefetch_view.Cost()
-
-
-def main(filename):
- trace = loading_trace.LoadingTrace.FromJsonFile(filename)
- dependencies_lens = request_dependencies_lens.RequestDependencyLens(trace)
- user_lens = user_satisfied_lens.FirstContentfulPaintLens(trace)
- _PrintSumamry(trace, dependencies_lens, user_lens)
-
-
-if __name__ == '__main__':
- import sys
- main(sys.argv[1])
diff --git a/loading/prefetch_view_unittest.py b/loading/prefetch_view_unittest.py
deleted file mode 100644
index 1f85019..0000000
--- a/loading/prefetch_view_unittest.py
+++ /dev/null
@@ -1,95 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-import unittest
-
-from prefetch_view import PrefetchSimulationView
-import request_dependencies_lens
-from request_dependencies_lens_unittest import TestRequests
-import request_track
-import test_utils
-
-
-class PrefetchSimulationViewTestCase(unittest.TestCase):
- def setUp(self):
- super(PrefetchSimulationViewTestCase, self).setUp()
- self._SetUp()
-
- def testExpandRedirectChains(self):
- self.assertListEqual(
- [TestRequests.FIRST_REDIRECT_REQUEST,
- TestRequests.SECOND_REDIRECT_REQUEST, TestRequests.REDIRECTED_REQUEST],
- PrefetchSimulationView._ExpandRedirectChains(
- [TestRequests.FIRST_REDIRECT_REQUEST], self.dependencies_lens))
-
- def testParserDiscoverableRequests(self):
- first_request = TestRequests.FIRST_REDIRECT_REQUEST
- discovered_requests = PrefetchSimulationView.ParserDiscoverableRequests(
- first_request, self.dependencies_lens)
- self.assertListEqual(
- [TestRequests.FIRST_REDIRECT_REQUEST,
- TestRequests.JS_REQUEST, TestRequests.JS_REQUEST_OTHER_FRAME,
- TestRequests.JS_REQUEST_UNRELATED_FRAME], discovered_requests)
-
- def testPreloadedRequests(self):
- first_request = TestRequests.FIRST_REDIRECT_REQUEST
- preloaded_requests = PrefetchSimulationView.PreloadedRequests(
- first_request, self.dependencies_lens, self.trace)
- self.assertListEqual([first_request], preloaded_requests)
- self._SetUp(
- [{'args': {'data': {'url': 'http://bla.com/nyancat.js'}},
- 'cat': 'blink.net', 'id': '0xaf9f14fa9dd6c314', 'name': 'Resource',
- 'ph': 'X', 'ts': 1, 'dur': 120, 'pid': 12, 'tid': 12},
- {'args': {'step': 'Preload'}, 'cat': 'blink.net',
- 'id': '0xaf9f14fa9dd6c314', 'name': 'Resource', 'ph': 'T',
- 'ts': 12, 'pid': 12, 'tid': 12}])
- preloaded_requests = PrefetchSimulationView.PreloadedRequests(
- first_request, self.dependencies_lens, self.trace)
- self.assertListEqual([TestRequests.FIRST_REDIRECT_REQUEST,
- TestRequests.JS_REQUEST, TestRequests.JS_REQUEST_OTHER_FRAME,
- TestRequests.JS_REQUEST_UNRELATED_FRAME], preloaded_requests)
-
- def testCost(self):
- self.assertEqual(40 + 12, self.prefetch_view.Cost())
-
- def testUpdateNodeCosts(self):
- self.prefetch_view.UpdateNodeCosts(lambda _: 100)
- self.assertEqual(500 + 40 + 12, self.prefetch_view.Cost())
-
- def testUpdateNodeCostsPartial(self):
- self.prefetch_view.UpdateNodeCosts(
- lambda n: 100 if (n.request.request_id
- == TestRequests.REDIRECTED_REQUEST.request_id) else 0)
- self.assertEqual(100 + 40 + 12, self.prefetch_view.Cost())
-
- def testToFromJsonDict(self):
- self.assertEqual(40 + 12, self.prefetch_view.Cost())
- json_dict = self.prefetch_view.ToJsonDict()
- new_view = PrefetchSimulationView.FromJsonDict(json_dict)
- self.assertEqual(40 + 12, new_view.Cost())
- # Updated Costs.
- self.prefetch_view.UpdateNodeCosts(lambda _: 100)
- self.assertEqual(500 + 40 + 12, self.prefetch_view.Cost())
- json_dict = self.prefetch_view.ToJsonDict()
- new_view = PrefetchSimulationView.FromJsonDict(json_dict)
- self.assertEqual(500 + 40 + 12, new_view.Cost())
-
- def _SetUp(self, added_trace_events=None):
- trace_events = [
- {'ts': 5, 'ph': 'X', 'dur': 10, 'pid': 2, 'tid': 1, 'cat': 'blink.net'}]
- if added_trace_events is not None:
- trace_events += added_trace_events
- self.trace = TestRequests.CreateLoadingTrace(trace_events)
- self.dependencies_lens = request_dependencies_lens.RequestDependencyLens(
- self.trace)
- self.user_satisfied_lens = test_utils.MockUserSatisfiedLens(self.trace)
- self.user_satisfied_lens._postload_msec = 12
- self.prefetch_view = PrefetchSimulationView(
- self.trace, self.dependencies_lens, self.user_satisfied_lens)
- for e in self.prefetch_view.graph.graph.Edges():
- e.cost = 10
-
-
-if __name__ == '__main__':
- unittest.main()
diff --git a/loading/processing.py b/loading/processing.py
deleted file mode 100644
index 7c00951..0000000
--- a/loading/processing.py
+++ /dev/null
@@ -1,75 +0,0 @@
-# Copyright 2015 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-import json
-import os
-import os.path
-import sys
-
-import loading_model
-import loading_trace
-
-
-def SitesFromDir(directory):
- """Extract sites from a data directory.
-
- Based on ./analyze.py fetch file name conventions. We assume each site
- corresponds to two files, <site>.json and <site>.json.cold, and that no other
- kind of file appears in the data directory.
-
- Args:
- directory: the directory to process.
-
- Returns:
- A list of sites as strings.
-
- """
- files = set(os.listdir(directory))
- assert files
- sites = []
- for f in files:
- if f.endswith('.png'):
- continue
- assert f.endswith('.json') or f.endswith('.json.cold'), f
- if f.endswith('.json'):
- assert f + '.cold' in files
- sites.append(f[:f.rfind('.json')])
- elif f.endswith('.cold'):
- assert f[:f.rfind('.cold')] in files
- sites.sort()
- return sites
-
-
-def WarmGraph(datadir, site):
- """Return a loading model graph for the warm pull of site.
-
- Based on ./analyze.py fetch file name conventions.
-
- Args:
- datadir: the directory containing site JSON data.
- site: a site string.
-
- Returns:
- A loading model object.
- """
- with file(os.path.join(datadir, site + '.json')) as f:
- return loading_model.ResourceGraph(loading_trace.LoadingTrace.FromJsonDict(
- json.load(f)))
-
-
-def ColdGraph(datadir, site):
- """Return a loading model graph for the cold pull of site.
-
- Based on ./analyze.py fetch file name conventions.
-
- Args:
- datadir: the directory containing site JSON data.
- site: a site string.
-
- Returns:
- A loading model object.
- """
- with file(os.path.join(datadir, site + '.json.cold')) as f:
- return loading_model.ResourceGraph(loading_trace.LoadingTrace.FromJsonDict(
- json.load(f)))
diff --git a/loading/queuing_lens.py b/loading/queuing_lens.py
deleted file mode 100644
index e29ce7a..0000000
--- a/loading/queuing_lens.py
+++ /dev/null
@@ -1,161 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-"""Lens for resource load queuing.
-
-When executed as a script, takes a loading trace and prints queuing information
-for each request.
-"""
-
-import collections
-import itertools
-import logging
-
-import clovis_constants
-
-
-class QueuingLens(object):
- """Attaches queuing related trace events to request objects."""
- QUEUING_CATEGORY = clovis_constants.QUEUING_CATEGORY
- ASYNC_NAME = 'ScheduledResourceRequest'
- READY_NAME = 'ScheduledResourceRequest.Ready'
- SET_PRIORITY_NAME = 'ScheduledResourceRequest.SetPriority'
- QUEUING_NAMES = set([ASYNC_NAME,
- READY_NAME,
- SET_PRIORITY_NAME])
-
- IN_FLIGHT_NAME = 'ResourceScheduler::Client.InFlightRequests'
- SHOULD_START_NAME = 'ResourceScheduler::Client::ShouldStartRequestInfo'
-
- def __init__(self, trace):
- self._request_track = trace.request_track
- self._queuing_events_by_id = self._GetQueuingEvents(trace.tracing_track)
- self._source_id_to_url = {}
- for source_id, events in self._queuing_events_by_id.iteritems():
- self._source_id_to_url[source_id] = self._GetQueuingEventUrl(events)
-
- def GenerateRequestQueuing(self):
- """Computes queuing information for each request.
-
- We determine blocking requests by looking at which urls are in-flight
- (created but not yet destroyed) at the time of the creation of each
- request. This means that a request that we list as blocking may just be
- queued (throttled) at the same time as our request, and not actually
- blocking.
-
- The lifetime of the queuing events extends from when a resource is first
- slotted into the sytem until the request is complete. The main interesting
- queuing events are begin, end (which define the lifetime) and ready, an
- instant event that is usually within a millisecond after the request_time of
- the Request.
-
- Returns:
- {request_track.Request:
- (start_msec: throttle start, end_msec: throttle end,
- ready_msec: ready,
- blocking: [blocking requests],
- source_ids: [source ids of the request])}, where the map values are
- a named tuple with the specified fields.
- """
- url_to_requests = collections.defaultdict(list)
- for rq in self._request_track.GetEvents():
- url_to_requests[rq.url].append(rq)
- # Queuing events are organized by source id, which corresponds to a load of
- # a url. First collect timing information for each source id, then associate
- # with each request.
- timing_by_source_id = {}
- for source_id, events in self._queuing_events_by_id.iteritems():
- assert all(e.end_msec is None for e in events), \
- 'Unexpected end_msec for nested async queuing events'
- ready_times = [e.start_msec for e in events if e.name == self.READY_NAME]
- if not ready_times:
- ready_msec = None
- else:
- assert len(ready_times) == 1, events
- ready_msec = ready_times[0]
- timing_by_source_id[source_id] = (
- min(e.start_msec for e in events),
- max(e.start_msec for e in events),
- ready_msec)
- queue_info = {}
- for request_url, requests in url_to_requests.iteritems():
- matching_source_ids = set(
- source_id for source_id, url in self._source_id_to_url.iteritems()
- if url == request_url)
- if len(matching_source_ids) > 1:
- logging.warning('Multiple matching source ids, probably duplicated'
- 'urls: %s', [rq.url for rq in requests])
- # Get first source id.
- sid = next(s for s in matching_source_ids) \
- if matching_source_ids else None
- (throttle_start_msec, throttle_end_msec, ready_msec) = \
- timing_by_source_id[sid] if matching_source_ids else (-1, -1, -1)
-
- blocking_requests = []
- for sid, (flight_start_msec,
- flight_end_msec, _) in timing_by_source_id.iteritems():
- if (flight_start_msec < throttle_start_msec and
- flight_end_msec > throttle_start_msec and
- flight_end_msec < throttle_end_msec):
- blocking_requests.extend(
- url_to_requests.get(self._source_id_to_url[sid], []))
-
- info = collections.namedtuple(
- 'QueueInfo', ['start_msec', 'end_msec', 'ready_msec', 'blocking'
- 'source_ids'])
- info.start_msec = throttle_start_msec
- info.end_msec = throttle_end_msec
- info.ready_msec = ready_msec
- current_request_ids = set(rq.request_id for rq in requests)
- info.blocking = [b for b in blocking_requests
- if b is not None and
- b.request_id not in current_request_ids]
- info.source_ids = matching_source_ids
- for rq in requests:
- queue_info[rq] = info
- return queue_info
-
- def _GetQueuingEvents(self, tracing_track):
- events = collections.defaultdict(list)
- for e in tracing_track.GetEvents():
- if (e.category == self.QUEUING_CATEGORY and
- e.name in self.QUEUING_NAMES):
- events[e.args['data']['source_id']].append(e)
- return events
-
- def _GetQueuingEventUrl(self, events):
- urls = set()
- for e in events:
- if 'request_url' in e.args['data']:
- urls.add(e.args['data']['request_url'])
- assert len(urls) == 1, urls
- return urls.pop()
-
- def _GetEventsForRequest(self, request):
- request_events = []
- for source_id, url in self._source_id_to_url:
- if url == request.url:
- request_events.extend(self._queuing_events_by_id[source_id])
- return request_events
-
-
-def _Main(trace_file):
- import loading_trace
- trace = loading_trace.LoadingTrace.FromJsonFile(trace_file)
- lens = QueuingLens(trace)
- queue_info = lens.GenerateRequestQueuing()
- base_msec = trace.request_track.GetFirstRequestMillis()
- mkmsec = lambda ms: ms - base_msec if ms > 0 else -1
- for rq, info in queue_info.iteritems():
- print '{fp} ({ts}->{te})[{rs}->{re}] {ids} {url}'.format(
- fp=rq.fingerprint,
- ts=mkmsec(info.start_msec), te=mkmsec(info.end_msec),
- rs=mkmsec(rq.start_msec), re=mkmsec(rq.end_msec),
- ids=info.source_ids, url=rq.url)
- for blocking_request in info.blocking:
- print ' {} {}'.format(blocking_request.fingerprint, blocking_request.url)
-
-if __name__ == '__main__':
- import sys
- _Main(sys.argv[1])
diff --git a/loading/queuing_lens_unittest.py b/loading/queuing_lens_unittest.py
deleted file mode 100644
index 2b0a8d7..0000000
--- a/loading/queuing_lens_unittest.py
+++ /dev/null
@@ -1,114 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-import unittest
-
-from queuing_lens import QueuingLens
-import request_track
-import test_utils
-
-class QueuingLensTestCase(unittest.TestCase):
- MILLIS_TO_MICROS = 1000
- MILLIS_TO_SECONDS = 0.001
- URL_1 = 'http://1'
- URL_2 = 'http://2'
-
- def testRequestQueuing(self):
- # http://1: queued at 5ms, request start at 10ms, ready at 11ms,
- # done at 12ms; blocked by 2.
- # http://2: queued at 4ms, request start at 4ms, ready at 5 ms, done at 9ms.
- trace_events = [
- {'args': {
- 'data': {
- 'request_url': self.URL_1,
- 'source_id': 1
- }
- },
- 'cat': QueuingLens.QUEUING_CATEGORY,
- 'id': 1,
- 'name': QueuingLens.ASYNC_NAME,
- 'ph': 'b',
- 'ts': 5 * self.MILLIS_TO_MICROS
- },
- {'args': {
- 'data': {
- 'source_id': 1
- }
- },
- 'cat': QueuingLens.QUEUING_CATEGORY,
- 'id': 1,
- 'name': QueuingLens.READY_NAME,
- 'ph': 'n',
- 'ts': 10 * self.MILLIS_TO_MICROS
- },
- {'args': {
- 'data': {
- 'source_id': 1
- }
- },
- 'cat': QueuingLens.QUEUING_CATEGORY,
- 'id': 1,
- 'name': QueuingLens.ASYNC_NAME,
- 'ph': 'e',
- 'ts': 12 * self.MILLIS_TO_MICROS
- },
-
- {'args': {
- 'data': {
- 'request_url': self.URL_2,
- 'source_id': 2
- }
- },
- 'cat': QueuingLens.QUEUING_CATEGORY,
- 'id': 2,
- 'name': QueuingLens.ASYNC_NAME,
- 'ph': 'b',
- 'ts': 4 * self.MILLIS_TO_MICROS
- },
- {'args': {
- 'data': {
- 'source_id': 2
- }
- },
- 'cat': QueuingLens.QUEUING_CATEGORY,
- 'id': 2,
- 'name': QueuingLens.READY_NAME,
- 'ph': 'n',
- 'ts': 5 * self.MILLIS_TO_MICROS
- },
- {'args': {
- 'data': {
- 'source_id': 2
- }
- },
- 'cat': QueuingLens.QUEUING_CATEGORY,
- 'id': 2,
- 'name': QueuingLens.ASYNC_NAME,
- 'ph': 'e',
- 'ts': 9 * self.MILLIS_TO_MICROS
- }]
- requests = [
- request_track.Request.FromJsonDict(
- {'url': self.URL_1,
- 'request_id': '0.1',
- 'timing': {'request_time': 10 * self.MILLIS_TO_SECONDS,
- 'loading_finished': 2}}),
- request_track.Request.FromJsonDict(
- {'url': self.URL_2,
- 'request_id': '0.2',
- 'timing': {'request_time': 4 * self.MILLIS_TO_SECONDS,
- 'loading_finished': 5}})]
- trace = test_utils.LoadingTraceFromEvents(
- requests=requests, trace_events=trace_events)
- queue_info = QueuingLens(trace).GenerateRequestQueuing()
- self.assertEqual(set(['0.2']),
- set(rq.request_id
- for rq in queue_info[requests[0]].blocking))
- self.assertEqual((5., 10., 12.), (queue_info[requests[0]].start_msec,
- queue_info[requests[0]].ready_msec,
- queue_info[requests[0]].end_msec))
- self.assertEqual(0, len(queue_info[requests[1]].blocking))
- self.assertEqual((4., 5., 9.), (queue_info[requests[1]].start_msec,
- queue_info[requests[1]].ready_msec,
- queue_info[requests[1]].end_msec))
diff --git a/loading/report.py b/loading/report.py
deleted file mode 100644
index 23a39a4..0000000
--- a/loading/report.py
+++ /dev/null
@@ -1,298 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-"""Generates a loading report.
-
-When executed as a script, takes a trace filename and print the report.
-"""
-
-from activity_lens import ActivityLens
-from content_classification_lens import ContentClassificationLens
-from loading_graph_view import LoadingGraphView
-import loading_trace
-import metrics
-from network_activity_lens import NetworkActivityLens
-from prefetch_view import PrefetchSimulationView
-from queuing_lens import QueuingLens
-import request_dependencies_lens
-from user_satisfied_lens import (
- FirstTextPaintLens, FirstContentfulPaintLens, FirstSignificantPaintLens,
- PLTLens)
-
-
-def _ComputeCpuBusyness(activity, load_start, satisfied_end):
- """Generates a breakdown of CPU activity between |load_start| and
- |satisfied_end|."""
- duration = float(satisfied_end - load_start)
- result = {
- 'activity_frac': (
- activity.MainRendererThreadBusyness(load_start, satisfied_end)
- / duration),
- }
-
- activity_breakdown = activity.ComputeActivity(load_start, satisfied_end)
- result['parsing_frac'] = (
- sum(activity_breakdown['parsing'].values()) / duration)
- result['script_frac'] = (
- sum(activity_breakdown['script'].values()) / duration)
- return result
-
-
-class PerUserLensReport(object):
- """Generates a variety of metrics relative to a passed in user lens."""
-
- def __init__(self, trace, user_lens, activity_lens, network_lens,
- navigation_start_msec):
- requests = trace.request_track.GetEvents()
- dependencies_lens = request_dependencies_lens.RequestDependencyLens(
- trace)
- prefetch_view = PrefetchSimulationView(trace, dependencies_lens, user_lens)
- preloaded_requests = prefetch_view.PreloadedRequests(
- requests[0], dependencies_lens, trace)
-
- self._navigation_start_msec = navigation_start_msec
-
- self._satisfied_msec = user_lens.SatisfiedMs()
-
- graph = LoadingGraphView.FromTrace(trace)
- self._inversions = graph.GetInversionsAtTime(self._satisfied_msec)
-
- self._byte_frac = self._GenerateByteFrac(network_lens)
-
- self._requests = user_lens.CriticalRequests()
- self._preloaded_requests = (
- [r for r in preloaded_requests if r in self._requests])
-
- self._cpu_busyness = _ComputeCpuBusyness(activity_lens,
- navigation_start_msec,
- self._satisfied_msec)
- prefetch_view.UpdateNodeCosts(lambda n: 0 if n.preloaded else n.cost)
- self._no_state_prefetch_ms = prefetch_view.Cost()
-
- def GenerateReport(self):
- report = {}
-
- report['ms'] = self._satisfied_msec - self._navigation_start_msec
- report['byte_frac'] = self._byte_frac
-
- report['requests'] = len(self._requests)
- report['preloaded_requests'] = len(self._preloaded_requests)
- report['requests_cost'] = reduce(lambda x,y: x + y.Cost(),
- self._requests, 0)
- report['preloaded_requests_cost'] = reduce(lambda x,y: x + y.Cost(),
- self._preloaded_requests, 0)
- report['predicted_no_state_prefetch_ms'] = self._no_state_prefetch_ms
-
- # Take the first (earliest) inversion.
- report['inversion'] = self._inversions[0].url if self._inversions else ''
-
- report.update(self._cpu_busyness)
- return report
-
- def _GenerateByteFrac(self, network_lens):
- if not network_lens.total_download_bytes:
- return float('Nan')
- byte_frac = (network_lens.DownloadedBytesAt(self._satisfied_msec)
- / float(network_lens.total_download_bytes))
- return byte_frac
-
-
-class LoadingReport(object):
- """Generates a loading report from a loading trace."""
- def __init__(self, trace, ad_rules=None, tracking_rules=None):
- """Constructor.
-
- Args:
- trace: (LoadingTrace) a loading trace.
- ad_rules: ([str]) List of ad filtering rules.
- tracking_rules: ([str]) List of tracking filtering rules.
- """
- self.trace = trace
-
- navigation_start_events = trace.tracing_track.GetMatchingEvents(
- 'blink.user_timing', 'navigationStart')
- self._navigation_start_msec = min(
- e.start_msec for e in navigation_start_events)
-
- self._dns_requests, self._dns_cost_msec = metrics.DnsRequestsAndCost(trace)
- self._connection_stats = metrics.ConnectionMetrics(trace)
-
- self._user_lens_reports = {}
- plt_lens = PLTLens(self.trace)
- first_text_paint_lens = FirstTextPaintLens(self.trace)
- first_contentful_paint_lens = FirstContentfulPaintLens(self.trace)
- first_significant_paint_lens = FirstSignificantPaintLens(self.trace)
- activity = ActivityLens(trace)
- network_lens = NetworkActivityLens(self.trace)
- for key, user_lens in [['plt', plt_lens],
- ['first_text', first_text_paint_lens],
- ['contentful', first_contentful_paint_lens],
- ['significant', first_significant_paint_lens]]:
- self._user_lens_reports[key] = PerUserLensReport(self.trace,
- user_lens, activity, network_lens, self._navigation_start_msec)
-
- self._transfer_size = metrics.TotalTransferSize(trace)[1]
- self._request_count = len(trace.request_track.GetEvents())
-
- content_lens = ContentClassificationLens(
- trace, ad_rules or [], tracking_rules or [])
- has_ad_rules = bool(ad_rules)
- has_tracking_rules = bool(tracking_rules)
- self._ad_report = self._AdRequestsReport(
- trace, content_lens, has_ad_rules, has_tracking_rules)
- self._ads_cost = self._AdsAndTrackingCpuCost(
- self._navigation_start_msec,
- (self._navigation_start_msec
- + self._user_lens_reports['plt'].GenerateReport()['ms']),
- content_lens, activity, has_tracking_rules or has_ad_rules)
-
- self._queue_stats = self._ComputeQueueStats(QueuingLens(trace))
-
- def GenerateReport(self):
- """Returns a report as a dict."""
- # NOTE: When changing the return value here, also update the schema
- # (bigquery_schema.json) accordingly. See cloud/frontend/README.md for
- # details.
- report = {
- 'url': self.trace.url,
- 'transfer_size': self._transfer_size,
- 'dns_requests': self._dns_requests,
- 'dns_cost_ms': self._dns_cost_msec,
- 'total_requests': self._request_count}
-
- for user_lens_type, user_lens_report in self._user_lens_reports.iteritems():
- for key, value in user_lens_report.GenerateReport().iteritems():
- report[user_lens_type + '_' + key] = value
-
- report.update(self._ad_report)
- report.update(self._ads_cost)
- report.update(self._connection_stats)
- report.update(self._queue_stats)
- return report
-
- @classmethod
- def FromTraceFilename(cls, filename, ad_rules_filename,
- tracking_rules_filename):
- """Returns a LoadingReport from a trace filename."""
- trace = loading_trace.LoadingTrace.FromJsonFile(filename)
- return LoadingReport(trace, ad_rules_filename, tracking_rules_filename)
-
- @classmethod
- def _AdRequestsReport(
- cls, trace, content_lens, has_ad_rules, has_tracking_rules):
- requests = trace.request_track.GetEvents()
- has_rules = has_ad_rules or has_tracking_rules
- result = {
- 'ad_requests': 0 if has_ad_rules else None,
- 'tracking_requests': 0 if has_tracking_rules else None,
- 'ad_or_tracking_requests': 0 if has_rules else None,
- 'ad_or_tracking_initiated_requests': 0 if has_rules else None,
- 'ad_or_tracking_initiated_transfer_size': 0 if has_rules else None}
- if not has_rules:
- return result
- for request in requests:
- is_ad = content_lens.IsAdRequest(request)
- is_tracking = content_lens.IsTrackingRequest(request)
- if has_ad_rules:
- result['ad_requests'] += int(is_ad)
- if has_tracking_rules:
- result['tracking_requests'] += int(is_tracking)
- result['ad_or_tracking_requests'] += int(is_ad or is_tracking)
- ad_tracking_requests = content_lens.AdAndTrackingRequests()
- result['ad_or_tracking_initiated_requests'] = len(ad_tracking_requests)
- result['ad_or_tracking_initiated_transfer_size'] = metrics.TransferSize(
- ad_tracking_requests)[1]
- return result
-
- @classmethod
- def _ComputeQueueStats(cls, queue_lens):
- queuing_info = queue_lens.GenerateRequestQueuing()
- total_blocked_msec = 0
- total_loading_msec = 0
- num_blocking_requests = []
- for queue_info in queuing_info.itervalues():
- try:
- total_blocked_msec += max(0, queue_info.ready_msec -
- queue_info.start_msec)
- total_loading_msec += max(0, queue_info.end_msec -
- queue_info.start_msec)
- except TypeError:
- pass # Invalid queue info timings.
- num_blocking_requests.append(len(queue_info.blocking))
- if num_blocking_requests:
- num_blocking_requests.sort()
- avg_blocking = (float(sum(num_blocking_requests)) /
- len(num_blocking_requests))
- mid = len(num_blocking_requests) / 2
- if len(num_blocking_requests) & 1:
- median_blocking = num_blocking_requests[mid]
- else:
- median_blocking = (num_blocking_requests[mid-1] +
- num_blocking_requests[mid]) / 2
- else:
- avg_blocking = 0
- median_blocking = 0
- return {
- 'total_queuing_blocked_msec': int(total_blocked_msec),
- 'total_queuing_load_msec': int(total_loading_msec),
- 'average_blocking_request_count': avg_blocking,
- 'median_blocking_request_count': median_blocking,
- }
-
- @classmethod
- def _AdsAndTrackingCpuCost(
- cls, start_msec, end_msec, content_lens, activity, has_rules):
- """Returns the CPU cost associated with Ads and tracking between timestamps.
-
- Can return an overestimate, as execution slices are tagged by URL, and not
- by requests.
-
- Args:
- start_msec: (float)
- end_msec: (float)
- content_lens: (ContentClassificationLens)
- activity: (ActivityLens)
-
- Returns:
- {'ad_and_tracking_script_frac': float,
- 'ad_and_tracking_parsing_frac': float}
- """
- result = {'ad_or_tracking_script_frac': None,
- 'ad_or_tracking_parsing_frac': None}
- if not has_rules:
- return result
-
- duration = float(end_msec - start_msec)
- requests = content_lens.AdAndTrackingRequests()
- urls = {r.url for r in requests}
- cpu_breakdown = activity.ComputeActivity(start_msec, end_msec)
- result['ad_or_tracking_script_frac'] = sum(
- value for (url, value) in cpu_breakdown['script'].items()
- if url in urls) / duration
- result['ad_or_tracking_parsing_frac'] = sum(
- value for (url, value) in cpu_breakdown['parsing'].items()
- if url in urls) / duration
- return result
-
-
-def _Main(args):
- if len(args) not in (2, 4):
- print 'Usage: report.py trace.json (ad_rules tracking_rules)'
- sys.exit(1)
- trace_filename = args[1]
- ad_rules = None
- tracking_rules = None
- if len(args) == 4:
- ad_rules = open(args[2]).readlines()
- tracking_rules = open(args[3]).readlines()
- report = LoadingReport.FromTraceFilename(
- trace_filename, ad_rules, tracking_rules)
- print json.dumps(report.GenerateReport(), indent=2, sort_keys=True)
-
-
-if __name__ == '__main__':
- import sys
- import json
-
- _Main(sys.argv)
diff --git a/loading/report_unittest.py b/loading/report_unittest.py
deleted file mode 100644
index ef43014..0000000
--- a/loading/report_unittest.py
+++ /dev/null
@@ -1,298 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-import unittest
-
-import metrics
-import report
-from queuing_lens import QueuingLens
-import test_utils
-import user_satisfied_lens_unittest
-
-
-class LoadingReportTestCase(unittest.TestCase):
- MILLI_TO_MICRO = 1000
- _NAVIGATION_START_TIME = 12
- _FIRST_REQUEST_TIME = 15
- _CONTENTFUL_PAINT = 120
- _TEXT_PAINT = 30
- _SIGNIFICANT_PAINT = 50
- _DURATION = 400
- _REQUEST_OFFSET = 5
- _LOAD_END_TIME = 1280
- _MAIN_FRAME_ID = 1
- _FIRST_REQUEST_DATA_LENGTH = 128
- _SECOND_REQUEST_DATA_LENGTH = 1024
- _TOPLEVEL_EVENT_OFFSET = 10
- _TOPLEVEL_EVENT_DURATION = 100
- _SCRIPT_EVENT_DURATION = 50
- _PARSING_EVENT_DURATION = 60
-
- def setUp(self):
- self.trace_creator = test_utils.TraceCreator()
- self.requests = [
- self.trace_creator.RequestAt(self._FIRST_REQUEST_TIME, frame_id=1),
- self.trace_creator.RequestAt(
- self._NAVIGATION_START_TIME + self._REQUEST_OFFSET, self._DURATION)]
- self.requests[0].timing.receive_headers_end = 0
- self.requests[1].timing.receive_headers_end = 0
- self.requests[0].encoded_data_length = self._FIRST_REQUEST_DATA_LENGTH
- self.requests[1].encoded_data_length = self._SECOND_REQUEST_DATA_LENGTH
-
- self.ad_domain = 'i-ve-got-the-best-ads.com'
- self.ad_url = 'http://www.' + self.ad_domain + '/i-m-really-rich.js'
- self.requests[0].url = self.ad_url
-
- self.trace_events = [
- {'args': {'name': 'CrRendererMain'}, 'cat': '__metadata',
- 'name': 'thread_name', 'ph': 'M', 'pid': 1, 'tid': 1, 'ts': 0},
- {'ts': self._NAVIGATION_START_TIME * self.MILLI_TO_MICRO, 'ph': 'R',
- 'cat': 'blink.user_timing', 'pid': 1, 'tid': 1,
- 'name': 'navigationStart',
- 'args': {'frame': 1}},
- {'ts': self._LOAD_END_TIME * self.MILLI_TO_MICRO, 'ph': 'I',
- 'cat': 'devtools.timeline', 'pid': 1, 'tid': 1,
- 'name': 'MarkLoad',
- 'args': {'data': {'isMainFrame': True}}},
- {'ts': self._CONTENTFUL_PAINT * self.MILLI_TO_MICRO, 'ph': 'I',
- 'cat': 'blink.user_timing', 'pid': 1, 'tid': 1,
- 'name': 'firstContentfulPaint',
- 'args': {'frame': self._MAIN_FRAME_ID}},
- {'ts': self._TEXT_PAINT * self.MILLI_TO_MICRO, 'ph': 'I',
- 'cat': 'blink.user_timing', 'pid': 1, 'tid': 1,
- 'name': 'firstPaint',
- 'args': {'frame': self._MAIN_FRAME_ID}},
- {'ts': 90 * self.MILLI_TO_MICRO, 'ph': 'I',
- 'cat': 'blink', 'pid': 1, 'tid': 1,
- 'name': 'FrameView::paintTree'},
- {'ts': self._SIGNIFICANT_PAINT * self.MILLI_TO_MICRO, 'ph': 'I',
- 'cat': 'foobar', 'name': 'biz', 'pid': 1, 'tid': 1,
- 'args': {'counters': {
- 'LayoutObjectsThatHadNeverHadLayout': 10}}},
- {'ts': (self._NAVIGATION_START_TIME - self._TOPLEVEL_EVENT_OFFSET)
- * self.MILLI_TO_MICRO,
- 'pid': 1, 'tid': 1, 'ph': 'X',
- 'dur': self._TOPLEVEL_EVENT_DURATION * self.MILLI_TO_MICRO,
- 'cat': 'toplevel', 'name': 'MessageLoop::RunTask'},
- {'ts': self._NAVIGATION_START_TIME * self.MILLI_TO_MICRO,
- 'pid': 1, 'tid': 1, 'ph': 'X',
- 'dur': self._PARSING_EVENT_DURATION * self.MILLI_TO_MICRO,
- 'cat': 'devtools.timeline', 'name': 'ParseHTML',
- 'args': {'beginData': {'url': ''}}},
- {'ts': self._NAVIGATION_START_TIME * self.MILLI_TO_MICRO,
- 'pid': 1, 'tid': 1, 'ph': 'X',
- 'dur': self._SCRIPT_EVENT_DURATION * self.MILLI_TO_MICRO,
- 'cat': 'devtools.timeline', 'name': 'EvaluateScript',
- 'args': {'data': {'scriptName': ''}}}]
-
- def _MakeTrace(self):
- trace = self.trace_creator.CreateTrace(
- self.requests, self.trace_events, self._MAIN_FRAME_ID)
- return trace
-
- def _AddQueuingEvents(self, source_id, url, start_msec, ready_msec, end_msec):
- self.trace_events.extend([
- {'args': {
- 'data': {
- 'request_url': url,
- 'source_id': source_id
- }
- },
- 'cat': QueuingLens.QUEUING_CATEGORY,
- 'id': source_id,
- 'pid': 1, 'tid': 10,
- 'name': QueuingLens.ASYNC_NAME,
- 'ph': 'b',
- 'ts': start_msec * self.MILLI_TO_MICRO
- },
- {'args': {
- 'data': {
- 'source_id': source_id
- }
- },
- 'cat': QueuingLens.QUEUING_CATEGORY,
- 'id': source_id,
- 'pid': 1, 'tid': 10,
- 'name': QueuingLens.READY_NAME,
- 'ph': 'n',
- 'ts': ready_msec * self.MILLI_TO_MICRO
- },
- {'args': {
- 'data': {
- 'source_id': source_id
- }
- },
- 'cat': QueuingLens.QUEUING_CATEGORY,
- 'id': source_id,
- 'pid': 1, 'tid': 10,
- 'name': QueuingLens.ASYNC_NAME,
- 'ph': 'e',
- 'ts': end_msec * self.MILLI_TO_MICRO
- }])
-
- def testGenerateReport(self):
- trace = self._MakeTrace()
- loading_report = report.LoadingReport(trace).GenerateReport()
- self.assertEqual(trace.url, loading_report['url'])
- self.assertEqual(self._TEXT_PAINT - self._NAVIGATION_START_TIME,
- loading_report['first_text_ms'])
- self.assertEqual(self._SIGNIFICANT_PAINT - self._NAVIGATION_START_TIME,
- loading_report['significant_ms'])
- self.assertEqual(self._CONTENTFUL_PAINT - self._NAVIGATION_START_TIME,
- loading_report['contentful_ms'])
- self.assertAlmostEqual(self._LOAD_END_TIME - self._NAVIGATION_START_TIME,
- loading_report['plt_ms'])
- self.assertEqual(2, loading_report['total_requests'])
- self.assertAlmostEqual(0.34, loading_report['contentful_byte_frac'], 2)
- self.assertAlmostEqual(0.1844, loading_report['significant_byte_frac'], 2)
- self.assertEqual(2, loading_report['plt_requests'])
- self.assertEqual(1, loading_report['first_text_requests'])
- self.assertEqual(1, loading_report['contentful_requests'])
- self.assertEqual(1, loading_report['significant_requests'])
- self.assertEqual(1, loading_report['plt_preloaded_requests'])
- self.assertEqual(1, loading_report['first_text_preloaded_requests'])
- self.assertEqual(1, loading_report['contentful_preloaded_requests'])
- self.assertEqual(1, loading_report['significant_preloaded_requests'])
- self.assertEqual(401, loading_report['plt_requests_cost'])
- self.assertEqual(1, loading_report['first_text_requests_cost'])
- self.assertEqual(1, loading_report['contentful_requests_cost'])
- self.assertEqual(1, loading_report['significant_requests_cost'])
- self.assertEqual(1, loading_report['plt_preloaded_requests_cost'])
- self.assertEqual(1, loading_report['first_text_preloaded_requests_cost'])
- self.assertEqual(1, loading_report['contentful_preloaded_requests_cost'])
- self.assertEqual(1, loading_report['significant_preloaded_requests_cost'])
- self.assertEqual(400, loading_report['plt_predicted_no_state_prefetch_ms'])
- self.assertEqual(14,
- loading_report['first_text_predicted_no_state_prefetch_ms'])
- self.assertEqual(104,
- loading_report['contentful_predicted_no_state_prefetch_ms'])
- self.assertEqual(74,
- loading_report['significant_predicted_no_state_prefetch_ms'])
- self.assertEqual('', loading_report['contentful_inversion'])
- self.assertEqual('', loading_report['significant_inversion'])
- self.assertIsNone(loading_report['ad_requests'])
- self.assertIsNone(loading_report['ad_or_tracking_requests'])
- self.assertIsNone(loading_report['ad_or_tracking_initiated_requests'])
- self.assertIsNone(loading_report['ad_or_tracking_initiated_transfer_size'])
- self.assertIsNone(loading_report['ad_or_tracking_script_frac'])
- self.assertIsNone(loading_report['ad_or_tracking_parsing_frac'])
- self.assertEqual(
- self._FIRST_REQUEST_DATA_LENGTH + self._SECOND_REQUEST_DATA_LENGTH
- + metrics.HTTP_OK_LENGTH * 2,
- loading_report['transfer_size'])
- self.assertEqual(0, loading_report['total_queuing_blocked_msec'])
- self.assertEqual(0, loading_report['total_queuing_load_msec'])
- self.assertEqual(0, loading_report['average_blocking_request_count'])
- self.assertEqual(0, loading_report['median_blocking_request_count'])
-
- def testInversion(self):
- self.requests[0].timing.loading_finished = 4 * (
- self._REQUEST_OFFSET + self._DURATION)
- self.requests[1].initiator['type'] = 'parser'
- self.requests[1].initiator['url'] = self.requests[0].url
- for e in self.trace_events:
- if e['name'] == 'firstContentfulPaint':
- e['ts'] = self.MILLI_TO_MICRO * (
- self._FIRST_REQUEST_TIME + self._REQUEST_OFFSET +
- self._DURATION + 1)
- break
- loading_report = report.LoadingReport(self._MakeTrace()).GenerateReport()
- self.assertEqual(self.requests[0].url,
- loading_report['contentful_inversion'])
- self.assertEqual('', loading_report['significant_inversion'])
-
- def testPltNoLoadEvents(self):
- trace = self._MakeTrace()
- # Change the MarkLoad events.
- for e in trace.tracing_track.GetEvents():
- if e.name == 'MarkLoad':
- e.tracing_event['name'] = 'dummy'
- loading_report = report.LoadingReport(trace).GenerateReport()
- self.assertAlmostEqual(self._REQUEST_OFFSET + self._DURATION,
- loading_report['plt_ms'])
-
- def testAdTrackingRules(self):
- trace = self._MakeTrace()
- loading_report = report.LoadingReport(
- trace, [self.ad_domain], []).GenerateReport()
- self.assertEqual(1, loading_report['ad_requests'])
- self.assertEqual(1, loading_report['ad_or_tracking_requests'])
- self.assertEqual(1, loading_report['ad_or_tracking_initiated_requests'])
- self.assertIsNone(loading_report['tracking_requests'])
- self.assertEqual(
- self._FIRST_REQUEST_DATA_LENGTH + metrics.HTTP_OK_LENGTH,
- loading_report['ad_or_tracking_initiated_transfer_size'])
-
- def testThreadBusyness(self):
- loading_report = report.LoadingReport(self._MakeTrace()).GenerateReport()
- self.assertAlmostEqual(
- 1., loading_report['significant_activity_frac'])
- self.assertAlmostEqual(
- float(self._TOPLEVEL_EVENT_DURATION - self._TOPLEVEL_EVENT_OFFSET)
- / (self._CONTENTFUL_PAINT - self._NAVIGATION_START_TIME),
- loading_report['contentful_activity_frac'])
- self.assertAlmostEqual(
- float(self._TOPLEVEL_EVENT_DURATION - self._TOPLEVEL_EVENT_OFFSET)
- / (self._LOAD_END_TIME - self._NAVIGATION_START_TIME),
- loading_report['plt_activity_frac'])
-
- def testActivityBreakdown(self):
- loading_report = report.LoadingReport(self._MakeTrace()).GenerateReport()
- load_time = float(self._LOAD_END_TIME - self._NAVIGATION_START_TIME)
- contentful_time = float(
- self._CONTENTFUL_PAINT - self._NAVIGATION_START_TIME)
-
- self.assertAlmostEqual(self._SCRIPT_EVENT_DURATION / load_time,
- loading_report['plt_script_frac'])
- self.assertAlmostEqual(
- (self._PARSING_EVENT_DURATION - self._SCRIPT_EVENT_DURATION)
- / load_time,
- loading_report['plt_parsing_frac'])
-
- self.assertAlmostEqual(1., loading_report['significant_script_frac'])
- self.assertAlmostEqual(0., loading_report['significant_parsing_frac'])
-
- self.assertAlmostEqual(self._SCRIPT_EVENT_DURATION / contentful_time,
- loading_report['contentful_script_frac'])
- self.assertAlmostEqual(
- (self._PARSING_EVENT_DURATION - self._SCRIPT_EVENT_DURATION)
- / contentful_time, loading_report['contentful_parsing_frac'])
-
- def testAdsAndTrackingCost(self):
- load_time = float(self._LOAD_END_TIME - self._NAVIGATION_START_TIME)
- self.trace_events.append(
- {'ts': load_time / 3. * self.MILLI_TO_MICRO,
- 'pid': 1, 'tid': 1, 'ph': 'X',
- 'dur': load_time / 2. * self.MILLI_TO_MICRO,
- 'cat': 'devtools.timeline', 'name': 'EvaluateScript',
- 'args': {'data': {'scriptName': self.ad_url}}})
- loading_report = report.LoadingReport(
- self._MakeTrace(), [self.ad_domain]).GenerateReport()
- self.assertAlmostEqual(.5, loading_report['ad_or_tracking_script_frac'], 2)
- self.assertAlmostEqual(0., loading_report['ad_or_tracking_parsing_frac'])
-
- def testQueueStats(self):
- # We use three requests, A, B and C. A is not blocked, B is blocked by A,
- # and C blocked by A and B.
- BASE_MSEC = self._FIRST_REQUEST_TIME + 4 * self._DURATION
- self.requests = []
- request_A = self.trace_creator.RequestAt(BASE_MSEC, 5)
- request_B = self.trace_creator.RequestAt(BASE_MSEC + 6, 5)
- request_C = self.trace_creator.RequestAt(BASE_MSEC + 12, 10)
- self.requests.extend([request_A, request_B, request_C])
- self._AddQueuingEvents(10, request_A.url,
- BASE_MSEC, BASE_MSEC, BASE_MSEC + 5)
- self._AddQueuingEvents(20, request_B.url,
- BASE_MSEC + 1, BASE_MSEC + 6, BASE_MSEC + 11)
- self._AddQueuingEvents(30, request_C.url,
- BASE_MSEC + 2, BASE_MSEC + 12, BASE_MSEC + 22)
- loading_report = report.LoadingReport(self._MakeTrace()).GenerateReport()
- self.assertEqual(15, loading_report['total_queuing_blocked_msec'])
- self.assertEqual(35, loading_report['total_queuing_load_msec'])
- self.assertAlmostEqual(1, loading_report['average_blocking_request_count'])
- self.assertEqual(1, loading_report['median_blocking_request_count'])
-
-
-if __name__ == '__main__':
- unittest.main()
diff --git a/loading/request_dependencies_lens.py b/loading/request_dependencies_lens.py
deleted file mode 100644
index 1689547..0000000
--- a/loading/request_dependencies_lens.py
+++ /dev/null
@@ -1,249 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-"""Gathers and infers dependencies between requests.
-
-When executed as a script, loads a trace and outputs the dependencies.
-"""
-
-import collections
-import copy
-import logging
-import operator
-
-import loading_trace
-import request_track
-
-
-class RequestDependencyLens(object):
- """Analyses and infers request dependencies."""
- DEPENDENCIES = ('redirect', 'parser', 'script', 'inferred', 'other')
- CALLFRAMES_KEY = 'callFrames'
- def __init__(self, trace):
- """Initializes an instance of RequestDependencyLens.
-
- Args:
- trace: (LoadingTrace) Loading trace.
- """
- self.loading_trace = trace
- self._requests = self.loading_trace.request_track.GetEvents()
- self._requests_by_id = {r.request_id: r for r in self._requests}
- self._requests_by_url = collections.defaultdict(list)
- self._deps = None
- for request in self._requests:
- self._requests_by_url[request.url].append(request)
- self._frame_to_parent = {}
- for event in self.loading_trace.page_track.GetEvents():
- if event['method'] == 'Page.frameAttached':
- self._frame_to_parent[event['frame_id']] = event['parent_frame_id']
-
- def GetRequestDependencies(self):
- """Returns a list of request dependencies.
-
- Returns:
- [(first, second, reason), ...] where first and second are instances of
- request_track.Request, and reason is in DEPENDENCIES. The second request
- depends on the first one, with the listed reason.
- """
- self._ComputeRequestDependencies()
- return copy.copy(self._deps)
-
- def GetRedirectChain(self, request):
- """Returns the whole redirect chain for a given request.
-
- Note that this misses some JS-based redirects.
-
- Returns:
- A list of request, containing the request passed as a parameter.
- """
- self._ComputeRequestDependencies()
- chain = [request]
- while True:
- for (first_request, second_request, why) in self._deps:
- if first_request == request and why == 'redirect':
- chain.append(second_request)
- request = second_request
- break
- else:
- return chain
-
- def _ComputeRequestDependencies(self):
- if self._deps is not None:
- return
- self._deps = []
- for request in self._requests:
- dependency = self._GetDependency(request)
- if dependency:
- self._deps.append(dependency)
-
- def _GetDependency(self, request):
- """Returns (first, second, reason), or None.
-
- |second| depends on |first|.
-
- Args:
- request: (Request) the request we wish to get the initiator of.
-
- Returns:
- None if no dependency is found from this request, or
- (initiator (Request), blocked_request (Request), reason (str)).
- """
- reason = request.initiator['type']
- assert reason in request_track.Request.INITIATORS
- if reason == 'redirect':
- return self._GetInitiatingRequestRedirect(request)
- elif reason == 'parser':
- return self._GetInitiatingRequestParser(request)
- elif reason == 'script':
- return self._GetInitiatingRequestScript(request)
- else:
- assert reason == 'other'
- return self._GetInitiatingRequestOther(request)
-
- def _GetInitiatingRequestRedirect(self, request):
- assert request_track.Request.INITIATING_REQUEST in request.initiator
- initiating_request_id = request.initiator[
- request_track.Request.INITIATING_REQUEST]
- assert initiating_request_id in self._requests_by_id
- return (self._requests_by_id[initiating_request_id], request, 'redirect')
-
- def _GetInitiatingRequestParser(self, request):
- url = request.initiator['url']
- candidates = self._FindMatchingRequests(url, request.timing.request_time)
- if not candidates:
- return None
- initiating_request = self._FindBestMatchingInitiator(request, candidates)
- return (initiating_request, request, 'parser')
-
- def _FlattenScriptStack(self, stack):
- """Recursively collapses the stack of asynchronous callstacks.
-
- A stack has a list of call frames and optionnally a "parent" stack.
- This function recursively folds the parent stacks into the root stack by
- concatening all the call frames.
-
- Args:
- stack: (dict) the stack that must be flattened
-
- Returns:
- A stack with no parent, which is a dictionary with a single "callFrames"
- key, and no "parent" key.
- """
- PARENT_KEY = 'parent'
- if not PARENT_KEY in stack:
- return stack
- stack[self.CALLFRAMES_KEY] += stack[PARENT_KEY][self.CALLFRAMES_KEY]
- if not PARENT_KEY in stack[PARENT_KEY]:
- stack.pop(PARENT_KEY)
- else:
- stack[PARENT_KEY] = stack[PARENT_KEY][PARENT_KEY]
- return self._FlattenScriptStack(stack)
-
- def _GetInitiatingRequestScript(self, request):
- STACK_KEY = 'stack'
- if not STACK_KEY in request.initiator:
- logging.warning('Script initiator but no stack trace.')
- return None
- initiating_request = None
- timestamp = request.timing.request_time
- # Deep copy the initiator's stack to avoid mutating the input request.
- stack = self._FlattenScriptStack(
- copy.deepcopy(request.initiator[STACK_KEY]))
- call_frames = stack[self.CALLFRAMES_KEY]
- for frame in call_frames:
- url = frame['url']
- candidates = self._FindMatchingRequests(url, timestamp)
- if candidates:
- initiating_request = self._FindBestMatchingInitiator(
- request, candidates)
- if initiating_request:
- break
- else:
- for frame in call_frames:
- if not frame.get('url', None) and frame.get(
- 'functionName', None) == 'window.onload':
- logging.warning('Unmatched request for onload handler.')
- break
- else:
- logging.warning('Unmatched request.')
- return None
- return (initiating_request, request, 'script')
-
- def _GetInitiatingRequestOther(self, _):
- # TODO(lizeb): Infer "other" initiator types.
- return None
-
- def _FindMatchingRequests(self, url, before_timestamp):
- """Returns a list of requests matching a URL, before a timestamp.
-
- Args:
- url: (str) URL to match in requests.
- before_timestamp: (int) Only keep requests submitted before a given
- timestamp.
-
- Returns:
- A list of candidates, ordered by timestamp.
- """
- candidates = self._requests_by_url.get(url, [])
- candidates = [r for r in candidates if (
- r.timing.request_time + max(
- 0, r.timing.receive_headers_end / 1000) <= before_timestamp)]
- candidates.sort(key=lambda r: r.timing.request_time)
- return candidates
-
- def _FindBestMatchingInitiator(self, request, matches):
- """Returns the best matching request within a list of matches.
-
- Iteratively removes candidates until one is left:
- - With the same parent frame.
- - From the same frame.
-
- If this is not successful, takes the most recent request.
-
- Args:
- request: (Request) Request.
- matches: [Request] As returned by _FindMatchingRequests(), that is
- sorted by timestamp.
-
- Returns:
- The best matching initiating request, or None.
- """
- if not matches:
- return None
- if len(matches) == 1:
- return matches[0]
- # Several matches, try to reduce this number to 1. Otherwise, return the
- # most recent one.
- if request.frame_id in self._frame_to_parent: # Main frame has no parent.
- parent_frame_id = self._frame_to_parent[request.frame_id]
- same_parent_matches = [
- r for r in matches
- if r.frame_id in self._frame_to_parent and
- self._frame_to_parent[r.frame_id] == parent_frame_id]
- if not same_parent_matches:
- logging.warning('All matches are from non-sibling frames.')
- return matches[-1]
- if len(same_parent_matches) == 1:
- return same_parent_matches[0]
- same_frame_matches = [r for r in matches if r.frame_id == request.frame_id]
- if not same_frame_matches:
- logging.warning('All matches are from non-sibling frames.')
- return matches[-1]
- if len(same_frame_matches) == 1:
- return same_frame_matches[0]
- else:
- logging.warning('Several matches')
- return same_frame_matches[-1]
-
-
-if __name__ == '__main__':
- import json
- import sys
- trace_filename = sys.argv[1]
- json_dict = json.load(open(trace_filename, 'r'))
- lens = RequestDependencyLens(
- loading_trace.LoadingTrace.FromJsonDict(json_dict))
- depedencies = lens.GetRequestDependencies()
- for (first, second, dep_reason) in depedencies:
- print '%s -> %s\t(%s)' % (first.request_id, second.request_id, dep_reason)
diff --git a/loading/request_dependencies_lens_unittest.py b/loading/request_dependencies_lens_unittest.py
deleted file mode 100644
index bf2d858..0000000
--- a/loading/request_dependencies_lens_unittest.py
+++ /dev/null
@@ -1,233 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-import unittest
-
-import devtools_monitor
-from loading_trace import LoadingTrace
-from request_dependencies_lens import RequestDependencyLens
-from request_track import Request
-import test_utils
-
-
-class TestRequests(object):
- FIRST_REDIRECT_REQUEST = Request.FromJsonDict(
- {'url': 'http://bla.com', 'request_id': '1234.redirect.1',
- 'initiator': {'type': 'other'},
- 'timestamp': 0.5, 'timing': {}})
- SECOND_REDIRECT_REQUEST = Request.FromJsonDict(
- {'url': 'http://bla.com/redirect1', 'request_id': '1234.redirect.2',
- 'initiator': {'type': 'redirect',
- 'initiating_request': '1234.redirect.1'},
- 'timestamp': 1, 'timing': {}})
- REDIRECTED_REQUEST = Request.FromJsonDict({
- 'url': 'http://bla.com/index.html',
- 'request_id': '1234.1',
- 'frame_id': '123.1',
- 'initiator': {'type': 'redirect',
- 'initiating_request': '1234.redirect.2'},
- 'timestamp': 2,
- 'timing': {}})
- REQUEST = Request.FromJsonDict({'url': 'http://bla.com/index.html',
- 'request_id': '1234.1',
- 'frame_id': '123.1',
- 'initiator': {'type': 'other'},
- 'timestamp': 2,
- 'timing': {}})
- JS_REQUEST = Request.FromJsonDict({'url': 'http://bla.com/nyancat.js',
- 'request_id': '1234.12',
- 'frame_id': '123.123',
- 'initiator': {
- 'type': 'parser',
- 'url': 'http://bla.com/index.html'},
- 'timestamp': 3,
- 'timing': {}})
- JS_REQUEST_OTHER_FRAME = Request.FromJsonDict(
- {'url': 'http://bla.com/nyancat.js',
- 'request_id': '1234.42',
- 'frame_id': '123.13',
- 'initiator': {'type': 'parser',
- 'url': 'http://bla.com/index.html'},
- 'timestamp': 4, 'timing': {}})
- JS_REQUEST_UNRELATED_FRAME = Request.FromJsonDict(
- {'url': 'http://bla.com/nyancat.js',
- 'request_id': '1234.56',
- 'frame_id': '123.99',
- 'initiator': {'type': 'parser',
- 'url': 'http://bla.com/index.html'},
- 'timestamp': 5, 'timing': {}})
- JS_REQUEST_2 = Request.FromJsonDict(
- {'url': 'http://bla.com/cat.js', 'request_id': '1234.13',
- 'frame_id': '123.123',
- 'initiator': {'type': 'script',
- 'stack': {'callFrames': [
- {'url': 'unknown'},
- {'url': 'http://bla.com/nyancat.js'}]}},
- 'timestamp': 10, 'timing': {}})
- PAGE_EVENTS = [{'method': 'Page.frameAttached',
- 'frame_id': '123.13', 'parent_frame_id': '123.1'},
- {'method': 'Page.frameAttached',
- 'frame_id': '123.123', 'parent_frame_id': '123.1'}]
-
- @classmethod
- def CreateLoadingTrace(cls, trace_events=None):
- # This creates a set of requests with the following dependency structure.
- #
- # 1234.redirect.1 -> 1234.redirect.2
- # 1234.redirect.2 -> 1234.1
- # 1234.1 -> 1234.12
- # 1234.1 -> 1234.42
- # 1234.1 -> 1234.56
- # 1234.12 -> 1234.13
-
- trace = test_utils.LoadingTraceFromEvents(
- [cls.FIRST_REDIRECT_REQUEST, cls.SECOND_REDIRECT_REQUEST,
- cls.REDIRECTED_REQUEST, cls.REQUEST, cls.JS_REQUEST, cls.JS_REQUEST_2,
- cls.JS_REQUEST_OTHER_FRAME, cls.JS_REQUEST_UNRELATED_FRAME],
- cls.PAGE_EVENTS, trace_events)
- # Serialize and deserialize so that clients can change events without
- # affecting future tests.
- return LoadingTrace.FromJsonDict(trace.ToJsonDict())
-
-
-class RequestDependencyLensTestCase(unittest.TestCase):
- def testRedirectDependency(self):
- loading_trace = test_utils.LoadingTraceFromEvents(
- [TestRequests.FIRST_REDIRECT_REQUEST,
- TestRequests.SECOND_REDIRECT_REQUEST, TestRequests.REDIRECTED_REQUEST])
- request_dependencies_lens = RequestDependencyLens(loading_trace)
- deps = request_dependencies_lens.GetRequestDependencies()
- self.assertEquals(2, len(deps))
- (first, second, reason) = deps[0]
- self.assertEquals('redirect', reason)
- self.assertEquals(TestRequests.FIRST_REDIRECT_REQUEST.request_id,
- first.request_id)
- self.assertEquals(TestRequests.SECOND_REDIRECT_REQUEST.request_id,
- second.request_id)
- (first, second, reason) = deps[1]
- self.assertEquals('redirect', reason)
- self.assertEquals(TestRequests.SECOND_REDIRECT_REQUEST.request_id,
- first.request_id)
- self.assertEquals(TestRequests.REQUEST.request_id, second.request_id)
-
- def testGetRedirectChain(self):
- loading_trace = test_utils.LoadingTraceFromEvents(
- [TestRequests.FIRST_REDIRECT_REQUEST,
- TestRequests.SECOND_REDIRECT_REQUEST, TestRequests.REDIRECTED_REQUEST])
- request_dependencies_lens = RequestDependencyLens(loading_trace)
- whole_chain = [TestRequests.FIRST_REDIRECT_REQUEST,
- TestRequests.SECOND_REDIRECT_REQUEST,
- TestRequests.REDIRECTED_REQUEST]
- chain = request_dependencies_lens.GetRedirectChain(
- TestRequests.FIRST_REDIRECT_REQUEST)
- self.assertListEqual(whole_chain, chain)
- chain = request_dependencies_lens.GetRedirectChain(
- TestRequests.SECOND_REDIRECT_REQUEST)
- self.assertListEqual(whole_chain[1:], chain)
- chain = request_dependencies_lens.GetRedirectChain(
- TestRequests.REDIRECTED_REQUEST)
- self.assertEquals(whole_chain[2:], chain)
-
- def testScriptDependency(self):
- loading_trace = test_utils.LoadingTraceFromEvents(
- [TestRequests.JS_REQUEST, TestRequests.JS_REQUEST_2])
- request_dependencies_lens = RequestDependencyLens(loading_trace)
- deps = request_dependencies_lens.GetRequestDependencies()
- self.assertEquals(1, len(deps))
- self._AssertDependencyIs(
- deps[0],
- TestRequests.JS_REQUEST.request_id,
- TestRequests.JS_REQUEST_2.request_id, 'script')
-
- def testAsyncScriptDependency(self):
- JS_REQUEST_WITH_ASYNC_STACK = Request.FromJsonDict(
- {'url': 'http://bla.com/cat.js', 'request_id': '1234.14',
- 'initiator': {
- 'type': 'script',
- 'stack': {'callFrames': [],
- 'parent': {'callFrames': [
- {'url': 'http://bla.com/nyancat.js'}]}}},
- 'timestamp': 10, 'timing': {}})
- loading_trace = test_utils.LoadingTraceFromEvents(
- [TestRequests.JS_REQUEST, JS_REQUEST_WITH_ASYNC_STACK])
- request_dependencies_lens = RequestDependencyLens(loading_trace)
- deps = request_dependencies_lens.GetRequestDependencies()
- self.assertEquals(1, len(deps))
- self._AssertDependencyIs(
- deps[0], TestRequests.JS_REQUEST.request_id,
- JS_REQUEST_WITH_ASYNC_STACK.request_id, 'script')
-
- def testParserDependency(self):
- loading_trace = test_utils.LoadingTraceFromEvents(
- [TestRequests.REQUEST, TestRequests.JS_REQUEST])
- request_dependencies_lens = RequestDependencyLens(loading_trace)
- deps = request_dependencies_lens.GetRequestDependencies()
- self.assertEquals(1, len(deps))
- self._AssertDependencyIs(
- deps[0],
- TestRequests.REQUEST.request_id, TestRequests.JS_REQUEST.request_id,
- 'parser')
-
- def testSeveralDependencies(self):
- loading_trace = test_utils.LoadingTraceFromEvents(
- [TestRequests.FIRST_REDIRECT_REQUEST,
- TestRequests.SECOND_REDIRECT_REQUEST,
- TestRequests.REDIRECTED_REQUEST,
- TestRequests.JS_REQUEST, TestRequests.JS_REQUEST_2])
- request_dependencies_lens = RequestDependencyLens(loading_trace)
- deps = request_dependencies_lens.GetRequestDependencies()
- self.assertEquals(4, len(deps))
- self._AssertDependencyIs(
- deps[0], TestRequests.FIRST_REDIRECT_REQUEST.request_id,
- TestRequests.SECOND_REDIRECT_REQUEST.request_id, 'redirect')
- self._AssertDependencyIs(
- deps[1], TestRequests.SECOND_REDIRECT_REQUEST.request_id,
- TestRequests.REQUEST.request_id, 'redirect')
- self._AssertDependencyIs(
- deps[2],
- TestRequests.REQUEST.request_id, TestRequests.JS_REQUEST.request_id,
- 'parser')
- self._AssertDependencyIs(
- deps[3],
- TestRequests.JS_REQUEST.request_id,
- TestRequests.JS_REQUEST_2.request_id, 'script')
-
- def testDependencyDifferentFrame(self):
- """Checks that a more recent request from another frame is ignored."""
- loading_trace = test_utils.LoadingTraceFromEvents(
- [TestRequests.JS_REQUEST, TestRequests.JS_REQUEST_OTHER_FRAME,
- TestRequests.JS_REQUEST_2])
- request_dependencies_lens = RequestDependencyLens(loading_trace)
- deps = request_dependencies_lens.GetRequestDependencies()
- self.assertEquals(1, len(deps))
- self._AssertDependencyIs(
- deps[0],
- TestRequests.JS_REQUEST.request_id,
- TestRequests.JS_REQUEST_2.request_id, 'script')
-
- def testDependencySameParentFrame(self):
- """Checks that a more recent request from an unrelated frame is ignored
- if there is one from a related frame."""
- loading_trace = test_utils.LoadingTraceFromEvents(
- [TestRequests.JS_REQUEST_OTHER_FRAME,
- TestRequests.JS_REQUEST_UNRELATED_FRAME, TestRequests.JS_REQUEST_2],
- TestRequests.PAGE_EVENTS)
- request_dependencies_lens = RequestDependencyLens(loading_trace)
- deps = request_dependencies_lens.GetRequestDependencies()
- self.assertEquals(1, len(deps))
- self._AssertDependencyIs(
- deps[0],
- TestRequests.JS_REQUEST_OTHER_FRAME.request_id,
- TestRequests.JS_REQUEST_2.request_id, 'script')
-
- def _AssertDependencyIs(
- self, dep, first_request_id, second_request_id, reason):
- (first, second, dependency_reason) = dep
- self.assertEquals(reason, dependency_reason)
- self.assertEquals(first_request_id, first.request_id)
- self.assertEquals(second_request_id, second.request_id)
-
-
-if __name__ == '__main__':
- unittest.main()
diff --git a/loading/request_track.py b/loading/request_track.py
deleted file mode 100644
index be62cd1..0000000
--- a/loading/request_track.py
+++ /dev/null
@@ -1,873 +0,0 @@
-# Copyright (c) 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-"""The request data track.
-
-When executed, parses a JSON dump of DevTools messages.
-"""
-
-import bisect
-import collections
-import copy
-import datetime
-import email.utils
-import hashlib
-import json
-import logging
-import re
-import sys
-import urlparse
-
-import devtools_monitor
-
-
-class Timing(object):
- """Collects the timing data for a request."""
- UNVAILABLE = -1
- _TIMING_NAMES = (
- ('connectEnd', 'connect_end'), ('connectStart', 'connect_start'),
- ('dnsEnd', 'dns_end'), ('dnsStart', 'dns_start'),
- ('proxyEnd', 'proxy_end'), ('proxyStart', 'proxy_start'),
- ('receiveHeadersEnd', 'receive_headers_end'),
- ('requestTime', 'request_time'), ('sendEnd', 'send_end'),
- ('sendStart', 'send_start'), ('sslEnd', 'ssl_end'),
- ('sslStart', 'ssl_start'), ('workerReady', 'worker_ready'),
- ('workerStart', 'worker_start'),
- ('loadingFinished', 'loading_finished'), ('pushStart', 'push_start'),
- ('pushEnd', 'push_end'))
- _TIMING_NAMES_MAPPING = dict(_TIMING_NAMES)
- __slots__ = tuple(x[1] for x in _TIMING_NAMES)
-
- def __init__(self, **kwargs):
- """Constructor.
-
- Initialize with keywords arguments from __slots__.
- """
- for slot in self.__slots__:
- setattr(self, slot, self.UNVAILABLE)
- for (attr, value) in kwargs.items():
- setattr(self, attr, value)
-
- def __eq__(self, o):
- return all(getattr(self, attr) == getattr(o, attr)
- for attr in self.__slots__)
-
- def __str__(self):
- return str(self.ToJsonDict())
-
- def LargestOffset(self):
- """Returns the largest offset in the available timings."""
- return max(0, max(
- getattr(self, attr) for attr in self.__slots__
- if attr != 'request_time'))
-
- def ToJsonDict(self):
- return {attr: getattr(self, attr)
- for attr in self.__slots__ if getattr(self, attr) != -1}
-
- @classmethod
- def FromJsonDict(cls, json_dict):
- return cls(**json_dict)
-
- @classmethod
- def FromDevToolsDict(cls, json_dict):
- """Returns an instance of Timing from a dict, as passed by DevTools."""
- timing_dict = {
- cls._TIMING_NAMES_MAPPING[k]: v for (k, v) in json_dict.items()}
- return cls(**timing_dict)
-
-
-def ShortName(url):
- """Returns a shortened version of a URL."""
- parsed = urlparse.urlparse(url)
- path = parsed.path
- hostname = parsed.hostname if parsed.hostname else '?.?.?'
- if path != '' and path != '/':
- last_path = parsed.path.split('/')[-1]
- if len(last_path) < 10:
- if len(path) < 10:
- return hostname + '/' + path
- else:
- return hostname + '/..' + parsed.path[-10:]
- else:
- return hostname + '/..' + last_path[:5]
- else:
- return hostname
-
-
-def IntervalBetween(first, second, reason):
- """Returns the start and end of the inteval between two requests, in ms.
-
- This is defined as:
- - [first.headers, second.start] if reason is 'parser'. This is to account
- for incremental parsing.
- - [first.end, second.start] if reason is 'script', 'redirect' or 'other'.
-
- Args:
- first: (Request) First request.
- second: (Request) Second request.
- reason: (str) Link between the two requests, in Request.INITIATORS.
-
- Returns:
- (start_msec (float), end_msec (float)),
- """
- assert reason in Request.INITIATORS
- second_ms = second.timing.request_time * 1000
- if reason == 'parser':
- first_offset_ms = first.timing.receive_headers_end
- else:
- first_offset_ms = first.timing.LargestOffset()
- return (first.timing.request_time * 1000 + first_offset_ms, second_ms)
-
-
-def TimeBetween(first, second, reason):
- """(end_msec - start_msec), with the values as returned by IntervalBetween().
- """
- (first_ms, second_ms) = IntervalBetween(first, second, reason)
- return second_ms - first_ms
-
-
-def TimingAsList(timing):
- """Transform Timing to a list, eg as is used in JSON output.
-
- Args:
- timing: a Timing.
-
- Returns:
- A list identical to what the eventual JSON output will be (eg,
- Request.ToJsonDict).
- """
- return json.loads(json.dumps(timing))
-
-
-class Request(object):
- """Represents a single request.
-
- Generally speaking, fields here closely mirror those documented in
- third_party/blink/renderer/devtools/protocol.json.
-
- Fields:
- request_id: (str) unique request ID. Postfixed with _REDIRECT_SUFFIX for
- redirects.
- frame_id: (str) unique frame identifier.
- loader_id: (str) unique frame identifier.
- document_url: (str) URL of the document this request is loaded for.
- url: (str) Request URL.
- protocol: (str) protocol used for the request.
- method: (str) HTTP method, such as POST or GET.
- request_headers: (dict) {'header': 'value'} Request headers.
- response_headers: (dict) {'header': 'value'} Response headers.
- initial_priority: (str) Initial request priority, in REQUEST_PRIORITIES.
- timestamp: (float) Request timestamp, in s.
- wall_time: (float) Request timestamp, UTC timestamp in s.
- initiator: (dict) Request initiator, in INITIATORS.
- resource_type: (str) Resource type, in RESOURCE_TYPES
- served_from_cache: (bool) Whether the request was served from cache.
- from_disk_cache: (bool) Whether the request was served from the disk cache.
- from_service_worker: (bool) Whether the request was served by a Service
- Worker.
- timing: (Timing) Request timing, extended with loading_finished.
- status: (int) Response status code.
- status_text: (str) Response status text received in the status line.
- encoded_data_length: (int) Total encoded data length.
- data_chunks: (list) [(offset, encoded_data_length), ...] List of data
- chunks received, with their offset in ms relative to
- Timing.requestTime.
- failed: (bool) Whether the request failed.
- error_text: (str) User friendly error message when request failed.
- start_msec: (float) Request start time, in milliseconds from chrome start.
- end_msec: (float) Request end time, in milliseconds from chrome start.
- start_msec.
- """
- REQUEST_PRIORITIES = ('VeryLow', 'Low', 'Medium', 'High', 'VeryHigh')
- RESOURCE_TYPES = ('Document', 'Stylesheet', 'Image', 'Media', 'Font',
- 'Script', 'TextTrack', 'XHR', 'Fetch', 'EventSource',
- 'WebSocket', 'Manifest', 'Other')
- INITIATORS = ('parser', 'script', 'other', 'redirect')
- INITIATING_REQUEST = 'initiating_request'
- ORIGINAL_INITIATOR = 'original_initiator'
- def __init__(self):
- self.request_id = None
- self.frame_id = None
- self.loader_id = None
- self.document_url = None
- self.url = None
- self.protocol = None
- self.method = None
- self.mime_type = None
- self.request_headers = None
- self.response_headers = None
- self.initial_priority = None
- self.timestamp = -1
- self.wall_time = -1
- self.initiator = None
- self.resource_type = None
- self.served_from_cache = False
- self.from_disk_cache = False
- self.from_service_worker = False
- self.timing = None
- self.status = None
- self.status_text = None
- self.response_headers_length = 0
- self.encoded_data_length = 0
- self.data_chunks = []
- self.failed = False
- self.error_text = None
-
- @property
- def start_msec(self):
- return self.timing.request_time * 1000
-
- @property
- def end_msec(self):
- if self.start_msec is None:
- return None
- return self.start_msec + self.timing.LargestOffset()
-
- @property
- def fingerprint(self):
- h = hashlib.sha256()
- h.update(self.url)
- return h.hexdigest()[:10]
-
- def _TimestampOffsetFromStartMs(self, timestamp):
- assert self.timing.request_time != -1
- request_time = self.timing.request_time
- return (timestamp - request_time) * 1000
-
- def ToJsonDict(self):
- result = copy.deepcopy(self.__dict__)
- result['timing'] = self.timing.ToJsonDict() if self.timing else {}
- return result
-
- @classmethod
- def FromJsonDict(cls, data_dict):
- result = Request()
- for (k, v) in data_dict.items():
- setattr(result, k, v)
- if not result.response_headers:
- result.response_headers = {}
- if result.timing:
- result.timing = Timing.FromJsonDict(result.timing)
- else:
- result.timing = Timing(request_time=result.timestamp)
- return result
-
- def GetResponseTransportLength(self):
- """Get the total amount of encoded data no matter whether load has finished
- or not.
- """
- assert self.HasReceivedResponse()
- assert not self.from_disk_cache and not self.served_from_cache
- assert self.protocol not in {'about', 'blob', 'data'}
- if self.timing.loading_finished != Timing.UNVAILABLE:
- encoded_data_length = self.encoded_data_length
- else:
- encoded_data_length = sum(
- [chunk_size for _, chunk_size in self.data_chunks])
- assert encoded_data_length > 0 or len(self.data_chunks) == 0
- return encoded_data_length + self.response_headers_length
-
- def GetHTTPResponseHeader(self, header_name):
- """Gets the value of a HTTP response header.
-
- Does a case-insensitive search for the header name in the HTTP response
- headers, in order to support servers that use a wrong capitalization.
- """
- lower_case_name = header_name.lower()
- result = None
- for name, value in self.response_headers.iteritems():
- if name.lower() == lower_case_name:
- result = value
- break
- return result
-
- def SetHTTPResponseHeader(self, header, header_value):
- """Sets the value of a HTTP response header."""
- assert header.islower()
- for name in self.response_headers.keys():
- if name.lower() == header:
- del self.response_headers[name]
- self.response_headers[header] = header_value
-
- def GetResponseHeaderValue(self, header, value):
- """Returns a copy of |value| iff response |header| contains it."""
- header_values = self.GetHTTPResponseHeader(header)
- if not header_values:
- return None
- values = header_values.split(',')
- for header_value in values:
- if header_value.lower() == value.lower():
- return header_value
- return None
-
- def HasResponseHeaderValue(self, header, value):
- """Returns True iff the response headers |header| contains |value|."""
- return self.GetResponseHeaderValue(header, value) is not None
-
- def GetContentType(self):
- """Returns the content type, or None."""
- # Check for redirects. Use the "Location" header, because the HTTP status is
- # not reliable.
- if self.GetHTTPResponseHeader('Location') is not None:
- return 'redirect'
-
- # Check if the response is empty.
- if (self.GetHTTPResponseHeader('Content-Length') == '0' or
- self.status == 204):
- return 'ping'
-
- if self.mime_type:
- return self.mime_type
-
- content_type = self.GetHTTPResponseHeader('Content-Type')
- if not content_type or ';' not in content_type:
- return content_type
- else:
- return content_type[:content_type.index(';')]
-
- def IsDataRequest(self):
- return self.protocol == 'data'
-
- def HasReceivedResponse(self):
- return self.status is not None
-
- def GetCacheControlDirective(self, directive_name):
- """Returns the value of a Cache-Control directive, or None."""
- cache_control_str = self.GetHTTPResponseHeader('Cache-Control')
- if cache_control_str is None:
- return None
- directives = [s.strip() for s in cache_control_str.split(',')]
- for directive in directives:
- parts = directive.split('=')
- if len(parts) != 2:
- continue
- (name, value) = parts
- if name == directive_name:
- return value
- return None
-
- def MaxAge(self):
- """Returns the max-age of a resource, or -1."""
- # TODO(lizeb): Handle the "Expires" header as well.
- cache_control = {}
- if not self.response_headers:
- return -1
-
- cache_control_str = self.GetHTTPResponseHeader('Cache-Control')
- if cache_control_str is not None:
- directives = [s.strip() for s in cache_control_str.split(',')]
- for directive in directives:
- parts = [s.strip() for s in directive.split('=')]
- if len(parts) == 1:
- cache_control[parts[0]] = True
- else:
- cache_control[parts[0]] = parts[1]
- if (u'no-store' in cache_control
- or u'no-cache' in cache_control
- or len(cache_control) == 0):
- return -1
- max_age = self.GetCacheControlDirective('max-age')
- if max_age:
- return int(max_age)
- return -1
-
- def Cost(self):
- """Returns the cost of this request in ms, defined as time between
- request_time and the latest timing event.
- """
- # All fields in timing are millis relative to request_time.
- return self.timing.LargestOffset()
-
- def GetRawResponseHeaders(self):
- """Gets the request's raw response headers compatible with
- net::HttpResponseHeaders's constructor.
- """
- assert not self.IsDataRequest()
- assert self.HasReceivedResponse()
- headers = bytes('{} {} {}\x00'.format(
- self.protocol.upper(), self.status, self.status_text))
- for key in sorted(self.response_headers.keys()):
- headers += (bytes(key.encode('latin-1')) + b': ' +
- bytes(self.response_headers[key].encode('latin-1')) + b'\x00')
- return headers
-
- def __eq__(self, o):
- return self.__dict__ == o.__dict__
-
- def __hash__(self):
- return hash(self.request_id)
-
- def __str__(self):
- return json.dumps(self.ToJsonDict(), sort_keys=True, indent=2)
-
-
-def _ParseStringToInt(string):
- """Parses a string to an integer like base::StringToInt64().
-
- Returns:
- Parsed integer.
- """
- string = string.strip()
- while string:
- try:
- parsed_integer = int(string)
- if parsed_integer > sys.maxint:
- return sys.maxint
- if parsed_integer < -sys.maxint - 1:
- return -sys.maxint - 1
- return parsed_integer
- except ValueError:
- string = string[:-1]
- return 0
-
-
-class CachingPolicy(object):
- """Represents the caching policy at an arbitrary time for a cached response.
- """
- FETCH = 'FETCH'
- VALIDATION_NONE = 'VALIDATION_NONE'
- VALIDATION_SYNC = 'VALIDATION_SYNC'
- VALIDATION_ASYNC = 'VALIDATION_ASYNC'
- POLICIES = (FETCH, VALIDATION_NONE, VALIDATION_SYNC, VALIDATION_ASYNC)
- def __init__(self, request):
- """Constructor.
-
- Args:
- request: (Request)
- """
- assert request.response_headers is not None
- self.request = request
- # This is incorrect, as the timestamp corresponds to when devtools is made
- # aware of the request, not when it was sent. However, this is good enough
- # for computing cache expiration, which doesn't need sub-second precision.
- self._request_time = self.request.wall_time
- # Used when the date is not available.
- self._response_time = (
- self._request_time + self.request.timing.receive_headers_end)
-
- def HasValidators(self):
- """Returns wether the request has a validator."""
- # Assuming HTTP 1.1+.
- return (self.request.GetHTTPResponseHeader('Last-Modified')
- or self.request.GetHTTPResponseHeader('Etag'))
-
- def IsCacheable(self):
- """Returns whether the request could be stored in the cache."""
- return not self.request.HasResponseHeaderValue('Cache-Control', 'no-store')
-
- def PolicyAtDate(self, timestamp):
- """Returns the caching policy at an aribitrary timestamp.
-
- Args:
- timestamp: (float) Seconds since Epoch.
-
- Returns:
- A policy in POLICIES.
- """
- # Note: the implementation is largely transcribed from
- # net/http/http_response_headers.cc, itself following RFC 2616.
- if not self.IsCacheable():
- return self.FETCH
- freshness = self.GetFreshnessLifetimes()
- if freshness[0] == 0 and freshness[1] == 0:
- return self.VALIDATION_SYNC
- age = self._GetCurrentAge(timestamp)
- if freshness[0] > age:
- return self.VALIDATION_NONE
- if (freshness[0] + freshness[1]) > age:
- return self.VALIDATION_ASYNC
- return self.VALIDATION_SYNC
-
- def GetFreshnessLifetimes(self):
- """Returns [freshness, stale-while-revalidate freshness] in seconds."""
- # This is adapted from GetFreshnessLifetimes() in
- # //net/http/http_response_headers.cc (which follows the RFC).
- r = self.request
- result = [0, 0]
- if (r.HasResponseHeaderValue('Cache-Control', 'no-cache')
- or r.HasResponseHeaderValue('Cache-Control', 'no-store')
- or r.HasResponseHeaderValue('Vary', '*')): # RFC 2616, 13.6.
- return result
- must_revalidate = r.HasResponseHeaderValue(
- 'Cache-Control', 'must-revalidate')
- swr_header = r.GetCacheControlDirective('stale-while-revalidate')
- if not must_revalidate and swr_header:
- result[1] = _ParseStringToInt(swr_header)
-
- max_age_header = r.GetCacheControlDirective('max-age')
- if max_age_header:
- result[0] = _ParseStringToInt(max_age_header)
- return result
-
- date = self._GetDateValue('Date') or self._response_time
- expires = self._GetDateValue('Expires')
- if expires:
- result[0] = expires - date
- return result
-
- if self.request.status in (200, 203, 206) and not must_revalidate:
- last_modified = self._GetDateValue('Last-Modified')
- if last_modified and last_modified < date:
- result[0] = (date - last_modified) / 10
- return result
-
- if self.request.status in (300, 301, 308, 410):
- return [2**48, 0] # ~forever.
- # No header -> not fresh.
- return result
-
- def _GetDateValue(self, name):
- date_str = self.request.GetHTTPResponseHeader(name)
- if not date_str:
- return None
- parsed_date = email.utils.parsedate_tz(date_str)
- if parsed_date is None:
- return None
- return email.utils.mktime_tz(parsed_date)
-
- def _GetCurrentAge(self, current_time):
- # See GetCurrentAge() in //net/http/http_response_headers.cc.
- r = self.request
- date_value = self._GetDateValue('Date') or self._response_time
- age_value = int(r.GetHTTPResponseHeader('Age') or '0')
-
- apparent_age = max(0, self._response_time - date_value)
- corrected_received_age = max(apparent_age, age_value)
- response_delay = self._response_time - self._request_time
- corrected_initial_age = corrected_received_age + response_delay
- resident_time = current_time - self._response_time
- current_age = corrected_initial_age + resident_time
-
- return current_age
-
-
-class RequestTrack(devtools_monitor.Track):
- """Aggregates request data."""
- _REDIRECT_SUFFIX = '.redirect'
- # Request status
- _STATUS_SENT = 0
- _STATUS_RESPONSE = 1
- _STATUS_DATA = 2
- _STATUS_FINISHED = 3
- _STATUS_FAILED = 4
- # Serialization KEYS
- _EVENTS_KEY = 'events'
- _METADATA_KEY = 'metadata'
- _DUPLICATES_KEY = 'duplicates_count'
- _INCONSISTENT_INITIATORS_KEY = 'inconsistent_initiators'
- def __init__(self, connection):
- super(RequestTrack, self).__init__(connection)
- self._connection = connection
- self._requests = []
- self._requests_in_flight = {} # requestId -> (request, status)
- self._completed_requests_by_id = {}
- self._redirects_count_by_id = collections.defaultdict(int)
- self._indexed = False
- self._request_start_timestamps = None
- self._request_end_timestamps = None
- self._requests_by_start = None
- self._requests_by_end = None
- if connection: # Optional for testing.
- for method in RequestTrack._METHOD_TO_HANDLER:
- self._connection.RegisterListener(method, self)
- # Enable asynchronous callstacks to get full javascript callstacks in
- # initiators
- self._connection.SetScopedState('Debugger.setAsyncCallStackDepth',
- {'maxDepth': 4}, {'maxDepth': 0}, True)
- # responseReceived message are sometimes duplicated. Records the message to
- # detect this.
- self._request_id_to_response_received = {}
- self.duplicates_count = 0
- self.inconsistent_initiators_count = 0
-
- def Handle(self, method, msg):
- assert method in RequestTrack._METHOD_TO_HANDLER
- self._indexed = False
- params = msg['params']
- request_id = params['requestId']
- RequestTrack._METHOD_TO_HANDLER[method](self, request_id, params)
-
- def GetEvents(self):
- if self._requests_in_flight:
- logging.warning('Number of requests still in flight: %d.'
- % len(self._requests_in_flight))
- return self._requests
-
- def GetFirstResourceRequest(self):
- return self.GetEvents()[0]
-
- def GetFirstRequestMillis(self):
- """Find the canonical start time for this track.
-
- Returns:
- The millisecond timestamp of the first request.
- """
- assert self._requests, "No requests to analyze."
- self._IndexRequests()
- return self._request_start_timestamps[0]
-
- def GetLastRequestMillis(self):
- """Find the canonical start time for this track.
-
- Returns:
- The millisecond timestamp of the first request.
- """
- assert self._requests, "No requests to analyze."
- self._IndexRequests()
- return self._request_end_timestamps[-1]
-
- def GetEventsStartingBetween(self, start_ms, end_ms):
- """Return events that started in a range.
-
- Args:
- start_ms: the start time to query, in milliseconds from the first request.
- end_ms: the end time to query, in milliseconds from the first request.
-
- Returns:
- A list of requests whose start time is in [start_ms, end_ms].
- """
- self._IndexRequests()
- low = bisect.bisect_left(self._request_start_timestamps, start_ms)
- high = bisect.bisect_right(self._request_start_timestamps, end_ms)
- return self._requests_by_start[low:high]
-
- def GetEventsEndingBetween(self, start_ms, end_ms):
- """Return events that ended in a range.
-
- Args:
- start_ms: the start time to query, in milliseconds from the first request.
- end_ms: the end time to query, in milliseconds from the first request.
-
- Returns:
- A list of requests whose end time is in [start_ms, end_ms].
- """
- self._IndexRequests()
- low = bisect.bisect_left(self._request_end_timestamps, start_ms)
- high = bisect.bisect_right(self._request_end_timestamps, end_ms)
- return self._requests_by_end[low:high]
-
- def ToJsonDict(self):
- if self._requests_in_flight:
- logging.warning('Requests in flight, will be ignored in the dump')
- return {self._EVENTS_KEY: [
- request.ToJsonDict() for request in self._requests],
- self._METADATA_KEY: {
- self._DUPLICATES_KEY: self.duplicates_count,
- self._INCONSISTENT_INITIATORS_KEY:
- self.inconsistent_initiators_count}}
-
- @classmethod
- def FromJsonDict(cls, json_dict):
- assert cls._EVENTS_KEY in json_dict
- assert cls._METADATA_KEY in json_dict
- result = RequestTrack(None)
- requests = [Request.FromJsonDict(request)
- for request in json_dict[cls._EVENTS_KEY]]
- result._requests = requests
- metadata = json_dict[cls._METADATA_KEY]
- result.duplicates_count = metadata.get(cls._DUPLICATES_KEY, 0)
- result.inconsistent_initiators_count = metadata.get(
- cls._INCONSISTENT_INITIATORS_KEY, 0)
- return result
-
- def _IndexRequests(self):
- # TODO(mattcary): if we ever have requests without timing then we either
- # need a default, or to make an index that only includes requests with
- # timings.
- if self._indexed:
- return
- valid_requests = [r for r in self._requests
- if r.start_msec is not None]
- self._requests_by_start = sorted(valid_requests,
- key=lambda r: r.start_msec)
- self._request_start_timestamps = [r.start_msec
- for r in self._requests_by_start]
- self._requests_by_end = sorted(valid_requests,
- key=lambda r: r.end_msec)
- self._request_end_timestamps = [r.end_msec
- for r in self._requests_by_end]
- self._indexed = True
-
- def _RequestWillBeSent(self, request_id, params):
- # Several "requestWillBeSent" events can be dispatched in a row in the case
- # of redirects.
- redirect_initiator = None
- if request_id in self._completed_requests_by_id:
- assert request_id not in self._requests_in_flight
- return
- if request_id in self._requests_in_flight:
- redirect_initiator = self._HandleRedirect(request_id, params)
- assert (request_id not in self._requests_in_flight)
- r = Request()
- r.request_id = request_id
- _CopyFromDictToObject(
- params, r, (('frameId', 'frame_id'), ('loaderId', 'loader_id'),
- ('documentURL', 'document_url'),
- ('timestamp', 'timestamp'), ('wallTime', 'wall_time'),
- ('initiator', 'initiator')))
- request = params['request']
- _CopyFromDictToObject(
- request, r, (('url', 'url'), ('method', 'method'),
- ('headers', 'headers'),
- ('initialPriority', 'initial_priority')))
- r.resource_type = params.get('type', 'Other')
- if redirect_initiator:
- original_initiator = r.initiator
- r.initiator = redirect_initiator
- r.initiator[Request.ORIGINAL_INITIATOR] = original_initiator
- initiating_request = self._completed_requests_by_id[
- redirect_initiator[Request.INITIATING_REQUEST]]
- initiating_initiator = initiating_request.initiator.get(
- Request.ORIGINAL_INITIATOR, initiating_request.initiator)
- if initiating_initiator != original_initiator:
- self.inconsistent_initiators_count += 1
- self._requests_in_flight[request_id] = (r, RequestTrack._STATUS_SENT)
-
- def _HandleRedirect(self, request_id, params):
- (r, status) = self._requests_in_flight[request_id]
- assert status == RequestTrack._STATUS_SENT
- # The second request contains timing information pertaining to the first
- # one. Finalize the first request.
- assert 'redirectResponse' in params
- redirect_response = params['redirectResponse']
-
- _CopyFromDictToObject(redirect_response, r,
- (('headers', 'response_headers'),
- ('encodedDataLength', 'response_headers_length'),
- ('fromDiskCache', 'from_disk_cache'),
- ('protocol', 'protocol'), ('status', 'status'),
- ('statusText', 'status_text')))
- r.timing = Timing.FromDevToolsDict(redirect_response['timing'])
-
- redirect_index = self._redirects_count_by_id[request_id]
- self._redirects_count_by_id[request_id] += 1
- r.request_id = '%s%s.%d' % (request_id, self._REDIRECT_SUFFIX,
- redirect_index + 1)
- initiator = {
- 'type': 'redirect', Request.INITIATING_REQUEST: r.request_id}
- self._requests_in_flight[r.request_id] = (r, RequestTrack._STATUS_FINISHED)
- del self._requests_in_flight[request_id]
- self._FinalizeRequest(r.request_id)
- return initiator
-
- def _RequestServedFromCache(self, request_id, _):
- if request_id not in self._requests_in_flight:
- return
- (request, status) = self._requests_in_flight[request_id]
- assert status == RequestTrack._STATUS_SENT
- request.served_from_cache = True
-
- def _ResponseReceived(self, request_id, params):
- if request_id in self._completed_requests_by_id:
- assert request_id not in self._requests_in_flight
- return
- assert request_id in self._requests_in_flight
- (r, status) = self._requests_in_flight[request_id]
- if status == RequestTrack._STATUS_RESPONSE:
- # Duplicated messages (apart from the timestamp) are OK.
- old_params = self._request_id_to_response_received[request_id]
- params_copy = copy.deepcopy(params)
- params_copy['timestamp'] = None
- old_params['timestamp'] = None
- assert params_copy == old_params
- self.duplicates_count += 1
- return
- assert status == RequestTrack._STATUS_SENT
- assert (r.frame_id == params['frameId'] or
- params['response']['protocol'] == 'data')
- assert r.timestamp <= params['timestamp']
- if r.resource_type == 'Other':
- r.resource_type = params.get('type', 'Other')
- else:
- assert r.resource_type == params.get('type', 'Other')
- response = params['response']
- _CopyFromDictToObject(
- response, r, (('status', 'status'), ('mimeType', 'mime_type'),
- ('fromDiskCache', 'from_disk_cache'),
- ('fromServiceWorker', 'from_service_worker'),
- ('protocol', 'protocol'), ('statusText', 'status_text'),
- # Actual request headers are not known before reaching the
- # network stack.
- ('requestHeaders', 'request_headers'),
- ('encodedDataLength', 'response_headers_length'),
- ('headers', 'response_headers')))
- timing_dict = {}
- # Some URLs don't have a timing dict (e.g. data URLs), and timings for
- # cached requests are stale.
- # TODO(droger): the timestamp is inacurate, get the real timings instead.
- if not response.get('timing') or r.served_from_cache:
- timing_dict = {'requestTime': r.timestamp}
- else:
- timing_dict = response['timing']
- r.timing = Timing.FromDevToolsDict(timing_dict)
- self._requests_in_flight[request_id] = (r, RequestTrack._STATUS_RESPONSE)
- self._request_id_to_response_received[request_id] = params
-
- def _DataReceived(self, request_id, params):
- if request_id not in self._requests_in_flight:
- return
- (r, status) = self._requests_in_flight[request_id]
- assert (status == RequestTrack._STATUS_RESPONSE
- or status == RequestTrack._STATUS_DATA)
- offset = r._TimestampOffsetFromStartMs(params['timestamp'])
- r.data_chunks.append((offset, params['encodedDataLength']))
- self._requests_in_flight[request_id] = (r, RequestTrack._STATUS_DATA)
-
- def _LoadingFinished(self, request_id, params):
- if request_id not in self._requests_in_flight:
- return
- (r, status) = self._requests_in_flight[request_id]
- assert (status == RequestTrack._STATUS_RESPONSE
- or status == RequestTrack._STATUS_DATA)
- r.encoded_data_length = params['encodedDataLength']
- r.timing.loading_finished = r._TimestampOffsetFromStartMs(
- params['timestamp'])
- self._requests_in_flight[request_id] = (r, RequestTrack._STATUS_FINISHED)
- self._FinalizeRequest(request_id)
-
- def _LoadingFailed(self, request_id, params):
- if request_id not in self._requests_in_flight:
- logging.warning('An unknown request failed: %s' % request_id)
- return
- (r, _) = self._requests_in_flight[request_id]
- r.failed = True
- r.error_text = params['errorText']
- self._requests_in_flight[request_id] = (r, RequestTrack._STATUS_FINISHED)
- self._FinalizeRequest(request_id)
-
- def _FinalizeRequest(self, request_id):
- (request, status) = self._requests_in_flight[request_id]
- assert status == RequestTrack._STATUS_FINISHED
- del self._requests_in_flight[request_id]
- self._completed_requests_by_id[request_id] = request
- self._requests.append(request)
-
- def __eq__(self, o):
- return self._requests == o._requests
-
-
-RequestTrack._METHOD_TO_HANDLER = {
- 'Network.requestWillBeSent': RequestTrack._RequestWillBeSent,
- 'Network.requestServedFromCache': RequestTrack._RequestServedFromCache,
- 'Network.responseReceived': RequestTrack._ResponseReceived,
- 'Network.dataReceived': RequestTrack._DataReceived,
- 'Network.loadingFinished': RequestTrack._LoadingFinished,
- 'Network.loadingFailed': RequestTrack._LoadingFailed}
-
-
-def _CopyFromDictToObject(d, o, key_attrs):
- for (key, attr) in key_attrs:
- if key in d:
- setattr(o, attr, d[key])
-
-
-if __name__ == '__main__':
- import json
- import sys
- events = json.load(open(sys.argv[1], 'r'))
- request_track = RequestTrack(None)
- for event in events:
- event_method = event['method']
- request_track.Handle(event_method, event)
diff --git a/loading/request_track_unittest.py b/loading/request_track_unittest.py
deleted file mode 100644
index 3f0baea..0000000
--- a/loading/request_track_unittest.py
+++ /dev/null
@@ -1,636 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-import copy
-import json
-import sys
-import unittest
-
-from request_track import (TimeBetween, Request, CachingPolicy, RequestTrack,
- Timing, _ParseStringToInt)
-
-
-class TimeBetweenTestCase(unittest.TestCase):
- _REQUEST = Request.FromJsonDict({'url': 'http://bla.com',
- 'request_id': '1234.1',
- 'frame_id': '123.1',
- 'initiator': {'type': 'other'},
- 'timestamp': 2,
- 'timing': {}})
- def setUp(self):
- super(TimeBetweenTestCase, self).setUp()
- self.first = copy.deepcopy(self._REQUEST)
- self.first.timing = Timing.FromDevToolsDict({'requestTime': 123456,
- 'receiveHeadersEnd': 100,
- 'loadingFinished': 500})
- self.second = copy.deepcopy(self._REQUEST)
- self.second.timing = Timing.FromDevToolsDict({'requestTime': 123456 + 1,
- 'receiveHeadersEnd': 200,
- 'loadingFinished': 600})
-
- def testTimeBetweenParser(self):
- self.assertEquals(900, TimeBetween(self.first, self.second, 'parser'))
-
- def testTimeBetweenScript(self):
- self.assertEquals(500, TimeBetween(self.first, self.second, 'script'))
-
-
-class RequestTestCase(unittest.TestCase):
- def testContentType(self):
- r = Request()
- r.response_headers = {}
- self.assertEquals(None, r.GetContentType())
- r.response_headers = {'Content-Type': 'application/javascript'}
- self.assertEquals('application/javascript', r.GetContentType())
- # Case-insensitive match.
- r.response_headers = {'content-type': 'application/javascript'}
- self.assertEquals('application/javascript', r.GetContentType())
- # Parameters are filtered out.
- r.response_headers = {'Content-Type': 'application/javascript;bla'}
- self.assertEquals('application/javascript', r.GetContentType())
- # MIME type takes precedence over 'Content-Type' header.
- r.mime_type = 'image/webp'
- self.assertEquals('image/webp', r.GetContentType())
- r.mime_type = None
- # Test for 'ping' type.
- r.status = 204
- self.assertEquals('ping', r.GetContentType())
- r.status = None
- r.response_headers = {'Content-Type': 'application/javascript',
- 'content-length': '0'}
- self.assertEquals('ping', r.GetContentType())
- # Test for 'redirect' type.
- r.response_headers = {'Content-Type': 'application/javascript',
- 'location': 'http://foo',
- 'content-length': '0'}
- self.assertEquals('redirect', r.GetContentType())
-
- def testGetHTTPResponseHeader(self):
- r = Request()
- r.response_headers = {}
- self.assertEquals(None, r.GetHTTPResponseHeader('Foo'))
- r.response_headers = {'Foo': 'Bar', 'Baz': 'Foo'}
- self.assertEquals('Bar', r.GetHTTPResponseHeader('Foo'))
- r.response_headers = {'foo': 'Bar', 'Baz': 'Foo'}
- self.assertEquals('Bar', r.GetHTTPResponseHeader('Foo'))
-
- def testGetRawResponseHeaders(self):
- r = Request()
- r.protocol = 'http/1.1'
- r.status = 200
- r.status_text = 'Hello world'
- r.response_headers = {'Foo': 'Bar', 'Baz': 'Foo'}
- self.assertEquals('HTTP/1.1 200 Hello world\x00Baz: Foo\x00Foo: Bar\x00',
- r.GetRawResponseHeaders())
-
-
-class ParseStringToIntTestCase(unittest.TestCase):
- def runTest(self):
- MININT = -sys.maxint - 1
- # Same test cases as in string_number_conversions_unittest.cc
- CASES = [
- ("0", 0),
- ("42", 42),
- ("-2147483648", -2147483648),
- ("2147483647", 2147483647),
- ("-2147483649", -2147483649),
- ("-99999999999", -99999999999),
- ("2147483648", 2147483648),
- ("99999999999", 99999999999),
- ("9223372036854775807", sys.maxint),
- ("-9223372036854775808", MININT),
- ("09", 9),
- ("-09", -9),
- ("", 0),
- (" 42", 42),
- ("42 ", 42),
- ("0x42", 0),
- ("\t\n\v\f\r 42", 42),
- ("blah42", 0),
- ("42blah", 42),
- ("blah42blah", 0),
- ("-273.15", -273),
- ("+98.6", 98),
- ("--123", 0),
- ("++123", 0),
- ("-+123", 0),
- ("+-123", 0),
- ("-", 0),
- ("-9223372036854775809", MININT),
- ("-99999999999999999999", MININT),
- ("9223372036854775808", sys.maxint),
- ("99999999999999999999", sys.maxint)]
- for string, expected_int in CASES:
- parsed_int = _ParseStringToInt(string)
- self.assertEquals(expected_int, parsed_int)
-
-
-class CachingPolicyTestCase(unittest.TestCase):
- _REQUEST = {
- 'encoded_data_length': 14726,
- 'request_id': '2291.1',
- 'response_headers': {
- 'Age': '866',
- 'Content-Length': '14187',
- 'Date': 'Fri, 22 Apr 2016 08:56:19 -0200',
- 'Vary': 'Accept-Encoding',
- },
- 'timestamp': 5535648.730768,
- 'timing': {
- 'connect_end': 34.0510001406074,
- 'connect_start': 21.6859998181462,
- 'dns_end': 21.6859998181462,
- 'dns_start': 0,
- 'loading_finished': 58.76399949193001,
- 'receive_headers_end': 47.0650000497699,
- 'request_time': 5535648.73264,
- 'send_end': 34.6099995076656,
- 'send_start': 34.2979999259114
- },
- 'url': 'http://www.example.com/',
- 'status': 200,
- 'wall_time': 1461322579.59422}
-
- def testHasValidators(self):
- r = self._MakeRequest()
- self.assertFalse(CachingPolicy(r).HasValidators())
- r.response_headers['Last-Modified'] = 'Yesterday all my troubles'
- self.assertTrue(CachingPolicy(r).HasValidators())
- r = self._MakeRequest()
- r.response_headers['ETAG'] = 'ABC'
- self.assertTrue(CachingPolicy(r).HasValidators())
-
- def testIsCacheable(self):
- r = self._MakeRequest()
- self.assertTrue(CachingPolicy(r).IsCacheable())
- r.response_headers['Cache-Control'] = 'Whatever,no-store'
- self.assertFalse(CachingPolicy(r).IsCacheable())
-
- def testPolicyNoStore(self):
- r = self._MakeRequest()
- r.response_headers['Cache-Control'] = 'Whatever,no-store'
- self.assertEqual(CachingPolicy.FETCH, CachingPolicy(r).PolicyAtDate(0))
-
- def testPolicyMaxAge(self):
- r = self._MakeRequest()
- r.response_headers['Cache-Control'] = 'whatever,max-age= 1000,whatever'
- self.assertEqual(
- CachingPolicy.VALIDATION_NONE,
- CachingPolicy(r).PolicyAtDate(r.wall_time))
- self.assertEqual(
- CachingPolicy.VALIDATION_SYNC,
- CachingPolicy(r).PolicyAtDate(r.wall_time + 10000))
- # Take current age into account.
- self.assertEqual(
- CachingPolicy.VALIDATION_SYNC,
- CachingPolicy(r).PolicyAtDate(r.wall_time + 500))
- # Max-Age before Expires.
- r.response_headers['Expires'] = 'Thu, 21 Apr 2016 00:00:00 -0200'
- self.assertEqual(
- CachingPolicy.VALIDATION_NONE,
- CachingPolicy(r).PolicyAtDate(r.wall_time))
- # Max-Age < age
- r.response_headers['Cache-Control'] = 'whatever,max-age=100crap,whatever'
- self.assertEqual(
- CachingPolicy.VALIDATION_SYNC,
- CachingPolicy(r).PolicyAtDate(r.wall_time + 2))
-
- def testPolicyExpires(self):
- r = self._MakeRequest()
- # Already expired
- r.response_headers['Expires'] = 'Thu, 21 Apr 2016 00:00:00 -0200'
- self.assertEqual(
- CachingPolicy.VALIDATION_SYNC,
- CachingPolicy(r).PolicyAtDate(r.wall_time))
- r.response_headers['Expires'] = 'Thu, 25 Apr 2016 00:00:00 -0200'
- self.assertEqual(
- CachingPolicy.VALIDATION_NONE,\
- CachingPolicy(r).PolicyAtDate(r.wall_time))
- self.assertEqual(
- CachingPolicy.VALIDATION_NONE,
- CachingPolicy(r).PolicyAtDate(r.wall_time + 86400))
- self.assertEqual(CachingPolicy.VALIDATION_SYNC,
- CachingPolicy(r).PolicyAtDate(r.wall_time + 86400 * 5))
-
- def testStaleWhileRevalidate(self):
- r = self._MakeRequest()
- r.response_headers['Cache-Control'] = (
- 'whatever,max-age=1000,stale-while-revalidate=2000')
- self.assertEqual(
- CachingPolicy.VALIDATION_ASYNC,
- CachingPolicy(r).PolicyAtDate(r.wall_time + 200))
- self.assertEqual(
- CachingPolicy.VALIDATION_ASYNC,
- CachingPolicy(r).PolicyAtDate(r.wall_time + 2000))
- self.assertEqual(
- CachingPolicy.VALIDATION_SYNC,
- CachingPolicy(r).PolicyAtDate(r.wall_time + 3100))
- # must-revalidate overrides stale-while-revalidate.
- r.response_headers['Cache-Control'] += ',must-revalidate'
- self.assertEqual(
- CachingPolicy.VALIDATION_SYNC,
- CachingPolicy(r).PolicyAtDate(r.wall_time + 200))
-
- def test301NeverExpires(self):
- r = self._MakeRequest()
- r.status = 301
- self.assertEqual(
- CachingPolicy.VALIDATION_NONE,
- CachingPolicy(r).PolicyAtDate(r.wall_time + 2000))
-
- def testLastModifiedHeuristic(self):
- r = self._MakeRequest()
- # 8 hours ago.
- r.response_headers['Last-Modified'] = 'Fri, 22 Apr 2016 00:56:19 -0200'
- del r.response_headers['Age']
- self.assertEqual(
- CachingPolicy.VALIDATION_NONE,
- CachingPolicy(r).PolicyAtDate(r.wall_time + 60))
- self.assertEqual(
- CachingPolicy.VALIDATION_SYNC,
- CachingPolicy(r).PolicyAtDate(r.wall_time + 3600))
-
- @classmethod
- def _MakeRequest(cls):
- return Request.FromJsonDict(copy.deepcopy(cls._REQUEST))
-
-
-class RequestTrackTestCase(unittest.TestCase):
- _REQUEST_WILL_BE_SENT = {
- 'method': 'Network.requestWillBeSent',
- 'params': {
- 'documentURL': 'http://example.com/',
- 'frameId': '32493.1',
- 'initiator': {
- 'type': 'other'
- },
- 'loaderId': '32493.3',
- 'request': {
- 'headers': {
- 'Accept': 'text/html',
- 'Upgrade-Insecure-Requests': '1',
- 'User-Agent': 'Mozilla/5.0'
- },
- 'initialPriority': 'VeryHigh',
- 'method': 'GET',
- 'mixedContentType': 'none',
- 'url': 'http://example.com/'
- },
- 'requestId': '32493.1',
- 'timestamp': 5571441.535053,
- 'type': 'Document',
- 'wallTime': 1452691674.08878}}
- _REDIRECT = {
- 'method': 'Network.requestWillBeSent',
- 'params': {
- 'documentURL': 'http://www.example.com/',
- 'frameId': '32493.1',
- 'initiator': {
- 'type': 'other'
- },
- 'loaderId': '32493.3',
- 'redirectResponse': {
- 'connectionId': 18,
- 'connectionReused': False,
- 'encodedDataLength': 198,
- 'fromDiskCache': False,
- 'fromServiceWorker': False,
- 'headers': {},
- 'headersText': 'HTTP/1.1 301 Moved Permanently\r\n',
- 'mimeType': 'text/html',
- 'protocol': 'http/1.1',
- 'remoteIPAddress': '216.146.46.10',
- 'remotePort': 80,
- 'requestHeaders': {
- 'Accept': 'text/html',
- 'User-Agent': 'Mozilla/5.0'
- },
- 'securityState': 'neutral',
- 'status': 301,
- 'statusText': 'Moved Permanently',
- 'timing': {
- 'connectEnd': 137.435999698937,
- 'connectStart': 51.1459996923804,
- 'dnsEnd': 51.1459996923804,
- 'dnsStart': 0,
- 'proxyEnd': -1,
- 'proxyStart': -1,
- 'receiveHeadersEnd': 228.187000378966,
- 'requestTime': 5571441.55002,
- 'sendEnd': 138.841999694705,
- 'sendStart': 138.031999580562,
- 'sslEnd': -1,
- 'sslStart': -1,
- 'workerReady': -1,
- 'workerStart': -1
- },
- 'url': 'http://example.com/'
- },
- 'request': {
- 'headers': {
- 'Accept': 'text/html',
- 'User-Agent': 'Mozilla/5.0'
- },
- 'initialPriority': 'VeryLow',
- 'method': 'GET',
- 'mixedContentType': 'none',
- 'url': 'http://www.example.com/'
- },
- 'requestId': '32493.1',
- 'timestamp': 5571441.795948,
- 'type': 'Document',
- 'wallTime': 1452691674.34968}}
- _RESPONSE_RECEIVED = {
- 'method': 'Network.responseReceived',
- 'params': {
- 'frameId': '32493.1',
- 'loaderId': '32493.3',
- 'requestId': '32493.1',
- 'response': {
- 'connectionId': 26,
- 'connectionReused': False,
- 'encodedDataLength': -1,
- 'fromDiskCache': False,
- 'fromServiceWorker': False,
- 'headers': {
- 'Age': '67',
- 'Cache-Control': 'max-age=0,must-revalidate',
- },
- 'headersText': 'HTTP/1.1 200 OK\r\n',
- 'mimeType': 'text/html',
- 'protocol': 'http/1.1',
- 'requestHeaders': {
- 'Accept': 'text/html',
- 'Host': 'www.example.com',
- 'User-Agent': 'Mozilla/5.0'
- },
- 'status': 200,
- 'timing': {
- 'connectEnd': 37.9800004884601,
- 'connectStart': 26.8250005319715,
- 'dnsEnd': 26.8250005319715,
- 'dnsStart': 0,
- 'proxyEnd': -1,
- 'proxyStart': -1,
- 'receiveHeadersEnd': 54.9750002101064,
- 'requestTime': 5571441.798671,
- 'sendEnd': 38.3980004116893,
- 'sendStart': 38.1810003891587,
- 'sslEnd': -1,
- 'sslStart': -1,
- 'workerReady': -1,
- 'workerStart': -1
- },
- 'url': 'http://www.example.com/'
- },
- 'timestamp': 5571441.865639,
- 'type': 'Document'}}
- _DATA_RECEIVED_1 = {
- "method": "Network.dataReceived",
- "params": {
- "dataLength": 1803,
- "encodedDataLength": 1326,
- "requestId": "32493.1",
- "timestamp": 5571441.867347}}
- _DATA_RECEIVED_2 = {
- "method": "Network.dataReceived",
- "params": {
- "dataLength": 32768,
- "encodedDataLength": 32768,
- "requestId": "32493.1",
- "timestamp": 5571441.893121}}
- _SERVED_FROM_CACHE = {
- "method": "Network.requestServedFromCache",
- "params": {
- "requestId": "32493.1"}}
- _LOADING_FINISHED = {'method': 'Network.loadingFinished',
- 'params': {
- 'encodedDataLength': 101829,
- 'requestId': '32493.1',
- 'timestamp': 5571441.891189}}
- _LOADING_FAILED = {'method': 'Network.loadingFailed',
- 'params': {
- 'canceled': False,
- 'blockedReason': None,
- 'encodedDataLength': 101829,
- 'errorText': 'net::ERR_TOO_MANY_REDIRECTS',
- 'requestId': '32493.1',
- 'timestamp': 5571441.891189,
- 'type': 'Document'}}
-
- def setUp(self):
- self.request_track = RequestTrack(None)
-
- def testParseRequestWillBeSent(self):
- msg = RequestTrackTestCase._REQUEST_WILL_BE_SENT
- request_id = msg['params']['requestId']
- self.request_track.Handle('Network.requestWillBeSent', msg)
- self.assertTrue(request_id in self.request_track._requests_in_flight)
- (_, status) = self.request_track._requests_in_flight[request_id]
- self.assertEquals(RequestTrack._STATUS_SENT, status)
-
- def testRejectsUnknownMethod(self):
- with self.assertRaises(AssertionError):
- self.request_track.Handle(
- 'unknown', RequestTrackTestCase._REQUEST_WILL_BE_SENT)
-
- def testHandleRedirect(self):
- self.request_track.Handle('Network.requestWillBeSent',
- RequestTrackTestCase._REQUEST_WILL_BE_SENT)
- self.request_track.Handle('Network.requestWillBeSent',
- RequestTrackTestCase._REDIRECT)
- self.assertEquals(1, len(self.request_track._requests_in_flight))
- self.assertEquals(1, len(self.request_track.GetEvents()))
- redirect_request = self.request_track.GetEvents()[0]
- self.assertTrue(redirect_request.request_id.endswith(
- RequestTrack._REDIRECT_SUFFIX + '.1'))
- request = self.request_track._requests_in_flight.values()[0][0]
- self.assertEquals('redirect', request.initiator['type'])
- self.assertEquals(
- redirect_request.request_id,
- request.initiator[Request.INITIATING_REQUEST])
- self.assertEquals(0, self.request_track.inconsistent_initiators_count)
-
- def testMultipleRedirects(self):
- self.request_track.Handle('Network.requestWillBeSent',
- RequestTrackTestCase._REQUEST_WILL_BE_SENT)
- self.request_track.Handle('Network.requestWillBeSent',
- RequestTrackTestCase._REDIRECT)
- self.request_track.Handle('Network.requestWillBeSent',
- RequestTrackTestCase._REDIRECT)
- self.assertEquals(1, len(self.request_track._requests_in_flight))
- self.assertEquals(2, len(self.request_track.GetEvents()))
- first_redirect_request = self.request_track.GetEvents()[0]
- self.assertTrue(first_redirect_request.request_id.endswith(
- RequestTrack._REDIRECT_SUFFIX + '.1'))
- second_redirect_request = self.request_track.GetEvents()[1]
- self.assertTrue(second_redirect_request.request_id.endswith(
- RequestTrack._REDIRECT_SUFFIX + '.2'))
- self.assertEquals('redirect', second_redirect_request.initiator['type'])
- self.assertEquals(
- first_redirect_request.request_id,
- second_redirect_request.initiator[Request.INITIATING_REQUEST])
- request = self.request_track._requests_in_flight.values()[0][0]
- self.assertEquals('redirect', request.initiator['type'])
- self.assertEquals(
- second_redirect_request.request_id,
- request.initiator[Request.INITIATING_REQUEST])
- self.assertEquals(0, self.request_track.inconsistent_initiators_count)
-
- def testInconsistentInitiators(self):
- self.request_track.Handle('Network.requestWillBeSent',
- RequestTrackTestCase._REQUEST_WILL_BE_SENT)
- request = copy.deepcopy(RequestTrackTestCase._REDIRECT)
- request['params']['initiator']['type'] = 'script'
- self.request_track.Handle('Network.requestWillBeSent', request)
- self.assertEquals(1, self.request_track.inconsistent_initiators_count)
-
- def testRejectDuplicates(self):
- msg = RequestTrackTestCase._REQUEST_WILL_BE_SENT
- self.request_track.Handle('Network.requestWillBeSent', msg)
- with self.assertRaises(AssertionError):
- self.request_track.Handle('Network.requestWillBeSent', msg)
-
- def testIgnoreCompletedDuplicates(self):
- self.request_track.Handle('Network.requestWillBeSent',
- RequestTrackTestCase._REQUEST_WILL_BE_SENT)
- self.request_track.Handle('Network.responseReceived',
- RequestTrackTestCase._RESPONSE_RECEIVED)
- self.request_track.Handle('Network.loadingFinished',
- RequestTrackTestCase._LOADING_FINISHED)
- # Should not raise an AssertionError.
- self.request_track.Handle('Network.requestWillBeSent',
- RequestTrackTestCase._REQUEST_WILL_BE_SENT)
-
- def testSequenceOfGeneratedResponse(self):
- self.request_track.Handle('Network.requestServedFromCache',
- RequestTrackTestCase._SERVED_FROM_CACHE)
- self.request_track.Handle('Network.loadingFinished',
- RequestTrackTestCase._LOADING_FINISHED)
- self.assertEquals(0, len(self.request_track.GetEvents()))
-
- def testInvalidSequence(self):
- msg1 = RequestTrackTestCase._REQUEST_WILL_BE_SENT
- msg2 = RequestTrackTestCase._LOADING_FINISHED
- self.request_track.Handle('Network.requestWillBeSent', msg1)
- with self.assertRaises(AssertionError):
- self.request_track.Handle('Network.loadingFinished', msg2)
-
- def testValidSequence(self):
- self._ValidSequence(self.request_track)
- self.assertEquals(1, len(self.request_track.GetEvents()))
- self.assertEquals(0, len(self.request_track._requests_in_flight))
- r = self.request_track.GetEvents()[0]
- self.assertEquals('32493.1', r.request_id)
- self.assertEquals('32493.1', r.frame_id)
- self.assertEquals('32493.3', r.loader_id)
- self.assertEquals('http://example.com/', r.document_url)
- self.assertEquals('http://example.com/', r.url)
- self.assertEquals('http/1.1', r.protocol)
- self.assertEquals('GET', r.method)
- response = RequestTrackTestCase._RESPONSE_RECEIVED['params']['response']
- self.assertEquals(response['requestHeaders'], r.request_headers)
- self.assertEquals(response['headers'], r.response_headers)
- self.assertEquals('VeryHigh', r.initial_priority)
- request_will_be_sent = (
- RequestTrackTestCase._REQUEST_WILL_BE_SENT['params'])
- self.assertEquals(request_will_be_sent['timestamp'], r.timestamp)
- self.assertEquals(request_will_be_sent['wallTime'], r.wall_time)
- self.assertEquals(request_will_be_sent['initiator'], r.initiator)
- self.assertEquals(request_will_be_sent['type'], r.resource_type)
- self.assertEquals(False, r.served_from_cache)
- self.assertEquals(False, r.from_disk_cache)
- self.assertEquals(False, r.from_service_worker)
- timing = Timing.FromDevToolsDict(response['timing'])
- loading_finished = RequestTrackTestCase._LOADING_FINISHED['params']
- loading_finished_offset = r._TimestampOffsetFromStartMs(
- loading_finished['timestamp'])
- timing.loading_finished = loading_finished_offset
- self.assertEquals(timing, r.timing)
- self.assertEquals(200, r.status)
- self.assertEquals(
- loading_finished['encodedDataLength'], r.encoded_data_length)
- self.assertEquals(False, r.failed)
-
- def testDataReceived(self):
- self._ValidSequence(self.request_track)
- self.assertEquals(1, len(self.request_track.GetEvents()))
- r = self.request_track.GetEvents()[0]
- self.assertEquals(2, len(r.data_chunks))
- self.assertEquals(
- RequestTrackTestCase._DATA_RECEIVED_1['params']['encodedDataLength'],
- r.data_chunks[0][1])
- self.assertEquals(
- RequestTrackTestCase._DATA_RECEIVED_2['params']['encodedDataLength'],
- r.data_chunks[1][1])
-
- def testDuplicatedResponseReceived(self):
- msg1 = RequestTrackTestCase._REQUEST_WILL_BE_SENT
- msg2 = copy.deepcopy(RequestTrackTestCase._RESPONSE_RECEIVED)
- msg2_other_timestamp = copy.deepcopy(msg2)
- msg2_other_timestamp['params']['timestamp'] += 12
- msg2_different = copy.deepcopy(msg2)
- msg2_different['params']['response']['encodedDataLength'] += 1
- self.request_track.Handle('Network.requestWillBeSent', msg1)
- self.request_track.Handle('Network.responseReceived', msg2)
- # Should not raise an AssertionError.
- self.request_track.Handle('Network.responseReceived', msg2)
- self.assertEquals(1, self.request_track.duplicates_count)
- with self.assertRaises(AssertionError):
- self.request_track.Handle('Network.responseReceived', msg2_different)
-
- def testLoadingFailed(self):
- self.request_track.Handle('Network.requestWillBeSent',
- RequestTrackTestCase._REQUEST_WILL_BE_SENT)
- self.request_track.Handle('Network.responseReceived',
- RequestTrackTestCase._RESPONSE_RECEIVED)
- self.request_track.Handle('Network.loadingFailed',
- RequestTrackTestCase._LOADING_FAILED)
- r = self.request_track.GetEvents()[0]
- self.assertTrue(r.failed)
- self.assertEquals('net::ERR_TOO_MANY_REDIRECTS', r.error_text)
-
- def testCanSerialize(self):
- self._ValidSequence(self.request_track)
- json_dict = self.request_track.ToJsonDict()
- _ = json.dumps(json_dict) # Should not raise an exception.
-
- def testCanDeserialize(self):
- self._ValidSequence(self.request_track)
- self.request_track.duplicates_count = 142
- self.request_track.inconsistent_initiators_count = 123
- json_dict = self.request_track.ToJsonDict()
- request_track = RequestTrack.FromJsonDict(json_dict)
- self.assertEquals(self.request_track, request_track)
-
- def testMaxAge(self):
- rq = Request()
- self.assertEqual(-1, rq.MaxAge())
- rq.response_headers = {}
- self.assertEqual(-1, rq.MaxAge())
- rq.response_headers[
- 'Cache-Control'] = 'private,s-maxage=0,max-age=0,must-revalidate'
- self.assertEqual(0, rq.MaxAge())
- rq.response_headers[
- 'Cache-Control'] = 'private,s-maxage=0,no-store,max-age=100'
- self.assertEqual(-1, rq.MaxAge())
- rq.response_headers[
- 'Cache-Control'] = 'private,s-maxage=0'
- self.assertEqual(-1, rq.MaxAge())
- # Case-insensitive match.
- rq.response_headers['cache-control'] = 'max-age=600'
- self.assertEqual(600, rq.MaxAge())
-
-
- @classmethod
- def _ValidSequence(cls, request_track):
- request_track.Handle(
- 'Network.requestWillBeSent', cls._REQUEST_WILL_BE_SENT)
- request_track.Handle('Network.responseReceived', cls._RESPONSE_RECEIVED)
- request_track.Handle('Network.dataReceived', cls._DATA_RECEIVED_1)
- request_track.Handle('Network.dataReceived', cls._DATA_RECEIVED_2)
- request_track.Handle('Network.loadingFinished', cls._LOADING_FINISHED)
-
-if __name__ == '__main__':
- unittest.main()
diff --git a/loading/resource_sack.py b/loading/resource_sack.py
deleted file mode 100644
index d7fe331..0000000
--- a/loading/resource_sack.py
+++ /dev/null
@@ -1,224 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-"""A collection of ResourceGraphs.
-
-Processes multiple ResourceGraphs, all presumably from requests to the same
-site. Common urls are collected in Bags and different statistics on the
-relationship between bags are collected.
-"""
-
-import collections
-import json
-import sys
-import urlparse
-
-from collections import defaultdict
-
-import content_classification_lens
-import graph
-import user_satisfied_lens
-
-class GraphSack(object):
- """Aggreate of RequestDependencyGraphs.
-
- Collects RequestDependencyGraph nodes into bags, where each bag contains the
- nodes with common urls. Dependency edges are tracked between bags (so that
- each bag may be considered as a node of a graph). This graph of bags is
- referred to as a sack.
-
- Each bag is associated with a dag.Node, even though the bag graph may not be a
- DAG. The edges are annotated with list of graphs and nodes that generated
- them.
- """
- # See CoreSet().
- CORE_THRESHOLD = 0.8
-
- _GraphInfo = collections.namedtuple('_GraphInfo', (
- 'cost', # The graph cost (aka critical path length).
- ))
-
- def __init__(self):
- # Each bag in our sack is named as indicated by this map.
- self._name_to_bag = {}
- # List our edges by bag pairs: (from_bag, to_bag) -> graph.Edge.
- self._edges = {}
- # Maps graph -> _GraphInfo structures for each graph we've consumed.
- self._graph_info = {}
-
- # How we generate names.
- self._name_generator = lambda n: n.request.url
-
- # Our graph, updated after each ConsumeGraph.
- self._graph = None
-
- def SetNameGenerator(self, generator):
- """Set the generator we use for names.
-
- This will define the equivalence class of requests we use to define sacks.
-
- Args:
- generator: a function taking a RequestDependencyGraph node and returning a
- string.
- """
- self._name_generator = generator
-
- def ConsumeGraph(self, request_graph):
- """Add a graph and process.
-
- Args:
- graph: (RequestDependencyGraph) the graph to add.
- """
- assert graph not in self._graph_info
- cost = request_graph.Cost()
- self._graph_info[request_graph] = self._GraphInfo(cost=cost)
- for n in request_graph.graph.Nodes():
- self.AddNode(request_graph, n)
-
- # TODO(mattcary): this is inefficient but our current API doesn't require an
- # explicit graph creation from the client.
- self._graph = graph.DirectedGraph(self.bags, self._edges.itervalues())
-
- def GetBag(self, node):
- """Find the bag for a node, or None if not found."""
- return self._name_to_bag.get(self._name_generator(node), None)
-
- def AddNode(self, request_graph, node):
- """Add a node to our collection.
-
- Args:
- graph: (RequestDependencyGraph) the graph in which the node lives.
- node: (RequestDependencyGraph node) the node to add.
-
- Returns:
- The Bag containing the node.
- """
- sack_name = self._name_generator(node)
- if sack_name not in self._name_to_bag:
- self._name_to_bag[sack_name] = Bag(self, sack_name)
- bag = self._name_to_bag[sack_name]
- bag.AddNode(request_graph, node)
- return bag
-
- def AddEdge(self, from_bag, to_bag):
- """Add an edge between two bags."""
- if (from_bag, to_bag) not in self._edges:
- self._edges[(from_bag, to_bag)] = graph.Edge(from_bag, to_bag)
-
- def CoreSet(self, *graph_sets):
- """Compute the core set of this sack.
-
- The core set of a sack is the set of resource that are common to most of the
- graphs in the sack. A core set of a set of graphs are the resources that
- appear with frequency at least CORE_THRESHOLD. For a collection of graph
- sets, for instance pulling the same page under different network
- connections, we intersect the core sets to produce a page core set that
- describes the key resources used by the page. See https://goo.gl/LmqQRS for
- context and discussion.
-
- Args:
- graph_sets: one or more collection of graphs to compute core sets. If one
- graph set is given, its core set is computed. If more than one set is
- given, the page core set of all sets is computed (the intersection of
- core sets). If no graph set is given, the core of all graphs is
- computed.
-
- Returns:
- A set of bags in the core set.
- """
- if not graph_sets:
- graph_sets = [self._graph_info.keys()]
- return reduce(lambda a, b: a & b,
- (self._SingleCore(s) for s in graph_sets))
-
- @classmethod
- def CoreSimilarity(cls, a, b):
- """Compute the similarity of two core sets.
-
- We use the Jaccard index. See https://goo.gl/LmqQRS for discussion.
-
- Args:
- a: The first core set, as a set of strings.
- b: The second core set, as a set of strings.
-
- Returns:
- A similarity score between zero and one. If both sets are empty the
- similarity is zero.
- """
- if not a and not b:
- return 0
- return float(len(a & b)) / len(a | b)
-
- @property
- def num_graphs(self):
- return len(self.graph_info)
-
- @property
- def graph_info(self):
- return self._graph_info
-
- @property
- def bags(self):
- return self._name_to_bag.values()
-
- def _SingleCore(self, graph_set):
- core = set()
- graph_set = set(graph_set)
- num_graphs = len(graph_set)
- for b in self.bags:
- count = sum([g in graph_set for g in b.graphs])
- if float(count) / num_graphs > self.CORE_THRESHOLD:
- core.add(b)
- return core
-
- @classmethod
- def _MakeShortname(cls, url):
- # TODO(lizeb): Move this method to a convenient common location.
- parsed = urlparse.urlparse(url)
- if parsed.scheme == 'data':
- if ';' in parsed.path:
- kind, _ = parsed.path.split(';', 1)
- else:
- kind, _ = parsed.path.split(',', 1)
- return 'data:' + kind
- path = parsed.path[:10]
- hostname = parsed.hostname if parsed.hostname else '?.?.?'
- return hostname + '/' + path
-
-
-class Bag(graph.Node):
- def __init__(self, sack, name):
- super(Bag, self).__init__()
- self._sack = sack
- self._name = name
- self._label = GraphSack._MakeShortname(name)
- # Maps a ResourceGraph to its Nodes contained in this Bag.
- self._graphs = defaultdict(set)
-
- @property
- def name(self):
- return self._name
-
- @property
- def label(self):
- return self._label
-
- @property
- def graphs(self):
- return self._graphs.iterkeys()
-
- @property
- def num_nodes(self):
- return sum(len(g) for g in self._graphs.itervalues())
-
- def GraphNodes(self, g):
- return self._graphs.get(g, set())
-
- def AddNode(self, request_graph, node):
- if node in self._graphs[request_graph]:
- return # Already added.
- self._graphs[request_graph].add(node)
- for edge in request_graph.graph.OutEdges(node):
- out_bag = self._sack.AddNode(request_graph, edge.to_node)
- self._sack.AddEdge(self, out_bag)
diff --git a/loading/resource_sack_unittest.py b/loading/resource_sack_unittest.py
deleted file mode 100644
index 30c1d0e..0000000
--- a/loading/resource_sack_unittest.py
+++ /dev/null
@@ -1,121 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-import unittest
-
-import resource_sack
-from test_utils import (MakeRequest,
- TestDependencyGraph)
-
-
-class ResourceSackTestCase(unittest.TestCase):
- def SimpleGraph(self, node_names):
- """Create a simple graph from a list of nodes."""
- requests = [MakeRequest(node_names[0], 'null')]
- for n in node_names[1:]:
- requests.append(MakeRequest(n, node_names[0]))
- return TestDependencyGraph(requests)
-
- def test_NodeMerge(self):
- g1 = TestDependencyGraph([
- MakeRequest(0, 'null'),
- MakeRequest(1, 0),
- MakeRequest(2, 0),
- MakeRequest(3, 1)])
- g2 = TestDependencyGraph([
- MakeRequest(0, 'null'),
- MakeRequest(1, 0),
- MakeRequest(2, 0),
- MakeRequest(4, 2)])
- sack = resource_sack.GraphSack()
- sack.ConsumeGraph(g1)
- sack.ConsumeGraph(g2)
- self.assertEqual(5, len(sack.bags))
- for bag in sack.bags:
- if bag.label not in ('3/', '4/'):
- self.assertEqual(2, bag.num_nodes)
- else:
- self.assertEqual(1, bag.num_nodes)
-
- def test_MultiParents(self):
- g1 = TestDependencyGraph([
- MakeRequest(0, 'null'),
- MakeRequest(2, 0)])
- g2 = TestDependencyGraph([
- MakeRequest(1, 'null'),
- MakeRequest(2, 1)])
- sack = resource_sack.GraphSack()
- sack.ConsumeGraph(g1)
- sack.ConsumeGraph(g2)
- self.assertEqual(3, len(sack.bags))
- labels = {bag.label: bag for bag in sack.bags}
- def Predecessors(label):
- bag = labels['%s/' % label]
- return [e.from_node
- for e in bag._sack._graph.InEdges(bag)]
- self.assertEqual(
- set(['0/', '1/']),
- set([bag.label for bag in Predecessors(2)]))
- self.assertFalse(Predecessors(0))
- self.assertFalse(Predecessors(1))
-
- def test_Shortname(self):
- root = MakeRequest(0, 'null')
- shortname = MakeRequest(1, 0)
- shortname.url = 'data:fake/content;' + 'lotsand' * 50 + 'lotsofdata'
- g1 = TestDependencyGraph([root, shortname])
- sack = resource_sack.GraphSack()
- sack.ConsumeGraph(g1)
- self.assertEqual(set(['0/', 'data:fake/content']),
- set([bag.label for bag in sack.bags]))
-
- def test_Core(self):
- # We will use a core threshold of 0.5 to make it easier to define
- # graphs. Resources 0 and 1 are core and others are not. We check full names
- # and node counts as we output that for core set analysis. In subsequent
- # tests we just check labels to make the tests easier to read.
- graphs = [self.SimpleGraph([0, 1, 2]),
- self.SimpleGraph([0, 1, 3]),
- self.SimpleGraph([0, 1, 4]),
- self.SimpleGraph([0, 5])]
- sack = resource_sack.GraphSack()
- sack.CORE_THRESHOLD = 0.5
- for g in graphs:
- sack.ConsumeGraph(g)
- self.assertEqual(set([('http://0', 4), ('http://1', 3)]),
- set((b.name, b.num_nodes) for b in sack.CoreSet()))
-
- def test_IntersectingCore(self):
- # Graph set A has core set {0, 1} and B {0, 2} so the final core set should
- # be {0}. Set C makes sure we restrict core computation to tags A and B.
- set_A = [self.SimpleGraph([0, 1, 2]),
- self.SimpleGraph([0, 1, 3])]
- set_B = [self.SimpleGraph([0, 2, 3]),
- self.SimpleGraph([0, 2, 1])]
- set_C = [self.SimpleGraph([2 * i + 4, 2 * i + 5]) for i in xrange(5)]
- sack = resource_sack.GraphSack()
- sack.CORE_THRESHOLD = 0.5
- for g in set_A + set_B + set_C:
- sack.ConsumeGraph(g)
- self.assertEqual(set(), sack.CoreSet())
- self.assertEqual(set(['0/', '1/']),
- set(b.label for b in sack.CoreSet(set_A)))
- self.assertEqual(set(['0/', '2/']),
- set(b.label for b in sack.CoreSet(set_B)))
- self.assertEqual(set(), sack.CoreSet(set_C))
- self.assertEqual(set(['0/']),
- set(b.label for b in sack.CoreSet(set_A, set_B)))
- self.assertEqual(set(), sack.CoreSet(set_A, set_B, set_C))
-
- def test_Simililarity(self):
- self.assertAlmostEqual(
- 0.5,
- resource_sack.GraphSack.CoreSimilarity(
- set([1, 2, 3]), set([1, 3, 4])))
- self.assertEqual(
- 0, resource_sack.GraphSack.CoreSimilarity(set(), set()))
-
-
-if __name__ == '__main__':
- unittest.main()
diff --git a/loading/run_tests b/loading/run_tests
deleted file mode 100755
index 1f04f05..0000000
--- a/loading/run_tests
+++ /dev/null
@@ -1,32 +0,0 @@
-#!/usr/bin/env python
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-import logging
-import os
-import sys
-import unittest
-
-
-if __name__ == '__main__':
- logging.basicConfig(
- level=logging.DEBUG if '-v' in sys.argv else logging.WARNING,
- format='%(levelname)5s %(filename)15s(%(lineno)3d): %(message)s')
-
- suite = unittest.TestSuite()
- loader = unittest.TestLoader()
- root_dir = os.path.dirname(os.path.realpath(__file__))
- if len(sys.argv) < 2:
- cases = loader.discover(start_dir=root_dir, pattern='*_unittest.py')
- else:
- cases = []
- for module in sys.argv[1:]:
- pattern = '{}_unittest.py'.format(module)
- cases.extend(loader.discover(start_dir=root_dir, pattern=pattern))
- suite.addTests(cases)
- res = unittest.TextTestRunner(verbosity=2).run(suite)
- if res.wasSuccessful():
- sys.exit(0)
- else:
- sys.exit(1)
diff --git a/loading/sandwich.py b/loading/sandwich.py
deleted file mode 100755
index ffd7b00..0000000
--- a/loading/sandwich.py
+++ /dev/null
@@ -1,300 +0,0 @@
-#! /usr/bin/env python
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-"""Instructs Chrome to load series of web pages and reports results.
-
-When running Chrome is sandwiched between preprocessed disk caches and
-WepPageReplay serving all connections.
-"""
-
-import argparse
-import csv
-import json
-import logging
-import os
-import re
-import sys
-from urlparse import urlparse
-import yaml
-
-_SRC_DIR = os.path.abspath(os.path.join(
- os.path.dirname(__file__), '..', '..', '..'))
-
-sys.path.append(os.path.join(_SRC_DIR, 'third_party', 'catapult', 'devil'))
-from devil.android import device_utils
-
-sys.path.append(os.path.join(_SRC_DIR, 'build', 'android'))
-from pylib import constants
-import devil_chromium
-
-import csv_util
-import device_setup
-import options
-import sandwich_prefetch
-import sandwich_swr
-import sandwich_utils
-import task_manager
-
-
-# Use options layer to access constants.
-OPTIONS = options.OPTIONS
-
-_SPEED_INDEX_MEASUREMENT = 'speed-index'
-_MEMORY_MEASUREMENT = 'memory'
-_TTFMP_MEASUREMENT = 'ttfmp'
-_CORPUS_DIR = 'sandwich_corpuses'
-_SANDWICH_SETUP_FILENAME = 'sandwich_setup.yaml'
-
-_MAIN_TRANSFORMER_LIST_NAME = 'no-network-emulation'
-
-
-def ReadUrlsFromCorpus(corpus_path):
- """Retrieves the list of URLs associated with the corpus name."""
- try:
- # Attempt to read by regular file name.
- json_file_name = corpus_path
- with open(json_file_name) as f:
- json_data = json.load(f)
- except IOError:
- # Extra sugar: attempt to load from _CORPUS_DIR.
- json_file_name = os.path.join(
- os.path.dirname(__file__), _CORPUS_DIR, corpus_path)
- with open(json_file_name) as f:
- json_data = json.load(f)
-
- key = 'urls'
- if json_data and key in json_data:
- url_list = json_data[key]
- if isinstance(url_list, list) and len(url_list) > 0:
- return [str(u) for u in url_list]
- raise Exception(
- 'File {} does not define a list named "urls"'.format(json_file_name))
-
-
-def _GenerateUrlDirectoryMap(urls):
- domain_times_encountered_per_domain = {}
- url_directories = {}
- for url in urls:
- domain = '.'.join(urlparse(url).netloc.split('.')[-2:])
- domain_times_encountered = domain_times_encountered_per_domain.get(
- domain, 0)
- output_subdirectory = '{}.{}'.format(domain, domain_times_encountered)
- domain_times_encountered_per_domain[domain] = domain_times_encountered + 1
- url_directories[output_subdirectory] = url
- return url_directories
-
-
-def _ArgumentParser():
- """Build a command line argument's parser."""
- # Command line parser when dealing with _SetupBenchmarkMain.
- sandwich_setup_parser = argparse.ArgumentParser(add_help=False)
- sandwich_setup_parser.add_argument('--android', default=None, type=str,
- dest='android_device_serial', help='Android device\'s serial to use.')
- sandwich_setup_parser.add_argument('-c', '--corpus', required=True,
- help='Path to a JSON file with a corpus such as in %s/.' % _CORPUS_DIR)
- sandwich_setup_parser.add_argument('-m', '--measure', default=[], nargs='+',
- choices=[_SPEED_INDEX_MEASUREMENT,
- _MEMORY_MEASUREMENT,
- _TTFMP_MEASUREMENT],
- dest='optional_measures', help='Enable optional measurements.')
- sandwich_setup_parser.add_argument('-o', '--output', type=str, required=True,
- help='Path of the output directory to setup.')
- sandwich_setup_parser.add_argument('-r', '--url-repeat', default=1, type=int,
- help='How many times to repeat the urls.')
-
- # Plumbing parser to configure OPTIONS.
- plumbing_parser = OPTIONS.GetParentParser('plumbing options')
-
- # Main parser
- parser = argparse.ArgumentParser(parents=[plumbing_parser],
- fromfile_prefix_chars=task_manager.FROMFILE_PREFIX_CHARS)
- subparsers = parser.add_subparsers(dest='subcommand', help='subcommand line')
-
- # Setup NoState-Prefetch benchmarks subcommand.
- subparsers.add_parser('setup-prefetch', parents=[sandwich_setup_parser],
- help='Setup all NoState-Prefetch benchmarks.')
-
- # Setup Stale-While-Revalidate benchmarks subcommand.
- swr_setup_parser = subparsers.add_parser('setup-swr',
- parents=[sandwich_setup_parser],
- help='Setup all Stale-While-Revalidate benchmarks.')
- swr_setup_parser.add_argument('-d', '--domains-csv',
- type=argparse.FileType('r'), required=True,
- help='Path of the CSV containing the pattern of domains in a '
- '`domain-patterns` column and a `usage` column in percent in how '
- 'likely they are in a page load.')
-
- # Run benchmarks subcommand (used in _RunBenchmarkMain).
- subparsers.add_parser('run', parents=[task_manager.CommandLineParser()],
- help='Run benchmarks steps using the task manager infrastructure.')
-
- # Collect subcommand.
- collect_csv_parser = subparsers.add_parser('collect-csv',
- help='Collects all CSVs from Sandwich output directory into a single '
- 'CSV.')
- collect_csv_parser.add_argument('output_dir', type=str,
- help='Path to the run output directory.')
- collect_csv_parser.add_argument('output_csv', type=argparse.FileType('w'),
- help='Path to the output CSV.')
-
- return parser
-
-
-def _SetupNoStatePrefetchBenchmark(args):
- del args # unused.
- return {
- 'network_conditions': ['Regular4G', 'Regular3G', 'Regular2G'],
- 'subresource_discoverers': [
- e for e in sandwich_prefetch.SUBRESOURCE_DISCOVERERS
- if e != sandwich_prefetch.Discoverer.FullCache]
- }
-
-
-def _GenerateNoStatePrefetchBenchmarkTasks(
- common_builder, main_transformer, benchmark_setup):
- builder = sandwich_prefetch.PrefetchBenchmarkBuilder(common_builder)
- builder.PopulateLoadBenchmark(sandwich_prefetch.Discoverer.EmptyCache,
- _MAIN_TRANSFORMER_LIST_NAME,
- transformer_list=[main_transformer])
- builder.PopulateLoadBenchmark(sandwich_prefetch.Discoverer.FullCache,
- _MAIN_TRANSFORMER_LIST_NAME,
- transformer_list=[main_transformer])
- for network_condition in benchmark_setup['network_conditions']:
- transformer_list_name = network_condition.lower()
- network_transformer = \
- sandwich_utils.NetworkSimulationTransformer(network_condition)
- transformer_list = [main_transformer, network_transformer]
- for subresource_discoverer in benchmark_setup['subresource_discoverers']:
- builder.PopulateLoadBenchmark(
- subresource_discoverer, transformer_list_name, transformer_list)
-
-
-def _SetupStaleWhileRevalidateBenchmark(args):
- domain_regexes = []
- for row in csv.DictReader(args.domains_csv):
- domain_patterns = json.loads('[{}]'.format(row['domain-patterns']))
- for domain_pattern in domain_patterns:
- domain_pattern_escaped = r'(\.|^){}$'.format(re.escape(domain_pattern))
- domain_regexes.append({
- 'usage': float(row['usage']),
- 'domain_regex': domain_pattern_escaped.replace(r'\?', r'\w*')})
- return {
- 'domain_regexes': domain_regexes,
- 'network_conditions': ['Regular3G', 'Regular2G'],
- 'usage_thresholds': [1, 3, 5, 10]
- }
-
-
-def _GenerateStaleWhileRevalidateBenchmarkTasks(
- common_builder, main_transformer, benchmark_setup):
- # Compile domain regexes.
- domain_regexes = []
- for e in benchmark_setup['domain_regexes']:
- domain_regexes.append({
- 'usage': e['usage'],
- 'domain_regex': re.compile(e['domain_regex'])})
-
- # Build tasks.
- builder = sandwich_swr.StaleWhileRevalidateBenchmarkBuilder(common_builder)
- for network_condition in benchmark_setup['network_conditions']:
- transformer_list_name = network_condition.lower()
- network_transformer = \
- sandwich_utils.NetworkSimulationTransformer(network_condition)
- transformer_list = [main_transformer, network_transformer]
- builder.PopulateBenchmark(
- 'no-swr', [], transformer_list_name, transformer_list)
- for usage_threshold in benchmark_setup['usage_thresholds']:
- benchmark_name = 'threshold{}'.format(usage_threshold)
- selected_domain_regexes = [e['domain_regex'] for e in domain_regexes
- if e['usage'] > usage_threshold]
- builder.PopulateBenchmark(
- benchmark_name, selected_domain_regexes,
- transformer_list_name, transformer_list)
-
-
-_TASK_GENERATORS = {
- 'prefetch': _GenerateNoStatePrefetchBenchmarkTasks,
- 'swr': _GenerateStaleWhileRevalidateBenchmarkTasks
-}
-
-
-def _SetupBenchmarkMain(args, benchmark_type, benchmark_specific_handler):
- assert benchmark_type in _TASK_GENERATORS
- urls = ReadUrlsFromCorpus(args.corpus)
- setup = {
- 'benchmark_type': benchmark_type,
- 'benchmark_setup': benchmark_specific_handler(args),
- 'sandwich_runner': {
- 'record_video': _SPEED_INDEX_MEASUREMENT in args.optional_measures,
- 'record_memory_dumps': _MEMORY_MEASUREMENT in args.optional_measures,
- 'record_first_meaningful_paint': (
- _TTFMP_MEASUREMENT in args.optional_measures),
- 'repeat': args.url_repeat,
- 'android_device_serial': args.android_device_serial
- },
- 'urls': _GenerateUrlDirectoryMap(urls)
- }
- if not os.path.isdir(args.output):
- os.makedirs(args.output)
- setup_path = os.path.join(args.output, _SANDWICH_SETUP_FILENAME)
- with open(setup_path, 'w') as file_output:
- yaml.dump(setup, file_output, default_flow_style=False)
-
-
-def _RunBenchmarkMain(args):
- setup_path = os.path.join(args.output, _SANDWICH_SETUP_FILENAME)
- with open(setup_path) as file_input:
- setup = yaml.load(file_input)
- android_device = None
- if setup['sandwich_runner']['android_device_serial']:
- android_device = device_setup.GetDeviceFromSerial(
- setup['sandwich_runner']['android_device_serial'])
- task_generator = _TASK_GENERATORS[setup['benchmark_type']]
-
- def MainTransformer(runner):
- runner.record_video = setup['sandwich_runner']['record_video']
- runner.record_memory_dumps = setup['sandwich_runner']['record_memory_dumps']
- runner.record_first_meaningful_paint = (
- setup['sandwich_runner']['record_first_meaningful_paint'])
- runner.repeat = setup['sandwich_runner']['repeat']
-
- default_final_tasks = []
- for output_subdirectory, url in setup['urls'].iteritems():
- common_builder = sandwich_utils.SandwichCommonBuilder(
- android_device=android_device,
- url=url,
- output_directory=args.output,
- output_subdirectory=output_subdirectory)
- common_builder.PopulateWprRecordingTask()
- task_generator(common_builder, MainTransformer, setup['benchmark_setup'])
- default_final_tasks.extend(common_builder.default_final_tasks)
- return task_manager.ExecuteWithCommandLine(args, default_final_tasks)
-
-
-def main(command_line_args):
- logging.basicConfig(level=logging.INFO)
- devil_chromium.Initialize()
-
- args = _ArgumentParser().parse_args(command_line_args)
- OPTIONS.SetParsedArgs(args)
-
- if args.subcommand == 'setup-prefetch':
- return _SetupBenchmarkMain(
- args, 'prefetch', _SetupNoStatePrefetchBenchmark)
- if args.subcommand == 'setup-swr':
- return _SetupBenchmarkMain(
- args, 'swr', _SetupStaleWhileRevalidateBenchmark)
- if args.subcommand == 'run':
- return _RunBenchmarkMain(args)
- if args.subcommand == 'collect-csv':
- with args.output_csv as output_file:
- if not csv_util.CollectCSVsFromDirectory(args.output_dir, output_file):
- return 1
- return 0
- assert False
-
-
-if __name__ == '__main__':
- sys.exit(main(sys.argv[1:]))
diff --git a/loading/sandwich_corpuses/mobile.json b/loading/sandwich_corpuses/mobile.json
deleted file mode 100644
index d6ad19c..0000000
--- a/loading/sandwich_corpuses/mobile.json
+++ /dev/null
@@ -1,9 +0,0 @@
-{
- "urls": [
- "http://www.bbc.com/",
- "http://cnn.com",
- "https://en.m.wikipedia.org/wiki/Main_Page",
- "https://en.m.wikipedia.org/wiki/Science",
- "https://en.m.wikipedia.org/wiki/Wassily_Kandinsky"
- ]
-}
diff --git a/loading/sandwich_metrics.py b/loading/sandwich_metrics.py
deleted file mode 100644
index bceb690..0000000
--- a/loading/sandwich_metrics.py
+++ /dev/null
@@ -1,345 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-"""Pull a sandwich run's output directory's metrics from traces into a CSV.
-
-python pull_sandwich_metrics.py -h
-"""
-
-import collections
-import json
-import logging
-import os
-import shutil
-import subprocess
-import sys
-import tempfile
-
-_SRC_DIR = os.path.abspath(os.path.join(
- os.path.dirname(__file__), '..', '..', '..'))
-
-sys.path.append(os.path.join(_SRC_DIR, 'tools', 'perf'))
-from core import path_util
-sys.path.append(path_util.GetTelemetryDir())
-
-from telemetry.internal.image_processing import video
-from telemetry.util import image_util
-from telemetry.util import rgba_color
-
-import common_util
-import loading_trace as loading_trace_module
-import sandwich_runner
-import tracing_track
-
-
-COMMON_CSV_COLUMN_NAMES = [
- 'chromium_commit',
- 'platform',
- 'first_layout',
- 'first_contentful_paint',
- 'first_meaningful_paint',
- 'total_load',
- 'js_onload_event',
- 'browser_malloc_avg',
- 'browser_malloc_max',
- 'speed_index',
- 'net_emul.name', # Should be in emulation.NETWORK_CONDITIONS.keys()
- 'net_emul.download',
- 'net_emul.upload',
- 'net_emul.latency']
-
-_UNAVAILABLE_CSV_VALUE = 'unavailable'
-
-_FAILED_CSV_VALUE = 'failed'
-
-_TRACKED_EVENT_NAMES = set([
- 'requestStart',
- 'loadEventStart',
- 'loadEventEnd',
- 'firstContentfulPaint',
- 'firstLayout'])
-
-# Points of a completeness record.
-#
-# Members:
-# |time| is in milliseconds,
-# |frame_completeness| value representing how complete the frame is at a given
-# |time|. Caution: this completeness might be negative.
-CompletenessPoint = collections.namedtuple('CompletenessPoint',
- ('time', 'frame_completeness'))
-
-
-def _GetBrowserPID(track):
- """Get the browser PID from a trace.
-
- Args:
- track: The tracing_track.TracingTrack.
-
- Returns:
- The browser's PID as an integer.
- """
- for event in track.GetEvents():
- if event.category != '__metadata' or event.name != 'process_name':
- continue
- if event.args['name'] == 'Browser':
- return event.pid
- raise ValueError('couldn\'t find browser\'s PID')
-
-
-def _GetBrowserDumpEvents(track):
- """Get the browser memory dump events from a tracing track.
-
- Args:
- track: The tracing_track.TracingTrack.
-
- Returns:
- List of memory dump events.
- """
- assert sandwich_runner.MEMORY_DUMP_CATEGORY in track.Categories()
- browser_pid = _GetBrowserPID(track)
- browser_dumps_events = []
- for event in track.GetEvents():
- if event.category != 'disabled-by-default-memory-infra':
- continue
- if event.type != 'v' or event.name != 'periodic_interval':
- continue
- # Ignore dump events for processes other than the browser process
- if event.pid != browser_pid:
- continue
- browser_dumps_events.append(event)
- if len(browser_dumps_events) == 0:
- raise ValueError('No browser dump events found.')
- return browser_dumps_events
-
-
-def _GetWebPageTrackedEvents(track):
- """Get the web page's tracked events from a tracing track.
-
- Args:
- track: The tracing_track.TracingTrack.
-
- Returns:
- A dict mapping event.name -> tracing_track.Event for each first occurrence
- of a tracked event.
- """
- main_frame_id = None
- tracked_events = {}
- sorted_events = sorted(track.GetEvents(),
- key=lambda event: event.start_msec)
- for event in sorted_events:
- if event.category != 'blink.user_timing':
- continue
- event_name = event.name
-
- # Find the id of the main frame. Skip all events until it is found.
- if not main_frame_id:
- # Tracing (in Sandwich) is started after about:blank is fully loaded,
- # hence the first navigationStart in the trace registers the correct frame
- # id.
- if event_name == 'navigationStart':
- logging.info(' Found navigationStart at: %f', event.start_msec)
- main_frame_id = event.args['frame']
- continue
-
- # Ignore events with frame id attached, but not being the main frame.
- if 'frame' in event.args and event.args['frame'] != main_frame_id:
- continue
-
- # Capture trace events by the first time of their appearance. Note: some
- # important events (like requestStart) do not have a frame id attached.
- if event_name in _TRACKED_EVENT_NAMES and event_name not in tracked_events:
- tracked_events[event_name] = event
- logging.info(' Event %s first appears at: %f', event_name,
- event.start_msec)
- return tracked_events
-
-
-def _ExtractDefaultMetrics(loading_trace):
- """Extracts all the default metrics from a given trace.
-
- Args:
- loading_trace: loading_trace.LoadingTrace.
-
- Returns:
- Dictionary with all trace extracted fields set.
- """
- END_REQUEST_EVENTS = [
- ('first_layout', 'requestStart', 'firstLayout'),
- ('first_contentful_paint', 'requestStart', 'firstContentfulPaint'),
- ('total_load', 'requestStart', 'loadEventEnd'),
- ('js_onload_event', 'loadEventStart', 'loadEventEnd')]
- web_page_tracked_events = _GetWebPageTrackedEvents(
- loading_trace.tracing_track)
- metrics = {}
- for metric_name, start_event_name, end_event_name in END_REQUEST_EVENTS:
- try:
- metrics[metric_name] = (
- web_page_tracked_events[end_event_name].start_msec -
- web_page_tracked_events[start_event_name].start_msec)
- except KeyError as error:
- logging.error('could not extract metric %s: missing trace event: %s' % (
- metric_name, str(error)))
- metrics[metric_name] = _FAILED_CSV_VALUE
- return metrics
-
-
-def _ExtractTimeToFirstMeaningfulPaint(loading_trace):
- """Extracts the time to first meaningful paint from a given trace.
-
- Args:
- loading_trace: loading_trace_module.LoadingTrace.
-
- Returns:
- Time to first meaningful paint in milliseconds.
- """
- required_categories = set(sandwich_runner.TTFMP_ADDITIONAL_CATEGORIES)
- if not required_categories.issubset(loading_trace.tracing_track.Categories()):
- return _UNAVAILABLE_CSV_VALUE
- logging.info(' Extracting first_meaningful_paint')
- events = [e.ToJsonDict() for e in loading_trace.tracing_track.GetEvents()]
- with common_util.TemporaryDirectory(prefix='sandwich_tmp_') as tmp_dir:
- chrome_trace_path = os.path.join(tmp_dir, 'chrome_trace.json')
- with open(chrome_trace_path, 'w') as output_file:
- json.dump({'traceEvents': events, 'metadata': {}}, output_file)
- catapult_run_metric_bin_path = os.path.join(
- _SRC_DIR, 'third_party', 'catapult', 'tracing', 'bin', 'run_metric')
- output = subprocess.check_output(
- [catapult_run_metric_bin_path, 'firstPaintMetric', chrome_trace_path])
- json_output = json.loads(output)
- for metric in json_output[chrome_trace_path]['pairs']['values']:
- if metric['name'] == 'firstMeaningfulPaint_avg':
- return metric['numeric']['value']
- logging.info(' Extracting first_meaningful_paint: failed')
- return _FAILED_CSV_VALUE
-
-
-def _ExtractMemoryMetrics(loading_trace):
- """Extracts all the memory metrics from a given trace.
-
- Args:
- loading_trace: loading_trace_module.LoadingTrace.
-
- Returns:
- Dictionary with all trace extracted fields set.
- """
- if (sandwich_runner.MEMORY_DUMP_CATEGORY not in
- loading_trace.tracing_track.Categories()):
- return {
- 'browser_malloc_avg': _UNAVAILABLE_CSV_VALUE,
- 'browser_malloc_max': _UNAVAILABLE_CSV_VALUE
- }
- browser_dump_events = _GetBrowserDumpEvents(loading_trace.tracing_track)
- browser_malloc_sum = 0
- browser_malloc_max = 0
- for dump_event in browser_dump_events:
- attr = dump_event.args['dumps']['allocators']['malloc']['attrs']['size']
- assert attr['units'] == 'bytes'
- size = int(attr['value'], 16)
- browser_malloc_sum += size
- browser_malloc_max = max(browser_malloc_max, size)
- return {
- 'browser_malloc_avg': browser_malloc_sum / float(len(browser_dump_events)),
- 'browser_malloc_max': browser_malloc_max
- }
-
-
-def _ExtractCompletenessRecordFromVideo(video_path):
- """Extracts the completeness record from a video.
-
- The video must start with a filled rectangle of orange (RGB: 222, 100, 13), to
- give the view-port size/location from where to compute the completeness.
-
- Args:
- video_path: Path of the video to extract the completeness list from.
-
- Returns:
- list(CompletenessPoint)
- """
- video_file = tempfile.NamedTemporaryFile()
- shutil.copy(video_path, video_file.name)
- video_capture = video.Video(video_file)
-
- histograms = [
- (time, image_util.GetColorHistogram(
- image, ignore_color=rgba_color.WHITE, tolerance=8))
- for time, image in video_capture.GetVideoFrameIter()
- ]
-
- start_histogram = histograms[1][1]
- final_histogram = histograms[-1][1]
- total_distance = start_histogram.Distance(final_histogram)
-
- def FrameProgress(histogram):
- if total_distance == 0:
- if histogram.Distance(final_histogram) == 0:
- return 1.0
- else:
- return 0.0
- return 1 - histogram.Distance(final_histogram) / total_distance
-
- return [(time, FrameProgress(hist)) for time, hist in histograms]
-
-
-def _ComputeSpeedIndex(completeness_record):
- """Computes the speed-index from a completeness record.
-
- Args:
- completeness_record: list(CompletenessPoint)
-
- Returns:
- Speed-index value.
- """
- speed_index = 0.0
- last_time = completeness_record[0][0]
- last_completness = completeness_record[0][1]
- for time, completeness in completeness_record:
- if time < last_time:
- raise ValueError('Completeness record must be sorted by timestamps.')
- elapsed = time - last_time
- speed_index += elapsed * (1.0 - last_completness)
- last_time = time
- last_completness = completeness
- return speed_index
-
-
-def ExtractCommonMetricsFromRepeatDirectory(repeat_dir, trace):
- """Extracts all the metrics from traces and video of a sandwich run repeat
- directory.
-
- Args:
- repeat_dir: Path of the repeat directory within a run directory.
- trace: preloaded LoadingTrace in |repeat_dir|
-
- Contract:
- trace == LoadingTrace.FromJsonFile(
- os.path.join(repeat_dir, sandwich_runner.TRACE_FILENAME))
-
- Returns:
- Dictionary of extracted metrics.
- """
- run_metrics = {
- 'chromium_commit': trace.metadata['chromium_commit'],
- 'platform': (trace.metadata['platform']['os'] + '-' +
- trace.metadata['platform']['product_model'])
- }
- run_metrics.update(_ExtractDefaultMetrics(trace))
- run_metrics.update(_ExtractMemoryMetrics(trace))
- run_metrics['first_meaningful_paint'] = _ExtractTimeToFirstMeaningfulPaint(
- trace)
- video_path = os.path.join(repeat_dir, sandwich_runner.VIDEO_FILENAME)
- if os.path.isfile(video_path):
- logging.info('processing speed-index video \'%s\'' % video_path)
- try:
- completeness_record = _ExtractCompletenessRecordFromVideo(video_path)
- run_metrics['speed_index'] = _ComputeSpeedIndex(completeness_record)
- except video.BoundingBoxNotFoundException:
- # Sometimes the bounding box for the web content area is not present. Skip
- # calculating Speed Index.
- run_metrics['speed_index'] = _FAILED_CSV_VALUE
- else:
- run_metrics['speed_index'] = _UNAVAILABLE_CSV_VALUE
- for key, value in trace.metadata['network_emulation'].iteritems():
- run_metrics['net_emul.' + key] = value
- assert set(run_metrics.keys()) == set(COMMON_CSV_COLUMN_NAMES)
- return run_metrics
diff --git a/loading/sandwich_metrics_unittest.py b/loading/sandwich_metrics_unittest.py
deleted file mode 100644
index 19520b5..0000000
--- a/loading/sandwich_metrics_unittest.py
+++ /dev/null
@@ -1,258 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-import copy
-import json
-import os
-import shutil
-import subprocess
-import tempfile
-import unittest
-
-import loading_trace
-import page_track
-import sandwich_metrics as puller
-import sandwich_runner
-import request_track
-import tracing_track
-
-
-_BLINK_CAT = 'blink.user_timing'
-_MEM_CAT = sandwich_runner.MEMORY_DUMP_CATEGORY
-_START = 'requestStart'
-_LOADS = 'loadEventStart'
-_LOADE = 'loadEventEnd'
-_NAVIGATION_START = 'navigationStart'
-_PAINT = 'firstContentfulPaint'
-_LAYOUT = 'firstLayout'
-
-_MINIMALIST_TRACE_EVENTS = [
- {'ph': 'R', 'cat': _BLINK_CAT, 'name': _NAVIGATION_START, 'ts': 10000,
- 'args': {'frame': '0'}},
- {'ph': 'R', 'cat': _BLINK_CAT, 'name': _START, 'ts': 20000,
- 'args': {}},
- {'cat': _MEM_CAT, 'name': 'periodic_interval', 'pid': 1, 'ph': 'v',
- 'ts': 1, 'args': {'dumps': {'allocators': {'malloc': {'attrs': {'size':{
- 'units': 'bytes', 'value': '1af2', }}}}}}},
- {'ph': 'R', 'cat': _BLINK_CAT, 'name': _LAYOUT, 'ts': 24000,
- 'args': {'frame': '0'}},
- {'ph': 'R', 'cat': _BLINK_CAT, 'name': _PAINT, 'ts': 31000,
- 'args': {'frame': '0'}},
- {'ph': 'R', 'cat': _BLINK_CAT, 'name': _LOADS, 'ts': 35000,
- 'args': {'frame': '0'}},
- {'ph': 'R', 'cat': _BLINK_CAT, 'name': _LOADE, 'ts': 40000,
- 'args': {'frame': '0'}},
- {'cat': _MEM_CAT, 'name': 'periodic_interval', 'pid': 1, 'ph': 'v',
- 'ts': 1, 'args': {'dumps': {'allocators': {'malloc': {'attrs': {'size':{
- 'units': 'bytes', 'value': 'd704', }}}}}}},
- {'ph': 'M', 'cat': '__metadata', 'pid': 1, 'name': 'process_name', 'ts': 1,
- 'args': {'name': 'Browser'}}]
-
-
-def TracingTrack(events):
- return tracing_track.TracingTrack.FromJsonDict({
- 'events': events,
- 'categories': (sandwich_runner._TRACING_CATEGORIES +
- [sandwich_runner.MEMORY_DUMP_CATEGORY])})
-
-
-def LoadingTrace(events):
- return loading_trace.LoadingTrace('http://a.com/', {},
- page_track.PageTrack(None),
- request_track.RequestTrack(None),
- TracingTrack(events))
-
-
-class PageTrackTest(unittest.TestCase):
- def testGetBrowserPID(self):
- def RunHelper(expected, events):
- self.assertEquals(expected, puller._GetBrowserPID(TracingTrack(events)))
-
- RunHelper(123, [
- {'ph': 'M', 'ts': 0, 'pid': 354, 'cat': 'whatever0'},
- {'ph': 'M', 'ts': 0, 'pid': 354, 'cat': 'whatever1'},
- {'ph': 'M', 'ts': 0, 'pid': 354, 'cat': '__metadata',
- 'name': 'thread_name'},
- {'ph': 'M', 'ts': 0, 'pid': 354, 'cat': '__metadata',
- 'name': 'process_name', 'args': {'name': 'Renderer'}},
- {'ph': 'M', 'ts': 0, 'pid': 123, 'cat': '__metadata',
- 'name': 'process_name', 'args': {'name': 'Browser'}},
- {'ph': 'M', 'ts': 0, 'pid': 354, 'cat': 'whatever0'}])
-
- with self.assertRaises(ValueError):
- RunHelper(123, [
- {'ph': 'M', 'ts': 0, 'pid': 354, 'cat': 'whatever0'},
- {'ph': 'M', 'ts': 0, 'pid': 354, 'cat': 'whatever1'}])
-
- def testGetBrowserDumpEvents(self):
- NAME = 'periodic_interval'
-
- def RunHelper(trace_events, browser_pid):
- trace_events = copy.copy(trace_events)
- trace_events.append({
- 'pid': browser_pid,
- 'cat': '__metadata',
- 'name': 'process_name',
- 'ph': 'M',
- 'ts': 0,
- 'args': {'name': 'Browser'}})
- return puller._GetBrowserDumpEvents(TracingTrack(trace_events))
-
- TRACE_EVENTS = [
- {'pid': 354, 'ts': 1000, 'cat': _MEM_CAT, 'ph': 'v', 'name': NAME},
- {'pid': 354, 'ts': 2000, 'cat': _MEM_CAT, 'ph': 'V'},
- {'pid': 672, 'ts': 3000, 'cat': _MEM_CAT, 'ph': 'v', 'name': NAME},
- {'pid': 123, 'ts': 4000, 'cat': _MEM_CAT, 'ph': 'v', 'name': 'foo'},
- {'pid': 123, 'ts': 5000, 'cat': _MEM_CAT, 'ph': 'v', 'name': NAME},
- {'pid': 123, 'ts': 6000, 'cat': _MEM_CAT, 'ph': 'V'},
- {'pid': 672, 'ts': 7000, 'cat': _MEM_CAT, 'ph': 'v', 'name': NAME},
- {'pid': 354, 'ts': 8000, 'cat': _MEM_CAT, 'ph': 'v', 'name': 'foo'},
- {'pid': 123, 'ts': 9000, 'cat': 'whatever1', 'ph': 'v', 'name': NAME},
- {'pid': 123, 'ts': 10000, 'cat': _MEM_CAT, 'ph': 'v', 'name': NAME},
- {'pid': 354, 'ts': 11000, 'cat': 'whatever0', 'ph': 'R'},
- {'pid': 672, 'ts': 12000, 'cat': _MEM_CAT, 'ph': 'v', 'name': NAME}]
-
- bump_events = RunHelper(TRACE_EVENTS, 123)
- self.assertEquals(2, len(bump_events))
- self.assertEquals(5, bump_events[0].start_msec)
- self.assertEquals(10, bump_events[1].start_msec)
-
- bump_events = RunHelper(TRACE_EVENTS, 354)
- self.assertEquals(1, len(bump_events))
- self.assertEquals(1, bump_events[0].start_msec)
-
- bump_events = RunHelper(TRACE_EVENTS, 672)
- self.assertEquals(3, len(bump_events))
- self.assertEquals(3, bump_events[0].start_msec)
- self.assertEquals(7, bump_events[1].start_msec)
- self.assertEquals(12, bump_events[2].start_msec)
-
- with self.assertRaises(ValueError):
- RunHelper(TRACE_EVENTS, 895)
-
- def testGetWebPageTrackedEvents(self):
- trace_events = puller._GetWebPageTrackedEvents(TracingTrack([
- {'ph': 'R', 'ts': 0000, 'args': {}, 'cat': 'whatever',
- 'name': _START},
- {'ph': 'R', 'ts': 1000, 'args': {'frame': '0'}, 'cat': 'whatever',
- 'name': _LOADS},
- {'ph': 'R', 'ts': 2000, 'args': {'frame': '0'}, 'cat': 'whatever',
- 'name': _LOADE},
- {'ph': 'R', 'ts': 3000, 'args': {}, 'cat': _BLINK_CAT,
- 'name': _START},
- {'ph': 'R', 'ts': 4000, 'args': {'frame': '0'}, 'cat': _BLINK_CAT,
- 'name': _LOADS},
- {'ph': 'R', 'ts': 5000, 'args': {'frame': '0'}, 'cat': _BLINK_CAT,
- 'name': _LOADE},
- {'ph': 'R', 'ts': 7000, 'args': {}, 'cat': _BLINK_CAT,
- 'name': _START},
- {'ph': 'R', 'ts': 8000, 'args': {'frame': '0'}, 'cat': _BLINK_CAT,
- 'name': _LOADS},
- {'ph': 'R', 'ts': 9000, 'args': {'frame': '0'}, 'cat': _BLINK_CAT,
- 'name': _LOADE},
- {'ph': 'R', 'ts': 11000, 'args': {'frame': '0'}, 'cat': 'whatever',
- 'name': _START},
- {'ph': 'R', 'ts': 12000, 'args': {'frame': '0'}, 'cat': 'whatever',
- 'name': _LOADS},
- {'ph': 'R', 'ts': 13000, 'args': {'frame': '0'}, 'cat': 'whatever',
- 'name': _LOADE},
- {'ph': 'R', 'ts': 14000, 'args': {}, 'cat': _BLINK_CAT,
- 'name': _START},
- {'ph': 'R', 'ts': 10000, 'args': {'frame': '0'}, 'cat': _BLINK_CAT,
- 'name': _NAVIGATION_START}, # Event out of |start_msec| order.
- {'ph': 'R', 'ts': 6000, 'args': {'frame': '0'}, 'cat': 'whatever',
- 'name': _NAVIGATION_START},
- {'ph': 'R', 'ts': 15000, 'args': {}, 'cat': _BLINK_CAT,
- 'name': _START},
- {'ph': 'R', 'ts': 16000, 'args': {'frame': '1'}, 'cat': _BLINK_CAT,
- 'name': _LOADS},
- {'ph': 'R', 'ts': 17000, 'args': {'frame': '0'}, 'cat': _BLINK_CAT,
- 'name': _LOADS},
- {'ph': 'R', 'ts': 18000, 'args': {'frame': '1'}, 'cat': _BLINK_CAT,
- 'name': _LOADE},
- {'ph': 'R', 'ts': 19000, 'args': {'frame': '0'}, 'cat': _BLINK_CAT,
- 'name': _LOADE},
- {'ph': 'R', 'ts': 20000, 'args': {}, 'cat': 'whatever',
- 'name': _START},
- {'ph': 'R', 'ts': 21000, 'args': {'frame': '0'}, 'cat': 'whatever',
- 'name': _LOADS},
- {'ph': 'R', 'ts': 22000, 'args': {'frame': '0'}, 'cat': 'whatever',
- 'name': _LOADE},
- {'ph': 'R', 'ts': 23000, 'args': {}, 'cat': _BLINK_CAT,
- 'name': _START},
- {'ph': 'R', 'ts': 24000, 'args': {'frame': '0'}, 'cat': _BLINK_CAT,
- 'name': _LOADS},
- {'ph': 'R', 'ts': 25000, 'args': {'frame': '0'}, 'cat': _BLINK_CAT,
- 'name': _LOADE}]))
-
- self.assertEquals(3, len(trace_events))
- self.assertEquals(14, trace_events['requestStart'].start_msec)
- self.assertEquals(17, trace_events['loadEventStart'].start_msec)
- self.assertEquals(19, trace_events['loadEventEnd'].start_msec)
-
- def testExtractDefaultMetrics(self):
- metrics = puller._ExtractDefaultMetrics(LoadingTrace(
- _MINIMALIST_TRACE_EVENTS))
- self.assertEquals(4, len(metrics))
- self.assertEquals(20, metrics['total_load'])
- self.assertEquals(5, metrics['js_onload_event'])
- self.assertEquals(4, metrics['first_layout'])
- self.assertEquals(11, metrics['first_contentful_paint'])
-
- def testExtractDefaultMetricsBestEffort(self):
- metrics = puller._ExtractDefaultMetrics(LoadingTrace([
- {'ph': 'R', 'ts': 10000, 'args': {'frame': '0'}, 'cat': _BLINK_CAT,
- 'name': _NAVIGATION_START},
- {'ph': 'R', 'ts': 11000, 'args': {'frame': '0'}, 'cat': 'whatever',
- 'name': _START}]))
- self.assertEquals(4, len(metrics))
- self.assertEquals(puller._FAILED_CSV_VALUE, metrics['total_load'])
- self.assertEquals(puller._FAILED_CSV_VALUE, metrics['js_onload_event'])
- self.assertEquals(puller._FAILED_CSV_VALUE, metrics['first_layout'])
- self.assertEquals(puller._FAILED_CSV_VALUE,
- metrics['first_contentful_paint'])
-
- def testExtractMemoryMetrics(self):
- metrics = puller._ExtractMemoryMetrics(LoadingTrace(
- _MINIMALIST_TRACE_EVENTS))
- self.assertEquals(2, len(metrics))
- self.assertEquals(30971, metrics['browser_malloc_avg'])
- self.assertEquals(55044, metrics['browser_malloc_max'])
-
- def testComputeSpeedIndex(self):
- def point(time, frame_completeness):
- return puller.CompletenessPoint(time=time,
- frame_completeness=frame_completeness)
- completness_record = [
- point(0, 0.0),
- point(120, 0.4),
- point(190, 0.75),
- point(280, 1.0),
- point(400, 1.0),
- ]
- self.assertEqual(120 + 70 * 0.6 + 90 * 0.25,
- puller._ComputeSpeedIndex(completness_record))
-
- completness_record = [
- point(70, 0.0),
- point(150, 0.3),
- point(210, 0.6),
- point(220, 0.9),
- point(240, 1.0),
- ]
- self.assertEqual(80 + 60 * 0.7 + 10 * 0.4 + 20 * 0.1,
- puller._ComputeSpeedIndex(completness_record))
-
- completness_record = [
- point(90, 0.0),
- point(200, 0.6),
- point(150, 0.3),
- point(230, 1.0),
- ]
- with self.assertRaises(ValueError):
- puller._ComputeSpeedIndex(completness_record)
-
-
-if __name__ == '__main__':
- unittest.main()
diff --git a/loading/sandwich_prefetch.py b/loading/sandwich_prefetch.py
deleted file mode 100644
index 0f53263..0000000
--- a/loading/sandwich_prefetch.py
+++ /dev/null
@@ -1,678 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-"""
-Implements a task builder for benchmarking effects of NoState Prefetch.
-Noticeable steps of the task pipeline:
- * Save a WPR archive
- * Process the WPR archive to make all resources cacheable
- * Process cache archive to patch response headers back to their original
- values.
- * Find out which resources are discoverable by NoState Prefetch
- (HTMLPreloadScanner)
- * Load pages with empty/full/prefetched cache
- * Extract most important metrics to a CSV
-"""
-
-import csv
-import logging
-import json
-import os
-import re
-import shutil
-import urlparse
-
-import chrome_cache
-import common_util
-import loading_trace
-from prefetch_view import PrefetchSimulationView
-from request_dependencies_lens import RequestDependencyLens
-import sandwich_metrics
-import sandwich_runner
-import sandwich_utils
-import task_manager
-import wpr_backend
-
-
-class Discoverer(object):
- # Do not prefetch anything.
- EmptyCache = 'empty-cache'
-
- # Prefetches everything to load fully from cache (impossible in practice).
- FullCache = 'full-cache'
-
- # Prefetches the first resource following the redirection chain.
- MainDocument = 'main-document'
-
- # All resources which are fetched from the main document and their
- # redirections.
- Parser = 'parser'
-
- # Simulation of HTMLPreloadScanner on the main document and their
- # redirections and subsets:
- # Store: only resources that don't have Cache-Control: No-Store.
- HTMLPreloadScanner = 'html-scanner'
- HTMLPreloadScannerStore = 'html-scanner-store'
-
-
-# List of all available sub-resource discoverers.
-SUBRESOURCE_DISCOVERERS = set([
- Discoverer.EmptyCache,
- Discoverer.FullCache,
- Discoverer.MainDocument,
- Discoverer.Parser,
- Discoverer.HTMLPreloadScanner,
- Discoverer.HTMLPreloadScannerStore,
-])
-
-
-_UPLOAD_DATA_STREAM_REQUESTS_REGEX = re.compile(r'^\d+/(?P<url>.*)$')
-
-
-def _NormalizeUrl(url):
- """Returns normalized URL such as removing trailing slashes."""
- parsed_url = list(urlparse.urlparse(url))
- parsed_url[2] = re.sub(r'/{2,}', r'/', parsed_url[2])
- return urlparse.urlunparse(parsed_url)
-
-
-def _PatchCacheArchive(cache_archive_path, loading_trace_path,
- cache_archive_dest_path):
- """Patch the cache archive.
-
- Note: This method update the raw response headers of cache entries' to store
- the ones such as Set-Cookie that were pruned by the
- net::HttpCacheTransaction, and remove the stream index 2 holding resource's
- compile meta data.
-
- Args:
- cache_archive_path: Input archive's path to patch.
- loading_trace_path: Path of the loading trace that have recorded the cache
- archive <cache_archive_path>.
- cache_archive_dest_path: Archive destination's path.
- """
- trace = loading_trace.LoadingTrace.FromJsonFile(loading_trace_path)
- with common_util.TemporaryDirectory(prefix='sandwich_tmp') as tmp_path:
- cache_path = os.path.join(tmp_path, 'cache')
- chrome_cache.UnzipDirectoryContent(cache_archive_path, cache_path)
- cache_backend = chrome_cache.CacheBackend(cache_path, 'simple')
- cache_entries = set(cache_backend.ListKeys())
- logging.info('Original cache size: %d bytes' % cache_backend.GetSize())
- for request in sandwich_utils.FilterOutDataAndIncompleteRequests(
- trace.request_track.GetEvents()):
- # On requests having an upload data stream such as POST requests,
- # net::HttpCache::GenerateCacheKey() prefixes the cache entry's key with
- # the upload data stream's session unique identifier.
- #
- # It is fine to not patch these requests since when reopening Chrome,
- # there is no way the entry can be reused since the upload data stream's
- # identifier will be different.
- #
- # The fact that these entries are kept in the cache after closing Chrome
- # properly by closing the Chrome tab as the ChromeControler.SetSlowDeath()
- # do is known chrome bug (crbug.com/610725).
- if request.url not in cache_entries:
- continue
- # Chrome prunes Set-Cookie from response headers before storing them in
- # disk cache. Also, it adds implicit "Vary: cookie" header to all redirect
- # response headers. Sandwich manages the cache, but between recording the
- # cache and benchmarking the cookie jar is invalidated. This leads to
- # invalidation of all cacheable redirects.
- raw_headers = request.GetRawResponseHeaders()
- cache_backend.UpdateRawResponseHeaders(request.url, raw_headers)
- # NoState-Prefetch would only fetch the resources, but not parse them.
- cache_backend.DeleteStreamForKey(request.url, 2)
- chrome_cache.ZipDirectoryContent(cache_path, cache_archive_dest_path)
- logging.info('Patched cache size: %d bytes' % cache_backend.GetSize())
-
-
-def _DiscoverRequests(dependencies_lens, subresource_discoverer):
- trace = dependencies_lens.loading_trace
- first_resource_request = trace.request_track.GetFirstResourceRequest()
-
- if subresource_discoverer == Discoverer.EmptyCache:
- requests = []
- elif subresource_discoverer == Discoverer.FullCache:
- requests = dependencies_lens.loading_trace.request_track.GetEvents()
- elif subresource_discoverer == Discoverer.MainDocument:
- requests = [dependencies_lens.GetRedirectChain(first_resource_request)[-1]]
- elif subresource_discoverer == Discoverer.Parser:
- requests = PrefetchSimulationView.ParserDiscoverableRequests(
- first_resource_request, dependencies_lens)
- elif subresource_discoverer == Discoverer.HTMLPreloadScanner:
- requests = PrefetchSimulationView.PreloadedRequests(
- first_resource_request, dependencies_lens, trace)
- else:
- assert False
- logging.info('number of requests discovered by %s: %d',
- subresource_discoverer, len(requests))
- return requests
-
-
-def _PruneOutOriginalNoStoreRequests(original_headers_path, requests):
- with open(original_headers_path) as file_input:
- original_headers = json.load(file_input)
- pruned_requests = set()
- for request in requests:
- url = _NormalizeUrl(request.url)
- if url not in original_headers:
- # TODO(gabadie): Investigate why these requests were not in WPR.
- assert request.failed
- logging.warning(
- 'could not find original headers for: %s (failure: %s)',
- url, request.error_text)
- continue
- request_original_headers = original_headers[url]
- if ('cache-control' in request_original_headers and
- 'no-store' in request_original_headers['cache-control'].lower()):
- pruned_requests.add(request)
- return [r for r in requests if r not in pruned_requests]
-
-
-def _ExtractDiscoverableUrls(
- original_headers_path, loading_trace_path, subresource_discoverer):
- """Extracts discoverable resource urls from a loading trace according to a
- sub-resource discoverer.
-
- Args:
- original_headers_path: Path of JSON containing the original headers.
- loading_trace_path: Path of the loading trace recorded at original cache
- creation.
- subresource_discoverer: The sub-resources discoverer that should white-list
- the resources to keep in cache for the NoState-Prefetch benchmarks.
-
- Returns:
- A set of urls.
- """
- assert subresource_discoverer in SUBRESOURCE_DISCOVERERS, \
- 'unknown prefetch simulation {}'.format(subresource_discoverer)
- logging.info('loading %s', loading_trace_path)
- trace = loading_trace.LoadingTrace.FromJsonFile(loading_trace_path)
- dependencies_lens = RequestDependencyLens(trace)
-
- # Build the list of discovered requests according to the desired simulation.
- discovered_requests = []
- if subresource_discoverer == Discoverer.HTMLPreloadScannerStore:
- requests = _DiscoverRequests(
- dependencies_lens, Discoverer.HTMLPreloadScanner)
- discovered_requests = _PruneOutOriginalNoStoreRequests(
- original_headers_path, requests)
- else:
- discovered_requests = _DiscoverRequests(
- dependencies_lens, subresource_discoverer)
-
- whitelisted_urls = set()
- for request in sandwich_utils.FilterOutDataAndIncompleteRequests(
- discovered_requests):
- logging.debug('white-listing %s', request.url)
- whitelisted_urls.add(request.url)
- logging.info('number of white-listed resources: %d', len(whitelisted_urls))
- return whitelisted_urls
-
-
-def _PrintUrlSetComparison(ref_url_set, url_set, url_set_name):
- """Compare URL sets and log the diffs.
-
- Args:
- ref_url_set: Set of reference urls.
- url_set: Set of urls to compare to the reference.
- url_set_name: The set name for logging purposes.
- """
- assert type(ref_url_set) == set
- assert type(url_set) == set
- if ref_url_set == url_set:
- logging.info(' %d %s are matching.' % (len(ref_url_set), url_set_name))
- return
- missing_urls = ref_url_set.difference(url_set)
- unexpected_urls = url_set.difference(ref_url_set)
- logging.error(' %s are not matching (expected %d, had %d)' % \
- (url_set_name, len(ref_url_set), len(url_set)))
- logging.error(' List of %d missing resources:' % len(missing_urls))
- for url in sorted(missing_urls):
- logging.error('- ' + url)
- logging.error(' List of %d unexpected resources:' % len(unexpected_urls))
- for url in sorted(unexpected_urls):
- logging.error('+ ' + url)
-
-
-class _RunOutputVerifier(object):
- """Object to verify benchmark run from traces and WPR log stored in the
- runner output directory.
- """
-
- def __init__(self, cache_validation_result, benchmark_setup):
- """Constructor.
-
- Args:
- cache_validation_result: JSON of the cache validation task.
- benchmark_setup: JSON of the benchmark setup.
- """
- self._cache_whitelist = set(benchmark_setup['cache_whitelist'])
- self._original_requests = set(
- cache_validation_result['effective_encoded_data_lengths'].keys())
- self._original_post_requests = set(
- cache_validation_result['effective_post_requests'])
- self._original_cached_requests = self._original_requests.intersection(
- self._cache_whitelist)
- self._original_uncached_requests = self._original_requests.difference(
- self._cache_whitelist)
- self._all_sent_url_requests = set()
-
- def VerifyTrace(self, trace):
- """Verifies a trace with the cache validation result and the benchmark
- setup.
- """
- effective_requests = sandwich_utils.ListUrlRequests(
- trace, sandwich_utils.RequestOutcome.All)
- effective_post_requests = sandwich_utils.ListUrlRequests(
- trace, sandwich_utils.RequestOutcome.Post)
- effective_cached_requests = sandwich_utils.ListUrlRequests(
- trace, sandwich_utils.RequestOutcome.ServedFromCache)
- effective_uncached_requests = sandwich_utils.ListUrlRequests(
- trace, sandwich_utils.RequestOutcome.NotServedFromCache)
-
- missing_requests = self._original_requests.difference(effective_requests)
- unexpected_requests = effective_requests.difference(self._original_requests)
- expected_cached_requests = \
- self._original_cached_requests.difference(missing_requests)
- expected_uncached_requests = self._original_uncached_requests.union(
- unexpected_requests).difference(missing_requests)
-
- # POST requests are known to be unable to use the cache.
- expected_cached_requests.difference_update(effective_post_requests)
- expected_uncached_requests.update(effective_post_requests)
-
- _PrintUrlSetComparison(self._original_requests, effective_requests,
- 'All resources')
- _PrintUrlSetComparison(set(), effective_post_requests, 'POST resources')
- _PrintUrlSetComparison(expected_cached_requests, effective_cached_requests,
- 'Cached resources')
- _PrintUrlSetComparison(expected_uncached_requests,
- effective_uncached_requests, 'Non cached resources')
-
- self._all_sent_url_requests.update(effective_uncached_requests)
-
- def VerifyWprLog(self, wpr_log_path):
- """Verifies WPR log with previously verified traces."""
- all_wpr_requests = wpr_backend.ExtractRequestsFromLog(wpr_log_path)
- all_wpr_urls = set()
- unserved_wpr_urls = set()
- wpr_command_colliding_urls = set()
-
- for request in all_wpr_requests:
- if request.is_wpr_host:
- continue
- if urlparse.urlparse(request.url).path.startswith('/web-page-replay'):
- wpr_command_colliding_urls.add(request.url)
- elif request.is_served is False:
- unserved_wpr_urls.add(request.url)
- all_wpr_urls.add(request.url)
-
- _PrintUrlSetComparison(set(), unserved_wpr_urls,
- 'Distinct unserved resources from WPR')
- _PrintUrlSetComparison(set(), wpr_command_colliding_urls,
- 'Distinct resources colliding to WPR commands')
- _PrintUrlSetComparison(all_wpr_urls, self._all_sent_url_requests,
- 'Distinct resource requests to WPR')
-
-
-def _ValidateCacheArchiveContent(cache_build_trace_path, cache_archive_path):
- """Validates a cache archive content.
-
- Args:
- cache_build_trace_path: Path of the generated trace at the cache build time.
- cache_archive_path: Cache archive's path to validate.
-
- Returns:
- {
- 'effective_encoded_data_lengths':
- {URL of all requests: encoded_data_length},
- 'effective_post_requests': [URLs of POST requests],
- 'expected_cached_resources': [URLs of resources expected to be cached],
- 'successfully_cached': [URLs of cached sub-resources]
- }
- """
- # TODO(gabadie): What's the best way of propagating errors happening in here?
- logging.info('lists cached urls from %s' % cache_archive_path)
- with common_util.TemporaryDirectory() as cache_directory:
- chrome_cache.UnzipDirectoryContent(cache_archive_path, cache_directory)
- cache_keys = set(
- chrome_cache.CacheBackend(cache_directory, 'simple').ListKeys())
- trace = loading_trace.LoadingTrace.FromJsonFile(cache_build_trace_path)
- effective_requests = sandwich_utils.ListUrlRequests(
- trace, sandwich_utils.RequestOutcome.All)
- effective_post_requests = sandwich_utils.ListUrlRequests(
- trace, sandwich_utils.RequestOutcome.Post)
- effective_encoded_data_lengths = {}
- for request in sandwich_utils.FilterOutDataAndIncompleteRequests(
- trace.request_track.GetEvents()):
- if request.from_disk_cache or request.served_from_cache:
- # At cache archive creation time, a request might be loaded several times,
- # but avoid the request.encoded_data_length == 0 if loaded from cache.
- continue
- if request.url in effective_encoded_data_lengths:
- effective_encoded_data_lengths[request.url] = max(
- effective_encoded_data_lengths[request.url],
- request.GetResponseTransportLength())
- else:
- effective_encoded_data_lengths[request.url] = (
- request.GetResponseTransportLength())
-
- upload_data_stream_cache_entry_keys = set()
- upload_data_stream_requests = set()
- for cache_entry_key in cache_keys:
- match = _UPLOAD_DATA_STREAM_REQUESTS_REGEX.match(cache_entry_key)
- if not match:
- continue
- upload_data_stream_cache_entry_keys.add(cache_entry_key)
- upload_data_stream_requests.add(match.group('url'))
-
- expected_cached_requests = effective_requests.difference(
- effective_post_requests)
- effective_cache_keys = cache_keys.difference(
- upload_data_stream_cache_entry_keys)
-
- _PrintUrlSetComparison(effective_post_requests, upload_data_stream_requests,
- 'POST resources')
- _PrintUrlSetComparison(expected_cached_requests, effective_cache_keys,
- 'Cached resources')
-
- return {
- 'effective_encoded_data_lengths': effective_encoded_data_lengths,
- 'effective_post_requests': [url for url in effective_post_requests],
- 'expected_cached_resources': [url for url in expected_cached_requests],
- 'successfully_cached_resources': [url for url in effective_cache_keys]
- }
-
-
-def _ProcessRunOutputDir(
- cache_validation_result, benchmark_setup, runner_output_dir):
- """Process benchmark's run output directory.
-
- Args:
- cache_validation_result: Same as for _RunOutputVerifier
- benchmark_setup: Same as for _RunOutputVerifier
- runner_output_dir: Same as for SandwichRunner.output_dir
-
- Returns:
- List of dictionary.
- """
- run_metrics_list = []
- run_output_verifier = _RunOutputVerifier(
- cache_validation_result, benchmark_setup)
- cached_encoded_data_lengths = (
- cache_validation_result['effective_encoded_data_lengths'])
- for repeat_id, repeat_dir in sandwich_runner.WalkRepeatedRuns(
- runner_output_dir):
- trace_path = os.path.join(repeat_dir, sandwich_runner.TRACE_FILENAME)
-
- logging.info('loading trace: %s', trace_path)
- trace = loading_trace.LoadingTrace.FromJsonFile(trace_path)
-
- logging.info('verifying trace: %s', trace_path)
- run_output_verifier.VerifyTrace(trace)
-
- logging.info('extracting metrics from trace: %s', trace_path)
-
- # Gather response size per URLs.
- response_sizes = {}
- for request in sandwich_utils.FilterOutDataAndIncompleteRequests(
- trace.request_track.GetEvents()):
- # Ignore requests served from the blink's cache.
- if request.served_from_cache:
- continue
- if request.from_disk_cache:
- if request.url in cached_encoded_data_lengths:
- response_size = cached_encoded_data_lengths[request.url]
- else:
- # Some fat webpages may overflow the Memory cache, and so some
- # requests might be served from disk cache couple of times per page
- # load.
- logging.warning('Looks like could be served from memory cache: %s',
- request.url)
- if request.url in response_sizes:
- response_size = response_sizes[request.url]
- else:
- response_size = request.GetResponseTransportLength()
- response_sizes[request.url] = response_size
-
- # Sums the served from cache/network bytes.
- served_from_network_bytes = 0
- served_from_cache_bytes = 0
- urls_hitting_network = set()
- for request in sandwich_utils.FilterOutDataAndIncompleteRequests(
- trace.request_track.GetEvents()):
- # Ignore requests served from the blink's cache.
- if request.served_from_cache:
- continue
- urls_hitting_network.add(request.url)
- if request.from_disk_cache:
- served_from_cache_bytes += response_sizes[request.url]
- else:
- served_from_network_bytes += response_sizes[request.url]
-
- # Make sure the served from blink's cache requests have at least one
- # corresponding request that was not served from the blink's cache.
- for request in sandwich_utils.FilterOutDataAndIncompleteRequests(
- trace.request_track.GetEvents()):
- assert (request.url in urls_hitting_network or
- not request.served_from_cache)
-
- run_metrics = {
- 'url': trace.url,
- 'repeat_id': repeat_id,
- 'subresource_discoverer': benchmark_setup['subresource_discoverer'],
- 'cache_recording.subresource_count':
- len(cache_validation_result['effective_encoded_data_lengths']),
- 'cache_recording.cached_subresource_count_theoretic':
- len(cache_validation_result['successfully_cached_resources']),
- 'cache_recording.cached_subresource_count':
- len(cache_validation_result['expected_cached_resources']),
- 'benchmark.subresource_count': len(sandwich_utils.ListUrlRequests(
- trace, sandwich_utils.RequestOutcome.All)),
- 'benchmark.served_from_cache_count_theoretic':
- len(benchmark_setup['cache_whitelist']),
- 'benchmark.served_from_cache_count': len(sandwich_utils.ListUrlRequests(
- trace, sandwich_utils.RequestOutcome.ServedFromCache)),
- 'benchmark.served_from_network_bytes': served_from_network_bytes,
- 'benchmark.served_from_cache_bytes': served_from_cache_bytes
- }
- run_metrics.update(
- sandwich_metrics.ExtractCommonMetricsFromRepeatDirectory(
- repeat_dir, trace))
- run_metrics_list.append(run_metrics)
- run_metrics_list.sort(key=lambda e: e['repeat_id'])
-
- wpr_log_path = os.path.join(
- runner_output_dir, sandwich_runner.WPR_LOG_FILENAME)
- logging.info('verifying wpr log: %s', wpr_log_path)
- run_output_verifier.VerifyWprLog(wpr_log_path)
- return run_metrics_list
-
-
-class PrefetchBenchmarkBuilder(task_manager.Builder):
- """A builder for a graph of tasks for NoState-Prefetch emulated benchmarks."""
-
- def __init__(self, common_builder):
- task_manager.Builder.__init__(self,
- common_builder.output_directory,
- common_builder.output_subdirectory)
- self._common_builder = common_builder
-
- self._original_headers_path = None
- self._wpr_archive_path = None
- self._cache_path = None
- self._trace_from_grabbing_reference_cache = None
- self._cache_validation_task = None
- self._PopulateCommonPipelines()
-
- def _PopulateCommonPipelines(self):
- """Creates necessary tasks to produce initial cache archive.
-
- Also creates a task for producing a json file with a mapping of URLs to
- subresources (urls-resources.json).
-
- Here is the full dependency tree for the returned task:
- common/patched-cache-validation.json
- depends on: common/patched-cache.zip
- depends on: common/original-cache.zip
- depends on: common/webpages-patched.wpr
- depends on: common/webpages.wpr
- """
- self._original_headers_path = self.RebaseOutputPath(
- 'common/response-headers.json')
-
- @self.RegisterTask('common/webpages-patched.wpr',
- dependencies=[self._common_builder.original_wpr_task])
- def BuildPatchedWpr():
- shutil.copyfile(
- self._common_builder.original_wpr_task.path, BuildPatchedWpr.path)
- wpr_archive = wpr_backend.WprArchiveBackend(BuildPatchedWpr.path)
-
- # Save up original response headers.
- original_response_headers = {e.url: e.GetResponseHeadersDict() \
- for e in wpr_archive.ListUrlEntries()}
- logging.info('save up response headers for %d resources',
- len(original_response_headers))
- if not original_response_headers:
- # TODO(gabadie): How is it possible to not even have the main resource
- # in the WPR archive? Example URL can be found in:
- # http://crbug.com/623966#c5
- raise Exception(
- 'Looks like no resources were recorded in WPR during: {}'.format(
- self._common_builder.original_wpr_task.name))
- with open(self._original_headers_path, 'w') as file_output:
- json.dump(original_response_headers, file_output)
-
- # Patch WPR.
- wpr_url_entries = wpr_archive.ListUrlEntries()
- for wpr_url_entry in wpr_url_entries:
- sandwich_utils.PatchWprEntryToBeCached(wpr_url_entry)
- logging.info('number of patched entries: %d', len(wpr_url_entries))
- wpr_archive.Persist()
-
- @self.RegisterTask('common/original-cache.zip', [BuildPatchedWpr])
- def BuildOriginalCache():
- runner = self._common_builder.CreateSandwichRunner()
- runner.wpr_archive_path = BuildPatchedWpr.path
- runner.cache_archive_path = BuildOriginalCache.path
- runner.cache_operation = sandwich_runner.CacheOperation.SAVE
- runner.output_dir = BuildOriginalCache.run_path
- runner.Run()
- BuildOriginalCache.run_path = BuildOriginalCache.path[:-4] + '-run'
- original_cache_trace_path = os.path.join(
- BuildOriginalCache.run_path, '0', sandwich_runner.TRACE_FILENAME)
-
- @self.RegisterTask('common/patched-cache.zip', [BuildOriginalCache])
- def BuildPatchedCache():
- _PatchCacheArchive(BuildOriginalCache.path,
- original_cache_trace_path, BuildPatchedCache.path)
-
- @self.RegisterTask('common/patched-cache-validation.json',
- [BuildPatchedCache])
- def ValidatePatchedCache():
- cache_validation_result = _ValidateCacheArchiveContent(
- original_cache_trace_path, BuildPatchedCache.path)
- with open(ValidatePatchedCache.path, 'w') as output:
- json.dump(cache_validation_result, output)
-
- self._wpr_archive_path = BuildPatchedWpr.path
- self._trace_from_grabbing_reference_cache = original_cache_trace_path
- self._cache_path = BuildPatchedCache.path
- self._cache_validation_task = ValidatePatchedCache
-
- self._common_builder.default_final_tasks.append(ValidatePatchedCache)
-
- def PopulateLoadBenchmark(self, subresource_discoverer,
- transformer_list_name, transformer_list):
- """Populate benchmarking tasks from its setup tasks.
-
- Args:
- subresource_discoverer: Name of a subresources discoverer.
- transformer_list_name: A string describing the transformers, will be used
- in Task names (prefer names without spaces and special characters).
- transformer_list: An ordered list of function that takes an instance of
- SandwichRunner as parameter, would be applied immediately before
- SandwichRunner.Run() in the given order.
-
- Here is the full dependency of the added tree for the returned task:
- <transformer_list_name>/<subresource_discoverer>-metrics.csv
- depends on: <transformer_list_name>/<subresource_discoverer>-run/
- depends on: common/<subresource_discoverer>-cache.zip
- depends on: common/<subresource_discoverer>-setup.json
- depends on: common/patched-cache-validation.json
- """
- additional_column_names = [
- 'url',
- 'repeat_id',
- 'subresource_discoverer',
- 'cache_recording.subresource_count',
- 'cache_recording.cached_subresource_count_theoretic',
- 'cache_recording.cached_subresource_count',
- 'benchmark.subresource_count',
- 'benchmark.served_from_cache_count_theoretic',
- 'benchmark.served_from_cache_count',
- 'benchmark.served_from_network_bytes',
- 'benchmark.served_from_cache_bytes']
-
- assert subresource_discoverer in SUBRESOURCE_DISCOVERERS
- assert 'common' not in SUBRESOURCE_DISCOVERERS
- shared_task_prefix = os.path.join('common', subresource_discoverer)
- task_prefix = os.path.join(transformer_list_name, subresource_discoverer)
-
- @self.RegisterTask(shared_task_prefix + '-setup.json', merge=True,
- dependencies=[self._cache_validation_task])
- def SetupBenchmark():
- whitelisted_urls = _ExtractDiscoverableUrls(
- original_headers_path=self._original_headers_path,
- loading_trace_path=self._trace_from_grabbing_reference_cache,
- subresource_discoverer=subresource_discoverer)
-
- common_util.EnsureParentDirectoryExists(SetupBenchmark.path)
- with open(SetupBenchmark.path, 'w') as output:
- json.dump({
- 'cache_whitelist': [url for url in whitelisted_urls],
- 'subresource_discoverer': subresource_discoverer,
- }, output)
-
- @self.RegisterTask(shared_task_prefix + '-cache.zip', merge=True,
- dependencies=[SetupBenchmark])
- def BuildBenchmarkCacheArchive():
- benchmark_setup = json.load(open(SetupBenchmark.path))
- chrome_cache.ApplyUrlWhitelistToCacheArchive(
- cache_archive_path=self._cache_path,
- whitelisted_urls=benchmark_setup['cache_whitelist'],
- output_cache_archive_path=BuildBenchmarkCacheArchive.path)
-
- @self.RegisterTask(task_prefix + '-run/',
- dependencies=[BuildBenchmarkCacheArchive])
- def RunBenchmark():
- runner = self._common_builder.CreateSandwichRunner()
- for transformer in transformer_list:
- transformer(runner)
- runner.wpr_archive_path = self._common_builder.original_wpr_task.path
- runner.wpr_out_log_path = os.path.join(
- RunBenchmark.path, sandwich_runner.WPR_LOG_FILENAME)
- runner.cache_archive_path = BuildBenchmarkCacheArchive.path
- runner.cache_operation = sandwich_runner.CacheOperation.PUSH
- runner.output_dir = RunBenchmark.path
- runner.Run()
-
- @self.RegisterTask(task_prefix + '-metrics.csv',
- dependencies=[RunBenchmark])
- def ProcessRunOutputDir():
- benchmark_setup = json.load(open(SetupBenchmark.path))
- cache_validation_result = json.load(
- open(self._cache_validation_task.path))
-
- run_metrics_list = _ProcessRunOutputDir(
- cache_validation_result, benchmark_setup, RunBenchmark.path)
- with open(ProcessRunOutputDir.path, 'w') as csv_file:
- writer = csv.DictWriter(csv_file, fieldnames=(additional_column_names +
- sandwich_metrics.COMMON_CSV_COLUMN_NAMES))
- writer.writeheader()
- for trace_metrics in run_metrics_list:
- writer.writerow(trace_metrics)
-
- self._common_builder.default_final_tasks.append(ProcessRunOutputDir)
diff --git a/loading/sandwich_prefetch_unittest.py b/loading/sandwich_prefetch_unittest.py
deleted file mode 100644
index 0d69a47..0000000
--- a/loading/sandwich_prefetch_unittest.py
+++ /dev/null
@@ -1,111 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-import json
-import os
-import shutil
-import tempfile
-import unittest
-import urlparse
-
-import sandwich_prefetch
-
-
-LOADING_DIR = os.path.dirname(__file__)
-TEST_DATA_DIR = os.path.join(LOADING_DIR, 'testdata')
-
-
-class SandwichPrefetchTestCase(unittest.TestCase):
- _TRACE_PATH = os.path.join(TEST_DATA_DIR, 'scanner_vs_parser.trace')
-
- def setUp(self):
- self._tmp_dir = tempfile.mkdtemp()
-
- def tearDown(self):
- shutil.rmtree(self._tmp_dir)
-
- def GetTmpPath(self, file_name):
- return os.path.join(self._tmp_dir, file_name)
-
- def GetResourceUrl(self, path):
- return urlparse.urljoin('http://l/', path)
-
- def testEmptyCacheWhitelisting(self):
- url_set = sandwich_prefetch._ExtractDiscoverableUrls(None,
- self._TRACE_PATH, sandwich_prefetch.Discoverer.EmptyCache)
- self.assertEquals(set(), url_set)
-
- def testFullCacheWhitelisting(self):
- reference_url_set = set([self.GetResourceUrl('./'),
- self.GetResourceUrl('0.png'),
- self.GetResourceUrl('1.png'),
- self.GetResourceUrl('0.css'),
- self.GetResourceUrl('favicon.ico')])
- url_set = sandwich_prefetch._ExtractDiscoverableUrls(None,
- self._TRACE_PATH, sandwich_prefetch.Discoverer.FullCache)
- self.assertEquals(reference_url_set, url_set)
-
- def testMainDocumentWhitelisting(self):
- reference_url_set = set([self.GetResourceUrl('./')])
- url_set = sandwich_prefetch._ExtractDiscoverableUrls(None,
- self._TRACE_PATH, sandwich_prefetch.Discoverer.MainDocument)
- self.assertEquals(reference_url_set, url_set)
-
- def testParserDiscoverableWhitelisting(self):
- reference_url_set = set([self.GetResourceUrl('./'),
- self.GetResourceUrl('0.png'),
- self.GetResourceUrl('1.png'),
- self.GetResourceUrl('0.css')])
- url_set = sandwich_prefetch._ExtractDiscoverableUrls(None,
- self._TRACE_PATH, sandwich_prefetch.Discoverer.Parser)
- self.assertEquals(reference_url_set, url_set)
-
- def testHTMLPreloadScannerWhitelisting(self):
- reference_url_set = set([self.GetResourceUrl('./'),
- self.GetResourceUrl('0.png'),
- self.GetResourceUrl('0.css')])
- url_set = sandwich_prefetch._ExtractDiscoverableUrls(None,
- self._TRACE_PATH, sandwich_prefetch.Discoverer.HTMLPreloadScanner)
- self.assertEquals(reference_url_set, url_set)
-
- def testHTMLPreloadScannerStoreWhitelisting(self):
- original_headers_path = self.GetTmpPath('original_headers.json')
-
- def RunTest(reference_urls):
- url_set = sandwich_prefetch._ExtractDiscoverableUrls(
- original_headers_path, self._TRACE_PATH,
- sandwich_prefetch.Discoverer.HTMLPreloadScannerStore)
- self.assertEquals(set(reference_urls), url_set)
-
- with open(original_headers_path, 'w') as output_file:
- json.dump({
- self.GetResourceUrl('./'): {},
- self.GetResourceUrl('0.png'): {'cache-control': 'max-age=0'},
- self.GetResourceUrl('0.css'): {}
- }, output_file)
- RunTest([self.GetResourceUrl('./'),
- self.GetResourceUrl('0.png'),
- self.GetResourceUrl('0.css')])
-
- with open(original_headers_path, 'w') as output_file:
- json.dump({
- self.GetResourceUrl('./'): {},
- self.GetResourceUrl('0.png'): {'cache-control': 'private, no-store'},
- self.GetResourceUrl('0.css'): {}
- }, output_file)
- RunTest([self.GetResourceUrl('./'),
- self.GetResourceUrl('0.css')])
-
- with open(original_headers_path, 'w') as output_file:
- json.dump({
- self.GetResourceUrl('./'): {'cache-control': 'private, no-store'},
- self.GetResourceUrl('0.png'): {},
- self.GetResourceUrl('0.css'): {}
- }, output_file)
- RunTest([self.GetResourceUrl('0.png'),
- self.GetResourceUrl('0.css')])
-
-
-if __name__ == '__main__':
- unittest.main()
diff --git a/loading/sandwich_runner.py b/loading/sandwich_runner.py
deleted file mode 100644
index 5c3bb94..0000000
--- a/loading/sandwich_runner.py
+++ /dev/null
@@ -1,376 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-import logging
-import os
-import shutil
-import sys
-import tempfile
-
-_SRC_DIR = os.path.abspath(os.path.join(
- os.path.dirname(__file__), '..', '..', '..'))
-
-sys.path.append(os.path.join(_SRC_DIR, 'third_party', 'catapult', 'devil'))
-from devil.android import device_utils
-
-sys.path.append(os.path.join(_SRC_DIR, 'third_party', 'catapult', 'telemetry',
- 'third_party', 'websocket-client'))
-import websocket
-
-import chrome_cache
-import common_util
-import controller
-import devtools_monitor
-import device_setup
-import loading_trace
-
-
-# Standard filenames in the sandwich runner's output directory.
-ERROR_FILENAME = 'error'
-TRACE_FILENAME = 'trace.json'
-VIDEO_FILENAME = 'video.mp4'
-WPR_LOG_FILENAME = 'wpr.log'
-
-# Memory dump category used to get memory metrics.
-MEMORY_DUMP_CATEGORY = 'disabled-by-default-memory-infra'
-
-# Devtools timeout of 1 minute to avoid websocket timeout on slow
-# network condition.
-_DEVTOOLS_TIMEOUT = 60
-
-# Categories to enable or disable for all traces collected. Disabled categories
-# are prefixed with '-'.
-_TRACING_CATEGORIES = [
- 'blink',
- 'blink.net',
- 'blink.user_timing',
- 'devtools.timeline',
- 'java',
- 'navigation',
- 'toplevel',
- 'v8',
- '-cc', # A lot of unnecessary events are enabled by default in "cc".
-]
-
-TTFMP_ADDITIONAL_CATEGORIES = [
- 'loading',
- 'disabled-by-default-blink.debug.layout',
-]
-
-def _CleanArtefactsFromPastRuns(output_directories_path):
- """Cleans artifacts generated from past run in the output directory.
-
- Args:
- output_directories_path: The output directory path where to clean the
- previous traces.
- """
- for dirname in os.listdir(output_directories_path):
- directory_path = os.path.join(output_directories_path, dirname)
- if not os.path.isdir(directory_path):
- continue
- try:
- int(dirname)
- except ValueError:
- continue
- shutil.rmtree(directory_path)
-
-
-class CacheOperation(object):
- CLEAR, SAVE, PUSH = range(3)
-
-
-class SandwichRunnerError(Exception):
- pass
-
-
-class SandwichRunner(object):
- """Sandwich runner.
-
- This object is meant to be configured first and then run using the Run()
- method.
- """
- _ATTEMPT_COUNT = 3
- _STOP_DELAY_MULTIPLIER = 2
- _ABORT_RUN_TIMEOUT_SECONDS = 30 * 60
-
- def __init__(self):
- """Configures a sandwich runner out of the box.
-
- Public members are meant to be configured before calling Run().
- """
- # Cache operation to do before doing the chrome navigation.
- self.cache_operation = CacheOperation.CLEAR
-
- # The cache archive's path to save to or push from. Is str or None.
- self.cache_archive_path = None
-
- # List of additional chrome command line flags.
- self.chrome_args = []
-
- # Controls whether the WPR server should do script injection.
- self.disable_wpr_script_injection = False
-
- # Number of times to repeat the url.
- self.repeat = 1
-
- # Network conditions to emulate. None if no emulation.
- self.network_condition = None
-
- # Network condition emulator. Can be: browser,wpr
- self.network_emulator = 'browser'
-
- # Output directory where to save the traces, videos, etc. Is str or None.
- self.output_dir = None
-
- # URL to navigate to.
- self.url = None
-
- # Configures whether to record speed-index video.
- self.record_video = False
-
- # Configures whether to record memory dumps.
- self.record_memory_dumps = False
-
- # Configures whether to record tracing categories needed for TTFMP.
- self.record_first_meaningful_paint = False
-
- # Path to the WPR archive to load or save. Is str or None.
- self.wpr_archive_path = None
-
- # Configures whether the WPR archive should be read or generated.
- self.wpr_record = False
-
- # The android DeviceUtils to run sandwich on or None to run it locally.
- self.android_device = None
-
- self._chrome_ctl = None
- self._local_cache_directory_path = None
-
- def _CleanTraceOutputDirectory(self):
- assert self.output_dir
- if not os.path.isdir(self.output_dir):
- try:
- os.makedirs(self.output_dir)
- except OSError:
- logging.error('Cannot create directory for results: %s',
- self.output_dir)
- raise
- else:
- _CleanArtefactsFromPastRuns(self.output_dir)
-
- def _GetEmulatorNetworkCondition(self, emulator):
- if self.network_emulator == emulator:
- return self.network_condition
- return None
-
- def _RunNavigation(self, clear_cache, repeat_id=None):
- """Run a page navigation to the given URL.
-
- Args:
- clear_cache: Whether if the cache should be cleared before navigation.
- repeat_id: Id of the run in the output directory. If it is None, then no
- trace or video will be saved.
- """
- run_path = None
- if repeat_id is not None:
- run_path = os.path.join(self.output_dir, str(repeat_id))
- if not os.path.isdir(run_path):
- os.makedirs(run_path)
- self._chrome_ctl.SetNetworkEmulation(
- self._GetEmulatorNetworkCondition('browser'))
- categories = _TRACING_CATEGORIES
- if self.record_memory_dumps:
- categories += [MEMORY_DUMP_CATEGORY]
- if self.record_first_meaningful_paint:
- categories += TTFMP_ADDITIONAL_CATEGORIES
- stop_delay_multiplier = 0
- if self.wpr_record or self.cache_operation == CacheOperation.SAVE:
- stop_delay_multiplier = self._STOP_DELAY_MULTIPLIER
- # TODO(gabadie): add a way to avoid recording a trace.
- with common_util.TimeoutScope(
- self._ABORT_RUN_TIMEOUT_SECONDS, 'Sandwich run overdue.'):
- with self._chrome_ctl.Open() as connection:
- if clear_cache:
- connection.ClearCache()
-
- # Binds all parameters of RecordUrlNavigation() to avoid repetition.
- def RecordTrace():
- return loading_trace.LoadingTrace.RecordUrlNavigation(
- url=self.url,
- connection=connection,
- chrome_metadata=self._chrome_ctl.ChromeMetadata(),
- categories=categories,
- timeout_seconds=_DEVTOOLS_TIMEOUT,
- stop_delay_multiplier=stop_delay_multiplier)
-
- if run_path is not None and self.record_video:
- device = self._chrome_ctl.GetDevice()
- if device is None:
- raise RuntimeError('Can only record video on a remote device.')
- video_recording_path = os.path.join(run_path, VIDEO_FILENAME)
- with device_setup.RemoteSpeedIndexRecorder(device, connection,
- video_recording_path):
- trace = RecordTrace()
- else:
- trace = RecordTrace()
- for event in trace.request_track.GetEvents():
- if event.failed:
- logging.warning(
- 'request to %s failed: %s', event.url, event.error_text)
- if not trace.tracing_track.HasLoadingSucceeded():
- raise SandwichRunnerError('Page load has failed.')
- if run_path is not None:
- trace_path = os.path.join(run_path, TRACE_FILENAME)
- trace.ToJsonFile(trace_path)
-
- def _RunInRetryLoop(self, repeat_id, perform_dry_run_before):
- """Attempts to run monitoring navigation.
-
- Args:
- repeat_id: Id of the run in the output directory.
- perform_dry_run_before: Whether it should do a dry run attempt before the
- actual monitoring run.
-
- Returns:
- Whether the device should be rebooted to continue attempting for that
- given |repeat_id|.
- """
- resume_attempt_id = 0
- if perform_dry_run_before:
- resume_attempt_id = 1
- for attempt_id in xrange(resume_attempt_id, self._ATTEMPT_COUNT):
- try:
- if perform_dry_run_before:
- logging.info('Do sandwich dry run attempt %d', attempt_id)
- else:
- logging.info('Do sandwich run attempt %d', attempt_id)
- self._chrome_ctl.ResetBrowserState()
- clear_cache = False
- if self.cache_operation == CacheOperation.CLEAR:
- clear_cache = True
- elif self.cache_operation == CacheOperation.PUSH:
- self._chrome_ctl.PushBrowserCache(self._local_cache_directory_path)
- elif self.cache_operation == CacheOperation.SAVE:
- clear_cache = repeat_id == 0
- self._RunNavigation(clear_cache=clear_cache, repeat_id=repeat_id)
- if not perform_dry_run_before or attempt_id > resume_attempt_id:
- break
- except controller.ChromeControllerError as error:
- request_reboot = False
- is_intermittent = error.IsIntermittent()
- if (self.android_device and
- attempt_id == 0 and
- error.error_type is websocket.WebSocketConnectionClosedException):
- assert not perform_dry_run_before
- # On Android, the first socket connection closure is likely caused by
- # memory pressure on the device and therefore considered intermittent,
- # and therefore request a reboot of the device to the caller.
- request_reboot = True
- is_intermittent = True
- if is_intermittent and attempt_id + 1 != self._ATTEMPT_COUNT:
- dump_filename = '{}_intermittent_failure'.format(attempt_id)
- dump_path = os.path.join(
- self.output_dir, str(repeat_id), dump_filename)
- else:
- dump_path = os.path.join(self.output_dir, ERROR_FILENAME)
- with open(dump_path, 'w') as dump_output:
- error.Dump(dump_output)
- if not is_intermittent:
- error.RaiseOriginal()
- if request_reboot:
- assert resume_attempt_id is 0
- return True
- else:
- logging.error('Failed to navigate to %s after %d attemps' % \
- (self.url, self._ATTEMPT_COUNT))
- error.RaiseOriginal()
- return False
-
- def _RunWithWpr(self, resume_repeat_id, perform_dry_run_before):
- """Opens WPR and attempts to run repeated monitoring navigation.
-
- Args:
- resume_repeat_id: Id of the run to resume.
- perform_dry_run_before: Whether the repeated run to resume should first do
- a dry run navigation attempt.
-
- Returns:
- Number of repeat performed. If < self.repeat, then it means that the
- device should be rebooted.
- """
- with self._chrome_ctl.OpenWprHost(self.wpr_archive_path,
- record=self.wpr_record,
- network_condition_name=self._GetEmulatorNetworkCondition('wpr'),
- disable_script_injection=self.disable_wpr_script_injection,
- out_log_path=os.path.join(self.output_dir, WPR_LOG_FILENAME)):
- for repeat_id in xrange(resume_repeat_id, self.repeat):
- reboot_requested = self._RunInRetryLoop(
- repeat_id, perform_dry_run_before)
- if reboot_requested:
- return repeat_id
- return self.repeat
-
- def _PullCacheFromDevice(self):
- assert self.cache_operation == CacheOperation.SAVE
- assert self.cache_archive_path, 'Need to specify where to save the cache'
-
- cache_directory_path = self._chrome_ctl.PullBrowserCache()
- chrome_cache.ZipDirectoryContent(
- cache_directory_path, self.cache_archive_path)
- shutil.rmtree(cache_directory_path)
-
- def Run(self):
- """SandwichRunner main entry point meant to be called once configured."""
- assert self.output_dir is not None
- assert self._chrome_ctl == None
- assert self._local_cache_directory_path == None
- self._CleanTraceOutputDirectory()
-
- if self.android_device:
- self._chrome_ctl = controller.RemoteChromeController(self.android_device)
- else:
- self._chrome_ctl = controller.LocalChromeController()
- self._chrome_ctl.AddChromeArguments(['--disable-infobars'])
- self._chrome_ctl.AddChromeArguments(self.chrome_args)
- if self.cache_operation == CacheOperation.SAVE:
- self._chrome_ctl.SetSlowDeath()
- try:
- if self.cache_operation == CacheOperation.PUSH:
- assert os.path.isfile(self.cache_archive_path)
- self._local_cache_directory_path = tempfile.mkdtemp(suffix='.cache')
- chrome_cache.UnzipDirectoryContent(
- self.cache_archive_path, self._local_cache_directory_path)
- times_repeated = self._RunWithWpr(0, False)
- if times_repeated < self.repeat:
- self._chrome_ctl.RebootDevice()
- self._RunWithWpr(times_repeated, True)
- finally:
- if self._local_cache_directory_path:
- shutil.rmtree(self._local_cache_directory_path)
- self._local_cache_directory_path = None
- if self.cache_operation == CacheOperation.SAVE:
- self._PullCacheFromDevice()
-
- self._chrome_ctl = None
-
-
-def WalkRepeatedRuns(runner_output_dir):
- """Yields unordered (repeat id, path of the repeat directory).
-
- Args:
- runner_output_dir: Same as for SandwichRunner.output_dir.
- """
- repeated_run_count = 0
- for node_name in os.listdir(runner_output_dir):
- repeat_dir = os.path.join(runner_output_dir, node_name)
- if not os.path.isdir(repeat_dir):
- continue
- try:
- repeat_id = int(node_name)
- except ValueError:
- continue
- yield repeat_id, repeat_dir
- repeated_run_count += 1
- assert repeated_run_count > 0, ('Error: not a sandwich runner output '
- 'directory: {}').format(runner_output_dir)
diff --git a/loading/sandwich_swr.py b/loading/sandwich_swr.py
deleted file mode 100644
index e6b3736..0000000
--- a/loading/sandwich_swr.py
+++ /dev/null
@@ -1,323 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-""" This module implements the Stale-While-Revalidate performance improvement
-experiment on third parties' resources.
-
-The top level operations of the experiment are:
- 1. Record WPR archive;
- 2. Create a patched WPR archive so that all resource are getting cached;
- 3. Record original cache using the patched WPR archive;
- 4. Setup the benchmark producing the list of URL to enable SWR in a JSON file;
- 5. Create the benchmark cache by:
- - Remove No-Store resources;
- - Adding the SWR header on resources that are experimentally required to
- have it;
- - Patch SWR header on resources that already had it to make sure the
- the SWR freshness is not out of date;
- - And restore all other headers so that response headers such as
- Set-Cookie are still in the cache to avoid entropy caused by
- different cookie values.
- 6. Run the benchmark;
- 7. Extract metrics into CSV files.
-"""
-
-import csv
-import json
-import logging
-import os
-import shutil
-from urlparse import urlparse
-
-import chrome_cache
-import common_util
-import loading_trace
-import request_track
-import sandwich_metrics
-import sandwich_runner
-import sandwich_utils
-import task_manager
-import wpr_backend
-
-
-def _ExtractRegexMatchingUrls(urls, domain_regexes):
- urls_to_enable = set()
- for url in urls:
- if url in urls_to_enable:
- continue
- parsed_url = urlparse(url)
- for domain_regex in domain_regexes:
- if domain_regex.search(parsed_url.netloc):
- urls_to_enable.add(url)
- break
- return urls_to_enable
-
-
-def _BuildBenchmarkCache(
- original_wpr_trace_path, urls_to_enable_swr,
- original_cache_trace_path, original_cache_archive_path,
- cache_archive_dest_path):
- # Load trace that was generated at original cache creation.
- logging.info('loading %s', original_wpr_trace_path)
- trace = loading_trace.LoadingTrace.FromJsonFile(original_wpr_trace_path)
-
- # Lists URLs that should not be in the cache or already have SWR headers.
- urls_should_not_be_cached = set()
- urls_already_with_swr = set()
- for request in trace.request_track.GetEvents():
- caching_policy = request_track.CachingPolicy(request)
- if not caching_policy.IsCacheable():
- urls_should_not_be_cached.add(request.url)
- elif caching_policy.GetFreshnessLifetimes()[1] > 0:
- urls_already_with_swr.add(request.url)
- # Trace are fat, kill this one to save up memory for the next one to load in
- # this scope.
- del trace
-
- # Load trace that was generated at original cache creation.
- logging.info('loading %s', original_cache_trace_path)
- trace = loading_trace.LoadingTrace.FromJsonFile(original_cache_trace_path)
-
- # Create cache contents.
- delete_count = 0
- swr_patch_count = 0
- originaly_swr_patch_count = 0
- noswr_patch_count = 0
- with common_util.TemporaryDirectory(prefix='sandwich_tmp') as tmp_path:
- cache_path = os.path.join(tmp_path, 'cache')
- chrome_cache.UnzipDirectoryContent(original_cache_archive_path, cache_path)
- cache_backend = chrome_cache.CacheBackend(cache_path, 'simple')
- cache_keys = set(cache_backend.ListKeys())
- for request in trace.request_track.GetEvents():
- if request.url not in cache_keys:
- continue
- if request.url in urls_should_not_be_cached:
- cache_backend.DeleteKey(request.url)
- delete_count += 1
- continue
- if not request.HasReceivedResponse():
- continue
- if request.url in urls_to_enable_swr:
- request.SetHTTPResponseHeader(
- 'cache-control', 'max-age=0,stale-while-revalidate=315360000')
- request.SetHTTPResponseHeader(
- 'last-modified', 'Thu, 23 Jun 2016 11:30:00 GMT')
- swr_patch_count += 1
- elif request.url in urls_already_with_swr:
- # Force to use SWR on resources that originally attempted to use it.
- request.SetHTTPResponseHeader(
- 'cache-control', 'max-age=0,stale-while-revalidate=315360000')
- # The resource originally had SWR enabled therefore we don't
- # Last-Modified to repro exactly the performance impact in case these
- # headers were not set properly causing an invalidation instead of a
- # revalidation.
- originaly_swr_patch_count += 1
- else:
- # Force synchronous revalidation.
- request.SetHTTPResponseHeader('cache-control', 'max-age=0')
- noswr_patch_count += 1
- raw_headers = request.GetRawResponseHeaders()
- cache_backend.UpdateRawResponseHeaders(request.url, raw_headers)
- chrome_cache.ZipDirectoryContent(cache_path, cache_archive_dest_path)
- logging.info('patched %d cached resources with forced SWR', swr_patch_count)
- logging.info('patched %d cached resources with original SWR',
- originaly_swr_patch_count)
- logging.info('patched %d cached resources without SWR', noswr_patch_count)
- logging.info('deleted %d cached resources', delete_count)
-
-
-def _ProcessRunOutputDir(benchmark_setup, runner_output_dir):
- """Process benchmark's run output directory.
-
- Args:
- cache_validation_result: Same as for _RunOutputVerifier
- benchmark_setup: Same as for _RunOutputVerifier
- runner_output_dir: Same as for SandwichRunner.output_dir
-
- Returns:
- List of dictionary.
- """
- run_metrics_list = []
- for repeat_id, repeat_dir in sandwich_runner.WalkRepeatedRuns(
- runner_output_dir):
- trace_path = os.path.join(repeat_dir, sandwich_runner.TRACE_FILENAME)
- logging.info('processing trace: %s', trace_path)
- trace = loading_trace.LoadingTrace.FromJsonFile(trace_path)
- served_from_cache_urls = sandwich_utils.ListUrlRequests(
- trace, sandwich_utils.RequestOutcome.ServedFromCache)
- matching_subresource_count_used_from_cache = (
- served_from_cache_urls.intersection(
- set(benchmark_setup['urls_to_enable_swr'])))
- run_metrics = {
- 'url': trace.url,
- 'repeat_id': repeat_id,
- 'benchmark_name': benchmark_setup['benchmark_name'],
- 'cache_recording.subresource_count':
- len(benchmark_setup['effective_subresource_urls']),
- 'cache_recording.matching_subresource_count':
- len(benchmark_setup['urls_to_enable_swr']),
- 'benchmark.matching_subresource_count_used_from_cache':
- len(matching_subresource_count_used_from_cache)
- }
- run_metrics.update(
- sandwich_metrics.ExtractCommonMetricsFromRepeatDirectory(
- repeat_dir, trace))
- run_metrics_list.append(run_metrics)
- return run_metrics_list
-
-
-class StaleWhileRevalidateBenchmarkBuilder(task_manager.Builder):
- """A builder for a graph of tasks for Stale-While-Revalidate study benchmarks.
- """
-
- def __init__(self, common_builder):
- task_manager.Builder.__init__(self,
- common_builder.output_directory,
- common_builder.output_subdirectory)
- self._common_builder = common_builder
- self._patched_wpr_path = None
- self._original_cache_task = None
- self._original_cache_trace_path = None
- self._PopulateCommonPipelines()
-
- def _PopulateCommonPipelines(self):
- """Creates necessary tasks to produce initial cache archives.
-
- Here is the full dependency tree for the returned task:
- depends on: common/original-cache.zip
- depends on: common/webpages-patched.wpr
- depends on: common/webpages.wpr
- """
- @self.RegisterTask('common/webpages-patched.wpr',
- dependencies=[self._common_builder.original_wpr_task])
- def BuildPatchedWpr():
- shutil.copyfile(
- self._common_builder.original_wpr_task.path, BuildPatchedWpr.path)
- wpr_archive = wpr_backend.WprArchiveBackend(BuildPatchedWpr.path)
- wpr_url_entries = wpr_archive.ListUrlEntries()
- for wpr_url_entry in wpr_url_entries:
- sandwich_utils.PatchWprEntryToBeCached(wpr_url_entry)
- logging.info('number of patched entries: %d', len(wpr_url_entries))
- wpr_archive.Persist()
-
- @self.RegisterTask('common/original-cache.zip',
- dependencies=[BuildPatchedWpr])
- def BuildOriginalCache():
- runner = self._common_builder.CreateSandwichRunner()
- runner.wpr_archive_path = BuildPatchedWpr.path
- runner.cache_archive_path = BuildOriginalCache.path
- runner.cache_operation = sandwich_runner.CacheOperation.SAVE
- runner.output_dir = BuildOriginalCache.run_path
- runner.Run()
- BuildOriginalCache.run_path = BuildOriginalCache.path[:-4] + '-run'
-
- self._original_cache_trace_path = os.path.join(
- BuildOriginalCache.run_path, '0', sandwich_runner.TRACE_FILENAME)
- self._patched_wpr_path = BuildPatchedWpr.path
- self._original_cache_task = BuildOriginalCache
-
- def PopulateBenchmark(self, benchmark_name, domain_regexes,
- transformer_list_name, transformer_list):
- """Populate benchmarking tasks.
-
- Args:
- benchmark_name: Name of the benchmark.
- domain_regexes: Compiled regexes of domains to enable SWR.
- transformer_list_name: A string describing the transformers, will be used
- in Task names (prefer names without spaces and special characters).
- transformer_list: An ordered list of function that takes an instance of
- SandwichRunner as parameter, would be applied immediately before
- SandwichRunner.Run() in the given order.
-
-
- Here is the full dependency of the added tree for the returned task:
- <transformer_list_name>/<benchmark_name>-metrics.csv
- depends on: <transformer_list_name>/<benchmark_name>-run/
- depends on: common/<benchmark_name>-cache.zip
- depends on: common/<benchmark_name>-setup.json
- depends on: common/patched-cache.zip
- """
- additional_column_names = [
- 'url',
- 'repeat_id',
- 'benchmark_name',
-
- # Number of resources of the page.
- 'cache_recording.subresource_count',
-
- # Number of resources matching at least one domain regex, to give an
- # idea in the CSV how much the threshold influence additional SWR uses.
- 'cache_recording.matching_subresource_count',
-
- # Number of resources fetched from cache matching at least one domain
- # regex, to give an actual idea if it is possible to have performance
- # improvement on the web page (or not because only XHR), but also tells
- # if the page loading time should see a performance improvement or not
- # compared with a different thresholds.
- 'benchmark.matching_subresource_count_used_from_cache']
- shared_task_prefix = os.path.join('common', benchmark_name)
- task_prefix = os.path.join(transformer_list_name, benchmark_name)
-
- @self.RegisterTask(shared_task_prefix + '-setup.json', merge=True,
- dependencies=[self._original_cache_task])
- def SetupBenchmark():
- logging.info('loading %s', self._original_cache_trace_path)
- trace = loading_trace.LoadingTrace.FromJsonFile(
- self._original_cache_trace_path)
- logging.info('generating %s', SetupBenchmark.path)
- effective_subresource_urls = sandwich_utils.ListUrlRequests(
- trace, sandwich_utils.RequestOutcome.All)
- urls_to_enable_swr = _ExtractRegexMatchingUrls(
- effective_subresource_urls, domain_regexes)
- logging.info(
- 'count of urls to enable SWR: %s', len(urls_to_enable_swr))
- with open(SetupBenchmark.path, 'w') as output:
- json.dump({
- 'benchmark_name': benchmark_name,
- 'urls_to_enable_swr': [url for url in urls_to_enable_swr],
- 'effective_subresource_urls':
- [url for url in effective_subresource_urls]
- }, output)
-
- @self.RegisterTask(shared_task_prefix + '-cache.zip', merge=True,
- dependencies=[SetupBenchmark])
- def BuildBenchmarkCacheArchive():
- benchmark_setup = json.load(open(SetupBenchmark.path))
- _BuildBenchmarkCache(
- original_wpr_trace_path=(
- self._common_builder.original_wpr_recording_trace_path),
- urls_to_enable_swr=set(benchmark_setup['urls_to_enable_swr']),
- original_cache_trace_path=self._original_cache_trace_path,
- original_cache_archive_path=self._original_cache_task.path,
- cache_archive_dest_path=BuildBenchmarkCacheArchive.path)
-
- @self.RegisterTask(task_prefix + '-run/', [BuildBenchmarkCacheArchive])
- def RunBenchmark():
- runner = self._common_builder.CreateSandwichRunner()
- for transformer in transformer_list:
- transformer(runner)
- runner.wpr_archive_path = self._patched_wpr_path
- runner.wpr_out_log_path = os.path.join(
- RunBenchmark.path, sandwich_runner.WPR_LOG_FILENAME)
- runner.cache_archive_path = BuildBenchmarkCacheArchive.path
- runner.cache_operation = sandwich_runner.CacheOperation.PUSH
- runner.output_dir = RunBenchmark.path
- runner.chrome_args.append('--enable-features=StaleWhileRevalidate2')
- runner.Run()
-
- @self.RegisterTask(task_prefix + '-metrics.csv', [RunBenchmark])
- def ExtractMetrics():
- benchmark_setup = json.load(open(SetupBenchmark.path))
- run_metrics_list = _ProcessRunOutputDir(
- benchmark_setup, RunBenchmark.path)
-
- run_metrics_list.sort(key=lambda e: e['repeat_id'])
- with open(ExtractMetrics.path, 'w') as csv_file:
- writer = csv.DictWriter(csv_file, fieldnames=(additional_column_names +
- sandwich_metrics.COMMON_CSV_COLUMN_NAMES))
- writer.writeheader()
- for run_metrics in run_metrics_list:
- writer.writerow(run_metrics)
-
- self._common_builder.default_final_tasks.append(ExtractMetrics)
diff --git a/loading/sandwich_utils.py b/loading/sandwich_utils.py
deleted file mode 100644
index eda55e0..0000000
--- a/loading/sandwich_utils.py
+++ /dev/null
@@ -1,153 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-import os
-
-import common_util
-import emulation
-import sandwich_runner
-import task_manager
-
-
-def NetworkSimulationTransformer(network_condition):
- """Creates a function that accepts a SandwichRunner as a parameter and sets
- network emulation options on it.
-
- Args:
- network_condition: The network condition to apply to the sandwich runner.
-
- Returns:
- A callback transforming the SandwichRunner given in argument accordingly
- """
- assert network_condition in emulation.NETWORK_CONDITIONS
- def Transformer(runner):
- assert isinstance(runner, sandwich_runner.SandwichRunner)
- runner.network_condition = network_condition
- return Transformer
-
-
-def FilterOutDataAndIncompleteRequests(requests):
- for request in filter(lambda r: not r.IsDataRequest(), requests):
- # The protocol is only known once the response has been received. But the
- # trace recording might have been stopped with still some JavaScript
- # originated requests that have not received any responses yet.
- if request.protocol is None:
- assert not request.HasReceivedResponse()
- continue
- if request.protocol in {'about'}:
- continue
- if request.protocol not in {'http/0.9', 'http/1.0', 'http/1.1'}:
- raise RuntimeError('Unknown request protocol {}'.format(request.protocol))
- yield request
-
-
-class RequestOutcome:
- All, ServedFromCache, NotServedFromCache, Post = range(4)
-
-
-def ListUrlRequests(trace, request_kind):
- """Lists requested URLs from a trace.
-
- Args:
- trace: (loading_trace.LoadingTrace) loading trace.
- request_kind: RequestOutcome.* indicating the subset of requests to output.
-
- Returns:
- set([str])
- """
- urls = set()
- for request_event in FilterOutDataAndIncompleteRequests(
- trace.request_track.GetEvents()):
- if (request_kind == RequestOutcome.ServedFromCache and
- request_event.from_disk_cache):
- urls.add(request_event.url)
- elif (request_kind == RequestOutcome.Post and
- request_event.method.upper().strip() == 'POST'):
- urls.add(request_event.url)
- elif (request_kind == RequestOutcome.NotServedFromCache and
- not request_event.from_disk_cache):
- urls.add(request_event.url)
- elif request_kind == RequestOutcome.All:
- urls.add(request_event.url)
- return urls
-
-
-def PatchWprEntryToBeCached(wpr_url_entry):
- """Patches a WprUrlEntry to ensure the resources to go into the HTTP cache and
- avoid invalidation and revalidations.
-
- Args:
- wpr_url_entry: Wpr url entry of the resource to put into the cache.
- """
- MAX_AGE = 10 * 365 * 24 * 60 * 60
- CACHE_CONTROL = 'public, max-age={}'.format(MAX_AGE)
-
- # TODO(gabadie): may need to patch Last-Modified and If-Modified-Since.
- # TODO(gabadie): may need to delete ETag.
- # TODO(gabadie): may need to take care of x-cache.
- #
- # Override the cache-control header to set the resources max age to MAX_AGE.
- #
- # Important note: Some resources holding sensitive information might have
- # cache-control set to no-store which allow the resource to be cached but
- # not cached in the file system. NoState-Prefetch is going to take care of
- # this case. But in here, to simulate NoState-Prefetch, we don't have other
- # choices but save absolutely all cached resources on disk so they survive
- # after killing chrome for cache save, modification and push.
- wpr_url_entry.SetResponseHeader('cache-control', CACHE_CONTROL)
-
- # TODO(gabadie): May need to extend Vary blacklist (referer?)
- #
- # All of these Vary and Pragma possibilities need to be removed from
- # response headers in order for Chrome to store a resource in HTTP cache and
- # not to invalidate it.
- wpr_url_entry.RemoveResponseHeaderDirectives('vary', {'*', 'cookie'})
- wpr_url_entry.RemoveResponseHeaderDirectives('pragma', {'no-cache'})
-
-
-class SandwichCommonBuilder(task_manager.Builder):
- """A builder for a graph of tasks, each prepares or invokes a SandwichRunner.
- """
-
- def __init__(self, android_device, url, output_directory,
- output_subdirectory):
- """Constructor.
-
- Args:
- android_device: The android DeviceUtils to run sandwich on or None to run
- it locally.
- url: URL to benchmark.
- output_directory: As in task_manager.Builder.__init__
- output_subdirectory: As in task_manager.Builder.__init__
- """
- task_manager.Builder.__init__(self, output_directory, output_subdirectory)
- self._android_device = android_device
- self._url = url
- self.default_final_tasks = []
-
- self.original_wpr_task = None
- self.original_wpr_recording_trace_path = None
-
- def CreateSandwichRunner(self):
- """Create a runner for non benchmark purposes."""
- runner = sandwich_runner.SandwichRunner()
- runner.url = self._url
- runner.android_device = self._android_device
- return runner
-
- def PopulateWprRecordingTask(self):
- """Records the original WPR archive."""
- @self.RegisterTask('common/webpages.wpr')
- def BuildOriginalWpr():
- common_util.EnsureParentDirectoryExists(BuildOriginalWpr.path)
- runner = self.CreateSandwichRunner()
- runner.wpr_archive_path = BuildOriginalWpr.path
- runner.wpr_record = True
- runner.output_dir = BuildOriginalWpr.run_path
- runner.Run()
- BuildOriginalWpr.run_path = BuildOriginalWpr.path[:-4] + '-run'
-
- self.original_wpr_task = BuildOriginalWpr
- self.original_wpr_recording_trace_path = os.path.join(
- BuildOriginalWpr.run_path, '0', sandwich_runner.TRACE_FILENAME)
diff --git a/loading/task_manager.py b/loading/task_manager.py
deleted file mode 100644
index f132eb1..0000000
--- a/loading/task_manager.py
+++ /dev/null
@@ -1,519 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-"""API that build and execute recipes wrapped into a task dependency graph.
-
-A Task consists of a 'recipe' (a closure to be executed) and a list of refs to
-tasks that should be executed prior to executing this Task (i.e. dependencies).
-The responsibility of the recipe of a task is to produce the file with the name
-assigned at task creation.
-
-A scenario is a ordered list of tasks to execute such that the dependencies of a
-given task are execute before the said task. The scenario is built from a list
-of final tasks and a list of frozen tasks:
- - A final task is a task to execute ultimately. Therefore the scenario is
- composed of final tasks and their required intermediary tasks.
- - A frozen task is task to not execute. This is a mechanism to morph a task
- that may have dependencies to a task with no dependency at scenario
- generation time, injecting what the task have already produced before as an
- input of the smaller tasks dependency graph covered by the scenario.
-
-Example:
- # -------------------------------------------------- Build my dependency graph
- builder = Builder('my/output/dir')
-
- @builder.RegisterTask('out0')
- def BuildOut0():
- Produce(out=BuildOut0.path)
-
- @builder.RegisterTask('out1')
- def BuildOut1():
- Produce(out=BuildOut1.path)
-
- @builder.RegisterTask('out2', dependencies=[BuildOut0, BuildOut1])
- def BuildOut2():
- DoStuff(BuildOut0.path, BuildOut1.path, out=BuildOut2.path)
-
- @builder.RegisterTask('out3', dependencies=[BuildOut0])
- def BuildOut3():
- DoStuff(BuildOut0.path, out=BuildOut3.path)
-
- # ---------------------------- Case 1: Execute BuildOut3 and its dependencies.
- for task in GenerateScenario(final_tasks=[BuildOut3], frozen_tasks=[])
- task.Execute()
-
- # ---------- Case 2: Execute BuildOut2 and its dependencies but not BuildOut1.
- # It is required that BuildOut1.path is already existing.
- for task in GenerateScenario(final_tasks=[BuildOut2],
- frozen_tasks=[BuildOut1])
- task.Execute()
-"""
-
-
-import argparse
-import collections
-import datetime
-import errno
-import logging
-import os
-import re
-import subprocess
-import sys
-
-import common_util
-
-
-_TASK_LOGS_DIR_NAME = 'logs'
-_TASK_GRAPH_DOTFILE_NAME = 'tasks_graph.dot'
-_TASK_GRAPH_PNG_NAME = 'tasks_graph.png'
-_TASK_RESUME_ARGUMENTS_FILE = 'resume.txt'
-_TASK_EXECUTION_LOG_NAME_FORMAT = 'task-execution-%Y-%m-%d-%H-%M-%S.log'
-
-FROMFILE_PREFIX_CHARS = '@'
-
-
-class TaskError(Exception):
- pass
-
-
-class Task(object):
- """Task with a recipe."""
-
- def __init__(self, name, path, dependencies, recipe):
- """Constructor.
-
- Args:
- name: The name of the task.
- path: Path to the file or directory that this task produces.
- dependencies: List of parent task to execute before.
- recipe: Function to execute.
- """
- self.name = name
- self.path = path
- self._dependencies = dependencies
- self._recipe = recipe
- self._is_done = recipe == None
-
- def Execute(self):
- """Executes this task."""
- if not self._is_done:
- self._recipe()
- self._is_done = True
-
-
-class Builder(object):
- """Utilities for creating sub-graphs of tasks with dependencies."""
-
- def __init__(self, output_directory, output_subdirectory):
- """Constructor.
-
- Args:
- output_directory: Output directory where the tasks work.
- output_subdirectory: Subdirectory to put all created tasks in or None.
- """
- self.output_directory = output_directory
- self.output_subdirectory = output_subdirectory
- self._tasks = {}
-
- # Caution:
- # This decorator may not create a task in the case where merge=True and
- # another task having the same name have already been created. In this case,
- # it will just reuse the former task. This is at the user responsibility to
- # ensure that merged tasks would do the exact same thing.
- #
- # @builder.RegisterTask('hello')
- # def TaskA():
- # my_object.a = 1
- #
- # @builder.RegisterTask('hello', merge=True)
- # def TaskB():
- # # This function won't be executed ever.
- # my_object.a = 2 # <------- Wrong because different from what TaskA do.
- #
- # assert TaskA == TaskB
- # TaskB.Execute() # Sets set my_object.a == 1
- def RegisterTask(self, task_name, dependencies=None, merge=False):
- """Decorator that wraps a function into a task.
-
- Args:
- task_name: The name of this new task to register.
- dependencies: List of SandwichTarget to build before this task.
- merge: If a task already have this name, don't create a new one and
- reuse the existing one.
-
- Returns:
- A Task that was created by wrapping the function or an existing registered
- wrapper (that have wrapped a different function).
- """
- rebased_task_name = self._RebaseTaskName(task_name)
- dependencies = dependencies or []
- def InnerAddTaskWithNewPath(recipe):
- if rebased_task_name in self._tasks:
- if not merge:
- raise TaskError('Task {} already exists.'.format(rebased_task_name))
- task = self._tasks[rebased_task_name]
- return task
- task_path = self.RebaseOutputPath(task_name)
- task = Task(rebased_task_name, task_path, dependencies, recipe)
- self._tasks[rebased_task_name] = task
- return task
- return InnerAddTaskWithNewPath
-
- def RebaseOutputPath(self, builder_relative_path):
- """Rebases buider relative path."""
- return os.path.join(
- self.output_directory, self._RebaseTaskName(builder_relative_path))
-
- def _RebaseTaskName(self, task_name):
- if self.output_subdirectory:
- return os.path.join(self.output_subdirectory, task_name)
- return task_name
-
-
-def GenerateScenario(final_tasks, frozen_tasks):
- """Generates a list of tasks to execute in order of dependencies-first.
-
- Args:
- final_tasks: The final tasks to generate the scenario from.
- frozen_tasks: Sets of task to freeze.
-
- Returns:
- [Task]
- """
- scenario = []
- task_paths = {}
- def InternalAppendTarget(task):
- if task in frozen_tasks:
- if not os.path.exists(task.path):
- raise TaskError('Frozen target `{}`\'s path doesn\'t exist.'.format(
- task.name))
- return
- if task.path in task_paths:
- if task_paths[task.path] == None:
- raise TaskError('Target `{}` depends on itself.'.format(task.name))
- if task_paths[task.path] != task:
- raise TaskError(
- 'Tasks `{}` and `{}` produce the same file: `{}`.'.format(
- task.name, task_paths[task.path].name, task.path))
- return
- task_paths[task.path] = None
- for dependency in task._dependencies:
- InternalAppendTarget(dependency)
- task_paths[task.path] = task
- scenario.append(task)
-
- for final_task in final_tasks:
- InternalAppendTarget(final_task)
- return scenario
-
-
-def GenerateDependentSetPerTask(scenario):
- """Maps direct dependents per tasks of scenario.
-
- Args:
- scenario: The scenario containing the Tasks to map.
-
- Returns:
- {Task: set(Task)}
- """
- task_set = set(scenario)
- task_children = collections.defaultdict(set)
- for task in scenario:
- for parent in task._dependencies:
- if parent in task_set:
- task_children[parent].add(task)
- return task_children
-
-
-def ListResumingTasksToFreeze(scenario, final_tasks, skipped_tasks):
- """Lists the tasks that one needs to freeze to be able to resume the scenario
- after failure.
-
- Args:
- scenario: The scenario (list of Task) to be resumed.
- final_tasks: The list of final Task used to generate the scenario.
- skipped_tasks: Set of Tasks in the scenario that were skipped.
-
- Returns:
- [Task]
- """
- scenario_tasks = set(scenario)
- assert skipped_tasks.issubset(scenario_tasks)
- frozen_tasks = []
- frozen_task_set = set()
- walked_tasks = set()
-
- def InternalWalk(task):
- if task in walked_tasks:
- return
- walked_tasks.add(task)
- if task not in scenario_tasks or task not in skipped_tasks:
- if task not in frozen_task_set:
- frozen_task_set.add(task)
- frozen_tasks.append(task)
- else:
- for dependency in task._dependencies:
- InternalWalk(dependency)
-
- for final_task in final_tasks:
- InternalWalk(final_task)
- return frozen_tasks
-
-
-def OutputGraphViz(scenario, final_tasks, output):
- """Outputs the build dependency graph covered by this scenario.
-
- Args:
- scenario: The generated scenario.
- final_tasks: The final tasks used to generate the scenario.
- output: A file-like output stream to receive the dot file.
-
- Graph interpretations:
- - Final tasks (the one that where directly appended) are box shaped.
- - Non final tasks are ellipse shaped.
- - Frozen tasks have a blue shape.
- """
- task_execution_ids = {t: i for i, t in enumerate(scenario)}
- tasks_node_ids = dict()
-
- def GetTaskNodeId(task):
- if task in tasks_node_ids:
- return tasks_node_ids[task]
- node_id = len(tasks_node_ids)
- node_label = task.name
- node_color = 'blue'
- node_shape = 'ellipse'
- if task in task_execution_ids:
- node_color = 'black'
- node_label = str(task_execution_ids[task]) + ': ' + node_label
- if task in final_tasks:
- node_shape = 'box'
- output.write(' n{} [label="{}", color={}, shape={}];\n'.format(
- node_id, node_label, node_color, node_shape))
- tasks_node_ids[task] = node_id
- return node_id
-
- output.write('digraph graphname {\n')
- for task in scenario:
- task_node_id = GetTaskNodeId(task)
- for dep in task._dependencies:
- dep_node_id = GetTaskNodeId(dep)
- output.write(' n{} -> n{};\n'.format(dep_node_id, task_node_id))
- output.write('}\n')
-
-
-def CommandLineParser():
- """Creates command line arguments parser meant to be used as a parent parser
- for any entry point that use the ExecuteWithCommandLine() function.
-
- The root parser must be created with:
- fromfile_prefix_chars=FROMFILE_PREFIX_CHARS.
-
- Returns:
- The command line arguments parser.
- """
- parser = argparse.ArgumentParser(add_help=False)
- parser.add_argument('-d', '--dry-run', action='store_true',
- help='Only prints the tasks to build.')
- parser.add_argument('-e', '--to-execute', metavar='REGEX', type=str,
- action='append', dest='run_regexes', default=[],
- help='Regex selecting tasks to execute.')
- parser.add_argument('-f', '--to-freeze', metavar='REGEX', type=str,
- action='append', dest='frozen_regexes', default=[],
- help='Regex selecting tasks to not execute.')
- parser.add_argument('-k', '--keep-going', action='store_true', default=False,
- help='Keep going when some targets can\'t be made.')
- parser.add_argument('-o', '--output', type=str, required=True,
- help='Path of the output directory.')
- parser.add_argument('-v', '--output-graphviz', action='store_true',
- help='Outputs the {} and {} file in the output directory.'
- ''.format(_TASK_GRAPH_DOTFILE_NAME, _TASK_GRAPH_PNG_NAME))
- return parser
-
-
-def _SelectTasksFromCommandLineRegexes(args, default_final_tasks):
- frozen_regexes = [common_util.VerboseCompileRegexOrAbort(e)
- for e in args.frozen_regexes]
- run_regexes = [common_util.VerboseCompileRegexOrAbort(e)
- for e in args.run_regexes]
-
- # Lists final tasks.
- final_tasks = default_final_tasks
- if run_regexes:
- final_tasks = []
- # Traverse the graph in the normal execution order starting from
- # |default_final_tasks| in case of command line regex selection.
- tasks = GenerateScenario(default_final_tasks, frozen_tasks=set())
- # Order of run regexes prevails on the traversing order of tasks.
- for regex in run_regexes:
- for task in tasks:
- if regex.search(task.name):
- final_tasks.append(task)
-
- # Lists parents of |final_tasks| to freeze.
- frozen_tasks = set()
- impossible_tasks = set()
- if frozen_regexes:
- complete_scenario = GenerateScenario(final_tasks, frozen_tasks=set())
- dependents_per_task = GenerateDependentSetPerTask(complete_scenario)
- def MarkTaskAsImpossible(task):
- if task in impossible_tasks:
- return
- impossible_tasks.add(task)
- for dependent in dependents_per_task[task]:
- MarkTaskAsImpossible(dependent)
-
- for task in complete_scenario:
- for regex in frozen_regexes:
- if regex.search(task.name):
- if os.path.exists(task.path):
- frozen_tasks.add(task)
- else:
- MarkTaskAsImpossible(task)
- break
-
- return [t for t in final_tasks if t not in impossible_tasks], frozen_tasks
-
-
-class _ResumingFileBuilder(object):
- def __init__(self, args):
- resume_path = os.path.join(args.output, _TASK_RESUME_ARGUMENTS_FILE)
- self._resume_output = open(resume_path, 'w')
- # List initial freezing regexes not to loose track of final targets to
- # freeze in case of severals resume attempts caused by sudden death.
- for regex in args.frozen_regexes:
- self._resume_output.write('-f\n{}\n'.format(regex))
-
- def __enter__(self):
- return self
-
- def __exit__(self, exc_type, exc_value, exc_traceback):
- del exc_type, exc_value, exc_traceback # unused
- self._resume_output.close()
-
- def OnTaskSuccess(self, task):
- # Log the succeed tasks so that they are ensured to be frozen in case
- # of a sudden death.
- self._resume_output.write('-f\n^{}$\n'.format(re.escape(task.name)))
- # Makes sure the task freezing command line make it to the disk.
- self._resume_output.flush()
- os.fsync(self._resume_output.fileno())
-
- def OnScenarioFinish(
- self, scenario, final_tasks, failed_tasks, skipped_tasks):
- resume_additonal_arguments = []
- for task in ListResumingTasksToFreeze(
- scenario, final_tasks, skipped_tasks):
- resume_additonal_arguments.extend(
- ['-f', '^{}$'.format(re.escape(task.name))])
- self._resume_output.seek(0)
- self._resume_output.truncate()
- self._resume_output.write('\n'.join(resume_additonal_arguments))
- print '# Looks like something went wrong in tasks:'
- for failed_task in failed_tasks:
- print '# {}'.format(failed_task.name)
- print '#'
- print '# To resume, append the following parameter:'
- print '# ' + FROMFILE_PREFIX_CHARS + self._resume_output.name
-
-
-def ExecuteWithCommandLine(args, default_final_tasks):
- """Helper to execute tasks using command line arguments.
-
- Args:
- args: Command line argument parsed with CommandLineParser().
- default_final_tasks: Default final tasks if there is no -r command
- line arguments.
-
- Returns:
- 0 if success or 1 otherwise
- """
- # Builds the scenario.
- final_tasks, frozen_tasks = _SelectTasksFromCommandLineRegexes(
- args, default_final_tasks)
- scenario = GenerateScenario(final_tasks, frozen_tasks)
- if len(scenario) == 0:
- logging.error('No tasks to build.')
- return 1
-
- if not os.path.isdir(args.output):
- os.makedirs(args.output)
-
- # Print the task dependency graph visualization.
- if args.output_graphviz:
- graphviz_path = os.path.join(args.output, _TASK_GRAPH_DOTFILE_NAME)
- png_graph_path = os.path.join(args.output, _TASK_GRAPH_PNG_NAME)
- with open(graphviz_path, 'w') as output:
- OutputGraphViz(scenario, final_tasks, output)
- subprocess.check_call(['dot', '-Tpng', graphviz_path, '-o', png_graph_path])
-
- # Use the build scenario.
- if args.dry_run:
- for task in scenario:
- print task.name
- return 0
-
- # Run the Scenario while saving intermediate state to be able to resume later.
- failed_tasks = []
- tasks_to_skip = set()
- dependents_per_task = GenerateDependentSetPerTask(scenario)
-
- def MarkTaskNotToExecute(task):
- if task not in tasks_to_skip:
- logging.warning('can not execute task: %s', task.name)
- tasks_to_skip.add(task)
- for dependent in dependents_per_task[task]:
- MarkTaskNotToExecute(dependent)
-
- log_filename = datetime.datetime.now().strftime(
- _TASK_EXECUTION_LOG_NAME_FORMAT)
- log_path = os.path.join(args.output, _TASK_LOGS_DIR_NAME, log_filename)
- if not os.path.isdir(os.path.dirname(log_path)):
- os.makedirs(os.path.dirname(log_path))
- formatter = logging.Formatter('[%(asctime)s] %(levelname)s: %(message)s')
- handler = logging.FileHandler(log_path, mode='a')
- handler.setFormatter(formatter)
- logging.getLogger().addHandler(handler)
- logging.info(
- '%s %s', '-' * 60, common_util.GetCommandLineForLogging(sys.argv))
- try:
- with _ResumingFileBuilder(args) as resume_file_builder:
- for task_execute_id, task in enumerate(scenario):
- if task in tasks_to_skip:
- continue
- logging.info('%s %s', '-' * 60, task.name)
- try:
- task.Execute()
- except (MemoryError, SyntaxError):
- raise
- except BaseException:
- # The resuming file being incrementally generated by
- # resume_file_builder.OnTaskSuccess() is automatically fsynced().
- # But resume_file_builder.OnScenarioFinish() completely rewrite
- # this file with the mininal subset of task to freeze, and in case
- # of an ENOSPC, we don't want to touch the resuming file at all so
- # that it remains uncorrupted.
- if (sys.exc_info()[0] == IOError and
- sys.exc_info()[1].errno == errno.ENOSPC):
- raise
- logging.exception('%s %s failed', '-' * 60, task.name)
- failed_tasks.append(task)
- if args.keep_going and sys.exc_info()[0] != KeyboardInterrupt:
- MarkTaskNotToExecute(task)
- else:
- tasks_to_skip.update(set(scenario[task_execute_id:]))
- break
- else:
- resume_file_builder.OnTaskSuccess(task)
- if tasks_to_skip:
- assert failed_tasks
- resume_file_builder.OnScenarioFinish(
- scenario, final_tasks, failed_tasks, tasks_to_skip)
- if sys.exc_info()[0] == KeyboardInterrupt:
- raise
- return 1
- finally:
- logging.getLogger().removeHandler(handler)
- assert not failed_tasks
- return 0
diff --git a/loading/task_manager_unittest.py b/loading/task_manager_unittest.py
deleted file mode 100644
index f6e9788..0000000
--- a/loading/task_manager_unittest.py
+++ /dev/null
@@ -1,541 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-import argparse
-import contextlib
-import errno
-import os
-import re
-import shutil
-import StringIO
-import sys
-import tempfile
-import unittest
-
-import common_util
-import task_manager
-
-
-_GOLDEN_GRAPHVIZ = """digraph graphname {
- n0 [label="0: b", color=black, shape=ellipse];
- n1 [label="1: a", color=black, shape=ellipse];
- n2 [label="2: c", color=black, shape=ellipse];
- n0 -> n2;
- n1 -> n2;
- n3 [label="3: d", color=black, shape=ellipse];
- n2 -> n3;
- n4 [label="4: f", color=black, shape=box];
- n3 -> n4;
- n5 [label="e", color=blue, shape=ellipse];
- n5 -> n4;
-}\n"""
-
-
-@contextlib.contextmanager
-def EatStdoutAndStderr():
- """Overrides sys.std{out,err} to intercept write calls."""
- sys.stdout.flush()
- sys.stderr.flush()
- original_stdout = sys.stdout
- original_stderr = sys.stderr
- try:
- sys.stdout = StringIO.StringIO()
- sys.stderr = StringIO.StringIO()
- yield
- finally:
- sys.stdout = original_stdout
- sys.stderr = original_stderr
-
-
-class TestException(Exception):
- pass
-
-
-class TaskManagerTestCase(unittest.TestCase):
- def setUp(self):
- self.output_directory = tempfile.mkdtemp()
-
- def tearDown(self):
- shutil.rmtree(self.output_directory)
-
- def OutputPath(self, file_path):
- return os.path.join(self.output_directory, file_path)
-
- def TouchOutputFile(self, file_path):
- with open(self.OutputPath(file_path), 'w') as output:
- output.write(file_path + '\n')
-
-
-class TaskTest(TaskManagerTestCase):
- def testTaskExecution(self):
- def Recipe():
- Recipe.counter += 1
- Recipe.counter = 0
- task = task_manager.Task('hello.json', 'what/ever/hello.json', [], Recipe)
- self.assertFalse(task._is_done)
- self.assertEqual(0, Recipe.counter)
- task.Execute()
- self.assertEqual(1, Recipe.counter)
- task.Execute()
- self.assertEqual(1, Recipe.counter)
-
- def testTaskExecutionWithUnexecutedDeps(self):
- def RecipeA():
- self.fail()
-
- def RecipeB():
- RecipeB.counter += 1
- RecipeB.counter = 0
-
- a = task_manager.Task('hello.json', 'out/hello.json', [], RecipeA)
- b = task_manager.Task('hello.json', 'out/hello.json', [a], RecipeB)
- self.assertEqual(0, RecipeB.counter)
- b.Execute()
- self.assertEqual(1, RecipeB.counter)
-
-
-class BuilderTest(TaskManagerTestCase):
- def testRegisterTask(self):
- builder = task_manager.Builder(self.output_directory, None)
- @builder.RegisterTask('hello.txt')
- def TaskA():
- TaskA.executed = True
- TaskA.executed = False
- self.assertEqual(os.path.join(self.output_directory, 'hello.txt'),
- TaskA.path)
- self.assertFalse(TaskA.executed)
- TaskA.Execute()
- self.assertTrue(TaskA.executed)
-
- def testRegisterDuplicateTask(self):
- builder = task_manager.Builder(self.output_directory, None)
- @builder.RegisterTask('hello.txt')
- def TaskA():
- pass
- del TaskA # unused
- with self.assertRaises(task_manager.TaskError):
- @builder.RegisterTask('hello.txt')
- def TaskB():
- pass
- del TaskB # unused
-
- def testTaskMerging(self):
- builder = task_manager.Builder(self.output_directory, None)
- @builder.RegisterTask('hello.txt')
- def TaskA():
- pass
- @builder.RegisterTask('hello.txt', merge=True)
- def TaskB():
- pass
- self.assertEqual(TaskA, TaskB)
-
- def testOutputSubdirectory(self):
- builder = task_manager.Builder(self.output_directory, 'subdir')
-
- @builder.RegisterTask('world.txt')
- def TaskA():
- pass
- del TaskA # unused
-
- self.assertIn('subdir/world.txt', builder._tasks)
- self.assertNotIn('subdir/subdir/world.txt', builder._tasks)
- self.assertNotIn('world.txt', builder._tasks)
-
- @builder.RegisterTask('subdir/world.txt')
- def TaskB():
- pass
- del TaskB # unused
- self.assertIn('subdir/subdir/world.txt', builder._tasks)
- self.assertNotIn('world.txt', builder._tasks)
-
-
-class GenerateScenarioTest(TaskManagerTestCase):
- def testParents(self):
- builder = task_manager.Builder(self.output_directory, None)
- @builder.RegisterTask('a')
- def TaskA():
- pass
- @builder.RegisterTask('b', dependencies=[TaskA])
- def TaskB():
- pass
- @builder.RegisterTask('c', dependencies=[TaskB])
- def TaskC():
- pass
- scenario = task_manager.GenerateScenario([TaskA, TaskB, TaskC], set())
- self.assertListEqual([TaskA, TaskB, TaskC], scenario)
-
- scenario = task_manager.GenerateScenario([TaskB], set())
- self.assertListEqual([TaskA, TaskB], scenario)
-
- scenario = task_manager.GenerateScenario([TaskC], set())
- self.assertListEqual([TaskA, TaskB, TaskC], scenario)
-
- scenario = task_manager.GenerateScenario([TaskC, TaskB], set())
- self.assertListEqual([TaskA, TaskB, TaskC], scenario)
-
- def testFreezing(self):
- builder = task_manager.Builder(self.output_directory, None)
- @builder.RegisterTask('a')
- def TaskA():
- pass
- @builder.RegisterTask('b', dependencies=[TaskA])
- def TaskB():
- pass
- @builder.RegisterTask('c')
- def TaskC():
- pass
- @builder.RegisterTask('d', dependencies=[TaskB, TaskC])
- def TaskD():
- pass
-
- # assert no exception raised.
- task_manager.GenerateScenario([TaskB], set([TaskC]))
-
- with self.assertRaises(task_manager.TaskError):
- task_manager.GenerateScenario([TaskD], set([TaskA]))
-
- self.TouchOutputFile('a')
- scenario = task_manager.GenerateScenario([TaskD], set([TaskA]))
- self.assertListEqual([TaskB, TaskC, TaskD], scenario)
-
- self.TouchOutputFile('b')
- scenario = task_manager.GenerateScenario([TaskD], set([TaskB]))
- self.assertListEqual([TaskC, TaskD], scenario)
-
- def testCycleError(self):
- builder = task_manager.Builder(self.output_directory, None)
- @builder.RegisterTask('a')
- def TaskA():
- pass
- @builder.RegisterTask('b', dependencies=[TaskA])
- def TaskB():
- pass
- @builder.RegisterTask('c', dependencies=[TaskB])
- def TaskC():
- pass
- @builder.RegisterTask('d', dependencies=[TaskC])
- def TaskD():
- pass
- TaskA._dependencies.append(TaskC)
- with self.assertRaises(task_manager.TaskError):
- task_manager.GenerateScenario([TaskD], set())
-
- def testCollisionError(self):
- builder_a = task_manager.Builder(self.output_directory, None)
- builder_b = task_manager.Builder(self.output_directory, None)
- @builder_a.RegisterTask('a')
- def TaskA():
- pass
- @builder_b.RegisterTask('a')
- def TaskB():
- pass
- with self.assertRaises(task_manager.TaskError):
- task_manager.GenerateScenario([TaskA, TaskB], set())
-
- def testGenerateDependentSetPerTask(self):
- builder = task_manager.Builder(self.output_directory, None)
- @builder.RegisterTask('a')
- def TaskA():
- pass
- @builder.RegisterTask('b')
- def TaskB():
- pass
- @builder.RegisterTask('c', dependencies=[TaskA, TaskB])
- def TaskC():
- pass
- @builder.RegisterTask('d', dependencies=[TaskA])
- def TaskD():
- pass
-
- def RunSubTest(expected, scenario, task):
- self.assertEqual(
- expected, task_manager.GenerateDependentSetPerTask(scenario)[task])
-
- RunSubTest(set([]), [TaskA], TaskA)
- RunSubTest(set([]), [TaskA, TaskB], TaskA)
- RunSubTest(set([TaskC]), [TaskA, TaskB, TaskC], TaskA)
- RunSubTest(set([TaskC, TaskD]), [TaskA, TaskB, TaskC, TaskD], TaskA)
- RunSubTest(set([]), [TaskA, TaskD], TaskD)
-
- def testGraphVizOutput(self):
- builder = task_manager.Builder(self.output_directory, None)
- @builder.RegisterTask('a')
- def TaskA():
- pass
- @builder.RegisterTask('b')
- def TaskB():
- pass
- @builder.RegisterTask('c', dependencies=[TaskB, TaskA])
- def TaskC():
- pass
- @builder.RegisterTask('d', dependencies=[TaskC])
- def TaskD():
- pass
- @builder.RegisterTask('e')
- def TaskE():
- pass
- @builder.RegisterTask('f', dependencies=[TaskD, TaskE])
- def TaskF():
- pass
- self.TouchOutputFile('e')
- scenario = task_manager.GenerateScenario([TaskF], set([TaskE]))
- output = StringIO.StringIO()
- task_manager.OutputGraphViz(scenario, [TaskF], output)
- self.assertEqual(_GOLDEN_GRAPHVIZ, output.getvalue())
-
- def testListResumingTasksToFreeze(self):
- TaskManagerTestCase.setUp(self)
- builder = task_manager.Builder(self.output_directory, None)
- @builder.RegisterTask('a')
- def TaskA():
- pass
- @builder.RegisterTask('b')
- def TaskB():
- pass
- @builder.RegisterTask('c', dependencies=[TaskA, TaskB])
- def TaskC():
- pass
- @builder.RegisterTask('d', dependencies=[TaskA])
- def TaskD():
- pass
- @builder.RegisterTask('e', dependencies=[TaskC])
- def TaskE():
- pass
- @builder.RegisterTask('f', dependencies=[TaskC])
- def TaskF():
- pass
-
- for k in 'abcdef':
- self.TouchOutputFile(k)
-
- def RunSubTest(
- final_tasks, initial_frozen_tasks, skipped_tasks, reference):
- scenario = task_manager.GenerateScenario(
- final_tasks, initial_frozen_tasks)
- resume_frozen_tasks = task_manager.ListResumingTasksToFreeze(
- scenario, final_tasks, skipped_tasks)
- self.assertEqual(reference, resume_frozen_tasks)
-
- new_scenario = \
- task_manager.GenerateScenario(final_tasks, resume_frozen_tasks)
- self.assertEqual(skipped_tasks, set(new_scenario))
-
- RunSubTest([TaskA], set([]), set([TaskA]), [])
- RunSubTest([TaskD], set([]), set([TaskA, TaskD]), [])
- RunSubTest([TaskD], set([]), set([TaskD]), [TaskA])
- RunSubTest([TaskE, TaskF], set([TaskA]), set([TaskB, TaskC, TaskE, TaskF]),
- [TaskA])
- RunSubTest([TaskE, TaskF], set([TaskA]), set([TaskC, TaskE, TaskF]),
- [TaskA, TaskB])
- RunSubTest([TaskE, TaskF], set([TaskA]), set([TaskE, TaskF]), [TaskC])
- RunSubTest([TaskE, TaskF], set([TaskA]), set([TaskF]), [TaskE, TaskC])
- RunSubTest([TaskD, TaskE, TaskF], set([]), set([TaskD, TaskF]),
- [TaskA, TaskE, TaskC])
-
-
-class CommandLineControlledExecutionTest(TaskManagerTestCase):
- def setUp(self):
- TaskManagerTestCase.setUp(self)
- self.with_raise_exception_tasks = False
- self.task_execution_history = None
-
- def Execute(self, command_line_args):
- self.task_execution_history = []
- builder = task_manager.Builder(self.output_directory, None)
- @builder.RegisterTask('a')
- def TaskA():
- self.task_execution_history.append(TaskA.name)
- @builder.RegisterTask('b')
- def TaskB():
- self.task_execution_history.append(TaskB.name)
- @builder.RegisterTask('c', dependencies=[TaskA, TaskB])
- def TaskC():
- self.task_execution_history.append(TaskC.name)
- @builder.RegisterTask('d', dependencies=[TaskA])
- def TaskD():
- self.task_execution_history.append(TaskD.name)
- @builder.RegisterTask('e', dependencies=[TaskC])
- def TaskE():
- self.task_execution_history.append(TaskE.name)
- @builder.RegisterTask('raise_exception', dependencies=[TaskD])
- def RaiseExceptionTask():
- self.task_execution_history.append(RaiseExceptionTask.name)
- raise TestException('Expected error.')
- @builder.RegisterTask('raise_keyboard_interrupt', dependencies=[TaskD])
- def RaiseKeyboardInterruptTask():
- self.task_execution_history.append(RaiseKeyboardInterruptTask.name)
- raise KeyboardInterrupt
- @builder.RegisterTask('sudden_death', dependencies=[TaskD])
- def SimulateKillTask():
- self.task_execution_history.append(SimulateKillTask.name)
- raise MemoryError
- @builder.RegisterTask('timeout_error', dependencies=[TaskD])
- def SimulateTimeoutError():
- self.task_execution_history.append(SimulateTimeoutError.name)
- raise common_util.TimeoutError
- @builder.RegisterTask('errno_ENOSPC', dependencies=[TaskD])
- def SimulateENOSPC():
- self.task_execution_history.append(SimulateENOSPC.name)
- raise IOError(errno.ENOSPC, os.strerror(errno.ENOSPC))
- @builder.RegisterTask('errno_EPERM', dependencies=[TaskD])
- def SimulateEPERM():
- self.task_execution_history.append(SimulateEPERM.name)
- raise IOError(errno.EPERM, os.strerror(errno.EPERM))
-
- default_final_tasks = [TaskD, TaskE]
- if self.with_raise_exception_tasks:
- default_final_tasks.extend([
- RaiseExceptionTask,
- RaiseKeyboardInterruptTask,
- SimulateKillTask,
- SimulateTimeoutError,
- SimulateENOSPC,
- SimulateEPERM])
- task_parser = task_manager.CommandLineParser()
- parser = argparse.ArgumentParser(parents=[task_parser],
- fromfile_prefix_chars=task_manager.FROMFILE_PREFIX_CHARS)
- cmd = ['-o', self.output_directory]
- cmd.extend([i for i in command_line_args])
- args = parser.parse_args(cmd)
- with EatStdoutAndStderr():
- return task_manager.ExecuteWithCommandLine(args, default_final_tasks)
-
- def ResumeFilePath(self):
- return self.OutputPath(task_manager._TASK_RESUME_ARGUMENTS_FILE)
-
- def ResumeCmd(self):
- return task_manager.FROMFILE_PREFIX_CHARS + self.ResumeFilePath()
-
- def testSimple(self):
- self.assertEqual(0, self.Execute([]))
- self.assertListEqual(['a', 'd', 'b', 'c', 'e'], self.task_execution_history)
-
- def testDryRun(self):
- self.assertEqual(0, self.Execute(['-d']))
- self.assertListEqual([], self.task_execution_history)
-
- def testRegex(self):
- self.assertEqual(0, self.Execute(['-e', 'b', '-e', 'd']))
- self.assertListEqual(['b', 'a', 'd'], self.task_execution_history)
- self.assertEqual(1, self.Execute(['-e', r'\d']))
- self.assertListEqual([], self.task_execution_history)
-
- def testFreezing(self):
- self.assertEqual(0, self.Execute(['-f', r'\d']))
- self.assertListEqual(['a', 'd', 'b', 'c', 'e'], self.task_execution_history)
- self.TouchOutputFile('c')
- self.assertEqual(0, self.Execute(['-f', 'c']))
- self.assertListEqual(['a', 'd', 'e'], self.task_execution_history)
-
- def testDontFreezeUnreachableTasks(self):
- self.TouchOutputFile('c')
- self.assertEqual(0, self.Execute(['-e', 'e', '-f', 'c', '-f', 'd']))
-
- def testAbortOnFirstError(self):
- ARGS = ['-e', 'exception', '-e', r'^b$']
- self.with_raise_exception_tasks = True
- self.assertEqual(1, self.Execute(ARGS))
- self.assertListEqual(
- ['a', 'd', 'raise_exception'], self.task_execution_history)
- with open(self.ResumeFilePath()) as resume_input:
- self.assertEqual('-f\n^d$', resume_input.read())
-
- self.TouchOutputFile('d')
- self.assertEqual(1, self.Execute(ARGS + [self.ResumeCmd()]))
- self.assertListEqual(['raise_exception'], self.task_execution_history)
-
- self.assertEqual(1, self.Execute(ARGS + [self.ResumeCmd()]))
- self.assertListEqual(['raise_exception'], self.task_execution_history)
-
- self.assertEqual(1, self.Execute(ARGS + [self.ResumeCmd(), '-k']))
- self.assertListEqual(['raise_exception', 'b'], self.task_execution_history)
-
- def testKeepGoing(self):
- ARGS = ['-k', '-e', 'exception', '-e', r'^b$']
- self.with_raise_exception_tasks = True
- self.assertEqual(1, self.Execute(ARGS))
- self.assertListEqual(
- ['a', 'd', 'raise_exception', 'b'], self.task_execution_history)
- with open(self.ResumeFilePath()) as resume_input:
- self.assertEqual('-f\n^d$\n-f\n^b$', resume_input.read())
-
- self.TouchOutputFile('d')
- self.TouchOutputFile('b')
- self.assertEqual(1, self.Execute(ARGS + [self.ResumeCmd()]))
- self.assertListEqual(['raise_exception'], self.task_execution_history)
-
- self.assertEqual(1, self.Execute(ARGS + [self.ResumeCmd()]))
- self.assertListEqual(['raise_exception'], self.task_execution_history)
-
- def testKeyboardInterrupt(self):
- self.with_raise_exception_tasks = True
- with self.assertRaises(KeyboardInterrupt):
- self.Execute(
- ['-k', '-e', 'raise_keyboard_interrupt', '-e', r'^b$'])
- self.assertListEqual(['a', 'd', 'raise_keyboard_interrupt'],
- self.task_execution_history)
- with open(self.ResumeFilePath()) as resume_input:
- self.assertEqual('-f\n^d$', resume_input.read())
-
- def testResumeAfterSuddenDeath(self):
- EXPECTED_RESUME_FILE_CONTENT = '-f\n^a$\n-f\n^d$\n'
- ARGS = ['-k', '-e', 'sudden_death', '-e', r'^a$']
- self.with_raise_exception_tasks = True
- with self.assertRaises(MemoryError):
- self.Execute(ARGS)
- self.assertListEqual(
- ['a', 'd', 'sudden_death'], self.task_execution_history)
- with open(self.ResumeFilePath()) as resume_input:
- self.assertEqual(EXPECTED_RESUME_FILE_CONTENT, resume_input.read())
-
- self.TouchOutputFile('a')
- self.TouchOutputFile('d')
- with self.assertRaises(MemoryError):
- self.Execute(ARGS + [self.ResumeCmd()])
- self.assertListEqual(['sudden_death'], self.task_execution_history)
- with open(self.ResumeFilePath()) as resume_input:
- self.assertEqual(EXPECTED_RESUME_FILE_CONTENT, resume_input.read())
-
- with self.assertRaises(MemoryError):
- self.Execute(ARGS + [self.ResumeCmd()])
- self.assertListEqual(['sudden_death'], self.task_execution_history)
- with open(self.ResumeFilePath()) as resume_input:
- self.assertEqual(EXPECTED_RESUME_FILE_CONTENT, resume_input.read())
-
- def testTimeoutError(self):
- self.with_raise_exception_tasks = True
- self.Execute(['-k', '-e', 'timeout_error', '-e', r'^b$'])
- self.assertListEqual(['a', 'd', 'timeout_error', 'b'],
- self.task_execution_history)
- with open(self.ResumeFilePath()) as resume_input:
- self.assertEqual('-f\n^d$\n-f\n^b$', resume_input.read())
-
- def testENOSPC(self):
- self.with_raise_exception_tasks = True
- with self.assertRaises(IOError):
- self.Execute(['-k', '-e', 'errno_ENOSPC', '-e', r'^a$'])
- self.assertListEqual(
- ['a', 'd', 'errno_ENOSPC'], self.task_execution_history)
- with open(self.ResumeFilePath()) as resume_input:
- self.assertEqual('-f\n^a$\n-f\n^d$\n', resume_input.read())
-
- def testEPERM(self):
- self.with_raise_exception_tasks = True
- self.Execute(['-k', '-e', 'errno_EPERM', '-e', r'^b$'])
- self.assertListEqual(['a', 'd', 'errno_EPERM', 'b'],
- self.task_execution_history)
- with open(self.ResumeFilePath()) as resume_input:
- self.assertEqual('-f\n^d$\n-f\n^b$', resume_input.read())
-
- def testImpossibleTasks(self):
- self.assertEqual(1, self.Execute(['-f', r'^a$', '-e', r'^c$']))
- self.assertListEqual([], self.task_execution_history)
-
- self.assertEqual(0, self.Execute(
- ['-f', r'^a$', '-e', r'^c$', '-e', r'^b$']))
- self.assertListEqual(['b'], self.task_execution_history)
-
-
-if __name__ == '__main__':
- unittest.main()
diff --git a/loading/test_utils.py b/loading/test_utils.py
deleted file mode 100644
index 2b2479c..0000000
--- a/loading/test_utils.py
+++ /dev/null
@@ -1,257 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-"""Common utilities used in unit tests, within this directory."""
-
-import clovis_constants
-import dependency_graph
-import devtools_monitor
-import loading_trace
-import page_track
-import request_track
-import tracing_track
-import user_satisfied_lens
-
-
-class FakeRequestTrack(devtools_monitor.Track):
- def __init__(self, events):
- super(FakeRequestTrack, self).__init__(None)
- self._events = events
- for e in self._events:
- e.timing.request_time = e.timestamp
-
- def Handle(self, _method, _msg):
- assert False # Should never be called.
-
- def GetEvents(self):
- return self._events
-
- def ToJsonDict(self):
- cls = request_track.RequestTrack
- return {cls._EVENTS_KEY: [
- rq.ToJsonDict() for rq in self.GetEvents()],
- cls._METADATA_KEY: {
- cls._DUPLICATES_KEY: 0,
- cls._INCONSISTENT_INITIATORS_KEY: 0}}
-
-
-class FakePageTrack(devtools_monitor.Track):
- def __init__(self, events):
- super(FakePageTrack, self).__init__(None)
- self._events = events
-
- def Handle(self, _method, _msg):
- assert False # Should never be called.
-
- def GetEvents(self):
- return self._events
-
- def GetMainFrameId(self):
- event = self._events[0]
- # Make sure our laziness is not an issue here.
- assert event['method'] == page_track.PageTrack.FRAME_STARTED_LOADING
- return event['frame_id']
-
- def ToJsonDict(self):
- return {'events': [event for event in self._events]}
-
-
-def MakeRequestWithTiming(
- url, source_url, timing_dict, magic_content_type=False,
- initiator_type='other'):
- """Make a dependent request.
-
- Args:
- url: a url, or number which will be used as a url.
- source_url: a url or number which will be used as the source (initiating)
- url. If the source url is not present, then url will be a root. The
- convention in tests is to use a source_url of 'null' in this case.
- timing_dict: (dict) Suitable to be passed to request_track.Timing().
- initiator_type: the initiator type to use.
-
- Returns:
- A request_track.Request.
- """
- assert initiator_type in ('other', 'parser')
- timing = request_track.Timing.FromDevToolsDict(timing_dict)
- rq = request_track.Request.FromJsonDict({
- 'timestamp': timing.request_time,
- 'request_id': str(MakeRequestWithTiming._next_request_id),
- 'url': 'http://' + str(url),
- 'initiator': {'type': initiator_type, 'url': 'http://' + str(source_url)},
- 'response_headers': {'Content-Type':
- 'null' if not magic_content_type
- else 'magic-debug-content' },
- 'timing': timing.ToJsonDict()
- })
- MakeRequestWithTiming._next_request_id += 1
- return rq
-
-
-MakeRequestWithTiming._next_request_id = 0
-
-
-def MakeRequest(
- url, source_url, start_time=None, headers_time=None, end_time=None,
- magic_content_type=False, initiator_type='other'):
- """Make a dependent request.
-
- Args:
- url: a url, or number which will be used as a url.
- source_url: a url or number which will be used as the source (initiating)
- url. If the source url is not present, then url will be a root. The
- convention in tests is to use a source_url of 'null' in this case.
- start_time: The request start time in milliseconds. If None, this is set to
- the current request id in seconds. If None, the two other time parameters
- below must also be None.
- headers_time: The timestamp when resource headers were received, or None.
- end_time: The timestamp when the resource was finished, or None.
- magic_content_type (bool): if true, set a magic content type that makes url
- always be detected as a valid source and destination request.
- initiator_type: the initiator type to use.
-
- Returns:
- A request_track.Request.
- """
- assert ((start_time is None and
- headers_time is None and
- end_time is None) or
- (start_time is not None and
- headers_time is not None and
- end_time is not None)), \
- 'Need no time specified or all times specified'
- if start_time is None:
- # Use the request id in seconds for timestamps. This guarantees increasing
- # times which makes request dependencies behave as expected.
- start_time = headers_time = end_time = (
- MakeRequestWithTiming._next_request_id * 1000)
- timing_dict = {
- # connectEnd should be ignored.
- 'connectEnd': (end_time - start_time) / 2,
- 'receiveHeadersEnd': headers_time - start_time,
- 'loadingFinished': end_time - start_time,
- 'requestTime': start_time / 1000.0}
- return MakeRequestWithTiming(
- url, source_url, timing_dict, magic_content_type, initiator_type)
-
-
-def LoadingTraceFromEvents(requests, page_events=None, trace_events=None):
- """Returns a LoadingTrace instance from various events."""
- request = FakeRequestTrack(requests)
- page_event_track = FakePageTrack(page_events if page_events else [])
- if trace_events is not None:
- track = tracing_track.TracingTrack(None,
- clovis_constants.DEFAULT_CATEGORIES)
- track.Handle('Tracing.dataCollected',
- {'params': {'value': [e for e in trace_events]}})
- else:
- track = None
- return loading_trace.LoadingTrace(
- None, None, page_event_track, request, track)
-
-
-class SimpleLens(object):
- """A simple replacement for RequestDependencyLens.
-
- Uses only the initiator url of a request for determining a dependency.
- """
- def __init__(self, trace):
- self._trace = trace
-
- def GetRequestDependencies(self):
- url_to_rq = {}
- deps = []
- for rq in self._trace.request_track.GetEvents():
- assert rq.url not in url_to_rq
- url_to_rq[rq.url] = rq
- for rq in self._trace.request_track.GetEvents():
- initiating_url = rq.initiator['url']
- if initiating_url in url_to_rq:
- deps.append((url_to_rq[initiating_url], rq, rq.initiator['type']))
- return deps
-
-
-class TestDependencyGraph(dependency_graph.RequestDependencyGraph):
- """A dependency graph created from requests using a simple lens."""
- def __init__(self, requests):
- lens = SimpleLens(LoadingTraceFromEvents(requests))
- super(TestDependencyGraph, self).__init__(requests, lens)
-
-
-class MockConnection(object):
- """Mock out connection for testing.
-
- Use Expect* for requests expecting a repsonse. SyncRequestNoResponse puts
- requests into no_response_requests_seen.
-
- TODO(mattcary): use a standard mock system (the back-ported python3
- unittest.mock? devil.utils.mock_calls?)
-
- """
- def __init__(self, test_case):
- # List of (method, params) tuples.
- self.no_response_requests_seen = []
-
- self._test_case = test_case
- self._expected_responses = {}
-
- def ExpectSyncRequest(self, response, method, params=None):
- """Test method when the connection is expected to make a SyncRequest.
-
- Args:
- response: (dict) the response to generate.
- method: (str) the expected method in the call.
- params: (dict) the expected params in the call.
- """
- self._expected_responses.setdefault(method, []).append((params, response))
-
- def AllExpectationsUsed(self):
- """Returns true when all expectations where used."""
- return not self._expected_responses
-
- def SyncRequestNoResponse(self, method, params):
- """Mocked method."""
- self.no_response_requests_seen.append((method, params))
-
- def SyncRequest(self, method, params=None):
- """Mocked method."""
- expected_params, response = self._expected_responses[method].pop(0)
- if not self._expected_responses[method]:
- del self._expected_responses[method]
- self._test_case.assertEqual(expected_params, params)
- return response
-
-
-class MockUserSatisfiedLens(user_satisfied_lens._FirstEventLens):
- def _CalculateTimes(self, _):
- self._satisfied_msec = float('inf')
- self._event_msec = float('inf')
-
-
-class TraceCreator(object):
- def __init__(self):
- self._request_index = 1
-
- def RequestAt(self, timestamp_msec, duration=1, frame_id=None):
- timestamp_sec = float(timestamp_msec) / 1000
- rq = request_track.Request.FromJsonDict({
- 'url': 'http://bla-%s-.com' % timestamp_msec,
- 'document_url': 'http://bla.com',
- 'request_id': '0.%s' % self._request_index,
- 'frame_id': frame_id or '123.%s' % timestamp_msec,
- 'initiator': {'type': 'other'},
- 'timestamp': timestamp_sec,
- 'timing': {'request_time': timestamp_sec,
- 'loading_finished': duration},
- 'status': 200})
- self._request_index += 1
- return rq
-
- def CreateTrace(self, requests, events, main_frame_id):
- page_event = {'method': 'Page.frameStartedLoading',
- 'frame_id': main_frame_id}
- trace = LoadingTraceFromEvents(
- requests, trace_events=events, page_events=[page_event])
- trace.tracing_track.SetMainFrameID(main_frame_id)
- return trace
diff --git a/loading/testdata/scanner_vs_parser.trace b/loading/testdata/scanner_vs_parser.trace
deleted file mode 100644
index 991fe8b..0000000
--- a/loading/testdata/scanner_vs_parser.trace
+++ /dev/null
@@ -1,231 +0,0 @@
-{
- "metadata": {},
- "page_track": {
- "events": []
- },
- "request_track": {
- "events": [
- {
- "initiator": {
- "type": "other"
- },
- "protocol": "http/1.0",
- "url": "http://l/"
- },
- {
- "initiator": {
- "lineNumber": 28,
- "type": "parser",
- "url": "http://l/"
- },
- "protocol": "data",
- "url": "data:image/png;base64,iVBO[PRUNED]"
- },
- {
- "initiator": {
- "lineNumber": 21,
- "type": "parser",
- "url": "http://l/"
- },
- "protocol": "http/1.0",
- "url": "http://l/0.png"
- },
- {
- "initiator": {
- "type": "parser",
- "url": "http://l/"
- },
- "protocol": "http/1.0",
- "url": "http://l/1.png"
- },
- {
- "initiator": {
- "lineNumber": 22,
- "type": "parser",
- "url": "http://l/"
- },
- "protocol": "http/1.0",
- "url": "http://l/0.css"
- },
- {
- "initiator": {
- "type": "other"
- },
- "protocol": "http/1.0",
- "url": "http://l/favicon.ico"
- }
- ],
- "metadata": {
- "duplicates_count": 0,
- "inconsistent_initiators": 0
- }
- },
- "tracing_track": {
- "categories": [],
- "events": [
- {
- "args": {
- "data": {
- "priority": 4,
- "url": "http://l/"
- }
- },
- "cat": "blink.net",
- "name": "Resource",
- "ph": "S",
- "pid": 3,
- "ts": 1213697828839,
- "id": 0
- },
- {
- "args": {},
- "cat": "blink.net",
- "name": "Resource",
- "ph": "F",
- "pid": 3,
- "ts": 1213697889955,
- "id": 0
- },
- {
- "args": {
- "data": {
- "priority": 1,
- "url": "http://l/0.png"
- }
- },
- "cat": "blink.net",
- "name": "Resource",
- "ph": "S",
- "pid": 3,
- "ts": 1213697891911,
- "id": 1
- },
- {
- "args": {
- "step": "Preload"
- },
- "cat": "blink.net",
- "name": "Resource",
- "ph": "T",
- "pid": 3,
- "ts": 1213697892658,
- "id": 1
- },
- {
- "args": {
- "data": {
- "priority": 1,
- "url": "http://l/0.css"
- }
- },
- "cat": "blink.net",
- "name": "Resource",
- "ph": "S",
- "pid": 3,
- "ts": 1213697892660,
- "id": 2
- },
- {
- "args": {
- "step": "Preload"
- },
- "cat": "blink.net",
- "name": "Resource",
- "ph": "T",
- "pid": 3,
- "ts": 1213697892661,
- "id": 2
- },
- {
- "args": {
- "data": {
- "priority": 1,
- "url": "http://l/1.png"
- }
- },
- "cat": "blink.net",
- "name": "Resource",
- "ph": "S",
- "pid": 3,
- "ts": 1213697934273,
- "id": 3
- },
- {
- "args": {
- "priority": 3,
- "step": "ChangePriority"
- },
- "cat": "blink.net",
- "name": "Resource",
- "ph": "T",
- "pid": 3,
- "ts": 1213697984606,
- "id": 0
- },
- {
- "args": {},
- "cat": "blink.net",
- "name": "Resource",
- "ph": "F",
- "pid": 3,
- "ts": 1213697943810,
- "id": 1
- },
- {
- "args": {
- "priority": 3,
- "step": "ChangePriority"
- },
- "cat": "blink.net",
- "name": "Resource",
- "ph": "T",
- "pid": 3,
- "ts": 1213697984875,
- "id": 1
- },
- {
- "args": {
- "priority": 3,
- "step": "ChangePriority"
- },
- "cat": "blink.net",
- "name": "Resource",
- "ph": "T",
- "pid": 3,
- "ts": 1213697985346,
- "id": 2
- },
- {
- "args": {
- "priority": 3,
- "step": "ChangePriority"
- },
- "cat": "blink.net",
- "name": "Resource",
- "ph": "T",
- "pid": 3,
- "ts": 1213697985346,
- "id": 3
- },
- {
- "args": {},
- "cat": "blink.net",
- "name": "Resource",
- "ph": "F",
- "pid": 3,
- "ts": 1213698035637,
- "id": 2
- },
- {
- "args": {},
- "cat": "blink.net",
- "name": "Resource",
- "ph": "F",
- "pid": 3,
- "ts": 1213698035637,
- "id": 3
- }
- ]
- },
- "url": "http://l/"
-}
diff --git a/loading/trace_test/README.md b/loading/trace_test/README.md
deleted file mode 100644
index c16e472..0000000
--- a/loading/trace_test/README.md
+++ /dev/null
@@ -1,8 +0,0 @@
-Trace Integration Tests
-
-This directory defines integration tests which verify traces in various corners
-of the HTML/JS/CSS world.
-
-The unittests in this directory are run as part of
-tools/android/loading/run_tests. The integration tests are only run
-manually. See webserver_test.py for details.
diff --git a/loading/trace_test/__init__.py b/loading/trace_test/__init__.py
deleted file mode 100644
index e69de29..0000000
--- a/loading/trace_test/__init__.py
+++ /dev/null
diff --git a/loading/trace_test/results/1.result b/loading/trace_test/results/1.result
deleted file mode 100644
index 0b445dc..0000000
--- a/loading/trace_test/results/1.result
+++ /dev/null
@@ -1,7 +0,0 @@
-parser (no stack) 1.css
-parser (no stack) 1a.js
-parser (no stack) 1a.png
-parser (no stack) 1b.png
-script (1a.js:9) 1b.js
-script (1b.js:54) 1.ttf
-script (1b.js:54) application/font-wof...Zk73/mAw==
diff --git a/loading/trace_test/results/2.result b/loading/trace_test/results/2.result
deleted file mode 100644
index 6fe8a06..0000000
--- a/loading/trace_test/results/2.result
+++ /dev/null
@@ -1,5 +0,0 @@
-parser (no stack) 1.css
-parser (no stack) 1a.png
-parser (no stack) 1b.js
-parser (no stack) 1b.png
-parser (no stack) application/font-wof...Zk73/mAw==
diff --git a/loading/trace_test/results/3.result b/loading/trace_test/results/3.result
deleted file mode 100644
index 196a88d..0000000
--- a/loading/trace_test/results/3.result
+++ /dev/null
@@ -1,6 +0,0 @@
-parser (no stack) 3a.js
-parser (no stack) 3c.js
-script (3a.js:10/3a.js:14/3.html:20) 3a.jpg
-script (3a.js:20) 3b.js
-script (3b.js:9) 3b.jpg
-script (3c.js:7/3.html:21) 3c.jpg
diff --git a/loading/trace_test/test_server.py b/loading/trace_test/test_server.py
deleted file mode 100755
index 45517f7..0000000
--- a/loading/trace_test/test_server.py
+++ /dev/null
@@ -1,110 +0,0 @@
-#! /usr/bin/python
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-"""A simple http server for running local integration tests.
-
-This chooses a port dynamically and so can communicate that back to its spawner
-via a named pipe at --fifo. Sources are served from the tree named at
---source_dir.
-"""
-
-
-import argparse
-import cgi
-import json
-import os.path
-import logging
-import re
-import time
-import wsgiref.simple_server
-
-
-_CONTENT_TYPE_FOR_SUFFIX = {
- 'css': 'text/css',
- 'html': 'text/html',
- 'jpg': 'image/jpeg',
- 'js': 'text/javascript',
- 'json': 'application/json',
- 'png': 'image/png',
- 'ttf': 'font/ttf',}
-
-# Name of the JSON file containing per file custom response headers located in
-# the --source_dir.
-# This file should structured like:
-# {
-# 'mydocument.html': [
-# ['Cache-Control', 'max-age=3600'],
-# ['Content-Encoding', 'gzip'],
-# ]
-# }
-RESPONSE_HEADERS_PATH = 'RESPONSE_HEADERS.json'
-
-
-class ServerApp(object):
- """WSGI App.
-
- Dispatches by matching, in order, against GetPaths.
- """
- def __init__(self, source_dir):
- self._source_dir = source_dir
- self._response_headers = {}
- response_header_path = os.path.join(source_dir, RESPONSE_HEADERS_PATH)
- if os.path.exists(response_header_path):
- with open(response_header_path) as response_headers_file:
- self._response_headers = json.load(response_headers_file)
-
- def __call__(self, environ, start_response):
- """WSGI dispatch.
-
- Args:
- environ: environment list.
- start_response: WSGI response start.
-
- Returns:
- Iterable server result.
- """
- path = environ.get('PATH_INFO', '')
- while path.startswith('/'):
- path = path[1:]
- filename = os.path.join(self._source_dir, path)
- if not os.path.exists(filename):
- logging.info('%s not found', filename)
- start_response('404 Not Found', [('Content-Type', 'text/html')])
- return ["""<!DOCTYPE html>
-<html>
-<head>
-<title>Not Found</title>
-<body>%s not found</body>
-</html>""" % path]
-
- logging.info('responding with %s', filename)
- suffix = path[path.rfind('.') + 1:]
- headers = [('Content-Type', _CONTENT_TYPE_FOR_SUFFIX[suffix])]
- if path in self._response_headers:
- for header in self._response_headers[path]:
- headers.append((str(header[0]), str(header[1])))
- start_response('200 OK', headers)
- return [file(filename).read()]
-
-
-if __name__ == '__main__':
- logging.basicConfig(level=logging.INFO)
- parser = argparse.ArgumentParser()
- parser.add_argument('--fifo', default=None,
- help='Named pipe used to communicate port')
- parser.add_argument('--source_dir', required=True,
- help='Directory holding sources to serve.')
- args = parser.parse_args()
- server_app = ServerApp(args.source_dir)
- server = wsgiref.simple_server.make_server(
- 'localhost', 0, server_app)
- ip, port = server.server_address
- logging.info('Listening on port %s at %s', port, args.source_dir)
- if args.fifo:
- fifo = file(args.fifo, 'w')
- fifo.write('%s\n' % port)
- fifo.flush()
- fifo.close()
- server.serve_forever()
diff --git a/loading/trace_test/tests/1.css b/loading/trace_test/tests/1.css
deleted file mode 100644
index 5ae3598..0000000
--- a/loading/trace_test/tests/1.css
+++ /dev/null
@@ -1,9 +0,0 @@
-/* Copyright 2016 The Chromium Authors. All rights reserved.
- * Use of this source code is governed by a BSD-style license that can be
- * found in the LICENSE file.
- */
-
-.outside {
- font-family: inline;
- color: red;
-}
diff --git a/loading/trace_test/tests/1.html b/loading/trace_test/tests/1.html
deleted file mode 100644
index 5dd15ce..0000000
--- a/loading/trace_test/tests/1.html
+++ /dev/null
@@ -1,43 +0,0 @@
-<!DOCTYPE html>
-<!--
- Test Javascript Redirection in <head>
-
- In <head> we have a CSS, a javascript file and a <style> tag. The javascript
- file inserts another javascript file into head, which itself inserts a <style>
- tag containing an inline font. The static <style> tag below has a font
- resource. We expect the static font resource to have an initiator with a stack
- trace incorrectly attached from the javascript.
-
- TODO(mattcary): It also appears that if resources are found in the cache we
- get different intiators: namely both the fonts have a parser initiator with no
- stack. This is not exactly the problem, as occasionally the initiator sequence
- changes, but can become consistent again by switching binaries with each run
- (eg, out/Debug vs out/Release).
--->
-<html>
-<head>
-<title>Test Javascript Redirection</title>
-<link rel='stylesheet' type='text/css' href='1.css'>
-<script type='text/javascript' src='1a.js'></script>
-<style>
-/* Custom font */
-@font-face {
- font-family: 'test1';
- font-style: normal;
- font-weight: normal;
- src: local('test1'), local(test1), url(1.ttf) format('truetype');
-}
-</style>
-<style>
-div {
- background: url('1a.png')
-}
-</style>
-</head>
-<body>
-<img src='1b.png' alt=''>
-
-<div class="outside">ABCpqrst</div>
-<div class="inside">ABCpqrst</div>
-</body>
-</html>
diff --git a/loading/trace_test/tests/1.ttf b/loading/trace_test/tests/1.ttf
deleted file mode 100644
index d268785..0000000
--- a/loading/trace_test/tests/1.ttf
+++ /dev/null
Binary files differ
diff --git a/loading/trace_test/tests/1a.js b/loading/trace_test/tests/1a.js
deleted file mode 100644
index 496cebb..0000000
--- a/loading/trace_test/tests/1a.js
+++ /dev/null
@@ -1,9 +0,0 @@
-/* Copyright 2016 The Chromium Authors. All rights reserved.
- * Use of this source code is governed by a BSD-style license that can be
- * found in the LICENSE file.
- */
-
-var scr = document.createElement('script');
-scr.setAttribute('type', 'text/javascript');
-scr.setAttribute('src', '1b.js');
-document.getElementsByTagName('head')[0].appendChild(scr)
diff --git a/loading/trace_test/tests/1a.png b/loading/trace_test/tests/1a.png
deleted file mode 100644
index 88a0325..0000000
--- a/loading/trace_test/tests/1a.png
+++ /dev/null
Binary files differ
diff --git a/loading/trace_test/tests/1b.js b/loading/trace_test/tests/1b.js
deleted file mode 100644
index eaf905d..0000000
--- a/loading/trace_test/tests/1b.js
+++ /dev/null
@@ -1,60 +0,0 @@
-/* Copyright 2016 The Chromium Authors. All rights reserved.
- * Use of this source code is governed by a BSD-style license that can be
- * found in the LICENSE file.
- */
-
-/* Droid Sans from Google Fonts */
-var font = '@font-face { font-family: "inline"; ' +
- 'src: url(data:application/font-woff2;charset=utf-8;base64,' +
- 'd09GMgABAAAAAAboABEAAAAADcgAAAaGAAEAAAAAAAAAAAAAAAAAAAAAAA' +
- 'AAAAAAP0ZGVE0cGigGVgCCeggUCYRlEQgKiDSIWgE2AiQDWAsuAAQgBYJ+B' +
- '4FVDHg/d2ViZgYbkQxRlC1Sm+yLArvh2MHKbfiAVIG+Knex+u6x+Pyd0L+n' +
- '4Cl0n74VfYIZH6AMqEOdEag0hxQqkzJcpeRdedy7DCB9T9LF3Y3l8976Xbg' +
- 'X6AArK4qytKYdx2UW4LK8xGbPr2v+AmhM4aV1UgMv5btaum+17iX0YpGGCG' +
- 'EYLIOf7Zf340t4NJtpeX7PFhBmixQP5C/r1GtZokUUskL2f9fU3r93GZDv8' +
- '+jM5uzlH7wmKVHaEV07AFCtGtkaPQtEalMT1s5gePQ3sRnV4Ie/BQjAB0te' +
- '/QV450a0AsBn99o2dz6vCnQQAg6CMAHBq5hchnij85b8j4/nx/4LIH3J2e5' +
- 'XnHWa4BC4kDXZW4H4ypUcLmTqeMADwE+YsRuLDoNQTwOuCFHme+wHNKnjeQ' +
- '4VQlZxh0I4HB6bOp5lQIUVVdi92f3s9+zLil/yP//x853/zhXWky0SLJ0S5' +
- '4zrezfa/qbk/3t+wEvL5BhOBEmi7632G4otEyCtC2O/ot+wANdlQyrVGts8' +
- 'YN/SC/C0smwfFwt9QSr1wUnXoLawNbial7VsAvWrAVkfgrAdYtjs6G/3rQ1' +
- 'prtX/7j8bsoFYqqg3bKtO6FyHi5IwOe5DkoPCi688Potvk0Fgih5ZDqp6NR' +
- '2tSGoKVcR8qEL7C7Ab4UkZ+PwOJggFnUA/cz93Uzq5PGiMDbqKNoiLBbWdd' +
- 'SUHk81sPbrQ01ECBl4Qg1w6qURt3Dq3TkqL8+xIw81VqTxILmtzfUV2mSuX' +
- '4jxxDKTSs2EtB1oqUXphrTK/5i3bmCC9uSugDMMdBIzsS5gxw7YwvS18KJN' +
- '2DQUNmFV3mLEd7EpyXcjnRpsqxjkfzhOAwd3NY1rOA3dxgOWS2VOgLH2hnf' +
- 'P/lR3auchORtav1cGLzmsDOUK9VN/Y6HWdO4EFRDgyvioOmZTnCeDGoKywg' +
- 'MUlNKiHoEBT0njIyMNMZAtIl0LryFDQIRkIr/M9BUGyDBuANvmGAaAEfAh8' +
- 'Dxn1wNn1oazEwf00PlI8b3EQVsszOvJSeki/GZNCuSSCHSolHeYacwCKIkV' +
- 'gk0lGdQlFrwAlijFrUPfCPiHBEieVgkVuOoyOOaMxTXcR3AqCGkGfJQCoYX' +
- 'DR0JjAYqMqiuIQszkxdjNRcCh0k26crIa2hwb7S6x64eeF5UQEQuWvZN80m' +
- 'wrN8Xqyl8cyNI2QiZ/ARSYML05ZL/9fbIz/Q15LOjnMbVPpwZQNCuOmwM3L' +
- 'UiDSG5Te4UTpIZyv1JidE620EGKWp6qyYKVa2kGqomYifgQbFl05rNhXdk2' +
- '39FozuhTZgW7ZxrT0CHrQTGiwxf6RRbMBj8ykW+lgFqPbD7MqhUhzUFOzSI' +
- 'y0Bgv5lRBu4PGKZ4kYGSXtw4jSajk1kHG6FI6ayMYtqVtyIPfmKDtmhsA5s' +
- 'IsBVWRHjmKyii7cJGTPWkAzzVY8Mn5iHJvJtlTehFLHzNU61VhdMNiyC1a7' +
- '/o0MazQ1udRV1/RSwbgdhHPmTmlfgHUljaZl+YIF21T7wXFURxbqSgaPMXu' +
- 'AKkHFhRQaoCcoQsY5NxVP+7KyQxe8OGLMrp1iuoqu33iNFHQxsQnbG9dkX+' +
- 'mmSC6pbrljMi3Tu7p0zSqlUK3aoeOw827lGNdLWkAuD+wzpiunoecYa+ppN' +
- 'g0uIIfopXHHsrt7Fi0+0zg9123bWyYiwx5W2Asewfq7ckv+qphwrLb4fr4/' +
- 'D/zVWZssC/ATIP8Nc5KAn2R/ECQDG/9xOKzN+ZfVAJzXgmMS8CHxEqHmDhJ' +
- '3mc9OTpEvQY4D3BOWKkgnsBXYnXT/WbePNtZ/v0kHCURbm/UROYYyz+EiXm' +
- 'G3IQoQks87lP8mIdwuTXrcHm0MuX1CVrsD8px2v0Mbl93vMsIT7veoksL72' +
- 't1Dv3Xp4iOukLFEdgL+7JSKja5Z3qEopSoEbFbnVwz0UEa8/ChDiY5IyMFC' +
- 'IR+TUyC/aWEHS0WxdgAFMY/fmcdC4oqzkDFiW8Qzyrmchn9OxEYbGteVGVs' +
- 'U8eYdv4uJjapb93SE21+g2IOMb1Pj79pAHHFxmcJUpoknvgSSk7wUpCglKU' +
- 'tFqlKTujSkGZxrr7E0c1f7yiDB1UndihFm1SyQURKTMTKbzCFzyTwynyzQV' +
- 'jTEa7U5uvS0VS+ePQ15hk3KbWcPLs8esLd/QzHV/ujFrK/UOR3oVeZfxDPA' +
- 'nXCNktFqJcM1KVF3ohJQDWSpTdTvBwboLiPX7iqwaaZUPuIAt0Zk73/mAw==) ' +
- 'format("woff2"); font-weight: normal; font-style: normal;} ' +
- '.inside { font-family: test1; color: green;}';
-
-var sty = document.createElement('style');
-sty.appendChild(document.createTextNode(font));
-document.getElementsByTagName('head')[0].appendChild(sty);
-
-var dummyToCheckStackIsnotJustEndOfFile = 0;
-function makingTheStackTraceReallyInteresting(x) {
- dummyToCheckStackIsnotJustEndOfFile = x + 3;
-}
-makingTheStackTraceReallyInteresting(5);
diff --git a/loading/trace_test/tests/1b.png b/loading/trace_test/tests/1b.png
deleted file mode 100644
index dca89e0..0000000
--- a/loading/trace_test/tests/1b.png
+++ /dev/null
Binary files differ
diff --git a/loading/trace_test/tests/2.html b/loading/trace_test/tests/2.html
deleted file mode 100644
index 1d79ff3..0000000
--- a/loading/trace_test/tests/2.html
+++ /dev/null
@@ -1,36 +0,0 @@
-<!DOCTYPE html>
-<!--
- Test Less Javascript Redirection in <head>
-
- Like 1.html, in <head> we have a CSS, a javascript file and a <style> tag. In
- this case, the javacript file directly inserts a <style> tag into <head>. This
- causes the subsequently loaded font to not be associated with a stack trace,
- but also causes the dynamically loaded static font from 1b.js to also not have
- a stack trace.
--->
-<html>
-<head>
-<title>As 1.html, but one less redirection</title>
-<link rel='stylesheet' type='text/css' href='1.css'>
-<script type='text/javascript' src='1b.js'></script>
-<style>
-@font-face {
- font-family: 'indie';
- font-style: normal;
- font-weight: normal;
- src: local('Indie Flower'), local('IndieFlower'), url(1.ttf) format('truetype');
-}
-</style>
-<style>
-div {
- background: url('1a.png')
-}
-</style>
-</head>
-<body>
-<img src='1b.png' alt=''>
-
-<div class="outside">Outside</div>
-<div class="inside">Inside</div>
-</body>
-</html>
diff --git a/loading/trace_test/tests/3.html b/loading/trace_test/tests/3.html
deleted file mode 100644
index f6850eb..0000000
--- a/loading/trace_test/tests/3.html
+++ /dev/null
@@ -1,24 +0,0 @@
-<!DOCTYPE html>
-<!--
- Javascript indirect image loading.
-
- 3a.js defines fn1(), which adds an <img> tag to the body. 3a.js also
- inserts a script tag with 3b.js into head (between the scripts for
- 3a and 3c). 3b.js itself creates an <img> tag which directly adds it
- to the body. Finally, 3c.js defines fn3(), which
- modifies <img id='img3'>.
-
- Note that as 3b.js adds a tag to the body, it is executed only after
- the body has been parsed. No, I don't know how that works either.
--->
-<html>
-<head>
-<script type='text/javascript' src='3a.js'></script>
-<script type='text/javascript' src='3c.js'></script>
-<img src='' alt='' id='img3'>
-<script type='text/javascript'>
- fn1();
- fn3();
-</script>
-</body>
-</html>
diff --git a/loading/trace_test/tests/3a.jpg b/loading/trace_test/tests/3a.jpg
deleted file mode 100644
index 25f3a43..0000000
--- a/loading/trace_test/tests/3a.jpg
+++ /dev/null
Binary files differ
diff --git a/loading/trace_test/tests/3a.js b/loading/trace_test/tests/3a.js
deleted file mode 100644
index 31f6dca..0000000
--- a/loading/trace_test/tests/3a.js
+++ /dev/null
@@ -1,21 +0,0 @@
-/* Copyright 2016 The Chromium Authors. All rights reserved.
- * Use of this source code is governed by a BSD-style license that can be
- * found in the LICENSE file.
- */
-
-function addImg(img_link) {
- var img = document.createElement('img');
- img.setAttribute('src', img_link);
- img.setAttribute('alt', '');
- document.body.appendChild(img);
-}
-
-function fn1() {
- addImg('3a.jpg');
-}
-
-var scr = document.createElement('script');
-scr.setAttribute('src', '3b.js');
-scr.setAttribute('type', 'text/javascript');
-document.getElementsByTagName('head')[0].insertBefore(
- scr, document.getElementsByTagName('script')[0].nextSibling);
diff --git a/loading/trace_test/tests/3b.jpg b/loading/trace_test/tests/3b.jpg
deleted file mode 100644
index de44b66..0000000
--- a/loading/trace_test/tests/3b.jpg
+++ /dev/null
Binary files differ
diff --git a/loading/trace_test/tests/3b.js b/loading/trace_test/tests/3b.js
deleted file mode 100644
index 9ccc020..0000000
--- a/loading/trace_test/tests/3b.js
+++ /dev/null
@@ -1,9 +0,0 @@
-/* Copyright 2016 The Chromium Authors. All rights reserved.
- * Use of this source code is governed by a BSD-style license that can be
- * found in the LICENSE file.
- */
-
-var img = document.createElement('img');
-img.setAttribute('src', '3b.jpg');
-img.setAttribute('alt', '');
-document.body.appendChild(img);
diff --git a/loading/trace_test/tests/3c.jpg b/loading/trace_test/tests/3c.jpg
deleted file mode 100644
index 688b70b..0000000
--- a/loading/trace_test/tests/3c.jpg
+++ /dev/null
Binary files differ
diff --git a/loading/trace_test/tests/3c.js b/loading/trace_test/tests/3c.js
deleted file mode 100644
index b34da79..0000000
--- a/loading/trace_test/tests/3c.js
+++ /dev/null
@@ -1,8 +0,0 @@
-/* Copyright 2016 The Chromium Authors. All rights reserved.
- * Use of this source code is governed by a BSD-style license that can be
- * found in the LICENSE file.
- */
-
-function fn3() {
- document.getElementById('img3').setAttribute('src', '3c.jpg');
-}
diff --git a/loading/trace_test/webserver_test.py b/loading/trace_test/webserver_test.py
deleted file mode 100755
index f6dbf96..0000000
--- a/loading/trace_test/webserver_test.py
+++ /dev/null
@@ -1,291 +0,0 @@
-#! /usr/bin/python
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-"""An integration test for tracing.
-
-This is not run as part of unittests and is executed directly. In normal
-operation it can be run with no arguments (or perhaps --no_sandbox depending on
-how you have chrome set up). When debugging or adding tests, setting
---failed_trace_dir could be useful.
-
-The integration test spawns a local http server to serve web pages. The trace
-generated by each file in tests/*.html will be compared with the corresponding
-results/*.result. Each test should have a detailed comment explaining its
-organization and what the important part of the test result is.
-
-By default this will use a release version of chrome built in this same
-code tree (out/Release/chrome), see --local_binary to override.
-
-See InitiatorSequence for what the integration tests verify. The idea is to
-capture a sketch of the initiator and call stack relationship. The output is
-human-readable. To create a new test, first run test_server.py locally with
---source_dir pointing to tests/, and verify that the test page works as expected
-by pointing a browser to localhost:XXX/your_new_test.html (with XXX the port
-reported in the console output of test_server.py). Then run this
-webserver_test.py with --failed_trace_dir set. Verify that the actual output is
-what you expect it to be, then copy it to results/. If your test is 7.html, you
-should copy to results/7.result.
-"""
-
-import argparse
-import contextlib
-import json
-import os
-import shutil
-import subprocess
-import sys
-import tempfile
-import urlparse
-
-sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
-
-import clovis_constants
-import controller
-import loading_trace
-import options
-
-
-OPTIONS = options.OPTIONS
-WEBSERVER = os.path.join(os.path.dirname(__file__), 'test_server.py')
-TESTDIR = os.path.join(os.path.dirname(__file__), 'tests')
-RESULTDIR = os.path.join(os.path.dirname(__file__), 'results')
-
-
-@contextlib.contextmanager
-def TemporaryDirectory():
- """Returns a freshly-created directory that gets automatically deleted after
- usage.
- """
- name = tempfile.mkdtemp()
- try:
- yield name
- finally:
- shutil.rmtree(name)
-
-
-class WebServer(object):
- """Wrap the webserver."""
- def __init__(self, source_dir, communication_dir):
- """Initialize the server but does not start it.
-
- Args:
- source_dir: the directory where source data (html, js, etc) will be found.
- communication_dir: a directory to use for IPC (eg, discovering the
- port, which is dynamically allocated). This should probably be a
- temporary directory.
- """
- self._source_dir = source_dir
- self._communication_dir = communication_dir
- self._fifo = None
- self._server_process = None
- self._port = None
-
- @classmethod
- @contextlib.contextmanager
- def Context(cls, *args, **kwargs):
- """Creates a webserver as a context manager.
-
- Args:
- As in __init__.
-
- Returns:
- A context manager for an instance of a WebServer.
- """
- try:
- server = cls(*args, **kwargs)
- server.Start()
- yield server
- finally:
- server.Stop()
-
- def Start(self):
- """Start the server by spawning a process."""
- fifo_name = os.path.join(self._communication_dir, 'from_server')
- os.mkfifo(fifo_name)
- server_out = None if OPTIONS.local_noisy else file('/dev/null', 'w')
- self._server_process = subprocess.Popen(
- [WEBSERVER,
- '--source_dir=%s' % self._source_dir,
- '--fifo=%s' % fifo_name],
- shell=False, stdout=server_out, stderr=server_out)
- fifo = file(fifo_name)
- # TODO(mattcary): timeout?
- self._port = int(fifo.readline())
- fifo.close()
-
- def Stop(self):
- """Stops the server, waiting for it to complete.
-
- Returns:
- True if the server stopped correctly.
- """
- if self._server_process is None:
- return False
- self._server_process.kill()
- # TODO(mattcary): timeout & error?
- self._server_process.wait()
- return True
-
- def Address(self):
- """Returns a host:port string suitable for an http request."""
- assert self._port is not None, \
- "No port exists until the server is started."
- return 'localhost:%s' % self._port
-
-
-class InitiatorSequence(object):
- """The interesting parts of the initiator dependancies that are tested."""
- def __init__(self, trace):
- """Create.
-
- Args:
- trace: a LoadingTrace.
- """
- self._seq = []
- # ReadFromFile will initialize without a trace.
- if trace is None:
- return
- for rq in trace.request_track.GetEvents():
- if rq.initiator['type'] in ('parser', 'script'):
- stack_string = ''
- stack = rq.initiator.get('stack')
- # Iteratively walk the stack and its parents.
- while stack:
- current_string = '/'.join(
- ['%s:%s' % (self._ShortUrl(frame['url']), frame['lineNumber'])
- for frame in stack['callFrames']])
- if len(current_string) and len(stack_string):
- stack_string += '/'
- stack_string += current_string
- stack = stack.get('parent')
-
- if stack_string == '':
- stack_string = 'no stack'
-
- self._seq.append('%s (%s) %s' % (
- rq.initiator['type'],
- stack_string,
- self._ShortUrl(rq.url)))
- self._seq.sort()
-
- @classmethod
- def ReadFromFile(cls, input_file):
- """Read a file from DumpToFile.
-
- Args:
- input_file: a file-like object.
-
- Returns:
- An InitiatorSequence instance.
- """
- seq = cls(None)
- seq._seq = sorted([l.strip() for l in input_file.readlines() if l])
- return seq
-
- def DumpToFile(self, output):
- """Write to a file.
-
- Args:
- output: a writeable file-like object.
- """
- output.write('\n'.join(self._seq) + '\n')
-
- def __eq__(self, other):
- if other is None:
- return False
- assert type(other) is InitiatorSequence
- if len(self._seq) != len(other._seq):
- return False
- for a, b in zip(self._seq, other._seq):
- if a != b:
- return False
- return True
-
- def _ShortUrl(self, url):
- short = urlparse.urlparse(url).path
- while short.startswith('/'):
- short = short[1:]
- if len(short) > 40:
- short = '...'.join((short[:20], short[-10:]))
- return short
-
-
-def RunTest(webserver, test_page, expected):
- """Run an webserver test.
-
- The expected result can be None, in which case --failed_trace_dir can be set
- to output the observed trace.
-
- Args:
- webserver [WebServer]: the webserver to use for the test. It must be
- started.
- test_page: the name of the page to load.
- expected [InitiatorSequence]: expected initiator sequence.
-
- Returns:
- True if the test passed and false otherwise. Status is printed to stdout.
- """
- url = 'http://%s/%s' % (webserver.Address(), test_page)
- sys.stdout.write('Testing %s...' % url)
- chrome_controller = controller.LocalChromeController()
-
- with chrome_controller.Open() as connection:
- connection.ClearCache()
- observed_seq = InitiatorSequence(
- loading_trace.LoadingTrace.RecordUrlNavigation(
- url, connection, chrome_controller.ChromeMetadata(),
- categories=clovis_constants.DEFAULT_CATEGORIES))
- if observed_seq == expected:
- sys.stdout.write(' ok\n')
- return True
- else:
- sys.stdout.write(' FAILED!\n')
- if OPTIONS.failed_trace_dir:
- outname = os.path.join(OPTIONS.failed_trace_dir,
- test_page + '.observed_result')
- with file(outname, 'w') as output:
- observed_seq.DumpToFile(output)
- sys.stdout.write('Wrote observed result to %s\n' % outname)
- return False
-
-
-def RunAllTests():
- """Run all tests in TESTDIR.
-
- All tests must have a corresponding result in RESULTDIR unless
- --failed_trace_dir is set.
- """
- test_filter = set(OPTIONS.test_filter.split(',')) \
- if OPTIONS.test_filter else None
-
- with TemporaryDirectory() as temp_dir, \
- WebServer.Context(TESTDIR, temp_dir) as webserver:
- failure = False
- for test in sorted(os.listdir(TESTDIR)):
- if test.endswith('.html'):
- if test_filter and test not in test_filter:
- continue
- result = os.path.join(RESULTDIR, test[:test.rfind('.')] + '.result')
- assert OPTIONS.failed_trace_dir or os.path.exists(result), \
- 'No result found for test'
- expected = None
- if os.path.exists(result):
- with file(result) as result_file:
- expected = InitiatorSequence.ReadFromFile(result_file)
- if not RunTest(webserver, test, expected):
- failure = True
- if failure:
- print 'FAILED!'
- else:
- print 'all tests passed'
-
-
-if __name__ == '__main__':
- OPTIONS.ParseArgs(sys.argv[1:],
- description='Run webserver integration test',
- extra=[('--failed_trace_dir', ''),
- ('--noisy', False),
- ('--test_filter', None)])
- RunAllTests()
diff --git a/loading/trace_test/webserver_unittest.py b/loading/trace_test/webserver_unittest.py
deleted file mode 100644
index f4b232c..0000000
--- a/loading/trace_test/webserver_unittest.py
+++ /dev/null
@@ -1,115 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-import httplib
-import json
-import os
-import shutil
-import sys
-import tempfile
-import unittest
-
-_SRC_DIR = os.path.abspath(os.path.join(
- os.path.dirname(__file__), '..', '..', '..', '..'))
-sys.path.append(os.path.join(_SRC_DIR, 'tools', 'android', 'loading'))
-
-import options
-from trace_test import test_server
-from trace_test import webserver_test
-
-
-OPTIONS = options.OPTIONS
-
-
-class WebServerTestCase(unittest.TestCase):
- def setUp(self):
- OPTIONS.ParseArgs('', extra=[('--noisy', False)])
- self._temp_dir = tempfile.mkdtemp()
- self._server = webserver_test.WebServer(self._temp_dir, self._temp_dir)
-
- def tearDown(self):
- self.assertTrue(self._server.Stop())
- shutil.rmtree(self._temp_dir)
-
- def StartServer(self):
- self._server.Start()
-
- def WriteFile(self, path, file_content):
- with open(os.path.join(self._temp_dir, path), 'w') as file_output:
- file_output.write(file_content)
-
- def Request(self, path):
- host, port = self._server.Address().split(':')
- connection = httplib.HTTPConnection(host, int(port))
- connection.request('GET', path)
- response = connection.getresponse()
- connection.close()
- return response
-
- def testWebserverBasic(self):
- self.WriteFile('test.html',
- '<!DOCTYPE html><html><head><title>Test</title></head>'
- '<body><h1>Test Page</h1></body></html>')
- self.StartServer()
-
- response = self.Request('test.html')
- self.assertEqual(200, response.status)
-
- response = self.Request('/test.html')
- self.assertEqual(200, response.status)
-
- response = self.Request('///test.html')
- self.assertEqual(200, response.status)
-
- def testWebserver404(self):
- self.StartServer()
-
- response = self.Request('null')
- self.assertEqual(404, response.status)
- self.assertEqual('text/html', response.getheader('content-type'))
-
- def testContentType(self):
- self.WriteFile('test.html',
- '<!DOCTYPE html><html><head><title>Test</title></head>'
- '<body><h1>Test Page</h1></body></html>')
- self.WriteFile('blobfile',
- 'whatever')
- self.StartServer()
-
- response = self.Request('test.html')
- self.assertEqual(200, response.status)
- self.assertEqual('text/html', response.getheader('content-type'))
-
- response = self.Request('blobfile')
- self.assertEqual(500, response.status)
-
- def testCustomResponseHeader(self):
- self.WriteFile('test.html',
- '<!DOCTYPE html><html><head><title>Test</title></head>'
- '<body><h1>Test Page</h1></body></html>')
- self.WriteFile('test2.html',
- '<!DOCTYPE html><html><head><title>Test 2</title></head>'
- '<body><h1>Test Page 2</h1></body></html>')
- self.WriteFile(test_server.RESPONSE_HEADERS_PATH,
- json.dumps({'test2.html': [['Cache-Control', 'no-store']]}))
- self.StartServer()
-
- response = self.Request('test.html')
- self.assertEqual(200, response.status)
- self.assertEqual('text/html', response.getheader('content-type'))
- self.assertEqual(None, response.getheader('cache-control'))
-
- response = self.Request('test2.html')
- self.assertEqual(200, response.status)
- self.assertEqual('text/html', response.getheader('content-type'))
- self.assertEqual('no-store', response.getheader('cache-control'))
-
- response = self.Request(test_server.RESPONSE_HEADERS_PATH)
- self.assertEqual(200, response.status)
- self.assertEqual('application/json', response.getheader('content-type'))
- self.assertEqual(None, response.getheader('cache-control'))
-
-
-if __name__ == '__main__':
- unittest.main()
diff --git a/loading/trace_to_chrome_trace.py b/loading/trace_to_chrome_trace.py
deleted file mode 100755
index 23c3632..0000000
--- a/loading/trace_to_chrome_trace.py
+++ /dev/null
@@ -1,22 +0,0 @@
-#! /usr/bin/python
-# Copyright 2015 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-"""Convert trace output for Chrome.
-
-Takes a loading trace from 'analyze.py log_requests' and outputs a json file
-that can be loaded by chrome's about:tracing..
-"""
-
-import argparse
-import json
-
-if __name__ == '__main__':
- parser = argparse.ArgumentParser()
- parser.add_argument('input')
- parser.add_argument('output')
- args = parser.parse_args()
- with file(args.output, 'w') as output_f, file(args.input) as input_f:
- events = json.load(input_f)['tracing_track']['events']
- json.dump({'traceEvents': events, 'metadata': {}}, output_f)
diff --git a/loading/tracing_track.py b/loading/tracing_track.py
deleted file mode 100644
index f69392d..0000000
--- a/loading/tracing_track.py
+++ /dev/null
@@ -1,571 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-"""Monitor tracing events on chrome via chrome remote debugging."""
-
-import itertools
-import logging
-import operator
-
-import clovis_constants
-import devtools_monitor
-
-
-class TracingTrack(devtools_monitor.Track):
- """Grabs and processes trace event messages.
-
- See https://goo.gl/Qabkqk for details on the protocol.
- """
- def __init__(self, connection, categories, fetch_stream=False):
- """Initialize this TracingTrack.
-
- Args:
- connection: a DevToolsConnection.
- categories: ([str] or None) If set, a list of categories to enable or
- disable in Chrome tracing. Categories prefixed with '-' are
- disabled.
- fetch_stream: if true, use a websocket stream to fetch tracing data rather
- than dataCollected events. It appears based on very limited testing that
- a stream is slower than the default reporting as dataCollected events.
- """
- super(TracingTrack, self).__init__(connection)
- if connection:
- connection.RegisterListener('Tracing.dataCollected', self)
-
- self._categories = set(categories)
- params = {}
- params['categories'] = ','.join(self._categories)
- if fetch_stream:
- params['transferMode'] = 'ReturnAsStream'
-
- if connection:
- connection.SyncRequestNoResponse('Tracing.start', params)
-
- self._events = []
- self._base_msec = None
- self._interval_tree = None
- self._main_frame_id = None
-
- def Handle(self, method, event):
- for e in event['params']['value']:
- event = Event(e)
- self._events.append(event)
- if self._base_msec is None or event.start_msec < self._base_msec:
- self._base_msec = event.start_msec
- # Invalidate our index rather than trying to be fancy and incrementally
- # update.
- self._interval_tree = None
-
- def Categories(self):
- """Returns the set of categories in this trace."""
- return self._categories
-
- def GetFirstEventMillis(self):
- """Find the canonical start time for this track.
-
- Returns:
- The millisecond timestamp of the first request.
- """
- return self._base_msec
-
- def GetEvents(self):
- """Returns a list of tracing.Event. Not sorted."""
- return self._events
-
- def GetMatchingEvents(self, category, name):
- """Gets events matching |category| and |name|."""
- return [e for e in self.GetEvents() if e.Matches(category, name)]
-
- def GetMatchingMainFrameEvents(self, category, name):
- """Gets events matching |category| and |name| that occur in the main frame.
-
- Events without a 'frame' key in their |args| are discarded.
- """
- matching_events = self.GetMatchingEvents(category, name)
- return [e for e in matching_events
- if 'frame' in e.args and e.args['frame'] == self.GetMainFrameID()]
-
- def GetMainFrameRoutingID(self):
- """Returns the main frame routing ID."""
- for event in self.GetMatchingEvents(
- 'navigation', 'RenderFrameImpl::OnNavigate'):
- return event.args['id']
- assert False
-
- def GetMainFrameID(self):
- """Returns the main frame ID."""
- if not self._main_frame_id:
- navigation_start_events = self.GetMatchingEvents(
- 'blink.user_timing', 'navigationStart')
- first_event = min(navigation_start_events, key=lambda e: e.start_msec)
- self._main_frame_id = first_event.args['frame']
-
- return self._main_frame_id
-
- def SetMainFrameID(self, frame_id):
- """Set the main frame ID. Normally this is used only for testing."""
- self._main_frame_id = frame_id
-
- def EventsAt(self, msec):
- """Gets events active at a timestamp.
-
- Args:
- msec: tracing milliseconds to query. Tracing milliseconds appears to be
- since chrome startup (ie, arbitrary epoch).
-
- Returns:
- List of events active at that timestamp. Instantaneous (ie, instant,
- sample and counter) events are never included. Event end times are
- exclusive, so that an event ending at the usec parameter will not be
- returned.
- """
- self._IndexEvents()
- return self._interval_tree.EventsAt(msec)
-
- def Filter(self, pid=None, tid=None, categories=None):
- """Returns a new TracingTrack with a subset of the events.
-
- Args:
- pid: (int or None) Selects events from this PID.
- tid: (int or None) Selects events from this TID.
- categories: (set([str]) or None) Selects events belonging to one of the
- categories.
- """
- events = self._events
- if pid is not None:
- events = filter(lambda e : e.tracing_event['pid'] == pid, events)
- if tid is not None:
- events = filter(lambda e : e.tracing_event['tid'] == tid, events)
- if categories is not None:
- events = filter(
- lambda e : set(e.category.split(',')).intersection(categories),
- events)
- tracing_track = TracingTrack(None, clovis_constants.DEFAULT_CATEGORIES)
- tracing_track._events = events
- tracing_track._categories = self._categories
- if categories is not None:
- tracing_track._categories = self._categories.intersection(categories)
- return tracing_track
-
- def ToJsonDict(self):
- return {'categories': list(self._categories),
- 'events': [e.ToJsonDict() for e in self._events]}
-
- @classmethod
- def FromJsonDict(cls, json_dict):
- if not json_dict:
- return None
- assert 'events' in json_dict
- events = [Event(e) for e in json_dict['events']]
- tracing_track = TracingTrack(None, clovis_constants.DEFAULT_CATEGORIES)
- tracing_track._categories = set(json_dict.get('categories', []))
- tracing_track._events = events
- tracing_track._base_msec = events[0].start_msec if events else 0
- for e in events[1:]:
- if e.type == 'M':
- continue # No timestamp for metadata events.
- assert e.start_msec > 0
- if e.start_msec < tracing_track._base_msec:
- tracing_track._base_msec = e.start_msec
- return tracing_track
-
- def OverlappingEvents(self, start_msec, end_msec):
- self._IndexEvents()
- return self._interval_tree.OverlappingEvents(start_msec, end_msec)
-
- def EventsEndingBetween(self, start_msec, end_msec):
- """Gets the list of events ending within an interval.
-
- Args:
- start_msec: the start of the range to query, in milliseconds, inclusive.
- end_msec: the end of the range to query, in milliseconds, inclusive.
-
- Returns:
- See OverlappingEvents() above.
- """
- overlapping_events = self.OverlappingEvents(start_msec, end_msec)
- return [e for e in overlapping_events
- if start_msec <= e.end_msec <= end_msec]
-
- def EventFromStep(self, step_event):
- """Returns the Event associated with a step event, or None.
-
- Args:
- step_event: (Event) Step event.
-
- Returns:
- an Event that matches the step event, or None.
- """
- self._IndexEvents()
- assert 'step' in step_event.args and step_event.tracing_event['ph'] == 'T'
- candidates = self._interval_tree.EventsAt(step_event.start_msec)
- for event in candidates:
- # IDs are only unique within a process (often they are pointers).
- if (event.pid == step_event.pid and event.tracing_event['ph'] != 'T'
- and event.name == step_event.name and event.id == step_event.id):
- return event
- return None
-
- def _IndexEvents(self, strict=False):
- if self._interval_tree:
- return
- complete_events = []
- spanning_events = self._SpanningEvents()
- for event in self._events:
- if not event.IsIndexable():
- continue
- if event.IsComplete():
- complete_events.append(event)
- continue
- matched_event = spanning_events.Match(event, strict)
- if matched_event is not None:
- complete_events.append(matched_event)
- self._interval_tree = _IntervalTree.FromEvents(complete_events)
-
- if strict and spanning_events.HasPending():
- raise devtools_monitor.DevToolsConnectionException(
- 'Pending spanning events: %s' %
- '\n'.join([str(e) for e in spanning_events.PendingEvents()]))
-
- def _GetEvents(self):
- self._IndexEvents()
- return self._interval_tree.GetEvents()
-
- def HasLoadingSucceeded(self):
- """Returns whether the loading has succeed at recording time."""
- main_frame_id = self.GetMainFrameRoutingID()
- for event in self.GetMatchingEvents(
- 'navigation', 'RenderFrameImpl::didFailProvisionalLoad'):
- if event.args['id'] == main_frame_id:
- return False
- for event in self.GetMatchingEvents(
- 'navigation', 'RenderFrameImpl::didFailLoad'):
- if event.args['id'] == main_frame_id:
- return False
- return True
-
- class _SpanningEvents(object):
- def __init__(self):
- self._duration_stack = []
- self._async_stacks = {}
- self._objects = {}
- self._MATCH_HANDLER = {
- 'B': self._DurationBegin,
- 'E': self._DurationEnd,
- 'b': self._AsyncStart,
- 'e': self._AsyncEnd,
- 'S': self._AsyncStart,
- 'F': self._AsyncEnd,
- 'N': self._ObjectCreated,
- 'D': self._ObjectDestroyed,
- 'M': self._Ignore,
- 'X': self._Ignore,
- 'R': self._Ignore,
- 'p': self._Ignore,
- '(': self._Ignore, # Context events.
- ')': self._Ignore, # Ditto.
- None: self._Ignore,
- }
-
- def Match(self, event, strict=False):
- return self._MATCH_HANDLER.get(
- event.type, self._Unsupported)(event, strict)
-
- def HasPending(self):
- return (self._duration_stack or
- self._async_stacks or
- self._objects)
-
- def PendingEvents(self):
- return itertools.chain(
- (e for e in self._duration_stack),
- (o for o in self._objects),
- itertools.chain.from_iterable((
- (e for e in s) for s in self._async_stacks.itervalues())))
-
- def _AsyncKey(self, event, _):
- return (event.tracing_event['cat'], event.id)
-
- def _Ignore(self, _event, _):
- return None
-
- def _Unsupported(self, event, _):
- raise devtools_monitor.DevToolsConnectionException(
- 'Unsupported spanning event type: %s' % event)
-
- def _DurationBegin(self, event, _):
- self._duration_stack.append(event)
- return None
-
- def _DurationEnd(self, event, _):
- if not self._duration_stack:
- raise devtools_monitor.DevToolsConnectionException(
- 'Unmatched duration end: %s' % event)
- start = self._duration_stack.pop()
- start.SetClose(event)
- return start
-
- def _AsyncStart(self, event, strict):
- key = self._AsyncKey(event, strict)
- self._async_stacks.setdefault(key, []).append(event)
- return None
-
- def _AsyncEnd(self, event, strict):
- key = self._AsyncKey(event, strict)
- if key not in self._async_stacks:
- message = 'Unmatched async end %s: %s' % (key, event)
- if strict:
- raise devtools_monitor.DevToolsConnectionException(message)
- else:
- logging.warning(message)
- return None
- stack = self._async_stacks[key]
- start = stack.pop()
- if not stack:
- del self._async_stacks[key]
- start.SetClose(event)
- return start
-
- def _ObjectCreated(self, event, _):
- # The tracing event format has object deletion timestamps being exclusive,
- # that is the timestamp for a deletion my equal that of the next create at
- # the same address. This asserts that does not happen in practice as it is
- # inconvenient to handle that correctly here.
- if event.id in self._objects:
- raise devtools_monitor.DevToolsConnectionException(
- 'Multiple objects at same address: %s, %s' %
- (event, self._objects[event.id]))
- self._objects[event.id] = event
- return None
-
- def _ObjectDestroyed(self, event, _):
- if event.id not in self._objects:
- raise devtools_monitor.DevToolsConnectionException(
- 'Missing object creation for %s' % event)
- start = self._objects[event.id]
- del self._objects[event.id]
- start.SetClose(event)
- return start
-
-
-class Event(object):
- """Wraps a tracing event."""
- CLOSING_EVENTS = {'E': 'B',
- 'e': 'b',
- 'F': 'S',
- 'D': 'N'}
- __slots__ = ('_tracing_event', 'start_msec', 'end_msec', '_synthetic')
- def __init__(self, tracing_event, synthetic=False):
- """Creates Event.
-
- Intended to be created only by TracingTrack.
-
- Args:
- tracing_event: JSON tracing event, as defined in https://goo.gl/Qabkqk.
- synthetic: True if the event is synthetic. This is only used for indexing
- internal to TracingTrack.
- """
- if not synthetic and tracing_event['ph'] in ['s', 't', 'f']:
- raise devtools_monitor.DevToolsConnectionException(
- 'Unsupported event: %s' % tracing_event)
-
- self._tracing_event = tracing_event
- # Note tracing event times are in microseconds.
- self.start_msec = tracing_event['ts'] / 1000.0
- self.end_msec = None
- self._synthetic = synthetic
- if self.type == 'X':
- # Some events don't have a duration.
- duration = (tracing_event['dur']
- if 'dur' in tracing_event else tracing_event['tdur'])
- self.end_msec = self.start_msec + duration / 1000.0
-
- @property
- def type(self):
- if self._synthetic:
- return None
- return self._tracing_event['ph']
-
- @property
- def category(self):
- return self._tracing_event['cat']
-
- @property
- def pid(self):
- return self._tracing_event['pid']
-
- @property
- def args(self):
- return self._tracing_event.get('args', {})
-
- @property
- def id(self):
- return self._tracing_event.get('id')
-
- @property
- def name(self):
- return self._tracing_event['name']
-
- @property
- def tracing_event(self):
- return self._tracing_event
-
- @property
- def synthetic(self):
- return self._synthetic
-
- def __str__(self):
- return ''.join([str(self._tracing_event),
- '[%s,%s]' % (self.start_msec, self.end_msec)])
-
- def Matches(self, category, name):
- """Match tracing events.
-
- Args:
- category: a tracing category (event['cat']).
- name: the tracing event name (event['name']).
-
- Returns:
- True if the event matches and False otherwise.
- """
- if name != self.name:
- return False
- categories = self.category.split(',')
- return category in categories
-
- def IsIndexable(self):
- """True iff the event can be indexed by time."""
- return self._synthetic or self.type not in [
- 'I', 'P', 'c', 'C',
- 'n', 'T', 'p', # TODO(mattcary): ?? instant types of async events.
- 'O', # TODO(mattcary): ?? object snapshot
- 'M' # Metadata
- ]
-
- def IsComplete(self):
- return self.type == 'X'
-
- def Synthesize(self):
- """Expand into synthetic events.
-
- Returns:
- A list of events, possibly some synthetic, whose start times are all
- interesting for purposes of indexing. If the event is not indexable the
- set may be empty.
- """
- if not self.IsIndexable():
- return []
- if self.IsComplete():
- # Tracing event timestamps are microseconds!
- return [self, Event({'ts': self.end_msec * 1000}, synthetic=True)]
- return [self]
-
- def SetClose(self, closing):
- """Close a spanning event.
-
- Args:
- closing: The closing event.
-
- Raises:
- devtools_monitor.DevToolsConnectionException if closing can't property
- close this event.
- """
- if self.type != self.CLOSING_EVENTS.get(closing.type):
- raise devtools_monitor.DevToolsConnectionException(
- 'Bad closing: %s --> %s' % (self, closing))
- if self.type in ['b', 'S'] and (
- self.tracing_event['cat'] != closing.tracing_event['cat'] or
- self.id != closing.id):
- raise devtools_monitor.DevToolsConnectionException(
- 'Bad async closing: %s --> %s' % (self, closing))
- self.end_msec = closing.start_msec
- if 'args' in closing.tracing_event:
- self.tracing_event.setdefault(
- 'args', {}).update(closing.tracing_event['args'])
-
- def ToJsonDict(self):
- return self._tracing_event
-
- @classmethod
- def FromJsonDict(cls, json_dict):
- return Event(json_dict)
-
-
-class _IntervalTree(object):
- """Simple interval tree. This is not an optimal one, as the split is done with
- an equal number of events on each side, according to start time.
- """
- _TRESHOLD = 100
- def __init__(self, start, end, events):
- """Builds an interval tree.
-
- Args:
- start: start timestamp of this node, in ms.
- end: end timestamp covered by this node, in ms.
- events: Iterable of objects having start_msec and end_msec fields. Has to
- be sorted by start_msec.
- """
- self.start = start
- self.end = end
- self._events = events
- self._left = self._right = None
- if len(self._events) > self._TRESHOLD:
- self._Divide()
-
- @classmethod
- def FromEvents(cls, events):
- """Returns an IntervalTree instance from a list of events."""
- filtered_events = [e for e in events
- if e.start_msec is not None and e.end_msec is not None]
- filtered_events.sort(key=operator.attrgetter('start_msec'))
- start = min(event.start_msec for event in filtered_events)
- end = max(event.end_msec for event in filtered_events)
- return _IntervalTree(start, end, filtered_events)
-
- def OverlappingEvents(self, start, end):
- """Returns a set of events overlapping with [start, end)."""
- if min(end, self.end) - max(start, self.start) <= 0:
- return set()
- elif self._IsLeaf():
- result = set()
- for event in self._events:
- if self._Overlaps(event, start, end):
- result.add(event)
- return result
- else:
- return (self._left.OverlappingEvents(start, end)
- | self._right.OverlappingEvents(start, end))
-
- def EventsAt(self, timestamp):
- result = set()
- if self._IsLeaf():
- for event in self._events:
- if event.start_msec <= timestamp < event.end_msec:
- result.add(event)
- else:
- if self._left.start <= timestamp < self._left.end:
- result |= self._left.EventsAt(timestamp)
- if self._right.start <= timestamp < self._right.end:
- result |= self._right.EventsAt(timestamp)
- return result
-
- def GetEvents(self):
- return self._events
-
- def _Divide(self):
- middle = len(self._events) / 2
- left_events = self._events[:middle]
- right_events = self._events[middle:]
- left_end = max(e.end_msec for e in left_events)
- right_start = min(e.start_msec for e in right_events)
- self._left = _IntervalTree(self.start, left_end, left_events)
- self._right = _IntervalTree(right_start, self.end, right_events)
-
- def _IsLeaf(self):
- return self._left is None
-
- @classmethod
- def _Overlaps(cls, event, start, end):
- return (min(end, event.end_msec) - max(start, event.start_msec) > 0
- or start <= event.start_msec < end) # For instant events.
diff --git a/loading/tracing_track_unittest.py b/loading/tracing_track_unittest.py
deleted file mode 100644
index b10083f..0000000
--- a/loading/tracing_track_unittest.py
+++ /dev/null
@@ -1,493 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-import collections
-import copy
-import logging
-import operator
-import unittest
-
-import devtools_monitor
-
-from tracing_track import (Event, TracingTrack, _IntervalTree)
-
-
-class TracingTrackTestCase(unittest.TestCase):
- _MIXED_EVENTS = [
- {'ts': 3, 'ph': 'N', 'id': 1, 'args': {'name': 'A'}},
- {'ts': 5, 'ph': 'X', 'dur': 1, 'args': {'name': 'B'}},
- {'ts': 7, 'ph': 'D', 'id': 1},
- {'ts': 10, 'ph': 'B', 'args': {'name': 'D'}},
- {'ts': 10, 'ph': 'b', 'cat': 'X', 'id': 1, 'args': {'name': 'C'}},
- {'ts': 11, 'ph': 'e', 'cat': 'X', 'id': 1},
- {'ts': 12, 'ph': 'E'},
- {'ts': 12, 'ph': 'N', 'id': 1, 'args': {'name': 'E'}},
- {'ts': 13, 'ph': 'b', 'cat': 'X', 'id': 2, 'args': {'name': 'F'}},
- {'ts': 14, 'ph': 'e', 'cat': 'X', 'id': 2},
- {'ts': 15, 'ph': 'D', 'id': 1}]
-
- _EVENTS = [
- {'ts': 5, 'ph': 'X', 'dur': 1, 'pid': 2, 'tid': 1, 'args': {'name': 'B'}},
- {'ts': 3, 'ph': 'X', 'dur': 4, 'pid': 2, 'tid': 1, 'args': {'name': 'A'}},
- {'ts': 10, 'ph': 'X', 'dur': 1, 'pid': 2, 'tid': 2,
- 'args': {'name': 'C'}},
- {'ts': 10, 'ph': 'X', 'dur': 2, 'pid': 2, 'tid': 2,
- 'args': {'name': 'D'}},
- {'ts': 13, 'ph': 'X', 'dur': 1, 'pid': 2, 'tid': 1,
- 'args': {'name': 'F'}},
- {'ts': 12, 'ph': 'X', 'dur': 3, 'pid': 2, 'tid': 1,
- 'args': {'name': 'E'}}]
-
- def setUp(self):
- self.tree_threshold = _IntervalTree._TRESHOLD
- _IntervalTree._TRESHOLD = 2 # Expose more edge cases in the tree.
- self.track = TracingTrack(None, ['A', 'B', 'C', 'D'])
-
- def tearDown(self):
- _IntervalTree._TRESHOLD = self.tree_threshold
-
- def EventToMicroseconds(self, event):
- result = copy.deepcopy(event)
- if 'ts' in result:
- result['ts'] *= 1000
- if 'dur' in result:
- result['dur'] *= 1000
- return result
-
- def CheckTrack(self, timestamp, names):
- self.track._IndexEvents(strict=True)
- self.assertEqual(
- set((e.args['name'] for e in self.track.EventsAt(timestamp))),
- set(names))
-
- def CheckIntervals(self, events):
- """All tests should produce the following sequence of intervals, each
- identified by a 'name' in the event args.
-
- Timestamp
- 3 | A
- 4 |
- 5 | | B
- 6 |
- 7
- ..
- 10 | | C, D
- 11 |
- 12 | E
- 13 | | F
- 14 |
- """
- self.track.Handle('Tracing.dataCollected',
- {'params': {'value': [self.EventToMicroseconds(e)
- for e in events]}})
- self.CheckTrack(0, '')
- self.CheckTrack(2, '')
- self.CheckTrack(3, 'A')
- self.CheckTrack(4, 'A')
- self.CheckTrack(5, 'AB')
- self.CheckTrack(6, 'A')
- self.CheckTrack(7, '')
- self.CheckTrack(9, '')
- self.CheckTrack(10, 'CD')
- self.CheckTrack(11, 'D')
- self.CheckTrack(12, 'E')
- self.CheckTrack(13, 'EF')
- self.CheckTrack(14, 'E')
- self.CheckTrack(15, '')
- self.CheckTrack(100, '')
-
- def testComplete(self):
- # These are deliberately out of order.
- self.CheckIntervals([
- {'ts': 5, 'ph': 'X', 'dur': 1, 'args': {'name': 'B'}},
- {'ts': 3, 'ph': 'X', 'dur': 4, 'args': {'name': 'A'}},
- {'ts': 10, 'ph': 'X', 'dur': 1, 'args': {'name': 'C'}},
- {'ts': 10, 'ph': 'X', 'dur': 2, 'args': {'name': 'D'}},
- {'ts': 13, 'ph': 'X', 'dur': 1, 'args': {'name': 'F'}},
- {'ts': 12, 'ph': 'X', 'dur': 3, 'args': {'name': 'E'}}])
-
- def testDuration(self):
- self.CheckIntervals([
- {'ts': 3, 'ph': 'B', 'args': {'name': 'A'}},
- {'ts': 5, 'ph': 'B', 'args': {'name': 'B'}},
- {'ts': 6, 'ph': 'E'},
- {'ts': 7, 'ph': 'E'},
- # Since async intervals aren't named and must be nested, we fudge the
- # beginning of D by a tenth to ensure it's consistently detected as the
- # outermost event.
- {'ts': 9.9, 'ph': 'B', 'args': {'name': 'D'}},
- {'ts': 10, 'ph': 'B', 'args': {'name': 'C'}},
- {'ts': 11, 'ph': 'E'},
- # End of D. As end times are exclusive this should not conflict with the
- # start of E.
- {'ts': 12, 'ph': 'E'},
- {'ts': 12, 'ph': 'B', 'args': {'name': 'E'}},
- {'ts': 13, 'ph': 'B', 'args': {'name': 'F'}},
- {'ts': 14, 'ph': 'E'},
- {'ts': 15, 'ph': 'E'}])
-
- def testBadDurationExtraBegin(self):
- self.assertRaises(devtools_monitor.DevToolsConnectionException,
- self.CheckIntervals,
- [{'ts': 3, 'ph': 'B'},
- {'ts': 4, 'ph': 'B'},
- {'ts': 5, 'ph': 'E'}])
-
- def testBadDurationExtraEnd(self):
- self.assertRaises(devtools_monitor.DevToolsConnectionException,
- self.CheckIntervals,
- [{'ts': 3, 'ph': 'B'},
- {'ts': 4, 'ph': 'E'},
- {'ts': 5, 'ph': 'E'}])
-
- def testAsync(self):
- self.CheckIntervals([
- # A, B and F have the same category/id (so that A & B nest); C-E do not.
- {'ts': 3, 'ph': 'b', 'cat': 'A', 'id': 1, 'args': {'name': 'A'}},
- {'ts': 5, 'ph': 'b', 'cat': 'A', 'id': 1, 'args': {'name': 'B'}},
- # Not indexable.
- {'ts': 4, 'ph': 'n', 'cat': 'A', 'id': 1, 'args': {'name': 'A'}},
- {'ts': 6, 'ph': 'e', 'cat': 'A', 'id': 1},
- {'ts': 7, 'ph': 'e', 'cat': 'A', 'id': 1},
- {'ts': 10, 'ph': 'b', 'cat': 'B', 'id': 2, 'args': {'name': 'D'}},
- {'ts': 10, 'ph': 'b', 'cat': 'B', 'id': 3, 'args': {'name': 'C'}},
- {'ts': 11, 'ph': 'e', 'cat': 'B', 'id': 3},
- {'ts': 12, 'ph': 'e', 'cat': 'B', 'id': 2},
- {'ts': 12, 'ph': 'b', 'cat': 'A', 'id': 2, 'args': {'name': 'E'}},
- {'ts': 13, 'ph': 'b', 'cat': 'A', 'id': 1, 'args': {'name': 'F'}},
- {'ts': 14, 'ph': 'e', 'cat': 'A', 'id': 1},
- {'ts': 15, 'ph': 'e', 'cat': 'A', 'id': 2}])
-
- def testBadAsyncIdMismatch(self):
- self.assertRaises(
- devtools_monitor.DevToolsConnectionException,
- self.CheckIntervals,
- [{'ts': 3, 'ph': 'b', 'cat': 'A', 'id': 1, 'args': {'name': 'A'}},
- {'ts': 5, 'ph': 'b', 'cat': 'A', 'id': 1, 'args': {'name': 'B'}},
- {'ts': 6, 'ph': 'e', 'cat': 'A', 'id': 2},
- {'ts': 7, 'ph': 'e', 'cat': 'A', 'id': 1}])
-
- def testBadAsyncExtraBegin(self):
- self.assertRaises(
- devtools_monitor.DevToolsConnectionException,
- self.CheckIntervals,
- [{'ts': 3, 'ph': 'b', 'cat': 'A', 'id': 1, 'args': {'name': 'A'}},
- {'ts': 5, 'ph': 'b', 'cat': 'A', 'id': 1, 'args': {'name': 'B'}},
- {'ts': 6, 'ph': 'e', 'cat': 'A', 'id': 1}])
-
- def testBadAsyncExtraEnd(self):
- self.assertRaises(
- devtools_monitor.DevToolsConnectionException,
- self.CheckIntervals,
- [{'ts': 3, 'ph': 'b', 'cat': 'A', 'id': 1, 'args': {'name': 'A'}},
- {'ts': 5, 'ph': 'e', 'cat': 'A', 'id': 1},
- {'ts': 6, 'ph': 'e', 'cat': 'A', 'id': 1}])
-
- def testObject(self):
- # A and E share ids, which is okay as their scopes are disjoint.
- self.CheckIntervals([
- {'ts': 3, 'ph': 'N', 'id': 1, 'args': {'name': 'A'}},
- {'ts': 5, 'ph': 'N', 'id': 2, 'args': {'name': 'B'}},
- {'ts': 6, 'ph': 'D', 'id': 2},
- {'ts': 6, 'ph': 'O', 'id': 2}, # Ignored.
- {'ts': 7, 'ph': 'D', 'id': 1},
- {'ts': 10, 'ph': 'N', 'id': 3, 'args': {'name': 'D'}},
- {'ts': 10, 'ph': 'N', 'id': 4, 'args': {'name': 'C'}},
- {'ts': 11, 'ph': 'D', 'id': 4},
- {'ts': 12, 'ph': 'D', 'id': 3},
- {'ts': 12, 'ph': 'N', 'id': 1, 'args': {'name': 'E'}},
- {'ts': 13, 'ph': 'N', 'id': 5, 'args': {'name': 'F'}},
- {'ts': 14, 'ph': 'D', 'id': 5},
- {'ts': 15, 'ph': 'D', 'id': 1}])
-
- def testMixed(self):
- # A and E are objects, B complete, D a duration, and C and F async.
- self.CheckIntervals(self._MIXED_EVENTS)
-
- def testEventSerialization(self):
- for e in self._MIXED_EVENTS:
- event = Event(e)
- json_dict = event.ToJsonDict()
- deserialized_event = Event.FromJsonDict(json_dict)
- self.assertEquals(
- event.tracing_event, deserialized_event.tracing_event)
-
- def testTracingTrackSerialization(self):
- self._HandleEvents(self._MIXED_EVENTS)
- json_dict = self.track.ToJsonDict()
- self.assertTrue('events' in json_dict)
- deserialized_track = TracingTrack.FromJsonDict(json_dict)
- self.assertEquals(
- len(self.track._events), len(deserialized_track._events))
- for (e1, e2) in zip(self.track._events, deserialized_track._events):
- self.assertEquals(e1.tracing_event, e2.tracing_event)
-
- def testEventsEndingBetween(self):
- self._HandleEvents(self._EVENTS)
- self.assertEqual(set('ABCDEF'),
- set([e.args['name']
- for e in self.track.EventsEndingBetween(0, 100)]))
- self.assertFalse([e.args['name']
- for e in self.track.EventsEndingBetween(3, 5)])
- self.assertTrue('B' in set([e.args['name']
- for e in self.track.EventsEndingBetween(3, 6)]))
- self.assertEqual(set('B'),
- set([e.args['name']
- for e in self.track.EventsEndingBetween(3, 6)]))
-
- def testOverlappingEvents(self):
- self._HandleEvents(self._EVENTS)
- self.assertEqual(set('ABCDEF'),
- set([e.args['name']
- for e in self.track.OverlappingEvents(0, 100)]))
- self.assertFalse([e.args['name']
- for e in self.track.OverlappingEvents(0, 2)])
- self.assertEqual(set('BA'),
- set([e.args['name']
- for e in self.track.OverlappingEvents(4, 5.1)]))
- self.assertEqual(set('ACD'),
- set([e.args['name']
- for e in self.track.OverlappingEvents(6, 10.1)]))
-
- def testEventFromStep(self):
- events = [
- {'ts': 5, 'ph': 'X', 'dur': 10, 'pid': 2, 'tid': 1, 'id': '0x123',
- 'name': 'B'},
- {'ts': 5, 'ph': 'X', 'dur': 2, 'pid': 2, 'tid': 1, 'id': '0x12343',
- 'name': 'A'}]
- step_events = [{'ts': 6, 'ph': 'T', 'pid': 2, 'tid': 1, 'id': '0x123',
- 'name': 'B', 'args': {'step': 'Bla'}},
- {'ts': 4, 'ph': 'T', 'pid': 2, 'tid': 1, 'id': '0x123',
- 'name': 'B', 'args': {'step': 'Bla'}},
- {'ts': 6, 'ph': 'T', 'pid': 12, 'tid': 1, 'id': '0x123',
- 'name': 'B', 'args': {'step': 'Bla'}},
- {'ts': 6, 'ph': 'T', 'pid': 2, 'tid': 1, 'id': '0x1234',
- 'name': 'B', 'args': {'step': 'Bla'}},
- {'ts': 6, 'ph': 'T', 'pid': 2, 'tid': 1, 'id': '0x123',
- 'name': 'A', 'args': {'step': 'Bla'}},
- {'ts': 6, 'ph': 'n', 'pid': 2, 'tid': 1, 'id': '0x123',
- 'name': 'B', 'args': {'step': 'Bla'}},
- {'ts': 6, 'ph': 'n', 'pid': 2, 'tid': 1, 'id': '0x123',
- 'name': 'B', 'args': {}}]
- self._HandleEvents(events + step_events)
- trace_events = self.track.GetEvents()
- self.assertEquals(9, len(trace_events))
- # pylint: disable=unbalanced-tuple-unpacking
- (event, _, step_event, outside, wrong_pid, wrong_id, wrong_name,
- wrong_phase, no_step) = trace_events
- self.assertEquals(event, self.track.EventFromStep(step_event))
- self.assertIsNone(self.track.EventFromStep(outside))
- self.assertIsNone(self.track.EventFromStep(wrong_pid))
- self.assertIsNone(self.track.EventFromStep(wrong_id))
- self.assertIsNone(self.track.EventFromStep(wrong_name))
- # Invalid events
- with self.assertRaises(AssertionError):
- self.track.EventFromStep(wrong_phase)
- with self.assertRaises(AssertionError):
- self.track.EventFromStep(no_step)
-
- def testFilterPidTid(self):
- self._HandleEvents(self._EVENTS)
- tracing_track = self.track.Filter(2, 1)
- self.assertTrue(tracing_track is not self.track)
- self.assertEquals(4, len(tracing_track.GetEvents()))
- tracing_track = self.track.Filter(2, 42)
- self.assertEquals(0, len(tracing_track.GetEvents()))
-
- def testGetMainFrameID(self):
- _MAIN_FRAME_ID = 0xffff
- _SUBFRAME_ID = 0xaaaa
- events = [
- {'ts': 7, 'ph': 'X', 'dur': 10, 'pid': 2, 'tid': 1, 'id': '0x123',
- 'name': 'navigationStart', 'cat': 'blink.user_timing',
- 'args': {'frame': _SUBFRAME_ID}},
- {'ts': 8, 'ph': 'X', 'dur': 2, 'pid': 2, 'tid': 1, 'id': '0x12343',
- 'name': 'A'},
- {'ts': 3, 'ph': 'X', 'dur': 10, 'pid': 2, 'tid': 1, 'id': '0x125',
- 'name': 'navigationStart', 'cat': 'blink.user_timing',
- 'args': {'frame': _MAIN_FRAME_ID}},
- ]
- self._HandleEvents(events)
- self.assertEquals(_MAIN_FRAME_ID, self.track.GetMainFrameID())
-
- def testGetMatchingEvents(self):
- _MAIN_FRAME_ID = 0xffff
- _SUBFRAME_ID = 0xaaaa
- events = [
- {'ts': 7, 'ph': 'X', 'dur': 10, 'pid': 2, 'tid': 1, 'id': '0x123',
- 'name': 'navigationStart', 'cat': 'blink.user_timing',
- 'args': {'frame': _SUBFRAME_ID}},
- {'ts': 8, 'ph': 'X', 'dur': 2, 'pid': 2, 'tid': 1, 'id': '0x12343',
- 'name': 'A'},
- {'ts': 3, 'ph': 'X', 'dur': 10, 'pid': 2, 'tid': 1, 'id': '0x125',
- 'name': 'navigationStart', 'cat': 'blink.user_timing',
- 'args': {'frame': _MAIN_FRAME_ID}},
- ]
- self._HandleEvents(events)
- matching_events = self.track.GetMatchingEvents('blink.user_timing',
- 'navigationStart')
- self.assertEquals(2, len(matching_events))
- self.assertListEqual([self.track.GetEvents()[0],
- self.track.GetEvents()[2]], matching_events)
-
- matching_main_frame_events = self.track.GetMatchingMainFrameEvents(
- 'blink.user_timing', 'navigationStart')
- self.assertEquals(1, len(matching_main_frame_events))
- self.assertListEqual([self.track.GetEvents()[2]],
- matching_main_frame_events)
-
- def testFilterCategories(self):
- events = [
- {'ts': 5, 'ph': 'X', 'dur': 10, 'pid': 2, 'tid': 1, 'cat': 'A'},
- {'ts': 5, 'ph': 'X', 'dur': 10, 'pid': 2, 'tid': 1, 'cat': 'B'},
- {'ts': 5, 'ph': 'X', 'dur': 10, 'pid': 2, 'tid': 1, 'cat': 'C,D'},
- {'ts': 5, 'ph': 'X', 'dur': 10, 'pid': 2, 'tid': 1, 'cat': 'A,B,C,D'}]
- self._HandleEvents(events)
- tracing_events = self.track.GetEvents()
- self.assertEquals(4, len(tracing_events))
- filtered_events = self.track.Filter(categories=None).GetEvents()
- self.assertListEqual(tracing_events, filtered_events)
- filtered_events = self.track.Filter(categories=set(['A'])).GetEvents()
- self.assertEquals(2, len(filtered_events))
- self.assertListEqual([tracing_events[0], tracing_events[3]],
- filtered_events)
- filtered_events = self.track.Filter(categories=set(['Z'])).GetEvents()
- self.assertEquals(0, len(filtered_events))
- filtered_events = self.track.Filter(categories=set(['B', 'C'])).GetEvents()
- self.assertEquals(3, len(filtered_events))
- self.assertListEqual(tracing_events[1:], filtered_events)
- self.assertSetEqual(
- set('A'), self.track.Filter(categories=set('A')).Categories())
-
- def testHasLoadingSucceeded(self):
- cat = 'navigation'
- on_navigate = 'RenderFrameImpl::OnNavigate'
- fail_provisional = 'RenderFrameImpl::didFailProvisionalLoad'
- fail_load = 'RenderFrameImpl::didFailLoad'
-
- track = TracingTrack.FromJsonDict({'categories': [cat], 'events': []})
- with self.assertRaises(AssertionError):
- track.HasLoadingSucceeded()
-
- track = TracingTrack.FromJsonDict({'categories': [cat], 'events': [
- {'cat': cat, 'name': on_navigate, 'args': {'id': 1},
- 'ts': 5, 'ph': 'X', 'dur': 10, 'pid': 1, 'tid': 1}]})
- self.assertTrue(track.HasLoadingSucceeded())
-
- track = TracingTrack.FromJsonDict({'categories': [cat], 'events': [
- {'cat': cat, 'name': on_navigate, 'args': {'id': 1},
- 'ts': 5, 'ph': 'X', 'dur': 10, 'pid': 1, 'tid': 1},
- {'cat': cat, 'name': on_navigate, 'args': {'id': 2},
- 'ts': 5, 'ph': 'X', 'dur': 10, 'pid': 1, 'tid': 1},
- {'cat': cat, 'name': fail_provisional, 'args': {'id': 2},
- 'ts': 5, 'ph': 'X', 'dur': 10, 'pid': 1, 'tid': 1}]})
- self.assertTrue(track.HasLoadingSucceeded())
-
- track = TracingTrack.FromJsonDict({'categories': [cat], 'events': [
- {'cat': cat, 'name': on_navigate, 'args': {'id': 1},
- 'ts': 5, 'ph': 'X', 'dur': 10, 'pid': 1, 'tid': 1},
- {'cat': cat, 'name': fail_provisional, 'args': {'id': 1},
- 'ts': 5, 'ph': 'X', 'dur': 10, 'pid': 1, 'tid': 1}]})
- self.assertFalse(track.HasLoadingSucceeded())
-
- track = TracingTrack.FromJsonDict({'categories': [cat], 'events': [
- {'cat': cat, 'name': on_navigate, 'args': {'id': 1},
- 'ts': 5, 'ph': 'X', 'dur': 10, 'pid': 1, 'tid': 1},
- {'cat': cat, 'name': fail_load, 'args': {'id': 1},
- 'ts': 5, 'ph': 'X', 'dur': 10, 'pid': 1, 'tid': 1}]})
- self.assertFalse(track.HasLoadingSucceeded())
-
- track = TracingTrack.FromJsonDict({'categories': [cat], 'events': [
- {'cat': cat, 'name': on_navigate, 'args': {'id': 1},
- 'ts': 5, 'ph': 'X', 'dur': 10, 'pid': 1, 'tid': 1},
- {'cat': cat, 'name': fail_load, 'args': {'id': 1},
- 'ts': 5, 'ph': 'X', 'dur': 10, 'pid': 1, 'tid': 1}]})
- self.assertFalse(track.HasLoadingSucceeded())
-
- def _HandleEvents(self, events):
- self.track.Handle('Tracing.dataCollected', {'params': {'value': [
- self.EventToMicroseconds(e) for e in events]}})
-
-
-class IntervalTreeTestCase(unittest.TestCase):
- class FakeEvent(object):
- def __init__(self, start_msec, end_msec):
- self.start_msec = start_msec
- self.end_msec = end_msec
-
- def __eq__(self, o):
- return self.start_msec == o.start_msec and self.end_msec == o.end_msec
-
- _COUNT = 1000
-
- def testCreateTree(self):
- events = [self.FakeEvent(100 * i, 100 * (i + 1))
- for i in range(self._COUNT)]
- tree = _IntervalTree.FromEvents(events)
- self.assertEquals(0, tree.start)
- self.assertEquals(100 * self._COUNT, tree.end)
- self.assertFalse(tree._IsLeaf())
-
- def testEventsAt(self):
- events = ([self.FakeEvent(100 * i, 100 * (i + 1))
- for i in range(self._COUNT)]
- + [self.FakeEvent(100 * i + 50, 100 * i + 150)
- for i in range(self._COUNT)])
- tree = _IntervalTree.FromEvents(events)
- self.assertEquals(0, tree.start)
- self.assertEquals(100 * self._COUNT + 50, tree.end)
- self.assertFalse(tree._IsLeaf())
- for i in range(self._COUNT):
- self.assertEquals(2, len(tree.EventsAt(100 * i + 75)))
- # Add instant events, check that they are excluded.
- events += [self.FakeEvent(100 * i + 75, 100 * i + 75)
- for i in range(self._COUNT)]
- tree = _IntervalTree.FromEvents(events)
- self.assertEquals(3 * self._COUNT, len(tree._events))
- for i in range(self._COUNT):
- self.assertEquals(2, len(tree.EventsAt(100 * i + 75)))
-
- def testOverlappingEvents(self):
- events = ([self.FakeEvent(100 * i, 100 * (i + 1))
- for i in range(self._COUNT)]
- + [self.FakeEvent(100 * i + 50, 100 * i + 150)
- for i in range(self._COUNT)])
- tree = _IntervalTree.FromEvents(events)
- self.assertEquals(0, tree.start)
- self.assertEquals(100 * self._COUNT + 50, tree.end)
- self.assertFalse(tree._IsLeaf())
- # 400 -> 500, 450 -> 550, 500 -> 600
- self.assertEquals(3, len(tree.OverlappingEvents(450, 550)))
- overlapping = sorted(
- tree.OverlappingEvents(450, 550), key=operator.attrgetter('start_msec'))
- self.assertEquals(self.FakeEvent(400, 500), overlapping[0])
- self.assertEquals(self.FakeEvent(450, 550), overlapping[1])
- self.assertEquals(self.FakeEvent(500, 600), overlapping[2])
- self.assertEquals(8, len(tree.OverlappingEvents(450, 800)))
- # Add instant events, check that they are included.
- events += [self.FakeEvent(500, 500) for i in range(10)]
- tree = _IntervalTree.FromEvents(events)
- self.assertEquals(3 + 10, len(tree.OverlappingEvents(450, 550)))
- self.assertEquals(8 + 10, len(tree.OverlappingEvents(450, 800)))
-
- def testEventMatches(self):
- event = Event({'name': 'foo',
- 'cat': 'bar',
- 'ph': 'X',
- 'ts': 0, 'dur': 0})
- self.assertTrue(event.Matches('bar', 'foo'))
- self.assertFalse(event.Matches('bar', 'biz'))
- self.assertFalse(event.Matches('biz', 'foo'))
-
- event = Event({'name': 'foo',
- 'cat': 'bar,baz,bizbiz',
- 'ph': 'X',
- 'ts': 0, 'dur': 0})
- self.assertTrue(event.Matches('bar', 'foo'))
- self.assertTrue(event.Matches('baz', 'foo'))
- self.assertFalse(event.Matches('bar', 'biz'))
- self.assertFalse(event.Matches('biz', 'foo'))
-
-if __name__ == '__main__':
- unittest.main()
diff --git a/loading/unmaintained/README.md b/loading/unmaintained/README.md
deleted file mode 100644
index 295323b..0000000
--- a/loading/unmaintained/README.md
+++ /dev/null
@@ -1,2 +0,0 @@
-This directory contains unmaintained code that still has value, such as
-experimental or temporary scripts.
diff --git a/loading/unmaintained/gce_validation_collect.sh b/loading/unmaintained/gce_validation_collect.sh
deleted file mode 100755
index ec85dca..0000000
--- a/loading/unmaintained/gce_validation_collect.sh
+++ /dev/null
@@ -1,48 +0,0 @@
-#!/bin/bash
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-# Takes a list of URLs (infile), and runs analyse.py on them in parallel on a
-# device and on GCE, in a sychronized manner (the task is started on both
-# platforms at the same time).
-
-infile=$1
-outdir=$2
-instance_ip=$3
-repeat_count=$4
-
-for site in $(< $infile); do
- echo $site
- output_subdir=$(echo "$site"|tr "/:" "_")
- echo 'Start remote task'
- cat >urls.json << EOF
- {
- "urls" : [
- "$site"
- ],
- "repeat_count" : "$repeat_count",
- "emulate_device" : "Nexus 4"
- }
-EOF
-
- while [ "$(curl http://$instance_ip:8080/status)" != "Idle" ]; do
- echo 'Waiting for instance to be ready, retry in 5s'
- sleep 5
- done
- curl -X POST -d @urls.json http://$instance_ip:8080/set_tasks
-
- echo 'Run on device'
- mkdir $outdir/$output_subdir
- for ((run=0;run<$repeat_count;++run)); do
- echo '****' $run
- tools/android/loading/analyze.py log_requests \
- --devtools_port 9222 \
- --url $site \
- --output $outdir/${output_subdir}/${run}
- if [ $? -ne 0 ]; then
- echo "Analyze failed. Wait a bit for device to recover."
- sleep 3
- fi
- done
-done
diff --git a/loading/unmaintained/gce_validation_compare.sh b/loading/unmaintained/gce_validation_compare.sh
deleted file mode 100755
index 88e26df..0000000
--- a/loading/unmaintained/gce_validation_compare.sh
+++ /dev/null
@@ -1,75 +0,0 @@
-#!/bin/bash
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-# gce_validation_compare.sh rootdir compare_filename
-# root_dir: root directory for the experiment.
-# compare_filename: file where the comparison breakdown is output.
-#
-# Computes core sets from GCE and device experiment resutls, and compare them.
-# The expected directory structure is:
-#
-# root_dir/
-# cloud/
-# url1/ # Can be any name as long as it is mirrored under device/.
-# run1.trace # Can be any name.
-# run2.trace
-# ...
-# url2/
-# ...
-# device/
-# url1/
-# run1.trace
-# run2.trace
-# ...
-# url2/
-# ...
-
-root_dir=$1
-compare_filename=$2
-
-rm $compare_filename
-
-# Check directory structure.
-if [ ! -d $root_dir/cloud ]; then
- echo "$root_dir/cloud missing!"
- exit 1
-fi
-
-if [ ! -d $root_dir/device ]; then
- echo "$root_dir/device missing!"
- exit 1
-fi
-
-for device_file in $root_dir/device/*/ ; do
- cloud_file=$root_dir/cloud/$(basename $device_file)
- if [ ! -d $cloud_file ]; then
- echo "$cloud_file not found"
- fi
-done
-
-for cloud_file in $root_dir/cloud/*/ ; do
- device_file=$root_dir/device/$(basename $device_file)
- if [ ! -d $device_file ]; then
- echo "$device_file not found"
- fi
-done
-
-# Loop through all the subdirectories, compute the core sets and compare them.
-for device_file in $root_dir/device/*/ ; do
- base_name=$(basename $device_file)
- python tools/android/loading/core_set.py page_core --sets device/$base_name \
- --output $device_file/core_set.json --prefix $device_file
-
- cloud_file=$root_dir/cloud/$base_name
- if [ -d $cloud_file ]; then
- python tools/android/loading/core_set.py page_core --sets cloud/$base_name \
- --output $cloud_file/core_set.json --prefix $cloud_file
-
- compare_result=$(python tools/android/loading/core_set.py compare \
- --a $cloud_file/core_set.json --b $device_file/core_set.json)
- compare_result+=" $base_name"
- echo $compare_result >> $compare_filename
- fi
-done
diff --git a/loading/user_satisfied_lens.py b/loading/user_satisfied_lens.py
deleted file mode 100644
index 2e0b4be..0000000
--- a/loading/user_satisfied_lens.py
+++ /dev/null
@@ -1,253 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-"""Identifies key events related to user satisfaction.
-
-Several lenses are defined, for example FirstTextPaintLens and
-FirstSignificantPaintLens.
-
-When run from the command line, takes a lens name and a trace, and prints the
-fingerprints of the critical resources to stdout.
-"""
-import logging
-import operator
-
-import common_util
-
-
-class _UserSatisfiedLens(object):
- """A base class for all user satisfaction metrics.
-
- All of these work by identifying a user satisfaction event from the trace, and
- then building a set of request ids whose loading is needed to achieve that
- event. Subclasses need only provide the time computation. The base class will
- use that to construct the request ids.
- """
- _ATTRS = ['_satisfied_msec', '_event_msec', '_postload_msec',
- '_critical_request_ids']
-
- def CriticalRequests(self):
- """Critical requests.
-
- Returns:
- A sequence of request_track.Request objects representing an estimate of
- all requests that are necessary for the user satisfaction defined by this
- class.
- """
- raise NotImplementedError
-
- def CriticalRequestIds(self):
- """Ids of critical requests."""
- return set(rq.request_id for rq in self.CriticalRequests())
-
- def CriticalFingerprints(self):
- """Fingerprints of critical requests."""
- return set(rq.fingerprint for rq in self.CriticalRequests())
-
- def PostloadTimeMsec(self):
- """Return postload time.
-
- The postload time is an estimate of the amount of time needed by chrome to
- transform the critical results into the satisfying event.
-
- Returns:
- Postload time in milliseconds.
- """
- return 0
-
- def SatisfiedMs(self):
- """Returns user satisfied timestamp, in ms.
-
- This is *not* a unix timestamp. It is relative to the same point in time
- as the request_time field in request_track.Timing.
- """
- return self._satisfied_msec
-
- @classmethod
- def RequestsBefore(cls, request_track, time_ms):
- return [rq for rq in request_track.GetEvents()
- if rq.end_msec <= time_ms]
-
-
-class PLTLens(_UserSatisfiedLens):
- """A lens built using page load time (PLT) as the metric of user satisfaction.
- """
- def __init__(self, trace):
- self._satisfied_msec = PLTLens._ComputePlt(trace)
- self._critical_requests = _UserSatisfiedLens.RequestsBefore(
- trace.request_track, self._satisfied_msec)
-
- def CriticalRequests(self):
- return self._critical_requests
-
- @classmethod
- def _ComputePlt(cls, trace):
- mark_load_events = trace.tracing_track.GetMatchingEvents(
- 'devtools.timeline', 'MarkLoad')
- # Some traces contain several load events for the main frame.
- main_frame_load_events = filter(
- lambda e: e.args['data']['isMainFrame'], mark_load_events)
- if main_frame_load_events:
- return max(e.start_msec for e in main_frame_load_events)
- # Main frame onLoad() didn't finish. Take the end of the last completed
- # request.
- return max(r.end_msec or -1 for r in trace.request_track.GetEvents())
-
-
-class RequestFingerprintLens(_UserSatisfiedLens):
- """A lens built using requests in a trace that match a set of fingerprints."""
- def __init__(self, trace, fingerprints):
- fingerprints = set(fingerprints)
- self._critical_requests = [rq for rq in trace.request_track.GetEvents()
- if rq.fingerprint in fingerprints]
-
- def CriticalRequests(self):
- """Ids of critical requests."""
- return set(self._critical_requests)
-
-
-class _FirstEventLens(_UserSatisfiedLens):
- """Helper abstract subclass that defines users first event manipulations."""
- # pylint can't handle abstract subclasses.
- # pylint: disable=abstract-method
-
- def __init__(self, trace):
- """Initialize the lens.
-
- Args:
- trace: (LoadingTrace) the trace to use in the analysis.
- """
- self._satisfied_msec = None
- self._event_msec = None
- self._postload_msec = None
- self._critical_request_ids = None
- if trace is None:
- return
- self._CalculateTimes(trace)
- self._critical_requests = _UserSatisfiedLens.RequestsBefore(
- trace.request_track, self._satisfied_msec)
- self._critical_request_ids = set(rq.request_id
- for rq in self._critical_requests)
- if self._critical_requests:
- last_load = max(rq.end_msec for rq in self._critical_requests)
- else:
- last_load = float('inf')
- self._postload_msec = self._event_msec - last_load
-
- def CriticalRequests(self):
- """Override."""
- return self._critical_requests
-
- def PostloadTimeMsec(self):
- """Override."""
- return self._postload_msec
-
- def ToJsonDict(self):
- return common_util.SerializeAttributesToJsonDict({}, self, self._ATTRS)
-
- @classmethod
- def FromJsonDict(cls, json_dict):
- result = cls(None)
- return common_util.DeserializeAttributesFromJsonDict(
- json_dict, result, cls._ATTRS)
-
- def _CalculateTimes(self, trace):
- """Subclasses should implement to set _satisfied_msec and _event_msec."""
- raise NotImplementedError
-
- @classmethod
- def _CheckCategory(cls, tracing_track, category):
- assert category in tracing_track.Categories(), (
- 'The "%s" category must be enabled.' % category)
-
- @classmethod
- def _ExtractBestTiming(cls, times):
- if not times:
- return float('inf')
- assert len(times) == 1, \
- 'Unexpected duplicate {}: {} with spread of {}'.format(
- str(cls), len(times), max(times) - min(times))
- return float(max(times))
-
-
-class FirstTextPaintLens(_FirstEventLens):
- """Define satisfaction by the first text paint.
-
- This event is taken directly from a trace.
- """
- _EVENT_CATEGORY = 'blink.user_timing'
- def _CalculateTimes(self, trace):
- self._CheckCategory(trace.tracing_track, self._EVENT_CATEGORY)
- first_paints = [
- e.start_msec for e in trace.tracing_track.GetMatchingMainFrameEvents(
- 'blink.user_timing', 'firstPaint')]
- self._satisfied_msec = self._event_msec = \
- self._ExtractBestTiming(first_paints)
-
-
-class FirstContentfulPaintLens(_FirstEventLens):
- """Define satisfaction by the first contentful paint.
-
- This event is taken directly from a trace. Internally to chrome it's computed
- by filtering out things like background paint from firstPaint.
- """
- _EVENT_CATEGORY = 'blink.user_timing'
- def _CalculateTimes(self, trace):
- self._CheckCategory(trace.tracing_track, self._EVENT_CATEGORY)
- first_paints = [
- e.start_msec for e in trace.tracing_track.GetMatchingMainFrameEvents(
- 'blink.user_timing', 'firstContentfulPaint')]
- self._satisfied_msec = self._event_msec = \
- self._ExtractBestTiming(first_paints)
-
-
-class FirstSignificantPaintLens(_FirstEventLens):
- """Define satisfaction by the first paint after a big layout change.
-
- Our satisfaction time is that of the layout change, as all resources must have
- been loaded to compute the layout. Our event time is that of the next paint as
- that is the observable event.
- """
- _FIRST_LAYOUT_COUNTER = 'LayoutObjectsThatHadNeverHadLayout'
- _EVENT_CATEGORIES = ['blink', 'disabled-by-default-blink.debug.layout']
- def _CalculateTimes(self, trace):
- for cat in self._EVENT_CATEGORIES:
- self._CheckCategory(trace.tracing_track, cat)
- paint_tree_times = []
- layouts = [] # (layout item count, msec).
- for e in trace.tracing_track.GetEvents():
- if ('frame' in e.args and
- e.args['frame'] != trace.tracing_track.GetMainFrameID()):
- continue
- # If we don't know have a frame id, we assume it applies to all events.
-
- if e.Matches('blink', 'FrameView::paintTree'):
- paint_tree_times.append(e.start_msec)
- if ('counters' in e.args and
- self._FIRST_LAYOUT_COUNTER in e.args['counters']):
- layouts.append((e.args['counters'][self._FIRST_LAYOUT_COUNTER],
- e.start_msec))
- assert layouts, 'No layout events'
- assert paint_tree_times,'No paintTree times'
- layouts.sort(key=operator.itemgetter(0), reverse=True)
- self._satisfied_msec = layouts[0][1]
- self._event_msec = min(t for t in paint_tree_times
- if t > self._satisfied_msec)
-
-
-def main(lens_name, trace_file):
- assert (lens_name in globals() and
- not lens_name.startswith('_') and
- lens_name.endswith('Lens')), 'Bad lens %s' % lens_name
- lens_cls = globals()[lens_name]
- trace = loading_trace.LoadingTrace.FromJsonFile(trace_file)
- lens = lens_cls(trace)
- for fp in sorted(lens.CriticalFingerprints()):
- print fp
-
-
-if __name__ == '__main__':
- import sys
- import loading_trace
- main(sys.argv[1], sys.argv[2])
diff --git a/loading/user_satisfied_lens_unittest.py b/loading/user_satisfied_lens_unittest.py
deleted file mode 100644
index ed6c753..0000000
--- a/loading/user_satisfied_lens_unittest.py
+++ /dev/null
@@ -1,209 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-import unittest
-
-import request_track
-import test_utils
-import user_satisfied_lens
-
-
-class TraceCreator(object):
- def __init__(self):
- self._request_index = 1
-
- def RequestAt(self, timestamp_msec, duration=1):
- timestamp_sec = float(timestamp_msec) / 1000
- rq = request_track.Request.FromJsonDict({
- 'url': 'http://bla-%s-.com' % timestamp_msec,
- 'request_id': '0.%s' % self._request_index,
- 'frame_id': '123.%s' % timestamp_msec,
- 'initiator': {'type': 'other'},
- 'timestamp': timestamp_sec,
- 'timing': {'request_time': timestamp_sec,
- 'loading_finished': duration}
- })
- self._request_index += 1
- return rq
-
- def CreateTrace(self, requests, events, main_frame_id):
- loading_trace = test_utils.LoadingTraceFromEvents(
- requests, trace_events=events)
- loading_trace.tracing_track.SetMainFrameID(main_frame_id)
- loading_trace.url = 'http://www.dummy.com'
- return loading_trace
-
-
-class UserSatisfiedLensTestCase(unittest.TestCase):
- # We track all times in milliseconds, but raw trace events are in
- # microseconds.
- MILLI_TO_MICRO = 1000
-
- def setUp(self):
- super(UserSatisfiedLensTestCase, self).setUp()
-
- def testPLTLens(self):
- MAINFRAME = 1
- trace_creator = test_utils.TraceCreator()
- requests = [trace_creator.RequestAt(1), trace_creator.RequestAt(10),
- trace_creator.RequestAt(20)]
- loading_trace = trace_creator.CreateTrace(
- requests,
- [{'ts': 5 * self.MILLI_TO_MICRO, 'ph': 'I',
- 'cat': 'devtools.timeline', 'pid': 1, 'tid': 1,
- 'name': 'MarkLoad',
- 'args': {'data': {'isMainFrame': True}}},
- {'ts': 10 * self.MILLI_TO_MICRO, 'ph': 'I',
- 'cat': 'devtools.timeline', 'pid': 1, 'tid': 1,
- 'name': 'MarkLoad',
- 'args': {'data': {'isMainFrame': True}}},
- {'ts': 20 * self.MILLI_TO_MICRO, 'ph': 'I',
- 'cat': 'devtools.timeline', 'pid': 1, 'tid': 1,
- 'name': 'MarkLoad',
- 'args': {'data': {'isMainFrame': False}}}], MAINFRAME)
- lens = user_satisfied_lens.PLTLens(loading_trace)
- self.assertEqual(set(['0.1']), lens.CriticalRequestIds())
- self.assertEqual(10, lens.SatisfiedMs())
-
- def testFirstContentfulPaintLens(self):
- MAINFRAME = 1
- SUBFRAME = 2
- trace_creator = test_utils.TraceCreator()
- requests = [trace_creator.RequestAt(1), trace_creator.RequestAt(10),
- trace_creator.RequestAt(20)]
- loading_trace = trace_creator.CreateTrace(
- requests,
- [{'ts': 0, 'ph': 'I',
- 'cat': 'blink.some_other_user_timing',
- 'name': 'firstContentfulPaint'},
- {'ts': 30 * self.MILLI_TO_MICRO, 'ph': 'I',
- 'cat': 'blink.user_timing',
- 'name': 'firstDiscontentPaint'},
- {'ts': 5 * self.MILLI_TO_MICRO, 'ph': 'I',
- 'cat': 'blink.user_timing',
- 'name': 'firstContentfulPaint',
- 'args': {'frame': SUBFRAME} },
- {'ts': 12 * self.MILLI_TO_MICRO, 'ph': 'I',
- 'cat': 'blink.user_timing',
- 'name': 'firstContentfulPaint',
- 'args': {'frame': MAINFRAME}}], MAINFRAME)
- lens = user_satisfied_lens.FirstContentfulPaintLens(loading_trace)
- self.assertEqual(set(['0.1', '0.2']), lens.CriticalRequestIds())
- self.assertEqual(1, lens.PostloadTimeMsec())
-
- def testCantGetNoSatisfaction(self):
- MAINFRAME = 1
- trace_creator = test_utils.TraceCreator()
- requests = [trace_creator.RequestAt(1), trace_creator.RequestAt(10),
- trace_creator.RequestAt(20)]
- loading_trace = trace_creator.CreateTrace(
- requests,
- [{'ts': 0, 'ph': 'I',
- 'cat': 'not_my_cat',
- 'name': 'someEvent',
- 'args': {'frame': MAINFRAME}}], MAINFRAME)
- loading_trace.tracing_track.SetMainFrameID(MAINFRAME)
- lens = user_satisfied_lens.FirstContentfulPaintLens(loading_trace)
- self.assertEqual(set(['0.1', '0.2', '0.3']), lens.CriticalRequestIds())
- self.assertEqual(float('inf'), lens.PostloadTimeMsec())
-
- def testFirstTextPaintLens(self):
- MAINFRAME = 1
- SUBFRAME = 2
- trace_creator = test_utils.TraceCreator()
- requests = [trace_creator.RequestAt(1), trace_creator.RequestAt(10),
- trace_creator.RequestAt(20)]
- loading_trace = trace_creator.CreateTrace(
- requests,
- [{'ts': 0, 'ph': 'I',
- 'cat': 'blink.some_other_user_timing',
- 'name': 'firstPaint'},
- {'ts': 30 * self.MILLI_TO_MICRO, 'ph': 'I',
- 'cat': 'blink.user_timing',
- 'name': 'firstishPaint',
- 'args': {'frame': MAINFRAME}},
- {'ts': 3 * self.MILLI_TO_MICRO, 'ph': 'I',
- 'cat': 'blink.user_timing',
- 'name': 'firstPaint',
- 'args': {'frame': SUBFRAME}},
- {'ts': 12 * self.MILLI_TO_MICRO, 'ph': 'I',
- 'cat': 'blink.user_timing',
- 'name': 'firstPaint',
- 'args': {'frame': MAINFRAME}}], MAINFRAME)
- loading_trace.tracing_track.SetMainFrameID(MAINFRAME)
- lens = user_satisfied_lens.FirstTextPaintLens(loading_trace)
- self.assertEqual(set(['0.1', '0.2']), lens.CriticalRequestIds())
- self.assertEqual(1, lens.PostloadTimeMsec())
-
- def testFirstSignificantPaintLens(self):
- MAINFRAME = 1
- trace_creator = test_utils.TraceCreator()
- requests = [trace_creator.RequestAt(1), trace_creator.RequestAt(10),
- trace_creator.RequestAt(15), trace_creator.RequestAt(20)]
- loading_trace = trace_creator.CreateTrace(
- requests,
- [{'ts': 0, 'ph': 'I',
- 'cat': 'blink',
- 'name': 'firstPaint'},
- {'ts': 9 * self.MILLI_TO_MICRO, 'ph': 'I',
- 'cat': 'blink.user_timing',
- 'name': 'FrameView::paintTree'},
- {'ts': 18 * self.MILLI_TO_MICRO, 'ph': 'I',
- 'cat': 'blink',
- 'name': 'FrameView::paintTree'},
- {'ts': 22 * self.MILLI_TO_MICRO, 'ph': 'I',
- 'cat': 'blink',
- 'name': 'FrameView::paintTree'},
- {'ts': 5 * self.MILLI_TO_MICRO, 'ph': 'I',
- 'cat': 'foobar', 'name': 'biz',
- 'args': {'counters': {
- 'LayoutObjectsThatHadNeverHadLayout': 10
- } } },
- {'ts': 12 * self.MILLI_TO_MICRO, 'ph': 'I',
- 'cat': 'foobar', 'name': 'biz',
- 'args': {'counters': {
- 'LayoutObjectsThatHadNeverHadLayout': 12
- } } },
- {'ts': 15 * self.MILLI_TO_MICRO, 'ph': 'I',
- 'cat': 'foobar', 'name': 'biz',
- 'args': {'counters': {
- 'LayoutObjectsThatHadNeverHadLayout': 10
- } } } ], MAINFRAME)
- lens = user_satisfied_lens.FirstSignificantPaintLens(loading_trace)
- self.assertEqual(set(['0.1', '0.2']), lens.CriticalRequestIds())
- self.assertEqual(7, lens.PostloadTimeMsec())
-
- def testRequestFingerprintLens(self):
- MAINFRAME = 1
- SUBFRAME = 2
- trace_creator = test_utils.TraceCreator()
- requests = [trace_creator.RequestAt(1), trace_creator.RequestAt(10),
- trace_creator.RequestAt(20)]
- loading_trace = trace_creator.CreateTrace(
- requests,
- [{'ts': 0, 'ph': 'I',
- 'cat': 'blink.some_other_user_timing',
- 'name': 'firstContentfulPaint'},
- {'ts': 30 * self.MILLI_TO_MICRO, 'ph': 'I',
- 'cat': 'blink.user_timing',
- 'name': 'firstDiscontentPaint'},
- {'ts': 5 * self.MILLI_TO_MICRO, 'ph': 'I',
- 'cat': 'blink.user_timing',
- 'name': 'firstContentfulPaint',
- 'args': {'frame': SUBFRAME} },
- {'ts': 12 * self.MILLI_TO_MICRO, 'ph': 'I',
- 'cat': 'blink.user_timing',
- 'name': 'firstContentfulPaint',
- 'args': {'frame': MAINFRAME}}], MAINFRAME)
- lens = user_satisfied_lens.FirstContentfulPaintLens(loading_trace)
- self.assertEqual(set(['0.1', '0.2']), lens.CriticalRequestIds())
- self.assertEqual(1, lens.PostloadTimeMsec())
- request_lens = user_satisfied_lens.RequestFingerprintLens(
- loading_trace, lens.CriticalFingerprints())
- self.assertEqual(set(['0.1', '0.2']), request_lens.CriticalRequestIds())
- self.assertEqual(0, request_lens.PostloadTimeMsec())
-
-
-if __name__ == '__main__':
- unittest.main()
diff --git a/loading/util.r b/loading/util.r
deleted file mode 100644
index a65c572..0000000
--- a/loading/util.r
+++ /dev/null
@@ -1,47 +0,0 @@
-# Copyright 2015 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-# Useful R routines for analyzing output from several cost_to_csv.py
-# output and producing interesting graphs.
-
-combine.runs <- function(times, prefix, suffix)
- do.call("rbind", lapply(times, function (t)
- with(read.csv(paste0(prefix, t, suffix)),
- data.frame(site, kind, cost, time=t))))
-
-get.ordered.names <- function(runs) {
- means <- with(runs, tapply(cost, list(site, kind), mean))
- return(names(means[,"cold"])[order(means[,"cold"])])
-}
-
-plot.warm.cold <- function(runs, main="") {
- ordered.names <- get.ordered.names(runs)
- n <- length(ordered.names)
- par(mar=c(8,4,4,4), bg="white")
- plot(NULL, xlim=c(1,25), ylim=range(runs$cost), xaxt="n",
- ylab="ms", xlab="", main=main)
- axis(1, 1:n, labels=ordered.names, las=2)
- getdata <- function(k, t) sapply(
- ordered.names, function (s) with(runs, cost[site==s & kind==k & time==t]))
- for (t in unique(runs$time)) {
- points(1:n, getdata("cold", t), pch=1)
- points(1:n, getdata("warm", t), pch=3)
- }
- legend("topleft", pch=c(1, 3), legend=c("cold", "warm"))
-}
-
-plot.relative.sds <- function(runs, main="") {
- sds <- with(runs, tapply(cost, list(site, kind), sd))
- means <- with(runs, tapply(cost, list(site, kind), mean))
- ordered.names <- get.ordered.names(runs)
- n <- length(ordered.names)
- par(mar=c(8,4,4,4), bg="white")
- plot(NULL, xlim=c(1,25), ylim=c(0,.8),
- xaxt="n", ylab="Relative SD", xlab="", main=main)
- axis(1, 1:n, labels=ordered.names, las=2)
- getdata <- function(k) sapply(ordered.names, function(s) (sds/means)[s, k])
- points(1:n, getdata("cold"), pch=1)
- points(1:n, getdata("warm"), pch=3)
- legend("topleft", pch=c(1, 3), legend=c("cold", "warm"))
-}
diff --git a/loading/wpr_backend.py b/loading/wpr_backend.py
deleted file mode 100644
index 25f753c..0000000
--- a/loading/wpr_backend.py
+++ /dev/null
@@ -1,206 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-"""Opens and modifies WPR archive.
-"""
-
-import collections
-import os
-import re
-import sys
-from urlparse import urlparse
-
-
-_SRC_DIR = os.path.abspath(os.path.join(
- os.path.dirname(__file__), '..', '..', '..'))
-
-_WEBPAGEREPLAY_DIR = os.path.join(_SRC_DIR, 'third_party', 'webpagereplay')
-_WEBPAGEREPLAY_HTTPARCHIVE = os.path.join(_WEBPAGEREPLAY_DIR, 'httparchive.py')
-
-sys.path.append(os.path.join(_SRC_DIR, 'third_party', 'webpagereplay'))
-import httparchive
-
-# Regex used to parse httparchive.py stdout's when listing all urls.
-_PARSE_WPR_REQUEST_REGEX = re.compile(r'^\S+\s+(?P<url>\S+)')
-
-# Regex used to extract WPR domain from WPR log.
-_PARSE_WPR_DOMAIN_REGEX = re.compile(r'^\(WARNING\)\s.*\sHTTP server started on'
- r' (?P<netloc>\S+)\s*$')
-
-# Regex used to extract URLs requests from WPR log.
-_PARSE_WPR_URL_REGEX = re.compile(
- r'^\((?P<level>\S+)\)\s.*\shttpproxy\..*\s(?P<method>[A-Z]+)\s+'
- r'(?P<url>https?://[a-zA-Z0-9\-_:.]+/?\S*)\s.*$')
-
-
-class WprUrlEntry(object):
- """Wpr url entry holding request and response infos. """
-
- def __init__(self, wpr_request, wpr_response):
- self._wpr_response = wpr_response
- self.url = self._ExtractUrl(str(wpr_request))
-
- def GetResponseHeadersDict(self):
- """Get a copied dictionary of available headers.
-
- Returns:
- dict(name -> value)
- """
- headers = collections.defaultdict(list)
- for (key, value) in self._wpr_response.original_headers:
- headers[key.lower()].append(value)
- return {k: ','.join(v) for (k, v) in headers.items()}
-
- def SetResponseHeader(self, name, value):
- """Set a header value.
-
- In the case where the <name> response header is present more than once
- in the response header list, then the given value is set only to the first
- occurrence of that given headers, and the next ones are removed.
-
- Args:
- name: The name of the response header to set.
- value: The value of the response header to set.
- """
- assert name.islower()
- new_headers = []
- new_header_set = False
- for header in self._wpr_response.original_headers:
- if header[0].lower() != name:
- new_headers.append(header)
- elif not new_header_set:
- new_header_set = True
- new_headers.append((header[0], value))
- if new_header_set:
- self._wpr_response.original_headers = new_headers
- else:
- self._wpr_response.original_headers.append((name, value))
-
- def DeleteResponseHeader(self, name):
- """Delete a header.
-
- In the case where the <name> response header is present more than once
- in the response header list, this method takes care of removing absolutely
- all them.
-
- Args:
- name: The name of the response header field to delete.
- """
- assert name.islower()
- self._wpr_response.original_headers = \
- [x for x in self._wpr_response.original_headers if x[0].lower() != name]
-
- def RemoveResponseHeaderDirectives(self, name, directives_blacklist):
- """Removed a set of directives from response headers.
-
- Also removes the cache header in case no more directives are left.
- It is useful, for example, to remove 'no-cache' from 'pragma: no-cache'.
-
- Args:
- name: The name of the response header field to modify.
- directives_blacklist: Set of lowered directives to remove from list.
- """
- response_headers = self.GetResponseHeadersDict()
- if name not in response_headers:
- return
- new_value = []
- for header_name in response_headers[name].split(','):
- if header_name.strip().lower() not in directives_blacklist:
- new_value.append(header_name)
- if new_value:
- self.SetResponseHeader(name, ','.join(new_value))
- else:
- self.DeleteResponseHeader(name)
-
- @classmethod
- def _ExtractUrl(cls, request_string):
- match = _PARSE_WPR_REQUEST_REGEX.match(request_string)
- assert match, 'Looks like there is an issue with: {}'.format(request_string)
- return match.group('url')
-
-
-class WprArchiveBackend(object):
- """WPR archive back-end able to read and modify. """
-
- def __init__(self, wpr_archive_path):
- """Constructor:
-
- Args:
- wpr_archive_path: The path of the WPR archive to read/modify.
- """
- self._wpr_archive_path = wpr_archive_path
- self._http_archive = httparchive.HttpArchive.Load(wpr_archive_path)
-
- def ListUrlEntries(self):
- """Iterates over all url entries
-
- Returns:
- A list of WprUrlEntry.
- """
- return [WprUrlEntry(request, self._http_archive[request])
- for request in self._http_archive.get_requests()]
-
- def Persist(self):
- """Persists the archive to disk. """
- for request in self._http_archive.get_requests():
- response = self._http_archive[request]
- response.headers = response._TrimHeaders(response.original_headers)
- self._http_archive.Persist(self._wpr_archive_path)
-
-
-# WPR request seen by the WPR's HTTP proxy.
-# is_served: Boolean whether WPR has found a matching resource in the archive.
-# method: HTTP method of the request ['GET', 'POST' and so on...].
-# url: The requested URL.
-# is_wpr_host: Whether the requested url have WPR has an host such as:
-# http://127.0.0.1:<WPR's HTTP listening port>/web-page-replay-command-exit
-WprRequest = collections.namedtuple('WprRequest',
- ['is_served', 'method', 'url', 'is_wpr_host'])
-
-
-def ExtractRequestsFromLog(log_path):
- """Extract list of requested handled by the WPR's HTTP proxy from a WPR log.
-
- Args:
- log_path: The path of the WPR log to parse.
-
- Returns:
- List of WprRequest.
- """
- requests = []
- wpr_http_netloc = None
- with open(log_path) as log_file:
- for line in log_file.readlines():
- # Extract WPR's HTTP proxy's listening network location.
- match = _PARSE_WPR_DOMAIN_REGEX.match(line)
- if match:
- wpr_http_netloc = match.group('netloc')
- assert wpr_http_netloc.startswith('127.0.0.1:')
- continue
- # Extract the WPR requested URLs.
- match = _PARSE_WPR_URL_REGEX.match(line)
- if match:
- parsed_url = urlparse(match.group('url'))
- # Ignore strange URL requests such as http://ousvtzkizg/
- # TODO(gabadie): Find and terminate the location where they are queried.
- if '.' not in parsed_url.netloc and ':' not in parsed_url.netloc:
- continue
- assert wpr_http_netloc
- request = WprRequest(is_served=(match.group('level') == 'DEBUG'),
- method=match.group('method'), url=match.group('url'),
- is_wpr_host=parsed_url.netloc == wpr_http_netloc)
- requests.append(request)
- return requests
-
-
-if __name__ == '__main__':
- import argparse
- parser = argparse.ArgumentParser(description='Tests cache back-end.')
- parser.add_argument('wpr_archive', type=str)
- command_line_args = parser.parse_args()
-
- wpr_backend = WprArchiveBackend(command_line_args.wpr_archive)
- url_entries = wpr_backend.ListUrlEntries()
- print url_entries[0].url
- wpr_backend.Persist()
diff --git a/loading/wpr_backend_unittest.py b/loading/wpr_backend_unittest.py
deleted file mode 100644
index fbcb517..0000000
--- a/loading/wpr_backend_unittest.py
+++ /dev/null
@@ -1,273 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-import contextlib
-import httplib
-import os
-import shutil
-import tempfile
-import unittest
-
-from device_setup import _WprHost
-from options import OPTIONS
-from trace_test.webserver_test import WebServer
-from wpr_backend import WprUrlEntry, WprRequest, ExtractRequestsFromLog
-
-
-LOADING_DIR = os.path.dirname(__file__)
-
-
-class MockWprResponse(object):
- def __init__(self, headers):
- self.original_headers = headers
-
-class WprUrlEntryTest(unittest.TestCase):
-
- @classmethod
- def _CreateWprUrlEntry(cls, headers):
- wpr_response = MockWprResponse(headers)
- return WprUrlEntry('GET http://a.com/', wpr_response)
-
- def testExtractUrl(self):
- self.assertEquals('http://aa.bb/c',
- WprUrlEntry._ExtractUrl('GET http://aa.bb/c'))
- self.assertEquals('http://aa.b/c',
- WprUrlEntry._ExtractUrl('POST http://aa.b/c'))
- self.assertEquals('http://a.bb/c',
- WprUrlEntry._ExtractUrl('WHATEVER http://a.bb/c'))
- self.assertEquals('https://aa.bb/c',
- WprUrlEntry._ExtractUrl('GET https://aa.bb/c'))
- self.assertEquals('http://aa.bb',
- WprUrlEntry._ExtractUrl('GET http://aa.bb'))
- self.assertEquals('http://aa.bb',
- WprUrlEntry._ExtractUrl('GET http://aa.bb FOO BAR'))
-
- def testGetResponseHeadersDict(self):
- entry = self._CreateWprUrlEntry([('header0', 'value0'),
- ('header1', 'value1'),
- ('header0', 'value2'),
- ('header2', 'value3'),
- ('header0', 'value4'),
- ('HEadEr3', 'VaLue4')])
- headers = entry.GetResponseHeadersDict()
- self.assertEquals(4, len(headers))
- self.assertEquals('value0,value2,value4', headers['header0'])
- self.assertEquals('value1', headers['header1'])
- self.assertEquals('value3', headers['header2'])
- self.assertEquals('VaLue4', headers['header3'])
-
- def testSetResponseHeader(self):
- entry = self._CreateWprUrlEntry([('header0', 'value0'),
- ('header1', 'value1')])
- entry.SetResponseHeader('new_header0', 'new_value0')
- headers = entry.GetResponseHeadersDict()
- self.assertEquals(3, len(headers))
- self.assertEquals('new_value0', headers['new_header0'])
- self.assertEquals('new_header0', entry._wpr_response.original_headers[2][0])
-
- entry = self._CreateWprUrlEntry([('header0', 'value0'),
- ('header1', 'value1'),
- ('header2', 'value1'),])
- entry.SetResponseHeader('header1', 'new_value1')
- headers = entry.GetResponseHeadersDict()
- self.assertEquals(3, len(headers))
- self.assertEquals('new_value1', headers['header1'])
- self.assertEquals('header1', entry._wpr_response.original_headers[1][0])
-
- entry = self._CreateWprUrlEntry([('header0', 'value0'),
- ('hEADEr1', 'value1'),
- ('header2', 'value1'),])
- entry.SetResponseHeader('header1', 'new_value1')
- headers = entry.GetResponseHeadersDict()
- self.assertEquals(3, len(headers))
- self.assertEquals('new_value1', headers['header1'])
- self.assertEquals('hEADEr1', entry._wpr_response.original_headers[1][0])
-
- entry = self._CreateWprUrlEntry([('header0', 'value0'),
- ('header1', 'value1'),
- ('header2', 'value2'),
- ('header1', 'value3'),
- ('header3', 'value4'),
- ('heADer1', 'value5')])
- entry.SetResponseHeader('header1', 'new_value2')
- headers = entry.GetResponseHeadersDict()
- self.assertEquals(4, len(headers))
- self.assertEquals('new_value2', headers['header1'])
- self.assertEquals('header1', entry._wpr_response.original_headers[1][0])
- self.assertEquals('header3', entry._wpr_response.original_headers[3][0])
- self.assertEquals('value4', entry._wpr_response.original_headers[3][1])
-
- entry = self._CreateWprUrlEntry([('header0', 'value0'),
- ('heADer1', 'value1'),
- ('header2', 'value2'),
- ('HEader1', 'value3'),
- ('header3', 'value4'),
- ('header1', 'value5')])
- entry.SetResponseHeader('header1', 'new_value2')
- headers = entry.GetResponseHeadersDict()
- self.assertEquals(4, len(headers))
- self.assertEquals('new_value2', headers['header1'])
- self.assertEquals('heADer1', entry._wpr_response.original_headers[1][0])
- self.assertEquals('header3', entry._wpr_response.original_headers[3][0])
- self.assertEquals('value4', entry._wpr_response.original_headers[3][1])
-
- def testDeleteResponseHeader(self):
- entry = self._CreateWprUrlEntry([('header0', 'value0'),
- ('header1', 'value1'),
- ('header0', 'value2'),
- ('header2', 'value3')])
- entry.DeleteResponseHeader('header1')
- self.assertNotIn('header1', entry.GetResponseHeadersDict())
- self.assertEquals(2, len(entry.GetResponseHeadersDict()))
- entry.DeleteResponseHeader('header0')
- self.assertNotIn('header0', entry.GetResponseHeadersDict())
- self.assertEquals(1, len(entry.GetResponseHeadersDict()))
-
- entry = self._CreateWprUrlEntry([('header0', 'value0'),
- ('hEAder1', 'value1'),
- ('header0', 'value2'),
- ('heaDEr2', 'value3')])
- entry.DeleteResponseHeader('header1')
- self.assertNotIn('header1', entry.GetResponseHeadersDict())
- self.assertEquals(2, len(entry.GetResponseHeadersDict()))
-
- def testRemoveResponseHeaderDirectives(self):
- entry = self._CreateWprUrlEntry([('hEAder0', 'keyWOrd0,KEYword1'),
- ('heaDER1', 'value1'),
- ('headeR2', 'value3')])
- entry.RemoveResponseHeaderDirectives('header0', {'keyword1', 'keyword0'})
- self.assertNotIn('header0', entry.GetResponseHeadersDict())
-
- entry = self._CreateWprUrlEntry([('heADEr0', 'keYWOrd0'),
- ('hEADERr1', 'value1'),
- ('HEAder0', 'keywoRD1,keYwoRd2'),
- ('hEADer2', 'value3')])
- entry.RemoveResponseHeaderDirectives('header0', {'keyword1'})
- self.assertEquals(
- 'keYWOrd0,keYwoRd2', entry.GetResponseHeadersDict()['header0'])
- self.assertEquals(3, len(entry._wpr_response.original_headers))
- self.assertEquals(
- 'keYWOrd0,keYwoRd2', entry._wpr_response.original_headers[0][1])
-
-
-class WprHostTest(unittest.TestCase):
- def setUp(self):
- OPTIONS.ParseArgs([])
- self._server_address = None
- self._wpr_http_port = None
- self._tmp_directory = tempfile.mkdtemp(prefix='tmp_test_')
-
- def tearDown(self):
- shutil.rmtree(self._tmp_directory)
-
- def _TmpPath(self, name):
- return os.path.join(self._tmp_directory, name)
-
- def _LogPath(self):
- return self._TmpPath('wpr.log')
-
- def _ArchivePath(self):
- return self._TmpPath('wpr')
-
- @contextlib.contextmanager
- def RunWebServer(self):
- assert self._server_address is None
- with WebServer.Context(
- source_dir=os.path.join(LOADING_DIR, 'trace_test', 'tests'),
- communication_dir=self._tmp_directory) as server:
- self._server_address = server.Address()
- yield
-
- @contextlib.contextmanager
- def RunWpr(self, record):
- assert self._server_address is not None
- assert self._wpr_http_port is None
- with _WprHost(self._ArchivePath(), record=record,
- out_log_path=self._LogPath()) as (http_port, https_port):
- del https_port # unused
- self._wpr_http_port = http_port
- yield http_port
-
- def DoHttpRequest(self, path, expected_status=200, destination='wpr'):
- assert self._server_address is not None
- if destination == 'wpr':
- assert self._wpr_http_port is not None
- connection = httplib.HTTPConnection('127.0.0.1', self._wpr_http_port)
- elif destination == 'server':
- connection = httplib.HTTPConnection(self._server_address)
- else:
- assert False
- try:
- connection.request(
- "GET", '/' + path, headers={'Host': self._server_address})
- response = connection.getresponse()
- finally:
- connection.close()
- self.assertEquals(expected_status, response.status)
-
- def _GenRawWprRequest(self, path):
- assert self._wpr_http_port is not None
- url = 'http://127.0.0.1:{}/web-page-replay-{}'.format(
- self._wpr_http_port, path)
- return WprRequest(is_served=True, method='GET', is_wpr_host=True, url=url)
-
- def GenRawRequest(self, path, is_served):
- assert self._server_address is not None
- return WprRequest(is_served=is_served, method='GET', is_wpr_host=False,
- url='http://{}/{}'.format(self._server_address, path))
-
- def AssertWprParsedRequests(self, ref_requests):
- all_ref_requests = []
- all_ref_requests.append(self._GenRawWprRequest('generate-200'))
- all_ref_requests.extend(ref_requests)
- all_ref_requests.append(self._GenRawWprRequest('generate-200'))
- all_ref_requests.append(self._GenRawWprRequest('command-exit'))
- requests = ExtractRequestsFromLog(self._LogPath())
- self.assertEquals(all_ref_requests, requests)
- self._wpr_http_port = None
-
- def testExtractRequestsFromLog(self):
- with self.RunWebServer():
- with self.RunWpr(record=True):
- self.DoHttpRequest('1.html')
- self.DoHttpRequest('2.html')
- ref_requests = [
- self.GenRawRequest('1.html', is_served=True),
- self.GenRawRequest('2.html', is_served=True)]
- self.AssertWprParsedRequests(ref_requests)
-
- with self.RunWpr(record=False):
- self.DoHttpRequest('2.html')
- self.DoHttpRequest('1.html')
- ref_requests = [
- self.GenRawRequest('2.html', is_served=True),
- self.GenRawRequest('1.html', is_served=True)]
- self.AssertWprParsedRequests(ref_requests)
-
- def testExtractRequestsFromLogHaveCorrectIsServed(self):
- with self.RunWebServer():
- with self.RunWpr(record=True):
- self.DoHttpRequest('4.html', expected_status=404)
- ref_requests = [self.GenRawRequest('4.html', is_served=True)]
- self.AssertWprParsedRequests(ref_requests)
-
- with self.RunWpr(record=False):
- self.DoHttpRequest('4.html', expected_status=404)
- self.DoHttpRequest('5.html', expected_status=404)
- ref_requests = [self.GenRawRequest('4.html', is_served=True),
- self.GenRawRequest('5.html', is_served=False)]
- self.AssertWprParsedRequests(ref_requests)
-
- def testExtractRequestsFromLogHaveCorrectIsWprHost(self):
- PATH = 'web-page-replay-generate-200'
- with self.RunWebServer():
- self.DoHttpRequest(PATH, expected_status=404, destination='server')
- with self.RunWpr(record=True):
- self.DoHttpRequest(PATH)
- ref_requests = [self.GenRawRequest(PATH, is_served=True)]
- self.AssertWprParsedRequests(ref_requests)
-
-
-if __name__ == '__main__':
- unittest.main()
diff --git a/loading/wpr_helper.py b/loading/wpr_helper.py
deleted file mode 100755
index b509d11..0000000
--- a/loading/wpr_helper.py
+++ /dev/null
@@ -1,126 +0,0 @@
-#!/usr/bin/python
-#
-# Copyright 2017 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-"""Helper script to launch Chrome on device and WebPageReplay on host."""
-
-import logging
-import optparse
-import os
-import sys
-import time
-
-_SRC_PATH = os.path.abspath(os.path.join(
- os.path.dirname(__file__), os.pardir, os.pardir, os.pardir))
-
-sys.path.append(os.path.join(_SRC_PATH, 'third_party', 'catapult', 'devil'))
-from devil.android import device_utils
-from devil.android import flag_changer
-from devil.android.constants import chrome
-from devil.android.perf import cache_control
-from devil.android.sdk import intent
-
-sys.path.append(os.path.join(_SRC_PATH, 'build', 'android'))
-import devil_chromium
-
-import chrome_setup
-import device_setup
-
-
-def RunChrome(device, cold, chrome_args, package_info):
- """Runs Chrome on the device.
-
- Args:
- device: (DeviceUtils) device to run the tests on.
- cold: (bool) Whether caches should be dropped.
- chrome_args: ([str]) List of arguments to pass to Chrome.
- package_info: (PackageInfo) Chrome package info.
- """
- if not device.HasRoot():
- device.EnableRoot()
-
- cmdline_file = package_info.cmdline_file
- package = package_info.package
- with flag_changer.CustomCommandLineFlags(device, cmdline_file, chrome_args):
- device.ForceStop(package)
-
- if cold:
- chrome_setup.ResetChromeLocalState(device, package)
- cache_control.CacheControl(device).DropRamCaches()
-
- start_intent = intent.Intent(package=package, data='about:blank',
- activity=package_info.activity)
- try:
- device.StartActivity(start_intent, blocking=True)
- print (
- '\n\n'
- ' +---------------------------------------------+\n'
- ' | Chrome launched, press Ctrl-C to interrupt. |\n'
- ' +---------------------------------------------+')
- while True:
- time.sleep(1)
- except KeyboardInterrupt:
- pass
- finally:
- device.ForceStop(package)
-
-
-def _CreateOptionParser():
- description = 'Launches Chrome on a device, connected to a WebPageReplay ' \
- 'instance running on the host. The WPR archive must be ' \
- 'passed as parameter.'
- parser = optparse.OptionParser(description=description,
- usage='Usage: %prog [options] wpr_archive')
-
- # Device-related options.
- d = optparse.OptionGroup(parser, 'Device options')
- d.add_option('--device', help='Device ID')
- d.add_option('--cold', help='Purge all caches before running Chrome.',
- default=False, action='store_true')
- d.add_option('--chrome_package_name',
- help='Chrome package name (e.g. "chrome" or "chromium") '
- '[default: %default].', default='chrome')
- parser.add_option_group(d)
-
- # WebPageReplay-related options.
- w = optparse.OptionGroup(parser, 'WebPageReplay options')
- w.add_option('--record',
- help='Enable this to record a new WPR archive.',
- action='store_true', default=False)
- w.add_option('--wpr_log', help='WPR log path.')
- w.add_option('--network_condition', help='Network condition for emulation.')
- parser.add_option_group(w)
-
- return parser
-
-
-def main():
- parser = _CreateOptionParser()
- options, args = parser.parse_args()
- if len(args) != 1:
- parser.error("Incorrect number of arguments.")
- devil_chromium.Initialize()
- devices = device_utils.DeviceUtils.HealthyDevices()
- device = devices[0]
- if len(devices) != 1 and options.device is None:
- logging.error('Several devices attached, must specify one with --device.')
- sys.exit(0)
- if options.device is not None:
- matching_devices = [d for d in devices if str(d) == options.device]
- if not matching_devices:
- logging.error('Device not found.')
- sys.exit(0)
- device = matching_devices[0]
-
- with device_setup.RemoteWprHost(device, args[0], options.record,
- options.network_condition,
- out_log_path=options.wpr_log) as wpr_attr:
- RunChrome(device, options.cold,
- chrome_setup.CHROME_ARGS + wpr_attr.chrome_args,
- chrome.PACKAGE_INFO[options.chrome_package_name])
-
-
-if __name__ == '__main__':
- main()
diff --git a/loading/xvfb_helper.py b/loading/xvfb_helper.py
deleted file mode 100644
index 4939e76..0000000
--- a/loading/xvfb_helper.py
+++ /dev/null
@@ -1,19 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-import os
-import subprocess
-
-
-def LaunchXvfb():
- """Launches Xvfb for running Chrome in headless mode, and returns the
- subprocess."""
- xvfb_cmd = ['Xvfb', ':99', '-screen', '0', '1600x1200x24']
- return subprocess.Popen(xvfb_cmd, stdout=open(os.devnull, 'wb'),
- stderr=subprocess.STDOUT)
-
-
-def GetChromeEnvironment():
- """Returns the environment for Chrome to run in headless mode with Xvfb."""
- return {'DISPLAY': 'localhost:99'}