blob: 566fb3c36208f76388b61e676594d255abb96192 [file] [log] [blame]
# Copyright (c) 2016 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""The request data track.
When executed, parses a JSON dump of DevTools messages.
"""
import collections
import copy
import json
import logging
import re
import devtools_monitor
# Maps the DevTools camelCase timing field names to the snake_case names used
# by the Timing tuple below. 'loadingFinished' is not part of the DevTools
# timing dict; it is an extension filled in by RequestTrack._LoadingFinished.
_TIMING_NAMES_MAPPING = {
    'connectEnd': 'connect_end', 'connectStart': 'connect_start',
    'dnsEnd': 'dns_end', 'dnsStart': 'dns_start', 'proxyEnd': 'proxy_end',
    'proxyStart': 'proxy_start', 'receiveHeadersEnd': 'receive_headers_end',
    'requestTime': 'request_time', 'sendEnd': 'send_end',
    'sendStart': 'send_start', 'sslEnd': 'ssl_end', 'sslStart': 'ssl_start',
    'workerReady': 'worker_ready', 'workerStart': 'worker_start',
    'loadingFinished': 'loading_finished'}
# Timing events of a request. request_time is an absolute timestamp in s;
# other fields come straight from the DevTools timing dict (presumably ms
# offsets from request_time — see protocol.json). Missing fields are -1
# (see TimingFromDict). Field order follows _TIMING_NAMES_MAPPING.values().
Timing = collections.namedtuple('Timing', _TIMING_NAMES_MAPPING.values())
def TimingAsList(timing):
  """Transform Timing to a list, eg as is used in JSON output.

  Args:
    timing: a Timing.

  Returns:
    A list identical to what the eventual JSON output will be (eg,
    Request.ToJsonDict).
  """
  # A namedtuple serializes as a JSON array, so a round-trip through json
  # produces exactly the list form that will appear in the dump.
  serialized = json.dumps(timing)
  return json.loads(serialized)
class Request(object):
  """Represents a single request.

  Generally speaking, fields here closely mirror those documented in
  third_party/WebKit/Source/devtools/protocol.json.

  Fields:
    request_id: (str) unique request ID. Postfixed with _REDIRECT_SUFFIX for
                redirects.
    frame_id: (str) unique frame identifier.
    loader_id: (str) unique loader identifier.
    document_url: (str) URL of the document this request is loaded for.
    url: (str) Request URL.
    protocol: (str) protocol used for the request.
    method: (str) HTTP method, such as POST or GET.
    request_headers: (dict) {'header': 'value'} Request headers.
    response_headers: (dict) {'header': 'value'} Response headers.
    initial_priority: (str) Initial request priority, in REQUEST_PRIORITIES.
    timestamp: (float) Request timestamp, in s.
    wall_time: (float) Request timestamp, UTC timestamp in s.
    initiator: (dict) Request initiator, in INITIATORS.
    resource_type: (str) Resource type, in RESOURCE_TYPES.
    served_from_cache: (bool) Whether the request was served from cache.
    from_disk_cache: (bool) Whether the request was served from the disk cache.
    from_service_worker: (bool) Whether the request was served by a Service
                         Worker.
    timing: (Timing) Request timing, extended with loading_finished.
    status: (int) Response status code.
    encoded_data_length: (int) Total encoded data length.
    data_chunks: (list) [(offset, encoded_data_length), ...] List of data
                 chunks received, with their offset in ms relative to
                 Timing.requestTime.
    failed: (bool) Whether the request failed.
  """
  REQUEST_PRIORITIES = ('VeryLow', 'Low', 'Medium', 'High', 'VeryHigh')
  RESOURCE_TYPES = ('Document', 'Stylesheet', 'Image', 'Media', 'Font',
                    'Script', 'TextTrack', 'XHR', 'Fetch', 'EventSource',
                    'WebSocket', 'Manifest', 'Other')
  INITIATORS = ('parser', 'script', 'other', 'redirect')
  INITIATING_REQUEST = 'initiating_request'
  ORIGINAL_INITIATOR = 'original_initiator'

  def __init__(self):
    self.request_id = None
    self.frame_id = None
    self.loader_id = None
    self.document_url = None
    self.url = None
    self.protocol = None
    self.method = None
    self.request_headers = None
    self.response_headers = None
    self.initial_priority = None
    self.timestamp = -1
    self.wall_time = -1
    self.initiator = None
    self.resource_type = None
    self.served_from_cache = False
    self.from_disk_cache = False
    self.from_service_worker = False
    self.timing = None
    self.status = None
    self.encoded_data_length = 0
    self.data_chunks = []
    self.failed = False

  def _TimestampOffsetFromStartMs(self, timestamp):
    """Converts an absolute timestamp (s) into an offset in ms from the
    request's start time."""
    assert self.timing.request_time != -1
    request_time = self.timing.request_time
    return (timestamp - request_time) * 1000

  def ToJsonDict(self):
    """Returns a dict representing this request, as used in the JSON dump."""
    return copy.deepcopy(self.__dict__)

  @classmethod
  def FromJsonDict(cls, data_dict):
    """Reconstructs a Request from a dict produced by ToJsonDict()."""
    result = Request()
    for (k, v) in data_dict.items():
      setattr(result, k, v)
    if not result.response_headers:
      result.response_headers = {}
    if result.timing:
      # JSON serializes the Timing namedtuple as a plain list.
      result.timing = Timing(*result.timing)
    else:
      result.timing = TimingFromDict({'requestTime': result.timestamp})
    return result

  def GetContentType(self):
    """Returns the content type, or None."""
    # A request that never received a response has no headers at all.
    if not self.response_headers:
      return None
    content_type = self.response_headers.get('Content-Type', None)
    if not content_type or ';' not in content_type:
      return content_type
    else:
      # Strip the parameters, eg '; charset=utf-8'.
      return content_type[:content_type.index(';')]

  def IsDataRequest(self):
    """Whether this request is for a data: URL."""
    return self.protocol == 'data'

  def MaxAge(self):
    """Returns the max-age of a resource in s, or -1.

    -1 is returned when there are no response headers, no Cache-Control
    header, a no-store/no-cache directive, or a malformed max-age value.
    """
    # TODO(lizeb): Handle the "Expires" header as well.
    cache_control = {}
    if not self.response_headers:
      return -1
    cache_control_str = self.response_headers.get('Cache-Control', None)
    if cache_control_str is not None:
      directives = [s.strip() for s in cache_control_str.split(',')]
      for directive in directives:
        parts = [s.strip() for s in directive.split('=')]
        if len(parts) == 1:
          # Valueless directive, eg 'no-store'.
          cache_control[parts[0]] = True
        else:
          cache_control[parts[0]] = parts[1]
    if (u'no-store' in cache_control
        or u'no-cache' in cache_control
        or len(cache_control) == 0):
      return -1
    if 'max-age' in cache_control:
      max_age = cache_control['max-age']
      if max_age is True:
        # Malformed valueless 'max-age' directive; re.match would raise on a
        # non-string. Treat as uncacheable.
        return -1
      age_match = re.match(r'\s*(\d+)', max_age)
      if not age_match:
        return -1
      return int(age_match.group(1))
    return -1

  def __eq__(self, o):
    return self.__dict__ == o.__dict__

  def __hash__(self):
    return hash(self.request_id)

  def __str__(self):
    return json.dumps(self.ToJsonDict(), sort_keys=True, indent=2)
class RequestTrack(devtools_monitor.Track):
  """Aggregates request data.

  Listens to DevTools Network-domain notifications and tracks each request
  through its life cycle (sent -> response -> data -> finished/failed),
  producing one finalized Request per network request, plus one per
  redirect hop.
  """
  _REDIRECT_SUFFIX = '.redirect'
  # Request status, in life-cycle order.
  _STATUS_SENT = 0
  _STATUS_RESPONSE = 1
  _STATUS_DATA = 2
  _STATUS_FINISHED = 3
  _STATUS_FAILED = 4
  # Serialization KEYS
  _EVENTS_KEY = 'events'
  _METADATA_KEY = 'metadata'
  _DUPLICATES_KEY = 'duplicates_count'
  _INCONSISTENT_INITIATORS_KEY = 'inconsistent_initiators'

  def __init__(self, connection):
    super(RequestTrack, self).__init__(connection)
    self._connection = connection
    self._requests = []
    self._requests_in_flight = {}  # requestId -> (request, status)
    self._completed_requests_by_id = {}
    self._redirects_count_by_id = collections.defaultdict(int)
    if connection:  # Optional for testing.
      for method in RequestTrack._METHOD_TO_HANDLER:
        self._connection.RegisterListener(method, self)
    # responseReceived messages are sometimes duplicated. Record the message
    # to detect this.
    self._request_id_to_response_received = {}
    self.duplicates_count = 0
    self.inconsistent_initiators_count = 0

  def Handle(self, method, msg):
    """Dispatches a DevTools message to the matching Network.* handler."""
    assert method in RequestTrack._METHOD_TO_HANDLER
    params = msg['params']
    request_id = params['requestId']
    RequestTrack._METHOD_TO_HANDLER[method](self, request_id, params)

  def GetEvents(self):
    """Returns the completed requests, warning about any still in flight."""
    if self._requests_in_flight:
      logging.warning('Number of requests still in flight: %d.',
                      len(self._requests_in_flight))
    return self._requests

  def ToJsonDict(self):
    """Serializes the track (completed requests and metadata) to a dict."""
    if self._requests_in_flight:
      logging.warning('Requests in flight, will be ignored in the dump')
    return {self._EVENTS_KEY: [
        request.ToJsonDict() for request in self._requests],
            self._METADATA_KEY: {
                self._DUPLICATES_KEY: self.duplicates_count,
                self._INCONSISTENT_INITIATORS_KEY:
                self.inconsistent_initiators_count}}

  @classmethod
  def FromJsonDict(cls, json_dict):
    """Reconstructs a RequestTrack from a dict produced by ToJsonDict()."""
    assert cls._EVENTS_KEY in json_dict
    assert cls._METADATA_KEY in json_dict
    result = RequestTrack(None)
    requests = [Request.FromJsonDict(request)
                for request in json_dict[cls._EVENTS_KEY]]
    result._requests = requests
    metadata = json_dict[cls._METADATA_KEY]
    result.duplicates_count = metadata.get(cls._DUPLICATES_KEY, 0)
    result.inconsistent_initiators_count = metadata.get(
        cls._INCONSISTENT_INITIATORS_KEY, 0)
    return result

  def _RequestWillBeSent(self, request_id, params):
    # Several "requestWillBeSent" events can be dispatched in a row in the case
    # of redirects.
    redirect_initiator = None
    if request_id in self._requests_in_flight:
      redirect_initiator = self._HandleRedirect(request_id, params)
    assert (request_id not in self._requests_in_flight
            and request_id not in self._completed_requests_by_id)
    r = Request()
    r.request_id = request_id
    _CopyFromDictToObject(
        params, r, (('frameId', 'frame_id'), ('loaderId', 'loader_id'),
                    ('documentURL', 'document_url'),
                    ('timestamp', 'timestamp'), ('wallTime', 'wall_time'),
                    ('initiator', 'initiator')))
    request = params['request']
    _CopyFromDictToObject(
        request, r, (('url', 'url'), ('method', 'method'),
                     # Provisional headers; actual request headers are not
                     # known before reaching the network stack (see
                     # _ResponseReceived). Stored in request_headers, the
                     # field declared by Request.
                     ('headers', 'request_headers'),
                     ('initialPriority', 'initial_priority')))
    r.resource_type = params.get('type', 'Other')
    if redirect_initiator:
      # Keep the original initiator around, and check that it matches the one
      # recorded on the redirecting request.
      original_initiator = r.initiator
      r.initiator = redirect_initiator
      r.initiator[Request.ORIGINAL_INITIATOR] = original_initiator
      initiating_request = self._completed_requests_by_id[
          redirect_initiator[Request.INITIATING_REQUEST]]
      initiating_initiator = initiating_request.initiator.get(
          Request.ORIGINAL_INITIATOR, initiating_request.initiator)
      if initiating_initiator != original_initiator:
        self.inconsistent_initiators_count += 1
    self._requests_in_flight[request_id] = (r, RequestTrack._STATUS_SENT)

  def _HandleRedirect(self, request_id, params):
    (r, status) = self._requests_in_flight[request_id]
    assert status == RequestTrack._STATUS_SENT
    # The second request contains timing information pertaining to the first
    # one. Finalize the first request.
    assert 'redirectResponse' in params
    redirect_response = params['redirectResponse']
    _CopyFromDictToObject(redirect_response, r,
                          (('headers', 'response_headers'),
                           ('encodedDataLength', 'encoded_data_length'),
                           ('fromDiskCache', 'from_disk_cache')))
    r.timing = TimingFromDict(redirect_response['timing'])
    # Rename the finalized request with a per-hop redirect suffix so the
    # original request_id stays free for the follow-up request.
    redirect_index = self._redirects_count_by_id[request_id]
    self._redirects_count_by_id[request_id] += 1
    r.request_id = '%s%s.%d' % (request_id, self._REDIRECT_SUFFIX,
                                redirect_index + 1)
    initiator = {
        'type': 'redirect', Request.INITIATING_REQUEST: r.request_id}
    self._requests_in_flight[r.request_id] = (r, RequestTrack._STATUS_FINISHED)
    del self._requests_in_flight[request_id]
    self._FinalizeRequest(r.request_id)
    return initiator

  def _RequestServedFromCache(self, request_id, _):
    assert request_id in self._requests_in_flight
    (request, status) = self._requests_in_flight[request_id]
    assert status == RequestTrack._STATUS_SENT
    request.served_from_cache = True

  def _ResponseReceived(self, request_id, params):
    assert request_id in self._requests_in_flight
    (r, status) = self._requests_in_flight[request_id]
    if status == RequestTrack._STATUS_RESPONSE:
      # Duplicated messages (apart from the timestamp) are OK.
      old_params = self._request_id_to_response_received[request_id]
      params_copy = copy.deepcopy(params)
      params_copy['timestamp'] = None
      old_params['timestamp'] = None
      assert params_copy == old_params
      self.duplicates_count += 1
      return
    assert status == RequestTrack._STATUS_SENT
    assert r.frame_id == params['frameId']
    assert r.timestamp <= params['timestamp']
    if r.resource_type == 'Other':
      r.resource_type = params.get('type', 'Other')
    else:
      assert r.resource_type == params.get('type', 'Other')
    response = params['response']
    _CopyFromDictToObject(
        response, r, (('status', 'status'), ('mimeType', 'mime_type'),
                      ('fromDiskCache', 'from_disk_cache'),
                      ('fromServiceWorker', 'from_service_worker'),
                      ('protocol', 'protocol'),
                      # Actual request headers are not known before reaching
                      # the network stack.
                      ('requestHeaders', 'request_headers'),
                      ('headers', 'response_headers')))
    # data URLs don't have a timing dict.
    timing_dict = {}
    if r.protocol != 'data':
      timing_dict = response['timing']
    else:
      timing_dict = {'requestTime': r.timestamp}
    r.timing = TimingFromDict(timing_dict)
    self._requests_in_flight[request_id] = (r, RequestTrack._STATUS_RESPONSE)
    self._request_id_to_response_received[request_id] = params

  def _DataReceived(self, request_id, params):
    assert request_id in self._requests_in_flight
    (r, status) = self._requests_in_flight[request_id]
    assert (status == RequestTrack._STATUS_RESPONSE
            or status == RequestTrack._STATUS_DATA)
    offset = r._TimestampOffsetFromStartMs(params['timestamp'])
    r.data_chunks.append((offset, params['encodedDataLength']))
    self._requests_in_flight[request_id] = (r, RequestTrack._STATUS_DATA)

  def _LoadingFinished(self, request_id, params):
    assert request_id in self._requests_in_flight
    (r, status) = self._requests_in_flight[request_id]
    assert (status == RequestTrack._STATUS_RESPONSE
            or status == RequestTrack._STATUS_DATA)
    r.encoded_data_length = params['encodedDataLength']
    r.timing = r.timing._replace(
        loading_finished=r._TimestampOffsetFromStartMs(params['timestamp']))
    self._requests_in_flight[request_id] = (r, RequestTrack._STATUS_FINISHED)
    self._FinalizeRequest(request_id)

  def _LoadingFailed(self, request_id, _):
    assert request_id in self._requests_in_flight
    (r, _) = self._requests_in_flight[request_id]
    r.failed = True
    self._requests_in_flight[request_id] = (r, RequestTrack._STATUS_FINISHED)
    self._FinalizeRequest(request_id)

  def _FinalizeRequest(self, request_id):
    assert request_id in self._requests_in_flight
    (request, status) = self._requests_in_flight[request_id]
    assert status == RequestTrack._STATUS_FINISHED
    del self._requests_in_flight[request_id]
    self._completed_requests_by_id[request_id] = request
    self._requests.append(request)

  def __eq__(self, o):
    return self._requests == o._requests
# Maps DevTools Network-domain notification names to the RequestTrack method
# handling them. Defined after the class body because it references the
# class's methods.
RequestTrack._METHOD_TO_HANDLER = {
    'Network.requestWillBeSent': RequestTrack._RequestWillBeSent,
    'Network.requestServedFromCache': RequestTrack._RequestServedFromCache,
    'Network.responseReceived': RequestTrack._ResponseReceived,
    'Network.dataReceived': RequestTrack._DataReceived,
    'Network.loadingFinished': RequestTrack._LoadingFinished,
    'Network.loadingFailed': RequestTrack._LoadingFailed}
def TimingFromDict(timing_dict):
  """Returns a Timing built from a DevTools timing dict.

  Args:
    timing_dict: (dict) possibly-partial dict whose keys are DevTools timing
      field names (keys of _TIMING_NAMES_MAPPING), eg the 'timing' dict of a
      DevTools response.

  Returns:
    A Timing, with -1 for every field absent from timing_dict.
  """
  # Start from all fields at -1, then overwrite with the provided values,
  # translated to their snake_case names.
  complete_timing_dict = {field: -1 for field in Timing._fields}
  timing_dict_mapped = {
      _TIMING_NAMES_MAPPING[k]: v for (k, v) in timing_dict.items()}
  complete_timing_dict.update(timing_dict_mapped)
  return Timing(**complete_timing_dict)
def _CopyFromDictToObject(d, o, key_attrs):
for (key, attr) in key_attrs:
if key in d:
setattr(o, attr, d[key])
if __name__ == '__main__':
  import sys
  # Replays a JSON dump of DevTools network events through a RequestTrack.
  # json is already imported at module level; use a context manager so the
  # events file is closed deterministically.
  with open(sys.argv[1], 'r') as events_file:
    events = json.load(events_file)
  request_track = RequestTrack(None)
  for event in events:
    event_method = event['method']
    request_track.Handle(event_method, event)