blob: a0542dcc26aa43ff5bd323f00466640ca1df8bbe [file] [log] [blame]
# Copyright (c) 2016 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""The request data track.
When executed, parses a JSON dump of DevTools messages.
"""
import bisect
import collections
import copy
import datetime
import email.utils
import hashlib
import json
import logging
import re
import sys
import urlparse
import devtools_monitor
class Timing(object):
  """Collects the timing data for a request.

  Each slot is named after a DevTools timing field (see _TIMING_NAMES). Apart
  from request_time, the values are offsets in milliseconds relative to
  request_time. A slot that was never set holds UNVAILABLE.
  """
  UNVAILABLE = -1
  # Correctly spelled alias. The misspelled name is kept because existing
  # callers reference it.
  UNAVAILABLE = UNVAILABLE
  # (DevTools name, attribute name) pairs.
  _TIMING_NAMES = (
      ('connectEnd', 'connect_end'), ('connectStart', 'connect_start'),
      ('dnsEnd', 'dns_end'), ('dnsStart', 'dns_start'),
      ('proxyEnd', 'proxy_end'), ('proxyStart', 'proxy_start'),
      ('receiveHeadersEnd', 'receive_headers_end'),
      ('requestTime', 'request_time'), ('sendEnd', 'send_end'),
      ('sendStart', 'send_start'), ('sslEnd', 'ssl_end'),
      ('sslStart', 'ssl_start'), ('workerReady', 'worker_ready'),
      ('workerStart', 'worker_start'),
      ('loadingFinished', 'loading_finished'), ('pushStart', 'push_start'),
      ('pushEnd', 'push_end'))
  _TIMING_NAMES_MAPPING = dict(_TIMING_NAMES)
  __slots__ = tuple(x[1] for x in _TIMING_NAMES)

  def __init__(self, **kwargs):
    """Constructor.

    Initialize with keywords arguments from __slots__. Slots that are not
    passed are set to UNVAILABLE.
    """
    for slot in self.__slots__:
      setattr(self, slot, self.UNVAILABLE)
    for (attr, value) in kwargs.items():
      setattr(self, attr, value)

  def __eq__(self, o):
    """Slot-wise equality; objects lacking the slots compare as unequal."""
    try:
      return all(getattr(self, attr) == getattr(o, attr)
                 for attr in self.__slots__)
    except AttributeError:
      # |o| is not Timing-like (e.g. None): let Python fall back to its
      # default comparison instead of raising.
      return NotImplemented

  def __ne__(self, o):
    # Python 2 does not derive != from __eq__, so define it explicitly.
    result = self.__eq__(o)
    return result if result is NotImplemented else not result

  def __str__(self):
    return str(self.ToJsonDict())

  def LargestOffset(self):
    """Returns the largest offset in the available timings.

    request_time is excluded since it is not an offset. The result is never
    negative, even when no offset is available.
    """
    return max(0, max(
        getattr(self, attr) for attr in self.__slots__
        if attr != 'request_time'))

  def ToJsonDict(self):
    """Returns a dict of the slots that have been set."""
    return {attr: getattr(self, attr)
            for attr in self.__slots__
            if getattr(self, attr) != self.UNVAILABLE}

  @classmethod
  def FromJsonDict(cls, json_dict):
    """Returns an instance of Timing from a dict made by ToJsonDict()."""
    return cls(**json_dict)

  @classmethod
  def FromDevToolsDict(cls, json_dict):
    """Returns an instance of Timing from a dict, as passed by DevTools.

    Raises:
      KeyError: if the dict has a key that is not listed in _TIMING_NAMES.
    """
    timing_dict = {
        cls._TIMING_NAMES_MAPPING[k]: v for (k, v) in json_dict.items()}
    return cls(**timing_dict)
def ShortName(url):
  """Returns a shortened version of a URL.

  The result is the hostname ('?.?.?' when the URL has none), followed by an
  abbreviated form of the path when the path is not empty or '/'.

  Args:
    url: (str) URL to shorten.

  Returns:
    (str) The shortened name.
  """
  parsed = urlparse.urlparse(url)
  path = parsed.path
  hostname = parsed.hostname if parsed.hostname else '?.?.?'
  if path in ('', '/'):
    return hostname
  last_path = path.split('/')[-1]
  if len(last_path) >= 10:
    # Long file name: keep only its first characters.
    return hostname + '/..' + last_path[:5]
  if len(path) >= 10:
    # Long path with a short file name: keep the tail of the path.
    return hostname + '/..' + path[-10:]
  # urlparse() keeps the leading '/' of the path. Drop it so that the result
  # does not contain a doubled slash ('host//file').
  return hostname + '/' + path.lstrip('/')
def IntervalBetween(first, second, reason):
  """Returns the start and end of the interval between two requests, in ms.

  The interval ends when the second request starts. Where it begins depends
  on how the two requests are linked:
  - 'parser': when the first request received its headers, to account for
    incremental parsing.
  - 'script', 'redirect' or 'other': when the first request ended.

  Args:
    first: (Request) First request.
    second: (Request) Second request.
    reason: (str) Link between the two requests, in Request.INITIATORS.

  Returns:
    (start_msec (float), end_msec (float)),
  """
  assert reason in Request.INITIATORS
  end_msec = second.timing.request_time * 1000
  first_timing = first.timing
  offset_ms = (first_timing.receive_headers_end if reason == 'parser'
               else first_timing.LargestOffset())
  start_msec = first_timing.request_time * 1000 + offset_ms
  return (start_msec, end_msec)
def TimeBetween(first, second, reason):
  """Returns the length in ms of the interval given by IntervalBetween().

  Args:
    first: (Request) First request.
    second: (Request) Second request.
    reason: (str) Link between the two requests, in Request.INITIATORS.

  Returns:
    end_msec - start_msec, with the values as returned by IntervalBetween().
  """
  start_msec, end_msec = IntervalBetween(first, second, reason)
  return end_msec - start_msec
def TimingAsList(timing):
  """Transform Timing to its JSON form, eg as is used in JSON output.

  Args:
    timing: a Timing, or any value that json can serialize directly (eg, a
      tuple or a list).

  Returns:
    What the value becomes after a round trip through JSON (eg,
    Request.ToJsonDict). An object exposing ToJsonDict() yields the dict
    returned by that method; a tuple yields a list.
  """
  # A Timing instance cannot be handed to json.dumps() as is (it raises
  # TypeError), so go through its own serialization when there is one.
  if hasattr(timing, 'ToJsonDict'):
    timing = timing.ToJsonDict()
  return json.loads(json.dumps(timing))
class Request(object):
  """Represents a single request.

  Generally speaking, fields here closely mirror those documented in
  third_party/WebKit/Source/devtools/protocol.json.

  Fields:
    request_id: (str) unique request ID. Postfixed with _REDIRECT_SUFFIX for
                redirects.
    frame_id: (str) unique frame identifier.
    loader_id: (str) loader identifier.
    document_url: (str) URL of the document this request is loaded for.
    url: (str) Request URL.
    protocol: (str) protocol used for the request.
    method: (str) HTTP method, such as POST or GET.
    mime_type: (str) MIME type of the response.
    request_headers: (dict) {'header': 'value'} Request headers.
    response_headers: (dict) {'header': 'value'} Response headers.
    initial_priority: (str) Initial request priority, in REQUEST_PRIORITIES.
    timestamp: (float) Request timestamp, in s.
    wall_time: (float) Request timestamp, UTC timestamp in s.
    initiator: (dict) Request initiator, in INITIATORS.
    resource_type: (str) Resource type, in RESOURCE_TYPES
    served_from_cache: (bool) Whether the request was served from cache.
    from_disk_cache: (bool) Whether the request was served from the disk cache.
    from_service_worker: (bool) Whether the request was served by a Service
                         Worker.
    timing: (Timing) Request timing, extended with loading_finished.
    status: (int) Response status code.
    status_text: (str) Response status text received in the status line.
    response_headers_length: (int) Length added to the encoded data length by
                             GetResponseTransportLength().
    encoded_data_length: (int) Total encoded data length.
    data_chunks: (list) [(offset, encoded_data_length), ...] List of data
                 chunks received, with their offset in ms relative to
                 Timing.requestTime.
    failed: (bool) Whether the request failed.
    error_text: (str) User friendly error message when request failed.
    start_msec: (float) Request start time, in milliseconds from chrome start.
                None when the request has no timing.
    end_msec: (float) Request end time, in milliseconds from chrome start.
              None when the request has no timing.
  """
  REQUEST_PRIORITIES = ('VeryLow', 'Low', 'Medium', 'High', 'VeryHigh')
  RESOURCE_TYPES = ('Document', 'Stylesheet', 'Image', 'Media', 'Font',
                    'Script', 'TextTrack', 'XHR', 'Fetch', 'EventSource',
                    'WebSocket', 'Manifest', 'Other')
  INITIATORS = ('parser', 'script', 'other', 'redirect')
  INITIATING_REQUEST = 'initiating_request'
  ORIGINAL_INITIATOR = 'original_initiator'

  def __init__(self):
    self.request_id = None
    self.frame_id = None
    self.loader_id = None
    self.document_url = None
    self.url = None
    self.protocol = None
    self.method = None
    self.mime_type = None
    self.request_headers = None
    self.response_headers = None
    self.initial_priority = None
    self.timestamp = -1
    self.wall_time = -1
    self.initiator = None
    self.resource_type = None
    self.served_from_cache = False
    self.from_disk_cache = False
    self.from_service_worker = False
    self.timing = None
    self.status = None
    self.status_text = None
    self.response_headers_length = 0
    self.encoded_data_length = 0
    self.data_chunks = []
    self.failed = False
    self.error_text = None

  @property
  def start_msec(self):
    # Callers (end_msec, request indexing) test this value against None, so
    # report a missing timing as None rather than raising AttributeError.
    if self.timing is None:
      return None
    return self.timing.request_time * 1000

  @property
  def end_msec(self):
    start_msec = self.start_msec
    if start_msec is None:
      return None
    return start_msec + self.timing.LargestOffset()

  @property
  def fingerprint(self):
    """Returns the first 10 hex digits of the SHA-256 of the URL."""
    url = self.url
    # hashlib needs bytes. Encode explicitly so that non-ASCII unicode URLs
    # do not fail on the implicit ASCII conversion.
    if not isinstance(url, bytes):
      url = url.encode('utf-8')
    return hashlib.sha256(url).hexdigest()[:10]

  def _TimestampOffsetFromStartMs(self, timestamp):
    """Returns the offset in ms of |timestamp| (in s) from request_time."""
    assert self.timing.request_time != -1
    request_time = self.timing.request_time
    return (timestamp - request_time) * 1000

  def ToJsonDict(self):
    """Returns a deep copy of the fields, with the timing as a dict."""
    result = copy.deepcopy(self.__dict__)
    result['timing'] = self.timing.ToJsonDict() if self.timing else {}
    return result

  @classmethod
  def FromJsonDict(cls, data_dict):
    """Returns an instance built from a dict made by ToJsonDict().

    Missing response headers become an empty dict, and a missing timing
    becomes a Timing holding only the request timestamp.
    """
    result = cls()
    for (k, v) in data_dict.items():
      setattr(result, k, v)
    if not result.response_headers:
      result.response_headers = {}
    if result.timing:
      result.timing = Timing.FromJsonDict(result.timing)
    else:
      result.timing = Timing(request_time=result.timestamp)
    return result

  def GetResponseTransportLength(self):
    """Get the total amount of encoded data no matter whether load has finished
    or not.
    """
    assert self.HasReceivedResponse()
    assert not self.from_disk_cache and not self.served_from_cache
    assert self.protocol not in {'about', 'blob', 'data'}
    if self.timing.loading_finished != Timing.UNVAILABLE:
      encoded_data_length = self.encoded_data_length
    else:
      # The load is still in progress: add up the chunks received so far.
      encoded_data_length = sum(
          chunk_size for _, chunk_size in self.data_chunks)
      assert encoded_data_length > 0 or not self.data_chunks
    return encoded_data_length + self.response_headers_length

  def GetHTTPResponseHeader(self, header_name):
    """Gets the value of a HTTP response header.

    Does a case-insensitive search for the header name in the HTTP response
    headers, in order to support servers that use a wrong capitalization.

    Returns:
      The header value, or None if the header is absent or if no response
      headers have been recorded.
    """
    if not self.response_headers:
      return None
    lower_case_name = header_name.lower()
    for name, value in self.response_headers.items():
      if name.lower() == lower_case_name:
        return value
    return None

  def SetHTTPResponseHeader(self, header, header_value):
    """Sets the value of a HTTP response header.

    Any existing entry for the header, whatever its capitalization, is
    replaced.

    Args:
      header: (str) Lower case header name.
      header_value: (str) New value.
    """
    assert header.islower()
    if self.response_headers is None:
      self.response_headers = {}
    # Iterate over a snapshot of the keys, as entries are deleted on the way.
    for name in list(self.response_headers.keys()):
      if name.lower() == header:
        del self.response_headers[name]
    self.response_headers[header] = header_value

  def GetResponseHeaderValue(self, header, value):
    """Returns a copy of |value| iff response |header| contains it.

    The header is read as a comma-separated list. Elements are compared
    case-insensitively, ignoring the whitespace around them.

    Returns:
      The matching element as found in the header (stripped), or None.
    """
    header_values = self.GetHTTPResponseHeader(header)
    if not header_values:
      return None
    wanted = value.lower()
    for header_value in header_values.split(','):
      # List elements usually follow ", ": without stripping, only the first
      # element of the list could ever match.
      candidate = header_value.strip()
      if candidate.lower() == wanted:
        return candidate
    return None

  def HasResponseHeaderValue(self, header, value):
    """Returns True iff the response headers |header| contains |value|."""
    return self.GetResponseHeaderValue(header, value) is not None

  def GetContentType(self):
    """Returns the content type, or None."""
    # Check for redirects. Use the "Location" header, because the HTTP status is
    # not reliable.
    if self.GetHTTPResponseHeader('Location') is not None:
      return 'redirect'
    # Check if the response is empty.
    if (self.GetHTTPResponseHeader('Content-Length') == '0' or
        self.status == 204):
      return 'ping'
    if self.mime_type:
      return self.mime_type
    content_type = self.GetHTTPResponseHeader('Content-Type')
    if not content_type or ';' not in content_type:
      return content_type
    # Drop the parameters (e.g. "; charset=...").
    return content_type[:content_type.index(';')]

  def IsDataRequest(self):
    return self.protocol == 'data'

  def HasReceivedResponse(self):
    return self.status is not None

  def GetCacheControlDirective(self, directive_name):
    """Returns the value of a Cache-Control directive, or None.

    Only directives of the form "name=value" are considered. The name is
    matched case-insensitively, ignoring surrounding whitespace.
    """
    cache_control_str = self.GetHTTPResponseHeader('Cache-Control')
    if cache_control_str is None:
      return None
    wanted = directive_name.lower()
    for directive in cache_control_str.split(','):
      parts = [s.strip() for s in directive.split('=')]
      if len(parts) != 2:
        continue
      (name, value) = parts
      if name.lower() == wanted:
        return value
    return None

  def MaxAge(self):
    """Returns the max-age of a resource, or -1.

    -1 is returned when there is no Cache-Control header, when it contains
    no-store or no-cache, or when it has no usable max-age directive.
    """
    # TODO(lizeb): Handle the "Expires" header as well.
    if not self.response_headers:
      return -1
    cache_control_str = self.GetHTTPResponseHeader('Cache-Control')
    if cache_control_str is None:
      return -1
    directive_names = {directive.split('=')[0].strip().lower()
                       for directive in cache_control_str.split(',')}
    if 'no-store' in directive_names or 'no-cache' in directive_names:
      return -1
    max_age = self.GetCacheControlDirective('max-age')
    if not max_age:
      return -1
    try:
      return int(max_age)
    except ValueError:
      # The header comes from the server: a malformed value means that no
      # usable max-age is available.
      return -1

  def Cost(self):
    """Returns the cost of this request in ms, defined as time between
    request_time and the latest timing event.
    """
    # All fields in timing are millis relative to request_time.
    return self.timing.LargestOffset()

  def GetRawResponseHeaders(self):
    """Gets the request's raw response headers compatible with
    net::HttpResponseHeaders's constructor.

    Returns:
      (bytes) The status line followed by the "name: value" header lines
      sorted by header name, each one terminated by a NUL byte.
    """
    assert not self.IsDataRequest()
    assert self.HasReceivedResponse()
    lines = [u'{} {} {}'.format(
        self.protocol.upper(), self.status, self.status_text)]
    for key in sorted(self.response_headers.keys()):
      lines.append(u'{}: {}'.format(key, self.response_headers[key]))
    return b''.join(line.encode('latin-1') + b'\x00' for line in lines)

  def __eq__(self, o):
    other_dict = getattr(o, '__dict__', None)
    if other_dict is None:
      # |o| cannot be a request (e.g. None): do not raise.
      return NotImplemented
    return self.__dict__ == other_dict

  def __ne__(self, o):
    # Python 2 does not derive != from __eq__, so define it explicitly.
    result = self.__eq__(o)
    return result if result is NotImplemented else not result

  def __hash__(self):
    return hash(self.request_id)

  def __str__(self):
    return json.dumps(self.ToJsonDict(), sort_keys=True, indent=2)
def _ParseStringToInt(string):
"""Parses a string to an integer like base::StringToInt64().
Returns:
Parsed integer.
"""
string = string.strip()
while string:
try:
parsed_integer = int(string)
if parsed_integer > sys.maxint:
return sys.maxint
if parsed_integer < -sys.maxint - 1:
return -sys.maxint - 1
return parsed_integer
except ValueError:
string = string[:-1]
return 0
class CachingPolicy(object):
  """Represents the caching policy at an arbitrary time for a cached response.

  All the times handled by this class are in seconds.
  """
  FETCH = 'FETCH'
  VALIDATION_NONE = 'VALIDATION_NONE'
  VALIDATION_SYNC = 'VALIDATION_SYNC'
  VALIDATION_ASYNC = 'VALIDATION_ASYNC'
  POLICIES = (FETCH, VALIDATION_NONE, VALIDATION_SYNC, VALIDATION_ASYNC)

  def __init__(self, request):
    """Constructor.

    Args:
      request: (Request) A request whose response headers are known.
    """
    assert request.response_headers is not None
    self.request = request
    # This is incorrect, as the timestamp corresponds to when devtools is made
    # aware of the request, not when it was sent. However, this is good enough
    # for computing cache expiration, which doesn't need sub-second precision.
    self._request_time = self.request.wall_time
    # Used when the date is not available.
    # Timing offsets are in milliseconds whereas wall_time is in seconds, so
    # convert before adding. An unavailable (negative) offset counts as 0.
    headers_end_ms = max(0, self.request.timing.receive_headers_end)
    self._response_time = self._request_time + headers_end_ms / 1000.0

  def HasValidators(self):
    """Returns whether the request has a validator."""
    # Assuming HTTP 1.1+.
    return bool(self.request.GetHTTPResponseHeader('Last-Modified')
                or self.request.GetHTTPResponseHeader('Etag'))

  def IsCacheable(self):
    """Returns whether the request could be stored in the cache."""
    return not self.request.HasResponseHeaderValue('Cache-Control', 'no-store')

  def PolicyAtDate(self, timestamp):
    """Returns the caching policy at an arbitrary timestamp.

    Args:
      timestamp: (float) Seconds since Epoch.

    Returns:
      A policy in POLICIES.
    """
    # Note: the implementation is largely transcribed from
    # net/http/http_response_headers.cc, itself following RFC 2616.
    if not self.IsCacheable():
      return self.FETCH
    (freshness, stale_freshness) = self.GetFreshnessLifetimes()
    if freshness == 0 and stale_freshness == 0:
      return self.VALIDATION_SYNC
    age = self._GetCurrentAge(timestamp)
    if freshness > age:
      return self.VALIDATION_NONE
    if (freshness + stale_freshness) > age:
      return self.VALIDATION_ASYNC
    return self.VALIDATION_SYNC

  def GetFreshnessLifetimes(self):
    """Returns [freshness, stale-while-revalidate freshness] in seconds."""
    # This is adapted from GetFreshnessLifetimes() in
    # //net/http/http_response_headers.cc (which follows the RFC).
    r = self.request
    result = [0, 0]
    if (r.HasResponseHeaderValue('Cache-Control', 'no-cache')
        or r.HasResponseHeaderValue('Cache-Control', 'no-store')
        or r.HasResponseHeaderValue('Vary', '*')):  # RFC 2616, 13.6.
      return result
    must_revalidate = r.HasResponseHeaderValue(
        'Cache-Control', 'must-revalidate')
    swr_header = r.GetCacheControlDirective('stale-while-revalidate')
    if not must_revalidate and swr_header:
      result[1] = _ParseStringToInt(swr_header)
    # max-age takes precedence over Expires and over the heuristic.
    max_age_header = r.GetCacheControlDirective('max-age')
    if max_age_header:
      result[0] = _ParseStringToInt(max_age_header)
      return result
    date = self._GetDateValue('Date') or self._response_time
    expires = self._GetDateValue('Expires')
    if expires:
      result[0] = expires - date
      return result
    if self.request.status in (200, 203, 206) and not must_revalidate:
      # Heuristic: a tenth of the time elapsed since the last modification.
      last_modified = self._GetDateValue('Last-Modified')
      if last_modified and last_modified < date:
        result[0] = (date - last_modified) / 10
        return result
    if self.request.status in (300, 301, 308, 410):
      return [2**48, 0]  # ~forever.
    # No header -> not fresh.
    return result

  def _GetDateValue(self, name):
    """Returns the date held by a response header, in seconds since Epoch.

    Returns None if the header is missing or cannot be parsed.
    """
    date_str = self.request.GetHTTPResponseHeader(name)
    if not date_str:
      return None
    parsed_date = email.utils.parsedate_tz(date_str)
    if parsed_date is None:
      return None
    return email.utils.mktime_tz(parsed_date)

  def _GetCurrentAge(self, current_time):
    """Returns the age in seconds of the response at |current_time|."""
    # See GetCurrentAge() in //net/http/http_response_headers.cc.
    r = self.request
    date_value = self._GetDateValue('Date') or self._response_time
    # The Age header comes from the server: parse it leniently rather than
    # raising on a malformed value.
    age_header = r.GetHTTPResponseHeader('Age')
    age_value = _ParseStringToInt(age_header) if age_header else 0
    apparent_age = max(0, self._response_time - date_value)
    corrected_received_age = max(apparent_age, age_value)
    response_delay = self._response_time - self._request_time
    corrected_initial_age = corrected_received_age + response_delay
    resident_time = current_time - self._response_time
    current_age = corrected_initial_age + resident_time
    return current_age
class RequestTrack(devtools_monitor.Track):
  """Aggregates request data.

  Handle() receives the DevTools network events and builds one Request per
  request ID. A request is kept "in flight" until it finishes, fails or is
  redirected, at which point it is appended to the list of completed requests.
  """
  _REDIRECT_SUFFIX = '.redirect'
  # Request status
  _STATUS_SENT = 0
  _STATUS_RESPONSE = 1
  _STATUS_DATA = 2
  _STATUS_FINISHED = 3
  _STATUS_FAILED = 4
  # Serialization KEYS
  _EVENTS_KEY = 'events'
  _METADATA_KEY = 'metadata'
  _DUPLICATES_KEY = 'duplicates_count'
  _INCONSISTENT_INITIATORS_KEY = 'inconsistent_initiators'

  def __init__(self, connection):
    """Constructor.

    Args:
      connection: DevTools connection to listen to, or None (for testing and
        deserialization).
    """
    super(RequestTrack, self).__init__(connection)
    self._connection = connection
    self._requests = []
    self._requests_in_flight = {}  # requestId -> (request, status)
    self._completed_requests_by_id = {}
    self._redirects_count_by_id = collections.defaultdict(int)
    # Sorted views of the requests, rebuilt lazily by _IndexRequests().
    self._indexed = False
    self._request_start_timestamps = None
    self._request_end_timestamps = None
    self._requests_by_start = None
    self._requests_by_end = None
    if connection:  # Optional for testing.
      for method in RequestTrack._METHOD_TO_HANDLER:
        self._connection.RegisterListener(method, self)
      # Enable asynchronous callstacks to get full javascript callstacks in
      # initiators
      self._connection.SetScopedState('Debugger.setAsyncCallStackDepth',
                                      {'maxDepth': 4}, {'maxDepth': 0}, True)
    # responseReceived message are sometimes duplicated. Records the message to
    # detect this.
    self._request_id_to_response_received = {}
    self.duplicates_count = 0
    self.inconsistent_initiators_count = 0

  def Handle(self, method, msg):
    """Dispatches a DevTools message to the handler of its method.

    Args:
      method: (str) DevTools method, a key of _METHOD_TO_HANDLER.
      msg: (dict) DevTools message, with the request ID in its 'params'.
    """
    assert method in RequestTrack._METHOD_TO_HANDLER
    # Any event may change the requests: invalidate the sorted views.
    self._indexed = False
    params = msg['params']
    request_id = params['requestId']
    RequestTrack._METHOD_TO_HANDLER[method](self, request_id, params)

  def GetEvents(self):
    """Returns the completed requests, in completion order."""
    if self._requests_in_flight:
      logging.warning('Number of requests still in flight: %d.',
                      len(self._requests_in_flight))
    return self._requests

  def GetFirstResourceRequest(self):
    """Returns the first completed request."""
    return self.GetEvents()[0]

  def GetFirstRequestMillis(self):
    """Find the canonical start time for this track.

    Returns:
      The millisecond timestamp of the first request.
    """
    assert self._requests, "No requests to analyze."
    self._IndexRequests()
    return self._request_start_timestamps[0]

  def GetLastRequestMillis(self):
    """Find the canonical end time for this track.

    Returns:
      The millisecond timestamp of the end of the request that ends last.
    """
    assert self._requests, "No requests to analyze."
    self._IndexRequests()
    return self._request_end_timestamps[-1]

  def GetEventsStartingBetween(self, start_ms, end_ms):
    """Return events that started in a range.

    Args:
      start_ms: the start time to query, in milliseconds. It is compared with
        Request.start_msec.
      end_ms: the end time to query, in milliseconds. It is compared with
        Request.start_msec.

    Returns:
      A list of requests whose start time is in [start_ms, end_ms].
    """
    self._IndexRequests()
    low = bisect.bisect_left(self._request_start_timestamps, start_ms)
    high = bisect.bisect_right(self._request_start_timestamps, end_ms)
    return self._requests_by_start[low:high]

  def GetEventsEndingBetween(self, start_ms, end_ms):
    """Return events that ended in a range.

    Args:
      start_ms: the start time to query, in milliseconds. It is compared with
        Request.end_msec.
      end_ms: the end time to query, in milliseconds. It is compared with
        Request.end_msec.

    Returns:
      A list of requests whose end time is in [start_ms, end_ms].
    """
    self._IndexRequests()
    low = bisect.bisect_left(self._request_end_timestamps, start_ms)
    high = bisect.bisect_right(self._request_end_timestamps, end_ms)
    return self._requests_by_end[low:high]

  def ToJsonDict(self):
    """Returns a dict of the completed requests and of the counters."""
    if self._requests_in_flight:
      logging.warning('Requests in flight, will be ignored in the dump')
    return {self._EVENTS_KEY: [
        request.ToJsonDict() for request in self._requests],
            self._METADATA_KEY: {
                self._DUPLICATES_KEY: self.duplicates_count,
                self._INCONSISTENT_INITIATORS_KEY:
                self.inconsistent_initiators_count}}

  @classmethod
  def FromJsonDict(cls, json_dict):
    """Returns a connection-less track from a dict made by ToJsonDict()."""
    assert cls._EVENTS_KEY in json_dict
    assert cls._METADATA_KEY in json_dict
    result = RequestTrack(None)
    requests = [Request.FromJsonDict(request)
                for request in json_dict[cls._EVENTS_KEY]]
    result._requests = requests
    metadata = json_dict[cls._METADATA_KEY]
    result.duplicates_count = metadata.get(cls._DUPLICATES_KEY, 0)
    result.inconsistent_initiators_count = metadata.get(
        cls._INCONSISTENT_INITIATORS_KEY, 0)
    return result

  def _IndexRequests(self):
    """Builds the views of the requests sorted by start and by end time."""
    # TODO(mattcary): if we ever have requests without timing then we either
    # need a default, or to make an index that only includes requests with
    # timings.
    if self._indexed:
      return
    valid_requests = [r for r in self._requests
                      if r.start_msec is not None]
    self._requests_by_start = sorted(valid_requests,
                                     key=lambda r: r.start_msec)
    self._request_start_timestamps = [r.start_msec
                                      for r in self._requests_by_start]
    self._requests_by_end = sorted(valid_requests,
                                   key=lambda r: r.end_msec)
    self._request_end_timestamps = [r.end_msec
                                    for r in self._requests_by_end]
    self._indexed = True

  def _RequestWillBeSent(self, request_id, params):
    # Several "requestWillBeSent" events can be dispatched in a row in the case
    # of redirects.
    redirect_initiator = None
    if request_id in self._completed_requests_by_id:
      assert request_id not in self._requests_in_flight
      return
    if request_id in self._requests_in_flight:
      redirect_initiator = self._HandleRedirect(request_id, params)
    assert (request_id not in self._requests_in_flight)
    r = Request()
    r.request_id = request_id
    _CopyFromDictToObject(
        params, r, (('frameId', 'frame_id'), ('loaderId', 'loader_id'),
                    ('documentURL', 'document_url'),
                    ('timestamp', 'timestamp'), ('wallTime', 'wall_time'),
                    ('initiator', 'initiator')))
    request = params['request']
    # NOTE(review): the headers are stored in a 'headers' attribute, not in
    # 'request_headers' (which is filled in by _ResponseReceived). Confirm
    # that this is intended; kept as is since it shows up in the JSON output.
    _CopyFromDictToObject(
        request, r, (('url', 'url'), ('method', 'method'),
                     ('headers', 'headers'),
                     ('initialPriority', 'initial_priority')))
    r.resource_type = params.get('type', 'Other')
    if redirect_initiator:
      original_initiator = r.initiator
      r.initiator = redirect_initiator
      r.initiator[Request.ORIGINAL_INITIATOR] = original_initiator
      initiating_request = self._completed_requests_by_id[
          redirect_initiator[Request.INITIATING_REQUEST]]
      initiating_initiator = initiating_request.initiator.get(
          Request.ORIGINAL_INITIATOR, initiating_request.initiator)
      if initiating_initiator != original_initiator:
        self.inconsistent_initiators_count += 1
    self._requests_in_flight[request_id] = (r, RequestTrack._STATUS_SENT)

  def _HandleRedirect(self, request_id, params):
    """Finalizes a redirected request, and returns the follow-up's initiator.

    The finalized request is renamed with _REDIRECT_SUFFIX and the index of
    the redirect, which frees |request_id| for the request that follows.

    Returns:
      (dict) A 'redirect' initiator pointing to the finalized request.
    """
    (r, status) = self._requests_in_flight[request_id]
    assert status == RequestTrack._STATUS_SENT
    # The second request contains timing information pertaining to the first
    # one. Finalize the first request.
    assert 'redirectResponse' in params
    redirect_response = params['redirectResponse']
    _CopyFromDictToObject(redirect_response, r,
                          (('headers', 'response_headers'),
                           ('encodedDataLength', 'response_headers_length'),
                           ('fromDiskCache', 'from_disk_cache'),
                           ('protocol', 'protocol'), ('status', 'status'),
                           ('statusText', 'status_text')))
    # As in _ResponseReceived(), a response may come without a timing dict:
    # fall back to the request timestamp instead of raising KeyError.
    redirect_timing = (redirect_response.get('timing')
                       or {'requestTime': r.timestamp})
    r.timing = Timing.FromDevToolsDict(redirect_timing)
    redirect_index = self._redirects_count_by_id[request_id]
    self._redirects_count_by_id[request_id] += 1
    r.request_id = '%s%s.%d' % (request_id, self._REDIRECT_SUFFIX,
                                redirect_index + 1)
    initiator = {
        'type': 'redirect', Request.INITIATING_REQUEST: r.request_id}
    self._requests_in_flight[r.request_id] = (r, RequestTrack._STATUS_FINISHED)
    del self._requests_in_flight[request_id]
    self._FinalizeRequest(r.request_id)
    return initiator

  def _RequestServedFromCache(self, request_id, _):
    if request_id not in self._requests_in_flight:
      return
    (request, status) = self._requests_in_flight[request_id]
    assert status == RequestTrack._STATUS_SENT
    request.served_from_cache = True

  def _ResponseReceived(self, request_id, params):
    if request_id in self._completed_requests_by_id:
      assert request_id not in self._requests_in_flight
      return
    assert request_id in self._requests_in_flight
    (r, status) = self._requests_in_flight[request_id]
    if status == RequestTrack._STATUS_RESPONSE:
      # Duplicated messages (apart from the timestamp) are OK.
      old_params = self._request_id_to_response_received[request_id]
      params_copy = copy.deepcopy(params)
      params_copy['timestamp'] = None
      old_params['timestamp'] = None
      assert params_copy == old_params
      self.duplicates_count += 1
      return
    assert status == RequestTrack._STATUS_SENT
    assert (r.frame_id == params['frameId'] or
            params['response']['protocol'] == 'data')
    assert r.timestamp <= params['timestamp']
    if r.resource_type == 'Other':
      r.resource_type = params.get('type', 'Other')
    else:
      assert r.resource_type == params.get('type', 'Other')
    response = params['response']
    _CopyFromDictToObject(
        response, r, (('status', 'status'), ('mimeType', 'mime_type'),
                      ('fromDiskCache', 'from_disk_cache'),
                      ('fromServiceWorker', 'from_service_worker'),
                      ('protocol', 'protocol'), ('statusText', 'status_text'),
                      # Actual request headers are not known before reaching the
                      # network stack.
                      ('requestHeaders', 'request_headers'),
                      ('encodedDataLength', 'response_headers_length'),
                      ('headers', 'response_headers')))
    timing_dict = {}
    # Some URLs don't have a timing dict (e.g. data URLs), and timings for
    # cached requests are stale.
    # TODO(droger): the timestamp is inaccurate, get the real timings instead.
    if not response.get('timing') or r.served_from_cache:
      timing_dict = {'requestTime': r.timestamp}
    else:
      timing_dict = response['timing']
    r.timing = Timing.FromDevToolsDict(timing_dict)
    self._requests_in_flight[request_id] = (r, RequestTrack._STATUS_RESPONSE)
    self._request_id_to_response_received[request_id] = params

  def _DataReceived(self, request_id, params):
    if request_id not in self._requests_in_flight:
      return
    (r, status) = self._requests_in_flight[request_id]
    assert (status == RequestTrack._STATUS_RESPONSE
            or status == RequestTrack._STATUS_DATA)
    offset = r._TimestampOffsetFromStartMs(params['timestamp'])
    r.data_chunks.append((offset, params['encodedDataLength']))
    self._requests_in_flight[request_id] = (r, RequestTrack._STATUS_DATA)

  def _LoadingFinished(self, request_id, params):
    if request_id not in self._requests_in_flight:
      return
    (r, status) = self._requests_in_flight[request_id]
    assert (status == RequestTrack._STATUS_RESPONSE
            or status == RequestTrack._STATUS_DATA)
    r.encoded_data_length = params['encodedDataLength']
    r.timing.loading_finished = r._TimestampOffsetFromStartMs(
        params['timestamp'])
    self._requests_in_flight[request_id] = (r, RequestTrack._STATUS_FINISHED)
    self._FinalizeRequest(request_id)

  def _LoadingFailed(self, request_id, params):
    if request_id not in self._requests_in_flight:
      logging.warning('An unknown request failed: %s', request_id)
      return
    (r, _) = self._requests_in_flight[request_id]
    r.failed = True
    r.error_text = params['errorText']
    if r.timing is None:
      # The request failed before any response was received. Give it the same
      # default timing as Request.FromJsonDict() does, so that the request can
      # be indexed and is equal to its deserialized counterpart.
      r.timing = Timing(request_time=r.timestamp)
    self._requests_in_flight[request_id] = (r, RequestTrack._STATUS_FINISHED)
    self._FinalizeRequest(request_id)

  def _FinalizeRequest(self, request_id):
    """Moves a finished request from the in-flight to the completed ones."""
    (request, status) = self._requests_in_flight[request_id]
    assert status == RequestTrack._STATUS_FINISHED
    del self._requests_in_flight[request_id]
    self._completed_requests_by_id[request_id] = request
    self._requests.append(request)

  def __eq__(self, o):
    return self._requests == o._requests

  def __ne__(self, o):
    # Python 2 does not derive != from __eq__, so define it explicitly.
    return not self.__eq__(o)
# Dispatch table from DevTools method name to the RequestTrack handler of that
# event. It is attached after the class body since it refers to the methods
# through the class.
RequestTrack._METHOD_TO_HANDLER = {
    'Network.requestWillBeSent': RequestTrack._RequestWillBeSent,
    'Network.requestServedFromCache': RequestTrack._RequestServedFromCache,
    'Network.responseReceived': RequestTrack._ResponseReceived,
    'Network.dataReceived': RequestTrack._DataReceived,
    'Network.loadingFinished': RequestTrack._LoadingFinished,
    'Network.loadingFailed': RequestTrack._LoadingFailed,
}
def _CopyFromDictToObject(d, o, key_attrs):
for (key, attr) in key_attrs:
if key in d:
setattr(o, attr, d[key])
if __name__ == '__main__':
  # Replays a JSON dump of DevTools messages (a list of dicts that each have
  # a 'method' key) through a connection-less RequestTrack.
  if len(sys.argv) < 2:
    sys.exit('Usage: %s <devtools_messages.json>' % sys.argv[0])
  # Use a context manager so that the dump is closed once it is parsed.
  with open(sys.argv[1], 'r') as events_file:
    events = json.load(events_file)
  request_track = RequestTrack(None)
  for event in events:
    event_method = event['method']
    request_track.Handle(event_method, event)