| # Copyright 2015 The Chromium Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| """Parses a JSON request log created by log_requests.py.""" |
| |
| import collections |
| import json |
| import operator |
| import urlparse |
| |
| Timing = collections.namedtuple( |
| 'Timing', |
| ['connectEnd', 'connectStart', 'dnsEnd', 'dnsStart', 'proxyEnd', |
| 'proxyStart', 'receiveHeadersEnd', 'requestTime', 'sendEnd', 'sendStart', |
| 'sslEnd', 'sslStart', 'workerReady', 'workerStart', 'loadingFinished']) |
| |
| |
| class Resource(object): |
| """Describes a resource.""" |
| |
| def __init__(self, url, content_type): |
| """Creates an instance of Resource. |
| |
| Args: |
| url: URL of the resource |
| content_type: Content-Type of the resources. |
| """ |
| self.url = url |
| self.content_type = content_type |
| |
| def GetShortName(self): |
| """Returns either the hostname of the resource, or the filename, |
| or the end of the path. Tries to include the domain as much as possible. |
| """ |
| parsed = urlparse.urlparse(self.url) |
| path = parsed.path |
| if path != '' and path != '/': |
| last_path = parsed.path.split('/')[-1] |
| if len(last_path) < 10: |
| if len(path) < 10: |
| return parsed.hostname + '/' + path |
| else: |
| return parsed.hostname + '/..' + parsed.path[-10:] |
| elif len(last_path) > 10: |
| return parsed.hostname + '/..' + last_path[:5] |
| else: |
| return parsed.hostname + '/..' + last_path |
| else: |
| return parsed.hostname |
| |
| def GetContentType(self): |
| mime = self.content_type |
| if 'magic-debug-content' in mime: |
| # A silly hack to make the unittesting easier. |
| return 'magic-debug-content' |
| elif mime == 'text/html': |
| return 'html' |
| elif mime == 'text/css': |
| return 'css' |
| elif mime in ('application/x-javascript', 'text/javascript', |
| 'application/javascript'): |
| return 'script' |
| elif mime == 'application/json': |
| return 'json' |
| elif mime == 'image/gif': |
| return 'gif_image' |
| elif mime.startswith('image/'): |
| return 'image' |
| else: |
| return 'other' |
| |
| @classmethod |
| def FromRequest(cls, request): |
| """Creates a Resource from an instance of RequestData.""" |
| return Resource(request.url, request.GetContentType()) |
| |
| def __Fields(self): |
| return (self.url, self.content_type) |
| |
| def __eq__(self, o): |
| return self.__Fields() == o.__Fields() |
| |
| def __hash__(self): |
| return hash(self.__Fields()) |
| |
| |
| class RequestData(object): |
| """Represents a request, as dumped by log_requests.py.""" |
| |
| def __init__(self, status, headers, request_headers, timestamp, timing, url, |
| served_from_cache, initiator): |
| self.status = status |
| self.headers = headers |
| self.request_headers = request_headers |
| self.timestamp = timestamp |
| self.timing = Timing(**timing) if timing else None |
| self.url = url |
| self.served_from_cache = served_from_cache |
| self.initiator = initiator |
| |
| def IsDataUrl(self): |
| return self.url.startswith('data:') |
| |
| def GetContentType(self): |
| content_type = self.headers['Content-Type'] |
| if ';' in content_type: |
| return content_type[:content_type.index(';')] |
| else: |
| return content_type |
| |
| @classmethod |
| def FromDict(cls, r): |
| """Creates a RequestData object from a dict.""" |
| return RequestData(r['status'], r['headers'], r['request_headers'], |
| r['timestamp'], r['timing'], r['url'], |
| r['served_from_cache'], r['initiator']) |
| |
| |
| def ParseJsonFile(filename): |
| """Converts a JSON file to a sequence of RequestData.""" |
| with open(filename) as f: |
| json_data = json.load(f) |
| return [RequestData.FromDict(r) for r in json_data] |
| |
| |
| def FilterRequests(requests): |
| """Filters a list of requests. |
| |
| Args: |
| requests: [RequestData, ...] |
| |
| Returns: |
| A list of requests that are not data URL, have a Content-Type, and are |
| not served from the cache. |
| """ |
| return [r for r in requests if not r.IsDataUrl() |
| and 'Content-Type' in r.headers and not r.served_from_cache] |
| |
| |
| def ResourceToRequestMap(requests): |
| """Returns a Resource -> Request map. |
| |
| A resource can be requested several times in a single page load. Keeps the |
| first request in this case. |
| |
| Args: |
| requests: [RequestData, ...] |
| |
| Returns: |
| [Resource, ...] |
| """ |
| # reversed(requests) because we want the first one to win. |
| return dict([(Resource.FromRequest(r), r) for r in reversed(requests)]) |
| |
| |
| def GetResources(requests): |
| """Returns an ordered list of resources from a list of requests. |
| |
| The same resource can be requested several time for a single page load. This |
| keeps only the first request. |
| |
| Args: |
| requests: [RequestData] |
| |
| Returns: |
| [Resource] |
| """ |
| resources = [] |
| known_resources = set() |
| for r in requests: |
| resource = Resource.FromRequest(r) |
| if r in known_resources: |
| continue |
| known_resources.add(resource) |
| resources.append(resource) |
| return resources |
| |
| |
| def ParseCacheControl(headers): |
| """Parses the "Cache-Control" header and returns a dict representing it. |
| |
| Args: |
| headers: (dict) Response headers. |
| |
| Returns: |
| {Directive: Value, ...} |
| """ |
| # TODO(lizeb): Handle the "Expires" header as well. |
| result = {} |
| cache_control = headers.get('Cache-Control', None) |
| if cache_control is None: |
| return result |
| directives = [s.strip() for s in cache_control.split(',')] |
| for directive in directives: |
| parts = [s.strip() for s in directive.split('=')] |
| if len(parts) == 1: |
| result[parts[0]] = True |
| else: |
| result[parts[0]] = parts[1] |
| return result |
| |
| |
| def MaxAge(request): |
| """Returns the max-age of a resource, or -1.""" |
| cache_control = ParseCacheControl(request.headers) |
| if (u'no-store' in cache_control |
| or u'no-cache' in cache_control |
| or len(cache_control) == 0): |
| return -1 |
| if 'max-age' in cache_control: |
| return int(cache_control['max-age']) |
| return -1 |
| |
| |
| def SortedByCompletion(requests): |
| """Returns the requests, sorted by completion time.""" |
| return sorted(requests, key=operator.attrgetter('timestamp')) |