| # Copyright 2016 The Chromium Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| """Gathers and infers dependencies between requests. |
| |
| When executed as a script, loads a trace and outputs the dependencies. |
| """ |
| |
| import collections |
| import copy |
| import logging |
| import operator |
| |
| import loading_trace |
| import request_track |
| |
| |
class RequestDependencyLens(object):
    """Analyses and infers request dependencies.

    Dependencies are computed lazily on first use and then cached. Each one
    is a (first, second, reason) tuple meaning that |second| depends on
    |first|.
    """
    # Reasons a dependency can be tagged with.
    DEPENDENCIES = ('redirect', 'parser', 'script', 'inferred', 'other')
    # Key holding the list of call frames in an initiator stack dictionary.
    CALLFRAMES_KEY = 'callFrames'

    def __init__(self, trace):
        """Initializes an instance of RequestDependencyLens.

        Args:
          trace: (LoadingTrace) Loading trace.
        """
        self.loading_trace = trace
        self._requests = self.loading_trace.request_track.GetEvents()
        self._requests_by_id = {r.request_id: r for r in self._requests}
        self._requests_by_url = collections.defaultdict(list)
        for request in self._requests:
            self._requests_by_url[request.url].append(request)
        # Filled in on demand by _ComputeRequestDependencies().
        self._deps = None
        # Frame ID -> parent frame ID, built from Page.frameAttached events.
        self._frame_to_parent = {}
        for event in self.loading_trace.page_track.GetEvents():
            if event['method'] == 'Page.frameAttached':
                self._frame_to_parent[event['frame_id']] = (
                    event['parent_frame_id'])

    def GetRequestDependencies(self):
        """Returns a list of request dependencies.

        Returns:
          [(first, second, reason), ...] where first and second are instances
          of request_track.Request, and reason is in DEPENDENCIES. The second
          request depends on the first one, with the listed reason. The list
          is a shallow copy, so callers may modify it freely.
        """
        self._ComputeRequestDependencies()
        return list(self._deps)

    def GetRedirectChain(self, request):
        """Returns the whole redirect chain for a given request.

        Note that this misses some JS-based redirects.

        Args:
          request: (Request) the request starting the chain.

        Returns:
          A list of requests, starting with the request passed as a parameter
          and followed by each successive request it redirected to.
        """
        self._ComputeRequestDependencies()
        chain = [request]
        while True:
            for (first_request, second_request, why) in self._deps:
                if why == 'redirect' and first_request == request:
                    chain.append(second_request)
                    request = second_request
                    break
            else:
                # Nothing was redirected from the last request: chain is done.
                return chain

    def _ComputeRequestDependencies(self):
        """Computes and caches the dependencies; no-op once computed."""
        if self._deps is not None:
            return
        self._deps = []
        for request in self._requests:
            dependency = self._GetDependency(request)
            if dependency:
                self._deps.append(dependency)

    def _GetDependency(self, request):
        """Returns (first, second, reason), or None.

        |second| depends on |first|.

        Args:
          request: (Request) the request we wish to get the initiator of.

        Returns:
          None if no dependency is found from this request, or
          (initiator (Request), blocked_request (Request), reason (str)).
        """
        reason = request.initiator['type']
        assert reason in request_track.Request.INITIATORS
        if reason == 'redirect':
            return self._GetInitiatingRequestRedirect(request)
        elif reason == 'parser':
            return self._GetInitiatingRequestParser(request)
        elif reason == 'script':
            return self._GetInitiatingRequestScript(request)
        else:
            assert reason == 'other'
            return self._GetInitiatingRequestOther(request)

    def _GetInitiatingRequestRedirect(self, request):
        """Returns the dependency for a request initiated by a redirect.

        The initiator carries the ID of the redirected request, which must be
        part of the trace.

        Args:
          request: (Request) a request whose initiator type is 'redirect'.

        Returns:
          (initiating_request, request, 'redirect').
        """
        assert request_track.Request.INITIATING_REQUEST in request.initiator
        initiating_request_id = request.initiator[
            request_track.Request.INITIATING_REQUEST]
        assert initiating_request_id in self._requests_by_id
        return (self._requests_by_id[initiating_request_id], request,
                'redirect')

    def _GetInitiatingRequestParser(self, request):
        """Returns the dependency for a request initiated by the parser.

        Args:
          request: (Request) a request whose initiator type is 'parser'.

        Returns:
          (initiating_request, request, 'parser'), or None when no earlier
          request matches the initiator URL.
        """
        url = request.initiator['url']
        candidates = self._FindMatchingRequests(
            url, request.timing.request_time)
        if not candidates:
            return None
        initiating_request = self._FindBestMatchingInitiator(
            request, candidates)
        return (initiating_request, request, 'parser')

    def _FlattenScriptStack(self, stack):
        """Collapses the stack of asynchronous callstacks.

        A stack has a list of call frames and optionally a "parent" stack.
        This function folds the parent stacks into the root stack by
        concatenating all the call frames, closest parent first.

        The stack is modified in place; pass a copy to keep the input intact.

        Args:
          stack: (dict) the stack that must be flattened.

        Returns:
          The same stack, now without a "parent" key, whose "callFrames" list
          holds the frames of the whole chain.
        """
        PARENT_KEY = 'parent'
        parent = stack.pop(PARENT_KEY, None)
        while parent is not None:
            stack[self.CALLFRAMES_KEY] += parent[self.CALLFRAMES_KEY]
            parent = parent.get(PARENT_KEY)
        return stack

    def _GetInitiatingRequestScript(self, request):
        """Returns the dependency for a request initiated by a script.

        Walks the flattened initiator call stack and picks the first frame
        whose URL matches a request issued earlier in the trace.

        Args:
          request: (Request) a request whose initiator type is 'script'.

        Returns:
          (initiating_request, request, 'script'), or None when the initiator
          has no stack or no frame can be matched to a request.
        """
        STACK_KEY = 'stack'
        if STACK_KEY not in request.initiator:
            logging.warning('Script initiator but no stack trace.')
            return None
        timestamp = request.timing.request_time
        # Deep copy the initiator's stack to avoid mutating the input request.
        stack = self._FlattenScriptStack(
            copy.deepcopy(request.initiator[STACK_KEY]))
        call_frames = stack[self.CALLFRAMES_KEY]
        for frame in call_frames:
            # A frame without a URL cannot match any request: skip it rather
            # than raising KeyError.
            candidates = self._FindMatchingRequests(
                frame.get('url'), timestamp)
            if not candidates:
                continue
            initiating_request = self._FindBestMatchingInitiator(
                request, candidates)
            if initiating_request:
                return (initiating_request, request, 'script')
        # No frame matched: only the logged message differs from here on.
        from_onload_handler = any(
            not frame.get('url', None) and
            frame.get('functionName', None) == 'window.onload'
            for frame in call_frames)
        if from_onload_handler:
            logging.warning('Unmatched request for onload handler.')
        else:
            logging.warning('Unmatched request.')
        return None

    def _GetInitiatingRequestOther(self, _):
        """Returns None: 'other' initiators are not inferred yet."""
        # TODO(lizeb): Infer "other" initiator types.
        return None

    def _FindMatchingRequests(self, url, before_timestamp):
        """Returns a list of requests matching a URL, before a timestamp.

        Args:
          url: (str) URL to match in requests.
          before_timestamp: (int) Only keep requests submitted before a given
                            timestamp.

        Returns:
          A new list of candidates, ordered by timestamp.
        """
        # A candidate qualifies once request_time plus its (non-negative)
        # receive_headers_end offset is not after the timestamp. The division
        # by 1000 presumably converts milliseconds to seconds -- TODO confirm.
        candidates = [
            r for r in self._requests_by_url.get(url, [])
            if (r.timing.request_time +
                max(0, r.timing.receive_headers_end / 1000) <=
                before_timestamp)]
        candidates.sort(key=lambda r: r.timing.request_time)
        return candidates

    def _FindBestMatchingInitiator(self, request, matches):
        """Returns the best matching request within a list of matches.

        Tries to narrow the matches down to a single one:
        - With the same parent frame.
        - From the same frame.

        If this is not successful, takes the most recent request.

        Args:
          request: (Request) Request.
          matches: [Request] As returned by _FindMatchingRequests(), that is
                   sorted by timestamp.

        Returns:
          The best matching initiating request, or None.
        """
        if not matches:
            return None
        if len(matches) == 1:
            return matches[0]
        # Several matches, try to reduce this number to 1. Otherwise, return
        # the most recent one.
        if request.frame_id in self._frame_to_parent:
            # Main frame has no parent, so this only runs for child frames.
            parent_frame_id = self._frame_to_parent[request.frame_id]
            same_parent_matches = [
                r for r in matches
                if r.frame_id in self._frame_to_parent and
                self._frame_to_parent[r.frame_id] == parent_frame_id]
            if not same_parent_matches:
                logging.warning('All matches are from non-sibling frames.')
                return matches[-1]
            if len(same_parent_matches) == 1:
                return same_parent_matches[0]
        # Note: this filters all the matches, not just the sibling ones.
        same_frame_matches = [
            r for r in matches if r.frame_id == request.frame_id]
        if not same_frame_matches:
            logging.warning('No match from the same frame.')
            return matches[-1]
        if len(same_frame_matches) > 1:
            logging.warning('Several matches')
        # Matches are sorted by timestamp, so the last one is the most recent.
        return same_frame_matches[-1]
| |
| |
if __name__ == '__main__':
    # Script mode: load the JSON trace given as the first argument and print
    # one "<first id> -> <second id>\t(<reason>)" line per dependency.
    import json
    import sys
    if len(sys.argv) < 2:
        sys.exit('Usage: %s <trace_filename>' % sys.argv[0])
    trace_filename = sys.argv[1]
    # Close the trace file as soon as it has been parsed.
    with open(trace_filename, 'r') as trace_file:
        json_dict = json.load(trace_file)
    lens = RequestDependencyLens(
        loading_trace.LoadingTrace.FromJsonDict(json_dict))
    dependencies = lens.GetRequestDependencies()
    for (first, second, dep_reason) in dependencies:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('%s -> %s\t(%s)' % (
            first.request_id, second.request_id, dep_reason))