| # Copyright 2016 The Chromium Authors. All rights reserved. | 
 | # Use of this source code is governed by a BSD-style license that can be | 
 | # found in the LICENSE file. | 
 |  | 
 | """Request dependency graph.""" | 
 |  | 
 | import logging | 
 | import sys | 
 |  | 
 | import common_util | 
 | import graph | 
 | import request_track | 
 |  | 
 |  | 
 | class RequestNode(graph.Node): | 
 |   def __init__(self, request=None): | 
 |     super(RequestNode, self).__init__() | 
 |     self.request = request | 
 |     self.cost = request.Cost() if request else None # Deserialization. | 
 |  | 
 |   def ToJsonDict(self): | 
 |     json_dict = super(RequestNode, self).ToJsonDict() | 
 |     json_dict.update({'request': self.request.ToJsonDict()}) | 
 |     return json_dict | 
 |  | 
 |   @classmethod | 
 |   def FromJsonDict(cls, json_dict): | 
 |     result = super(RequestNode, cls).FromJsonDict(json_dict) | 
 |     result.request = request_track.Request.FromJsonDict(json_dict['request']) | 
 |     return common_util.DeserializeAttributesFromJsonDict( | 
 |         json_dict, result, ['cost']) | 
 |  | 
 |  | 
 | class Edge(graph.Edge): | 
 |   def __init__(self, from_node, to_node, reason=None): | 
 |     super(Edge, self).__init__(from_node, to_node) | 
 |     self.reason = reason | 
 |     self.cost = None | 
 |     self.is_timing = None | 
 |     if from_node is None: # Deserialization. | 
 |       return | 
 |     self.reason = reason | 
 |     self.cost = request_track.TimeBetween( | 
 |         self.from_node.request, self.to_node.request, self.reason) | 
 |     self.is_timing = False | 
 |  | 
 |   def ToJsonDict(self): | 
 |     result = {} | 
 |     return common_util.SerializeAttributesToJsonDict( | 
 |         result, self, ['reason', 'cost', 'is_timing']) | 
 |  | 
 |   @classmethod | 
 |   def FromJsonDict(cls, json_dict): | 
 |     result = cls(None, None, None) | 
 |     return common_util.DeserializeAttributesFromJsonDict( | 
 |         json_dict, result, ['reason', 'cost', 'is_timing']) | 
 |  | 
 |  | 
 | class RequestDependencyGraph(object): | 
 |   """Request dependency graph.""" | 
 |   # This resource type may induce a timing dependency. See _SplitChildrenByTime | 
 |   # for details. | 
 |   # TODO(lizeb,mattcary): are these right? | 
 |   _CAN_BE_TIMING_PARENT = set(['script', 'magic-debug-content']) | 
 |   _CAN_MAKE_TIMING_DEPENDENCE = set(['json', 'other', 'magic-debug-content']) | 
 |  | 
 |   def __init__(self, requests, dependencies_lens, | 
 |                node_class=RequestNode, edge_class=Edge): | 
 |     """Creates a request dependency graph. | 
 |  | 
 |     Args: | 
 |       requests: ([Request]) a list of requests. | 
 |       dependencies_lens: (RequestDependencyLens) | 
 |       node_class: (subclass of RequestNode) | 
 |       edge_class: (subclass of Edge) | 
 |     """ | 
 |     self._requests = None | 
 |     self._first_request_node = None | 
 |     self._deps_graph = None | 
 |     self._nodes_by_id = None | 
 |     if requests is None: # Deserialization. | 
 |       return | 
 |     assert issubclass(node_class, RequestNode) | 
 |     assert issubclass(edge_class, Edge) | 
 |     self._requests = requests | 
 |     deps = dependencies_lens.GetRequestDependencies() | 
 |     self._nodes_by_id = {r.request_id : node_class(r) for r in self._requests} | 
 |     edges = [] | 
 |     for (parent_request, child_request, reason) in deps: | 
 |       if (parent_request.request_id not in self._nodes_by_id | 
 |           or child_request.request_id not in self._nodes_by_id): | 
 |         continue | 
 |       parent_node = self._nodes_by_id[parent_request.request_id] | 
 |       child_node = self._nodes_by_id[child_request.request_id] | 
 |       edges.append(edge_class(parent_node, child_node, reason)) | 
 |     self._first_request_node = self._nodes_by_id[self._requests[0].request_id] | 
 |     self._deps_graph = graph.DirectedGraph(self._nodes_by_id.values(), edges) | 
 |     self._HandleTimingDependencies() | 
 |  | 
 |   @property | 
 |   def graph(self): | 
 |     """Return the Graph we're based on.""" | 
 |     return self._deps_graph | 
 |  | 
 |   def UpdateRequestsCost(self, request_id_to_cost): | 
 |     """Updates the cost of the nodes identified by their request ID. | 
 |  | 
 |     Args: | 
 |       request_id_to_cost: ({request_id: new_cost}) Can be a superset of the | 
 |                           requests actually present in the graph. | 
 |     """ | 
 |     for node in self._deps_graph.Nodes(): | 
 |       request_id = node.request.request_id | 
 |       if request_id in request_id_to_cost: | 
 |         node.cost = request_id_to_cost[request_id] | 
 |  | 
 |   def Cost(self, from_first_request=True, path_list=None, costs_out=None): | 
 |     """Returns the cost of the graph, that is the costliest path. | 
 |  | 
 |     Args: | 
 |       from_first_request: (boolean) If True, only considers paths that originate | 
 |                           from the first request node. | 
 |       path_list: (list) See graph.Cost(). | 
 |       costs_out: (list) See graph.Cost(). | 
 |     """ | 
 |     if from_first_request: | 
 |       return self._deps_graph.Cost( | 
 |           [self._first_request_node], path_list, costs_out) | 
 |     else: | 
 |       return self._deps_graph.Cost(path_list=path_list, costs_out=costs_out) | 
 |  | 
 |   def AncestorRequests(self, descendants): | 
 |     """Return requests that are ancestors of a set of requests. | 
 |  | 
 |     Args: | 
 |       descendants: ([Request]) List of requests. | 
 |  | 
 |     Returns: | 
 |       List of Requests that are ancestors of descendants. | 
 |     """ | 
 |     return [n.request for n in self.graph.AncestorNodes( | 
 |         self._nodes_by_id[r.request_id] for r in descendants)] | 
 |  | 
 |   def _HandleTimingDependencies(self): | 
 |     try: | 
 |       for n in self._deps_graph.TopologicalSort(): | 
 |         self._SplitChildrenByTime(n) | 
 |     except AssertionError as exc: | 
 |       sys.stderr.write('Bad topological sort: %s\n' | 
 |                        'Skipping child split\n' % str(exc)) | 
 |  | 
 |   def _SplitChildrenByTime(self, parent): | 
 |     """Splits children of a node by request times. | 
 |  | 
 |     The initiator of a request may not be the true dependency of a request. For | 
 |     example, a script may appear to load several resources independently, but in | 
 |     fact one of them may be a JSON data file, and the remaining resources assets | 
 |     described in the JSON. The assets should be dependent upon the JSON data | 
 |     file, and not the original script. | 
 |  | 
 |     This function approximates that by rearranging the children of a node | 
 |     according to their request times. The predecessor of each child is made to | 
 |     be the node with the greatest finishing time, that is before the start time | 
 |     of the child. | 
 |  | 
 |     We do this by sorting the nodes twice, once by start time and once by end | 
 |     time. We mark the earliest end time, and then we walk the start time list, | 
 |     advancing the end time mark when it is less than our current start time. | 
 |  | 
 |     This is refined by only considering assets which we believe actually create | 
 |     a dependency. We only split if the original parent is a script, and the new | 
 |     parent a data file. | 
 |     We incorporate this heuristic by skipping over any non-script/json resources | 
 |     when moving the end mark. | 
 |  | 
 |     TODO(mattcary): More heuristics, like incorporating cachability somehow, and | 
 |     not just picking arbitrarily if there are two nodes with the same end time | 
 |     (does that ever really happen?) | 
 |  | 
 |     Args: | 
 |       parent: (_RequestNode) The children of this node are processed by this | 
 |               function. | 
 |     """ | 
 |     if parent.request.GetContentType() not in self._CAN_BE_TIMING_PARENT: | 
 |       return | 
 |     edges = self._deps_graph.OutEdges(parent) | 
 |     edges_by_start_time = sorted( | 
 |         edges, key=lambda e: e.to_node.request.start_msec) | 
 |     edges_by_end_time = sorted( | 
 |         edges, key=lambda e: e.to_node.request.end_msec) | 
 |     end_mark = 0 | 
 |     for current in edges_by_start_time: | 
 |       assert current.from_node is parent | 
 |       if current.to_node.request.start_msec < parent.request.end_msec - 1e-5: | 
 |         parent_url = parent.request.url | 
 |         child_url = current.to_node.request.url | 
 |         logging.warning('Child loaded before parent finished: %s -> %s', | 
 |                         request_track.ShortName(parent_url), | 
 |                         request_track.ShortName(child_url)) | 
 |       go_to_next_child = False | 
 |       while end_mark < len(edges_by_end_time): | 
 |         if edges_by_end_time[end_mark] == current: | 
 |           go_to_next_child = True | 
 |           break | 
 |         elif (edges_by_end_time[end_mark].to_node.request.GetContentType() | 
 |               not in self._CAN_MAKE_TIMING_DEPENDENCE): | 
 |           end_mark += 1 | 
 |         elif (end_mark < len(edges_by_end_time) - 1 and | 
 |               edges_by_end_time[end_mark + 1].to_node.request.end_msec | 
 |               < current.to_node.request.start_msec): | 
 |           end_mark += 1 | 
 |         else: | 
 |           break | 
 |       if end_mark >= len(edges_by_end_time): | 
 |         break  # It's not possible to rearrange any more children. | 
 |       if go_to_next_child: | 
 |         continue  # We can't rearrange this child, but the next child may be | 
 |                   # eligible. | 
 |       if (edges_by_end_time[end_mark].to_node.request.end_msec | 
 |           <= current.to_node.request.start_msec): | 
 |         current.is_timing = True | 
 |         self._deps_graph.UpdateEdge( | 
 |             current, edges_by_end_time[end_mark].to_node, | 
 |             current.to_node) | 
 |  | 
 |   def ToJsonDict(self): | 
 |     result = {'graph': self.graph.ToJsonDict()} | 
 |     result['requests'] = [r.ToJsonDict() for r in self._requests] | 
 |     return result | 
 |  | 
 |   @classmethod | 
 |   def FromJsonDict(cls, json_dict, node_class, edge_class): | 
 |     result = cls(None, None) | 
 |     graph_dict = json_dict['graph'] | 
 |     g = graph.DirectedGraph.FromJsonDict(graph_dict, node_class, edge_class) | 
 |     result._requests = [request_track.Request.FromJsonDict(r) | 
 |                         for r in json_dict['requests']] | 
 |     result._nodes_by_id = {node.request.request_id: node | 
 |                            for node in g.Nodes()} | 
 |     result._first_request_node = result._nodes_by_id[ | 
 |         result._requests[0].request_id] | 
 |     result._deps_graph = g | 
 |     return result |