blob: a2c1f1755622139257cc8533c775631e998fbd15 [file] [log] [blame]
# Copyright 2016 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Request dependency graph."""
import logging
import sys
import common_util
import graph
import request_track
class RequestNode(graph.Node):
def __init__(self, request=None):
super(RequestNode, self).__init__()
self.request = request
self.cost = request.Cost() if request else None # Deserialization.
def ToJsonDict(self):
json_dict = super(RequestNode, self).ToJsonDict()
json_dict.update({'request': self.request.ToJsonDict()})
return json_dict
@classmethod
def FromJsonDict(cls, json_dict):
result = super(RequestNode, cls).FromJsonDict(json_dict)
result.request = request_track.Request.FromJsonDict(json_dict['request'])
return common_util.DeserializeAttributesFromJsonDict(
json_dict, result, ['cost'])
class Edge(graph.Edge):
def __init__(self, from_node, to_node, reason=None):
super(Edge, self).__init__(from_node, to_node)
self.reason = reason
self.cost = None
self.is_timing = None
if from_node is None: # Deserialization.
return
self.reason = reason
self.cost = request_track.TimeBetween(
self.from_node.request, self.to_node.request, self.reason)
self.is_timing = False
def ToJsonDict(self):
result = {}
return common_util.SerializeAttributesToJsonDict(
result, self, ['reason', 'cost', 'is_timing'])
@classmethod
def FromJsonDict(cls, json_dict):
result = cls(None, None, None)
return common_util.DeserializeAttributesFromJsonDict(
json_dict, result, ['reason', 'cost', 'is_timing'])
class RequestDependencyGraph(object):
"""Request dependency graph."""
# This resource type may induce a timing dependency. See _SplitChildrenByTime
# for details.
# TODO(lizeb,mattcary): are these right?
_CAN_BE_TIMING_PARENT = set(['script', 'magic-debug-content'])
_CAN_MAKE_TIMING_DEPENDENCE = set(['json', 'other', 'magic-debug-content'])
def __init__(self, requests, dependencies_lens,
node_class=RequestNode, edge_class=Edge):
"""Creates a request dependency graph.
Args:
requests: ([Request]) a list of requests.
dependencies_lens: (RequestDependencyLens)
node_class: (subclass of RequestNode)
edge_class: (subclass of Edge)
"""
self._requests = None
self._first_request_node = None
self._deps_graph = None
self._nodes_by_id = None
if requests is None: # Deserialization.
return
assert issubclass(node_class, RequestNode)
assert issubclass(edge_class, Edge)
self._requests = requests
deps = dependencies_lens.GetRequestDependencies()
self._nodes_by_id = {r.request_id : node_class(r) for r in self._requests}
edges = []
for (parent_request, child_request, reason) in deps:
if (parent_request.request_id not in self._nodes_by_id
or child_request.request_id not in self._nodes_by_id):
continue
parent_node = self._nodes_by_id[parent_request.request_id]
child_node = self._nodes_by_id[child_request.request_id]
edges.append(edge_class(parent_node, child_node, reason))
self._first_request_node = self._nodes_by_id[self._requests[0].request_id]
self._deps_graph = graph.DirectedGraph(self._nodes_by_id.values(), edges)
self._HandleTimingDependencies()
@property
def graph(self):
"""Return the Graph we're based on."""
return self._deps_graph
def UpdateRequestsCost(self, request_id_to_cost):
"""Updates the cost of the nodes identified by their request ID.
Args:
request_id_to_cost: ({request_id: new_cost}) Can be a superset of the
requests actually present in the graph.
"""
for node in self._deps_graph.Nodes():
request_id = node.request.request_id
if request_id in request_id_to_cost:
node.cost = request_id_to_cost[request_id]
def Cost(self, from_first_request=True, path_list=None, costs_out=None):
"""Returns the cost of the graph, that is the costliest path.
Args:
from_first_request: (boolean) If True, only considers paths that originate
from the first request node.
path_list: (list) See graph.Cost().
costs_out: (list) See graph.Cost().
"""
if from_first_request:
return self._deps_graph.Cost(
[self._first_request_node], path_list, costs_out)
else:
return self._deps_graph.Cost(path_list=path_list, costs_out=costs_out)
def AncestorRequests(self, descendants):
"""Return requests that are ancestors of a set of requests.
Args:
descendants: ([Request]) List of requests.
Returns:
List of Requests that are ancestors of descendants.
"""
return [n.request for n in self.graph.AncestorNodes(
self._nodes_by_id[r.request_id] for r in descendants)]
def _HandleTimingDependencies(self):
try:
for n in self._deps_graph.TopologicalSort():
self._SplitChildrenByTime(n)
except AssertionError as exc:
sys.stderr.write('Bad topological sort: %s\n'
'Skipping child split\n' % str(exc))
def _SplitChildrenByTime(self, parent):
"""Splits children of a node by request times.
The initiator of a request may not be the true dependency of a request. For
example, a script may appear to load several resources independently, but in
fact one of them may be a JSON data file, and the remaining resources assets
described in the JSON. The assets should be dependent upon the JSON data
file, and not the original script.
This function approximates that by rearranging the children of a node
according to their request times. The predecessor of each child is made to
be the node with the greatest finishing time, that is before the start time
of the child.
We do this by sorting the nodes twice, once by start time and once by end
time. We mark the earliest end time, and then we walk the start time list,
advancing the end time mark when it is less than our current start time.
This is refined by only considering assets which we believe actually create
a dependency. We only split if the original parent is a script, and the new
parent a data file.
We incorporate this heuristic by skipping over any non-script/json resources
when moving the end mark.
TODO(mattcary): More heuristics, like incorporating cachability somehow, and
not just picking arbitrarily if there are two nodes with the same end time
(does that ever really happen?)
Args:
parent: (_RequestNode) The children of this node are processed by this
function.
"""
if parent.request.GetContentType() not in self._CAN_BE_TIMING_PARENT:
return
edges = self._deps_graph.OutEdges(parent)
edges_by_start_time = sorted(
edges, key=lambda e: e.to_node.request.start_msec)
edges_by_end_time = sorted(
edges, key=lambda e: e.to_node.request.end_msec)
end_mark = 0
for current in edges_by_start_time:
assert current.from_node is parent
if current.to_node.request.start_msec < parent.request.end_msec - 1e-5:
parent_url = parent.request.url
child_url = current.to_node.request.url
logging.warning('Child loaded before parent finished: %s -> %s',
request_track.ShortName(parent_url),
request_track.ShortName(child_url))
go_to_next_child = False
while end_mark < len(edges_by_end_time):
if edges_by_end_time[end_mark] == current:
go_to_next_child = True
break
elif (edges_by_end_time[end_mark].to_node.request.GetContentType()
not in self._CAN_MAKE_TIMING_DEPENDENCE):
end_mark += 1
elif (end_mark < len(edges_by_end_time) - 1 and
edges_by_end_time[end_mark + 1].to_node.request.end_msec
< current.to_node.request.start_msec):
end_mark += 1
else:
break
if end_mark >= len(edges_by_end_time):
break # It's not possible to rearrange any more children.
if go_to_next_child:
continue # We can't rearrange this child, but the next child may be
# eligible.
if (edges_by_end_time[end_mark].to_node.request.end_msec
<= current.to_node.request.start_msec):
current.is_timing = True
self._deps_graph.UpdateEdge(
current, edges_by_end_time[end_mark].to_node,
current.to_node)
def ToJsonDict(self):
result = {'graph': self.graph.ToJsonDict()}
result['requests'] = [r.ToJsonDict() for r in self._requests]
return result
@classmethod
def FromJsonDict(cls, json_dict, node_class, edge_class):
result = cls(None, None)
graph_dict = json_dict['graph']
g = graph.DirectedGraph.FromJsonDict(graph_dict, node_class, edge_class)
result._requests = [request_track.Request.FromJsonDict(r)
for r in json_dict['requests']]
result._nodes_by_id = {node.request.request_id: node
for node in g.Nodes()}
result._first_request_node = result._nodes_by_id[
result._requests[0].request_id]
result._deps_graph = g
return result