| #!/usr/bin/env python3 |
| # Copyright 2020 The Chromium Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| """Command-line tool to enumerate cycles in Graph structures.""" |
| |
| import argparse |
| import collections |
| |
| from typing import Dict, List, Tuple |
| |
| import serialization |
| import graph |
| |
| |
| Cycle = Tuple[graph.Node, ...] |
| |
| |
| def find_cycles_from_node( |
| start_node: graph.Node, |
| max_cycle_length: int, |
| node_to_id: Dict[graph.Node, int], |
| ) -> List[List[List[graph.Node]]]: |
| """Finds all cycles starting at |start_node| in a subset of nodes. |
| |
| Only nodes with ID >= |start_node|'s ID will be considered. This ensures |
| uniquely counting all cycles since this function is called on all nodes of |
| the graph, one at a time in increasing order. Some justification: Consider |
| cycle C with smallest node n. When this function is called on node n, C will |
| be found since all nodes of C are >= n. After that call, C will never be |
| found again since further calls are on nodes > n (n is removed from the |
| search space). |
| |
| Cycles are found by recursively scanning all outbound nodes starting from |
| |start_node|, up to a certain depth. Note this is the same idea, but is |
| different from DFS since nodes can be visited more than once (to avoid |
| missing cycles). An example of normal DFS (where nodes can only be visited |
| once) missing cycles is in the following graph, starting at a: |
| a <-> b <-> c |
| ^ ^ |
| | | |
| +-----------+ |
| DFS(a) |
| DFS(b) |
| DFS(a) (cycle aba, return) |
| DFS(c) |
| DFS(b) (already seen, return) |
| DFS(a) (cycle abca, return) |
| DFS(c) (already seen, return) |
| Since DFS(c) cannot proceed, we miss the cycles aca and acba. |
| |
| Args: |
| start_node: The node to start the cycle search from. Only nodes with ID |
| >= |start_node|'s ID will be considered. |
| max_cycle_length: The maximum length of cycles to be found. |
| node_to_id: A map from a Node to a generated ID. |
| |
| Returns: |
| A list |cycles| of length |max_cycle_length| + 1, where cycles[i] |
| contains all relevant cycles of length i. |
| """ |
| start_node_id = node_to_id[start_node] |
| cycles = [[] for _ in range(max_cycle_length + 1)] |
| |
| def edge_is_interesting(start: graph.Node, end: graph.Node) -> bool: |
| if start == end: |
| # Ignore self-loops. |
| return False |
| if node_to_id[end] < start_node_id: |
| # Ignore edges ending at nodes with ID lower than the start. |
| return False |
| return True |
| |
| dfs_stack = collections.deque() |
| on_stack: Dict[graph.Node, bool] = collections.defaultdict(bool) |
| |
| def find_cycles_dfs(cur_node: graph.Node, cur_length: int): |
| for other_node in cur_node.outbound: |
| if edge_is_interesting(cur_node, other_node): |
| if other_node == start_node: |
| # We have found a valid cycle, add it to the list. |
| new_cycle = list(dfs_stack) + [cur_node, start_node] |
| cycles[cur_length + 1].append(new_cycle) |
| |
| elif (not on_stack[other_node] |
| and cur_length + 1 < max_cycle_length): |
| # We are only allowed to recurse into the next node if: |
| # 1) It hasn't been visited in the current cycle. This is |
| # because if the next node n _has_ been visited in the |
| # current cycle (i.e., it's on the stack), then we have |
| # found a cycle starting and ending at n. Since this |
| # function only returns cycles starting at |start_node|, we |
| # only care if |n = start_node| (which we already detect |
| # above). |
| # 2) It would not exceed the maximum depth allowed. |
| dfs_stack.append(cur_node) |
| on_stack[cur_node] = True |
| find_cycles_dfs(other_node, cur_length + 1) |
| dfs_stack.pop() |
| on_stack[cur_node] = False |
| |
| find_cycles_dfs(start_node, 0) |
| return cycles |
| |
| |
| def find_cycles(base_graph: graph.Graph, |
| max_cycle_length: int) -> List[List[Cycle]]: |
| """Finds all cycles in the graph within a certain length. |
| |
| The algorithm is as such: Number the nodes arbitrarily. For i from 0 to |
| the number of nodes, find all cycles starting and ending at node i using |
| only nodes with numbers >= i (see find_cycles_from_node). Taking the union |
| of the results will give all relevant cycles in the graph. |
| |
| Returns: |
| A list |cycles| of length |max_cycle_length| + 1, where cycles[i] |
| contains all cycles of length i. |
| """ |
| sorted_base_graph_nodes = sorted(base_graph.nodes) |
| # Some preliminary setup: map between the graph nodes' unique keys and a |
| # unique number, since the algorithm needs some way to decide when a node is |
| # 'bigger'. Nodes with a lower number will be processed first, which |
| # influences the output cycles. For example, the cycle abca is also valid as |
| # the cycle bcab or cabc. By numbering node a lower than b and c, it is |
| # guaranteed that the cycle will be output as abca. |
| node_to_id = {} |
| for generated_node_id, node in enumerate(sorted_base_graph_nodes): |
| node_to_id[node] = generated_node_id |
| |
| num_nodes = base_graph.num_nodes |
| cycles = [[] for _ in range(max_cycle_length + 1)] |
| |
| for start_node in sorted_base_graph_nodes: |
| start_node_cycles = find_cycles_from_node(start_node, max_cycle_length, |
| node_to_id) |
| for cycle_length, cycle_list in enumerate(start_node_cycles): |
| cycles[cycle_length].extend(cycle_list) |
| |
| # Convert cycles to be tuples of nodes, so the cycles are hashable and |
| # immutable. |
| immutable_cycles = [] |
| for cycle_list in cycles: |
| immutable_cycles.append([tuple(cycle) for cycle in cycle_list]) |
| return immutable_cycles |
| |
| |
| def main(): |
| """Enumerates the cycles within a certain length in a graph.""" |
| |
| arg_parser = argparse.ArgumentParser( |
| description='Given a JSON dependency graph, count the number of cycles ' |
| 'in the package graph.') |
| required_arg_group = arg_parser.add_argument_group('required arguments') |
| required_arg_group.add_argument( |
| '-f', |
| '--file', |
| required=True, |
| help='Path to the JSON file containing the dependency graph. ' |
| 'See the README on how to generate this file.') |
| required_arg_group.add_argument( |
| '-l', |
| '--cycle-length', |
| type=int, |
| required=True, |
| help='The maximum length of cycles to find, at most 5 or 6 to keep the ' |
| 'script runtime low.') |
| arg_parser.add_argument( |
| '-o', |
| '--output', |
| type=argparse.FileType('w'), |
| help='Path to the file to write the list of cycles to.') |
| args = arg_parser.parse_args() |
| |
| _, package_graph, _ = serialization.load_class_and_package_graphs_from_file( |
| args.file) |
| |
| all_cycles = find_cycles(package_graph, args.cycle_length) |
| # There are no cycles of length 0 or 1 (since self-loops are disallowed). |
| nonzero_cycles = all_cycles[2:] |
| |
| print(f'Found {sum(len(cycles) for cycles in nonzero_cycles)} cycles.') |
| |
| for cycle_length, cycles in enumerate(nonzero_cycles, 2): |
| print(f'Found {len(cycles)} cycles of length {cycle_length}.') |
| |
| if args.output is not None: |
| print(f'Dumping cycles to {args.output.name}.') |
| with args.output as output_file: |
| for cycle_length, cycles in enumerate(nonzero_cycles, 2): |
| output_file.write(f'Cycles of length {cycle_length}:\n') |
| cycle_texts = [] |
| for cycle in cycles: |
| cycle_texts.append(' > '.join(cycle_node.name |
| for cycle_node in cycle)) |
| output_file.write('\n'.join(sorted(cycle_texts))) |
| output_file.write('\n') |
| |
| |
| if __name__ == '__main__': |
| main() |