tools/clang/spanify/extract_edits.py - chromium/src - Git at Google

 #!/usr/bin/env vpython3
 # Copyright 2024 The Chromium Authors
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.
 """Script to extract edits from clang spanification tool output.

 The edits have the following format:
 ```
   e lhs_node1 rhs_node2           # Edge from node 1 to node 2.
   e lhs_node2 rhs_node3           # Edge from node 2 to node 3.
   e lhs_node3 rhs_node4           # Edge from node 3 to node 4.
   ...
   s node_1                        # Source node of the graph that triggers a
                                   # rewrite (i.e. buffer usage)
   ...
   i node_4                        # Sink node. A rewrite from a source
                                   # requires the ultimate end nodes to be
                                   # sink. They represent nodes we know can
                                   # be rewrite because the buffer's size
                                   # is known.
   ...
   f lhs_node rhs_node replacement # Span frontier replacement applied if
                                   # lhs_node is not rewritten but rhs_node
                                   # is.
   ...
   r node_1 replacement            # A replacement associated with a node.
 ```

 Where all the `*node*` are abstract ID that represents a node in the graph.

 Real example:
 ```
   s 0008244:DBKYJas7
   s 0008303:GWkNbhQ4
   e 0001450:8-AxbSn3 0008303:GWkNbhQ4
   e 0001518:BUQKDaXe 0008244:DBKYJas7
   e 0001518:L97i_bwg 0008303:GWkNbhQ4
   f 0001450:8-AxbSn3 0008303:GWkNbhQ4 r:::../../base/memory/shared_memory_mapping.h:::8684:::0:::0:::.data()
   f 0001518:BUQKDaXe 0008244:DBKYJas7 r:::../../base/memory/shared_memory_mapping.h:::8535:::15:::0:::(data() + size()).data()
   f 0001518:L97i_bwg 0008303:GWkNbhQ4 r:::../../base/memory/shared_memory_mapping.h:::8686:::15:::0:::(data() + size()).data()
   r 0001518:BUQKDaXe include-user-header:::../../base/containers/checked_iterators.h:::-1:::-1:::base/containers/span.h
   r 0001518:BUQKDaXe r:::../../base/containers/checked_iterators.h:::1518:::9:::0:::base::span<const unsigned char>
   r 0001946:gKWdIpwv r:::../../base/containers/checked_iterators.h:::1946:::9:::0:::base::span<const unsigned char>
 ```

 **Important Note on "r:::" Replacement Directive:**
 The `replacement_directive` strings starting with `r:::` (which can appear in
 `r` lines or as the third argument in `f` lines) have been extended and are
 slightly different from the format accepted by apply_edits.py. There is an
 additional `precedence` field before the replacement text.

 This `<precedence>` value is used by `extract_edits.py` to merge conflicting
 insertions. If multiple `r` directives are insertions (i.e., `<length>` is
 "0") and target the exact same file and offset:
     Their `<text>` components are merged into a single replacement.
     Directives with a lower numerical `<precedence>` value have their text
     inserted *earlier* (further to the left) in the final merged text.

 The `<precedence>` field is **removed** by `extract_edits.py` from all `r`
 directives before they are included in the final list of edits output by this
 script.

 extract_edits.py takes input that is concatenated from multiple tool
 invocations and extract just the edits with the following steps:
 1- Construct the adjacency list of nodes
    (a pairs of nodes represents an edge in the directed graph)

 2- Determine whether size info is available for a given source node.

 3- Run `DFS` starting from source nodes whose size info is available and emit
    edits for reachable nodes.

 4- Adapt dereference expressions and add data changes where necessary.

 extract_edits.py would then emit the following output:
     <edit1>
     <edit2>
     <edit3>
     ...
 Where the edit is either a replacement or an include directive.

 For more details about how the tool works, see the doc here:
 https://docs.google.com/document/d/1hUPe21CDdbT6_YFHl03KWlcZqhNIPBAfC-5N5DDY2OE/
 """

 import sys
 import urllib.parse

 from os.path import expanduser
 import pprint
 from collections import defaultdict


 # The connected components in the graph. This is useful to split the rewrite
 # into atomic changes.
 class Component:
     all = set()

     def __init__(self) -> None:
         # Changes associated with the connected component.
         self.changes = set()

         # Frontier changes are either accepted or rejected. The two dictionaries
         # are used to detect conflicts in the frontier changes. This might
         # happen in rare cases where C++ macros are used. In case of conflict,
         # the whole component is discarded.
         self.frontier_changes_accepted = set()
         self.frontier_changes_rejected = set()

         # `Component.all` can be used to iterate over all components.
         Component.all.add(self)

 class Node:
     # Mapping in between the node's key and the node.
     key_to_node = dict()

     def __init__(self, key) -> None:
         self.key = key
         self.replacements = set()

         # Neighbors of the node in the graph. The graph is directed,
         # flowing from lhs to rhs.
         self.neighbors_directed = set()
         self.neighbors_undirected = set()

         # Property to track whether the node is "connected" to a source node.
         # This is set from DFS(...)
         self.visited = False

         # The size info is available for a node if all the paths through the
         # graph are leading to a sink. This is initially set to None, and then
         # set by ComputeSizeInfoAvailable(...).
         self.size_info_available = None
         # Track whether this node is currently on the stack of the
         # `ComputeSizeInfoAvailable` recursive function. This is used to detect
         # cycles in the graph.
         self.size_info_visiting = False

         # Identify the connected component this node belongs to. This is set in
         # the main function.
         self.component = None

     def add_replacement(self, replacement: str):
         assert_valid_replacement(replacement)
         self.replacements.add(replacement)

     # Static method to get a node from a replacement key.
     @classmethod
     def from_key(cls: type, key: str):
         # Deduplicate nodes, as they will appear multiple times in the input.
         node = Node.key_to_node.get(key)
         if node is not None:
             return node

         node = Node(key)
         Node.key_to_node[key] = node
         return node

     def __repr__(self) -> str:
         result = [
             f"Node {hash(self)} {{",
             f"  key: {self.key}",
             f"  size_info_available: {self.size_info_available}",
             f"  neighbors_directed: {pprint.pformat([hash(n) for n in self.neighbors_directed], indent=4)}",
             "}",
         ]
         return "\n".join(result)

     # This is not parsable by from_key but is useful for debugging the
     # graph of nodes.
     def to_debug_string(self) -> str:
         return repr(self)

     # Static method to get all nodes.
     @classmethod
     def all(cls: type):
         return cls.key_to_node.values()


 def DFS(node: Node):
     """
     Explore the graph in depth-first search from the given node. Identify edits
     to apply.

     Args:
         node: The current node being processed.
     """
     # Only visit nodes once:
     if (node.visited):
         return
     node.visited = True

     for replacement in node.replacements:
         node.component.changes.add(replacement)

     for neighbour in node.neighbors_directed:
         DFS(neighbour)


 def ComputeSizeInfoAvailable(node: Node):
     """
     Determines whether size information is available for a source node and its
     neighbors_directed. Updates the node's size_info_available attribute.

     Args:
         node: The current node's being processed.
     """

     # Memoization: node.size_info_available has already been computed. Return.
     if node.size_info_available:
         return

     # If there are no dependencies, the size info is definitely not available
     # for this node.
     if not node.neighbors_directed:
         node.size_info_available = False
         return

     # Cycle: If the node is currently being visited, it means it depends on
     # itself, and there's a cycle. We can't determine the size info for this
     # node with the current implementation.
     if node.size_info_visiting:
         return

     # The size info is available for a node if all the paths through the graph
     # are leading to a sink. Locally, it means all the dependencies have their
     # size info available.
     node.size_info_visiting = True
     for neighbour in node.neighbors_directed:
         ComputeSizeInfoAvailable(neighbour)
     node.size_info_visiting = False

     # This node can be rewritten if all of its dependencies can.
     # Dependencies with `size_info_available == None` are nodes that are part
     # of an isolated cycle. Isolated cycle are rewritten.
     node.size_info_available = not any(
         neighbour.size_info_available == False
         for neighbour in node.neighbors_directed)

 # Assert a replacement follows the expected format:
 # - r:::<file path>:::<offset>:::<length>:::<replacement text>
 # - include-user-header:::<file path>:::-1:::-1:::<include text>
 # - include-system-header:::<file path>:::-1:::-1:::<include text>
 def assert_valid_replacement(replacement: str):
     try:
         parts = replacement.split(':::')
         directive_type = parts[0]

         assert directive_type in [
             'r', 'include-user-header', 'include-system-header'
         ], f"Unknown directive type '{directive_type}'"

         assert len(parts) > 1
         assert parts[1] != '', "File path must not be empty."

         if directive_type == 'r':
             assert len(
                 parts
             ) == 6, f"Directive 'r' must have 6 parts, got {len(parts)}"

             # Validate offset.
             assert parts[2].isdigit()

             # Validate length.
             assert parts[3].isdigit()

             # Validate precedence. Can be a negative or positive integer.
             try:
                 int(parts[4])  # Check if it's a valid integer representation
             except ValueError:
                 raise AssertionError(
                     f"Precedence '{parts[4]}' must be a valid integer string.")
         else:
             assert len(parts) == 5
     except:
         # Augment the error with the replacement text for better debugging.
         assert False, f"Invalid replacement: \"{replacement}\""


 def merge_insertions_and_remove_precedence_field(changes: set) -> set:
     """
     Merges conflicting insertions at the same code location.

     The merge order is determined as follows:
     Handles "associativity" by grouping insertions based on precedence sign.
         The final text is formed by concatenating all positive-precedence
         insertions (i.e., "closing" parts), then zero-precedence, then all
         negative-precedence insertions ("opening" parts). This ensures that
         a closing bracket from a left expression is placed before an opening
         bracket from a right expression (e.g., `...>[...`).
     Also removes the precedence field from the final replacement directives.
     """
     replacements_by_range = defaultdict(list)
     result = set()

     for change in changes:
         assert_valid_replacement(change)
         parts = change.split(':::')
         directive_type = parts[0]

         if directive_type == 'r':
             _, file_path, offset, length, precedence, text = parts
             precedence = int(precedence)
             # Key identifies the exact code range being replaced
             key = (file_path, offset, length)
             replacements_by_range[key].append((precedence, text))
         else:
             result.add(change)

     for key, candidates in replacements_by_range.items():
         assert candidates, "A key should always have at least one candidate."

         file_path, offset, length = key

         if len(candidates) == 1:
             # No conflict.
             _, text = candidates[0]
             reconstructed_directive = f"r:::{file_path}:::{offset}:::{length}:::{text}"
             result.add(reconstructed_directive)
             continue

         if int(length) == 0:
             # Conflicting insertion detected.

             # Assert uniqueness of precedence values to ensure determinism.
             precedences = [p for p, _ in candidates]
             assert len(precedences) == len(
                 set(precedences)
             ), "Conflicting insertions need to have unique precedece values."

             merged_texts = [t for _, t in sorted(candidates)]
             reconstructed_directive = f"r:::{file_path}:::{offset}:::{length}:::{''.join(merged_texts)}"
             result.add(reconstructed_directive)
         else:
             # Conflicting non-insertion replacement. This is an unresolvable
             # conflict for now. Just remove the precedence field.
             for _, text in candidates:
                 reconstructed_directive = f"r:::{file_path}:::{offset}:::{length}:::{text}"
                 result.add(reconstructed_directive)

     return result


 def main():
     # Since the tool is invoked from multiple compile units, we are using sets
     # to deduplicate what was visible from multiple compile units.

     # A set of source nodes that trigger the rewrite.
     sources = set()

     # A set of sink nodes. A rewrite from a source requires all the end nodes
     # to be sink. They represent nodes where the rewrite could be applied,
     # because the size info is available.
     sinks = set()

     # Change to apply at the edge in between rewritten and non-rewritten nodes.
     frontiers = set()

     # Collect from every compile units the nodes and edges of the graph:
     for line in sys.stdin:
         line = line.rstrip('\n\r')

         # The first character of the line denotes the type of the line:
         # - 'r': Replacement associated with a node.
         # - 'e': Edge in between two nodes.
         # - 's': Source node of the graph triggering the rewrite.
         # - 'f': Span frontier change.
         # - 'i': Sink node. A rewrite from a source requires the ultimate end
         #        nodes to be sink. They represent nodes we know can be rewrite
         #        because the buffer's size is known.
         assert line[0] in ['r', 'e', 's', 'i', 'f'], "Unknown line type: " +\
                line[0] + " in line: " + line

         # Replacement associated with a node:
         if line[0] == 'r':
             (_, key, replacement) = line.split(' ', 2)
             Node.from_key(key).add_replacement(replacement)
             continue

         # Sink node:
         if line[0] == 'i':
             (_, key) = line.split(' ')
             sinks.add(key)
             continue

         # Source node:
         if line[0] == 's':
             (_, key) = line.split(' ')
             sources.add(key)
             continue

         # Edge in between two nodes:
         if line[0] == 'e':
             (_, lhs_key, rhs_key) = line.split(' ')
             lhs = Node.from_key(lhs_key)
             rhs = Node.from_key(rhs_key)

             # Directed edge:
             lhs.neighbors_directed.add(rhs)

             # Undirected edge:
             lhs.neighbors_undirected.add(rhs)
             rhs.neighbors_undirected.add(lhs)
             continue

         # Span frontier change:
         if line[0] == 'f':
             frontiers.add(line)
             continue

         assert False, "Unreachable code"

     # Mark the sink nodes as rewritable.
     for sink in sinks:
         Node.from_key(sink).size_info_available = True

     # Mark the source nodes:
     source_nodes = []
     for source in sources:
         source_node = Node.from_key(source)
         source_nodes.append(source_node)

         # Determine whether size information is available from this source.
         ComputeSizeInfoAvailable(source_node)

     # Identify all the connected components in the undirected graph. This is
     # exploring the graph in depth-first search and assigning the same component
     # to each node in the connected component.
     for node in Node.all():
         if node.component is not None:
             continue
         new_component = Component()
         stack = [node]
         while stack:
             current = stack.pop()
             if current.component is not None:
                 continue
             current.component = new_component
             for neighbor in current.neighbors_undirected:
                 stack.append(neighbor)

     # Collect the changes to apply. Starting from sources nodes whose size info
     # could be determined.
     for node in source_nodes:
         # Collect the changes to apply. We start from sources nodes whose size
         # info is available and explore the graph in depth-first search.
         if node.size_info_available:
             DFS(node)

     # At the edge in between rewritten and non-rewritten nodes, we need
     # to add a call to `.data()` to access the pointer from the span:
     for frontier in frontiers:
         (_, lhs_key, rhs_key, replacement) = frontier.split(' ', 3)
         lhs_node = Node.from_key(lhs_key)
         rhs_node = Node.from_key(rhs_key)

         apply_frontier = rhs_node.visited and not lhs_node.visited
         if apply_frontier:
             lhs_node.component.frontier_changes_accepted.add(replacement)
         else:
             lhs_node.component.frontier_changes_rejected.add(replacement)

     # Do or do not, there is no try. Discard components with conflicting
     # frontier changes. This happens in rare cases where C++ macros are used.
     # The whole component is discarded in case of conflict, because we can't
     # satisfy the constraints.
     for component in Component.all:
         if component.frontier_changes_accepted & component.frontier_changes_rejected:
             component.changes.clear()
             continue;

         component.changes |= component.frontier_changes_accepted

     # Emit the changes:
     # - ~/scratch/patches.txt: A summary of each atomic change.
     # - ~/scratch/patch_<patch_index>: Write each atomic change.
     # - stdout: Print a bundle of all the changes. This is usually piped to
     #           "./tools/clang/scripts/apply_edits.py" to apply the changes.

     summary_filename = expanduser('~/scratch/patches.txt')
     summary_file = open(summary_filename, 'w')

     component_with_changes = [
         component for component in Component.all if len(component.changes) > 0
     ]

     for index, component in enumerate(component_with_changes):
         merged_component_changes = merge_insertions_and_remove_precedence_field(
             component.changes)

         for text in merged_component_changes:
             print(text)

         summary_file.write(f'patch_{index}: {len(merged_component_changes)}\n')

         with open(expanduser(f'~/scratch/patch_{index}.txt'), 'w') as f:
             f.write('\n'.join(merged_component_changes))

     summary_file.close()

     return 0


 if __name__ == '__main__':
     sys.exit(main())
	#!/usr/bin/env vpython3
	# Copyright 2024 The Chromium Authors
	# Use of this source code is governed by a BSD-style license that can be
	# found in the LICENSE file.
	"""Script to extract edits from clang spanification tool output.

	The edits have the following format:
	```
	e lhs_node1 rhs_node2 # Edge from node 1 to node 2.
	e lhs_node2 rhs_node3 # Edge from node 2 to node 3.
	e lhs_node3 rhs_node4 # Edge from node 3 to node 4.
	...
	s node_1 # Source node of the graph that triggers a
	# rewrite (i.e. buffer usage)
	...
	i node_4 # Sink node. A rewrite from a source
	# requires the ultimate end nodes to be
	# sink. They represent nodes we know can
	# be rewrite because the buffer's size
	# is known.
	...
	f lhs_node rhs_node replacement # Span frontier replacement applied if
	# lhs_node is not rewritten but rhs_node
	# is.
	...
	r node_1 replacement # A replacement associated with a node.
	```

	Where all the `node` are abstract ID that represents a node in the graph.

	Real example:
	```
	s 0008244:DBKYJas7
	s 0008303:GWkNbhQ4
	e 0001450:8-AxbSn3 0008303:GWkNbhQ4
	e 0001518:BUQKDaXe 0008244:DBKYJas7
	e 0001518:L97i_bwg 0008303:GWkNbhQ4
	f 0001450:8-AxbSn3 0008303:GWkNbhQ4 r:::../../base/memory/shared_memory_mapping.h:::8684:::0:::0:::.data()
	f 0001518:BUQKDaXe 0008244:DBKYJas7 r:::../../base/memory/shared_memory_mapping.h:::8535:::15:::0:::(data() + size()).data()
	f 0001518:L97i_bwg 0008303:GWkNbhQ4 r:::../../base/memory/shared_memory_mapping.h:::8686:::15:::0:::(data() + size()).data()
	r 0001518:BUQKDaXe include-user-header:::../../base/containers/checked_iterators.h:::-1:::-1:::base/containers/span.h
	r 0001518:BUQKDaXe r:::../../base/containers/checked_iterators.h:::1518:::9:::0:::base::span<const unsigned char>
	r 0001946:gKWdIpwv r:::../../base/containers/checked_iterators.h:::1946:::9:::0:::base::span<const unsigned char>
	```

	Important Note on "r:::" Replacement Directive:
	The `replacement_directive` strings starting with `r:::` (which can appear in
	`r` lines or as the third argument in `f` lines) have been extended and are
	slightly different from the format accepted by apply_edits.py. There is an
	additional `precedence` field before the replacement text.

	This `<precedence>` value is used by `extract_edits.py` to merge conflicting
	insertions. If multiple `r` directives are insertions (i.e., `<length>` is
	"0") and target the exact same file and offset:
	Their `<text>` components are merged into a single replacement.
	Directives with a lower numerical `<precedence>` value have their text
	inserted earlier (further to the left) in the final merged text.

	The `<precedence>` field is removed by `extract_edits.py` from all `r`
	directives before they are included in the final list of edits output by this
	script.

	extract_edits.py takes input that is concatenated from multiple tool
	invocations and extract just the edits with the following steps:
	1- Construct the adjacency list of nodes
	(a pairs of nodes represents an edge in the directed graph)

	2- Determine whether size info is available for a given source node.

	3- Run `DFS` starting from source nodes whose size info is available and emit
	edits for reachable nodes.

	4- Adapt dereference expressions and add data changes where necessary.

	extract_edits.py would then emit the following output:
	<edit1>
	<edit2>
	<edit3>
	...
	Where the edit is either a replacement or an include directive.

	For more details about how the tool works, see the doc here:
	https://docs.google.com/document/d/1hUPe21CDdbT6_YFHl03KWlcZqhNIPBAfC-5N5DDY2OE/
	"""

	import sys
	import urllib.parse

	from os.path import expanduser
	import pprint
	from collections import defaultdict


	# The connected components in the graph. This is useful to split the rewrite
	# into atomic changes.
	class Component:
	all = set()

	def __init__(self) -> None:
	# Changes associated with the connected component.
	self.changes = set()

	# Frontier changes are either accepted or rejected. The two dictionaries
	# are used to detect conflicts in the frontier changes. This might
	# happen in rare cases where C++ macros are used. In case of conflict,
	# the whole component is discarded.
	self.frontier_changes_accepted = set()
	self.frontier_changes_rejected = set()

	# `Component.all` can be used to iterate over all components.
	Component.all.add(self)

	class Node:
	# Mapping in between the node's key and the node.
	key_to_node = dict()

	def __init__(self, key) -> None:
	self.key = key
	self.replacements = set()

	# Neighbors of the node in the graph. The graph is directed,
	# flowing from lhs to rhs.
	self.neighbors_directed = set()
	self.neighbors_undirected = set()

	# Property to track whether the node is "connected" to a source node.
	# This is set from DFS(...)
	self.visited = False

	# The size info is available for a node if all the paths through the
	# graph are leading to a sink. This is initially set to None, and then
	# set by ComputeSizeInfoAvailable(...).
	self.size_info_available = None
	# Track whether this node is currently on the stack of the
	# `ComputeSizeInfoAvailable` recursive function. This is used to detect
	# cycles in the graph.
	self.size_info_visiting = False

	# Identify the connected component this node belongs to. This is set in
	# the main function.
	self.component = None

	def add_replacement(self, replacement: str):
	assert_valid_replacement(replacement)
	self.replacements.add(replacement)

	# Static method to get a node from a replacement key.
	@classmethod
	def from_key(cls: type, key: str):
	# Deduplicate nodes, as they will appear multiple times in the input.
	node = Node.key_to_node.get(key)
	if node is not None:
	return node

	node = Node(key)
	Node.key_to_node[key] = node
	return node

	def __repr__(self) -> str:
	result = [
	f"Node {hash(self)} {{",
	f" key: {self.key}",
	f" size_info_available: {self.size_info_available}",
	f" neighbors_directed: {pprint.pformat([hash(n) for n in self.neighbors_directed], indent=4)}",
	"}",
	]
	return "\n".join(result)

	# This is not parsable by from_key but is useful for debugging the
	# graph of nodes.
	def to_debug_string(self) -> str:
	return repr(self)

	# Static method to get all nodes.
	@classmethod
	def all(cls: type):
	return cls.key_to_node.values()


	def DFS(node: Node):
	"""
	Explore the graph in depth-first search from the given node. Identify edits
	to apply.

	Args:
	node: The current node being processed.
	"""
	# Only visit nodes once:
	if (node.visited):
	return
	node.visited = True

	for replacement in node.replacements:
	node.component.changes.add(replacement)

	for neighbour in node.neighbors_directed:
	DFS(neighbour)


	def ComputeSizeInfoAvailable(node: Node):
	"""
	Determines whether size information is available for a source node and its
	neighbors_directed. Updates the node's size_info_available attribute.

	Args:
	node: The current node's being processed.
	"""

	# Memoization: node.size_info_available has already been computed. Return.
	if node.size_info_available:
	return

	# If there are no dependencies, the size info is definitely not available
	# for this node.
	if not node.neighbors_directed:
	node.size_info_available = False
	return

	# Cycle: If the node is currently being visited, it means it depends on
	# itself, and there's a cycle. We can't determine the size info for this
	# node with the current implementation.
	if node.size_info_visiting:
	return

	# The size info is available for a node if all the paths through the graph
	# are leading to a sink. Locally, it means all the dependencies have their
	# size info available.
	node.size_info_visiting = True
	for neighbour in node.neighbors_directed:
	ComputeSizeInfoAvailable(neighbour)
	node.size_info_visiting = False

	# This node can be rewritten if all of its dependencies can.
	# Dependencies with `size_info_available == None` are nodes that are part
	# of an isolated cycle. Isolated cycle are rewritten.
	node.size_info_available = not any(
	neighbour.size_info_available == False
	for neighbour in node.neighbors_directed)

	# Assert a replacement follows the expected format:
	# - r:::<file path>:::<offset>:::<length>:::<replacement text>
	# - include-user-header:::<file path>:::-1:::-1:::<include text>
	# - include-system-header:::<file path>:::-1:::-1:::<include text>
	def assert_valid_replacement(replacement: str):
	try:
	parts = replacement.split(':::')
	directive_type = parts[0]

	assert directive_type in [
	'r', 'include-user-header', 'include-system-header'
	], f"Unknown directive type '{directive_type}'"

	assert len(parts) > 1
	assert parts[1] != '', "File path must not be empty."

	if directive_type == 'r':
	assert len(
	parts
	) == 6, f"Directive 'r' must have 6 parts, got {len(parts)}"

	# Validate offset.
	assert parts[2].isdigit()

	# Validate length.
	assert parts[3].isdigit()

	# Validate precedence. Can be a negative or positive integer.
	try:
	int(parts[4]) # Check if it's a valid integer representation
	except ValueError:
	raise AssertionError(
	f"Precedence '{parts[4]}' must be a valid integer string.")
	else:
	assert len(parts) == 5
	except:
	# Augment the error with the replacement text for better debugging.
	assert False, f"Invalid replacement: \"{replacement}\""


	def merge_insertions_and_remove_precedence_field(changes: set) -> set:
	"""
	Merges conflicting insertions at the same code location.

	The merge order is determined as follows:
	Handles "associativity" by grouping insertions based on precedence sign.
	The final text is formed by concatenating all positive-precedence
	insertions (i.e., "closing" parts), then zero-precedence, then all
	negative-precedence insertions ("opening" parts). This ensures that
	a closing bracket from a left expression is placed before an opening
	bracket from a right expression (e.g., `...>[...`).
	Also removes the precedence field from the final replacement directives.
	"""
	replacements_by_range = defaultdict(list)
	result = set()

	for change in changes:
	assert_valid_replacement(change)
	parts = change.split(':::')
	directive_type = parts[0]

	if directive_type == 'r':
	_, file_path, offset, length, precedence, text = parts
	precedence = int(precedence)
	# Key identifies the exact code range being replaced
	key = (file_path, offset, length)
	replacements_by_range[key].append((precedence, text))
	else:
	result.add(change)

	for key, candidates in replacements_by_range.items():
	assert candidates, "A key should always have at least one candidate."

	file_path, offset, length = key

	if len(candidates) == 1:
	# No conflict.
	_, text = candidates[0]
	reconstructed_directive = f"r:::{file_path}:::{offset}:::{length}:::{text}"
	result.add(reconstructed_directive)
	continue

	if int(length) == 0:
	# Conflicting insertion detected.

	# Assert uniqueness of precedence values to ensure determinism.
	precedences = [p for p, _ in candidates]
	assert len(precedences) == len(
	set(precedences)
	), "Conflicting insertions need to have unique precedece values."

	merged_texts = [t for _, t in sorted(candidates)]
	reconstructed_directive = f"r:::{file_path}:::{offset}:::{length}:::{''.join(merged_texts)}"
	result.add(reconstructed_directive)
	else:
	# Conflicting non-insertion replacement. This is an unresolvable
	# conflict for now. Just remove the precedence field.
	for _, text in candidates:
	reconstructed_directive = f"r:::{file_path}:::{offset}:::{length}:::{text}"
	result.add(reconstructed_directive)

	return result


	def main():
	# Since the tool is invoked from multiple compile units, we are using sets
	# to deduplicate what was visible from multiple compile units.

	# A set of source nodes that trigger the rewrite.
	sources = set()

	# A set of sink nodes. A rewrite from a source requires all the end nodes
	# to be sink. They represent nodes where the rewrite could be applied,
	# because the size info is available.
	sinks = set()

	# Change to apply at the edge in between rewritten and non-rewritten nodes.
	frontiers = set()

	# Collect from every compile units the nodes and edges of the graph:
	for line in sys.stdin:
	line = line.rstrip('\n\r')

	# The first character of the line denotes the type of the line:
	# - 'r': Replacement associated with a node.
	# - 'e': Edge in between two nodes.
	# - 's': Source node of the graph triggering the rewrite.
	# - 'f': Span frontier change.
	# - 'i': Sink node. A rewrite from a source requires the ultimate end
	# nodes to be sink. They represent nodes we know can be rewrite
	# because the buffer's size is known.
	assert line[0] in ['r', 'e', 's', 'i', 'f'], "Unknown line type: " +\
	line[0] + " in line: " + line

	# Replacement associated with a node:
	if line[0] == 'r':
	(_, key, replacement) = line.split(' ', 2)
	Node.from_key(key).add_replacement(replacement)
	continue

	# Sink node:
	if line[0] == 'i':
	(_, key) = line.split(' ')
	sinks.add(key)
	continue

	# Source node:
	if line[0] == 's':
	(_, key) = line.split(' ')
	sources.add(key)
	continue

	# Edge in between two nodes:
	if line[0] == 'e':
	(_, lhs_key, rhs_key) = line.split(' ')
	lhs = Node.from_key(lhs_key)
	rhs = Node.from_key(rhs_key)

	# Directed edge:
	lhs.neighbors_directed.add(rhs)

	# Undirected edge:
	lhs.neighbors_undirected.add(rhs)
	rhs.neighbors_undirected.add(lhs)
	continue

	# Span frontier change:
	if line[0] == 'f':
	frontiers.add(line)
	continue

	assert False, "Unreachable code"

	# Mark the sink nodes as rewritable.
	for sink in sinks:
	Node.from_key(sink).size_info_available = True

	# Mark the source nodes:
	source_nodes = []
	for source in sources:
	source_node = Node.from_key(source)
	source_nodes.append(source_node)

	# Determine whether size information is available from this source.
	ComputeSizeInfoAvailable(source_node)

	# Identify all the connected components in the undirected graph. This is
	# exploring the graph in depth-first search and assigning the same component
	# to each node in the connected component.
	for node in Node.all():
	if node.component is not None:
	continue
	new_component = Component()
	stack = [node]
	while stack:
	current = stack.pop()
	if current.component is not None:
	continue
	current.component = new_component
	for neighbor in current.neighbors_undirected:
	stack.append(neighbor)

	# Collect the changes to apply. Starting from sources nodes whose size info
	# could be determined.
	for node in source_nodes:
	# Collect the changes to apply. We start from sources nodes whose size
	# info is available and explore the graph in depth-first search.
	if node.size_info_available:
	DFS(node)

	# At the edge in between rewritten and non-rewritten nodes, we need
	# to add a call to `.data()` to access the pointer from the span:
	for frontier in frontiers:
	(_, lhs_key, rhs_key, replacement) = frontier.split(' ', 3)
	lhs_node = Node.from_key(lhs_key)
	rhs_node = Node.from_key(rhs_key)

	apply_frontier = rhs_node.visited and not lhs_node.visited
	if apply_frontier:
	lhs_node.component.frontier_changes_accepted.add(replacement)
	else:
	lhs_node.component.frontier_changes_rejected.add(replacement)

	# Do or do not, there is no try. Discard components with conflicting
	# frontier changes. This happens in rare cases where C++ macros are used.
	# The whole component is discarded in case of conflict, because we can't
	# satisfy the constraints.
	for component in Component.all:
	if component.frontier_changes_accepted & component.frontier_changes_rejected:
	component.changes.clear()
	continue;

	component.changes \|= component.frontier_changes_accepted

	# Emit the changes:
	# - ~/scratch/patches.txt: A summary of each atomic change.
	# - ~/scratch/patch_<patch_index>: Write each atomic change.
	# - stdout: Print a bundle of all the changes. This is usually piped to
	# "./tools/clang/scripts/apply_edits.py" to apply the changes.

	summary_filename = expanduser('~/scratch/patches.txt')
	summary_file = open(summary_filename, 'w')

	component_with_changes = [
	component for component in Component.all if len(component.changes) > 0
	]

	for index, component in enumerate(component_with_changes):
	merged_component_changes = merge_insertions_and_remove_precedence_field(
	component.changes)

	for text in merged_component_changes:
	print(text)

	summary_file.write(f'patch_{index}: {len(merged_component_changes)}\n')

	with open(expanduser(f'~/scratch/patch_{index}.txt'), 'w') as f:
	f.write('\n'.join(merged_component_changes))

	summary_file.close()

	return 0


	if __name__ == '__main__':
	sys.exit(main())