build/modules/modularize/graph.py - chromium/src - Git at Google

 # Copyright 2025 The Chromium Authors
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.

 from __future__ import annotations

 import collections
 import contextlib
 import dataclasses
 import enum
 import functools
 import itertools
 import pathlib
 import re

 # Almost every sysroot just uses #include <>, but fuchsia uses #include ""
 # sometimes.
 _INCLUDES = re.compile(r'#\s*(?:include|import)(_next)?\s*["<]([^>"]+)[>"]')


 class CompileStatus(enum.Enum):
   NotCompiled = 1
   Success = 2
   Failure = 3


 class IncludeDir(enum.Enum):
   # Ordered by include order for clang
   LibCxx = 1
   Builtin = 2
   SysrootModule = 3
   Sysroot = 4
   Framework = 5

   def __lt__(self, other):
     return self.value < other.value


 @dataclasses.dataclass
 class Header:
   include_dir: IncludeDir
   # The string to use in your #include statement to get this file.
   rel: str
   # The absolute path to the file
   # This should be filled by the time the compiler finishes.
   abs: pathlib.Path = None
   # Prev and next come from include_next / include_prev
   prev: None | Header = None
   next: None | Header = None
   root_module: None | str = None
   textual: bool = False
   umbrella: bool = False
   compile_status: CompileStatus = CompileStatus.NotCompiled

   deps: list[Header] = dataclasses.field(default_factory=list)
   direct_deps: set[Header] = dataclasses.field(default_factory=set)
   # Here, None means no exports, and the empty list means 'export *'
   # We default to exporting all to preserve the behaviour of includes.
   exports: None | list[str] = dataclasses.field(default_factory=list)

   # Kwargs that will end up on the BUILD.gn targets.
   kwargs: dict[str, list[str]] = dataclasses.field(
       default_factory=lambda: collections.defaultdict(list))

   def __hash__(self):
     return hash((self.include_dir, self.rel))

   def __eq__(self, other):
     if isinstance(other, Header):
       return (self.include_dir, self.rel) == (other.include_dir, other.rel)
     else:
       # This allows you to write (Sysroot, 'foo.h') in set[Header]
       return (self.include_dir, self.rel) == other

   def __lt__(self, other):
     return (self.include_dir, self.rel) < (other.include_dir, other.rel)

   @property
   def pretty_name(self):
     return f'{self.include_dir.name}/{self.rel}'

   def __repr__(self):
     return self.pretty_name

   @property
   def submodule_name(self):
     return self.rel.replace('.', '_').replace('/', '_').replace('-', '_')

   @functools.cached_property
   def content(self) -> str:
     return self.abs.read_text(errors='ignore')

   def calculate_direct_deps(self, includes: dict[str, Header],
                             sysroot: pathlib.Path) -> set[Header]:

     direct = set()
     found_includes = _INCLUDES.findall(self.content)

     def find_include(is_next, include) -> bool:
       header = None
       first = includes.get(include, None)
       if first is not None:
         # When modules are enabled, #include_next<foo.h> from any file other
         # than foo.h is treated the same as #include <foo.h>.
         if not is_next or (is_next and self.rel != include):
           header = first
         elif self.next is not None:
           header = self.next

         # It might have been conditionally included.
         if header is not None and header in self.deps:
           direct.add(header)
           return True
       return False

     for is_next, include in found_includes:
       if not find_include(is_next, include):
         # This is required, because, for example, libcxx's threading includes
         # pthread.h, but the include scanner sees pthread/pthread.h (the
         # symlink target).
         with contextlib.suppress(OSError, FileNotFoundError):
           find_include(is_next, str((sysroot / include).readlink()))

     return direct

   @functools.cache
   def _required_deps(self) -> tuple[set[Header], set[Header]]:
     nontextual = set()
     textual = set()
     todo = [self]
     while todo:
       hdr = todo.pop()
       for dep in hdr.direct_deps:
         if dep.textual and dep not in textual:
           todo.append(dep)
           textual.add(dep)
         elif not dep.textual:
           nontextual.add(dep)
     return nontextual, textual

   @property
   def required_deps(self) -> set[Header]:
     """The header files required to be built before we can build."""
     return self._required_deps()[0]

   @property
   def required_textual_deps(self) -> set[Header]:
     """The textual header files we directly include.

     This includes textual headers included via other textual headers"""
     return self._required_deps()[1]

   def find_loop(self) -> list[Header] | None:
     """Finds a loop of #includes, if it exists."""
     chain = [self]
     has_chain = True
     while has_chain:
       has_chain = False
       if self in chain[-1].direct_deps:
         return chain + [self]
       for dep in chain[-1].direct_deps:
         if dep not in chain and self in dep.deps:
           chain.append(dep)
           has_chain = True
           break
     # It shouldn't be possible to have a node that has you as a transitive
     # dep without having a dep that has you as a transitive dep.
     assert len(chain) == 1


 def calculate_rdeps(headers: list[Header]) -> dict[Header, list[Header]]:
   """Calculates a reverse dependency graph"""
   rdeps = collections.defaultdict(list)
   for header in headers:
     for dep in header.deps:
       rdeps[dep].append(header)
   return rdeps


 def all_headers(graph: dict[str, Header]):
   """Iterates through all headers in a graph."""
   for header in graph.values():
     while header is not None:
       yield header
       header = header.next


 @dataclasses.dataclass
 class Target:
   """Represents a single clang module / gn target."""
   include_dir: IncludeDir
   name: str
   headers: list[Header] = dataclasses.field(default_factory=list)

   def __lt__(self, other):
     return self.name < other.name

   def __eq__(self, other):
     return self.name == other.name

   def __hash__(self):
     return hash(self.name)

   @property
   def kwargs(self) -> dict[str, set[str]]:
     """The kwargs associated with a build target.

     eg. if you have kwargs = {"defines": ["FOO"]}, then it outputs:

     target_type(target.name) {
       defines = ["FOO"]
     }
     """
     kwargs = collections.defaultdict(set)
     for header in self.headers:
       for single in header.group:
         for dep in {single} | single.required_textual_deps:
           for k, v in dep.kwargs.items():
             kwargs[k].update(v)
     return kwargs

   @property
   def header_deps(self) -> set[Header]:
     direct_deps = set()
     for hdr in self.headers:
       direct_deps.update(hdr.required_deps)
     return direct_deps

   @property
   def public_deps(self) -> list[str]:
     return sorted(
         set([
             hdr.root_module for hdr in self.header_deps
             if hdr.root_module is not None and hdr.root_module != self.name
         ]))


 def run_build(graph: dict[str, Header]) -> list[Target]:
   """Calculates the correct way to run a build."""
   unbuilt_modules: dict[str, list[Header]] = collections.defaultdict(list)
   unbuilt_headers: set[Header] = set()

   for header in all_headers(graph):
     if not header.textual:
       if header.root_module is None:
         unbuilt_headers.add(header)
       else:
         unbuilt_modules[header.root_module].append(header)
     header.rdeps = set()
     # A group is a set of headers that must be compiled together, because they
     # form a dependency loop.
     header.group = [header]
     header.mod_deps = set()
     header.unbuilt_deps = set(
         dep for dep in header.required_deps
         # You don't need to wait for a dependency within the same module.
         if (header.root_module is None or header.root_module != dep.root_module)
         and dep != header)

   # Perform a union find to find all dependency loops.
   # Since we can easily tell if a given edge represents a dependency loop, we
   # simply union together all pairs of nodes on loop edges.
   # We perform a simple form of union find where we don't bother with rank or
   # size, and only do path compression (always on the lexicographically first
   # header). It's slower but still plenty fast and simplifies things.
   parents = {}

   def find(header):
     if header in parents:
       # Optimization: path compression
       parents[header] = find(parents[header])
       return parents[header]
     else:
       return header

   for header in sorted(unbuilt_headers):
     for dep in header.required_deps:
       if dep > header and header in dep.deps:
         assert header.include_dir == IncludeDir.Sysroot and dep.include_dir == IncludeDir.Sysroot, (
             header, dep)
         # Perform the 'union' operation.
         x, y = sorted([find(header), find(dep)])
         if x != y:
           parents[y] = x

   loops = collections.defaultdict(list)
   for header in unbuilt_headers:
     loops[find(header)].append(header)

   for headers in loops.values():
     # Not a loop
     if len(headers) == 1:
       continue
     headers.sort()
     headers[0].group = headers
     headers[0].unbuilt_deps = set.union(
         *[header.unbuilt_deps for header in headers]) - set(headers)
     for header in headers[1:]:
       unbuilt_headers.remove(header)

   for header in all_headers(graph):
     for dep in header.unbuilt_deps:
       dep.rdeps.add(header)

   build_gn = []

   for i in itertools.count():
     # Try and build any buildable modules from modulemaps.
     while True:
       n_remaining = len(unbuilt_modules)
       for mod, headers in list(unbuilt_modules.items()):
         if not any(header.unbuilt_deps for header in headers):
           build_gn.append(
               Target(
                   include_dir=headers[0].include_dir,
                   name=mod,
                   headers=sorted(headers),
               ))
           del unbuilt_modules[mod]
           for header in headers:
             for rdep in header.rdeps:
               rdep.mod_deps.add(header.root_module)
               rdep.unbuilt_deps.remove(header)

       if n_remaining == len(unbuilt_modules):
         break

     # Try and build any builtable sysroot modules
     sysroot_mod = f'sys_stage{i + 1}'
     build_gn.append(Target(
         include_dir=IncludeDir.Sysroot,
         name=sysroot_mod,
     ))
     while True:
       n_remaining = len(unbuilt_headers)
       for header in list(unbuilt_headers):
         if not header.unbuilt_deps:
           build_gn[-1].headers.append(header)
           unbuilt_headers.remove(header)
           for header in header.group:
             header.root_module = sysroot_mod
             for rdep in header.rdeps:
               rdep.mod_deps.add(header.root_module)
               rdep.unbuilt_deps.remove(header)

       if n_remaining == len(unbuilt_headers):
         break

     build_gn[-1].headers.sort()
     if not build_gn[-1].headers:
       break

   build_gn.pop()

   if not unbuilt_modules and not unbuilt_headers:
     # Success. Everything is built
     return build_gn
   else:
     print(
         "Dependency loop in sysroot. You probably want to make one of them textual."
     )
     print("The following headers are in a dependency loop:")
     seen = set()
     for header in unbuilt_headers:
       if header not in seen:
         chain = header.find_loop()
         if chain is not None:
           print(' -> '.join([header.pretty_name for header in chain]))
           seen.update(chain)

     # If you get to this point, you probably want a debugger to help understand what the problem is.
     breakpoint()
     exit(1)
	# Copyright 2025 The Chromium Authors
	# Use of this source code is governed by a BSD-style license that can be
	# found in the LICENSE file.

	from __future__ import annotations

	import collections
	import contextlib
	import dataclasses
	import enum
	import functools
	import itertools
	import pathlib
	import re

	# Almost every sysroot just uses #include <>, but fuchsia uses #include ""
	# sometimes.
	_INCLUDES = re.compile(r'#\s(?:include\|import)(_next)?\s["<]([^>"]+)[>"]')


	class CompileStatus(enum.Enum):
	NotCompiled = 1
	Success = 2
	Failure = 3


	class IncludeDir(enum.Enum):
	# Ordered by include order for clang
	LibCxx = 1
	Builtin = 2
	SysrootModule = 3
	Sysroot = 4
	Framework = 5

	def __lt__(self, other):
	return self.value < other.value


	@dataclasses.dataclass
	class Header:
	include_dir: IncludeDir
	# The string to use in your #include statement to get this file.
	rel: str
	# The absolute path to the file
	# This should be filled by the time the compiler finishes.
	abs: pathlib.Path = None
	# Prev and next come from include_next / include_prev
	prev: None \| Header = None
	next: None \| Header = None
	root_module: None \| str = None
	textual: bool = False
	umbrella: bool = False
	compile_status: CompileStatus = CompileStatus.NotCompiled

	deps: list[Header] = dataclasses.field(default_factory=list)
	direct_deps: set[Header] = dataclasses.field(default_factory=set)
	# Here, None means no exports, and the empty list means 'export *'
	# We default to exporting all to preserve the behaviour of includes.
	exports: None \| list[str] = dataclasses.field(default_factory=list)

	# Kwargs that will end up on the BUILD.gn targets.
	kwargs: dict[str, list[str]] = dataclasses.field(
	default_factory=lambda: collections.defaultdict(list))

	def __hash__(self):
	return hash((self.include_dir, self.rel))

	def __eq__(self, other):
	if isinstance(other, Header):
	return (self.include_dir, self.rel) == (other.include_dir, other.rel)
	else:
	# This allows you to write (Sysroot, 'foo.h') in set[Header]
	return (self.include_dir, self.rel) == other

	def __lt__(self, other):
	return (self.include_dir, self.rel) < (other.include_dir, other.rel)

	@property
	def pretty_name(self):
	return f'{self.include_dir.name}/{self.rel}'

	def __repr__(self):
	return self.pretty_name

	@property
	def submodule_name(self):
	return self.rel.replace('.', '_').replace('/', '_').replace('-', '_')

	@functools.cached_property
	def content(self) -> str:
	return self.abs.read_text(errors='ignore')

	def calculate_direct_deps(self, includes: dict[str, Header],
	sysroot: pathlib.Path) -> set[Header]:

	direct = set()
	found_includes = _INCLUDES.findall(self.content)

	def find_include(is_next, include) -> bool:
	header = None
	first = includes.get(include, None)
	if first is not None:
	# When modules are enabled, #include_next<foo.h> from any file other
	# than foo.h is treated the same as #include <foo.h>.
	if not is_next or (is_next and self.rel != include):
	header = first
	elif self.next is not None:
	header = self.next

	# It might have been conditionally included.
	if header is not None and header in self.deps:
	direct.add(header)
	return True
	return False

	for is_next, include in found_includes:
	if not find_include(is_next, include):
	# This is required, because, for example, libcxx's threading includes
	# pthread.h, but the include scanner sees pthread/pthread.h (the
	# symlink target).
	with contextlib.suppress(OSError, FileNotFoundError):
	find_include(is_next, str((sysroot / include).readlink()))

	return direct

	@functools.cache
	def _required_deps(self) -> tuple[set[Header], set[Header]]:
	nontextual = set()
	textual = set()
	todo = [self]
	while todo:
	hdr = todo.pop()
	for dep in hdr.direct_deps:
	if dep.textual and dep not in textual:
	todo.append(dep)
	textual.add(dep)
	elif not dep.textual:
	nontextual.add(dep)
	return nontextual, textual

	@property
	def required_deps(self) -> set[Header]:
	"""The header files required to be built before we can build."""
	return self._required_deps()[0]

	@property
	def required_textual_deps(self) -> set[Header]:
	"""The textual header files we directly include.

	This includes textual headers included via other textual headers"""
	return self._required_deps()[1]

	def find_loop(self) -> list[Header] \| None:
	"""Finds a loop of #includes, if it exists."""
	chain = [self]
	has_chain = True
	while has_chain:
	has_chain = False
	if self in chain[-1].direct_deps:
	return chain + [self]
	for dep in chain[-1].direct_deps:
	if dep not in chain and self in dep.deps:
	chain.append(dep)
	has_chain = True
	break
	# It shouldn't be possible to have a node that has you as a transitive
	# dep without having a dep that has you as a transitive dep.
	assert len(chain) == 1


	def calculate_rdeps(headers: list[Header]) -> dict[Header, list[Header]]:
	"""Calculates a reverse dependency graph"""
	rdeps = collections.defaultdict(list)
	for header in headers:
	for dep in header.deps:
	rdeps[dep].append(header)
	return rdeps


	def all_headers(graph: dict[str, Header]):
	"""Iterates through all headers in a graph."""
	for header in graph.values():
	while header is not None:
	yield header
	header = header.next


	@dataclasses.dataclass
	class Target:
	"""Represents a single clang module / gn target."""
	include_dir: IncludeDir
	name: str
	headers: list[Header] = dataclasses.field(default_factory=list)

	def __lt__(self, other):
	return self.name < other.name

	def __eq__(self, other):
	return self.name == other.name

	def __hash__(self):
	return hash(self.name)

	@property
	def kwargs(self) -> dict[str, set[str]]:
	"""The kwargs associated with a build target.

	eg. if you have kwargs = {"defines": ["FOO"]}, then it outputs:

	target_type(target.name) {
	defines = ["FOO"]
	}
	"""
	kwargs = collections.defaultdict(set)
	for header in self.headers:
	for single in header.group:
	for dep in {single} \| single.required_textual_deps:
	for k, v in dep.kwargs.items():
	kwargs[k].update(v)
	return kwargs

	@property
	def header_deps(self) -> set[Header]:
	direct_deps = set()
	for hdr in self.headers:
	direct_deps.update(hdr.required_deps)
	return direct_deps

	@property
	def public_deps(self) -> list[str]:
	return sorted(
	set([
	hdr.root_module for hdr in self.header_deps
	if hdr.root_module is not None and hdr.root_module != self.name
	]))


	def run_build(graph: dict[str, Header]) -> list[Target]:
	"""Calculates the correct way to run a build."""
	unbuilt_modules: dict[str, list[Header]] = collections.defaultdict(list)
	unbuilt_headers: set[Header] = set()

	for header in all_headers(graph):
	if not header.textual:
	if header.root_module is None:
	unbuilt_headers.add(header)
	else:
	unbuilt_modules[header.root_module].append(header)
	header.rdeps = set()
	# A group is a set of headers that must be compiled together, because they
	# form a dependency loop.
	header.group = [header]
	header.mod_deps = set()
	header.unbuilt_deps = set(
	dep for dep in header.required_deps
	# You don't need to wait for a dependency within the same module.
	if (header.root_module is None or header.root_module != dep.root_module)
	and dep != header)

	# Perform a union find to find all dependency loops.
	# Since we can easily tell if a given edge represents a dependency loop, we
	# simply union together all pairs of nodes on loop edges.
	# We perform a simple form of union find where we don't bother with rank or
	# size, and only do path compression (always on the lexicographically first
	# header). It's slower but still plenty fast and simplifies things.
	parents = {}

	def find(header):
	if header in parents:
	# Optimization: path compression
	parents[header] = find(parents[header])
	return parents[header]
	else:
	return header

	for header in sorted(unbuilt_headers):
	for dep in header.required_deps:
	if dep > header and header in dep.deps:
	assert header.include_dir == IncludeDir.Sysroot and dep.include_dir == IncludeDir.Sysroot, (
	header, dep)
	# Perform the 'union' operation.
	x, y = sorted([find(header), find(dep)])
	if x != y:
	parents[y] = x

	loops = collections.defaultdict(list)
	for header in unbuilt_headers:
	loops[find(header)].append(header)

	for headers in loops.values():
	# Not a loop
	if len(headers) == 1:
	continue
	headers.sort()
	headers[0].group = headers
	headers[0].unbuilt_deps = set.union(
	*[header.unbuilt_deps for header in headers]) - set(headers)
	for header in headers[1:]:
	unbuilt_headers.remove(header)

	for header in all_headers(graph):
	for dep in header.unbuilt_deps:
	dep.rdeps.add(header)

	build_gn = []

	for i in itertools.count():
	# Try and build any buildable modules from modulemaps.
	while True:
	n_remaining = len(unbuilt_modules)
	for mod, headers in list(unbuilt_modules.items()):
	if not any(header.unbuilt_deps for header in headers):
	build_gn.append(
	Target(
	include_dir=headers[0].include_dir,
	name=mod,
	headers=sorted(headers),
	))
	del unbuilt_modules[mod]
	for header in headers:
	for rdep in header.rdeps:
	rdep.mod_deps.add(header.root_module)
	rdep.unbuilt_deps.remove(header)

	if n_remaining == len(unbuilt_modules):
	break

	# Try and build any builtable sysroot modules
	sysroot_mod = f'sys_stage{i + 1}'
	build_gn.append(Target(
	include_dir=IncludeDir.Sysroot,
	name=sysroot_mod,
	))
	while True:
	n_remaining = len(unbuilt_headers)
	for header in list(unbuilt_headers):
	if not header.unbuilt_deps:
	build_gn[-1].headers.append(header)
	unbuilt_headers.remove(header)
	for header in header.group:
	header.root_module = sysroot_mod
	for rdep in header.rdeps:
	rdep.mod_deps.add(header.root_module)
	rdep.unbuilt_deps.remove(header)

	if n_remaining == len(unbuilt_headers):
	break

	build_gn[-1].headers.sort()
	if not build_gn[-1].headers:
	break

	build_gn.pop()

	if not unbuilt_modules and not unbuilt_headers:
	# Success. Everything is built
	return build_gn
	else:
	print(
	"Dependency loop in sysroot. You probably want to make one of them textual."
	)
	print("The following headers are in a dependency loop:")
	seen = set()
	for header in unbuilt_headers:
	if header not in seen:
	chain = header.find_loop()
	if chain is not None:
	print(' -> '.join([header.pretty_name for header in chain]))
	seen.update(chain)

	# If you get to this point, you probably want a debugger to help understand what the problem is.
	breakpoint()
	exit(1)