blob: 7a77bf36a4b39cc89f395f9ee411aea319dde51c [file] [log] [blame]
# Copyright 2025 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
from __future__ import annotations
import collections
import contextlib
import dataclasses
import enum
import functools
import itertools
import pathlib
import re
# Almost every sysroot just uses #include <>, but fuchsia uses #include ""
# sometimes.
_INCLUDES = re.compile(r'#\s*(?:include|import)(_next)?\s*["<]([^>"]+)[>"]')
class CompileStatus(enum.Enum):
NotCompiled = 1
Success = 2
Failure = 3
class IncludeDir(enum.Enum):
# Ordered by include order for clang
LibCxx = 1
Builtin = 2
SysrootModule = 3
Sysroot = 4
Framework = 5
def __lt__(self, other):
return self.value < other.value
@dataclasses.dataclass
class Header:
include_dir: IncludeDir
# The string to use in your #include statement to get this file.
rel: str
# The absolute path to the file
# This should be filled by the time the compiler finishes.
abs: pathlib.Path = None
# Prev and next come from include_next / include_prev
prev: None | Header = None
next: None | Header = None
root_module: None | str = None
textual: bool = False
umbrella: bool = False
compile_status: CompileStatus = CompileStatus.NotCompiled
deps: list[Header] = dataclasses.field(default_factory=list)
direct_deps: set[Header] = dataclasses.field(default_factory=set)
# Here, None means no exports, and the empty list means 'export *'
# We default to exporting all to preserve the behaviour of includes.
exports: None | list[str] = dataclasses.field(default_factory=list)
# Kwargs that will end up on the BUILD.gn targets.
kwargs: dict[str, list[str]] = dataclasses.field(
default_factory=lambda: collections.defaultdict(list))
def __hash__(self):
return hash((self.include_dir, self.rel))
def __eq__(self, other):
if isinstance(other, Header):
return (self.include_dir, self.rel) == (other.include_dir, other.rel)
else:
# This allows you to write (Sysroot, 'foo.h') in set[Header]
return (self.include_dir, self.rel) == other
def __lt__(self, other):
return (self.include_dir, self.rel) < (other.include_dir, other.rel)
@property
def pretty_name(self):
return f'{self.include_dir.name}/{self.rel}'
def __repr__(self):
return self.pretty_name
@property
def submodule_name(self):
return self.rel.replace('.', '_').replace('/', '_').replace('-', '_')
@functools.cached_property
def content(self) -> str:
return self.abs.read_text(errors='ignore')
def calculate_direct_deps(self, includes: dict[str, Header],
sysroot: pathlib.Path) -> set[Header]:
direct = set()
found_includes = _INCLUDES.findall(self.content)
def find_include(is_next, include) -> bool:
header = None
first = includes.get(include, None)
if first is not None:
# When modules are enabled, #include_next<foo.h> from any file other
# than foo.h is treated the same as #include <foo.h>.
if not is_next or (is_next and self.rel != include):
header = first
elif self.next is not None:
header = self.next
# It might have been conditionally included.
if header is not None and header in self.deps:
direct.add(header)
return True
return False
for is_next, include in found_includes:
if not find_include(is_next, include):
# This is required, because, for example, libcxx's threading includes
# pthread.h, but the include scanner sees pthread/pthread.h (the
# symlink target).
with contextlib.suppress(OSError, FileNotFoundError):
find_include(is_next, str((sysroot / include).readlink()))
return direct
@functools.cache
def _required_deps(self) -> tuple[set[Header], set[Header]]:
nontextual = set()
textual = set()
todo = [self]
while todo:
hdr = todo.pop()
for dep in hdr.direct_deps:
if dep.textual and dep not in textual:
todo.append(dep)
textual.add(dep)
elif not dep.textual:
nontextual.add(dep)
return nontextual, textual
@property
def required_deps(self) -> set[Header]:
"""The header files required to be built before we can build."""
return self._required_deps()[0]
@property
def required_textual_deps(self) -> set[Header]:
"""The textual header files we directly include.
This includes textual headers included via other textual headers"""
return self._required_deps()[1]
def find_loop(self) -> list[Header] | None:
"""Finds a loop of #includes, if it exists."""
chain = [self]
has_chain = True
while has_chain:
has_chain = False
if self in chain[-1].direct_deps:
return chain + [self]
for dep in chain[-1].direct_deps:
if dep not in chain and self in dep.deps:
chain.append(dep)
has_chain = True
break
# It shouldn't be possible to have a node that has you as a transitive
# dep without having a dep that has you as a transitive dep.
assert len(chain) == 1
def calculate_rdeps(headers: list[Header]) -> dict[Header, list[Header]]:
"""Calculates a reverse dependency graph"""
rdeps = collections.defaultdict(list)
for header in headers:
for dep in header.deps:
rdeps[dep].append(header)
return rdeps
def all_headers(graph: dict[str, Header]):
"""Iterates through all headers in a graph."""
for header in graph.values():
while header is not None:
yield header
header = header.next
@dataclasses.dataclass
class Target:
"""Represents a single clang module / gn target."""
include_dir: IncludeDir
name: str
headers: list[Header] = dataclasses.field(default_factory=list)
def __lt__(self, other):
return self.name < other.name
def __eq__(self, other):
return self.name == other.name
def __hash__(self):
return hash(self.name)
@property
def kwargs(self) -> dict[str, set[str]]:
"""The kwargs associated with a build target.
eg. if you have kwargs = {"defines": ["FOO"]}, then it outputs:
target_type(target.name) {
defines = ["FOO"]
}
"""
kwargs = collections.defaultdict(set)
for header in self.headers:
for single in header.group:
for dep in {single} | single.required_textual_deps:
for k, v in dep.kwargs.items():
kwargs[k].update(v)
return kwargs
@property
def header_deps(self) -> set[Header]:
direct_deps = set()
for hdr in self.headers:
direct_deps.update(hdr.required_deps)
return direct_deps
@property
def public_deps(self) -> list[str]:
return sorted(
set([
hdr.root_module for hdr in self.header_deps
if hdr.root_module is not None and hdr.root_module != self.name
]))
def run_build(graph: dict[str, Header]) -> list[Target]:
"""Calculates the correct way to run a build."""
unbuilt_modules: dict[str, list[Header]] = collections.defaultdict(list)
unbuilt_headers: set[Header] = set()
for header in all_headers(graph):
if not header.textual:
if header.root_module is None:
unbuilt_headers.add(header)
else:
unbuilt_modules[header.root_module].append(header)
header.rdeps = set()
# A group is a set of headers that must be compiled together, because they
# form a dependency loop.
header.group = [header]
header.mod_deps = set()
header.unbuilt_deps = set(
dep for dep in header.required_deps
# You don't need to wait for a dependency within the same module.
if (header.root_module is None or header.root_module != dep.root_module)
and dep != header)
# Perform a union find to find all dependency loops.
# Since we can easily tell if a given edge represents a dependency loop, we
# simply union together all pairs of nodes on loop edges.
# We perform a simple form of union find where we don't bother with rank or
# size, and only do path compression (always on the lexicographically first
# header). It's slower but still plenty fast and simplifies things.
parents = {}
def find(header):
if header in parents:
# Optimization: path compression
parents[header] = find(parents[header])
return parents[header]
else:
return header
for header in sorted(unbuilt_headers):
for dep in header.required_deps:
if dep > header and header in dep.deps:
assert header.include_dir == IncludeDir.Sysroot and dep.include_dir == IncludeDir.Sysroot, (
header, dep)
# Perform the 'union' operation.
x, y = sorted([find(header), find(dep)])
if x != y:
parents[y] = x
loops = collections.defaultdict(list)
for header in unbuilt_headers:
loops[find(header)].append(header)
for headers in loops.values():
# Not a loop
if len(headers) == 1:
continue
headers.sort()
headers[0].group = headers
headers[0].unbuilt_deps = set.union(
*[header.unbuilt_deps for header in headers]) - set(headers)
for header in headers[1:]:
unbuilt_headers.remove(header)
for header in all_headers(graph):
for dep in header.unbuilt_deps:
dep.rdeps.add(header)
build_gn = []
for i in itertools.count():
# Try and build any buildable modules from modulemaps.
while True:
n_remaining = len(unbuilt_modules)
for mod, headers in list(unbuilt_modules.items()):
if not any(header.unbuilt_deps for header in headers):
build_gn.append(
Target(
include_dir=headers[0].include_dir,
name=mod,
headers=sorted(headers),
))
del unbuilt_modules[mod]
for header in headers:
for rdep in header.rdeps:
rdep.mod_deps.add(header.root_module)
rdep.unbuilt_deps.remove(header)
if n_remaining == len(unbuilt_modules):
break
# Try and build any builtable sysroot modules
sysroot_mod = f'sys_stage{i + 1}'
build_gn.append(Target(
include_dir=IncludeDir.Sysroot,
name=sysroot_mod,
))
while True:
n_remaining = len(unbuilt_headers)
for header in list(unbuilt_headers):
if not header.unbuilt_deps:
build_gn[-1].headers.append(header)
unbuilt_headers.remove(header)
for header in header.group:
header.root_module = sysroot_mod
for rdep in header.rdeps:
rdep.mod_deps.add(header.root_module)
rdep.unbuilt_deps.remove(header)
if n_remaining == len(unbuilt_headers):
break
build_gn[-1].headers.sort()
if not build_gn[-1].headers:
break
build_gn.pop()
if not unbuilt_modules and not unbuilt_headers:
# Success. Everything is built
return build_gn
else:
print(
"Dependency loop in sysroot. You probably want to make one of them textual."
)
print("The following headers are in a dependency loop:")
seen = set()
for header in unbuilt_headers:
if header not in seen:
chain = header.find_loop()
if chain is not None:
print(' -> '.join([header.pretty_name for header in chain]))
seen.update(chain)
# If you get to this point, you probably want a debugger to help understand what the problem is.
breakpoint()
exit(1)