blob: d2bd83a59d34f41407fd57e293872d0883da591e [file] [log] [blame]
# Copyright 2025 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import collections
import enum
import functools
import logging
import pathlib
import pickle
import re
import shutil
import subprocess
import sys
import tempfile
import time
import config
from graph import CompileStatus
from graph import Header
from graph import IncludeDir
from graph import calculate_rdeps
import modulemap
_FRAMEWORK = ' (framework directory)'
# Foo.framework/Versions/A/headers/Bar.h -> Foo/Bar.h
_FRAMEWORK_HEADER = re.compile(
r'([^/]+)\.framework/(?:Versions/[^/]+/)?(?:Headers|Modules)/(.*)')
_LIBCXXABI = '../../third_party/libc++abi/src/include'
# Some of these steps are quite slow (O(minutes)).
# To allow for fast iteration of config, cache them.
# Some of these steps are quite slow (O(minutes)).
# To allow for fast iteration of config, cache them.
def _maybe_cache(fn):
  """Decorator that persists a method's pickled result to /tmp between runs.

  The cache key is the resolved GN out dir, the method name, and the
  (string) positional args. The cache file is always written after a real
  call; it is only *read* when `self._use_cache` is set.
  """

  @functools.wraps(fn)
  def new_fn(self, *args):
    # The results should be solely dependent on the GN out dir (assuming the
    # user doesn't change args.gn)
    gn_rel = str(self.gn_out.resolve()).lstrip('/')
    # Note: plain string here (was a placeholder-free f-string).
    cache_path = pathlib.Path('/tmp/modularize_cache', gn_rel, fn.__name__,
                              *args)
    cache_path.parent.mkdir(exist_ok=True, parents=True)
    if self._use_cache and cache_path.is_file():
      try:
        return pickle.loads(cache_path.read_bytes())
      # When attempting to run this without a debugger after pickling from a
      # debugger it fails to load pathlib._local.
      except ModuleNotFoundError:
        logging.info('Failed to unpickle - not using cache')
    result = fn(self, *args)
    cache_path.write_bytes(pickle.dumps(result))
    return result

  return new_fn
class Os(str, enum.Enum):
  """Target operating systems (string-valued so members compare to strings)."""
  Android = 'android'
  Fuchsia = 'fuchsia'
  Ios = 'ios'
  Linux = 'linux'
  Mac = 'mac'
  Win = 'win'

  @property
  def is_apple(self):
    # macOS and iOS are the Apple platforms.
    return self in (Os.Mac, Os.Ios)
class Cpu(str, enum.Enum):
  """Target CPU architectures (string-valued so members compare to strings)."""
  x86 = 'x86'
  x64 = 'x64'
  arm = 'arm'
  arm64 = 'arm64'
class Compiler:
  """Compiles headers standalone to discover their include dependencies.

  Derives the platform's clang command from a GN out dir, compiles headers
  one at a time with depfile generation (-MD/-MF), and builds a graph of
  `Header` objects spanning the sysroot/builtin/libc++/framework include
  directories. Slow steps are cached on disk via `_maybe_cache`, keyed by
  the GN out dir.
  """

  def __init__(self, *, source_root: pathlib.Path, gn_out: pathlib.Path,
               error_dir: pathlib.Path | None, use_cache: bool, os: Os,
               cpu: Cpu):
    """Args:
      source_root: Checkout root; cwd for the helper shell script.
      gn_out: GN out dir the compile command is derived from.
      error_dir: Where stderr of failed compiles is dumped (None = skip).
      use_cache: Whether to read the on-disk /tmp cache between runs.
      os: Target OS.
      cpu: Target CPU.
    """
    self._error_dir = error_dir
    self._use_cache = use_cache
    self.gn_out = gn_out
    self.source_root = source_root
    self.os = os
    self.cpu = cpu
    # Apple sysroots get a distinct IncludeDir kind; elsewhere plain Sysroot.
    self.sysroot_dir = IncludeDir.SysrootModule if self.os.is_apple else IncludeDir.Sysroot
    # Set as a side effect of evaluating `include_dirs`.
    self.sysroot = None

  # __eq__ and __hash__ are required for functools.cache to work correctly.
  # Identity is the GN out dir alone (assumes args.gn is not edited mid-run).
  def __eq__(self, other):
    return self.gn_out == other.gn_out

  def __hash__(self):
    return hash(self.gn_out)

  def _parse_depfile(self, content: str) -> list[pathlib.Path]:
    """Parses Make-style depfile text into resolved absolute paths.

    The file will look like:
      /dev/null: <blah>.cc \\
        <main include> \\
        <other includes> \\
    So we need [1:] to ensure it doesn't have a dependency on itself.
    """
    files = []
    for line in content.rstrip().split('\n')[1:]:
      # Remove both the trailing newlines and any escapes in the file names.
      p = pathlib.Path(self.gn_out, line.replace('\\', '').strip(' '))
      files.append(p.resolve())
    return files

  def _clang_arg(self, arg: str) -> str:
    """Wraps a clang driver arg so clang-cl (Windows) forwards it."""
    # Was `self.os == 'win'`; Os.Win is equivalent (str-enum) and matches the
    # usage in include_dirs.
    if self.os == Os.Win:
      return f'/clang:{arg}'
    else:
      return arg

  def _write_err(self, rel: str, content: bytes):
    """Saves compiler stderr for `rel` under the error dir, if configured."""
    if self._error_dir is not None:
      out = self._error_dir / rel
      out.parent.mkdir(exist_ok=True, parents=True)
      out.write_bytes(content)

  def split_path(self, path: pathlib.Path) -> tuple[IncludeDir, str]:
    """Splits a path into the include directory it's under, and the string
    needed to include it from a header file."""
    assert path.is_absolute()
    for d, include_dir in self.include_dirs:
      if path.is_relative_to(d):
        rel = str(path.relative_to(d))
        if include_dir == IncludeDir.Framework:
          # Foo.framework/Versions/A/Headers/Bar.h is included as Foo/Bar.h.
          # NOTE(review): assumes the framework regex matches; a non-matching
          # path would raise AttributeError here.
          framework, hdr = _FRAMEWORK_HEADER.search(rel).groups()
          rel = f'{framework}/{hdr}'
        return include_dir, rel
    raise NotImplementedError(f'Unsupported path {path}')

  # Apply two layers of cache here.
  # The _maybe_cache layer caches between runs via a file.
  # The functools.cache layer ensures you don't keep loading the pickle file.
  @functools.cache
  @_maybe_cache
  def base_command(self) -> list[str]:
    """Returns a command suitable for building for current platform"""
    return subprocess.run(
        [
            'build/modules/modularize/no_modules_compile_command.sh',
            str(self.gn_out),
            str(self.os),
        ],
        check=True,
        text=True,
        cwd=self.source_root,
        stdout=subprocess.PIPE,
        # Strip the -o /dev/null with [:-2]
        # Windows requires it to be at the end, otherwise it writes to
        # {output}.obj.
    ).stdout.rstrip().replace('\\', '').split(' ')[:-2]

  @functools.cached_property
  def include_dirs(self) -> list[tuple[pathlib.Path, IncludeDir]]:
    """Queries clang (-E -v) for the `#include <...>` search directories.

    Each directory is classified into an IncludeDir kind. Also sets
    `self.sysroot` to the (last) sysroot directory as a side effect.
    """
    cmd = self.base_command() + [
        '-E',
        '-v',
        '-x',
        'c++',
        '-',
        '-o',
        '/dev/null',
    ]
    # Preprocessing only: drop the compile flag (clang-cl spells it /c).
    cmd.remove('/c' if self.os == Os.Win else '-c')
    # include dir lines both start and end with whitespace
    lines = [
        line.strip() for line in subprocess.run(
            cmd,
            cwd=self.gn_out,
            text=True,
            check=True,
            stderr=subprocess.PIPE,
            # We need to pass in a "file" so we pass in - and devnull so it's
            # an empty file.
            stdin=subprocess.DEVNULL,
        ).stderr.replace('\\', '').split('\n')
    ]
    dirs = lines[lines.index('#include <...> search starts here:') +
                 1:lines.index('End of search list.')]
    # We don't care about these.
    dirs.remove('../..')
    dirs.remove('gen')
    if _LIBCXXABI in dirs:
      dirs.remove(_LIBCXXABI)
    out = []
    for d in dirs:
      is_framework = d.endswith(_FRAMEWORK)
      d = (self.gn_out / d.removesuffix(_FRAMEWORK)).resolve()
      if is_framework:
        out.append((d, IncludeDir.Framework))
      elif 'libc++' in d.parts:
        out.append((d, IncludeDir.LibCxx))
      elif 'clang' in d.parts:
        # Compiler builtin headers live under .../clang/<version>/include.
        out.append((d, IncludeDir.Builtin))
      elif config.SYSROOT_DIRS.intersection(d.parts):
        out.append((d, self.sysroot_dir))
        self.sysroot = d
      else:
        raise NotImplementedError(f'Unknown include directory {d}')
    return out

  @_maybe_cache
  def compile_one(
      self, include: str
  ) -> tuple[subprocess.CompletedProcess, None | list[pathlib.Path]]:
    """Compiles a single source file including {include}.

    Args:
      include: The string to #include (eg. 'vector')

    Returns:
      The result of the compilation, and either:
        None if no depfile was created,
        A list of all files transitively required otherwise.
    """
    with tempfile.TemporaryDirectory() as td:
      source = pathlib.Path(td, 'source.cc')
      source.write_text(f'#include <{include}>')
      depfile = pathlib.Path(td, 'source.o.d')
      command = self.base_command() + [
          # We write stderr to a file
          '-fno-color-diagnostics',
          '-x',
          'c++-header',
          str(source),
          self._clang_arg('-MD'),
          self._clang_arg('-MF'),
          self._clang_arg(depfile),
          '-o',
          '/dev/null',
      ]
      if logging.getLogger().isEnabledFor(logging.DEBUG):
        logging.debug('Running command: (cd %s && %s)', self.gn_out,
                      ' '.join(map(str, command)))
      # No check=True: a failed compile is an expected, inspectable outcome.
      ps = subprocess.run(
          command,
          stderr=subprocess.PIPE,
          cwd=self.gn_out,
      )
      # The depfile is generated even if it fails to compile.
      try:
        return ps, self._parse_depfile(depfile.read_text())
      except FileNotFoundError:
        return ps, None

  @functools.cache
  def _modules_and_headers(self):
    # Cached: parsing the modulemaps for all include dirs is not free.
    return modulemap.calculate_modules(self.include_dirs)

  def modulemaps_for_modules(self) -> dict[str, pathlib.Path]:
    """First element of calculate_modules: module name -> modulemap path."""
    return self._modules_and_headers()[0]

  def modulemap_headers(self) -> set[Header]:
    """Second element of calculate_modules: headers named in modulemaps."""
    return self._modules_and_headers()[1]

  @_maybe_cache
  def compile_all(self) -> dict[str, Header]:
    """Generates a graph of headers by compiling all files in the sysroot.

    Returns:
      Mapping from include path to the first header of a linked list
      (via Header.prev/next) of same-named headers across include dirs.
    """
    if self._error_dir is not None:
      shutil.rmtree(self._error_dir, ignore_errors=True)
    graph: dict[str, Header] = {}
    uncompiled = []
    seen = set()

    def visit(include: str):
      # Queue each include string for compilation at most once.
      if include not in seen:
        uncompiled.append(include)
        seen.add(include)

    for hdr in self.modulemap_headers():
      if hdr.root_module not in config.IGNORED_MODULES:
        graph[(hdr.include_dir, hdr.rel)] = hdr
        visit(hdr.rel)
    # Populate a list of initial headers to compile.
    for hdr in config.SYSROOT_PRECOMPILED_HEADERS:
      visit(hdr)
    logging.info('Starting compilation')
    # Could consider making the DFS parallel to improve performance.
    # But it's a lot of effort for a script that's rarely run.
    i = 0
    start = time.time()
    while uncompiled:
      i += 1
      if i % 100 == 0:
        rate = i / (time.time() - start)
        logging.info('Compiled %d/%d, %.2f/s, estimate: %ds', i - 1, len(seen),
                     rate, (len(seen) - i) / rate)
      rel = uncompiled.pop()
      ps, files = self.compile_one(rel)
      if files is None:
        logging.warning("Failed to generate depfile while compiling %s", rel)
        self._write_err(rel, ps.stderr)
        continue
      # files[0] is the compiled header itself (depfile self-entry stripped).
      abs_path = files[0]
      kind, _ = self.split_path(abs_path)
      if (kind, rel) not in graph:
        # If we're seeing it for the first time here, but it's from another
        # include dir, it must not be in the module map, so it should be
        # treated as textual.
        # NOTE(review): compares against IncludeDir.Sysroot, not
        # self.sysroot_dir — confirm this is intended on Apple platforms.
        graph[(kind, rel)] = Header(include_dir=kind,
                                    rel=rel,
                                    textual=kind != IncludeDir.Sysroot)
      state = graph[(kind, rel)]
      state.abs = abs_path
      for to_abs in files[1:]:
        to_kind, to_rel = self.split_path(to_abs)
        assert (kind, rel) != (to_kind, to_rel)
        dep = graph.get((to_kind, to_rel), None)
        if dep is None:
          dep = Header(
              include_dir=to_kind,
              rel=to_rel,
              abs=to_abs,
              textual=to_kind != IncludeDir.Sysroot,
          )
          graph[(to_kind, to_rel)] = dep
        # Skip compiling textual headers - we'll calculate their dependencies
        # after the fact.
        if not dep.textual:
          visit(to_rel)
        state.deps.append(dep)
      state.compile_status = CompileStatus.Success if ps.returncode == 0 else CompileStatus.Failure
      if ps.returncode == 0:
        logging.debug('Compiled %s', state.pretty_name)
      elif any([
          state.textual,
          rel.startswith('bits/'),
          '/bits/' in rel,
          rel.endswith('intrin.h'),
          b'Do not include this header directly' in ps.stderr,
          # eg. Please #include <os/workgroup.h> instead of this file directly.
          b'Please #include' in ps.stderr,
      ]):
        # These things are generally expected to not compile standalone.
        logging.debug('Probably fine: Failed to compile %s', state.pretty_name)
      else:
        # Was `state.root_module != None`; `is not None` is the idiom.
        if state.root_module is not None:
          logging.warning('%s was not textual but failed to compile',
                          state.pretty_name)
        else:
          # Since this isn't part of a modulemap we can choose to mark it as
          # textual.
          logging.warning('Failed to compile %s', state.pretty_name)
        self._write_err(rel, ps.stderr)
      # If you can't compile it, assume it's textual
      if state.root_module is None and ps.returncode != 0:
        state.textual = True
    rdeps = calculate_rdeps(graph.values())
    includes = collections.defaultdict(list)
    logging.info('Inferring dependencies')
    for header in sorted(graph.values()):
      includes[header.rel].append(header)
      if header.abs is None:
        # Headers we never compiled: locate them on disk by include dir kind.
        for d, kind in self.include_dirs:
          if header.include_dir == kind and (d / header.rel).is_file():
            header.abs = d / header.rel
            break
        assert header.abs is not None
      # If we were unable to compile something, calculate what the dependencies
      # likely are.
      if header.compile_status == CompileStatus.NotCompiled and rdeps[header]:
        intersection = set.intersection(
            *[set(rdep.deps) for rdep in rdeps[header]])
        # For libcxx/foo.h -> builtin/foo.h -> sysroot/foo.h
        # Despite the fact that builtin/foo.h should appear all the time, we
        # need to filter it out for sysroot/foo.h.
        header.deps = [
            dep for dep in intersection
            if dep.rel != header.rel or dep.include_dir > header.include_dir
        ]
    # Translate it to a mapping from include path to a linked list of headers.
    out = {}
    for k, headers in includes.items():
      headers.sort()
      for prev, nxt in zip(headers, headers[1:]):
        # If it didn't #include_next we don't need to worry about it.
        if nxt not in prev.deps:
          break
        prev.next = nxt
        nxt.prev = prev
      out[k] = headers[0]
    logging.info('Compilation complete')
    return out