blob: eabae59ad3082b703cb705ce62e6b46521bfb56e [file] [log] [blame]
# Copyright 2023 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Methods to run tools over jars and cache their output."""
import dataclasses
import functools
import logging
import pathlib
import zipfile
from typing import List, Optional
from util import build_utils
# Ancestor directory four levels above the directory that holds this file
# (presumably the root of the source checkout - confirm against the repo
# layout).
_SRC_PATH = pathlib.Path(__file__).resolve().parents[4]
# Default location of the jdeps binary, relative to _SRC_PATH.
_JDEPS_PATH = _SRC_PATH / 'third_party/jdk/current/bin/jdeps'
# Substrings of jar paths; any jar whose path contains one of them is skipped
# by run_jdeps.
_IGNORED_JAR_PATHS = [
# This matches org_ow2_asm_asm_commons and org_ow2_asm_asm_analysis, both of
# which fail jdeps (not sure why).
'third_party/android_deps/libs/org_ow2_asm_asm',
]
def _is_relative_to(path: pathlib.Path, other_path: pathlib.Path):
  """Reports whether |path| is |other_path| itself or lies underneath it.

  Stand-in for pathlib.Path.is_relative_to, which only exists from python3.9
  onwards; bots still run python3.8 and therefore cannot call it directly.

  Args:
    path: The path being tested.
    other_path: The candidate ancestor.

  Returns:
    True if path.relative_to(other_path) succeeds, False otherwise.
  """
  try:
    path.relative_to(other_path)
  except ValueError:
    # relative_to signals "not a subpath" by raising ValueError.
    return False
  else:
    return True
@dataclasses.dataclass
class CacheFile:
  """Locates, validates, reads and writes the cache file kept for one jar.

  The cache file always lives inside build_output_dir; see cache_path for how
  its location is derived from the jar's location.
  """
  # Jar whose derived output is cached.
  jar_path: pathlib.Path
  # Text appended to the jar's file name to form the cache file name.
  cache_suffix: str
  # Directory under which every cache file is placed.
  build_output_dir: pathlib.Path
  # Source root used to reparent jars that are outside build_output_dir.
  src_dir: pathlib.Path = _SRC_PATH

  def __post_init__(self):
    # Ensure that all paths are absolute so that relative_to works correctly.
    self.jar_path = self.jar_path.resolve()
    self.build_output_dir = self.build_output_dir.resolve()
    self.src_dir = self.src_dir.resolve()

  @functools.cached_property
  def cache_path(self):
    """Return a cache path for the jar that is always in the output dir.

    Example:
    - Given:
      src_dir = /cr/src
      build_output_dir = /cr/src/out/Debug
      cache_suffix = .jdeps
    - jar_path = /cr/src/out/Debug/a/d/file.jar
      Returns: /cr/src/out/Debug/a/d/file.jar.jdeps
    - jar_path = /cr/src/b/c/file.jar
      Returns: /cr/src/out/Debug/gen/b/c/file.jar.jdeps
    - jar_path = /random/path/file.jar
      Returns: /cr/src/out/Debug/gen/abs/random/path/file.jar.jdeps
    """
    path = self.jar_path.with_suffix(self.jar_path.suffix + self.cache_suffix)
    if _is_relative_to(path, self.build_output_dir):
      # already in the outdir, no need to adjust cache path
      return path
    # Compare against this instance's src_dir rather than the module-level
    # default so that a caller-supplied source root is honoured.
    if _is_relative_to(self.jar_path, self.src_dir):
      return self.build_output_dir / 'gen' / path.relative_to(self.src_dir)
    # Outside the source root: mirror the absolute path (minus its anchor)
    # under gen/abs.
    return self.build_output_dir / 'gen/abs' / path.relative_to(path.anchor)

  def is_valid(self):
    """Return True if both files exist and the cache is newer than the jar."""
    return (self.cache_path.exists() and self.jar_path.exists()
            and self.cache_path.stat().st_mtime > self.jar_path.stat().st_mtime)

  def read(self):
    """Return the full text content of the cache file."""
    with open(self.cache_path) as f:
      return f.read()

  def write(self, content: str):
    """Write |content| to the cache file, creating parent dirs as needed."""
    # If the jar file is in //src but not in the output dir or outside //src
    # then the reparented dirs within the output dir need to be created first.
    self.cache_path.parent.mkdir(parents=True, exist_ok=True)
    with open(self.cache_path, 'w') as f:
      f.write(content)
def _should_ignore(jar_path: pathlib.Path) -> bool:
  """Returns True if any entry of _IGNORED_JAR_PATHS occurs in |jar_path|.

  The match is a plain substring test against str(jar_path).
  """
  jar_path_str = str(jar_path)
  return any(ignored in jar_path_str for ignored in _IGNORED_JAR_PATHS)
def run_jdeps(filepath: pathlib.Path,
              *,
              build_output_dir: pathlib.Path,
              jdeps_path: pathlib.Path = _JDEPS_PATH,
              src_path: pathlib.Path = _SRC_PATH) -> Optional[str]:
  """Returns the jdeps output for |filepath|, using a file cache when possible.

  The output is stored in a '.jdeps_cache' file managed by CacheFile. When
  that cache file is valid (newer than the jar) its content is returned
  as-is; otherwise jdeps is invoked, the cache file is rewritten with the
  fresh output and that output is returned.

  Tested Nov 2nd, 2022:
  - With all cache hits, script takes 13 seconds.
  - Without the cache, script takes 1 minute 14 seconds.

  Args:
    filepath: Jar to analyse.
    build_output_dir: Output directory handed to CacheFile.
    jdeps_path: jdeps executable to invoke.
    src_path: Source root handed to CacheFile as src_dir.

  Returns:
    The jdeps output, or None if the jar does not exist or is ignored.
  """
  # Some __compile_java targets do not generate a .jar file, skipping these
  # does not affect correctness.
  if not filepath.exists():
    return None
  if _should_ignore(filepath):
    return None

  jdeps_cache = CacheFile(jar_path=filepath,
                          cache_suffix='.jdeps_cache',
                          build_output_dir=build_output_dir,
                          src_dir=src_path)
  if jdeps_cache.is_valid():
    return jdeps_cache.read()

  # Cache either doesn't exist or is older than the jar file.
  jdeps_cmd = [
      str(jdeps_path),
      '-verbose:class',
      '--multi-release',  # Some jars support multiple JDK releases.
      'base',
      str(filepath),
  ]
  jdeps_output = build_utils.CheckOutput(jdeps_cmd)
  jdeps_cache.write(jdeps_output)
  return jdeps_output
def extract_full_class_names_from_jar(build_output_dir: pathlib.Path,
                                      jar_path: pathlib.Path) -> List[str]:
  """Returns the sorted, unique fully qualified class names in a jar.

  Only zip entries ending in '.class' are considered. Inner classes are
  folded into their outer class: everything from the first '$' onwards is
  dropped. The result is cached in a '.class_name_cache' file, one name per
  line, and read back from there while that cache is valid.

  Args:
    build_output_dir: Output directory handed to CacheFile.
    jar_path: Jar whose entries are listed.

  Returns:
    A list of dotted class names in sorted order.
  """
  cache_file = CacheFile(jar_path=jar_path,
                         cache_suffix='.class_name_cache',
                         build_output_dir=build_output_dir)
  if cache_file.is_valid():
    return cache_file.read().splitlines()

  class_suffix = '.class'
  with zipfile.ZipFile(jar_path) as jar:
    # Per entry: strip the .class suffix, turn the path into a dotted name,
    # then keep only the part before the first '$' (the outer class).
    class_names = {
        entry[:-len(class_suffix)].replace('/', '.').partition('$')[0]
        for entry in jar.namelist() if entry.endswith(class_suffix)
    }

  ordered_names = sorted(class_names)
  cache_file.write('\n'.join(ordered_names))
  return ordered_names
def parse_full_java_class(source_path: pathlib.Path) -> str:
  """Guess the fully qualified class name from the path to the source file.

  The directory components are walked from the innermost outwards and
  collected as package segments until one of these is met:
  - 'java': treated as the source root and not part of the package.
  - 'com' or 'org': treated as the first package segment and included.

  Args:
    source_path: Path to a .java or .kt source file.

  Returns:
    The guessed class name, e.g. 'org.chromium.Foo'. A file located directly
    inside a 'java' directory has no package, so only its class name is
    returned. An empty string is returned if the suffix is not .java/.kt or
    if none of the marker directories is found.
  """
  if source_path.suffix not in ('.java', '.kt'):
    logging.warning('"%s" does not end in .java or .kt.', source_path)
    return ''

  package_list_reversed = []
  for part in reversed(source_path.parent.parts):
    if part == 'java':
      break
    package_list_reversed.append(part)
    if part in ('com', 'org'):
      break
  else:
    # The loop ran out of components without hitting any marker directory.
    logging.debug(
        'File %s not in a subdir of "org" or "com", cannot detect '
        'package heuristically.', source_path)
    return ''

  class_name = source_path.stem
  if not package_list_reversed:
    # Default package: avoid producing a name with a leading dot.
    return class_name
  package = '.'.join(reversed(package_list_reversed))
  return f'{package}.{class_name}'