blob: 08191754fba5e0817cf300607f88bf33798d9899 [file] [log] [blame]
# Copyright 2018 The ChromiumOS Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Gclient utility."""
import ast
import collections
import itertools
import logging
import operator
import os
import pprint
import queue
import shutil
import subprocess
import sys
import tempfile
import urllib.parse
from bisect_kit import codechange
from bisect_kit import errors
from bisect_kit import git_util
from bisect_kit import locking
from bisect_kit import util
# from third_party
from depot_tools import gclient_eval
logger = logging.getLogger(__name__)
emitted_warnings = set()
# If the dependency is not pinned in DEPS file, the default branch.
# ref: gclient_scm.py GitWrapper.update default_rev
# TODO(kcwu): follow gclient to change the default branch name
DEFAULT_BRANCH_NAME = 'main'
def config(
    gclient_dir: str,
    url: str | None = None,
    cache_dir: str | None = None,
    deps_file: str | None = None,
    custom_var_list: list[str] | None = None,
    gclientfile: str | None = None,
    managed: bool = True,
    spec: str | None = None,
    target_os: str | None = None,
):
    """Simply wrapper of `gclient config`.

    Args:
        gclient_dir: root directory of gclient project
        url: URL of gclient configuration files
        cache_dir: gclient's git cache folder
        deps_file: override the default DEPS file name
        custom_var_list: A list of custom variables
        gclientfile: alternate .gclient file name
        managed: use managed dependencies
        spec: content of gclient file
        target_os: target OS
    """
    args = ['gclient', 'config']
    if deps_file:
        args.extend(['--deps-file', deps_file])
    if cache_dir:
        args.extend(['--cache-dir', cache_dir])
    for custom_var in custom_var_list or []:
        args.extend(['--custom-var', custom_var])
    if gclientfile:
        args.extend(['--gclientfile', gclientfile])
    if not managed:
        args.append('--unmanaged')
    if spec:
        args.extend(['--spec', spec])
    if url:
        args.append(url)
    with git_util.PatchGitConfig() as new_env:
        util.check_call(*args, cwd=gclient_dir, env=new_env)
    # 'target_os' is mandatory for chromeos build, but 'gclient config' doesn't
    # recognize it. Here add it to gclient file explicitly.
    if target_os:
        target_file = gclientfile or '.gclient'
        with open(os.path.join(gclient_dir, target_file), 'a') as f:
            f.write('target_os = ["%s"]\n' % target_os)
def sync(
    gclient_dir: str,
    with_branch_heads: bool = False,
    with_tags: bool = False,
    revision: str | None = None,
    jobs: int = 8,
):
    """Simply wrapper of `gclient sync`.

    Args:
        gclient_dir: root directory of gclient project
        with_branch_heads: whether to clone git `branch_heads` refspecs
        with_tags: whether to clone git tags
        revision: the revision name, e.g. src@100.0.4857.0 or 100.0.4857.0
        jobs: how many workers running in parallel
    """
    # Work around gclient issue crbug/943430
    # gclient rejected to sync if there are untracked symlink even with --force
    for path in [
        'src/chromeos/assistant/libassistant/src/deps',
        'src/chromeos/assistant/libassistant/src/libassistant',
    ]:
        if os.path.islink(os.path.join(gclient_dir, path)):
            os.unlink(os.path.join(gclient_dir, path))
    cmd = [
        'gclient',
        'sync',
        '--jobs=%d' % jobs,
        '--delete_unversioned_trees',
        # --force is necessary because runhook may generate some untracked files.
        '--force',
    ]
    if with_branch_heads:
        cmd.append('--with_branch_heads')
    if with_tags:
        cmd.append('--with_tags')
    if revision:
        cmd += ['--revision', revision]
    # Snapshot the project list before syncing so checkouts dropped by this
    # sync can be detected and cleaned up afterwards.
    try:
        old_projects = load_gclient_entries(gclient_dir)
    except IOError:
        # First sync: .gclient_entries does not exist yet.
        old_projects = {}
    with git_util.PatchGitConfig() as new_env:
        util.check_call(*cmd, cwd=gclient_dir, env=new_env)
    # Remove dead .git folder after sync.
    # Ideally, this should be handled by gclient but sometimes gclient didn't
    # (crbug/930047).
    new_projects = load_gclient_entries(gclient_dir)
    for path in old_projects:
        if path in new_projects:
            continue
        old_git_dir = os.path.join(gclient_dir, path)
        if not os.path.exists(old_git_dir):
            continue
        if git_util.is_git_root(old_git_dir):
            logger.warning(
                '%s was removed from .gclient_entries but %s still exists; remove it',
                path,
                old_git_dir,
            )
            shutil.rmtree(old_git_dir)
        else:
            # Not a git checkout root: something else occupies this path, so
            # deleting it would be unsafe.
            logger.warning(
                '%s was removed from .gclient_entries but %s still exists;'
                ' keep it because it is not git root',
                path,
                old_git_dir,
            )
def runhook(gclient_dir, jobs=8, gclientfile=None):
    """Simply wrapper of `gclient runhook`.

    Args:
        gclient_dir: root directory of gclient project
        jobs: how many workers running in parallel
        gclientfile: specify an alternate .gclient file
    """
    extra_args = ['--gclientfile', gclientfile] if gclientfile else []
    cmd = ['gclient', 'runhook', '--job', str(jobs)] + extra_args
    with git_util.PatchGitConfig() as new_env:
        util.check_call(*cmd, cwd=gclient_dir, env=new_env)
def load_gclient_entries(gclient_dir):
    """Loads .gclient_entries.

    Args:
        gclient_dir: root directory of gclient project

    Returns:
        dict mapping normalized project path to repo url
    """
    entries_path = os.path.join(gclient_dir, '.gclient_entries')
    scope = {}
    with open(entries_path) as f:
        code = compile(f.read(), entries_path, 'exec')
        # The entries file is python code written by gclient (trusted).
        # pylint: disable=exec-used
        exec(code, scope)
    raw_entries = scope.get('entries', {})
    # Normalize path (remove trailing slash) so lookups are consistent.
    return {os.path.normpath(path): url for path, url in raw_entries.items()}
def write_gclient_entries(gclient_dir, projects):
    """Writes .gclient_entries.

    Args:
        gclient_dir: root directory of gclient project
        projects: dict mapping project path to repo url
    """
    entries_path = os.path.join(gclient_dir, '.gclient_entries')
    lines = ['entries = {\n']
    for path, repo_url in sorted(projects.items()):
        # pprint.pformat quotes values the same way gclient itself does.
        lines.append(
            ' %s: %s,\n' % (pprint.pformat(path), pprint.pformat(repo_url))
        )
    lines.append('}\n')
    with open(entries_path, 'w') as f:
        f.write(''.join(lines))
def mirror(code_storage, repo_url):
    """Mirror git repo.

    This function mimics the caching behavior of 'gclient sync' with
    'cache_dir'.

    Args:
        code_storage: CodeStorage object
        repo_url: remote repo url
    """
    logger.info('mirror %s', repo_url)
    # Clone into a temp dir inside cache_dir first; the finished mirror is
    # moved to its final name at the end so readers never see a half-done one.
    tmp_dir = tempfile.mkdtemp(dir=code_storage.cache_dir)
    git_root = code_storage.cached_git_root(repo_url)
    assert not os.path.exists(git_root)
    with git_util.PatchGitConfig() as new_env:
        util.check_call('git', 'init', '--bare', cwd=tmp_dir, env=new_env)
    # These config parameters are copied from gclient.
    git_util.config(tmp_dir, 'gc.autodetach', '0')
    git_util.config(tmp_dir, 'gc.autopacklimit', '0')
    git_util.config(tmp_dir, 'core.deltaBaseCacheLimit', '2g')
    git_util.config(tmp_dir, 'remote.origin.url', repo_url)
    # Each --replace-all call registers one fetch refspec (heads, tags,
    # branch-heads), keyed by the regex in the last argument.
    git_util.config(
        tmp_dir,
        '--replace-all',
        'remote.origin.fetch',
        '+refs/heads/*:refs/heads/*',
        r'\+refs/heads/\*:.*',
    )
    git_util.config(
        tmp_dir,
        '--replace-all',
        'remote.origin.fetch',
        '+refs/tags/*:refs/tags/*',
        r'\+refs/tags/\*:.*',
    )
    git_util.config(
        tmp_dir,
        '--replace-all',
        'remote.origin.fetch',
        '+refs/branch-heads/*:refs/branch-heads/*',
        r'\+refs/branch-heads/\*:.*',
    )
    git_util.fetch(tmp_dir, 'origin', '+refs/heads/*:refs/heads/*')
    git_util.fetch(tmp_dir, 'origin', '+refs/tags/*:refs/tags/*')
    git_util.fetch(
        tmp_dir, 'origin', '+refs/branch-heads/*:refs/branch-heads/*'
    )
    # Rename to correct name atomically.
    os.rename(tmp_dir, git_root)
# Copied from depot_tools' gclient.py
_PLATFORM_MAPPING = {
'cygwin': 'win',
'darwin': 'mac',
'linux2': 'linux',
'linux': 'linux',
'win32': 'win',
'aix6': 'aix',
}
def _detect_host_os():
return _PLATFORM_MAPPING[sys.platform]
def evaluate_condition(condition, variables, referenced_variables=None):
    """Evaluate condition in gclient DEPS.

    This function is copied from gclient_eval.py EvaluateCondition(), with
    ast.Str/ast.NameConstant (deprecated since Python 3.8 and removed in
    newer CPython releases) replaced by ast.Constant.

    Args:
        condition: condition string, e.g. 'checkout_linux and host_os == "mac"'
        variables: dict used to resolve names in the condition; a str value is
            itself recursively evaluated as a condition
        referenced_variables: set of variable names currently being expanded,
            used to detect cyclic references

    Returns:
        evaluate result

    Raises:
        ValueError: malformed condition or cyclic variable reference
    """
    if not referenced_variables:
        referenced_variables = set()
    _allowed_names = {'None': None, 'True': True, 'False': False}
    main_node = ast.parse(condition, mode='eval')
    if isinstance(main_node, ast.Expression):
        main_node = main_node.body

    def _convert(node, allow_tuple=False):
        # Literals: strings and True/False/None. (ast.Constant subsumes the
        # deprecated ast.Str and ast.NameConstant node classes.)
        if isinstance(node, ast.Constant):
            return node.value
        if isinstance(node, ast.Tuple) and allow_tuple:
            return tuple(map(_convert, node.elts))
        if isinstance(node, ast.Name):
            if node.id in referenced_variables:
                raise ValueError(
                    'invalid cyclic reference to %r (inside %r)'
                    % (node.id, condition)
                )
            if node.id in _allowed_names:
                return _allowed_names[node.id]
            if node.id in variables:
                value = variables[node.id]
                # Allow using "native" types, without wrapping everything in strings.
                # Note that schema constraints still apply to variables.
                if not isinstance(value, str):
                    return value
                # Recursively evaluate the variable reference.
                return evaluate_condition(
                    variables[node.id],
                    variables,
                    referenced_variables.union([node.id]),
                )
            # Implicitly convert unrecognized names to strings.
            # If we want to change this, we'll need to explicitly distinguish
            # between arguments for GN to be passed verbatim, and ones to
            # be evaluated.
            return node.id
        if isinstance(node, ast.BoolOp) and isinstance(node.op, ast.Or):
            bool_values = []
            for value in node.values:
                bool_values.append(_convert(value))
                if not isinstance(bool_values[-1], bool):
                    raise ValueError(
                        'invalid "or" operand %r (inside %r)'
                        % (bool_values[-1], condition)
                    )
            return any(bool_values)
        if isinstance(node, ast.BoolOp) and isinstance(node.op, ast.And):
            bool_values = []
            for value in node.values:
                bool_values.append(_convert(value))
                if not isinstance(bool_values[-1], bool):
                    raise ValueError(
                        'invalid "and" operand %r (inside %r)'
                        % (bool_values[-1], condition)
                    )
            return all(bool_values)
        if isinstance(node, ast.UnaryOp) and isinstance(node.op, ast.Not):
            value = _convert(node.operand)
            if not isinstance(value, bool):
                raise ValueError(
                    'invalid "not" operand %r (inside %r)' % (value, condition)
                )
            return not value
        if isinstance(node, ast.Compare):
            if len(node.ops) != 1:
                raise ValueError(
                    'invalid compare: exactly 1 operator required (inside %r)'
                    % (condition)
                )
            if len(node.comparators) != 1:
                raise ValueError(
                    'invalid compare: exactly 1 comparator required (inside %r)'
                    % (condition)
                )
            left = _convert(node.left)
            right = _convert(
                node.comparators[0], allow_tuple=isinstance(node.ops[0], ast.In)
            )
            if isinstance(node.ops[0], ast.Eq):
                return left == right
            if isinstance(node.ops[0], ast.NotEq):
                return left != right
            if isinstance(node.ops[0], ast.In):
                return left in right
            raise ValueError(
                'unexpected operator: %s %s (inside %r)'
                % (node.ops[0], ast.dump(node), condition)
            )
        raise ValueError(
            'unexpected AST node: %s %s (inside %r)'
            % (node, ast.dump(node), condition)
        )

    return _convert(main_node)
class Dep:
    """Represent one entry of DEPS's deps.

    One Dep object means one subproject inside DEPS file. It recorded what to
    checkout (like git or cipd) content of each subproject.

    Attributes:
        path: subproject path, relative to project root
        variables: the variables of the containing DEPS file; these variables
            will be applied to fields of this object (like 'url' and
            'condition') and children projects.
        condition: whether to checkout this subproject
        dep_type: 'git' or 'cipd'
        url: if dep_type='git', the url of remote repo and associated
            branch/commit
        packages: if dep_type='cipd', cipd package version and location
    """

    def __init__(self, path, variables, entry):
        """Dep constructor.

        Args:
            path: subproject path
            variables: vars dict of the containing DEPS file
            entry: a value of the DEPS 'deps' dict; either a url string
                (implies dep_type='git') or a dict with optional 'dep_type'
                and 'condition' keys plus 'url' or 'packages'.
        """
        self.path = path
        self.variables = variables
        self.url = None  # only valid for dep_type='git'
        self.packages = None  # only valid for dep_type='cipd'
        if isinstance(entry, str):
            # A bare string entry is an unconditional git url.
            self.dep_type = 'git'
            self.url = entry
            self.condition = None
        else:
            self.dep_type = entry.get('dep_type', 'git')
            self.condition = entry.get('condition')
            if self.dep_type == 'git':
                self.url = entry['url']
            else:
                assert self.dep_type == 'cipd', (
                    'unknown dep_type:' + self.dep_type
                )
                self.packages = entry['packages']
        if self.dep_type == 'git':
            # Expand Var() placeholders like '{vars[name]}' and '{name}'.
            # TODO(b/301570818): It should be just
            # self.url.format(vars.variables).
            # Pass unpacked self.variables temporarily due to a bad entry in
            # DEPS.
            self.url = self.url.format(vars=self.variables, **self.variables)

    def __eq__(self, rhs):
        return vars(self) == vars(rhs)

    def __ne__(self, rhs):
        return not self.__eq__(rhs)

    def set_url(self, repo_url, at):
        """Sets url to `repo_url` pinned at branch/revision `at`."""
        assert self.dep_type == 'git'
        self.url = '%s@%s' % (repo_url, at)

    def set_revision(self, at):
        """Re-pins the current repo url at branch/revision `at`."""
        assert self.dep_type == 'git'
        repo_url, _ = self.parse_url()
        self.set_url(repo_url, at)

    def parse_url(self):
        """Splits the url into repo url and pinned revision/branch.

        Returns:
            (repo_url, at) tuple; `at` is DEFAULT_BRANCH_NAME when the
            dependency is not pinned
        """
        assert self.dep_type == 'git'
        if '@' in self.url:
            # Split at the LAST '@' so pinned ssh-style urls such as
            # 'ssh://git@host/repo.git@rev' parse instead of raising
            # ValueError (plain split('@') produced 3 parts for them).
            repo_url, at = self.url.rsplit('@', 1)
        else:
            # If the dependency is not pinned, the default branch.
            repo_url, at = self.url, DEFAULT_BRANCH_NAME
        return repo_url, at

    def as_path_spec(self):
        """Converts to a codechange.PathSpec for this subproject."""
        repo_url, at = self.parse_url()
        return codechange.PathSpec(self.path, repo_url, at)

    def eval_condition(self):
        """Evaluate condition for DEPS parsing.

        Returns:
            eval result
        """
        if not self.condition:
            return True
        # Currently, we only support chromeos as target_os.
        # TODO(kcwu): make it configurable if we need to bisect for other os.
        # We don't specify `target_os_only`, so `unix` will be considered by
        # gclient as well.
        target_os = ['chromeos', 'unix']
        vars_dict = {
            'checkout_android': 'android' in target_os,
            'checkout_chromeos': 'chromeos' in target_os,
            'checkout_fuchsia': 'fuchsia' in target_os,
            'checkout_ios': 'ios' in target_os,
            'checkout_linux': 'unix' in target_os,
            'checkout_mac': 'mac' in target_os,
            'checkout_win': 'win' in target_os,
            # default cpu: x64
            'checkout_arm64': False,
            'checkout_arm': False,
            'checkout_mips': False,
            'checkout_ppc': False,
            'checkout_s390': False,
            'checkout_x64': True,
            'checkout_x86': False,
            'host_os': _detect_host_os(),
            'False': False,
            'None': None,
            'True': True,
        }
        # DEPS-level variables override the defaults above.
        vars_dict.update(self.variables)
        return evaluate_condition(self.condition, vars_dict)

    def to_lines(self):
        """Renders this entry as lines of a flattened DEPS 'deps' dict."""
        s = []
        condition_part = (
            [' "condition": %r,' % self.condition] if self.condition else []
        )
        if self.dep_type == 'cipd':
            s.extend(
                [
                    ' "%s": {' % (self.path.split(':')[0],),
                    ' "packages": [',
                ]
            )
            for p in sorted(self.packages, key=lambda x: x['package']):
                s.extend(
                    [
                        ' {',
                        ' "package": "%s",' % p['package'],
                        ' "version": "%s",' % p['version'],
                        ' },',
                    ]
                )
            s.extend(
                [
                    ' ],',
                    ' "dep_type": "cipd",',
                ]
                + condition_part
                + [
                    ' },',
                    '',
                ]
            )
        else:
            s.extend(
                [
                    ' "%s": {' % (self.path,),
                    ' "url": "%s",' % (self.url,),
                ]
                + condition_part
                + [
                    ' },',
                    '',
                ]
            )
        return s
class Deps:
    """DEPS parsed result.

    Attributes:
        variables: 'vars' dict in DEPS file; these variables will be applied
            recursively to children.
        entries: dict of Dep objects
        recursedeps: list of recursive projects
    """

    def __init__(self):
        self.variables = {}
        self.entries = {}
        # Deps parsed but not checked out (condition false or unsupported
        # dep_type); kept so the flattened output still lists them.
        self.ignored_entries = {}
        self.recursedeps = []
        self.allowed_hosts = set()
        self.gn_args_from = None
        self.gn_args_file = None
        self.gn_args = []
        self.hooks = []
        self.pre_deps_hooks = []
        # Paths whose revision was explicitly overridden; see apply_commit().
        self.modified = set()

    def _gn_settings_to_lines(self):
        """Converts gn args settings to list of lines for output."""
        s = []
        if self.gn_args_file:
            s.extend(
                [
                    'gclient_gn_args_file = "%s"' % self.gn_args_file,
                    'gclient_gn_args = %r' % self.gn_args,
                ]
            )
        return s

    def _allowed_hosts_to_lines(self):
        """Converts |allowed_hosts| set to list of lines for output."""
        if not self.allowed_hosts:
            return []
        s = ['allowed_hosts = [']
        for h in sorted(self.allowed_hosts):
            s.append(' "%s",' % h)
        s.extend([']', ''])
        return s

    def _entries_to_lines(self):
        """Converts |entries| dict to list of lines for output."""
        # Merge into a NEW dict; the previous code aliased
        # self.ignored_entries and update()d it in place, polluting it with
        # active entries on every call.
        entries = {**self.ignored_entries, **self.entries}
        if not entries:
            return []
        s = ['deps = {']
        for _, dep in sorted(entries.items()):
            s.extend(dep.to_lines())
        s.extend(['}', ''])
        return s

    def _vars_to_lines(self):
        """Converts |variables| dict to list of lines for output."""
        if not self.variables:
            return []
        s = ['vars = {']
        for key, value in sorted(self.variables.items()):
            s.extend(
                [
                    ' "%s": %r,' % (key, value),
                    '',
                ]
            )
        s.extend(['}', ''])
        return s

    def _hooks_to_lines(self, name, hooks):
        """Converts |hooks| list to list of lines for output."""
        if not hooks:
            return []
        s = ['%s = [' % name]
        # Iterate a sorted copy; the previous code sorted the caller's list
        # in place.
        for hook in sorted(hooks, key=lambda x: x.get('name', '')):
            s.extend(
                [
                    ' {',
                ]
            )
            if hook.get('name') is not None:
                s.append(' "name": "%s",' % hook.get('name'))
            if hook.get('pattern') is not None:
                s.append(' "pattern": "%s",' % hook.get('pattern'))
            if hook.get('condition') is not None:
                s.append(' "condition": %r,' % hook.get('condition'))
            # Flattened hooks need to be written relative to the root gclient dir
            cwd = os.path.relpath(os.path.normpath(hook.get('cwd', '.')))
            s.extend(
                [' "cwd": "%s",' % cwd]
                + [' "action": [']
                + [' "%s",' % arg for arg in hook.get('action', [])]
                + [' ]', ' },', '']
            )
        s.extend([']', ''])
        return s

    def to_string(self):
        """Return flatten DEPS string."""
        return '\n'.join(
            self._gn_settings_to_lines()
            + self._allowed_hosts_to_lines()
            + self._entries_to_lines()
            + self._hooks_to_lines('hooks', self.hooks)
            + self._hooks_to_lines('pre_deps_hooks', self.pre_deps_hooks)
            + self._vars_to_lines()
            + ['']
        )  # Ensure newline at end of file.

    def remove_src(self):
        """Return src_revision for buildbucket use."""
        assert 'src' in self.entries
        _, src_rev = self.entries['src'].parse_url()
        del self.entries['src']
        return src_rev

    def apply_commit(self, path, revision, overwrite=True):
        """Set revision to a project by path.

        Args:
            path: A project's path.
            revision: A git commit id.
            overwrite: Overwrite flag, the project won't change if
                overwrite=False and it was modified before.
        """
        if path in self.modified and not overwrite:
            return
        self.modified.add(path)
        if path not in self.entries:
            logger.warning('path: %s not found in DEPS', path)
            return
        self.entries[path].set_revision(revision)

    def apply_action_groups(self, action_groups):
        """Apply multiple action groups to DEPS.

        If there are multiple actions in one repo, only last one is applied.

        Args:
            action_groups: A list of action groups.
        """
        # Apply in reversed order with overwrite=False,
        # so each repo is on the state of last action.
        for action_group in reversed(action_groups):
            for action in reversed(action_group.actions):
                if isinstance(action, codechange.GitCheckoutCommit):
                    self.apply_commit(action.path, action.rev, overwrite=False)
                if isinstance(action, codechange.GitAddRepo):
                    self.apply_commit(action.path, action.rev, overwrite=False)
                if isinstance(action, codechange.GitRemoveRepo):
                    assert action.path not in self.entries
                    self.modified.add(action.path)

    def apply_deps(self, deps):
        """Pins this DEPS' projects to the revisions of another Deps."""
        for path, dep in deps.entries.items():
            if path in self.entries:
                _, rev = dep.parse_url()
                self.apply_commit(path, rev, overwrite=False)
        # hooks, vars, ignored_entries are ignored and should be set by
        # float_spec
class TimeSeriesTree:
    """Data structure for generating snapshots of historical dependency tree.

    This is a tree structure with time information. Each tree node represents
    not only typical tree data and tree children information, but also
    historical value of those tree data and tree children.

    To be more specific in terms of DEPS parsing, one TimeSeriesTree object
    represent a DEPS file. The caller will add_snapshot() to add parsed result
    of historical DEPS instances. After that, the tree root of this class can
    reconstruct the every historical moment of the project dependency state.

    This class is slight abstraction of git_util.get_history_recursively() to
    support more than single git repo and be version control system
    independent.
    """

    # TODO(kcwu): refactor git_util.get_history_recursively() to reuse this
    # class.

    def __init__(self, parent_deps, entry, start_time, end_time):
        """TimeSeriesTree constructor.

        Args:
            parent_deps: parent DEPS of the given period. None if this is
                tree root.
            entry: project entry
            start_time: start time
            end_time: end time
        """
        self.parent_deps = parent_deps
        self.entry = entry
        # Maps timestamp -> Deps; relies on dict preserving the insertion
        # order of add_snapshot() calls (timestamps strictly increasing).
        self.snapshots = {}
        self.start_time = start_time
        self.end_time = end_time
        # Intermediate dict to keep track alive children for the time being.
        # Maintained by add_snapshot() and no_more_snapshot().
        # Maps child entry -> (timestamp it appeared, parent Deps then).
        self.alive_children = {}
        # All historical children (TimeSeriesTree object) between start_time
        # and end_time. It's possible that children with the same entry
        # appear more than once in this list because they are removed and
        # added back to the DEPS file.
        self.subtrees = []

    def subtree_eq(self, deps_a, deps_b, child_entry):
        """Compares subtree of two Deps.

        Args:
            deps_a: Deps object
            deps_b: Deps object
            child_entry: the subtree to compare

        Returns:
            True if the said subtree of these two Deps equal
        """
        # Need to compare variables because they may influence subtree
        # parsing behavior
        path = child_entry[0]
        return (
            deps_a.entries[path] == deps_b.entries[path]
            and deps_a.variables == deps_b.variables
        )

    def add_snapshot(self, timestamp, deps, children_entries):
        """Adds parsed DEPS result and children.

        For example, if a given DEPS file has N revisions between start_time
        and end_time, the caller should call this method N times to feed all
        parsed results in order (timestamp increasing).

        Args:
            timestamp: timestamp of `deps`
            deps: Deps object
            children_entries: list of names of deps' children
        """
        assert timestamp not in self.snapshots
        self.snapshots[timestamp] = deps
        # Union of children alive so far and children in this snapshot.
        for child_entry in set(
            list(self.alive_children.keys()) + children_entries
        ):
            # `child_entry` is added at `timestamp`
            if child_entry not in self.alive_children:
                self.alive_children[child_entry] = timestamp, deps
            # `child_entry` is removed at `timestamp`; close its lifetime
            # into a subtree covering [appear time, timestamp).
            elif child_entry not in children_entries:
                self.subtrees.append(
                    TimeSeriesTree(
                        self.alive_children[child_entry][1],
                        child_entry,
                        self.alive_children[child_entry][0],
                        timestamp,
                    )
                )
                del self.alive_children[child_entry]
            # `child_entry` is alive before and after `timestamp`
            else:
                last_deps = self.alive_children[child_entry][1]
                # If the subtree content changed, close the old period and
                # start tracking a new one from `timestamp`.
                if not self.subtree_eq(last_deps, deps, child_entry):
                    self.subtrees.append(
                        TimeSeriesTree(
                            last_deps,
                            child_entry,
                            self.alive_children[child_entry][0],
                            timestamp,
                        )
                    )
                    self.alive_children[child_entry] = timestamp, deps

    def no_more_snapshot(self):
        """Indicates all snapshots are added.

        add_snapshot() should not be invoked after no_more_snapshot().
        """
        for child_entry, (timestamp, deps) in self.alive_children.items():
            # A child that appeared exactly at end_time has an empty period;
            # skip it (unless the whole tree covers a single instant).
            if timestamp == self.end_time and timestamp != self.start_time:
                continue
            self.subtrees.append(
                TimeSeriesTree(deps, child_entry, timestamp, self.end_time)
            )
        # Poison further add_snapshot() calls; events() asserts on this.
        self.alive_children = None

    def events(self):
        """Gets children added/removed events of this subtree.

        Returns:
            list of (timestamp, deps_name, deps, end_flag):
                timestamp: timestamp of event
                deps_name: name of this subtree
                deps: Deps object of given project
                end_flag: True indicates this is the last event of this deps
                    tree
        """
        if not self.snapshots:
            # This substree is broken (e.g., deps files are removed
            # accidentally but still referenced by its parent).
            return []
        assert self.alive_children is None, (
            'events() is valid only after ' 'no_more_snapshot() is invoked'
        )
        result = []
        last_deps = None
        # Relies on snapshots preserving increasing-timestamp insertion
        # order.
        for timestamp, deps in self.snapshots.items():
            result.append((timestamp, self.entry, deps, False))
            last_deps = deps
        assert last_deps
        result.append((self.end_time, self.entry, last_deps, True))
        for subtree in self.subtrees:
            for event in subtree.events():
                result.append(event)
        result.sort(key=lambda x: x[0])
        return result

    def iter_forest(self):
        """Iterates snapshots of project dependency state.

        In terms of DEPS parsing, `forest` is a collection of Deps objects.

        Yields:
            (timestamp, forest):
                timestamp: time of snapshot
                forest: A dict indicates path => deps mapping
        """
        forest = {}
        # Group by timestamp
        for timestamp, events in itertools.groupby(
            self.events(), operator.itemgetter(0)
        ):
            # It's possible that one deps is removed and added at the same
            # timestamp, i.e. modification, so use counter to track.
            end_counter = collections.Counter()
            for _timestamp, entry, deps, end in events:
                forest[entry] = deps
                if end:
                    end_counter[entry] += 1
                else:
                    end_counter[entry] -= 1
            yield timestamp, forest
            # Remove deps which are removed at this timestamp.
            for entry, count in end_counter.items():
                assert -1 <= count <= 1, (timestamp, entry)
                if count == 1:
                    del forest[entry]

    def iter_path_specs(self):
        """Iterates snapshots of project dependency state.

        In terms of DEPS parsing, `path_specs` is flatten of recursive Deps
        objects.

        Yields:
            (timestamp, path_specs):
                timestamp: time of snapshot
                path_specs: dict of path_spec entries
        """
        for timestamp, forest in self.iter_forest():
            path_specs = {}
            # Merge Deps at time `timestamp` into single path_specs.
            for deps in forest.values():
                for path, dep in deps.entries.items():
                    path_specs[path] = dep.as_path_spec()
            yield timestamp, path_specs
class DepsParser:
    """Gclient DEPS file parser."""

    def __init__(self, project_root, code_storage):
        """DepsParser constructor.

        Args:
            project_root: root directory of the gclient project
            code_storage: CodeStorage object for cached git mirrors
        """
        self.project_root = project_root
        self.code_storage = code_storage

    def load_single_deps(self, content):
        """Executes DEPS content and returns its top-level bindings.

        Args:
            content: file content of DEPS file

        Returns:
            dict of names defined by the DEPS file (vars, deps, hooks, ...)
        """

        # var names may not be valid python identifiers, so wrap them in a
        # dictionary (b/301370477); the placeholder is expanded later via
        # str.format(vars=...).
        def var_function(name):
            return '{vars[%s]}' % name

        def str_function(name):
            return str(name)

        global_scope = {"Var": var_function, "Str": str_function}
        local_scope = {}
        # NOTE: DEPS content is executed as Python, exactly like gclient
        # itself does; it is assumed to be trusted input.
        # pylint: disable=exec-used
        exec(content, global_scope, local_scope)
        return local_scope

    def parse_single_deps(
        self, content, parent_vars=None, parent_path='', parent_dep=None
    ):
        """Parses DEPS file without recursion.

        Args:
            content: file content of DEPS file
            parent_vars: variables inherent from parent DEPS
            parent_path: project path of parent DEPS file
            parent_dep: A corresponding Dep object in parent DEPS

        Returns:
            Deps object
        """
        local_scope = self.load_single_deps(content)
        deps = Deps()
        local_scope.setdefault('vars', {})
        if parent_vars:
            # Parent vars (including custom_vars) override child defaults.
            local_scope['vars'].update(parent_vars)
        deps.variables = local_scope['vars']
        # Warnings for old usages which we don't support.
        for name in deps.variables:
            if name.startswith('RECURSEDEPS_') or name.endswith('_DEPS_file'):
                logger.warning(
                    '%s is deprecated and not supported recursion syntax', name
                )
        if 'deps_os' in local_scope:
            logger.warning('deps_os is no longer supported')
        if 'allowed_hosts' in local_scope:
            deps.allowed_hosts = set(local_scope.get('allowed_hosts'))
        deps.hooks = local_scope.get('hooks', [])
        deps.pre_deps_hooks = local_scope.get('pre_deps_hooks', [])
        deps.gn_args_from = local_scope.get('gclient_gn_args_from')
        deps.gn_args_file = local_scope.get('gclient_gn_args_file')
        deps.gn_args = local_scope.get('gclient_gn_args', [])
        # recalculate hook path
        use_relative_hooks = local_scope.get('use_relative_hooks', False)
        if use_relative_hooks:
            assert local_scope.get('use_relative_paths', False)
            for hook in deps.hooks:
                hook['cwd'] = os.path.join(parent_path, hook.get('cwd', ''))
            for pre_deps_hook in deps.pre_deps_hooks:
                pre_deps_hook['cwd'] = os.path.join(
                    parent_path, pre_deps_hook.get('cwd', '')
                )
        for path, dep_entry in local_scope.get('deps', {}).items():
            # recalculate path
            path = path.format(vars=deps.variables)
            if local_scope.get('use_relative_paths', False):
                path = os.path.join(parent_path, path)
            path = os.path.normpath(path)
            dep = Dep(path, deps.variables, dep_entry)
            eval_condition = dep.eval_condition()
            # update condition: a child is checked out only if both its own
            # and its parent's conditions hold, so AND them together.
            if parent_dep and parent_dep.condition:
                tmp_dict = {'condition': dep.condition}
                gclient_eval.UpdateCondition(
                    tmp_dict, 'and', parent_dep.condition
                )
                dep.condition = tmp_dict['condition']
            if not eval_condition:
                deps.ignored_entries[path] = dep
                continue
            # TODO(kcwu): support dep_type=cipd http://crbug.com/846564
            if dep.dep_type != 'git':
                warning_key = ('dep_type', dep.dep_type, path)
                if warning_key not in emitted_warnings:
                    # Warn only once per (dep_type, path) pair.
                    emitted_warnings.add(warning_key)
                    logger.warning(
                        'dep_type=%s is not supported yet: %s',
                        dep.dep_type,
                        path,
                    )
                deps.ignored_entries[path] = dep
                continue
            deps.entries[path] = dep
        recursedeps = []
        for recurse_entry in local_scope.get('recursedeps', []):
            # Normalize entries.
            if isinstance(recurse_entry, tuple):
                path, deps_file = recurse_entry
            else:
                # Bug fix: assert the entry itself; previously this asserted
                # `path`, a stale variable from an earlier loop.
                assert isinstance(recurse_entry, str)
                path, deps_file = recurse_entry, 'DEPS'
            if local_scope.get('use_relative_paths', False):
                path = os.path.join(parent_path, path)
            path = path.format(vars=deps.variables)
            if path in deps.entries:
                recursedeps.append((path, deps_file))
        deps.recursedeps = recursedeps
        return deps

    def construct_deps_tree(
        self,
        tstree,
        repo_url,
        at,
        after,
        before,
        parent_vars=None,
        parent_path='',
        parent_dep=None,
        deps_file='DEPS',
        allow_floating=True,
    ):
        """Processes DEPS recursively of given time period.

        This method parses all commits of DEPS between time `after` and
        `before`, segments recursive dependencies into subtrees if they are
        changed, and processes subtrees recursively.

        The parsed results (multiple revisions of DEPS file) are stored in
        `tstree`.

        Args:
            tstree: TimeSeriesTree object
            repo_url: remote repo url
            at: branch or git commit id
            after: begin of period
            before: end of period
            parent_vars: DEPS variables inherit from parent DEPS (including
                custom_vars)
            parent_path: the path of parent project of current DEPS file
            parent_dep: A corresponding Dep object in parent DEPS
            deps_file: filename of DEPS file, relative to the git repo,
                repo_url
            allow_floating: True if allow floating commit references
        """
        if '://' in repo_url:
            git_repo = self.code_storage.cached_git_root(repo_url)
            if not os.path.exists(git_repo):
                # Serialize mirror creation so concurrent bisectors don't
                # clone into the same cache entry.
                with locking.lock_file(
                    os.path.join(
                        self.code_storage.cache_dir,
                        locking.LOCK_FILE_FOR_MIRROR_SYNC,
                    )
                ):
                    mirror(self.code_storage, repo_url)
        else:
            git_repo = repo_url
        if git_util.is_git_rev(at):
            # Pinned to an exact commit: DEPS content is constant over the
            # whole period, so synthesize a two-point history.
            history = git_util.Commit.make_commit_list(
                [
                    (after, at),
                    (before, at),
                ]
            )
        else:
            if not allow_floating:
                raise errors.ExternalError(
                    'Reference to floating commit (%s) is not allowed' % at
                )
            history = git_util.get_history(
                git_repo,
                deps_file,
                branch=at,
                after=after,
                before=before,
                padding_begin=True,
                padding_end=True,
            )
        assert history
        # If not equal, it means the file was deleted but is still referenced
        # by its parent.
        assert history[-1].timestamp == before
        # TODO(kcwu): optimization: history[-1] is unused
        for commit in history[:-1]:
            try:
                content = git_util.get_file_from_revision(
                    git_repo, commit.rev, deps_file
                )
            except subprocess.CalledProcessError:
                logger.error(
                    'Failed to get %s:%s from repo %s. Skip this snapshot. '
                    'It usually means somebody messed up the deps file.',
                    commit.rev,
                    deps_file,
                    git_repo,
                )
                continue
            deps = self.parse_single_deps(
                content,
                parent_vars=parent_vars,
                parent_path=parent_path,
                parent_dep=parent_dep,
            )
            tstree.add_snapshot(commit.timestamp, deps, deps.recursedeps)
        tstree.no_more_snapshot()
        # Recurse into each child DEPS over the period it was alive.
        for subtree in tstree.subtrees:
            path, deps_file = subtree.entry
            path_spec = subtree.parent_deps.entries[path].as_path_spec()
            self.construct_deps_tree(
                subtree,
                path_spec.repo_url,
                path_spec.rev,
                subtree.start_time,
                subtree.end_time,
                parent_vars=subtree.parent_deps.variables,
                parent_path=path,
                parent_dep=subtree.parent_deps.entries[path],
                deps_file=deps_file,
                allow_floating=allow_floating,
            )

    def enumerate_path_specs(self, start_time, end_time, path, branch=None):
        """Yields (timestamp, path_specs) snapshots for the given period."""
        tstree = TimeSeriesTree(None, path, start_time, end_time)
        if not branch:
            branch = DEFAULT_BRANCH_NAME
        self.construct_deps_tree(
            tstree,
            path,
            branch,
            start_time,
            end_time,
            # Floating refs are only acceptable when querying a single point
            # in time.
            allow_floating=(start_time != end_time),
        )
        return tstree.iter_path_specs()

    def enumerate_gclient_solutions(self, start_time, end_time, path):
        """Yields (timestamp, forest) snapshots for the given period."""
        tstree = TimeSeriesTree(None, path, start_time, end_time)
        self.construct_deps_tree(
            tstree, path, DEFAULT_BRANCH_NAME, start_time, end_time
        )
        return tstree.iter_forest()

    def flatten(self, solutions, entry_point: str):
        """Flatten all given Deps

        Args:
            solutions: A name => Deps dict, name can be either a str or a
                tuple.
            entry_point: An entry_point name of solutions.

        Returns:
            Deps: A flatten Deps.
        """

        def _add_unvisited_recursedeps(deps_queue, visited, deps):
            # Enqueue children that have not been merged yet.
            for name in deps.recursedeps:
                if name not in visited:
                    visited.add(name)
                    deps_queue.put(name)

        # NOTE: merges in place; solutions[entry_point] is mutated and
        # returned.
        result = solutions[entry_point]
        deps_queue = queue.SimpleQueue()
        visited = set()
        visited.add(entry_point)
        _add_unvisited_recursedeps(deps_queue, visited, solutions[entry_point])
        # BFS to merge `deps` into `result`
        while not deps_queue.empty():
            deps_name = deps_queue.get()
            deps = solutions[deps_name]
            result.allowed_hosts.update(deps.allowed_hosts)
            for key, value in deps.variables.items():
                # Conflicting variable values across DEPS files are not
                # supported.
                assert (
                    key not in result.variables or deps.variables[key] == value
                )
                result.variables[key] = value
            result.pre_deps_hooks += deps.pre_deps_hooks
            result.hooks += deps.hooks
            for dep in deps.entries.values():
                assert (
                    dep.path not in result.entries
                    or result.entries.get(dep.path) == dep
                )
                result.entries[dep.path] = dep
            for dep in deps.ignored_entries.values():
                assert (
                    dep.path not in result.ignored_entries
                    or result.ignored_entries.get(dep.path) == dep
                )
                result.ignored_entries[dep.path] = dep
            _add_unvisited_recursedeps(deps_queue, visited, deps)
        # If gn_args_from is set in root DEPS, overwrite gn arguments
        if solutions[entry_point].gn_args_from:
            gn_args_dep = solutions[
                (solutions[entry_point].gn_args_from, 'DEPS')
            ]
            result.gn_args = gn_args_dep.gn_args
            result.gn_args_file = gn_args_dep.gn_args_file
        return result
class GclientCache(codechange.CodeStorage):
    """Gclient git cache."""

    def __init__(self, cache_dir: str):
        self.cache_dir = cache_dir

    def _url_to_cache_dir(self, url: str) -> str:
        """Maps a repo url to its directory name inside the cache.

        ref: depot_tools' git_cache.Mirror.UrlToCacheDir
        """
        parsed = urllib.parse.urlparse(url)
        norm_url = parsed.netloc + parsed.path
        norm_url = norm_url.removesuffix('.git')
        norm_url = norm_url.replace('googlesource.com/a/', 'googlesource.com/')
        return norm_url.replace('-', '--').replace('/', '-').lower()

    def cached_git_root(self, repo_url: str) -> str:
        """Returns the local cache directory path for `repo_url`."""
        return os.path.join(self.cache_dir, self._url_to_cache_dir(repo_url))

    def add_to_project_list(
        self, project_root: str, path: str, repo_url: str
    ) -> None:
        """Records `path` -> `repo_url` in .gclient_entries."""
        projects = load_gclient_entries(project_root)
        projects[path] = repo_url
        write_gclient_entries(project_root, projects)

    def remove_from_project_list(self, project_root: str, path: str) -> None:
        """Drops `path` from .gclient_entries if present."""
        projects = load_gclient_entries(project_root)
        projects.pop(path, None)
        write_gclient_entries(project_root, projects)