| # -*- coding: utf-8 -*- |
| # Copyright 2018 The Chromium OS Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| """Gclient utility.""" |
| |
| from __future__ import print_function |
| import collections |
| import itertools |
| import logging |
| import operator |
| import os |
| import pprint |
| import sys |
| import urlparse |
| |
| from bisect_kit import codechange |
| from bisect_kit import git_util |
| from bisect_kit import util |
| |
| logger = logging.getLogger(__name__) |
| |
| |
def config(gclient_dir,
           url=None,
           cache_dir=None,
           deps_file=None,
           custom_var=None,
           spec=None):
  """Runs `gclient config` with the given options.

  Args:
    gclient_dir: root directory of gclient project
    url: URL of gclient configuration files
    cache_dir: gclient's git cache folder
    deps_file: override the default DEPS file name
    custom_var: custom variables
    spec: content of gclient file
  """
  args = ['gclient', 'config']
  # Append each optional flag only when its value is provided.
  optional_flags = [
      ('--deps-file', deps_file),
      ('--cache-dir', cache_dir),
      ('--custom-var', custom_var),
      ('--spec', spec),
  ]
  for flag, value in optional_flags:
    if value:
      args += [flag, value]
  if url:
    args.append(url)

  util.check_call(*args, cwd=gclient_dir)
| |
| |
def sync(gclient_dir,
         with_branch_heads=False,
         with_tags=False,
         ignore_locks=False,
         jobs=8):
  """Runs `gclient sync` with the given options.

  Args:
    gclient_dir: root directory of gclient project
    with_branch_heads: whether to clone git `branch_heads` refspecs
    with_tags: whether to clone git tags
    ignore_locks: bypass gclient's lock
    jobs: how many workers running in parallel
  """
  args = ['gclient', 'sync', '--jobs', str(jobs), '--delete_unversioned_trees']

  # If 'gclient sync' is interrupted by ctrl-c or terminated for whatever
  # reason, it leaves annoying lock files on disk, which is unfriendly to
  # bot tasks. bisect-kit uses its own lock mechanism (in the caller of this
  # function), so '--ignore_locks' bypasses gclient's.
  optional_flags = [
      ('--with_branch_heads', with_branch_heads),
      ('--with_tags', with_tags),
      ('--ignore_locks', ignore_locks),
  ]
  args += [flag for flag, enabled in optional_flags if enabled]

  util.check_call(*args, cwd=gclient_dir)
| |
| |
| # Copied from depot_tools' gclient.py |
| _PLATFORM_MAPPING = { |
| 'cygwin': 'win', |
| 'darwin': 'mac', |
| 'linux2': 'linux', |
| 'win32': 'win', |
| 'aix6': 'aix', |
| } |
| |
| |
| def _detect_host_os(): |
| return _PLATFORM_MAPPING[sys.platform] |
| |
| |
class Dep(object):
  """Represent one entry of DEPS's deps.

  One Dep object means one subproject inside DEPS file. It recorded what to
  checkout (like git or cipd) content of each subproject.

  Attributes:
    path: subproject path, relative to project root
    variables: the variables of the containing DEPS file; these variables will
      be applied to fields of this object (like 'url' and 'condition') and
      children projects.
    condition: whether to checkout this subproject
    dep_type: 'git' or 'cipd'
    url: if dep_type='git', the url of remote repo and associated branch/commit
    packages: if dep_type='cipd', cipd package version and location
  """

  def __init__(self, path, variables, entry):
    """Dep constructor.

    Args:
      path: subproject path, relative to project root
      variables: variables dict of the containing DEPS file
      entry: deps entry value; either a plain url string (shorthand for a git
        dep) or a dict with 'dep_type', 'condition', and 'url'/'packages'.
    """
    self.path = path
    self.variables = variables

    self.url = None  # only valid for dep_type='git'
    self.packages = None  # only valid for dep_type='cipd'

    if isinstance(entry, str):
      # A bare string entry is shorthand for an unconditional git dep.
      self.dep_type = 'git'
      self.url = entry
      self.condition = None
    else:
      self.dep_type = entry.get('dep_type', 'git')
      self.condition = entry.get('condition')
      if self.dep_type == 'git':
        self.url = entry['url']
      else:
        assert self.dep_type == 'cipd'
        self.packages = entry['packages']

    if self.dep_type == 'git':
      # Expand '{name}' placeholders produced by Var() during DEPS parsing.
      self.url = self.url.format(**self.variables)

  def __eq__(self, rhs):
    return vars(self) == vars(rhs)

  def __ne__(self, rhs):
    return not self.__eq__(rhs)

  def as_path_spec(self):
    """Converts this git dep to a codechange.PathSpec.

    Returns:
      codechange.PathSpec of (path, repo_url, at)
    """
    assert self.dep_type == 'git'

    if '@' in self.url:
      # Split on the LAST '@' so urls which also contain '@' earlier (e.g.
      # credentials like 'https://user@host/repo@rev') still yield the pinned
      # revision; identical to split('@') for the common single-'@' case.
      repo_url, at = self.url.rsplit('@', 1)
    else:
      # If the dependency is not pinned, the default is master branch.
      repo_url, at = self.url, 'master'
    return codechange.PathSpec(self.path, repo_url, at)

  def eval_condition(self):
    """Evaluate condition for DEPS parsing.

    Returns:
      eval result
    """
    if not self.condition:
      return True

    vars_dict = {
        # default os: linux
        'checkout_android': False,
        'checkout_chromeos': False,
        'checkout_fuchsia': False,
        'checkout_ios': False,
        'checkout_linux': True,
        'checkout_mac': False,
        'checkout_win': False,
        # default cpu: x64
        'checkout_arm64': False,
        'checkout_arm': False,
        'checkout_mips': False,
        'checkout_ppc': False,
        'checkout_s390': False,
        'checkout_x64': True,
        'checkout_x86': False,
        'host_os': _detect_host_os(),
        'False': False,
        'None': None,
        'True': True,
    }
    # DEPS variables (including custom_vars) override the defaults above.
    vars_dict.update(self.variables)
    # pylint: disable=eval-used
    return eval(self.condition, vars_dict)
| |
| |
class Deps(object):
  """Parsed result of a single DEPS file.

  Attributes:
    variables: 'vars' dict in DEPS file; these variables will be applied
      recursively to children.
    entries: dict of Dep objects, keyed by subproject path
    recursedeps: list of recursive projects
  """

  def __init__(self):
    # Start with empty containers; the parser fills them in afterwards.
    self.variables = dict()
    self.entries = dict()
    self.recursedeps = list()
| |
| |
class TimeSeriesTree(object):
  """Data structure for generating snapshots of historical dependency tree.

  This is a tree structure with time information. Each tree node represents not
  only typical tree data and tree children information, but also historical
  value of those tree data and tree children.

  To be more specific in terms of DEPS parsing, one TimeSeriesTree object
  represent a DEPS file. The caller will add_snapshot() to add parsed result of
  historical DEPS instances. After that, the tree root of this class can
  reconstruct the every historical moment of the project dependency state.

  This class is slight abstraction of git_util.get_history_recursively() to
  support more than single git repo and be version control system independent.
  """

  # TODO(kcwu): refactor git_util.get_history_recursively() to reuse this class.

  def __init__(self, parent_deps, entry, start_time, end_time):
    """TimeSeriesTree constructor.

    Args:
      parent_deps: parent DEPS of the given period. None if this is tree root.
      entry: project entry
      start_time: start time
      end_time: end time
    """
    self.parent_deps = parent_deps
    self.entry = entry
    # Maps timestamp -> Deps object parsed at that moment.
    self.snapshots = {}
    self.start_time = start_time
    self.end_time = end_time

    # Intermediate dict to keep track alive children for the time being.
    # Maps child entry -> (timestamp when it became alive, parent Deps).
    # Maintained by add_snapshot() and no_more_snapshot().
    self.alive_children = {}

    # All historical children (TimeSeriesTree object) between start_time and
    # end_time. It's possible that children with the same entry appear more than
    # once in this list because they are removed and added back to the DEPS
    # file.
    self.subtrees = []

  def subtree_eq(self, deps_a, deps_b, child_entry):
    """Compares subtree of two Deps.

    Args:
      deps_a: Deps object
      deps_b: Deps object
      child_entry: the subtree to compare

    Returns:
      True if the said subtree of these two Deps equal
    """
    # Need to compare variables because they may influence subtree parsing
    # behavior
    path = child_entry[0]
    return (deps_a.entries[path] == deps_b.entries[path] and
            deps_a.variables == deps_b.variables)

  def add_snapshot(self, timestamp, deps, children_entries):
    """Adds parsed DEPS result and children.

    For example, if a given DEPS file has N revisions between start_time and
    end_time, the caller should call this method N times to feed all parsed
    results in order (timestamp increasing).

    Args:
      timestamp: timestamp of `deps`
      deps: Deps object
      children_entries: list of names of deps' children
    """
    assert timestamp not in self.snapshots
    self.snapshots[timestamp] = deps

    # Union of currently-alive children and this snapshot's children.
    # (set | set instead of the py2-only `dict.keys() + list` concatenation.)
    for child_entry in set(self.alive_children) | set(children_entries):
      # `child_entry` is added at `timestamp`
      if child_entry not in self.alive_children:
        self.alive_children[child_entry] = timestamp, deps

      # `child_entry` is removed at `timestamp`
      elif child_entry not in children_entries:
        self.subtrees.append(
            TimeSeriesTree(self.alive_children[child_entry][1], child_entry,
                           self.alive_children[child_entry][0], timestamp))
        del self.alive_children[child_entry]

      # `child_entry` is alive before and after `timestamp`
      else:
        last_deps = self.alive_children[child_entry][1]
        if not self.subtree_eq(last_deps, deps, child_entry):
          # Child content changed: close the old period and start a new one.
          self.subtrees.append(
              TimeSeriesTree(last_deps, child_entry,
                             self.alive_children[child_entry][0], timestamp))
          self.alive_children[child_entry] = timestamp, deps

  def no_more_snapshot(self, deps):
    """Indicates all snapshots are added.

    add_snapshot() should not be invoked after no_more_snapshot().

    Args:
      deps: unused; kept for interface compatibility with existing callers.
    """
    # Close every still-alive child at end_time. The loop variable is named
    # `child_deps` so it does not shadow the `deps` parameter.
    for child_entry, (timestamp, child_deps) in self.alive_children.items():
      if timestamp == self.end_time:
        continue
      self.subtrees.append(
          TimeSeriesTree(child_deps, child_entry, timestamp, self.end_time))
    self.alive_children = None

  def events(self):
    """Gets children added/removed events of this subtree.

    Returns:
      list of (timestamp, deps_name, deps, end_flag):
        timestamp: timestamp of event
        deps_name: name of this subtree
        deps: Deps object of given project
        end_flag: True indicates this is the last event of this deps tree
    """
    assert self.snapshots
    assert self.alive_children is None, ('events() is valid only after '
                                         'no_more_snapshot() is invoked')

    result = []

    # Iterate snapshots in chronological order. Dict iteration order is
    # arbitrary in Python 2, and `last_deps` must be the NEWEST snapshot;
    # otherwise the final end-event could carry a stale Deps object.
    last_deps = None
    for timestamp in sorted(self.snapshots):
      deps = self.snapshots[timestamp]
      result.append((timestamp, self.entry, deps, False))
      last_deps = deps

    assert last_deps
    result.append((self.end_time, self.entry, last_deps, True))

    for subtree in self.subtrees:
      for event in subtree.events():
        result.append(event)

    result.sort()

    return result

  def iter_path_specs(self):
    """Iterates snapshots of project dependency state.

    Yields:
      (timestamp, path_specs):
        timestamp: time of snapshot
        path_specs: dict of path_spec entries
    """
    forest = {}
    # Group by timestamp
    for timestamp, events in itertools.groupby(self.events(),
                                               operator.itemgetter(0)):
      # It's possible that one deps is removed and added at the same timestamp,
      # i.e. modification, so use counter to track.
      end_counter = collections.Counter()

      for timestamp, entry, deps, end in events:
        forest[entry] = deps
        if end:
          end_counter[entry] += 1
        else:
          end_counter[entry] -= 1

      # Merge Deps at time `timestamp` into single path_specs.
      path_specs = {}
      for deps in forest.values():
        for path, dep in deps.entries.items():
          path_specs[path] = dep.as_path_spec()

      yield timestamp, path_specs

      # Remove deps which are removed at this timestamp.
      for entry, count in end_counter.items():
        assert -1 <= count <= 1, (timestamp, entry)
        if count == 1:
          del forest[entry]
| |
| |
class DepsParser(object):
  """Gclient DEPS file parser."""

  def __init__(self, project_root, code_storage):
    """DepsParser constructor.

    Args:
      project_root: root directory of gclient project
      code_storage: codechange.CodeStorage object (for locating cached repos)
    """
    self.project_root = project_root
    self.code_storage = code_storage

  def parse_single_deps(self, content, parent_vars=None, parent_path=''):
    """Parses DEPS file without recursion.

    Args:
      content: file content of DEPS file
      parent_vars: variables inherited from parent DEPS
      parent_path: project path of parent DEPS file

    Returns:
      Deps object
    """

    def var_function(name):
      # Defer variable expansion: record a '{name}' placeholder which
      # Dep.__init__ expands via str.format later.
      return '{%s}' % name

    global_scope = dict(Var=var_function)
    local_scope = {}
    # DEPS files are Python syntax; evaluate in a sandboxed scope.
    # pylint: disable=exec-used
    exec(content, global_scope, local_scope)

    deps = Deps()
    local_scope.setdefault('vars', {})
    if parent_vars:
      # Parent variables (including custom_vars) take precedence.
      local_scope['vars'].update(parent_vars)
    deps.variables = local_scope['vars']

    # Warnings for old usages which we don't support.
    for name in deps.variables:
      if name.startswith('RECURSEDEPS_') or name.endswith('_DEPS_file'):
        logger.warning('%s is deprecated and not supported recursion syntax',
                       name)
    if 'deps_os' in local_scope:
      logger.warning('deps_os is no longer supported')

    # Use .get() so a DEPS file without a 'deps' dict parses to empty entries
    # instead of raising KeyError.
    for path, dep_entry in local_scope.get('deps', {}).items():
      if local_scope.get('use_relative_paths', False):
        path = os.path.join(parent_path, path)
      path = path.format(**deps.variables)
      dep = Dep(path, deps.variables, dep_entry)
      if not dep.eval_condition():
        continue

      # TODO(kcwu): support dep_type=cipd http://crbug.com/846564
      if dep.dep_type != 'git':
        logger.warning('dep_type=%s is not supported yet: %s', dep.dep_type,
                       path)
        continue

      deps.entries[path] = dep

    recursedeps = []
    for recurse_entry in local_scope.get('recursedeps', []):
      # Normalize entries.
      if isinstance(recurse_entry, tuple):
        path, deps_file = recurse_entry
      else:
        # Bug fix: the original asserted isinstance(path, str), which checked
        # a stale `path` variable left over from the deps loop above instead
        # of the current entry.
        assert isinstance(recurse_entry, str)
        path, deps_file = recurse_entry, 'DEPS'

      if local_scope.get('use_relative_paths', False):
        path = os.path.join(parent_path, path)
      path = path.format(**deps.variables)
      # Only recurse into projects actually checked out (condition was true).
      if path in deps.entries:
        recursedeps.append((path, deps_file))
    deps.recursedeps = recursedeps

    return deps

  def construct_deps_tree(self,
                          tstree,
                          repo_url,
                          at,
                          after,
                          before,
                          parent_vars=None,
                          parent_path='',
                          deps_file='DEPS'):
    """Processes DEPS recursively of given time period.

    This method parses all commits of DEPS between time `after` and `before`,
    segments recursive dependencies into subtrees if they are changed, and
    processes subtrees recursively.

    The parsed results (multiple revisions of DEPS file) are stored in `tstree`.

    Args:
      tstree: TimeSeriesTree object
      repo_url: remote repo url
      at: branch or git commit id
      after: begin of period
      before: end of period
      parent_vars: DEPS variables inherit from parent DEPS (including
        custom_vars)
      parent_path: the path of parent project of current DEPS file
      deps_file: filename of DEPS file, relative to the git repo, repo_url
    """
    if '://' in repo_url:
      git_repo = self.code_storage.cached_git_root(repo_url)
    else:
      # Not a remote url; assume it is already a local git repo path.
      git_repo = repo_url

    if git_util.is_git_rev(at):
      # Pinned to a fixed commit: the DEPS content cannot change, so fake a
      # two-point history covering the whole period.
      history = [
          (after, at),
          (before, at),
      ]
    else:
      history = git_util.get_history(
          git_repo,
          deps_file,
          branch=at,
          after=after,
          before=before,
          padding=True)
    assert history

    # If not equal, it means the file was deleted but is still referenced by
    # its parent.
    assert history[-1][0] == before

    # TODO(kcwu): optimization: history[-1] is unused
    for timestamp, git_rev in history[:-1]:
      content = git_util.get_file_from_revision(git_repo, git_rev, deps_file)

      deps = self.parse_single_deps(
          content, parent_vars=parent_vars, parent_path=parent_path)
      tstree.add_snapshot(timestamp, deps, deps.recursedeps)

    tstree.no_more_snapshot(deps)

    # Recurse into every child subtree segmented by tstree.
    for subtree in tstree.subtrees:
      path, deps_file = subtree.entry
      path_spec = subtree.parent_deps.entries[path].as_path_spec()
      self.construct_deps_tree(
          subtree,
          path_spec.repo_url,
          path_spec.at,
          subtree.start_time,
          subtree.end_time,
          parent_vars=subtree.parent_deps.variables,
          parent_path=path,
          deps_file=deps_file)

  def enumerate_path_specs(self, start_time, end_time, path):
    """Yields (timestamp, path_specs) snapshots for the whole project tree.

    Args:
      start_time: begin of period
      end_time: end of period
      path: root project url/path

    Returns:
      generator of (timestamp, path_specs), see iter_path_specs()
    """
    tstree = TimeSeriesTree(None, path, start_time, end_time)
    self.construct_deps_tree(tstree, path, 'master', start_time, end_time)
    return tstree.iter_path_specs()
| |
| |
class GclientCache(codechange.CodeStorage):
  """Gclient git cache."""

  def __init__(self, cache_dir):
    # cache_dir: root folder of gclient's git cache
    self.cache_dir = cache_dir

  def _url_to_cache_dir(self, url):
    """Maps a repo url to its folder name inside the git cache.

    ref: depot_tools' git_cache.Mirror.UrlToCacheDir
    """
    parsed = urlparse.urlparse(url)
    norm_url = parsed.netloc + parsed.path
    if norm_url.endswith('.git'):
      norm_url = norm_url[:-len('.git')]
    # Escape existing dashes before turning path separators into dashes.
    return norm_url.replace('-', '--').replace('/', '-').lower()

  def cached_git_root(self, repo_url):
    """Returns the local cached git repo path for `repo_url`."""
    cache_path = self._url_to_cache_dir(repo_url)
    return os.path.join(self.cache_dir, cache_path)

  def _load_project_list(self, project_root):
    """Reads gclient's .gclient_entries and returns its `entries` dict."""
    repo_project_list = os.path.join(project_root, '.gclient_entries')
    scope = {}
    # Use a context manager so the file handle is closed; the original
    # `exec open(...) in scope` leaked it.
    with open(repo_project_list) as f:
      exec(f.read(), scope)  # pylint: disable=exec-used
    return scope.get('entries', {})

  def _save_project_list(self, project_root, projects):
    """Writes `projects` back as gclient's .gclient_entries file."""
    repo_project_list = os.path.join(project_root, '.gclient_entries')
    content = 'entries = {\n'
    for item in sorted(projects.items()):
      # pprint.pformat quotes/escapes the strings as Python literals.
      path, repo_url = map(pprint.pformat, item)
      content += '  %s: %s,\n' % (path, repo_url)
    content += '}\n'
    with open(repo_project_list, 'w') as f:
      f.write(content)

  def add_to_project_list(self, project_root, path, repo_url):
    projects = self._load_project_list(project_root)

    projects[path] = repo_url

    self._save_project_list(project_root, projects)

  def remove_from_project_list(self, project_root, path):
    projects = self._load_project_list(project_root)

    if path in projects:
      del projects[path]

    self._save_project_list(project_root, projects)