blob: 6aa33ae30b79e4abc9ed8f6e72db312aad936604 [file] [log] [blame] [edit]
#!/usr/bin/env python3
# Copyright 2020 The ChromiumOS Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import argparse
import collections
import datetime
import os
import subprocess
import sys
import time
import filter_config
import filtered_utils
import filters
import lazytree
import utils
# Result of a look-forward scan.
#   look_forward_until_index: 1-based index into pending_commits; there are
#       no changes we care about before this index.
#   parent_commit_hash: the previously made commit (the one we cared about)
#       that serves as parent for the commits being skipped.
LookForwardInformation = collections.namedtuple(
    'LookForwardInformation',
    ('look_forward_until_index', 'parent_commit_hash'),
)

# The reported speed is averaged over the last _TIMING_DISTANCE commits.
_TIMING_DISTANCE = 100

# When a commit is made, verify that the diff-based tree equals the actual
# filtered tree once every _VERIFY_INTEGRITY_DISTANCE commits of the browser
# repository. Merge commits are always verified.
_VERIFY_INTEGRITY_DISTANCE = 1000
def timing(timing_deque, update=True):
    """Returns the average speed (commits/second), and updates timing_deque.

    The speed is measured from the oldest timestamp stored in timing_deque up
    to the current time. Every stored timestamp starts one interval that ends
    no later than now, so the number of commits covered equals the number of
    stored timestamps. Once the deque is full this is _TIMING_DISTANCE.

    Args:
        timing_deque: a non-empty deque of time.time() timestamps, oldest
            first. At most _TIMING_DISTANCE entries are kept.
        update: adds current timestamp to timing_deque if True. It needs to
            be set to False if the function is called multiple times for the
            same commit, so that the commit is only counted once.

    Returns:
        The speed as a float, or float('inf') when no measurable time has
        elapsed since the oldest timestamp. Callers can still format the
        value or divide by it without hitting ZeroDivisionError.

    Raises:
        IndexError: if timing_deque is empty.
    """
    first = timing_deque[0]
    # Count the intervals before appending: during warm-up the deque holds
    # fewer than _TIMING_DISTANCE timestamps, and dividing the full window
    # size by the elapsed time would overstate the speed.
    intervals = len(timing_deque)
    now = time.time()
    if update:
        timing_deque.append(now)
        if len(timing_deque) > _TIMING_DISTANCE:
            timing_deque.popleft()
    elapsed = now - first
    if elapsed <= 0:
        # Clock resolution too coarse (or clock stepped backwards).
        return float('inf')
    return intervals / elapsed
def get_start_commit_of_browser_tree(parent_filtered):
    """Finds the most recent commit on the filtered branch made by the script.

    Starting at parent_filtered, walks the first-parent chain until a commit
    whose metadata carries original_commits is found.

    Args:
        parent_filtered: the commit hash (bytes) of the tip of the filtered
            branch.

    Returns:
        A (commit_hash, metadata) pair for the commit found, or (None, None)
        when the chain ends without such a commit.
    """
    assert isinstance(parent_filtered, bytes)
    candidate = parent_filtered
    metadata = filtered_utils.get_metadata(candidate)
    while not metadata.original_commits:
        if not metadata.parents:
            # Ran out of history without finding a commit made by the script.
            return None, None
        # Follow main line only.
        candidate = metadata.parents[0]
        metadata = filtered_utils.get_metadata(candidate)
    return candidate, metadata
def find_filtered_commit(commit, commits_map):
    """Finds the filtered-branch commit corresponding to a browser commit.

    If commit itself has no entry in commits_map, its ancestors are followed
    (each is asserted to have at most one parent) until a mapped one is found.
    When the history is exhausted, the entry stored under 'ROOT' is used.

    Args:
        commit: commit hash in browser repository.
        commits_map: commit hash mapping from original commit to the one in
            the filtered branch. The result is cached under commit, so
            commits_map may be altered.

    Returns:
        The commit hash in the filtered branch.
    """
    cursor = commit
    while True:
        if cursor in commits_map:
            break
        parents = filtered_utils.get_metadata(cursor).parents
        assert len(parents) <= 1
        cursor = parents[0] if len(parents) == 1 else 'ROOT'
    filtered = commits_map[cursor]
    # Remember the answer so later lookups of `commit` are immediate.
    commits_map[commit] = filtered
    return filtered
def do_commit(treehash, commithash, meta, commits_map, commit_hash_meta_name):
    """Creates a commit object on the filtered branch with `git commit-tree`.

    The author name, email and authored date of the original commit are
    passed to git through the GIT_AUTHOR_* environment variables, and the
    original message is reused. A trailer line
    `<commit_hash_meta_name>: <original-commit-hash>` is appended at the end
    of the commit message.

    The parents of the new commit are the filtered counterparts of the
    original commit's parents, resolved with find_filtered_commit.

    Args:
        treehash: tree object id for this commit.
        commithash: original commit hash, used to append to commit message.
        meta: meta data of the original commit.
        commits_map: current known commit mapping. commits_map may be altered.
        commit_hash_meta_name: key (bytes) of the trailer line holding the
            original commit hash.

    Returns:
        The output of `git commit-tree` with surrounding newlines stripped,
        i.e. the hash of the new commit as bytes.
    """
    parent_args = []
    for original_parent in meta.parents:
        parent_args += [
            '-p',
            find_filtered_commit(original_parent, commits_map),
        ]

    trailer = commit_hash_meta_name + b': ' + commithash + b'\n'
    message = meta.message + b'\n\n' + trailer

    author = meta.authorship
    git_env = dict(os.environ)
    git_env.update(
        GIT_AUTHOR_NAME=author.name,
        GIT_AUTHOR_EMAIL=author.email,
        GIT_AUTHOR_DATE=b' '.join([author.time, author.timezone]),
    )

    output = subprocess.check_output(
        ['git', 'commit-tree', *parent_args, treehash],
        env=git_env,
        input=message)
    return output.strip(b'\n')
def verify_commit(libchrome_filter, original_commit, new_tree):
    """Verifies if new_tree is exactly original_commit after filters.

    The full file list of original_commit is filtered and turned into a tree
    object, whose hash must equal new_tree.

    Args:
        libchrome_filter: the filter whose filter_files() selects the files
            to keep.
        original_commit: commit hash in Chromium browser tree.
        new_tree: tree hash created for upstream branch commit.

    Raises:
        AssertionError: if the trees differ. It is raised explicitly rather
            than through an `assert` statement, so the integrity check still
            runs when Python is started with -O.
    """
    expected_file_list = libchrome_filter.filter_files(
        utils.get_file_list(original_commit))
    expected_tree = utils.git_mktree(expected_file_list)
    if expected_tree != new_tree:
        raise AssertionError(
            'Filtered tree mismatch for commit %r: expected tree %r, got %r' %
            (original_commit, expected_tree, new_tree))
def check_look_forward(pending_commits, base_commit, base_commit_meta,
                       look_forward_from, look_forward_cnt,
                       ignore_look_forward_until, commits_map,
                       libchrome_filter):
    """Checks if we can look forward a few commits.

    Scans the look_forward_cnt commits starting at
    pending_commits[look_forward_from]. They can be skipped when none of them
    is a merge commit and the filtered diff between the last of them and
    base_commit is empty.

    Returns (look_forward_until, ignore_look_forward_until).
    look_forward_until is None when no scan was done (feature disabled, still
    inside the ignore range, or too close to the end of pending_commits);
    otherwise it is a LookForwardInformation. After a successful scan its
    index is look_forward_from + look_forward_cnt and its parent is the
    filtered commit mapped to base_commit's first parent. After a failed scan
    its index is look_forward_from - 1 and its parent is None.
    ignore_look_forward_until is a number, which indicates we don't need to
    call check_look_forward until look_forward_from reaches it.

    Args:
        pending_commits: list of GitCommitInRevList to process, in topological
            order.
        base_commit: an element in pending_commits.
        base_commit_meta: a GitCommitMetadata for base_commit.
        look_forward_from: the first commit after base_commit to check.
        look_forward_cnt: total count of commits to scan for look-forward.
        ignore_look_forward_until: do not look forward until look_forward_from
            reaching ignore_look_forward_until.
        commits_map: current known commit mapping. It is only read here.
        libchrome_filter: the filter to use after diff commits.
    """
    # Not to look forward if feature disabled.
    if not look_forward_cnt:
        return None, 0

    window_end = look_forward_from + look_forward_cnt
    # Not to look forward until ignore_look_forward_until, nor at the last
    # few commits.
    if (look_forward_from < ignore_look_forward_until or
            window_end >= len(pending_commits)):
        return None, ignore_look_forward_until

    merge_commit_count = 0
    for candidate in pending_commits[look_forward_from:window_end]:
        if len(candidate.parent_hashes) != 1:
            merge_commit_count += 1

    last_in_window = pending_commits[window_end - 1]
    future_diff = libchrome_filter.filter_diff(
        utils.git_difftree(last_in_window.commit_hash,
                           base_commit.commit_hash))

    if merge_commit_count == 0 and len(future_diff) == 0:
        skip_to = LookForwardInformation(
            window_end, commits_map[base_commit_meta.parents[0]])
        return skip_to, look_forward_from - 1

    # It has changes, do not check again at next iteration.
    return LookForwardInformation(look_forward_from - 1, None), window_end
def process_commits(libchrome_filter, pending_commits, commits_map,
                    look_forward, commit_hash_meta_name, progress_callback,
                    commit_callback):
    """Processes new commits in browser repository.

    Walks pending_commits in order. A non-merge commit whose filtered diff
    against its first parent is empty is mapped onto its parent's filtered
    commit. Any other commit is re-created on the filtered branch by applying
    the filtered diff to the parent's filtered tree and calling do_commit.

    Returns the commit hash of the last commit made, or None if no commit was
    made.

    Args:
        libchrome_filter: the filter to use after diff commits.
        pending_commits: list of GitCommitInRevList to process, in topological
            order.
        commits_map: current known commit mapping. may be altered.
        look_forward: look at next look_forward commits and skip all of them
            if next look_forward combined doesn't have any diff (submit +
            revert will be ignored), and has no merge commit. A falsy value
            disables the feature.
        commit_hash_meta_name: key (bytes) of the commit message trailer that
            records the original commit hash; passed through to do_commit.
        progress_callback: callback when a commit in Chromium upstream has
            been read. It should take (idx, tot, original_commit_hash, meta)
            as parameters. meta is None for commits skipped by look-forward.
            It is not called if falsy.
        commit_callback: callback when a commit is made to filtered branch. It
            should take (orig_commit_hash, new_commit_hash, meta) as
            parameters. It is not called if falsy.
    """
    last_commit = None
    last_verified = -1
    look_forward_until = None
    ignore_look_forward_until = 0
    # i is 1-based, so pending_commits[i] is the commit following `commit`.
    for i, commit in enumerate(pending_commits, start=1):
        # [i, look_forward_until.look_forward_until_index] has no changes. Skip
        # CLs are ignored if it's submitted and reverted. Since it brings no
        # change, and a Reland can bring back the correct history (and usually
        # contains the correct commit message).
        if look_forward_until and i < look_forward_until.look_forward_until_index:
            commits_map[
                commit.commit_hash] = look_forward_until.parent_commit_hash
            if progress_callback:
                progress_callback(i, len(pending_commits), commit.commit_hash,
                                  None)
            continue
        meta = filtered_utils.get_metadata(commit.commit_hash)
        if progress_callback:
            progress_callback(i, len(pending_commits), commit.commit_hash, meta)
        # Read diff and parent tree.
        diff_with_parent = libchrome_filter.filter_diff(
            utils.git_difftree(meta.parents[0] if meta.parents else None,
                               commit.commit_hash))
        # Start from the filtered tree of the first parent (or from None for
        # a commit without parents).
        git_lazytree = lazytree.LazyTree(
            filtered_utils.
            get_metadata(find_filtered_commit(meta.parents[0], commits_map)
                        ).tree if meta.parents else None)
        if len(meta.parents) <= 1 and len(diff_with_parent) == 0:
            # not merge commit AND no diff
            if len(meta.parents) == 1 and meta.parents[0] in commits_map:
                commits_map[commit.commit_hash] = commits_map[meta.parents[0]]
                # Check if [i, i+look_forward] has no changes.
                # check_look_forward reads commits_map[meta.parents[0]], which
                # is known to exist here.
                look_forward_until, ignore_look_forward_until = check_look_forward(
                    pending_commits, commit, meta, i, look_forward,
                    ignore_look_forward_until, commits_map, libchrome_filter)
            continue
        # Apply diff and commit.
        for op, f in diff_with_parent:
            if op == utils.DiffOperations.ADD or op == utils.DiffOperations.REP:
                git_lazytree[f.path] = f
            elif op == utils.DiffOperations.DEL:
                del git_lazytree[f.path]
        treehash_after_diff_applied = git_lazytree.hash()
        filtered_commit = do_commit(treehash_after_diff_applied,
                                    commit.commit_hash, meta, commits_map,
                                    commit_hash_meta_name)
        if commit_callback:
            commit_callback(commit.commit_hash, filtered_commit, meta)
        commits_map[commit.commit_hash] = filtered_commit
        last_commit = filtered_commit
        if len(meta.parents) > 1 or (i - last_verified >=
                                     _VERIFY_INTEGRITY_DISTANCE):
            # merge commit OR every _VERIFY_INTEGRITY_DISTANCE
            last_verified = i
            verify_commit(libchrome_filter, commit.commit_hash,
                          treehash_after_diff_applied)
    # Verify last commit
    # The tree of the last commit made is compared against the last pending
    # commit: any commit processed after last_commit was found to bring no
    # filtered change.
    if last_commit:
        verify_commit(libchrome_filter, pending_commits[-1].commit_hash,
                      filtered_utils.get_metadata(last_commit).tree)
    return last_commit
def main():
    """Command-line entry point.

    Parses the arguments, builds the mapping between browser commits and the
    commits already on the filtered branch, then filters every browser commit
    between the last known original commit and goal_browser onto the filtered
    branch via process_commits. If any commit was made, the hash of the new
    filtered head is printed to stdout. Progress and diagnostics go to stderr
    or are discarded, depending on --quiet / --verbose.
    """
    # Init args
    parser = argparse.ArgumentParser(description='Copy file from given commits')
    parser.add_argument(
        'parent_filtered',
        metavar='parent_filtered',
        type=str,
        nargs=1,
        help=
        'commit hash in filtered branch to continue from. usually HEAD of that branch.'
    )
    parser.add_argument('goal_browser',
                        metavar='goal_browser',
                        type=str,
                        nargs=1,
                        help='commit hash in browser master branch.')
    parser.add_argument(
        '--filter_files',
        metavar='filter_files',
        type=str,
        help=
        'Path a file which should contain file paths to be checked in each line. This overwrites the default file filtering rules, if given.',
        nargs='?')
    parser.add_argument(
        '--commit_hash_meta_name',
        metavar='commit_hash_meta_name',
        type=str,
        default=filtered_utils.CROS_LIBCHROME_ORIGINAL_COMMIT.decode('utf-8'),
        help=
        'Machine-and-people redable metadata key for original commit hash. This overwrites the default value, if given.',
        nargs='?')
    # NOTE(review): --dry_run is accepted but never consulted in this
    # function.
    parser.add_argument('--dry_run', dest='dry_run', action='store_true')
    parser.add_argument('--verbose', dest='verbose', action='store_true')
    parser.add_argument('--quiet', dest='quiet', action='store_true')
    arg = parser.parse_args(sys.argv[1:])
    parent_filtered = arg.parent_filtered[0]

    # One shared sink for suppressed output, closed when main() finishes.
    with open(os.devnull, 'w') as devnull:
        if arg.quiet:
            INFO = VERBOSE = devnull
        elif arg.verbose:
            INFO = sys.stderr
            VERBOSE = sys.stderr
        else:
            INFO = sys.stderr
            VERBOSE = devnull

        # Init filters
        if arg.filter_files:
            with open(arg.filter_files) as f:
                lines = [line.strip().encode('utf-8') for line in f]
            libchrome_filter = filters.Filter([filters.PathFilter(lines)], [],
                                              [])
            print('Filter loaded', file=INFO)
        else:
            libchrome_filter = filters.Filter(filter_config.WANT,
                                              filter_config.WANT_EXCLUDE,
                                              filter_config.ALWAYS_WANT)

        # Look for last known commit made by the script in filtered branch.
        print('Looking for last known commit from', parent_filtered, file=INFO)
        parent_filtered_bytes = parent_filtered.encode('utf-8')
        last_known, meta_last_known = get_start_commit_of_browser_tree(
            parent_filtered_bytes)
        if last_known:
            print('Continuing from', last_known, meta_last_known, file=INFO)
        else:
            print('No known last commit', file=INFO)
        print('parent on filter branch', parent_filtered, file=INFO)

        # Get a mapping between browser repository and filtered branch for
        # commits in filtered branch.
        print('reading commits details for commits mapping', file=INFO)
        timing_deque = collections.deque([time.time()])
        commits_map = filtered_utils.get_commits_map(
            parent_filtered, lambda cur_idx, tot_cnt, cur_hash:
            (print('Reading',
                   cur_hash,
                   '%d/%d' % (cur_idx, tot_cnt),
                   '%f c/s' % timing(timing_deque),
                   end='\r',
                   file=VERBOSE,
                   flush=True),))
        if 'ROOT' not in commits_map:
            # Create an empty-tree commit on top of parent_filtered; commits
            # whose ancestry has no mapped commit attach to it.
            commits_map['ROOT'] = subprocess.check_output(
                [
                    'git', 'commit-tree', '-p', parent_filtered,
                    utils.git_mktree([])
                ],
                input=filtered_utils.CROS_LIBCHROME_INITIAL_COMMIT).strip(b'\n')
        # Terminate the '\r'-refreshed progress line.
        print(file=VERBOSE)
        print('loaded commit mapping of', len(commits_map), 'commit', file=INFO)

        # If last_known is not parent_filtered, which means some other commits
        # are made after last_known, use parent_filtered as HEAD to connect.
        if last_known and last_known != parent_filtered_bytes:
            assert meta_last_known.original_commits[0] in commits_map
            commits_map[
                meta_last_known.original_commits[0]] = parent_filtered_bytes

        # Process newer commits in browser repository from
        # last_known.original_commits
        print('search for commits to filter', file=INFO)
        timing_deque = collections.deque([time.time()])
        pending_commits = utils.git_revlist(
            meta_last_known.original_commits[0] if meta_last_known else None,
            arg.goal_browser[0])
        print(len(pending_commits), 'commits to process', file=INFO)
        new_head = process_commits(
            libchrome_filter,
            pending_commits,
            commits_map,
            # Use look_forward=1000 if filter_files is set.
            # This is used to generate a small sets of newly wanted files.
            1000 if arg.filter_files else None,
            arg.commit_hash_meta_name.encode('utf-8'),
            # Print progress. timing() is called twice per commit, so only
            # the second call records the timestamp.
            lambda cur_idx, tot_cnt, cur_hash, cur_meta:
            (print('Processing',
                   cur_hash,
                   '%d/%d' % (cur_idx, tot_cnt),
                   '%f c/s' % timing(timing_deque, update=False),
                   'eta %s' % (datetime.timedelta(seconds=int(
                       (tot_cnt - cur_idx) / timing(timing_deque)))),
                   cur_meta.title[:50] if cur_meta else '',
                   end='\r',
                   file=VERBOSE,
                   flush=True),),
            # Print new commits
            lambda orig_hash, new_hash, commit_meta: print(
                b'%s is commited as %s: %s' %
                (orig_hash, new_hash, commit_meta.title[:50]),
                file=INFO))
        print(file=VERBOSE)
        if new_head:
            print(new_head.decode('ascii'))
# Run the tool only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()