| #!/usr/bin/env python3 |
| # Copyright 2020 The ChromiumOS Authors |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| """ |
| Utility to disconnect history of files from a branch, and reconnect with base on |
| a different branch. |
| """ |
| |
| import argparse |
| import collections |
| import subprocess |
| import sys |
| |
| import filtered_utils |
| import lazytree |
| import utils |
| |
| |
class CommitMetadataFactory(dict):
    """Dictionary that loads commit metadata lazily on first lookup.

    Subscripting with a key that is not stored yet fetches the value through
    filtered_utils.get_metadata and keeps it for subsequent lookups.
    """

    def __missing__(self, key):
        """Fetches, stores and returns the metadata for an absent key."""
        metadata = self[key] = filtered_utils.get_metadata(key)
        return metadata
| |
| |
def disconnect(source_commit, ref_commit):
    """Creates a commit on top of source_commit without ref_commit's files.

    Every path listed for ref_commit is dropped from the file list of
    source_commit; the remaining entries form the tree of the new commit,
    whose only parent is source_commit.

    Args:
        source_commit: commit hash to disconnect from.
        ref_commit: commit hash whose file list says which paths to remove.

    Returns:
        Whatever utils.git_commit returns for the newly created commit.
    """
    source_entries = utils.get_file_list(source_commit)
    ref_entries = utils.get_file_list(ref_commit)

    paths_to_drop = {entry.path for entry in ref_entries}
    surviving_entries = [
        entry for entry in source_entries if entry.path not in paths_to_drop
    ]

    new_tree = utils.git_mktree(surviving_entries)
    commit_message = (b'Disconnect history from %s' %
                      source_commit.encode('ascii'))
    return utils.git_commit(new_tree, [source_commit], message=commit_message)
| |
| |
def connect_base(current_commit, base_commit):
    """Creates a merge commit that adds the files of base_commit.

    The new tree is built from the file list of current_commit followed by
    the file list of base_commit, and the commit has both of them as parents
    (similar to running git merge base_commit on current_commit).

    Args:
        current_commit: commit hash to commit on top of.
        base_commit: commit hash that carries the file histories.

    Returns:
        Whatever utils.git_commit returns for the newly created commit.
    """
    merged_entries = (utils.get_file_list(current_commit) +
                      utils.get_file_list(base_commit))
    merged_tree = utils.git_mktree(merged_entries)
    commit_message = (b'Connect history with base %s' %
                      base_commit.encode('ascii'))
    parents = [current_commit, base_commit]
    return utils.git_commit(merged_tree, parents, message=commit_message)
| |
| |
def blame_files(commithash, files):
    """Blames each of the given files on the given commithash.

    Args:
        commithash: commit hash to run the blame on.
        files: iterable of file paths to blame.

    Returns:
        A dict from file path to the result of utils.git_blame for that path.
    """
    return {path: utils.git_blame(commithash, path) for path in files}
| |
| |
def search_blame_line(blames, amend_commits, target_commit_hash):
    """Searches blames for lines attributed to target_commit_hash.

    Each file in blames is blamed again on every commit in amend_commits and
    the results are zipped line by line with the original blame. Only the
    rows whose original blame line belongs to target_commit_hash are kept.

    Args:
        blames: a dict from path to list of GitBlameLine, for files blamed on
            target_commit_hash.
        amend_commits: a list of commit hashes to provide actual history.
        target_commit_hash: commit hash that blames are blamed on.

    Returns:
        A map from file path to a list of tuples. Each tuple has
        len(amend_commits) + 1 elements: the 0-th element is the line in
        blames, and the 1st to n-th elements are the corresponding lines in
        the blames of amend_commits.
    """
    combined_by_path = {}
    for path, goal_lines in blames.items():
        amend_blames = [
            utils.git_blame(amend_commit, path)
            for amend_commit in amend_commits
        ]
        matching_rows = []
        for row in zip(goal_lines, *amend_blames):
            if row[0].commit == target_commit_hash:
                matching_rows.append(row)
        combined_by_path[path] = matching_rows
    return combined_by_path
| |
| |
def get_track_from_blames(blames_combined, virtual_goal_commit, amend_commits,
                          commit_choice_cache, commit_msg_cache):
    """Picks, for every combined blame row, the amend commit to attribute it to.

    For each row the commits of the amend blame lines (elements 1..n) form a
    key into commit_choice_cache. On a cache miss the candidates are printed
    and the user is asked to choose one by index; the answer is cached.

    Args:
        blames_combined: a map from path to a list of tuples. Each tuple
            reflects one line and has len(amend_commits)+1 elements. See
            search_blame_line for details.
        virtual_goal_commit: a commit that contains no useful history for
            diffs. Not read by this function.
        amend_commits: list of HEAD commit hashes that refer to trees that can
            amend the diffs. Not read by this function.
        commit_choice_cache: caches user choice on which amend commit to use;
            updated in place.
        commit_msg_cache: caches commit metadata; only its title attribute is
            read, when prompting.

    Returns:
        A tuple containing:
        - a set of the chosen commit hashes;
        - a map from path to a list of (goal blame line, chosen commit hash)
          pairs, one per row.
    """
    tracked_commits = set()
    choices_by_path = {}

    for path, rows in blames_combined.items():
        file_choices = []
        choices_by_path[path] = file_choices
        for row in rows:
            candidates = tuple(amend_line.commit for amend_line in row[1:])
            picked = commit_choice_cache.get(candidates)
            if picked is None:
                for position, candidate in enumerate(candidates):
                    title = commit_msg_cache[candidate].title
                    print('%d: %s' % (position, title))
                # The answer is deliberately not validated: the user is a
                # trusted developer who can simply rerun after a typo.
                answer = int(input('Choose patch: '))
                picked = candidates[answer]
                commit_choice_cache[candidates] = picked
            tracked_commits.add(picked)
            file_choices.append((row[0], picked))

    return tracked_commits, choices_by_path
| |
| |
def reconstruct_file(blame_goal, blame_base, lines_to_reconstruct,
                     virtual_goal_commit):
    """Reconstructs a file to reflect changes in lines_to_reconstruct.

    Walks blame_base and blame_goal side by side. Lines that are identical on
    both sides are copied; goal lines listed in lines_to_reconstruct are
    inserted; other goal lines attributed to virtual_goal_commit are ignored.
    Base lines with no counterpart are dropped when they directly follow an
    inserted goal line (they are treated as the old version of the modified
    line) and preserved otherwise.

    Args:
        blame_goal: a list of utils.GitBlameLine blaming the file on
            virtual_goal_commit.
        blame_base: a list of utils.GitBlameLine blaming the file on last
            committed commit.
        lines_to_reconstruct: only these lines are reconstructed, instead of
            everything in blame_goal. It is a list of GitBlameLine, expected
            in the same order as they appear in blame_goal.
        virtual_goal_commit: commit hash where blame_goal is based on.

    Returns:
        The new file content as bytes, each line terminated by a newline.

    Raises:
        ValueError: a goal line is neither attributed to virtual_goal_commit,
            nor listed in lines_to_reconstruct, nor found in the remaining
            base lines, so the two blames are inconsistent.
    """
    idx_base, idx_goal = 0, 0
    reconstructed_file = []
    # Nothing has been taken from goal yet, so unmatched base lines seen before
    # the first insertion must be preserved.
    should_skip_base = False

    print('Changed lines are', [line.data for line in lines_to_reconstruct])
    line_iter = iter(lines_to_reconstruct)
    line = next(line_iter, None)
    while idx_base < len(blame_base) or idx_goal < len(blame_goal):
        # Either side may run out first (e.g. lines removed at the end of the
        # file), so every access below is guarded by these flags.
        has_base = idx_base < len(blame_base)
        has_goal = idx_goal < len(blame_goal)
        # Both sides are identical. We can't compare blame_base and line
        # directly because blaming on a different commit could end up with a
        # different lineno.
        if (has_base and has_goal and
                blame_base[idx_base].data == blame_goal[idx_goal].data and
                blame_base[idx_base].commit == blame_goal[idx_goal].commit):
            # We append this line if both sides are identical.
            reconstructed_file.append(blame_base[idx_base].data)
            idx_base += 1
            idx_goal += 1
            should_skip_base = False
        elif has_goal and line and blame_goal[idx_goal] == line:
            # We append the line from goal, if blame_goal[idx_goal] is the line
            # we're interested in.
            reconstructed_file.append(line.data)
            line = next(line_iter, None)
            idx_goal += 1
            should_skip_base = True
        elif has_goal and blame_goal[idx_goal].commit == virtual_goal_commit:
            # We skip the line from goal, if the change is not in the commit
            # we're interested in. Thus, changed lines in other commits will
            # not be reflected.
            idx_goal += 1
        elif has_base:
            # We should skip base if we just appended some lines from goal.
            # This treats modified lines as append first and skip later.
            # If we didn't append something from goal, lines from base should
            # be preserved because the modified lines are not in the commit
            # we're currently interested in.
            if not should_skip_base:
                reconstructed_file.append(blame_base[idx_base].data)
            idx_base += 1
        else:
            # Base is exhausted but goal still holds a line that claims to come
            # from existing history; no progress is possible.
            raise ValueError(
                'goal line %d is not attributed to the virtual goal commit '
                'and has no matching line left in base' % idx_goal)

    return b''.join([content + b'\n' for content in reconstructed_file])
| |
| |
def reconstruct_files(track_commit, blame_untracked_lines, blames,
                      current_base_commit, virtual_goal_commit):
    """Reconstructs every file that has lines assigned to track_commit.

    Args:
        track_commit: commit hash to track, and reconstruct from.
        blame_untracked_lines: a map from path to a list of
            (blame line, chosen commit) pairs. See get_track_from_blames.
        blames: a map from filename to list of utils.GitBlameLine.
        current_base_commit: commit hash for HEAD of base that contains base
            history + already committed amend history.
        virtual_goal_commit: commit hash for one giant commit that has no
            history. virtual_goal_commit is one commit ahead of
            current_base_commit.

    Returns:
        A map from file path to reconstructed file content. Files without any
        line assigned to track_commit are not included.
    """
    # Keep, per file, only the blame lines whose chosen commit is track_commit.
    lines_to_track = {}
    for path, tagged_lines in blame_untracked_lines.items():
        selected = [
            tagged[0] for tagged in tagged_lines if tagged[1] == track_commit
        ]
        if selected:
            lines_to_track[path] = selected

    constructed_files = {}
    for path, selected in lines_to_track.items():
        print('Reconstructing', path, 'for', track_commit)
        base_blame = utils.git_blame(current_base_commit, path)
        constructed_files[path] = reconstruct_file(blames[path], base_blame,
                                                   selected,
                                                   virtual_goal_commit)
    return constructed_files
| |
| |
def main():
    """Command-line entry point: rebuilds history commit by commit.

    Disconnects the files of base_commit from disconnect_from, merges
    base_commit back in, then repeatedly reconstructs one amend commit at a
    time until no diff towards the tree of disconnect_from remains or no
    commit is left to track. A final commit with the tree of disconnect_from
    is created at the end and printed.
    """
    # Init args
    parser = argparse.ArgumentParser(description='Reconnect git history')
    positionals = (
        ('disconnect_from', 1, 'disconnect history from this commit'),
        ('base_commit', 1, 'base commit to use the history'),
        ('amend_commits', '+', 'commits to amend histories from base_commit'),
    )
    for arg_name, arg_count, arg_help in positionals:
        parser.add_argument(arg_name,
                            metavar=arg_name,
                            type=str,
                            nargs=arg_count,
                            help=arg_help)

    arg = parser.parse_args(sys.argv[1:])
    goal_commit = arg.disconnect_from[0]
    base_commit = arg.base_commit[0]
    amend_commits = arg.amend_commits

    empty_commit = disconnect(goal_commit, base_commit)
    connected_base = connect_base(empty_commit, base_commit)

    commit_msg_cache = CommitMetadataFactory()
    commit_choice_cache = {}
    head = connected_base
    # Each iteration of the loop
    # - re-creates the new goal commit (base + committed history + (one giant)
    #   uncommitted history);
    # - blames on the new goal commit and the tips of the amend commits, and
    #   maps line-by-line from uncommitted to past histories;
    # - chooses one of the past commits, reconstructs files to reflect changes
    #   in that commit, and creates a new commit.
    # head, commit_msg_cache and commit_choice_cache persist across
    # iterations.
    while True:
        # One commit is processed per iteration.

        # Create virtual target commit, and its diff.
        virtual_goal = utils.git_commit(goal_commit + '^{tree}', [head])
        diffs = utils.git_difftree(None, virtual_goal)
        if not diffs:
            print('No diffs are found between %s and goal.' %
                  (head.decode('ascii'),))
            break

        changed_paths = [diff.file.path for diff in diffs]
        blames = blame_files(virtual_goal, changed_paths)
        blames_combined = search_blame_line(blames, amend_commits,
                                            virtual_goal)

        commits_to_track, blame_untracked_lines = get_track_from_blames(
            blames_combined, virtual_goal, amend_commits,
            commit_choice_cache, commit_msg_cache)
        if not commits_to_track:
            print('no commits to track, stopping')
            break

        # Pick one commit from commits_to_track in a stable way, and
        # reconstruct it.
        track_commit = min(commits_to_track)
        print('Reconstructing commit %s: %s' %
              (track_commit, commit_msg_cache[track_commit].title))
        constructed_files = reconstruct_files(track_commit,
                                              blame_untracked_lines, blames,
                                              head, virtual_goal)

        # Mktree and commit with the reconstructed files.
        tree = lazytree.LazyTree(filtered_utils.get_metadata(head).tree)
        for filename, filedata in constructed_files.items():
            hash_object_cmd = ['git', 'hash-object', '-w', '/dev/stdin']
            blob = subprocess.check_output(hash_object_cmd,
                                           input=filedata).strip()
            tree[filename] = utils.GitFile(filename, tree[filename].mode, blob)

        meta = commit_msg_cache[track_commit]
        tree_hash = tree.hash()
        commit_message = b''.join(
            [meta.message, b'\n(Reconstructed from ', track_commit, b')\n'])
        author_env = {
            'GIT_AUTHOR_NAME': meta.authorship.name,
            'GIT_AUTHOR_EMAIL': meta.authorship.email,
            'GIT_AUTHOR_DATE': b' '.join(
                [meta.authorship.time, meta.authorship.timezone]),
        }
        head = utils.git_commit(tree_hash, [head], commit_message, author_env)
        print('Reconstructed as', head)

    # Make last commit for history reconstruction.
    final_tree = filtered_utils.get_metadata(goal_commit).tree
    final_commit = utils.git_commit(
        final_tree, [head],
        b'Finished history reconstruction\n\nRemoving unnecessary lines\n')
    print(final_commit)
| |
| |
# Run the tool only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()