| # Copyright 2018 The Chromium Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| """Prettifies 'git diff' output. |
| |
| prettify_diff() takes a diff string, and returns an HTML string decorating the |
| diff. |
| |
| This code doesn't support other diff commands such as "diff" and "svn diff". |
| """ |
| |
import base64
import difflib
import mimetypes
import re
import zlib

try:
    # Kept for backward compatibility; the cgi module was removed from the
    # standard library in Python 3.13.
    import cgi
except ImportError:
    cgi = None

try:
    # Python 3.2+. cgi.escape() was removed in Python 3.8.
    from html import escape as html_escape
except ImportError:
    # Python 2 has no html module.
    from cgi import escape as html_escape

from blinkpy.common.base85 import decode_base85
| |
# Preamble of every generated page: the doctype, an inline stylesheet and the
# toggleFollowingRows() script invoked by the button in each file header.
# The style below is meant to be similar to PolyGerrit.
# The <body> element opened here is closed by prettify_diff().
_LEADING_HTML = """<!DOCTYPE html>
<meta charset="UTF-8">
<style>
body {
background: white;
font-family: "Roboto Mono", Menlo, "Lucida Console", Monaco, monospace;
}
table {
border-collapse: collapse;
border-spacing: 0;
width: 100%;
margin-top: 1em;
}
td { white-space: pre-wrap; font-size: 14px; }
.fileheader { position: sticky; top: 0px; }
.fileheader-container {
background: #eee;
border-bottom: 1px solid #ddd;
border-top: 1px solid #ddd;
box-sizing: border-box;
display: flex;
line-height: 2.25em;
padding: 0.2em 1rem 0.2em 1rem;
}
.filename { flex-grow: 1; }
.fileheader button { flex-grow: 0; width: 2.25em; }
.rename { color: #999999; display: block; }
.fileinfo { background: #fafafa; color: #3a66d9; }
.filehooter div { border-top: 1px solid #ddd; }
.hunkheader { background: rgb(255, 247, 212); color: #757575; }
.lineno {
background: #fafafa;
box-sizing: border-box;
color: #666;
padding: 0 0.5em;
text-align: right;
user-select: none;
vertical-align: top;
width: 94px;
}
.emptylineno { box-sizing: border-box; user-select: none; width: 94px; }
.code { border-left: 1px solid #ddd; word-break: break-all; }
.del { background: #ffeeee; }
.del.strong { background: #ffcaca; }
.add { background: #eeffee; }
.add.strong { background: #caffca; }
.binary { padding: 8px; border-left: 1px solid #ddd; }
pre { white-space: pre-wrap; font-size: 14px; }
.hidden { display: none; }
</style>
<body>
<script>
function toggleFollowingRows(button) {
button.textContent = button.textContent == '\\u25B2' ? '\\u25BC' : '\\u25B2';
let parent = button;
while (parent && parent.tagName != 'TR') {
parent = parent.parentNode;
}
if (!parent)
return;
for (let next = parent.nextSibling; next; next = next.nextSibling) {
if (next.tagName == 'TR')
next.classList.toggle('hidden')
}
}
</script>
"""
| |
| |
def prettify_diff(diff_str):
    """Converts 'git diff' output into a standalone HTML page.

    Args:
        diff_str: The complete diff text.

    Returns:
        An HTML string made of the shared preamble (_LEADING_HTML), one table
        per parsed file diff and, if some non-blank input could not be
        parsed, those leftover lines escaped inside a <pre> element.

    Raises:
        ValueError: Propagated from the hunk parsers when a file diff is
            malformed (e.g. a missing "+++ " line or hunk header).
    """
    remaining_lines = diff_str.split('\n')
    # Collect fragments and join once instead of repeated string '+='.
    html_parts = [_LEADING_HTML]

    diff_file, remaining_lines = DiffFile.parse(remaining_lines)
    while diff_file is not None:
        html_parts.append(diff_file.prettify())
        diff_file, remaining_lines = DiffFile.parse(remaining_lines)

    # Unconsumed lines mean this code has a bug or the input diff is broken;
    # show them raw in that case. A diff that ends with a newline always
    # leaves a single empty string behind after split('\n'), so leftovers that
    # are entirely blank are not reported.
    if any(line.strip() for line in remaining_lines):
        html_parts.append('<pre>')
        html_parts.extend(
            html_escape(line, quote=False) + '\n' for line in remaining_lines)
        html_parts.append('</pre>')

    html_parts.append('</body>\n')
    return ''.join(html_parts)
| |
| |
class DiffFile(object):
    """Represents the diff for a single file.

    An instance of this class contains exactly one of the following:
      - Text hunks (a list of DiffHunk instances)
      - Two binary hunks (an (old, new) tuple of BinaryHunk instances)
      - Meta information (raw lines such as file mode or rename summary)

    An empty old name marks an added file and an empty new name marks a
    deleted file.
    """
    LINK_BASE_URL = 'https://chromium.googlesource.com/chromium/src/+/master/'

    # Compiled once; parse() runs these against every line of the diff.
    _DIFF_COMMAND_RE = re.compile(r'diff (?:-[^ ]+ )*a/([^ ]+) b/([^ ]+)')
    _BODY_START_RE = re.compile(r'(GIT binary patch|--- ([^ ]+).*)')
    _INDEX_RE = re.compile(r'^index ([0-9a-f]+)\.\.([0-9a-f]+).*')
    _NULL_INDEX_RE = re.compile(r'^0+$')

    def __init__(self,
                 old_name,
                 new_name,
                 hunks=None,
                 binaries=None,
                 info=None):
        """Stores the file names and the single kind of payload.

        Args:
            old_name: Path before the change, or '' for an added file.
            new_name: Path after the change, or '' for a deleted file.
            hunks: List of DiffHunk instances for a text diff.
            binaries: (old, new) pair of BinaryHunk instances.
            info: List of meta information lines.
        """
        assert old_name or new_name
        # Exactly one payload must be supplied (and non-empty).
        assert bool(hunks) + bool(binaries) + bool(info) == 1
        self._old_name = old_name
        self._new_name = new_name
        self._hunks = hunks
        self._binaries = binaries
        self._info = info

    def prettify(self):
        """Returns the HTML <table> for this file diff.

        The header row shows a status letter (A: added, D: deleted,
        R: renamed, M: otherwise) followed by the file name, then the rows
        produced by the hunks, the binary hunks or the meta information.
        """
        additional_info = ''
        if self._old_name == '':
            status = 'A'
            pretty_name = html_escape(self._new_name, quote=False)
        elif self._new_name == '':
            status = 'D'
            pretty_name = self._linkify(self._old_name)
        elif self._old_name != self._new_name:
            status = 'R'
            pretty_name = html_escape(self._new_name, quote=False)
            additional_info = (
                '\n<span class=rename>Renamed from {}</span>'.format(
                    self._linkify(self._old_name)))
        else:
            status = 'M'
            pretty_name = self._linkify(self._new_name)

        # The button label is written as a numeric character reference
        # (U+25B2, an up-pointing triangle) so that this source file stays
        # pure ASCII; the rendered page is the same.
        html_parts = [
            '\n<table>\n<tr><td colspan=3 class=fileheader>'
            '<div class=fileheader-container>'
            '<div class=filename>' + status + ' ' + pretty_name +
            additional_info + '</div>'
            '<button type=button onclick="toggleFollowingRows(this);">'
            '&#x25B2;</button>'
            '</div></tr>'
        ]

        if self._hunks:
            html_parts.extend(hunk.prettify() for hunk in self._hunks)
        elif self._info:
            html_parts.append(
                '<tr><td colspan=3 class=fileinfo>{}</tr>'.format(
                    html_escape('\n'.join(self._info), quote=False)))
        else:
            old_binary, new_binary = self._binaries  # pylint: disable=unpacking-non-sequence
            if self._old_name and old_binary:
                html_parts.append(
                    old_binary.prettify(
                        self._mime_from_name(self._old_name), 'del'))
            if self._new_name and new_binary:
                html_parts.append(
                    new_binary.prettify(
                        self._mime_from_name(self._new_name), 'add'))
        html_parts.append(
            '<tr><td colspan=3 class=filehooter><div></div></table>\n')
        return ''.join(html_parts)

    def _linkify(self, name):
        """Returns an <a> element linking `name` under LINK_BASE_URL."""
        return '<a href="{url}" target="_new">{anchor}</a>'.format(
            # The URL sits inside a double-quoted attribute, so quotes must
            # be escaped as well to keep the name from ending the attribute.
            url=DiffFile.LINK_BASE_URL + html_escape(name, quote=True),
            anchor=html_escape(name, quote=False))

    def _mime_from_name(self, name):
        """Guesses a MIME type from the file name.

        Falls back to 'application/octet-stream' when no type is guessed.
        """
        mime_type, _ = mimetypes.guess_type(name)
        return mime_type if mime_type else 'application/octet-stream'

    @staticmethod
    def parse(lines):
        """Parses diff lines, and creates a DiffFile instance.

        Finds a file diff header, creates a single DiffFile instance, and
        returns a tuple of the DiffFile instance and unconsumed lines. If a
        file diff isn't found, (None, lines) is returned.

        Lines before the first "diff ..." command line are skipped.

        Raises:
            ValueError: Propagated from the text/binary hunk parsers.
        """
        old_name = None
        new_name = None
        # None until the "diff ..." command line has been seen.
        info_lines = None
        for i, line in enumerate(lines):
            if info_lines is None:
                command_match = DiffFile._DIFF_COMMAND_RE.match(line)
                if command_match:
                    old_name = command_match.group(1)
                    new_name = command_match.group(2)
                    info_lines = []
                continue

            body_match = DiffFile._BODY_START_RE.match(line)
            if body_match:
                if body_match.group(0) == 'GIT binary patch':
                    return DiffFile._parse_binaries(lines[i + 1:], old_name,
                                                    new_name)
                # A "--- " line starts the text hunks.
                return DiffFile._parse_text_hunks(lines[i:], old_name,
                                                  new_name)

            index_match = DiffFile._INDEX_RE.match(line)
            if index_match:
                # Adjusts old_name and new_name for file addition/removal.
                old_name, new_name = DiffFile._adjust_names(
                    index_match, old_name, new_name)
                continue

            if DiffFile._DIFF_COMMAND_RE.match(line):
                # There are no hunks. Renaming without any modification,
                # or adding/removing an empty file. The next file's command
                # line is left unconsumed.
                return (DiffFile(old_name, new_name, info=info_lines),
                        lines[i:])

            # File mode, rename summary, etc.
            info_lines.append(line)

        # End of input: a header followed only by meta information.
        if info_lines:
            return (DiffFile(old_name, new_name, info=info_lines), [])
        return (None, lines)

    @staticmethod
    def _parse_binaries(lines, old_name, new_name):
        """Parses two consecutive binary hunks starting at lines[0].

        The first hunk is stored as the new binary and the second one as the
        old binary.
        """
        new_binary, remaining_lines = BinaryHunk.parse(lines)
        old_binary, remaining_lines = BinaryHunk.parse(remaining_lines)
        return (DiffFile(
            old_name, new_name, binaries=(old_binary, new_binary)),
                remaining_lines)

    @staticmethod
    def _parse_text_hunks(lines, old_name, new_name):
        """Parses text hunks; lines[0] is the "--- " line.

        Raises:
            ValueError: If the "+++ " line does not follow, or no hunk is
                found after it.
        """
        line = lines[0]
        if len(lines) < 2 or not lines[1].startswith('+++ '):
            raise ValueError('"+++ " line is missing after "{}"'.format(line))
        hunks, remaining_lines = DiffHunk.parse(lines[2:])
        return (DiffFile(old_name, new_name, hunks=hunks), remaining_lines)

    @staticmethod
    def _adjust_names(match, old_name, new_name):
        """Clears the name on the side whose index hash is all zeros.

        Args:
            match: Match object of an "index <old>..<new>" line.
            old_name: Current old file name.
            new_name: Current new file name.

        Returns:
            An (old_name, new_name) tuple; a name is replaced with '' when the
            corresponding hash consists only of zeros.
        """
        old_index = match.group(1)
        new_index = match.group(2)
        if old_index and DiffFile._NULL_INDEX_RE.match(old_index):
            old_name = ''
        if new_index and DiffFile._NULL_INDEX_RE.match(new_index):
            new_name = ''
        return (old_name, new_name)
| |
| |
class DiffHunk(object):
    """Represents a single text hunk, starting with '@@ -d,d +d,d @@'.

    This class also has code to detect character-level diff.
    """

    _HEADER_RE = re.compile(r'^@@ -(\d+)(?:,\d+)? \+(\d+)(?:,\d+)? @@(.*)?')

    def __init__(self, old_start, new_start, context, lines):
        """Stores the hunk and pre-computes character-level annotations.

        Args:
            old_start: Line number of the first hunk line in the old file.
            new_start: Line number of the first hunk line in the new file.
            context: Text following the second '@@' of the header, or None.
                One leading space is dropped.
            lines: Hunk body lines, each starting with ' ', '-' or '+'.
        """
        self._old_start = old_start
        self._new_start = new_start
        self._context = ''
        if context:
            self._context = context
            if self._context.startswith(' '):
                self._context = self._context[1:]
        self._lines = lines
        # _annotations is a list of None or a list of tuples.
        # A tuple consists of start index and end index, and it represents a
        # modified part of a line, which should be highlighted in the pretty
        # diff. Indexes are relative to the line without its leading marker.
        self._annotations = [None for _ in self._lines]
        for deleted_index, inserted_index in self._find_operations(
                self._lines):
            DiffHunk._annotate_character_diff(
                self._lines, deleted_index, inserted_index, self._annotations)

    @staticmethod
    def _find_operations(lines):
        """Finds 'operations' in the hunk.

        A hunk contains one or more operations, and an operation is one of the
        followings:
          - Replace operation: '-' lines, followed by '+' lines
          - Delete operation: '-' lines, not followed by '+' lines
          - Insertion operation: '+' lines

        Returns:
            A list of (list of '-' line indexes, list of '+' line indexes)
            tuples, one per operation.
        """
        operations = []
        inserted_index = []
        deleted_index = []
        for i, line in enumerate(lines):
            marker = line[0]
            if marker == ' ':
                # A context line closes whatever operation is open.
                if deleted_index or inserted_index:
                    operations.append((deleted_index, inserted_index))
                    deleted_index = []
                    inserted_index = []
            elif marker == '-':
                # A '-' after '+' lines starts a new operation.
                if inserted_index:
                    operations.append((deleted_index, inserted_index))
                    deleted_index = []
                    inserted_index = []
                deleted_index.append(i)
            else:
                assert marker == '+'
                inserted_index.append(i)
        if deleted_index or inserted_index:
            operations.append((deleted_index, inserted_index))
        return operations

    @staticmethod
    def _annotate_character_diff(lines, deleted_index, inserted_index,
                                 annotations):
        """Fills `annotations` for one operation.

        Pure insertions and pure deletions are annotated over their whole
        length. For a replace operation the deleted text and the inserted text
        (markers stripped, lines concatenated) are compared with
        difflib.SequenceMatcher and only the differing ranges are annotated.
        """
        assert len(lines) == len(annotations)
        if not deleted_index or not inserted_index:
            # Only one side is present: highlight every line entirely.
            for i in deleted_index + inserted_index:
                annotations[i] = [(0, len(lines[i]) - 1)]
            return

        deleted_str = ''.join([lines[i][1:] for i in deleted_index])
        inserted_str = ''.join([lines[i][1:] for i in inserted_index])
        matcher = difflib.SequenceMatcher(None, deleted_str, inserted_str)
        for tag, d_start, d_end, i_start, i_end in matcher.get_opcodes():
            if tag in ('delete', 'replace'):
                DiffHunk._annotate(lines, deleted_index[0], d_start, d_end,
                                   annotations)
            if tag in ('insert', 'replace'):
                DiffHunk._annotate(lines, inserted_index[0], i_start, i_end,
                                   annotations)

    @staticmethod
    def _annotate(lines, index, start, end, annotations):
        """Records the range [start, end) as modified, starting at lines[index].

        `start` and `end` are offsets into the concatenation of the lines
        (markers stripped) beginning at `index`; the range is split across as
        many following lines as it covers.

        This is a loop rather than recursion because one step is taken per
        line, and a long operation would otherwise exceed the interpreter's
        recursion limit.
        """
        while True:
            assert index < len(lines)
            line_len = len(lines[index]) - 1
            if line_len == 0 and start == 0:
                # An empty line at the start of the range is marked as a
                # whole and the range carries on to the next line unchanged.
                annotations[index] = [(0, 0)]
                index += 1
                continue
            if start >= line_len:
                # The range begins after this line; shift it to the next one.
                index += 1
                start -= line_len
                end -= line_len
                continue
            if not annotations[index]:
                annotations[index] = []
            annotations[index].append((start, min(line_len, end)))
            if end <= line_len:
                return
            # The rest of the range spills over to the next line.
            index += 1
            start = 0
            end -= line_len

    def prettify_code(self, index, klass):
        """Returns the code cell (<td>) for the line at `index`.

        Args:
            index: Index into the hunk lines.
            klass: CSS class for the line, 'del' or 'add'.

        Returns:
            An HTML string. A line without annotations is emitted plainly, a
            line whose first annotation covers it entirely gets the 'strong'
            class, and otherwise each annotated range is wrapped in a
            <span class="... strong">.
        """
        line = self._lines[index][1:]
        annotation = self._annotations[index]
        if not annotation:
            return '<td class="code {klass}">{code}'.format(
                klass=klass, code=html_escape(line, quote=False))

        start, end = annotation[0]
        if start == 0 and end == len(line):
            return '<td class="code {klass} strong">{code}'.format(
                klass=klass, code=html_escape(line, quote=False))

        html_parts = ['<td class="code {}">'.format(klass)]
        position = 0
        for start, end in annotation:
            html_parts.append(html_escape(line[position:start], quote=False))
            html_parts.append('<span class="{} strong">'.format(klass))
            html_parts.append(html_escape(line[start:end], quote=False))
            html_parts.append('</span>')
            position = end
        html_parts.append(html_escape(line[position:], quote=False))
        return ''.join(html_parts)

    def prettify(self):
        """Returns the table rows (<tr>) for this hunk.

        The first row is the hunk header carrying the context text; each
        following row has an old line number cell, a new line number cell and
        the code cell.
        """
        rows = [('<tr><td class=hunkheader>@@<td class=hunkheader>@@'
                 '<td class=hunkheader>{}</tr>\n').format(
                     html_escape(self._context, quote=False))]
        old_lineno = self._old_start
        new_lineno = self._new_start
        for i, line in enumerate(self._lines):
            marker = line[0]
            if marker == ' ':
                rows.append(('<tr><td class=lineno>{old_lineno}<td '
                             'class=lineno>{new_lineno}<td class=code>{code}'
                             '</tr>\n').format(
                                 old_lineno=old_lineno,
                                 new_lineno=new_lineno,
                                 code=html_escape(line[1:], quote=False)))
                old_lineno += 1
                new_lineno += 1
            elif marker == '-':
                rows.append(
                    '<tr><td class=lineno>{lineno}<td class=emptylineno>{code}</tr>\n'
                    .format(
                        lineno=old_lineno,
                        code=self.prettify_code(i, 'del')))
                old_lineno += 1
            else:
                assert marker == '+'
                rows.append(
                    '<tr><td class=emptylineno><td class=lineno>{lineno}{code}</tr>\n'
                    .format(
                        lineno=new_lineno,
                        code=self.prettify_code(i, 'add')))
                new_lineno += 1
        return ''.join(rows)

    @staticmethod
    def parse(lines):
        """Parses diff lines, and creates a sequence of DiffHunk instances.

        Finds a hunk header, creates a sequence of DiffHunk instances, and
        returns a tuple of the DiffHunk list and unconsumed lines. Lines
        before the first hunk header are skipped.

        Raises:
            ValueError: If a hunk header isn't found.
        """
        hunks = []
        in_hunk = False
        old_start = None
        new_start = None
        context = None
        hunk_lines = None
        for i, line in enumerate(lines):
            if in_hunk:
                if line.startswith((' ', '-', '+')):
                    hunk_lines.append(line)
                    continue
                if line.startswith('\\'):
                    # "\ No newline at end of file" annotates the previous
                    # line and is not hunk content. Skipping it keeps the
                    # hunk open so that '+' lines following it are not lost.
                    continue
                # Anything else closes the current hunk.
                hunks.append(
                    DiffHunk(old_start, new_start, context, hunk_lines))
            match = DiffHunk._HEADER_RE.match(line)
            if match:
                in_hunk = True
                old_start = int(match.group(1))
                new_start = int(match.group(2))
                context = match.group(3)
                hunk_lines = []
            elif in_hunk:
                # Not a hunk line and not a new header: the hunks are over.
                return (hunks, lines[i:])
        if not in_hunk:
            raise ValueError('Found no hunks')
        hunks.append(DiffHunk(old_start, new_start, context, hunk_lines))
        return (hunks, [])
| |
| |
class BinaryHunk(object):
    """Represents a binary hunk.

    A binary diff for a single file contains two binary hunks. An
    instance of this class represents a single binary hunk.
    """

    def __init__(self, bin_type, size, bin_data):
        """Stores the hunk.

        Args:
            bin_type: 'literal' or 'delta'.
            size: The size given on the hunk's first line.
            bin_data: The decoded base85 payload; prettify() inflates it with
                zlib before use.
        """
        assert bin_type in ('literal', 'delta')
        self._type = bin_type
        self._size = size
        self._compressed_data = bin_data

    def prettify(self, mime_type, klass):
        """Returns the table row for this hunk.

        Args:
            mime_type: MIME type of the file the hunk belongs to.
            klass: CSS class for the cell, 'del' or 'add'.

        Returns:
            An HTML string. A 'literal' hunk of an image type is inlined as an
            <img> with a data: URL; everything else gets a textual notice.
        """
        result_html = (
            '<tr><td class=emptylineno><td class=emptylineno>'
            '<td class="{klass} strong binary">Binary {type}; {size}'
            ' Bytes<br>\n').format(
                klass=klass, type=self._type, size=self._size)
        if self._type == 'delta':
            # Because we can assume the input diff is always produced by git,
            # we can obtain the original blob, apply the delta, and render
            # both of the original blob and the patched blob. However, we're
            # not sure how much it is worth to do.
            #
            # For 'delta' format, see patch_delta() in patch-delta.c.
            # https://github.com/git/git/blob/master/patch-delta.c
            return result_html + 'We don\'t support rendering a delta binary hunk.'
        if mime_type.startswith('image/'):
            encoded = base64.b64encode(zlib.decompress(self._compressed_data))
            if not isinstance(encoded, str):
                # Python 3 returns bytes; formatting them directly would put
                # "b'...'" into the URL. Base64 output is always ASCII.
                encoded = encoded.decode('ascii')
            return result_html + '<img src="data:{type};base64,{data}">'.format(
                type=mime_type, data=encoded)
        return result_html + 'We don\'t support rendering {} binary.'.format(
            mime_type)

    @staticmethod
    def parse(lines):
        """Creates a BinaryHunk instance starting with lines[0].

        Returns a tuple of the BinaryHunk instance and unconsumed lines. The
        blank line terminating the hunk is consumed.

        Raises:
            ValueError: If lines is empty or lines[0] is not a
                "literal <size>" / "delta <size>" line, if a data line has an
                invalid or inconsistent length letter, or if the terminating
                blank line is missing.
        """
        match = re.match(r'(literal|delta) (\d+)', lines[0]) if lines else None
        if not match:
            raise ValueError('No "literal <size>" or "delta <size>".')
        bin_type = match.group(1)
        size = int(match.group(2))
        # Decoded pieces are collected and joined once at the end.
        chunks = []

        data_lines = lines[1:]
        for i, line in enumerate(data_lines):
            if not line:
                if chunks:
                    # decode_base85() is a project helper whose return type
                    # (str or bytes) is not visible here; join with an empty
                    # value of whatever type it produced.
                    bin_data = chunks[0][:0].join(chunks)
                else:
                    bin_data = b''
                return (BinaryHunk(bin_type, size, bin_data),
                        data_lines[i + 1:])
            line_length_letter = line[0]
            # Map a letter to a number.
            #   A-Z -> 1-26
            #   a-z -> 27-52
            if 'A' <= line_length_letter <= 'Z':
                line_length = 1 + ord(line_length_letter) - ord('A')
            elif 'a' <= line_length_letter <= 'z':
                line_length = 27 + ord(line_length_letter) - ord('a')
            else:
                raise ValueError(
                    'Invalid base85 line length letter: "{}", line:"{}"'.format(
                        line_length_letter, line))
            # Every 5 encoded characters carry 4 bytes, so the announced byte
            # count must fit in what the rest of the line can hold.
            if line_length * 5 > (len(line) - 1) * 4:
                raise ValueError('Base85 length mismatch: length by the first '
                                 'letter:{}, actual:{}, line:"{}"'.format(
                                     line_length * 5, (len(line) - 1) * 4,
                                     line))
            chunks.append(decode_base85(line[1:])[0:line_length])
        raise ValueError('No blank line terminating a binary hunk.')