third_party/blink/tools/blinkpy/common/pretty_diff.py - chromium/src - Git at Google

 # Copyright 2018 The Chromium Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.
 """Prettifies 'git diff' output.

 prettify_diff() takes a diff string, and returns an HTML string decorating the
 diff.

 This code doesn't support other diff commands such as "diff" and "svn diff".
 """

 import base64
 import cgi
 import difflib
 import mimetypes
 import re
 import zlib

 try:
     # html.escape() replaces cgi.escape(), which was removed in Python 3.8.
     from html import escape as _escape_html
 except ImportError:
     # Python 2 has no html module; cgi.escape() takes the same arguments.
     from cgi import escape as _escape_html

 from blinkpy.common.base85 import decode_base85

 # Leading part of the generated document: the doctype, the charset
 # declaration, the stylesheet, the opening <body> tag and the script used by
 # the collapse buttons. prettify_diff() starts its result with this string.
 #
 # The style below is meant to be similar to PolyGerrit.
 #
 # toggleFollowingRows(button) swaps the button glyph between U+25B2 and
 # U+25BC and toggles the 'hidden' class on every <tr> sibling that follows
 # the row containing the button. The backslashes of '\\u25B2' and '\\u25BC'
 # are doubled so that the string contains the JavaScript escapes themselves.
 _LEADING_HTML = """<!DOCTYPE html>
 <meta charset="UTF-8">
 <style>
 body {
   background: white;
   font-family: "Roboto Mono", Menlo, "Lucida Console", Monaco, monospace;
 }
 table {
   border-collapse: collapse;
   border-spacing: 0;
   width: 100%;
   margin-top: 1em;
 }
 td { white-space: pre-wrap; font-size: 14px; }
 .fileheader { position: sticky; top: 0px; }
 .fileheader-container {
   background: #eee;
   border-bottom: 1px solid #ddd;
   border-top: 1px solid #ddd;
   box-sizing: border-box;
   display: flex;
   line-height: 2.25em;
   padding: 0.2em 1rem 0.2em 1rem;
 }
 .filename { flex-grow: 1; }
 .fileheader button { flex-grow: 0; width: 2.25em; }
 .rename { color: #999999; display: block; }
 .fileinfo { background: #fafafa; color: #3a66d9; }
 .filehooter div { border-top: 1px solid #ddd; }
 .hunkheader { background: rgb(255, 247, 212); color: #757575; }
 .lineno {
   background: #fafafa;
   box-sizing: border-box;
   color: #666;
   padding: 0 0.5em;
   text-align: right;
   user-select: none;
   vertical-align: top;
   width: 94px;
 }
 .emptylineno { box-sizing: border-box;  user-select: none; width: 94px; }
 .code { border-left: 1px solid #ddd; word-break: break-all; }
 .del { background: #ffeeee; }
 .del.strong { background: #ffcaca; }
 .add { background: #eeffee; }
 .add.strong { background: #caffca; }
 .binary { padding: 8px; border-left: 1px solid #ddd; }
 pre { white-space: pre-wrap; font-size: 14px; }
 .hidden { display: none; }
 </style>
 <body>
 <script>
 function toggleFollowingRows(button) {
   button.textContent = button.textContent == '\\u25B2' ? '\\u25BC' : '\\u25B2';
   let parent = button;
   while (parent && parent.tagName != 'TR') {
     parent = parent.parentNode;
   }
   if (!parent)
     return;
   for (let next = parent.nextSibling; next; next = next.nextSibling) {
     if (next.tagName == 'TR')
       next.classList.toggle('hidden')
   }
 }
 </script>
 """


 def prettify_diff(diff_str):
     """Converts 'git diff' output into an HTML document.

     Args:
         diff_str: The whole output of a 'git diff' run, as a single string.

     Returns:
         An HTML string made of _LEADING_HTML, one table per parsed file
         diff, any lines the parser could not consume (escaped, inside a
         <pre> element) and the closing </body> tag.
     """
     remaining_lines = diff_str.split('\n')
     # List of DiffFile instances
     diff_files = []

     # DiffFile.parse() returns (None, lines) once it finds no further file
     # header, which ends the loop.
     diff_file, remaining_lines = DiffFile.parse(remaining_lines)
     while diff_file:
         diff_files.append(diff_file)
         diff_file, remaining_lines = DiffFile.parse(remaining_lines)

     # Collect the pieces and join once instead of growing a string with +=.
     html_parts = [_LEADING_HTML]
     html_parts.extend(diff_file.prettify() for diff_file in diff_files)

     # If there are still unconsumed lines, this code has a bug or the input
     # diff is broken. We show the raw diff in such case.
     if remaining_lines:
         html_parts.append('<pre>')
         html_parts.extend(
             _escape_html(line, False) + '\n' for line in remaining_lines)
         html_parts.append('</pre>')

     html_parts.append('</body>\n')
     return ''.join(html_parts)


 class DiffFile(object):
     """Represents the diff for a single file.

     An instance of this class contains exactly one of the following:
     - Text hunks
     - Two binary hunks
     - Meta information
     """
     LINK_BASE_URL = 'https://chromium.googlesource.com/chromium/src/+/master/'

     def __init__(self,
                  old_name,
                  new_name,
                  hunks=None,
                  binaries=None,
                  info=None):
         """Initializes a DiffFile.

         Args:
             old_name: Path before the change; '' for an added file.
             new_name: Path after the change; '' for a removed file.
             hunks: A list of DiffHunk instances, for a text diff.
             binaries: A tuple of (old BinaryHunk, new BinaryHunk), for a
                 binary diff.
             info: A list of header lines (file mode, rename summary, etc.),
                 for a diff that has no hunk.

         Exactly one of hunks, binaries and info must be non-empty, and at
         least one of the names must be non-empty.
         """
         assert old_name or new_name
         assert bool(hunks) + bool(binaries) + bool(info) == 1
         self._old_name = old_name
         self._new_name = new_name
         self._hunks = hunks
         self._binaries = binaries
         self._info = info

     def prettify(self):
         """Returns this file diff rendered as an HTML <table> string.

         The header row shows a status letter (M, A, D or R) and the file
         name; the following rows hold the hunks, the info lines or the
         binary hunks.
         """
         status = 'M'
         pretty_name = self._linkify(self._new_name)
         additional_info = ''
         if self._old_name == '':
             # Added file: the name is shown as plain text, not as a link.
             status = 'A'
             pretty_name = _escape_html(self._new_name, False)
         elif self._new_name == '':
             status = 'D'
             pretty_name = self._linkify(self._old_name)
         elif self._old_name != self._new_name:
             # Renamed file: only the old name is linked.
             status = 'R'
             pretty_name = _escape_html(self._new_name, False)
             additional_info = (
                 '\n<span class=rename>Renamed from {}</span>'.format(
                     self._linkify(self._old_name)))

         html_parts = [
             '\n<table>\n<tr><td colspan=3 class=fileheader>'
             '<div class=fileheader-container>'
             '<div class=filename>' + status + ' ' + pretty_name +
             additional_info + '</div>'
             '<button type=button onclick="toggleFollowingRows(this);">&#x25B2;</button>'
             '</div></tr>'
         ]

         if self._hunks:
             html_parts.extend(hunk.prettify() for hunk in self._hunks)
         elif self._info:
             html_parts.append(
                 '<tr><td colspan=3 class=fileinfo>{}</tr>'.format(
                     _escape_html('\n'.join(self._info), False)))
         else:
             old_binary, new_binary = self._binaries  # pylint: disable=unpacking-non-sequence
             if self._old_name and old_binary:
                 html_parts.append(
                     old_binary.prettify(
                         self._mime_from_name(self._old_name), 'del'))
             if self._new_name and new_binary:
                 html_parts.append(
                     new_binary.prettify(
                         self._mime_from_name(self._new_name), 'add'))
         html_parts.append(
             '<tr><td colspan=3 class=filehooter><div></div></table>\n')
         return ''.join(html_parts)

     def _linkify(self, name):
         """Returns an <a> element linking |name| under LINK_BASE_URL."""
         return '<a href="{url}" target="_new">{anchor}</a>'.format(
             # The URL sits inside a double-quoted attribute, so quotes in
             # the file name must be escaped as well.
             url=_escape_html(DiffFile.LINK_BASE_URL + name, True),
             anchor=_escape_html(name, False))

     def _mime_from_name(self, name):
         """Guesses a MIME type from |name|, defaulting to octet-stream."""
         mime_type, _ = mimetypes.guess_type(name)
         return mime_type or 'application/octet-stream'

     @staticmethod
     def parse(lines):
         """Parses diff lines, and creates a DiffFile instance.

         Finds a file diff header, creates a single DiffFile instance, and
         returns a tuple of the DiffFile instance and unconsumed lines. If a
         file diff isn't found, (None, lines) is returned.

         A file header that is directly followed by the next file header,
         with nothing to show in between, is skipped.

         Raises:
             ValueError: The text or binary hunks of the file are malformed.
         """
         diff_command_re = r'diff (?:-[^ ]+ )*a/([^ ]+) b/([^ ]+)'
         old_name = None
         new_name = None
         info_lines = None
         found_diff_command_line = False
         for i, line in enumerate(lines):
             if not found_diff_command_line:
                 # Everything before the first file header is ignored.
                 match = re.match(diff_command_re, line)
                 if not match:
                     continue
                 old_name, new_name = match.groups()
                 info_lines = []
                 found_diff_command_line = True
                 continue

             match = re.match(r'(GIT binary patch|--- ([^ ]+).*)', line)
             if match:
                 if match.group(0) == 'GIT binary patch':
                     return DiffFile._parse_binaries(lines[i + 1:], old_name,
                                                     new_name)
                 return DiffFile._parse_text_hunks(lines[i:], old_name,
                                                   new_name)

             index_match = re.match(r'^index ([0-9a-f]+)\.\.([0-9a-f]+).*',
                                    line)
             if index_match:
                 # Adjusts old_name and new_name for file addition/removal.
                 old_name, new_name = DiffFile._adjust_names(
                     index_match, old_name, new_name)
                 continue

             diff_match = re.match(diff_command_re, line)
             if diff_match:
                 if info_lines:
                     # There are no hunks. Renaming without any
                     # modification, or adding/removing an empty file.
                     return (DiffFile(old_name, new_name, info=info_lines),
                             lines[i:])
                 # The previous header had nothing to render; building a
                 # DiffFile from it would trip the constructor assertion.
                 # Drop it and continue with this header.
                 old_name, new_name = diff_match.groups()
                 continue

             # File mode, rename summary, etc.
             info_lines.append(line)

         if found_diff_command_line and info_lines:
             return (DiffFile(old_name, new_name, info=info_lines), [])
         return (None, lines)

     @staticmethod
     def _parse_binaries(lines, old_name, new_name):
         """Parses two consecutive binary hunks starting at lines[0].

         The first hunk is taken as the new content and the second one as
         the old content.
         """
         new_binary, remaining_lines = BinaryHunk.parse(lines)
         old_binary, remaining_lines = BinaryHunk.parse(remaining_lines)
         diff_file = DiffFile(
             old_name, new_name, binaries=(old_binary, new_binary))
         return (diff_file, remaining_lines)

     @staticmethod
     def _parse_text_hunks(lines, old_name, new_name):
         """Parses text hunks; lines[0] is the '--- ' line of the file.

         Raises:
             ValueError: lines[0] is not followed by a '+++ ' line, or no
                 hunk follows.
         """
         if len(lines) < 2 or not lines[1].startswith('+++ '):
             raise ValueError('"+++ " line is missing after "{}"'.format(
                 lines[0]))
         hunks, remaining_lines = DiffHunk.parse(lines[2:])
         return (DiffFile(old_name, new_name, hunks=hunks), remaining_lines)

     @staticmethod
     def _adjust_names(match, old_name, new_name):
         """Blanks the name on the side whose 'index' hash is all zeros.

         Args:
             match: A match object of an 'index <old>..<new>' line.
             old_name: The old name taken from the file header.
             new_name: The new name taken from the file header.

         Returns:
             A tuple of (old_name, new_name); a name is '' when the hash on
             its side consists of zeros only.
         """
         old_index = match.group(1)
         new_index = match.group(2)
         if old_index and re.match(r'^0+$', old_index):
             old_name = ''
         if new_index and re.match(r'^0+$', new_index):
             new_name = ''
         return (old_name, new_name)


 class DiffHunk(object):
     """Represents a single text hunk, starting with '@@ -d,d +d,d @@'.

     This class also has code to detect character-level diff.
     """

     def __init__(self, old_start, new_start, context, lines):
         """Initializes a DiffHunk and computes its highlight annotations.

         Args:
             old_start: Line number of the first hunk line in the old file.
             new_start: Line number of the first hunk line in the new file.
             context: The text after the second '@@' of the hunk header, or
                 None. One leading space is dropped.
             lines: The hunk body; every line starts with ' ', '-' or '+'.
         """
         self._old_start = old_start
         self._new_start = new_start
         self._context = context or ''
         if self._context.startswith(' '):
             self._context = self._context[1:]
         self._lines = lines
         # _annotations is a list of None or a list of tuples.
         # A tuple consists of start index and end index, and it represents a
         # modified part of a line, which should be highlighted in the pretty
         # diff.
         self._annotations = [None for _ in self._lines]
         for deleted_index, inserted_index in self._find_operations(
                 self._lines):
             DiffHunk._annotate_character_diff(
                 self._lines, deleted_index, inserted_index, self._annotations)

     @staticmethod
     def _find_operations(lines):
         """Finds 'operations' in the hunk.

         A hunk contains one or more operations, and an operation is one of
         the followings:
           - Replace operation: '-' lines, followed by '+' lines
           - Delete operation: '-' lines, not followed by '+' lines
           - Insertion operation: '+' lines

         Returns:
             A list of tuples which consist of (list of '-' line index, list
             of '+' line index).
         """
         operations = []
         inserted_index = []
         deleted_index = []
         for i, line in enumerate(lines):
             if line[0] == ' ':
                 # A context line closes the operation in progress.
                 if deleted_index or inserted_index:
                     operations.append((deleted_index, inserted_index))
                     deleted_index = []
                     inserted_index = []
             elif line[0] == '-':
                 # A '-' line after '+' lines starts a new operation.
                 if inserted_index:
                     operations.append((deleted_index, inserted_index))
                     deleted_index = []
                     inserted_index = []
                 deleted_index.append(i)
             else:
                 assert line[0] == '+'
                 inserted_index.append(i)
         if deleted_index or inserted_index:
             operations.append((deleted_index, inserted_index))
         return operations

     @staticmethod
     def _annotate_character_diff(lines, deleted_index, inserted_index,
                                  annotations):
         """Fills |annotations| for the lines of a single operation.

         Lines of a pure deletion or a pure insertion are marked as a whole.
         For a replace operation the deleted text and the inserted text are
         compared with difflib, and only the differing ranges are marked.
         """
         assert len(lines) == len(annotations)
         if not deleted_index or not inserted_index:
             for i in deleted_index + inserted_index:
                 annotations[i] = [(0, len(lines[i]) - 1)]
             return

         # The leading '-' or '+' is not a part of the compared text.
         deleted_str = ''.join([lines[i][1:] for i in deleted_index])
         inserted_str = ''.join([lines[i][1:] for i in inserted_index])
         matcher = difflib.SequenceMatcher(None, deleted_str, inserted_str)
         for tag, d_start, d_end, i_start, i_end in matcher.get_opcodes():
             if tag in ('delete', 'replace'):
                 DiffHunk._annotate(lines, deleted_index[0], d_start, d_end,
                                    annotations)
             if tag in ('insert', 'replace'):
                 DiffHunk._annotate(lines, inserted_index[0], i_start, i_end,
                                    annotations)

     @staticmethod
     def _annotate(lines, index, start, end, annotations):
         """Marks the range [start, end) beginning at lines[index].

         |start| and |end| are offsets into the text of lines[index] and
         the lines following it, joined without their first character. The
         part of the range that lies beyond lines[index] is handed over to
         the next line.
         """
         assert index < len(lines)
         line_len = len(lines[index]) - 1
         if line_len == 0 and start == 0:
             # An empty line within the range is marked as a whole.
             annotations[index] = [(0, 0)]
             DiffHunk._annotate(lines, index + 1, start, end, annotations)
             return
         if start >= line_len:
             # The range starts in one of the following lines.
             DiffHunk._annotate(lines, index + 1, start - line_len,
                                end - line_len, annotations)
             return
         if not annotations[index]:
             annotations[index] = []
         annotations[index].append((start, min(line_len, end)))
         if end > line_len:
             DiffHunk._annotate(lines, index + 1, 0, end - line_len,
                                annotations)

     def prettify_code(self, index, klass):
         """Returns the code cell (an opening <td> and its content).

         Args:
             index: Index of the line in this hunk.
             klass: CSS class of the cell, 'del' or 'add'.
         """
         line = self._lines[index][1:]
         annotation = self._annotations[index]
         if not annotation:
             return '<td class="code {klass}">{code}'.format(
                 klass=klass, code=_escape_html(line, False))

         start, end = annotation[0]
         if start == 0 and end == len(line):
             # The whole line is modified; highlight the cell itself.
             return '<td class="code {klass} strong">{code}'.format(
                 klass=klass, code=_escape_html(line, False))

         i = 0
         html_parts = ['<td class="code {}">'.format(klass)]
         for start, end in annotation:
             html_parts.append(_escape_html(line[i:start], False))
             html_parts.append('<span class="{} strong">'.format(klass))
             html_parts.append(_escape_html(line[start:end], False))
             html_parts.append('</span>')
             i = end
         html_parts.append(_escape_html(line[i:], False))
         return ''.join(html_parts)

     def prettify(self):
         """Returns the table rows rendering this hunk as an HTML string."""
         html_parts = [('<tr><td class=hunkheader>@@<td class=hunkheader>@@'
                        '<td class=hunkheader>{}</tr>\n').format(
                            _escape_html(self._context, False))]
         old_lineno = self._old_start
         new_lineno = self._new_start
         for i, line in enumerate(self._lines):
             if line[0] == ' ':
                 html_parts.append(
                     ('<tr><td class=lineno>{old_lineno}<td '
                      'class=lineno>{new_lineno}<td class=code>{code}'
                      '</tr>\n').format(
                          old_lineno=old_lineno,
                          new_lineno=new_lineno,
                          code=_escape_html(line[1:], False)))
                 old_lineno += 1
                 new_lineno += 1
             elif line[0] == '-':
                 html_parts.append(
                     '<tr><td class=lineno>{lineno}<td class=emptylineno>{code}</tr>\n'.format(
                         lineno=old_lineno, code=self.prettify_code(i, 'del')))
                 old_lineno += 1
             else:
                 assert line[0] == '+'
                 html_parts.append(
                     '<tr><td class=emptylineno><td class=lineno>{lineno}{code}</tr>\n'.format(
                         lineno=new_lineno, code=self.prettify_code(i, 'add')))
                 new_lineno += 1
         return ''.join(html_parts)

     @staticmethod
     def parse(lines):
         """Parses diff lines, and creates a sequence of DiffHunk instances.

         Finds a hunk header, creates a sequence of DiffHunk instances, and
         returns a tuple of the DiffHunk list and unconsumed lines. Lines
         before the first hunk header are skipped.

         Raises:
             ValueError: No hunk header is found.
         """
         hunk_header_re = re.compile(
             r'^@@ -(\d+)(?:,\d+)? \+(\d+)(?:,\d+)? @@(.*)?')
         hunks = []
         # Header match and body of the hunk being collected.
         header = None
         hunk_lines = None
         for i, line in enumerate(lines):
             if header is not None:
                 if line.startswith((' ', '-', '+')):
                     hunk_lines.append(line)
                     continue
                 if line.startswith('\\'):
                     # '\ No newline at end of file' belongs to the line
                     # before it and may be followed by more hunk lines, so
                     # it must not end the hunk.
                     continue
                 hunks.append(
                     DiffHunk(
                         int(header.group(1)), int(header.group(2)),
                         header.group(3), hunk_lines))
             match = hunk_header_re.match(line)
             if match:
                 header = match
                 hunk_lines = []
             elif header is not None:
                 # Neither a hunk line nor a hunk header: this file is done.
                 return (hunks, lines[i:])
         if header is None:
             raise ValueError('Found no hunks')
         hunks.append(
             DiffHunk(
                 int(header.group(1)), int(header.group(2)), header.group(3),
                 hunk_lines))
         return (hunks, [])


 class BinaryHunk(object):
     """Represents a binary hunk.

     A binary diff for a single file contains two binary hunks. An
     instance of this class represents a single binary hunk.
     """

     def __init__(self, bin_type, size, bin_data):
         """Initializes a BinaryHunk.

         Args:
             bin_type: 'literal' or 'delta'.
             size: The size given on the 'literal <size>'/'delta <size>' line.
             bin_data: The decoded hunk data; prettify() passes it to
                 zlib.decompress().
         """
         assert bin_type in ('literal', 'delta')
         self._type = bin_type
         self._size = size
         self._compressed_data = bin_data

     def prettify(self, mime_type, klass):
         """Returns a table row rendering this hunk as an HTML string.

         Only 'literal' hunks of an image type are rendered (as an <img>
         with a data URI); a message is shown for everything else.

         Args:
             mime_type: MIME type of the file this hunk belongs to.
             klass: CSS class of the cell, 'del' or 'add'.
         """
         result_html = (
             '<tr><td class=emptylineno><td class=emptylineno>'
             '<td class="{klass} strong binary">Binary {type}; {size}'
             ' Bytes<br>\n').format(
                 klass=klass, type=self._type, size=self._size)
         if self._type == 'delta':
             # Because we can assume the input diff is always produced by
             # git, we can obtain the original blob, apply the delta, and
             # render both of the original blob and the patched blob.
             # However, we're not sure how much it is worth to do.
             #
             # For 'delta' format, see patch_delta() in patch-delta.c.
             # https://github.com/git/git/blob/master/patch-delta.c
             return result_html + 'We don\'t support rendering a delta binary hunk.'
         if mime_type.startswith('image/'):
             # b64encode() returns bytes on Python 3; decode it so that the
             # URI does not contain a b'...' literal.
             encoded = base64.b64encode(
                 zlib.decompress(self._compressed_data)).decode('ascii')
             return result_html + '<img src="data:{type};base64,{data}">'.format(
                 type=mime_type, data=encoded)
         return result_html + 'We don\'t support rendering {} binary.'.format(
             mime_type)

     @staticmethod
     def parse(lines):
         """Creates a BinaryHunk instance starting with lines[0].

         Returns a tuple of the BinaryHunk instance and unconsumed lines.

         Raises:
             ValueError: The 'literal <size>'/'delta <size>' line is missing,
                 a data line is malformed, or the blank line terminating the
                 hunk is missing.
         """
         match = re.match(r'(literal|delta) (\d+)', lines[0]) if lines else None
         if not match:
             raise ValueError('No "literal <size>" or "delta <size>".')
         bin_type = match.group(1)
         size = int(match.group(2))
         chunks = []

         data_lines = lines[1:]
         for i, line in enumerate(data_lines):
             if not line:
                 # Join with an empty value of the chunks' own type, so this
                 # does not depend on what decode_base85() returns.
                 bin_data = chunks[0][:0].join(chunks) if chunks else ''
                 return (BinaryHunk(bin_type, size, bin_data),
                         data_lines[i + 1:])
             line_length_letter = line[0]
             # Map a letter to a number.
             #   A-Z -> 1-26
             #   a-z -> 27-52
             if 'A' <= line_length_letter <= 'Z':
                 line_length = 1 + ord(line_length_letter) - ord('A')
             elif 'a' <= line_length_letter <= 'z':
                 line_length = 27 + ord(line_length_letter) - ord('a')
             else:
                 raise ValueError('Invalid Base85 length letter:"{}", '
                                  'line:"{}"'.format(line_length_letter, line))
             if line_length * 5 > (len(line) - 1) * 4:
                 raise ValueError('Base85 length mismatch: length by the first '
                                  'letter:{}, actual:{}, line:"{}"'.format(
                                      line_length * 5, (len(line) - 1) * 4,
                                      line))
             chunks.append(decode_base85(line[1:])[0:line_length])
         raise ValueError('No blank line terminating a binary hunk.')
	# Copyright 2018 The Chromium Authors. All rights reserved.
	# Use of this source code is governed by a BSD-style license that can be
	# found in the LICENSE file.
	"""Prettifies 'git diff' output.

	prettify_diff() takes a diff string, and returns an HTML string decorating the
	diff.

	This code doesn't support other diff commands such as "diff" and "svn diff".
	"""

	import base64
	import cgi
	import difflib
	import mimetypes
	import re
	import zlib

	from blinkpy.common.base85 import decode_base85

	# Leading part of the generated document: the doctype, the charset
	# declaration, the stylesheet, the opening <body> tag and the script used by
	# the collapse buttons. prettify_diff() starts its result with this string.
	#
	# The style below is meant to be similar to PolyGerrit.
	#
	# toggleFollowingRows(button) swaps the button glyph between U+25B2 and
	# U+25BC and toggles the 'hidden' class on every <tr> sibling that follows
	# the row containing the button. The backslashes of '\\u25B2' and '\\u25BC'
	# are doubled so that the string contains the JavaScript escapes themselves.
	_LEADING_HTML = """<!DOCTYPE html>
	<meta charset="UTF-8">
	<style>
	body {
	background: white;
	font-family: "Roboto Mono", Menlo, "Lucida Console", Monaco, monospace;
	}
	table {
	border-collapse: collapse;
	border-spacing: 0;
	width: 100%;
	margin-top: 1em;
	}
	td { white-space: pre-wrap; font-size: 14px; }
	.fileheader { position: sticky; top: 0px; }
	.fileheader-container {
	background: #eee;
	border-bottom: 1px solid #ddd;
	border-top: 1px solid #ddd;
	box-sizing: border-box;
	display: flex;
	line-height: 2.25em;
	padding: 0.2em 1rem 0.2em 1rem;
	}
	.filename { flex-grow: 1; }
	.fileheader button { flex-grow: 0; width: 2.25em; }
	.rename { color: #999999; display: block; }
	.fileinfo { background: #fafafa; color: #3a66d9; }
	.filehooter div { border-top: 1px solid #ddd; }
	.hunkheader { background: rgb(255, 247, 212); color: #757575; }
	.lineno {
	background: #fafafa;
	box-sizing: border-box;
	color: #666;
	padding: 0 0.5em;
	text-align: right;
	user-select: none;
	vertical-align: top;
	width: 94px;
	}
	.emptylineno { box-sizing: border-box; user-select: none; width: 94px; }
	.code { border-left: 1px solid #ddd; word-break: break-all; }
	.del { background: #ffeeee; }
	.del.strong { background: #ffcaca; }
	.add { background: #eeffee; }
	.add.strong { background: #caffca; }
	.binary { padding: 8px; border-left: 1px solid #ddd; }
	pre { white-space: pre-wrap; font-size: 14px; }
	.hidden { display: none; }
	</style>
	<body>
	<script>
	function toggleFollowingRows(button) {
	button.textContent = button.textContent == '\\u25B2' ? '\\u25BC' : '\\u25B2';
	let parent = button;
	while (parent && parent.tagName != 'TR') {
	parent = parent.parentNode;
	}
	if (!parent)
	return;
	for (let next = parent.nextSibling; next; next = next.nextSibling) {
	if (next.tagName == 'TR')
	next.classList.toggle('hidden')
	}
	}
	</script>
	"""


	def prettify_diff(diff_str):
	    """Converts 'git diff' output into an HTML document.

	    Args:
	        diff_str: The whole output of a 'git diff' run, as a single string.

	    Returns:
	        An HTML string made of _LEADING_HTML, one table per parsed file
	        diff, any lines the parser could not consume (escaped, inside a
	        <pre> element) and the closing </body> tag.
	    """
	    remaining_lines = diff_str.split('\n')
	    # List of DiffFile instances
	    diff_files = []

	    # DiffFile.parse() returns (None, lines) once it finds no further file
	    # header, which ends the loop.
	    diff_file, remaining_lines = DiffFile.parse(remaining_lines)
	    while diff_file:
	        diff_files.append(diff_file)
	        diff_file, remaining_lines = DiffFile.parse(remaining_lines)

	    # Collect the pieces and join once instead of growing a string with +=.
	    html_parts = [_LEADING_HTML]
	    html_parts.extend(diff_file.prettify() for diff_file in diff_files)

	    # If there are still unconsumed lines, this code has a bug or the input
	    # diff is broken. We show the raw diff in such case.
	    if remaining_lines:
	        html_parts.append('<pre>')
	        html_parts.extend(
	            _escape_html(line, False) + '\n' for line in remaining_lines)
	        html_parts.append('</pre>')

	    html_parts.append('</body>\n')
	    return ''.join(html_parts)


	class DiffFile(object):
	    """Represents the diff for a single file.

	    An instance of this class contains exactly one of the following:
	    - Text hunks
	    - Two binary hunks
	    - Meta information
	    """
	    LINK_BASE_URL = 'https://chromium.googlesource.com/chromium/src/+/master/'

	    def __init__(self,
	                 old_name,
	                 new_name,
	                 hunks=None,
	                 binaries=None,
	                 info=None):
	        """Initializes a DiffFile.

	        Args:
	            old_name: Path before the change; '' for an added file.
	            new_name: Path after the change; '' for a removed file.
	            hunks: A list of DiffHunk instances, for a text diff.
	            binaries: A tuple of (old BinaryHunk, new BinaryHunk), for a
	                binary diff.
	            info: A list of header lines (file mode, rename summary, etc.),
	                for a diff that has no hunk.

	        Exactly one of hunks, binaries and info must be non-empty, and at
	        least one of the names must be non-empty.
	        """
	        assert old_name or new_name
	        assert bool(hunks) + bool(binaries) + bool(info) == 1
	        self._old_name = old_name
	        self._new_name = new_name
	        self._hunks = hunks
	        self._binaries = binaries
	        self._info = info

	    def prettify(self):
	        """Returns this file diff rendered as an HTML <table> string.

	        The header row shows a status letter (M, A, D or R) and the file
	        name; the following rows hold the hunks, the info lines or the
	        binary hunks.
	        """
	        status = 'M'
	        pretty_name = self._linkify(self._new_name)
	        additional_info = ''
	        if self._old_name == '':
	            # Added file: the name is shown as plain text, not as a link.
	            status = 'A'
	            pretty_name = _escape_html(self._new_name, False)
	        elif self._new_name == '':
	            status = 'D'
	            pretty_name = self._linkify(self._old_name)
	        elif self._old_name != self._new_name:
	            # Renamed file: only the old name is linked.
	            status = 'R'
	            pretty_name = _escape_html(self._new_name, False)
	            additional_info = (
	                '\n<span class=rename>Renamed from {}</span>'.format(
	                    self._linkify(self._old_name)))

	        # The button glyph is written as a character reference to keep
	        # this source file ASCII-only.
	        html_parts = [
	            '\n<table>\n<tr><td colspan=3 class=fileheader>'
	            '<div class=fileheader-container>'
	            '<div class=filename>' + status + ' ' + pretty_name +
	            additional_info + '</div>'
	            '<button type=button onclick="toggleFollowingRows(this);">&#x25B2;</button>'
	            '</div></tr>'
	        ]

	        if self._hunks:
	            html_parts.extend(hunk.prettify() for hunk in self._hunks)
	        elif self._info:
	            html_parts.append(
	                '<tr><td colspan=3 class=fileinfo>{}</tr>'.format(
	                    _escape_html('\n'.join(self._info), False)))
	        else:
	            old_binary, new_binary = self._binaries  # pylint: disable=unpacking-non-sequence
	            if self._old_name and old_binary:
	                html_parts.append(
	                    old_binary.prettify(
	                        self._mime_from_name(self._old_name), 'del'))
	            if self._new_name and new_binary:
	                html_parts.append(
	                    new_binary.prettify(
	                        self._mime_from_name(self._new_name), 'add'))
	        html_parts.append(
	            '<tr><td colspan=3 class=filehooter><div></div></table>\n')
	        return ''.join(html_parts)

	    def _linkify(self, name):
	        """Returns an <a> element linking |name| under LINK_BASE_URL."""
	        return '<a href="{url}" target="_new">{anchor}</a>'.format(
	            # The URL sits inside a double-quoted attribute, so quotes in
	            # the file name must be escaped as well.
	            url=_escape_html(DiffFile.LINK_BASE_URL + name, True),
	            anchor=_escape_html(name, False))

	    def _mime_from_name(self, name):
	        """Guesses a MIME type from |name|, defaulting to octet-stream."""
	        mime_type, _ = mimetypes.guess_type(name)
	        return mime_type or 'application/octet-stream'

	    @staticmethod
	    def parse(lines):
	        """Parses diff lines, and creates a DiffFile instance.

	        Finds a file diff header, creates a single DiffFile instance, and
	        returns a tuple of the DiffFile instance and unconsumed lines. If a
	        file diff isn't found, (None, lines) is returned.

	        A file header that is directly followed by the next file header,
	        with nothing to show in between, is skipped.

	        Raises:
	            ValueError: The text or binary hunks of the file are malformed.
	        """
	        diff_command_re = r'diff (?:-[^ ]+ )*a/([^ ]+) b/([^ ]+)'
	        old_name = None
	        new_name = None
	        info_lines = None
	        found_diff_command_line = False
	        for i, line in enumerate(lines):
	            if not found_diff_command_line:
	                # Everything before the first file header is ignored.
	                match = re.match(diff_command_re, line)
	                if not match:
	                    continue
	                old_name, new_name = match.groups()
	                info_lines = []
	                found_diff_command_line = True
	                continue

	            # The '|' must be a plain alternation; an escaped '\|' would
	            # only match a literal pipe character.
	            match = re.match(r'(GIT binary patch|--- ([^ ]+).*)', line)
	            if match:
	                if match.group(0) == 'GIT binary patch':
	                    return DiffFile._parse_binaries(lines[i + 1:], old_name,
	                                                    new_name)
	                return DiffFile._parse_text_hunks(lines[i:], old_name,
	                                                  new_name)

	            index_match = re.match(r'^index ([0-9a-f]+)\.\.([0-9a-f]+).*',
	                                   line)
	            if index_match:
	                # Adjusts old_name and new_name for file addition/removal.
	                old_name, new_name = DiffFile._adjust_names(
	                    index_match, old_name, new_name)
	                continue

	            diff_match = re.match(diff_command_re, line)
	            if diff_match:
	                if info_lines:
	                    # There are no hunks. Renaming without any
	                    # modification, or adding/removing an empty file.
	                    return (DiffFile(old_name, new_name, info=info_lines),
	                            lines[i:])
	                # The previous header had nothing to render; building a
	                # DiffFile from it would trip the constructor assertion.
	                # Drop it and continue with this header.
	                old_name, new_name = diff_match.groups()
	                continue

	            # File mode, rename summary, etc.
	            info_lines.append(line)

	        if found_diff_command_line and info_lines:
	            return (DiffFile(old_name, new_name, info=info_lines), [])
	        return (None, lines)

	    @staticmethod
	    def _parse_binaries(lines, old_name, new_name):
	        """Parses two consecutive binary hunks starting at lines[0].

	        The first hunk is taken as the new content and the second one as
	        the old content.
	        """
	        new_binary, remaining_lines = BinaryHunk.parse(lines)
	        old_binary, remaining_lines = BinaryHunk.parse(remaining_lines)
	        diff_file = DiffFile(
	            old_name, new_name, binaries=(old_binary, new_binary))
	        return (diff_file, remaining_lines)

	    @staticmethod
	    def _parse_text_hunks(lines, old_name, new_name):
	        """Parses text hunks; lines[0] is the '--- ' line of the file.

	        Raises:
	            ValueError: lines[0] is not followed by a '+++ ' line, or no
	                hunk follows.
	        """
	        if len(lines) < 2 or not lines[1].startswith('+++ '):
	            raise ValueError('"+++ " line is missing after "{}"'.format(
	                lines[0]))
	        hunks, remaining_lines = DiffHunk.parse(lines[2:])
	        return (DiffFile(old_name, new_name, hunks=hunks), remaining_lines)

	    @staticmethod
	    def _adjust_names(match, old_name, new_name):
	        """Blanks the name on the side whose 'index' hash is all zeros.

	        Args:
	            match: A match object of an 'index <old>..<new>' line.
	            old_name: The old name taken from the file header.
	            new_name: The new name taken from the file header.

	        Returns:
	            A tuple of (old_name, new_name); a name is '' when the hash on
	            its side consists of zeros only.
	        """
	        old_index = match.group(1)
	        new_index = match.group(2)
	        if old_index and re.match(r'^0+$', old_index):
	            old_name = ''
	        if new_index and re.match(r'^0+$', new_index):
	            new_name = ''
	        return (old_name, new_name)


	class DiffHunk(object):
	"""Represents a single text hunk, starting with '@@ -d,d +d,d @@'.

	This class also has code to detect character-level diff.
	"""

	def __init__(self, old_start, new_start, context, lines):
	self._old_start = old_start
	self._new_start = new_start
	self._context = ''
	if context:
	self._context = context
	if self._context.startswith(' '):
	self._context = self._context[1:]
	self._lines = lines
	# _annotations is a list of None or a list of tuples.
	# A tuple consists of start index and end index, and it represents a
	# modified part of a line, which should be highlighted in the pretty
	# diff.
	self._annotations = [None for _ in self._lines]
	for deleted_index, inserted_index in self._find_operations(
	self._lines):
	DiffHunk._annotate_character_diff(
	self._lines, deleted_index, inserted_index, self._annotations)

	@staticmethod
	def _find_operations(lines):
	"""Finds 'operations' in the hunk.

	A hunk contains one or more operations, and an operation is one of the
	followings:
	- Replace operation: '-' lines, followed by '+' lines
	- Delete operation: '-' lines, not followed by '+' lines
	- Insertion operation: '+' lines
	"""
	# List of tuples which consist of (list of '-' line index, list of '+' line index)
	operations = []
	inserted_index = []
	deleted_index = []
	for i, line in enumerate(lines):
	if line[0] == ' ':
	if deleted_index or inserted_index:
	operations.append((deleted_index, inserted_index))
	deleted_index = []
	inserted_index = []
	elif line[0] == '-':
	if inserted_index:
	operations.append((deleted_index, inserted_index))
	deleted_index = []
	inserted_index = []
	deleted_index.append(i)
	else:
	assert line[0] == '+'
	inserted_index.append(i)
	if deleted_index or inserted_index:
	operations.append((deleted_index, inserted_index))
	return operations

	@staticmethod
	def _annotate_character_diff(lines, deleted_index, inserted_index,
	annotations):
	assert len(lines) == len(annotations)
	if not deleted_index:
	for i in inserted_index:
	annotations[i] = [(0, len(lines[i]) - 1)]
	return

	if not inserted_index:
	for i in deleted_index:
	annotations[i] = [(0, len(lines[i]) - 1)]
	return

	deleted_str = ''.join([lines[i][1:] for i in deleted_index])
	inserted_str = ''.join([lines[i][1:] for i in inserted_index])
	matcher = difflib.SequenceMatcher(None, deleted_str, inserted_str)
	for tag, d_start, d_end, i_start, i_end in matcher.get_opcodes():
	if tag == 'delete':
	DiffHunk._annotate(lines, deleted_index[0], d_start, d_end,
	annotations)
	elif tag == 'insert':
	DiffHunk._annotate(lines, inserted_index[0], i_start, i_end,
	annotations)
	elif tag == 'replace':
	DiffHunk._annotate(lines, deleted_index[0], d_start, d_end,
	annotations)
	DiffHunk._annotate(lines, inserted_index[0], i_start, i_end,
	annotations)

	@staticmethod
	def _annotate(lines, index, start, end, annotations):
	    """Records the character range [start, end) as changed.

	    Offsets are counted from the beginning of lines[index] and do not
	    include the first character (the '-'/'+' marker) of any line. A range
	    that starts or ends beyond lines[index] is carried over to the
	    following lines.

	    Args:
	        lines: Lines of the hunk.
	        index: Index in |lines| of the line the offsets are relative to.
	        start: Start offset of the range.
	        end: End offset of the range (exclusive).
	        annotations: List with one entry per line. (start, end) tuples
	            relative to each touched line are appended in place.
	    """
	    # Walk forward with a loop instead of recursing once per line: a range
	    # located thousands of lines after lines[index] would otherwise exceed
	    # Python's recursion limit.
	    while True:
	        assert index < len(lines)
	        line_len = len(lines[index]) - 1
	        if line_len == 0 and start == 0:
	            # An empty line inside the range: mark it, keep the range as
	            # is for the next line.
	            annotations[index] = [(0, 0)]
	        elif start >= line_len:
	            # The range begins after this line; rebase the offsets.
	            start -= line_len
	            end -= line_len
	        else:
	            if not annotations[index]:
	                annotations[index] = []
	            annotations[index].append((start, min(line_len, end)))
	            if end <= line_len:
	                return
	            # The remainder of the range starts at the next line's head.
	            start = 0
	            end -= line_len
	        index += 1

	def prettify_code(self, index, klass):
	    """Returns the HTML code cell for the line at |index|.

	    Args:
	        index: Index of the line in self._lines.
	        klass: CSS class name put on the cell and on highlighted spans.

	    Returns:
	        A string starting with an opening <td> tag, followed by the
	        escaped line content (without its first character) in which the
	        annotated ranges are wrapped in "strong" spans.
	    """
	    text = self._lines[index][1:]
	    ranges = self._annotations[index]
	    if not ranges:
	        return '<td class="code {klass}">{code}'.format(
	            klass=klass, code=cgi.escape(text))

	    first_start, first_end = ranges[0]
	    if first_start == 0 and first_end == len(text):
	        # The whole line changed: highlight the cell itself, no spans.
	        return '<td class="code {klass} strong">{code}'.format(
	            klass=klass, code=cgi.escape(text))

	    pieces = ['<td class="code {}">'.format(klass)]
	    cursor = 0
	    for start, end in ranges:
	        pieces.append(cgi.escape(text[cursor:start]))
	        pieces.append('<span class="{} strong">'.format(klass))
	        pieces.append(cgi.escape(text[start:end]))
	        pieces.append('</span>')
	        cursor = end
	    pieces.append(cgi.escape(text[cursor:]))
	    return ''.join(pieces)

	def prettify(self):
	    """Returns the HTML table rows for this hunk.

	    The first row is the hunk header showing the escaped context text.
	    One row follows per line, with the old and/or new line number
	    columns filled in according to the line's leading character.
	    """
	    context_row = ('<tr><td class=lineno>{old_lineno}<td '
	                   'class=lineno>{new_lineno}<td class=code>{code}'
	                   '</tr>\n')
	    deleted_row = '<tr><td class=lineno>{lineno}<td class=emptylineno>{code}</tr>\n'
	    inserted_row = '<tr><td class=emptylineno><td class=lineno>{lineno}{code}</tr>\n'

	    rows = [('<tr><td class=hunkheader>@@<td class=hunkheader>@@'
	             '<td class=hunkheader>{}</tr>\n').format(
	                 cgi.escape(self._context))]
	    old_no, new_no = self._old_start, self._new_start
	    for position, text in enumerate(self._lines):
	        marker = text[0]
	        if marker == ' ':
	            # Context lines exist on both sides and advance both counters.
	            rows.append(context_row.format(
	                old_lineno=old_no,
	                new_lineno=new_no,
	                code=cgi.escape(text[1:])))
	            old_no += 1
	            new_no += 1
	        elif marker == '-':
	            rows.append(deleted_row.format(
	                lineno=old_no, code=self.prettify_code(position, 'del')))
	            old_no += 1
	        else:
	            assert marker == '+'
	            rows.append(inserted_row.format(
	                lineno=new_no, code=self.prettify_code(position, 'add')))
	            new_no += 1
	    return ''.join(rows)

	@staticmethod
	def parse(lines):
	    """Builds DiffHunk instances from diff lines.

	    Lines before the first hunk header are skipped. After a header, lines
	    starting with ' ', '-' or '+' are collected as the hunk body; any
	    other line closes the hunk and either starts the next one (if it is a
	    hunk header) or stops the parsing.

	    Args:
	        lines: A list of diff lines.

	    Returns:
	        A tuple of (list of DiffHunk, list of unconsumed lines).

	    Raises:
	        ValueError: No hunk header was found in |lines|.
	    """
	    header_pattern = re.compile(
	        r'^@@ -(\d+)(?:,\d+)? \+(\d+)(?:,\d+)? @@(.*)?')
	    hunks = []
	    # Constructor arguments of the hunk being collected:
	    # [old_start, new_start, context, hunk_lines]. None until the first
	    # hunk header is seen.
	    pending = None
	    for position, text in enumerate(lines):
	        if pending is not None and text.startswith((' ', '-', '+')):
	            pending[3].append(text)
	            continue
	        header = header_pattern.match(text)
	        if pending is None:
	            # Still looking for the first hunk header.
	            if header:
	                pending = [int(header.group(1)), int(header.group(2)),
	                           header.group(3), []]
	            continue
	        # A line that is not part of the body closes the current hunk.
	        hunks.append(DiffHunk(*pending))
	        if not header:
	            return (hunks, lines[position:])
	        pending = [int(header.group(1)), int(header.group(2)),
	                   header.group(3), []]
	    if pending is None:
	        raise ValueError('Found no hunks')
	    hunks.append(DiffHunk(*pending))
	    return (hunks, [])


	class BinaryHunk(object):
	    """Represents a binary hunk.

	    A binary diff for a single file contains two binary hunks. An
	    instance of this class represents a single binary hunk.
	    """

	    def __init__(self, bin_type, size, bin_data):
	        """Initializes a binary hunk.

	        Args:
	            bin_type: 'literal' or 'delta'.
	            size: The size shown in the hunk header, as an integer.
	            bin_data: The hunk payload. prettify() passes it to
	                zlib.decompress() for 'literal' image hunks.
	        """
	        assert bin_type in ('literal', 'delta')
	        self._type = bin_type
	        self._size = size
	        self._compressed_data = bin_data

	    def prettify(self, mime_type, klass):
	        """Returns an HTML table row fragment describing this hunk.

	        Args:
	            mime_type: MIME type string of the file the hunk belongs to.
	            klass: CSS class name put on the cell.

	        Returns:
	            A string with a summary of the hunk, followed by an inline
	            <img> for 'literal' hunks of image types, or by a note that
	            the content can't be rendered.
	        """
	        result_html = (
	            '<tr><td class=emptylineno><td class=emptylineno>'
	            '<td class="{klass} strong binary">Binary {type}; {size}'
	            ' Bytes<br>\n').format(
	                klass=klass, type=self._type, size=self._size)
	        if self._type == 'delta':
	            # Because we can assume the input diff is always produced by
	            # git, we can obtain the original blob, apply the delta, and
	            # render both of the original blob and the patched blob.
	            # However, we're not sure how much it is worth to do.
	            #
	            # For 'delta' format, see patch_delta() in patch-delta.c.
	            # https://github.com/git/git/blob/master/patch-delta.c
	            return result_html + 'We don\'t support rendering a delta binary hunk.'
	        if mime_type.startswith('image/'):
	            # b64encode() returns bytes on Python 3; decode it so that the
	            # data URI doesn't end up containing a b'...' literal.
	            encoded = base64.b64encode(
	                zlib.decompress(self._compressed_data)).decode('ascii')
	            return result_html + '<img src="data:{type};base64,{data}">'.format(
	                type=mime_type, data=encoded)
	        return result_html + 'We don\'t support rendering {} binary.'.format(
	            mime_type)

	    @staticmethod
	    def parse(lines):
	        """Creates a BinaryHunk instance starting with lines[0].

	        Args:
	            lines: A list of diff lines. lines[0] must be a
	                "literal <size>" or "delta <size>" header, followed by
	                base85-encoded lines and a terminating empty line.

	        Returns:
	            A tuple of the BinaryHunk instance and unconsumed lines.

	        Raises:
	            ValueError: The header is missing, an encoded line is shorter
	                than its length letter claims, or the terminating empty
	                line is missing.
	        """
	        # The '|' must stay unescaped: it is the alternation between the
	        # two hunk types, not a literal character of the header.
	        match = re.match(r'(literal|delta) (\d+)', lines[0])
	        if not match:
	            raise ValueError('No "literal <size>" or "delta <size>".')
	        bin_type = match.group(1)
	        size = int(match.group(2))
	        # NOTE(review): assumes decode_base85() returns the same string
	        # type as '' - confirm when running on Python 3.
	        bin_data = ''

	        lines = lines[1:]
	        for i, line in enumerate(lines):
	            if not line:
	                return (BinaryHunk(bin_type, size, bin_data), lines[i + 1:])
	            line_length_letter = line[0]
	            # Map a letter to a number.
	            #   A-Z -> 1-26
	            #   a-z -> 27-52
	            line_length = 1 + ord(line_length_letter) - ord('A')
	            if line_length_letter >= 'a':
	                line_length = 27 + ord(line_length_letter) - ord('a')
	            # Five encoded characters carry four bytes, so the rest of
	            # the line must be long enough for the declared byte count.
	            if line_length * 5 > (len(line) - 1) * 4:
	                raise ValueError('Base85 length mismatch: length by the first '
	                                 'letter:{}, actual:{}, line:"{}"'.format(
	                                     line_length * 5, (len(line) - 1) * 4,
	                                     line))
	            bin_data += decode_base85(line[1:])[0:line_length]
	        raise ValueError('No blank line terminating a binary hunk.')