# (source-viewer residue, not part of the module) blob: 91b55808b1c4a8552f25afdc9efc120729a3d3b2 [file] [log] [blame]
# Copyright 2016 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Utility for outputting an HTML diff of two multi-line strings.

The main purpose of this utility is to show the difference between
text baselines (-expected.txt files) and actual text results.

Note, in the standard library module difflib, there is also a HtmlDiff class,
although it outputs a larger and more complex HTML table than we need.
"""

import difflib

try:
    from cgi import escape
except ImportError:
    # cgi.escape is deprecated in Python3
    from html import escape

_TEMPLATE = """<html>
table { white-space: pre-wrap; font-family: monospace; border-collapse: collapse; }
th { color: #444; background: #eed; text-align: right; vertical-align: baseline; padding: 1px 4px 1px 4px; }
.del { background: #faa; }
.add { background: #afa; }
def html_diff(a_text, b_text):
    """Returns a full HTML page showing the line diff of two strings.

    Args:
        a_text: The first ("before") text; must be a `str`.
        b_text: The second ("after") text; must be a `str`.

    Returns:
        `_TEMPLATE` with its placeholder filled by the table rows that
        `HtmlDiffGenerator.generate_tbody` produces for the two inputs.
    """
    # Stated rationale: the inputs may come from text files of different
    # encodings, so they are required to be `str` rather than unicode objects.
    # NOTE(review): on Python 3 `str` is itself text, not bytes -- confirm
    # what callers are expected to pass there.
    for text in (a_text, b_text):
        assert isinstance(text, str)

    # Keep the line terminators (keepends=True) so they appear in the rows.
    before = a_text.splitlines(True)
    after = b_text.splitlines(True)
    tbody = HtmlDiffGenerator().generate_tbody(before, after)
    return _TEMPLATE % tbody


class HtmlDiffGenerator(object):
    """Renders a line-by-line diff of two line lists as HTML table rows.

    Every emitted line row has the shape ``<tr><th>A<th>B<td>TEXT</tr>``,
    where A and B are the 1-based line numbers in the first and second input.
    The number cell is left empty on the side where the line does not exist,
    and the text cell carries class "del" or "add" for removed/added lines.
    Long runs of unchanged lines are collapsed to a few context lines.
    """

    # NOTE(review): the received source had its indentation stripped and a few
    # statements cut short. The `else` branches in _format_equal and the tails
    # of the calls in generate_tbody, _format_equal and _format_equal_line
    # were reconstructed from the surrounding logic -- confirm against
    # upstream.

    # Unchanged lines kept on each side of a collapsed run.
    _CONTEXT_LINES = 3
    # Runs of at most this many unchanged lines are shown in full (7).
    _COLLAPSE_THRESHOLD = 2 * _CONTEXT_LINES + 1
    # Row emitted in place of the hidden middle of a collapsed run.
    _ELISION_ROW = '<tr><td colspan=3>\n\n</tr>'

    def __init__(self):
        # Number of the most recently consumed line on each side (0 = none).
        self.a_line_no = None
        self.b_line_no = None
        # Total line counts, used to detect a run that reaches the file end.
        self.a_lines_len = None
        self.b_lines_len = None

    def generate_tbody(self, a_lines, b_lines):
        """Returns the concatenated table rows for the diff of two line lists.

        Args:
            a_lines: Lines of the first text (line terminators may be kept).
            b_lines: Lines of the second text.

        Returns:
            A string of `<tr>` rows; empty when both inputs are empty.
        """
        # Reset per-call state so one generator can be reused.
        self.a_line_no = 0
        self.b_line_no = 0
        self.a_lines_len = len(a_lines)
        self.b_lines_len = len(b_lines)
        matcher = difflib.SequenceMatcher(None, a_lines, b_lines)
        output = []
        for tag, a_start, a_end, b_start, b_end in matcher.get_opcodes():
            output.append(
                self._format_chunk(tag, a_lines[a_start:a_end],
                                   b_lines[b_start:b_end]))
        return ''.join(output)

    def _format_chunk(self, tag, a_chunk, b_chunk):
        """Formats one SequenceMatcher opcode; `tag` selects the row style."""
        if tag == 'delete':
            return self._format_delete(a_chunk)
        if tag == 'insert':
            return self._format_insert(b_chunk)
        if tag == 'replace':
            # A replacement is shown as the old lines followed by the new.
            return self._format_delete(a_chunk) + self._format_insert(b_chunk)
        assert tag == 'equal'
        return self._format_equal(a_chunk)

    def _format_equal(self, common_chunk):
        """Formats a run of lines common to both inputs.

        Short runs are shown in full. Longer runs keep only the leading and
        trailing context lines around an elision row; the leading context is
        dropped at the very start of the diff and the trailing context is
        dropped when the run reaches the end of both inputs.
        """
        rows = []
        if len(common_chunk) <= self._COLLAPSE_THRESHOLD:
            for line in common_chunk:
                rows.append(self._format_equal_line(line))
            return ''.join(rows)

        context = self._CONTEXT_LINES

        # Do not show context lines at the beginning of the file.
        if self.a_line_no == 0 and self.b_line_no == 0:
            self.a_line_no += context
            self.b_line_no += context
        else:
            for line in common_chunk[:context]:
                rows.append(self._format_equal_line(line))

        # Skip the hidden middle part, keeping the line counters in step.
        hidden = len(common_chunk) - 2 * context
        self.a_line_no += hidden
        self.b_line_no += hidden
        rows.append(self._ELISION_ROW)

        # Do not show context lines at the end of the file.
        at_end = (self.a_line_no + context == self.a_lines_len
                  and self.b_line_no + context == self.b_lines_len)
        if at_end:
            # Nothing follows; just leave the counters at the file lengths.
            self.a_line_no += context
            self.b_line_no += context
        else:
            for line in common_chunk[len(common_chunk) - context:]:
                rows.append(self._format_equal_line(line))
        return ''.join(rows)

    def _format_equal_line(self, line):
        """Returns the row for one unchanged line, advancing both counters."""
        self.a_line_no += 1
        self.b_line_no += 1
        return '<tr><th>%d<th>%d<td>%s</tr>' % (self.a_line_no, self.b_line_no,
                                                escape(line))

    def _format_insert(self, chunk):
        """Returns rows for lines present only in the second input."""
        rows = []
        for line in chunk:
            self.b_line_no += 1
            rows.append('<tr><th><th>%d<td class="add">%s</tr>' %
                        (self.b_line_no, escape(line)))
        return ''.join(rows)

    def _format_delete(self, chunk):
        """Returns rows for lines present only in the first input."""
        rows = []
        for line in chunk:
            self.a_line_no += 1
            rows.append('<tr><th>%d<th><td class="del">%s</tr>' %
                        (self.a_line_no, escape(line)))
        return ''.join(rows)