blob: 82cb1482482ef62b3893715d49796abc2369e5d2 [file] [log] [blame]
###
Module difflib -- helpers for computing deltas between objects.
Function get_close_matches(word, possibilities, n=3, cutoff=0.6):
Use SequenceMatcher to return list of the best "good enough" matches.
Function context_diff(a, b):
For two lists of strings, return a delta in context diff format.
Function ndiff(a, b):
Return a delta: the difference between `a` and `b` (lists of strings).
Function restore(delta, which):
Return one of the two sequences that generated an ndiff delta.
Function unified_diff(a, b):
For two lists of strings, return a delta in unified diff format.
Class SequenceMatcher:
A flexible class for comparing pairs of sequences of any type.
Class Differ:
For producing human-readable deltas from sequences of lines of text.
Class HtmlDiff:
For producing HTML side by side comparison with change highlights.
###
{floor, max, min} = Math
{Heap} = require('heap')
assert = require('assert')
log = console.log.bind(console)
calculateRatio = (matches, length) ->
if length then (2.0 * matches / length) else 1.0
arrayCmp = (a, b) ->
for i in [0...max(a.length, b.length)]
return -1 if a[i] < b[i]
return 1 if a[i] > b[i]
0
class SequenceMatcher
constructor: (@isjunk, a='', b='', @autojunk=true) ->
@a = @b = null
@setSeqs(a, b)
###
Set the two sequences to be compared.
###
setSeqs: (a, b) ->
@setSeq1(a)
@setSeq2(b)
###
Set the first sequence to be compared.
The second sequence to be compared is not changed.
###
setSeq1: (a) ->
return if a is @a
@a = a
@matchingBlocks = @opcodes = null
###
Set the second sequence to be compared.
The first sequence to be compared is not changed.
###
setSeq2: (b) ->
return if b is @b
@b = b
@matchingBlocks = @opcodes = null
@fullbcount = null
@_chainB()
_chainB: ->
b = @b
@b2j = b2j = {}
for elt, i in b
indices = if elt of b2j then b2j[elt] else b2j[elt] = []
indices.push(i)
junk = {}
isjunk = @isjunk
if isjunk
for elt in Object.keys(b2j)
if isjunk(elt)
junk[elt] = true
delete b2j[elt]
popular = {}
n = b.length
if @autojunk and n >= 200
ntest = floor(n / 100) + 1
for elt, idxs of b2j
if idxs.length > ntest
popular[elt] = true
delete b2j[elt]
@isbjunk = (b) -> b of junk
@isbpopular = (b) -> b of popular
###
Find longest matching block in a[alo:ahi] and b[blo:bhi].
###
findLongestMatch: (alo, ahi, blo, bhi) ->
[a, b, b2j, isbjunk] = [@a, @b, @b2j, @isbjunk]
[besti, bestj, bestsize] = [alo, blo, 0]
j2len = {}
for i in [alo...ahi]
newj2len = {}
for j in (if a[i] of b2j then b2j[a[i]] else [])
continue if j < blo
break if j >= bhi
k = newj2len[j] = (j2len[j-1] or 0) + 1
if k > bestsize
[besti, bestj, bestsize] = [i-k+1,j-k+1,k]
j2len = newj2len
while besti > alo and bestj > blo and
not isbjunk(b[bestj-1]) and
a[besti-1] is b[bestj-1]
[besti, bestj, bestsize] = [besti-1, bestj-1, bestsize+1]
while besti+bestsize < ahi and bestj+bestsize < bhi and
not isbjunk(b[bestj+bestsize]) and
a[besti+bestsize] is b[bestj+bestsize]
bestsize++
while besti > alo and bestj > blo and
isbjunk(b[bestj-1]) and
a[besti-1] is b[bestj-1]
[besti,bestj,bestsize] = [besti-1, bestj-1, bestsize+1]
while besti+bestsize < ahi and bestj+bestsize < bhi and
isbjunk(b[bestj+bestsize]) and
a[besti+bestsize] is b[bestj+bestsize]
bestsize++
[besti, bestj, bestsize]
###
Return list of matches describing matching subsequences.
###
getMatchingBlocks: ->
return @matchingBlocks if @matchingBlocks
[la, lb] = [@a.length, @b.length]
queue = [[0, la, 0, lb]]
matchingBlocks = []
while queue.length
[alo, ahi, blo, bhi] = queue.pop()
[i, j, k] = x = @findLongestMatch(alo, ahi, blo, bhi)
if k
matchingBlocks.push(x)
if alo < i and blo < j
queue.push([alo, i, blo, j])
if i+k < ahi and j+k < bhi
queue.push([i+k, ahi, j+k, bhi])
matchingBlocks.sort(arrayCmp)
i1 = j1 = k1 = 0
nonAdjacent = []
for [i2, j2, k2] in matchingBlocks
if i1 + k1 is i2 and j1 + k1 is j2
k1 += k2
else
if k1
nonAdjacent.push([i1, j1, k1])
[i1, j1, k1] = [i2, j2, k2]
if k1
nonAdjacent.push([i1, j1, k1])
nonAdjacent.push([la, lb, 0])
@matchingBlocks = nonAdjacent
###
Return list of 5-tuples describing how to turn a into b
###
getOpcodes: ->
return @opcodes if @opcodes
i = j = 0
@opcodes = answer = []
for [ai, bj, size] in @getMatchingBlocks()
tag = ''
if i < ai and j < bj
tag = 'replace'
else if i < ai
tag = 'delete'
else if j < bj
tag = 'insert'
if tag
answer.push([tag, i, ai, j, bj])
[i, j] = [ai+size, bj+size]
if size
answer.push(['equal', ai, i, bj, j])
answer
###
Isolate change clusters by eliminating ranges with no changes.
XXX: not match
###
getGroupedOpcodes: (n=3) ->
codes = @getOpcodes()
unless codes.length
codes = [['equal', 0, 1, 0, 1]]
if codes[0][0] is 'equal'
[tag, i1, i2, j1, j2] = codes[0]
codes[0] = [tag, max(i1, i2-n), i2, max(j1, j2-n), j2]
if codes[codes.length-1][0] is 'equal'
[tag, i1, i2, j1, j2] = codes[codes.length-1]
codes[codes.length-1] = [tag, i1, min(i2, i1+n), j1, min(j2, j1+n)]
nn = n + n
groups = []
group = []
for [tag, i1, i2, j1, j2] in codes
if tag is 'equal' and i2-i1 > nn
group.push([tag, i1, min(i2, i1+n), j1, min(j2, j1+n)])
groups.push(group)
group = []
[i1, j1] = [max(i1, i2-n), max(j1, j2-n)]
group.push([tag, i1, i2, j1, j2])
if group.length and not (group.length is 1 and group[0][0] is 'equal')
groups.push(group)
groups
ratio: ->
matches = @getMatchingBlocks().reduce ((sum, match) ->
sum += match[2]
), 0
calculateRatio(matches, @a.length + @b.length)
quickRatio: ->
unless @fullbcount
@fullbcount = fullbcount = {}
for elt in @b
fullbcount[elt] = (fullbcount[elt] or 0) + 1
fullbcount = @fullbcount
avail = {}
matches = 0
for elt in @a
if elt of avail
numb = avail[elt]
else
numb = fullbcount[elt] or 0
avail[elt] = numb - 1
if numb > 0
matches++
calculateRatio(matches, @a.length + @b.length)
realQuickRatio: ->
[la, lb] = [@a.length, @b.length]
calculateRatio(min(la, lb), la + lb)
getCloseMatches = (word, possibilities, n=3, cutoff=0.6) ->
unless n > 0
throw new Error("n must be > 0: (#{n})")
unless 0.0 <= cutoff <= 1.0
throw new Error("cutoff must be in [0.0, 1.0]: (#{cutoff})")
result = []
s = new SequenceMatcher()
s.setSeq2(word)
for x in possibilities
s.setSeq1(x)
if s.realQuickRatio() >= cutoff and
s.quickRatio() >= cutoff and
s.ratio() >= cutoff
result.push([s.ratio(), x])
result = Heap.nlargest(n, result)
(x for [score, x] in result)
###
Return number of `ch` characters at the start of `line`.
###
_countLeading = (line, ch) ->
[i, n] = [0, line.length]
while i < n and line[i] is ch
i++
i
class Differ
constructor: (@linejunk, @charjunk) ->
compare: (a, b) ->
cruncher = new SequenceMatcher(@linejunk, a, b)
lines = []
for [tag, alo, ahi, blo, bhi] in cruncher.getOpcodes()
switch tag
when 'replace'
g = @_fancyReplace(a, alo, ahi, b, blo, bhi)
when 'delete'
g = @_dump('-', a, alo, ahi)
when 'insert'
g = @_dump('+', b, blo, bhi)
when 'equal'
g = @_dump(' ', a, alo, ahi)
else
throw new Error("unknow tag (#{tag})")
lines.push(line) for line in g
lines
_dump: (tag, x, lo, hi) ->
("#{tag} #{x[i]}" for i in [lo...hi])
_plainReplace: (a, alo, ahi, b, blo, bhi) ->
assert(alo < ahi and blo < bhi)
if bhi - blo < ahi - alo
first = @_dump('+', b, blo, bhi)
second = @_dump('-', a, alo, ahi)
else
first = @_dump('-', a, alo, ahi)
second = @_dump('+', b, blo, bhi)
lines = []
lines.push(line) for line in g for g in [first, second]
lines
_fancyReplace: (a, alo, ahi, b, blo, bhi) ->
[bestRatio, cutoff] = [0.74, 0.75]
cruncher = new SequenceMatcher(@charjunk)
[eqi, eqj] = [null, null]
lines = []
for j in [blo...bhi]
bj = b[j]
cruncher.setSeq2(bj)
for i in [alo...ahi]
ai = a[i]
if ai is bj
if eqi is null
[eqi, eqj] = [i, j]
continue
cruncher.setSeq1(ai)
if cruncher.realQuickRatio() > bestRatio and
cruncher.quickRatio() > bestRatio and
cruncher.ratio() > bestRatio
[bestRatio, besti, bestj] = [cruncher.ratio(), i, j]
if bestRatio < cutoff
if eqi is null
for line in @_plainReplace(a, alo, ahi, b, blo, bhi)
lines.push(line)
return lines
[besti, bestj, bestRatio] = [eqi, eqj, 1.0]
else
eqi = null
for line in @_fancyHelper(a, alo, besti, b, blo, bestj)
lines.push(line)
[aelt, belt] = [a[besti], b[bestj]]
if eqi is null
atags = btags = ''
cruncher.setSeqs(aelt, belt)
for [tag, ai1, ai2, bj1, bj2] in cruncher.getOpcodes()
[la, lb] = [ai2 - ai1, bj2 - bj1]
switch tag
when 'replace'
atags += Array(la+1).join('^')
btags += Array(lb+1).join('^')
when 'delete'
atags += Array(la+1).join('-')
when 'insert'
btags += Array(lb+1).join('+')
when 'equal'
atags += Array(la+1).join(' ')
btags += Array(lb+1).join(' ')
else
throw new Error("unknow tag (#{tag})")
for line in @_qformat(aelt, belt, atags, btags)
lines.push(line)
else
lines.push(' ' + aelt)
for line in @_fancyHelper(a, besti+1, ahi, b, bestj+1, bhi)
lines.push(line)
lines
_fancyHelper: (a, alo, ahi, b, blo, bhi) ->
g = []
if alo < ahi
if blo < bhi
g = @_fancyReplace(a, alo, ahi, b, blo, bhi)
else
g = @_dump('-', a, alo, ahi)
else if blo < bhi
g = @_dump('+', b, blo, bhi)
g
_qformat: (aline, bline, atags, btags) ->
lines = []
common = min(_countLeading(aline, '\t'),
_countLeading(bline, '\t'))
common = min(common, _countLeading(atags[0...common], ' '))
common = min(common, _countLeading(btags[0...common], ' '))
atags = atags[common..].trimRight()
btags = btags[common..].trimRight()
lines.push('- ' + aline)
if atags.length
lines.push("? #{Array(common+1).join('\t')}#{atags}\n")
lines.push('+ ' + bline)
if btags.length
lines.push("? #{Array(common+1).join('\t')}#{btags}\n")
lines
exports = module?.exports or (window.difflib = {})
exports.SequenceMatcher = SequenceMatcher
exports.getCloseMatches = getCloseMatches
exports._countLeading = _countLeading
exports.Differ = Differ