Merge pull request #90 from vmarkovtsev/patch-1
Fix DiffLevenshtein counting a single multi-byte rune as multiple edits
diff --git a/diffmatchpatch/diff.go b/diffmatchpatch/diff.go
index 0d1c2d4..cb25b43 100644
--- a/diffmatchpatch/diff.go
+++ b/diffmatchpatch/diff.go
@@ -1236,9 +1236,9 @@
for _, aDiff := range diffs {
switch aDiff.Type {
case DiffInsert:
- insertions += len(aDiff.Text)
+ insertions += utf8.RuneCountInString(aDiff.Text)
case DiffDelete:
- deletions += len(aDiff.Text)
+ deletions += utf8.RuneCountInString(aDiff.Text)
case DiffEqual:
// A deletion and an insertion is one substitution.
levenshtein += max(insertions, deletions)
diff --git a/diffmatchpatch/diff_test.go b/diffmatchpatch/diff_test.go
index 8596999..5c165b1 100644
--- a/diffmatchpatch/diff_test.go
+++ b/diffmatchpatch/diff_test.go
@@ -1153,9 +1153,9 @@
dmp := New()
for i, tc := range []TestCase{
- {"Levenshtein with trailing equality", []Diff{{DiffDelete, "abc"}, {DiffInsert, "1234"}, {DiffEqual, "xyz"}}, 4},
- {"Levenshtein with leading equality", []Diff{{DiffEqual, "xyz"}, {DiffDelete, "abc"}, {DiffInsert, "1234"}}, 4},
- {"Levenshtein with middle equality", []Diff{{DiffDelete, "abc"}, {DiffEqual, "xyz"}, {DiffInsert, "1234"}}, 7},
+ {"Levenshtein with trailing equality", []Diff{{DiffDelete, "абв"}, {DiffInsert, "1234"}, {DiffEqual, "эюя"}}, 4},
+ {"Levenshtein with leading equality", []Diff{{DiffEqual, "эюя"}, {DiffDelete, "абв"}, {DiffInsert, "1234"}}, 4},
+ {"Levenshtein with middle equality", []Diff{{DiffDelete, "абв"}, {DiffEqual, "эюя"}, {DiffInsert, "1234"}}, 7},
} {
actual := dmp.DiffLevenshtein(tc.Diffs)
assert.Equal(t, tc.Expected, actual, fmt.Sprintf("Test case #%d, %s", i, tc.Name))