fix: use common lineHash to share indices between text1 and text2
Use a common cache of line contents, shared between the two texts, in `DiffLinesToChars` so that line diffs are computed correctly.
In some cases, the standard line-mode approach (https://github.com/google/diff-match-patch/wiki/Line-or-Word-Diffs#line-mode) did not produce correct line diffs.
In the case below, the line diffs were computed incorrectly before this fix.
```go:main.go
package main
import (
"fmt"
"github.com/sergi/go-diff/diffmatchpatch"
)
// text1 and text2 are the two inputs that main diffs line by line.
// Compared with text1, text2 replaces "- arrayitem1" with "- arrayitem4",
// adds a "- arrayitem3" line, and changes "step22: -93" to "step22: -92";
// every other line is identical in both texts.
const (
text1 = `hoge:
step11:
- arrayitem1
- arrayitem2
step12:
step21: hoge
step22: -93
fuga: flatitem
`
text2 = `hoge:
step11:
- arrayitem4
- arrayitem2
- arrayitem3
step12:
step21: hoge
step22: -92
fuga: flatitem
`
)
// main runs the line-mode diff recipe on text1 and text2 and prints the
// resulting diffs: encode the lines, diff the encodings, then map the
// encoded diffs back to lines.
func main() {
	differ := diffmatchpatch.New()

	// lineTable is the line array later handed to DiffCharsToLines.
	encoded1, encoded2, lineTable := differ.DiffLinesToChars(text1, text2)

	encodedDiffs := differ.DiffMain(encoded1, encoded2, false)
	lineDiffs := differ.DiffCharsToLines(encodedDiffs, lineTable)

	// Semantic cleanup is deliberately left disabled so the raw result is printed:
	// lineDiffs = differ.DiffCleanupSemantic(lineDiffs)
	fmt.Println(lineDiffs)
}
```
```text:output
[{Insert hoge:
step11:
hoge:
} {Equal hoge:
} {Insert hoge:
} {Equal step11:
} {Insert hoge:
} {Equal - arrayitem1
} {Insert hoge:
} {Equal - arrayitem2
} {Insert hoge:
} {Equal step12:
} {Insert hoge:
} {Equal step21: hoge
} {Insert hoge:
} {Equal step22: -93
} {Delete fuga: flatitem
}]
```
Note: This fix matches the behavior of the JavaScript implementation.
(ref: https://github.com/google/diff-match-patch/blob/62f2e689f498f9c92dbc588c58750addec9b1654/javascript/diff_match_patch_uncompressed.js#L466)
diff --git a/diffmatchpatch/diff.go b/diffmatchpatch/diff.go
index 2a9f2dc..4f7b424 100644
--- a/diffmatchpatch/diff.go
+++ b/diffmatchpatch/diff.go
@@ -1313,17 +1313,17 @@
// '\x00' is a valid character, but various debuggers don't like it. So we'll insert a junk entry to avoid generating a null character.
lineArray := []string{""} // e.g. lineArray[4] == 'Hello\n'
+ lineHash := make(map[string]int)
//Each string has the index of lineArray which it points to
- strIndexArray1 := dmp.diffLinesToStringsMunge(text1, &lineArray)
- strIndexArray2 := dmp.diffLinesToStringsMunge(text2, &lineArray)
+ strIndexArray1 := dmp.diffLinesToStringsMunge(text1, &lineArray, lineHash)
+ strIndexArray2 := dmp.diffLinesToStringsMunge(text2, &lineArray, lineHash)
return intArrayToString(strIndexArray1), intArrayToString(strIndexArray2), lineArray
}
// diffLinesToStringsMunge splits a text into an array of strings, and reduces the texts to a []string.
-func (dmp *DiffMatchPatch) diffLinesToStringsMunge(text string, lineArray *[]string) []uint32 {
+func (dmp *DiffMatchPatch) diffLinesToStringsMunge(text string, lineArray *[]string, lineHash map[string]int) []uint32 {
// Walk the text, pulling out a substring for each line. text.split('\n') would would temporarily double our memory footprint. Modifying text would create many large strings to garbage collect.
- lineHash := map[string]int{} // e.g. lineHash['Hello\n'] == 4
lineStart := 0
lineEnd := -1
strs := []uint32{}
diff --git a/diffmatchpatch/diff_test.go b/diffmatchpatch/diff_test.go
index acb97e3..d6fed50 100644
--- a/diffmatchpatch/diff_test.go
+++ b/diffmatchpatch/diff_test.go
@@ -318,6 +318,8 @@
{"a", "b", "1", "2", []string{"", "a", "b"}},
// Omit final newline.
{"alpha\nbeta\nalpha", "", "1,2,3", "", []string{"", "alpha\n", "beta\n", "alpha"}},
+ // Same lines in Text1 and Text2
+ {"abc\ndefg\n12345\n", "abc\ndef\n12345\n678", "1,2,3", "1,4,3,5", []string{"", "abc\n", "defg\n", "12345\n", "def\n", "678"}},
} {
actualChars1, actualChars2, actualLines := dmp.DiffLinesToChars(tc.Text1, tc.Text2)
assert.Equal(t, tc.ExpectedChars1, actualChars1, fmt.Sprintf("Test case #%d, %#v", i, tc))