Merge branch 'iambus-master'
diff --git a/diffmatchpatch/diff.go b/diffmatchpatch/diff.go index 34ce7ce..08c36e7 100644 --- a/diffmatchpatch/diff.go +++ b/diffmatchpatch/diff.go
@@ -1320,7 +1320,6 @@ // diffLinesToStrings splits two texts into a list of strings. Each string represents one line. func (dmp *DiffMatchPatch) diffLinesToStrings(text1, text2 string) (string, string, []string) { - // '\x00' is a valid character, but various debuggers don't like it. So we'll insert a junk entry to avoid generating a null character. lineArray := []string{""} // e.g. lineArray[4] == 'Hello\n' lineHash := make(map[string]int) @@ -1331,12 +1330,11 @@ return intArrayToString(strIndexArray1), intArrayToString(strIndexArray2), lineArray } -// diffLinesToStringsMunge splits a text into an array of strings, and reduces the texts to a []string. -func (dmp *DiffMatchPatch) diffLinesToStringsMunge(text string, lineArray *[]string, lineHash map[string]int) []uint32 { - // Walk the text, pulling out a substring for each line. text.split('\n') would would temporarily double our memory footprint. Modifying text would create many large strings to garbage collect. +// diffLinesToStringsMunge splits a text into an array of strings, and reduces the texts to a []index. +func (dmp *DiffMatchPatch) diffLinesToStringsMunge(text string, lineArray *[]string, lineHash map[string]int) []index { lineStart := 0 lineEnd := -1 - strs := []uint32{} + strs := []index{} for lineEnd < len(text)-1 { lineEnd = indexOf(text, "\n", lineStart) @@ -1350,11 +1348,11 @@ lineValue, ok := lineHash[line] if ok { - strs = append(strs, uint32(lineValue)) + strs = append(strs, index(lineValue)) } else { *lineArray = append(*lineArray, line) lineHash[line] = len(*lineArray) - 1 - strs = append(strs, uint32(len(*lineArray)-1)) + strs = append(strs, index(len(*lineArray)-1)) } }
diff --git a/diffmatchpatch/diff_test.go b/diffmatchpatch/diff_test.go index f63bf98..2c43864 100644 --- a/diffmatchpatch/diff_test.go +++ b/diffmatchpatch/diff_test.go
@@ -332,13 +332,14 @@ lineList := []string{ "", // Account for the initial empty element of the lines array. } - var charList []rune + var charList []index for x := 1; x < n+1; x++ { lineList = append(lineList, strconv.Itoa(x)+"\n") - charList = append(charList, rune(x)) + charList = append(charList, index(x)) } lines := strings.Join(lineList, "") - chars := string(charList) + chars := indexesToString(charList) + assert.Equal(t, n, len(charList)) actualChars1, actualChars2, actualLines := dmp.DiffLinesToChars(lines, "") assert.Equal(t, chars, actualChars1) @@ -379,12 +380,13 @@ lineList := []string{ "", // Account for the initial empty element of the lines array. } - charList := []rune{} + charList := []index{} for x := 1; x <= n; x++ { lineList = append(lineList, strconv.Itoa(x)+"\n") - charList = append(charList, rune(x)) + charList = append(charList, index(x)) } - chars := string(charList) + assert.Equal(t, n, len(charList)) + chars := indexesToString(charList) actual := dmp.DiffCharsToLines([]Diff{Diff{DiffDelete, chars}}, lineList) assert.Equal(t, []Diff{Diff{DiffDelete, strings.Join(lineList, "")}}, actual)
diff --git a/diffmatchpatch/index.go b/diffmatchpatch/index.go new file mode 100644 index 0000000..965a1c6 --- /dev/null +++ b/diffmatchpatch/index.go
// index.go (package diffmatchpatch): conversion between line indexes and the
// runes that encode them inside a string.

// index is the position of a unique line in the line table. Each index is
// encoded as a single rune so that a whole text can be represented as a string
// with one rune per line.
type index uint32

// Code points in [runeSkipStart, runeSkipEnd) are UTF-16 surrogate halves.
// They are not valid runes: converting one to a string yields U+FFFD instead,
// so the encoding below skips over that range.
const (
	runeSkipStart = 0xd800     // first surrogate code point
	runeSkipEnd   = 0xdfff + 1 // one past the last surrogate code point
	runeMax       = 0x110000   // next invalid code point
)

// stringToIndex is the inverse of indexesToString: it decodes text into one
// index per rune, removing the surrogate gap that indexesToString inserted.
//
// The result always has one element per rune of text; an empty text yields an
// empty slice.
func stringToIndex(text string) []index {
	// Number of code points skipped by the encoding.
	const gap = runeSkipEnd - runeSkipStart

	runes := []rune(text)
	indexes := make([]index, len(runes))
	for i, r := range runes {
		idx := index(r)
		// Decoding a string never produces a surrogate, so every rune at or
		// above runeSkipEnd was shifted up by gap when it was encoded.
		if r >= runeSkipEnd {
			idx -= gap
		}
		indexes[i] = idx
	}
	return indexes
}

// indexesToString encodes each index as one rune and returns the resulting
// string. Indexes below runeSkipStart map to the rune with the same value;
// all others are shifted up past the surrogate range.
//
// Only indexes below runeMax - (runeSkipEnd - runeSkipStart) survive a round
// trip through stringToIndex. Larger values map to invalid runes (or wrap
// around uint32), which the string conversion replaces with U+FFFD, so
// distinct indexes would become indistinguishable.
func indexesToString(indexes []index) string {
	// Number of code points skipped by the encoding.
	const gap = runeSkipEnd - runeSkipStart

	runes := make([]rune, len(indexes))
	for i, idx := range indexes {
		if idx >= runeSkipStart {
			idx += gap
		}
		runes[i] = rune(idx)
	}
	return string(runes)
}
diff --git a/diffmatchpatch/index_test.go b/diffmatchpatch/index_test.go new file mode 100644 index 0000000..6f1d982 --- /dev/null +++ b/diffmatchpatch/index_test.go
@@ -0,0 +1,16 @@ +package diffmatchpatch + +import ( + "github.com/stretchr/testify/assert" + "testing" +) + +func TestIndexConversion(t *testing.T) { + n := runeMax - (runeSkipEnd - runeSkipStart) + indexes := make([]index, n) + for i := 0; i < n; i++ { + indexes[i] = index(i) + } + indexes2 := stringToIndex(indexesToString(indexes)) + assert.EqualValues(t, indexes, indexes2) +}
diff --git a/diffmatchpatch/patch_test.go b/diffmatchpatch/patch_test.go index b019f88..c564f8c 100644 --- a/diffmatchpatch/patch_test.go +++ b/diffmatchpatch/patch_test.go
@@ -337,3 +337,28 @@ assert.Equal(t, tc.ExpectedApplies, actualApplies, fmt.Sprintf("Test case #%d, %s", i, tc.Name)) } } + +func TestPatchMakeOutOfRangePanic(t *testing.T) { + text1 := ` + 1111111111111 000000 + ------------- ------ + xxxxxxxxxxxxx ------ + xxxxxxxxxxxxx ------ + xxxxxxxxxxxxx xxxxxx + xxxxxxxxxxxxx ...... + xxxxxxxxxxxxx 111111 + xxxxxxxxxxxxx ?????? + xxxxxxxxxxxxx 333333 + xxxxxxxxxxxxx 555555 + xxxxxxxxxx xxxxx + xxxxxxxxxx xxxxx + xxxxxxxxxx xxxxx + xxxxxxxxxx xxxxx +` + text2 := ` + 2222222222222 000000 + xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx` + dmp := New() + patches := dmp.PatchMake(text1, text2) + assert.Equal(t, 6, len(patches), "TestPatchMakeOutOfRangePanic") +}
diff --git a/diffmatchpatch/stringutil.go b/diffmatchpatch/stringutil.go index eb727bb..573b6bf 100644 --- a/diffmatchpatch/stringutil.go +++ b/diffmatchpatch/stringutil.go
@@ -93,14 +93,14 @@ return -1 } -func intArrayToString(ns []uint32) string { +func intArrayToString(ns []index) string { if len(ns) == 0 { return "" } b := []rune{} for _, n := range ns { - b = append(b, intToRune(n)) + b = append(b, intToRune(uint32(n))) } return string(b) }