From 31050506cd4297dbc0973d0f6dacf142858f6302 Mon Sep 17 00:00:00 2001 From: Josh Bleecher Snyder Date: Mon, 7 Aug 2017 08:41:44 -0700 Subject: [PATCH 01/11] Make Operation a stringer This makes for more pleasant debugging. --- diffmatchpatch/diff.go | 2 ++ diffmatchpatch/operation_string.go | 17 +++++++++++++++++ 2 files changed, 19 insertions(+) create mode 100644 diffmatchpatch/operation_string.go diff --git a/diffmatchpatch/diff.go b/diffmatchpatch/diff.go index 82ad7bc..dedf4ca 100644 --- a/diffmatchpatch/diff.go +++ b/diffmatchpatch/diff.go @@ -25,6 +25,8 @@ import ( // Operation defines the operation of a diff item. type Operation int8 +//go:generate stringer -type=Operation -trimprefix=Diff + const ( // DiffDelete item represents a delete diff. DiffDelete Operation = -1 diff --git a/diffmatchpatch/operation_string.go b/diffmatchpatch/operation_string.go new file mode 100644 index 0000000..533ec0d --- /dev/null +++ b/diffmatchpatch/operation_string.go @@ -0,0 +1,17 @@ +// Code generated by "stringer -type=Operation -trimprefix=Diff"; DO NOT EDIT. 
+ +package diffmatchpatch + +import "fmt" + +const _Operation_name = "DeleteEqualInsert" + +var _Operation_index = [...]uint8{0, 6, 11, 17} + +func (i Operation) String() string { + i -= -1 + if i < 0 || i >= Operation(len(_Operation_index)-1) { + return fmt.Sprintf("Operation(%d)", i+-1) + } + return _Operation_name[_Operation_index[i]:_Operation_index[i+1]] +} From 6cf26334a5b56ff9152e963b369cd76bfae0e2d5 Mon Sep 17 00:00:00 2001 From: Josh Bleecher Snyder Date: Wed, 8 Nov 2017 21:30:42 -0800 Subject: [PATCH 02/11] Delete unused commonSuffixLength binary search code See https://github.com/sergi/go-diff/issues/54#issuecomment-343002957 Closes #54 --- diffmatchpatch/diff.go | 24 +++--------------------- 1 file changed, 3 insertions(+), 21 deletions(-) diff --git a/diffmatchpatch/diff.go b/diffmatchpatch/diff.go index 82ad7bc..897ab67 100644 --- a/diffmatchpatch/diff.go +++ b/diffmatchpatch/diff.go @@ -434,6 +434,7 @@ func (dmp *DiffMatchPatch) DiffCommonSuffix(text1, text2 string) int { // commonPrefixLength returns the length of the common prefix of two rune slices. func commonPrefixLength(text1, text2 []rune) int { + // Linear search. See comment in commonSuffixLength. short, long := text1, text2 if len(short) > len(long) { short, long = long, short @@ -448,6 +449,8 @@ func commonPrefixLength(text1, text2 []rune) int { // commonSuffixLength returns the length of the common suffix of two rune slices. func commonSuffixLength(text1, text2 []rune) int { + // Use linear search rather than the binary search discussed at https://neil.fraser.name/news/2007/10/09/. + // See discussion at https://github.com/sergi/go-diff/issues/54. n := min(len(text1), len(text2)) for i := 0; i < n; i++ { if text1[len(text1)-i-1] != text2[len(text2)-i-1] { @@ -455,27 +458,6 @@ func commonSuffixLength(text1, text2 []rune) int { } } return n - - // TODO research and benchmark this, why is it not activated? https://github.com/sergi/go-diff/issues/54 - // Binary search. 
- // Performance analysis: http://neil.fraser.name/news/2007/10/09/ - /* - pointermin := 0 - pointermax := math.Min(len(text1), len(text2)) - pointermid := pointermax - pointerend := 0 - for pointermin < pointermid { - if text1[len(text1)-pointermid:len(text1)-pointerend] == - text2[len(text2)-pointermid:len(text2)-pointerend] { - pointermin = pointermid - pointerend = pointermin - } else { - pointermax = pointermid - } - pointermid = math.Floor((pointermax-pointermin)/2 + pointermin) - } - return pointermid - */ } // DiffCommonOverlap determines if the suffix of one string is the prefix of another. From 5d7c9c6b07e527de131b9bfd461767680920e9d2 Mon Sep 17 00:00:00 2001 From: Josh Bleecher Snyder Date: Sun, 12 Nov 2017 07:58:36 -0800 Subject: [PATCH 03/11] Add benchmarks for commonPrefixLength and commonSuffixLength The existing benchmark is dominated by the string to []rune conversion. It's good to have a benchmark that includes this, since it is part of the exposed API. However, during a diff, the conversion cost has been paid, and it is the core of the implementation that matters. 
--- diffmatchpatch/diff_test.go | 36 +++++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/diffmatchpatch/diff_test.go b/diffmatchpatch/diff_test.go index b52bd70..8596999 100644 --- a/diffmatchpatch/diff_test.go +++ b/diffmatchpatch/diff_test.go @@ -130,6 +130,8 @@ func TestDiffCommonSuffix(t *testing.T) { } } +var SinkInt int // exported sink var to avoid compiler optimizations in benchmarks + func BenchmarkDiffCommonSuffix(b *testing.B) { s := "ABCDEFGHIJKLMNOPQRSTUVWXYZÅÄÖ" @@ -138,10 +140,42 @@ func BenchmarkDiffCommonSuffix(b *testing.B) { b.ResetTimer() for i := 0; i < b.N; i++ { - dmp.DiffCommonSuffix(s, s) + SinkInt = dmp.DiffCommonSuffix(s, s) } } +func BenchmarkCommonLength(b *testing.B) { + data := []struct { + name string + x, y []rune + }{ + {name: "empty", x: nil, y: []rune{}}, + {name: "short", x: []rune("AABCC"), y: []rune("AA-CC")}, + {name: "long", + x: []rune(strings.Repeat("A", 1000) + "B" + strings.Repeat("C", 1000)), + y: []rune(strings.Repeat("A", 1000) + "-" + strings.Repeat("C", 1000)), + }, + } + b.Run("prefix", func(b *testing.B) { + for _, d := range data { + b.Run(d.name, func(b *testing.B) { + for i := 0; i < b.N; i++ { + SinkInt = commonPrefixLength(d.x, d.y) + } + }) + } + }) + b.Run("suffix", func(b *testing.B) { + for _, d := range data { + b.Run(d.name, func(b *testing.B) { + for i := 0; i < b.N; i++ { + SinkInt = commonSuffixLength(d.x, d.y) + } + }) + } + }) +} + func TestCommonSuffixLength(t *testing.T) { type TestCase struct { Text1 string From a16f115e57f5b95c15b0ceec19ff5183d5c20f42 Mon Sep 17 00:00:00 2001 From: Josh Bleecher Snyder Date: Wed, 8 Nov 2017 21:31:14 -0800 Subject: [PATCH 04/11] Optimize commonPrefixLength and commonSuffixLength MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commonPrefixLength is mainly simplification. 
It might seem like it'd be better to determine up front which string is shorter, so as to eliminate a conditional from the inner loop. But the compiler isn't currently smart enough to do bounds check elimination in that case, so we end up with a conditional anyway. And these branches are highly predictable anyway, and many calls to commonPrefixLength are for short shared prefixes, so opt for simpler code. The commonSuffixLength improvements come mainly from eliminating the subtraction in the inner loop. name old time/op new time/op delta DiffCommonPrefix-8 146ns ± 1% 145ns ± 3% -0.77% (p=0.011 n=15+15) DiffCommonSuffix-8 159ns ± 2% 153ns ± 2% -3.49% (p=0.000 n=15+15) CommonLength/prefix/empty-8 4.03ns ± 2% 3.74ns ± 4% -7.11% (p=0.000 n=14+15) CommonLength/prefix/short-8 5.29ns ± 1% 4.69ns ± 2% -11.25% (p=0.000 n=14+14) CommonLength/prefix/long-8 603ns ± 2% 608ns ± 2% ~ (p=0.050 n=15+15) CommonLength/suffix/empty-8 3.82ns ± 2% 3.66ns ± 3% -4.22% (p=0.000 n=14+15) CommonLength/suffix/short-8 6.36ns ± 2% 5.90ns ± 2% -7.21% (p=0.000 n=15+14) CommonLength/suffix/long-8 1.14µs ± 3% 0.90µs ± 2% -20.79% (p=0.000 n=15+15) --- diffmatchpatch/diff.go | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/diffmatchpatch/diff.go b/diffmatchpatch/diff.go index 897ab67..05c380d 100644 --- a/diffmatchpatch/diff.go +++ b/diffmatchpatch/diff.go @@ -435,29 +435,28 @@ func (dmp *DiffMatchPatch) DiffCommonSuffix(text1, text2 string) int { // commonPrefixLength returns the length of the common prefix of two rune slices. func commonPrefixLength(text1, text2 []rune) int { // Linear search. See comment in commonSuffixLength. 
- short, long := text1, text2 - if len(short) > len(long) { - short, long = long, short - } - for i, r := range short { - if r != long[i] { - return i + n := 0 + for ; n < len(text1) && n < len(text2); n++ { + if text1[n] != text2[n] { + return n } } - return len(short) + return n } // commonSuffixLength returns the length of the common suffix of two rune slices. func commonSuffixLength(text1, text2 []rune) int { // Use linear search rather than the binary search discussed at https://neil.fraser.name/news/2007/10/09/. // See discussion at https://github.com/sergi/go-diff/issues/54. - n := min(len(text1), len(text2)) - for i := 0; i < n; i++ { - if text1[len(text1)-i-1] != text2[len(text2)-i-1] { - return i + i1 := len(text1) + i2 := len(text2) + for n := 0; ; n++ { + i1-- + i2-- + if i1 < 0 || i2 < 0 || text1[i1] != text2[i2] { + return n } } - return n } // DiffCommonOverlap determines if the suffix of one string is the prefix of another. From 6991d24dcdb99ba4f9d40f61f73cafb5e8e2a8e2 Mon Sep 17 00:00:00 2001 From: Josh Bleecher Snyder Date: Sun, 12 Nov 2017 22:46:57 -0800 Subject: [PATCH 05/11] Optimize splice MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The old code was a cute one-liner, but it allocated needlessly, quite a lot. Replace it with slightly more careful code that only allocates and copies as necessary. 
name old time/op new time/op delta DiffCommonPrefix-8 135ns ± 2% 133ns ± 1% ~ (p=0.213 n=10+9) DiffCommonSuffix-8 142ns ± 3% 141ns ± 2% ~ (p=0.173 n=10+9) DiffHalfMatch-8 107µs ± 0% 107µs ± 0% ~ (p=0.400 n=9+9) DiffCleanupSemantic-8 11.4ms ± 1% 0.9ms ± 1% -91.72% (p=0.000 n=10+9) DiffMain-8 1.01s ± 0% 1.01s ± 0% ~ (p=0.780 n=10+9) DiffMainLarge-8 134ms ± 1% 101ms ± 4% -24.45% (p=0.000 n=9+9) DiffMainRunesLargeLines-8 707µs ± 0% 681µs ± 2% -3.61% (p=0.000 n=9+10) name old alloc/op new alloc/op delta DiffCommonPrefix-8 0.00B 0.00B ~ (all equal) DiffCommonSuffix-8 0.00B 0.00B ~ (all equal) DiffHalfMatch-8 106kB ± 0% 106kB ± 0% ~ (all equal) DiffCleanupSemantic-8 17.7MB ± 0% 0.3MB ± 0% -98.56% (p=0.000 n=9+9) DiffMain-8 16.4MB ± 0% 16.4MB ± 0% -0.00% (p=0.000 n=10+10) DiffMainLarge-8 63.8MB ± 0% 4.8MB ± 0% -92.42% (p=0.000 n=9+10) DiffMainRunesLargeLines-8 209kB ± 0% 175kB ± 0% -16.54% (p=0.000 n=10+10) name old allocs/op new allocs/op delta DiffCommonPrefix-8 0.00 0.00 ~ (all equal) DiffCommonSuffix-8 0.00 0.00 ~ (all equal) DiffHalfMatch-8 2.00 ± 0% 2.00 ± 0% ~ (all equal) DiffCleanupSemantic-8 11.4k ± 0% 3.1k ± 0% -72.46% (p=0.000 n=10+10) DiffMain-8 89.0 ± 0% 83.0 ± 0% -6.74% (p=0.000 n=10+10) DiffMainLarge-8 55.3k ± 0% 46.5k ± 0% -15.94% (p=0.000 n=10+10) DiffMainRunesLargeLines-8 1.19k ± 0% 1.09k ± 0% -8.60% (p=0.000 n=10+8) --- diffmatchpatch/diff.go | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/diffmatchpatch/diff.go b/diffmatchpatch/diff.go index 05c380d..d94a8be 100644 --- a/diffmatchpatch/diff.go +++ b/diffmatchpatch/diff.go @@ -40,8 +40,41 @@ type Diff struct { Text string } +// splice removes amount elements from slice at index index, replacing them with elements. func splice(slice []Diff, index int, amount int, elements ...Diff) []Diff { - return append(slice[:index], append(elements, slice[index+amount:]...)...) + if len(elements) == amount { + // Easy case: overwrite the relevant items. 
+ copy(slice[index:], elements) + return slice + } + if len(elements) < amount { + // Fewer new items than old. + // Copy in the new items. + copy(slice[index:], elements) + // Shift the remaining items left. + copy(slice[index+len(elements):], slice[index+amount:]) + // Calculate the new end of the slice. + end := len(slice) - amount + len(elements) + // Zero stranded elements at end so that they can be garbage collected. + tail := slice[end:] + for i := range tail { + tail[i] = Diff{} + } + return slice[:end] + } + // More new items than old. + // Make room in slice for new elements. + // There's probably an even more efficient way to do this, + // but this is simple and clear. + need := len(slice) - amount + len(elements) + for len(slice) < need { + slice = append(slice, Diff{}) + } + // Shift slice elements right to make room for new elements. + copy(slice[index+len(elements):], slice[index+amount:]) + // Copy in new elements. + copy(slice[index:], elements) + return slice } // DiffMain finds the differences between two texts. From 258a5e0698872073d75f0cfc6cb80b66d48e333d Mon Sep 17 00:00:00 2001 From: Josh Bleecher Snyder Date: Sun, 12 Nov 2017 22:51:51 -0800 Subject: [PATCH 06/11] Remove more nested appends MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Nested appends generally lead to unnecessary allocation and copying. Unwind them in diffCompute. Replace them in DiffCleanupSemantic and DiffCleanupEfficiency with splice. 
DiffHalfMatch-8 107µs ± 0% 108µs ± 1% +0.70% (p=0.000 n=10+10) DiffCleanupSemantic-8 1.00ms ± 2% 0.97ms ± 0% -2.71% (p=0.000 n=10+8) DiffMain-8 1.01s ± 0% 1.01s ± 0% ~ (p=0.236 n=8+9) DiffMainLarge-8 110ms ± 1% 110ms ± 1% ~ (p=1.000 n=9+8) DiffMainRunesLargeLines-8 692µs ± 1% 693µs ± 1% ~ (p=0.762 n=10+8) name old alloc/op new alloc/op delta DiffHalfMatch-8 106kB ± 0% 106kB ± 0% ~ (all equal) DiffCleanupSemantic-8 255kB ± 0% 177kB ± 0% -30.76% (p=0.000 n=9+10) DiffMain-8 16.4MB ± 0% 16.4MB ± 0% ~ (all equal) DiffMainLarge-8 4.84MB ± 0% 4.81MB ± 0% -0.57% (p=0.000 n=10+10) DiffMainRunesLargeLines-8 175kB ± 0% 174kB ± 0% -0.34% (p=0.000 n=10+10) name old allocs/op new allocs/op delta DiffHalfMatch-8 2.00 ± 0% 2.00 ± 0% ~ (all equal) DiffCleanupSemantic-8 3.13k ± 0% 3.12k ± 0% -0.06% (p=0.000 n=10+10) DiffMain-8 83.0 ± 0% 83.0 ± 0% ~ (all equal) DiffMainLarge-8 46.5k ± 0% 46.3k ± 0% -0.41% (p=0.000 n=10+10) DiffMainRunesLargeLines-8 1.09k ± 0% 1.08k ± 0% -0.83% (p=0.000 n=9+10) --- diffmatchpatch/diff.go | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/diffmatchpatch/diff.go b/diffmatchpatch/diff.go index d94a8be..5d75755 100644 --- a/diffmatchpatch/diff.go +++ b/diffmatchpatch/diff.go @@ -178,7 +178,10 @@ func (dmp *DiffMatchPatch) diffCompute(text1, text2 []rune, checklines bool, dea diffsA := dmp.diffMainRunes(text1A, text2A, checklines, deadline) diffsB := dmp.diffMainRunes(text1B, text2B, checklines, deadline) // Merge the results. - return append(diffsA, append([]Diff{Diff{DiffEqual, string(midCommon)}}, diffsB...)...) + diffs := diffsA + diffs = append(diffs, Diff{DiffEqual, string(midCommon)}) + diffs = append(diffs, diffsB...) + return diffs } else if checklines && len(text1) > 100 && len(text2) > 100 { return dmp.diffLineMode(text1, text2, deadline) } @@ -685,9 +688,7 @@ func (dmp *DiffMatchPatch) DiffCleanupSemantic(diffs []Diff) []Diff { (len(lastequality) <= difference2) { // Duplicate record. 
insPoint := equalities.data - diffs = append( - diffs[:insPoint], - append([]Diff{Diff{DiffDelete, lastequality}}, diffs[insPoint:]...)...) + diffs = splice(diffs, insPoint, 0, Diff{DiffDelete, lastequality}) // Change second copy to insert. diffs[insPoint+1].Type = DiffInsert @@ -738,10 +739,7 @@ func (dmp *DiffMatchPatch) DiffCleanupSemantic(diffs []Diff) []Diff { float64(overlapLength1) >= float64(len(insertion))/2 { // Overlap found. Insert an equality and trim the surrounding edits. - diffs = append( - diffs[:pointer], - append([]Diff{Diff{DiffEqual, insertion[:overlapLength1]}}, diffs[pointer:]...)...) - + diffs = splice(diffs, pointer, 0, Diff{DiffEqual, insertion[:overlapLength1]}) diffs[pointer-1].Text = deletion[0 : len(deletion)-overlapLength1] diffs[pointer+1].Text = insertion[overlapLength1:] @@ -752,10 +750,7 @@ func (dmp *DiffMatchPatch) DiffCleanupSemantic(diffs []Diff) []Diff { float64(overlapLength2) >= float64(len(insertion))/2 { // Reverse overlap found. Insert an equality and swap and trim the surrounding edits. overlap := Diff{DiffEqual, deletion[:overlapLength2]} - diffs = append( - diffs[:pointer], - append([]Diff{overlap}, diffs[pointer:]...)...) - + diffs = splice(diffs, pointer, 0, overlap) diffs[pointer-1].Type = DiffInsert diffs[pointer-1].Text = insertion[0 : len(insertion)-overlapLength2] diffs[pointer+1].Type = DiffDelete @@ -968,8 +963,7 @@ func (dmp *DiffMatchPatch) DiffCleanupEfficiency(diffs []Diff) []Diff { insPoint := equalities.data // Duplicate record. - diffs = append(diffs[:insPoint], - append([]Diff{Diff{DiffDelete, lastequality}}, diffs[insPoint:]...)...) + diffs = splice(diffs, insPoint, 0, Diff{DiffDelete, lastequality}) // Change second copy to insert. 
diffs[insPoint+1].Type = DiffInsert From 639b52b31e245f913428211fe7dcd15c266b96d2 Mon Sep 17 00:00:00 2001 From: Josh Bleecher Snyder Date: Sun, 12 Nov 2017 23:40:28 -0800 Subject: [PATCH 07/11] Check deadline less frequently MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Calling time.Now() is a bit expensive to do in a tight loop. Check it only every 16th time. This results in an average 0.16% time overrun when the deadline is hit, which seems a small price to pay for up to a 25% speedup on the actual work. name old time/op new time/op delta DiffHalfMatch-8 108µs ± 1% 108µs ± 0% ~ (p=0.211 n=10+9) DiffCleanupSemantic-8 973µs ± 0% 971µs ± 1% ~ (p=0.673 n=8+9) DiffMain-8 1.01s ± 0% 1.01s ± 0% +0.16% (p=0.003 n=9+10) DiffMainLarge-8 110ms ± 1% 102ms ± 3% -7.44% (p=0.000 n=8+10) DiffMainRunesLargeLines-8 693µs ± 1% 515µs ± 1% -25.70% (p=0.000 n=8+9) name old alloc/op new alloc/op delta DiffHalfMatch-8 106kB ± 0% 106kB ± 0% ~ (all equal) DiffCleanupSemantic-8 177kB ± 0% 177kB ± 0% ~ (all equal) DiffMain-8 16.4MB ± 0% 16.4MB ± 0% ~ (all equal) DiffMainLarge-8 4.81MB ± 0% 4.81MB ± 0% ~ (p=0.764 n=10+9) DiffMainRunesLargeLines-8 174kB ± 0% 174kB ± 0% +0.01% (p=0.014 n=10+10) name old allocs/op new allocs/op delta DiffHalfMatch-8 2.00 ± 0% 2.00 ± 0% ~ (all equal) DiffCleanupSemantic-8 3.12k ± 0% 3.12k ± 0% ~ (all equal) DiffMain-8 83.0 ± 0% 83.0 ± 0% ~ (all equal) DiffMainLarge-8 46.3k ± 0% 46.3k ± 0% ~ (p=1.000 n=10+10) DiffMainRunesLargeLines-8 1.08k ± 0% 1.08k ± 0% ~ (p=0.211 n=10+10) --- diffmatchpatch/diff.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/diffmatchpatch/diff.go b/diffmatchpatch/diff.go index 5d75755..1f41cfa 100644 --- a/diffmatchpatch/diff.go +++ b/diffmatchpatch/diff.go @@ -283,7 +283,7 @@ func (dmp *DiffMatchPatch) diffBisect(runes1, runes2 []rune, deadline time.Time) k2end := 0 for d := 0; d < maxD; d++ { // Bail out if deadline is reached. 
- if !deadline.IsZero() && time.Now().After(deadline) { + if !deadline.IsZero() && d%16 == 0 && time.Now().After(deadline) { break } From ded6142445da5211993e8bb2e7dfd30ef9cae14d Mon Sep 17 00:00:00 2001 From: Josh Bleecher Snyder Date: Mon, 13 Nov 2017 00:01:33 -0800 Subject: [PATCH 08/11] Use a slice instead of a linked list to track equalities MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is easier to follow and more idiomatic. It also offers a minor overall performance boost. name old time/op new time/op delta DiffHalfMatch-8 107µs ± 1% 108µs ± 0% +0.91% (p=0.000 n=9+9) DiffCleanupSemantic-8 968µs ± 1% 921µs ± 1% -4.87% (p=0.000 n=9+10) DiffMain-8 1.01s ± 0% 1.01s ± 0% ~ (p=0.353 n=10+10) DiffMainLarge-8 102ms ± 2% 101ms ± 1% ~ (p=0.842 n=10+9) DiffMainRunesLargeLines-8 515µs ± 1% 515µs ± 1% ~ (p=0.400 n=9+10) name old alloc/op new alloc/op delta DiffHalfMatch-8 106kB ± 0% 106kB ± 0% ~ (all equal) DiffCleanupSemantic-8 177kB ± 0% 163kB ± 0% -7.81% (p=0.000 n=10+10) DiffMain-8 16.4MB ± 0% 16.4MB ± 0% +0.00% (p=0.000 n=9+10) DiffMainLarge-8 4.81MB ± 0% 4.81MB ± 0% ~ (p=1.000 n=10+10) DiffMainRunesLargeLines-8 174kB ± 0% 174kB ± 0% ~ (p=0.810 n=10+10) name old allocs/op new allocs/op delta DiffHalfMatch-8 2.00 ± 0% 2.00 ± 0% ~ (all equal) DiffCleanupSemantic-8 3.12k ± 0% 1.11k ± 0% -64.48% (p=0.000 n=10+10) DiffMain-8 83.0 ± 0% 84.0 ± 0% +1.20% (p=0.000 n=9+10) DiffMainLarge-8 46.3k ± 0% 46.3k ± 0% -0.08% (p=0.000 n=10+10) DiffMainRunesLargeLines-8 1.08k ± 0% 1.08k ± 0% ~ (all equal) --- diffmatchpatch/diff.go | 27 +++++++++------------------ 1 file changed, 9 insertions(+), 18 deletions(-) diff --git a/diffmatchpatch/diff.go b/diffmatchpatch/diff.go index 1f41cfa..0d7abd8 100644 --- a/diffmatchpatch/diff.go +++ b/diffmatchpatch/diff.go @@ -645,11 +645,7 @@ func (dmp *DiffMatchPatch) diffHalfMatchI(l, s []rune, i int) [][]rune { func (dmp *DiffMatchPatch) DiffCleanupSemantic(diffs []Diff) []Diff { changes := false 
// Stack of indices where equalities are found. - type equality struct { - data int - next *equality - } - var equalities *equality + equalities := make([]int, 0, len(diffs)) var lastequality string // Always equal to diffs[equalities[equalitiesLength - 1]][1] @@ -662,11 +658,7 @@ func (dmp *DiffMatchPatch) DiffCleanupSemantic(diffs []Diff) []Diff { for pointer < len(diffs) { if diffs[pointer].Type == DiffEqual { // Equality found. - - equalities = &equality{ - data: pointer, - next: equalities, - } + equalities = append(equalities, pointer) lengthInsertions1 = lengthInsertions2 lengthDeletions1 = lengthDeletions2 lengthInsertions2 = 0 @@ -687,21 +679,20 @@ func (dmp *DiffMatchPatch) DiffCleanupSemantic(diffs []Diff) []Diff { (len(lastequality) <= difference1) && (len(lastequality) <= difference2) { // Duplicate record. - insPoint := equalities.data + insPoint := equalities[len(equalities)-1] diffs = splice(diffs, insPoint, 0, Diff{DiffDelete, lastequality}) // Change second copy to insert. diffs[insPoint+1].Type = DiffInsert // Throw away the equality we just deleted. - equalities = equalities.next + equalities = equalities[:len(equalities)-1] - if equalities != nil { - equalities = equalities.next + if len(equalities) > 0 { + equalities = equalities[:len(equalities)-1] } - if equalities != nil { - pointer = equalities.data - } else { - pointer = -1 + pointer = -1 + if len(equalities) > 0 { + pointer = equalities[len(equalities)-1] } lengthInsertions1 = 0 // Reset the counters. 
From dbf098baded732e20a70cd3460043d556505a2d3 Mon Sep 17 00:00:00 2001 From: Vadim Markovtsev Date: Wed, 31 Jan 2018 15:45:40 +0100 Subject: [PATCH 09/11] Fix DiffLevenshtein counting single runes as multiple edits --- diffmatchpatch/diff.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/diffmatchpatch/diff.go b/diffmatchpatch/diff.go index 0d1c2d4..cb25b43 100644 --- a/diffmatchpatch/diff.go +++ b/diffmatchpatch/diff.go @@ -1236,9 +1236,9 @@ func (dmp *DiffMatchPatch) DiffLevenshtein(diffs []Diff) int { for _, aDiff := range diffs { switch aDiff.Type { case DiffInsert: - insertions += len(aDiff.Text) + insertions += utf8.RuneCountInString(aDiff.Text) case DiffDelete: - deletions += len(aDiff.Text) + deletions += utf8.RuneCountInString(aDiff.Text) case DiffEqual: // A deletion and an insertion is one substitution. levenshtein += max(insertions, deletions) From f7f9a5cc31d1a867e139c1d0dded822cb29d0fb6 Mon Sep 17 00:00:00 2001 From: Vadim Markovtsev Date: Wed, 31 Jan 2018 16:34:27 +0100 Subject: [PATCH 10/11] Add the utf-8 test for DiffLevenshtein --- diffmatchpatch/diff_test.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/diffmatchpatch/diff_test.go b/diffmatchpatch/diff_test.go index 8596999..5c165b1 100644 --- a/diffmatchpatch/diff_test.go +++ b/diffmatchpatch/diff_test.go @@ -1153,9 +1153,9 @@ func TestDiffLevenshtein(t *testing.T) { dmp := New() for i, tc := range []TestCase{ - {"Levenshtein with trailing equality", []Diff{{DiffDelete, "abc"}, {DiffInsert, "1234"}, {DiffEqual, "xyz"}}, 4}, - {"Levenshtein with leading equality", []Diff{{DiffEqual, "xyz"}, {DiffDelete, "abc"}, {DiffInsert, "1234"}}, 4}, - {"Levenshtein with middle equality", []Diff{{DiffDelete, "abc"}, {DiffEqual, "xyz"}, {DiffInsert, "1234"}}, 7}, + {"Levenshtein with trailing equality", []Diff{{DiffDelete, "абв"}, {DiffInsert, "1234"}, {DiffEqual, "эюя"}}, 4}, + {"Levenshtein with leading equality", []Diff{{DiffEqual, "эюя"}, {DiffDelete, 
"абв"}, {DiffInsert, "1234"}}, 4}, + {"Levenshtein with middle equality", []Diff{{DiffDelete, "абв"}, {DiffEqual, "эюя"}, {DiffInsert, "1234"}}, 7}, } { actual := dmp.DiffLevenshtein(tc.Diffs) assert.Equal(t, tc.Expected, actual, fmt.Sprintf("Test case #%d, %s", i, tc.Name)) From 74ac14522e85bfd8179aef8251f994afbb2f486e Mon Sep 17 00:00:00 2001 From: "M. J. Fromberger" Date: Mon, 4 Feb 2019 07:49:05 -0800 Subject: [PATCH 11/11] Add go.mod and go.sum files to support Go modules. This commit contains no functional changes; it's just adding config files for the Go modules facility. Ideally this commit should also be accompanied by a new version tag, since the current latest tag is v1.0.0 from Nov. 2017, and several optimizations have been added since then. Related changes: - Update to the canonical import path for golint. - Add Go 1.10, 1.11, 1.12, and 1.13 to CI; drop 1.8. --- .travis.yml | 5 ++++- Makefile | 2 +- go.mod | 11 +++++++++++ go.sum | 21 +++++++++++++++++++++ 4 files changed, 37 insertions(+), 2 deletions(-) create mode 100644 go.mod create mode 100644 go.sum diff --git a/.travis.yml b/.travis.yml index 85868de..d8da7c5 100644 --- a/.travis.yml +++ b/.travis.yml @@ -5,8 +5,11 @@ os: - osx go: - - 1.8.x - 1.9.x + - 1.10.x + - 1.11.x + - 1.12.x + - 1.13.x sudo: false diff --git a/Makefile b/Makefile index e013f0b..710bc83 100644 --- a/Makefile +++ b/Makefile @@ -27,7 +27,7 @@ install-dependencies: go build -v $(PKG)/... install-tools: # Install linting tools - go get -u -v github.com/golang/lint/... + go get -u -v golang.org/x/lint/... go get -u -v github.com/kisielk/errcheck/... 
# Install code coverage tools diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..d46c77d --- /dev/null +++ b/go.mod @@ -0,0 +1,11 @@ +module github.com/sergi/go-diff + +require ( + github.com/davecgh/go-spew v1.1.1 // indirect + github.com/kr/pretty v0.1.0 // indirect + github.com/stretchr/testify v1.4.0 + gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 // indirect + gopkg.in/yaml.v2 v2.2.4 // indirect +) + +go 1.12 diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..4b80e08 --- /dev/null +++ b/go.sum @@ -0,0 +1,21 @@ +github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= +github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= +github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= +github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= +github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk= +github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo= +gopkg.in/check.v1 
v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw= +gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.4 h1:/eiJrUcujPVeJ3xlSWaiNi3uSVmDGBK1pDHUHAnao1I= +gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=