-
Notifications
You must be signed in to change notification settings - Fork 42
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add new checker ListEquals
#174
base: master
Are you sure you want to change the base?
Changes from all commits
a09aa25
1bce945
d8ee854
8aa60bb
33c8199
4181ec8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,184 @@ | ||
// Copyright 2024 Canonical Ltd. | ||
// Licensed under the LGPLv3, see LICENCE file for details. | ||
|
||
package checkers | ||
|
||
import ( | ||
"fmt" | ||
"reflect" | ||
|
||
gc "gopkg.in/check.v1" | ||
) | ||
|
||
type listEqualsChecker struct { | ||
*gc.CheckerInfo | ||
} | ||
|
||
// The ListEquals checker verifies if two lists are equal. If they are not, | ||
// it will essentially run a "diff" algorithm to provide the developer with | ||
// an easily understandable summary of the difference between the two lists. | ||
var ListEquals gc.Checker = &listEqualsChecker{ | ||
&gc.CheckerInfo{Name: "ListEquals", Params: []string{"obtained", "expected"}}, | ||
} | ||
|
||
func (l *listEqualsChecker) Check(params []interface{}, names []string) (result bool, error string) { | ||
obtained := params[0] | ||
expected := params[1] | ||
|
||
// Do some simple pre-checks. First, that both 'obtained' and 'expected' | ||
// are indeed slices. | ||
vExp := reflect.ValueOf(expected) | ||
if vExp.Kind() != reflect.Slice { | ||
return false, fmt.Sprintf("expected value is not a slice") | ||
} | ||
|
||
vObt := reflect.ValueOf(obtained) | ||
if vObt.Kind() != reflect.Slice { | ||
return false, fmt.Sprintf("obtained value is not a slice") | ||
} | ||
|
||
// Check that element types are the same | ||
expElemType := vExp.Type().Elem() | ||
obtElemType := vObt.Type().Elem() | ||
|
||
if expElemType != obtElemType { | ||
return false, fmt.Sprintf("element types are not equal") | ||
} | ||
|
||
// Check that the element type is comparable. | ||
if !expElemType.Comparable() { | ||
return false, fmt.Sprintf("element type is not comparable") | ||
} | ||
|
||
// The approach here is to find a longest-common subsequence using dynamic | ||
// programming, and use this to generate the diff. This algorithm runs in | ||
// O(n^2). However, naive list equality is only O(n). Hence, to be more | ||
// efficient, we should first check if the lists are equal, and if they are | ||
// not, we do the more complicated work to find out exactly *how* they are | ||
// different. | ||
|
||
slicesEqual := true | ||
// Check length is equal | ||
if vObt.Len() == vExp.Len() { | ||
// Iterate through and check every element | ||
for i := 0; i < vExp.Len(); i++ { | ||
a := vObt.Index(i) | ||
b := vExp.Index(i) | ||
if !a.Equal(b) { | ||
slicesEqual = false | ||
break | ||
} | ||
} | ||
|
||
if slicesEqual { | ||
return true, "" | ||
} | ||
} | ||
|
||
// If we're here, the lists are not equal, so run the DP algorithm to | ||
// compute the diff. | ||
return false, generateDiff(vObt, vExp) | ||
} | ||
|
||
func generateDiff(obtained, expected reflect.Value) string { | ||
// lenLCS[m][n] stores the length of the longest common subsequence of | ||
// obtained[:m] and expected[:n] | ||
lenLCS := make([][]int, obtained.Len()+1) | ||
for i := 0; i <= obtained.Len(); i++ { | ||
lenLCS[i] = make([]int, expected.Len()+1) | ||
} | ||
|
||
// lenLCS[i][0] and lenLCS[0][j] are already correctly initialised to 0 | ||
|
||
for i := 1; i <= obtained.Len(); i++ { | ||
for j := 1; j <= expected.Len(); j++ { | ||
if obtained.Index(i - 1).Equal(expected.Index(j - 1)) { | ||
// We can extend the longest subsequence of obtained[:i-1] and expected[:j-1] | ||
lenLCS[i][j] = lenLCS[i-1][j-1] + 1 | ||
} else { | ||
// We can't extend a previous subsequence | ||
lenLCS[i][j] = max(lenLCS[i-1][j], lenLCS[i][j-1]) | ||
} | ||
} | ||
} | ||
|
||
// "Traceback" to calculate the diff | ||
var diffs []diff | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It seems far more simpler to just make diffs a At the moment anything that implements the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yeah, you're probably right |
||
i := obtained.Len() | ||
j := expected.Len() | ||
|
||
for i > 0 && j > 0 { | ||
if lenLCS[i][j] == lenLCS[i-1][j-1] { | ||
// Element changed at this index | ||
diffs = append(diffs, elementChanged{j - 1, expected.Index(j - 1), obtained.Index(i - 1)}) | ||
i -= 1 | ||
j -= 1 | ||
|
||
} else if lenLCS[i][j] == lenLCS[i-1][j] { | ||
// Additional/unexpected element at this index | ||
diffs = append(diffs, elementAdded{j, obtained.Index(i - 1)}) | ||
i -= 1 | ||
|
||
} else if lenLCS[i][j] == lenLCS[i][j-1] { | ||
// Element missing at this index | ||
diffs = append(diffs, elementRemoved{j - 1, expected.Index(j - 1)}) | ||
j -= 1 | ||
|
||
} else { | ||
// Elements are the same at this index - no diff | ||
i -= 1 | ||
j -= 1 | ||
} | ||
} | ||
for i > 0 { | ||
// Extra elements have been added at the start | ||
diffs = append(diffs, elementAdded{0, obtained.Index(i - 1)}) | ||
i -= 1 | ||
} | ||
for j > 0 { | ||
// Elements are missing at the start | ||
diffs = append(diffs, elementRemoved{j - 1, expected.Index(j - 1)}) | ||
j -= 1 | ||
} | ||
|
||
// Convert diffs array into human-readable error | ||
description := "difference:" | ||
for k := len(diffs) - 1; k >= 0; k-- { | ||
description += "\n - " + diffs[k].String() | ||
} | ||
return description | ||
} | ||
|
||
// diff represents a single difference between the two slices. | ||
type diff interface { | ||
// Prints a user friendly description of this difference. | ||
String() string | ||
} | ||
|
||
type elementAdded struct { | ||
index int | ||
element any | ||
} | ||
|
||
func (d elementAdded) String() string { | ||
return fmt.Sprintf("at index %d: unexpected element %#v", d.index, d.element) | ||
} | ||
|
||
type elementChanged struct { | ||
index int | ||
|
||
original, changed any | ||
} | ||
|
||
func (d elementChanged) String() string { | ||
return fmt.Sprintf("at index %d: obtained element %#v, expected %#v", d.index, d.changed, d.original) | ||
} | ||
|
||
type elementRemoved struct { | ||
index int | ||
element any | ||
} | ||
|
||
func (d elementRemoved) String() string { | ||
return fmt.Sprintf("at index %d: missing element %#v", d.index, d.element) | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,172 @@ | ||
// Copyright 2024 Canonical Ltd. | ||
// Licensed under the LGPLv3, see LICENCE file for details. | ||
|
||
package checkers_test | ||
|
||
import ( | ||
"reflect" | ||
"testing" | ||
|
||
gc "gopkg.in/check.v1" | ||
|
||
jc "github.com/juju/testing/checkers" | ||
) | ||
|
||
type listEqualsSuite struct{} | ||
|
||
var _ = gc.Suite(&listEqualsSuite{}) | ||
|
||
type testCase struct { | ||
description string | ||
list1, list2 any | ||
equal bool | ||
error string | ||
} | ||
|
||
var testCases = []testCase{{ | ||
description: "both not slices", | ||
list1: struct{}{}, | ||
list2: map[string]string{}, | ||
error: "expected value is not a slice", | ||
}, { | ||
description: "obtained is not a slice", | ||
list1: 1, | ||
list2: []string{}, | ||
error: "obtained value is not a slice", | ||
}, { | ||
description: "expected is not a slice", | ||
list1: []string{}, | ||
list2: "foobar", | ||
error: "expected value is not a slice", | ||
}, { | ||
description: "same contents but different element type", | ||
list1: []string{"A", "B", "C", "DEF"}, | ||
list2: []any{"A", "B", "C", "DEF"}, | ||
error: "element types are not equal", | ||
}, { | ||
description: "different type in last position", | ||
list1: []any{"A", "B", "C", "DEF"}, | ||
list2: []any{"A", "B", "C", 321}, | ||
equal: false, | ||
error: `difference: | ||
- at index 3: obtained element "DEF", expected 321`, | ||
}, { | ||
description: "incomparable element type", | ||
list1: [][]string{{"A"}}, | ||
list2: [][]string{{"A"}}, | ||
error: "element type is not comparable", | ||
}, { | ||
description: "incomparable values are fine", | ||
list1: []any{"A"}, | ||
list2: []any{[]string{"A"}}, | ||
error: `difference: | ||
- at index 0: obtained element "A", expected \[\]string\{"A"\}`, | ||
}, { | ||
description: "elements missing at start", | ||
list1: []int{5, 6}, | ||
list2: []int{3, 4, 5, 6}, | ||
equal: false, | ||
error: `difference: | ||
- at index 0: missing element 3 | ||
- at index 1: missing element 4`, | ||
}, { | ||
description: "elements added at start", | ||
list1: []int{1, 2, 3, 4, 5, 6}, | ||
list2: []int{3, 4, 5, 6}, | ||
equal: false, | ||
error: `difference: | ||
- at index 0: unexpected element 1 | ||
- at index 0: unexpected element 2`, | ||
}, { | ||
description: "elements missing at end", | ||
list1: []int{3, 4}, | ||
list2: []int{3, 4, 5, 6}, | ||
equal: false, | ||
error: `difference: | ||
- at index 2: missing element 5 | ||
- at index 3: missing element 6`, | ||
}, { | ||
description: "elements added at end", | ||
list1: []int{3, 4, 5, 6, 7, 8}, | ||
list2: []int{3, 4, 5, 6}, | ||
equal: false, | ||
error: `difference: | ||
- at index 4: unexpected element 7 | ||
- at index 4: unexpected element 8`, | ||
}, { | ||
description: "basic test", | ||
list1: []int{0, 2, 62, 4, 43, 5, 7, 104, 9, 56, 10}, | ||
list2: []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}, | ||
equal: false, | ||
error: `difference: | ||
- at index 1: missing element 1 | ||
- at index 3: obtained element 62, expected 3 | ||
- at index 5: unexpected element 43 | ||
- at index 6: missing element 6 | ||
- at index 8: obtained element 104, expected 8 | ||
- at index 10: unexpected element 56 | ||
- at index 11: missing element 11`, | ||
}, { | ||
description: "replaced elements", | ||
list1: []string{"A", "Z", "C", "Y", "E", "X", "G", "W", "I"}, | ||
list2: []string{"A", "B", "C", "D", "E", "F", "G", "H", "I"}, | ||
equal: false, | ||
error: `difference: | ||
- at index 1: obtained element "Z", expected "B" | ||
- at index 3: obtained element "Y", expected "D" | ||
- at index 5: obtained element "X", expected "F" | ||
- at index 7: obtained element "W", expected "H"`, | ||
}} | ||
|
||
func init() { | ||
// Add a test case with two super long but equal arrays. In this case, we | ||
// should find equality first in O(n), so it shouldn't be too slow. | ||
|
||
N := 10000 | ||
superLongSlice := make([]int, N) | ||
for i := 0; i < N; i++ { | ||
superLongSlice[i] = i | ||
} | ||
|
||
testCases = append(testCases, testCase{ | ||
description: "super long slice", | ||
list1: superLongSlice, | ||
list2: superLongSlice, | ||
equal: true, | ||
}) | ||
} | ||
|
||
func (s *listEqualsSuite) Test(c *gc.C) { | ||
for _, test := range testCases { | ||
c.Log(test.description) | ||
res, err := jc.ListEquals.Check([]any{test.list1, test.list2}, nil) | ||
c.Check(res, gc.Equals, test.equal) | ||
c.Check(err, gc.Matches, test.error) | ||
} | ||
} | ||
|
||
func BenchmarkListEquals(b *testing.B) { | ||
for _, test := range testCases { | ||
b.Run(test.description, func(b *testing.B) { | ||
for i := 0; i < b.N; i++ { | ||
jc.ListEquals.Check([]any{test.list1, test.list2}, nil) | ||
} | ||
}) | ||
} | ||
} | ||
|
||
func FuzzListEquals(f *testing.F) { | ||
f.Fuzz(func(t *testing.T, list1, list2 []byte) { | ||
eq, errMsg := jc.ListEquals.Check([]any{list1, list1}, nil) | ||
if eq == false || errMsg != "" { | ||
t.Errorf("should ListEquals itself: %v", list1) | ||
} | ||
|
||
eq, errMsg = jc.ListEquals.Check([]any{list1, list2}, nil) | ||
if eq != (reflect.DeepEqual(list1, list2)) { | ||
t.Errorf(`ListEquals returned incorrect value for | ||
list1: %v | ||
list2: %v`, list1, list2) | ||
} | ||
}) | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think we can clean up the memory allocations here. My 20 minute grok of this suggest we are blowing i.j.8 bytes for every check we perform when we can get away with (i+j).8 bytes.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The full table is needed at the end to "trace backwards" and calculate the entire diff. Yes it's O(n*m) in space and time, the onus is on the user to not use this checker for gigantic arrays.
There might be slight optimisations to be made here but I don't think anything more efficient than O(n*m).