Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add new checker ListEquals #174

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
184 changes: 184 additions & 0 deletions checkers/listequals.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,184 @@
// Copyright 2024 Canonical Ltd.
// Licensed under the LGPLv3, see LICENCE file for details.

package checkers

import (
"fmt"
"reflect"

gc "gopkg.in/check.v1"
)

type listEqualsChecker struct {
*gc.CheckerInfo
}

// The ListEquals checker verifies if two lists are equal. If they are not,
// it will essentially run a "diff" algorithm to provide the developer with
// an easily understandable summary of the difference between the two lists.
var ListEquals gc.Checker = &listEqualsChecker{
&gc.CheckerInfo{Name: "ListEquals", Params: []string{"obtained", "expected"}},
}

func (l *listEqualsChecker) Check(params []interface{}, names []string) (result bool, error string) {
obtained := params[0]
expected := params[1]

// Do some simple pre-checks. First, that both 'obtained' and 'expected'
// are indeed slices.
vExp := reflect.ValueOf(expected)
if vExp.Kind() != reflect.Slice {
return false, fmt.Sprintf("expected value is not a slice")
}

vObt := reflect.ValueOf(obtained)
if vObt.Kind() != reflect.Slice {
return false, fmt.Sprintf("obtained value is not a slice")
}

// Check that element types are the same
expElemType := vExp.Type().Elem()
obtElemType := vObt.Type().Elem()

if expElemType != obtElemType {
return false, fmt.Sprintf("element types are not equal")
}

// Check that the element type is comparable.
if !expElemType.Comparable() {
return false, fmt.Sprintf("element type is not comparable")
}

// The approach here is to find a longest-common subsequence using dynamic
// programming, and use this to generate the diff. This algorithm runs in
// O(n^2). However, naive list equality is only O(n). Hence, to be more
// efficient, we should first check if the lists are equal, and if they are
// not, we do the more complicated work to find out exactly *how* they are
// different.

slicesEqual := true
// Check length is equal
if vObt.Len() == vExp.Len() {
// Iterate through and check every element
for i := 0; i < vExp.Len(); i++ {
a := vObt.Index(i)
b := vExp.Index(i)
if !a.Equal(b) {
slicesEqual = false
break
}
}

if slicesEqual {
return true, ""
}
}

// If we're here, the lists are not equal, so run the DP algorithm to
// compute the diff.
return false, generateDiff(vObt, vExp)
}

func generateDiff(obtained, expected reflect.Value) string {
// lenLCS[m][n] stores the length of the longest common subsequence of
// obtained[:m] and expected[:n]
lenLCS := make([][]int, obtained.Len()+1)
Copy link
Member

@tlm tlm Jun 13, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we can clean up the memory allocations here. My 20 minute grok of this suggest we are blowing i.j.8 bytes for every check we perform when we can get away with (i+j).8 bytes.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The full table is needed at the end to "trace backwards" and calculate the entire diff. Yes it's O(n*m) in space and time, the onus is on the user to not use this checker for gigantic arrays.

There might be slight optimisations to be made here but I don't think anything more efficient than O(n*m).

for i := 0; i <= obtained.Len(); i++ {
lenLCS[i] = make([]int, expected.Len()+1)
}

// lenLCS[i][0] and lenLCS[0][j] are already correctly initialised to 0

for i := 1; i <= obtained.Len(); i++ {
for j := 1; j <= expected.Len(); j++ {
if obtained.Index(i - 1).Equal(expected.Index(j - 1)) {
// We can extend the longest subsequence of obtained[:i-1] and expected[:j-1]
lenLCS[i][j] = lenLCS[i-1][j-1] + 1
} else {
// We can't extend a previous subsequence
lenLCS[i][j] = max(lenLCS[i-1][j], lenLCS[i][j-1])
}
}
}

// "Traceback" to calculate the diff
var diffs []diff
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems far more simpler to just make diffs a []string. We don't use the information in the diff for anything else other then generating the string at printing time. Can we not just calculate it at the time we make the diff and store the result?

At the moment anything that implements the Stringer interface could be a diff also.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah, you're probably right

i := obtained.Len()
j := expected.Len()

for i > 0 && j > 0 {
if lenLCS[i][j] == lenLCS[i-1][j-1] {
// Element changed at this index
diffs = append(diffs, elementChanged{j - 1, expected.Index(j - 1), obtained.Index(i - 1)})
i -= 1
j -= 1

} else if lenLCS[i][j] == lenLCS[i-1][j] {
// Additional/unexpected element at this index
diffs = append(diffs, elementAdded{j, obtained.Index(i - 1)})
i -= 1

} else if lenLCS[i][j] == lenLCS[i][j-1] {
// Element missing at this index
diffs = append(diffs, elementRemoved{j - 1, expected.Index(j - 1)})
j -= 1

} else {
// Elements are the same at this index - no diff
i -= 1
j -= 1
}
}
for i > 0 {
// Extra elements have been added at the start
diffs = append(diffs, elementAdded{0, obtained.Index(i - 1)})
i -= 1
}
for j > 0 {
// Elements are missing at the start
diffs = append(diffs, elementRemoved{j - 1, expected.Index(j - 1)})
j -= 1
}

// Convert diffs array into human-readable error
description := "difference:"
for k := len(diffs) - 1; k >= 0; k-- {
description += "\n - " + diffs[k].String()
}
return description
}

// diff represents a single difference between the two slices.
type diff interface {
// Prints a user friendly description of this difference.
String() string
}

type elementAdded struct {
index int
element any
}

func (d elementAdded) String() string {
return fmt.Sprintf("at index %d: unexpected element %#v", d.index, d.element)
}

type elementChanged struct {
index int

original, changed any
}

func (d elementChanged) String() string {
return fmt.Sprintf("at index %d: obtained element %#v, expected %#v", d.index, d.changed, d.original)
}

type elementRemoved struct {
index int
element any
}

func (d elementRemoved) String() string {
return fmt.Sprintf("at index %d: missing element %#v", d.index, d.element)
}
172 changes: 172 additions & 0 deletions checkers/listequals_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
// Copyright 2024 Canonical Ltd.
// Licensed under the LGPLv3, see LICENCE file for details.

package checkers_test

import (
"reflect"
"testing"

gc "gopkg.in/check.v1"

jc "github.com/juju/testing/checkers"
)

type listEqualsSuite struct{}

var _ = gc.Suite(&listEqualsSuite{})

type testCase struct {
description string
list1, list2 any
equal bool
error string
}

var testCases = []testCase{{
description: "both not slices",
list1: struct{}{},
list2: map[string]string{},
error: "expected value is not a slice",
}, {
description: "obtained is not a slice",
list1: 1,
list2: []string{},
error: "obtained value is not a slice",
}, {
description: "expected is not a slice",
list1: []string{},
list2: "foobar",
error: "expected value is not a slice",
}, {
description: "same contents but different element type",
list1: []string{"A", "B", "C", "DEF"},
list2: []any{"A", "B", "C", "DEF"},
error: "element types are not equal",
}, {
description: "different type in last position",
list1: []any{"A", "B", "C", "DEF"},
list2: []any{"A", "B", "C", 321},
equal: false,
error: `difference:
- at index 3: obtained element "DEF", expected 321`,
}, {
description: "incomparable element type",
list1: [][]string{{"A"}},
list2: [][]string{{"A"}},
error: "element type is not comparable",
}, {
description: "incomparable values are fine",
list1: []any{"A"},
list2: []any{[]string{"A"}},
error: `difference:
- at index 0: obtained element "A", expected \[\]string\{"A"\}`,
}, {
description: "elements missing at start",
list1: []int{5, 6},
list2: []int{3, 4, 5, 6},
equal: false,
error: `difference:
- at index 0: missing element 3
- at index 1: missing element 4`,
}, {
description: "elements added at start",
list1: []int{1, 2, 3, 4, 5, 6},
list2: []int{3, 4, 5, 6},
equal: false,
error: `difference:
- at index 0: unexpected element 1
- at index 0: unexpected element 2`,
}, {
description: "elements missing at end",
list1: []int{3, 4},
list2: []int{3, 4, 5, 6},
equal: false,
error: `difference:
- at index 2: missing element 5
- at index 3: missing element 6`,
}, {
description: "elements added at end",
list1: []int{3, 4, 5, 6, 7, 8},
list2: []int{3, 4, 5, 6},
equal: false,
error: `difference:
- at index 4: unexpected element 7
- at index 4: unexpected element 8`,
}, {
description: "basic test",
list1: []int{0, 2, 62, 4, 43, 5, 7, 104, 9, 56, 10},
list2: []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11},
equal: false,
error: `difference:
- at index 1: missing element 1
- at index 3: obtained element 62, expected 3
- at index 5: unexpected element 43
- at index 6: missing element 6
- at index 8: obtained element 104, expected 8
- at index 10: unexpected element 56
- at index 11: missing element 11`,
}, {
description: "replaced elements",
list1: []string{"A", "Z", "C", "Y", "E", "X", "G", "W", "I"},
list2: []string{"A", "B", "C", "D", "E", "F", "G", "H", "I"},
equal: false,
error: `difference:
- at index 1: obtained element "Z", expected "B"
- at index 3: obtained element "Y", expected "D"
- at index 5: obtained element "X", expected "F"
- at index 7: obtained element "W", expected "H"`,
}}

func init() {
// Add a test case with two super long but equal arrays. In this case, we
// should find equality first in O(n), so it shouldn't be too slow.

N := 10000
superLongSlice := make([]int, N)
for i := 0; i < N; i++ {
superLongSlice[i] = i
}

testCases = append(testCases, testCase{
description: "super long slice",
list1: superLongSlice,
list2: superLongSlice,
equal: true,
})
}

func (s *listEqualsSuite) Test(c *gc.C) {
for _, test := range testCases {
c.Log(test.description)
res, err := jc.ListEquals.Check([]any{test.list1, test.list2}, nil)
c.Check(res, gc.Equals, test.equal)
c.Check(err, gc.Matches, test.error)
}
}

func BenchmarkListEquals(b *testing.B) {
for _, test := range testCases {
b.Run(test.description, func(b *testing.B) {
for i := 0; i < b.N; i++ {
jc.ListEquals.Check([]any{test.list1, test.list2}, nil)
}
})
}
}

func FuzzListEquals(f *testing.F) {
f.Fuzz(func(t *testing.T, list1, list2 []byte) {
eq, errMsg := jc.ListEquals.Check([]any{list1, list1}, nil)
if eq == false || errMsg != "" {
t.Errorf("should ListEquals itself: %v", list1)
}

eq, errMsg = jc.ListEquals.Check([]any{list1, list2}, nil)
if eq != (reflect.DeepEqual(list1, list2)) {
t.Errorf(`ListEquals returned incorrect value for
list1: %v
list2: %v`, list1, list2)
}
})
}