Skip to content

Commit

Permalink
x/text/internal/colltab: Sort numbers with leading zeros
Browse files Browse the repository at this point in the history
Makes numeric sorting deterministic. Particularly useful for
  approximating how an OS file manager will sort files.
  • Loading branch information
lordwelch committed Nov 7, 2024
1 parent c25a7be commit ce407f2
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 24 deletions.
14 changes: 7 additions & 7 deletions collate/collate_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -474,15 +474,15 @@ func TestNumeric(t *testing.T) {
{"A-1", "A-2", -1},
{"A-2", "A-12", -1},
{"A-12", "A-2", 1},
{"A-0001", "A-1", 0},
{"A-0001", "A-1", 1},
{"0000-", "1-", -1},
{"00001", "1", 0},
{"00001", "1", 1},
{"00", "00", 0},
{"0", "00", 0},
{"00", "0", 0},
{"01", "001", 0},
{"01", "1", 0},
{"1", "01", 0},
{"0", "00", -1},
{"00", "0", 1},
{"01", "001", -1},
{"01", "1", 1},
{"1", "01", -1},
{"9-A", "0-A", 1},
{"99-A", "0-A", 1},
{"9-A", "1-A", 1},
Expand Down
17 changes: 17 additions & 0 deletions internal/colltab/numeric.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,14 +80,20 @@ func (nw *numericWeighter) AppendNext(buf []Elem, s []byte) (ce []Elem, n int) {
}
// ce might have been grown already, so take it instead of buf.
nc.init(ce, len(buf), isZero)
oldIndex := len(nc.elems)
for n < len(s) {
ce, sz := nw.Weighter.AppendNext(nc.elems, s[n:])
nc.b = s
n += sz
if !nc.update(ce) {
break
}
oldIndex = len(nc.elems)
}
nc.elems = append(nc.elems, 0)
copy(nc.elems[oldIndex+1:], nc.elems[oldIndex:])
nc.elems[oldIndex], _ = MakeElem(nc.zero+1, defaultSecondary, defaultTertiary, 0)

return nc.result(), n
}

Expand All @@ -105,14 +111,20 @@ func (nw *numericWeighter) AppendNextString(buf []Elem, s string) (ce []Elem, n
return ce, n
}
nc.init(ce, len(buf), isZero)
oldIndex := len(nc.elems)
for n < len(s) {
ce, sz := nw.Weighter.AppendNextString(nc.elems, s[n:])
nc.s = s
n += sz
if !nc.update(ce) {
break
}
oldIndex = len(nc.elems)
}
nc.elems = append(nc.elems, 0)
copy(nc.elems[oldIndex+1:], nc.elems[oldIndex:])
nc.elems[oldIndex], _ = MakeElem(nc.zero+1, defaultSecondary, defaultTertiary, 0)

return nc.result(), n
}

Expand All @@ -122,6 +134,7 @@ type numberConverter struct {
elems []Elem
nDigits int
lenIndex int
zero int

s string // set if the input was of type string
b []byte // set if the input was of type []byte
Expand All @@ -133,6 +146,7 @@ func (nc *numberConverter) init(elems []Elem, oldLen int, isZero bool) {
// Insert a marker indicating the start of a number and a placeholder
// for the number of digits.
if isZero {
nc.zero++
elems = append(elems[:oldLen], nc.w.numberStart, 0)
} else {
elems = append(elems, 0, 0)
Expand Down Expand Up @@ -217,6 +231,9 @@ const maxDigits = 1<<maxPrimaryBits - 1
func (nc *numberConverter) update(elems []Elem) bool {
isZero, ok := nc.checkNextDigit(elems)
if nc.nDigits == 0 && isZero {
if nc.zero+1 < maxDigits {
nc.zero++
}
return true
}
nc.elems = elems
Expand Down
51 changes: 34 additions & 17 deletions internal/colltab/numeric_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,36 +78,37 @@ func TestNumericAppendNext(t *testing.T) {
{"a", p(5)},
{"klm", p(99)},
{"aa", p(5, 5)},
{"1", p(120, 2, 101)},
{"0", p(120, 1)},
{"00", p(120, 1)},
{"01", p(120, 2, 101)},
{"0001", p(120, 2, 101)},
{"02", p(120, 2, 102)},
{"10", p(120, 3, 101, 100)},
{"99", p(120, 3, 119, 119)},
{"9999", p(120, 5, 119, 119, 119, 119)},
{"1a", p(120, 2, 101, 5)},
{"0b", p(120, 1, 6)},
{"01c", p(120, 2, 101, 8, 2)},
{"10x", p(120, 3, 101, 100, 200)},
{"99y", p(120, 3, 119, 119, 201)},
{"9999nop", p(120, 5, 119, 119, 119, 119, 121)},
{"1", p(120, 2, 101, 1)},
{"0", p(120, 1, 2)},
{"00", p(120, 1, 3)},
{"01", p(120, 2, 101, 2)},
{"0001", p(120, 2, 101, 4)},
{"02", p(120, 2, 102, 2)},
{"10", p(120, 3, 101, 100, 1)},
{"99", p(120, 3, 119, 119, 1)},
{"9999", p(120, 5, 119, 119, 119, 119, 1)},
{"1a", p(120, 2, 101, 1, 5)},
{"0b", p(120, 1, 2, 6)},
{"01c", p(120, 2, 101, 2, 8, 2)},
{"10x", p(120, 3, 101, 100, 1, 200)},
{"99y", p(120, 3, 119, 119, 1, 201)},
{"9999nop", p(120, 5, 119, 119, 119, 119, 1, 121)},

// Allow follow-up collation elements if they have a zero non-primary.
{"١٢٩", []Elem{e(120), e(4), e(101), tPlus3, e(102), tPlus3, e(119), tPlus3}},
{"١٢٩", []Elem{e(120), e(4), e(101), tPlus3, e(102), tPlus3, e(119), tPlus3, e(1)}},
{
"129",
[]Elem{
e(120), e(4),
e(101, digSec, digTert+1),
e(102, digSec, digTert+3),
e(119, digSec, digTert+1),
e(1),
},
},

// Ensure AppendNext* adds to the given buffer.
{"a10", p(5, 120, 3, 101, 100)},
{"a10", p(5, 120, 3, 101, 100, 1)},
} {
nw := NewNumericWeighter(numWeighter)

Expand Down Expand Up @@ -148,6 +149,22 @@ func TestNumericOverflow(t *testing.T) {
}
}

func TestNumericZeroOverflow(t *testing.T) {
manyDigits := strings.Repeat("0", maxDigits+1) + "a"

nw := NewNumericWeighter(numWeighter)

got, n := nw.AppendNextString(nil, manyDigits)

if n != maxDigits+2 { // Zeros after maxDigits-1 are ignored but are still consumed so that a number with leading zeros is ordered after a number with less leading zeros
t.Errorf("n: got %d; want %d", n, maxDigits+2)
}

if got[2].Primary() != maxDigits {
t.Errorf("primary(e[2]): got %d; want %d", got[1].Primary(), maxDigits)
}
}

func TestNumericWeighterAlloc(t *testing.T) {
buf := make([]Elem, 100)
w := NewNumericWeighter(numWeighter)
Expand Down

0 comments on commit ce407f2

Please sign in to comment.