generated from cloudwego/.github
-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
2 changed files
with
169 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,119 @@ | ||
package strmap | ||
|
||
import ( | ||
"fmt" | ||
"sort" | ||
"strings" | ||
) | ||
|
||
// StrSet is a GC-friendly, read-only string set.
//
// Instead of holding every key as its own string, all keys are packed back to
// back into a single byte slice and indexed by pointer-free entries. Each key
// is identified by the byte offset at which it was stored, as in this pseudo
// code (New stores a repeated string only once, at its first offset):
//
//	off := 0
//	m := make(map[string]int)
//	for _, s := range ss {
//		m[s] = off
//		off += len(s)
//	}
//
// Only lookups via Get are supported once the set has been created.
//
// It is useful for cases like `map[string]YourStruct` where YourStruct
// contains no pointers: switch to `map[int]YourStruct` and use StrSet to turn
// keys into IDs for better performance. It can also be used to replace
// `map[string]struct{}`.
type StrSet struct {
	// data holds the bytes of all stored strings, concatenated.
	data []byte
	// items has one entry per stored string, ordered by ascending hash.
	items []strItem
}

// strItem locates one stored string inside StrSet.data.
type strItem struct {
	off  int    // start offset of the string in data; also the ID returned by Get
	sz   int    // length of the string in bytes
	hash uint32 // fnvhashstr of the string, the sort and search key
}
|
||
// New creates a string set from []string | ||
func New(ss []string) *StrSet { | ||
sz := 0 | ||
for _, s := range ss { | ||
sz += len(s) | ||
} | ||
b := make([]byte, 0, sz) | ||
items := make([]strItem, len(ss)) | ||
dup := make(map[string]struct{}, len(ss)) | ||
for i, s := range ss { | ||
_, ok := dup[s] | ||
if ok { | ||
continue | ||
} | ||
dup[s] = struct{}{} | ||
e := &items[i] | ||
e.off = len(b) | ||
e.sz = len(s) | ||
e.hash = fnvhashstr(s) | ||
b = append(b, s...) | ||
} | ||
ret := &StrSet{data: b, items: items} | ||
ret.sort() | ||
return ret | ||
} | ||
|
||
func (m *StrSet) sort() { | ||
sort.Slice(m.items, func(i, j int) bool { | ||
return m.items[i].hash < m.items[j].hash | ||
}) | ||
} | ||
|
||
// Get returns an ID for s, -1 if not found | ||
func (m *StrSet) Get(s string) int { | ||
h := fnvhashstr(s) | ||
// binary search | ||
i, j := 0, len(m.items) | ||
for i < j { | ||
p := int(uint(i+j) >> 1) // (i+j)/2 | ||
// i ≤ p < j | ||
if h > m.items[p].hash { | ||
i = p + 1 | ||
} else { | ||
j = p | ||
} | ||
} | ||
// i == j, check if found | ||
for i < len(m.items) { | ||
e := &m.items[i] | ||
if e.hash != h { | ||
return -1 | ||
} | ||
if string(m.data[e.off:e.off+e.sz]) == s { // double check | ||
return e.off | ||
} | ||
i++ // hash conflict | ||
} | ||
return -1 | ||
} | ||
|
||
func (m *StrSet) String() string { | ||
b := strings.Builder{} | ||
b.WriteByte('[') | ||
for i, e := range m.items { | ||
if i != 0 { | ||
b.WriteString(", ") | ||
} | ||
fmt.Fprintf(&b, "{off:%d, hash:%x, str:%q}", e.off, e.hash, string(m.data[e.off:e.off+e.sz])) | ||
} | ||
b.WriteByte(']') | ||
return b.String() | ||
} | ||
|
||
// Parameters of the 32-bit FNV hash: the offset basis and the prime.
const (
	fnvHashOffset32 uint32 = 2166136261
	fnvHashPrime32  uint32 = 16777619
)

// fnvhashstr returns the 32-bit FNV-1 hash of s: for every byte the running
// hash is first multiplied by the prime and then XORed with the byte.
// The hash of the empty string is the offset basis.
func fnvhashstr(s string) uint32 {
	hash := fnvHashOffset32
	for _, c := range []byte(s) {
		hash = (hash * fnvHashPrime32) ^ uint32(c)
	}
	return hash
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
package strmap | ||
|
||
import ( | ||
"math/rand" | ||
"testing" | ||
"time" | ||
|
||
"github.com/stretchr/testify/require" | ||
) | ||
|
||
// letters is the alphabet randString draws from: ASCII lower and upper case.
var letters = []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")

// randString returns a string of n letters, each picked from letters using the
// package-level math/rand source.
func randString(n int) string {
	buf := make([]rune, n)
	for i := 0; i < n; i++ {
		pick := rand.Intn(len(letters))
		buf[i] = letters[pick]
	}
	return string(buf)
}
|
||
// randStrings returns n pseudo-random strings of arbitrary bytes, each between
// 1 and m bytes long. The seed in use is printed so a run can be reproduced.
func randStrings(m, n int) []string {
	seed := time.Now().UnixNano()
	// NOTE(review): the time-based seed above is immediately replaced by this
	// fixed value, so the generated data is the same on every run. This looks
	// like a seed pinned while debugging - confirm whether it should stay.
	seed = 1720779864101503000
	println(seed)

	rng := rand.New(rand.NewSource(seed))
	buf := make([]byte, m)
	out := make([]string, 0, n)
	for len(out) < n {
		rng.Read(buf) // refill the scratch buffer with random bytes
		size := 1 + rng.Intn(m)
		out = append(out, string(buf[:size]))
	}
	return out
}
|
||
func TestStrMap(t *testing.T) { | ||
ss := randStrings(5, 100) | ||
off := 0 | ||
m := make(map[string]int) | ||
for _, s := range ss { | ||
_, ok := m[s] | ||
if !ok { | ||
m[s] = off | ||
off += len(s) | ||
} | ||
} | ||
strset := New(ss) | ||
for i, s := range ss { | ||
require.Equal(t, m[s], strset.Get(s), i) | ||
} | ||
} |