Skip to content

Commit

Permalink
feat: new strmap
Browse files Browse the repository at this point in the history
  • Loading branch information
xiaost committed Jul 12, 2024
1 parent a73b0c9 commit 8c98c39
Show file tree
Hide file tree
Showing 2 changed files with 295 additions and 0 deletions.
180 changes: 180 additions & 0 deletions container/strmap/strmap.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
package strmap

import (
"fmt"
"sort"
"strings"
)

// StrMap is a GC-friendly, string-keyed map implementation.
// It is read-only after it has been created by New.
//
// Keys are not kept as individual strings: New copies the bytes of every key
// into one shared buffer and describes each key with a record made of plain
// integer fields.
type StrMap struct {
// data holds the bytes of all keys, appended one after another.
data []byte
// items holds one record per key; makeHashtable sorts it by slot.
items []mapItem

// hashtable maps a slot number to the index in items of the first record
// with that slot, or -1 when no key hashes to that slot.
hashtable []int
}

// mapItem describes one key/value pair stored in a StrMap.
type mapItem struct {
off int // offset of the key's first byte in StrMap.data
sz int // length of the key in bytes
slot int // raw hash of the key as set by New; reduced modulo the table size by makeHashtable
v uintptr // caller-supplied value returned by Get
}

// New builds a StrMap holding every key/value pair of m.
// The uintptr values are opaque to StrMap: whatever was stored for a key is
// handed back unchanged by Get.
func New(m map[string]uintptr) *StrMap {
	// First pass: total key length, so the shared key buffer is allocated once.
	total := 0
	for key := range m {
		total += len(key)
	}

	buf := make([]byte, 0, total)
	entries := make([]mapItem, 0, len(m))
	for key, val := range m {
		entry := mapItem{
			off:  len(buf),
			sz:   len(key),
			slot: int(fnvhashstr(key)), // raw hash; makeHashtable reduces it to a slot
			v:    val,
		}
		entries = append(entries, entry)
		buf = append(buf, key...)
	}

	sm := &StrMap{data: buf, items: entries}
	sm.makeHashtable()
	return sm
}

// Len returns the number of key/value pairs stored in the map.
func (m *StrMap) Len() int {
return len(m.items)
}

// makeHashtable builds the slot lookup table for the items already stored in m.
//
// It reduces every item's raw hash to a slot in [0, slots), sorts the items by
// slot so that colliding keys sit next to each other, and records for each
// slot the index of its first item (-1 for a slot no key hashes to).
func (m *StrMap) makeHashtable() {
	slots := getHashtableSlots(len(m.items))
	m.hashtable = make([]int, slots)

	for i := range m.items {
		// New stores the 32-bit hash in an int. Where int is 32 bits wide that
		// value can be negative, and a negative dividend makes % return a
		// negative slot, which would panic when used as an index below.
		// Converting back to uint32 recovers the original hash, so the
		// remainder is always in [0, slots).
		h := uint32(m.items[i].slot)
		m.items[i].slot = int(uint64(h) % uint64(slots))
	}

	// make sure items with the same slot are stored together,
	// which is good for the cpu cache
	sort.Slice(m.items, func(i, j int) bool {
		return m.items[i].slot < m.items[j].slot
	})

	for i := range m.hashtable {
		m.hashtable[i] = -1
	}
	for i := range m.items {
		// Only the first item of each slot is recorded: items with the same
		// slot are adjacent after sorting, and Get walks forward from here.
		if s := m.items[i].slot; m.hashtable[s] < 0 {
			m.hashtable[s] = i
		}
	}
}

// Get looks up s and returns the value that was stored for it by New.
// The second result reports whether s is present; when it is false the first
// result is 0. A StrMap without a hash table (the zero value) reports every
// key as absent.
func (m *StrMap) Get(s string) (uintptr, bool) {
	if len(m.hashtable) == 0 {
		// Nothing is stored, and the modulo below would divide by zero.
		return 0, false
	}

	// Reduce the hash as an unsigned value: converting the uint32 hash to int
	// first can yield a negative number where int is 32 bits wide, and a
	// negative remainder is not a valid index. For non-negative values the
	// result is the same as the signed computation.
	slot := int(uint64(fnvhashstr(s)) % uint64(len(m.hashtable)))

	i := m.hashtable[slot]
	if i < 0 {
		return 0, false
	}

	// Items sharing a slot are contiguous in m.items, so scan forward from the
	// first one until the slot changes or the items run out.
	for ; i < len(m.items); i++ {
		e := &m.items[i]
		if e.slot != slot {
			break
		}
		// Different keys can share a slot, so compare the actual key bytes.
		if string(m.data[e.off:e.off+e.sz]) == s {
			return e.v, true
		}
	}
	return 0, false
}

// String renders every stored item, in storage order, as a bracketed,
// comma-separated list of {off, slot, str} entries.
func (m *StrMap) String() string {
	var sb strings.Builder
	sb.WriteByte('[')
	for i := range m.items {
		if i > 0 {
			sb.WriteString(", ")
		}
		it := &m.items[i]
		key := m.data[it.off : it.off+it.sz]
		fmt.Fprintf(&sb, "{off:%d, slot:%x, str:%q}", it.off, it.slot, string(key))
	}
	sb.WriteByte(']')
	return sb.String()
}

// Parameters of the 32-bit FNV hash.
const (
	fnvHashOffset32 = uint32(2166136261) // initial hash value (offset basis)
	fnvHashPrime32  = uint32(16777619)   // multiplier applied for every byte
)

// fnvhashstr returns the 32-bit FNV-1 hash of s: starting from the offset
// basis, each byte is folded in by multiplying the hash by the prime and then
// XOR-ing the byte in.
func fnvhashstr(s string) uint32 {
	hash := fnvHashOffset32
	for _, c := range []byte(s) {
		hash = (hash * fnvHashPrime32) ^ uint32(c)
	}
	return hash
}

// bits2primes maps a bit count k to the prime used as the hash table size for
// that k. For k >= 5 the entry is a prime slightly below 2^k (the power of two
// is shown in the trailing comment); every k <= 4 uses 17.
var bits2primes = []int{
	0:  17,         // 1
	1:  17,         // 2
	2:  17,         // 4
	3:  17,         // 8
	4:  17,         // at least 17 for <= 16
	5:  31,         // 32
	6:  61,         // 64
	7:  127,        // 128
	8:  251,        // 256
	9:  509,        // 512
	10: 1021,       // 1024
	11: 2039,       // 2048
	12: 4093,       // 4096
	13: 8191,       // 8192
	14: 16381,      // 16384
	15: 32749,      // 32768
	16: 65521,      // 65536
	17: 131071,     // 131072
	18: 262139,     // 262144
	19: 524287,     // 524288
	20: 1048573,    // 1048576
	21: 2097143,    // 2097152
	22: 4194301,    // 4194304
	23: 8388593,    // 8388608
	24: 16777213,   // 16777216
	25: 33554393,   // 33554432
	26: 67108859,   // 67108864
	27: 134217689,  // 134217728
	28: 268435399,  // 268435456
	29: 536870909,  // 536870912
	30: 1073741789, // 1073741824
}

// getHashtableSlots returns the number of hash table slots to use for n items.
//
// n is first scaled up for a 0.75 load factor. The result is the prime listed
// in bits2primes for one bit more than the scaled count needs, which is always
// larger than the scaled count. When the scaled count is too large for the
// table, the scaled count itself is returned.
func getHashtableSlots(n int) int {
	// load factor
	n = int(float32(n) / 0.75)

	// count bits to decide which prime number to use
	bits := 0
	for v := uint64(n); v > 0; v >>= 1 {
		bits++
	}

	// Use the prime for one more bit: bits2primes[bits] is slightly below
	// 2^bits and may be smaller than n, whereas the next entry is always
	// larger. E.g. a scaled n of 2045 needs 11 bits, but bits2primes[11] is
	// 2039, so 4093 is used instead.
	bits++

	// The last valid index is len(bits2primes)-1, so bits == len(bits2primes)
	// must take the fallback too; indexing with it would panic.
	if bits >= len(bits2primes) {
		// ???? are you sure we need to hold so many items? ~ 1B items for 30 bits
		return n
	}
	return bits2primes[bits] // a prime bigger than n
}
115 changes: 115 additions & 0 deletions container/strmap/strmap_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
package strmap

import (
"runtime"
"testing"
"time"
"unsafe"

"github.com/stretchr/testify/require"
)

// randseed is the state of the test-only pseudo-random generator; it starts
// from the current time in nanoseconds.
var randseed = uint64(time.Now().UnixNano())

// rand64 advances randseed with three xorshift steps (12 right, 25 left,
// 27 right) and returns the new state, without a final multiplication.
func rand64() uint64 {
	x := randseed
	x ^= x >> 12
	x ^= x << 25
	x ^= x >> 27
	randseed = x
	return x
}

// randRead fills b with pseudo-random bytes taken from rand64.
func randRead(b []byte) {
	// length of the prefix that consists of whole 8-byte words
	whole := len(b) &^ 7
	for off := 0; off < whole; off += 8 {
		// store one 64-bit value directly over the next eight bytes
		*(*uint64)(unsafe.Pointer(&b[off])) = rand64()
	}

	// fill the remaining tail one byte at a time from a single extra value
	tail := rand64()
	for off := whole; off < len(b); off++ {
		b[off] = byte(tail)
		tail >>= 8
	}
}

// randIntn returns the next rand64 value, shifted right by one bit and
// converted to int, reduced modulo n.
func randIntn(n int) int {
	r := rand64() >> 1
	return int(r) % n
}

// randStrings returns n pseudo-random strings. Each one is a prefix of a
// freshly filled m-byte buffer, with a length of 1 + randIntn(m) bytes.
func randStrings(m, n int) []string {
	scratch := make([]byte, m)
	out := make([]string, n)
	for i := range out {
		randRead(scratch)
		size := 1 + randIntn(m)
		out[i] = string(scratch[:size])
	}
	return out
}

// newStdStrMap builds a built-in map from ss, giving each distinct string a
// sequential value starting at 1, in order of first appearance.
func newStdStrMap(ss []string) map[string]uintptr {
	out := make(map[string]uintptr)
	next := uintptr(1)
	for _, key := range ss {
		if _, seen := out[key]; seen {
			continue // duplicates keep the value of their first occurrence
		}
		out[key] = next
		next++
	}
	return out
}

// TestStrMap checks StrMap against a built-in map built from the same random
// strings: every key must be found with the same value, a key that cannot be
// present must be reported as missing, and an empty map must be usable.
func TestStrMap(t *testing.T) {
	ss := randStrings(20, 100000)
	m := newStdStrMap(ss)
	strset := New(m)
	require.Equal(t, len(m), strset.Len())
	for i, s := range ss {
		v0 := m[s]
		v1, ok := strset.Get(s)
		require.True(t, ok, i)
		require.Equal(t, v0, v1, i)
	}

	// randStrings(20, ...) never yields more than 20 bytes, so a longer key
	// cannot be in the map.
	v, ok := strset.Get("this key is longer than twenty bytes")
	require.False(t, ok)
	require.Equal(t, uintptr(0), v)

	// A map built from no entries holds nothing and finds nothing.
	empty := New(nil)
	require.Equal(t, 0, empty.Len())
	_, ok = empty.Get("")
	require.False(t, ok)
}

// Benchmark_StrMap measures StrMap.Get, cycling through 100000 random
// strings that are all present in the map.
func Benchmark_StrMap(b *testing.B) {
	keys := randStrings(20, 100000)
	sm := New(newStdStrMap(keys))
	n := len(keys)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		_, _ = sm.Get(keys[i%n])
	}
}

// Benchmark_StdMap measures lookups in the built-in map, cycling through
// 100000 random strings that are all present in the map.
func Benchmark_StdMap(b *testing.B) {
	keys := randStrings(20, 100000)
	std := newStdStrMap(keys)
	n := len(keys)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		_ = std[keys[i%n]]
	}
}

// Benchmark_StrMap_GC measures the cost of a runtime.GC call while a StrMap
// built from one million random strings (up to 50 bytes each) is kept alive.
func Benchmark_StrMap_GC(b *testing.B) {
ss := randStrings(50, 1000000)
m := newStdStrMap(ss)
strset := New(m)
// drop the source strings and the built-in map so that they can be collected
ss = nil
m = nil
runtime.GC() // collect the dropped data before the timer starts
b.ResetTimer()
for i := 0; i < b.N; i++ {
runtime.GC()
}
runtime.KeepAlive(strset) // keep strset reachable for the whole timed loop
}

// Benchmark_StdMap_GC measures the cost of a runtime.GC call while a built-in
// map built from one million random strings (up to 50 bytes each) is kept alive.
func Benchmark_StdMap_GC(b *testing.B) {
ss := randStrings(50, 1000000)
m := newStdStrMap(ss)
// drop the slice of source strings; the map still holds its own keys
ss = nil
runtime.GC() // collect the dropped data before the timer starts
b.ResetTimer()
for i := 0; i < b.N; i++ {
runtime.GC()
}
runtime.KeepAlive(m) // keep the map reachable for the whole timed loop
}

0 comments on commit 8c98c39

Please sign in to comment.