Skip to content

Commit

Permalink
feat(hash): add xfnv
Browse files Browse the repository at this point in the history
  • Loading branch information
xiaost committed Sep 13, 2024
1 parent 979a33b commit fa3cc99
Show file tree
Hide file tree
Showing 2 changed files with 120 additions and 0 deletions.
56 changes: 56 additions & 0 deletions hash/xfnv/xfnv.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
// Package xfnv is a modified, non-cross-platform version of FNV-1a.
//
// It processes 8 bytes per round by reading them directly as a uint64.
// As a result, it does not generate the same hash on different CPU architectures.
package xfnv

import (
"unsafe"
)

// FNV 64-bit parameters consumed by doHash.
const (
	// fnvHashOffset64 is the value the hash state starts from.
	fnvHashOffset64 uint64 = 14695981039346656037
	// fnvHashPrime64 is the multiplier applied after each XOR step.
	fnvHashPrime64 uint64 = 1099511628211
)

// strDataPtr returns the pointer to the bytes of s without copying them.
//
// It reads the first word of the string header, which holds the data pointer.
func strDataPtr(s string) unsafe.Pointer {
	dataField := (*unsafe.Pointer)(unsafe.Pointer(&s))
	return *dataField
}

// bytesDataPtr returns the pointer to the first element of b's backing array
// without copying; it is nil for a nil slice.
//
// It reads the first word of the slice header, which holds the data pointer.
func bytesDataPtr(b []byte) unsafe.Pointer {
	dataField := (*unsafe.Pointer)(unsafe.Pointer(&b))
	return *dataField
}

func Hash(b []byte) uint64 {
return doHash(bytesDataPtr(b), len(b))
}

func HashStr(s string) uint64 {
return doHash(strDataPtr(s), len(s))
}

// doHash hashes the n bytes starting at p with a modified FNV-1a: instead of
// mixing one byte per round it mixes a whole 8-byte word per round, then
// finishes the remaining 0-7 bytes one at a time.
//
// Words are loaded straight from memory as uint64, so the result differs
// between CPU architectures; it is only meant for in-memory use.
//
// p is never dereferenced when n <= 0; in that case the initial offset value
// is returned unchanged.
func doHash(p unsafe.Pointer, n int) uint64 {
	sum := fnvHashOffset64
	off := 0

	// Full words: consume 8 bytes per round while at least 8 remain.
	// NOTE(review): p is not guaranteed to be 8-byte aligned here; confirm
	// that every target architecture tolerates unaligned uint64 loads.
	for ; n-off >= 8; off += 8 {
		word := *(*uint64)(unsafe.Add(p, off))
		sum = (sum ^ word) * fnvHashPrime64
	}

	// Tail: the last 0-7 bytes, mixed like classic FNV-1a.
	for ; off < n; off++ {
		octet := *(*byte)(unsafe.Add(p, off))
		sum = (sum ^ uint64(octet)) * fnvHashPrime64
	}
	return sum
}
64 changes: 64 additions & 0 deletions hash/xfnv/xfnv_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
package xfnv

import (
"crypto/rand"
"fmt"
"hash/maphash"
"testing"

"github.com/bytedance/gopkg/util/xxhash3"
"github.com/stretchr/testify/require"
)

// TestHashStr checks that HashStr is deterministic for equal inputs and
// separates inputs that differ in their last byte, for lengths below,
// at, and above the 8-byte word size.
func TestHashStr(t *testing.T) {
	cases := []struct {
		left, right string
		equal       bool
	}{
		{"1234", "1234", true},
		{"12345", "12346", false},
		{"12345678", "12345678", true},
		{"123456789", "123456788", false},
	}
	for _, tc := range cases {
		if tc.equal {
			require.Equal(t, HashStr(tc.left), HashStr(tc.right))
		} else {
			require.NotEqual(t, HashStr(tc.left), HashStr(tc.right))
		}
	}
}

// benchSink receives the benchmarked hash values so the compiler cannot
// treat the calls inside the timed loops as dead code and remove them.
var benchSink uint64

// BenchmarkHash compares the throughput of Hash with xxhash3.Hash and
// maphash.Hash.Write on random inputs of 8 to 512 bytes. Each combination of
// size and algorithm runs as its own sub-benchmark named
// "size-<n>-<algorithm>".
func BenchmarkHash(b *testing.B) {
	sizes := []int{8, 16, 32, 64, 128, 512}
	inputs := make([][]byte, len(sizes))
	for i, size := range sizes {
		buf := make([]byte, size)
		if _, err := rand.Read(buf); err != nil {
			b.Fatal(err)
		}
		inputs[i] = buf
	}
	b.ResetTimer()
	for _, data := range inputs {
		b.Run(fmt.Sprintf("size-%d-xfnv", len(data)), func(b *testing.B) {
			b.SetBytes(int64(len(data)))
			var acc uint64
			for i := 0; i < b.N; i++ {
				acc ^= Hash(data)
			}
			benchSink = acc
		})
	}

	// Blank line to visually separate the groups in the output.
	println("")

	for _, data := range inputs {
		b.Run(fmt.Sprintf("size-%d-xxhash3", len(data)), func(b *testing.B) {
			b.SetBytes(int64(len(data)))
			var acc uint64
			for i := 0; i < b.N; i++ {
				acc ^= xxhash3.Hash(data)
			}
			benchSink = acc
		})
	}

	println("")

	for _, data := range inputs {
		b.Run(fmt.Sprintf("size-%d-maphash", len(data)), func(b *testing.B) {
			seed := maphash.MakeSeed()
			h := &maphash.Hash{}
			h.SetSeed(seed)
			b.SetBytes(int64(len(data)))
			for i := 0; i < b.N; i++ {
				// maphash.Bytes(seed, data) would be a fairer comparison,
				// but it is only available from go1.19 on.
				// Write is documented to never fail, so its results are
				// deliberately ignored.
				_, _ = h.Write(data)
			}
		})
	}
}

0 comments on commit fa3cc99

Please sign in to comment.