// Package xfnv is a modified, word-at-a-time variant of FNV-1a.
//
// It folds 8 input bytes into the state per round instead of 1, so for
// inputs of 8 bytes or more the result differs from hash/fnv. Inputs
// shorter than 8 bytes hash exactly like 64-bit FNV-1a. The values are
// meant for in-memory use and should not be persisted or compared with
// other FNV implementations.

const (
	fnvHashOffset64 = uint64(14695981039346656037) // FNV-1a 64-bit offset basis
	fnvHashPrime64  = uint64(1099511628211)        // FNV 64-bit prime
)

// strDataPtr returns the data pointer stored in the header of s.
//
// It relies on the data pointer being the first word of a string value.
func strDataPtr(s string) unsafe.Pointer {
	return *(*unsafe.Pointer)(unsafe.Pointer(&s))
}

// bytesDataPtr returns the data pointer stored in the header of b.
//
// It relies on the data pointer being the first word of a slice value.
func bytesDataPtr(b []byte) unsafe.Pointer {
	return *(*unsafe.Pointer)(unsafe.Pointer(&b))
}

// Hash returns the xfnv hash of b. The slice is only read.
func Hash(b []byte) uint64 {
	return doHash(bytesDataPtr(b), len(b))
}

// HashStr returns the xfnv hash of s without converting it to a []byte.
// For any s, HashStr(s) equals Hash([]byte(s)).
func HashStr(s string) uint64 {
	return doHash(strDataPtr(s), len(s))
}

// load64 reads the 8 bytes starting at p as a little-endian uint64.
//
// Going through *[8]byte instead of *uint64 keeps the read valid at any
// address: the input may start at an arbitrary offset, while a direct
// *(*uint64)(p) assumes 8-byte alignment. The fixed byte order also makes
// the hash independent of the CPU's endianness; on little-endian machines
// it yields the same value a native 64-bit load would.
func load64(p unsafe.Pointer) uint64 {
	b := (*[8]byte)(p)
	return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 |
		uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
}

// doHash hashes the n bytes starting at p.
//
// p is never dereferenced when n is 0, so a nil pointer (empty string or
// nil slice) is fine. The caller must guarantee that p points to at least
// n readable bytes.
func doHash(p unsafe.Pointer, n int) uint64 {
	h := fnvHashOffset64

	// Whole 8-byte words first: one xor-and-multiply per word.
	words := n >> 3
	for i := 0; i < words; i++ {
		h ^= load64(unsafe.Add(p, i<<3))
		h *= fnvHashPrime64
	}

	// Remaining 0-7 bytes, one at a time as in plain FNV-1a.
	for i := words << 3; i < n; i++ {
		h ^= uint64(*(*byte)(unsafe.Add(p, i)))
		h *= fnvHashPrime64
	}
	return h
}
require.Equal(t, HashStr("12345678"), HashStr("12345678")) + require.NotEqual(t, HashStr("123456789"), HashStr("123456788")) +} + +func BenchmarkHash(b *testing.B) { + sizes := []int{8, 16, 32, 64, 128, 512} + bb := make([][]byte, len(sizes)) + for i := range bb { + b := make([]byte, sizes[i]) + rand.Read(b) + bb[i] = b + } + b.ResetTimer() + for _, data := range bb { + b.Run(fmt.Sprintf("size-%d-xfnv", len(data)), func(b *testing.B) { + b.SetBytes(int64(len(data))) + for i := 0; i < b.N; i++ { + _ = Hash(data) + } + }) + } + + println("") + + for _, data := range bb { + b.Run(fmt.Sprintf("size-%d-xxhash3", len(data)), func(b *testing.B) { + b.SetBytes(int64(len(data))) + for i := 0; i < b.N; i++ { + _ = xxhash3.Hash(data) + } + }) + } + + println("") + + for _, data := range bb { + b.Run(fmt.Sprintf("size-%d-maphash", len(data)), func(b *testing.B) { + s := maphash.MakeSeed() + h := &maphash.Hash{} + h.SetSeed(s) + b.SetBytes(int64(len(data))) + for i := 0; i < b.N; i++ { + // use maphash.Bytes which is more fair to benchmark after go1.19 + // maphash.Bytes(s, data) + _, _ = h.Write(data) + } + }) + } +}