Skip to content

Commit

Permalink
feat: add NewStrStoreFromSlice
Browse files Browse the repository at this point in the history
  • Loading branch information
ppzqh committed Sep 16, 2024
1 parent 3710c68 commit 871de6e
Show file tree
Hide file tree
Showing 2 changed files with 92 additions and 26 deletions.
60 changes: 47 additions & 13 deletions container/strstore/strstore.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,43 +16,76 @@ package strstore

import (
"encoding/binary"
"math"
"unsafe"
)

const (
pageSize = 1 << 20
strlenSize = 4
defaultPageSize = 1 << 20
strlenSize = 4 // size of uint32, maximum 4GB for each string
)

// StrStore is used to store string with less GC overhead.
// The string stored here should not be longer than `pageSize` and does not need to be deleted.
type StrStore struct {
pages [][]byte
offset int // offset of the latest page
pages [][]byte
pageSize int // size of each page
offset int // offset of the latest page
}

// NewStrStore returns a StrStore.
// NewStrStore returns a StrStore with default page size.
func NewStrStore() *StrStore {
return &StrStore{
pages: [][]byte{make([]byte, pageSize)},
pages: [][]byte{make([]byte, defaultPageSize)},
pageSize: defaultPageSize,
}
}

// NewStrStoreFromSlice constructs a StrStore from the input string slice and returns the
// StrStore together with one index per input string (in input order) to pass to Get.
// All strings are packed, each prefixed by its big-endian uint32 length, into a single page
// sized to hold exactly that data. For an empty slice the default page size is used instead.
// It panics if any string in the slice is longer than math.MaxUint32.
func NewStrStoreFromSlice(ss []string) (*StrStore, []int) {
	totalLen := strlenSize * len(ss)
	for _, s := range ss {
		// Compare as uint64 so the constant does not overflow int on 32-bit platforms.
		if uint64(len(s)) > math.MaxUint32 {
			panic("string too long")
		}
		totalLen += len(s)
	}

	// Get divides the index by pageSize, so the page size must never be zero.
	pageSize := totalLen
	if pageSize == 0 {
		pageSize = defaultPageSize
	}

	page := make([]byte, pageSize)
	idxes := make([]int, len(ss))
	offset := 0
	for i, s := range ss {
		idxes[i] = offset
		binary.BigEndian.PutUint32(page[offset:offset+strlenSize], uint32(len(s)))
		copy(page[offset+strlenSize:], s)
		offset += strlenSize + len(s)
	}

	return &StrStore{
		pages:    [][]byte{page},
		pageSize: pageSize,
		// Record where the packed data ends so that a later Set continues after it
		// instead of overwriting the strings stored at the start of the page.
		offset: offset,
	}, idxes
}

// Set sets a string into the store and returns an index.
// It returns -1 if the string is longer than pageSize or math.MaxUint32.
func (s *StrStore) Set(str string) int {
l := len(str)
if l > pageSize {
if l > s.pageSize || l > math.MaxUint32 {
return -1
}
totalLen := l + strlenSize
if len(s.pages) == 0 || s.offset+totalLen > pageSize {
if len(s.pages) == 0 || s.offset+totalLen > s.pageSize {
// grow pages
s.pages = append(s.pages, make([]byte, pageSize))
s.pages = append(s.pages, make([]byte, s.pageSize))
s.offset = 0
}
offset := s.offset
page := s.pages[len(s.pages)-1]
idx := (len(s.pages)-1)*pageSize + offset
idx := (len(s.pages)-1)*s.pageSize + offset

// write length + string
binary.BigEndian.PutUint32(page[offset:offset+strlenSize], uint32(l))
Expand All @@ -63,12 +96,13 @@ func (s *StrStore) Set(str string) int {
}

// Get gets the string with the idx.
// It returns an empty string if no string can be found with the input idx.
func (s *StrStore) Get(idx int) string {
if idx < 0 {
return ""
}
pageIdx := idx / pageSize
offset := idx % pageSize
pageIdx := idx / s.pageSize
offset := idx % s.pageSize
if pageIdx > len(s.pages)-1 || offset > len(s.pages[pageIdx]) {
return ""
}
Expand All @@ -81,5 +115,5 @@ func (s *StrStore) Get(idx int) string {

// Len returns the total length of bytes.
func (s *StrStore) Len() int {
return len(s.pages) * pageSize
return len(s.pages) * s.pageSize
}
58 changes: 45 additions & 13 deletions container/strstore/strstore_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,23 +28,57 @@ import (
func TestStrStore(t *testing.T) {
// test when the pages grow
strNum := 1000000
strstore := NewStrStore()
strStore := NewStrStore()
for i := 0; i < strNum; i++ {
idx := strStore.Set(strconv.Itoa(i))
assert.Equal(t, strconv.Itoa(i), strStore.Get(idx), fmt.Sprintf("i=%d, idx=%d", i, idx))
}
s := strStore.Get(-1)
assert.Equal(t, "", s)
s = strStore.Get(strStore.Len() * 2)
assert.Equal(t, "", s)

// batch construct
ss := make([]string, strNum)
for i := 0; i < strNum; i++ {
ss[i] = strconv.Itoa(i)
}
bs, idxes := NewStrStoreFromSlice(ss)
for i := 0; i < strNum; i++ {
idx := strstore.Set(strconv.Itoa(i))
assert.Equal(t, strconv.Itoa(i), strstore.Get(idx), fmt.Sprintf("i=%d, idx=%d", i, idx))
assert.Equal(t, strconv.Itoa(i), bs.Get(idxes[i]))
}
s := strstore.Get(-1)
s = bs.Get(-1)
assert.Equal(t, "", s)
s = strstore.Get(strstore.Len() * 2)
s = bs.Get(strStore.Len() * 2)
assert.Equal(t, "", s)
// set more strings to this StrStore
for i := 0; i < strNum; i++ {
idx := bs.Set(strconv.Itoa(i))
assert.Equal(t, strconv.Itoa(i), bs.Get(idx), fmt.Sprintf("i=%d, idx=%d", i, idx))
}
}

func BenchmarkStrStoreGetSet(b *testing.B) {
mockStr := "0123456789"
avgStrlen := len(mockStr)
pageLen := pageSize / avgStrlen
pageLen := defaultPageSize / avgStrlen
strSlice := newStrSliceStore(pageLen)
strstore := &StrStore{}
strstore := NewStrStore()

b.Run("strbuf-set", func(b *testing.B) {
b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
strstore.Set(mockStr)
}
})
b.Run("strbuf-get", func(b *testing.B) {
b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
strstore.Get(0)
}
})

b.Run("strbuf-set", func(b *testing.B) {
b.ReportAllocs()
Expand Down Expand Up @@ -79,18 +113,16 @@ func BenchmarkStrStoreGetSet(b *testing.B) {

func BenchmarkStrStoreGC(b *testing.B) {
ss := randStrings(50, 1000000)
strstore := NewStrStore()
for i := 0; i < len(ss); i++ {
strstore.Set(ss[i])
}
strStore, idxes := NewStrStoreFromSlice(ss)
_ = ss
runtime.GC()
b.ResetTimer()

for i := 0; i < b.N; i++ {
runtime.GC()
}
runtime.KeepAlive(strstore)
runtime.KeepAlive(strStore)
runtime.KeepAlive(idxes)
}

func BenchmarkStdStrSliceGC(b *testing.B) {
Expand Down Expand Up @@ -122,7 +154,7 @@ type strSliceStore struct {
}

func (s *strSliceStore) Set(str string) int {
if len(s.ss) == 0 || len(s.ss[len(s.ss)-1]) == pageSize-1 {
if len(s.ss) == 0 || len(s.ss[len(s.ss)-1]) == defaultPageSize-1 {
s.ss = append(s.ss, make([]string, 0, s.pageLen))
}
pageIdx := len(s.ss) - 1
Expand Down

0 comments on commit 871de6e

Please sign in to comment.