Skip to content

Commit

Permalink
feat: strstore
Browse files Browse the repository at this point in the history
  • Loading branch information
ppzqh committed Sep 16, 2024
1 parent 979a33b commit 3710c68
Show file tree
Hide file tree
Showing 2 changed files with 238 additions and 0 deletions.
85 changes: 85 additions & 0 deletions container/strstore/strstore.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
// Copyright 2024 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package strstore

import (
"encoding/binary"
"unsafe"
)

const (
	// pageSize is the fixed size in bytes of every page buffer.
	pageSize = 1 << 20
	// strlenSize is the size in bytes of the big-endian length prefix that
	// is written in front of every stored string.
	strlenSize = 4
)

// StrStore is used to store string with less GC overhead.
// Every string is written as a record (strlenSize-byte length prefix followed
// by the string bytes) into a fixed-size page, so a string stored here must
// not be longer than `pageSize - strlenSize`. Stored strings cannot be deleted.
type StrStore struct {
	pages  [][]byte
	offset int // write offset inside the latest page
}

// NewStrStore returns a StrStore with its first page already allocated.
func NewStrStore() *StrStore {
	return &StrStore{
		pages: [][]byte{make([]byte, pageSize)},
	}
}

// Set stores str and returns the index to pass to Get.
// It returns -1 when str cannot fit in a single page together with its
// length prefix, i.e. when len(str) > pageSize-strlenSize.
func (s *StrStore) Set(str string) int {
	l := len(str)
	// The whole record (prefix + payload) has to fit in one page; checking
	// only len(str) against pageSize would let the write overrun the page.
	if l > pageSize-strlenSize {
		return -1
	}
	totalLen := l + strlenSize
	if len(s.pages) == 0 || s.offset+totalLen > pageSize {
		// The record does not fit in the remaining space (or the store is a
		// zero value without pages): start a fresh page.
		s.pages = append(s.pages, make([]byte, pageSize))
		s.offset = 0
	}
	pageIdx := len(s.pages) - 1
	page := s.pages[pageIdx]
	offset := s.offset

	// write length + string
	binary.BigEndian.PutUint32(page[offset:offset+strlenSize], uint32(l))
	copy(page[offset+strlenSize:offset+totalLen], str)
	s.offset += totalLen

	// The index encodes both the page number and the offset inside that page.
	return pageIdx*pageSize + offset
}

// Get returns the string stored at idx, where idx is a value returned by Set.
// It returns "" when idx is negative, points outside the allocated pages, or
// does not address a record that lies completely inside its page.
// The returned string shares memory with the page buffer instead of copying.
func (s *StrStore) Get(idx int) string {
	if idx < 0 {
		return ""
	}
	pageIdx := idx / pageSize
	offset := idx % pageSize
	if pageIdx >= len(s.pages) {
		return ""
	}

	page := s.pages[pageIdx]
	start := offset + strlenSize
	if start > len(page) {
		// Not enough room left in the page for a length prefix.
		return ""
	}
	length := binary.BigEndian.Uint32(page[offset:start])
	// Compare in uint64 so a bogus prefix can neither overflow int nor slice
	// past the end of the page.
	if uint64(length) > uint64(len(page)-start) {
		return ""
	}
	if length == 0 {
		return ""
	}
	b := page[start : start+int(length)]
	// Reinterpret the slice header as a string header to avoid a copy; pages
	// are only ever appended to, so the bytes behind b are not rewritten.
	return *(*string)(unsafe.Pointer(&b))
}

// Len returns the total number of bytes allocated for pages, including
// space that has not been written yet.
func (s *StrStore) Len() int {
	return len(s.pages) * pageSize
}
}
153 changes: 153 additions & 0 deletions container/strstore/strstore_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
// Copyright 2024 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package strstore

import (
"fmt"
"math/rand"
_ "net/http/pprof"
"runtime"
"strconv"
"testing"

"github.com/stretchr/testify/assert"
)

// TestStrStore stores one million decimal strings, which is enough data to
// make the store grow past its first page, and checks that each string can be
// read back right after it was written. It then checks that indexes outside
// the store yield the empty string.
func TestStrStore(t *testing.T) {
	const total = 1000000

	store := NewStrStore()
	for n := 0; n < total; n++ {
		want := strconv.Itoa(n)
		idx := store.Set(want)
		got := store.Get(idx)
		assert.Equal(t, want, got, fmt.Sprintf("i=%d, idx=%d", n, idx))
	}

	// A negative index is never valid.
	assert.Equal(t, "", store.Get(-1))
	// Neither is an index beyond the allocated pages.
	assert.Equal(t, "", store.Get(store.Len()*2))
}

// BenchmarkStrStoreGetSet compares Set and Get on StrStore ("strbuf-*") with
// the same operations on the slice-of-strings baseline ("stdstrslice-*").
func BenchmarkStrStoreGetSet(b *testing.B) {
	mockStr := "0123456789"
	avgStrlen := len(mockStr)
	// Number of strings of this size that roughly fill one StrStore page.
	pageLen := pageSize / avgStrlen
	strSlice := newStrSliceStore(pageLen)
	strstore := &StrStore{}

	// Seed both stores so that index 0 always addresses a real entry. Without
	// this the "get" sub-benchmarks only work when the matching "set"
	// sub-benchmark ran first; selecting a "get" sub-benchmark alone with
	// -bench would read from an empty store.
	strstore.Set(mockStr)
	strSlice.Set(mockStr)

	b.Run("strbuf-set", func(b *testing.B) {
		b.ReportAllocs()
		b.ResetTimer()
		for i := 0; i < b.N; i++ {
			strstore.Set(mockStr)
		}
	})
	b.Run("strbuf-get", func(b *testing.B) {
		b.ReportAllocs()
		b.ResetTimer()
		for i := 0; i < b.N; i++ {
			strstore.Get(0)
		}
	})

	b.Run("stdstrslice-set", func(b *testing.B) {
		b.ReportAllocs()
		b.ResetTimer()
		for i := 0; i < b.N; i++ {
			strSlice.Set(mockStr)
		}
	})
	b.Run("stdstrslice-get", func(b *testing.B) {
		b.ReportAllocs()
		b.ResetTimer()
		for i := 0; i < b.N; i++ {
			strSlice.Get(0)
		}
	})
}

// BenchmarkStrStoreGC times a forced garbage collection while a StrStore
// filled with 1,000,000 random 50-byte strings is kept alive.
func BenchmarkStrStoreGC(b *testing.B) {
	inputs := randStrings(50, 1000000)
	store := NewStrStore()
	for _, in := range inputs {
		store.Set(in)
	}
	_ = inputs
	// Collect once before timing starts.
	runtime.GC()
	b.ResetTimer()

	for n := b.N; n > 0; n-- {
		runtime.GC()
	}
	// Keep the store reachable until every timed collection has finished.
	runtime.KeepAlive(store)
}

// BenchmarkStdStrSliceGC times a forced garbage collection while a plain
// []string holding 1,000,000 random 50-byte strings is kept alive. It is the
// baseline for BenchmarkStrStoreGC.
func BenchmarkStdStrSliceGC(b *testing.B) {
	inputs := randStrings(50, 1000000)
	held := make([]string, 0, len(inputs))
	held = append(held, inputs...)
	_ = inputs
	// Collect once before timing starts.
	runtime.GC()
	b.ResetTimer()

	for n := b.N; n > 0; n-- {
		runtime.GC()
	}
	// Keep the slice reachable until every timed collection has finished.
	runtime.KeepAlive(held)
}

// newStrSliceStore returns an empty strSliceStore whose pages hold pageLen
// strings each. A pageLen below 1 is raised to 1 so that index arithmetic
// stays well defined.
func newStrSliceStore(pageLen int) *strSliceStore {
	if pageLen < 1 {
		pageLen = 1
	}
	return &strSliceStore{
		ss:      [][]string{},
		pageLen: pageLen,
	}
}

// strSliceStore is the plain [][]string baseline the benchmarks compare
// StrStore against. Strings are appended to fixed-capacity pages and are
// addressed by a single integer index.
type strSliceStore struct {
	ss      [][]string
	pageLen int // number of strings per page
}

// Set appends str and returns the index to pass to Get.
func (s *strSliceStore) Set(str string) int {
	// Open a new page once the latest one holds pageLen strings; the limit is
	// a count of strings, not a byte size.
	if len(s.ss) == 0 || len(s.ss[len(s.ss)-1]) >= s.pageLen {
		s.ss = append(s.ss, make([]string, 0, s.pageLen))
	}
	pageIdx := len(s.ss) - 1
	s.ss[pageIdx] = append(s.ss[pageIdx], str)
	// Pages are zero-based, so the first page contributes no offset.
	return pageIdx*s.pageLen + len(s.ss[pageIdx]) - 1
}

// Get returns the string stored at idx, or "" when idx does not address a
// stored string.
func (s *strSliceStore) Get(idx int) string {
	if idx < 0 || s.pageLen <= 0 {
		return ""
	}
	pageIdx := idx / s.pageLen
	offset := idx % s.pageLen
	if pageIdx >= len(s.ss) || offset >= len(s.ss[pageIdx]) {
		return ""
	}
	return s.ss[pageIdx][offset]
}

// Len returns the number of string slots in all allocated pages, including
// slots that are still unused.
func (s *strSliceStore) Len() int {
	return len(s.ss) * s.pageLen
}

// randStrings returns n strings of m bytes each. All bytes come from a single
// buffer of m*n bytes filled by rand.Read; every string is an independent copy
// of its m-byte window of that buffer.
func randStrings(m, n int) []string {
	raw := make([]byte, m*n)
	rand.Read(raw)

	out := make([]string, n)
	for i := range out {
		lo := i * m
		out[i] = string(raw[lo : lo+m])
	}
	return out
}

0 comments on commit 3710c68

Please sign in to comment.