Skip to content

Commit

Permalink
Merge pull request #25 from cloudwego/feat-strstore
Browse files Browse the repository at this point in the history
feat: StrStore stores string with lower GC overhead
  • Loading branch information
ppzqh authored Sep 16, 2024
2 parents c2f7170 + c4d6e92 commit d404439
Show file tree
Hide file tree
Showing 2 changed files with 177 additions and 0 deletions.
70 changes: 70 additions & 0 deletions container/strstore/strstore.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
// Copyright 2024 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package strstore

import (
"math"
"unsafe"
)

const (
	// strlenSize is the size in bytes of the length header written in front of
	// every string. The header is a uint32, so a single string can hold at most
	// math.MaxUint32 bytes.
	strlenSize = 4
)

// StrStore stores strings with less GC overhead by packing all of them into a
// single []byte instead of keeping one allocation per string.
//
// Each entry is laid out as a strlenSize-byte little-endian length header
// followed by the string bytes. Entries are addressed by the byte offset of
// their header, as returned by New. A string must not be longer than
// math.MaxUint32 bytes, and entries cannot be deleted or modified once stored.
type StrStore struct {
	buf []byte
}

// New constructs a StrStore from the input string slice. It returns the store
// together with one index per input string, in input order; pass idxes[i] to
// Get to read ss[i] back.
// It panics if any string in the slice is longer than math.MaxUint32.
func New(ss []string) (*StrStore, []int) {
	totalLen := strlenSize * len(ss)
	for _, s := range ss {
		// Compare as uint64: math.MaxUint32 does not fit in int on platforms
		// where int is 32 bits, so a direct int comparison would not compile.
		if uint64(len(s)) > math.MaxUint32 {
			panic("string too long")
		}
		totalLen += len(s)
	}

	idxes := make([]int, len(ss))
	buf := make([]byte, totalLen)
	offset := 0
	for i, s := range ss {
		idxes[i] = offset
		putStrLen(buf[offset:], uint32(len(s)))
		offset += strlenSize
		offset += copy(buf[offset:], s)
	}
	return &StrStore{buf: buf}, idxes
}

// Get gets the string with the idx.
// idx must be one of the indexes returned by New. Get returns an empty string
// if no complete entry fits in the buffer at idx (negative idx, idx past the
// end, or a decoded length that overruns the buffer). An in-range idx that is
// not an entry boundary cannot be detected and yields unspecified content.
//
// The returned string shares memory with the store; no bytes are copied.
func (s *StrStore) Get(idx int) string {
	// The whole header must be inside the buffer before it is decoded.
	if idx < 0 || idx > len(s.buf)-strlenSize {
		return ""
	}
	start := idx + strlenSize
	length := int(readStrLen(s.buf[idx:start]))
	// length <= 0 covers the empty string and a uint32 that wrapped to a
	// negative int on 32-bit platforms; the second test rejects overruns.
	if length <= 0 || length > len(s.buf)-start {
		return ""
	}
	b := s.buf[start : start+length]
	// A slice header starts with the same (pointer, length) pair as a string
	// header, so it can be reinterpreted as a string without copying.
	return *(*string)(unsafe.Pointer(&b))
}

// Len returns the total length of bytes.
// The total includes the strlenSize-byte header of every stored string.
func (s *StrStore) Len() int {
	return len(s.buf)
}

// putStrLen writes n as a little-endian uint32 into the first strlenSize bytes
// of b. Byte-wise access avoids unaligned uint32 stores through unsafe.
func putStrLen(b []byte, n uint32) {
	_ = b[strlenSize-1] // one bounds check for all four writes
	b[0] = byte(n)
	b[1] = byte(n >> 8)
	b[2] = byte(n >> 16)
	b[3] = byte(n >> 24)
}

// readStrLen decodes the little-endian uint32 stored in the first strlenSize
// bytes of b.
func readStrLen(b []byte) uint32 {
	_ = b[strlenSize-1] // one bounds check for all four reads
	return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
}
107 changes: 107 additions & 0 deletions container/strstore/strstore_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
// Copyright 2024 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package strstore

import (
"math/rand"
_ "net/http/pprof"
"runtime"
"testing"

"github.com/stretchr/testify/assert"
)

// TestStrStore stores one million random 50-byte strings, reads each one back
// through its index, checks the reported total size, and verifies that
// out-of-range indexes yield the empty string.
func TestStrStore(t *testing.T) {
	inputs := randStrings(50, 1000000)
	store, offsets := New(inputs)

	payloadBytes := 0
	for i, want := range inputs {
		assert.Equal(t, want, store.Get(offsets[i]))
		payloadBytes += len(want)
	}
	// Every string costs its own bytes plus one length header.
	assert.Equal(t, payloadBytes+strlenSize*len(inputs), store.Len())

	// Indexes outside the buffer are answered with "".
	assert.Equal(t, "", store.Get(-1))
	assert.Equal(t, "", store.Get(store.Len()*2))
}

// benchSink receives the result of every benchmarked read so the compiler
// cannot treat the measured expression as dead code and remove it.
var benchSink string

// BenchmarkStrStoreGetSet compares the cost of reading one string from a
// StrStore ("strbuf-get") with reading one element of a plain []string
// ("stdstrslice-get"). Both sub-benchmarks store their result in benchSink so
// that they measure comparable work.
func BenchmarkStrStoreGetSet(b *testing.B) {
	ss := randStrings(50, 1000000)
	strStore, idxes := New(ss)
	strSlice := make([]string, len(ss))
	copy(strSlice, ss)

	b.Run("strbuf-get", func(b *testing.B) {
		b.ReportAllocs()
		b.ResetTimer()
		for i := 0; i < b.N; i++ {
			benchSink = strStore.Get(idxes[0])
		}
	})

	b.Run("stdstrslice-get", func(b *testing.B) {
		b.ReportAllocs()
		b.ResetTimer()
		for i := 0; i < b.N; i++ {
			benchSink = strSlice[0]
		}
	})
}

// BenchmarkStrStoreGC times forced garbage collections while a StrStore built
// from one million random 50-byte strings, and its index slice, are kept
// alive. The source slice is not used again after the store is built.
func BenchmarkStrStoreGC(b *testing.B) {
	src := randStrings(50, 1000000)
	store, offsets := New(src)
	_ = src

	// Run one collection before the timer starts so setup garbage is not measured.
	runtime.GC()
	b.ResetTimer()

	for iter := 0; iter < b.N; iter++ {
		runtime.GC()
	}

	// Keep both results reachable for the whole timed loop.
	runtime.KeepAlive(store)
	runtime.KeepAlive(offsets)
}

// BenchmarkStdStrSliceGC times forced garbage collections while a plain
// []string holding one million random 50-byte strings is kept alive. It is the
// baseline for BenchmarkStrStoreGC.
func BenchmarkStdStrSliceGC(b *testing.B) {
	src := randStrings(50, 1000000)
	kept := make([]string, len(src))
	copy(kept, src)
	_ = src

	// Run one collection before the timer starts so setup garbage is not measured.
	runtime.GC()
	b.ResetTimer()

	for iter := 0; iter < b.N; iter++ {
		runtime.GC()
	}

	// Keep the slice reachable for the whole timed loop.
	runtime.KeepAlive(kept)
}

// randStrings returns n strings of m random bytes each. All bytes are drawn in
// one rand.Read call and then cut into consecutive m-byte pieces, each copied
// into its own string.
func randStrings(m, n int) []string {
	raw := make([]byte, m*n)
	rand.Read(raw)

	out := make([]string, n)
	for i := range out {
		start := m * i
		out[i] = string(raw[start : start+m])
	}
	return out
}

0 comments on commit d404439

Please sign in to comment.