feat:(ast) Node support concurrently-read (#661)
AsterDY authored Jul 29, 2024
1 parent 8a2444e commit df7126e
Showing 17 changed files with 675 additions and 278 deletions.
15 changes: 14 additions & 1 deletion README.md
@@ -282,6 +282,20 @@ sub := root.Get("key3").Index(2).Int64() // == 3

**Tip**: since `Index()` uses an offset to locate data, it is much faster than scanning like `Get()`, so we suggest using it whenever possible. Sonic also provides another API, `IndexOrGet()`, which uses the offset internally while also ensuring the key matches.
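
For a quick illustration, here is a minimal sketch of `IndexOrGet()`; the index `2` passed below is only an assumption about where `"key3"` sits inside the root object:
```go
// IndexOrGet first checks the pair at the given index; if its key matches,
// it returns that value directly, otherwise it falls back to a key scan.
node := root.IndexOrGet(2, "key3") // index 2 is a hypothetical position
v, err := node.Index(2).Int64()    // same element as root.Get("key3").Index(2)
```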

#### SearchOption
`ast.Searcher` provides several options to meet different needs:
```go
opts := ast.SearchOptions{CopyReturn: true, ...}
val, err := sonic.GetWithOptions(JSON, opts, "key")
```
- CopyReturn
Instructs the searcher to copy the resulting JSON string instead of referencing the input. This helps reduce memory usage if you cache the results.
- ConcurrentRead
Since `ast.Node` uses a lazy-load design, it does not support concurrent reads by default. If you want to read it concurrently, specify this option (see the sketch after this list).
- ValidateJSON
Instructs the searcher to validate the entire JSON. This option is enabled by default, which slows down the search slightly.
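
For example, here is a minimal sketch of reading one node from several goroutines with `ConcurrentRead` enabled (the option field names are assumed to match the list above):
```go
import (
    "sync"

    "github.com/bytedance/sonic"
    "github.com/bytedance/sonic/ast"
)

func readConcurrently(data []byte) error {
    opts := ast.SearchOptions{ConcurrentRead: true}
    node, err := sonic.GetWithOptions(data, opts, "key3")
    if err != nil {
        return err
    }
    var wg sync.WaitGroup
    for i := 0; i < 3; i++ {
        wg.Add(1)
        go func(idx int) {
            defer wg.Done()
            // safe to read from multiple goroutines because ConcurrentRead was enabled
            _, _ = node.Index(idx).Int64()
        }(i)
    }
    wg.Wait()
    return nil
}
```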


#### Set/Unset

Modify the JSON content with `Set()` / `Unset()`
@@ -300,7 +314,6 @@ println(alias1 == alias2) // true
exist, err := root.UnsetByIndex(1) // exist == true
println(root.Get("key4").Check()) // "value not exist"
```

#### Serialize

To encode `ast.Node` as JSON, use `MarshalJSON()` or `json.Marshal()` (you MUST pass the node's pointer)
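
A short sketch, assuming `root` is the node obtained in the lookup examples above:
```go
buf, err := root.MarshalJSON()    // method on the node
buf2, err2 := json.Marshal(&root) // MUST pass the pointer so ast.Node's MarshalJSON is used
```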
15 changes: 14 additions & 1 deletion README_ZH_CN.md
@@ -260,7 +260,7 @@ fmt.Printf("%+v", data) // {A:0 B:1}

### `Ast.Node`

Sonic/ast.Node is a completely self-contained JSON abstract syntax tree library. It implements serialization and deserialization, and provides robust APIs for getting and modifying generic data.
Sonic/ast.Node is a completely self-contained JSON abstract syntax tree library. It implements serialization and deserialization, and provides robust APIs for getting and modifying JSON data.

#### Search/Index

@@ -282,6 +282,19 @@ sub := root.Get("key3").Index(2).Int64() // == 3

**Note**: since `Index()` uses an offset to locate data, it is much faster than `Get()`, which scans, so use `Index()` whenever possible. Sonic also provides another API, `IndexOrGet()`, which works from an offset while also ensuring the key matches.

#### Search options
`ast.Searcher` provides several options to meet different user needs:
```go
opts := ast.SearchOptions{CopyReturn: true, ...}
val, err := sonic.GetWithOptions(JSON, opts, "key")
```
- CopyReturn
Instructs the searcher to copy the resulting JSON string instead of referencing the input. This helps reduce memory usage if you cache the results.
- ConcurrentRead
Because `ast.Node` uses a lazy-load design, it does not support concurrent reads by default. Specify this option if you want to read it concurrently.
- ValidateJSON
Instructs the searcher to validate the entire JSON. This option is enabled by default, which slows down the search slightly.

#### Modification

Use `Set()` / `Unset()` to modify the JSON content
8 changes: 8 additions & 0 deletions api.go
@@ -207,6 +207,14 @@ func Get(src []byte, path ...interface{}) (ast.Node, error) {
return GetCopyFromString(rt.Mem2Str(src), path...)
}

// GetWithOptions searches and locates the given path in the src JSON,
// using the given options of ast.Searcher.
func GetWithOptions(src []byte, opts ast.SearchOptions, path ...interface{}) (ast.Node, error) {
s := ast.NewSearcher(rt.Mem2Str(src))
s.SearchOptions = opts
return s.GetByPath(path...)
}

// GetFromString is same with Get except src is string.
//
// WARNING: The returned JSON is **Referenced** from the input.
89 changes: 75 additions & 14 deletions ast/buffer.go
@@ -17,8 +17,10 @@
package ast

import (
`sort`
`unsafe`
"sort"
"unsafe"

"github.com/bytedance/sonic/internal/caching"
)

type nodeChunk [_DEFAULT_NODE_CAP]Node
@@ -90,18 +92,11 @@ func (self *linkedNodes) Pop() {
self.size--
}

func (self *linkedPairs) Pop() {
if self == nil || self.size == 0 {
return
}
self.Set(self.size-1, Pair{})
self.size--
}

func (self *linkedNodes) Push(v Node) {
self.Set(self.size, v)
}


func (self *linkedNodes) Set(i int, v Node) {
if i < _DEFAULT_NODE_CAP {
self.head[i] = v
@@ -195,11 +190,22 @@ func (self *linkedNodes) FromSlice(con []Node) {
type pairChunk [_DEFAULT_NODE_CAP]Pair

type linkedPairs struct {
index map[uint64]int
head pairChunk
tail []*pairChunk
size int
}

func (self *linkedPairs) BuildIndex() {
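// build (or rebuild) the map from each pair's key hash to its position,
// so Get() can look keys up without scanning every pair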
if self.index == nil {
self.index = make(map[uint64]int, self.size)
}
for i:=0; i<self.size; i++ {
p := self.At(i)
self.index[p.hash] = i
}
}

func (self *linkedPairs) Cap() int {
if self == nil {
return 0
@@ -233,7 +239,31 @@ func (self *linkedPairs) Push(v Pair) {
self.Set(self.size, v)
}

func (self *linkedPairs) Pop() {
if self == nil || self.size == 0 {
return
}
self.Unset(self.size-1)
self.size--
}

func (self *linkedPairs) Unset(i int) {
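// drop the pair's key hash from the index (if it has been built), then clear the slot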
if self.index != nil {
p := self.At(i)
delete(self.index, p.hash)
}
self.set(i, Pair{})
}

func (self *linkedPairs) Set(i int, v Pair) {
if self.index != nil {
h := v.hash
self.index[h] = i
}
self.set(i, v)
}

func (self *linkedPairs) set(i int, v Pair) {
if i < _DEFAULT_NODE_CAP {
self.head[i] = v
if self.size <= i {
Expand Down Expand Up @@ -276,6 +306,21 @@ func (self *linkedPairs) growTailLength(l int) {

// linear search
func (self *linkedPairs) Get(key string) (*Pair, int) {
if self.index != nil {
// fast-path
i, ok := self.index[caching.StrHash(key)]
if ok {
n := self.At(i)
if n.Key == key {
return n, i
}
// hash conflicts
goto linear_search
} else {
return nil, -1
}
}
linear_search:
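// no index built, or the hash collided with a different key: fall back to a linear scan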
for i:=0; i<self.size; i++ {
if n := self.At(i); n.Key == key {
return n, i
@@ -313,15 +358,27 @@ func (self *linkedPairs) ToMap(con map[string]Node) {
}
}

func (self *linkedPairs) copyPairs(to []Pair, from []Pair, l int) {
copy(to, from)
if self.index != nil {
for i:=0; i<l; i++ {
// NOTICE: if the caller did not pre-compute the hash, compute it here
h := caching.StrHash(from[i].Key)
from[i].hash = h
self.index[h] = i
}
}
}

func (self *linkedPairs) FromSlice(con []Pair) {
self.size = len(con)
i := self.size-1
a, b := i/_DEFAULT_NODE_CAP-1, i%_DEFAULT_NODE_CAP
if a < 0 {
copy(self.head[:b+1], con)
self.copyPairs(self.head[:b+1], con, b+1)
return
} else {
copy(self.head[:], con)
self.copyPairs(self.head[:], con, len(self.head))
con = con[_DEFAULT_NODE_CAP:]
}

@@ -333,12 +390,12 @@ func (self *linkedPairs) FromSlice(con []Pair) {

for i:=0; i<a; i++ {
self.tail[i] = new(pairChunk)
copy(self.tail[i][:], con)
self.copyPairs(self.tail[i][:], con, len(self.tail[i]))
con = con[_DEFAULT_NODE_CAP:]
}

self.tail[a] = new(pairChunk)
copy(self.tail[a][:b+1], con)
self.copyPairs(self.tail[a][:b+1], con, b+1)
}

func (self *linkedPairs) Less(i, j int) bool {
@@ -347,6 +404,10 @@ func (self *linkedPairs) Less(i, j int) bool {

func (self *linkedPairs) Swap(i, j int) {
a, b := self.At(i), self.At(j)
if self.index != nil {
self.index[a.hash] = j
self.index[b.hash] = i
}
*a, *b = *b, *a
}

14 changes: 7 additions & 7 deletions ast/buffer_test.go
@@ -34,7 +34,7 @@ func makeNodes(l int) []Node {
func makePairs(l int) []Pair {
r := make([]Pair, l)
for i := 0; i < l; i++ {
r[i] = Pair{strconv.Itoa(i), NewBool(true)}
r[i] = NewPair(strconv.Itoa(i), NewBool(true))
}
return r
}
@@ -53,23 +53,23 @@ func Test_linkedPairs_Push(t *testing.T) {
name: "add empty",
args: args{
in: []Pair{},
v: Pair{"a", NewBool(true)},
exp: []Pair{Pair{"a", NewBool(true)}},
v: NewPair("a", NewBool(true)),
exp: []Pair{NewPair("a", NewBool(true))},
},
},
{
name: "add one",
args: args{
in: []Pair{{"a", NewBool(false)}},
v: Pair{"b", NewBool(true)},
exp: []Pair{{"a", NewBool(false)}, {"b", NewBool(true)}},
in: []Pair{NewPair("a", NewBool(false))},
v: NewPair("b", NewBool(true)),
exp: []Pair{NewPair("a", NewBool(false)), NewPair("b", NewBool(true))},
},
},
{
name: "add _DEFAULT_NODE_CAP",
args: args{
in: makePairs(_DEFAULT_NODE_CAP),
v: Pair{strconv.Itoa(_DEFAULT_NODE_CAP), NewBool(true)},
v: NewPair(strconv.Itoa(_DEFAULT_NODE_CAP), NewBool(true)),
exp: makePairs(_DEFAULT_NODE_CAP+1),
},
},
15 changes: 10 additions & 5 deletions ast/encode.go
@@ -126,10 +126,10 @@
}

func (self *Node) encode(buf *[]byte) error {
if self.IsRaw() {
if self.isRaw() {
return self.encodeRaw(buf)
}
switch self.Type() {
switch int(self.itype()) {
case V_NONE : return ErrNotExist
case V_ERROR : return self.Check()
case V_NULL : return self.encodeNull(buf)
@@ -145,9 +145,14 @@
}

func (self *Node) encodeRaw(buf *[]byte) error {
raw, err := self.Raw()
if err != nil {
return err
lock := self.rlock()
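// rlock() reports whether a read lock was actually taken; re-check isRaw() under it,
// since another goroutine may have parsed this node in the meantime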
if !self.isRaw() {
self.runlock()
return self.encode(buf)
}
raw := self.toString()
if lock {
self.runlock()
}
*buf = append(*buf, raw...)
return nil
8 changes: 4 additions & 4 deletions ast/encode_test.go
@@ -94,8 +94,8 @@ func TestEncodeValue(t *testing.T) {
{NewArray([]Node{}), "[]", false},
{NewArray([]Node{NewString(""), NewNull()}), `["",null]`, false},
{NewArray([]Node{NewBool(true), NewString("true"), NewString("\t")}), `[true,"true","\t"]`, false},
{NewObject([]Pair{Pair{"a", NewNull()}, Pair{"b", NewNumber("0")}}), `{"a":null,"b":0}`, false},
{NewObject([]Pair{Pair{"\ta", NewString("\t")}, Pair{"\bb", NewString("\b")}, Pair{"\nb", NewString("\n")}, Pair{"\ra", NewString("\r")}}),`{"\ta":"\t","\u0008b":"\u0008","\nb":"\n","\ra":"\r"}`, false},
{NewObject([]Pair{NewPair("a", NewNull()), NewPair("b", NewNumber("0"))}), `{"a":null,"b":0}`, false},
{NewObject([]Pair{NewPair("\ta", NewString("\t")), NewPair("\bb", NewString("\b")), NewPair("\nb", NewString("\n")), NewPair("\ra", NewString("\r"))}),`{"\ta":"\t","\u0008b":"\u0008","\nb":"\n","\ra":"\r"}`, false},
{NewObject([]Pair{}), `{}`, false},
{NewObject([]Pair{Pair{Key: "", Value: NewNull()}}), `{"":null}`, false},
{NewBytes([]byte("hello, world")), `"aGVsbG8sIHdvcmxk"`, false},
@@ -141,7 +141,7 @@ func TestEncodeNode(t *testing.T) {
if string(ret) != data {
t.Fatal(string(ret))
}
root.loadAllKey()
root.Load()
ret, err = root.MarshalJSON()
if err != nil {
t.Fatal(err)
@@ -228,7 +228,7 @@ func BenchmarkEncodeLoad_Sonic(b *testing.B) {
if e != 0 {
b.Fatal(root)
}
root.loadAllKey()
root.Load()
_, err := root.MarshalJSON()
if err != nil {
b.Fatal(err)
4 changes: 4 additions & 0 deletions ast/error.go
@@ -17,6 +17,10 @@ func newError(err types.ParsingError, msg string) *Node {
}
}

func newErrorPair(err SyntaxError) *Pair {
return &Pair{0, "", *newSyntaxError(err)}
}

// Error returns error message if the node is invalid
func (self Node) Error() string {
if self.t != V_ERROR {