diff --git a/README.md b/README.md index 14b776c67..defe2e4b2 100644 --- a/README.md +++ b/README.md @@ -282,6 +282,20 @@ sub := root.Get("key3").Index(2).Int64() // == 3 **Tip**: since `Index()` uses offset to locate data, which is much faster than scanning like `Get()`, we suggest you use it as much as possible. And sonic also provides another API `IndexOrGet()`, which also uses the offset while ensuring the key is matched. +#### SearchOptions +`Searcher` provides some options for users to meet different needs: +```go +opts := ast.SearchOptions{ CopyReturn: true ... } +val, err := sonic.GetWithOptions(JSON, opts, "key") +``` +- CopyReturn +Indicates the searcher to copy the result JSON string instead of referring to the input. This can help to reduce memory usage if you cache the results +- ConcurrentRead +Since `ast.Node` uses a `Lazy-Load` design, it doesn't support Concurrently-Read by default. If you want to read it concurrently, please specify it. +- ValidateJSON +Indicates the searcher to validate the entire JSON. This option is enabled by default, which slows down the search speed a little. 
+ + #### Set/Unset Modify the json content by Set()/Unset() @@ -300,7 +314,6 @@ println(alias1 == alias2) // true exist, err := root.UnsetByIndex(1) // exist == true println(root.Get("key4").Check()) // "value not exist" ``` - #### Serialize To encode `ast.Node` as json, use `MarshalJson()` or `json.Marshal()` (MUST pass the node's pointer) diff --git a/README_ZH_CN.md b/README_ZH_CN.md index 19da65b48..e4a3015ff 100644 --- a/README_ZH_CN.md +++ b/README_ZH_CN.md @@ -260,7 +260,7 @@ fmt.Printf("%+v", data) // {A:0 B:1} ### `Ast.Node` -Sonic/ast.Node 是完全独立的 JSON 抽象语法树库。它实现了序列化和反序列化,并提供了获取和修改通用数据的鲁棒的 API。 +Sonic/ast.Node 是完全独立的 JSON 抽象语法树库。它实现了序列化和反序列化,并提供了获取和修改JSON数据的鲁棒的 API。 #### 查找/索引 @@ -282,6 +282,19 @@ sub := root.Get("key3").Index(2).Int64() // == 3 **注意**:由于 `Index()` 使用偏移量来定位数据,比使用扫描的 `Get()` 要快的多,建议尽可能的使用 `Index` 。 Sonic 也提供了另一个 API, `IndexOrGet()` ,以偏移量为基础并且也确保键的匹配。 +#### 查找选项 +`ast.Searcher`提供了一些选项,以满足用户的不同需求: +```go +opts := ast.SearchOptions{CopyReturn: true ...} +val, err := sonic.GetWithOptions(JSON, opts, "key") +``` +- CopyReturn +指示搜索器复制结果JSON字符串,而不是从输入引用。如果用户缓存结果,这有助于减少内存使用 +- ConcurrentRead +因为`ast.Node`使用`Lazy-Load`设计,默认不支持并发读取。如果您想同时读取,请指定它。 +- ValidateJSON +指示搜索器来验证整个JSON。默认情况下启用该选项, 但是对于查找速度有一定影响。 + #### 修改 使用 `Set()` / `Unset()` 修改 json 的内容 diff --git a/api.go b/api.go index 0aae817c9..3a3d1d5fa 100644 --- a/api.go +++ b/api.go @@ -209,6 +209,14 @@ func Get(src []byte, path ...interface{}) (ast.Node, error) { return GetCopyFromString(rt.Mem2Str(src), path...) } +// GetWithOptions searches and locates the given path from the src json, +// with the specific options of ast.Searcher. +func GetWithOptions(src []byte, opts ast.SearchOptions, path ...interface{}) (ast.Node, error) { + s := ast.NewSearcher(rt.Mem2Str(src)) + s.SearchOptions = opts + return s.GetByPath(path...) +} + // GetFromString is same with Get except src is string. // // WARNING: The returned JSON is **Referenced** from the input. 
diff --git a/ast/buffer.go b/ast/buffer.go index bccbf4814..04701ef5b 100644 --- a/ast/buffer.go +++ b/ast/buffer.go @@ -17,8 +17,10 @@ package ast import ( - `sort` - `unsafe` + "sort" + "unsafe" + + "github.com/bytedance/sonic/internal/caching" ) type nodeChunk [_DEFAULT_NODE_CAP]Node @@ -90,18 +92,11 @@ func (self *linkedNodes) Pop() { self.size-- } -func (self *linkedPairs) Pop() { - if self == nil || self.size == 0 { - return - } - self.Set(self.size-1, Pair{}) - self.size-- -} - func (self *linkedNodes) Push(v Node) { self.Set(self.size, v) } + func (self *linkedNodes) Set(i int, v Node) { if i < _DEFAULT_NODE_CAP { self.head[i] = v @@ -195,11 +190,22 @@ func (self *linkedNodes) FromSlice(con []Node) { type pairChunk [_DEFAULT_NODE_CAP]Pair type linkedPairs struct { + index map[uint64]int head pairChunk tail []*pairChunk size int } +func (self *linkedPairs) BuildIndex() { + if self.index == nil { + self.index = make(map[uint64]int, self.size) + } + for i:=0; i _Threshold_Index { + v.BuildIndex() + } return Node{ t: types.V_OBJECT, l: uint(v.Len()), @@ -1772,53 +1802,42 @@ func newObject(v *linkedPairs) Node { } func (self *Node) setObject(v *linkedPairs) { + if v.size > _Threshold_Index { + v.BuildIndex() + } self.t = types.V_OBJECT self.l = uint(v.Len()) self.p = unsafe.Pointer(v) } -func newRawNode(str string, typ types.ValueType) Node { - return Node{ - t: _V_RAW | typ, - p: rt.StrPtr(str), - l: uint(len(str)), - } -} - func (self *Node) parseRaw(full bool) { + lock := self.lock() + defer self.unlock() + if !self.isRaw() { + return + } raw := self.toString() parser := NewParserObj(raw) + var e types.ParsingError if full { parser.noLazy = true - parser.skipValue = false + *self, e = parser.Parse() + } else if lock { + var n Node + parser.noLazy = true + parser.loadOnce = true + n, e = parser.Parse() + self.assign(n) + } else { + *self, e = parser.Parse() } - var e types.ParsingError - *self, e = parser.Parse() if e != 0 { *self = 
*newSyntaxError(parser.syntaxError(e)) } } -var typeJumpTable = [256]types.ValueType{ - '"' : types.V_STRING, - '-' : _V_NUMBER, - '0' : _V_NUMBER, - '1' : _V_NUMBER, - '2' : _V_NUMBER, - '3' : _V_NUMBER, - '4' : _V_NUMBER, - '5' : _V_NUMBER, - '6' : _V_NUMBER, - '7' : _V_NUMBER, - '8' : _V_NUMBER, - '9' : _V_NUMBER, - '[' : types.V_ARRAY, - 'f' : types.V_FALSE, - 'n' : types.V_NULL, - 't' : types.V_TRUE, - '{' : types.V_OBJECT, -} - -func switchRawType(c byte) types.ValueType { - return typeJumpTable[c] +func (self *Node) assign(n Node) { + self.l = n.l + self.p = n.p + atomic.StoreInt64(&self.t, n.t) } diff --git a/ast/node_test.go b/ast/node_test.go index ae40c18c4..dc1027597 100644 --- a/ast/node_test.go +++ b/ast/node_test.go @@ -17,18 +17,19 @@ package ast import ( - `bytes` - `encoding/json` - `errors` - `fmt` - `reflect` - `strconv` - `testing` - - `github.com/bytedance/sonic/internal/native/types` - `github.com/bytedance/sonic/internal/rt` - `github.com/stretchr/testify/assert` - `github.com/stretchr/testify/require` + "bytes" + "encoding/json" + "errors" + "fmt" + "reflect" + "strconv" + "strings" + "testing" + + "github.com/bytedance/sonic/internal/native/types" + "github.com/bytedance/sonic/internal/rt" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) func TestNodeSortKeys(t *testing.T) { @@ -151,20 +152,21 @@ func TestLoadAll(t *testing.T) { if err = root.Load(); err != nil { t.Fatal(err) } - if root.len() != 3 { - t.Fatal(root.len()) + + if l, _ := root.Len(); l != 3 { + t.Fatal(root.Len()) } c := root.Get("c") - if !c.IsRaw() { + if !c.isRaw() { t.Fatal(err) } err = c.LoadAll() if err != nil { t.Fatal(err) } - if c.len() != 2 { - t.Fatal(c.len()) + if l, _ := c.Len(); l != 2 { + t.Fatal(c.Len()) } c1 := c.nodeAt(0) if n, err := c1.Int64(); err != nil || n != 1 { @@ -174,47 +176,47 @@ func TestLoadAll(t *testing.T) { a := root.pairAt(0) if a.Key != "a" { t.Fatal(a.Key) - } else if !a.Value.IsRaw() { + } else if 
!a.Value.isRaw() { t.Fatal(a.Value.itype()) - } else if n, err := a.Value.Len(); n != 0 || err != nil { + } else if n, err := a.Value.Len(); n != 2 || err != nil { t.Fatal(n, err) } if err := a.Value.Load(); err != nil { t.Fatal(err) } - if a.Value.len() != 2 { - t.Fatal(a.Value.len()) + if l, _ := a.Value.Len(); l != 2 { + t.Fatal(a.Value.Len()) } a1 := a.Value.Get("1") - if !a1.IsRaw() { + if !a1.isRaw() { t.Fatal(a1) } a.Value.LoadAll() - if a1.t != types.V_ARRAY || a1.len() != 1 { - t.Fatal(a1.t, a1.len()) + if l, _ := a1.Len(); a1.t != types.V_ARRAY || l != 1 { + t.Fatal(a1.t) } b := root.pairAt(1) if b.Key != "b" { t.Fatal(b.Key) - } else if !b.Value.IsRaw() { + } else if !b.Value.isRaw() { t.Fatal(b.Value.itype()) - } else if n, err := b.Value.Len(); n != 0 || err != nil { + } else if n, err := b.Value.Len(); n != 2 || err != nil { t.Fatal(n, err) } if err := b.Value.Load(); err != nil { t.Fatal(err) } - if b.Value.len() != 2 { - t.Fatal(b.Value.len()) + if l, _ := b.Value.Len(); l != 2 { + t.Fatal(b.Value.Len()) } b1 := b.Value.Index(0) - if !b1.IsRaw() { + if !b1.isRaw() { t.Fatal(b1) } b.Value.LoadAll() - if b1.t != types.V_OBJECT || b1.len() != 1 { - t.Fatal(a1.t, a1.len()) + if l, _ := b1.Len(); b1.t != types.V_OBJECT || l != 1 { + t.Fatal(a1.Len()) } } @@ -270,7 +272,7 @@ func TestTypeCast(t *testing.T) { } var nonEmptyErr error = errors.New("") a1 := NewAny(1) - lazyArray, _ := NewParser("[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]").Parse() + lazyArray, _ := NewParser("["+strings.Repeat("1,", _DEFAULT_NODE_CAP)+"1]").Parse() lazyObject, _ := NewParser(`{"0":0,"1":1,"2":2,"3":3,"4":4,"5":5,"6":6,"7":7,"8":8,"9":9,"10":10,"11":11,"12":12,"13":13,"14":14,"15":15,"16":16}`).Parse() var cases = []tcase{ {"Interface", Node{}, interface{}(nil), ErrUnsupportType}, @@ -286,14 +288,14 @@ func TestTypeCast(t *testing.T) { {"Map", Node{}, map[string]interface{}(nil), ErrUnsupportType}, {"Map", NewAny(map[string]Node{"a": NewNumber("1")}), 
map[string]interface{}(nil), ErrUnsupportType}, {"Map", NewAny(map[string]interface{}{"a": 1}), map[string]interface{}{"a": 1}, nil}, - {"Map", NewObject([]Pair{{"a", NewNumber("1")}}), map[string]interface{}{"a": float64(1.0)}, nil}, + {"Map", NewObject([]Pair{NewPair("a", NewNumber("1"))}), map[string]interface{}{"a": float64(1.0)}, nil}, {"MapUseNode", Node{}, map[string]Node(nil), ErrUnsupportType}, {"MapUseNode", NewAny(map[string]interface{}{"a": 1}), map[string]Node(nil), ErrUnsupportType}, {"MapUseNode", NewAny(map[string]Node{"a": NewNumber("1")}), map[string]Node{"a": NewNumber("1")}, nil}, - {"MapUseNode", NewObject([]Pair{{"a", NewNumber("1")}}), map[string]Node{"a": NewNumber("1")}, nil}, + {"MapUseNode", NewObject([]Pair{NewPair("a", NewNumber("1"))}), map[string]Node{"a": NewNumber("1")}, nil}, {"MapUseNumber", Node{}, map[string]interface{}(nil), ErrUnsupportType}, {"MapUseNumber", NewAny(map[string]interface{}{"a": 1}), map[string]interface{}{"a": 1}, nil}, - {"MapUseNumber", NewObject([]Pair{{"a", NewNumber("1")}}), map[string]interface{}{"a": json.Number("1")}, nil}, + {"MapUseNumber", NewObject([]Pair{NewPair("a", NewNumber("1"))}), map[string]interface{}{"a": json.Number("1")}, nil}, {"Array", Node{}, []interface{}(nil), ErrUnsupportType}, {"Array", NewAny([]interface{}{1}), []interface{}{1}, nil}, {"Array", NewArray([]Node{NewNumber("1")}), []interface{}{float64(1.0)}, nil}, @@ -505,16 +507,16 @@ func TestTypeCast(t *testing.T) { {"Cap", NewAny(0), 0, ErrUnsupportType}, {"Cap", NewNull(), 0, nil}, {"Cap", NewRaw(`[1]`), _DEFAULT_NODE_CAP, nil}, - {"Cap", NewObject([]Pair{{"", NewNull()}}), _DEFAULT_NODE_CAP, nil}, + {"Cap", NewObject([]Pair{NewPair("", NewNull())}), _DEFAULT_NODE_CAP, nil}, {"Cap", NewRaw(`{"a":1}`), _DEFAULT_NODE_CAP, nil}, } lazyArray.skipAllIndex() lazyObject.skipAllKey() cases = append(cases, - tcase{"Len", lazyArray, 17, nil}, + tcase{"Len", lazyArray, _DEFAULT_NODE_CAP+1, nil}, tcase{"Len", lazyObject, 17, nil}, - 
tcase{"Cap", lazyArray, _DEFAULT_NODE_CAP * 3, nil}, - tcase{"Cap", lazyObject, _DEFAULT_NODE_CAP * 3, nil}, + tcase{"Cap", lazyArray, _DEFAULT_NODE_CAP*2, nil}, + tcase{"Cap", lazyObject, _DEFAULT_NODE_CAP*2, nil}, ) for i, c := range cases { @@ -699,12 +701,12 @@ func TestCheckError_Empty(t *testing.T) { t.Fatal() } - n := newRawNode("[hello]", types.V_ARRAY) + n := newRawNode("[hello]", types.V_ARRAY, false) n.parseRaw(false) if n.Check() != nil { t.Fatal(n.Check()) } - n = newRawNode("[hello]", types.V_ARRAY) + n = newRawNode("[hello]", types.V_ARRAY, false) n.parseRaw(true) p := NewParser("[hello]") p.noLazy = true @@ -735,7 +737,7 @@ func TestCheckError_Empty(t *testing.T) { if e != nil { t.Fatal(e) } - exist, e := a.Set("d", newRawNode("x", types.V_OBJECT)) + exist, e := a.Set("d", newRawNode("x", types.V_OBJECT, false)) if exist || e != nil { t.Fatal(err) } @@ -746,7 +748,7 @@ func TestCheckError_Empty(t *testing.T) { if d.Check() == nil { t.Fatal(d) } - exist, e = a.Set("e", newRawNode("[}", types.V_ARRAY)) + exist, e = a.Set("e", newRawNode("[}", types.V_ARRAY, false)) if e != nil { t.Fatal(e) } @@ -839,7 +841,7 @@ func TestUnset(t *testing.T) { *entities = NewRaw(string(out)) hashtags := entities.Get("hashtags").Index(0) - hashtags.Set("text2", newRawNode(`{}`, types.V_OBJECT)) + hashtags.Set("text2", NewRaw(`{}`)) exist, err = hashtags.Unset("indices") // NOTICE: Unset() won't change node.Len() here if !exist || err != nil || hashtags.len() != 2 { t.Fatal(hashtags.len()) @@ -1761,7 +1763,7 @@ func BenchmarkSliceUnsetByIndex(b *testing.B) { } func BenchmarkNodeAdd(b *testing.B) { - n := NewObject([]Pair{{"test", NewNumber("1")}}) + n := NewObject([]Pair{NewPair("test", NewNumber("1"))}) b.ResetTimer() for i := 0; i < b.N; i++ { node := NewArray([]Node{}) @@ -1770,7 +1772,7 @@ func BenchmarkNodeAdd(b *testing.B) { } func BenchmarkSliceAdd(b *testing.B) { - n := NewObject([]Pair{{"test", NewNumber("1")}}) + n := NewObject([]Pair{NewPair("test", 
NewNumber("1"))}) b.ResetTimer() for i := 0; i < b.N; i++ { node := []Node{} @@ -1779,7 +1781,7 @@ func BenchmarkSliceAdd(b *testing.B) { } func BenchmarkMapAdd(b *testing.B) { - n := NewObject([]Pair{{"test", NewNumber("1")}}) + n := NewObject([]Pair{NewPair("test", NewNumber("1"))}) b.ResetTimer() for i := 0; i < b.N; i++ { node := map[string]Node{} diff --git a/ast/parser.go b/ast/parser.go index 506f9d86c..30bd1f451 100644 --- a/ast/parser.go +++ b/ast/parser.go @@ -17,14 +17,16 @@ package ast import ( - `fmt` + "fmt" + "sync" + "sync/atomic" - `github.com/bytedance/sonic/internal/native/types` - `github.com/bytedance/sonic/internal/rt` + "github.com/bytedance/sonic/internal/native/types" + "github.com/bytedance/sonic/internal/rt" ) const ( - _DEFAULT_NODE_CAP int = 8 + _DEFAULT_NODE_CAP int = 16 _APPEND_GROW_SHIFT = 1 ) @@ -45,6 +47,7 @@ type Parser struct { p int s string noLazy bool + loadOnce bool skipValue bool dbuf *byte } @@ -152,7 +155,7 @@ func (self *Parser) decodeArray(ret *linkedNodes) (Node, types.ParsingError) { if t == _V_NONE { return Node{}, types.ERR_INVALID_CHAR } - val = newRawNode(self.s[start:self.p], t) + val = newRawNode(self.s[start:self.p], t, false) }else{ /* decode the value */ if val, err = self.Parse(); err != 0 { @@ -238,7 +241,7 @@ func (self *Parser) decodeObject(ret *linkedPairs) (Node, types.ParsingError) { if t == _V_NONE { return Node{}, types.ERR_INVALID_CHAR } - val = newRawNode(self.s[start:self.p], t) + val = newRawNode(self.s[start:self.p], t, false) } else { /* decode the value */ if val, err = self.Parse(); err != 0 { @@ -248,7 +251,7 @@ func (self *Parser) decodeObject(ret *linkedPairs) (Node, types.ParsingError) { /* add the value to result */ // FIXME: ret's address may change here, thus previous referred node in ret may be invalid !! 
- ret.Push(Pair{Key: key, Value: val}) + ret.Push(NewPair(key, val)) self.p = self.lspace(self.p) /* check for EOF */ @@ -295,6 +298,10 @@ func (self *Parser) Pos() int { return self.p } + +// Parse returns a ast.Node representing the parser's JSON. +// NOTICE: the specific parsing lazy dependens parser's option +// It only parse first layer and first child for Object or Array be default func (self *Parser) Parse() (Node, types.ParsingError) { switch val := self.decodeValue(); val.Vt { case types.V_EOF : return Node{}, types.ERR_EOF @@ -303,22 +310,48 @@ func (self *Parser) Parse() (Node, types.ParsingError) { case types.V_FALSE : return falseNode, 0 case types.V_STRING : return self.decodeString(val.Iv, val.Ep) case types.V_ARRAY: + s := self.p - 1; if p := skipBlank(self.s, self.p); p >= self.p && self.s[p] == ']' { self.p = p + 1 return Node{t: types.V_ARRAY}, 0 } if self.noLazy { + if self.loadOnce { + self.noLazy = false + } return self.decodeArray(new(linkedNodes)) } + // NOTICE: loadOnce always keep raw json for object or array + if self.loadOnce { + self.p = s + s, e := self.skipFast() + if e != 0 { + return Node{}, e + } + return newRawNode(self.s[s:self.p], types.V_ARRAY, true), 0 + } return newLazyArray(self), 0 case types.V_OBJECT: + s := self.p - 1; if p := skipBlank(self.s, self.p); p >= self.p && self.s[p] == '}' { self.p = p + 1 return Node{t: types.V_OBJECT}, 0 } + // NOTICE: loadOnce always keep raw json for object or array if self.noLazy { + if self.loadOnce { + self.noLazy = false + } return self.decodeObject(new(linkedPairs)) } + if self.loadOnce { + self.p = s + s, e := self.skipFast() + if e != 0 { + return Node{}, e + } + return newRawNode(self.s[s:self.p], types.V_OBJECT, true), 0 + } return newLazyObject(self), 0 case types.V_DOUBLE : return NewNumber(self.s[val.Ep:self.p]), 0 case types.V_INTEGER : return NewNumber(self.s[val.Ep:self.p]), 0 @@ -475,7 +508,7 @@ func (self *Node) skipNextNode() *Node { if t == _V_NONE { return 
newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR)) } - val = newRawNode(parser.s[start:parser.p], t) + val = newRawNode(parser.s[start:parser.p], t, false) } /* add the value to result */ @@ -514,7 +547,7 @@ func (self *Node) skipNextPair() (*Pair) { /* check for EOF */ if parser.p = parser.lspace(sp); parser.p >= ns { - return &Pair{"", *newSyntaxError(parser.syntaxError(types.ERR_EOF))} + return newErrorPair(parser.syntaxError(types.ERR_EOF)) } /* check for empty object */ @@ -531,7 +564,7 @@ func (self *Node) skipNextPair() (*Pair) { /* decode the key */ if njs = parser.decodeValue(); njs.Vt != types.V_STRING { - return &Pair{"", *newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR))} + return newErrorPair(parser.syntaxError(types.ERR_INVALID_CHAR)) } /* extract the key */ @@ -541,34 +574,34 @@ func (self *Node) skipNextPair() (*Pair) { /* check for escape sequence */ if njs.Ep != -1 { if key, err = unquote(key); err != 0 { - return &Pair{key, *newSyntaxError(parser.syntaxError(err))} + return newErrorPair(parser.syntaxError(err)) } } /* expect a ':' delimiter */ if err = parser.delim(); err != 0 { - return &Pair{key, *newSyntaxError(parser.syntaxError(err))} + return newErrorPair(parser.syntaxError(err)) } /* skip the value */ if start, err := parser.skipFast(); err != 0 { - return &Pair{key, *newSyntaxError(parser.syntaxError(err))} + return newErrorPair(parser.syntaxError(err)) } else { t := switchRawType(parser.s[start]) if t == _V_NONE { - return &Pair{key, *newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR))} + return newErrorPair(parser.syntaxError(types.ERR_INVALID_CHAR)) } - val = newRawNode(parser.s[start:parser.p], t) + val = newRawNode(parser.s[start:parser.p], t, false) } /* add the value to result */ - ret.Push(Pair{Key: key, Value: val}) + ret.Push(NewPair(key, val)) self.l++ parser.p = parser.lspace(parser.p) /* check for EOF */ if parser.p >= ns { - return &Pair{key, *newSyntaxError(parser.syntaxError(types.ERR_EOF))} + 
return newErrorPair(parser.syntaxError(types.ERR_EOF)) } /* check for the next character */ @@ -581,7 +614,7 @@ func (self *Node) skipNextPair() (*Pair) { self.setObject(ret) return ret.At(ret.Len()-1) default: - return &Pair{key, *newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR))} + return newErrorPair(parser.syntaxError(types.ERR_INVALID_CHAR)) } } @@ -662,3 +695,72 @@ func backward(src string, i int) int { for ; i>=0 && isSpace(src[i]); i-- {} return i } + + +func newRawNode(str string, typ types.ValueType, lock bool) Node { + ret := Node{ + t: typ | _V_RAW, + p: rt.StrPtr(str), + l: uint(len(str)), + } + if lock { + ret.m = new(sync.RWMutex) + } + return ret +} + +var typeJumpTable = [256]types.ValueType{ + '"' : types.V_STRING, + '-' : _V_NUMBER, + '0' : _V_NUMBER, + '1' : _V_NUMBER, + '2' : _V_NUMBER, + '3' : _V_NUMBER, + '4' : _V_NUMBER, + '5' : _V_NUMBER, + '6' : _V_NUMBER, + '7' : _V_NUMBER, + '8' : _V_NUMBER, + '9' : _V_NUMBER, + '[' : types.V_ARRAY, + 'f' : types.V_FALSE, + 'n' : types.V_NULL, + 't' : types.V_TRUE, + '{' : types.V_OBJECT, +} + +func switchRawType(c byte) types.ValueType { + return typeJumpTable[c] +} + +func (self *Node) loadt() types.ValueType { + return (types.ValueType)(atomic.LoadInt64(&self.t)) +} + +func (self *Node) lock() bool { + if m := self.m; m != nil { + m.Lock() + return true + } + return false +} + +func (self *Node) unlock() { + if m := self.m; m != nil { + m.Unlock() + } +} + +func (self *Node) rlock() bool { + if m := self.m; m != nil { + m.RLock() + return true + } + return false +} + +func (self *Node) runlock() { + if m := self.m; m != nil { + m.RUnlock() + } +} diff --git a/ast/parser_test.go b/ast/parser_test.go index 2469bc411..594f3437d 100644 --- a/ast/parser_test.go +++ b/ast/parser_test.go @@ -17,16 +17,16 @@ package ast import ( - `encoding/json` - `os` - `runtime` - `runtime/debug` - `sync` - `testing` - `time` + "encoding/json" + "os" + "runtime" + "runtime/debug" + "sync" + "testing" + "time" - 
`github.com/stretchr/testify/assert` - `github.com/stretchr/testify/require` + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) var ( @@ -316,6 +316,46 @@ func BenchmarkParseOne_Parallel_Sonic(b *testing.B) { }) } +func BenchmarkParseNoLazy_Sonic(b *testing.B) { + b.SetBytes(int64(len(_TwitterJson))) + b.ResetTimer() + ast := NewRawConcurrentRead(_TwitterJson) + for i := 0; i < b.N; i++ { + node := ast.GetByPath("statuses", 3) + if node.Check() != nil { + b.Fail() + } + } +} + +func BenchmarkParseNoLazy_Parallel_Sonic(b *testing.B) { + b.SetBytes(int64(len(_TwitterJson))) + b.ResetTimer() + ast := NewRawConcurrentRead(_TwitterJson) + b.RunParallel(func(p *testing.PB) { + for p.Next() { + node := ast.GetByPath("statuses", 3) + if node.Check() != nil { + b.Fail() + } + } + }) +} + +func BenchmarkNodeRaw_Parallel_Sonic(b *testing.B) { + b.SetBytes(int64(len(_TwitterJson))) + b.ResetTimer() + ast := NewRawConcurrentRead(_TwitterJson) + b.RunParallel(func(p *testing.PB) { + for p.Next() { + node := ast.GetByPath("statuses", 3) + if _, e := node.Raw(); e != nil { + b.Fatal(e) + } + } + }) +} + func BenchmarkParseSeven_Sonic(b *testing.B) { b.SetBytes(int64(len(_TwitterJson))) b.ResetTimer() diff --git a/ast/search.go b/ast/search.go index a8d1e76f6..9a5fb9420 100644 --- a/ast/search.go +++ b/ast/search.go @@ -21,8 +21,23 @@ import ( `github.com/bytedance/sonic/internal/native/types` ) +// SearchOptions controls Searcher's behavior +type SearchOptions struct { + // ValidateJSON indicates the searcher to validate the entire JSON + ValidateJSON bool + + // CopyReturn indicates the searcher to copy the result JSON instead of refer from the input + // This can help to reduce memory usage if you cache the results + CopyReturn bool + + // ConcurrentRead indicates the searcher to return a concurrently-READ-safe node, + // including: GetByPath/Get/Index/GetOrIndex/Int64/Bool/Float64/String/Number/Interface/Array/Map/Raw/MarshalJSON + ConcurrentRead 
bool +} + type Searcher struct { parser Parser + SearchOptions } func NewSearcher(str string) *Searcher { @@ -31,12 +46,16 @@ func NewSearcher(str string) *Searcher { s: str, noLazy: false, }, + SearchOptions: SearchOptions{ + ValidateJSON: true, + }, } } // GetByPathCopy search in depth from top json and returns a **Copied** json node at the path location func (self *Searcher) GetByPathCopy(path ...interface{}) (Node, error) { - return self.getByPath(true, true, path...) + self.CopyReturn = true + return self.getByPath(path...) } // GetByPathNoCopy search in depth from top json and returns a **Referenced** json node at the path location @@ -44,15 +63,15 @@ func (self *Searcher) GetByPathCopy(path ...interface{}) (Node, error) { // WARN: this search directly refer partial json from top json, which has faster speed, // may consumes more memory. func (self *Searcher) GetByPath(path ...interface{}) (Node, error) { - return self.getByPath(false, true, path...) + return self.getByPath(path...) } -func (self *Searcher) getByPath(copystring bool, validate bool, path ...interface{}) (Node, error) { +func (self *Searcher) getByPath(path ...interface{}) (Node, error) { var err types.ParsingError var start int self.parser.p = 0 - start, err = self.parser.getByPath(validate, path...) + start, err = self.parser.getByPath(self.ValidateJSON, path...) 
if err != 0 { // for compatibility with old version if err == types.ERR_NOT_FOUND { @@ -71,12 +90,12 @@ func (self *Searcher) getByPath(copystring bool, validate bool, path ...interfac // copy string to reducing memory usage var raw string - if copystring { + if self.CopyReturn { raw = rt.Mem2Str([]byte(self.parser.s[start:self.parser.p])) } else { raw = self.parser.s[start:self.parser.p] } - return newRawNode(raw, t), nil + return newRawNode(raw, t, self.ConcurrentRead), nil } // GetByPath searches a path and returns relaction and types of target diff --git a/ast/search_test.go b/ast/search_test.go index 6cbacc500..af2160fbc 100644 --- a/ast/search_test.go +++ b/ast/search_test.go @@ -25,6 +25,7 @@ import ( "testing" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) func TestGC_Search(t *testing.T) { @@ -54,6 +55,73 @@ func TestGC_Search(t *testing.T) { wg.Wait() } + +func TestNodeRace(t *testing.T) { + + src := `{"1":1,"2": [ 1 , 1 , { "3" : 1 , "4" : [] } ] }` + s := NewSearcher(src) + s.ConcurrentRead = true + node, _ := s.GetByPath() + + cases := []struct{ + path []interface{} + exp []string + scalar bool + lv int + }{ + {[]interface{}{"1"}, []string{`1`}, true, 0}, + {[]interface{}{"2"}, []string{`[ 1 , 1 , { "3" : 1 , "4" : [] } ]`, `[1,1,{ "3" : 1 , "4" : [] }]`, `[1,1,{"3":1,"4":[]}]`}, false, 3}, + {[]interface{}{"2", 1}, []string{`1`}, true, 1}, + {[]interface{}{"2", 2}, []string{`{ "3" : 1 , "4" : [] }`, `{"3":1,"4":[]}`}, false, 2}, + {[]interface{}{"2", 2, "3"}, []string{`1`}, true, 0}, + {[]interface{}{"2", 2, "4"}, []string{`[]`}, false, 0}, + } + + wg := sync.WaitGroup{} + start := sync.RWMutex{} + start.Lock() + + P := 100 + for i := range cases { + // println(i) + c := cases[i] + for j := 0; j < P; j++ { + wg.Add(1) + go func () { + defer wg.Done() + start.RLock() + n := node.GetByPath(c.path...) 
+ _ = n.TypeSafe() + _ = n.isAny() + v, err := n.Raw() + iv, _ := n.Int64() + lv, _ := n.Len() + _, e := n.Interface() + e2 := n.SortKeys(false) + require.NoError(t, err) + require.NoError(t, e) + require.NoError(t, e2) + if c.scalar { + require.Equal(t, int64(1), iv) + } else { + require.Equal(t, c.lv, lv) + } + eq := false + for _, exp := range c.exp { + if exp == v { + eq = true + break + } + } + require.True(t, eq) + }() + } + } + + start.Unlock() + wg.Wait() +} + func TestExportErrorInvalidChar(t *testing.T) { data := `{"a":]` p := NewSearcher(data) @@ -325,6 +393,22 @@ func BenchmarkGetOne_Sonic(b *testing.B) { } } +func BenchmarkGetOneSafe_Sonic(b *testing.B) { + b.SetBytes(int64(len(_TwitterJson))) + ast := NewSearcher(_TwitterJson) + ast.ConcurrentRead = true + for i := 0; i < b.N; i++ { + node, err := ast.GetByPath("statuses", 3, "id") + if err != nil { + b.Fatal(err) + } + x, _ := node.Int64() + if x != 249279667666817024 { + b.Fatal(node.Interface()) + } + } +} + func BenchmarkGetFull_Sonic(b *testing.B) { ast := NewSearcher(_TwitterJson) b.SetBytes(int64(len(_TwitterJson))) @@ -370,6 +454,24 @@ func BenchmarkGetOne_Parallel_Sonic(b *testing.B) { }) } +func BenchmarkGetOneSafe_Parallel_Sonic(b *testing.B) { + b.SetBytes(int64(len(_TwitterJson))) + b.RunParallel(func(pb *testing.PB) { + ast := NewSearcher(_TwitterJson) + ast.ConcurrentRead = true + for pb.Next() { + node, err := ast.GetByPath("statuses", 3, "id") + if err != nil { + b.Fatal(err) + } + x, _ := node.Int64() + if x != 249279667666817024 { + b.Fatal(node.Interface()) + } + } + }) +} + func BenchmarkSetOne_Sonic(b *testing.B) { node, err := NewSearcher(_TwitterJson).GetByPath("statuses", 3) if err != nil { diff --git a/ast/visitor_test.go b/ast/visitor_test.go index 2618c5460..221996dfa 100644 --- a/ast/visitor_test.go +++ b/ast/visitor_test.go @@ -225,9 +225,7 @@ func (self *visitorNodeDiffTest) OnObjectEnd() error { require.NotNil(self.t, object) node := self.stk[self.sp-1].Node - ps, err 
:= node.unsafeMap() - var pairs = make([]Pair, ps.Len()) - ps.ToSlice(pairs) + pairs, err := node.MapUseNode() require.NoError(self.t, err) keysGot := make([]string, 0, len(object)) @@ -235,16 +233,16 @@ func (self *visitorNodeDiffTest) OnObjectEnd() error { keysGot = append(keysGot, key) } keysWant := make([]string, 0, len(pairs)) - for _, pair := range pairs { - keysWant = append(keysWant, pair.Key) + for key := range pairs { + keysWant = append(keysWant, key) } sort.Strings(keysGot) sort.Strings(keysWant) require.EqualValues(self.t, keysWant, keysGot) - for _, pair := range pairs { - typeGot := object[pair.Key].Type() - typeWant := pair.Value.Type() + for key, pair := range pairs { + typeGot := object[key].Type() + typeWant := pair.Type() require.EqualValues(self.t, typeWant, typeGot) } @@ -278,10 +276,8 @@ func (self *visitorNodeDiffTest) OnArrayEnd() error { require.NotNil(self.t, array) node := self.stk[self.sp-1].Node - vs, err := node.unsafeArray() + values, err := node.ArrayUseNode() require.NoError(self.t, err) - var values = make([]Node, vs.Len()) - vs.ToSlice(values) require.EqualValues(self.t, len(values), len(array)) @@ -470,13 +466,13 @@ func (self *visitorUserNodeASTDecoder) decodeValue(root *Node) (visitorUserNode, value, ierr, ferr) case V_ARRAY: - nodes, err := root.unsafeArray() + nodes, err := root.ArrayUseNode() if err != nil { return nil, err } - values := make([]visitorUserNode, nodes.Len()) - for i := 0; i