Skip to content

Commit

Permalink
Merge branch 'main' into feat/plugin_mu
Browse files Browse the repository at this point in the history
  • Loading branch information
AsterDY authored Jan 14, 2025
2 parents 8b7c7b6 + 59be92f commit 0484d05
Show file tree
Hide file tree
Showing 102 changed files with 7,458 additions and 968 deletions.
5 changes: 5 additions & 0 deletions .codespellrc
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
[codespell]
# ignore test files, go project names, binary files via `skip` and special var/regex via `ignore-words`
skip = fuzz,*_test.tmpl,testdata,*_test.go,go.mod,go.sum,*.gz
ignore-words = .github/workflows/.ignore_words
check-filenames = true
6 changes: 6 additions & 0 deletions .github/workflows/.ignore_words
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
socio-economic
nd
regArgs
oders
ure
alse
15 changes: 7 additions & 8 deletions .github/workflows/benchmark.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ jobs:
build:
strategy:
matrix:
os: [X64, arm]
os: [X64, macos-latest]
runs-on: ${{ matrix.os }}
steps:
- name: Clear repository
Expand All @@ -30,9 +30,9 @@ jobs:
continue-on-error: true
run: |
export SONIC_NO_ASYNC_GC=1
go test -run ^$ -count=100 -benchmem -bench 'BenchmarkDecoder_(Generic|Binding)_Sonic' ./decoder >> /var/tmp/sonic_bench_target_${{ github.run_id }}.out
go test -run ^$ -count=100 -benchmem -bench 'BenchmarkEncoder_(Generic|Binding)_Sonic' ./encoder >> /var/tmp/sonic_bench_target_${{ github.run_id }}.out
go test -run ^$ -count=100 -benchmem -bench 'Benchmark(Get|Set)One_Sonic|BenchmarkParseSeven_Sonic' ./ast >> /var/tmp/sonic_bench_target_${{ github.run_id }}.out
go test -run ^$ -count=20 -benchmem -bench 'BenchmarkDecoder_(Generic|Binding)_Sonic' ./decoder >> /var/tmp/sonic_bench_target_${{ github.run_id }}.out
go test -run ^$ -count=20 -benchmem -bench 'BenchmarkEncoder_(Generic|Binding)_Sonic' ./encoder >> /var/tmp/sonic_bench_target_${{ github.run_id }}.out
go test -run ^$ -count=20 -benchmem -bench 'Benchmark(Get|Set)One_Sonic|BenchmarkParseSeven_Sonic' ./ast >> /var/tmp/sonic_bench_target_${{ github.run_id }}.out
- name: Clear repository
run: sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
Expand All @@ -46,10 +46,9 @@ jobs:
continue-on-error: true
run: |
export SONIC_NO_ASYNC_GC=1
UNIQUE_ID=${{ github.run_id }}
go test -run ^$ -count=100 -benchmem -bench 'BenchmarkDecoder_(Generic|Binding)_Sonic' ./decoder >> /var/tmp/sonic_bench_main_${{ github.run_id }}.out
go test -run ^$ -count=100 -benchmem -bench 'BenchmarkEncoder_(Generic|Binding)_Sonic' ./encoder >> /var/tmp/sonic_bench_main_${{ github.run_id }}.out
go test -run ^$ -count=100 -benchmem -bench 'Benchmark(Get|Set)One_Sonic|BenchmarkParseSeven_Sonic' ./ast >> /var/tmp/sonic_bench_main_${{ github.run_id }}.out
go test -run ^$ -count=20 -benchmem -bench 'BenchmarkDecoder_(Generic|Binding)_Sonic' ./decoder >> /var/tmp/sonic_bench_main_${{ github.run_id }}.out
go test -run ^$ -count=20 -benchmem -bench 'BenchmarkEncoder_(Generic|Binding)_Sonic' ./encoder >> /var/tmp/sonic_bench_main_${{ github.run_id }}.out
go test -run ^$ -count=20 -benchmem -bench 'Benchmark(Get|Set)One_Sonic|BenchmarkParseSeven_Sonic' ./ast >> /var/tmp/sonic_bench_main_${{ github.run_id }}.out
- name: Diff bench
continue-on-error: true
Expand Down
5 changes: 1 addition & 4 deletions .github/workflows/compatibility_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ jobs:
strategy:
matrix:
go-version: [1.16.x, 1.17.x, 1.18.x, 1.19.x, 1.20.x, 1.21.x, 1.22.x, 1.23.x]
os: [arm, X64]
os: [X64, macos-latest]
runs-on: ${{ matrix.os }}
steps:
- name: Clear repository
Expand Down Expand Up @@ -38,6 +38,3 @@ jobs:

- name: ast
run: go test -race -v -gcflags="all=-l" github.com/bytedance/sonic/ast

- name: qemu
run: sh scripts/qemu.sh
2 changes: 1 addition & 1 deletion .github/workflows/fuzzing-linux-opt-X64.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ jobs:
build:
strategy:
matrix:
os: [arm, X64]
os: [macos-latest, X64]
runs-on: ${{ matrix.os }}
steps:
- name: Clear repository
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/fuzzing-linux-x64.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ jobs:
build:
strategy:
matrix:
os: [arm, X64]
os: [macos-latest, X64]
runs-on: ${{ matrix.os }}
steps:
- name: Clear repository
Expand Down
13 changes: 13 additions & 0 deletions .github/workflows/lint.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
name: Lint

on: pull_request

jobs:
misc:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: spell check
uses: codespell-project/actions-codespell@v2
with:
skip: loader/internal/iasm/obj/macho.go,loader/internal/iasm/obj/macho.go,loader/internal/iasm/obj/macho.go,loader/internal/iasm/obj/macho.go,loader/internal/iasm/x86_64/encodings.go:720,loader/internal/iasm/x86_64/program.go,loader/internal/iasm/x86_64/program.go,loader/internal/iasm/x86_64/program.go,loader/internal/iasm/expr/ast.go,loader/internal/iasm/expr/errors.go
4 changes: 2 additions & 2 deletions .github/workflows/unit_test-linux-arm.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: Unit Test Linux arm
name: Unit Test Linux macos-latest

on: push

Expand All @@ -8,7 +8,7 @@ jobs:
matrix:
# TODO: 1.17.x, 1.18.x, 1.19.x not supported because golang asm bug
go-version: [1.20.x, 1.21.x, 1.22.x, 1.23.x]
runs-on: [arm]
runs-on: [macos-latest]
steps:
- name: Clear repository
run: sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
Expand Down
11 changes: 8 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ ret, err := Encode(v, EscapeHTML) // ret == `{"\u0026\u0026":{"X":"\u003c\u003e"

### Compact Format

Sonic encodes primitive objects (struct/map...) as compact-format JSON by default, except marshaling `json.RawMessage` or `json.Marshaler`: sonic ensures validating their output JSON but **DONOT** compacting them for performance concerns. We provide the option `encoder.CompactMarshaler` to add compacting process.
Sonic encodes primitive objects (struct/map...) as compact-format JSON by default, except when marshaling `json.RawMessage` or `json.Marshaler`: sonic validates their output JSON but **DOES NOT** compact it, for performance reasons. We provide the option `encoder.CompactMarshaler` to add a compacting step.

### Print Error

Expand Down Expand Up @@ -480,16 +480,21 @@ But `ast.Visitor` is not a very handy API. You might need to write a lot of code

### Buffer Size

Sonic use memory pool in many places like `encoder.Encode`, `ast.Node.MarshalJSON` to improve performace, which may produce more memory usage (in-use) when server's load is high. See [issue 614](https://github.com/bytedance/sonic/issues/614). Therefore, we introduce some options to let user control the behavior of memory pool. See [option](https://pkg.go.dev/github.com/bytedance/[email protected]/option#pkg-variables) package.
Sonic uses memory pools in many places, such as `encoder.Encode` and `ast.Node.MarshalJSON`, to improve performance, which may increase (in-use) memory usage when the server's load is high. See [issue 614](https://github.com/bytedance/sonic/issues/614). Therefore, we introduce some options to let users control the behavior of the memory pool. See the [option](https://pkg.go.dev/github.com/bytedance/sonic/option#pkg-variables) package.

### Faster JSON skip
### Faster JSON Skip

For security, sonic uses the [FSM](native/skip_one.c) algorithm to validate JSON when decoding raw JSON or encoding `json.Marshaler`, which is much slower (1~10x) than the [SIMD-searching-pair](native/skip_one_fast.c) algorithm. If you have many redundant JSON values and DO NOT NEED to strictly validate JSON correctness, you can enable the options below:

- `Config.NoValidateSkipJSON`: for faster skipping JSON when decoding, such as unknown fields, json.Unmarshaler(json.RawMessage), mismatched values, and redundant array elements
- `Config.NoValidateJSONMarshaler`: avoid validating JSON when encoding `json.Marshaler`
- `SearchOption.ValidateJSON`: indicates whether to validate the located JSON value when calling `Get`

## JSON-Path Support (GJSON)

[tidwall/gjson](https://github.com/tidwall/gjson) provides a comprehensive and popular JSON-Path API, and
a lot of older code relies heavily on it. Therefore, we provide a wrapper library, which combines gjson's API with sonic's SIMD algorithm to boost performance. See [cloudwego/gjson](https://github.com/cloudwego/gjson).

## Community

Sonic is a subproject of [CloudWeGo](https://www.cloudwego.io/). We are committed to building a cloud native ecosystem.
8 changes: 4 additions & 4 deletions README_ZH_CN.md
Original file line number Diff line number Diff line change
Expand Up @@ -286,9 +286,9 @@ sub := root.Get("key3").Index(2).Int64() // == 3

`ast.Searcher`提供了一些选项,以满足用户的不同需求:

```
opts:= ast.SearchOption{CopyReturn: true…}
Val, err:= sonic。gettwithoptions (JSON, opts "key")
```go
opts := ast.SearchOption{CopyReturn: true…}
val, err := sonic.GetWithOptions(JSON, opts, "key")
```

- CopyReturn
Expand Down Expand Up @@ -389,7 +389,7 @@ type Visitor interface {
- `ConfigDefault`: sonic的默认配置 (`EscapeHTML=false`,`SortKeys=false`…) 保证性能同时兼顾安全性。
- `ConfigStd`: 与 `encoding/json` 保证完全兼容的配置
- `ConfigFastest`: 最快的配置(`NoQuoteTextMarshaler=true...`) 保证性能最优但是会缺少一些安全性检查(validate UTF8 等)
Sonic ****确保支持所有环境,由于开发高性能代码的困难。在不支持声音的环境中,实现将回落到 `encoding/json`。因此上述配置将全部等于`ConfigStd`
Sonic **不**确保支持所有环境,由于开发高性能代码的困难。在不支持sonic的环境中,实现将回落到 `encoding/json`。因此上述配置将全部等于`ConfigStd`

## 注意事项

Expand Down
4 changes: 2 additions & 2 deletions api.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ type Config struct {
// CopyString indicates decoder to decode string values by copying instead of referring.
CopyString bool

// ValidateString indicates decoder and encoder to valid string values: decoder will return errors
// ValidateString indicates decoder and encoder to validate string values: decoder will return errors
// when there are unescaped control chars (\u0000-\u001f) in a string value of the JSON.
ValidateString bool

Expand Down Expand Up @@ -120,7 +120,7 @@ var (

// API is a binding of specific config.
// This interface is inspired by github.com/json-iterator/go,
// and has same behaviors under equavilent config.
// and has same behaviors under equivalent config.
type API interface {
// MarshalToString returns the JSON encoding string of v
MarshalToString(v interface{}) (string, error)
Expand Down
2 changes: 1 addition & 1 deletion ast/api_compat.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ func quote(buf *[]byte, val string) {
quoteString(buf, val)
}

// unquote unescapes a internal JSON string (it doesn't count quotas at the begining and end)
// unquote unescapes an internal JSON string (it doesn't count quotes at the beginning and end)
func unquote(src string) (string, types.ParsingError) {
sp := rt.IndexChar(src, -1)
out, ok := unquoteBytes(rt.BytesFrom(sp, len(src)+2, len(src)+2))
Expand Down
79 changes: 10 additions & 69 deletions ast/decode.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,16 +17,17 @@
package ast

import (
`encoding/base64`
`runtime`
`strconv`
`unsafe`

`github.com/bytedance/sonic/internal/native/types`
`github.com/bytedance/sonic/internal/rt`
"encoding/base64"
"runtime"
"strconv"
"unsafe"

"github.com/bytedance/sonic/internal/native/types"
"github.com/bytedance/sonic/internal/rt"
"github.com/bytedance/sonic/internal/utils"
)

// Hack: this is used for both checking space and cause firendly compile errors in 32-bit arch.
// Hack: this is used both for checking space and for causing friendly compile errors on 32-bit arch.
const _Sonic_Not_Support_32Bit_Arch__Checking_32Bit_Arch_Here = (1 << ' ') | (1 << '\t') | (1 << '\r') | (1 << '\n')

var bytesNull = []byte("null")
Expand Down Expand Up @@ -290,67 +291,7 @@ func decodeValue(src string, pos int, skipnum bool) (ret int, v types.JsonState)

//go:nocheckptr
func skipNumber(src string, pos int) (ret int) {
sp := uintptr(rt.IndexChar(src, pos))
se := uintptr(rt.IndexChar(src, len(src)))
if uintptr(sp) >= se {
return -int(types.ERR_EOF)
}

if c := *(*byte)(unsafe.Pointer(sp)); c == '-' {
sp += 1
}
ss := sp

var pointer bool
var exponent bool
var lastIsDigit bool
var nextNeedDigit = true

for ; sp < se; sp += uintptr(1) {
c := *(*byte)(unsafe.Pointer(sp))
if isDigit(c) {
lastIsDigit = true
nextNeedDigit = false
continue
} else if nextNeedDigit {
return -int(types.ERR_INVALID_CHAR)
} else if c == '.' {
if !lastIsDigit || pointer || exponent || sp == ss {
return -int(types.ERR_INVALID_CHAR)
}
pointer = true
lastIsDigit = false
nextNeedDigit = true
continue
} else if c == 'e' || c == 'E' {
if !lastIsDigit || exponent {
return -int(types.ERR_INVALID_CHAR)
}
if sp == se-1 {
return -int(types.ERR_EOF)
}
exponent = true
lastIsDigit = false
nextNeedDigit = false
continue
} else if c == '-' || c == '+' {
if prev := *(*byte)(unsafe.Pointer(sp - 1)); prev != 'e' && prev != 'E' {
return -int(types.ERR_INVALID_CHAR)
}
lastIsDigit = false
nextNeedDigit = true
continue
} else {
break
}
}

if nextNeedDigit {
return -int(types.ERR_EOF)
}

runtime.KeepAlive(src)
return int(uintptr(sp) - uintptr((*rt.GoString)(unsafe.Pointer(&src)).Ptr))
return utils.SkipNumber(src, pos)
}

//go:nocheckptr
Expand Down
2 changes: 1 addition & 1 deletion ast/iterator.go
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ type Scanner func(path Sequence, node *Node) bool
// ForEach scans one V_OBJECT node's children from JSON head to tail,
// and pass the Sequence and Node of corresponding JSON value.
//
// Especailly, if the node is not V_ARRAY or V_OBJECT,
// Especially, if the node is not V_ARRAY or V_OBJECT,
// the node itself will be returned and Sequence.Index == -1.
//
// NOTICE: An unset node WON'T trigger sc, but its index still counts into Path.Index
Expand Down
22 changes: 11 additions & 11 deletions ast/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ func (self *Node) Check() error {

// IsRaw returns true if node's underlying value is raw json
//
// Deprecated: not concurent safe
// Deprecated: not concurrent safe
func (self Node) IsRaw() bool {
return self.t & _V_RAW != 0
}
Expand Down Expand Up @@ -440,7 +440,7 @@ func (self *Node) String() (string, error) {
}
}

// StrictString returns string value (unescaped), includeing V_STRING, V_ANY of string.
// StrictString returns string value (unescaped), including V_STRING, V_ANY of string.
// In other cases, it will return empty string.
func (self *Node) StrictString() (string, error) {
if err := self.checkRaw(); err != nil {
Expand Down Expand Up @@ -509,7 +509,7 @@ func (self *Node) Float64() (float64, error) {
}
}

// Float64 exports underlying float64 value, includeing V_NUMBER, V_ANY
// StrictFloat64 exports underlying float64 value, including V_NUMBER, V_ANY
func (self *Node) StrictFloat64() (float64, error) {
if err := self.checkRaw(); err != nil {
return 0.0, err
Expand All @@ -527,7 +527,7 @@ func (self *Node) StrictFloat64() (float64, error) {
}
}

/** Sequencial Value Methods **/
/** Sequential Value Methods **/

// Len returns children count of an array|object|string node
// WARN: For partially loaded node, it also works but only counts the parsed children
Expand Down Expand Up @@ -611,7 +611,7 @@ func (self *Node) Unset(key string) (bool, error) {
if err := self.should(types.V_OBJECT); err != nil {
return false, err
}
// NOTICE: must get acurate length before deduct
// NOTICE: must get accurate length before deduct
if err := self.skipAllKey(); err != nil {
return false, err
}
Expand Down Expand Up @@ -657,7 +657,7 @@ func (self *Node) SetAnyByIndex(index int, val interface{}) (bool, error) {
return self.SetByIndex(index, NewAny(val))
}

// UnsetByIndex REOMVE (softly) the node of given index.
// UnsetByIndex REMOVE (softly) the node of given index.
//
// WARN: this will change address of elements, which is a dangerous action.
// Use Unset() for object or Pop() for array instead.
Expand Down Expand Up @@ -957,7 +957,7 @@ func (self *Node) MapUseNumber() (map[string]interface{}, error) {
return self.toGenericObjectUseNumber()
}

// MapUseNode scans both parsed and non-parsed chidren nodes,
// MapUseNode scans both parsed and non-parsed children nodes,
// and map them by their keys
func (self *Node) MapUseNode() (map[string]Node, error) {
if self.isAny() {
Expand Down Expand Up @@ -1102,7 +1102,7 @@ func (self *Node) ArrayUseNumber() ([]interface{}, error) {
return self.toGenericArrayUseNumber()
}

// ArrayUseNode copys both parsed and non-parsed chidren nodes,
// ArrayUseNode copies both parsed and non-parsed children nodes,
// and indexes them by original order
func (self *Node) ArrayUseNode() ([]Node, error) {
if self.isAny() {
Expand Down Expand Up @@ -1147,9 +1147,9 @@ func (self *Node) unsafeArray() (*linkedNodes, error) {
return (*linkedNodes)(self.p), nil
}

// Interface loads all children under all pathes from this node,
// Interface loads all children under all paths from this node,
// and converts itself as generic type.
// WARN: all numberic nodes are casted to float64
// WARN: all numeric nodes are cast to float64
func (self *Node) Interface() (interface{}, error) {
if err := self.checkRaw(); err != nil {
return nil, err
Expand Down Expand Up @@ -1193,7 +1193,7 @@ func (self *Node) packAny() interface{} {
}

// InterfaceUseNumber works same with Interface()
// except numberic nodes are casted to json.Number
// except numeric nodes are cast to json.Number
func (self *Node) InterfaceUseNumber() (interface{}, error) {
if err := self.checkRaw(); err != nil {
return nil, err
Expand Down
Loading

0 comments on commit 0484d05

Please sign in to comment.