Skip to content

Commit

Permalink
Merge branch 'master' into add-set-impl
Browse files Browse the repository at this point in the history
  • Loading branch information
buger authored Jun 3, 2017
2 parents e5f9c2e + ee11858 commit 0da1997
Show file tree
Hide file tree
Showing 8 changed files with 129 additions and 14 deletions.
6 changes: 5 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,7 @@ Benchmarks run on standard Linode 1024 box.
Compared libraries:
* https://golang.org/pkg/encoding/json
* https://github.com/Jeffail/gabs
* https://github.com/a8m/djson
* https://github.com/bitly/go-simplejson
* https://github.com/antonholmquist/jason
* https://github.com/mreiferson/go-ujson
Expand Down Expand Up @@ -264,11 +265,12 @@ Library | time/op | bytes/op | allocs/op
------ | ------- | -------- | -------
encoding/json struct | 7879 | 880 | 18
encoding/json interface{} | 8946 | 1521 | 38
Jeffail/gabs | 10053 | 1649 | 46
Jeffail/gabs | 10053 | 1649 | 46
bitly/go-simplejson | 10128 | 2241 | 36
antonholmquist/jason | 27152 | 7237 | 101
github.com/ugorji/go/codec | 8806 | 2176 | 31
mreiferson/go-ujson | **7008** | **1409** | 37
a8m/djson | 3862 | 1249 | 30
pquerna/ffjson | **3769** | **624** | **15**
mailru/easyjson | **2002** | **192** | **9**
buger/jsonparser | **1367** | **0** | **0**
Expand All @@ -293,6 +295,7 @@ https://github.com/buger/jsonparser/blob/master/benchmark/benchmark_medium_paylo
| antonholmquist/jason | 94099 | 19013 | 247 |
| github.com/ugorji/go/codec | 114719 | 6712 | 152 |
| mreiferson/go-ujson | **56972** | 11547 | 270 |
| a8m/djson | 28525 | 10196 | 198 |
| pquerna/ffjson | **20298** | **856** | **20** |
| mailru/easyjson | **10512** | **336** | **12** |
| buger/jsonparser | **15955** | **0** | **0** |
Expand All @@ -316,6 +319,7 @@ https://github.com/buger/jsonparser/blob/master/benchmark/benchmark_large_payloa
| --- | --- | --- | --- |
| encoding/json struct | 748336 | 8272 | 307 |
| encoding/json interface{} | 1224271 | 215425 | 3395 |
| a8m/djson | 510082 | 213682 | 2845 |
| pquerna/ffjson | **312271** | **7792** | **298** |
| mailru/easyjson | **154186** | **6992** | **288** |
| buger/jsonparser | **85308** | **0** | **0** |
Expand Down
20 changes: 20 additions & 0 deletions benchmark/benchmark_large_payload_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
// "github.com/Jeffail/gabs"
// "github.com/bitly/go-simplejson"
"encoding/json"
"github.com/a8m/djson"
jlexer "github.com/mailru/easyjson/jlexer"
"github.com/pquerna/ffjson/ffjson"
// "github.com/antonholmquist/jason"
Expand Down Expand Up @@ -109,3 +110,22 @@ func BenchmarkEasyJsonLarge(b *testing.B) {
}
}
}

/*
github.com/a8m/djson
*/
// BenchmarkDjsonLarge decodes largeFixture with djson.DecodeObject on every
// iteration and then walks the decoded generic value: it reads the "username"
// string of each entry under "users", and the "id" (float64) and "slug"
// (string) of each entry under "topics"."topics". Every extracted value is
// handed to nothing. The decode error is deliberately discarded; an
// unexpected document shape surfaces as a type-assertion panic instead.
func BenchmarkDjsonLarge(b *testing.B) {
	for n := 0; n < b.N; n++ {
		doc, _ := djson.DecodeObject(largeFixture)

		for _, user := range doc["users"].([]interface{}) {
			fields := user.(map[string]interface{})
			nothing(fields["username"].(string))
		}

		topicsObj := doc["topics"].(map[string]interface{})
		for _, topic := range topicsObj["topics"].([]interface{}) {
			fields := topic.(map[string]interface{})
			nothing(fields["id"].(float64), fields["slug"].(string))
		}
	}
}
21 changes: 21 additions & 0 deletions benchmark/benchmark_medium_payload_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ package benchmark
import (
"encoding/json"
"github.com/Jeffail/gabs"
"github.com/a8m/djson"
"github.com/antonholmquist/jason"
"github.com/bitly/go-simplejson"
"github.com/buger/jsonparser"
Expand Down Expand Up @@ -282,6 +283,26 @@ func BenchmarkUjsonMedium(b *testing.B) {
}
}

/*
github.com/a8m/djson
*/
// BenchmarkDjsonMedium decodes mediumFixture with djson.DecodeObject on every
// iteration and pulls a handful of values out of the resulting generic map:
// person.name.fullName (string), person.github.followers (float64), the
// untyped top-level "company" value, and the "url" of every element of
// person.gravatar.avatars. The extracted values are handed to nothing.
// The decode error is deliberately discarded.
func BenchmarkDjsonMedium(b *testing.B) {
	for n := 0; n < b.N; n++ {
		doc, _ := djson.DecodeObject(mediumFixture)

		// Resolve every container first, in the same order as the lookups
		// are needed below.
		person := doc["person"].(map[string]interface{})
		nameObj := person["name"].(map[string]interface{})
		githubObj := person["github"].(map[string]interface{})
		company := doc["company"]
		gravatarObj := person["gravatar"].(map[string]interface{})
		avatarList := gravatarObj["avatars"].([]interface{})

		fullName := nameObj["fullName"].(string)
		followers := githubObj["followers"].(float64)
		nothing(fullName, followers, company)

		for idx := range avatarList {
			avatar := avatarList[idx].(map[string]interface{})
			nothing(avatar["url"])
		}
	}
}

/*
github.com/ugorji/go/codec
*/
Expand Down
12 changes: 11 additions & 1 deletion benchmark/benchmark_small_payload_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ package benchmark
import (
"encoding/json"
"github.com/Jeffail/gabs"
"github.com/a8m/djson"
"github.com/antonholmquist/jason"
"github.com/bitly/go-simplejson"
"github.com/buger/jsonparser"
Expand Down Expand Up @@ -242,7 +243,6 @@ func BenchmarkJasonSmall(b *testing.B) {
/*
github.com/mreiferson/go-ujson
*/

func BenchmarkUjsonSmall(b *testing.B) {
for i := 0; i < b.N; i++ {
json, _ := ujson.NewFromBytes(smallFixture)
Expand All @@ -256,6 +256,16 @@ func BenchmarkUjsonSmall(b *testing.B) {
}
}

/*
github.com/a8m/djson
*/
// BenchmarkDjsonSmall decodes smallFixture with djson.DecodeObject on every
// iteration and reads four top-level fields from the resulting map: "uuid"
// and "ua" as strings, "tz" and "st" as float64. The values are handed to
// nothing. The decode error is deliberately discarded.
func BenchmarkDjsonSmall(b *testing.B) {
	for n := 0; n < b.N; n++ {
		doc, _ := djson.DecodeObject(smallFixture)

		uuid := doc["uuid"].(string)
		tz := doc["tz"].(float64)
		ua := doc["ua"].(string)
		st := doc["st"].(float64)
		nothing(uuid, tz, ua, st)
	}
}

/*
github.com/ugorji/go/codec
*/
Expand Down
13 changes: 11 additions & 2 deletions escape.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ const supplementalPlanesOffset = 0x10000
const highSurrogateOffset = 0xD800
const lowSurrogateOffset = 0xDC00

const basicMultilingualPlaneReservedOffset = 0xDFFF
const basicMultilingualPlaneOffset = 0xFFFF

func combineUTF16Surrogates(high, low rune) rune {
return supplementalPlanesOffset + (high-highSurrogateOffset)<<10 + (low - lowSurrogateOffset)
}
Expand Down Expand Up @@ -49,11 +52,18 @@ func decodeSingleUnicodeEscape(in []byte) (rune, bool) {
return rune(h1<<12 + h2<<8 + h3<<4 + h4), true
}

// isUTF16EncodedRune reports whether r lies in the inclusive range
// [highSurrogateOffset, basicMultilingualPlaneReservedOffset], i.e.
// 0xD800-0xDFFF. That block of the Basic Multilingual Plane is reserved for
// UTF-16 surrogates: a value in it is one half of a surrogate pair rather
// than a character in its own right.
// Source: https://en.wikipedia.org/wiki/Plane_(Unicode)#Basic_Multilingual_Plane
func isUTF16EncodedRune(r rune) bool {
	if r < highSurrogateOffset {
		return false
	}
	return r <= basicMultilingualPlaneReservedOffset
}

func decodeUnicodeEscape(in []byte) (rune, int) {
if r, ok := decodeSingleUnicodeEscape(in); !ok {
// Invalid Unicode escape
return utf8.RuneError, -1
} else if r < highSurrogateOffset {
} else if r <= basicMultilingualPlaneOffset && !isUTF16EncodedRune(r) {
// Valid Unicode escape in Basic Multilingual Plane
return r, 6
} else if r2, ok := decodeSingleUnicodeEscape(in[6:]); !ok { // Note: previous decodeSingleUnicodeEscape success guarantees at least 6 bytes remain
Expand All @@ -66,7 +76,6 @@ func decodeUnicodeEscape(in []byte) (rune, int) {
// Valid UTF16 surrogate pair
return combineUTF16Surrogates(r, r2), 12
}

}

// backslashCharEscapeTable: when '\X' is found for some byte X, it is to be replaced with backslashCharEscapeTable[X]
Expand Down
7 changes: 5 additions & 2 deletions escape_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,12 +44,14 @@ var singleUnicodeEscapeTests = append([]escapedUnicodeRuneTest{
{in: `\uD83D`, out: 0xD83D, len: 6},
{in: `\uDE03`, out: 0xDE03, len: 6},
{in: `\uFFFF`, out: 0xFFFF, len: 6},
{in: `\uFF11`, out: '1', len: 6},
}, commonUnicodeEscapeTests...)

var multiUnicodeEscapeTests = append([]escapedUnicodeRuneTest{
{in: `\uD83D`, isErr: true},
{in: `\uDE03`, isErr: true},
{in: `\uFFFF`, isErr: true},
{in: `\uFFFF`, out: '\uFFFF', len: 6},
{in: `\uFF11`, out: '1', len: 6},

{in: `\uD83D\uDE03`, out: '\U0001F603', len: 12},
{in: `\uD800\uDC00`, out: '\U00010000', len: 12},
Expand Down Expand Up @@ -109,13 +111,14 @@ var unescapeTests = []unescapeTest{
{in: `ab\\de`, out: `ab\de`, canAlloc: true},
{in: `ab\"de`, out: `ab"de`, canAlloc: true},
{in: `ab \u00B0 de`, out: `ab ° de`, canAlloc: true},
{in: `ab \uFF11 de`, out: `ab 1 de`, canAlloc: true},
{in: `\uFFFF`, out: "\uFFFF", canAlloc: true},
{in: `ab \uD83D\uDE03 de`, out: "ab \U0001F603 de", canAlloc: true},
{in: `\u0000\u0000\u0000\u0000\u0000`, out: "\u0000\u0000\u0000\u0000\u0000", canAlloc: true},
{in: `\u0000 \u0000 \u0000 \u0000 \u0000`, out: "\u0000 \u0000 \u0000 \u0000 \u0000", canAlloc: true},
{in: ` \u0000 \u0000 \u0000 \u0000 \u0000 `, out: " \u0000 \u0000 \u0000 \u0000 \u0000 ", canAlloc: true},

{in: `\uD800`, isErr: true},
{in: `\uFFFF`, isErr: true},
{in: `abcde\`, isErr: true},
{in: `abcde\x`, isErr: true},
{in: `abcde\u`, isErr: true},
Expand Down
27 changes: 19 additions & 8 deletions parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -193,16 +193,21 @@ func searchKeys(data []byte, keys ...string) int {
case '[':
// If we want to get array element by index
if keyLevel == level && keys[level][0] == '[' {
aIdx, _ := strconv.Atoi(keys[level][1 : len(keys[level])-1])

aIdx, err := strconv.Atoi(keys[level][1 : len(keys[level])-1])
if err != nil {
return -1
}
var curIdx int
var valueFound []byte
var valueOffset int

ArrayEach(data[i:], func(value []byte, dataType ValueType, offset int, err error) {
if curIdx == aIdx {
valueFound = value
valueOffset = offset
if dataType == String {
valueOffset = valueOffset - 2
valueFound = data[i + valueOffset:i + valueOffset + len(value) + 2]
}
}
curIdx += 1
})
Expand Down Expand Up @@ -308,6 +313,10 @@ func EachKey(data []byte, cb func(int, []byte, ValueType, error), paths ...[]str
}

if maxPath >= level {
if level < 1 {
cb(-1, []byte{}, Unknown, MalformedJsonError)
return -1
}
pathsBuf[level-1] = bytesToString(&keyUnesc)

for pi, p := range paths {
Expand Down Expand Up @@ -358,6 +367,12 @@ func EachKey(data []byte, cb func(int, []byte, ValueType, error), paths ...[]str
case '[':
var arrIdxFlags int64
var pIdxFlags int64

if level < 0 {
cb(-1, []byte{}, Unknown, MalformedJsonError)
return -1
}

for pi, p := range paths {
if len(p) < level+1 || pathFlags&bitwiseFlags[pi+1] != 0 || p[level][0] != '[' || !sameTree(p, pathsBuf[:level]) {
continue
Expand Down Expand Up @@ -671,10 +686,6 @@ func internalGet(data []byte, keys ...string) (value []byte, dataType ValueType,
value = value[1 : len(value)-1]
}

if dataType == Null {
value = []byte{}
}

return value, dataType, offset, endOffset, nil
}

Expand Down Expand Up @@ -963,7 +974,7 @@ func ParseBoolean(b []byte) (bool, error) {
func ParseString(b []byte) (string, error) {
var stackbuf [unescapeStackBufSize]byte // stack-allocated array for allocation-free unescaping of small strings
if bU, err := Unescape(b, stackbuf[:]); err != nil {
return "", nil
return "", MalformedValueError
} else {
return string(bU), nil
}
Expand Down
37 changes: 37 additions & 0 deletions parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -566,6 +566,13 @@ var getTests = []GetTest{
isFound: true,
data: `{"b":"2"}`,
},
{
desc: "get string from array",
json: `{"a":[{"b":1},"foo", 3],"c":{"c":[1,2]}}`,
path: []string{"a", "[1]"},
isFound: true,
data: "foo",
},
{
desc: "key in path is index",
json: `{"a":[{"b":"1"},{"b":"2"},3],"c":{"c":[1,2]}}`,
Expand Down Expand Up @@ -1413,3 +1420,33 @@ func TestParseFloat(t *testing.T) {
},
)
}

// parseStringTest is the case table consumed by TestParseString. Each entry
// gives the raw (still escaped) input bytes in `in`, and either the expected
// decoded string in `out` or isErr when ParseString is expected to fail.
var parseStringTest = []ParseTest{
// A \u escape above the surrogate range decodes to the single code point U+FF11.
{
in: `\uFF11`,
intype: String,
out: "\uFF11",
},
// The highest four-hex-digit escape, \uFFFF, decodes to U+FFFF rather than being rejected.
{
in: `\uFFFF`,
intype: String,
out: "\uFFFF",
},
// 0xDF00 falls inside the 0xD800-0xDFFF surrogate range and has no partner
// escape here, so an error is expected.
{
in: `\uDF00`,
intype: String,
isErr: true,
},
}

func TestParseString(t *testing.T) {
runParseTests(t, "ParseString()", parseStringTest,
func(test ParseTest) (value interface{}, err error) {
return ParseString([]byte(test.in))
},
func(test ParseTest, obtained interface{}) (bool, interface{}) {
expected := test.out.(string)
return obtained.(string) == expected, expected
},
)
}

0 comments on commit 0da1997

Please sign in to comment.