Skip to content

Commit

Permalink
Merge pull request buger#102 from JoshKCarroll/add-set-impl
Browse files Browse the repository at this point in the history
Add initial Set implementation buger#61
  • Loading branch information
buger authored Jun 3, 2017
2 parents ee11858 + 0da1997 commit bb14bb6
Show file tree
Hide file tree
Showing 4 changed files with 419 additions and 20 deletions.
14 changes: 14 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,20 @@ jsonparser.EachKey(smallFixture, func(idx int, value []byte, vt jsonparser.Value
}, paths...)
```

### **`Set`**
```go
func Set(data []byte, setValue []byte, keys ...string) (value []byte, err error)
```
Receives an existing data structure, the key path to set, and the value to set at that key. *This functionality is experimental.*

Returns:
* `value` - Byte slice containing the data structure with the key's value updated or added.
* `err` - Non-nil if any parsing issue occurs.

Accepts multiple keys to specify path to JSON value (in case of updating or creating nested structures).

Note that keys can be array indexes: `jsonparser.Set(data, []byte("http://github.com"), "person", "avatars", "[0]", "url")`


## What makes it so fast?
* It does not rely on `encoding/json`, `reflection` or `interface{}`, the only real package dependency is `bytes`.
Expand Down
24 changes: 24 additions & 0 deletions benchmark/benchmark_small_payload_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,17 @@ func BenchmarkJsonParserObjectEachStructSmall(b *testing.B) {
}
}

// BenchmarkJsonParserSetSmall measures jsonparser.Set on smallFixture by
// setting four single-key paths ("uuid", "tz", "ua", "st") per iteration.
// The results and errors of Set are intentionally discarded: only the cost
// of the calls is of interest here.
func BenchmarkJsonParserSetSmall(b *testing.B) {
for i := 0; i < b.N; i++ {
// String values are passed with their JSON quotes included; numbers are passed bare.
jsonparser.Set(smallFixture, []byte(`"c90927dd-1588-4fe7-a14f-8a8950cfcbd8"`), "uuid")
jsonparser.Set(smallFixture, []byte("-3"), "tz")
jsonparser.Set(smallFixture, []byte(`"server_agent"`), "ua")
jsonparser.Set(smallFixture, []byte("3"), "st")

// NOTE(review): nothing() is defined outside this view; presumably a shared
// placeholder call used by the benchmarks in this file - confirm.
nothing()
}
}

/*
encoding/json
*/
Expand Down Expand Up @@ -186,6 +197,19 @@ func BenchmarkGoSimplejsonSmall(b *testing.B) {
}
}

// BenchmarkGoSimplejsonSetSmall measures go-simplejson on the same workload as
// the jsonparser Set benchmark: smallFixture is parsed once per iteration and
// four single-key paths ("uuid", "tz", "ua", "st") are then set on the result.
func BenchmarkGoSimplejsonSetSmall(b *testing.B) {
for i := 0; i < b.N; i++ {
// The parse error is deliberately ignored; parsing is part of the measured work.
json, _ := simplejson.NewJson(smallFixture)

json.SetPath([]string{"uuid"}, "c90927dd-1588-4fe7-a14f-8a8950cfcbd8")
json.SetPath([]string{"tz"}, -3)
json.SetPath([]string{"ua"}, "server_agent")
json.SetPath([]string{"st"}, 3)

// NOTE(review): nothing() is defined outside this view; presumably a shared
// placeholder call used by the benchmarks in this file - confirm.
nothing()
}
}

/*
github.com/pquerna/ffjson
*/
Expand Down
171 changes: 151 additions & 20 deletions parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"fmt"
"math"
"strconv"
"strings"
)

// Errors
Expand Down Expand Up @@ -49,6 +50,20 @@ func nextToken(data []byte) int {
return -1
}

// Find position of last character which is not whitespace
func lastToken(data []byte) int {
for i := len(data) - 1; i >= 0; i-- {
switch data[i] {
case ' ', '\n', '\r', '\t':
continue
default:
return i
}
}

return -1
}

// Tries to find the end of string
// Support if string contains escaped quote symbols.
func stringEnd(data []byte) (int, bool) {
Expand Down Expand Up @@ -460,34 +475,114 @@ var (
nullLiteral = []byte("null")
)

// createInsertComponent renders the JSON fragment that adds setValue under the
// given chain of keys: the first key becomes `"key":` and every following key
// opens one more nested object, e.g. keys a, b, c -> "a":{"b":{"c":<setValue>}}.
//
// comma prefixes the fragment with "," (for appending to a non-empty object);
// object wraps the whole fragment in "{" ... "}".
// keys must contain at least one element. Keys are written verbatim, without
// any JSON escaping.
func createInsertComponent(keys []string, setValue []byte, comma, object bool) []byte {
	head, nested := keys[0], keys[1:]

	var out bytes.Buffer
	if comma {
		out.WriteByte(',')
	}
	if object {
		out.WriteByte('{')
	}

	out.WriteString(`"` + head + `":`)
	for _, key := range nested {
		out.WriteString(`{"` + key + `":`)
	}
	out.Write(setValue)

	// Close one object for every nested key that was opened above.
	out.WriteString(strings.Repeat("}", len(nested)))
	if object {
		out.WriteByte('}')
	}
	return out.Bytes()
}

/*
Get - Receives data structure, and key path to extract value from.
Set - Receives existing data structure, path to set, and data to set at that key.
Returns:
`value` - Pointer to original data structure containing key value, or just empty slice if nothing found or error
`dataType` - Can be: `NotExist`, `String`, `Number`, `Object`, `Array`, `Boolean` or `Null`
`offset` - Offset from provided data structure where key value ends. Used mostly internally, for example for `ArrayEach` helper.
`err` - If key not found or any other parsing issue it should return error. If key not found it also sets `dataType` to `NotExist`
`value` - modified byte array
`err` - On any parsing error
Accept multiple keys to specify path to JSON value (in case of quering nested structures).
If no keys provided it will try to extract closest JSON value (simple ones or object/array), useful for reading streams or arrays, see `ArrayEach` implementation.
*/
func Get(data []byte, keys ...string) (value []byte, dataType ValueType, offset int, err error) {
if len(keys) > 0 {
if offset = searchKeys(data, keys...); offset == -1 {
return []byte{}, NotExist, -1, KeyPathNotFoundError
func Set(data []byte, setValue []byte, keys ...string) (value []byte, err error) {
// ensure keys are set
if len(keys) == 0 {
return nil, KeyPathNotFoundError
}

_, _, startOffset, endOffset, err := internalGet(data, keys...)
if err != nil {
if err != KeyPathNotFoundError {
// problem parsing the data
return []byte{}, err
}
// full path doesnt exist
// does any subpath exist?
var depth int
for i := range keys {
_, _, start, end, sErr := internalGet(data, keys[:i+1]...)
if sErr != nil {
break
} else {
endOffset = end
startOffset = start
depth++
}
}
}
comma := true
object := false
if endOffset == -1 {
firstToken := nextToken(data)
// We can't set a top-level key if data isn't an object
if len(data) == 0 || data[firstToken] != '{' {
return nil, KeyPathNotFoundError
}
// Don't need a comma if the input is an empty object
secondToken := firstToken + 1 + nextToken(data[firstToken+1:])
if data[secondToken] == '}' {
comma = false
}
// Set the top level key at the end (accounting for any trailing whitespace)
// This assumes last token is valid like '}', could check and return error
endOffset = lastToken(data)
}
depthOffset := endOffset
if depth != 0 {
// if subpath is a non-empty object, add to it
if data[startOffset] == '{' && data[startOffset+1+nextToken(data[startOffset+1:])]!='}' {
depthOffset--
startOffset = depthOffset
// otherwise, over-write it with a new object
} else {
comma = false
object = true
}
} else {
startOffset = depthOffset
}
value = append(data[:startOffset], append(createInsertComponent(keys[depth:], setValue, comma, object), data[depthOffset:]...)...)
} else {
// path currently exists
startComponent := data[:startOffset]
endComponent := data[endOffset:]

// Go to closest value
nO := nextToken(data[offset:])
if nO == -1 {
return []byte{}, NotExist, -1, MalformedJsonError
value = make([]byte, len(startComponent)+len(endComponent)+len(setValue))
newEndOffset := startOffset + len(setValue)
copy(value[0:startOffset], startComponent)
copy(value[startOffset:newEndOffset], setValue)
copy(value[newEndOffset:], endComponent)
}
return value, nil
}

offset += nO

func getType(data []byte, offset int) ([]byte, ValueType, int, error) {
var dataType ValueType
endOffset := offset

// if string value
if data[offset] == '"' {
dataType = String
Expand Down Expand Up @@ -547,15 +642,51 @@ func Get(data []byte, keys ...string) (value []byte, dataType ValueType, offset

endOffset += end
}
return data[offset:endOffset], dataType, endOffset, nil
}

/*
Get - Receives data structure, and key path to extract value from.

Returns:
`value` - Pointer to original data structure containing key value, or just empty slice if nothing found or error
`dataType` - Can be: `NotExist`, `String`, `Number`, `Object`, `Array`, `Boolean` or `Null`
`offset` - Offset from provided data structure where key value ends. Used mostly internally, for example for `ArrayEach` helper.
`err` - If key not found or any other parsing issue it should return error. If key not found it also sets `dataType` to `NotExist`

Accepts multiple keys to specify path to JSON value (in case of querying nested structures).
If no keys provided it will try to extract closest JSON value (simple ones or object/array), useful for reading streams or arrays, see `ArrayEach` implementation.
*/
func Get(data []byte, keys ...string) (value []byte, dataType ValueType, offset int, err error) {
	// internalGet additionally reports where the value starts; Get only exposes
	// the end offset, so the start offset is dropped here.
	value, dataType, _, offset, err = internalGet(data, keys...)
	return value, dataType, offset, err
}

/*
internalGet - Locates the value at the given key path and reports both ends of it.

Returns:
`value` - Value bytes as returned by `getType`; for strings the surrounding quotes are stripped
`dataType` - Type reported by `getType`, or `NotExist` when the value could not be located
`offset` - Index in `data` of the first byte of the value (before quote stripping); -1 if the key path was not found
`endOffset` - End offset of the value as reported by `getType`; -1 if the value could not be located
`err` - `KeyPathNotFoundError` if the key path is missing, `MalformedJsonError` if no token follows the
located position, or whatever error `getType` reports

If no keys are provided the search starts at the beginning of `data`.
*/
func internalGet(data []byte, keys ...string) (value []byte, dataType ValueType, offset, endOffset int, err error) {
	if len(keys) > 0 {
		if offset = searchKeys(data, keys...); offset == -1 {
			return []byte{}, NotExist, -1, -1, KeyPathNotFoundError
		}
	}

	// Go to closest value
	nO := nextToken(data[offset:])
	if nO == -1 {
		return []byte{}, NotExist, offset, -1, MalformedJsonError
	}

	offset += nO
	value, dataType, endOffset, err = getType(data, offset)
	if err != nil {
		return value, dataType, offset, endOffset, err
	}

	// Strip quotes from string values. The offsets are left untouched, so
	// data[offset:endOffset] still includes the quotes.
	if dataType == String {
		value = value[1 : len(value)-1]
	}

	return value, dataType, offset, endOffset, nil
}

// ArrayEach is used when iterating arrays, accepts a callback function with the same return arguments as `Get`.
Expand Down
Loading

0 comments on commit bb14bb6

Please sign in to comment.