From e5f9c2e6f8e2ae288a119365be918417205e9dfb Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Thu, 1 Jun 2017 15:41:31 +0000 Subject: [PATCH] Add initial Set implementation --- README.md | 14 ++ benchmark/benchmark_small_payload_test.go | 24 +++ parser.go | 171 +++++++++++++-- parser_test.go | 244 +++++++++++++++++++++- 4 files changed, 426 insertions(+), 27 deletions(-) diff --git a/README.md b/README.md index 895b89c..b86074c 100644 --- a/README.md +++ b/README.md @@ -201,6 +201,20 @@ jsonparser.EachKey(smallFixture, func(idx int, value []byte, vt jsonparser.Value }, paths...) ``` +### **`Set`** +```go +func Set(data []byte, setValue []byte, keys ...string) (value []byte, err error) +``` +Receives an existing data structure, the key path to set, and the value to set at that key. *This functionality is experimental.* + +Returns: +* `value` - Modified byte array with the updated or added key value. +* `err` - Returned if any parsing issue occurs. + +Accepts multiple keys to specify path to JSON value (in case of updating or creating nested structures). + +Note that keys can be array indexes: `jsonparser.Set(data, []byte("http://github.com"), "person", "avatars", "[0]", "url")` + ## What makes it so fast? * It does not rely on `encoding/json`, `reflection` or `interface{}`, the only real package dependency is `bytes`. 
diff --git a/benchmark/benchmark_small_payload_test.go b/benchmark/benchmark_small_payload_test.go index 9abb0c5..2f90c7d 100644 --- a/benchmark/benchmark_small_payload_test.go +++ b/benchmark/benchmark_small_payload_test.go @@ -129,6 +129,17 @@ func BenchmarkJsonParserObjectEachStructSmall(b *testing.B) { } } +func BenchmarkJsonParserSetSmall(b *testing.B) { + for i := 0; i < b.N; i++ { + jsonparser.Set(smallFixture, []byte(`"c90927dd-1588-4fe7-a14f-8a8950cfcbd8"`), "uuid") + jsonparser.Set(smallFixture, []byte("-3"), "tz") + jsonparser.Set(smallFixture, []byte(`"server_agent"`), "ua") + jsonparser.Set(smallFixture, []byte("3"), "st") + + nothing() + } +} + /* encoding/json */ @@ -185,6 +196,19 @@ func BenchmarkGoSimplejsonSmall(b *testing.B) { } } +func BenchmarkGoSimplejsonSetSmall(b *testing.B) { + for i := 0; i < b.N; i++ { + json, _ := simplejson.NewJson(smallFixture) + + json.SetPath([]string{"uuid"}, "c90927dd-1588-4fe7-a14f-8a8950cfcbd8") + json.SetPath([]string{"tz"}, -3) + json.SetPath([]string{"ua"}, "server_agent") + json.SetPath([]string{"st"}, 3) + + nothing() + } +} + /* github.com/pquerna/ffjson */ diff --git a/parser.go b/parser.go index ca6d4bc..0acd3fb 100644 --- a/parser.go +++ b/parser.go @@ -6,6 +6,7 @@ import ( "fmt" "math" "strconv" + "strings" ) // Errors @@ -49,6 +50,20 @@ func nextToken(data []byte) int { return -1 } +// Find position of last character which is not whitespace +func lastToken(data []byte) int { + for i := len(data) - 1; i >= 0; i-- { + switch data[i] { + case ' ', '\n', '\r', '\t': + continue + default: + return i + } + } + + return -1 +} + // Tries to find the end of string // Support if string contains escaped quote symbols. 
func stringEnd(data []byte) (int, bool) { @@ -445,34 +460,114 @@ var ( nullLiteral = []byte("null") ) +func createInsertComponent(keys []string, setValue []byte, comma, object bool) []byte { + var buffer bytes.Buffer + if comma { + buffer.WriteString(",") + } + if object { + buffer.WriteString("{") + } + buffer.WriteString("\"") + buffer.WriteString(keys[0]) + buffer.WriteString("\":") + for i := 1; i < len(keys); i++ { + buffer.WriteString("{\"") + buffer.WriteString(keys[i]) + buffer.WriteString("\":") + } + buffer.Write(setValue) + buffer.WriteString(strings.Repeat("}", len(keys)-1)) + if object { + buffer.WriteString("}") + } + return buffer.Bytes() +} + /* -Get - Receives data structure, and key path to extract value from. + +Set - Receives existing data structure, path to set, and data to set at that key. Returns: -`value` - Pointer to original data structure containing key value, or just empty slice if nothing found or error -`dataType` - Can be: `NotExist`, `String`, `Number`, `Object`, `Array`, `Boolean` or `Null` -`offset` - Offset from provided data structure where key value ends. Used mostly internally, for example for `ArrayEach` helper. -`err` - If key not found or any other parsing issue it should return error. If key not found it also sets `dataType` to `NotExist` +`value` - modified byte array +`err` - On any parsing error -Accept multiple keys to specify path to JSON value (in case of quering nested structures). -If no keys provided it will try to extract closest JSON value (simple ones or object/array), useful for reading streams or arrays, see `ArrayEach` implementation. 
*/ -func Get(data []byte, keys ...string) (value []byte, dataType ValueType, offset int, err error) { - if len(keys) > 0 { - if offset = searchKeys(data, keys...); offset == -1 { - return []byte{}, NotExist, -1, KeyPathNotFoundError +func Set(data []byte, setValue []byte, keys ...string) (value []byte, err error) { + // ensure keys are set + if len(keys) == 0 { + return nil, KeyPathNotFoundError + } + + _, _, startOffset, endOffset, err := internalGet(data, keys...) + if err != nil { + if err != KeyPathNotFoundError { + // problem parsing the data + return []byte{}, err + } + // full path doesnt exist + // does any subpath exist? + var depth int + for i := range keys { + _, _, start, end, sErr := internalGet(data, keys[:i+1]...) + if sErr != nil { + break + } else { + endOffset = end + startOffset = start + depth++ + } } - } + comma := true + object := false + if endOffset == -1 { + firstToken := nextToken(data) + // We can't set a top-level key if data isn't an object + if len(data) == 0 || data[firstToken] != '{' { + return nil, KeyPathNotFoundError + } + // Don't need a comma if the input is an empty object + secondToken := firstToken + 1 + nextToken(data[firstToken+1:]) + if data[secondToken] == '}' { + comma = false + } + // Set the top level key at the end (accounting for any trailing whitespace) + // This assumes last token is valid like '}', could check and return error + endOffset = lastToken(data) + } + depthOffset := endOffset + if depth != 0 { + // if subpath is a non-empty object, add to it + if data[startOffset] == '{' && data[startOffset+1+nextToken(data[startOffset+1:])]!='}' { + depthOffset-- + startOffset = depthOffset + // otherwise, over-write it with a new object + } else { + comma = false + object = true + } + } else { + startOffset = depthOffset + } + value = append(data[:startOffset], append(createInsertComponent(keys[depth:], setValue, comma, object), data[depthOffset:]...)...) 
+ } else { + // path currently exists + startComponent := data[:startOffset] + endComponent := data[endOffset:] - // Go to closest value - nO := nextToken(data[offset:]) - if nO == -1 { - return []byte{}, NotExist, -1, MalformedJsonError + value = make([]byte, len(startComponent)+len(endComponent)+len(setValue)) + newEndOffset := startOffset + len(setValue) + copy(value[0:startOffset], startComponent) + copy(value[startOffset:newEndOffset], setValue) + copy(value[newEndOffset:], endComponent) } + return value, nil +} - offset += nO - +func getType(data []byte, offset int) ([]byte, ValueType, int, error) { + var dataType ValueType endOffset := offset + // if string value if data[offset] == '"' { dataType = String @@ -532,8 +627,44 @@ func Get(data []byte, keys ...string) (value []byte, dataType ValueType, offset endOffset += end } + return data[offset:endOffset], dataType, endOffset, nil +} + +/* +Get - Receives data structure, and key path to extract value from. + +Returns: +`value` - Pointer to original data structure containing key value, or just empty slice if nothing found or error +`dataType` - Can be: `NotExist`, `String`, `Number`, `Object`, `Array`, `Boolean` or `Null` +`offset` - Offset from provided data structure where key value ends. Used mostly internally, for example for `ArrayEach` helper. +`err` - If key not found or any other parsing issue it should return error. If key not found it also sets `dataType` to `NotExist` - value = data[offset:endOffset] +Accept multiple keys to specify path to JSON value (in case of quering nested structures). +If no keys provided it will try to extract closest JSON value (simple ones or object/array), useful for reading streams or arrays, see `ArrayEach` implementation. +*/ +func Get(data []byte, keys ...string) (value []byte, dataType ValueType, offset int, err error) { + a, b, _, d, e := internalGet(data, keys...) 
+ return a, b, d, e +} + +func internalGet(data []byte, keys ...string) (value []byte, dataType ValueType, offset, endOffset int, err error) { + if len(keys) > 0 { + if offset = searchKeys(data, keys...); offset == -1 { + return []byte{}, NotExist, -1, -1, KeyPathNotFoundError + } + } + + // Go to closest value + nO := nextToken(data[offset:]) + if nO == -1 { + return []byte{}, NotExist, offset, -1, MalformedJsonError + } + + offset += nO + value, dataType, endOffset, err = getType(data, offset) + if err != nil { + return value, dataType, offset, endOffset, err + } // Strip quotes from string values if dataType == String { @@ -544,7 +675,7 @@ func Get(data []byte, keys ...string) (value []byte, dataType ValueType, offset value = []byte{} } - return value, dataType, endOffset, nil + return value, dataType, offset, endOffset, nil } // ArrayEach is used when iterating arrays, accepts a callback function with the same return arguments as `Get`. diff --git a/parser_test.go b/parser_test.go index 5f5ab66..8a8d39d 100644 --- a/parser_test.go +++ b/parser_test.go @@ -38,6 +38,173 @@ type GetTest struct { data interface{} } +type SetTest struct { + desc string + json string + setData string + path []string + + isErr bool + isFound bool + + data interface{} +} + +var setTests = []SetTest{ + { + desc: "set unknown key (string)", + json: `{"test":"input"}`, + isFound: true, + path: []string{"new.field"}, + setData: `"new value"`, + data: `{"test":"input","new.field":"new value"}`, + }, + { + desc: "set known key (string)", + json: `{"test":"input"}`, + isFound: true, + path: []string{"test"}, + setData: `"new value"`, + data: `{"test":"new value"}`, + }, + { + desc: "set unknown key (object)", + json: `{"test":"input"}`, + isFound: true, + path: []string{"new.field"}, + setData: `{"key": "new object"}`, + data: `{"test":"input","new.field":{"key": "new object"}}`, + }, + { + desc: "set known key (object)", + json: `{"test":"input"}`, + isFound: true, + path: []string{"test"}, 
+ setData: `{"key": "new object"}`, + data: `{"test":{"key": "new object"}}`, + }, + { + desc: "set known key (object within array)", + json: `{"test":[{"key":"val-obj1"}]}`, + isFound: true, + path: []string{"test", "[0]"}, + setData: `{"key":"new object"}`, + data: `{"test":[{"key":"new object"}]}`, + }, + { + desc: "set unknown key (replace object)", + json: `{"test":[{"key":"val-obj1"}]}`, + isFound: true, + path: []string{"test", "newKey"}, + setData: `"new object"`, + data: `{"test":{"newKey":"new object"}}`, + }, + { + desc: "set unknown key (complex object within nested array)", + json: `{"test":[{"key":[{"innerKey":"innerKeyValue"}]}]}`, + isFound: true, + path: []string{"test", "[0]", "key", "[0]", "newInnerKey"}, + setData: `{"key":"new object"}`, + data: `{"test":[{"key":[{"innerKey":"innerKeyValue","newInnerKey":{"key":"new object"}}]}]}`, + }, + { + desc: "set known key (complex object within nested array)", + json: `{"test":[{"key":[{"innerKey":"innerKeyValue"}]}]}`, + isFound: true, + path: []string{"test", "[0]", "key", "[0]", "innerKey"}, + setData: `{"key":"new object"}`, + data: `{"test":[{"key":[{"innerKey":{"key":"new object"}}]}]}`, + }, + { + desc: "set unknown key (object, partial subtree exists)", + json: `{"test":{"input":"output"}}`, + isFound: true, + path: []string{"test", "new.field"}, + setData: `{"key":"new object"}`, + data: `{"test":{"input":"output","new.field":{"key":"new object"}}}`, + }, + { + desc: "set unknown key (object, empty partial subtree exists)", + json: `{"test":{}}`, + isFound: true, + path: []string{"test", "new.field"}, + setData: `{"key":"new object"}`, + data: `{"test":{"new.field":{"key":"new object"}}}`, + }, + { + desc: "set unknown key (object, no subtree exists)", + json: `{"test":"input"}`, + isFound: true, + path: []string{"new.field", "nested", "value"}, + setData: `{"key": "new object"}`, + data: `{"test":"input","new.field":{"nested":{"value":{"key": "new object"}}}}`, + }, + { + desc: "set in empty 
json", + json: `{}`, + isFound: true, + path: []string{"foo"}, + setData: `"null"`, + data: `{"foo":"null"}`, + }, + { + desc: "set subtree in empty json", + json: `{}`, + isFound: true, + path: []string{"foo", "bar"}, + setData: `"null"`, + data: `{"foo":{"bar":"null"}}`, + }, + { + desc: "set in empty string - not found", + json: ``, + isFound: false, + path: []string{"foo"}, + setData: `"null"`, + data: ``, + }, + { + desc: "set in Number - not found", + json: `1.323`, + isFound: false, + path: []string{"foo"}, + setData: `"null"`, + data: `1.323`, + }, + { + desc: "set known key (top level array)", + json: `[{"key":"val-obj1"}]`, + isFound: true, + path: []string{"[0]", "key"}, + setData: `"new object"`, + data: `[{"key":"new object"}]`, + }, + { + desc: "set unknown key (trailing whitespace)", + json: `{"key":"val-obj1"} `, + isFound: true, + path: []string{"alt-key"}, + setData: `"new object"`, + data: `{"key":"val-obj1","alt-key":"new object"} `, + }, + { // This test sets the key instead of returning a parse error, as checking for the malformed JSON would reduce performance (this is not ideal) + desc: `malformed with trailing whitespace`, + json: `{"a":1 `, + path: []string{"a"}, + setData: `2`, + isFound: true, + data: `{"a":2 `, + }, + { // This test sets the key instead of returning a parse error, as checking for the malformed JSON would reduce performance (this is not ideal) + desc: "malformed 'colon chain', set second string", + json: `{"a":"b":"c"}`, + path: []string{"b"}, + setData: `"d"`, + isFound: true, + data: `{"a":"b":"d"}`, + }, +} + var getTests = []GetTest{ // Trivial tests { @@ -358,17 +525,17 @@ var getTests = []GetTest{ isFound: false, }, { // Issue #81 - desc: `missing key in object in array`, - json: `{"p":{"a":[{"u":"abc","t":"th"}]}}`, - path: []string{"p", "a", "[0]", "x"}, + desc: `missing key in object in array`, + json: `{"p":{"a":[{"u":"abc","t":"th"}]}}`, + path: []string{"p", "a", "[0]", "x"}, isFound: false, }, { // Issue #81 
counter test - desc: `existing key in object in array`, - json: `{"p":{"a":[{"u":"abc","t":"th"}]}}`, - path: []string{"p", "a", "[0]", "u"}, + desc: `existing key in object in array`, + json: `{"p":{"a":[{"u":"abc","t":"th"}]}}`, + path: []string{"p", "a", "[0]", "u"}, isFound: true, - data: "abc", + data: "abc", }, { // This test returns not found instead of a parse error, as checking for the malformed JSON would reduce performance desc: "malformed key (followed by comma followed by colon)", @@ -656,6 +823,69 @@ func runGetTests(t *testing.T, testKind string, tests []GetTest, runner func(Get } } +func setTestCheckFoundAndNoError(t *testing.T, testKind string, test SetTest, value interface{}, err error) bool { + isFound := (err != KeyPathNotFoundError) + isErr := (err != nil && err != KeyPathNotFoundError) + + if test.isErr != isErr { + // If the call didn't match the error expectation, fail + t.Errorf("%s test '%s' isErr mismatch: expected %t, obtained %t (err %v). Value: %v", testKind, test.desc, test.isErr, isErr, err, value) + return false + } else if isErr { + // Else, if there was an error, don't fail and don't check isFound or the value + return false + } else if test.isFound != isFound { + // Else, if the call didn't match the is-found expectation, fail + t.Errorf("%s test '%s' isFound mismatch: expected %t, obtained %t", testKind, test.desc, test.isFound, isFound) + return false + } else if !isFound { + // Else, if no value was found, don't fail and don't check the value + return false + } else { + // Else, there was no error and a value was found, so check the value + return true + } +} + +func runSetTests(t *testing.T, testKind string, tests []SetTest, runner func(SetTest) (interface{}, ValueType, error), resultChecker func(SetTest, interface{}) (bool, interface{})) { + for _, test := range tests { + if activeTest != "" && test.desc != activeTest { + continue + } + + fmt.Println("Running:", test.desc) + + value, _, err := runner(test) + + if 
setTestCheckFoundAndNoError(t, testKind, test, value, err) { + if test.data == nil { + t.Errorf("MALFORMED TEST: %v", test) + continue + } + + if string(value.([]byte)) != test.data { + t.Errorf("Unexpected result on %s test '%s'", testKind, test.desc) + t.Log("Got: ", string(value.([]byte))) + t.Log("Expected:", test.data) + t.Log("Error: ", err) + } + } + } +} + +func TestSet(t *testing.T) { + runSetTests(t, "Set()", setTests, + func(test SetTest) (value interface{}, dataType ValueType, err error) { + value, err = Set([]byte(test.json), []byte(test.setData), test.path...) + return + }, + func(test SetTest, value interface{}) (bool, interface{}) { + expected := []byte(test.data.(string)) + return bytes.Equal(expected, value.([]byte)), expected + }, + ) +} + func TestGet(t *testing.T) { runGetTests(t, "Get()", getTests, func(test GetTest) (value interface{}, dataType ValueType, err error) {