From ea8082d1c09100e833bbff7458788ff7deb6677f Mon Sep 17 00:00:00 2001 From: Dominik Richter Date: Mon, 22 Jan 2024 00:42:10 -0800 Subject: [PATCH] =?UTF-8?q?=E2=AD=90=20dict.recurse(=20..=20)=20(#3082)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * ⭐ dict.recurse( .. ) Recurse through an arbitrary dict (JSON/YAML/...) structure and find entries that match a search criterion. For example, you can set up a data structure like this: ```json { "users": [ {"name": "bob"} ], "owners": { "admins": [ {"name": "joy", "isOwner": true} ] } } ``` Finding any user in this list is difficult with traditional mechanisms, since you'd have to understand the data structure and where to search. With `recurse` it is made much easier: ```coffee jdata.recurse( name != empty ) ``` ``` [ 0: { name: "bob" } 1: { isOwner: true name: "joy" } ] ``` You could then just grab the names and continue: ```coffee jdata.recurse( name != empty ).map(name) ``` ``` [ 0: "bob" 1: "joy" ] ``` Signed-off-by: Dominik Richter * 🟢 fix test for updated dummy.json Signed-off-by: Dominik Richter --------- Signed-off-by: Dominik Richter --- llx/builtin.go | 5 +- llx/builtin_map.go | 136 +++++++++++++++++- mqlc/builtin.go | 1 + mqlc/builtin_map.go | 20 ++- providers-sdk/v1/testutils/testdata/arch.json | 2 +- providers/os/resources/mql_test.go | 14 +- 6 files changed, 159 insertions(+), 19 deletions(-) diff --git a/llx/builtin.go b/llx/builtin.go index 02353cc94c..83c1f3ff4c 100644 --- a/llx/builtin.go +++ b/llx/builtin.go @@ -536,8 +536,9 @@ func init() { "trim": {f: dictTrimV2, Label: "trim"}, "keys": {f: dictKeysV2, Label: "keys"}, "values": {f: dictValuesV2, Label: "values"}, - "where": {f: dictWhereV2, Label: "where"}, - "$whereNot": {f: dictWhereNotV2}, + "where": {f: dictWhere, Label: "where"}, + "recurse": {f: dictRecurse, Label: "recurse"}, + "$whereNot": {f: dictWhereNot}, "$all": {f: dictAllV2}, "$none": {f: dictNoneV2}, "$any": {f: dictAnyV2}, diff --git 
a/llx/builtin_map.go b/llx/builtin_map.go index 712e1ef61d..d09ad19bd5 100644 --- a/llx/builtin_map.go +++ b/llx/builtin_map.go @@ -722,7 +722,13 @@ func _dictArrayWhere(e *blockExecutor, list []interface{}, chunk *Chunk, ref uin return nil, 0, nil } -func _dictWhereV2(e *blockExecutor, bind *RawData, chunk *Chunk, ref uint64, invert bool) (*RawData, uint64, error) { +// Query a dict object, cycling over all child objects it contains with the given +// query function. This only works in cases where a query function makes sense, +// like: +// - [a, b, c].query( F ) = [a, b] +// - {a: b, c: d}.query( F ) = {a: b} +// Note: Results get stored in cache for this ref +func _dictWhere(e *blockExecutor, bind *RawData, chunk *Chunk, ref uint64, invert bool) (*RawData, uint64, error) { itemsRef := chunk.Function.Args[0] items, rref, err := e.resolveValue(itemsRef, ref) if err != nil || rref > 0 { @@ -811,12 +817,132 @@ func _dictWhereV2(e *blockExecutor, bind *RawData, chunk *Chunk, ref uint64, inv return nil, 0, nil } -func dictWhereV2(e *blockExecutor, bind *RawData, chunk *Chunk, ref uint64) (*RawData, uint64, error) { - return _dictWhereV2(e, bind, chunk, ref, false) +func dictWhere(e *blockExecutor, bind *RawData, chunk *Chunk, ref uint64) (*RawData, uint64, error) { + return _dictWhere(e, bind, chunk, ref, false) } -func dictWhereNotV2(e *blockExecutor, bind *RawData, chunk *Chunk, ref uint64) (*RawData, uint64, error) { - return _dictWhereV2(e, bind, chunk, ref, true) +func dictWhereNot(e *blockExecutor, bind *RawData, chunk *Chunk, ref uint64) (*RawData, uint64, error) { + return _dictWhere(e, bind, chunk, ref, true) +} + +func filterList(e *blockExecutor, list []any, chunk *Chunk, ref uint64, invert bool) ([]any, uint64, error) { + arg1 := chunk.Function.Args[1] + fref, ok := arg1.RefV2() + if !ok { + return nil, 0, errors.New("failed to retrieve function reference (in dict list query)") + } + + dref, err := e.ensureArgsResolved(chunk.Function.Args[2:], ref) + if 
dref != 0 || err != nil { + return nil, dref, err + } + + argsList := make([][]*RawData, len(list)) + for i, value := range list { + argsList[i] = []*RawData{ + { + Type: types.Dict, + Value: i, + }, + { + Type: types.Dict, + Value: value, + }, + } + } + + var res []any + err = e.runFunctionBlocks(argsList, fref, func(results []arrayBlockCallResult, errs []error) { + resList := []any{} + for i, res := range results { + if res.isTruthy() == !invert { + key := argsList[i][0].Value.(int) + resList = append(resList, list[key]) + } + } + res = resList + }) + + if err != nil { + return nil, 0, err + } + + return res, 0, nil +} + +// The recurse function only works on lists and maps. It traverses child structures +// and finds any objects that match the given search function +func _dictRecurse(e *blockExecutor, bind *RawData, chunk *Chunk, ref uint64, invert bool) (*RawData, uint64, error) { + itemsRef := chunk.Function.Args[0] + items, rref, err := e.resolveValue(itemsRef, ref) + if err != nil || rref > 0 { + return nil, rref, err + } + + if items.Value == nil { + return &RawData{Type: items.Type}, 0, nil + } + + remaining := []any{items.Value} + res := []any{} + var candidate any + for len(remaining) != 0 { + candidate = remaining[0] + remaining = remaining[1:] + + if candidate == nil { + continue + } + + var list []any + if x, ok := candidate.([]any); ok { + list = x + } else if x, ok := candidate.(map[string]any); ok { + list = make([]any, len(x)) + i := 0 + for _, v := range x { + list[i] = v + i++ + } + } + + if len(list) == 0 { + continue + } + + partial, dref, err := filterList(e, list, chunk, ref, invert) + if dref != 0 || err != nil { + return nil, dref, err + } + res = append(res, partial...) 
+ + // we only add items to the remaining, that actually match our type requirements + // (this is trying to keep additional memory juggling to a minimum, instead + // of doing this one level deeper) + for i := range list { + li := list[i] + if x, ok := li.([]any); ok { + remaining = append(remaining, x) + } else if x, ok := li.(map[string]any); ok { + remaining = append(remaining, x) + } + } + } + + data := &RawData{ + Type: types.Dict, + Value: res, + } + e.cache.Store(ref, &stepCache{ + Result: data, + IsStatic: false, + }) + + return data, 0, nil +} + +func dictRecurse(e *blockExecutor, bind *RawData, chunk *Chunk, ref uint64) (*RawData, uint64, error) { + return _dictRecurse(e, bind, chunk, ref, false) } func dictAllV2(e *blockExecutor, bind *RawData, chunk *Chunk, ref uint64) (*RawData, uint64, error) { diff --git a/mqlc/builtin.go b/mqlc/builtin.go index efc4532f41..9894a60dbb 100644 --- a/mqlc/builtin.go +++ b/mqlc/builtin.go @@ -78,6 +78,7 @@ func init() { "first": {typ: dictType, signature: FunctionSignature{}}, "last": {typ: dictType, signature: FunctionSignature{}}, "where": {compile: compileDictWhere, signature: FunctionSignature{Required: 1, Args: []types.Type{types.FunctionLike}}}, + "recurse": {compile: compileDictRecurse, signature: FunctionSignature{Required: 1, Args: []types.Type{types.FunctionLike}}}, "contains": {compile: compileDictContains, typ: boolType, signature: FunctionSignature{Required: 1, Args: []types.Type{types.FunctionLike}}}, "in": {typ: boolType, signature: FunctionSignature{Required: 1, Args: []types.Type{types.Array(types.String)}}}, "containsOnly": {compile: compileDictContainsOnly, signature: FunctionSignature{Required: 1, Args: []types.Type{types.FunctionLike}}}, diff --git a/mqlc/builtin_map.go b/mqlc/builtin_map.go index 9f32476881..5b6df19116 100644 --- a/mqlc/builtin_map.go +++ b/mqlc/builtin_map.go @@ -11,7 +11,7 @@ import ( "go.mondoo.com/cnquery/v10/types" ) -func compileDictWhere(c *compiler, typ types.Type, ref 
uint64, id string, call *parser.Call) (types.Type, error) { +func compileDictQuery(c *compiler, typ types.Type, ref uint64, id string, call *parser.Call) (types.Type, error) { if call == nil { return types.Nil, errors.New("missing filter argument for calling '" + id + "'") } @@ -129,8 +129,16 @@ func compileDictWhere(c *compiler, typ types.Type, ref uint64, id string, call * return typ, nil } +func compileDictWhere(c *compiler, typ types.Type, ref uint64, id string, call *parser.Call) (types.Type, error) { + return compileDictQuery(c, typ, ref, id, call) +} + +func compileDictRecurse(c *compiler, typ types.Type, ref uint64, id string, call *parser.Call) (types.Type, error) { + return compileDictQuery(c, typ, ref, "recurse", call) +} + func compileDictContains(c *compiler, typ types.Type, ref uint64, id string, call *parser.Call) (types.Type, error) { - _, err := compileDictWhere(c, typ, ref, "where", call) + _, err := compileDictQuery(c, typ, ref, "where", call) if err != nil { return types.Nil, err } @@ -326,7 +334,7 @@ func compileDictContainsNone(c *compiler, typ types.Type, ref uint64, id string, } func compileDictAll(c *compiler, typ types.Type, ref uint64, id string, call *parser.Call) (types.Type, error) { - _, err := compileDictWhere(c, typ, ref, "$whereNot", call) + _, err := compileDictQuery(c, typ, ref, "$whereNot", call) if err != nil { return types.Nil, err } @@ -352,7 +360,7 @@ func compileDictAll(c *compiler, typ types.Type, ref uint64, id string, call *pa } func compileDictAny(c *compiler, typ types.Type, ref uint64, id string, call *parser.Call) (types.Type, error) { - _, err := compileDictWhere(c, typ, ref, "where", call) + _, err := compileDictQuery(c, typ, ref, "where", call) if err != nil { return types.Nil, err } @@ -378,7 +386,7 @@ func compileDictAny(c *compiler, typ types.Type, ref uint64, id string, call *pa } func compileDictOne(c *compiler, typ types.Type, ref uint64, id string, call *parser.Call) (types.Type, error) { - _, err := 
compileDictWhere(c, typ, ref, "where", call) + _, err := compileDictQuery(c, typ, ref, "where", call) if err != nil { return types.Nil, err } @@ -404,7 +412,7 @@ func compileDictOne(c *compiler, typ types.Type, ref uint64, id string, call *pa } func compileDictNone(c *compiler, typ types.Type, ref uint64, id string, call *parser.Call) (types.Type, error) { - _, err := compileDictWhere(c, typ, ref, "where", call) + _, err := compileDictQuery(c, typ, ref, "where", call) if err != nil { return types.Nil, err } diff --git a/providers-sdk/v1/testutils/testdata/arch.json b/providers-sdk/v1/testutils/testdata/arch.json index 4afec784a7..aad2905db3 100644 --- a/providers-sdk/v1/testutils/testdata/arch.json +++ b/providers-sdk/v1/testutils/testdata/arch.json @@ -133,7 +133,7 @@ "Fields": { "content": { "type": "\u0007", - "value": "{\n \"_\": null,\n \"true\": true,\n \"1\": 1,\n \"1.0\": 1.0,\n \"int-array\": [1,2,3],\n \"dict\": {\n \"ee\": 3,\n \"ej\": 4,\n \"ek\": 5\n },\n \"f\": [{\"ff\": 3}],\n \"string-array\": [\"a\", \"b\", \"c\"],\n \"hello\": \"hello\",\n \"date\": \"2016-01-28T23:02:24Z\",\n \"aoa\": [[1, 2], 3]\n}\n" + "value": "{\n \"_\": null,\n \"true\": true,\n \"1\": 1,\n \"1.0\": 1.0,\n \"int-array\": [1,2,3],\n \"dict\": {\n \"ee\": 3,\n \"ej\": 4,\n \"ek\": 5\n },\n \"f\": [{\"ff\": 3}],\n \"string-array\": [\"a\", \"b\", \"c\"],\n \"hello\": \"hello\",\n \"date\": \"2016-01-28T23:02:24Z\",\n \"aoa\": [[1, 2], 3],\n \"users\": [\n {\n \"name\": \"yor\",\n \"children\": [\n {\"name\": \"anya\"}\n ]\n },\n {\"name\": \"loid\"}\n ],\n \"zzzlast\": \"🌒\"}\n" }, "path": { "type": "\u0007", diff --git a/providers/os/resources/mql_test.go b/providers/os/resources/mql_test.go index 14690204f6..b725a74689 100644 --- a/providers/os/resources/mql_test.go +++ b/providers/os/resources/mql_test.go @@ -71,7 +71,7 @@ func TestOS_Vars(t *testing.T) { x.TestSimple(t, []testutils.SimpleTest{ { Code: "p = file('/dummy.json'); parse.json(file: p).params.length", - 
Expectation: int64(11), + Expectation: int64(13), }, }) } @@ -97,15 +97,15 @@ func TestMap(t *testing.T) { }, { Code: "parse.json('/dummy.json').params.length", - Expectation: int64(11), + Expectation: int64(13), }, { Code: "parse.json('/dummy.json').params.keys.length", - Expectation: int64(11), + Expectation: int64(13), }, { Code: "parse.json('/dummy.json').params.values.length", - Expectation: int64(11), + Expectation: int64(13), }, { Code: "parse.json('/dummy.json').params { _['Protocol'] != 1 }", @@ -383,6 +383,10 @@ func TestDict_Methods_Map(t *testing.T) { Code: p + "params['string-array'].where(_ == 'a')", Expectation: []interface{}{"a"}, }, + { + Code: p + "params.users.recurse(name != empty).map(name)", + Expectation: []any{"yor", "loid", "anya"}, + }, { Code: p + "params['string-array'].in(['a', 'b', 'c'])", Expectation: true, @@ -457,7 +461,7 @@ func TestDict_Methods_Map(t *testing.T) { }, { Code: p + "params.last", - Expectation: true, + Expectation: "🌒", }, { Code: p + "params['aoa'].flat",