-
Notifications
You must be signed in to change notification settings - Fork 23
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
From the code docs: Assets are generally structured in a giant graph. However, we often find it difficult to reason with arbitrary graphs. As humans, we tend to group assets into hierarchical tree structures, that make it easy for us to put them into a box and reason about them. For example: A technology-centric view of the world would group a VM in a cloud environment like this: /aws/accountX/ec2/instances/linux/debian/8.0 Every entry in this path structure follows a strict schema. Thus "aws" above is the chosen path value for the key "technology". As you can see, some keys lead to predefined (limited) values (technology can be aws, azure, os, k8s, etc), while other keys can have (almost) arbitrary values (eg account). Providers create this schema and may extend this schema. Providers cannot create conflicting entries in this schema. Assets can belong to multiple URLs at the same time, which allows us to look at it from different perspectives. URLs enable fast lookup, but do not restrict in terms of the search. This supports looking at e.g. linux instances on all kinds of environments and runtimes. Signed-off-by: Dominik Richter <[email protected]>
- Loading branch information
Showing
5 changed files
with
1,013 additions
and
349 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,366 @@ | ||
// Copyright (c) Mondoo, Inc. | ||
// SPDX-License-Identifier: BUSL-1.1 | ||
|
||
package inventory | ||
|
||
import ( | ||
"errors" | ||
"regexp" | ||
"sort" | ||
"strings" | ||
) | ||
|
||
// # AssetUrl | ||
// | ||
// Assets are generally structured in a giant graph. However, we often find | ||
// it difficult to reason with arbitrary graphs. As humans, we tend to | ||
// group assets into hierarchical tree structures, that make it easy for us | ||
// to put them into a box and reason about them. | ||
// | ||
// For example: A technology-centric view of the world would group a VM | ||
// in a cloud environment like this: | ||
// /aws/accountX/ec2/instances/linux/debian/8.0 | ||
// | ||
// Every entry in this path structure follows a strict schema. Thus "aws" above | ||
// is the chosen path value for the key "technology". As you can see, some | ||
// keys lead to predefined (limited) values (technology can be aws, azure, os, | ||
// k8s, etc), while other keys can have (almost) arbitrary values (eg account). | ||
// | ||
// Providers create this schema and may extend this schema. Providers cannot | ||
// create conflicting entries in this schema. | ||
// | ||
// Assets can belong to multiple URLs at the same time, which allows us to | ||
// look at them from different perspectives. | ||
// | ||
// URLs enable fast lookup, but do not restrict in terms of the search. | ||
// This supports looking at e.g. linux instances on all kinds of environments | ||
// and runtimes. | ||
|
||
// AssetUrlSchema defines the structure for an AssetUrl. | ||
type AssetUrlSchema struct { | ||
root *AssetUrlBranch | ||
|
||
// Possible keys that exist at any layer in this structure | ||
keys map[string][]*AssetUrlBranch | ||
} | ||
|
||
type (
	// KV is a single key/value pair, i.e. one segment of an asset url.
	KV struct {
		Key, Value string
	}

	// AssetUrlChain is an ordered list of key/value segments that
	// together form an asset url path.
	AssetUrlChain []KV
)
|
||
func NewAssetUrlChain(segments []string) ([]KV, error) { | ||
res := make([]KV, len(segments)) | ||
for i, segment := range segments { | ||
if len(segment) > ASSETURL_MAX_KEY_CHARS+ASSETURL_MAX_VALUE_CHARS { | ||
return nil, errors.New("asset url path segment is too long") | ||
} | ||
KVs := strings.Split(segment, "=") | ||
if len(KVs) != 2 { | ||
return nil, errors.New("asset url path segment must be formatted as key=value") | ||
} | ||
res[i].Key = KVs[0] | ||
res[i].Value = KVs[1] | ||
} | ||
return res, nil | ||
} | ||
|
||
// Size limits that apply to asset url paths, keys and values.
const (
	// ASSETURL_MAX_DEPTH is the maximum number of segments a path may
	// have when it is looked up in a schema.
	ASSETURL_MAX_DEPTH = 100
	// ASSETURL_MAX_KEY_CHARS is the maximum length of a branch key, in bytes.
	ASSETURL_MAX_KEY_CHARS = 100
	// ASSETURL_MAX_VALUE_CHARS is the maximum length of a branch value, in bytes.
	ASSETURL_MAX_VALUE_CHARS = 200
)
|
||
var (
	// assetUrlKeyRegex matches valid branch keys: one or more lowercase
	// letters, digits, underscores or dashes.
	assetUrlKeyRegex = regexp.MustCompile(`^[a-z0-9_-]+$`)
	// assetUrlValueRegex matches valid branch values: one or more letters,
	// digits, underscores, spaces, dots or dashes.
	assetUrlValueRegex = regexp.MustCompile(`^[A-Za-z0-9_ .-]+$`)
)
|
||
func validateKey(key string) error { | ||
if len(key) > ASSETURL_MAX_KEY_CHARS { | ||
return errors.New("asset url branch key is too long: " + key[0:100] + "...") | ||
} | ||
if key == "" { | ||
return errors.New("asset url branch key cannot be empty") | ||
} | ||
if !assetUrlKeyRegex.MatchString(key) { | ||
return errors.New("asset url branch key '" + key + "' must only contain valid characters: " + assetUrlKeyRegex.String()) | ||
} | ||
return nil | ||
} | ||
|
||
func validateValue(value string) error { | ||
if len(value) > ASSETURL_MAX_VALUE_CHARS { | ||
return errors.New("asset url branch value is too long: " + value[0:100] + "...") | ||
} | ||
if value == "" { | ||
return errors.New("asset url branch value cannot be empty") | ||
} | ||
if value == "*" { | ||
return nil | ||
} | ||
if !assetUrlValueRegex.MatchString(value) { | ||
return errors.New("asset url branch value '" + value + "' must only contain valid characters: " + assetUrlKeyRegex.String()) | ||
} | ||
return nil | ||
} | ||
|
||
func newAssetUrlSchema(rootKey string) (*AssetUrlSchema, error) { | ||
if err := validateKey(rootKey); err != nil { | ||
return nil, err | ||
} | ||
|
||
return &AssetUrlSchema{ | ||
root: &AssetUrlBranch{ | ||
Key: rootKey, | ||
Values: map[string]*AssetUrlBranch{}, | ||
Depth: 1, | ||
}, | ||
}, nil | ||
} | ||
|
||
func (a *AssetUrlSchema) Add(branch *AssetUrlBranch) error { | ||
if branch == nil { | ||
return errors.New("cannot attach empty asset url branch") | ||
} | ||
if len(branch.Path) == 0 { | ||
return errors.New("don't know where to attach asset url branch") | ||
} | ||
|
||
urlChain, err := NewAssetUrlChain(branch.Path) | ||
if err != nil { | ||
return err | ||
} | ||
|
||
found, lastKey, err := a.root.FindPath(urlChain) | ||
if err != nil { | ||
return errors.New("failed to add: " + err.Error()) | ||
} | ||
|
||
if found == nil { | ||
return errors.New("failed to attach asset url branch to any existing subtree for: " + strings.Join(branch.Path, "/")) | ||
} | ||
|
||
if err = branch.validate(); err != nil { | ||
return errors.New("failed to add url branch: " + err.Error()) | ||
} | ||
|
||
branch.setDepth(found.Depth + 1) | ||
found.Values[lastKey] = branch | ||
return nil | ||
} | ||
|
||
func (a *AssetUrlBranch) setDepth(i uint32) { | ||
a.Depth = i | ||
next := i + 1 | ||
for _, v := range a.Values { | ||
if v != nil { | ||
v.setDepth(next) | ||
} | ||
} | ||
} | ||
|
||
func (a *AssetUrlBranch) validate() error { | ||
branches := []*AssetUrlBranch{a} | ||
i := 0 | ||
for i < len(branches) { | ||
branch := branches[i] | ||
i++ | ||
|
||
if len(branch.Reference) != 0 { | ||
if len(branch.Key) != 0 { | ||
return errors.New("asset url segment with reference cannot have a key set") | ||
} | ||
if len(branch.Values) != 0 { | ||
return errors.New("asset url segment with reference cannot have values set") | ||
} | ||
continue | ||
} | ||
|
||
if err := validateKey(branch.Key); err != nil { | ||
return err | ||
} | ||
|
||
for value, next := range branch.Values { | ||
if err := validateValue(value); err != nil { | ||
return err | ||
} | ||
if next != nil { | ||
branches = append(branches, next) | ||
} | ||
} | ||
} | ||
|
||
return nil | ||
} | ||
|
||
func (a *AssetUrlBranch) FindPath(path AssetUrlChain) (*AssetUrlBranch, string, error) { | ||
if len(path) > ASSETURL_MAX_DEPTH { | ||
return nil, "", errors.New("asset url branch path is too long") | ||
} | ||
|
||
curBranch := a | ||
for segmentIdx, segment := range path { | ||
key := segment.Key | ||
if key != curBranch.Key { | ||
return nil, "", errors.New("asset url path key is invalid (expected '" + curBranch.Key + "', got '" + key + "')") | ||
} | ||
|
||
value := segment.Value | ||
if err := validateValue(value); err != nil { | ||
return nil, "", err | ||
} | ||
|
||
// ending condition on the last element | ||
if segmentIdx == len(path)-1 { | ||
return curBranch, value, nil | ||
} | ||
|
||
if curBranch.Values == nil { | ||
return nil, "", errors.New("asset url search ended prematurely, no more keys in this chain") | ||
} | ||
|
||
branch, ok := curBranch.Values[value] | ||
if !ok { | ||
return nil, "", errors.New("cannot find asset url branch for '" + key + "=" + value + "'") | ||
} | ||
if branch == nil { | ||
return nil, "", errors.New("ran into premature end for asset url branch '" + key + "=" + value + "'") | ||
} | ||
curBranch = branch | ||
} | ||
|
||
return curBranch, "", nil | ||
} | ||
|
||
func (a *AssetUrlSchema) cloneBranch(branch *AssetUrlBranch, depth uint32, isDereferenced bool) (*AssetUrlBranch, error) { | ||
if depth > 1000 { | ||
return nil, errors.New("maximum depth reached for asset url during clone (look for circular branch references)") | ||
} | ||
|
||
if len(branch.Reference) != 0 { | ||
if isDereferenced { | ||
return nil, errors.New("dereferenced an asset url branch with more references (reference to = '" + strings.Join(branch.Reference, "/") + "')") | ||
} | ||
|
||
urlChain, err := NewAssetUrlChain(branch.Reference) | ||
if err != nil { | ||
return nil, err | ||
} | ||
|
||
found, lastKey, err := a.root.FindPath(urlChain) | ||
if err != nil { | ||
return nil, errors.New("failed to add asset url reference: " + err.Error()) | ||
} | ||
|
||
branch = found.Values[lastKey] | ||
return a.cloneBranch(branch, depth, true) | ||
} | ||
|
||
res := &AssetUrlBranch{ | ||
Key: branch.Key, | ||
Title: branch.Title, | ||
Values: make(map[string]*AssetUrlBranch, len(branch.Values)), | ||
Depth: depth, | ||
} | ||
|
||
for k, v := range branch.Values { | ||
if v == nil { | ||
res.Values[k] = nil | ||
continue | ||
} | ||
|
||
b, err := a.cloneBranch(v, depth+1, false) | ||
if err != nil { | ||
return nil, err | ||
} | ||
b.ParentValue = k | ||
b.Parent = res | ||
res.Values[k] = b | ||
} | ||
|
||
return res, nil | ||
} | ||
|
||
func (a *AssetUrlSchema) RefreshCache() error { | ||
a.keys = map[string][]*AssetUrlBranch{} | ||
|
||
branches := []*AssetUrlBranch{a.root} | ||
i := 0 | ||
for i < len(branches) { | ||
branch := branches[i] | ||
i++ | ||
|
||
if len(branch.Reference) != 0 { | ||
res, err := a.cloneBranch(branch, branch.Depth, false) | ||
if err != nil { | ||
return err | ||
} | ||
|
||
branch.Key = res.Key | ||
branch.Title = res.Title | ||
branch.Values = res.Values | ||
} | ||
|
||
a.keys[branch.Key] = append(a.keys[branch.Key], branch) | ||
|
||
for k, next := range branch.Values { | ||
if next != nil { | ||
next.Parent = branch | ||
next.ParentValue = k | ||
branches = append(branches, next) | ||
} | ||
} | ||
} | ||
|
||
return nil | ||
} | ||
|
||
func (a *AssetUrlSchema) BuildQueries(kvs []KV) []AssetUrlChain { | ||
var nodes []*AssetUrlBranch | ||
var values []string | ||
for i := range kvs { | ||
kv := kvs[i] | ||
nuNodes := a.keys[kv.Key] | ||
nodes = append(nodes, nuNodes...) | ||
for i := 0; i < len(nuNodes); i++ { | ||
values = append(values, kv.Value) | ||
} | ||
} | ||
|
||
sort.SliceStable(nodes, func(i, j int) bool { | ||
return nodes[i].Depth < nodes[j].Depth | ||
}) | ||
|
||
var res []AssetUrlChain | ||
for len(nodes) != 0 { | ||
lastIdx := len(nodes) - 1 | ||
cur := nodes[lastIdx] | ||
curKey := values[lastIdx] | ||
nodes = nodes[:lastIdx] | ||
values = values[:lastIdx] | ||
|
||
res = append(res, buildParentQuery(cur, curKey)) | ||
} | ||
return res | ||
} | ||
|
||
func buildParentQuery(leaf *AssetUrlBranch, value string) AssetUrlChain { | ||
res := make([]KV, leaf.Depth) | ||
|
||
cur := leaf | ||
curValue := value | ||
for cur != nil { | ||
res[cur.Depth-1] = KV{ | ||
Key: cur.Key, | ||
Value: curValue, | ||
} | ||
|
||
curValue = cur.ParentValue | ||
cur = cur.Parent | ||
} | ||
|
||
return res | ||
} |
Oops, something went wrong.