diff --git a/go.mod b/go.mod index bb4f1e063..31b65e7f8 100644 --- a/go.mod +++ b/go.mod @@ -73,6 +73,7 @@ require ( require ( connectrpc.com/grpchealth v1.3.0 // indirect connectrpc.com/otelconnect v0.7.0 // indirect + github.com/alecthomas/participle v0.7.1 // indirect github.com/bobg/go-generics/v2 v2.1.1 // indirect github.com/felixge/httpsnoop v1.0.4 // indirect github.com/google/s2a-go v0.1.7 // indirect diff --git a/go.sum b/go.sum index 02c29b97d..5da2f9c9b 100644 --- a/go.sum +++ b/go.sum @@ -115,6 +115,9 @@ github.com/abourget/llerrgroup v0.2.0/go.mod h1:QukSa1Sim/0R4aRlWdiBdAy+0i1PBfOd github.com/alecthomas/chroma v0.10.0 h1:7XDcGkCQopCNKjZHfYrNLraA+M7e0fMiJ/Mfikbfjek= github.com/alecthomas/chroma v0.10.0/go.mod h1:jtJATyUxlIORhUOFNA9NZDWGAQ8wpxQQqNSB4rjA/1s= github.com/alecthomas/gometalinter v2.0.11+incompatible/go.mod h1:qfIpQGGz3d+NmgyPBqv+LSh50emm1pt72EtcX2vKYQk= +github.com/alecthomas/participle v0.7.1 h1:2bN7reTw//5f0cugJcTOnY/NYZcWQOaajW+BwZB5xWs= +github.com/alecthomas/participle v0.7.1/go.mod h1:HfdmEuwvr12HXQN44HPWXR0lHmVolVYe4dyL6lQ3duY= +github.com/alecthomas/repr v0.0.0-20181024024818-d37bc2a10ba1/go.mod h1:xTS7Pm1pD1mvyM075QCDSRqH6qRLXylzS24ZTpRiSzQ= github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY= github.com/atotto/clipboard v0.1.4 h1:EH0zSVneZPSuFR11BlR9YppQTVDbh5+16AmcJi4g1z4= github.com/atotto/clipboard v0.1.4/go.mod h1:ZY9tmq7sm5xIbd9bOK4onWV4S6X0u6GY7Vn0Yu86PYI= diff --git a/pb/generate.sh b/pb/generate.sh index f1f6bbb65..db4dc4376 100755 --- a/pb/generate.sh +++ b/pb/generate.sh @@ -1,5 +1,5 @@ #!/bin/bash -u -# Copyright 2019 dfuse Platform Inc. +# Copyright 2024 StreamingFast Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/sqe/api.go b/sqe/api.go new file mode 100644 index 000000000..b793304f4 --- /dev/null +++ b/sqe/api.go @@ -0,0 +1,54 @@ +package sqe + +import ( + "context" + "fmt" +) + +// ExtractAllKeys returns all used field names in the AST. There +// is **NO** ordering on the elements, i.e. they might not come in the +// same order specified in the AST. +func ExtractAllKeys(expression Expression) (out []string) { + uniqueFieldNames := map[string]bool{} + onExpression := func(_ context.Context, expr Expression) error { + if v, ok := expr.(*KeyTerm); ok { + uniqueFieldNames[v.Value.Value] = true + } + + return nil + } + + visitor := NewDepthFirstVisitor(nil, onExpression) + expression.Visit(context.Background(), visitor) + + i := 0 + out = make([]string, len(uniqueFieldNames)) + for fieldName := range uniqueFieldNames { + out[i] = fieldName + i++ + } + + return +} + +func TransformExpression(expr Expression, transformer FieldTransformer) error { + if transformer == nil { + return nil + } + + onExpression := func(_ context.Context, expr Expression) error { + v, ok := expr.(*KeyTerm) + if !ok { + return nil + } + + if err := transformer.TransformStringLiteral("", v.Value); err != nil { + return fmt.Errorf("key %q transformation failed: %s", v.Value.Value, err) + } + + return nil + } + + visitor := NewDepthFirstVisitor(nil, onExpression) + return expr.Visit(context.Background(), visitor) +} diff --git a/sqe/api_test.go b/sqe/api_test.go new file mode 100644 index 000000000..08fdeaa52 --- /dev/null +++ b/sqe/api_test.go @@ -0,0 +1,150 @@ +// Copyright 2024 StreamingFast Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package sqe + +import ( + "context" + "fmt" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// func TestExpressionToBleveQuery(t *testing.T) { +// tests := []struct { +// in string +// expectBleve string +// }{ +// { +// in: "account:eoscanadacom", +// expectBleve: `{"term":"eoscanadacom","field":"account"}`, +// }, +// { +// in: "data.active:true", +// expectBleve: `{"bool":true,"field":"data.active"}`, +// }, +// { +// in: "data.active:false", +// expectBleve: `{"bool":false,"field":"data.active"}`, +// }, +// { +// in: `data.active:"true"`, +// expectBleve: `{"term":"true","field":"data.active"}`, +// }, +// { +// in: "receiver:eoscanadacom account:eoscanadacom", +// expectBleve: `{"conjuncts":[{"term":"eoscanadacom","field":"receiver"},{"term":"eoscanadacom","field":"account"}]}`, +// }, +// { +// in: "account:eoscanadacom receiver:eoscanadacom", +// expectBleve: `{"conjuncts":[{"term":"eoscanadacom","field":"account"},{"term":"eoscanadacom","field":"receiver"}]}`, +// }, +// { +// in: "receiver:eoscanadacom (action:transfer OR action:issue)", +// expectBleve: `{"conjuncts":[{"term":"eoscanadacom","field":"receiver"},{"disjuncts":[{"term":"transfer","field":"action"},{"term":"issue","field":"action"}],"min":1}]}`, +// }, +// { +// in: "receiver:eoscanadacom -(action:transfer OR action:issue)", +// expectBleve: 
`{"conjuncts":[{"term":"eoscanadacom","field":"receiver"},{"must_not":{"disjuncts":[{"disjuncts":[{"term":"transfer","field":"action"},{"term":"issue","field":"action"}],"min":1}],"min":0}}]}`, +// }, +// { +// in: "-receiver:eoscanadacom (action:transfer OR action:issue)", +// expectBleve: `{"conjuncts":[{"must_not":{"disjuncts":[{"term":"eoscanadacom","field":"receiver"}],"min":0}},{"disjuncts":[{"term":"transfer","field":"action"},{"term":"issue","field":"action"}],"min":1}]}`, +// }, +// { +// in: "-action:patate", +// expectBleve: `{"must_not":{"disjuncts":[{"term":"patate","field":"action"}],"min":0}}`, +// }, +// { +// in: "receiver:eoscanadacom (action:transfer OR action:issue) account:eoscanadacom (data.from:eoscanadacom OR data.to:eoscanadacom)", +// expectBleve: `{ +// "conjuncts": [ +// { "term": "eoscanadacom", "field": "receiver" }, +// { "disjuncts": [ +// { "term": "transfer", "field": "action" }, +// { "term": "issue", "field": "action" } +// ], "min": 1 +// }, +// { "term": "eoscanadacom", "field": "account" }, +// { "disjuncts": [ +// { "term": "eoscanadacom", "field": "data.from" }, +// { "term": "eoscanadacom", "field": "data.to" } +// ], "min": 1 +// } +// ] +// }`, +// }, +// } + +// for idx, test := range tests { +// t.Run(fmt.Sprintf("index %d", idx+1), func(t *testing.T) { +// ast, err := Parse(context.Background(), test.in) +// require.NoError(t, err) + +// res := ExpressionToBleve(ast) + +// cnt, err := json.Marshal(res) +// require.NoError(t, err) +// assert.JSONEq(t, test.expectBleve, string(cnt), "Failed on SQE %q, got %s", test.in, string(cnt)) +// }) +// } +// } + +func TestExtractAllKeys(t *testing.T) { + tests := []struct { + in string + expectedKeys []string + }{ + { + "account", + []string{"account"}, + }, + { + "data.active", + []string{"data.active"}, + }, + { + "data.active", + []string{"data.active"}, + }, + { + `"data.active"`, + []string{"data.active"}, + }, + { + "receiver account", + []string{"receiver", "account"}, + }, 
+ { + "receiver (action || action)", + []string{"receiver", "action"}, + }, + { + "receiver (action || action) account (data.from || data.to)", + []string{"receiver", "action", "account", "data.from", "data.to"}, + }, + } + + for idx, test := range tests { + t.Run(fmt.Sprintf("index %d", idx+1), func(t *testing.T) { + ast, err := Parse(context.Background(), test.in) + require.NoError(t, err) + + actuals := ExtractAllKeys(ast) + assert.ElementsMatch(t, test.expectedKeys, actuals, "Mistmatch for SQE %q", test.in) + }) + } +} diff --git a/sqe/errors.go b/sqe/errors.go new file mode 100644 index 000000000..52526d38f --- /dev/null +++ b/sqe/errors.go @@ -0,0 +1,39 @@ +package sqe + +import ( + "fmt" + + lex "github.com/alecthomas/participle/lexer" +) + +type ParseError struct { + message string + position lex.Position +} + +func parserError(message string, position lex.Position) *ParseError { + return &ParseError{ + message: message, + position: position, + } +} + +func rangeParserError(message string, start lex.Position, end lex.Position) *ParseError { + return &ParseError{ + message: message, + position: lex.Position{ + Filename: start.Filename, + Offset: start.Offset, + Line: start.Line, + Column: end.Column, + }, + } +} + +func (e *ParseError) Error() string { + if e.position.Line <= 1 { + return fmt.Sprintf("%s at column %d", e.message, e.position.Offset) + } + + return fmt.Sprintf("%s at line %d column %d", e.message, e.position.Line, e.position.Column) +} diff --git a/sqe/init_test.go b/sqe/init_test.go new file mode 100644 index 000000000..24d609881 --- /dev/null +++ b/sqe/init_test.go @@ -0,0 +1,79 @@ +package sqe + +import ( + "context" + "fmt" + "io" + "strings" +) + +func expressionToString(expression Expression) string { + builder := &strings.Builder{} + visitor := &TestVisitor{ + writer: builder, + } + + expression.Visit(context.Background(), visitor) + + return builder.String() +} + +type TestVisitor struct { + writer io.Writer +} + +func (v *TestVisitor) 
Visit_And(ctx context.Context, e *AndExpression) error { + return v.visit_binary(ctx, "<", "&&", ">", e.Children) +} + +func (v *TestVisitor) Visit_Or(ctx context.Context, e *OrExpression) error { + return v.visit_binary(ctx, "[", "||", "]", e.Children) +} + +func (v *TestVisitor) visit_binary(ctx context.Context, opStart, op, opEnd string, children []Expression) error { + v.print(opStart) + + for i, child := range children { + if i != 0 { + v.print(" %s ", op) + } + + child.Visit(ctx, v) + } + v.print(opEnd) + + return nil +} + +func (v *TestVisitor) Visit_Parenthesis(ctx context.Context, e *ParenthesisExpression) error { + v.print("(") + e.Child.Visit(ctx, v) + v.print(")") + + return nil +} + +func (v *TestVisitor) Visit_Not(ctx context.Context, e *NotExpression) error { + v.print("!") + e.Child.Visit(ctx, v) + + return nil +} + +func (v *TestVisitor) Visit_KeyTerm(ctx context.Context, e *KeyTerm) error { + v.printStringLiteral(e.Value) + return nil +} + +func (v *TestVisitor) printStringLiteral(literal *StringLiteral) error { + if literal.QuotingChar != "" { + return v.print("%s%s%s", literal.QuotingChar, literal.Value, literal.QuotingChar) + } + + return v.print(literal.Value) +} + +func (v *TestVisitor) print(message string, args ...interface{}) error { + fmt.Fprintf(v.writer, message, args...) 
+ return nil +} diff --git a/sqe/lexer.go b/sqe/lexer.go new file mode 100644 index 000000000..331cf7157 --- /dev/null +++ b/sqe/lexer.go @@ -0,0 +1,111 @@ +package sqe + +import ( + "fmt" + "io" + + lex "github.com/alecthomas/participle/lexer" +) + +type lexer struct { + *lex.PeekingLexer + + symbols map[rune]string +} + +func newLexer(reader io.Reader) (*lexer, error) { + l, err := lexerDefinition.Lex(reader) + if err != nil { + return nil, fmt.Errorf("new lexer: %s", err) + } + + peekingLexer, err := lex.Upgrade(l) + if err != nil { + return nil, fmt.Errorf("peekable lexer: %s", err) + } + + return &lexer{ + PeekingLexer: peekingLexer, + symbols: lex.SymbolsByRune(lexerDefinition), + }, nil +} + +func (l *lexer) skipSpaces() { + for { + token, err := l.Peek(0) + if err != nil || !l.isSpace(token) { + return + } + + l.mustLexNext() + } +} + +func (l *lexer) mustLexNext() lex.Token { + token, err := l.Next() + if err != nil { + panic(err) + } + + return token +} + +func (l *lexer) peekPos() lex.Position { + peek, err := l.Peek(0) + if err != nil { + return lex.Position{Filename: "", Line: 1, Offset: l.PeekingLexer.Length() - 1, Column: l.PeekingLexer.Length()} + } + + return peek.Pos +} + +var lexerDefinition = lex.Must(lex.Regexp( + `(?m)` + + `(?P"|')` + + // `|(?P,)` + + `|(?P\-)` + + `|(?P\|\|)` + + `|(?P&&)` + + `|(?P\()` + + `|(?P\))` + + // `|(?P\[)` + + // `|(?P\])` + + `|(?P[^\s'"\-\(\)][^\s'"\(\)]*)` + + `|(?P\s+)`, +)) + +func (l *lexer) isSpace(t lex.Token) bool { return l.isTokenType(t, "Space") } +func (l *lexer) isQuoting(t lex.Token) bool { return l.isTokenType(t, "Quoting") } + +// func (l *lexer) isColon(t lex.Token) bool { return l.isTokenType(t, "Colon") } +// func (l *lexer) isComma(t lex.Token) bool { return l.isTokenType(t, "Comma") } +func (l *lexer) isNotOperator(t lex.Token) bool { return l.isTokenType(t, "NotOperator") } +func (l *lexer) isOrOperator(t lex.Token) bool { return l.isTokenType(t, "OrOperator") } +func (l *lexer) 
isAndOperator(t lex.Token) bool { return l.isTokenType(t, "AndOperator") } +func (l *lexer) isLeftParenthesis(t lex.Token) bool { return l.isTokenType(t, "LeftParenthesis") } +func (l *lexer) isRightParenthesis(t lex.Token) bool { return l.isTokenType(t, "RightParenthesis") } + +// func (l *lexer) isLeftSquareBracket(t lex.Token) bool { return l.isTokenType(t, "LeftSquareBracket") } +// func (l *lexer) isRightSquareBracket(t lex.Token) bool { return l.isTokenType(t, "RightSquareBracket") } +func (l *lexer) isName(t lex.Token) bool { return l.isTokenType(t, "Name") } + +func (l *lexer) isTokenType(token lex.Token, expectedType string) bool { + return l.symbols[token.Type] == expectedType +} + +func (l *lexer) isBinaryOperator(t lex.Token) bool { + return l.isAnyTokenType(t, "AndOperator", "OrOperator") +} + +func (l *lexer) isAnyTokenType(token lex.Token, expectedTypes ...string) bool { + for _, expectedType := range expectedTypes { + if l.symbols[token.Type] == expectedType { + return true + } + } + return false +} + +func (l *lexer) getTokenType(token lex.Token) string { + return l.symbols[token.Type] +} diff --git a/sqe/lexer_test.go b/sqe/lexer_test.go new file mode 100644 index 000000000..00406eb8c --- /dev/null +++ b/sqe/lexer_test.go @@ -0,0 +1,58 @@ +package sqe + +import ( + "bytes" + "fmt" + "testing" + + lex "github.com/alecthomas/participle/lexer" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestLexer(t *testing.T) { + tests := []struct { + name string + sqe string + tokens []string + }{ + {"minus_followed_by_name", `-token`, []string{"NotOperator", "Name", "EOF"}}, + + {"name_with_inside_minus", `open-token`, []string{"Name", "EOF"}}, + + // {"legacy_and", `AND`, []string{"AndOperator", "EOF"}}, + {"new_and", `&&`, []string{"AndOperator", "EOF"}}, + + // {"legacy_or", `OR`, []string{"OrOperator", "EOF"}}, + {"new_or", `||`, []string{"OrOperator", "EOF"}}, + + {"quoting characters start", `'some "some`, 
[]string{"Quoting", "Name", "Space", "Quoting", "Name", "EOF"}}, + {"quoting characters end", `some' some"`, []string{"Name", "Quoting", "Space", "Name", "Quoting", "EOF"}}, + + // {"square_brackets", `[field, "double quoted"]`, []string{"LeftSquareBracket", "Name", "Comma", "Space", "Quoting", "Name", "Space", "Name", "Quoting", "RightSquareBracket", "EOF"}}, + + {"expresion_with_and", `action && field`, []string{"Name", "Space", "AndOperator", "Space", "Name", "EOF"}}, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + actual := tokensList(t, test.sqe) + + assert.Equal(t, test.tokens, actual) + }) + } +} + +func tokensList(t *testing.T, input string) (out []string) { + lexer, err := newLexer(bytes.NewBufferString(input)) + require.NoError(t, err) + + tokens, err := lex.ConsumeAll(lexer.PeekingLexer) + require.NoError(t, err) + + for _, token := range tokens { + out = append(out, fmt.Sprintf("%s", lexer.getTokenType(token))) + } + + return +} diff --git a/sqe/optimizer.go b/sqe/optimizer.go new file mode 100644 index 000000000..63f297729 --- /dev/null +++ b/sqe/optimizer.go @@ -0,0 +1,33 @@ +package sqe + +import ( + "context" + "fmt" +) + +func optimizeExpression(ctx context.Context, expr Expression) Expression { + visitor := NewDepthFirstVisitor(nil, func(_ context.Context, expr Expression) error { + v, ok := expr.(*OrExpression) + if !ok { + return nil + } + + newChildren := make([]Expression, 0, len(v.Children)) + for _, child := range v.Children { + if w, ok := child.(*OrExpression); ok { + newChildren = append(newChildren, w.Children...) 
+ } else { + newChildren = append(newChildren, child) + } + } + + v.Children = newChildren + return nil + }) + + if err := expr.Visit(ctx, visitor); err != nil { + panic(fmt.Errorf("optimizer visitor is never expected to return error, something changed: %w", err)) + } + + return expr +} diff --git a/sqe/optimizer_test.go b/sqe/optimizer_test.go new file mode 100644 index 000000000..4f6f9ab0c --- /dev/null +++ b/sqe/optimizer_test.go @@ -0,0 +1,120 @@ +package sqe + +import ( + "context" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestOptimizer(t *testing.T) { + tests := []struct { + name string + expr Expression + expected string + }{ + { + "top_or_no_or_children", + orExpr(keyTermExpr("a1"), keyTermExpr("a2")), + `[a1 || a2]`, + }, + { + "top_or_single_or_children", + orExpr(orExpr(keyTermExpr("a1"), keyTermExpr("a2")), keyTermExpr("b2")), + `[a1 || a2 || b2]`, + }, + { + "top_or_multiple_or_children", + orExpr( + orExpr(keyTermExpr("a1"), keyTermExpr("a2")), + orExpr(keyTermExpr("c1"), keyTermExpr("c2")), + ), + `[a1 || a2 || c1 || c2]`, + }, + { + "top_or_mixed_multiple_or_children", + orExpr( + keyTermExpr("before2"), + orExpr(keyTermExpr("a1"), keyTermExpr("a2")), + andExpr(keyTermExpr("middle1"), keyTermExpr("middle2")), + orExpr(keyTermExpr("c1"), keyTermExpr("c2")), + notExpr(keyTermExpr("after3")), + ), + `[before2 || a1 || a2 || || c1 || c2 || !after3]`, + }, + + { + "or_in_not_multiple_or_children", + notExpr( + orExpr( + orExpr(keyTermExpr("a1"), keyTermExpr("a2")), + orExpr(keyTermExpr("c1"), keyTermExpr("c2")), + ), + ), + `![a1 || a2 || c1 || c2]`, + }, + { + "or_in_parens_multiple_or_children", + parensExpr( + orExpr( + orExpr(keyTermExpr("a1"), keyTermExpr("a2")), + orExpr(keyTermExpr("c1"), keyTermExpr("c2")), + ), + ), + `([a1 || a2 || c1 || c2])`, + }, + + { + "multi_level_nested_only_or", + orExpr( + orExpr( + orExpr( + keyTermExpr("l3a1"), + orExpr(keyTermExpr("l4a1"), keyTermExpr("l4a2")), + ), + orExpr( + 
orExpr(keyTermExpr("l4b1"), keyTermExpr("l4b2")), + keyTermExpr("l3b1"), + ), + orExpr( + orExpr(keyTermExpr("l4c1"), keyTermExpr("l4c2")), + orExpr(keyTermExpr("l4d1"), keyTermExpr("l4d2")), + ), + ), + ), + `[l3a1 || l4a1 || l4a2 || l4b1 || l4b2 || l3b1 || l4c1 || l4c2 || l4d1 || l4d2]`, + }, + + { + "multi_level_nested_mixed_or", + orExpr( + orExpr( + andExpr( + keyTermExpr("l3a1"), + notExpr(orExpr(keyTermExpr("l4a1"), keyTermExpr("l4a2"))), + ), + orExpr( + orExpr(keyTermExpr("l4b1"), keyTermExpr("l4b2")), + keyTermExpr("l3b1"), + ), + orExpr( + orExpr(keyTermExpr("l4c1"), keyTermExpr("l4c2")), + parensExpr(orExpr(keyTermExpr("l4d1"), keyTermExpr("l4d2"))), + ), + ), + andExpr( + keyTermExpr("l2e1"), + orExpr(keyTermExpr("l3f1"), keyTermExpr("l3f2")), + ), + ), + `[ || l4b1 || l4b2 || l3b1 || l4c1 || l4c2 || ([l4d1 || l4d2]) || ]`, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + optimized := optimizeExpression(context.Background(), test.expr) + assert.Equal(t, test.expected, expressionToString(optimized), "Invalid optimization for %q", test.name) + }) + } +} diff --git a/sqe/parser.go b/sqe/parser.go new file mode 100644 index 000000000..d90856420 --- /dev/null +++ b/sqe/parser.go @@ -0,0 +1,275 @@ +package sqe + +import ( + "bytes" + "context" + "fmt" + "io" + "strings" + + lex "github.com/alecthomas/participle/lexer" +) + +// MaxRecursionDeepness is the limit we impose on the number of direct ORs expression. +// It's possible to have more than that, just not in a single successive sequence or `1 or 2 or 3 ...`. 
+// This is to avoid first a speed problem where parsing start to be +const MaxRecursionDeepness = 2501 + +func Parse(ctx context.Context, input string) (expr Expression, err error) { + parser, err := NewParser(bytes.NewBufferString(input)) + if err != nil { + return nil, fmt.Errorf("new parser: %w", err) + } + + return parser.Parse(ctx) +} + +type Parser struct { + ctx context.Context + l *lexer + + lookForRightParenthesis uint +} + +func NewParser(reader io.Reader) (*Parser, error) { + lexer, err := newLexer(reader) + if err != nil { + return nil, err + } + + return &Parser{ + ctx: context.Background(), + l: lexer, + }, nil +} + +func (p *Parser) Parse(ctx context.Context) (out Expression, err error) { + defer func() { + recoveredErr := recover() + if recoveredErr == nil { + return + } + + switch v := recoveredErr.(type) { + case *ParseError: + err = v + case error: + err = fmt.Errorf("unexpected error occurred while parsing SQE expression: %w", v) + case string, fmt.Stringer: + err = fmt.Errorf("unexpected error occurred while parsing SQE expression: %s", v) + default: + err = fmt.Errorf("unexpected error occurred while parsing SQE expression: %v", v) + } + }() + + rootExpr, err := p.parseExpression(0) + if err != nil { + return nil, err + } + + return optimizeExpression(ctx, rootExpr), nil +} + +func (p *Parser) parseExpression(depth int) (Expression, error) { + if depth >= MaxRecursionDeepness { + // This is a small hack, the panic is trapped at the public API `Parse` method. We do it with a panic + // to avoid the really deep wrapping of error that would happen if we returned right away. A test ensure + // that this behavior works as expected. 
+ panic(parserError("expression is too long, too much ORs or parenthesis expressions", p.l.peekPos())) + } + + left, err := p.parseUnaryExpression(depth) + if err != nil { + return nil, err + } + + for { + p.l.skipSpaces() + next, err := p.l.Peek(0) + if err != nil { + return nil, err + } + + // If we reached end of file, we have finished our job + if next.EOF() { + return left, nil + } + + // If we reached right parenthesis, check if we were expecting one + if p.l.isRightParenthesis(next) { + if p.lookForRightParenthesis == 0 { + return nil, parserError("unexpected right parenthesis, expected right hand side expression or end of input", next.Pos) + } + + // We were expecting one, we finished our job for this part, decrement will be done at parsing site + return left, nil + } + + isImplicitAnd := true + if p.l.isBinaryOperator(next) { + isImplicitAnd = false + p.l.mustLexNext() + p.l.skipSpaces() + } + + // This implements precedence order between `&&` and `||`. A `&&` is parsed with the smallest + // next unit so it takes precedences while `||` parse with the longuest possibility. 
+ parser := p.parseUnaryExpression + depthIncrease := 0 + if p.l.isOrOperator(next) { + parser = p.parseExpression + depthIncrease = 1 + } + + right, err := parser(depth + depthIncrease) + + switch { + case isImplicitAnd || p.l.isAndOperator(next): + if err != nil { + if isImplicitAnd { + return nil, fmt.Errorf("missing expression after implicit 'and' clause: %w", err) + } + + return nil, fmt.Errorf("missing expression after 'and' clause: %w", err) + } + + if v, ok := left.(*AndExpression); ok { + v.Children = append(v.Children, right) + } else { + left = &AndExpression{Children: []Expression{left, right}} + } + + case p.l.isOrOperator(next): + if err != nil { + return nil, fmt.Errorf("missing expression after 'or' clause: %w", err) + } + + // It's impossible to coascle `||` expressions since they are recursive + left = &OrExpression{Children: []Expression{left, right}} + + default: + if err != nil { + return nil, fmt.Errorf("unable to parse right hand side expression: %w", err) + } + + return nil, parserError(fmt.Sprintf("token type %s is not valid binary right hand side expression", p.l.getTokenType(next)), next.Pos) + } + } +} + +func (p *Parser) parseUnaryExpression(depth int) (Expression, error) { + p.l.skipSpaces() + + token, err := p.l.Peek(0) + if err != nil { + return nil, err + } + + if token.EOF() { + return nil, parserError("expected a key term, minus sign or left parenthesis, got end of input", token.Pos) + } + + switch { + case p.l.isName(token) || p.l.isQuoting(token): + return p.parseKeyTerm() + case p.l.isLeftParenthesis(token): + return p.parseParenthesisExpression(depth) + case p.l.isNotOperator(token): + return p.parseNotExpression(depth) + default: + return nil, parserError(fmt.Sprintf("expected a key term, minus sign or left parenthesis, got %s", p.l.getTokenType(token)), token.Pos) + } +} + +func (p *Parser) parseParenthesisExpression(depth int) (Expression, error) { + // Consume left parenthesis + openingParenthesis := p.l.mustLexNext() + 
p.lookForRightParenthesis++ + + child, err := p.parseExpression(depth + 1) + if err != nil { + return nil, fmt.Errorf("invalid expression after opening parenthesis: %w", err) + } + + p.l.skipSpaces() + token, err := p.l.Next() + if err != nil { + return nil, err + } + + if token.EOF() { + return nil, parserError("expecting closing parenthesis, got end of input", openingParenthesis.Pos) + } + + if !p.l.isRightParenthesis(token) { + return nil, parserError(fmt.Sprintf("expecting closing parenthesis after expression, got %s", p.l.getTokenType(token)), token.Pos) + } + + p.lookForRightParenthesis-- + return &ParenthesisExpression{child}, nil +} + +func (p *Parser) parseNotExpression(depth int) (Expression, error) { + // Consume minus sign + p.l.mustLexNext() + + child, err := p.parseUnaryExpression(depth) + if err != nil { + return nil, fmt.Errorf("invalid expression after minus sign: %w", err) + } + + return &NotExpression{child}, nil +} + +func (p *Parser) parseKeyTerm() (Expression, error) { + token := p.l.mustLexNext() + + var value *StringLiteral + switch { + case p.l.isName(token): + value = &StringLiteral{ + Value: token.String(), + } + case p.l.isQuoting(token): + literal, err := p.parseQuotedString(token) + if err != nil { + return nil, err + } + + value = literal + default: + return nil, parserError(fmt.Sprintf("expecting key term, either a string or quoted string but got %s", p.l.getTokenType(token)), token.Pos) + } + + return &KeyTerm{ + Value: value, + }, nil +} + +func (p *Parser) parseQuotedString(startQuoting lex.Token) (*StringLiteral, error) { + builder := &strings.Builder{} + for { + token, err := p.l.Next() + if err != nil { + return nil, err + } + + if token.EOF() { + return nil, parserError(fmt.Sprintf("expecting closing quoting char %q, got end of input", startQuoting.Value), startQuoting.Pos) + } + + if p.l.isQuoting(token) { + value := builder.String() + if value == "" { + return nil, rangeParserError("an empty string is not valid", 
startQuoting.Pos, token.Pos) + } + + return &StringLiteral{ + Value: value, + QuotingChar: startQuoting.Value, + }, nil + } + + builder.WriteString(token.Value) + } +} diff --git a/sqe/parser_bench_test.go b/sqe/parser_bench_test.go new file mode 100644 index 000000000..9bdde62dd --- /dev/null +++ b/sqe/parser_bench_test.go @@ -0,0 +1,54 @@ +package sqe + +import ( + "context" + "strings" + "testing" +) + +func BenchmarkParseExpression(b *testing.B) { + tests := []struct { + name string + sqe string + }{ + {"single term", "action:data"}, + + // Those are kind of standard query that are parsed quite often + {"triple and term", "eosio data specificacct"}, + {"multiple and term", "data data.from: 'action' string"}, + {"multiple and/or term", "data (data.from || data.from) ('action' || expected) 'action' string"}, + + // Some convoluted big ORs list + {"big ORs list 100", buildFromOrToList(100)}, + {"big ORs list 1_000", buildFromOrToList(1000)}, + } + + for _, test := range tests { + b.Run(test.name, func(b *testing.B) { + setupBench(b) + for n := 0; n < b.N; n++ { + _, err := Parse(context.Background(), test.sqe) + if err != nil { + b.Error(err) + b.FailNow() + } + } + }) + } +} + +func buildFromOrToList(count int) string { + var elements []string + + // The count is divided by 2 since we add 2 addresses per iteration + for i := 0; i < count/2; i++ { + elements = append(elements, "0xaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "0xbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb") + } + + return "(" + strings.Join(elements, " || ") + ")" +} + +func setupBench(b *testing.B) { + b.ReportAllocs() + b.ResetTimer() +} diff --git a/sqe/parser_test.go b/sqe/parser_test.go new file mode 100644 index 000000000..7e218ae5a --- /dev/null +++ b/sqe/parser_test.go @@ -0,0 +1,424 @@ +package sqe + +import ( + "context" + "fmt" + "os" + "strings" + "testing" + + lex "github.com/alecthomas/participle/lexer" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +const 
ValidateOnlyThatItParses = "!__valiateOnlyThatItParses__!" + +func TestParser(t *testing.T) { + tests := []struct { + name string + sqe string + expected string + expectedErr error + }{ + { + "single_key_term", + `transfer`, + `transfer`, + nil, + }, + { + "single_key_term_space_before", + ` transfer`, + `transfer`, + nil, + }, + { + "single_key_term_space_after", + `transfer `, + `transfer`, + nil, + }, + { + "single_key_term_space_both", + ` transfer `, + `transfer`, + nil, + }, + { + "single_key_term_multi_spaces", + " \t transfer", + `transfer`, + nil, + }, + { + "single_key_term_with_dot", + `data.name`, + `data.name`, + nil, + }, + { + "double_quoted_string", + `"test || value AND other ( 10 )!"`, + `"test || value AND other ( 10 )!"`, + nil, + }, + { + "double_quoted_string_multi_spaces", + ` " test || value AND other ( 10 )!"`, + `" test || value AND other ( 10 )!"`, + nil, + }, + { + "double_quoted_string_with_minus_sign", + `"eosio.token-open"`, + `"eosio.token-open"`, + nil, + }, + + { + "single_quoted_string", + `'test:value || value AND other ( 10 )!'`, + `'test:value || value AND other ( 10 )!'`, + nil, + }, + { + "single_quoted_string_multi_spaces", + ` ' test:value || value AND other ( 10 )!'`, + `' test:value || value AND other ( 10 )!'`, + nil, + }, + + { + "top_level_single_and_implicit", + `one two`, + "", + nil, + }, + { + "top_level_single_and_implicit_double_quotes", + `"one" two`, + `<"one" && two>`, + nil, + }, + { + "top_level_single_and", + `one && two`, + "", + nil, + }, + { + "top_level_single_and_legacy", + `one && two`, + "", + nil, + }, + + { + "top_level_single_or", + `one || two`, + "[one || two]", + nil, + }, + { + "top_level_single_or_legacy", + `one || two`, + "[one || two]", + nil, + }, + + { + "top_level_parenthesis_single_term", + `(one)`, + `(one)`, + nil, + }, + { + "top_level_parenthesis_and_term", + `(one && two)`, + `()`, + nil, + }, + { + "top_level_parenthesis_and_term_double_quote", + `(one && "two")`, + `()`, + nil, 
+ }, + { + "top_level_parenthesis_or_term", + `(one || two)`, + `([one || two])`, + nil, + }, + { + "top_level_parenthesis_or_term_with_double_quotes", + `( "one" || two)`, + `(["one" || two])`, + nil, + }, + { + "top_level_parenthesis_with_spaces", + ` ( one || two ) `, + `([one || two])`, + nil, + }, + { + "top_level_parenthesis_with_both_not", + ` ( -one || -two ) `, + `([!one || !two])`, + nil, + }, + + { + "top_level_not_term", + `- one`, + `!one`, + nil, + }, + { + "top_level_not_parenthesis", + `- ( one)`, + `!(one)`, + nil, + }, + { + "top_level_not_parenthesis_or", + `- ( one || two)`, + `!([one || two])`, + nil, + }, + + { + "top_level_implicit_and_with_left_not", + ` - two one`, + ``, + nil, + }, + { + "top_level_implicit_and_with_right_not", + `two -one`, + ``, + nil, + }, + { + "top_level_implicit_and_both_not", + `-two -one`, + ``, + nil, + }, + { + "top_level_and_with_left_not", + ` - two && one`, + ``, + nil, + }, + { + "top_level_and_with_right_not", + `two && -one`, + ``, + nil, + }, + { + "top_level_and_both_not", + `-two && -one`, + ``, + nil, + }, + { + "top_level_or_with_left_not", + ` - two || one`, + `[!two || one]`, + nil, + }, + { + "top_level_or_with_right_not", + `two || -one`, + `[two || !one]`, + nil, + }, + { + "top_level_or_with_both_not", + `-two || -one`, + `[!two || !one]`, + nil, + }, + { + "top_level_legacy_or_with_both_not", + `-two || -one`, + `[!two || !one]`, + nil, + }, + + { + "top_level_multi_and", + `a b c d`, + ``, + nil, + }, + { + "top_level_multi_or", + `a || b || c || d`, + `[a || b || c || d]`, + nil, + }, + + { + "precedence_and_or", + `a b || c`, + `[ || c]`, + nil, + }, + { + "precedence_or_and", + `a || b c`, + `[a || ]`, + nil, + }, + { + "precedence_and_or_and", + `a b || c d`, + `[ || ]`, + nil, + }, + { + "precedence_and_and_or", + `a b c || d`, + `[ || d]`, + nil, + }, + { + "precedence_not_and_or", + `-a b || c`, + `[ || c]`, + nil, + }, + { + "precedence_parenthesis_not_and_or", + `-a (b || c)`, + ``, + 
nil, + }, + { + "precedence_parenthesis_and_or_and", + `a (b || c) d`, + ``, + nil, + }, + { + "precedence_parenthesis_and_or", + `a (b || c)`, + ``, + nil, + }, + + { + "ported_big_example", + `"eos" (transfer || issue || matant) from to`, + `<"eos" && ([transfer || issue || matant]) && from && to>`, + nil, + }, + { + "ported_with_newlines", + "(a ||\n b)", + `([a || b])`, + nil, + }, + + { + "depthness_100_ors", + buildFromOrToList(100), + ValidateOnlyThatItParses, + nil, + }, + { + "depthness_1_000_ors", + buildFromOrToList(1000), + ValidateOnlyThatItParses, + nil, + }, + { + "depthness_2_500_ors", + buildFromOrToList(2500), + ValidateOnlyThatItParses, + nil, + }, + + { + "error_missing_expresssion_after_not", + `a - `, + "", + fmt.Errorf("missing expression after implicit 'and' clause: %w", + fmt.Errorf("invalid expression after minus sign: %w", + &ParseError{"expected a key term, minus sign or left parenthesis, got end of input", pos(1, 4, 5)}, + ), + ), + }, + { + "error_missing_expression_after_and", + `a && `, + "", + fmt.Errorf("missing expression after 'and' clause: %w", + &ParseError{"expected a key term, minus sign or left parenthesis, got end of input", pos(1, 5, 6)}, + ), + }, + { + "error_missing_expression_after_or", + `a || `, + "", + fmt.Errorf("missing expression after 'or' clause: %w", &ParseError{"expected a key term, minus sign or left parenthesis, got end of input", pos(1, 5, 6)}), + }, + { + "error_unstarted_right_parenthesis", + `a )`, + "", + &ParseError{"unexpected right parenthesis, expected right hand side expression or end of input", pos(1, 2, 3)}, + }, + { + "error_unclosed_over_left_parenthesis", + `( a`, + "", + &ParseError{"expecting closing parenthesis, got end of input", pos(1, 0, 1)}, + }, + { + "error_deepness_reached", + buildFromOrToList(MaxRecursionDeepness + 1), + "", + &ParseError{"expression is too long, too much ORs or parenthesis expressions", pos(1, 91251, 91252)}, + }, + } + + for _, test := range tests { + 
t.Run(test.name, func(t *testing.T) { + if os.Getenv("DEBUG") != "" { + printTokens(t, test.sqe) + } + + parser, err := NewParser(strings.NewReader(test.sqe)) + require.NoError(t, err) + + expression, err := parser.Parse(context.Background()) + require.Equal(t, test.expectedErr, err) + + if test.expectedErr == nil && err == nil && test.expected != ValidateOnlyThatItParses { + assert.Equal(t, test.expected, expressionToString(expression), "Invalid parsing for SEQ %q", test.sqe) + } + }) + } +} + +func pos(line, offset, column int) lex.Position { + return lex.Position{Filename: "", Line: line, Offset: offset, Column: column} +} + +func printTokens(t *testing.T, input string) { + lexer, err := lexerDefinition.Lex(strings.NewReader(input)) + require.NoError(t, err) + + tokens, err := lex.ConsumeAll(lexer) + require.NoError(t, err) + + for _, token := range tokens { + fmt.Print(token.GoString()) + } +} diff --git a/sqe/transformer.go b/sqe/transformer.go new file mode 100644 index 000000000..7a94afac1 --- /dev/null +++ b/sqe/transformer.go @@ -0,0 +1,38 @@ +// Copyright 2024 StreamingFast Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package sqe + +type FieldTransformer interface { + // TransformFieldName receives the field name and allow receiver of the invocation to update its name. The field's + // name is updated if the invocation returns a nil error. 
+ TransformFieldName(field string) (string, error) + + // TransformStringLiteral receives the field name (the updated one from a prior invocation of `TransformFieldName`) + // and a string literal (either a direct one or a sub-element from a `StringList`) and allows transformation of the + // `StringLiteral` value in place. + TransformStringLiteral(field string, value *StringLiteral) error +} + +type noOpTransformer struct{} + +func (noOpTransformer) TransformFieldName(field string) (string, error) { + return field, nil +} + +func (noOpTransformer) TransformStringLiteral(field string, value *StringLiteral) error { + return nil +} + +var NoOpFieldTransformer noOpTransformer diff --git a/sqe/traversal.go b/sqe/traversal.go new file mode 100644 index 000000000..61113df99 --- /dev/null +++ b/sqe/traversal.go @@ -0,0 +1,113 @@ +package sqe + +import ( + "context" + "errors" +) + +type OnExpression func(ctx context.Context, expr Expression) error + +var ErrStopVisit = errors.New("stop") + +type DepthFirstVisitor struct { + beforeVisit OnExpression + afterVisit OnExpression + stopped bool +} + +func NewDepthFirstVisitor(beforeVisit, afterVisit OnExpression) *DepthFirstVisitor { + return &DepthFirstVisitor{beforeVisit: beforeVisit, afterVisit: afterVisit} +} + +func (v *DepthFirstVisitor) Visit_And(ctx context.Context, e *AndExpression) error { + return v.visit_binary(ctx, e, e.Children) +} + +func (v *DepthFirstVisitor) Visit_Or(ctx context.Context, e *OrExpression) error { + return v.visit_binary(ctx, e, e.Children) +} + +func (v *DepthFirstVisitor) visit_binary(ctx context.Context, parent Expression, children []Expression) error { + if stop, err := v.executeCallback(ctx, parent, v.beforeVisit); stop { + return err + } + + for _, child := range children { + err := child.Visit(ctx, v) + if v.stopped || err != nil { + return err + } + } + + if stop, err := v.executeCallback(ctx, parent, v.afterVisit); stop { + return err + } + + return nil +} + +func (v *DepthFirstVisitor) 
Visit_Parenthesis(ctx context.Context, e *ParenthesisExpression) error {
+	if stop, err := v.executeCallback(ctx, e, v.beforeVisit); stop {
+		return err
+	}
+
+	if err := e.Child.Visit(ctx, v); err != nil {
+		return err
+	}
+
+	if stop, err := v.executeCallback(ctx, e, v.afterVisit); stop {
+		return err
+	}
+
+	return nil
+}
+
+func (v *DepthFirstVisitor) Visit_Not(ctx context.Context, e *NotExpression) error {
+	if stop, err := v.executeCallback(ctx, e, v.beforeVisit); stop {
+		return err
+	}
+
+	if err := e.Child.Visit(ctx, v); err != nil {
+		return err
+	}
+
+	if stop, err := v.executeCallback(ctx, e, v.afterVisit); stop {
+		return err
+	}
+
+	return nil
+}
+
+func (v *DepthFirstVisitor) Visit_KeyTerm(ctx context.Context, e *KeyTerm) error {
+	if stop, err := v.executeCallback(ctx, e, v.beforeVisit); stop {
+		return err
+	}
+
+	if stop, err := v.executeCallback(ctx, e, v.afterVisit); stop {
+		return err
+	}
+
+	return nil
+}
+
+// executeCallback runs `callback` (if any) on `e`, reporting via `stop` whether
+// the traversal must not continue. A callback returning ErrStopVisit halts the
+// visit cleanly (nil error); any other error halts it and is propagated.
+func (v *DepthFirstVisitor) executeCallback(ctx context.Context, e Expression, callback OnExpression) (stop bool, err error) {
+	if callback == nil {
+		return false, nil
+	}
+
+	if v.stopped {
+		return true, nil
+	}
+
+	if err := callback(ctx, e); err != nil {
+		// Any error ends the traversal; only ErrStopVisit is a clean stop.
+		v.stopped = true
+		if errors.Is(err, ErrStopVisit) {
+			return true, nil
+		}
+
+		return true, err
+	}
+
+	return false, nil
+}
diff --git a/sqe/types.go b/sqe/types.go
new file mode 100644
index 000000000..fe1755f43
--- /dev/null
+++ b/sqe/types.go
@@ -0,0 +1,115 @@
+package sqe
+
+import (
+	"context"
+	"fmt"
+	"strings"
+)
+
+type Visitor interface {
+	Visit_And(ctx context.Context, expr *AndExpression) error
+	Visit_Or(ctx context.Context, expr *OrExpression) error
+	Visit_Parenthesis(ctx context.Context, expr *ParenthesisExpression) error
+	Visit_Not(ctx context.Context, expr *NotExpression) error
+	Visit_KeyTerm(ctx context.Context, expr *KeyTerm) error
+}
+
+type Expression interface {
+	Visit(ctx context.Context, visitor Visitor) error
+}
+
+type 
AndExpression struct { + Children []Expression +} + +func andExpr(children ...Expression) *AndExpression { + return &AndExpression{Children: children} +} + +func (e *AndExpression) Visit(ctx context.Context, visitor Visitor) error { + return visitor.Visit_And(ctx, e) +} + +type OrExpression struct { + Children []Expression +} + +func orExpr(children ...Expression) *OrExpression { + return &OrExpression{Children: children} +} + +func (e *OrExpression) Visit(ctx context.Context, visitor Visitor) error { + return visitor.Visit_Or(ctx, e) +} + +type ParenthesisExpression struct { + Child Expression +} + +func parensExpr(expr Expression) *ParenthesisExpression { + return &ParenthesisExpression{Child: expr} +} + +func (e *ParenthesisExpression) Visit(ctx context.Context, visitor Visitor) error { + return visitor.Visit_Parenthesis(ctx, e) +} + +type NotExpression struct { + Child Expression +} + +func notExpr(expr Expression) *NotExpression { + return &NotExpression{Child: expr} +} + +func (e *NotExpression) Visit(ctx context.Context, visitor Visitor) error { + return visitor.Visit_Not(ctx, e) +} + +type KeyTerm struct { + Value *StringLiteral +} + +func keyTermExpr(value string) *KeyTerm { + return &KeyTerm{Value: &StringLiteral{Value: value}} +} + +func (e *KeyTerm) Visit(ctx context.Context, visitor Visitor) error { + return visitor.Visit_KeyTerm(ctx, e) +} + +type StringLiteral struct { + Value string + QuotingChar string +} + +const restrictedLiteralChars = `'":,-()[] ` + "\n" + "\t" + +func stringLiteral(in string) *StringLiteral { + stringLiteral := &StringLiteral{Value: in} + if strings.ContainsAny(in, restrictedLiteralChars) { + stringLiteral.QuotingChar = "\"" + } + + return stringLiteral +} + +func (e *StringLiteral) isValue() bool { + return true +} + +func (e *StringLiteral) Literal() string { + return e.Value +} + +func (e *StringLiteral) SetValue(value string) { + e.Value = value +} + +func (e *StringLiteral) String() string { + if e.QuotingChar != "" { + 
return fmt.Sprintf("%s%s%s", e.QuotingChar, e.Value, e.QuotingChar) + } + + return e.Value +} diff --git a/storage/execout/pb/generate.sh b/storage/execout/pb/generate.sh index 630c0b5c8..3feeb0fbf 100755 --- a/storage/execout/pb/generate.sh +++ b/storage/execout/pb/generate.sh @@ -1,5 +1,5 @@ #!/bin/bash -u -# Copyright 2019 dfuse Platform Inc. +# Copyright 2024 StreamingFast Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/storage/store/marshaller/pb/generate.sh b/storage/store/marshaller/pb/generate.sh index 961f3c4b2..2f7610837 100755 --- a/storage/store/marshaller/pb/generate.sh +++ b/storage/store/marshaller/pb/generate.sh @@ -1,5 +1,5 @@ #!/bin/bash -u -# Copyright 2019 dfuse Platform Inc. +# Copyright 2024 StreamingFast Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License.