From c85b34eae9c20d3d4e3e9cd0a9fdce2f65e16237 Mon Sep 17 00:00:00 2001 From: Gani Georgiev Date: Sat, 22 Jan 2022 19:28:29 +0200 Subject: [PATCH] initial commit --- LICENSE.md | 29 +++ README.md | 95 ++++++++++ examples_test.go | 40 ++++ go.mod | 3 + parser.go | 116 ++++++++++++ parser_test.go | 97 ++++++++++ scanner.go | 479 +++++++++++++++++++++++++++++++++++++++++++++++ scanner_test.go | 116 ++++++++++++ 8 files changed, 975 insertions(+) create mode 100644 LICENSE.md create mode 100644 README.md create mode 100644 examples_test.go create mode 100644 go.mod create mode 100644 parser.go create mode 100644 parser_test.go create mode 100644 scanner.go create mode 100644 scanner_test.go diff --git a/LICENSE.md b/LICENSE.md new file mode 100644 index 0000000..9ca6f39 --- /dev/null +++ b/LICENSE.md @@ -0,0 +1,29 @@ +BSD 3-Clause License + +Copyright (c) 2022, Gani Georgiev +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..8295f56 --- /dev/null +++ b/README.md @@ -0,0 +1,95 @@ +fexpr +[![Go Report Card](https://goreportcard.com/badge/github.com/ganigeorgiev/fexpr)](https://goreportcard.com/report/github.com/ganigeorgiev/fexpr) +[![GoDoc](https://godoc.org/github.com/ganigeorgiev/fexpr?status.svg)](https://pkg.go.dev/github.com/ganigeorgiev/fexpr) +================================================================================ + +**fexpr** is a filter query language parser that generates an extremely easy to work with AST structure so that you can safely create SQL, Elasticsearch, etc. queries from user input. + +Or in other words, transform the string `"id > 1"` into the struct `[{&& {{identifier id} > {number 1}}}]`. + +Supports parentheses and various conditional expression operators (see [Grammar](https://github.com/ganigeorgiev/fexpr#grammar)). + + +## Example usage + +``` +go get github.com/ganigeorgiev/fexpr +``` + +```go +package main + +import "github.com/ganigeorgiev/fexpr" + +func main() { + result, err := fexpr.Parse("id=123 && status='active'") + // result: [{&& {{identifier id} = {number 123}}} {&& {{identifier status} = {text active}}}] +} +``` + +> Note that each parsed expression statement contains a join/union operator (`&&` or `||`) so that the result can be consumed in small chunks without having to rely on the group/nesting context. 
+ +> See the [package documentation](https://pkg.go.dev/github.com/ganigeorgiev/fexpr) for more details and examples. + +## Grammar + +**fexpr** grammar resembles the SQL `WHERE` expression syntax. It recognizes several token types (identifiers, numbers, quoted text, expression operators, whitespaces, etc.). + +> You can find all supported tokens in [`scanner.go`](https://github.com/ganigeorgiev/fexpr/blob/master/scanner.go). + + +#### Operators + +- **`=`** Equal operator (eg. `a=b`) +- **`!=`** NOT Equal operator (eg. `a!=b`) +- **`>`** Greater than operator (eg. `a>b`) +- **`>=`** Greater than or equal operator (eg. `a>=b`) +- **`<`** Less than operator (eg. `a<b`) +- **`<=`** Less than or equal operator (eg. `a<=b`) +- **`~`** Like operator (eg. `a~b`) +- **`!~`** NOT Like operator (eg. `a!~b`) +- **`&&`** AND join operator (eg. `a=b && c=d`) +- **`||`** OR join operator (eg. `a=b || c=d`) +- **`()`** Parenthesis group (eg. `(a=1 && b=2) || c=3`) + +## Using only the scanner + +The scanner can be used on its own, without the parser, to read one token at a time: + +```go +s := fexpr.NewScanner(strings.NewReader("id > 123")) + +// scan single token at a time until EOF or error is reached +for { + t, err := s.Scan() + if t.Type == fexpr.TokenEOF || err != nil { + break + } + + fmt.Println(t) +} + +// Output: +// {identifier id} +// {whitespace } +// {sign >} +// {whitespace } +// {number 123} +``` diff --git a/examples_test.go b/examples_test.go new file mode 100644 index 0000000..df2202d --- /dev/null +++ b/examples_test.go @@ -0,0 +1,40 @@ +package fexpr_test + +import ( + "fexpr" + "fmt" + "strings" +) + +func ExampleNewScanner() { + fexpr.NewScanner(strings.NewReader("id")) +} + +func ExampleScanner_Scan() { + s := fexpr.NewScanner(strings.NewReader("id > 123")) + + for { + t, err := s.Scan() + if t.Type == fexpr.TokenEOF || err != nil { + break + } + + fmt.Println(t) + } + + // Output: + // {identifier id} + // {whitespace } + // {sign >} + // {whitespace } + // {number 123} +} + +func ExampleParse() { + result, _ := fexpr.Parse("id > 123") + + fmt.Println(result) + + // Output: + // [{&& {{identifier id} > {number 123}}}] +} diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..641113c --- /dev/null +++ b/go.mod @@ -0,0 +1,3 @@ +module fexpr + +go 1.18 diff --git a/parser.go b/parser.go new file mode 100644 index 0000000..d79a5e8 --- /dev/null +++ b/parser.go @@ -0,0 +1,116 @@ +package fexpr 
+ +import ( + "errors" + "fmt" + "strings" +) + +// Expr represents an individual tokenized expression consisting +// of left operand, operator and a right operand. +type Expr struct { + Left Token + Op SignOp + Right Token +} + +// ExprGroup represents a wrapped expression and its join type. +// +// The group's Item could be either an `Expr` instance or `[]ExprGroup` slice (for nested expressions). +type ExprGroup struct { + Join JoinOp + Item interface{} +} + +// parser's state machine steps +const ( + stepBeforeSign = iota + stepSign + stepAfterSign + StepJoin +) + +// Parse parses the provided text and returns its processed AST +// in the form of `ExprGroup` slice(s). +func Parse(text string) ([]ExprGroup, error) { + result := []ExprGroup{} + scanner := NewScanner(strings.NewReader(text)) + step := stepBeforeSign + join := JoinAnd + + var expr Expr + + for { + t, err := scanner.Scan() + if err != nil { + return nil, err + } + + if t.Type == TokenEOF { + break + } + + if t.Type == TokenWS { + continue + } + + if t.Type == TokenGroup { + groupResult, err := Parse(t.Literal) + if err != nil { + return nil, err + } + + // append only if non-empyt group + if len(groupResult) > 0 { + result = append(result, ExprGroup{Join: join, Item: groupResult}) + } + + step = StepJoin + continue + } + + switch step { + case stepBeforeSign: + if t.Type != TokenIdentifier && t.Type != TokenText && t.Type != TokenNumber { + return nil, fmt.Errorf("Expected left operand (identifier, text or number), got %q (%s)", t.Literal, t.Type) + } + + expr = Expr{Left: t} + + step = stepSign + case stepSign: + if t.Type != TokenSign { + return nil, fmt.Errorf("Expected a sign operator, got %q (%s)", t.Literal, t.Type) + } + + expr.Op = SignOp(t.Literal) + step = stepAfterSign + case stepAfterSign: + if t.Type != TokenIdentifier && t.Type != TokenText && t.Type != TokenNumber { + return nil, fmt.Errorf("Expected right operand (identifier, text or number), got %q (%s)", t.Literal, t.Type) + } + + 
expr.Right = t + result = append(result, ExprGroup{Join: join, Item: expr}) + + step = StepJoin + case StepJoin: + if t.Type != TokenJoin { + return nil, fmt.Errorf("Expected && or ||, got %q (%s)", t.Literal, t.Type) + } + + join = JoinAnd + if t.Literal == "||" { + join = JoinOr + } + + step = stepBeforeSign + } + } + + if step != StepJoin { + return nil, errors.New("Invalid formatted filter expression.") + } + + return result, nil +} diff --git a/parser_test.go b/parser_test.go new file mode 100644 index 0000000..5553742 --- /dev/null +++ b/parser_test.go @@ -0,0 +1,97 @@ +package fexpr + +import ( + "fmt" + "testing" +) + +func TestParse(t *testing.T) { + testScenarios := []struct { + input string + expectedError bool + expectedPrint string + }{ + {`> 1`, true, "[]"}, + {`a >`, true, "[]"}, + {`a > >`, true, "[]"}, + {`a > %`, true, "[]"}, + {`a ! 1`, true, "[]"}, + {`a - 1`, true, "[]"}, + {`a + 1`, true, "[]"}, + {`> a 1`, true, "[]"}, + {`a || 1`, true, "[]"}, + {`a && 1`, true, "[]"}, + {`test > 1 &&`, true, `[]`}, + {`|| test = 1`, true, `[]`}, + {`test = 1 && ||`, true, "[]"}, + {`test = 1 && a`, true, "[]"}, + {`test = 1 && a`, true, "[]"}, + {`test = 1 && "a"`, true, "[]"}, + {`test = 1 a`, true, "[]"}, + {`test = 1 a`, true, "[]"}, + {`test = 1 "a"`, true, "[]"}, + {`test = 1@test`, true, "[]"}, + {`test = .@test`, true, "[]"}, + // mismatched text quotes + {`test = "demo'`, true, "[]"}, + {`test = 'demo"`, true, "[]"}, + {`test = 'demo'"`, true, "[]"}, + {`test = 'demo''`, true, "[]"}, + {`test = "demo"'`, true, "[]"}, + {`test = "demo""`, true, "[]"}, + {`test = ""demo""`, true, "[]"}, + {`test = ''demo''`, true, "[]"}, + {"test = `demo`", true, "[]"}, + // valid simple expression and sign operators check + {`1=12`, false, `[{&& {{number 1} = {number 12}}}]`}, + {` 1 = 12 `, false, `[{&& {{number 1} = {number 12}}}]`}, + {`"demo" != test`, false, `[{&& {{text demo} != {identifier test}}}]`}, + {`a~1`, false, `[{&& {{identifier a} ~ {number 1}}}]`}, + 
{`a !~ 1`, false, `[{&& {{identifier a} !~ {number 1}}}]`}, + {`test>12`, false, `[{&& {{identifier test} > {number 12}}}]`}, + {`test > 12`, false, `[{&& {{identifier test} > {number 12}}}]`}, + {`test >="test"`, false, `[{&& {{identifier test} >= {text test}}}]`}, + {`test<@demo.test2`, false, `[{&& {{identifier test} < {identifier @demo.test2}}}]`}, + {`1<="test"`, false, `[{&& {{number 1} <= {text test}}}]`}, + {`1<="te'st"`, false, `[{&& {{number 1} <= {text te'st}}}]`}, + {`demo='te\'st'`, false, `[{&& {{identifier demo} = {text te'st}}}]`}, + {`demo="te\'st"`, false, `[{&& {{identifier demo} = {text te\'st}}}]`}, + {`demo="te\"st"`, false, `[{&& {{identifier demo} = {text te"st}}}]`}, + // invalid parenthesis + {`(a=1`, true, `[]`}, + {`a=1)`, true, `[]`}, + {`((a=1)`, true, `[]`}, + {`{a=1}`, true, `[]`}, + {`[a=1]`, true, `[]`}, + {`((a=1 || a=2) && c=1))`, true, `[]`}, + // valid parenthesis + {`()`, true, `[]`}, + {`(a=1)`, false, `[{&& [{&& {{identifier a} = {number 1}}}]}]`}, + {`(a="test(")`, false, `[{&& [{&& {{identifier a} = {text test(}}}]}]`}, + {`(a="test)")`, false, `[{&& [{&& {{identifier a} = {text test)}}}]}]`}, + {`((a=1))`, false, `[{&& [{&& [{&& {{identifier a} = {number 1}}}]}]}]`}, + {`a=1 || 2!=3`, false, `[{&& {{identifier a} = {number 1}}} {|| {{number 2} != {number 3}}}]`}, + {`a=1 && 2!=3`, false, `[{&& {{identifier a} = {number 1}}} {&& {{number 2} != {number 3}}}]`}, + {`a=1 && 2!=3 || "b"=a`, false, `[{&& {{identifier a} = {number 1}}} {&& {{number 2} != {number 3}}} {|| {{text b} = {identifier a}}}]`}, + {`(a=1 && 2!=3) || "b"=a`, false, `[{&& [{&& {{identifier a} = {number 1}}} {&& {{number 2} != {number 3}}}]} {|| {{text b} = {identifier a}}}]`}, + {`((a=1 || a=2) && (c=1))`, false, `[{&& [{&& [{&& {{identifier a} = {number 1}}} {|| {{identifier a} = {number 2}}}]} {&& [{&& {{identifier c} = {number 1}}}]}]}]`}, + } + + for i, scenario := range testScenarios { + v, err := Parse(scenario.input) + + if scenario.expectedError && 
err == nil { + t.Errorf("(%d) Expected error, got nil (%q)", i, scenario.input) + } + + if !scenario.expectedError && err != nil { + t.Errorf("(%d) Did not expect error, got %q (%q).", i, err, scenario.input) + } + + vPrint := fmt.Sprintf("%v", v) + + if vPrint != scenario.expectedPrint { + t.Errorf("(%d) Expected %s, got %s", i, scenario.expectedPrint, vPrint) + } + } +} diff --git a/scanner.go b/scanner.go new file mode 100644 index 0000000..ea6b55b --- /dev/null +++ b/scanner.go @@ -0,0 +1,479 @@ +package fexpr + +import ( + "bufio" + "bytes" + "fmt" + "io" + "regexp" + "strconv" + "strings" +) + +// eof represents a marker rune for the end of the reader. +const eof = rune(0) + +// JoinOp represents a join type operator. +type JoinOp string + +// supported join type operators +const ( + JoinAnd JoinOp = "&&" + JoinOr JoinOp = "||" +) + +// JoinOp represents an expression sign operator. +type SignOp string + +// supported expression sign operators +const ( + SignEq SignOp = "=" + SignNeq SignOp = "!=" + SignLike SignOp = "~" + SignNlike SignOp = "!~" + SignLt SignOp = "<" + SignLte SignOp = "<=" + SignGt SignOp = ">" + SignGte SignOp = ">=" +) + +// TokenType represents a Token type. +type TokenType string + +// token type constants +const ( + TokenUnexpected TokenType = "unexpected" + TokenEOF TokenType = "eof" + TokenWS TokenType = "whitespace" + TokenJoin TokenType = "join" + TokenSign TokenType = "sign" + TokenIdentifier TokenType = "identifier" // variable, column name, placeholder, etc. + TokenNumber TokenType = "number" + TokenText TokenType = "text" // ' or " quoted string + TokenGroup TokenType = "group" // groupped/nested tokens +) + +// Token represents a single scanned literal (one or more combined runes). +type Token struct { + Type TokenType + Literal string +} + +// Scanner represents a filter and lexical scanner. +type Scanner struct { + r *bufio.Reader +} + +// NewScanner creates and returns a new scanner instance with the specified io.Reader. 
+func NewScanner(r io.Reader) *Scanner { + return &Scanner{bufio.NewReader(r)} +} + +// Scan reads and returns the next available token value from the scanner's buffer. +func (s *Scanner) Scan() (Token, error) { + ch := s.read() + + if isWhitespaceRune(ch) { + s.unread() + return s.scanWhitespace() + } + + if isGroupStartRune(ch) { + s.unread() + return s.scanGroup() + } + + if isIdentifierStartRune(ch) { + s.unread() + return s.scanIdentifier() + } + + if isNumberStartRune(ch) { + s.unread() + return s.scanNumber() + } + + if isTextStartRune(ch) { + s.unread() + return s.scanText() + } + + if isSignStartRune(ch) { + s.unread() + return s.scanSign() + } + + if isJoinStartRune(ch) { + s.unread() + return s.scanJoin() + } + + if ch == eof { + return Token{Type: TokenEOF, Literal: ""}, nil + } + + return Token{Type: TokenUnexpected, Literal: string(ch)}, fmt.Errorf("Unexpected character %q", ch) +} + +// scanWhitespace consumes all contiguous whitespace runes. +func (s *Scanner) scanWhitespace() (Token, error) { + var buf bytes.Buffer + + // Reads every subsequent whitespace character into the buffer. + // Non-whitespace runes and EOF will cause the loop to exit. + for { + ch := s.read() + + if ch == eof { + break + } + + if !isWhitespaceRune(ch) { + s.unread() + break + } + + // write the whitespace rune + buf.WriteRune(ch) + } + + return Token{Type: TokenWS, Literal: buf.String()}, nil +} + +// scanIdentifier consumes all contiguous ident runes. +func (s *Scanner) scanIdentifier() (Token, error) { + var buf bytes.Buffer + + // Read every subsequent identifier rune into the buffer. + // Non-ident runes and EOF will cause the loop to exit. + for { + ch := s.read() + + if ch == eof { + break + } + + if !isIdentifierStartRune(ch) && !isDigitRune(ch) && ch != '.' 
{ + s.unread() + break + } + + // write the ident rune + buf.WriteRune(ch) + } + + literal := buf.String() + + var err error + if !isIdentifier(literal) { + err = fmt.Errorf("Invalid identifier %q", literal) + } + + return Token{Type: TokenIdentifier, Literal: literal}, err +} + +// scanNumber consumes all contiguous digit runes. +func (s *Scanner) scanNumber() (Token, error) { + var buf bytes.Buffer + + // Read every subsequent digit rune into the buffer. + // Non-digit runes and EOF will cause the loop to exit. + for { + ch := s.read() + + if ch == eof { + break + } + + if !isDigitRune(ch) && ch != '.' { + s.unread() + break + } + + // write the digit rune + buf.WriteRune(ch) + } + + literal := buf.String() + + var err error + if !isNumber(literal) { + err = fmt.Errorf("Invalid number %q", literal) + } + + return Token{Type: TokenNumber, Literal: literal}, err +} + +// scanText consumes all contiguous quoted text runes. +func (s *Scanner) scanText() (Token, error) { + var buf bytes.Buffer + + // read the first rune to determine the quotes type + firstCh := s.read() + buf.WriteRune(firstCh) + var prevCh rune + var hasMatchingQuotes bool + + // Read every subsequent text rune into the buffer. + // EOF and matching unescaped ending quote will cause the loop to exit. + for { + ch := s.read() + + if ch == eof { + break + } + + // write the text rune + buf.WriteRune(ch) + + // unescaped matching quote, aka. the end + if ch == firstCh && prevCh != '\\' { + hasMatchingQuotes = true + break + } + + prevCh = ch + } + + literal := buf.String() + + var err error + if !hasMatchingQuotes { + err = fmt.Errorf("Invalid quoted text %q", literal) + } else { + // unquote + literal = literal[1 : len(literal)-1] + // remove escaped quotes prefix (aka. \) + firstChStr := string(firstCh) + literal = strings.Replace(literal, `\`+firstChStr, firstChStr, -1) + } + + return Token{Type: TokenText, Literal: literal}, err +} + +// scanSign consumes all contiguous sign operator runes. 
+func (s *Scanner) scanSign() (Token, error) { + var buf bytes.Buffer + + // Read every subsequent sign rune into the buffer. + // Non-sign runes and EOF will cause the loop to exit. + for { + ch := s.read() + + if ch == eof { + break + } + + if !isSignStartRune(ch) { + s.unread() + break + } + + // write the sign rune + buf.WriteRune(ch) + } + + literal := buf.String() + + var err error + if !isSignOperator(literal) { + err = fmt.Errorf("Invalid sign operator %q", literal) + } + + return Token{Type: TokenSign, Literal: literal}, err +} + +// scanJoin consumes all contiguous join operator runes. +func (s *Scanner) scanJoin() (Token, error) { + var buf bytes.Buffer + + // Read every subsequent join operator rune into the buffer. + // Non-join runes and EOF will cause the loop to exit. + for { + ch := s.read() + + if ch == eof { + break + } + + if !isJoinStartRune(ch) { + s.unread() + break + } + + // write the join operator rune + buf.WriteRune(ch) + } + + literal := buf.String() + + var err error + if !isJoinOperator(literal) { + err = fmt.Errorf("Invalid join operator %q", literal) + } + + return Token{Type: TokenJoin, Literal: literal}, err +} + +// scanGroup consumes all runes within a group/parenthesis. +func (s *Scanner) scanGroup() (Token, error) { + var buf bytes.Buffer + + // read the first group bracket without writting it to the buffer + firstChar := s.read() + openGroups := 1 + + // Read every subsequent text rune into the buffer. + // EOF and matching unescaped ending quote will cause the loop to exit. 
+ for { + ch := s.read() + + if ch == eof { + break + } + + if isGroupStartRune(ch) { + // nested group + openGroups++ + buf.WriteRune(ch) + } else if isTextStartRune(ch) { + s.unread() + t, err := s.scanText() + if err != nil { + // write the errored literal as it is + buf.WriteString(t.Literal) + return Token{Type: TokenGroup, Literal: buf.String()}, err + } + + // quote the literal to preserve the text start/end runes + buf.WriteString("\"" + t.Literal + "\"") + } else if ch == ')' { + openGroups-- + + if openGroups <= 0 { + // main group end + break + } else { + buf.WriteRune(ch) + } + } else { + buf.WriteRune(ch) + } + } + + literal := buf.String() + + var err error + if !isGroupStartRune(firstChar) || openGroups > 0 { + err = fmt.Errorf("Invalid formatted group - missing %d closing bracket(s).", openGroups) + } + + return Token{Type: TokenGroup, Literal: literal}, err +} + +// read reads the next rune from the buffered reader. +// Returns the `rune(0)` if an error or `io.EOF` occurs. +func (s *Scanner) read() rune { + ch, _, err := s.r.ReadRune() + if err != nil { + return eof + } + return ch +} + +// unread places the previously read rune back on the reader. +func (s *Scanner) unread() error { + return s.r.UnreadRune() +} + +// Lexical helpers: +// ------------------------------------------------------------------- + +// isWhitespaceRune checks if a rune is a space, tab, or newline. +func isWhitespaceRune(ch rune) bool { return ch == ' ' || ch == '\t' || ch == '\n' } + +// isLetterRune checks if a rune is a letter. +func isLetterRune(ch rune) bool { + return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') +} + +// isDigitRune checks if a rune is a digit. +func isDigitRune(ch rune) bool { + return (ch >= '0' && ch <= '9') +} + +// isIdentifierStartRune checks if a rune is valid identifier's first character. 
+func isIdentifierStartRune(ch rune) bool { + return isLetterRune(ch) || ch == '_' || ch == '@' || ch == '#' +} + +// isTextStartRune checks if a rune is a valid quoted text first character +// (aka. single or double quote). +func isTextStartRune(ch rune) bool { + return ch == '\'' || ch == '"' +} + +// isNumberStartRune checks if a rune is a valid number start character (aka. digit). +func isNumberStartRune(ch rune) bool { + return isDigitRune(ch) +} + +// isSignStartRune checks if a rune is a valid sign operator start character. +func isSignStartRune(ch rune) bool { + return ch == '=' || + ch == '!' || + ch == '>' || + ch == '<' || + ch == '~' +} + +// isJoinStartRune checks if a rune is a valid join type start character. +func isJoinStartRune(ch rune) bool { + return ch == '&' || ch == '|' +} + +// isGroupStartRune checks if a rune is a valid group/parenthesis start character. +func isGroupStartRune(ch rune) bool { + return ch == '(' +} + +// isSignOperator checks if a literal is a valid sign operator. +func isSignOperator(literal string) bool { + op := SignOp(literal) + + return op == SignEq || + op == SignNeq || + op == SignLt || + op == SignLte || + op == SignGt || + op == SignGte || + op == SignLike || + op == SignNlike +} + +// isJoinOperator checks if a literal is a valid join type operator. +func isJoinOperator(literal string) bool { + op := JoinOp(literal) + + return op == JoinAnd || op == JoinOr +} + +// isNumber checks if a literal is numeric. +func isNumber(literal string) bool { + // strconv.ParseFloat() considers numerics with dot suffix + // a valid floating point number (eg. "123."), but we don't want this + if literal == "" || literal[len(literal)-1] == '.' { + return false + } + + _, err := strconv.ParseFloat(literal, 64) + + return err == nil +} + +var identifierRegex = regexp.MustCompile(`^[\w\.\@\#]*\w+$`) + +// isIdentifier checks if a literal is properly formatted identifier. 
+func isIdentifier(literal string) bool { + return identifierRegex.MatchString(literal) +} diff --git a/scanner_test.go b/scanner_test.go new file mode 100644 index 0000000..f53ec27 --- /dev/null +++ b/scanner_test.go @@ -0,0 +1,116 @@ +package fexpr + +import ( + "fmt" + "strings" + "testing" +) + +func TestNewScanner(t *testing.T) { + s := NewScanner(strings.NewReader("test")) + dataBytes, _ := s.r.Peek(4) + data := string(dataBytes) + + if data != "test" { + t.Errorf("Expected the scanner reader data to be %q, got %q", "test", data) + } +} + +func TestScannerScan(t *testing.T) { + type output struct { + error bool + print string + } + testScenarios := []struct { + text string + expects []output + }{ + // whitespace + {" ", []output{{false, "{whitespace }"}}}, + {"test 123", []output{{false, "{identifier test}"}, {false, "{whitespace }"}, {false, "{number 123}"}}}, + // identifier + {`test`, []output{{false, `{identifier test}`}}}, + {`@test.123`, []output{{false, `{identifier @test.123}`}}}, + {`_test.123`, []output{{false, `{identifier _test.123}`}}}, + {`#test.123`, []output{{false, `{identifier #test.123}`}}}, + {`.test.123`, []output{{true, `{unexpected .}`}, {false, `{identifier test.123}`}}}, + {`test#@`, []output{{true, `{identifier test#@}`}}}, + {`test'`, []output{{false, `{identifier test}`}, {true, `{text '}`}}}, + {`test"d`, []output{{false, `{identifier test}`}, {true, `{text "d}`}}}, + // number + {`123`, []output{{false, `{number 123}`}}}, + {`123.123`, []output{{false, `{number 123.123}`}}}, + {`.123`, []output{{true, `{unexpected .}`}, {false, `{number 123}`}}}, + {`123.abc`, []output{{true, `{number 123.}`}, {false, `{identifier abc}`}}}, + // text + {`""`, []output{{false, `{text }`}}}, + {`''`, []output{{false, `{text }`}}}, + {`'test'`, []output{{false, `{text test}`}}}, + {`'te\'st'`, []output{{false, `{text te'st}`}}}, + {`"te\"st"`, []output{{false, `{text te"st}`}}}, + {`"tes@#,;!@#%^'\"t"`, []output{{false, `{text tes@#,;!@#%^'"t}`}}}, 
+ {`'tes@#,;!@#%^\'"t'`, []output{{false, `{text tes@#,;!@#%^'"t}`}}}, + {`"test`, []output{{true, `{text "test}`}}}, + {`'test`, []output{{true, `{text 'test}`}}}, + // join types + {`&&||`, []output{{true, `{join &&||}`}}}, + {`&& ||`, []output{{false, `{join &&}`}, {false, `{whitespace }`}, {false, `{join ||}`}}}, + {`'||test&&'&&123`, []output{{false, `{text ||test&&}`}, {false, `{join &&}`}, {false, `{number 123}`}}}, + // expression signs + {`=!=`, []output{{true, `{sign =!=}`}}}, + {`= != ~ !~ > >= < <=`, []output{ + {false, `{sign =}`}, + {false, `{whitespace }`}, + {false, `{sign !=}`}, + {false, `{whitespace }`}, + {false, `{sign ~}`}, + {false, `{whitespace }`}, + {false, `{sign !~}`}, + {false, `{whitespace }`}, + {false, `{sign >}`}, + {false, `{whitespace }`}, + {false, `{sign >=}`}, + {false, `{whitespace }`}, + {false, `{sign <}`}, + {false, `{whitespace }`}, + {false, `{sign <=}`}, + }}, + // groups/parenthesis + {`a)`, []output{{false, `{identifier a}`}, {true, `{unexpected )}`}}}, + {`(a b c`, []output{{true, `{group a b c}`}}}, + {`(a b c)`, []output{{false, `{group a b c}`}}}, + {`((a b c))`, []output{{false, `{group (a b c)}`}}}, + {`((a )b c))`, []output{{false, `{group (a )b c}`}, {true, `{unexpected )}`}}}, + {`("ab)("c)`, []output{{false, `{group "ab)("c}`}}}, + {`("ab)(c)`, []output{{true, `{group "ab)(c)}`}}}, + } + + for i, scenario := range testScenarios { + s := NewScanner(strings.NewReader(scenario.text)) + + // scan the text tokens + for j, expect := range scenario.expects { + token, err := s.Scan() + + if expect.error && err == nil { + t.Errorf("(%d.%d) Expected error, got nil (%q)", i, j, scenario.text) + } + + if !expect.error && err != nil { + t.Errorf("(%d.%d) Did not expect error, got %s (%q)", i, j, err, scenario.text) + } + + tokenPrint := fmt.Sprintf("%v", token) + + if tokenPrint != expect.print { + t.Errorf("(%d.%d) Expected token %s, got %s", i, j, expect.print, tokenPrint) + } + } + + // the last remaining token should 
be the eof + lastToken, err := s.Scan() + if err != nil || lastToken.Type != TokenEOF { + t.Errorf("(%d) Expected EOF token, got %v (%v)", i, lastToken, err) + } + } +}