From 4c4f07e417bc81224d2d739cc4a45255ad648ab5 Mon Sep 17 00:00:00 2001 From: Wilson Hobbs Date: Sun, 20 Oct 2024 14:04:35 -0700 Subject: [PATCH] improvement: allowlist instead of denylist MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of playing whack-a-mole by denying individual token sequences, this commit makes the parser explicit about which tokens are allowed to follow one another, which is much more maintainable. It means that when you add new tokens to the language, you only need to think about where they are allowed to go; anywhere else, the parser will error. It also makes it easier to prevent illegal syntax – the previous denylist missed edge cases that an explicit allowlist catches. For example, attempting to parse "x2" already failed before this change, but with a cryptic runtime error: `runtime.boundsError{x:0, y:0, signed:true, code:0x0}`. Now we get a more helpful error message: `Unexpected token 'X' at the beginning of the sequence`. The same is true for other inputs whose elements start with "x", like "(x2)", which used to fail with a boundsError and now error earlier, in the parsing step. 
--- main_test.go | 2 ++ parser.go | 52 ++++++++++++++++++++++++++++++++-------------------- 2 files changed, 34 insertions(+), 20 deletions(-) diff --git a/main_test.go b/main_test.go index dff9d92..d4e42a9 100644 --- a/main_test.go +++ b/main_test.go @@ -74,6 +74,8 @@ func TestParseNumberSequenceShorthand(t *testing.T) { "(1, 2, 3)2, (4, 5)x2", "(((1, 2x2)x2)x2", "((1, 2)x2)x2)x2", + "(x2)", + "x2", } for _, input := range invalidInputs { diff --git a/parser.go b/parser.go index 3f5b815..97bc514 100644 --- a/parser.go +++ b/parser.go @@ -1,5 +1,7 @@ package main +import "fmt" + // ASTNode represents a node in the Abstract Syntax Tree // It's a flexible structure that can represent any element in our language type ASTNode struct { @@ -29,10 +31,15 @@ func parseSequence(tokens []Token) *ASTNode { } // If there are no tokens, return an empty sequence - if len(tokens) == 0 { + if len(tokens) == 0 || tokens[0].Type == EOF { return sequence } + // The sequence must start with a NUMBER or a LPAREN + if tokens[0].Type != NUMBER && tokens[0].Type != LPAREN { + panic(fmt.Sprintf("Unexpected token '%s' at the beginning of the sequence", tokens[0].Type)) + } + // Initialize variables to keep track of the current element and parenthesis nesting element := make([]Token, 0) parenthesisCount := 0 @@ -47,34 +54,39 @@ func parseSequence(tokens []Token) *ASTNode { nextToken = Token{Type: EOF} } - // Validate sequence structure - // Ensure that two numbers are not adjacent without a comma - if token.Type == NUMBER && nextToken.Type == NUMBER { - panic("Invalid sequence: elements must be separated by commas") - } - // Handle different token types switch token.Type { case LPAREN: parenthesisCount++ - // Ensure a comma doesn't immediately follow an opening parenthesis - if nextToken.Type == COMMA { - panic("Invalid sequence: comma cannot follow left parenthesis") + // LPAREN can only be followed by a NUMBER or another LPAREN + if nextToken.Type != NUMBER && nextToken.Type != LPAREN { + 
panic(fmt.Sprintf("Unexpected token '%s' after left parenthesis", nextToken.Type)) } case RPAREN: parenthesisCount-- - // Ensure a number doesn't immediately follow a closing parenthesis - if nextToken.Type == NUMBER { - panic("Invalid sequence: number cannot follow right parenthesis") - } - // Ensure two left parentheses are not in a row - if nextToken.Type == LPAREN { - panic("Invalid sequence: two left parentheses in a row") + // RPAREN can only be followed by a comma, another RPAREN, EOF, or X + if nextToken.Type != COMMA && nextToken.Type != RPAREN && nextToken.Type != EOF && nextToken.Type != X { + panic(fmt.Sprintf("Unexpected token '%s' after right parenthesis", nextToken.Type)) } case COMMA: - // Ensure two commas are not in a row - if nextToken.Type == COMMA { - panic("Invalid sequence: two commas in a row") + // COMMA can only be followed by a NUMBER or a LPAREN + if nextToken.Type != NUMBER && nextToken.Type != LPAREN { + panic(fmt.Sprintf("Unexpected token '%s' after comma", nextToken.Type)) + } + case NUMBER: + // NUMBER can only be followed by a comma, RPAREN, EOF, or X + if nextToken.Type != COMMA && nextToken.Type != RPAREN && nextToken.Type != EOF && nextToken.Type != X { + panic(fmt.Sprintf("Unexpected token '%s' after number", nextToken.Type)) + } + case X: + // X can only be followed by a NUMBER + if nextToken.Type != NUMBER { + panic(fmt.Sprintf("Unexpected token '%s' after 'x'", nextToken.Type)) + } + case EOF: + // EOF can only be followed by EOF + if nextToken.Type != EOF { + panic(fmt.Sprintf("Unexpected token '%s' after EOF", nextToken.Type)) } }