Skip to content

Commit

Permalink
improvement: allowlist instead of denylist
Browse files Browse the repository at this point in the history
Instead of playing whack-a-mole by denying individual illegal token sequences, this commit is explicit about which tokens are allowed to follow one another, which is much more maintainable. It means that when you add new tokens to the language, you only need to think about where they are allowed to go; any other placement produces an error. It also makes it easier to prevent illegal syntax – there are edge cases in the "before" case here that an explicit allowlist solves.

Attempting to parse "x2" still fails, but it previously failed with a cryptic runtime error: `runtime.boundsError{x:0, y:0, signed:true, code:0x0}`. Now we get a more helpful error message: `Unexpected token 'X' at the beginning of the sequence`. The same is true for other inputs where "x" is not preceded by an element, like "(x2)", which used to return a boundsError and now fails earlier, in the parsing step.
  • Loading branch information
wbhob committed Oct 20, 2024
1 parent 5c63fa3 commit 4c4f07e
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 20 deletions.
2 changes: 2 additions & 0 deletions main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,8 @@ func TestParseNumberSequenceShorthand(t *testing.T) {
"(1, 2, 3)2, (4, 5)x2",
"(((1, 2x2)x2)x2",
"((1, 2)x2)x2)x2",
"(x2)",
"x2",
}

for _, input := range invalidInputs {
Expand Down
52 changes: 32 additions & 20 deletions parser.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package main

import "fmt"

// ASTNode represents a node in the Abstract Syntax Tree
// It's a flexible structure that can represent any element in our language
type ASTNode struct {
Expand Down Expand Up @@ -29,10 +31,15 @@ func parseSequence(tokens []Token) *ASTNode {
}

// If there are no tokens, return an empty sequence
if len(tokens) == 0 {
if len(tokens) == 0 || tokens[0].Type == EOF {
return sequence
}

// The sequence must start with a NUMBER or a LPAREN
if tokens[0].Type != NUMBER && tokens[0].Type != LPAREN {
panic(fmt.Sprintf("Unexpected token '%s' at the beginning of the sequence", tokens[0].Type))
}

// Initialize variables to keep track of the current element and parenthesis nesting
element := make([]Token, 0)
parenthesisCount := 0
Expand All @@ -47,34 +54,39 @@ func parseSequence(tokens []Token) *ASTNode {
nextToken = Token{Type: EOF}
}

// Validate sequence structure
// Ensure that two numbers are not adjacent without a comma
if token.Type == NUMBER && nextToken.Type == NUMBER {
panic("Invalid sequence: elements must be separated by commas")
}

// Handle different token types
switch token.Type {
case LPAREN:
parenthesisCount++
// Ensure a comma doesn't immediately follow an opening parenthesis
if nextToken.Type == COMMA {
panic("Invalid sequence: comma cannot follow left parenthesis")
// LPAREN can only be followed by a NUMBER or another LPAREN
if nextToken.Type != NUMBER && nextToken.Type != LPAREN {
panic(fmt.Sprintf("Unexpected token '%s' after left parenthesis", nextToken.Type))
}
case RPAREN:
parenthesisCount--
// Ensure a number doesn't immediately follow a closing parenthesis
if nextToken.Type == NUMBER {
panic("Invalid sequence: number cannot follow right parenthesis")
}
// Ensure two left parentheses are not in a row
if nextToken.Type == LPAREN {
panic("Invalid sequence: two left parentheses in a row")
// RPAREN can only be followed by a comma, another RPAREN, EOF, or X
if nextToken.Type != COMMA && nextToken.Type != RPAREN && nextToken.Type != EOF && nextToken.Type != X {
panic(fmt.Sprintf("Unexpected token '%s' after right parenthesis", nextToken.Type))
}
case COMMA:
// Ensure two commas are not in a row
if nextToken.Type == COMMA {
panic("Invalid sequence: two commas in a row")
// COMMA can only be followed by a NUMBER or a LPAREN
if nextToken.Type != NUMBER && nextToken.Type != LPAREN {
panic(fmt.Sprintf("Unexpected token '%s' after comma", nextToken.Type))
}
case NUMBER:
// NUMBER can only be followed by a comma, RPAREN, EOF, or X
if nextToken.Type != COMMA && nextToken.Type != RPAREN && nextToken.Type != EOF && nextToken.Type != X {
panic(fmt.Sprintf("Unexpected token '%s' after number", nextToken.Type))
}
case X:
// X can only be followed by a NUMBER
if nextToken.Type != NUMBER {
panic(fmt.Sprintf("Unexpected token '%s' after 'x'", nextToken.Type))
}
case EOF:
// EOF can only be followed by EOF
if nextToken.Type != EOF {
panic(fmt.Sprintf("Unexpected token '%s' after EOF", nextToken.Type))
}
}

Expand Down

0 comments on commit 4c4f07e

Please sign in to comment.