From 6d2e42aa10071e1b32f2d575aab007d7b84b618a Mon Sep 17 00:00:00 2001 From: goropikari Date: Mon, 29 Aug 2022 01:42:57 +0900 Subject: [PATCH] support range expression --- automata/dfa_test.go | 2 +- compiler/regexp/lexer.go | 21 + compiler/regexp/lexer_test.go | 17 +- compiler/regexp/parser.go | 120 ++- sample/README.md | 129 +++- sample/golex.yy.go | 1294 +++++++++++++++++++++++++++++++-- sample/sample.l | 63 +- 7 files changed, 1549 insertions(+), 97 deletions(-) diff --git a/automata/dfa_test.go b/automata/dfa_test.go index e094390..f3678a9 100644 --- a/automata/dfa_test.go +++ b/automata/dfa_test.go @@ -178,7 +178,7 @@ func TestDot(t *testing.T) { // generate dot file // go test ./automata/ -run TestDot - s, _ := lexerNFA([]string{"a", "abb", "a*bb*"}).ToDFA().LexerMinimize().RemoveBH().ToDot() + s, _ := lexerNFA([]string{"a", "abb", "a*bb*", "e[^a-zA-Z0-9]*h", "f[a-d]e"}).ToDFA().LexerMinimize().RemoveBH().ToDot() err := os.WriteFile("ex.dot", []byte(s), 0666) if err != nil { log.Fatal(err) diff --git a/compiler/regexp/lexer.go b/compiler/regexp/lexer.go index 2d89d18..463f7a2 100644 --- a/compiler/regexp/lexer.go +++ b/compiler/regexp/lexer.go @@ -15,9 +15,13 @@ const ( SymbolTokenType TokenType = iota + 1 DotTokenType StarTokenType + MinusTokenType LParenTokenType RParenTokenType + LSqBracketTokenType + RSqBracketTokenType BarTokenType + NegationTokenType ) type Token struct { @@ -99,10 +103,27 @@ func (lex *Lexer) Scan() []Token { typ = SymbolTokenType case '*': typ = StarTokenType + case '-': + typ = MinusTokenType case '(': typ = LParenTokenType case ')': typ = RParenTokenType + case '[': + lex.tokens = append(lex.tokens, NewToken(LSqBracketTokenType, ru)) + ru2, err := lex.read() + if errors.Is(err, io.EOF) { + panic(ErrInvalidRegex) + } + ru = ru2 + switch ru { + case '^': + typ = NegationTokenType + default: + typ = SymbolTokenType + } + case ']': + typ = RSqBracketTokenType case '|': typ = BarTokenType case '.': diff --git 
a/compiler/regexp/lexer_test.go b/compiler/regexp/lexer_test.go index be6a5fc..3e838da 100644 --- a/compiler/regexp/lexer_test.go +++ b/compiler/regexp/lexer_test.go @@ -15,7 +15,7 @@ func TestLexer_Scan(t *testing.T) { }{ { name: "lexer test", - regex: "a(b|c*)deあいう\t\n", + regex: "a(b|c*)deあいう\t\n[a-z][^A-Z]\\+\\-\\*/", expected: []regexp.Token{ regexp.NewToken(regexp.SymbolTokenType, 'a'), regexp.NewToken(regexp.LParenTokenType, '('), @@ -31,6 +31,21 @@ func TestLexer_Scan(t *testing.T) { regexp.NewToken(regexp.SymbolTokenType, 'う'), regexp.NewToken(regexp.SymbolTokenType, '\t'), regexp.NewToken(regexp.SymbolTokenType, '\n'), + regexp.NewToken(regexp.LSqBracketTokenType, '['), + regexp.NewToken(regexp.SymbolTokenType, 'a'), + regexp.NewToken(regexp.MinusTokenType, '-'), + regexp.NewToken(regexp.SymbolTokenType, 'z'), + regexp.NewToken(regexp.RSqBracketTokenType, ']'), + regexp.NewToken(regexp.LSqBracketTokenType, '['), + regexp.NewToken(regexp.NegationTokenType, '^'), + regexp.NewToken(regexp.SymbolTokenType, 'A'), + regexp.NewToken(regexp.MinusTokenType, '-'), + regexp.NewToken(regexp.SymbolTokenType, 'Z'), + regexp.NewToken(regexp.RSqBracketTokenType, ']'), + regexp.NewToken(regexp.SymbolTokenType, '+'), + regexp.NewToken(regexp.SymbolTokenType, '-'), + regexp.NewToken(regexp.SymbolTokenType, '*'), + regexp.NewToken(regexp.SymbolTokenType, '/'), }, }, } diff --git a/compiler/regexp/parser.go b/compiler/regexp/parser.go index d97f153..7576f0a 100644 --- a/compiler/regexp/parser.go +++ b/compiler/regexp/parser.go @@ -3,6 +3,9 @@ package regexp import ( "errors" "io" + + "github.com/goropikari/golex/automata" + "github.com/goropikari/golex/collection" ) var ( @@ -83,6 +86,75 @@ func (p *Parser) sum() (RegexExpr, error) { return lhs, nil } +func (p *Parser) set() (RegexExpr, error) { + neg := false + runes := make([]rune, 0) + var prev rune + + for { + tok, err := p.peek() + if err != nil { + return nil, err + } + switch tok.GetType() { + case RSqBracketTokenType: 
+ if prev == '-' { + return nil, ErrParse + } + goto Out + case NegationTokenType: + prev = tok.GetRune() + neg = true + case MinusTokenType: + prev = tok.GetRune() + default: + ru := tok.GetRune() + if prev == '-' { + from := runes[len(runes)-1] + if from > ru { + return nil, ErrParse + } + for t := from + 1; t < ru; t++ { + runes = append(runes, t) + } + } + runes = append(runes, ru) + prev = ru + } + _, _ = p.read() + } +Out: + var expr RegexExpr + if !neg { + expr = NewSymbolExpr(runes[0]) + if len(runes) == 1 { + return expr, nil + } + + for i := 1; i < len(runes); i++ { + rhs := NewSymbolExpr(runes[i]) + expr = NewSumExpr(expr, rhs) + } + return expr, nil + } + + ruSet := collection.NewSet[rune]() + for _, ru := range runes { + ruSet.Insert(ru) + } + for _, ru := range automata.SupportedChars { + if !ruSet.Contains(ru) { + if expr == nil { + expr = NewSymbolExpr(ru) + } else { + expr = NewSumExpr(expr, NewSymbolExpr(ru)) + } + } + } + + return expr, nil +} + func (p *Parser) concat() (RegexExpr, error) { lhs, err := p.star() if err != nil { @@ -98,7 +170,7 @@ func (p *Parser) concat() (RegexExpr, error) { } switch b.GetType() { - case SymbolTokenType, DotTokenType, LParenTokenType: + case SymbolTokenType, DotTokenType, LParenTokenType, LSqBracketTokenType: rhs, err := p.concat() if err != nil { return nil, err @@ -133,28 +205,38 @@ func (p *Parser) primary() (RegexExpr, error) { if err != nil { return nil, err } - if s.GetType() == SymbolTokenType { + + switch s.GetType() { + case SymbolTokenType: return NewSymbolExpr(s.GetRune()), nil - } - if s.GetType() == DotTokenType { + case DotTokenType: return NewDotExpr(), nil - } - if s.GetType() != LParenTokenType { - return nil, ErrParse + case LParenTokenType: + sum, err := p.sum() + if err != nil { + return nil, err + } + r, err := p.read() + if err != nil { + return nil, err + } + if r.GetType() == RParenTokenType { + return sum, nil + } + case LSqBracketTokenType: + set, err := p.set() + if err != nil { + 
+ return nil, err + } + r, err := p.read() + if err != nil { + return nil, err + } + if r.GetType() == RSqBracketTokenType { + return set, nil + } } - // grouping expr - sum, err := p.sum() - if err != nil { - return nil, err - } - r, err := p.read() - if err != nil { - return nil, err - } - if r.GetType() == RParenTokenType { - return sum, nil - } return nil, ErrParse } diff --git a/sample/README.md b/sample/README.md index 5bf002c..93f6c4a 100644 --- a/sample/README.md +++ b/sample/README.md @@ -1,37 +1,132 @@ + +Lexically analyze the following function definition. +```go +func foo000() int { + x := 1 * 10 + 123 - 1000 / 5432 + + return x +} +``` + + ```bash $ go install github.com/goropikari/golex@v0.2.0 $ golex sample.l $ go run golex.yy.go - -2 abb -3 ab -1 a -1 a -1 a +Keyword + "func" +Identifier + "foo000" +LParen + "(" +RParen + ")" +Identifier + "int" +LBracket + "{" +Identifier + "x" +Operator + ":=" +Digit + "1" +Operator + "*" +Digit + "10" +Operator + "+" +Digit + "123" +Operator + "-" +Digit + "1000" +Operator + "/" +Digit + "5432" +Keyword + "return" +Identifier + "x" +RBracket + "}" +2022/08/29 01:39:20 EOF +exit status 1 ``` -`golex.yy.go` +`sample.l` ```go -// "a" { return State1, nil } -// "abb" { return State2, nil } -// "a*bb*" { return State3, nil } +%{ +import ( + "fmt" + "log" +) + +type Type = int +const ( + Keyword Type = iota + 1 + Identifier + Digit + Whitespace + LParen + RParen + LBracket + RBracket + Operator +) + +%} + +%% +"if|for|while|func" { return Keyword, nil } +"[a-zA-Z][a-zA-Z0-9]*" { return Identifier, nil } +"[1-9][0-9]*" { return Digit, nil } +"[ \t\n\r]*" { return Whitespace, nil } +"\\(" { return LParen, nil } +"\\)" { return RParen, nil } +"{" { return LBracket, nil } +"}" { return RBracket, nil } +"[\\+|\\-|\\*|/|:=|==|!=]" { return Operator, nil } +"."
{} +%% func main() { - lex := New("abbabaaa") + lex := New(` +func foo000() { + x := 1 * 10 + 123 - 1000 / 5432 +} +`) for { n, err := lex.Next() if err != nil { + log.Fatal(err) return } switch n { - case State1: - fmt.Println(State1, YYtext) - case State2: - fmt.Println(State2, YYtext) - case State3: - fmt.Println(State3, YYtext) + case Keyword: + fmt.Println("Keyword") + case Identifier: + fmt.Println("Identifier") + case Digit: + fmt.Println("Digit") + case Whitespace: + fmt.Println("Whitespace") + case LParen: + fmt.Println("LParen") + case RParen: + fmt.Println("RParen") + case LBracket: + fmt.Println("LBracket") + case RBracket: + fmt.Println("RBracket") + case Operator: + fmt.Println("Operator") } + fmt.Printf("\t %#v\n",YYtext) } } ``` diff --git a/sample/golex.yy.go b/sample/golex.yy.go index 2192b46..74bab20 100644 --- a/sample/golex.yy.go +++ b/sample/golex.yy.go @@ -9,10 +9,15 @@ import ( type Type = int const ( - State1 Type = iota + 1 - State2 - State3 - Other + Keyword Type = iota + 1 + Identifier + Digit + Whitespace + LParen + RParen + LBracket + RBracket + Operator ) type yyStateID = int @@ -24,52 +29,1214 @@ var ( EOF = errors.New("EOF") ) -// 生成する // state id to regex id var yyStateIDToRegexID = []yyRegexID{ 0, // state 0 は BH state - 0, + 4, + 2, + 9, + 2, 2, - 1, - 0, - 3, 3, + 2, + 2, + 6, + 2, + 4, + 1, + 2, + 8, + 2, + 2, + 7, + 2, + 5, + 10, + 10, + 2, + 2, + 2, + 2, + 2, } -// 生成する var yyFinStates = map[yyStateID]struct{}{ - 2: {}, - 3: {}, - 5: {}, - 6: {}, + 1: {}, + 2: {}, + 3: {}, + 4: {}, + 5: {}, + 6: {}, + 7: {}, + 8: {}, + 9: {}, + 10: {}, + 11: {}, + 12: {}, + 13: {}, + 14: {}, + 15: {}, + 16: {}, + 17: {}, + 18: {}, + 19: {}, + 20: {}, + 21: {}, + 22: {}, + 23: {}, + 24: {}, + 25: {}, + 26: {}, } -// 生成する var yyTransitionTable = map[yyStateID]map[rune]yyStateID{ 1: { - 97: 3, - 98: 6, + 33: 20, + 34: 21, + 35: 21, + 36: 21, + 37: 21, + 38: 21, + 39: 21, + 40: 19, + 41: 9, + 42: 3, + 43: 3, + 44: 21, + 45: 3, + 46: 21, + 47: 3, 
+ 48: 21, + 49: 6, + 50: 6, + 51: 6, + 52: 6, + 53: 6, + 54: 6, + 55: 6, + 56: 6, + 57: 6, + 58: 20, + 59: 21, + 60: 21, + 61: 20, + 62: 21, + 63: 21, + 64: 21, + 65: 4, + 66: 4, + 67: 4, + 68: 4, + 69: 4, + 70: 4, + 71: 4, + 72: 4, + 73: 4, + 74: 4, + 75: 4, + 76: 4, + 77: 4, + 78: 4, + 79: 4, + 80: 4, + 81: 4, + 82: 4, + 83: 4, + 84: 4, + 85: 4, + 86: 4, + 87: 4, + 88: 4, + 89: 4, + 90: 4, + 91: 21, + 92: 21, + 93: 21, + 94: 21, + 95: 21, + 96: 21, + 97: 4, + 98: 4, + 99: 4, + 100: 4, + 101: 4, + 102: 7, + 103: 4, + 104: 4, + 105: 8, + 106: 4, + 107: 4, + 108: 4, + 109: 4, + 110: 4, + 111: 4, + 112: 4, + 113: 4, + 114: 25, + 115: 4, + 116: 4, + 117: 4, + 118: 4, + 119: 2, + 120: 4, + 121: 4, + 122: 4, + 123: 17, + 124: 21, + 125: 14, + 126: 21, + 32: 11, + 9: 11, + 10: 11, + 13: 11, }, 2: { - 98: 6, - }, - 3: { - 97: 4, - 98: 5, + 48: 4, + 49: 4, + 50: 4, + 51: 4, + 52: 4, + 53: 4, + 54: 4, + 55: 4, + 56: 4, + 57: 4, + 65: 4, + 66: 4, + 67: 4, + 68: 4, + 69: 4, + 70: 4, + 71: 4, + 72: 4, + 73: 4, + 74: 4, + 75: 4, + 76: 4, + 77: 4, + 78: 4, + 79: 4, + 80: 4, + 81: 4, + 82: 4, + 83: 4, + 84: 4, + 85: 4, + 86: 4, + 87: 4, + 88: 4, + 89: 4, + 90: 4, + 97: 4, + 98: 4, + 99: 4, + 100: 4, + 101: 4, + 102: 4, + 103: 4, + 104: 15, + 105: 4, + 106: 4, + 107: 4, + 108: 4, + 109: 4, + 110: 4, + 111: 4, + 112: 4, + 113: 4, + 114: 4, + 115: 4, + 116: 4, + 117: 4, + 118: 4, + 119: 4, + 120: 4, + 121: 4, + 122: 4, }, 4: { - 97: 4, - 98: 6, + 48: 4, + 49: 4, + 50: 4, + 51: 4, + 52: 4, + 53: 4, + 54: 4, + 55: 4, + 56: 4, + 57: 4, + 65: 4, + 66: 4, + 67: 4, + 68: 4, + 69: 4, + 70: 4, + 71: 4, + 72: 4, + 73: 4, + 74: 4, + 75: 4, + 76: 4, + 77: 4, + 78: 4, + 79: 4, + 80: 4, + 81: 4, + 82: 4, + 83: 4, + 84: 4, + 85: 4, + 86: 4, + 87: 4, + 88: 4, + 89: 4, + 90: 4, + 97: 4, + 98: 4, + 99: 4, + 100: 4, + 101: 4, + 102: 4, + 103: 4, + 104: 4, + 105: 4, + 106: 4, + 107: 4, + 108: 4, + 109: 4, + 110: 4, + 111: 4, + 112: 4, + 113: 4, + 114: 4, + 115: 4, + 116: 4, + 117: 4, + 118: 4, + 119: 
4, + 120: 4, + 121: 4, + 122: 4, }, 5: { - 98: 2, + 48: 4, + 49: 4, + 50: 4, + 51: 4, + 52: 4, + 53: 4, + 54: 4, + 55: 4, + 56: 4, + 57: 4, + 65: 4, + 66: 4, + 67: 4, + 68: 4, + 69: 4, + 70: 4, + 71: 4, + 72: 4, + 73: 4, + 74: 4, + 75: 4, + 76: 4, + 77: 4, + 78: 4, + 79: 4, + 80: 4, + 81: 4, + 82: 4, + 83: 4, + 84: 4, + 85: 4, + 86: 4, + 87: 4, + 88: 4, + 89: 4, + 90: 4, + 97: 4, + 98: 4, + 99: 4, + 100: 4, + 101: 4, + 102: 4, + 103: 4, + 104: 4, + 105: 4, + 106: 4, + 107: 4, + 108: 4, + 109: 4, + 110: 4, + 111: 4, + 112: 4, + 113: 4, + 114: 12, + 115: 4, + 116: 4, + 117: 4, + 118: 4, + 119: 4, + 120: 4, + 121: 4, + 122: 4, }, 6: { - 98: 6, + 48: 6, + 49: 6, + 50: 6, + 51: 6, + 52: 6, + 53: 6, + 54: 6, + 55: 6, + 56: 6, + 57: 6, + }, + 7: { + 48: 4, + 49: 4, + 50: 4, + 51: 4, + 52: 4, + 53: 4, + 54: 4, + 55: 4, + 56: 4, + 57: 4, + 65: 4, + 66: 4, + 67: 4, + 68: 4, + 69: 4, + 70: 4, + 71: 4, + 72: 4, + 73: 4, + 74: 4, + 75: 4, + 76: 4, + 77: 4, + 78: 4, + 79: 4, + 80: 4, + 81: 4, + 82: 4, + 83: 4, + 84: 4, + 85: 4, + 86: 4, + 87: 4, + 88: 4, + 89: 4, + 90: 4, + 97: 4, + 98: 4, + 99: 4, + 100: 4, + 101: 4, + 102: 4, + 103: 4, + 104: 4, + 105: 4, + 106: 4, + 107: 4, + 108: 4, + 109: 4, + 110: 4, + 111: 5, + 112: 4, + 113: 4, + 114: 4, + 115: 4, + 116: 4, + 117: 18, + 118: 4, + 119: 4, + 120: 4, + 121: 4, + 122: 4, + }, + 8: { + 48: 4, + 49: 4, + 50: 4, + 51: 4, + 52: 4, + 53: 4, + 54: 4, + 55: 4, + 56: 4, + 57: 4, + 65: 4, + 66: 4, + 67: 4, + 68: 4, + 69: 4, + 70: 4, + 71: 4, + 72: 4, + 73: 4, + 74: 4, + 75: 4, + 76: 4, + 77: 4, + 78: 4, + 79: 4, + 80: 4, + 81: 4, + 82: 4, + 83: 4, + 84: 4, + 85: 4, + 86: 4, + 87: 4, + 88: 4, + 89: 4, + 90: 4, + 97: 4, + 98: 4, + 99: 4, + 100: 4, + 101: 4, + 102: 12, + 103: 4, + 104: 4, + 105: 4, + 106: 4, + 107: 4, + 108: 4, + 109: 4, + 110: 4, + 111: 4, + 112: 4, + 113: 4, + 114: 4, + 115: 4, + 116: 4, + 117: 4, + 118: 4, + 119: 4, + 120: 4, + 121: 4, + 122: 4, + }, + 10: { + 48: 4, + 49: 4, + 50: 4, + 51: 4, + 52: 4, + 53: 4, + 54: 
4, + 55: 4, + 56: 4, + 57: 4, + 65: 4, + 66: 4, + 67: 4, + 68: 4, + 69: 4, + 70: 4, + 71: 4, + 72: 4, + 73: 4, + 74: 4, + 75: 4, + 76: 4, + 77: 4, + 78: 4, + 79: 4, + 80: 4, + 81: 4, + 82: 4, + 83: 4, + 84: 4, + 85: 4, + 86: 4, + 87: 4, + 88: 4, + 89: 4, + 90: 4, + 97: 4, + 98: 4, + 99: 4, + 100: 4, + 101: 4, + 102: 4, + 103: 4, + 104: 4, + 105: 4, + 106: 4, + 107: 4, + 108: 4, + 109: 4, + 110: 12, + 111: 4, + 112: 4, + 113: 4, + 114: 4, + 115: 4, + 116: 4, + 117: 4, + 118: 4, + 119: 4, + 120: 4, + 121: 4, + 122: 4, + }, + 11: { + 32: 11, + 9: 11, + 10: 11, + 13: 11, + }, + 12: { + 48: 4, + 49: 4, + 50: 4, + 51: 4, + 52: 4, + 53: 4, + 54: 4, + 55: 4, + 56: 4, + 57: 4, + 65: 4, + 66: 4, + 67: 4, + 68: 4, + 69: 4, + 70: 4, + 71: 4, + 72: 4, + 73: 4, + 74: 4, + 75: 4, + 76: 4, + 77: 4, + 78: 4, + 79: 4, + 80: 4, + 81: 4, + 82: 4, + 83: 4, + 84: 4, + 85: 4, + 86: 4, + 87: 4, + 88: 4, + 89: 4, + 90: 4, + 97: 4, + 98: 4, + 99: 4, + 100: 4, + 101: 4, + 102: 4, + 103: 4, + 104: 4, + 105: 4, + 106: 4, + 107: 4, + 108: 4, + 109: 4, + 110: 4, + 111: 4, + 112: 4, + 113: 4, + 114: 4, + 115: 4, + 116: 4, + 117: 4, + 118: 4, + 119: 4, + 120: 4, + 121: 4, + 122: 4, + }, + 13: { + 48: 4, + 49: 4, + 50: 4, + 51: 4, + 52: 4, + 53: 4, + 54: 4, + 55: 4, + 56: 4, + 57: 4, + 65: 4, + 66: 4, + 67: 4, + 68: 4, + 69: 4, + 70: 4, + 71: 4, + 72: 4, + 73: 4, + 74: 4, + 75: 4, + 76: 4, + 77: 4, + 78: 4, + 79: 4, + 80: 4, + 81: 4, + 82: 4, + 83: 4, + 84: 4, + 85: 4, + 86: 4, + 87: 4, + 88: 4, + 89: 4, + 90: 4, + 97: 4, + 98: 4, + 99: 4, + 100: 4, + 101: 12, + 102: 4, + 103: 4, + 104: 4, + 105: 4, + 106: 4, + 107: 4, + 108: 4, + 109: 4, + 110: 4, + 111: 4, + 112: 4, + 113: 4, + 114: 4, + 115: 4, + 116: 4, + 117: 4, + 118: 4, + 119: 4, + 120: 4, + 121: 4, + 122: 4, + }, + 15: { + 48: 4, + 49: 4, + 50: 4, + 51: 4, + 52: 4, + 53: 4, + 54: 4, + 55: 4, + 56: 4, + 57: 4, + 65: 4, + 66: 4, + 67: 4, + 68: 4, + 69: 4, + 70: 4, + 71: 4, + 72: 4, + 73: 4, + 74: 4, + 75: 4, + 76: 4, + 77: 4, + 78: 4, + 79: 
4, + 80: 4, + 81: 4, + 82: 4, + 83: 4, + 84: 4, + 85: 4, + 86: 4, + 87: 4, + 88: 4, + 89: 4, + 90: 4, + 97: 4, + 98: 4, + 99: 4, + 100: 4, + 101: 4, + 102: 4, + 103: 4, + 104: 4, + 105: 26, + 106: 4, + 107: 4, + 108: 4, + 109: 4, + 110: 4, + 111: 4, + 112: 4, + 113: 4, + 114: 4, + 115: 4, + 116: 4, + 117: 4, + 118: 4, + 119: 4, + 120: 4, + 121: 4, + 122: 4, + }, + 16: { + 48: 4, + 49: 4, + 50: 4, + 51: 4, + 52: 4, + 53: 4, + 54: 4, + 55: 4, + 56: 4, + 57: 4, + 65: 4, + 66: 4, + 67: 4, + 68: 4, + 69: 4, + 70: 4, + 71: 4, + 72: 4, + 73: 4, + 74: 4, + 75: 4, + 76: 4, + 77: 4, + 78: 4, + 79: 4, + 80: 4, + 81: 4, + 82: 4, + 83: 4, + 84: 4, + 85: 4, + 86: 4, + 87: 4, + 88: 4, + 89: 4, + 90: 4, + 97: 4, + 98: 4, + 99: 4, + 100: 4, + 101: 4, + 102: 4, + 103: 4, + 104: 4, + 105: 4, + 106: 4, + 107: 4, + 108: 4, + 109: 4, + 110: 4, + 111: 4, + 112: 4, + 113: 4, + 114: 4, + 115: 4, + 116: 24, + 117: 4, + 118: 4, + 119: 4, + 120: 4, + 121: 4, + 122: 4, + }, + 18: { + 48: 4, + 49: 4, + 50: 4, + 51: 4, + 52: 4, + 53: 4, + 54: 4, + 55: 4, + 56: 4, + 57: 4, + 65: 4, + 66: 4, + 67: 4, + 68: 4, + 69: 4, + 70: 4, + 71: 4, + 72: 4, + 73: 4, + 74: 4, + 75: 4, + 76: 4, + 77: 4, + 78: 4, + 79: 4, + 80: 4, + 81: 4, + 82: 4, + 83: 4, + 84: 4, + 85: 4, + 86: 4, + 87: 4, + 88: 4, + 89: 4, + 90: 4, + 97: 4, + 98: 4, + 99: 4, + 100: 4, + 101: 4, + 102: 4, + 103: 4, + 104: 4, + 105: 4, + 106: 4, + 107: 4, + 108: 4, + 109: 4, + 110: 23, + 111: 4, + 112: 4, + 113: 4, + 114: 4, + 115: 4, + 116: 4, + 117: 4, + 118: 4, + 119: 4, + 120: 4, + 121: 4, + 122: 4, + }, + 20: { + 61: 3, + }, + 22: { + 48: 4, + 49: 4, + 50: 4, + 51: 4, + 52: 4, + 53: 4, + 54: 4, + 55: 4, + 56: 4, + 57: 4, + 65: 4, + 66: 4, + 67: 4, + 68: 4, + 69: 4, + 70: 4, + 71: 4, + 72: 4, + 73: 4, + 74: 4, + 75: 4, + 76: 4, + 77: 4, + 78: 4, + 79: 4, + 80: 4, + 81: 4, + 82: 4, + 83: 4, + 84: 4, + 85: 4, + 86: 4, + 87: 4, + 88: 4, + 89: 4, + 90: 4, + 97: 4, + 98: 4, + 99: 4, + 100: 4, + 101: 4, + 102: 4, + 103: 4, + 104: 4, + 105: 4, + 
106: 4, + 107: 4, + 108: 4, + 109: 4, + 110: 4, + 111: 4, + 112: 4, + 113: 4, + 114: 10, + 115: 4, + 116: 4, + 117: 4, + 118: 4, + 119: 4, + 120: 4, + 121: 4, + 122: 4, + }, + 23: { + 48: 4, + 49: 4, + 50: 4, + 51: 4, + 52: 4, + 53: 4, + 54: 4, + 55: 4, + 56: 4, + 57: 4, + 65: 4, + 66: 4, + 67: 4, + 68: 4, + 69: 4, + 70: 4, + 71: 4, + 72: 4, + 73: 4, + 74: 4, + 75: 4, + 76: 4, + 77: 4, + 78: 4, + 79: 4, + 80: 4, + 81: 4, + 82: 4, + 83: 4, + 84: 4, + 85: 4, + 86: 4, + 87: 4, + 88: 4, + 89: 4, + 90: 4, + 97: 4, + 98: 4, + 99: 12, + 100: 4, + 101: 4, + 102: 4, + 103: 4, + 104: 4, + 105: 4, + 106: 4, + 107: 4, + 108: 4, + 109: 4, + 110: 4, + 111: 4, + 112: 4, + 113: 4, + 114: 4, + 115: 4, + 116: 4, + 117: 4, + 118: 4, + 119: 4, + 120: 4, + 121: 4, + 122: 4, + }, + 24: { + 48: 4, + 49: 4, + 50: 4, + 51: 4, + 52: 4, + 53: 4, + 54: 4, + 55: 4, + 56: 4, + 57: 4, + 65: 4, + 66: 4, + 67: 4, + 68: 4, + 69: 4, + 70: 4, + 71: 4, + 72: 4, + 73: 4, + 74: 4, + 75: 4, + 76: 4, + 77: 4, + 78: 4, + 79: 4, + 80: 4, + 81: 4, + 82: 4, + 83: 4, + 84: 4, + 85: 4, + 86: 4, + 87: 4, + 88: 4, + 89: 4, + 90: 4, + 97: 4, + 98: 4, + 99: 4, + 100: 4, + 101: 4, + 102: 4, + 103: 4, + 104: 4, + 105: 4, + 106: 4, + 107: 4, + 108: 4, + 109: 4, + 110: 4, + 111: 4, + 112: 4, + 113: 4, + 114: 4, + 115: 4, + 116: 4, + 117: 22, + 118: 4, + 119: 4, + 120: 4, + 121: 4, + 122: 4, + }, + 25: { + 48: 4, + 49: 4, + 50: 4, + 51: 4, + 52: 4, + 53: 4, + 54: 4, + 55: 4, + 56: 4, + 57: 4, + 65: 4, + 66: 4, + 67: 4, + 68: 4, + 69: 4, + 70: 4, + 71: 4, + 72: 4, + 73: 4, + 74: 4, + 75: 4, + 76: 4, + 77: 4, + 78: 4, + 79: 4, + 80: 4, + 81: 4, + 82: 4, + 83: 4, + 84: 4, + 85: 4, + 86: 4, + 87: 4, + 88: 4, + 89: 4, + 90: 4, + 97: 4, + 98: 4, + 99: 4, + 100: 4, + 101: 16, + 102: 4, + 103: 4, + 104: 4, + 105: 4, + 106: 4, + 107: 4, + 108: 4, + 109: 4, + 110: 4, + 111: 4, + 112: 4, + 113: 4, + 114: 4, + 115: 4, + 116: 4, + 117: 4, + 118: 4, + 119: 4, + 120: 4, + 121: 4, + 122: 4, + }, + 26: { + 48: 4, + 49: 4, + 50: 4, + 51: 
4, + 52: 4, + 53: 4, + 54: 4, + 55: 4, + 56: 4, + 57: 4, + 65: 4, + 66: 4, + 67: 4, + 68: 4, + 69: 4, + 70: 4, + 71: 4, + 72: 4, + 73: 4, + 74: 4, + 75: 4, + 76: 4, + 77: 4, + 78: 4, + 79: 4, + 80: 4, + 81: 4, + 82: 4, + 83: 4, + 84: 4, + 85: 4, + 86: 4, + 87: 4, + 88: 4, + 89: 4, + 90: 4, + 97: 4, + 98: 4, + 99: 4, + 100: 4, + 101: 4, + 102: 4, + 103: 4, + 104: 4, + 105: 4, + 106: 4, + 107: 4, + 108: 13, + 109: 4, + 110: 4, + 111: 4, + 112: 4, + 113: 4, + 114: 4, + 115: 4, + 116: 4, + 117: 4, + 118: 4, + 119: 4, + 120: 4, + 121: 4, + 122: 4, }, } -// ここは固定値 func yyNextStep(id yyStateID, ru rune) yyStateID { if mp, ok := yyTransitionTable[id]; ok { return mp[ru] @@ -97,7 +1264,7 @@ func New(data string) *yyLexer { finPos: 0, currPos: 0, finRegexID: 0, - currStateID: 1, // init state id を 1 になるようにする + currStateID: 1, // init state id is 1. } } @@ -131,22 +1298,50 @@ start: return 0, ErrYYScan case 1: { - return State1, nil + return Keyword, nil } goto start case 2: { - return State2, nil + return Identifier, nil } goto start case 3: { - return State3, nil + return Digit, nil } goto start case 4: { - return Other, nil + } + goto start + case 5: + { + return LParen, nil + } + goto start + case 6: + { + return RParen, nil + } + goto start + case 7: + { + return LBracket, nil + } + goto start + case 8: + { + return RBracket, nil + } + goto start + case 9: + { + return Operator, nil + } + goto start + case 10: + { } goto start @@ -166,7 +1361,13 @@ start: } func main() { - lex := New("ababba") + lex := New(` +func foo000() int { + x := 1 * 10 + 123 - 1000 / 5432 + + return x +} +`) for { n, err := lex.Next() if err != nil { @@ -174,14 +1375,25 @@ func main() { return } switch n { - case State1: - fmt.Println(State1, YYtext) - case State2: - fmt.Println(State2, YYtext) - case State3: - fmt.Println(State3, YYtext) - default: - fmt.Println(n, YYtext) + case Keyword: + fmt.Println("Keyword") + case Identifier: + fmt.Println("Identifier") + case Digit: + fmt.Println("Digit") 
+ case Whitespace: + fmt.Println("Whitespace") + case LParen: + fmt.Println("LParen") + case RParen: + fmt.Println("RParen") + case LBracket: + fmt.Println("LBracket") + case RBracket: + fmt.Println("RBracket") + case Operator: + fmt.Println("Operator") } + fmt.Printf("\t %#v\n", YYtext) } } diff --git a/sample/sample.l b/sample/sample.l index 7c8bf13..3063c62 100644 --- a/sample/sample.l +++ b/sample/sample.l @@ -1,4 +1,3 @@ - %{ import ( "fmt" @@ -7,23 +6,40 @@ import ( type Type = int const ( - State1 Type = iota + 1 - State2 - State3 - Other + Keyword Type = iota + 1 + Identifier + Digit + Whitespace + LParen + RParen + LBracket + RBracket + Operator ) %} %% -"a" { return State1, nil } -"abb" { return State2, nil } -"a*bb*" { return State3, nil } -"ab" { return Other, nil } +"if|for|while|func|return" { return Keyword, nil } +"[a-zA-Z][a-zA-Z0-9]*" { return Identifier, nil } +"[1-9][0-9]*" { return Digit, nil } +"[ \t\n\r]*" { } +"\\(" { return LParen, nil } +"\\)" { return RParen, nil } +"{" { return LBracket, nil } +"}" { return RBracket, nil } +"\\+|\\-|\\*|/|:=|==|!=" { return Operator, nil } +"." {} %% func main() { - lex := New("ababba") + lex := New(` +func foo000() int { + x := 1 * 10 + 123 - 1000 / 5432 + + return x +} +`) for { n, err := lex.Next() if err != nil { @@ -31,14 +47,25 @@ func main() { return } switch n { - case State1: - fmt.Println(State1, YYtext) - case State2: - fmt.Println(State2, YYtext) - case State3: - fmt.Println(State3, YYtext) - default: - fmt.Println(n, YYtext) + case Keyword: + fmt.Println("Keyword") + case Identifier: + fmt.Println("Identifier") + case Digit: + fmt.Println("Digit") + case Whitespace: + fmt.Println("Whitespace") + case LParen: + fmt.Println("LParen") + case RParen: + fmt.Println("RParen") + case LBracket: + fmt.Println("LBracket") + case RBracket: + fmt.Println("RBracket") + case Operator: + fmt.Println("Operator") } + fmt.Printf("\t %#v\n",YYtext) } }