// lexer.go

package go_vm
import (
"fmt"
"strings"
)
// Lexer is a byte-oriented scanner over a source string. It keeps the
// character under the cursor in currentChar together with the cursor
// position and a line counter.
type Lexer struct {
	currentChar      byte   // character currently under the cursor; 0 when there is none
	input            string // source text being scanned
	nextReadPosition int    // index of the next byte to be read
	currentPosition  int    // index of currentChar within input
	currentLine      int    // current line number, starting at 0
}

// NewLexer returns a Lexer positioned on the first byte of input.
//
// An empty input is accepted: the returned Lexer then has currentChar == 0,
// the same value the scanner uses once the input is exhausted, instead of
// panicking on input[0].
func NewLexer(input string) *Lexer {
	lexer := &Lexer{
		input:            input,
		nextReadPosition: 1,
		currentPosition:  0,
		currentLine:      0,
	}
	if len(input) > 0 {
		lexer.currentChar = input[0] // load the first character right away
	}
	return lexer
}
// skipSpace advances the cursor past any run of spaces and tabs. Line breaks
// are not skipped.
func (lexer *Lexer) skipSpace() {
	for {
		switch lexer.currentChar {
		case ' ', '\t':
			lexer.readChar()
		default:
			return
		}
	}
}
// readChar moves the cursor one byte forward. currentChar becomes the byte at
// nextReadPosition, or 0 when that position is at or past the end of the
// input. Both position counters are incremented on every call.
func (lexer *Lexer) readChar() {
	var next byte // stays 0 when there is nothing left to read
	if lexer.nextReadPosition < len(lexer.input) {
		next = lexer.input[lexer.nextReadPosition]
	}
	lexer.currentChar = next
	lexer.nextReadPosition++
	lexer.currentPosition++
}
// NextToken skips leading spaces and tabs, then scans and returns the next
// token.
//
// Recognized tokens:
//   - STRING: starts at a ' or "; the literal is whatever readString returns
//     and does not include the opening quote.
//   - COMMENT: starts at "//"; the two slashes and any following spaces/tabs
//     are skipped and the literal is whatever readComment returns.
//   - EOL: a line break. "\r\n" is consumed as a single line break so the
//     line counter advances once per line; a lone '\r' or '\n' also counts
//     as one line break.
//   - keyword: a run of letters/underscores, looked up case-insensitively in
//     StringToTokenType.
//   - NUMBER: a run of characters accepted by isDigit.
//   - EOF: the current character is 0.
//
// StartPosition is the cursor position where the literal begins.
// EndPosition is the cursor position once the token has been scanned; for
// EOL and EOF it equals StartPosition. LineNumber is the line counter at the
// time the token is produced (for EOL, the line being terminated).
//
// NextToken panics on a word missing from StringToTokenType and on any
// character that cannot start a token.
func (lexer *Lexer) NextToken() Token {
	var token Token
	lexer.skipSpace()

	switch ch := lexer.currentChar; {
	case ch == '"' || ch == '\'':
		lexer.readChar() // step over the opening quote
		token.StartPosition = lexer.currentPosition
		token.Literal = lexer.readString()
		token.Type = TokenType_STRING

	case ch == '/' && lexer.nextChar() == '/':
		// Two reads are needed to consume the "//" marker itself.
		lexer.readChar()
		lexer.readChar()
		lexer.skipSpace()
		token.StartPosition = lexer.currentPosition
		token.Literal = lexer.readComment()
		token.Type = TokenType_COMMENT

	case ch == '\r' || ch == '\n':
		// Positions and line number are recorded before the line break is
		// consumed, so the EOL token belongs to the line it terminates.
		token.StartPosition = lexer.currentPosition
		token.Literal = "EOL"
		token.Type = TokenType_EOL
		token.EndPosition = lexer.currentPosition
		token.LineNumber = lexer.currentLine
		lexer.readChar()
		if ch == '\r' && lexer.currentChar == '\n' {
			// "\r\n" is one line break, not two.
			lexer.readChar()
		}
		lexer.currentLine++
		return token

	case isLetter(ch):
		token.StartPosition = lexer.currentPosition
		token.Literal = lexer.readInstruction()
		tokenType, ok := StringToTokenType[strings.ToUpper(token.Literal)]
		if !ok {
			panic(fmt.Errorf("illegal keyword - line: %d, position: %d, keyword: %s", lexer.currentLine, lexer.currentPosition, token.Literal))
		}
		token.Type = tokenType

	case isDigit(ch):
		token.StartPosition = lexer.currentPosition
		token.Literal = lexer.readDigit()
		token.Type = TokenType_NUMBER

	case ch == 0:
		// 0 is the value the cursor holds once the input is exhausted.
		token.StartPosition = lexer.currentPosition
		token.Literal = "EOF"
		token.Type = TokenType_EOF

	default:
		panic(fmt.Errorf("illegal char - line: %d, position: %d, char: %c", lexer.currentLine, lexer.currentPosition, lexer.currentChar))
	}

	token.EndPosition = lexer.currentPosition
	token.LineNumber = lexer.currentLine
	return token
}
// readComment returns the text from the current cursor position up to, but
// not including, the next '\n'. The '\n' itself is left unread because
// NextToken reports it as a separate EOL token.
//
// Scanning also stops at the end of the input, so a comment on the last line
// of a file without a trailing newline terminates instead of looping forever.
func (lexer *Lexer) readComment() string {
	start := lexer.currentPosition
	for lexer.currentPosition < len(lexer.input) && lexer.currentChar != '\n' {
		lexer.readChar()
	}
	return lexer.input[start:lexer.currentPosition]
}
// readString returns the text from the current cursor position up to, but
// not including, the next '"', '\'' or '\n', and then consumes that
// terminating character. Either quote character ends the string regardless
// of which one opened it, and there is no escape handling.
//
// Scanning also stops at the end of the input, so an unterminated string on
// the last line terminates instead of looping forever. In that case nothing
// is consumed after the literal, which keeps the cursor from moving past the
// end of the input.
func (lexer *Lexer) readString() string {
	start := lexer.currentPosition
	for lexer.currentPosition < len(lexer.input) &&
		lexer.currentChar != '"' && lexer.currentChar != '\'' && lexer.currentChar != '\n' {
		lexer.readChar()
	}
	literal := lexer.input[start:lexer.currentPosition]
	if lexer.currentPosition < len(lexer.input) {
		lexer.readChar() // consume the closing quote or the newline
	}
	return literal
}
// readDigit consumes the run of characters accepted by isDigit that starts at
// the cursor and returns it as a string.
func (lexer *Lexer) readDigit() string {
	begin := lexer.currentPosition
	for ; isDigit(lexer.currentChar); lexer.readChar() {
	}
	end := lexer.currentPosition
	return lexer.input[begin:end]
}
// nextChar peeks at the byte located at nextReadPosition without moving the
// cursor. It returns 0 when that position is at or past the end of the input.
func (lexer *Lexer) nextChar() byte {
	if pos := lexer.nextReadPosition; pos < len(lexer.input) {
		return lexer.input[pos]
	}
	return 0
}
// readInstruction consumes the run of characters accepted by isLetter that
// starts at the cursor and returns it as a string.
func (lexer *Lexer) readInstruction() string {
	begin := lexer.currentPosition
	for {
		if !isLetter(lexer.currentChar) {
			return lexer.input[begin:lexer.currentPosition]
		}
		lexer.readChar()
	}
}
// ParseTokens calls NextToken repeatedly and collects the results until an
// EOF token is produced. The EOF token itself is not included, and the
// returned slice is nil when no token precedes EOF.
func (lexer *Lexer) ParseTokens() []Token {
	var collected []Token
	for {
		next := lexer.NextToken()
		if next.Type == TokenType_EOF {
			return collected
		}
		collected = append(collected, next)
	}
}
// isLetter reports whether ch is an ASCII letter (a-z, A-Z) or an underscore.
func isLetter(ch byte) bool {
	switch {
	case ch >= 'a' && ch <= 'z':
		return true
	case ch >= 'A' && ch <= 'Z':
		return true
	case ch == '_':
		return true
	}
	return false
}
// isDigit reports whether ch is an ASCII decimal digit or a '.'. The dot is
// accepted so that it is scanned as part of a number literal.
func isDigit(ch byte) bool {
	if ch == '.' {
		return true
	}
	return ch >= '0' && ch <= '9'
}