-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathLexer.hs
56 lines (45 loc) · 1.36 KB
/
Lexer.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
module Lexer where
import Char
-- | A single lexeme, kept as the exact text taken from the input.
type Token = String
-- | A token paired with the number of the source line it was read on.
type TokenInfo = (Int, Token)
-- | Operators spelled with exactly two characters.
--
-- The lexer checks a pair of adjacent characters against this list before
-- falling back to emitting a single-character token.
twoCharOps :: [String]
twoCharOps = ["==", "~=", ">=", "<=", "->", "++"]
-- lexer implementation
-- | Split source text into its tokens, discarding the line information.
--
-- Line counting starts at 1. Note that this function shares its name with
-- the Prelude's @lex@, so code that uses it must qualify the name or hide
-- the Prelude version.
lex :: String -> [Token]
lex source = [tok | (_, tok) <- lex' source 1]
-- | Tokenise the input while tracking the current source line.
--
-- The 'Int' argument is the line number of the first character of the
-- input. Every token produced is paired with the line it was read on.
--
-- Rules, tried in this order:
--
-- * spaces and tabs are skipped;
-- * a newline is skipped and advances the line number by one;
-- * a run of digits forms a number token;
-- * a letter followed by identifier characters forms a name token;
-- * @//@ starts a comment that is skipped up to (not including) the newline;
-- * a pair of characters listed in 'twoCharOps' forms one operator token;
-- * any other character becomes a one-character token.
lex' :: String -> Int -> [TokenInfo]
lex' [] _ = []
lex' (c : cs) lnum
  -- ignore whitespaces
  | isWhiteSpace c = lex' cs lnum
  -- ignore newlines; this is the only place the line number advances
  | isNewLine c = lex' cs (lnum + 1)
  -- numbers
  | isDigit c =
      let (digits, rest) = span isDigit cs
       in (lnum, c : digits) : lex' rest lnum
  -- variable names
  | isAlpha c =
      let (nameTail, rest) = span isIdChar cs
       in (lnum, c : nameTail) : lex' rest lnum
-- When none of the guards above succeed, matching falls through to here.
lex' (c1 : c2 : cs) lnum
  -- comments: the terminating newline stays in the input, so the newline
  -- rule above counts the line; bumping the counter here too would count
  -- it twice
  | isComment c1 c2 = lex' (dropWhile (not . isNewLine) cs) lnum
  -- relational operators: no newline is consumed, so the line is unchanged
  | op `elem` twoCharOps = (lnum, op) : lex' cs lnum
  where
    op = [c1, c2]
-- other
lex' (c : cs) lnum = (lnum, [c]) : lex' cs lnum
-- private helper functions
-- | True for characters allowed after the first character of a name:
-- letters, digits and the underscore.
isIdChar :: Char -> Bool
isIdChar '_' = True
isIdChar ch = isAlpha ch || isDigit ch
-- | True for a space or a tab. Newlines are deliberately excluded; they are
-- recognised separately by 'isNewLine'.
isWhiteSpace :: Char -> Bool
isWhiteSpace ch = ch == ' ' || ch == '\t'
-- | True only for the line-feed character.
isNewLine :: Char -> Bool
isNewLine '\n' = True
isNewLine _ = False
-- | True when the two given characters, in order, form the comment
-- opener @//@.
isComment :: Char -> Char -> Bool
isComment '/' '/' = True
isComment _ _ = False