-
Notifications
You must be signed in to change notification settings - Fork 0
/
my_scanner.py
99 lines (89 loc) · 3.69 KB
/
my_scanner.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
#to know the place of the character you will deal with it next
index = 0
#reading from file (input.txt)
fileHandler = open('input.txt','r')
input = fileHandler.read()
#writing to file (output.txt)
outFile = open('output.txt','w')
#function declaration that we will use it in parser
#def getToken (input):
#global index
#declaring our states , special symbols and reserved words in the DFA
states = ['start', 'inComment', 'inDigit', 'inLetter', 'inAssign', 'done']
specialSymbols = {'+' : 'plus', '-' : 'minus', '*' : 'multiply', '/' : 'division', '=' : 'equal', '<' : 'less than', '(' : 'openning bracket', ')' : 'closing bracket', ';' : 'semi colon'}
reservedWords = ['if','then','else','end','repeat','until','read','write']
while index < len(input):
#defining my current state and the next one
currentState = 'start'
nextState = ''
change = False
#variable that will carry the value of the token
token = ''
tokenType = ''
#the DFA process converts to code
while currentState != 'done':
if index >= len(input):
break
character = input[index]
change = False
#case of assign :=
if currentState == 'start' and character == ':':
nextState = 'inAssign'
change = True
elif currentState == 'inAssign' and character == '=':
token = ':='
tokenType = 'assign'
nextState = 'done'
change = True
#case of having letter
elif currentState == 'start' and character.isalpha():
nextState = 'inLetter'
change = True
elif currentState == 'inLetter' and not(character.isalpha()):
nextState = 'done'
change = True
tokenType = 'letter'
#case of having numbers
elif currentState == 'start' and character.isdigit():
nextState = 'inDigit'
change = True
elif currentState == 'inDigit' and not(character.isdigit()):
nextState = 'done'
change = True
tokenType = 'Number'
#case of special symbols
elif currentState == 'start' and (character == '+' or character == '-' or character == '/' or character == '*' or character == ';' or character == '(' or character == ')' or character == '=' or character == '<'):
nextState = 'done'
change = True
token = character
tokenType = 'specialSymbol'
#handling comments by ignoring them
elif currentState == 'start' and character == '{':
nextState = 'inComment'
change = True
elif currentState == 'inComment' and character == '}':
nextState = 'start'
change = True
#handle the delimiter character
if (nextState == 'done') and (currentState == 'inLetter' or currentState == 'inDigit'):
index = index -1
#getting the next character
index = index + 1
#if we have transition from one state to another
if change:
currentState = nextState
nextState = ''
#accumelating the token in case of set of letters and numbers
if nextState == 'inLetter' or currentState == 'inLetter' or nextState == 'inDigit' or currentState == 'inDigit':
token = token + character
#another checks for special symbols , reserved words and identifiers
if tokenType == 'letter':
if token in reservedWords:
tokenType = 'reserved word'
else:
tokenType = 'identifier'
elif tokenType == 'specialSymbol':
tokenType = specialSymbols[token]
#handling case of empty token
if token != '':
outFile.write(token + ' , ' + tokenType + '\n')