-
Notifications
You must be signed in to change notification settings - Fork 0
/
inliner.py
492 lines (454 loc) · 22.4 KB
/
inliner.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
import json
from tokenizer import Tokenizer
from module import Module
import re
import regexes
from tokens import TokenType, Token
from generator_helper import GeneratorHelper
from copy import deepcopy
class Inliner:
def __init__(self, config_path, input_path):
self.config_path = config_path
self.tokenizer = Tokenizer(config_path)
self._token_gen = self._token_generator(input_path)
with open(self.config_path) as f:
config_data = json.load(f)
self.operators = config_data["operators"]
self.keywords = config_data["keywords"]
self.modules = {} # Dict mapping module names in file to module names
self._inlined_modules = {} # Dict mapping module names to their inlined versions; will be empty until _inline() is called
self.input_path = input_path
self.reference_tree = {}
def _token_generator(self, path_in):
with open(path_in,'r') as f:
text = f.read()
tokens = self.tokenizer.tokenize(text)
token, *tokens = tokens
while(tokens):
yield token
token, *tokens = tokens
yield token # Yield the last element
def _get_token(self):
return next(self._token_gen, None)
def inline(self,message=True):
''' The inlining function exposed as part of the API '''
self._index()
if(message):
print("Indexing complete . . .")
self._generate_reference_tree()
if(message):
print("Reference tree generation complete . . .")
self._inline()
if(message):
print("Inlining complete . . .")
def _index(self):
''' Reads the input file '''
token = self._get_token()
while(token):
if token.content == "module":
self._process_module(token)
token = self._get_token()
def _process_module(self, token):
balance = 1
module_content = [token] # Store the first token in the list
token = self._get_token()
# Capture the whole module text
while(balance != 0 and token):
if token.content == "module":
balance += 1
elif token.content == "endmodule":
balance -= 1
module_content.append(token)
token = self._get_token()
if not token and balance != 0:
raise ValueError(f"Incomplete module input into function Inliner._process_module\nCheck input file {self.input_path} for proper Verilog Syntax")
# Process the module header
module_header = []
module_body = module_content
for token in module_body:
module_header.append(token)
module_body = module_body[1:]
if token.content == ";":
break
# Get the module name
i = 0
while(module_header[i].token_type != TokenType.IDENTIFIER):
i += 1
name = module_header[i].content
raw_module_header = list(map(lambda x: x.to_string(), module_header)) # Make a string version to call balanced bounds
# Process the port list from the header
ports = []
open_paren_idx = raw_module_header.index("(")
close_paren_idx = balanced_bounds(raw_module_header,"(",")",open_paren_idx)
port_list = module_header[open_paren_idx:close_paren_idx+1]
ports = []
for token in port_list:
if token.token_type == TokenType.IDENTIFIER:
ports.append(token.content)
# Process the parameter list in the module header
parameters = []
hit_parameter = False
for token in module_body:
if token.token_type == TokenType.KEYWORD and token.content == "parameter":
hit_parameter = True
if hit_parameter:
if token.token_type == TokenType.IDENTIFIER:
parameters.append(token.content)
hit_parameter = False
# Make the module object
mod = Module(name, module_body, module_header, ports, parameters)
self.modules[name] = mod
def _generate_reference_tree(self):
''' Algorithm to generate the reference tree which
stores which modules reference which other modules, in a tree implemented
as a dicitonary mapping names (str) to sets of names (set, str).
This can then be used to efficiently inline the files without error.
Pre-conditions: The modules have already been indexed with _index()
'''
for name in self.modules:
self.reference_tree[name] = set()
for name, mod in self.modules.items():
for token in mod.body:
if token.token_type == TokenType.IDENTIFIER and token.content in self.modules:
# This module contains a reference to another module
self.reference_tree[name].add(token.content)
def _inline(self):
''' Algorithm to inline all modules into one single module for output
Pre-conditions: _index() and _generate_reference_tree() have already been called
'''
# Phase 1: Establish the inlining order based on the reference tree
order = self._get_inline_order(self.reference_tree)
for name in order:
if not self.reference_tree[name]:
# This module is a leaf; it references no other modules
self._inlined_modules[name] = self.modules[name]
else:
# Inline this module
self._inlined_modules[name] = self._get_inlined_module(name)
def _get_inlined_module(self, name):
''' Function which given a name appearing in the modules dictionary creates an inlined version of it
Params: name (str) which appears in self.modules
Returns: Inlined module object (module)
Pre-conditions: _inline() is calling this function in the correct order, as the modules referenced by
'name' must be inlined before 'name'
'''
top_body = self.modules[name].body
new_body = []
gen = GeneratorHelper(top_body)
token = gen.get_element()
while(token):
if token.token_type == TokenType.IDENTIFIER and token.content in self._inlined_modules:
try:
inlined_portion = self._process_module_instantiation(gen,token)
new_body.extend(inlined_portion)
except ValueError as e:
raise ValueError(f"Error encountered while attempting to inline module {name}:\n{str(e)}")
else:
new_body.append(token)
token = gen.get_element()
return Module(name, new_body, self.modules[name].header, self.modules[name].ports, self.modules[name].parameters)
def _process_module_instantiation(self, gen, curr_token):
''' Method to extract a module instantiation from a generator input
stream gen (GeneratorHelper), process it, and then write it to the output (List of Token)
'''
balance = 1
instance_type = curr_token
statement = [curr_token]
token = gen.get_element()
while(token and token.content != ";"):
statement.append(token)
token = gen.get_element()
if not token:
raise ValueError(f"Incomplete module instantiation detected in input to Inliner in function Inliner._process_module_instantiation\nCheck input file {self.input_path} for proper Verilog Syntax")
statement.append(token) # Add on the ending semicolon
try:
inlined_portion = self._instantation_to_inlined_body(statement)
except ValueError:
raise # Just hand it up to the caller function
else:
return inlined_portion
def _instantation_to_inlined_body(self, instantiation):
''' Accepts text of a module instantiation (List of Tokens)
in verilog syntax. Returns the body of the module instantiated with
the correct I/O assignments '''
raw_text = list(map( lambda x : x.to_string(), instantiation))
# Get the names
module_name = instantiation[0].content
i = 1
# Get the parameter assignment list, if it exists
param_list = []
if "#" in raw_text:
idx = raw_text.index("#")
param_list_start = raw_text.index("(",idx)
param_list_end = balanced_bounds(raw_text, "(", ")", param_list_start)
param_list = instantiation[param_list_start + 1 : param_list_end]
param_list = [tok for tok in param_list if tok.token_type != TokenType.COMMENT and tok.token_type != TokenType.WHTSPC]
i = param_list_end + 1
while(instantiation[i].token_type != TokenType.IDENTIFIER):
i += 1
instance_name = instantiation[i].content
# Get the port assignment list
if param_list:
port_list_start = raw_text.index("(", i + 1)
else:
port_list_start = raw_text.index("(")
port_list_end = balanced_bounds(raw_text, "(", ")", port_list_start)
port_list = instantiation[port_list_start + 1 : port_list_end]
port_list = [tok for tok in port_list if tok.token_type != TokenType.COMMENT and tok.token_type != TokenType.WHTSPC]
# Identify the type of port list (positional or named) and param list (positional or named)
port_list_text = "".join([x.to_string() for x in port_list])
param_list_text = "".join([x.to_string() for x in param_list])
# A regex for an identifier or an indexed identifier
# Regexes to match the naming list and positional list convention of port/parameter assignment
param_positional = regexes.positional_port_list.fullmatch(param_list_text)
param_naming = regexes.named_port_list.fullmatch(param_list_text)
port_positional = regexes.positional_port_list.fullmatch(port_list_text)
port_naming = regexes.named_port_list.fullmatch(port_list_text)
# Parse the param and port assignment lists now that we know their format
param_assignments = {}
if param_list:
if param_naming:
param_assignments = self._parse_named_param_list(param_list, module_name)
elif param_positional:
param_assignments = self._parse_positional_param_list(param_list, module_name)
else:
raise ValueError(f"Invalid parameter list syntax detected in Inliner._instantiation_to_inlined_body:" /
"\n" + "".join(raw_text) + "\n" + f"Check file {self.input_path} for proper verilog syntax.")
port_assignments = {}
if port_naming:
port_assignments = self._parse_named_port_list(port_list, module_name)
elif port_positional:
port_assignments = self._parse_positional_port_list(port_list, module_name)
# Resolve naming collisions by prefixing variable names
old_body = deepcopy(self._inlined_modules[module_name].body)
for token in old_body:
if token.token_type == TokenType.IDENTIFIER:
token.content = self._prefix_name(instance_name,token.content)
for key in param_assignments.copy():
new_key = self._prefix_name(instance_name,key)
param_assignments[new_key] = param_assignments[key]
del param_assignments[key]
for key in port_assignments.copy():
new_key = self._prefix_name(instance_name,key)
port_assignments[new_key] = port_assignments[key]
del port_assignments[key]
# Modify the body of the module by adding assignments, and renaming inputs and outputs to wires or regs.
inlined_body = []
gen = GeneratorHelper(old_body)
token = gen.get_element()
buffer = []
output_regs = []
output_wires = []
tkzr = Tokenizer(self.config_path)
# Change the input and output declarations
while( token.content != 'endmodule'):
buffer.append(token)
# End of a statement, process it
if buffer[-1].content == ";":
# stmt is buffer without comments
stmt = [x for x in buffer if x.token_type != TokenType.COMMENT]
stmt = list(map(lambda x : x.to_string(), stmt))
modified_stmt = deepcopy(stmt)
if "input" in stmt:
i = stmt.index("input")
modified_stmt[i] = "wire"
ports = [key for key in port_assignments if key in stmt] # Input ports mentioned in the statement
for port in ports:
if port_assignments[port]:
# If the port wasn't left empty
modified_stmt.append("\n")
modified_stmt.append(f"assign {port} = {port_assignments[port]};")
elif "output" in stmt:
start = 0
end = 0
if "[" in stmt:
# Remove the size field
size_field = True
start = stmt.index("[")
end = balanced_bounds(stmt,"[","]",start)
text = "".join(stmt)
output_reg_re = '[\\s\n]*output[\\s\n]+reg'
hasOutputReg = re.match(output_reg_re,text)
if hasOutputReg:
modified_stmt.remove("output")
for t in buffer[end:]:
if t.token_type == TokenType.IDENTIFIER:
output_regs.append(t)
else:
i = stmt.index("output")
modified_stmt[i] = "wire"
for t in buffer[end:]:
if t.token_type == TokenType.IDENTIFIER:
output_wires.append(t)
elif "inout" in stmt:
# TODO implement functionality for inout ports
pass
elif "parameter" in stmt:
# TODO: implement detection multiple assignments of multiple params in one line
# Example: 'parameter P1, P2, P3 = <value>'
param_assign_regex = ".*[\s\n]*parameter[\s\n]+([\w\$]+)[\s\n]*=[\s\n]*(.*?)[\s\n]*;"
m = re.match(param_assign_regex, "".join(stmt))
if m:
if m.group(1) in param_assignments:
val_idx = modified_stmt.index(m.group(2))
modified_stmt[val_idx] = param_assignments[m.group(1)]
modified_stmt = tkzr.tokenize( "".join(modified_stmt) )
i = 0
j = 0
# Merge the comments back into the output added to the inlined body
while(i < len(modified_stmt) and j < len(buffer)):
if buffer[j].token_type == TokenType.COMMENT:
inlined_body.append(buffer[j])
j += 1
else:
inlined_body.append(modified_stmt[i])
i += 1
j += 1
if len(buffer) < len(modified_stmt):
while(i < len(modified_stmt)):
inlined_body.append(modified_stmt[i])
i += 1
elif len(modified_stmt) < len(buffer):
while(j < len(buffer) - 1): # Minus 1 to avoid the final semicolon
inlined_body.append(buffer[j])
j += 1
buffer = [] # Empty the buffer
token = gen.get_element()
inlined_body.extend(buffer)
# Add on the output assignments at the end of the module body
# TODO: There will be errors if the connection to the output is a reg because these are continuous assignments
# How do we solve this problem?
for port in output_regs:
if port_assignments[port.content]: # Don't add if the port was left disconnected
s = f"\nassign {port_assignments[port.content]} = {port.content};"
inlined_body.extend( tkzr.tokenize(s) )
for port in output_wires:
if port_assignments[port.content]: # Don't add if the port was left disconnected
s = f"\nassign {port_assignments[port.content]} = {port.content};"
inlined_body.extend( tkzr.tokenize(s) )
inlined_body.append( Token("\n", TokenType.WHTSPC) )
return inlined_body
def _prefix_name(self, prefix, name):
''' Helper function to standardize the way variables are prefixed
when they are being inlined into a module that references them
to avoid naming collisions.
Params: Prefix (str), name (str)
Returns: new variable name (str)
'''
return f"_{prefix}_{name}"
def _parse_positional_param_list(self, param_list, module_name):
''' Function to parse a positional parameter list (List of tokens) in
verilog syntax for a particular module, and return a dictionary
mapping parameter names (str) to strings (str), whether they
represent a number, identifier, or otherwise.
'''
# TODO: Add error checking
# Remove spaces and commas, leaving only the parameter list arguments
param_list = [x for x in param_list if x.token_type != TokenType.WHTSPC]
param_list = [x for x in param_list if x.content != ","]
module_params = self.modules[module_name].ports
assignments = {}
min_size = min( len(module_params), len(param_list) )
for i in range( min_size ):
assignments[ module_params[i] ] = param_list[i].to_string()
return assignments
def _parse_named_param_list(self, param_list, module_name):
''' Function to parse a named parameter list (List of tokens) in
verilog syntax for a particular module, and return a dictionary
mapping parameter names (str) to strings (str), whether they
represent a number, identifier, or otherwise.
'''
# TODO: Add error checking
assignments = {}
regex = f"\.({regexes.identifier})\((({regexes.identifier})|({regexes.number}))\)" # Consider changing group 2 to any character
text = "".join(list(map(lambda x : x.to_string(), param_list)))
period = -1
while("." in text[period+1:]):
period = text.index(".",period+1)
text = text[period:] # Test this
m = re.match(regex, text)
assignments[m.group(1)] = m.group(2)
return assignments
def _parse_named_port_list(self, port_list, module_name):
''' Function to parse a named port assignment list (List of tokens) in
verilog syntax for a particular module, and return a dictionary
mapping port names (str) to the connected reg or wire names (str)
'''
# TODO: Add error checking
assignments = {}
regex = f"\.({regexes.s_identifier})\((.*?)\)"
text = "".join(list(map(lambda x : x.to_string(), port_list)))
period = text.index(".")
text = text[period:]
while("." in text[1:]):
m = re.match(regex, text)
assignments[m.group(1)] = m.group(2)
period = text.index(".",1)
text = text[period:] # Test this
m = re.match(regex, text)
assignments[m.group(1)] = m.group(2) # Add the last one
return assignments
def _parse_positional_port_list(self, port_list, module_name):
port_list = [x for x in port_list if x.token_type != TokenType.WHTSPC]
port_list = [x for x in port_list if x.content != ","]
ports = self.modules[module_name].ports
assignments = {}
min_size = min( len(ports), len(port_list) )
for i in range( min_size ):
assignments[ ports[i] ] = port_list[i].to_string()
return assignments
def _get_inline_order(self, ref_tree):
rt = deepcopy(ref_tree)
return self._get_inline_order_helper(rt,[])
def _get_inline_order_helper(self, ref_tree, order):
''' Algorithm to identify the order in which modules will be inlined '''
if not ref_tree:
# Base case, the tree has been completely trimmed and we have the ordering
return order
else:
# Identify leaf nodes
leaves = set()
for name, children in ref_tree.items():
if not children:
leaves.add(name)
if not leaves:
# There are no leaf nodes, implying a cycle must exist
# TODO: Throw an error when this occurs
pass
else:
order.extend( list(leaves) ) # Put the current leaves on the back of the order
# Trim the tree
for name, children in ref_tree.items():
for child in children.copy():
if child in leaves:
children.remove(child)
for name in leaves:
ref_tree.pop(name, None)
# Now that the tree is trimmed, execute a recursive call on the trimmed tree
return self._get_inline_order_helper(ref_tree, order)
def balanced_bounds(strings, open_token, close_token, start=0):
''' An algorithm which given a list of strings = [s0, s1, ... , sn]
and an open and closing strings t0 and t1, will start at index 'start'
and return the index at which an equal number of t0 and t1 have occurred '''
if not start < len(strings):
raise ValueError("Starting index too high")
if not strings:
return 0
i = start
# Go to the first open token
while(strings[i] != open_token):
i += 1
if not i < len(strings):
raise ValueError("Open token does not appear at or after start index")
count = 1
i += 1
while(count != 0 and i < len(strings)):
if(strings[i] == open_token):
count += 1
elif(strings[i] == close_token):
count -= 1
i += 1
if not i < len(strings) and count != 0:
raise ValueError("Input strings do not contain balanced open_token and closing_token groupings")
else:
return i - 1