Skip to content

Commit

Permalink
Merge branch 'main' of github.com:costa-group/grey
Browse files Browse the repository at this point in the history
  • Loading branch information
tutugordillo committed Nov 25, 2024
2 parents 449425d + 37a3af7 commit 2154c4f
Show file tree
Hide file tree
Showing 10 changed files with 158 additions and 64 deletions.
1 change: 1 addition & 0 deletions src/analysis/fixpoint_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ def process_jumps(self, block_id: block_id_T, input_state: state_T) -> None:
if previous_block_info is None:
self.pending.append(previous_block)
self.blocks_info[previous_block] = self.constructor(self.vertices[previous_block], input_state)
self.blocks_info[previous_block].propagate_state(input_state)

# If we decide to revisit the block, we propagate the state and
# then include it as part of the pending blocks
Expand Down
96 changes: 96 additions & 0 deletions src/cfg_methods/constants_insertion.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
"""
Module that inserts an instruction for each constant that appears in the code. There might be several
ways to introduce such instructions if we want to reuse computations across different blocks in the CFG
"""
from typing import Dict, Tuple
from collections import defaultdict
from global_params.types import block_id_T, var_id_T, constant_T
from parser.cfg import CFG
from parser.cfg_function import CFGFunction
from parser.cfg_block_list import CFGBlockList
from parser.cfg_block import CFGBlock
from parser.cfg_instruction import CFGInstruction

# Insertion dict collects all constants that must be assigned a variable for a given block
insertion_dict_T = Dict[block_id_T, Dict[constant_T, var_id_T]]


def insert_variables_for_constants(cfg: CFG) -> None:
    """
    Introduces variables and instructions for constants in the CFG, in order to simplify later stages of the
    stack layout generation. This version introduces the constants just when they are being used.

    For every object in ``cfg.objectCFG``, the block list of the object is processed first and then the block
    list of each of its functions. The counter used to name the introduced variables restarts at zero for every
    object and is shared between the object's own blocks and the blocks of its functions. The CFG is modified
    in place.

    :param cfg: CFG whose blocks are rewritten. Its sub-object (if ``cfg.get_subobject()`` returns one) is
                processed recursively.
    """
    for cfg_object in cfg.objectCFG.values():
        constant_counter = 0

        # First the blocks that belong directly to the object...
        constants_per_block, constant_counter = insert_variables_for_constants_block_list(cfg_object.blocks,
                                                                                          constant_counter)
        insert_constants_block_list(cfg_object.blocks, constants_per_block)

        # ...then the blocks of every function, continuing with the same counter
        for cfg_function in cfg_object.functions.values():
            constants_per_block, constant_counter = insert_variables_for_constants_block_list(cfg_function.blocks,
                                                                                              constant_counter)
            insert_constants_block_list(cfg_function.blocks, constants_per_block)

    # The sub-object is retrieved from the CFG itself (not from any particular object), so the recursive
    # call does not depend on the loop above and is performed exactly once
    sub_object = cfg.get_subobject()
    if sub_object is not None:
        insert_variables_for_constants(sub_object)


def insert_variables_for_constants_block_list(cfg_block_list: CFGBlockList, constant_counter: int = 0) -> \
        Tuple[insertion_dict_T, int]:
    """
    Traverse a CFG block list to annotate which constants must be introduced.

    An input argument is considered a constant when it starts with "0x". Each constant receives a fresh
    variable name ``c<N>`` the first time it is found for a given block; further occurrences of the same
    constant assigned to the same block reuse that name.

    :param cfg_block_list: block list whose instructions are inspected (it is not modified here)
    :param constant_counter: first index to use when naming the introduced variables
    :return: a pair with the dict that maps each block id to the variables assigned to its constants
             (a ``defaultdict``, so blocks with no constants can still be looked up) and the next unused
             counter value
    """
    constants_per_block = defaultdict(dict)

    for block_name, block in cfg_block_list.blocks.items():
        for instr in block.get_instructions():
            # Whether the instruction is a phi function does not depend on the argument being inspected
            is_phi_function = instr.get_op_name() == "PhiFunction"

            for in_index, in_arg in enumerate(instr.get_in_args()):
                if not in_arg.startswith("0x"):
                    continue

                # For constants in phi functions, the constant is assigned to the predecessor block
                # that is located at the same position as the argument
                block_to_assign = block.entries[in_index] if is_phi_function else block_name
                constants_in_block = constants_per_block[block_to_assign]

                if in_arg not in constants_in_block:
                    constants_in_block[in_arg] = f"c{constant_counter}"
                    constant_counter += 1

    return constants_per_block, constant_counter


def insert_constants_block_list(cfg_block_list: CFGBlockList, constants_per_block: insertion_dict_T) -> None:
    """
    Given the dict that assigns a unique variable for each introduced constant in each block,
    modifies all the blocks in the block_list accordingly.

    Two changes are applied to every block, in place:
      * input arguments that have a variable assigned are replaced by that variable. Arguments of phi
        functions are looked up in the dict of the predecessor at the same position in ``entries``, whereas
        the arguments of any other instruction are looked up in the dict of the block itself
      * one "push" instruction per constant assigned to the block is inserted before the first instruction
        that is not a phi function (or at the end of the block if there is none)

    :param cfg_block_list: block list whose blocks are modified
    :param constants_per_block: variables assigned to the constants of each block. Blocks that do not
                                appear in the dict are treated as having no constants, so both a plain dict
                                and a defaultdict are accepted and the dict itself is never modified
    """
    no_constants = {}

    for cfg_block in cfg_block_list.blocks.values():
        block_constants = constants_per_block.get(cfg_block.block_id, no_constants)
        first_non_phi = None

        for idx, instruction in enumerate(cfg_block.get_instructions()):
            if instruction.get_op_name() == "PhiFunction":
                # Phi functions are handled slightly different, as we have to retrieve the
                # assigned variables from the predecessor blocks
                instruction.in_args = [constants_per_block.get(predecessor_id, no_constants).get(in_arg, in_arg)
                                       for in_arg, predecessor_id in zip(instruction.in_args, cfg_block.entries)]
            else:
                # We detect the first non phi instruction, as we are introducing variables in this point
                if first_non_phi is None:
                    first_non_phi = idx
                instruction.in_args = [block_constants.get(in_arg, in_arg) for in_arg in instruction.in_args]

        # We update by the end of the block if there are no other instructions
        if first_non_phi is None:
            first_non_phi = len(cfg_block.get_instructions())

        # Finally, we insert the corresponding instructions. Every push is inserted at the same index
        for constant_value, arg in block_constants.items():
            push_instr = CFGInstruction("push", [], [arg])
            push_instr.builtin_args = [constant_value]
            cfg_block.insert_instruction(first_non_phi, push_instr)
8 changes: 7 additions & 1 deletion src/cfg_methods/preprocessing_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from cfg_methods.sub_block_generation import combine_remove_blocks_cfg, split_blocks_cfg
from cfg_methods.jump_insertion import insert_jumps_tags_cfg
from cfg_methods.variable_renaming import rename_variables_cfg
from cfg_methods.constants_insertion import insert_variables_for_constants


def preprocess_cfg(cfg: CFG, dot_file_dir: Path, visualization: bool) -> Dict[str, Dict[str, int]]:
Expand All @@ -27,12 +28,17 @@ def preprocess_cfg(cfg: CFG, dot_file_dir: Path, visualization: bool) -> Dict[st
if visualization:
liveness_info = dot_from_analysis(cfg, dot_file_dir.joinpath("inlined"))

# Finally we combine and remove the blocks from the CFG
# We combine and remove the blocks from the CFG
# Must be the latest step because we might have split blocks after insert jumps and tags
combine_remove_blocks_cfg(cfg)
if visualization:
liveness_info = dot_from_analysis(cfg, dot_file_dir.joinpath("combined"))

# We replace variables for constants
insert_variables_for_constants(cfg)
if visualization:
liveness_info = dot_from_analysis(cfg, dot_file_dir.joinpath("constants"))

# We introduce the jumps, tags and the stack requirements for each block
tag_dict = insert_jumps_tags_cfg(cfg)
if visualization:
Expand Down
3 changes: 3 additions & 0 deletions src/global_params/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@
"""
from typing import Dict, Union, Any, List, Tuple

# Constants in the yul format are hexadecimal
constant_T = str

# Type for the stack variables that are introduced in the stack as args of other expressions
var_id_T = str

Expand Down
30 changes: 23 additions & 7 deletions src/liveness/layout_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from typing import Dict, List, Type, Any, Set, Tuple, Optional
import networkx as nx
from pathlib import Path
from itertools import zip_longest

from global_params.types import SMS_T, component_name_T, var_id_T, block_id_T
from parser.cfg import CFG
Expand All @@ -23,6 +24,7 @@
from liveness.utils import functions_inputs_from_components



def unify_stacks(predecessor_stacks: List[List[str]], variable_depth_info: Dict[str, int]) -> List[str]:
"""
Unifies the given stacks, according to the information already provided in variable depth info
Expand Down Expand Up @@ -158,7 +160,7 @@ def output_stack_layout(input_stack: List[str], final_stack_elements: List[str],
return final_stack_elements + bottom_output_stack


def unify_stacks_brothers(taget_block_id: block_id_T, predecessor_blocks: List[block_id_T],
def unify_stacks_brothers(target_block_id: block_id_T, predecessor_blocks: List[block_id_T],
live_vars_dict: Dict[block_id_T, Set[var_id_T]], phi_functions: List[CFGInstruction],
variable_depth_info: Dict[str, int]) -> Tuple[
List[block_id_T], Dict[block_id_T, List[var_id_T]]]:
Expand All @@ -167,15 +169,29 @@ def unify_stacks_brothers(taget_block_id: block_id_T, predecessor_blocks: List[b
considering the PhiFunctions
"""
# TODO: uses the input stacks for all the brother stacks for a better combination

# First we generate the stack layout for the input stack of the target block. We make no assumptions on how the
# input stack works
combined_output_stack = output_stack_layout([], [], live_vars_dict[taget_block_id], variable_depth_info)

# From this layout, we reconstruct the previous stack layouts using the information from the phi functions
# First we extract the information from the phi functions
phi_func = {(phi_function.out_args[0], predecessor_block): input_arg for phi_function in phi_functions
for input_arg, predecessor_block in zip(phi_function.in_args, predecessor_blocks)}

# The variables that appear in the liveness set of some predecessor but not in that of the successor must be
# accounted for as well. In order to do so, we introduce a kind of "PhiFunction" that combines these values
# in the resulting block

# First we identify these variables, removing the variables that are already part of a phi function
variables_to_remove = {predecessor_block: live_vars_dict[predecessor_block].difference(live_vars_dict[target_block_id].union(phi_func.values()))
for predecessor_block in predecessor_blocks}

# Then we combine them as new phi functions. We fill with bottom values if there are not enough values to combine
pseudo_phi_functions = {f"b{i}": in_args for i, in_args in enumerate(zip_longest(*(variables_to_remove[predecessor_block]
for predecessor_block in predecessor_blocks),
fillvalue="bottom"))}
phi_func.update({(out_arg, predecessor_block): input_arg for out_arg, in_args in pseudo_phi_functions.items()
for input_arg, predecessor_block in zip(in_args, predecessor_blocks)})

# We generate the input stack of the combined information, considering the pseudo phi functions
combined_output_stack = output_stack_layout([], [], live_vars_dict[target_block_id].union(pseudo_phi_functions.keys()),
dict(variable_depth_info, **{key: 0 for key in pseudo_phi_functions.keys()}))

# Reconstruct all the output stacks
predecessor_output_stacks = dict()

Expand Down
10 changes: 5 additions & 5 deletions src/liveness/liveness_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ class LivenessAnalysisInfoSSA(BlockAnalysisInfo):

def __init__(self, block_info: LivenessBlockInfoSSA, input_state: LivenessState) -> None:
# We need to copy the input state given, as it corresponds to the output state of a given previous state
super().__init__(block_info, copy.deepcopy(input_state))
super().__init__(block_info, LivenessState())

def propagate_information(self) -> None:
# If the output state is None, we need to propagate the information from the block and the input state
Expand All @@ -78,14 +78,14 @@ def propagate_information(self) -> None:

# Live in variables: remove from the out variables those that are defined (either as part of a
# normal function or a phi function) and add the ones that are used with no preceding definition
# 0TODO: check if it is correct (differs slightly from the book)
self.in_state.live_vars = self.block_info.upward_exposed.union(
self.out_state.live_vars.difference(self.block_info.defs.union(self.block_info.phi_defs)))
# TODO: check if it is correct (differs slightly from the book)
self.in_state.live_vars = self.block_info.upward_exposed.union(self.block_info.phi_defs,
self.out_state.live_vars.difference(self.block_info.defs))

def propagate_state(self, current_state: LivenessState) -> None: # Live out variables: the live in variables + those selected from the phi functions
self.out_state.live_vars = set().union(self.out_state.live_vars,
self.block_info.phi_uses,
current_state.live_vars)
current_state.live_vars.difference(self.block_info.pred_phi_defs))

def dot_repr(self) -> str:
instr_repr = '\n'.join([instr.dot_repr() for instr in self.block_info._instructions])
Expand Down
12 changes: 10 additions & 2 deletions src/liveness/liveness_state.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,12 +177,14 @@ def __init__(self, basic_block: CFGBlock, block_dict: Dict[block_id_T, CFGBlock]
self._entries = basic_block.entries

self._phi_uses = set()
self._pred_phi_defs = set()
for successor in self._successors:
successor_block = block_dict[successor]
if len(successor_block.entries) > 0:
phi_uses, _ = _block_id_to_phi_uses(basic_block.block_id, successor_block.get_instructions(),
successor_block.entries)
phi_uses, phi_defs = _block_id_to_phi_uses(basic_block.block_id, successor_block.get_instructions(),
successor_block.entries)
self._phi_uses.update(phi_uses)
self._pred_phi_defs.update(phi_defs)

@property
def block_id(self) -> Any:
Expand Down Expand Up @@ -223,6 +225,12 @@ def phi_defs(self) -> Set[var_id_T]:
def phi_uses(self) -> Set[var_id_T]:
return self._phi_uses


@property
def pred_phi_defs(self) -> Set[var_id_T]:
return self._pred_phi_defs


def __repr__(self):
text_repr = [f"Block id: {self._id}", f"Block type: {self.block_type}", f"Successors: {self.successors}"]
return '\n'.join(text_repr)
1 change: 0 additions & 1 deletion src/parser/cfg_block_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ def __init__(self, name: block_id_T):
self._terminal_blocks: List[block_id_T] = []
self._function_return_blocks: List[block_id_T] = []
self.block_tags_dict = {}
self.entry_dict: Dict[str, Tuple[str, str]] = dict()

@property
def terminal_blocks(self) -> List[block_id_T]:
Expand Down
17 changes: 5 additions & 12 deletions src/parser/cfg_instruction.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,22 +329,15 @@ def translate_built_in_function(self, subobjects_keys: List[str]):
def translate_opcode(self, subobjects_keys: List[str]):
if self.op in ["linkersymbol","memoryguard", "datasize", "dataoffset", "datacopy", "setimmutable", "loadimmutable"]:
self.translate_built_in_function(subobjects_keys)



def get_op_name(self):
return self.op

def get_instruction_representation(self):
outs = ""
if self.out_args != []:
outs = ",".join(self.out_args)
outs+= " = "

inps = ""
if self.in_args !=[]:
inps = ",".join(self.in_args)

instr = outs+self.op+"("+inps+")"
outs = f'{",".join(self.out_args)} = ' if self.out_args else ''
inps = f'({",".join(self.in_args)})' if self.in_args else ''
args = f'[{",".join(self.builtin_args)}]' if self.builtin_args else ''
instr = outs + self.op + inps + args

return instr

Expand Down
44 changes: 8 additions & 36 deletions src/parser/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def process_block_entry(block_json: Dict[str, Any], phi_instr: Dict[str, Any]) -


def parse_block(object_name: str, block_json: Dict[str,Any], built_in_op: bool,
objects_keys: List[str]) -> Tuple[block_id_T, CFGBlock, Dict, Dict[str, Tuple[str, str]]]:
objects_keys: List[str]) -> Tuple[block_id_T, CFGBlock, Dict]:
block_id = block_json.get("id", -1)
block_instructions = block_json.get("instructions", -1)
block_exit = block_json.get("exit", -1)
Expand All @@ -85,29 +85,16 @@ def parse_block(object_name: str, block_json: Dict[str,Any], built_in_op: bool,
check_block_validity(block_id, block_instructions, block_exit, block_type)

list_cfg_instructions = []
assignment_dict = dict()
entry_dict = dict()
for instruction in block_instructions:
if "assignment" in instruction:
list_cfg_instructions.append(parse_assignment(instruction, assignment_dict))
else:
cfg_instruction = parse_instruction(instruction) if block_type != "FunctionReturn" else []
cfg_instruction = parse_instruction(instruction) if block_type != "FunctionReturn" else []

if not built_in_op and cfg_instruction != []:
cfg_instruction.translate_opcode(objects_keys)
if not built_in_op and cfg_instruction != []:
cfg_instruction.translate_opcode(objects_keys)

list_cfg_instructions.append(cfg_instruction)

if instruction["op"] == "PhiFunction":

# For phi functions, we store the assignment in an entry dict. This is needed because some phi
# functions involve introducing constants, and we need to introduce the corresponding PUSH
# in the block fom which the phi function uses that value
entry_dict.update((generate_block_name(object_name, entry), values)
for entry, values in process_block_entry(block_json, instruction).items())
list_cfg_instructions.append(cfg_instruction)

block_identifier = generate_block_name(object_name, block_id)
block = CFGBlock(block_identifier, list_cfg_instructions, block_type, assignment_dict)
block = CFGBlock(block_identifier, list_cfg_instructions, block_type, dict())
block.set_jump_info(block_exit)
block.entries = entries

Expand All @@ -118,7 +105,7 @@ def parse_block(object_name: str, block_json: Dict[str,Any], built_in_op: bool,

# block._process_dependences(block._instructions)

return block_identifier, block, block_exit, entry_dict
return block_identifier, block, block_exit


def update_comes_from(block_list: CFGBlockList, comes_from: Dict[str, List[str]]) -> None:
Expand All @@ -132,19 +119,6 @@ def update_comes_from(block_list: CFGBlockList, comes_from: Dict[str, List[str]]
block_list.get_block(block_id).add_comes_from(predecessor)


def update_assignments_from_phi_functions(block_list: CFGBlockList, phi_function_dict: Dict[str, Tuple[str, str]]) -> None:
"""
Given the list of blocks and the phi functions that appear in any of those blocks, introduces the values
that are constants from the phi function in the corresponding block as part of the assignments
"""
for block_id, (input_value, output_value) in phi_function_dict.items():
block = block_list.get_block(block_id)

# We update the assignments of constants
if input_value.startswith("0x"):
block.assignment_dict[output_value] = input_value


def parser_block_list(object_name: str, blocks: List[Dict[str, Any]], built_in_op : bool, objects_keys : List[str]):
"""
Returns the list of blocks parsed and the ids that correspond to Exit blocks
Expand All @@ -153,7 +127,7 @@ def parser_block_list(object_name: str, blocks: List[Dict[str, Any]], built_in_o
exit_blocks = []
comes_from = collections.defaultdict(lambda: [])
for b in blocks:
block_id, new_block, block_exit, block_entries = parse_block(object_name, b, built_in_op, objects_keys)
block_id, new_block, block_exit = parse_block(object_name, b, built_in_op, objects_keys)

# Annotate comes from
for succ_block in block_exit["targets"]:
Expand All @@ -163,11 +137,9 @@ def parser_block_list(object_name: str, blocks: List[Dict[str, Any]], built_in_o
exit_blocks.append(block_id)

block_list.add_block(new_block)
block_list.entry_dict.update(block_entries)

# We need to update some fields in the blocks using the previously gathered information
update_comes_from(block_list, comes_from)
update_assignments_from_phi_functions(block_list, block_list.entry_dict)

return block_list, exit_blocks

Expand Down

0 comments on commit 2154c4f

Please sign in to comment.