Skip to content

Commit

Permalink
Merge pull request #10 from costa-group/split_instructions
Browse files Browse the repository at this point in the history
Split instructions
  • Loading branch information
alexcere authored Oct 11, 2024
2 parents a55dd4a + 9ce5c7f commit 4708ac6
Show file tree
Hide file tree
Showing 6 changed files with 107 additions and 108 deletions.
31 changes: 14 additions & 17 deletions src/liveness/layout_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,34 +125,34 @@ def output_stack_layout(input_stack: List[str], final_stack_elements: List[str],
reversed_stack_relative_order.append(None)

# We undo the reversed traversal
relative_order = list(reversed(reversed_stack_relative_order))
bottom_output_stack = list(reversed(reversed_stack_relative_order))

output_stack = final_stack_elements + relative_order
vars_to_place = live_vars.difference(set(output_stack))
vars_to_place = live_vars.difference(set(final_stack_elements + bottom_output_stack))

# Sort the vars to place according to the variable depth info order in reversed order
vars_to_place_sorted = sorted(vars_to_place, key=lambda x: -variable_depth_info[x])

# Try to place the variables in reversed order
i, j = len(output_stack) - 1, 0
i, j = len(bottom_output_stack) - 1, 0

while i >= 0 and j < len(vars_to_place_sorted):
if output_stack[i] is None:
output_stack[i] = vars_to_place_sorted[j]
if bottom_output_stack[i] is None:
bottom_output_stack[i] = vars_to_place_sorted[j]
j += 1
i -= 1

# First exit condition: all the positions have been traversed. Hence, the variables that remain
# unplaced (if any) have to be inserted at the beginning
if i == -1:
output_stack = list(reversed(vars_to_place_sorted[j:])) + output_stack
bottom_output_stack = list(reversed(vars_to_place_sorted[j:])) + bottom_output_stack

# Second exit condition: all variables have been placed before traversing every position. There can be
# some None values left in between that must be removed
else:
output_stack = [var_ for var_ in output_stack if var_ is not None]
bottom_output_stack = [var_ for var_ in bottom_output_stack if var_ is not None]

return output_stack
# The final stack elements must appear at the top of the stack
return final_stack_elements + bottom_output_stack


def unify_stacks_brothers(input_stack: List[str], final_stack_elements: List[str],
Expand Down Expand Up @@ -256,6 +256,9 @@ def _construct_code_from_block(self, block: CFGBlock, input_stacks: Dict[str, Li
liveness_info = self._liveness_info[block_id]
comes_from = block.get_comes_from()

if block.block_id.startswith("abi_decode_available_length_t_string_memory_ptr_fromMemory"):
print("HOLA")

# Computing input stack...
# The stack from comes_from stacks must be equal
if comes_from:
Expand Down Expand Up @@ -374,21 +377,14 @@ def build_layout(self):
Builds the layout of the blocks from the given representation
"""
json_info = self._construct_code_from_block_list()
print(json_info.keys())

renamed_graph = information_on_graph(self._cfg_graph, {block_name: print_stacks(block_name, json_info[block_name])
for block_name in
self._block_list.blocks})

nx.nx_agraph.write_dot(renamed_graph, Path(self._dir.parent).joinpath(self._dir.stem + "_stacks.dot"))

# Skip blocks with split instructions in the JSON information. We must remove
# them at this point because their specification is needed to generate the "_stacks" dot file
non_split_blocks = set(block_name for block_name, block in self._block_list.blocks.items()
if block.get_jump_type() != "split_instruction_block")

json_info = {json_name: sfs for json_name, sfs in json_info.items()
if any(json_name == split_block for split_block in non_split_blocks)}

return json_info


Expand All @@ -404,6 +400,7 @@ def layout_generation(cfg: CFG, final_dir: Path = Path(".")) -> Tuple[Dict[str,
tags_dict = dict()

for component_name, liveness in results.items():
print(component_name)
cfg_info_suboject = cfg_info[component_name]["block_info"]
digraph = digraph_from_block_info(cfg_info_suboject.values())

Expand Down
104 changes: 54 additions & 50 deletions src/parser/cfg_block.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import itertools
import logging

from global_params.types import instr_id_T, dependencies_T
Expand All @@ -7,8 +8,9 @@
import parser.constants as constants
import json
import networkx as nx
from parser.constants import split_block

from typing import List, Dict, Tuple, Any
from typing import List, Dict, Tuple, Any, Set, Optional

global tag_idx
tag_idx = 0
Expand Down Expand Up @@ -63,6 +65,12 @@ def __init__(self, identifier: str, instructions: List[CFGInstruction], type_blo
assignment_dict: Dict[str, str]):
self.block_id = identifier
self._instructions = instructions

# The split instruction, if any, is recognized as the last instruction of the block.
# As we don't have information on the function calls yet, we assign it to None and then
# identify it once we set the function calls
self._split_instruction = None

# minimum size of the source stack
self.source_stack = 0
self._jump_type = type_block
Expand All @@ -72,8 +80,6 @@ def __init__(self, identifier: str, instructions: List[CFGInstruction], type_blo
self.is_function_call = False
self._comes_from = []
self.function_calls = set()
self.sto_dep = []
self.mem_dep = []

# Stack elements that must be placed in a specific order in the stack after performing
self._final_stack_elements: List[str] = []
Expand All @@ -91,6 +97,10 @@ def final_stack_elements(self) -> List[str]:
def final_stack_elements(self, value: List[str]):
self._final_stack_elements = value

@property
def split_instruction(self) -> Optional[CFGInstruction]:
return self._split_instruction

def get_block_id(self) -> str:
return self.block_id

Expand Down Expand Up @@ -118,18 +128,6 @@ def is_function_call(self) -> bool:
def set_function_call(self, v) -> None:
self.is_function_call = v

def set_instructions(self, new_instructions: List[CFGInstruction]) -> None:
self._instructions = new_instructions

# Then we update the source stack size
# TODO
# self.source_stack = utils.compute_stack_size(map(lambda x: x.disasm, self.instructions_to_optimize_bytecode()))

def add_instruction(self, new_instr: CFGInstruction) -> None:
self._instructions.append(new_instr)
# TODO
# self.source_stack = utils.compute_stack_size(map(lambda x: x.disasm, self.instructions_to_optimize_bytecode()))

def add_comes_from(self, block_id: str) -> None:
self._comes_from.append(block_id)

Expand All @@ -140,7 +138,7 @@ def set_comes_from(self, new_comes_from: List[str]) -> None:
self._comes_from = new_comes_from

def set_jump_type(self, t: str) -> None:
if t not in ["conditional", "unconditional", "terminal", "falls_to", "sub_block", "split_instruction_block"]:
if t not in ["conditional", "unconditional", "terminal", "falls_to", "sub_block"]:
raise Exception("Wrong jump type")
else:
self._jump_type = t
Expand Down Expand Up @@ -199,6 +197,22 @@ def process_function_calls(self, function_ids):
calls = filter(lambda x: x in function_ids, op_names)
self.function_calls = set(calls)

# Finally, we identify the possible split instruction using the now generated information
if len(self._instructions) > 0 and \
self._instructions[-1].get_op_name() in itertools.chain(split_block, self.function_calls, "JUMP", "JUMPI"):
self._split_instruction = self._instructions[-1]

@property
def instructions_to_synthesize(self) -> List[CFGInstruction]:
if self.split_instruction is not None:
return self._instructions[:-1]
else:
return self._instructions

@instructions_to_synthesize.setter
def instructions_to_synthesize(self, value):
raise NotImplementedError("The instructions for the greedy algorithm cannot be assigned")

def check_validity_arguments(self):
"""
It checks for each instruction in the block that there is not
Expand Down Expand Up @@ -372,35 +386,11 @@ def _build_spec_for_sequence(self, instructions, map_instructions: Dict, out_idx

map_positions_instructions = {}

jump_instr = None

for i in range(len(instructions)):
# Check if it has been already created
unprocessed_instr = None

ins = instructions[i]

# Ignore JUMP instructions
if ins.get_op_name().startswith("JUMP"):
jump_instr = ins
continue

# # TODO: temporal fix for PUSH instructions obtained through translating "memoryguard"
# elif ins.get_op_name() == "push":
# in_val = int(ins.builtin_args[0])
# str_in_val = hex(in_val)
# push_name = "PUSH" if in_val != 0 else "PUSH0"
# inst_idx = instrs_idx.get(push_name, 0)
# instrs_idx[push_name] = inst_idx + 1
# push_ins = build_push_spec(str_in_val, inst_idx, [ins.get_out_args()[0]])

# map_instructions[("PUSH", tuple([str_in_val]))] = push_ins

# uninter_functions.append(push_ins)

# map_positions_instructions[i] = push_ins["id"]

# continue
for i, ins in enumerate(instructions):

# Check if it has been already created
if ins.get_op_name().startswith("push"):
ins_spec = map_instructions.get((ins.get_op_name().upper(), tuple(ins.get_builtin_args())), None)
else:
Expand All @@ -412,8 +402,10 @@ def _build_spec_for_sequence(self, instructions, map_instructions: Dict, out_idx
uninter_functions += result

map_positions_instructions[i] = result[-1]["id"]

elif ins.get_op_name() == "push": #it is a push value that has been already created. If it comes from a memoryguard we have to rename the previous instructions to the output of the memoryguard

# it is a push value that has been already created. If it comes from a memoryguard,
# we have to rename the previous instructions to the output of the memoryguard
elif ins.get_op_name() == "push":
out_var_list = ins_spec["outpt_sk"]
new_out_var_list = ins.get_out_args()

Expand All @@ -427,9 +419,21 @@ def _build_spec_for_sequence(self, instructions, map_instructions: Dict, out_idx
pos = uninter["inpt_sk"].index(out_var)
uninter["inpt_sk"][pos] = new_out_var

# As JUMP instructions are not considered as part of the SFS, we must remove the corresponding values
# from the final stack
final_stack_bef_jump = (jump_instr.get_in_args() if jump_instr is not None else []) + final_stack
# We must remove the final output variable from the unprocessed instruction and
# add the inputs from that instruction
if self.split_instruction is not None:
unprocess_out = self.split_instruction.get_out_args()
assert unprocess_out == final_stack[:len(unprocess_out)], \
f"Stack elements from the instruction {self.split_instruction.get_op_name()} " \
f"do not match the ones from the final stack.\nFinal stack: {final_stack}." \
f"\nStack elements produced by the instruction: {unprocess_out}"

# As the unprocessed instruction is not considered as part of the SFS,
# we must remove the corresponding values from the final stack
final_stack_bef_jump = self.split_instruction.get_in_args() + final_stack[len(unprocess_out):]

else:
final_stack_bef_jump = final_stack

# If there is a bottom value in the final stack, then we introduce it as part of the assignments and
# then we pop it. Same for constant values in the final stack
Expand Down Expand Up @@ -596,8 +600,8 @@ def build_spec(self, block_tags_dict: Dict, block_tag_idx: int, initial_stack: L

out_idx = 0

spec, out_idx, map_positions = self._build_spec_for_sequence(self._instructions, map_instructions,
out_idx, initial_stack, final_stack)
spec, out_idx, map_positions = self._build_spec_for_sequence(self.instructions_to_synthesize, map_instructions, out_idx,
initial_stack, final_stack)

sto_deps, mem_deps = self._process_dependences(self._instructions, map_positions)
spec["storage_dependences"] = sto_deps
Expand Down
6 changes: 3 additions & 3 deletions src/parser/cfg_block_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from typing import List, Dict, Any, Tuple
import networkx
from parser.cfg_block import CFGBlock, include_function_call_tags
import parser.constants
from parser.constants import split_block


class CFGBlockList:
Expand Down Expand Up @@ -53,7 +53,7 @@ def build_spec(self, block_tag_idx, return_function_element = 0):
"""
list_spec = {}

valid_blocks = filter(lambda x: x.get_jump_type() != "split_instruction_block", self.blocks)
valid_blocks = self.blocks

for b in valid_blocks:
block = self.blocks[b]
Expand All @@ -65,7 +65,7 @@ def build_spec(self, block_tag_idx, return_function_element = 0):
split_instr = split_block.get_instructions()[0]
#It only has one instruction

if split_instr.get_op() not in constants.split_block:
if split_instr.get_op() not in split_block:
#It is a call to a function
spec, out_idx = include_function_call_tags(split_instr, out_idx, spec)

Expand Down
5 changes: 3 additions & 2 deletions src/parser/constants.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
split_block = {"LOG0","LOG1","LOG2","LOG3","LOG4","CALLDATACOPY","CODECOPY","EXTCODECOPY","RETURNDATACOPY",
"CALL","STATICCALL","DELEGATECALL","CREATE","CREATE2","ASSIGNIMMUTABLE", "GAS", "DATACOPY"}
split_block = {'calldatacopy', 'create', 'codecopy', 'call', 'log4', 'gas', 'delegatecall', 'extcodecopy', 'create2',
'assignimmutable', 'returndatacopy', 'log2', 'log1', 'log3', 'log0', 'datacopy', 'staticcall'}


# split_block = {"ASSIGNIMMUTABLE", "GAS", "MEMORYGUARD", "DATACOPY"}
40 changes: 17 additions & 23 deletions src/parser/optimizable_block_list.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""
Module that generates a list of sub-blocks to optimize using the greedy algorithm from the given ones
"""

import itertools
from copy import deepcopy
from typing import List, Dict, Tuple
from parser.cfg_instruction import CFGInstruction
Expand All @@ -28,7 +28,7 @@ def initialize_sub_blocks(initial_block: CFGBlock, sub_blocks_instrs: List[Tuple
# Determine the jump type according to whether it is an initial block or a call to a function
if sub_block_idx != len(sub_blocks_instrs) - 1:
# Current sub block does not correspond to the last one
new_sub_block_type = "split_instruction_block" if split_instruction_sub_block else "sub_block"
new_sub_block_type = "sub_block"
else:
new_sub_block_type = initial_block.get_jump_type()

Expand All @@ -48,24 +48,24 @@ def initialize_sub_blocks(initial_block: CFGBlock, sub_blocks_instrs: List[Tuple
# and the final stack values
current_falls_to = initial_block.get_falls_to()
current_jumps_to = initial_block.get_jump_to()
current_stack_values = initial_block.final_stack_elements

for cfg_sub_block in reversed(cfg_sub_blocks):

# All the blocks except the last one fall into this category
if cfg_sub_block.get_jump_type() == "sub_block":
# The last values of the stack correspond to the values introduced by the final instruction
current_stack_values = cfg_sub_block.get_instructions_to_compute()[-1].get_out_args()
# Initially we keep the stack elements from the original block
else:
current_stack_values = initial_block.final_stack_elements

cfg_sub_block.set_falls_to(current_falls_to)
cfg_sub_block.set_jump_to(current_jumps_to)
cfg_sub_block.final_stack_elements = current_stack_values

current_falls_to = cfg_sub_block.block_id
current_jumps_to = None

# The final stack elements correspond to the arguments in the call to functions (if any)
if cfg_sub_block.get_jump_type() == "split_instruction_block":
# For split instructions, we need to force the previous block to place the arguments in the correct order
# TODO: check whether arguments must be reversed or not
current_stack_values = cfg_sub_block.get_instructions_to_compute()[0].get_in_args()
else:
current_stack_values = []

return cfg_sub_blocks


Expand Down Expand Up @@ -118,19 +118,13 @@ def compute_sub_block_list(block_list: CFGBlockList) -> CFGBlockList:
# a call to a function or an instruction that cannot be processed
sub_block_instructions: List[Tuple[List[CFGInstruction], bool]] = []
current_sub_block = []

for instr in instructions:
if instr.get_op_name().upper() in constants.split_block or instr.get_op_name() in cfg_block.function_calls:


# If there is at least one instruction, consider the corresponding sub-block
if current_sub_block:
sub_block_instructions.append((current_sub_block, False))
current_sub_block = []

# Include in an isolated sub block the split instruction
sub_block_instructions.append(([instr], True))

for instr in instructions:
if instr.get_op_name() in itertools.chain(constants.split_block, cfg_block.function_calls, "JUMP", "JUMPI"):
# Sub blocks contain a split instruction or a function call as the last instruction
current_sub_block.append(instr)
sub_block_instructions.append((current_sub_block, True))
current_sub_block = []
else:
current_sub_block.append(instr)

Expand Down
Loading

0 comments on commit 4708ac6

Please sign in to comment.