Merge pull request #10 from costa-group/split_instructions

Split instructions
costa-group · Oct 11, 2024 · 4708ac6 · 4708ac6
2 parents a55dd4a + 9ce5c7f
commit 4708ac6
Show file tree

Hide file tree

Showing 6 changed files with 107 additions and 108 deletions.
diff --git a/src/liveness/layout_generation.py b/src/liveness/layout_generation.py
@@ -125,34 +125,34 @@ def output_stack_layout(input_stack: List[str], final_stack_elements: List[str],
             reversed_stack_relative_order.append(None)
 
     # We undo the reversed traversal
-    relative_order = list(reversed(reversed_stack_relative_order))
+    bottom_output_stack = list(reversed(reversed_stack_relative_order))
 
-    output_stack = final_stack_elements + relative_order
-    vars_to_place = live_vars.difference(set(output_stack))
+    vars_to_place = live_vars.difference(set(final_stack_elements + bottom_output_stack))
 
     # Sort the vars to place according to the variable depth info order in reversed order
     vars_to_place_sorted = sorted(vars_to_place, key=lambda x: -variable_depth_info[x])
 
     # Try to place the variables in reversed order
-    i, j = len(output_stack) - 1, 0
+    i, j = len(bottom_output_stack) - 1, 0
 
     while i >= 0 and j < len(vars_to_place_sorted):
-        if output_stack[i] is None:
-            output_stack[i] = vars_to_place_sorted[j]
+        if bottom_output_stack[i] is None:
+            bottom_output_stack[i] = vars_to_place_sorted[j]
             j += 1
         i -= 1
 
     # First exit condition: every position of the stack has been traversed, so no None value is left.
     # Hence, the variables that could not be placed in between are inserted at the beginning
     if i == -1:
-        output_stack = list(reversed(vars_to_place_sorted[j:])) + output_stack
+        bottom_output_stack = list(reversed(vars_to_place_sorted[j:])) + bottom_output_stack
 
     # Second condition: all variables have been placed in between. There can be some None values in between that
     # must be removed
     else:
-        output_stack = [var_ for var_ in output_stack if var_ is not None]
+        bottom_output_stack = [var_ for var_ in bottom_output_stack if var_ is not None]
 
-    return output_stack
+    # The final stack elements must appear at the top of the stack
+    return final_stack_elements + bottom_output_stack
 
 
 def unify_stacks_brothers(input_stack: List[str], final_stack_elements: List[str],
@@ -256,6 +256,9 @@ def _construct_code_from_block(self, block: CFGBlock, input_stacks: Dict[str, Li
         liveness_info = self._liveness_info[block_id]
         comes_from = block.get_comes_from()
 
+        if block.block_id.startswith("abi_decode_available_length_t_string_memory_ptr_fromMemory"):
+            print("HOLA")
+
         # Computing input stack...
         # The stack from comes_from stacks must be equal
         if comes_from:
@@ -374,21 +377,14 @@ def build_layout(self):
         Builds the layout of the blocks from the given representation
         """
         json_info = self._construct_code_from_block_list()
+        print(json_info.keys())
 
         renamed_graph = information_on_graph(self._cfg_graph, {block_name: print_stacks(block_name, json_info[block_name])
                                                                for block_name in
                                                                self._block_list.blocks})
 
         nx.nx_agraph.write_dot(renamed_graph, Path(self._dir.parent).joinpath(self._dir.stem + "_stacks.dot"))
 
-        # Skip blocks with split instructions in the JSON information. We must remove
-        # then at this point because their specification is needed to generate the "_stacks" dot file
-        non_split_blocks = set(block_name for block_name, block in self._block_list.blocks.items()
-                               if block.get_jump_type() != "split_instruction_block")
-
-        json_info = {json_name: sfs for json_name, sfs in json_info.items()
-                     if any(json_name == split_block for split_block in non_split_blocks)}
-
         return json_info
 
 
@@ -404,6 +400,7 @@ def layout_generation(cfg: CFG, final_dir: Path = Path(".")) -> Tuple[Dict[str,
     tags_dict = dict()
 
     for component_name, liveness in results.items():
+        print(component_name)
         cfg_info_suboject = cfg_info[component_name]["block_info"]
         digraph = digraph_from_block_info(cfg_info_suboject.values())
 

diff --git a/src/parser/cfg_block.py b/src/parser/cfg_block.py
@@ -1,3 +1,4 @@
+import itertools
 import logging
 
 from global_params.types import instr_id_T, dependencies_T
@@ -7,8 +8,9 @@
 import parser.constants as constants
 import json
 import networkx as nx
+from parser.constants import split_block
 
-from typing import List, Dict, Tuple, Any
+from typing import List, Dict, Tuple, Any, Set, Optional
 
 global tag_idx
 tag_idx = 0
@@ -63,6 +65,12 @@ def __init__(self, identifier: str, instructions: List[CFGInstruction], type_blo
                  assignment_dict: Dict[str, str]):
         self.block_id = identifier
         self._instructions = instructions
+
+        # The split instruction, if any, is always the last instruction of the block.
+        # As we don't have information on the function calls yet, we initialize it to None and
+        # identify it once the function calls have been set
+        self._split_instruction = None
+
         # minimum size of the source stack
         self.source_stack = 0
         self._jump_type = type_block
@@ -72,8 +80,6 @@ def __init__(self, identifier: str, instructions: List[CFGInstruction], type_blo
         self.is_function_call = False
         self._comes_from = []
         self.function_calls = set()
-        self.sto_dep = []
-        self.mem_dep = []
 
         # Stack elements that must be placed in a specific order in the stack after performing
         self._final_stack_elements: List[str] = []
@@ -91,6 +97,10 @@ def final_stack_elements(self) -> List[str]:
     def final_stack_elements(self, value: List[str]):
         self._final_stack_elements = value
 
+    @property
+    def split_instruction(self) -> Optional[CFGInstruction]:
+        return self._split_instruction
+
     def get_block_id(self) -> str:
         return self.block_id
 
@@ -118,18 +128,6 @@ def is_function_call(self) -> bool:
     def set_function_call(self, v) -> None:
         self.is_function_call = v
 
-    def set_instructions(self, new_instructions: List[CFGInstruction]) -> None:
-        self._instructions = new_instructions
-
-        # Then we update the source stack size
-        # TODO
-        # self.source_stack = utils.compute_stack_size(map(lambda x: x.disasm, self.instructions_to_optimize_bytecode()))
-
-    def add_instruction(self, new_instr: CFGInstruction) -> None:
-        self._instructions.append(new_instr)
-        # TODO
-        # self.source_stack = utils.compute_stack_size(map(lambda x: x.disasm, self.instructions_to_optimize_bytecode()))
-
     def add_comes_from(self, block_id: str) -> None:
         self._comes_from.append(block_id)
 
@@ -140,7 +138,7 @@ def set_comes_from(self, new_comes_from: List[str]) -> None:
         self._comes_from = new_comes_from
 
     def set_jump_type(self, t: str) -> None:
-        if t not in ["conditional", "unconditional", "terminal", "falls_to", "sub_block", "split_instruction_block"]:
+        if t not in ["conditional", "unconditional", "terminal", "falls_to", "sub_block"]:
             raise Exception("Wrong jump type")
         else:
             self._jump_type = t
@@ -199,6 +197,22 @@ def process_function_calls(self, function_ids):
         calls = filter(lambda x: x in function_ids, op_names)
         self.function_calls = set(calls)
 
+        # Finally, we identify the possible split instruction using the now generated information
+        if len(self._instructions) > 0 and \
+                self._instructions[-1].get_op_name() in itertools.chain(split_block, self.function_calls, "JUMP", "JUMPI"):
+            self._split_instruction = self._instructions[-1]
+
+    @property
+    def instructions_to_synthesize(self) -> List[CFGInstruction]:
+        if self.split_instruction is not None:
+            return self._instructions[:-1]
+        else:
+            return self._instructions
+
+    @instructions_to_synthesize.setter
+    def instructions_to_synthesize(self, value):
+        raise NotImplementedError("The instructions for the greedy algorithm cannot be assigned")
+
     def check_validity_arguments(self):
         """
         It checks for each instruction in the block that there is not
@@ -372,35 +386,11 @@ def _build_spec_for_sequence(self, instructions, map_instructions: Dict, out_idx
 
         map_positions_instructions = {}
 
-        jump_instr = None
-
-        for i in range(len(instructions)):
-            # Check if it has been already created
+        unprocessed_instr = None
 
-            ins = instructions[i]
-
-            # Ignore JUMP instructions
-            if ins.get_op_name().startswith("JUMP"):
-                jump_instr = ins
-                continue
-
-            # # TODO: temporal fix for PUSH instructions obtained through translating "memoryguard"
-            # elif ins.get_op_name() == "push":
-            #     in_val = int(ins.builtin_args[0])
-            #     str_in_val = hex(in_val)
-            #     push_name = "PUSH" if in_val != 0 else "PUSH0"
-            #     inst_idx = instrs_idx.get(push_name, 0)
-            #     instrs_idx[push_name] = inst_idx + 1
-            #     push_ins = build_push_spec(str_in_val, inst_idx, [ins.get_out_args()[0]])
-
-            #     map_instructions[("PUSH", tuple([str_in_val]))] = push_ins
-
-            #     uninter_functions.append(push_ins)
-
-            #     map_positions_instructions[i] = push_ins["id"]
-
-            #     continue
+        for i, ins in enumerate(instructions):
 
+            # Check if it has been already created
             if ins.get_op_name().startswith("push"):
                 ins_spec = map_instructions.get((ins.get_op_name().upper(), tuple(ins.get_builtin_args())), None)
             else:
@@ -412,8 +402,10 @@ def _build_spec_for_sequence(self, instructions, map_instructions: Dict, out_idx
                 uninter_functions += result
 
                 map_positions_instructions[i] = result[-1]["id"]
-
-            elif ins.get_op_name() == "push": #it is a push value that has been already created. If it comes from a memoryguard we have to rename the previous instructions to the output of the memoryguard
+
+            # it is a push value that has been already created. If it comes from a memoryguard,
+            # we have to rename the previous instructions to the output of the memoryguard
+            elif ins.get_op_name() == "push":
                 out_var_list = ins_spec["outpt_sk"]
                 new_out_var_list = ins.get_out_args()
 
@@ -427,9 +419,21 @@ def _build_spec_for_sequence(self, instructions, map_instructions: Dict, out_idx
                     pos = uninter["inpt_sk"].index(out_var)
                     uninter["inpt_sk"][pos] = new_out_var
 
-        # As JUMP instructions are not considered as part of the SFS, we must remove the corresponding values
-        # from the final stack
-        final_stack_bef_jump = (jump_instr.get_in_args() if jump_instr is not None else []) + final_stack
+        # We must remove the output variables of the split instruction from the final stack and
+        # add the input variables of that instruction instead
+        if self.split_instruction is not None:
+            unprocess_out = self.split_instruction.get_out_args()
+            assert unprocess_out == final_stack[:len(unprocess_out)], \
+                f"Stack elements from the instruction {self.split_instruction.get_op_name()} " \
+                f"do not match the ones from the final stack.\nFinal stack: {final_stack}." \
+                f"\nStack elements produced by the instruction: {unprocess_out}"
+
+            # As the unprocessed instruction is not considered as part of the SFS,
+            # we must remove the corresponding values from the final stack
+            final_stack_bef_jump = self.split_instruction.get_in_args() + final_stack[len(unprocess_out):]
+
+        else:
+            final_stack_bef_jump = final_stack
 
         # If there is a bottom value in the final stack, then we introduce it as part of the assignments and
         # then we pop it. Same for constant values in the final stack
@@ -596,8 +600,8 @@ def build_spec(self, block_tags_dict: Dict, block_tag_idx: int, initial_stack: L
 
         out_idx = 0
 
-        spec, out_idx, map_positions = self._build_spec_for_sequence(self._instructions, map_instructions,
-                                                                     out_idx, initial_stack, final_stack)
+        spec, out_idx, map_positions = self._build_spec_for_sequence(self.instructions_to_synthesize, map_instructions, out_idx,
+                                                                     initial_stack, final_stack)
 
         sto_deps, mem_deps = self._process_dependences(self._instructions, map_positions)
         spec["storage_dependences"] = sto_deps

diff --git a/src/parser/cfg_block_list.py b/src/parser/cfg_block_list.py
@@ -6,7 +6,7 @@
 from typing import List, Dict, Any, Tuple
 import networkx
 from parser.cfg_block import CFGBlock, include_function_call_tags
-import parser.constants
+from parser.constants import split_block
 
 
 class CFGBlockList:
@@ -53,7 +53,7 @@ def build_spec(self, block_tag_idx, return_function_element = 0):
         """
         list_spec = {}
 
-        valid_blocks = filter(lambda x: x.get_jump_type() != "split_instruction_block", self.blocks)
+        valid_blocks =  self.blocks
 
         for b in valid_blocks:
             block = self.blocks[b]
@@ -65,7 +65,7 @@ def build_spec(self, block_tag_idx, return_function_element = 0):
                 split_instr = split_block.get_instructions()[0]
                 #It only has one instruction
 
-                if split_instr.get_op() not in constants.split_block:
+                if split_instr.get_op() not in split_block:
                     #It is a call to a function
                     spec, out_idx = include_function_call_tags(split_instr, out_idx, spec)
 

diff --git a/src/parser/constants.py b/src/parser/constants.py
@@ -1,4 +1,5 @@
-split_block = {"LOG0","LOG1","LOG2","LOG3","LOG4","CALLDATACOPY","CODECOPY","EXTCODECOPY","RETURNDATACOPY",
-               "CALL","STATICCALL","DELEGATECALL","CREATE","CREATE2","ASSIGNIMMUTABLE", "GAS", "DATACOPY"}
+split_block = {'calldatacopy', 'create', 'codecopy', 'call', 'log4', 'gas', 'delegatecall', 'extcodecopy', 'create2',
+               'assignimmutable', 'returndatacopy', 'log2', 'log1', 'log3', 'log0', 'datacopy', 'staticcall'}
+
 
 # split_block = {"ASSIGNIMMUTABLE", "GAS", "MEMORYGUARD", "DATACOPY"}
diff --git a/src/parser/optimizable_block_list.py b/src/parser/optimizable_block_list.py
@@ -1,7 +1,7 @@
 """
 Module that generates a list of sub-blocks to optimize using the greedy algorithm from the given ones
 """
-
+import itertools
 from copy import deepcopy
 from typing import List, Dict, Tuple
 from parser.cfg_instruction import CFGInstruction
@@ -28,7 +28,7 @@ def initialize_sub_blocks(initial_block: CFGBlock, sub_blocks_instrs: List[Tuple
         # Determine the jump type according to whether it is a initial block or a call to a function
         if sub_block_idx != len(sub_blocks_instrs) - 1:
             # Current sub block does not correspond to the last one
-            new_sub_block_type = "split_instruction_block" if split_instruction_sub_block else "sub_block"
+            new_sub_block_type = "sub_block"
         else:
             new_sub_block_type = initial_block.get_jump_type()
 
@@ -48,24 +48,24 @@ def initialize_sub_blocks(initial_block: CFGBlock, sub_blocks_instrs: List[Tuple
     # and the final stack values
     current_falls_to = initial_block.get_falls_to()
     current_jumps_to = initial_block.get_jump_to()
-    current_stack_values = initial_block.final_stack_elements
 
     for cfg_sub_block in reversed(cfg_sub_blocks):
+
+        # All the blocks except the last one fall into this category
+        if cfg_sub_block.get_jump_type() == "sub_block":
+            # The final stack elements correspond to the values produced by the last instruction
+            current_stack_values = cfg_sub_block.get_instructions_to_compute()[-1].get_out_args()
+            # In any other case (else branch below), we keep the stack elements from the original block
+        else:
+            current_stack_values = initial_block.final_stack_elements
+
         cfg_sub_block.set_falls_to(current_falls_to)
         cfg_sub_block.set_jump_to(current_jumps_to)
         cfg_sub_block.final_stack_elements = current_stack_values
 
         current_falls_to = cfg_sub_block.block_id
         current_jumps_to = None
 
-        # The final stack elements correspond to the arguments in the call to functions (if any)
-        if cfg_sub_block.get_jump_type() == "split_instruction_block":
-            # For split instructions, we need to force the previous block to place the arguments in the correct order
-            # TODO: check whether arguments must be reversed or not
-            current_stack_values = cfg_sub_block.get_instructions_to_compute()[0].get_in_args()
-        else:
-            current_stack_values = []
-
     return cfg_sub_blocks
 
 
@@ -118,19 +118,13 @@ def compute_sub_block_list(block_list: CFGBlockList) -> CFGBlockList:
         # a call to a function or an instruction that cannot be processed
         sub_block_instructions: List[Tuple[List[CFGInstruction], bool]] = []
         current_sub_block = []
-
-        for instr in instructions:
-            if instr.get_op_name().upper() in constants.split_block or instr.get_op_name() in cfg_block.function_calls:
-
-
-                # If there is at least a instruction, consider the corresponding sub-block
-                if current_sub_block:
-                    sub_block_instructions.append((current_sub_block, False))
-                    current_sub_block = []
-
-                # Include in an isolated sub block the split instruction
-                sub_block_instructions.append(([instr], True))
 
+        for instr in instructions:
+            if instr.get_op_name() in itertools.chain(constants.split_block, cfg_block.function_calls, "JUMP", "JUMPI"):
+                # Sub blocks contain a split instruction or a function call as the last instruction
+                current_sub_block.append(instr)
+                sub_block_instructions.append((current_sub_block, True))
+                current_sub_block = []
             else:
                 current_sub_block.append(instr)