def merged_block_id(block_id_1: block_id_T, block_id_2: block_id_T) -> block_id_T:
    """
    Given the ids of the two blocks being merged, generates the id of the resulting merged block:
    both ids joined by "_merged_", with the id of the first block first
    """
    return '_merged_'.join([block_id_1, block_id_2])


class MergeBlocks(BlockAction):
    """
    Merges two blocks that belong to the same block list. Keeps the jump type of the second block.

    The merged block contains the instructions of the first block followed by the ones of the second
    block, is reached from the predecessors of the first block and reaches the successors of the
    second block.

    NOTE(review): only the predecessors of the first block and the successors of the second block are
    rewired, so this presumably expects the first block to reach only the second one and the second
    block to be reached only from the first one -- confirm against the callers
    """

    def __init__(self, first_block: CFGBlock, second_block: CFGBlock, cfg_blocklist: CFGBlockList):
        self._first_block: Optional[CFGBlock] = first_block
        self._second_block: Optional[CFGBlock] = second_block
        self._cfg_blocklist: CFGBlockList = cfg_blocklist
        # Filled in by perform_action
        self._combined_block: Optional[CFGBlock] = None

        # The ids are stored separately because the references to the blocks are dropped after merging
        self._first_block_id: block_id_T = first_block.block_id
        self._second_block_id: block_id_T = second_block.block_id

    def perform_action(self):
        """
        Builds the merged block, rewires the CFG edges so that they refer to it and replaces
        the two original blocks in the block list with the merged one
        """
        combined_instrs = self._first_block.get_instructions() + self._second_block.get_instructions()
        combined_block_id = merged_block_id(self._first_block_id, self._second_block_id)
        # We assume the jump type from the second block
        combined_jump_type = self._second_block.get_jump_type()
        # If a key appears in both dicts, the value from the second block is kept
        combined_assignment_dict = self._first_block.assignment_dict | self._second_block.assignment_dict

        combined_block = CFGBlock(combined_block_id, combined_instrs, combined_jump_type, combined_assignment_dict)
        self._update_cfg_edges(combined_block)

        # Add the new block to the block list
        self._cfg_blocklist.add_block(combined_block)
        self._combined_block = combined_block

        # Remove the elements from the block lists and the references
        self._cfg_blocklist.blocks.pop(self._first_block_id)
        self._cfg_blocklist.blocks.pop(self._second_block_id)

        # The attributes are kept (set to None) so that they can still be inspected after the action
        self._first_block = None
        self._second_block = None

    def _update_cfg_edges(self, combined_block: CFGBlock):
        """
        Updates the CFG in the block list with the information of the combined block: the neighbours
        of the merged blocks are redirected to the combined block and the combined block receives
        the incoming edges of the first block and the outgoing edges of the second block
        """
        combined_id = combined_block.block_id

        # Retrieve the information from the first and second blocks
        predecessor_ids = list(self._first_block.get_comes_from())
        jumps_to_id = self._second_block.get_jump_to()
        falls_to_id = self._second_block.get_falls_to()

        # Update the information from the predecessors of the first block
        for pred_block_id in predecessor_ids:
            modify_successors(pred_block_id, self._first_block_id, combined_id, self._cfg_blocklist)

        # Update the "comes from" information from the successors of the second block. If the second
        # block jumps and falls to the same block, it must be updated only once: a second call would
        # not find the id of the second block anymore
        successor_ids = dict.fromkeys(succ_id for succ_id in (jumps_to_id, falls_to_id) if succ_id is not None)
        for succ_block_id in successor_ids:
            modify_comes_from(succ_block_id, self._second_block_id, combined_id, self._cfg_blocklist)

        # Finally, the combined block itself must contain the edges. Edges that go from the second
        # block back to the first one become self-loops of the combined block
        combined_block.set_comes_from([combined_id if pred_block_id == self._second_block_id else pred_block_id
                                       for pred_block_id in predecessor_ids])
        if jumps_to_id is not None:
            combined_block.set_jump_to(combined_id if jumps_to_id == self._first_block_id else jumps_to_id)
        if falls_to_id is not None:
            combined_block.set_falls_to(combined_id if falls_to_id == self._first_block_id else falls_to_id)

    @property
    def combined_block(self) -> Optional[CFGBlock]:
        """
        Block that results from the merge, or None if the action has not been performed yet
        """
        return self._combined_block

    def __str__(self):
        return f"MergeBlocks {self._first_block_id} and {self._second_block_id}"
def split_blocks_ids(current_node: str) -> Tuple[str, str]:
    """
    Given the id of the block to split, generates the ids of the two resulting halves.

    If the id already ends in "split_<number>", the first half keeps the id and the second half
    receives the same id with the number increased by one. Otherwise, the halves are named
    "<id>_split_0" and "<id>_split_1".

    NOTE(review): when the id ends in "split_<k>", the id generated for the second half may already
    be in use by a sibling block (e.g. splitting "b_split_0" yields "b_split_1") -- confirm that
    callers only split the last sub block or check the block list for collisions
    """
    split_name = current_node.split("_")

    # If the last keywords correspond to split + number, then we just add one to that number
    if len(split_name) > 1 and split_name[-2] == "split":
        try:
            next_index = int(split_name[-1]) + 1
        except ValueError:
            # "split" is followed by something that is not a number (e.g. "foo_split_bar"):
            # the id is handled as any other id that has not been split yet
            pass
        else:
            split_name[-1] = str(next_index)
            return current_node, '_'.join(split_name)

    return current_node + "_split_0", current_node + "_split_1"
\ - f"Incoherent CFG: the predecessor block {pred_block_id} must reach block {self._initial_id}" - pred_block.set_falls_to(self._first_half.block_id) + modify_successors(pred_block_id, self._initial_id, self._first_half.block_id, self._cfg_block_list) def _update_second_half(self): # We need to update the corresponding information @@ -94,26 +88,17 @@ def _update_second_half(self): initial_falls_to = self._cfg_block.get_falls_to() if initial_jumps_to is not None: - self._modify_comes_from(initial_jumps_to, self._second_half.block_id) + modify_comes_from(initial_jumps_to, self._initial_id, self._second_half.block_id, self._cfg_block_list) if initial_falls_to is not None: - self._modify_comes_from(initial_falls_to, self._second_half.block_id) - - def _modify_comes_from(self, block_id: block_id_T, new_pred_block_id: block_id_T): - """ - Modifies the comes from the block id to replace the id of the initial block with the new one - """ - block = self._cfg_block_list.blocks[block_id] - found_previous = False - comes_from = block.get_comes_from() - new_comes_from = [] - for pred_block in comes_from: - if pred_block == self._initial_id: - found_previous = True - new_comes_from.append(new_pred_block_id) - else: - new_comes_from.append(pred_block) - block.set_comes_from(new_comes_from) - assert found_previous, f"Comes from list {comes_from} of {block_id} does not contain {self._initial_id}" + modify_comes_from(initial_falls_to, self._initial_id, self._second_half.block_id, self._cfg_block_list) + + @property + def first_half(self) -> Optional[CFGBlock]: + return self._first_half + + @property + def second_half(self) -> Optional[CFGBlock]: + return self._second_half def __str__(self): return f"SplitBlock {self._initial_id} at instruction with index {self._instr_idx}" diff --git a/src/parser/cfg_block_actions/utils.py b/src/parser/cfg_block_actions/utils.py new file mode 100644 index 00000000..aca6b29e --- /dev/null +++ b/src/parser/cfg_block_actions/utils.py @@ -0,0 +1,37 @@ 
def modify_comes_from(block_to_modify: block_id_T, previous_pred_id: block_id_T,
                      new_pred_id: block_id_T, cfg_block_list: CFGBlockList) -> None:
    """
    Modifies the "comes from" list of the block "block_to_modify" in the block list, so that every
    occurrence of "previous_pred_id" is replaced with "new_pred_id". The remaining predecessors are
    kept in the same order.

    Fails with an AssertionError if "previous_pred_id" is not a predecessor of the block
    """
    block = cfg_block_list.blocks[block_to_modify]
    comes_from = block.get_comes_from()
    # Determined before the list is replaced, as the error message refers to the initial list
    found_previous = previous_pred_id in comes_from
    block.set_comes_from([new_pred_id if pred_block == previous_pred_id else pred_block
                          for pred_block in comes_from])
    assert found_previous, f"Comes from list {comes_from} of {block_to_modify} does not contain {previous_pred_id}"


def modify_successors(block_to_modify: block_id_T, previous_successor_id: block_id_T,
                      new_successor_id: block_id_T, cfg_block_list: CFGBlockList) -> None:
    """
    Modifies the successor "previous_successor_id" from block "block_to_modify" so that it falls to or jumps to
    "new_successor_id" instead. If the block both jumps and falls to "previous_successor_id", the two edges
    are updated.

    Fails with an AssertionError if "previous_successor_id" is not a successor of the block
    """
    pred_block = cfg_block_list.blocks[block_to_modify]
    jumps_to_previous = pred_block.get_jump_to() == previous_successor_id
    falls_to_previous = pred_block.get_falls_to() == previous_successor_id

    # Checked before modifying anything, so an incoherent CFG is left untouched
    assert jumps_to_previous or falls_to_previous, \
        f"Incoherent CFG: the predecessor block {block_to_modify} must reach block {previous_successor_id}"

    # Both edges are considered: updating just the jump would leave the fall-through edge pointing
    # to the previous successor
    if jumps_to_previous:
        pred_block.set_jump_to(new_successor_id)
    if falls_to_previous:
        pred_block.set_falls_to(new_successor_id)
class TestMergeBlocks:
    """
    Tests for the MergeBlocks action
    """

    @given(cfg_instruction_list(6, 24))
    def test_merge_block_simple(self, instructions):
        """
        Builds a CFG with six blocks, merges block_2 and block_3 and checks that the merged block
        replaces them in the block list and in the edges of the neighbour blocks
        """
        chunk_size = len(instructions) // 6
        jump_types = ["unconditional", "unconditional", "unconditional", "conditional", "terminal", "terminal"]
        last_idx = len(jump_types) - 1

        cfg_blocks = []
        for idx, block_jump_type in enumerate(jump_types):
            # Every block receives chunk_size instructions, except the last one that takes the remaining ones
            chunk_end = (idx + 1) * chunk_size if idx < last_idx else None
            cfg_blocks.append(CFGBlock(f"block_{idx}", instructions[idx * chunk_size:chunk_end],
                                       block_jump_type, dict()))

        cfg_block_list = CFGBlockList()
        for cfg_block in cfg_blocks:
            cfg_block_list.add_block(cfg_block)

        # CFG structure:
        #   0   1
        #     2
        #     3
        #   4   5
        # Each edge: (index of the source block, index of the target block, kind of edge)
        edges = [(0, 2, "falls_to"),
                 (1, 2, "jumps_to"),
                 (2, 3, "jumps_to"),
                 (3, 4, "falls_to"),
                 (3, 5, "jumps_to")]
        id_edges = [(cfg_blocks[src].block_id, cfg_blocks[dst].block_id, kind) for src, dst, kind in edges]

        for src_id, dst_id, kind in id_edges:
            cfg_block_list.blocks[dst_id].add_comes_from(src_id)
            if kind == "jumps_to":
                cfg_block_list.blocks[src_id].set_jump_to(dst_id)
            else:
                cfg_block_list.blocks[src_id].set_falls_to(dst_id)

        MergeBlocks(cfg_blocks[2], cfg_blocks[3], cfg_block_list).perform_action()

        assert len(cfg_block_list.blocks) == 5, "There must be five sub blocks"
        # At this point, the merged blocks have been erased from the list
        joined_block_id = merged_block_id("block_2", "block_3")
        assert joined_block_id in cfg_block_list.blocks, f"Block {joined_block_id} must appear in the block list"
        assert joined_block_id == cfg_blocks[0].get_falls_to(), "Joined block must be the falls to from block 0"
        assert joined_block_id == cfg_blocks[1].get_jump_to(), "Joined block must be the jumps to from block 1"
        assert joined_block_id in cfg_blocks[4].get_comes_from(), "Joined block must appear in the comes from block 4"
        assert joined_block_id in cfg_blocks[5].get_comes_from(), "Joined block must appear in the comes from block 5"
len(cfg_block_list.blocks["block_0_split_0"].get_instructions()) == selected_index + 1, \ f"First block must contain {selected_index} instructions" n_remaining_instrs = len(cfg_block.get_instructions()) - selected_index - 1 - assert len(cfg_block_list.blocks["block_2"].get_instructions()) == n_remaining_instrs, \ + assert len(cfg_block_list.blocks["block_0_split_1"].get_instructions()) == n_remaining_instrs, \ f"Second block must contain {n_remaining_instrs} instructions" diff --git a/tests/utils.py b/tests/utils.py index e50dd1e9..8b53357c 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -1,13 +1,14 @@ from hypothesis import strategies as st -from parser.cfg_block_actions.split_block import CFGInstruction +from parser.cfg_instruction import CFGInstruction @st.composite -def cfg_instruction_list(draw): +def cfg_instruction_list(draw, min_value: int, max_value: int): """ - Strategy to generate a list of CFG instructions with distinct output variables for each block + Strategy to generate a list of n in (min_size, max_size) CFG instructions + with distinct output variables for each block """ - n = draw(st.integers(min_value=2, max_value=20)) + n = draw(st.integers(min_value, max_value)) ops = draw(st.lists(st.text(min_size=3, max_size=5), min_size=n, max_size=n)) outs_already = set()