diff --git a/tests/unit/compiler/venom/test_duplicate_operands.py b/tests/unit/compiler/venom/test_duplicate_operands.py
index fbff0835d2..ab55649dae 100644
--- a/tests/unit/compiler/venom/test_duplicate_operands.py
+++ b/tests/unit/compiler/venom/test_duplicate_operands.py
@@ -1,6 +1,8 @@
 from vyper.compiler.settings import OptimizationLevel
 from vyper.venom import generate_assembly_experimental
+from vyper.venom.analysis.analysis import IRAnalysesCache
 from vyper.venom.context import IRContext
+from vyper.venom.passes.store_expansion import StoreExpansionPass
 
 
 def test_duplicate_operands():
@@ -13,7 +15,7 @@ def test_duplicate_operands():
     %3 = mul %1, %2
     stop
 
-    Should compile to: [PUSH1, 10, DUP1, DUP1, DUP1, ADD, MUL, POP, STOP]
+    Should compile to: [PUSH1, 10, DUP1, DUP2, ADD, MUL, POP, STOP]
     """
     ctx = IRContext()
     fn = ctx.create_function("test")
@@ -23,5 +25,9 @@ def test_duplicate_operands():
     bb.append_instruction("mul", sum_, op)
     bb.append_instruction("stop")
 
-    asm = generate_assembly_experimental(ctx, optimize=OptimizationLevel.GAS)
-    assert asm == ["PUSH1", 10, "DUP1", "DUP1", "ADD", "MUL", "POP", "STOP"]
+    ac = IRAnalysesCache(fn)
+    StoreExpansionPass(ac, fn).run_pass()
+
+    optimize = OptimizationLevel.GAS
+    asm = generate_assembly_experimental(ctx, optimize=optimize)
+    assert asm == ["PUSH1", 10, "DUP1", "DUP2", "ADD", "MUL", "POP", "STOP"]
diff --git a/tests/unit/compiler/venom/test_stack_cleanup.py b/tests/unit/compiler/venom/test_stack_cleanup.py
index 6015cf1c41..7198861771 100644
--- a/tests/unit/compiler/venom/test_stack_cleanup.py
+++ b/tests/unit/compiler/venom/test_stack_cleanup.py
@@ -9,7 +9,8 @@ def test_cleanup_stack():
     bb = fn.get_basic_block()
     ret_val = bb.append_instruction("param")
     op = bb.append_instruction("store", 10)
-    bb.append_instruction("add", op, op)
+    op2 = bb.append_instruction("store", op)
+    bb.append_instruction("add", op, op2)
     bb.append_instruction("ret", ret_val)
 
     asm = generate_assembly_experimental(ctx, optimize=OptimizationLevel.GAS)
diff --git a/tests/unit/compiler/venom/test_stack_reorder.py b/tests/unit/compiler/venom/test_stack_reorder.py
index a9f505984e..a15dd4d540 100644
--- a/tests/unit/compiler/venom/test_stack_reorder.py
+++ b/tests/unit/compiler/venom/test_stack_reorder.py
@@ -1,5 +1,7 @@
 from vyper.venom import generate_assembly_experimental
+from vyper.venom.analysis.analysis import IRAnalysesCache
 from vyper.venom.context import IRContext
+from vyper.venom.passes.store_expansion import StoreExpansionPass
 
 
 def test_stack_reorder():
@@ -25,4 +27,7 @@ def test_stack_reorder():
 
     bb.append_instruction("ret", ret_val)
 
+    ac = IRAnalysesCache(fn)
+    StoreExpansionPass(ac, fn).run_pass()
+
     generate_assembly_experimental(ctx)
diff --git a/vyper/venom/__init__.py b/vyper/venom/__init__.py
index afd79fc44f..a5f51b787d 100644
--- a/vyper/venom/__init__.py
+++ b/vyper/venom/__init__.py
@@ -12,13 +12,13 @@
 from vyper.venom.passes.algebraic_optimization import AlgebraicOptimizationPass
 from vyper.venom.passes.branch_optimization import BranchOptimizationPass
 from vyper.venom.passes.dft import DFTPass
-from vyper.venom.passes.extract_literals import ExtractLiteralsPass
 from vyper.venom.passes.make_ssa import MakeSSA
 from vyper.venom.passes.mem2var import Mem2Var
 from vyper.venom.passes.remove_unused_variables import RemoveUnusedVariablesPass
 from vyper.venom.passes.sccp import SCCP
 from vyper.venom.passes.simplify_cfg import SimplifyCFGPass
 from vyper.venom.passes.store_elimination import StoreElimination
+from vyper.venom.passes.store_expansion import StoreExpansionPass
 from vyper.venom.venom_to_assembly import VenomCompiler
 
 DEFAULT_OPT_LEVEL = OptimizationLevel.default()
@@ -54,8 +54,9 @@ def _run_passes(fn: IRFunction, optimize: OptimizationLevel) -> None:
     SimplifyCFGPass(ac, fn).run_pass()
     AlgebraicOptimizationPass(ac, fn).run_pass()
     BranchOptimizationPass(ac, fn).run_pass()
-    ExtractLiteralsPass(ac, fn).run_pass()
     RemoveUnusedVariablesPass(ac, fn).run_pass()
+
+    StoreExpansionPass(ac, fn).run_pass()
     DFTPass(ac, fn).run_pass()
 
 
diff --git a/vyper/venom/analysis/equivalent_vars.py b/vyper/venom/analysis/equivalent_vars.py
new file mode 100644
index 0000000000..9b0c03e3d1
--- /dev/null
+++ b/vyper/venom/analysis/equivalent_vars.py
@@ -0,0 +1,50 @@
+from vyper.venom.analysis.analysis import IRAnalysis
+from vyper.venom.analysis.dfg import DFGAnalysis
+from vyper.venom.basicblock import IRVariable
+
+
+class VarEquivalenceAnalysis(IRAnalysis):
+    """
+    Generate equivalence sets of variables. This is used to avoid swapping
+    variables which are the same during venom_to_assembly. Theoretically,
+    the DFTPass should order variable declarations optimally, but it is
+    not aware of the "pickaxe" heuristic in venom_to_assembly, so they can
+    interfere.
+    """
+
+    def analyze(self):
+        """
+        Walk every `store` in the DFG and put its output in the same
+        "bag" (an integer id) as its source operand.
+        """
+        dfg = self.analyses_cache.request_analysis(DFGAnalysis)
+
+        equivalence_set: dict[IRVariable, int] = {}
+
+        for bag, (var, inst) in enumerate(dfg._dfg_outputs.items()):
+            if inst.opcode != "store":
+                continue
+
+            source = inst.operands[0]
+
+            assert var not in equivalence_set  # invariant
+            if source in equivalence_set:
+                equivalence_set[var] = equivalence_set[source]
+                continue
+            else:
+                equivalence_set[var] = bag
+                equivalence_set[source] = bag
+
+        self._equivalence_set = equivalence_set
+
+    def equivalent(self, var1, var2):
+        """
+        Return True iff both operands were placed in the same bag by
+        `analyze`. Operands that never took part in a `store` are in no
+        bag, so they compare as not equivalent - even to themselves.
+        """
+        if var1 not in self._equivalence_set:
+            return False
+        if var2 not in self._equivalence_set:
+            return False
+        return self._equivalence_set[var1] == self._equivalence_set[var2]
diff --git a/vyper/venom/passes/extract_literals.py b/vyper/venom/passes/store_expansion.py
similarity index 73%
rename from vyper/venom/passes/extract_literals.py
rename to vyper/venom/passes/store_expansion.py
index 91c0813e67..7718e67d33 100644
--- a/vyper/venom/passes/extract_literals.py
+++ b/vyper/venom/passes/store_expansion.py
@@ -1,12 +1,13 @@
 from vyper.venom.analysis.dfg import DFGAnalysis
 from vyper.venom.analysis.liveness import LivenessAnalysis
-from vyper.venom.basicblock import IRInstruction, IRLiteral
+from vyper.venom.basicblock import IRInstruction, IRLiteral, IRVariable
 from vyper.venom.passes.base_pass import IRPass
 
 
-class ExtractLiteralsPass(IRPass):
+class StoreExpansionPass(IRPass):
     """
-    This pass extracts literals so that they can be reordered by the DFT pass
+    This pass extracts literals and variables so that they can be
+    reordered by the DFT pass
     """
 
     def run_pass(self):
@@ -20,7 +21,7 @@ def _process_bb(self, bb):
         i = 0
         while i < len(bb.instructions):
             inst = bb.instructions[i]
-            if inst.opcode in ("store", "offset"):
+            if inst.opcode in ("store", "offset", "phi", "param"):
                 i += 1
                 continue
 
@@ -29,9 +30,11 @@ def _process_bb(self, bb):
                 if inst.opcode == "log" and j == 0:
                     continue
 
-                if isinstance(op, IRLiteral):
+                if isinstance(op, (IRVariable, IRLiteral)):
                     var = self.function.get_next_variable()
                     to_insert = IRInstruction("store", [op], var)
                     bb.insert_instruction(to_insert, index=i)
                     inst.operands[j] = var
+                    i += 1
+
             i += 1
diff --git a/vyper/venom/stack_model.py b/vyper/venom/stack_model.py
index a98e5bb25b..e284b41fb2 100644
--- a/vyper/venom/stack_model.py
+++ b/vyper/venom/stack_model.py
@@ -30,7 +30,7 @@ def push(self, op: IROperand) -> None:
     def pop(self, num: int = 1) -> None:
         del self._stack[len(self._stack) - num :]
 
-    def get_depth(self, op: IROperand, n: int = 1) -> int:
+    def get_depth(self, op: IROperand) -> int:
         """
-        Returns the depth of the n-th matching operand in the stack map.
+        Returns the depth of the topmost matching operand in the stack map.
         If the operand is not in the stack map, returns NOT_IN_STACK.
@@ -39,10 +39,7 @@ def get_depth(self, op: IROperand, n: int = 1) -> int:
 
         for i, stack_op in enumerate(reversed(self._stack)):
             if stack_op.value == op.value:
-                if n <= 1:
-                    return -i
-                else:
-                    n -= 1
+                return -i
 
         return StackModel.NOT_IN_STACK  # type: ignore
 
diff --git a/vyper/venom/venom_to_assembly.py b/vyper/venom/venom_to_assembly.py
index 9de75dab38..56228d53d2 100644
--- a/vyper/venom/venom_to_assembly.py
+++ b/vyper/venom/venom_to_assembly.py
@@ -1,5 +1,3 @@
-from bisect import insort
-from collections import Counter, defaultdict
 from typing import Any
 
 from vyper.exceptions import CompilerPanic, StackTooDeep
@@ -13,6 +11,7 @@
 )
 from vyper.utils import MemoryPositions, OrderedSet
 from vyper.venom.analysis.analysis import IRAnalysesCache
+from vyper.venom.analysis.equivalent_vars import VarEquivalenceAnalysis
 from vyper.venom.analysis.liveness import LivenessAnalysis
 from vyper.venom.basicblock import (
     IRBasicBlock,
@@ -26,6 +25,10 @@
 from vyper.venom.passes.normalization import NormalizationPass
 from vyper.venom.stack_model import StackModel
 
+DEBUG_SHOW_COST = False
+if DEBUG_SHOW_COST:
+    import sys
+
 # instructions which map one-to-one from venom to EVM
 _ONE_TO_ONE_INSTRUCTIONS = frozenset(
     [
@@ -153,6 +156,7 @@ def generate_evm(self, no_optimize: bool = False) -> list[str]:
 
                 NormalizationPass(ac, fn).run_pass()
                 self.liveness_analysis = ac.request_analysis(LivenessAnalysis)
+                self.equivalence = ac.request_analysis(VarEquivalenceAnalysis)
 
                 assert fn.normalized, "Non-normalized CFG!"
 
@@ -199,36 +203,19 @@ def generate_evm(self, no_optimize: bool = False) -> list[str]:
     def _stack_reorder(
         self, assembly: list, stack: StackModel, stack_ops: list[IROperand], dry_run: bool = False
     ) -> int:
-        cost = 0
-
         if dry_run:
             assert len(assembly) == 0, "Dry run should not work on assembly"
             stack = stack.copy()
 
-        stack_ops_count = len(stack_ops)
-        if stack_ops_count == 0:
+        if len(stack_ops) == 0:
             return 0
 
-        counts = Counter(stack_ops)
-
-        # positions stores the positions of relevant operands
-        # on stack for example operand %82 is on positions [0, 3]
-        # this operand could ocure even more deeper in the stack
-        # but only those that are needed/relevant in calculation
-        # are considered
-        positions: dict[IROperand, list[int]] = defaultdict(list)
-        for op in stack_ops:
-            positions[op] = []
-            for i in range(counts[op]):
-                positions[op].append(stack.get_depth(op, i + 1))
-
-        for i in range(stack_ops_count):
-            op = stack_ops[i]
-            final_stack_depth = -(stack_ops_count - i - 1)
-            depth = positions[op].pop()  # type: ignore
-            assert depth not in range(
-                -stack_ops_count + 1, final_stack_depth
-            ), f"{depth} : ({-stack_ops_count - 1}, {final_stack_depth})"
+        assert len(stack_ops) == len(set(stack_ops))  # precondition
+
+        cost = 0
+        for i, op in enumerate(stack_ops):
+            final_stack_depth = -(len(stack_ops) - i - 1)
+            depth = stack.get_depth(op)
 
             if depth == StackModel.NOT_IN_STACK:
                 raise CompilerPanic(f"Variable {op} not in stack")
@@ -236,25 +223,14 @@ def _stack_reorder(
             if depth == final_stack_depth:
                 continue
 
-            if op == stack.peek(final_stack_depth):
+            to_swap = stack.peek(final_stack_depth)
+            if self.equivalence.equivalent(op, to_swap):
+                # perform a "virtual" swap
+                stack.poke(final_stack_depth, op)
+                stack.poke(depth, to_swap)
                 continue
 
-            # moves the top item to original position
-            top_item_positions = positions[stack.peek(0)]
-            if len(top_item_positions) != 0:
-                top_item_positions.remove(0)
-                insort(top_item_positions, depth)
-
             cost += self.swap(assembly, stack, depth)
-
-            # moves the item from final position to top
-            final_item_positions = positions[stack.peek(final_stack_depth)]
-            if final_stack_depth in final_item_positions:
-                final_item_positions.remove(final_stack_depth)
-                final_item_positions.insert(0, 0)
-            else:
-                final_item_positions.insert(0, 0)
-
             cost += self.swap(assembly, stack, final_stack_depth)
 
         assert stack._stack[-len(stack_ops) :] == stack_ops, (stack, stack_ops)
@@ -273,19 +249,14 @@ def _emit_input_operands(
         # been scheduled to be killed. now it's just a matter of emitting
         # SWAPs, DUPs and PUSHes until we match the `ops` argument
 
-        # dumb heuristic: if the top of stack is not wanted here, swap
-        # it with something that is wanted
-        if ops and stack.height > 0 and stack.peek(0) not in ops:
-            for op in ops:
-                if isinstance(op, IRVariable) and op not in next_liveness:
-                    self.swap_op(assembly, stack, op)
-                    break
+        # to validate store expansion invariant -
+        # each op is emitted at most once.
+        seen: set[IROperand] = set()
 
-        emitted_ops = OrderedSet[IROperand]()
         for op in ops:
             if isinstance(op, IRLabel):
-                # invoke emits the actual instruction itself so we don't need to emit it here
-                # but we need to add it to the stack map
+                # invoke emits the actual instruction itself so we don't need
+                # to emit it here but we need to add it to the stack map
                 if inst.opcode != "invoke":
                     assembly.append(f"_sym_{op.value}")
                 stack.push(op)
@@ -300,13 +271,12 @@ def _emit_input_operands(
                 stack.push(op)
                 continue
 
-            if op in next_liveness and op not in emitted_ops:
+            if op in next_liveness:
                 self.dup_op(assembly, stack, op)
 
-            if op in emitted_ops:
-                self.dup_op(assembly, stack, op)
-
-            emitted_ops.add(op)
+            # guaranteed by store expansion
+            assert op not in seen, (op, seen)
+            seen.add(op)
 
     def _generate_evm_for_basicblock_r(
         self, asm: list, basicblock: IRBasicBlock, stack: StackModel
@@ -315,6 +285,12 @@ def _generate_evm_for_basicblock_r(
             return
         self.visited_basicblocks.add(basicblock)
 
+        if DEBUG_SHOW_COST:
+            print(basicblock, file=sys.stderr)
+
+        ref = asm
+        asm = []
+
         # assembly entry point into the block
         asm.append(f"_sym_{basicblock.label}")
         asm.append("JUMPDEST")
@@ -330,8 +306,14 @@ def _generate_evm_for_basicblock_r(
 
             asm.extend(self._generate_evm_for_instruction(inst, stack, next_liveness))
 
+        if DEBUG_SHOW_COST:
+            print(" ".join(map(str, asm)), file=sys.stderr)
+            print("\n", file=sys.stderr)
+
+        ref.extend(asm)
+
         for bb in basicblock.reachable:
-            self._generate_evm_for_basicblock_r(asm, bb, stack.copy())
+            self._generate_evm_for_basicblock_r(ref, bb, stack.copy())
 
         # pop values from stack at entry to bb
         # note this produces the same result(!) no matter which basic block
@@ -413,6 +395,7 @@ def _generate_evm_for_instruction(
             # find an instance of %13 *or* %14 in the stack and replace it with %56.
             to_be_replaced = stack.peek(depth)
             if to_be_replaced in next_liveness:
+                # this branch seems unreachable (maybe due to make_ssa)
                 # %13/%14 is still live(!), so we make a copy of it
                 self.dup(assembly, stack, depth)
                 stack.poke(0, ret)
@@ -454,6 +437,16 @@ def _generate_evm_for_instruction(
             if cost_with_swap > cost_no_swap:
                 operands[-1], operands[-2] = operands[-2], operands[-1]
 
+        if DEBUG_SHOW_COST:
+            # debug-only: dry-run the reorder to report its cost; skipped
+            # entirely when debugging is off so normal builds pay nothing
+            cost = self._stack_reorder([], stack, operands, dry_run=True)
+            if cost:
+                print("ENTER", inst, file=sys.stderr)
+                print(" HAVE", stack, file=sys.stderr)
+                print(" WANT", operands, file=sys.stderr)
+                print(" COST", cost, file=sys.stderr)
+
         # final step to get the inputs to this instruction ordered
         # correctly on the stack
         self._stack_reorder(assembly, stack, operands)
@@ -570,10 +563,21 @@ def _generate_evm_for_instruction(
             if inst.output not in next_liveness:
                 self.pop(assembly, stack)
             else:
-                # peek at next_liveness to find the next scheduled item,
-                # and optimistically swap with it
+                # heuristic: peek at next_liveness to find the next scheduled
+                # item, and optimistically swap with it
+                if DEBUG_SHOW_COST:
+                    stack0 = stack.copy()
+
                 next_scheduled = next_liveness.last()
-                self.swap_op(assembly, stack, next_scheduled)
+                cost = 0
+                if not self.equivalence.equivalent(inst.output, next_scheduled):
+                    cost = self.swap_op(assembly, stack, next_scheduled)
+
+                if DEBUG_SHOW_COST and cost != 0:
+                    print("ENTER", inst, file=sys.stderr)
+                    print(" HAVE", stack0, file=sys.stderr)
+                    print(" NEXT LIVENESS", next_liveness, file=sys.stderr)
+                    print(" NEW_STACK", stack, file=sys.stderr)
 
         return apply_line_numbers(inst, assembly)
 
@@ -595,7 +599,7 @@ def dup(self, assembly, stack, depth):
         assembly.append(_evm_dup_for(depth))
 
     def swap_op(self, assembly, stack, op):
-        self.swap(assembly, stack, stack.get_depth(op))
+        return self.swap(assembly, stack, stack.get_depth(op))
 
     def dup_op(self, assembly, stack, op):
         self.dup(assembly, stack, stack.get_depth(op))