From eed8f938ab786d05720dd8a810302396090c3c3b Mon Sep 17 00:00:00 2001 From: Daejun Park Date: Fri, 11 Oct 2024 16:32:07 -0700 Subject: [PATCH] feat: lazy concretization (#392) --- src/halmos/cheatcodes.py | 2 +- src/halmos/sevm.py | 112 +++++++++++---------- src/halmos/utils.py | 9 +- tests/expected/all.json | 36 +++++++ tests/regression/test/Concretization.t.sol | 43 ++++++++ 5 files changed, 149 insertions(+), 53 deletions(-) diff --git a/src/halmos/cheatcodes.py b/src/halmos/cheatcodes.py index ca12d08a..16e810a2 100644 --- a/src/halmos/cheatcodes.py +++ b/src/halmos/cheatcodes.py @@ -568,7 +568,7 @@ def handle(sevm, ex, arg: ByteVec, stack, step_id) -> ByteVec | None: assume_cond = simplify(is_non_zero(arg.get_word(4))) if is_false(assume_cond): raise InfeasiblePath("vm.assume(false)") - ex.path.append(assume_cond) + ex.path.append(assume_cond, branching=True) return ret # vm.getCode(string) diff --git a/src/halmos/sevm.py b/src/halmos/sevm.py index 569e032e..dfea8489 100644 --- a/src/halmos/sevm.py +++ b/src/halmos/sevm.py @@ -444,15 +444,15 @@ def dup(self, n: int) -> None: def swap(self, n: int) -> None: self.stack[-(n + 1)], self.stack[-1] = self.stack[-1], self.stack[-(n + 1)] - def mloc(self) -> int: - loc: int = int_of(self.pop(), "symbolic memory offset") + def mloc(self, subst: dict = None) -> int: + loc: int = int_of(self.pop(), subst, "symbolic memory offset") if loc > MAX_MEMORY_SIZE: raise OutOfGasError(f"MLOAD {loc} > MAX_MEMORY_SIZE") return loc - def ret(self) -> ByteVec: - loc: int = self.mloc() - size: int = int_of(self.pop(), "symbolic return data size") # size in bytes + def ret(self, subst: dict = None) -> ByteVec: + loc: int = self.mloc(subst) + size: int = int_of(self.pop(), subst, "symbolic return data size") returndata_slice = self.memory.slice(loc, loc + size) return returndata_slice @@ -643,19 +643,20 @@ class SMTQuery: @dataclass(frozen=True) class Concretization: """ - Mapping of symbols to concrete values or potential candidates. + Mapping of terms to concrete values, and of symbols to potential candidates. - A symbol is mapped to a concrete value when an equality between them is introduced in the path condition. - These symbols can be replaced by their concrete values during execution as needed. + A term is mapped to a concrete value when an equality between them is introduced in the path condition. + These terms can be replaced by their concrete values during execution as needed. + Note that cyclic substitutions do not occur, as terms are reduced to a ground value rather than another term. A symbol may also be associated with multiple concrete value candidates. If necessary, the path can later branch for each candidate. - TODO: Currently, this mechanism is applied only to calldata with dynamic parameters. - In the future, it may be used for other purposes, such as concrete hash reasoning. + TODO: Currently, the branching mechanism based on candidates is only applied to calldata with dynamic parameters. + In the future, it may be used for other purposes. """ - # symbol -> constant + # term -> constant substitution: dict[BitVecRef, BitVecRef] = field(default_factory=dict) # symbol -> set of constants candidates: dict[BitVecRef, list[int]] = field(default_factory=dict) @@ -664,9 +665,9 @@ def process_cond(self, cond): if not is_eq(cond): return left, right = cond.arg(0), cond.arg(1) - if is_expr_var(left) and is_bv_value(right): + if is_bv_value(right): # not is_bv_value(left) self.substitution[left] = right - elif is_expr_var(right) and is_bv_value(left): + elif is_bv_value(left): # not is_bv_value(right) self.substitution[right] = left def process_dyn_params(self, dyn_params): @@ -1104,8 +1105,8 @@ def balance_update(self, addr: Word, value: Word) -> None: self.balances[new_balance_var] = new_balance def sha3(self) -> None: - loc: int = self.st.mloc() - size: int = int_of(self.st.pop(), "symbolic SHA3 data size") + loc: int = self.mloc() + size: int = self.int_of(self.st.pop(), "symbolic SHA3 data size") data = self.st.memory.slice(loc, loc + size).unwrap() if size else b"" sha3_image = self.sha3_data(data) self.st.push(sha3_image) @@ -1227,6 +1228,15 @@ def resolve_libs(self, creation_hexcode, deployed_hexcode, lib_references) -> st return (creation_hexcode, deployed_hexcode) + def mloc(self) -> int: + return self.st.mloc(self.path.concretization.substitution) + + def ret(self) -> ByteVec: + return self.st.ret(self.path.concretization.substitution) + + def int_of(self, x: Any, err: str = None) -> int: + return int_of(x, self.path.concretization.substitution, err) + class Storage: pass @@ -1284,7 +1294,7 @@ def init( @classmethod def load(cls, ex: Exec, addr: Any, loc: Word) -> Word: - (slot, keys, num_keys, size_keys) = cls.get_key_structure(loc) + (slot, keys, num_keys, size_keys) = cls.get_key_structure(ex, loc) cls.init(ex, addr, slot, keys, num_keys, size_keys) @@ -1306,7 +1316,7 @@ def load(cls, ex: Exec, addr: Any, loc: Word) -> Word: @classmethod def store(cls, ex: Exec, addr: Any, loc: Any, val: Any) -> None: - (slot, keys, num_keys, size_keys) = cls.get_key_structure(loc) + (slot, keys, num_keys, size_keys) = cls.get_key_structure(ex, loc) cls.init(ex, addr, slot, keys, num_keys, size_keys) @@ -1329,12 +1339,12 @@ def store(cls, ex: Exec, addr: Any, loc: Any, val: Any) -> None: ex.storages[new_storage_var] = new_storage @classmethod - def get_key_structure(cls, loc) -> tuple: + def get_key_structure(cls, ex, loc) -> tuple: offsets = cls.decode(loc) if not len(offsets) > 0: raise ValueError(offsets) - slot, keys = int_of(offsets[0], "symbolic storage base slot"), offsets[1:] + slot, keys = ex.int_of(offsets[0], "symbolic storage base slot"), offsets[1:] num_keys = len(keys) size_keys = cls.bitsize(keys) @@ -1944,11 +1954,11 @@ def call( to = uint160(ex.st.pop()) fund = ZERO if op in [EVM.STATICCALL, EVM.DELEGATECALL] else ex.st.pop() - arg_loc: int = ex.st.mloc() - arg_size: int = int_of(ex.st.pop(), "symbolic CALL input data size") + arg_loc: int = ex.mloc() + arg_size: int = ex.int_of(ex.st.pop(), "symbolic CALL input data size") - ret_loc: int = ex.st.mloc() - ret_size: int = int_of(ex.st.pop(), "symbolic CALL return data size") + ret_loc: int = ex.mloc() + ret_size: int = ex.int_of(ex.st.pop(), "symbolic CALL return data size") if not arg_size >= 0: raise ValueError(arg_size) @@ -2102,7 +2112,7 @@ def call_unknown() -> None: # modexp elif eq(to, con_addr(5)): exit_code = con(1) - modulus_size = int_of(extract_bytes(arg, 64, 32)) + modulus_size = ex.int_of(extract_bytes(arg, 64, 32)) f_modexp = Function( f"f_modexp_{arg_size}_{modulus_size}", BitVecSorts[arg_size], @@ -2248,8 +2258,8 @@ def create( raise WriteInStaticContext(ex.context_str()) value: Word = ex.st.pop() - loc: int = int_of(ex.st.pop(), "symbolic CREATE offset") - size: int = int_of(ex.st.pop(), "symbolic CREATE size") + loc: int = ex.int_of(ex.st.pop(), "symbolic CREATE offset") + size: int = ex.int_of(ex.st.pop(), "symbolic CREATE size") if op == EVM.CREATE2: salt = ex.st.pop() @@ -2398,7 +2408,7 @@ def jumpi( ) -> None: jid = ex.jumpi_id() - target: int = int_of(ex.st.pop(), "symbolic JUMPI target") + target: int = ex.int_of(ex.st.pop(), "symbolic JUMPI target") cond: Word = ex.st.pop() visited = ex.jumpis.get(jid, {True: 0, False: 0}) @@ -2524,7 +2534,7 @@ def calldataload( - If the symbol is associated with candidate values, the current path is branched over these candidates. """ - offset: int = int_of(ex.st.pop(), "symbolic CALLDATALOAD offset") + offset: int = ex.int_of(ex.st.pop(), "symbolic CALLDATALOAD offset") loaded = ex.calldata().get_word(offset) if is_expr_var(loaded): @@ -2627,9 +2637,9 @@ def finalize(ex: Exec): error=InvalidOpcode(opcode), ) elif opcode == EVM.REVERT: - ex.halt(data=ex.st.ret(), error=Revert()) + ex.halt(data=ex.ret(), error=Revert()) elif opcode == EVM.RETURN: - ex.halt(data=ex.st.ret()) + ex.halt(data=ex.ret()) else: raise ValueError(opcode) @@ -2716,7 +2726,7 @@ def finalize(ex: Exec): ex.st.push(LShR(ex.st.pop(), w)) # bvlshr elif opcode == EVM.SIGNEXTEND: - w = int_of(ex.st.pop(), "symbolic SIGNEXTEND size") + w = ex.int_of(ex.st.pop(), "symbolic SIGNEXTEND size") if w <= 30: # if w == 31, result is SignExt(0, value) == value bl = (w + 1) * 8 ex.st.push(SignExt(256 - bl, Extract(bl - 1, 0, ex.st.pop()))) @@ -2769,9 +2779,9 @@ def finalize(ex: Exec): ) ex.st.pop() - loc: int = int_of(ex.st.pop(), "symbolic EXTCODECOPY offset") - offset: int = int_of(ex.st.pop(), "symbolic EXTCODECOPY offset") - size: int = int_of(ex.st.pop(), "symbolic EXTCODECOPY size") + loc: int = ex.int_of(ex.st.pop(), "symbolic EXTCODECOPY offset") + offset: int = ex.int_of(ex.st.pop(), "symbolic EXTCODECOPY offset") + size: int = ex.int_of(ex.st.pop(), "symbolic EXTCODECOPY size") if size > 0: end_loc = loc + size @@ -2880,16 +2890,16 @@ def finalize(ex: Exec): ex.st.pop() elif opcode == EVM.MLOAD: - loc: int = ex.st.mloc() + loc: int = ex.mloc() ex.st.push(ex.st.memory.get_word(loc)) elif opcode == EVM.MSTORE: - loc: int = ex.st.mloc() + loc: int = ex.mloc() val: Word = ex.st.pop() ex.st.memory.set_word(loc, uint256(val)) elif opcode == EVM.MSTORE8: - loc: int = ex.st.mloc() + loc: int = ex.mloc() val: Word = ex.st.pop() ex.st.memory.set_byte(loc, uint8(val)) @@ -2912,9 +2922,9 @@ def finalize(ex: Exec): ex.st.push(ex.returndatasize()) elif opcode == EVM.RETURNDATACOPY: - loc: int = ex.st.mloc() - offset: int = int_of(ex.st.pop(), "symbolic RETURNDATACOPY offset") - size: int = int_of(ex.st.pop(), "symbolic RETURNDATACOPY size") + loc: int = ex.mloc() + offset = ex.int_of(ex.st.pop(), "symbolic RETURNDATACOPY offset") + size: int = ex.int_of(ex.st.pop(), "symbolic RETURNDATACOPY size") end_loc = loc + size if end_loc > MAX_MEMORY_SIZE: @@ -2928,9 +2938,9 @@ def finalize(ex: Exec): ex.st.memory.set_slice(loc, end_loc, data) elif opcode == EVM.CALLDATACOPY: - loc: int = ex.st.mloc() - offset: int = int_of(ex.st.pop(), "symbolic CALLDATACOPY offset") - size: int = int_of(ex.st.pop(), "symbolic CALLDATACOPY size") + loc: int = ex.mloc() + offset: int = ex.int_of(ex.st.pop(), "symbolic CALLDATACOPY offset") + size: int = ex.int_of(ex.st.pop(), "symbolic CALLDATACOPY size") end_loc = loc + size if end_loc > MAX_MEMORY_SIZE: @@ -2942,9 +2952,9 @@ def finalize(ex: Exec): ex.st.memory.set_slice(loc, end_loc, data) elif opcode == EVM.CODECOPY: - loc: int = ex.st.mloc() - offset: int = int_of(ex.st.pop(), "symbolic CODECOPY offset") - size: int = int_of(ex.st.pop(), "symbolic CODECOPY size") + loc: int = ex.mloc() + offset: int = ex.int_of(ex.st.pop(), "symbolic CODECOPY offset") + size: int = ex.int_of(ex.st.pop(), "symbolic CODECOPY size") end_loc = loc + size if end_loc > MAX_MEMORY_SIZE: @@ -2955,9 +2965,9 @@ def finalize(ex: Exec): ex.st.memory.set_slice(loc, loc + size, codeslice) elif opcode == EVM.MCOPY: - dest_offset = int_of(ex.st.pop(), "symbolic MCOPY destOffset") - src_offset = int_of(ex.st.pop(), "symbolic MCOPY srcOffset") - size = int_of(ex.st.pop(), "symbolic MCOPY size") + dest_offset = ex.int_of(ex.st.pop(), "symbolic MCOPY destOffset") + src_offset = ex.int_of(ex.st.pop(), "symbolic MCOPY srcOffset") + size = ex.int_of(ex.st.pop(), "symbolic MCOPY size") if size > 0: src_end_loc = src_offset + size @@ -2997,8 +3007,8 @@ def finalize(ex: Exec): raise WriteInStaticContext(ex.context_str()) num_topics: int = opcode - EVM.LOG0 - loc: int = ex.st.mloc() - size: int = int_of(ex.st.pop(), "symbolic LOG data size") + loc: int = ex.mloc() + size: int = ex.int_of(ex.st.pop(), "symbolic LOG data size") topics = list(ex.st.pop() for _ in range(num_topics)) data = ex.st.memory.slice(loc, loc + size) ex.emit_log(EventLog(ex.this(), topics, data)) diff --git a/src/halmos/utils.py b/src/halmos/utils.py index 20f2a1ef..df85c551 100644 --- a/src/halmos/utils.py +++ b/src/halmos/utils.py @@ -32,6 +32,7 @@ is_bv_value, is_not, simplify, + substitute, ) from halmos.exceptions import HalmosException, NotConcreteError @@ -342,15 +343,21 @@ def unbox_int(x: Any) -> Any: return x -def int_of(x: Any, err: str = "expected concrete value but got") -> int: +def int_of(x: Any, subst: dict = None, err: str = None) -> int: """ Converts int-like objects to int or raises NotConcreteError """ + + # attempt to replace symbolic (sub-)terms with their concrete values + if subst and is_bv(x) and not is_bv_value(x): + x = simplify(substitute(x, *subst.items())) + res = unbox_int(x) if isinstance(res, int): return res + err = err or "expected concrete value but got" raise NotConcreteError(f"{err}: {x}") diff --git a/tests/expected/all.json b/tests/expected/all.json index 71b18cc7..c0e7c76c 100644 --- a/tests/expected/all.json +++ b/tests/expected/all.json @@ -337,6 +337,15 @@ } ], "test/Concretization.t.sol:ConcretizationTest": [ + { + "name": "check_calldata_index(bytes,uint256)", + "exitcode": 0, + "num_models": 0, + "models": null, + "num_paths": null, + "time": null, + "num_bounded_loops": null + }, { "name": "check_custom_calldata_decoding()", "exitcode": 0, @@ -345,6 +354,33 @@ "num_paths": null, "time": null, "num_bounded_loops": null + }, + { + "name": "check_memory_index(uint256)", + "exitcode": 0, + "num_models": 0, + "models": null, + "num_paths": null, + "time": null, + "num_bounded_loops": null + }, + { + "name": "check_memory_size(uint256)", + "exitcode": 0, + "num_models": 0, + "models": null, + "num_paths": null, + "time": null, + "num_bounded_loops": null + }, + { + "name": "check_storage_slot(uint256)", + "exitcode": 0, + "num_models": 0, + "models": null, + "num_paths": null, + "time": null, + "num_bounded_loops": null } ], "test/Console.t.sol:ConsoleTest": [ diff --git a/tests/regression/test/Concretization.t.sol b/tests/regression/test/Concretization.t.sol index 751ae632..2cb30c27 100644 --- a/tests/regression/test/Concretization.t.sol +++ b/tests/regression/test/Concretization.t.sol @@ -76,4 +76,47 @@ contract ConcretizationTest is SymTest, Test { assertEq(success1, success2); assertEq(retdata1, retdata2); } + + function check_memory_index(uint idx) public { + uint[3] memory arr = [uint(0), 1, 2]; + + vm.assume(idx == 0 || idx == 1 || idx == 2); + + // there are three paths at this point + assertEq(arr[idx], idx); + } + + uint[3] arr = [0, 1, 2]; + function check_storage_slot(uint idx) public { + vm.assume(idx == 0 || idx == 1 || idx == 2); + assertEq(arr[idx], idx); + } + + function check_calldata_index(bytes calldata data, uint idx) external { + vm.assume(idx == 0 || idx == 1 || idx == 2); + + vm.assume(data.length > 2); + + vm.assume(data[0] == bytes1(uint8(0))); + vm.assume(data[1] == bytes1(uint8(1))); + vm.assume(data[2] == bytes1(uint8(2))); + + assertEq(data[idx], bytes1(uint8(idx))); + } + + function check_memory_size(uint idx) public { + uint[] memory arr; + + vm.assume(idx == 0 || idx == 1 || idx == 2 || idx == 3); + + arr = new uint[](idx); + for (uint i = 0; i < idx; i++) { + arr[i] = i; + } + + assertEq(arr.length, idx); + if (idx > 0) assertEq(arr[0], 0); + if (idx > 1) assertEq(arr[1], 1); + if (idx > 2) assertEq(arr[2], 2); + } }