From 9a8386038248b26a01c9c79f8c6513c7f34dee02 Mon Sep 17 00:00:00 2001 From: karmacoma Date: Mon, 14 Oct 2024 08:22:46 -0700 Subject: [PATCH] feat: handle concrete keccaks concretely (WIP) (#391) Co-authored-by: Daejun Park --- packages/halmos-builder/Dockerfile | 2 + pyproject.toml | 1 + src/halmos/sevm.py | 85 +++++++++++++++++++++++------- src/halmos/utils.py | 8 +++ tests/expected/all.json | 45 ++++++++++++++++ tests/regression/test/Sha3.t.sol | 50 ++++++++++++++++++ 6 files changed, 172 insertions(+), 19 deletions(-) diff --git a/packages/halmos-builder/Dockerfile b/packages/halmos-builder/Dockerfile index 59b88c7b..b8a172a4 100644 --- a/packages/halmos-builder/Dockerfile +++ b/packages/halmos-builder/Dockerfile @@ -6,6 +6,8 @@ RUN apt-get update && apt-get install --no-install-recommends -y \ python3-pip \ wget \ python3-venv \ + build-essential \ + python3.12-dev \ && rm -rf /var/lib/apt/lists/* # Install Foundry diff --git a/pyproject.toml b/pyproject.toml index c44cc1e6..f03198cb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,6 +25,7 @@ dependencies = [ "sortedcontainers>=2.4.0", "toml>=0.10.2", "z3-solver==4.12.6.0", + "eth_hash[pysha3]>=0.7.0" ] dynamic = ["version"] diff --git a/src/halmos/sevm.py b/src/halmos/sevm.py index dfea8489..80201701 100644 --- a/src/halmos/sevm.py +++ b/src/halmos/sevm.py @@ -13,6 +13,7 @@ TypeVar, ) +from eth_hash.auto import keccak from z3 import ( UGE, UGT, @@ -87,6 +88,7 @@ assert_address, assert_bv, assert_uint256, + bv_value_to_bytes, byte_length, bytes_to_bv_value, con, @@ -98,6 +100,7 @@ f_ecrecover, f_sha3_256_name, f_sha3_512_name, + f_sha3_empty, f_sha3_name, hexify, int_of, @@ -129,7 +132,7 @@ Steps = dict[int, dict[str, Any]] # execution tree EMPTY_BYTES = ByteVec() -EMPTY_KECCAK = con(0xC5D2460186F7233C927E7DB2DCC703C0E500B653CA82273B7BFAD8045D85A470) +EMPTY_KECCAK = 0xC5D2460186F7233C927E7DB2DCC703C0E500B653CA82273B7BFAD8045D85A470 ZERO, ONE = con(0), con(1) MAX_CALL_DEPTH = 1024 @@ -1111,29 +1114,68 @@ def sha3(self) -> None: sha3_image = self.sha3_data(data) self.st.push(sha3_image) - def sha3_data(self, data: Bytes) -> Word: + def sha3_hash(self, data: Bytes) -> bytes | None: + """return concrete bytes if the hash can be evaluated, otherwise None""" + size = byte_length(data) - if size > 0: - if isinstance(data, bytes): - data = bytes_to_bv_value(data) + if size == 0: + return EMPTY_KECCAK.to_bytes(32, byteorder="big") - f_sha3 = Function( - f_sha3_name(size * 8), BitVecSorts[size * 8], BitVecSort256 - ) - sha3_expr = f_sha3(data) - else: - sha3_expr = EMPTY_KECCAK + if isinstance(data, bytes): + return keccak(data) + + if is_bv_value(data): + return keccak(bv_value_to_bytes(data)) + + if isinstance(data, int): + # the problem here is that we're not sure about the bit-width of the int + # this is not supposed to happen, so just log and move on + debug(f"eval_sha3 received unexpected int value ({data})") + + return None + + def sha3_expr(self, data: Bytes) -> Word: + """return a symbolic sha3 expression, e.g. f_sha3_256(data)""" - # assume hash values are sufficiently smaller than the uint max - self.path.append(ULE(sha3_expr, 2**256 - 2**64)) + bitsize = byte_length(data) * 8 + if bitsize == 0: + return f_sha3_empty + + if isinstance(data, bytes): + data = bytes_to_bv_value(data) + + fname = f_sha3_name(bitsize) + f_sha3 = Function(fname, BitVecSorts[bitsize], BitVecSort256) + return f_sha3(data) + + def sha3_data(self, data: Bytes) -> Word: + sha3_expr = self.sha3_expr(data) + sha3_hash = self.sha3_hash(data) + + if sha3_hash is not None: + self.path.append(sha3_expr == bytes_to_bv_value(sha3_hash)) + + # ensure the hash value is within the safe range assumed below + sha3_hash_int = int.from_bytes(sha3_hash, "big") + if sha3_hash_int == 0 or sha3_hash_int > 2**256 - 2**64: + error_msg = f"hash value outside expected range: {sha3_hash.hex()}" + raise HalmosException(error_msg) + + else: + # assume hash values are non-zero and sufficiently small to prevent overflow when adding reasonable offsets + self.path.append(sha3_expr != ZERO) + self.path.append(ULE(sha3_expr, 2**256 - 2**64)) # assume no hash collision self.assume_sha3_distinct(sha3_expr) # handle create2 hash - if size == 85 and eq(extract_bytes(data, 0, 1), con(0xFF, size_bits=8)): - return con(create2_magic_address + self.sha3s[sha3_expr]) + size = byte_length(data) + if size == 85: + first_byte = unbox_int(ByteVec(data).get_byte(0)) + if isinstance(first_byte, int) and first_byte == 0xFF: + return con(create2_magic_address + self.sha3s[sha3_expr]) else: return sha3_expr @@ -1159,7 +1201,6 @@ def assume_sha3_distinct(self, sha3_expr) -> None: # inputs have different sizes: assume the outputs are different self.path.append(sha3_expr != prev_sha3_expr) - self.path.append(sha3_expr != ZERO) self.sha3s[sha3_expr] = len(self.sha3s) def new_gas_id(self) -> int: @@ -3019,9 +3060,15 @@ def finalize(ex: Exec): elif EVM.PUSH1 <= opcode <= EVM.PUSH32: val = unbox_int(insn.operand) if isinstance(val, int): - if opcode == EVM.PUSH32 and val in sha3_inv: - # restore precomputed hashes - ex.st.push(ex.sha3_data(con(sha3_inv[val]))) + if opcode == EVM.PUSH32: + if val in sha3_inv: + # restore precomputed hashes + ex.st.push(ex.sha3_data(con(sha3_inv[val]))) + # TODO: support more commonly used concrete keccak values + elif val == EMPTY_KECCAK: + ex.st.push(ex.sha3_data(b"")) + else: + ex.st.push(val) else: ex.st.push(val) else: diff --git a/src/halmos/utils.py b/src/halmos/utils.py index df85c551..4564ed07 100644 --- a/src/halmos/utils.py +++ b/src/halmos/utils.py @@ -11,6 +11,7 @@ Z3_OP_BADD, Z3_OP_CONCAT, Z3_OP_ULEQ, + BitVec, BitVecNumRef, BitVecRef, BitVecSort, @@ -108,9 +109,16 @@ def f_sha3_name(bitsize: int) -> str: return f"f_sha3_{bitsize}" +f_sha3_0_name = f_sha3_name(0) f_sha3_256_name = f_sha3_name(256) f_sha3_512_name = f_sha3_name(512) +# NOTE: another way to encode the empty keccak is to use 0-ary function like: +# f_sha3_empty = Function(f_sha3_0_name, BitVecSort256) +# then `f_sha3_empty()` is equivalent to `BitVec(f_sha3_0_name, BitVecSort256)`. +# in both cases, decl() == f_sha3_0_name, and num_args() == 0. +f_sha3_empty = BitVec(f_sha3_0_name, BitVecSort256) + def uid() -> str: return uuid.uuid4().hex[:7] diff --git a/tests/expected/all.json b/tests/expected/all.json index c0e7c76c..cb73d3e7 100644 --- a/tests/expected/all.json +++ b/tests/expected/all.json @@ -2279,6 +2279,33 @@ } ], "test/Sha3.t.sol:Sha3Test": [ + { + "name": "check_concrete_keccak_does_not_split_paths()", + "exitcode": 0, + "num_models": 0, + "models": null, + "num_paths": null, + "time": null, + "num_bounded_loops": null + }, + { + "name": "check_concrete_keccak_memory_lookup()", + "exitcode": 0, + "num_models": 0, + "models": null, + "num_paths": null, + "time": null, + "num_bounded_loops": null + }, + { + "name": "check_empty_hash_value()", + "exitcode": 0, + "num_models": 0, + "models": null, + "num_paths": null, + "time": null, + "num_bounded_loops": null + }, { "name": "check_hash()", "exitcode": 0, @@ -2288,6 +2315,15 @@ "time": null, "num_bounded_loops": null }, + { + "name": "check_hash_collision_with_empty()", + "exitcode": 0, + "num_models": 0, + "models": null, + "num_paths": null, + "time": null, + "num_bounded_loops": null + }, { "name": "check_no_hash_collision_assumption()", "exitcode": 0, @@ -2296,6 +2332,15 @@ "num_paths": null, "time": null, "num_bounded_loops": null + }, + { + "name": "check_only_empty_bytes_matches_empty_hash(bytes)", + "exitcode": 0, + "num_models": 0, + "models": null, + "num_paths": null, + "time": null, + "num_bounded_loops": null } ], "test/SignExtend.t.sol:SignExtendTest": [ diff --git a/tests/regression/test/Sha3.t.sol b/tests/regression/test/Sha3.t.sol index b4353e52..9f3a9465 100644 --- a/tests/regression/test/Sha3.t.sol +++ b/tests/regression/test/Sha3.t.sol @@ -5,6 +5,9 @@ import "forge-std/Test.sol"; import {SymTest} from "halmos-cheatcodes/SymTest.sol"; contract Sha3Test is Test, SymTest { + // 0xc5d2460186f7233c927e7db2dcc703c0e500b653ca82273b7bfad8045d85a470 + bytes32 constant EMPTY_HASH = keccak256(""); + function check_hash() public { _assert_eq("", ""); _assert_eq("1", "1"); @@ -26,6 +29,53 @@ contract Sha3Test is Test, SymTest { assert(data32_1[0] == data32_2[0]); } + function check_hash_collision_with_empty() public { + bytes memory data = svm.createBytes(1, "data"); + assertNotEq(keccak256(data), keccak256("")); + } + + function check_empty_hash_value() public { + assertEq(keccak256(""), EMPTY_HASH); + + // TODO: uncomment when we support empty bytes + // bytes memory data = svm.createBytes(0, "data"); + // assertEq(keccak256(data), EMPTY_HASH); + } + + function check_only_empty_bytes_matches_empty_hash(bytes memory data) public { + // empty hash value + vm.assume(keccak256(data) == EMPTY_HASH); + assertEq(data.length, 0); + } + + function check_concrete_keccak_does_not_split_paths() external { + bytes32 hash = keccak256("data"); + uint256 bit = uint256(hash) & 1; + + // this tests that the hash value is concrete + // if it was symbolic, we would split paths and fail in the even case + // (keccak("data") is odd) + if (uint256(hash) & 1 == 0) { + console2.log("even"); + assert(false); + } else { + console2.log("odd"); + assert(true); + } + } + + function check_concrete_keccak_memory_lookup() external { + bytes32 hash = keccak256(abi.encodePacked(uint256(3))); + uint256 bit = uint256(hash) & 1; + + string[] memory x = new string[](2); + x[0] = "even"; + x[1] = "odd"; + + // checks that we don't fail with symbolic memory offset error + console2.log(x[bit]); + } + function _assert_eq(bytes memory data1, bytes memory data2) internal { assert(keccak256(data1) == keccak256(data2)); }