From 888fac5dbb82bca1bfb1bee8c23dfa4276b0d36a Mon Sep 17 00:00:00 2001 From: Mario Vega Date: Fri, 4 Oct 2024 13:05:05 -0600 Subject: [PATCH] feat(cli): `evm_bytes` rename + print asm (#844) * feat(cli): evm_bytes print asm * docs: changelog * fix(cli): evm_bytes * fix(cli): use commands in `evm_bytes` * chore(evm_bytes): add google-style docstrings with examples * docs(evm_bytes): add evm_bytes cli to docs * fix(docs): tox * fix(cli): tox * docs(evm_bytes): improve sub-section titles * chore(evm_bytes): use titles in example admonitions --------- Co-authored-by: danceratopz --- docs/CHANGELOG.md | 1 + docs/library/cli/evm_bytes.md | 20 ++ docs/library/cli/index.md | 3 + docs/navigation.md | 1 + pyproject.toml | 2 +- src/cli/evm_bytes.py | 212 ++++++++++++++++++ src/cli/evm_bytes_to_python.py | 64 ------ ...m_bytes_to_python.py => test_evm_bytes.py} | 20 +- whitelist.txt | 2 + 9 files changed, 250 insertions(+), 75 deletions(-) create mode 100644 docs/library/cli/evm_bytes.md create mode 100644 docs/library/cli/index.md create mode 100644 src/cli/evm_bytes.py delete mode 100644 src/cli/evm_bytes_to_python.py rename src/cli/tests/{test_evm_bytes_to_python.py => test_evm_bytes.py} (82%) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 8d5fbd83bb..f92ab8896d 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -232,6 +232,7 @@ Due to changes in the framework, there is a breaking change in the directory str - 🔀 Filling tool: Updated the default filling tool (`t8n`) to go-ethereum@master ([#368](https://github.com/ethereum/execution-spec-tests/pull/368)). - 🐞 Docs: Fix error banner in online docs due to mermaid syntax error ([#398](https://github.com/ethereum/execution-spec-tests/pull/398)). - 🐞 Docs: Fix incorrectly formatted nested lists in online doc ([#403](https://github.com/ethereum/execution-spec-tests/pull/403)). +- 🔀 CLI: `evm_bytes_to_python` is renamed to `evm_bytes` and now accepts flag `--assembly` to output the code in assembly format ([#844](https://github.com/ethereum/execution-spec-tests/pull/844)) ### 💥 Breaking Changes diff --git a/docs/library/cli/evm_bytes.md b/docs/library/cli/evm_bytes.md new file mode 100644 index 0000000000..8d8fe36fdd --- /dev/null +++ b/docs/library/cli/evm_bytes.md @@ -0,0 +1,20 @@ +# The `evm_bytes` CLI + +::: cli.evm_bytes.cli + options: + show_source: false + show_root_toc_entry: false + +## `evm_bytes hex-string ` + +::: cli.evm_bytes.hex_string + options: + show_source: false + show_root_toc_entry: false + +## `evm_bytes binary-file ` + +::: cli.evm_bytes.binary_file + options: + show_source: false + show_root_toc_entry: false diff --git a/docs/library/cli/index.md b/docs/library/cli/index.md new file mode 100644 index 0000000000..fa090f3026 --- /dev/null +++ b/docs/library/cli/index.md @@ -0,0 +1,3 @@ +# EEST CLI Tools + +* [`evm_bytes`](evm_bytes.md) - Convert the given EVM bytes from a binary file or a hex string to EEST's python opcodes. diff --git a/docs/navigation.md b/docs/navigation.md index 2dabf851b6..ec622fe46e 100644 --- a/docs/navigation.md +++ b/docs/navigation.md @@ -36,6 +36,7 @@ * [Running Github Actions Locally](dev/test_actions_locally.md) * [Changelog](CHANGELOG.md) * [Library Reference](library/index.md) + * [Miscellaneous CLI Tools](library/cli/index.md) * [Ethereum Test Base Types Package](library/ethereum_test_base_types.md) * [Ethereum Test Exceptions Package](library/ethereum_test_exceptions.md) * [Ethereum Test Fixtures Package](library/ethereum_test_fixtures.md) diff --git a/pyproject.toml b/pyproject.toml index 7a9f9b4507..ed6308503b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -89,7 +89,7 @@ gentest = "cli.gentest:make_test" pyspelling_soft_fail = "cli.tox_helpers:pyspelling" markdownlintcli2_soft_fail = "cli.tox_helpers:markdownlint" order_fixtures = "cli.order_fixtures:order_fixtures" -evm_bytes_to_python = "cli.evm_bytes_to_python:main" +evm_bytes = "cli.evm_bytes:cli" hasher = "cli.hasher:main" [tool.setuptools.packages.find] diff --git a/src/cli/evm_bytes.py b/src/cli/evm_bytes.py new file mode 100644 index 0000000000..5f3a060687 --- /dev/null +++ b/src/cli/evm_bytes.py @@ -0,0 +1,212 @@ +""" +Define an entry point wrapper for pytest. +""" + +from dataclasses import dataclass, field +from typing import List + +import click + +from ethereum_test_base_types import ZeroPaddedHexNumber +from ethereum_test_vm import Macro +from ethereum_test_vm import Opcodes as Op + +OPCODES_WITH_EMPTY_LINES_AFTER = { + Op.STOP, + Op.REVERT, + Op.INVALID, + Op.JUMP, + Op.JUMPI, +} + +OPCODES_WITH_EMPTY_LINES_BEFORE = { + Op.JUMPDEST, +} + + +@dataclass(kw_only=True) +class OpcodeWithOperands: + """Simple opcode with its operands.""" + + opcode: Op | None + operands: List[int] = field(default_factory=list) + + def format(self, assembly: bool) -> str: + """Format the opcode with its operands.""" + if self.opcode is None: + return "" + if assembly: + return self.format_assembly() + if self.operands: + operands = ", ".join(hex(operand) for operand in self.operands) + return f"Op.{self.opcode._name_}[{operands}]" + return f"Op.{self.opcode._name_}" + + def format_assembly(self) -> str: + """Format the opcode with its operands as assembly.""" + if self.opcode is None: + return "" + opcode_name = self.opcode._name_.lower() + if self.opcode.data_portion_length == 0: + return f"{opcode_name}" + elif self.opcode == Op.RJUMPV: + operands = ", ".join(str(ZeroPaddedHexNumber(operand)) for operand in self.operands) + return f"{opcode_name} {operands}" + else: + operands = ", ".join(str(ZeroPaddedHexNumber(operand)) for operand in self.operands) + return f"{opcode_name} {operands}" + + +def process_evm_bytes(evm_bytes: bytes, assembly: bool = False) -> str: # noqa: D103 + evm_bytes = bytearray(evm_bytes) + + opcodes: List[OpcodeWithOperands] = [] + + while evm_bytes: + opcode_byte = evm_bytes.pop(0) + + opcode: Op + for op in Op: + if not isinstance(op, Macro) and op.int() == opcode_byte: + opcode = op + break + else: + raise ValueError(f"Unknown opcode: {opcode_byte}") + + if opcode.data_portion_length > 0: + opcodes.append( + OpcodeWithOperands( + opcode=opcode, + operands=[int.from_bytes(evm_bytes[: opcode.data_portion_length], "big")], + ) + ) + evm_bytes = evm_bytes[opcode.data_portion_length :] + elif opcode == Op.RJUMPV: + max_index = evm_bytes.pop(0) + operands: List[int] = [] + for _ in range(max_index + 1): + operands.append(int.from_bytes(evm_bytes[:2], "big")) + evm_bytes = evm_bytes[2:] + opcodes.append(OpcodeWithOperands(opcode=opcode, operands=operands)) + else: + opcodes.append(OpcodeWithOperands(opcode=opcode)) + + if assembly: + opcodes_with_empty_lines: List[OpcodeWithOperands] = [] + for i, op_with_operands in enumerate(opcodes): + if ( + op_with_operands.opcode in OPCODES_WITH_EMPTY_LINES_BEFORE + and len(opcodes_with_empty_lines) > 0 + and opcodes_with_empty_lines[-1].opcode is not None + ): + opcodes_with_empty_lines.append(OpcodeWithOperands(opcode=None)) + opcodes_with_empty_lines.append(op_with_operands) + if op_with_operands.opcode in OPCODES_WITH_EMPTY_LINES_AFTER and i < len(opcodes) - 1: + opcodes_with_empty_lines.append(OpcodeWithOperands(opcode=None)) + return "\n".join(op.format(assembly) for op in opcodes_with_empty_lines) + return " + ".join(op.format(assembly) for op in opcodes) + + +def process_evm_bytes_string(evm_bytes_hex_string: str, assembly: bool = False) -> str: + """Process the given EVM bytes hex string.""" + if evm_bytes_hex_string.startswith("0x"): + evm_bytes_hex_string = evm_bytes_hex_string[2:] + + evm_bytes = bytes.fromhex(evm_bytes_hex_string) + return process_evm_bytes(evm_bytes, assembly=assembly) + + +assembly_option = click.option( + "-a", + "--assembly", + default=False, + is_flag=True, + help="Output the code as assembly instead of python.", +) + + +@click.group(context_settings=dict(help_option_names=["-h", "--help"])) +def cli(): + """ + Convert the given EVM bytes to EEST's python opcodes or assembly string. + + The input can be either a hex string or a binary file containing EVM bytes. + """ + pass + + +@cli.command() +@assembly_option +@click.argument("hex_string") +def hex_string(hex_string: str, assembly: bool): + """ + Process a hex string representing EVM bytes and convert it into EEST's Python opcodes. + + Args: + hex_string (str): The hex string representing the EVM bytes. + assembly (bool): Whether to print the output as assembly or Python opcodes. + + Returns: + (str): The processed EVM opcodes in Python or assembly format. + + Example: Convert a hex string to EEST Python `Opcodes` + ```bash + uv run evm_bytes hex-string 604260005260206000F3 + ``` + + Output: + + ```python + Op.PUSH1[0x42] + Op.PUSH1[0x0] + Op.MSTORE + Op.PUSH1[0x20] + Op.PUSH1[0x0] + Op.RETURN + ``` + + Example: Convert a hex string to assembly + ```bash + uv run evm_bytes hex-string --assembly 604260005260206000F3 + ``` + + Output: + + ```text + push1 0x42 + push1 0x00 + mstore + push1 0x20 + push1 0x00 + return + ``` + """ # noqa: E501 + processed_output = process_evm_bytes_string(hex_string, assembly=assembly) + click.echo(processed_output) + + +@cli.command() +@assembly_option +@click.argument("binary_file_path", type=click.File("rb")) +def binary_file(binary_file_path, assembly: bool): + """ + Convert the given EVM bytes binary file. + + Args: + binary_file_path (BinaryIO): A binary file containing EVM bytes to be processed or use `-` + to read from stdin. + assembly (bool): Whether to print the output as assembly or Python opcodes. + + Example: Convert the Withdrawal Request contract to assembly + ```bash + uv run evm_bytes binary-file ./src/ethereum_test_forks/forks/contracts/withdrawal_request.bin --assembly + ``` + + Output: + + ```text + caller + push20 0xfffffffffffffffffffffffffffffffffffffffe + eq + push1 0x90 + jumpi + ... + ``` + """ # noqa: E501 + processed_output = process_evm_bytes(binary_file_path.read(), assembly=assembly) + click.echo(processed_output) diff --git a/src/cli/evm_bytes_to_python.py b/src/cli/evm_bytes_to_python.py deleted file mode 100644 index 243b69ccc0..0000000000 --- a/src/cli/evm_bytes_to_python.py +++ /dev/null @@ -1,64 +0,0 @@ -""" -Define an entry point wrapper for pytest. -""" - -from typing import Any, List, Optional - -import click - -from ethereum_test_vm import Macro -from ethereum_test_vm import Opcodes as Op - - -def process_evm_bytes(evm_bytes_hex_string: Any) -> str: # noqa: D103 - if evm_bytes_hex_string.startswith("0x"): - evm_bytes_hex_string = evm_bytes_hex_string[2:] - - evm_bytes = bytearray(bytes.fromhex(evm_bytes_hex_string)) - - opcodes_strings: List[str] = [] - - while evm_bytes: - opcode_byte = evm_bytes.pop(0) - - opcode: Optional[Op] = None - for op in Op: - if not isinstance(op, Macro) and op.int() == opcode_byte: - opcode = op - break - - if opcode is None: - raise ValueError(f"Unknown opcode: {opcode_byte}") - - if opcode.data_portion_length > 0: - data_portion = hex(int.from_bytes(evm_bytes[: opcode.data_portion_length], "big")) - evm_bytes = evm_bytes[opcode.data_portion_length :] - opcodes_strings.append(f"Op.{opcode._name_}[{data_portion}]") - elif opcode == Op.RJUMPV: - max_index = evm_bytes.pop(0) - operands: List[str] = [] - for _ in range(max_index + 1): - operands.append(hex(int.from_bytes(evm_bytes[:2], "big"))) - evm_bytes = evm_bytes[2:] - opcodes_strings.append(f"Op.{opcode._name_}[{','.join(operands)}]") - else: - opcodes_strings.append(f"Op.{opcode._name_}") - - return " + ".join(opcodes_strings) - - -@click.command() -@click.argument("evm_bytes_hex_string") -def main(evm_bytes_hex_string: str): - """ - Convert the given EVM bytes hex string to an EEST Opcodes. - - \b - EVM_BYTES_HEX_STRING: A hex string representing EVM bytes to be processed. - """ # noqa: D301 - processed_output = process_evm_bytes(evm_bytes_hex_string) - click.echo(processed_output) - - -if __name__ == "__main__": - main() diff --git a/src/cli/tests/test_evm_bytes_to_python.py b/src/cli/tests/test_evm_bytes.py similarity index 82% rename from src/cli/tests/test_evm_bytes_to_python.py rename to src/cli/tests/test_evm_bytes.py index 21b4ff295b..561eeb04a8 100644 --- a/src/cli/tests/test_evm_bytes_to_python.py +++ b/src/cli/tests/test_evm_bytes.py @@ -1,12 +1,12 @@ """ -Test suite for `cli.evm_bytes_to_python` module. +Test suite for `cli.evm_bytes` module. """ import pytest from ethereum_test_tools import Opcodes as Op -from ..evm_bytes_to_python import process_evm_bytes +from ..evm_bytes import process_evm_bytes_string basic_vector = [ "0x60008080808061AAAA612d5ff1600055", @@ -27,9 +27,9 @@ (complex_vector[0][2:], complex_vector[1]), # no "0x" prefix ], ) -def test_evm_bytes_to_python(evm_bytes, python_opcodes): - """Test evm_bytes_to_python using the basic and complex vectors""" - assert process_evm_bytes(evm_bytes) == python_opcodes +def test_evm_bytes(evm_bytes: str, python_opcodes: str): + """Test evm_bytes using the basic and complex vectors""" + assert process_evm_bytes_string(evm_bytes) == python_opcodes DUPLICATES = [Op.NOOP] @@ -40,7 +40,7 @@ def test_evm_bytes_to_python(evm_bytes, python_opcodes): [op for op in Op if op not in DUPLICATES], ids=lambda op: op._name_, ) -def test_individual_opcodes(opcode): +def test_individual_opcodes(opcode: Op): """Test each opcode individually""" data_portion = b"" if opcode.data_portion_length > 0: @@ -53,17 +53,17 @@ def test_individual_opcodes(opcode): expected_output = f"Op.{opcode._name_}" bytecode = opcode.int().to_bytes(1, byteorder="big") + data_portion - assert process_evm_bytes("0x" + bytecode.hex()) == expected_output + assert process_evm_bytes_string("0x" + bytecode.hex()) == expected_output def test_invalid_opcode(): """Invalid hex string""" with pytest.raises(ValueError): - process_evm_bytes("0xZZ") + process_evm_bytes_string("0xZZ") def test_unknown_opcode(): """Opcode not defined in Op""" with pytest.raises(ValueError): - process_evm_bytes("0x0F") - process_evm_bytes("0x0F") + process_evm_bytes_string("0x0F") + process_evm_bytes_string("0x0F") diff --git a/whitelist.txt b/whitelist.txt index c79d67f7d2..207ee9479c 100644 --- a/whitelist.txt +++ b/whitelist.txt @@ -15,6 +15,7 @@ argcount argnames argvalues ase +asm at5 AuthorizationInvalidityType AutoSection @@ -537,6 +538,7 @@ tempdir testdir teststatus tmpdir +toc tryfirst trylast usefixtures