From b0b29937430d354fb9d5e21d3c587d3f1cdc7094 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tam=C3=A1s=20T=C3=B3th?= Date: Tue, 11 Jun 2024 20:28:29 +0200 Subject: [PATCH] Refactor `parse_outer` (#4436) Preparatory refactoring for: * #4406 Changes: * Expose function `slurp_definitions` * When slurping, a given file's parent is always considered for requires (and takes priority) * Extract a few helper functions * Rename, remove or change type for a few parameters --- pyk/src/pyk/__main__.py | 16 ++++-- pyk/src/pyk/kast/utils.py | 79 +++++++++++++++++---------- pyk/src/tests/unit/kast/test_utils.py | 7 ++- 3 files changed, 68 insertions(+), 34 deletions(-) diff --git a/pyk/src/pyk/__main__.py b/pyk/src/pyk/__main__.py index bad14b5119a..17d7cfc441a 100644 --- a/pyk/src/pyk/__main__.py +++ b/pyk/src/pyk/__main__.py @@ -410,10 +410,14 @@ def exec_json_to_kore(options: JsonToKoreOptions) -> None: def exec_parse_outer(options: ParseOuterOptions) -> None: definition_file = options.main_file.resolve() - search_paths = [definition_file.parent, *options.includes] main_module_name = options.main_module or definition_file.stem.upper() - final_definition = parse_outer(definition_file, main_module_name, search_paths, options.md_selector) + final_definition = parse_outer( + definition_file, + main_module_name, + include_dirs=options.includes, + md_selector=options.md_selector, + ) result_text = json.dumps(final_definition.to_dict()) try: @@ -424,10 +428,14 @@ def exec_parse_outer(options: ParseOuterOptions) -> None: def exec_kompilex(options: KompileXCommandOptions) -> None: definition_file = Path(options.main_file).resolve() - search_paths = [definition_file.parent, *options.includes] main_module_name = options.main_module or definition_file.stem.upper() - final_definition = parse_outer(definition_file, main_module_name, search_paths, options.md_selector) + final_definition = parse_outer( + definition_file, + main_module_name, + include_dirs=options.includes, + md_selector=options.md_selector, + ) if options.pre_parsed_prelude: prelude_json = json.loads(options.pre_parsed_prelude.read()) diff --git a/pyk/src/pyk/kast/utils.py b/pyk/src/pyk/kast/utils.py index e42a2575f25..3795c7a2259 100644 --- a/pyk/src/pyk/kast/utils.py +++ b/pyk/src/pyk/kast/utils.py @@ -1,6 +1,7 @@ from __future__ import annotations import logging +from pathlib import Path from typing import TYPE_CHECKING from ._ast_to_kast import _ast_to_kast @@ -11,55 +12,75 @@ if TYPE_CHECKING: from collections.abc import Iterable - from pathlib import Path from typing import Final - from .outer_syntax import Module + from .outer_syntax import Require _LOGGER: Final = logging.getLogger(__name__) def parse_outer( - definition_file: Path, + definition_file: str | Path, main_module: str, - search_paths: Iterable[Path] = (), + *, + include_dirs: Iterable[str | Path] = (), md_selector: str = 'k', include_source: bool = True, ) -> KDefinition: - modules = _slurp(definition_file, search_paths, [definition_file], md_selector, include_source) + parsed_files = slurp_definitions( + definition_file, + include_dirs=include_dirs, + md_selector=md_selector, + include_source=include_source, + ) + modules = tuple(module for _, definition in parsed_files.items() for module in definition.modules) final_definition = _ast_to_kast(Definition(modules), main_module=main_module) assert isinstance(final_definition, KDefinition) return final_definition -def _slurp( - definition_file: Path, - search_paths: Iterable[Path] = (), - processed_files: list[Path] | None = None, +def slurp_definitions( + main_file: str | Path, + *, + include_dirs: Iterable[str | Path] = (), md_selector: str = 'k', include_source: bool = True, -) -> tuple[Module, ...]: - processed_files = processed_files if processed_files is not None else [] +) -> dict[Path, Definition]: + main_file = Path(main_file).resolve() + _include_dirs = [Path(include_dir) for include_dir in include_dirs] + + result: dict[Path, Definition] = {} + + pending = [main_file] + while pending: # DFS + current_file = pending.pop() + + if current_file in result: + continue + + definition = _parse_file(current_file, md_selector, include_source) + pending += reversed([_resolve_require(require, current_file, _include_dirs) for require in definition.requires]) + + result[current_file] = definition + + return result + + +def _parse_file(definition_file: Path, md_selector: str, include_source: bool) -> Definition: _LOGGER.info(f'Reading {definition_file}') + text = definition_file.read_text() if definition_file.suffix == '.md': text = select_code_blocks(text, md_selector) + parser = OuterParser(text, source=definition_file if include_source else None) - definition = parser.definition() - result = definition.modules - for require in definition.requires: - try_files = [include_dir / require.path for include_dir in search_paths] - try: - # Get the first source file we can find by iterating through search_paths - index = [file.exists() for file in try_files].index(True) - except ValueError as v: - # TODO Include the source location of the requires clause - raise FileNotFoundError( - f'{require.path} not found\nLookup directories: {[str(path) for path in search_paths]}' - ) from v - - required_file = try_files[index] - if required_file not in processed_files: - processed_files.append(required_file) - result += _slurp(required_file, search_paths, processed_files, md_selector, include_source) - return result + return parser.definition() + + +def _resolve_require(require: Require, definition_file: Path, include_dirs: list[Path]) -> Path: + try_dirs = [definition_file.parent] + include_dirs + try_files = [try_dir / require.path for try_dir in try_dirs] + for file in try_files: + if file.is_file(): + return file.resolve() + raise FileNotFoundError(f'{require.path} not found. Searched paths: {[str(path) for path in try_dirs]}') diff --git a/pyk/src/tests/unit/kast/test_utils.py b/pyk/src/tests/unit/kast/test_utils.py index 893b045bc49..01f9f5e474a 100644 --- a/pyk/src/tests/unit/kast/test_utils.py +++ b/pyk/src/tests/unit/kast/test_utils.py @@ -27,7 +27,12 @@ def test_parse_outer(test_file: Path) -> None: main_module = test_file.stem.upper() # When - actual = parse_outer(test_file, main_module, (test_file.parent, test_file.parent / 'include'), include_source=False) + actual = parse_outer( + test_file, + main_module, + include_dirs=[test_file.parent / 'include'], + include_source=False, + ) # Then assert actual == expected