From 225b9207b6106f1ca1d67dc2e2479ab1bc24c979 Mon Sep 17 00:00:00 2001 From: Jan Eglinger Date: Fri, 9 Aug 2024 14:13:38 +0200 Subject: [PATCH 1/3] Add new test data --- pyproject.toml | 1 + tests/resources/sample_4ch_1pos_1z.nd | 14 ++++++++++++++ tests/test_nd.py | 9 +++++++++ 3 files changed, 24 insertions(+) create mode 100644 tests/resources/sample_4ch_1pos_1z.nd diff --git a/pyproject.toml b/pyproject.toml index 505d6a2..707e1f0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -76,6 +76,7 @@ metamorph_mda_parser = ["src/metamorph_mda_parser", "*/metamorph-mda-parser/src/ tests = ["tests", "*/metamorph-mda-parser/tests"] [tool.coverage.report] +show_missing = true exclude_lines = [ "no cov", "if __name__ == .__main__.:", diff --git a/tests/resources/sample_4ch_1pos_1z.nd b/tests/resources/sample_4ch_1pos_1z.nd new file mode 100644 index 0000000..f959a5c --- /dev/null +++ b/tests/resources/sample_4ch_1pos_1z.nd @@ -0,0 +1,14 @@ +"NDInfoFile", Version 1.0 +"Description", File recreated from images. +"StartTime1", 20240603 12:25:48.610 +"DoTimelapse", FALSE +"DoStage", FALSE +"DoWave", TRUE +"NWavelengths", 4 +"WaveName1", "confDAPI" +"WaveName2", "confGFP" +"WaveName3", "confmCherry" +"WaveName4", "confCy5" +"DoZSeries", FALSE +"WaveInFileName", TRUE +"EndFile" diff --git a/tests/test_nd.py b/tests/test_nd.py index b642d0e..4564482 100644 --- a/tests/test_nd.py +++ b/tests/test_nd.py @@ -15,6 +15,11 @@ def sample_2ch_75pos_361t(): return Path("tests/resources/sample_2ch_75pos_361t.nd") +@pytest.fixture +def sample_4ch_1pos_1z(): + return Path("tests/resources/sample_4ch_1pos_1z.nd") + + def test_sample_4ch_4pos(sample_4ch_4pos): nd_info = NdInfo(sample_4ch_4pos) @@ -66,3 +71,7 @@ def test_sample_2ch_75pos_361t(sample_2ch_75pos_361t): files = nd_info.get_files() assert len(files) == 54150 + + +def test_sample_4ch_1pos_1z(sample_4ch_1pos_1z): + nd_info = NdInfo(sample_4ch_1pos_1z) From 706ee0d4f06f5f12cad1991887c4bde1a172239d Mon Sep 17 00:00:00 2001 From: Jan Eglinger Date: Tue, 27 Aug 2024 13:58:14 +0200 Subject: [PATCH 2/3] Use lark grammar for nd file parsing --- pyproject.toml | 5 ++ src/metamorph_mda_parser/lark.py | 36 ++++++++ src/metamorph_mda_parser/nd.py | 105 +++++++---------------- src/metamorph_mda_parser/nd_grammar.lark | 40 +++++++++ tests/test_nd.py | 31 +++++-- 5 files changed, 138 insertions(+), 79 deletions(-) create mode 100644 src/metamorph_mda_parser/lark.py create mode 100644 src/metamorph_mda_parser/nd_grammar.lark diff --git a/pyproject.toml b/pyproject.toml index 707e1f0..2c71dd0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,7 +25,9 @@ classifiers = [ "Programming Language :: Python :: Implementation :: PyPy", ] dependencies = [ + "lark", "pandas", + "pydantic", ] [project.urls] @@ -82,3 +84,6 @@ exclude_lines = [ "if __name__ == .__main__.:", "if TYPE_CHECKING:", ] + +[tool.ruff.lint.flake8-type-checking] +runtime-evaluated-base-classes = ["pydantic.BaseModel"] diff --git a/src/metamorph_mda_parser/lark.py b/src/metamorph_mda_parser/lark.py new file mode 100644 index 0000000..0231963 --- /dev/null +++ b/src/metamorph_mda_parser/lark.py @@ -0,0 +1,36 @@ +from lark import Lark, Transformer + + +class NDInfoTransformer(Transformer): + def __init__(self): + self.wave_names = [] + self.wave_do_z = [] + self.stage_positions = [] + + def start(self, items): + result = dict(i for i in items if i is not None) + result["WaveNames"] = self.wave_names + result["WaveDoZ"] = self.wave_do_z + result["StagePositions"] = self.stage_positions + return result + + def line(self, key_value): + key, value = key_value + if key.startswith("WaveName"): + self.wave_names.append(value) + return None # We handle WaveName entries separately + if key.startswith("Stage"): + self.stage_positions.append(value) + return None # We handle Stage entries separately + if key.startswith("WaveDoZ"): + self.wave_do_z.append(value) + return None # We handle WaveDoZ entries separately + return (key, value) + + def boolean_value(self, b): + return b[0].value == "TRUE" + + +def parse(content): + parser = Lark.open("nd_grammar.lark", rel_to=__file__, parser="lalr", transformer=NDInfoTransformer()) + return parser.parse(content) diff --git a/src/metamorph_mda_parser/nd.py b/src/metamorph_mda_parser/nd.py index 03cf572..e7f5987 100644 --- a/src/metamorph_mda_parser/nd.py +++ b/src/metamorph_mda_parser/nd.py @@ -1,14 +1,22 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Callable, Literal - -if TYPE_CHECKING: - from pathlib import Path +from pathlib import Path +from typing import Literal import pandas as pd +from pydantic import AliasGenerator, BaseModel, ConfigDict +from pydantic.alias_generators import to_pascal + +from metamorph_mda_parser.lark import parse -class NdInfo: +class NdInfo(BaseModel): + model_config = ConfigDict( + alias_generator=AliasGenerator( + validation_alias=to_pascal, + ), + ) + path: Path name: str version: Literal["1.0", "2.0"] @@ -16,74 +24,25 @@ class NdInfo: do_timelapse: bool do_stage: bool do_wave: bool - do_z: bool - stage_positions: list[str] - wave_names: list[str] - wave_do_z: list[bool] - n_timepoints: int - n_z_steps: int - z_step_size: float + do_z_series: bool + stage_positions: list[str] = [] + wave_names: list[str] = [] + wave_do_z: list[bool] = [] + n_stage_positions: int = 1 + n_time_points: int = 1 + n_z_steps: int = 1 + z_step_size: float | None = None wave_in_file_name: bool - def __init__(self, path: Path): - self.path = path - self.name = path.stem - self._parse_nd() - - def _parse_nd(self) -> None: - with open(self.path) as nd: - # Version - self.version = self._parse_line(nd.readline(), "NDInfoFile", self._extract_version) - self.description = self._parse_line(nd.readline(), "Description", str) - self.start_time = self._parse_line(nd.readline(), "StartTime1", str) - # Time lapse - self.do_timelapse = self._parse_line(nd.readline(), "DoTimelapse", self._parse_bool) - if self.do_timelapse: - self.n_timepoints = self._parse_line(nd.readline(), "NTimePoints", int) - - # Stage positions - self.do_stage = self._parse_line(nd.readline(), "DoStage", self._parse_bool) - if self.do_stage: - n_stage_positions = self._parse_line(nd.readline(), "NStagePositions", int) - self.stage_positions = [] - for s in range(n_stage_positions): - self.stage_positions.append(self._parse_line(nd.readline(), f"Stage{s+1}", str)) - - # Wavelengths - self.do_wave = self._parse_line(nd.readline(), "DoWave", self._parse_bool) - if self.do_wave: - n_wavelengths = self._parse_line(nd.readline(), "NWavelengths", int) - self.wave_names = [] - self.wave_do_z = [] - for w in range(n_wavelengths): - self.wave_names.append(self._parse_line(nd.readline(), f"WaveName{w+1}", str)) - self.wave_do_z.append(self._parse_line(nd.readline(), f"WaveDoZ{w+1}", self._parse_bool)) - - # Z steps - self.do_z = self._parse_line(nd.readline(), "DoZSeries", self._parse_bool) - self.n_z_steps = self._parse_line(nd.readline(), "NZSteps", int) - self.z_step_size = self._parse_line(nd.readline(), "ZStepSize", float) - - self.wave_in_file_name = self._parse_line(nd.readline(), "WaveInFileName", self._parse_bool) - - # End of file - last_line = nd.readline() - if last_line.strip(' "\n') != "EndFile": - message = f"Expected end of file, got: {last_line}" - raise ValueError(message) - - def _parse_line(self, line: str, key: str, value_function: Callable): - tokens = line.split(",") - if tokens[0].strip(' "') != key: - message = f"Invalid nd file contents.\n\texpected: {key}\n\tgot: {line}" - raise ValueError(message) - return value_function(tokens[1].strip(' "\n')) - - def _extract_version(self, value: str) -> str: - return value[8:] - - def _parse_bool(self, value: str) -> bool: - return value.lower() == "true" + @staticmethod + def from_path(path: Path): + with open(path) as f: + content = f.read() + result = parse(content) + result["Path"] = path + result["Name"] = path.stem + result["Version"] = "1.0" # HACK + return NdInfo(**result) def _wavelengths(self): for i, w in enumerate(self.wave_names): @@ -92,7 +51,7 @@ def _wavelengths(self): i, w, f"_w{i+1}{w}" if self.wave_in_file_name else "", - self.wave_do_z[i], + self.wave_do_z[i] if self.wave_do_z else False, ) def _stage_positions(self): @@ -102,7 +61,7 @@ def _stage_positions(self): def _timepoints(self): if self.do_timelapse: - for t in range(self.n_timepoints): + for t in range(self.n_time_points): yield t, f"_t{t+1}" def _get_path_channel_position_time(self): diff --git a/src/metamorph_mda_parser/nd_grammar.lark b/src/metamorph_mda_parser/nd_grammar.lark new file mode 100644 index 0000000..aac10bc --- /dev/null +++ b/src/metamorph_mda_parser/nd_grammar.lark @@ -0,0 +1,40 @@ +start: line+ "\"EndFile\"" +line: _QUOTE _special_key _QUOTE "," _special_value + | _QUOTE _string_key _QUOTE "," _QUOTE _string_value _QUOTE + | _QUOTE _boolean_key _QUOTE "," boolean_value + | _QUOTE _integer_key _QUOTE "," _integer_value + | _QUOTE _float_key _QUOTE "," _float_value + +_special_key: /NDInfoFile/ + | /Description/ + | /StartTime[0-9]+/ +_special_value: /.+/ + +_string_key: /WaveName[0-9]+/ + | /Stage[0-9]+/ +_string_value: /[^"]+/ + +_boolean_key: /DoTimelapse/ + | /DoStage/ + | /DoWave/ + | /DoZSeries/ + | /WaveInFileName/ + | /WaveDoZ[0-9]+/ +boolean_value: BOOLEAN + +_integer_key: /NWavelengths/ + | /NStagePositions/ + | /NTimePoints/ + | /NZSteps/ +_integer_value: INT + +_float_key: /ZStepSize/ +_float_value: DECIMAL | INT + +%import common.INT +%import common.DECIMAL +%import common.WS +%ignore WS + +BOOLEAN: "TRUE" | "FALSE" +_QUOTE: "\"" diff --git a/tests/test_nd.py b/tests/test_nd.py index 4564482..4445302 100644 --- a/tests/test_nd.py +++ b/tests/test_nd.py @@ -21,7 +21,7 @@ def sample_4ch_1pos_1z(): def test_sample_4ch_4pos(sample_4ch_4pos): - nd_info = NdInfo(sample_4ch_4pos) + nd_info = NdInfo.from_path(sample_4ch_4pos) assert nd_info.version == "1.0" assert nd_info.description == "File recreated from images." @@ -30,7 +30,7 @@ def test_sample_4ch_4pos(sample_4ch_4pos): assert len(nd_info.stage_positions) == 4 assert nd_info.do_wave assert len(nd_info.wave_names) == 4 - assert nd_info.do_z + assert nd_info.do_z_series assert nd_info.n_z_steps == 42 assert nd_info.z_step_size == 3.0 assert nd_info.wave_in_file_name @@ -52,17 +52,17 @@ def test_sample_4ch_4pos(sample_4ch_4pos): def test_sample_2ch_75pos_361t(sample_2ch_75pos_361t): - nd_info = NdInfo(sample_2ch_75pos_361t) + nd_info = NdInfo.from_path(sample_2ch_75pos_361t) assert nd_info.version == "1.0" assert nd_info.description == "File recreated from images." assert nd_info.do_timelapse - assert nd_info.n_timepoints == 361 + assert nd_info.n_time_points == 361 assert nd_info.do_stage assert len(nd_info.stage_positions) == 75 assert nd_info.do_wave assert len(nd_info.wave_names) == 2 - assert nd_info.do_z + assert nd_info.do_z_series assert nd_info.n_z_steps == 25 assert nd_info.z_step_size == 2.0 assert nd_info.wave_in_file_name @@ -74,4 +74,23 @@ def test_sample_2ch_75pos_361t(sample_2ch_75pos_361t): def test_sample_4ch_1pos_1z(sample_4ch_1pos_1z): - nd_info = NdInfo(sample_4ch_1pos_1z) + nd_info = NdInfo.from_path(sample_4ch_1pos_1z) + + assert nd_info.version == "1.0" + assert nd_info.description == "File recreated from images." + assert not nd_info.do_timelapse + assert nd_info.n_time_points == 1 + assert not nd_info.do_stage + assert len(nd_info.stage_positions) == 0 + assert nd_info.do_wave + assert len(nd_info.wave_names) == 4 + assert not nd_info.do_z_series + assert nd_info.n_z_steps == 1 + assert nd_info.z_step_size is None + assert nd_info.wave_in_file_name + assert nd_info.wave_names == ["confDAPI", "confGFP", "confmCherry", "confCy5"] + + files = nd_info.get_files() + + assert len(files) == 4 + assert all(p.suffix == ".tif" for p in files["path"]) From 52e0bdc73bc8719ec19a5869cf29b6c5000289f0 Mon Sep 17 00:00:00 2001 From: Jan Eglinger Date: Tue, 27 Aug 2024 15:25:46 +0200 Subject: [PATCH 3/3] Increase minimum Python version to 3.10 --- .github/workflows/test.yml | 2 +- pyproject.toml | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 04c9346..a772c7d 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -22,7 +22,7 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest, windows-latest, macos-latest] - python-version: ['3.8', '3.9', '3.10', '3.11', '3.12'] + python-version: ['3.10', '3.11', '3.12'] steps: - uses: actions/checkout@v3 diff --git a/pyproject.toml b/pyproject.toml index 2c71dd0..bbc3e34 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ name = "metamorph-mda-parser" dynamic = ["version"] description = 'Light-weight parsing of Metamorph/VisiView .nd files.' readme = "README.md" -requires-python = ">=3.8" +requires-python = ">=3.10" license = "BSD-3-Clause" keywords = [] authors = [ @@ -16,8 +16,6 @@ authors = [ classifiers = [ "Development Status :: 4 - Beta", "Programming Language :: Python", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", @@ -56,7 +54,7 @@ cov = [ ] [[tool.hatch.envs.all.matrix]] -python = ["3.8", "3.9", "3.10", "3.11", "3.12"] +python = ["3.10", "3.11", "3.12"] [tool.hatch.envs.types] dependencies = [