Skip to content

Commit

Permalink
Merge pull request #5 from fmi-faim/single-plane
Browse files Browse the repository at this point in the history
Switch to lark for parsing nd file contents
  • Loading branch information
imagejan authored Aug 27, 2024
2 parents bc1c02d + 52e0bdc commit 8e1449d
Show file tree
Hide file tree
Showing 7 changed files with 164 additions and 83 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ jobs:
fail-fast: false
matrix:
os: [ubuntu-latest, windows-latest, macos-latest]
python-version: ['3.8', '3.9', '3.10', '3.11', '3.12']
python-version: ['3.10', '3.11', '3.12']

steps:
- uses: actions/checkout@v3
Expand Down
12 changes: 8 additions & 4 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ name = "metamorph-mda-parser"
dynamic = ["version"]
description = 'Light-weight parsing of Metamorph/VisiView .nd files.'
readme = "README.md"
requires-python = ">=3.8"
requires-python = ">=3.10"
license = "BSD-3-Clause"
keywords = []
authors = [
Expand All @@ -16,16 +16,16 @@ authors = [
classifiers = [
"Development Status :: 4 - Beta",
"Programming Language :: Python",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: Implementation :: CPython",
"Programming Language :: Python :: Implementation :: PyPy",
]
dependencies = [
"lark",
"pandas",
"pydantic",
]

[project.urls]
Expand Down Expand Up @@ -54,7 +54,7 @@ cov = [
]

[[tool.hatch.envs.all.matrix]]
python = ["3.8", "3.9", "3.10", "3.11", "3.12"]
python = ["3.10", "3.11", "3.12"]

[tool.hatch.envs.types]
dependencies = [
Expand All @@ -76,8 +76,12 @@ metamorph_mda_parser = ["src/metamorph_mda_parser", "*/metamorph-mda-parser/src/
tests = ["tests", "*/metamorph-mda-parser/tests"]

[tool.coverage.report]
show_missing = true
exclude_lines = [
"no cov",
"if __name__ == .__main__.:",
"if TYPE_CHECKING:",
]

[tool.ruff.lint.flake8-type-checking]
runtime-evaluated-base-classes = ["pydantic.BaseModel"]
36 changes: 36 additions & 0 deletions src/metamorph_mda_parser/lark.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
from lark import Lark, Transformer


class NDInfoTransformer(Transformer):
    """Turn the parse tree of an .nd file into a plain ``dict``.

    Each ``line`` rule normally becomes a ``(key, value)`` pair, and ``start``
    collects those pairs into the resulting dict. Numbered entries
    (``WaveName<n>``, ``WaveDoZ<n>`` and ``Stage<n>``) are instead appended,
    in the order they are visited, to lists that ``start`` stores under the
    keys ``"WaveNames"``, ``"WaveDoZ"`` and ``"StagePositions"``.

    The lists are instance state, so a transformer instance should only be
    used for a single document.
    """

    def __init__(self):
        # Let the lark base class run its own initialisation too.
        super().__init__()
        self.wave_names = []
        self.wave_do_z = []
        self.stage_positions = []

    def start(self, items):
        """Build the final dict from the transformed ``line`` children."""
        # ``line`` returns None for entries that were collected into a list.
        result = dict(i for i in items if i is not None)
        result["WaveNames"] = self.wave_names
        result["WaveDoZ"] = self.wave_do_z
        result["StagePositions"] = self.stage_positions
        return result

    def line(self, key_value):
        """Return ``(key, value)``, or ``None`` if the entry was collected."""
        key, value = key_value
        # The prefixes are mutually exclusive, so at most one list matches.
        for prefix, collected in (
            ("WaveName", self.wave_names),
            ("Stage", self.stage_positions),
            ("WaveDoZ", self.wave_do_z),
        ):
            if key.startswith(prefix):
                collected.append(value)
                return None  # numbered entries are reported via the lists
        return (key, value)

    def boolean_value(self, b):
        """Convert a ``BOOLEAN`` token (``TRUE``/``FALSE``) to a ``bool``."""
        return b[0].value == "TRUE"


def parse(content):
    """Parse the text of an .nd file and return the transformed result.

    A new LALR parser is built from ``nd_grammar.lark`` (located next to this
    module) on every call, with a fresh ``NDInfoTransformer`` attached, so the
    transformer's collected lists never carry over between documents.
    """
    nd_parser = Lark.open(
        "nd_grammar.lark",
        rel_to=__file__,
        parser="lalr",
        transformer=NDInfoTransformer(),
    )
    parsed = nd_parser.parse(content)
    return parsed
105 changes: 32 additions & 73 deletions src/metamorph_mda_parser/nd.py
Original file line number Diff line number Diff line change
@@ -1,89 +1,48 @@
from __future__ import annotations

from typing import TYPE_CHECKING, Callable, Literal

if TYPE_CHECKING:
from pathlib import Path
from pathlib import Path
from typing import Literal

import pandas as pd
from pydantic import AliasGenerator, BaseModel, ConfigDict
from pydantic.alias_generators import to_pascal

from metamorph_mda_parser.lark import parse


class NdInfo:
class NdInfo(BaseModel):
model_config = ConfigDict(
alias_generator=AliasGenerator(
validation_alias=to_pascal,
),
)

path: Path
name: str
version: Literal["1.0", "2.0"]
description: str
do_timelapse: bool
do_stage: bool
do_wave: bool
do_z: bool
stage_positions: list[str]
wave_names: list[str]
wave_do_z: list[bool]
n_timepoints: int
n_z_steps: int
z_step_size: float
do_z_series: bool
stage_positions: list[str] = []
wave_names: list[str] = []
wave_do_z: list[bool] = []
n_stage_positions: int = 1
n_time_points: int = 1
n_z_steps: int = 1
z_step_size: float | None = None
wave_in_file_name: bool

def __init__(self, path: Path):
self.path = path
self.name = path.stem
self._parse_nd()

def _parse_nd(self) -> None:
with open(self.path) as nd:
# Version
self.version = self._parse_line(nd.readline(), "NDInfoFile", self._extract_version)
self.description = self._parse_line(nd.readline(), "Description", str)
self.start_time = self._parse_line(nd.readline(), "StartTime1", str)
# Time lapse
self.do_timelapse = self._parse_line(nd.readline(), "DoTimelapse", self._parse_bool)
if self.do_timelapse:
self.n_timepoints = self._parse_line(nd.readline(), "NTimePoints", int)

# Stage positions
self.do_stage = self._parse_line(nd.readline(), "DoStage", self._parse_bool)
if self.do_stage:
n_stage_positions = self._parse_line(nd.readline(), "NStagePositions", int)
self.stage_positions = []
for s in range(n_stage_positions):
self.stage_positions.append(self._parse_line(nd.readline(), f"Stage{s+1}", str))

# Wavelengths
self.do_wave = self._parse_line(nd.readline(), "DoWave", self._parse_bool)
if self.do_wave:
n_wavelengths = self._parse_line(nd.readline(), "NWavelengths", int)
self.wave_names = []
self.wave_do_z = []
for w in range(n_wavelengths):
self.wave_names.append(self._parse_line(nd.readline(), f"WaveName{w+1}", str))
self.wave_do_z.append(self._parse_line(nd.readline(), f"WaveDoZ{w+1}", self._parse_bool))

# Z steps
self.do_z = self._parse_line(nd.readline(), "DoZSeries", self._parse_bool)
self.n_z_steps = self._parse_line(nd.readline(), "NZSteps", int)
self.z_step_size = self._parse_line(nd.readline(), "ZStepSize", float)

self.wave_in_file_name = self._parse_line(nd.readline(), "WaveInFileName", self._parse_bool)

# End of file
last_line = nd.readline()
if last_line.strip(' "\n') != "EndFile":
message = f"Expected end of file, got: {last_line}"
raise ValueError(message)

def _parse_line(self, line: str, key: str, value_function: Callable):
tokens = line.split(",")
if tokens[0].strip(' "') != key:
message = f"Invalid nd file contents.\n\texpected: {key}\n\tgot: {line}"
raise ValueError(message)
return value_function(tokens[1].strip(' "\n'))

def _extract_version(self, value: str) -> str:
return value[8:]

def _parse_bool(self, value: str) -> bool:
return value.lower() == "true"
@classmethod
def from_path(cls, path: Path) -> NdInfo:
    """Create an instance from the .nd file at ``path``.

    The file contents are parsed with ``parse``; the resulting dict is then
    completed with values that do not come from the parsed entries and passed
    to the model constructor. Keys are PascalCase because the model validates
    through the ``to_pascal`` alias generator.

    Declared as a classmethod so that subclasses get an instance of their own
    type back.
    """
    with open(path) as f:
        content = f.read()
    result = parse(content)
    # Derived from the file location rather than from its contents.
    result["Path"] = path
    result["Name"] = path.stem
    result["Version"] = "1.0"  # HACK
    return cls(**result)

def _wavelengths(self):
for i, w in enumerate(self.wave_names):
Expand All @@ -92,7 +51,7 @@ def _wavelengths(self):
i,
w,
f"_w{i+1}{w}" if self.wave_in_file_name else "",
self.wave_do_z[i],
self.wave_do_z[i] if self.wave_do_z else False,
)

def _stage_positions(self):
Expand All @@ -102,7 +61,7 @@ def _stage_positions(self):

def _timepoints(self):
if self.do_timelapse:
for t in range(self.n_timepoints):
for t in range(self.n_time_points):
yield t, f"_t{t+1}"

def _get_path_channel_position_time(self):
Expand Down
40 changes: 40 additions & 0 deletions src/metamorph_mda_parser/nd_grammar.lark
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
start: line+ "\"EndFile\""
line: _QUOTE _special_key _QUOTE "," _special_value
| _QUOTE _string_key _QUOTE "," _QUOTE _string_value _QUOTE
| _QUOTE _boolean_key _QUOTE "," boolean_value
| _QUOTE _integer_key _QUOTE "," _integer_value
| _QUOTE _float_key _QUOTE "," _float_value

_special_key: /NDInfoFile/
| /Description/
| /StartTime[0-9]+/
_special_value: /.+/

_string_key: /WaveName[0-9]+/
| /Stage[0-9]+/
_string_value: /[^"]+/

_boolean_key: /DoTimelapse/
| /DoStage/
| /DoWave/
| /DoZSeries/
| /WaveInFileName/
| /WaveDoZ[0-9]+/
boolean_value: BOOLEAN

_integer_key: /NWavelengths/
| /NStagePositions/
| /NTimePoints/
| /NZSteps/
_integer_value: INT

_float_key: /ZStepSize/
_float_value: DECIMAL | INT

%import common.INT
%import common.DECIMAL
%import common.WS
%ignore WS

BOOLEAN: "TRUE" | "FALSE"
_QUOTE: "\""
14 changes: 14 additions & 0 deletions tests/resources/sample_4ch_1pos_1z.nd
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
"NDInfoFile", Version 1.0
"Description", File recreated from images.
"StartTime1", 20240603 12:25:48.610
"DoTimelapse", FALSE
"DoStage", FALSE
"DoWave", TRUE
"NWavelengths", 4
"WaveName1", "confDAPI"
"WaveName2", "confGFP"
"WaveName3", "confmCherry"
"WaveName4", "confCy5"
"DoZSeries", FALSE
"WaveInFileName", TRUE
"EndFile"
38 changes: 33 additions & 5 deletions tests/test_nd.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,13 @@ def sample_2ch_75pos_361t():
return Path("tests/resources/sample_2ch_75pos_361t.nd")


@pytest.fixture
def sample_4ch_1pos_1z():
    """Path of the four-channel sample without stage positions or z series."""
    resource_dir = Path("tests/resources")
    return resource_dir / "sample_4ch_1pos_1z.nd"


def test_sample_4ch_4pos(sample_4ch_4pos):
nd_info = NdInfo(sample_4ch_4pos)
nd_info = NdInfo.from_path(sample_4ch_4pos)

assert nd_info.version == "1.0"
assert nd_info.description == "File recreated from images."
Expand All @@ -25,7 +30,7 @@ def test_sample_4ch_4pos(sample_4ch_4pos):
assert len(nd_info.stage_positions) == 4
assert nd_info.do_wave
assert len(nd_info.wave_names) == 4
assert nd_info.do_z
assert nd_info.do_z_series
assert nd_info.n_z_steps == 42
assert nd_info.z_step_size == 3.0
assert nd_info.wave_in_file_name
Expand All @@ -47,17 +52,17 @@ def test_sample_4ch_4pos(sample_4ch_4pos):


def test_sample_2ch_75pos_361t(sample_2ch_75pos_361t):
nd_info = NdInfo(sample_2ch_75pos_361t)
nd_info = NdInfo.from_path(sample_2ch_75pos_361t)

assert nd_info.version == "1.0"
assert nd_info.description == "File recreated from images."
assert nd_info.do_timelapse
assert nd_info.n_timepoints == 361
assert nd_info.n_time_points == 361
assert nd_info.do_stage
assert len(nd_info.stage_positions) == 75
assert nd_info.do_wave
assert len(nd_info.wave_names) == 2
assert nd_info.do_z
assert nd_info.do_z_series
assert nd_info.n_z_steps == 25
assert nd_info.z_step_size == 2.0
assert nd_info.wave_in_file_name
Expand All @@ -66,3 +71,26 @@ def test_sample_2ch_75pos_361t(sample_2ch_75pos_361t):
files = nd_info.get_files()

assert len(files) == 54150


def test_sample_4ch_1pos_1z(sample_4ch_1pos_1z):
    """Check parsing of an nd file that omits the optional count entries."""
    info = NdInfo.from_path(sample_4ch_1pos_1z)

    # Header entries.
    assert info.version == "1.0"
    assert info.description == "File recreated from images."

    # No time lapse and no stage positions: the model defaults apply.
    assert not info.do_timelapse
    assert info.n_time_points == 1
    assert not info.do_stage
    assert len(info.stage_positions) == 0

    # Four wavelengths are listed in the file.
    assert info.do_wave
    assert len(info.wave_names) == 4

    # No z series: step count and step size fall back to their defaults.
    assert not info.do_z_series
    assert info.n_z_steps == 1
    assert info.z_step_size is None

    assert info.wave_in_file_name
    assert info.wave_names == ["confDAPI", "confGFP", "confmCherry", "confCy5"]

    file_table = info.get_files()

    # One file per wavelength, each with a .tif suffix.
    assert len(file_table) == 4
    assert all(p.suffix == ".tif" for p in file_table["path"])

0 comments on commit 8e1449d

Please sign in to comment.