Skip to content

Commit

Permalink
First version of nd file parsing
Browse files Browse the repository at this point in the history
This likely doesn't yet cover all VisiView modalities.
  • Loading branch information
imagejan committed Mar 14, 2024
1 parent 885dd8a commit bc1c02d
Show file tree
Hide file tree
Showing 7 changed files with 328 additions and 2 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
dist
__pycache__
.coverage*
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@

-----

Light-weight parsing of Metamorph/VisiView Multi Dimensional Acquisition .nd files.

For the file format description, see: https://support.moleculardevices.com/s/article/MDA-file-formats

**Table of Contents**

- [Installation](#installation)
Expand Down
6 changes: 4 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ build-backend = "hatchling.build"
[project]
name = "metamorph-mda-parser"
dynamic = ["version"]
description = ''
description = 'Light-weight parsing of Metamorph/VisiView .nd files.'
readme = "README.md"
requires-python = ">=3.8"
license = "BSD-3-Clause"
Expand All @@ -24,7 +24,9 @@ classifiers = [
"Programming Language :: Python :: Implementation :: CPython",
"Programming Language :: Python :: Implementation :: PyPy",
]
dependencies = []
dependencies = [
"pandas",
]

[project.urls]
Documentation = "https://github.com/fmi-faim/metamorph-mda-parser#readme"
Expand Down
132 changes: 132 additions & 0 deletions src/metamorph_mda_parser/nd.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
from __future__ import annotations

from typing import TYPE_CHECKING, Callable, Literal

if TYPE_CHECKING:
from pathlib import Path

import pandas as pd


class NdInfo:
    """Metadata parsed from a Metamorph/VisiView ``.nd`` file.

    The file is read line by line in a fixed key order; every line is
    expected to look like ``"Key", value``. Any deviation from the expected
    key sequence raises ``ValueError``.

    Attributes for optional sections (time lapse, stage positions,
    wavelengths) always exist after parsing: they hold neutral defaults
    (``1`` timepoint, empty lists) when the section is disabled in the file.
    """

    path: Path
    name: str
    version: Literal["1.0", "2.0"]
    description: str
    start_time: str
    do_timelapse: bool
    do_stage: bool
    do_wave: bool
    do_z: bool
    stage_positions: list[str]
    wave_names: list[str]
    wave_do_z: list[bool]
    n_timepoints: int
    n_z_steps: int
    z_step_size: float
    wave_in_file_name: bool

    def __init__(self, path: Path):
        """Parse the ``.nd`` file at *path*.

        Raises:
            ValueError: if the file does not follow the expected layout.
        """
        self.path = path
        self.name = path.stem
        self._parse_nd()

    def _parse_nd(self) -> None:
        """Read ``self.path`` and populate all metadata attributes."""
        # Defaults for the optional sections, so that the attributes exist
        # even when the file disables the corresponding dimension.
        self.n_timepoints = 1
        self.stage_positions = []
        self.wave_names = []
        self.wave_do_z = []

        with open(self.path) as nd:

            def read(key: str, convert: Callable):
                # Consume exactly one line and validate its key.
                return self._parse_line(nd.readline(), key, convert)

            # Header
            self.version = read("NDInfoFile", self._extract_version)
            self.description = read("Description", str)
            self.start_time = read("StartTime1", str)

            # Time lapse
            self.do_timelapse = read("DoTimelapse", self._parse_bool)
            if self.do_timelapse:
                self.n_timepoints = read("NTimePoints", int)

            # Stage positions
            self.do_stage = read("DoStage", self._parse_bool)
            if self.do_stage:
                n_stage_positions = read("NStagePositions", int)
                self.stage_positions = [
                    read(f"Stage{s + 1}", str) for s in range(n_stage_positions)
                ]

            # Wavelengths: each one contributes a name line and a do-z line
            self.do_wave = read("DoWave", self._parse_bool)
            if self.do_wave:
                n_wavelengths = read("NWavelengths", int)
                for w in range(n_wavelengths):
                    self.wave_names.append(read(f"WaveName{w + 1}", str))
                    self.wave_do_z.append(read(f"WaveDoZ{w + 1}", self._parse_bool))

            # Z steps
            self.do_z = read("DoZSeries", self._parse_bool)
            self.n_z_steps = read("NZSteps", int)
            self.z_step_size = read("ZStepSize", float)

            self.wave_in_file_name = read("WaveInFileName", self._parse_bool)

            # End of file
            last_line = nd.readline()
            if last_line.strip(' "\n') != "EndFile":
                message = f"Expected end of file, got: {last_line}"
                raise ValueError(message)

    def _parse_line(self, line: str, key: str, value_function: Callable):
        """Validate that *line* carries *key* and return its converted value.

        Args:
            line: raw line as read from the file.
            key: the key expected in front of the first comma.
            value_function: callable applied to the stripped value string.

        Raises:
            ValueError: if the key does not match or the line has no value.
        """
        # Split on the first comma only, so values that contain commas
        # (e.g. free-text descriptions) are preserved in full.
        raw_key, separator, raw_value = line.partition(",")
        if raw_key.strip(' "') != key or not separator:
            message = f"Invalid nd file contents.\n\texpected: {key}\n\tgot: {line}"
            raise ValueError(message)
        return value_function(raw_value.strip(' "\n'))

    def _extract_version(self, value: str) -> str:
        """Return the version number from a value such as ``Version 1.0``."""
        prefix = "Version "
        return value[len(prefix):] if value.startswith(prefix) else value

    def _parse_bool(self, value: str) -> bool:
        """Return ``True`` only for a case-insensitive ``"true"``."""
        return value.lower() == "true"

    def _wavelengths(self):
        """Yield ``(index, name, file name part, do_z)`` per wavelength.

        Yields nothing when wavelengths are disabled.
        """
        if not self.do_wave:
            return
        for i, w in enumerate(self.wave_names):
            yield (
                i,
                w,
                f"_w{i + 1}{w}" if self.wave_in_file_name else "",
                self.wave_do_z[i],
            )

    def _stage_positions(self):
        """Yield ``(index, name, file name part)`` per stage position.

        Yields nothing when stage positions are disabled.
        """
        if not self.do_stage:
            return
        for s, s_name in enumerate(self.stage_positions):
            yield s, s_name, f"_s{s + 1}"

    def _timepoints(self):
        """Yield ``(index, file name part)`` per timepoint, if time lapse is on."""
        if self.do_timelapse:
            for t in range(self.n_timepoints):
                yield t, f"_t{t + 1}"

    def _get_path_channel_position_time(self):
        """Yield one record per expected image file.

        Each record is ``(path, channel index, channel name, position index,
        position name, time index)``. A disabled dimension contributes a
        single placeholder entry with index ``0`` and no file name part.
        """
        # Build each axis once instead of once per outer-loop iteration.
        # The placeholder tuples must have the same arity as the generators.
        wavelengths = list(self._wavelengths()) or [(0, None, "", self.do_z)]
        positions = list(self._stage_positions()) or [(0, None, "")]
        timepoints = list(self._timepoints()) or [(0, "")]

        for w_idx, w_name, w, has_z in wavelengths:
            extension = ".stk" if has_z else ".tif"
            for s_idx, s_name, s in positions:
                for t_idx, t in timepoints:
                    yield (
                        self.path.parent / (self.name + w + s + t + extension),
                        w_idx,
                        w_name,
                        s_idx,
                        s_name,
                        t_idx,
                    )

    def get_files(self) -> pd.DataFrame:
        """Return a table with one row per expected image file."""
        return pd.DataFrame.from_records(
            self._get_path_channel_position_time(),
            columns=[
                "path",
                "channel",
                "channel_name",
                "position",
                "position_name",
                "time",
            ],
        )
93 changes: 93 additions & 0 deletions tests/resources/sample_2ch_75pos_361t.nd
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
"NDInfoFile", Version 1.0
"Description", File recreated from images.
"StartTime1", 20240209 16:25:29.500
"DoTimelapse", TRUE
"NTimePoints", 361
"DoStage", TRUE
"NStagePositions", 75
"Stage1", "1"
"Stage2", "2"
"Stage3", "3"
"Stage4", "4"
"Stage5", "5"
"Stage6", "6"
"Stage7", "7"
"Stage8", "8"
"Stage9", "9"
"Stage10", "10"
"Stage11", "11"
"Stage12", "12"
"Stage13", "13"
"Stage14", "14"
"Stage15", "15"
"Stage16", "16"
"Stage17", "17"
"Stage18", "18"
"Stage19", "19"
"Stage20", "20"
"Stage21", "21"
"Stage22", "22"
"Stage23", "23"
"Stage24", "24"
"Stage25", "25"
"Stage26", "26"
"Stage27", "27"
"Stage28", "28"
"Stage29", "29"
"Stage30", "30"
"Stage31", "31"
"Stage32", "32"
"Stage33", "33"
"Stage34", "34"
"Stage35", "35"
"Stage36", "36"
"Stage37", "37"
"Stage38", "38"
"Stage39", "39"
"Stage40", "40"
"Stage41", "41"
"Stage42", "42"
"Stage43", "43"
"Stage44", "44"
"Stage45", "45"
"Stage46", "46"
"Stage47", "47"
"Stage48", "48"
"Stage49", "49"
"Stage50", "50"
"Stage51", "51"
"Stage52", "52"
"Stage53", "53"
"Stage54", "54"
"Stage55", "55"
"Stage56", "56"
"Stage57", "57"
"Stage58", "58"
"Stage59", "59"
"Stage60", "60"
"Stage61", "61"
"Stage62", "62"
"Stage63", "63"
"Stage64", "64"
"Stage65", "65"
"Stage66", "66"
"Stage67", "67"
"Stage68", "68"
"Stage69", "69"
"Stage70", "70"
"Stage71", "71"
"Stage72", "72"
"Stage73", "73"
"Stage74", "74"
"Stage75", "75"
"DoWave", TRUE
"NWavelengths", 2
"WaveName1", "BF-488-Cam1"
"WaveDoZ1", TRUE
"WaveName2", "488-BF-Cam0"
"WaveDoZ2", TRUE
"DoZSeries", TRUE
"NZSteps", 25
"ZStepSize", 2.00
"WaveInFileName", TRUE
"EndFile"
25 changes: 25 additions & 0 deletions tests/resources/sample_4ch_4pos.nd
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
"NDInfoFile", Version 1.0
"Description", File recreated from images.
"StartTime1", 20230922 12:33:22.559
"DoTimelapse", FALSE
"DoStage", TRUE
"NStagePositions", 4
"Stage1", "Position1"
"Stage2", "Position2"
"Stage3", "Position3"
"Stage4", "Position4"
"DoWave", TRUE
"NWavelengths", 4
"WaveName1", "Conf640"
"WaveDoZ1", TRUE
"WaveName2", "Conf561"
"WaveDoZ2", TRUE
"WaveName3", "Conf488"
"WaveDoZ3", TRUE
"WaveName4", "Conf405"
"WaveDoZ4", TRUE
"DoZSeries", TRUE
"NZSteps", 42
"ZStepSize", 3
"WaveInFileName", TRUE
"EndFile"
68 changes: 68 additions & 0 deletions tests/test_nd.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
from pathlib import Path

import pytest

from metamorph_mda_parser.nd import NdInfo


@pytest.fixture
def sample_4ch_4pos():
    """Path to the sample ``.nd`` file with 4 wavelengths and 4 stage positions."""
    # Anchor to this test module so the fixture does not depend on the
    # directory pytest is invoked from.
    return Path(__file__).parent / "resources" / "sample_4ch_4pos.nd"


@pytest.fixture
def sample_2ch_75pos_361t():
    """Path to the sample ``.nd`` file with 2 wavelengths, 75 positions, 361 timepoints."""
    # Anchor to this test module so the fixture does not depend on the
    # directory pytest is invoked from.
    return Path(__file__).parent / "resources" / "sample_2ch_75pos_361t.nd"


def test_sample_4ch_4pos(sample_4ch_4pos):
    """Check parsed metadata and the file table for the 4-channel, 4-position sample."""
    info = NdInfo(sample_4ch_4pos)
    expected_waves = ["Conf640", "Conf561", "Conf488", "Conf405"]

    # Header fields
    assert info.version == "1.0"
    assert info.description == "File recreated from images."

    # Acquisition dimensions
    assert not info.do_timelapse
    assert info.do_stage
    assert len(info.stage_positions) == 4
    assert info.do_wave
    assert len(info.wave_names) == 4
    assert info.do_z
    assert info.n_z_steps == 42
    assert info.z_step_size == 3.0
    assert info.wave_in_file_name
    assert info.wave_names == expected_waves

    table = info.get_files()

    # 4 wavelengths x 4 positions, no time lapse
    assert len(table) == 16

    # Distinct values per column, in order of first appearance
    expected_unique = {
        "channel": [0, 1, 2, 3],
        "channel_name": info.wave_names,
        "position": [0, 1, 2, 3],
        "position_name": [
            "Position1",
            "Position2",
            "Position3",
            "Position4",
        ],
        "time": [0],
    }
    for column, values in expected_unique.items():
        assert table[column].unique().tolist() == values


def test_sample_2ch_75pos_361t(sample_2ch_75pos_361t):
    """Check parsed metadata and file count for the 2-channel time-lapse sample."""
    info = NdInfo(sample_2ch_75pos_361t)
    expected_waves = ["BF-488-Cam1", "488-BF-Cam0"]

    # Header fields
    assert info.version == "1.0"
    assert info.description == "File recreated from images."

    # Acquisition dimensions
    assert info.do_timelapse
    assert info.n_timepoints == 361
    assert info.do_stage
    assert len(info.stage_positions) == 75
    assert info.do_wave
    assert len(info.wave_names) == 2
    assert info.do_z
    assert info.n_z_steps == 25
    assert info.z_step_size == 2.0
    assert info.wave_in_file_name
    assert info.wave_names == expected_waves

    table = info.get_files()

    # 2 wavelengths x 75 positions x 361 timepoints
    assert len(table) == 2 * 75 * 361

0 comments on commit bc1c02d

Please sign in to comment.