Skip to content

Commit

Permalink
feat: output file taps input file, introduce --drain-basins CLI option
Browse files Browse the repository at this point in the history
  • Loading branch information
paulmueller committed May 28, 2024
1 parent 5e2d511 commit a4f45e9
Show file tree
Hide file tree
Showing 4 changed files with 48 additions and 5 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
0.4.0
- feat: CLI by default creates output file that contains no redundant
data and uses the input file as a basin; old behavior can be brought
back with the "--drain-basins" command-line option
- fix: show correct data PPID in CLI
- setup: bump dcnum from 0.19.1 to 0.20.1
0.3.1
Expand Down
9 changes: 7 additions & 2 deletions chipstream/cli/cli_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,10 @@
"You can also specify a step size (e.g. '5000-7000-2' for "
"every second event). The convention follows Python slices "
"with 'n' substituting for 'None'.")
@click.option("--drain-basins", type=str, is_flag=True,
help="Write all basin features from input to output file. This "
"option trades computation time and small file size for "
"an output file that contains all available features.")
@click.option("-r", "--recursive", is_flag=True,
help="Recurse into subdirectories.")
@click.option("--num-cpus",
Expand All @@ -129,6 +133,7 @@ def chipstream_cli(
gate_kwargs=None,
pixel_size=0,
limit_events="0",
drain_basins=False,
recursive=False,
num_cpus=None,
dry_run=False,
Expand All @@ -142,7 +147,6 @@ def chipstream_cli(
verbose = True

# Parse limit_events to get the HDF5Data index_mapping

if limit_events == "0":
index_mapping = None
elif limit_events.count("-"):
Expand Down Expand Up @@ -175,8 +179,9 @@ def chipstream_cli(
feature_kwargs=feature_kwargs,
gate_kwargs=gate_kwargs,
pixel_size=pixel_size,
# Below this line are arguments that do not define the pipeline ID
index_mapping=index_mapping,
# Below this line are arguments that do not define the pipeline ID
basin_strategy="drain" if drain_basins else "tap",
num_cpus=num_cpus or mp.cpu_count(),
dry_run=dry_run,
debug=debug,
Expand Down
7 changes: 4 additions & 3 deletions chipstream/cli/cli_proc.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import pathlib
import time
from typing import List
from typing import List, Literal

import click
import dcnum.logic
Expand All @@ -25,8 +25,9 @@ def process_dataset(
feature_kwargs: List[str],
gate_kwargs: List[str],
pixel_size: float,
# Below this line are arguments that do not affect the pipeline ID
index_mapping: int | slice | None,
# Below this line are arguments that do not affect the pipeline ID
basin_strategy: Literal["drain", "tap"],
num_cpus: int,
dry_run: bool,
debug: bool,
Expand Down Expand Up @@ -99,7 +100,7 @@ def process_dataset(
feature_kwargs=feat_kwargs,
gate_code=gate_cls.get_ppid_code(),
gate_kwargs=gate_kwargs,
basin_strategy="drain",
basin_strategy=basin_strategy,
num_procs=num_cpus,
debug=debug,
)
Expand Down
34 changes: 34 additions & 0 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,39 @@
from chipstream.cli import cli_main # noqa: E402


@pytest.mark.parametrize("drain", [True, False])
def test_cli_basins(cli_runner, drain):
    """Verify which features are written to the output file by the CLI.

    The CLI is invoked once with and once without ``--drain-basins``.
    With the flag, "image" and "frame" must be present in the output
    file's "events" group; without it, they must be absent. In both
    cases "mask", "deform", and "aspect" must be present.
    """
    source_path = retrieve_data(
        "fmt-hdf5_cytoshot_full-features_legacy_allev_2023.zip")
    input_path = source_path.with_name("input_path.rtdc")

    # Concatenate the sample file three times to build a larger input
    # file (intended to hold more than 100 events).
    with dcnum.read.concatenated_hdf5_data(
            paths=[source_path] * 3,
            path_out=input_path,
            compute_frame=True):
        pass

    output_path = input_path.with_name("with_pixel_size_dcn.rtdc")
    cli_args = [str(input_path), str(output_path), "-s", "thresh"]
    if drain:
        cli_args += ["--drain-basins"]

    result = cli_runner.invoke(cli_main.chipstream_cli, cli_args)
    assert result.exit_code == 0

    with h5py.File(output_path) as h5:
        events = h5["events"]
        # These features are only expected in the output when draining.
        for feat in ("image", "frame"):
            assert (feat in events) == drain
        # These features are expected in the output in both modes.
        for feat in ("mask", "deform", "aspect"):
            assert feat in events


@pytest.mark.parametrize("limit_events,dcnum_mapping,dcnum_yield,f0", [
# this is the default
["0", "0", 36, 1],
Expand Down Expand Up @@ -46,6 +79,7 @@ def test_cli_limit_events(cli_runner, limit_events, dcnum_yield,
str(path_out),
"-s", "thresh",
"--limit-events", limit_events,
"--drain-basins",
])
assert result.exit_code == 0

Expand Down

0 comments on commit a4f45e9

Please sign in to comment.