From be87111399ec58affbe7372c03053649110e01d8 Mon Sep 17 00:00:00 2001 From: rhoadesScholar Date: Wed, 20 Mar 2024 15:49:06 -0400 Subject: [PATCH 1/2] =?UTF-8?q?feat:=20=E2=9C=A8=20Generalize=20get=5Fview?= =?UTF-8?q?er=20util?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- dacapo/examples/utils.py | 112 ++++++++++++++++++--------------------- 1 file changed, 52 insertions(+), 60 deletions(-) diff --git a/dacapo/examples/utils.py b/dacapo/examples/utils.py index e4268590d..617cb5127 100644 --- a/dacapo/examples/utils.py +++ b/dacapo/examples/utils.py @@ -19,51 +19,71 @@ def get_viewer( - raw_array: gp.Array | Array | ZarrArray, - labels_array: gp.Array | Array | ZarrArray, - pred_array: Optional[gp.Array | Array | ZarrArray] = None, - pred_labels_array: Optional[gp.Array | Array | ZarrArray] = None, - width: int = 1500, - height: int = 600, -) -> IFrame: - arrays = { - "raw": raw_array, - "labels": labels_array, - } - if pred_array is not None: - arrays["pred"] = pred_array - if pred_labels_array is not None: - arrays["pred_labels"] = pred_labels_array - - data = {} - voxel_sizes = {} - for name, array in arrays.items(): + arrays: dict, width: int = 1500, height: int = 600, headless: bool = True +) -> neuroglancer.Viewer | IFrame: + for name, array_data in arrays.items(): + array = array_data["array"] if hasattr(array, "to_ndarray"): - data[name] = array.to_ndarray() + arrays[name]["array"] = array.to_ndarray() else: - data[name] = array.data + arrays[name]["array"] = array.data if hasattr(array, "voxel_size"): - voxel_sizes[name] = array.voxel_size + arrays[name]["voxel_sizes"] = array.voxel_size else: - voxel_sizes[name] = array.spec.voxel_size + arrays[name]["voxel_sizes"] = array.spec.voxel_size neuroglancer.set_server_bind_address("0.0.0.0") viewer = neuroglancer.Viewer() with viewer.txn() as state: state.showSlices = False - add_seg_layer(state, "labels", data["labels"], voxel_sizes["labels"]) + for name, array_data in arrays.items(): + meshes = "meshes" in array_data and array_data["meshes"] + is_seg = "is_seg" in array_data and array_data["is_seg"] + if is_seg: + add_seg_layer( + state, name, array_data["array"], array_data["voxel_sizes"], meshes + ) + else: + add_scalar_layer( + state, name, array_data["array"], array_data["voxel_sizes"] + ) - add_scalar_layer(state, "raw", data["raw"], voxel_sizes["raw"]) + if headless: + return viewer + else: + return IFrame(src=viewer, width=width, height=height) - if "pred" in data: - add_scalar_layer(state, "pred", data["pred"], voxel_sizes["pred"]) - if "pred_labels" in data: - add_seg_layer( - state, "pred_labels", data["pred_labels"], voxel_sizes["pred_labels"] - ) +def add_seg_layer(state, name, data, voxel_size, meshes=False): + if meshes: + kwargs = {"segments": np.unique(data[data > 0])} + else: + kwargs = {} + state.layers[name] = neuroglancer.SegmentationLayer( + # segments=[str(i) for i in np.unique(data[data > 0])], # this line will cause all objects to be selected and thus all meshes to be generated...will be slow if lots of high res meshes + source=neuroglancer.LocalVolume( + data=data, + dimensions=neuroglancer.CoordinateSpace( + names=["z", "y", "x"], + units=["nm", "nm", "nm"], + scales=voxel_size, + ), + ), + **kwargs, + ) + - return IFrame(src=viewer, width=width, height=height) +def add_scalar_layer(state, name, data, voxel_size): + state.layers[name] = neuroglancer.ImageLayer( + source=neuroglancer.LocalVolume( + data=data, + dimensions=neuroglancer.CoordinateSpace( + 
names=["z", "y", "x"], + units=["nm", "nm", "nm"], + scales=voxel_size, + ), + ), + ) class NeuroglancerRunViewer: @@ -214,31 +234,3 @@ def update_with_new_validation_if_possible(self): self.most_recent_iteration, validation_dataset.name, ) - - -def add_seg_layer(state, name, data, voxel_size): - state.layers[name] = neuroglancer.SegmentationLayer( - # segments=[str(i) for i in np.unique(data[data > 0])], # this line will cause all objects to be selected and thus all meshes to be generated...will be slow if lots of high res meshes - source=neuroglancer.LocalVolume( - data=data, - dimensions=neuroglancer.CoordinateSpace( - names=["z", "y", "x"], - units=["nm", "nm", "nm"], - scales=voxel_size, - ), - ), - segments=np.unique(data[data > 0]), - ) - - -def add_scalar_layer(state, name, data, voxel_size): - state.layers[name] = neuroglancer.ImageLayer( - source=neuroglancer.LocalVolume( - data=data, - dimensions=neuroglancer.CoordinateSpace( - names=["z", "y", "x"], - units=["nm", "nm", "nm"], - scales=voxel_size, - ), - ), - ) From 7f44619f6461b8dd83fa0814e6cadc70ee8673ba Mon Sep 17 00:00:00 2001 From: rhoadesScholar Date: Wed, 20 Mar 2024 16:56:00 -0400 Subject: [PATCH 2/2] =?UTF-8?q?feat:=20=E2=9C=A8=20Update=20synthetic=20ex?= =?UTF-8?q?ample=20notebook.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../distance_task/synthetic_example.ipynb | 300 +++++++++++++----- .../distance_task/synthetic_example.py | 75 +++-- dacapo/examples/utils.py | 2 +- 3 files changed, 274 insertions(+), 103 deletions(-) diff --git a/dacapo/examples/distance_task/synthetic_example.ipynb b/dacapo/examples/distance_task/synthetic_example.ipynb index 5673ea26e..3c8d0cfe5 100644 --- a/dacapo/examples/distance_task/synthetic_example.ipynb +++ b/dacapo/examples/distance_task/synthetic_example.ipynb @@ -70,33 +70,57 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Creating FileConfigStore:\n", + "\tpath: /nrs/cellmap/rhoadesj/dacapo_runs/configs\n" + ] + } + ], "source": [ "# First we need to create a config store to store our configurations\n", "from dacapo.store.create_store import create_config_store\n", "\n", "config_store = create_config_store()\n", - "\n", - "" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Then let's make sure we have data to train on. If this is already provided, you can skip to the Datasplit section." + "\n" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ + "# Then let's make sure we have data to train on. 
If this is already provided, you can skip to the Datasplit section.\n", + "\n", "from pathlib import Path\n", "from dacapo import Options\n", "from dacapo.examples.utils import get_viewer\n", @@ -127,15 +151,41 @@ " raw_array = open_ds(str(train_data_path), \"raw\")\n", " labels_array = open_ds(str(train_data_path), \"labels\")\n", "\n", - "get_viewer(raw_array, labels_array)\n", - "" + "arrays = {\n", + " \"raw\": {\"array\": raw_array},\n", + " \"labels\": {\"array\": labels_array, \"meshes\": True},\n", + "}\n", + "get_viewer(arrays, headless=False)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Then for validation data\n", "validate_data_path = Path(runs_base_dir, \"example_validate.zarr\")\n", @@ -152,9 +202,63 @@ " overwrite=True,\n", " num_workers=num_workers,\n", " )\n", + "arrays = {\n", + " \"raw\": {\"array\": raw_array},\n", + " \"labels\": {\"array\": labels_array, \"meshes\": True},\n", + "}\n", + "get_viewer(arrays, headless=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Then let's make some test data\n", + "test_data_path = Path(runs_base_dir, \"example_test.zarr\")\n", + "try:\n", + " assert not force_example_creation\n", + " raw_array = open_ds(str(test_data_path), \"raw\")\n", + " labels_array = open_ds(str(test_data_path), \"labels\")\n", + "except:\n", + " test_shape = Coordinate((152, 152, 152)) * 5\n", + " generate_synthetic_dataset(\n", + " test_data_path,\n", + " shape=test_shape,\n", + " overwrite=True,\n", + " write_shape=Coordinate((152, 152, 152)),\n", + " num_workers=num_workers,\n", + " )\n", "\n", - "get_viewer(raw_array, labels_array)\n", - "" + "arrays = {\n", + " \"raw\": {\"array\": raw_array},\n", + " \"labels\": {\"array\": labels_array, \"meshes\": True},\n", + "}\n", + "get_viewer(arrays, headless=False)\n" ] }, { @@ -169,9 +273,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:dacapo.experiments.datasplits.datasplit_generator: No targets specified, using all classes in the dataset as target ['labels'].\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Neuroglancer link: http://rhoadesj-ws1.hhmi.org:32941/v/d893eb501d1df982c9931ec02a68ee97a11888a5/\n" + ] + } + ], "source": [ "from pathlib import Path\n", "from dacapo.experiments.datasplits import DataSplitGenerator\n", @@ -187,8 +306,7 @@ "\n", "datasplit = datasplit_config.datasplit_type(datasplit_config)\n", "viewer = datasplit._neuroglancer()\n", - "config_store.store_datasplit_config(datasplit_config)\n", - "" + "config_store.store_datasplit_config(datasplit_config)\n" ] }, { @@ -283,7 +401,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -296,8 +414,7 @@ " tol_distance=80.0,\n", " scale_factor=160.0,\n", ")\n", - "config_store.store_task_config(task_config)\n", - "" + "config_store.store_task_config(task_config)\n" ] }, { 
@@ -311,7 +428,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -331,8 +448,7 @@ " config_store.store_architecture_config(architecture_config)\n", "except:\n", " config_store.delete_architecture_config(architecture_config.name)\n", - " config_store.store_architecture_config(architecture_config)\n", - "" + " config_store.store_architecture_config(architecture_config)\n" ] }, { @@ -346,7 +462,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ @@ -379,8 +495,7 @@ " min_masked=0.05,\n", " clip_raw=True,\n", ")\n", - "config_store.store_trainer_config(trainer_config)\n", - "" + "config_store.store_trainer_config(trainer_config)\n" ] }, { @@ -393,9 +508,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "example_synthetic_distance_run\n" + ] + } + ], "source": [ "from dacapo.experiments import RunConfig\n", "from dacapo.experiments.run import Run\n", @@ -409,7 +532,7 @@ "# )\n", "\n", "iterations = 2000\n", - "validation_interval = iterations // 2\n", + "validation_interval = 200 # iterations // 2\n", "repetitions = 1\n", "for i in range(repetitions):\n", " run_config = RunConfig(\n", @@ -442,8 +565,7 @@ " except:\n", " config_store.delete_run_config(run_config.name)\n", " config_store.store_run_config(run_config)\n", - "\n", - "" + "\n" ] }, { @@ -457,19 +579,51 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Creating FileConfigStore:\n", + "\tpath: /nrs/cellmap/rhoadesj/dacapo_runs/configs\n", + "\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "from dacapo.train import train_run\n", "from dacapo.experiments.run import Run\n", "from dacapo.store.create_store import create_config_store\n", "\n", "config_store = create_config_store()\n", - "\n", "run = Run(config_store.retrieve_run_config(run_config.name))\n", - "train_run(run)\n", - "" + "\n", + "# Now run\n", + "train_run(run)" ] }, { @@ -496,8 +650,7 @@ "source": [ "from dacapo.validate import validate\n", "\n", - "validate(run_config.name, iterations, num_workers=16, overwrite=True)\n", - "" + "validate(run_config.name, iterations, num_workers=1, overwrite=True)\n" ] }, { @@ -508,32 +661,6 @@ " Once you have trained and validated your model, you can use it to predict on new data. You can use the `dacapo.predict` function to do this. 
You can also use the command line interface to predict on a run: dacapo predict -r {run_config.name} -i {iteration} -ic {input_container} -id {input_dataset} -op {output_path}" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# First let's make some test data\n", - "test_data_path = Path(runs_base_dir, \"example_test.zarr\")\n", - "try:\n", - " assert not force_example_creation\n", - " raw_array = open_ds(str(test_data_path), \"raw\")\n", - " labels_array = open_ds(str(test_data_path), \"labels\")\n", - "except:\n", - " test_shape = Coordinate((152, 152, 152)) * 5\n", - " generate_synthetic_dataset(\n", - " test_data_path,\n", - " shape=test_shape,\n", - " overwrite=True,\n", - " write_shape=Coordinate((152, 152, 152)),\n", - " num_workers=num_workers,\n", - " )\n", - "\n", - "get_viewer(raw_array, labels_array)\n", - "" - ] - }, { "cell_type": "code", "execution_count": null, @@ -542,6 +669,8 @@ "source": [ "from dacapo.predict import predict\n", "\n", + "# test_data_path = Path(runs_base_dir, \"example_test.zarr\")\n", + "\n", "predict(\n", " run_config.name,\n", " iterations,\n", @@ -553,13 +682,26 @@ " output_dtype=\"float32\",\n", " output_roi=raw_array.roi,\n", ")\n", - "" + "\n", + "raw_array = open_ds(str(test_data_path), \"raw\")\n", + "pred_array = open_ds(str(test_data_path), \"predictions\")\n", + "gt_array = open_ds(str(test_data_path), \"labels\")\n", + "\n", + "arrays = {\n", + " \"raw\": {\"array\": raw_array},\n", + " \"labels\": {\"array\": gt_array, \"meshes\": True},\n", + " \"predictions\": {\"array\": pred_array},\n", + "}\n", + "get_viewer(arrays, headless=False) " ] } ], - "nbformat": 4, - "nbformat_minor": 2, "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, "language_info": { "codemirror_mode": { "name": "ipython", @@ -570,7 +712,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": 3 + "version": "3.10.13" } - } -} \ No newline at end of file + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/dacapo/examples/distance_task/synthetic_example.py b/dacapo/examples/distance_task/synthetic_example.py index a8e75498e..f89f66fb8 100644 --- a/dacapo/examples/distance_task/synthetic_example.py +++ b/dacapo/examples/distance_task/synthetic_example.py @@ -93,8 +93,11 @@ ) raw_array = open_ds(str(train_data_path), "raw") labels_array = open_ds(str(train_data_path), "labels") - -get_viewer(raw_array, labels_array) +arrays = { + "raw": {"array": raw_array}, + "labels": {"array": labels_array, "meshes": True}, +} +get_viewer(arrays, headless=False) # %% # Then for validation data @@ -104,7 +107,7 @@ raw_array = open_ds(str(validate_data_path), "raw") labels_array = open_ds(str(validate_data_path), "labels") except: - validate_shape = Coordinate((152, 152, 152)) * 3 + validate_shape = Coordinate((152, 152, 152)) * 1 generate_synthetic_dataset( validate_data_path, shape=validate_shape, @@ -113,7 +116,34 @@ num_workers=num_workers, ) -get_viewer(raw_array, labels_array) +arrays = { + "raw": {"array": raw_array}, + "labels": {"array": labels_array, "meshes": True}, +} +get_viewer(arrays, headless=False) + +# %% +# Then let's make some test data +test_data_path = Path(runs_base_dir, "example_test.zarr") +try: + assert not force_example_creation + raw_array = open_ds(str(test_data_path), "raw") + labels_array = open_ds(str(test_data_path), "labels") +except: + test_shape = Coordinate((152, 152, 152)) * 3 + 
generate_synthetic_dataset( + test_data_path, + shape=test_shape, + overwrite=True, + write_shape=Coordinate((152, 152, 152)), + num_workers=num_workers, + ) + +arrays = { + "raw": {"array": raw_array}, + "labels": {"array": labels_array, "meshes": True}, +} +get_viewer(arrays, headless=False) # %% [markdown] # ## Datasplit @@ -356,10 +386,15 @@ from dacapo.train import train_run from dacapo.experiments.run import Run from dacapo.store.create_store import create_config_store +from dacapo.examples.utils import NeuroglancerRunViewer config_store = create_config_store() - run = Run(config_store.retrieve_run_config(run_config.name)) + +# Visualize as we go +run_viewer = NeuroglancerRunViewer(run) +run_viewer.start() +# %% train_run(run) # %% [markdown] @@ -382,25 +417,6 @@ # ## Predict # Once you have trained and validated your model, you can use it to predict on new data. You can use the `dacapo.predict` function to do this. You can also use the command line interface to predict on a run: dacapo predict -r {run_config.name} -i {iteration} -ic {input_container} -id {input_dataset} -op {output_path} -# %% -# First let's make some test data -test_data_path = Path(runs_base_dir, "example_test.zarr") -try: - assert not force_example_creation - raw_array = open_ds(str(test_data_path), "raw") - labels_array = open_ds(str(test_data_path), "labels") -except: - test_shape = Coordinate((152, 152, 152)) * 5 - generate_synthetic_dataset( - test_data_path, - shape=test_shape, - overwrite=True, - write_shape=Coordinate((152, 152, 152)), - num_workers=num_workers, - ) - -get_viewer(raw_array, labels_array) - # %% from dacapo.predict import predict @@ -416,3 +432,14 @@ output_dtype="float32", output_roi=raw_array.roi, ) + +raw_array = open_ds(str(test_data_path), "raw") +pred_array = open_ds(str(test_data_path), "predictions") +gt_array = open_ds(str(test_data_path), "labels") + +arrays = { + "raw": {"array": raw_array}, + "labels": {"array": gt_array, "meshes": True}, + "predictions": {"array": pred_array}, +} +get_viewer(arrays, headless=False) diff --git a/dacapo/examples/utils.py b/dacapo/examples/utils.py index 617cb5127..82c9aa2d9 100644 --- a/dacapo/examples/utils.py +++ b/dacapo/examples/utils.py @@ -38,7 +38,7 @@ def get_viewer( state.showSlices = False for name, array_data in arrays.items(): meshes = "meshes" in array_data and array_data["meshes"] - is_seg = "is_seg" in array_data and array_data["is_seg"] + is_seg = meshes or ("is_seg" in array_data and array_data["is_seg"]) if is_seg: add_seg_layer( state, name, array_data["array"], array_data["voxel_sizes"], meshes
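
Usage sketch for the generalized get_viewer API introduced above. This follows the
notebook's own open_ds calls; the container path is illustrative, and "meshes" /
"is_seg" are the optional per-layer flags the new dict format accepts.

    from pathlib import Path

    from funlib.persistence import open_ds
    from dacapo.examples.utils import get_viewer

    # Illustrative path, matching the example layout used in the notebook
    # (a zarr container holding "raw" and "labels" datasets).
    train_data_path = Path("example_train.zarr")
    raw_array = open_ds(str(train_data_path), "raw")
    labels_array = open_ds(str(train_data_path), "labels")

    # Each entry maps a layer name to a dict holding the array plus optional
    # flags: "meshes" requests 3D meshes for a segmentation layer, "is_seg"
    # marks a segmentation layer without mesh generation.
    arrays = {
        "raw": {"array": raw_array},
        "labels": {"array": labels_array, "meshes": True},
    }

    # headless=True (the default) returns the neuroglancer.Viewer directly,
    # which suits scripts; headless=False wraps it in an IPython IFrame for
    # inline display, as the updated notebook cells do.
    viewer = get_viewer(arrays, headless=True)
    print(viewer)  # the viewer prints as its neuroglancer URL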
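
The second patch tightens the layer-kind logic so that requesting meshes implies a
segmentation layer. Restated as a standalone sketch: resolve_layer_kind is a
hypothetical helper name, but its logic mirrors the patched lines in get_viewer.

    def resolve_layer_kind(array_data: dict) -> tuple[bool, bool]:
        # Equivalent to the patched checks:
        #   meshes = "meshes" in array_data and array_data["meshes"]
        #   is_seg = meshes or ("is_seg" in array_data and array_data["is_seg"])
        meshes = array_data.get("meshes", False)
        is_seg = meshes or array_data.get("is_seg", False)
        return is_seg, meshes

    # "meshes" alone now selects a SegmentationLayer with meshes enabled.
    # "is_seg" also selects a SegmentationLayer but skips passing
    # segments=np.unique(data[data > 0]), avoiding slow mesh generation on
    # large high-resolution volumes. Neither flag yields an ImageLayer via
    # add_scalar_layer.
    assert resolve_layer_kind({"meshes": True}) == (True, True)
    assert resolve_layer_kind({"is_seg": True}) == (True, False)
    assert resolve_layer_kind({}) == (False, False)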