diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 000000000..9bc9e6c39 --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +*.ipynb \ No newline at end of file diff --git a/.github/workflows/black.yaml b/.github/workflows/black.yaml index a9ebfdec7..fc1d4fa03 100644 --- a/.github/workflows/black.yaml +++ b/.github/workflows/black.yaml @@ -1,6 +1,12 @@ name: black-action -on: [push, pull_request] +on: + pull_request: + branches: + - main + push: + branches: + - main jobs: linter_name: diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml index d8d7b388d..97e21aef8 100644 --- a/.github/workflows/docs.yaml +++ b/.github/workflows/docs.yaml @@ -1,6 +1,12 @@ name: Generate Pages -on: [push, pull_request] +on: + pull_request: + branches: + - main + push: + branches: + - main jobs: docs: diff --git a/.github/workflows/mypy.yaml b/.github/workflows/mypy.yaml index 5c091f8dd..125d00529 100644 --- a/.github/workflows/mypy.yaml +++ b/.github/workflows/mypy.yaml @@ -1,6 +1,12 @@ name: Python mypy -on: [push, pull_request] +on: + pull_request: + branches: + - main + push: + branches: + - main jobs: static-analysis: diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index fa581b97d..efa119c1e 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -34,4 +34,7 @@ jobs: pip install -r requirements-dev.txt - name: Test - run: pytest tests -s -v --color=yes + run: pytest --color=yes --cov --cov-report=xml --cov-report=term-missing + + - name: Coverage + uses: codecov/codecov-action@v3 \ No newline at end of file diff --git a/examples/distance_task/finetune_liver_many.ipynb b/examples/distance_task/finetune_liver_many.ipynb new file mode 100644 index 000000000..dbaf94fe7 --- /dev/null +++ b/examples/distance_task/finetune_liver_many.ipynb @@ -0,0 +1,745 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Dacapo" + ] + }, + { + "cell_type": 
"markdown", + "metadata": {}, + "source": [ + "## Imports" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import PosixPath\n", + "from dacapo.experiments.datasplits.datasets.arrays import (\n", + " BinarizeArrayConfig,\n", + " IntensitiesArrayConfig,\n", + " MissingAnnotationsMaskConfig,\n", + " ResampledArrayConfig,\n", + " ZarrArrayConfig,\n", + ")\n", + "from dacapo.experiments.tasks import DistanceTaskConfig\n", + "from dacapo.experiments.architectures import CNNectomeUNetConfig\n", + "from dacapo.experiments.trainers import GunpowderTrainerConfig\n", + "from dacapo.experiments.trainers.gp_augments import (\n", + " ElasticAugmentConfig,\n", + " GammaAugmentConfig,\n", + " IntensityAugmentConfig,\n", + " IntensityScaleShiftAugmentConfig,\n", + ")\n", + "from dacapo.experiments.datasplits import TrainValidateDataSplitConfig\n", + "from dacapo.experiments.datasplits.datasets import RawGTDatasetConfig\n", + "from dacapo.experiments.starts import StartConfig\n", + "from dacapo.experiments import RunConfig" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Config Store" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "from dacapo.store.create_store import create_config_store\n", + "config_store = create_config_store()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Task" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "task_config = DistanceTaskConfig(\n", + " name=\"example_distances_4nm_many\",\n", + " channels=[\n", + " \"ecs\",\n", + " \"plasma_membrane\",\n", + " \"mito\",\n", + " \"mito_membrane\",\n", + " \"vesicle\",\n", + " \"vesicle_membrane\",\n", + " \"mvb\",\n", + " \"mvb_membrane\",\n", + " \"er\",\n", + " \"er_membrane\",\n", + " \"eres\",\n", + " \"nucleus\",\n", + " \"microtubules\",\n", + " 
\"microtubules_out\",\n", + " ],\n", + " clip_distance=40.0,\n", + " tol_distance=40.0,\n", + " scale_factor=80.0,\n", + " mask_distances=True,\n", + ")\n", + "config_store.store_task_config(task_config)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Architecture" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "architecture_config = CNNectomeUNetConfig(\n", + " name=\"example_upsample-unet\",\n", + " input_shape=(216, 216, 216),\n", + " fmaps_out=72,\n", + " fmaps_in=1,\n", + " num_fmaps=12,\n", + " fmap_inc_factor=6,\n", + " downsample_factors=[(2, 2, 2), (3, 3, 3), (3, 3, 3)],\n", + " kernel_size_down=None,\n", + " kernel_size_up=None,\n", + " eval_shape_increase=(72, 72, 72),\n", + " upsample_factors=[(2, 2, 2)],\n", + " constant_upsample=True,\n", + " padding=\"valid\",\n", + ")\n", + "config_store.store_architecture_config(architecture_config)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Trainer" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "trainer_config = GunpowderTrainerConfig(\n", + " name=\"example_default\",\n", + " batch_size=2,\n", + " learning_rate=0.0001,\n", + " num_data_fetchers=20,\n", + " augments=[\n", + " ElasticAugmentConfig(\n", + " control_point_spacing=[100, 100, 100],\n", + " control_point_displacement_sigma=[10.0, 10.0, 10.0],\n", + " rotation_interval=(0.0, 1.5707963267948966),\n", + " subsample=8,\n", + " uniform_3d_rotation=True,\n", + " ),\n", + " IntensityAugmentConfig(scale=(0.25, 1.75), shift=(-0.5, 0.35), clip=True),\n", + " GammaAugmentConfig(gamma_range=(0.5, 2.0)),\n", + " IntensityScaleShiftAugmentConfig(scale=2.0, shift=-1.0),\n", + " ],\n", + " snapshot_interval=10000,\n", + " min_masked=0.05,\n", + " clip_raw=True,\n", + ")\n", + "config_store.store_trainer_config(trainer_config)" + ] + }, + { + "attachments": 
{}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Datasplit" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "datasplit_config = TrainValidateDataSplitConfig(\n", + " name=\"example_jrc_mus-liver-zon-1_many_4nm\",\n", + " train_configs=[\n", + " RawGTDatasetConfig(\n", + " name=\"jrc_mus-liver-zon-1_266_many_4nm\",\n", + " weight=1,\n", + " raw_config=IntensitiesArrayConfig(\n", + " name=\"jrc_mus-liver-zon-1_raw\",\n", + " source_array_config=ZarrArrayConfig(\n", + " name=\"jrc_mus-liver-zon-1_raw_uint8\",\n", + " file_name=PosixPath(\n", + " \"/nrs/cellmap/data/jrc_mus-liver-zon-1/jrc_mus-liver-zon-1.n5\"\n", + " ),\n", + " dataset=\"em/fibsem-uint8/s0\",\n", + " snap_to_grid=(8, 8, 8),\n", + " axes=None,\n", + " ),\n", + " min=0.0,\n", + " max=255.0,\n", + " ),\n", + " gt_config=BinarizeArrayConfig(\n", + " name=\"jrc_mus-liver-zon-1_266_many_4nm_gt\",\n", + " source_array_config=ZarrArrayConfig(\n", + " name=\"jrc_mus-liver-zon-1_266_gt\",\n", + " file_name=PosixPath(\n", + " \"/nrs/cellmap/ackermand/data/tmp_data/jrc_mus-liver-zon-1/jrc_mus-liver-zon-1.n5\"\n", + " ),\n", + " dataset=\"volumes/groundtruth/crop266/labels//all\",\n", + " snap_to_grid=(8, 8, 8),\n", + " axes=None,\n", + " ),\n", + " groupings=[\n", + " (\"ecs\", [1]),\n", + " (\"plasma_membrane\", [2]),\n", + " (\"mito\", [3, 4, 5]),\n", + " (\"mito_membrane\", [3]),\n", + " (\"vesicle\", [8, 9]),\n", + " (\"vesicle_membrane\", [8]),\n", + " (\"mvb\", [10, 11]),\n", + " (\"mvb_membrane\", [10]),\n", + " (\"er\", [16, 17, 18, 19, 20, 21, 22, 23]),\n", + " (\"er_membrane\", [16, 18, 20]),\n", + " (\"eres\", [18, 19]),\n", + " (\"nucleus\", [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 37]),\n", + " (\"microtubules\", [30, 31, 36]),\n", + " (\"microtubules_out\", [30]),\n", + " ],\n", + " background=0,\n", + " ),\n", + " mask_config=MissingAnnotationsMaskConfig(\n", + " name=\"jrc_mus-liver-zon-1_266_many_4nm_mask\",\n", + 
" source_array_config=ZarrArrayConfig(\n", + " name=\"jrc_mus-liver-zon-1_266_gt\",\n", + " file_name=PosixPath(\n", + " \"/nrs/cellmap/ackermand/data/tmp_data/jrc_mus-liver-zon-1/jrc_mus-liver-zon-1.n5\"\n", + " ),\n", + " dataset=\"volumes/groundtruth/crop266/labels//all\",\n", + " snap_to_grid=(8, 8, 8),\n", + " axes=None,\n", + " ),\n", + " groupings=[\n", + " (\"ecs\", [1]),\n", + " (\"plasma_membrane\", [2]),\n", + " (\"mito\", [3, 4, 5]),\n", + " (\"mito_membrane\", [3]),\n", + " (\"vesicle\", [8, 9]),\n", + " (\"vesicle_membrane\", [8]),\n", + " (\"mvb\", [10, 11]),\n", + " (\"mvb_membrane\", [10]),\n", + " (\"er\", [16, 17, 18, 19, 20, 21, 22, 23]),\n", + " (\"er_membrane\", [16, 18, 20]),\n", + " (\"eres\", [18, 19]),\n", + " (\"nucleus\", [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 37]),\n", + " (\"microtubules\", [30, 31, 36]),\n", + " (\"microtubules_out\", [30]),\n", + " ],\n", + " ),\n", + " sample_points=None,\n", + " ),\n", + " RawGTDatasetConfig(\n", + " name=\"jrc_mus-liver-zon-1_267_many_4nm\",\n", + " weight=1,\n", + " raw_config=IntensitiesArrayConfig(\n", + " name=\"jrc_mus-liver-zon-1_raw\",\n", + " source_array_config=ZarrArrayConfig(\n", + " name=\"jrc_mus-liver-zon-1_raw_uint8\",\n", + " file_name=PosixPath(\n", + " \"/nrs/cellmap/data/jrc_mus-liver-zon-1/jrc_mus-liver-zon-1.n5\"\n", + " ),\n", + " dataset=\"em/fibsem-uint8/s0\",\n", + " snap_to_grid=(8, 8, 8),\n", + " axes=None,\n", + " ),\n", + " min=0.0,\n", + " max=255.0,\n", + " ),\n", + " gt_config=BinarizeArrayConfig(\n", + " name=\"jrc_mus-liver-zon-1_267_many_4nm_gt\",\n", + " source_array_config=ZarrArrayConfig(\n", + " name=\"jrc_mus-liver-zon-1_267_gt\",\n", + " file_name=PosixPath(\n", + " \"/nrs/cellmap/ackermand/data/tmp_data/jrc_mus-liver-zon-1/jrc_mus-liver-zon-1.n5\"\n", + " ),\n", + " dataset=\"volumes/groundtruth/crop267/labels//all\",\n", + " snap_to_grid=(8, 8, 8),\n", + " axes=None,\n", + " ),\n", + " groupings=[\n", + " (\"ecs\", [1]),\n", + " (\"plasma_membrane\", 
[2]),\n", + " (\"mito\", [3, 4, 5]),\n", + " (\"mito_membrane\", [3]),\n", + " (\"vesicle\", [8, 9]),\n", + " (\"vesicle_membrane\", [8]),\n", + " (\"mvb\", [10, 11]),\n", + " (\"mvb_membrane\", [10]),\n", + " (\"er\", [16, 17, 18, 19, 20, 21, 22, 23]),\n", + " (\"er_membrane\", [16, 18, 20]),\n", + " (\"eres\", [18, 19]),\n", + " (\"nucleus\", [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 37]),\n", + " (\"microtubules\", [30, 31, 36]),\n", + " (\"microtubules_out\", [30]),\n", + " ],\n", + " background=0,\n", + " ),\n", + " mask_config=MissingAnnotationsMaskConfig(\n", + " name=\"jrc_mus-liver-zon-1_267_many_4nm_mask\",\n", + " source_array_config=ZarrArrayConfig(\n", + " name=\"jrc_mus-liver-zon-1_267_gt\",\n", + " file_name=PosixPath(\n", + " \"/nrs/cellmap/ackermand/data/tmp_data/jrc_mus-liver-zon-1/jrc_mus-liver-zon-1.n5\"\n", + " ),\n", + " dataset=\"volumes/groundtruth/crop267/labels//all\",\n", + " snap_to_grid=(8, 8, 8),\n", + " axes=None,\n", + " ),\n", + " groupings=[\n", + " (\"ecs\", [1]),\n", + " (\"plasma_membrane\", [2]),\n", + " (\"mito\", [3, 4, 5]),\n", + " (\"mito_membrane\", [3]),\n", + " (\"vesicle\", [8, 9]),\n", + " (\"vesicle_membrane\", [8]),\n", + " (\"mvb\", [10, 11]),\n", + " (\"mvb_membrane\", [10]),\n", + " (\"er\", [16, 17, 18, 19, 20, 21, 22, 23]),\n", + " (\"er_membrane\", [16, 18, 20]),\n", + " (\"eres\", [18, 19]),\n", + " (\"nucleus\", [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 37]),\n", + " (\"microtubules\", [30, 31, 36]),\n", + " (\"microtubules_out\", [30]),\n", + " ],\n", + " ),\n", + " sample_points=None,\n", + " ),\n", + " RawGTDatasetConfig(\n", + " name=\"jrc_mus-liver-zon-1_268_many_4nm\",\n", + " weight=1,\n", + " raw_config=IntensitiesArrayConfig(\n", + " name=\"jrc_mus-liver-zon-1_raw\",\n", + " source_array_config=ZarrArrayConfig(\n", + " name=\"jrc_mus-liver-zon-1_raw_uint8\",\n", + " file_name=PosixPath(\n", + " \"/nrs/cellmap/data/jrc_mus-liver-zon-1/jrc_mus-liver-zon-1.n5\"\n", + " ),\n", + " 
dataset=\"em/fibsem-uint8/s0\",\n", + " snap_to_grid=(8, 8, 8),\n", + " axes=None,\n", + " ),\n", + " min=0.0,\n", + " max=255.0,\n", + " ),\n", + " gt_config=BinarizeArrayConfig(\n", + " name=\"jrc_mus-liver-zon-1_268_many_4nm_gt\",\n", + " source_array_config=ZarrArrayConfig(\n", + " name=\"jrc_mus-liver-zon-1_268_gt\",\n", + " file_name=PosixPath(\n", + " \"/nrs/cellmap/ackermand/data/tmp_data/jrc_mus-liver-zon-1/jrc_mus-liver-zon-1.n5\"\n", + " ),\n", + " dataset=\"volumes/groundtruth/crop268/labels//all\",\n", + " snap_to_grid=(8, 8, 8),\n", + " axes=None,\n", + " ),\n", + " groupings=[\n", + " (\"ecs\", [1]),\n", + " (\"plasma_membrane\", [2]),\n", + " (\"mito\", [3, 4, 5]),\n", + " (\"mito_membrane\", [3]),\n", + " (\"vesicle\", [8, 9]),\n", + " (\"vesicle_membrane\", [8]),\n", + " (\"mvb\", [10, 11]),\n", + " (\"mvb_membrane\", [10]),\n", + " (\"er\", [16, 17, 18, 19, 20, 21, 22, 23]),\n", + " (\"er_membrane\", [16, 18, 20]),\n", + " (\"eres\", [18, 19]),\n", + " (\"nucleus\", [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 37]),\n", + " (\"microtubules\", [30, 31, 36]),\n", + " (\"microtubules_out\", [30]),\n", + " ],\n", + " background=0,\n", + " ),\n", + " mask_config=MissingAnnotationsMaskConfig(\n", + " name=\"jrc_mus-liver-zon-1_268_many_4nm_mask\",\n", + " source_array_config=ZarrArrayConfig(\n", + " name=\"jrc_mus-liver-zon-1_268_gt\",\n", + " file_name=PosixPath(\n", + " \"/nrs/cellmap/ackermand/data/tmp_data/jrc_mus-liver-zon-1/jrc_mus-liver-zon-1.n5\"\n", + " ),\n", + " dataset=\"volumes/groundtruth/crop268/labels//all\",\n", + " snap_to_grid=(8, 8, 8),\n", + " axes=None,\n", + " ),\n", + " groupings=[\n", + " (\"ecs\", [1]),\n", + " (\"plasma_membrane\", [2]),\n", + " (\"mito\", [3, 4, 5]),\n", + " (\"mito_membrane\", [3]),\n", + " (\"vesicle\", [8, 9]),\n", + " (\"vesicle_membrane\", [8]),\n", + " (\"mvb\", [10, 11]),\n", + " (\"mvb_membrane\", [10]),\n", + " (\"er\", [16, 17, 18, 19, 20, 21, 22, 23]),\n", + " (\"er_membrane\", [16, 18, 20]),\n", + " 
(\"eres\", [18, 19]),\n", + " (\"nucleus\", [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 37]),\n", + " (\"microtubules\", [30, 31, 36]),\n", + " (\"microtubules_out\", [30]),\n", + " ],\n", + " ),\n", + " sample_points=None,\n", + " ),\n", + " ],\n", + " validate_configs=[\n", + " RawGTDatasetConfig(\n", + " name=\"jrc_mus-liver-zon-1_270_many_4nm\",\n", + " weight=1,\n", + " raw_config=IntensitiesArrayConfig(\n", + " name=\"jrc_mus-liver-zon-1_raw\",\n", + " source_array_config=ZarrArrayConfig(\n", + " name=\"jrc_mus-liver-zon-1_raw_uint8\",\n", + " file_name=PosixPath(\n", + " \"/nrs/cellmap/data/jrc_mus-liver-zon-1/jrc_mus-liver-zon-1.n5\"\n", + " ),\n", + " dataset=\"em/fibsem-uint8/s0\",\n", + " snap_to_grid=(8, 8, 8),\n", + " axes=None,\n", + " ),\n", + " min=0.0,\n", + " max=255.0,\n", + " ),\n", + " gt_config=BinarizeArrayConfig(\n", + " name=\"jrc_mus-liver-zon-1_270_many_4nm_gt\",\n", + " source_array_config=ResampledArrayConfig(\n", + " name=\"jrc_mus-liver-zon-1_270_gt_resampled_4nm\",\n", + " source_array_config=ZarrArrayConfig(\n", + " name=\"jrc_mus-liver-zon-1_270_gt\",\n", + " file_name=PosixPath(\n", + " \"/nrs/cellmap/ackermand/data/tmp_data/jrc_mus-liver-zon-1/jrc_mus-liver-zon-1.n5\"\n", + " ),\n", + " dataset=\"volumes/groundtruth/crop270/labels//all\",\n", + " snap_to_grid=(8, 8, 8),\n", + " axes=None,\n", + " ),\n", + " upsample=(2, 2, 2),\n", + " downsample=(0, 0, 0),\n", + " interp_order=False,\n", + " ),\n", + " groupings=[\n", + " (\"ecs\", [1]),\n", + " (\"plasma_membrane\", [2]),\n", + " (\"mito\", [3, 4, 5]),\n", + " (\"mito_membrane\", [3]),\n", + " (\"vesicle\", [8, 9]),\n", + " (\"vesicle_membrane\", [8]),\n", + " (\"mvb\", [10, 11]),\n", + " (\"mvb_membrane\", [10]),\n", + " (\"er\", [16, 17, 18, 19, 20, 21, 22, 23]),\n", + " (\"er_membrane\", [16, 18, 20]),\n", + " (\"eres\", [18, 19]),\n", + " (\"nucleus\", [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 37]),\n", + " (\"microtubules\", [30, 31, 36]),\n", + " (\"microtubules_out\", 
[30]),\n", + " ],\n", + " background=0,\n", + " ),\n", + " mask_config=MissingAnnotationsMaskConfig(\n", + " name=\"jrc_mus-liver-zon-1_270_many_4nm_mask\",\n", + " source_array_config=ResampledArrayConfig(\n", + " name=\"jrc_mus-liver-zon-1_270_gt_resampled_4nm\",\n", + " source_array_config=ZarrArrayConfig(\n", + " name=\"jrc_mus-liver-zon-1_270_gt\",\n", + " file_name=PosixPath(\n", + " \"/nrs/cellmap/ackermand/data/tmp_data/jrc_mus-liver-zon-1/jrc_mus-liver-zon-1.n5\"\n", + " ),\n", + " dataset=\"volumes/groundtruth/crop270/labels//all\",\n", + " snap_to_grid=(8, 8, 8),\n", + " axes=None,\n", + " ),\n", + " upsample=(2, 2, 2),\n", + " downsample=(0, 0, 0),\n", + " interp_order=False,\n", + " ),\n", + " groupings=[\n", + " (\"ecs\", [1]),\n", + " (\"plasma_membrane\", [2]),\n", + " (\"mito\", [3, 4, 5]),\n", + " (\"mito_membrane\", [3]),\n", + " (\"vesicle\", [8, 9]),\n", + " (\"vesicle_membrane\", [8]),\n", + " (\"mvb\", [10, 11]),\n", + " (\"mvb_membrane\", [10]),\n", + " (\"er\", [16, 17, 18, 19, 20, 21, 22, 23]),\n", + " (\"er_membrane\", [16, 18, 20]),\n", + " (\"eres\", [18, 19]),\n", + " (\"nucleus\", [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 37]),\n", + " (\"microtubules\", [30, 31, 36]),\n", + " (\"microtubules_out\", [30]),\n", + " ],\n", + " ),\n", + " sample_points=None,\n", + " ),\n", + " RawGTDatasetConfig(\n", + " name=\"jrc_mus-liver-zon-1_272_many_4nm\",\n", + " weight=1,\n", + " raw_config=IntensitiesArrayConfig(\n", + " name=\"jrc_mus-liver-zon-1_raw\",\n", + " source_array_config=ZarrArrayConfig(\n", + " name=\"jrc_mus-liver-zon-1_raw_uint8\",\n", + " file_name=PosixPath(\n", + " \"/nrs/cellmap/data/jrc_mus-liver-zon-1/jrc_mus-liver-zon-1.n5\"\n", + " ),\n", + " dataset=\"em/fibsem-uint8/s0\",\n", + " snap_to_grid=(8, 8, 8),\n", + " axes=None,\n", + " ),\n", + " min=0.0,\n", + " max=255.0,\n", + " ),\n", + " gt_config=BinarizeArrayConfig(\n", + " name=\"jrc_mus-liver-zon-1_272_many_4nm_gt\",\n", + " source_array_config=ZarrArrayConfig(\n", + " 
name=\"jrc_mus-liver-zon-1_272_gt\",\n", + " file_name=PosixPath(\n", + " \"/nrs/cellmap/ackermand/data/tmp_data/jrc_mus-liver-zon-1/jrc_mus-liver-zon-1.n5\"\n", + " ),\n", + " dataset=\"volumes/groundtruth/crop272/labels//all\",\n", + " snap_to_grid=(8, 8, 8),\n", + " axes=None,\n", + " ),\n", + " groupings=[\n", + " (\"ecs\", [1]),\n", + " (\"plasma_membrane\", [2]),\n", + " (\"mito\", [3, 4, 5]),\n", + " (\"mito_membrane\", [3]),\n", + " (\"vesicle\", [8, 9]),\n", + " (\"vesicle_membrane\", [8]),\n", + " (\"mvb\", [10, 11]),\n", + " (\"mvb_membrane\", [10]),\n", + " (\"er\", [16, 17, 18, 19, 20, 21, 22, 23]),\n", + " (\"er_membrane\", [16, 18, 20]),\n", + " (\"eres\", [18, 19]),\n", + " (\"nucleus\", [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 37]),\n", + " (\"microtubules\", [30, 31, 36]),\n", + " (\"microtubules_out\", [30]),\n", + " ],\n", + " background=0,\n", + " ),\n", + " mask_config=MissingAnnotationsMaskConfig(\n", + " name=\"jrc_mus-liver-zon-1_272_many_4nm_mask\",\n", + " source_array_config=ZarrArrayConfig(\n", + " name=\"jrc_mus-liver-zon-1_272_gt\",\n", + " file_name=PosixPath(\n", + " \"/nrs/cellmap/ackermand/data/tmp_data/jrc_mus-liver-zon-1/jrc_mus-liver-zon-1.n5\"\n", + " ),\n", + " dataset=\"volumes/groundtruth/crop272/labels//all\",\n", + " snap_to_grid=(8, 8, 8),\n", + " axes=None,\n", + " ),\n", + " groupings=[\n", + " (\"ecs\", [1]),\n", + " (\"plasma_membrane\", [2]),\n", + " (\"mito\", [3, 4, 5]),\n", + " (\"mito_membrane\", [3]),\n", + " (\"vesicle\", [8, 9]),\n", + " (\"vesicle_membrane\", [8]),\n", + " (\"mvb\", [10, 11]),\n", + " (\"mvb_membrane\", [10]),\n", + " (\"er\", [16, 17, 18, 19, 20, 21, 22, 23]),\n", + " (\"er_membrane\", [16, 18, 20]),\n", + " (\"eres\", [18, 19]),\n", + " (\"nucleus\", [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 37]),\n", + " (\"microtubules\", [30, 31, 36]),\n", + " (\"microtubules_out\", [30]),\n", + " ],\n", + " ),\n", + " sample_points=None,\n", + " ),\n", + " RawGTDatasetConfig(\n", + " 
name=\"jrc_mus-liver-zon-1_279_many_4nm\",\n", + " weight=1,\n", + " raw_config=IntensitiesArrayConfig(\n", + " name=\"jrc_mus-liver-zon-1_raw\",\n", + " source_array_config=ZarrArrayConfig(\n", + " name=\"jrc_mus-liver-zon-1_raw_uint8\",\n", + " file_name=PosixPath(\n", + " \"/nrs/cellmap/data/jrc_mus-liver-zon-1/jrc_mus-liver-zon-1.n5\"\n", + " ),\n", + " dataset=\"em/fibsem-uint8/s0\",\n", + " snap_to_grid=(8, 8, 8),\n", + " axes=None,\n", + " ),\n", + " min=0.0,\n", + " max=255.0,\n", + " ),\n", + " gt_config=BinarizeArrayConfig(\n", + " name=\"jrc_mus-liver-zon-1_279_many_4nm_gt\",\n", + " source_array_config=ZarrArrayConfig(\n", + " name=\"jrc_mus-liver-zon-1_279_gt\",\n", + " file_name=PosixPath(\n", + " \"/nrs/cellmap/ackermand/data/tmp_data/jrc_mus-liver-zon-1/jrc_mus-liver-zon-1.n5\"\n", + " ),\n", + " dataset=\"volumes/groundtruth/crop279/labels//all\",\n", + " snap_to_grid=(8, 8, 8),\n", + " axes=None,\n", + " ),\n", + " groupings=[\n", + " (\"ecs\", [1]),\n", + " (\"plasma_membrane\", [2]),\n", + " (\"mito\", [3, 4, 5]),\n", + " (\"mito_membrane\", [3]),\n", + " (\"vesicle\", [8, 9]),\n", + " (\"vesicle_membrane\", [8]),\n", + " (\"mvb\", [10, 11]),\n", + " (\"mvb_membrane\", [10]),\n", + " (\"er\", [16, 17, 18, 19, 20, 21, 22, 23]),\n", + " (\"er_membrane\", [16, 18, 20]),\n", + " (\"eres\", [18, 19]),\n", + " (\"nucleus\", [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 37]),\n", + " (\"microtubules\", [30, 31, 36]),\n", + " (\"microtubules_out\", [30]),\n", + " ],\n", + " background=0,\n", + " ),\n", + " mask_config=MissingAnnotationsMaskConfig(\n", + " name=\"jrc_mus-liver-zon-1_279_many_4nm_mask\",\n", + " source_array_config=ZarrArrayConfig(\n", + " name=\"jrc_mus-liver-zon-1_279_gt\",\n", + " file_name=PosixPath(\n", + " \"/nrs/cellmap/ackermand/data/tmp_data/jrc_mus-liver-zon-1/jrc_mus-liver-zon-1.n5\"\n", + " ),\n", + " dataset=\"volumes/groundtruth/crop279/labels//all\",\n", + " snap_to_grid=(8, 8, 8),\n", + " axes=None,\n", + " ),\n", + " 
groupings=[\n", + " (\"ecs\", [1]),\n", + " (\"plasma_membrane\", [2]),\n", + " (\"mito\", [3, 4, 5]),\n", + " (\"mito_membrane\", [3]),\n", + " (\"vesicle\", [8, 9]),\n", + " (\"vesicle_membrane\", [8]),\n", + " (\"mvb\", [10, 11]),\n", + " (\"mvb_membrane\", [10]),\n", + " (\"er\", [16, 17, 18, 19, 20, 21, 22, 23]),\n", + " (\"er_membrane\", [16, 18, 20]),\n", + " (\"eres\", [18, 19]),\n", + " (\"nucleus\", [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 37]),\n", + " (\"microtubules\", [30, 31, 36]),\n", + " (\"microtubules_out\", [30]),\n", + " ],\n", + " ),\n", + " sample_points=None,\n", + " ),\n", + " ],\n", + ")\n", + "config_store.store_datasplit_config(datasplit_config)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Run" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "example_finetuned_example_distances_4nm_many_example_upsample-unet_example_default_example_jrc_mus-liver-zon-1_many_4nm__0\n", + "example_finetuned_example_distances_4nm_many_example_upsample-unet_example_default_example_jrc_mus-liver-zon-1_many_4nm__1\n", + "example_finetuned_example_distances_4nm_many_example_upsample-unet_example_default_example_jrc_mus-liver-zon-1_many_4nm__2\n" + ] + } + ], + "source": [ + "start_config = StartConfig(\n", + " \"setup04\",\n", + " \"best\",\n", + ")\n", + "iterations = 200000\n", + "validation_interval = 5000\n", + "repetitions = 3\n", + "run_configs = []\n", + "for i in range(repetitions):\n", + " run_config = RunConfig(\n", + " name=(\"_\").join(\n", + " [\n", + " \"example\",\n", + " \"scratch\" if start_config is None else \"finetuned\",\n", + " task_config.name,\n", + " architecture_config.name,\n", + " trainer_config.name,\n", + " datasplit_config.name,\n", + " ]\n", + " )\n", + " + f\"__{i}\",\n", + " task_config=task_config,\n", + " architecture_config=architecture_config,\n", + " 
trainer_config=trainer_config,\n", + " datasplit_config=datasplit_config,\n", + " num_iterations=iterations,\n", + " validation_interval=validation_interval,\n", + " repetition=i,\n", + " start_config=start_config,\n", + " )\n", + " config_store.store_run_config(run_config)\n", + " print(run_config.name)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "plasmodesmata_dacapo", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/distance_task/liver_peroxisome.ipynb b/examples/distance_task/liver_peroxisome.ipynb new file mode 100644 index 000000000..fe0c39934 --- /dev/null +++ b/examples/distance_task/liver_peroxisome.ipynb @@ -0,0 +1,531 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Dacapo\n", + "\n", + "DaCapo is a framework that allows for easy configuration and execution of established machine learning techniques on arbitrarily large volumes of multi-dimensional images.\n", + "\n", + "DaCapo has 4 major configurable components:\n", + "1. **dacapo.datasplits.DataSplit**\n", + "\n", + "2. **dacapo.tasks.Task**\n", + "\n", + "3. **dacapo.architectures.Architecture**\n", + "\n", + "4. **dacapo.trainers.Trainer**\n", + "\n", + "These are then combined in a single **dacapo.experiments.Run** that includes your starting point (whether you want to start training from scratch or continue off of a previously trained model) and stopping criterion (the number of iterations you want to train)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Environment setup\n", + "If you have not already done so, you will need to install DaCapo. 
You can do this by first creating a new environment and then installing DaCapo using pip.\n", + "\n", + "```bash\n", + "conda create -n dacapo python=3.10\n", + "conda activate dacapo\n", + "```\n", + "\n", + "Then, you can install DaCapo using pip, via GitHub:\n", + "\n", + "```bash\n", + "pip install git+https://github.com/janelia-cellmap/dacapo.git\n", + "```\n", + "\n", + "Or you can clone the repository and install it locally:\n", + "\n", + "```bash\n", + "git clone https://github.com/janelia-cellmap/dacapo.git\n", + "cd dacapo\n", + "pip install -e .\n", + "```\n", + "\n", + "Be sure to select this environment in your Jupyter notebook or JupyterLab." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Config Store\n", + "To define where the data goes, create a dacapo.yaml configuration file either in `~/.config/dacapo/dacapo.yaml` or in `./dacapo.yaml`. Here is a template:\n", + "\n", + "```yaml\n", + "mongodbhost: mongodb://dbuser:dbpass@dburl:dbport/\n", + "mongodbname: dacapo\n", + "runs_base_dir: /path/to/my/data/storage\n", + "```\n", + "The runs_base_dir defines where your on-disk data will be stored. The mongodbhost and mongodbname define the MongoDB host and database that will store your cloud data. If you want to store everything on disk, replace the mongodbhost and mongodbname entries with a single `type: files` entry and everything will be saved to disk:\n", + "\n", + "```yaml\n", + "type: files\n", + "runs_base_dir: /path/to/my/data/storage\n", + "```\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from dacapo.store.create_store import create_config_store\n", + "\n", + "config_store = create_config_store()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Datasplit\n", + "Where can you find your data? What format is it in? Does it need to be normalized? What data do you want to use for validation?"
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from dacapo.experiments.datasplits.datasets.arrays import (\n", + " BinarizeArrayConfig,\n", + " IntensitiesArrayConfig,\n", + " MissingAnnotationsMaskConfig,\n", + " ResampledArrayConfig,\n", + " ZarrArrayConfig,\n", + ")\n", + "from dacapo.experiments.datasplits import TrainValidateDataSplitConfig\n", + "from dacapo.experiments.datasplits.datasets import RawGTDatasetConfig\n", + "from pathlib import PosixPath\n", + "\n", + "datasplit_config = TrainValidateDataSplitConfig(\n", + " name=\"example_jrc_mus-livers_peroxisome_8nm\",\n", + " train_configs=[\n", + " RawGTDatasetConfig(\n", + " name=\"jrc_mus-liver_124_peroxisome_8nm\",\n", + " weight=1,\n", + " raw_config=IntensitiesArrayConfig(\n", + " name=\"jrc_mus-liver_s1_raw\",\n", + " source_array_config=ZarrArrayConfig(\n", + " name=\"jrc_mus-liver_raw_uint8\",\n", + " file_name=PosixPath(\n", + " \"/nrs/cellmap/data/jrc_mus-liver/jrc_mus-liver.n5\"\n", + " ),\n", + " dataset=\"volumes/raw/s1\",\n", + " snap_to_grid=(16, 16, 16),\n", + " axes=None,\n", + " ),\n", + " min=0.0,\n", + " max=255.0,\n", + " ),\n", + " gt_config=BinarizeArrayConfig(\n", + " name=\"jrc_mus-liver_124_peroxisome_8nm_gt\",\n", + " source_array_config=ResampledArrayConfig(\n", + " name=\"jrc_mus-liver_124_gt_resampled_8nm\",\n", + " source_array_config=ZarrArrayConfig(\n", + " name=\"jrc_mus-liver_124_gt\",\n", + " file_name=PosixPath(\n", + " \"/nrs/cellmap/zouinkhim/data/tmp_data_v3/jrc_mus-liver/jrc_mus-liver.n5\"\n", + " ),\n", + " dataset=\"volumes/groundtruth/crop124/labels//all\",\n", + " snap_to_grid=(16, 16, 16),\n", + " axes=None,\n", + " ),\n", + " upsample=(0, 0, 0),\n", + " downsample=(2, 2, 2),\n", + " interp_order=False,\n", + " ),\n", + " groupings=[(\"peroxisome\", [47, 48])],\n", + " background=0,\n", + " ),\n", + " mask_config=MissingAnnotationsMaskConfig(\n", + " 
name=\"jrc_mus-liver_124_peroxisome_8nm_mask\",\n", + " source_array_config=ResampledArrayConfig(\n", + " name=\"jrc_mus-liver_124_gt_resampled_8nm\",\n", + " source_array_config=ZarrArrayConfig(\n", + " name=\"jrc_mus-liver_124_gt\",\n", + " file_name=PosixPath(\n", + " \"/nrs/cellmap/zouinkhim/data/tmp_data_v3/jrc_mus-liver/jrc_mus-liver.n5\"\n", + " ),\n", + " dataset=\"volumes/groundtruth/crop124/labels//all\",\n", + " snap_to_grid=(16, 16, 16),\n", + " axes=None,\n", + " ),\n", + " upsample=(0, 0, 0),\n", + " downsample=(2, 2, 2),\n", + " interp_order=False,\n", + " ),\n", + " groupings=[(\"peroxisome\", [47, 48])],\n", + " ),\n", + " sample_points=None,\n", + " ),\n", + " RawGTDatasetConfig(\n", + " name=\"jrc_mus-liver_125_peroxisome_8nm\",\n", + " weight=1,\n", + " raw_config=IntensitiesArrayConfig(\n", + " name=\"jrc_mus-liver_s1_raw\",\n", + " source_array_config=ZarrArrayConfig(\n", + " name=\"jrc_mus-liver_raw_uint8\",\n", + " file_name=PosixPath(\n", + " \"/nrs/cellmap/data/jrc_mus-liver/jrc_mus-liver.n5\"\n", + " ),\n", + " dataset=\"volumes/raw/s1\",\n", + " snap_to_grid=(16, 16, 16),\n", + " axes=None,\n", + " ),\n", + " min=0.0,\n", + " max=255.0,\n", + " ),\n", + " gt_config=BinarizeArrayConfig(\n", + " name=\"jrc_mus-liver_125_peroxisome_8nm_gt\",\n", + " source_array_config=ResampledArrayConfig(\n", + " name=\"jrc_mus-liver_125_gt_resampled_8nm\",\n", + " source_array_config=ZarrArrayConfig(\n", + " name=\"jrc_mus-liver_125_gt\",\n", + " file_name=PosixPath(\n", + " \"/nrs/cellmap/zouinkhim/data/tmp_data_v3/jrc_mus-liver/jrc_mus-liver.n5\"\n", + " ),\n", + " dataset=\"volumes/groundtruth/crop125/labels//all\",\n", + " snap_to_grid=(16, 16, 16),\n", + " axes=None,\n", + " ),\n", + " upsample=(0, 0, 0),\n", + " downsample=(2, 2, 2),\n", + " interp_order=False,\n", + " ),\n", + " groupings=[(\"peroxisome\", [47, 48])],\n", + " background=0,\n", + " ),\n", + " mask_config=MissingAnnotationsMaskConfig(\n", + " 
name=\"jrc_mus-liver_125_peroxisome_8nm_mask\",\n", + " source_array_config=ResampledArrayConfig(\n", + " name=\"jrc_mus-liver_125_gt_resampled_8nm\",\n", + " source_array_config=ZarrArrayConfig(\n", + " name=\"jrc_mus-liver_125_gt\",\n", + " file_name=PosixPath(\n", + " \"/nrs/cellmap/zouinkhim/data/tmp_data_v3/jrc_mus-liver/jrc_mus-liver.n5\"\n", + " ),\n", + " dataset=\"volumes/groundtruth/crop125/labels//all\",\n", + " snap_to_grid=(16, 16, 16),\n", + " axes=None,\n", + " ),\n", + " upsample=(0, 0, 0),\n", + " downsample=(2, 2, 2),\n", + " interp_order=False,\n", + " ),\n", + " groupings=[(\"peroxisome\", [47, 48])],\n", + " ),\n", + " sample_points=None,\n", + " ),\n", + " ],\n", + " validate_configs=[\n", + " RawGTDatasetConfig(\n", + " name=\"jrc_mus-liver_145_peroxisome_8nm\",\n", + " weight=1,\n", + " raw_config=IntensitiesArrayConfig(\n", + " name=\"jrc_mus-liver_s1_raw\",\n", + " source_array_config=ZarrArrayConfig(\n", + " name=\"jrc_mus-liver_raw_uint8\",\n", + " file_name=PosixPath(\n", + " \"/nrs/cellmap/data/jrc_mus-liver/jrc_mus-liver.n5\"\n", + " ),\n", + " dataset=\"volumes/raw/s1\",\n", + " snap_to_grid=(16, 16, 16),\n", + " axes=None,\n", + " ),\n", + " min=0.0,\n", + " max=255.0,\n", + " ),\n", + " gt_config=BinarizeArrayConfig(\n", + " name=\"jrc_mus-liver_145_peroxisome_8nm_gt\",\n", + " source_array_config=ResampledArrayConfig(\n", + " name=\"jrc_mus-liver_145_gt_resampled_8nm\",\n", + " source_array_config=ZarrArrayConfig(\n", + " name=\"jrc_mus-liver_145_gt\",\n", + " file_name=PosixPath(\n", + " \"/nrs/cellmap/zouinkhim/data/tmp_data_v3/jrc_mus-liver/jrc_mus-liver.n5\"\n", + " ),\n", + " dataset=\"volumes/groundtruth/crop145/labels//all\",\n", + " snap_to_grid=(16, 16, 16),\n", + " axes=None,\n", + " ),\n", + " upsample=(0, 0, 0),\n", + " downsample=(2, 2, 2),\n", + " interp_order=False,\n", + " ),\n", + " groupings=[(\"peroxisome\", [47, 48])],\n", + " background=0,\n", + " ),\n", + " mask_config=MissingAnnotationsMaskConfig(\n", + " 
name=\"jrc_mus-liver_145_peroxisome_8nm_mask\",\n", + " source_array_config=ResampledArrayConfig(\n", + " name=\"jrc_mus-liver_145_gt_resampled_8nm\",\n", + " source_array_config=ZarrArrayConfig(\n", + " name=\"jrc_mus-liver_145_gt\",\n", + " file_name=PosixPath(\n", + " \"/nrs/cellmap/zouinkhim/data/tmp_data_v3/jrc_mus-liver/jrc_mus-liver.n5\"\n", + " ),\n", + " dataset=\"volumes/groundtruth/crop145/labels//all\",\n", + " snap_to_grid=(16, 16, 16),\n", + " axes=None,\n", + " ),\n", + " upsample=(0, 0, 0),\n", + " downsample=(2, 2, 2),\n", + " interp_order=False,\n", + " ),\n", + " groupings=[(\"peroxisome\", [47, 48])],\n", + " ),\n", + " sample_points=None,\n", + " )\n", + " ],\n", + ")\n", + "\n", + "config_store.store_datasplit_config(datasplit_config)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Task\n", + "What do you want to learn? An instance segmentation? If so, how? Affinities,\n", + "Distance Transform, Foreground/Background, etc. Each of these tasks is commonly learned\n", + "and evaluated with specific loss functions and evaluation metrics. Some tasks may\n", + "also require specific non-linearities or output formats from your model." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "from dacapo.experiments.tasks import DistanceTaskConfig\n", + "\n", + "task_config = DistanceTaskConfig(\n", + " name=\"example_distances_8nm_peroxisome\",\n", + " channels=[\"peroxisome\"],\n", + " clip_distance=80.0,\n", + " tol_distance=80.0,\n", + " scale_factor=160.0,\n", + " mask_distances=True,\n", + ")\n", + "config_store.store_task_config(task_config)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Architecture\n", + "\n", + "The setup of the network you will train. Biomedical image to image translation often utilizes a UNet, but even after choosing a UNet you still need to provide some additional parameters. 
How much do you want to downsample? How many convolutional layers do you want?" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "from dacapo.experiments.architectures import CNNectomeUNetConfig\n", + "\n", + "architecture_config = CNNectomeUNetConfig(\n", + " name=\"example_attention-upsample-unet\",\n", + " input_shape=(216, 216, 216),\n", + " fmaps_out=72,\n", + " fmaps_in=1,\n", + " num_fmaps=12,\n", + " fmap_inc_factor=6,\n", + " downsample_factors=[(2, 2, 2), (3, 3, 3), (3, 3, 3)],\n", + " kernel_size_down=None,\n", + " kernel_size_up=None,\n", + " eval_shape_increase=(72, 72, 72),\n", + " upsample_factors=[(2, 2, 2)],\n", + " constant_upsample=True,\n", + " padding=\"valid\",\n", + ")\n", + "config_store.store_architecture_config(architecture_config)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Trainer\n", + "\n", + "How do you want to train? This config defines the training loop and how the other three components work together. What sort of augmentations to apply during training, what learning rate and optimizer to use, what batch size to train with." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "from dacapo.experiments.trainers import GunpowderTrainerConfig\n", + "from dacapo.experiments.trainers.gp_augments import (\n", + " ElasticAugmentConfig,\n", + " GammaAugmentConfig,\n", + " IntensityAugmentConfig,\n", + " IntensityScaleShiftAugmentConfig,\n", + ")\n", + "\n", + "trainer_config = GunpowderTrainerConfig(\n", + " name=\"example_default_one_label_finetuning\",\n", + " batch_size=2,\n", + " learning_rate=1e-05,\n", + " num_data_fetchers=20,\n", + " augments=[\n", + " ElasticAugmentConfig(\n", + " control_point_spacing=[100, 100, 100],\n", + " control_point_displacement_sigma=[10.0, 10.0, 10.0],\n", + " rotation_interval=(0.0, 1.5707963267948966),\n", + " subsample=8,\n", + " uniform_3d_rotation=True,\n", + " ),\n", + " IntensityAugmentConfig(scale=(0.5, 1.5), shift=(-0.2, 0.2), clip=True),\n", + " GammaAugmentConfig(gamma_range=(0.5, 1.5)),\n", + " IntensityScaleShiftAugmentConfig(scale=2.0, shift=-1.0),\n", + " ],\n", + " snapshot_interval=10000,\n", + " min_masked=0.05,\n", + " clip_raw=False,\n", + ")\n", + "config_store.store_trainer_config(trainer_config)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Run\n", + "Now that we have our components configured, we just need to combine them into a run and start training. We can have multiple repetitions of a single set of configs in order to increase our chances of finding an optimum." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "example_finetuned_example_jrc_mus-livers_peroxisome_8nm_example_distances_8nm_peroxisome_example_attention-upsample-unet_example_default_one_label_finetuning__0\n", + "example_finetuned_example_jrc_mus-livers_peroxisome_8nm_example_distances_8nm_peroxisome_example_attention-upsample-unet_example_default_one_label_finetuning__1\n", + "example_finetuned_example_jrc_mus-livers_peroxisome_8nm_example_distances_8nm_peroxisome_example_attention-upsample-unet_example_default_one_label_finetuning__2\n" + ] + } + ], + "source": [ + "from dacapo.experiments.starts import StartConfig\n", + "from dacapo.experiments import RunConfig\n", + "from dacapo.experiments.run import Run\n", + "\n", + "start_config = None\n", + "\n", + "# Start from a pretrained model (comment out the assignment below to train from scratch)\n", + "start_config = StartConfig(\n", + " \"setup04\",\n", + " \"best\",\n", + ")\n", + "\n", + "iterations = 200000\n", + "validation_interval = 5\n", + "repetitions = 3\n", + "for i in range(repetitions):\n", + " run_config = RunConfig(\n", + " name=(\"_\").join(\n", + " [\n", + " \"example\",\n", + " \"scratch\" if start_config is None else \"finetuned\",\n", + " datasplit_config.name,\n", + " task_config.name,\n", + " architecture_config.name,\n", + " trainer_config.name,\n", + " ]\n", + " )\n", + " + f\"__{i}\",\n", + " datasplit_config=datasplit_config,\n", + " task_config=task_config,\n", + " architecture_config=architecture_config,\n", + " trainer_config=trainer_config,\n", + " num_iterations=iterations,\n", + " validation_interval=validation_interval,\n", + " repetition=i,\n", + " start_config=start_config,\n", + " )\n", + "\n", + " print(run_config.name)\n", + " config_store.store_run_config(run_config)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Train" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, 
+ "source": [ + "To train one of the runs, you can create a **Run** directly from the run config and pass it to `train_run`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from dacapo.train import train_run\n", + "\n", + "run = Run(config_store.retrieve_run_config(run_config.name))\n", + "train_run(run)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Alternatively, if you want to start your run on a compute cluster, you can use the command line interface: `dacapo train -r {run_config.name}`. This is particularly convenient on compute nodes, where you can specify the compute resources you need." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "plasmodesmata_dacapo", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/distance_task/liver_peroxisome.md b/examples/distance_task/liver_peroxisome.md new file mode 100644 index 000000000..397b80c4f --- /dev/null +++ b/examples/distance_task/liver_peroxisome.md @@ -0,0 +1,413 @@ +# DaCapo + +DaCapo is a framework that allows for easy configuration and execution of established machine learning techniques on arbitrarily large volumes of multi-dimensional images. + +DaCapo has 4 major configurable components: +1. **dacapo.datasplits.DataSplit** + +2. **dacapo.tasks.Task** + +3. **dacapo.architectures.Architecture** + +4. 
**dacapo.trainers.Trainer** + +These are then combined in a single **dacapo.experiments.Run** that includes your starting point (whether you want to start training from scratch or continue from a previously trained model) and your stopping criterion (the number of iterations you want to train). + +## Environment setup +If you have not already done so, you will need to install DaCapo. You can do this by first creating a new environment and then installing DaCapo using pip. + +```bash +conda create -n dacapo python=3.10 +conda activate dacapo +``` + +Then, you can install DaCapo using pip, via GitHub: + +```bash +pip install git+https://github.com/janelia-cellmap/dacapo.git +``` + +Or you can clone the repository and install it locally: + +```bash +git clone https://github.com/janelia-cellmap/dacapo.git +cd dacapo +pip install -e . +``` + +Be sure to select this environment in your Jupyter notebook or JupyterLab. + +## Config Store +To define where the data goes, create a `dacapo.yaml` configuration file either in `~/.config/dacapo/dacapo.yaml` or in `./dacapo.yaml`. Here is a template: + +```yaml +mongodbhost: mongodb://dbuser:dbpass@dburl:dbport/ +mongodbname: dacapo +runs_base_dir: /path/to/my/data/storage +``` +The `runs_base_dir` defines where your on-disk data will be stored. The `mongodbhost` and `mongodbname` define the MongoDB host and database that will store your cloud data. If you want to store everything on disk, replace `mongodbhost` and `mongodbname` with a single `type: files` entry and everything will be saved to disk: + +```yaml +type: files +runs_base_dir: /path/to/my/data/storage +``` + + + +```python +from dacapo.store.create_store import create_config_store + +config_store = create_config_store() +``` + +## Datasplit +Where can you find your data? What format is it in? Does it need to be normalized? What data do you want to use for validation? 
+ + +```python +from dacapo.experiments.datasplits.datasets.arrays import ( + BinarizeArrayConfig, + IntensitiesArrayConfig, + MissingAnnotationsMaskConfig, + ResampledArrayConfig, + ZarrArrayConfig, +) +from dacapo.experiments.datasplits import TrainValidateDataSplitConfig +from dacapo.experiments.datasplits.datasets import RawGTDatasetConfig +from pathlib import PosixPath + +datasplit_config = TrainValidateDataSplitConfig( + name="example_jrc_mus-livers_peroxisome_8nm", + train_configs=[ + RawGTDatasetConfig( + name="jrc_mus-liver_124_peroxisome_8nm", + weight=1, + raw_config=IntensitiesArrayConfig( + name="jrc_mus-liver_s1_raw", + source_array_config=ZarrArrayConfig( + name="jrc_mus-liver_raw_uint8", + file_name=PosixPath( + "/nrs/cellmap/data/jrc_mus-liver/jrc_mus-liver.n5" + ), + dataset="volumes/raw/s1", + snap_to_grid=(16, 16, 16), + axes=None, + ), + min=0.0, + max=255.0, + ), + gt_config=BinarizeArrayConfig( + name="jrc_mus-liver_124_peroxisome_8nm_gt", + source_array_config=ResampledArrayConfig( + name="jrc_mus-liver_124_gt_resampled_8nm", + source_array_config=ZarrArrayConfig( + name="jrc_mus-liver_124_gt", + file_name=PosixPath( + "/nrs/cellmap/zouinkhim/data/tmp_data_v3/jrc_mus-liver/jrc_mus-liver.n5" + ), + dataset="volumes/groundtruth/crop124/labels//all", + snap_to_grid=(16, 16, 16), + axes=None, + ), + upsample=(0, 0, 0), + downsample=(2, 2, 2), + interp_order=False, + ), + groupings=[("peroxisome", [47, 48])], + background=0, + ), + mask_config=MissingAnnotationsMaskConfig( + name="jrc_mus-liver_124_peroxisome_8nm_mask", + source_array_config=ResampledArrayConfig( + name="jrc_mus-liver_124_gt_resampled_8nm", + source_array_config=ZarrArrayConfig( + name="jrc_mus-liver_124_gt", + file_name=PosixPath( + "/nrs/cellmap/zouinkhim/data/tmp_data_v3/jrc_mus-liver/jrc_mus-liver.n5" + ), + dataset="volumes/groundtruth/crop124/labels//all", + snap_to_grid=(16, 16, 16), + axes=None, + ), + upsample=(0, 0, 0), + downsample=(2, 2, 2), + interp_order=False, + ), 
+ groupings=[("peroxisome", [47, 48])], + ), + sample_points=None, + ), + RawGTDatasetConfig( + name="jrc_mus-liver_125_peroxisome_8nm", + weight=1, + raw_config=IntensitiesArrayConfig( + name="jrc_mus-liver_s1_raw", + source_array_config=ZarrArrayConfig( + name="jrc_mus-liver_raw_uint8", + file_name=PosixPath( + "/nrs/cellmap/data/jrc_mus-liver/jrc_mus-liver.n5" + ), + dataset="volumes/raw/s1", + snap_to_grid=(16, 16, 16), + axes=None, + ), + min=0.0, + max=255.0, + ), + gt_config=BinarizeArrayConfig( + name="jrc_mus-liver_125_peroxisome_8nm_gt", + source_array_config=ResampledArrayConfig( + name="jrc_mus-liver_125_gt_resampled_8nm", + source_array_config=ZarrArrayConfig( + name="jrc_mus-liver_125_gt", + file_name=PosixPath( + "/nrs/cellmap/zouinkhim/data/tmp_data_v3/jrc_mus-liver/jrc_mus-liver.n5" + ), + dataset="volumes/groundtruth/crop125/labels//all", + snap_to_grid=(16, 16, 16), + axes=None, + ), + upsample=(0, 0, 0), + downsample=(2, 2, 2), + interp_order=False, + ), + groupings=[("peroxisome", [47, 48])], + background=0, + ), + mask_config=MissingAnnotationsMaskConfig( + name="jrc_mus-liver_125_peroxisome_8nm_mask", + source_array_config=ResampledArrayConfig( + name="jrc_mus-liver_125_gt_resampled_8nm", + source_array_config=ZarrArrayConfig( + name="jrc_mus-liver_125_gt", + file_name=PosixPath( + "/nrs/cellmap/zouinkhim/data/tmp_data_v3/jrc_mus-liver/jrc_mus-liver.n5" + ), + dataset="volumes/groundtruth/crop125/labels//all", + snap_to_grid=(16, 16, 16), + axes=None, + ), + upsample=(0, 0, 0), + downsample=(2, 2, 2), + interp_order=False, + ), + groupings=[("peroxisome", [47, 48])], + ), + sample_points=None, + ), + ], + validate_configs=[ + RawGTDatasetConfig( + name="jrc_mus-liver_145_peroxisome_8nm", + weight=1, + raw_config=IntensitiesArrayConfig( + name="jrc_mus-liver_s1_raw", + source_array_config=ZarrArrayConfig( + name="jrc_mus-liver_raw_uint8", + file_name=PosixPath( + "/nrs/cellmap/data/jrc_mus-liver/jrc_mus-liver.n5" + ), + 
dataset="volumes/raw/s1", + snap_to_grid=(16, 16, 16), + axes=None, + ), + min=0.0, + max=255.0, + ), + gt_config=BinarizeArrayConfig( + name="jrc_mus-liver_145_peroxisome_8nm_gt", + source_array_config=ResampledArrayConfig( + name="jrc_mus-liver_145_gt_resampled_8nm", + source_array_config=ZarrArrayConfig( + name="jrc_mus-liver_145_gt", + file_name=PosixPath( + "/nrs/cellmap/zouinkhim/data/tmp_data_v3/jrc_mus-liver/jrc_mus-liver.n5" + ), + dataset="volumes/groundtruth/crop145/labels//all", + snap_to_grid=(16, 16, 16), + axes=None, + ), + upsample=(0, 0, 0), + downsample=(2, 2, 2), + interp_order=False, + ), + groupings=[("peroxisome", [47, 48])], + background=0, + ), + mask_config=MissingAnnotationsMaskConfig( + name="jrc_mus-liver_145_peroxisome_8nm_mask", + source_array_config=ResampledArrayConfig( + name="jrc_mus-liver_145_gt_resampled_8nm", + source_array_config=ZarrArrayConfig( + name="jrc_mus-liver_145_gt", + file_name=PosixPath( + "/nrs/cellmap/zouinkhim/data/tmp_data_v3/jrc_mus-liver/jrc_mus-liver.n5" + ), + dataset="volumes/groundtruth/crop145/labels//all", + snap_to_grid=(16, 16, 16), + axes=None, + ), + upsample=(0, 0, 0), + downsample=(2, 2, 2), + interp_order=False, + ), + groupings=[("peroxisome", [47, 48])], + ), + sample_points=None, + ) + ], +) + +config_store.store_datasplit_config(datasplit_config) +``` + +## Task +What do you want to learn? An instance segmentation? If so, how? Affinities, +Distance Transform, Foreground/Background, etc. Each of these tasks is commonly learned +and evaluated with specific loss functions and evaluation metrics. Some tasks may +also require specific non-linearities or output formats from your model. 
+ + +```python +from dacapo.experiments.tasks import DistanceTaskConfig + +task_config = DistanceTaskConfig( + name="example_distances_8nm_peroxisome", + channels=["peroxisome"], + clip_distance=80.0, + tol_distance=80.0, + scale_factor=160.0, + mask_distances=True, +) +config_store.store_task_config(task_config) +``` + +## Architecture + +The setup of the network you will train. Biomedical image to image translation often utilizes a UNet, but even after choosing a UNet you still need to provide some additional parameters. How much do you want to downsample? How many convolutional layers do you want? + + +```python +from dacapo.experiments.architectures import CNNectomeUNetConfig + +architecture_config = CNNectomeUNetConfig( + name="example_attention-upsample-unet", + input_shape=(216, 216, 216), + fmaps_out=72, + fmaps_in=1, + num_fmaps=12, + fmap_inc_factor=6, + downsample_factors=[(2, 2, 2), (3, 3, 3), (3, 3, 3)], + kernel_size_down=None, + kernel_size_up=None, + eval_shape_increase=(72, 72, 72), + upsample_factors=[(2, 2, 2)], + constant_upsample=True, + padding="valid", +) +config_store.store_architecture_config(architecture_config) +``` + +## Trainer + +How do you want to train? This config defines the training loop and how the other three components work together. What sort of augmentations to apply during training, what learning rate and optimizer to use, what batch size to train with. 
+ + +```python +from dacapo.experiments.trainers import GunpowderTrainerConfig +from dacapo.experiments.trainers.gp_augments import ( + ElasticAugmentConfig, + GammaAugmentConfig, + IntensityAugmentConfig, + IntensityScaleShiftAugmentConfig, +) + +trainer_config = GunpowderTrainerConfig( + name="example_default_one_label_finetuning", + batch_size=2, + learning_rate=1e-05, + num_data_fetchers=20, + augments=[ + ElasticAugmentConfig( + control_point_spacing=[100, 100, 100], + control_point_displacement_sigma=[10.0, 10.0, 10.0], + rotation_interval=(0.0, 1.5707963267948966), + subsample=8, + uniform_3d_rotation=True, + ), + IntensityAugmentConfig(scale=(0.5, 1.5), shift=(-0.2, 0.2), clip=True), + GammaAugmentConfig(gamma_range=(0.5, 1.5)), + IntensityScaleShiftAugmentConfig(scale=2.0, shift=-1.0), + ], + snapshot_interval=10000, + min_masked=0.05, + clip_raw=False, +) +config_store.store_trainer_config(trainer_config) +``` + +## Run +Now that we have our components configured, we just need to combine them into a run and start training. We can have multiple repetitions of a single set of configs in order to increase our chances of finding an optimum. 
+ + +```python +from dacapo.experiments.starts import StartConfig +from dacapo.experiments import RunConfig +from dacapo.experiments.run import Run + +start_config = None + +# Start from a pretrained model (comment out the assignment below to train from scratch) +start_config = StartConfig( + "setup04", + "best", +) + +iterations = 200000 +validation_interval = 5 +repetitions = 3 +for i in range(repetitions): + run_config = RunConfig( + name=("_").join( + [ + "example", + "scratch" if start_config is None else "finetuned", + datasplit_config.name, + task_config.name, + architecture_config.name, + trainer_config.name, + ] + ) + + f"__{i}", + datasplit_config=datasplit_config, + task_config=task_config, + architecture_config=architecture_config, + trainer_config=trainer_config, + num_iterations=iterations, + validation_interval=validation_interval, + repetition=i, + start_config=start_config, + ) + + print(run_config.name) + config_store.store_run_config(run_config) +``` + + example_finetuned_example_jrc_mus-livers_peroxisome_8nm_example_distances_8nm_peroxisome_example_attention-upsample-unet_example_default_one_label_finetuning__0 + example_finetuned_example_jrc_mus-livers_peroxisome_8nm_example_distances_8nm_peroxisome_example_attention-upsample-unet_example_default_one_label_finetuning__1 + example_finetuned_example_jrc_mus-livers_peroxisome_8nm_example_distances_8nm_peroxisome_example_attention-upsample-unet_example_default_one_label_finetuning__2 + + +## Train + +To train one of the runs, you can create a **Run** directly from the run config and pass it to `train_run`: + + +```python +from dacapo.train import train_run + +run = Run(config_store.retrieve_run_config(run_config.name)) +train_run(run) +``` + +Alternatively, if you want to start your run on a compute cluster, you can use the command line interface: `dacapo train -r {run_config.name}`. This is particularly convenient on compute nodes, where you can specify the compute resources you need. 
diff --git a/pyproject.toml b/pyproject.toml index ebbe10e94..2ba7c4f10 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -73,6 +73,11 @@ docs = [ "sphinx-autodoc-typehints", "sphinx-material", ] +examples = [ + "ipython", + "ipykernel", + "jupyter", +] [project.urls] homepage = "https://github.com/janelia-cellmap/dacapo"