Dev/main (#117)
rhoadesScholar authored Feb 21, 2024
2 parents c97d8cf + 415a7b3 commit 874a4a0
Showing 116 changed files with 2,622 additions and 869 deletions.
26 changes: 26 additions & 0 deletions .github/ISSUE_TEMPLATE.md
@@ -0,0 +1,26 @@
+* {{ cookiecutter.project_name }} version:
+* Python version:
+* Operating System:
+
+### Bug or feature?
+
+Are you reporting a bug or adding a new feature? Perhaps you want more documentation?
+Please tell us here and tag the issue appropriately.
+
+### Description
+
+Describe the bug you want fixed, the feature you want to see, etc.
+If you are requesting a new feature, please say why you think that feature is important and belongs in this library, and any guesses on how to implement it.
+
+### What I Did/What I Would Like to Do
+
+For a bug:
+```
+Paste the command(s) you ran and the output.
+If there was a crash, please include the traceback here.
+```
+
+For a feature:
+```
+Provide sample code that you would hope to run and what it would output.
+```
3 changes: 1 addition & 2 deletions .github/workflows/docs.yaml
@@ -16,12 +16,11 @@ jobs:
         with:
           fetch-depth: 0 # otherwise, you will failed to push refs to dest repo
       - name: install dacapo
-        run: pip install .
+        run: pip install .[docs]
       - name: Build and Commit
        uses: sphinx-notes/pages@v2
         with:
           documentation_path: ./docs/source
-          requirements_path: ./docs/requirements.txt
      - name: Push changes
        uses: ad-m/github-push-action@master
        with:
1 change: 1 addition & 0 deletions .gitignore
@@ -17,4 +17,5 @@ __pycache__
 # vscode stuff
 .vscode
 .mypy_cache
+daisy_logs/

9 changes: 0 additions & 9 deletions .pre-commit-config.yaml
@@ -22,12 +22,3 @@ repos:
     rev: v0.16
     hooks:
       - id: validate-pyproject
-
-  - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v1.8.0
-    hooks:
-      - id: mypy
-        files: "^dacapo/"
-        # # you have to add the things you want to type check against here
-        # additional_dependencies:
-        #   - numpy
3 changes: 3 additions & 0 deletions CONTRIBUTOR.md → CONTRIBUTING.md
@@ -20,6 +20,9 @@ To run tests with coverage locally:
 `pytest tests --color=yes --cov --cov-report=term-missing`
 This will also be run automatically when a PR is made to master, and a codecov report will be generated telling you whether your PR increased or decreased coverage.

+## Doc Generation
+Docstrings are generated by a GitHub action, but you can also build the docs locally with
+`sphinx-build -M html docs/source/ docs/build/`

 ## Branching and PRs
 - Users that have been added to the CellMap organization and the DaCapo project should be able to develop directly into the CellMap fork of DaCapo. Other users will need to create a fork.
18 changes: 18 additions & 0 deletions dacapo/__init__.py
@@ -1,6 +1,24 @@
"""
dacapo module
==============
This module contains several useful methods for performing common tasks in dacapo library.
Modules:
-----------
Options - Deals with configuring aspects of the program's operations.
experiments - This module is responsible for conducting experiments.
apply - Applies the results of the training process to the given dataset.
train - Trains the model using given data set.
validate - This module is for validating the model.
predict - This module is used to generate predictions based on the model.
"""

from .options import Options # noqa
from . import experiments # noqa
from .apply import apply # noqa
from .train import train # noqa
from .validate import validate # noqa
from .predict import predict # noqa

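The new docstring doubles as a map of the package's public API. A minimal workflow sketch, assuming a run configuration named "my_run" already exists in the configured store (the run name and iteration number here are invented):

```python
import dacapo

# Hypothetical workflow; "my_run" and the iteration number are invented.
dacapo.train("my_run")                     # train the model for the stored run config
dacapo.validate("my_run", iteration=5000)  # score a saved checkpoint on validation data
```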
70 changes: 38 additions & 32 deletions dacapo/apply.py
Expand Up @@ -26,15 +26,15 @@

def apply(
run_name: str,
input_container: Path or str,
input_container: Path | str,
input_dataset: str,
output_path: Path or str,
validation_dataset: Optional[Dataset or str] = None,
criterion: Optional[str] = "voi",
output_path: Path | str,
validation_dataset: Optional[Dataset | str] = None,
criterion: str = "voi",
iteration: Optional[int] = None,
parameters: Optional[PostProcessorParameters or str] = None,
roi: Optional[Roi or str] = None,
num_cpu_workers: int = 30,
parameters: Optional[PostProcessorParameters | str] = None,
roi: Optional[Roi | str] = None,
num_workers: int = 30,
output_dtype: Optional[np.dtype | str] = np.uint8, # type: ignore
compute_context: ComputeContext = LocalTorch(),
overwrite: bool = True,
@@ -75,24 +75,27 @@ def apply(
     logger.info("Loading weights for iteration %i", iteration)
     weights_store.retrieve_weights(run_name, iteration)

-    # find the best parameters
-    if isinstance(validation_dataset, str) and run.datasplit.validate is not None:
-        val_ds_name = validation_dataset
-        validation_dataset = [
-            dataset for dataset in run.datasplit.validate if dataset.name == val_ds_name
-        ][0]
-    elif isinstance(validation_dataset, Dataset) or parameters is not None:
-        pass
-    else:
-        raise ValueError(
-            "validation_dataset must be a dataset name or a Dataset object, or parameters must be provided explicitly."
-        )
     if parameters is None:
+        # find the best parameters
+        _validation_dataset: Dataset
+        if isinstance(validation_dataset, str) and run.datasplit.validate is not None:
+            val_ds_name = validation_dataset
+            _validation_dataset = [
+                dataset
+                for dataset in run.datasplit.validate
+                if dataset.name == val_ds_name
+            ][0]
+        elif isinstance(validation_dataset, Dataset):
+            _validation_dataset = validation_dataset
+        else:
+            raise ValueError(
+                "validation_dataset must be a dataset name or a Dataset object, or parameters must be provided explicitly."
+            )
         logger.info(
-            "Finding best parameters for validation dataset %s", validation_dataset
+            "Finding best parameters for validation dataset %s", _validation_dataset
         )
         parameters = run.task.evaluator.get_overall_best_parameters(  # TODO
-            validation_dataset, criterion
+            _validation_dataset, criterion
         )
     assert (
         parameters is not None
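The refactor makes the contract explicit: the validation lookup runs only when no parameters are given. A hypothetical call sketch (the run, container, and dataset names are invented):

```python
# With parameters=None, apply() finds the post-processing parameters that
# scored best on the named validation dataset under the given criterion.
apply(
    run_name="my_run",
    input_container="input.zarr",
    input_dataset="volumes/raw",
    output_path="output.zarr",
    validation_dataset="example_val",
    criterion="voi",
)
```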
@@ -157,42 +160,45 @@ def apply(
         Path(input_container, input_dataset),
     )
     return apply_run(
-        run,
+        run.name,
         iteration,
         parameters,
-        input_array,
+        input_array_identifier,
         prediction_array_identifier,
         output_array_identifier,
         roi,
-        num_cpu_workers,
+        num_workers,
         output_dtype,
         compute_context,
         overwrite,
     )


 def apply_run(
-    run: Run,
+    run_name: str,
     iteration: int,
     parameters: PostProcessorParameters,
-    input_array: Array,
+    input_array_identifier: "LocalArrayIdentifier",
     prediction_array_identifier: "LocalArrayIdentifier",
     output_array_identifier: "LocalArrayIdentifier",
     roi: Optional[Roi] = None,
-    num_cpu_workers: int = 30,
+    num_workers: int = 30,
     output_dtype: Optional[np.dtype] = np.uint8,  # type: ignore
     compute_context: ComputeContext = LocalTorch(),
     overwrite: bool = True,
 ):
     """Apply the model to a dataset. If roi is None, the whole input dataset is used. Assumes model is already loaded."""
-    run.model.eval()

     # render prediction dataset
     logger.info("Predicting on dataset %s", prediction_array_identifier)
     predict(
-        run.model,
-        input_array,
-        prediction_array_identifier,
+        run_name,
+        iteration,
+        input_container=input_array_identifier.container,
+        input_dataset=input_array_identifier.dataset,
+        output_path=prediction_array_identifier.container,
         output_roi=roi,
-        num_workers=num_cpu_workers,
+        num_workers=num_workers,
         output_dtype=output_dtype,
         compute_context=compute_context,
         overwrite=overwrite,
2 changes: 1 addition & 1 deletion dacapo/blockwise/__init__.py
@@ -1,2 +1,2 @@
 from .blockwise_task import DaCapoBlockwiseTask
-from .scheduler import run_blockwise
+from .scheduler import run_blockwise, segment_blockwise
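With the scheduler re-export, both blockwise entry points are importable from the package root (names exactly as in the diff above):

```python
from dacapo.blockwise import DaCapoBlockwiseTask, run_blockwise, segment_blockwise
```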
6 changes: 3 additions & 3 deletions dacapo/blockwise/argmax_worker.py
@@ -1,7 +1,7 @@
 from pathlib import Path
 from dacapo.experiments.datasplits.datasets.arrays.zarr_array import ZarrArray
 from dacapo.store.array_store import LocalArrayIdentifier
-from dacapo.compute_context import ComputeContext, LocalTorch, Bsub
+from dacapo.compute_context import ComputeContext, LocalTorch

 import daisy

@@ -41,9 +41,9 @@ def cli(log_level):
 )
 @click.option("-od", "--output_dataset", required=True, type=str)
 def start_worker(
-    input_container: Path or str,
+    input_container: Path | str,
     input_dataset: str,
-    output_container: Path or str,
+    output_container: Path | str,
     output_dataset: str,
 ):
     # get arrays
15 changes: 7 additions & 8 deletions dacapo/blockwise/blockwise_task.py
@@ -1,17 +1,16 @@
 from datetime import datetime
 from importlib.machinery import SourceFileLoader
 from pathlib import Path
-from typing import Callable, Optional
 from daisy import Task, Roi
-from dacapo.compute_context import ComputeContext, LocalTorch, Bsub
+from dacapo.compute_context import ComputeContext
 import dacapo.compute_context


 class DaCapoBlockwiseTask(Task):
     def __init__(
         self,
-        worker_file: str or Path,
-        compute_context: ComputeContext or str,
+        worker_file: str | Path,
+        compute_context: ComputeContext | str,
         total_roi: Roi,
         read_roi: Roi,
         write_roi: Roi,
@@ -25,14 +24,14 @@ def __init__(
         if isinstance(compute_context, str):
             compute_context = getattr(dacapo.compute_context, compute_context)()

-        # Make the task_id unique
-        timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
-        task_id = worker_file + timestamp
-
         # Load worker functions
         worker_name = Path(worker_file).stem
         worker = SourceFileLoader(worker_name, str(worker_file)).load_module()

+        # Make the task_id unique
+        timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
+        task_id = worker_name + timestamp
+
         process_function = worker.spawn_worker(
             *args, **kwargs, compute_context=compute_context
         )
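Moving the task-id code below the loader also changes what it is built from: the old version concatenated the full `worker_file` path (separators and all) with the timestamp, while the new version uses the loaded worker's stem. A small sketch (the path is an invented example):

```python
from datetime import datetime
from pathlib import Path

worker_file = "/repo/dacapo/blockwise/predict_worker.py"  # invented example path
worker_name = Path(worker_file).stem                      # "predict_worker"
timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")

# old: worker_file + timestamp -> "/repo/dacapo/blockwise/predict_worker.py2024-02-21_12-00-00"
# new: worker_name + timestamp -> "predict_worker2024-02-21_12-00-00"
print(worker_name + timestamp)
```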
9 changes: 7 additions & 2 deletions dacapo/blockwise/predict_worker.py
@@ -1,4 +1,6 @@
 from pathlib import Path
+
+import torch
 from dacapo.experiments.datasplits.datasets.arrays.zarr_array import ZarrArray
 from dacapo.gp.dacapo_array_source import DaCapoArraySource
 from dacapo.store.array_store import LocalArrayIdentifier
@@ -60,9 +62,9 @@ def cli(log_level):
 def start_worker(
     run_name: str,
     iteration: int,
-    input_container: Path or str,
+    input_container: Path | str,
     input_dataset: str,
-    output_container: Path or str,
+    output_container: Path | str,
     output_dataset: str,
     device: str = "cuda",
 ):
@@ -86,6 +88,9 @@ def start_worker(
     )
     output_array = ZarrArray.open_from_array_identifier(output_array_identifier)

+    # set benchmark flag to True for performance
+    torch.backends.cudnn.benchmark = True
+
     # get the model's input and output size
     model = run.model.eval()
     input_voxel_size = Coordinate(raw_array.voxel_size)
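`torch.backends.cudnn.benchmark = True` tells cuDNN to time several convolution algorithms the first time it sees an input shape and cache the fastest one, which pays off here because blockwise prediction feeds the model uniformly sized blocks; with frequently changing shapes the repeated autotuning can instead slow things down, and it trades away exact reproducibility. A minimal sketch of the flag and its companion settings:

```python
import torch

# Autotune conv algorithms per input shape; a good fit for fixed-size blocks.
torch.backends.cudnn.benchmark = True

# For bitwise run-to-run reproducibility one would instead use:
# torch.backends.cudnn.benchmark = False
# torch.backends.cudnn.deterministic = True
```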