From e2c4f4b122b6c517615a2a24e7dc7ae567bc787a Mon Sep 17 00:00:00 2001 From: SarahMuth Date: Wed, 23 Oct 2024 20:00:05 +0200 Subject: [PATCH 01/35] AZ segmentation --- scripts/cooper/AZ_segmentation_h5.py | 142 ++++++++++++++++++++++++ scripts/rizzoli/evaluation_2D.py | 4 +- synaptic_reconstruction/inference/AZ.py | 82 ++++++++++++++ 3 files changed, 226 insertions(+), 2 deletions(-) create mode 100644 scripts/cooper/AZ_segmentation_h5.py create mode 100644 synaptic_reconstruction/inference/AZ.py diff --git a/scripts/cooper/AZ_segmentation_h5.py b/scripts/cooper/AZ_segmentation_h5.py new file mode 100644 index 0000000..07ff718 --- /dev/null +++ b/scripts/cooper/AZ_segmentation_h5.py @@ -0,0 +1,142 @@ +import argparse +import h5py +import os +from pathlib import Path + +from tqdm import tqdm +from elf.io import open_file + +from synaptic_reconstruction.inference.AZ import segment_AZ +from synaptic_reconstruction.inference.util import parse_tiling + +def _require_output_folders(output_folder): + #seg_output = os.path.join(output_folder, "segmentations") + seg_output = output_folder + os.makedirs(seg_output, exist_ok=True) + return seg_output + +def get_volume(input_path): + ''' + with h5py.File(input_path) as seg_file: + input_volume = seg_file["raw"][:] + ''' + with open_file(input_path, "r") as f: + + # Try to automatically derive the key with the raw data. + keys = list(f.keys()) + if len(keys) == 1: + key = keys[0] + elif "data" in keys: + key = "data" + elif "raw" in keys: + key = "raw" + + input_volume = f[key][:] + return input_volume + +def run_AZ_segmentation(input_path, output_path, model_path, mask_path, mask_key,tile_shape, halo, key_label): + tiling = parse_tiling(tile_shape, halo) + print(f"using tiling {tiling}") + input = get_volume(input_path) + + #check if we have a restricting mask for the segmentation + if mask_path is not None: + with open_file(mask_path, "r") as f: + mask = f[mask_key][:] + else: + mask = None + + foreground = segment_AZ(input_volume=input, model_path=model_path, verbose=False, tiling=tiling, mask = mask) + + seg_output = _require_output_folders(output_path) + file_name = Path(input_path).stem + seg_path = os.path.join(seg_output, f"{file_name}.h5") + + #check + os.makedirs(Path(seg_path).parent, exist_ok=True) + + print(f"Saving results in {seg_path}") + with h5py.File(seg_path, "a") as f: + if "raw" in f: + print("raw image already saved") + else: + f.create_dataset("raw", data=input, compression="gzip") + + key=f"AZ/segment_from_{key_label}" + if key in f: + print("Skipping", input_path, "because", key, "exists") + else: + f.create_dataset(key, data=foreground, compression="gzip") + + if mask is not None: + if mask_key in f: + print("mask image already saved") + else: + f.create_dataset(mask_key, data = mask, compression = "gzip") + + + + +def segment_folder(args): + input_files = [] + for root, dirs, files in os.walk(args.input_path): + input_files.extend([ + os.path.join(root, name) for name in files if name.endswith(".h5") + ]) + print(input_files) + pbar = tqdm(input_files, desc="Run segmentation") + for input_path in pbar: + + filename = os.path.basename(input_path) + try: + mask_path = os.path.join(args.mask_path, filename) + except: + print(f"Mask file not found for {input_path}") + mask_path = None + + run_AZ_segmentation(input_path, args.output_path, args.model_path, mask_path, args.mask_key, args.tile_shape, args.halo, args.key_label) + +def main(): + parser = argparse.ArgumentParser(description="Segment vesicles in EM tomograms.") + 
parser.add_argument( + "--input_path", "-i", required=True, + help="The filepath to the mrc file or the directory containing the tomogram data." + ) + parser.add_argument( + "--output_path", "-o", required=True, + help="The filepath to directory where the segmentations will be saved." + ) + parser.add_argument( + "--model_path", "-m", required=True, help="The filepath to the vesicle model." + ) + parser.add_argument( + "--mask_path", help="The filepath to a h5 file with a mask that will be used to restrict the segmentation. Needs to be in combination with mask_key." + ) + parser.add_argument( + "--mask_key", help="Key name that holds the mask segmentation" + ) + parser.add_argument( + "--tile_shape", type=int, nargs=3, + help="The tile shape for prediction. Lower the tile shape if GPU memory is insufficient." + ) + parser.add_argument( + "--halo", type=int, nargs=3, + help="The halo for prediction. Increase the halo to minimize boundary artifacts." + ) + parser.add_argument( + "--key_label", "-k", default = "combined_vesicles", + help="Give the key name for saving the segmentation in h5." + ) + args = parser.parse_args() + + input_ = args.input_path + + if os.path.isdir(input_): + segment_folder(args) + else: + run_AZ_segmentation(input_, args.output_path, args.model_path, args.mask_path, args.mask_key, args.tile_shape, args.halo, args.key_label) + + print("Finished segmenting!") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/scripts/rizzoli/evaluation_2D.py b/scripts/rizzoli/evaluation_2D.py index 1cae666..18fd4f1 100644 --- a/scripts/rizzoli/evaluation_2D.py +++ b/scripts/rizzoli/evaluation_2D.py @@ -58,8 +58,8 @@ def evaluate_file(labels_path, vesicles_path, model_name, segment_key, anno_key) #get the labels and vesicles with h5py.File(labels_path) as label_file: labels = label_file["labels"] - #vesicles = labels["vesicles"] - gt = labels[anno_key][:] + vesicles = labels["vesicles"] + gt = vesicles[anno_key][:] with h5py.File(vesicles_path) as seg_file: segmentation = seg_file["vesicles"] diff --git a/synaptic_reconstruction/inference/AZ.py b/synaptic_reconstruction/inference/AZ.py new file mode 100644 index 0000000..b93218f --- /dev/null +++ b/synaptic_reconstruction/inference/AZ.py @@ -0,0 +1,82 @@ +import time +from typing import Dict, List, Optional, Tuple, Union + +import elf.parallel as parallel +import numpy as np +import torch + +from synaptic_reconstruction.inference.util import get_prediction, _Scaler + + +def _run_segmentation( + foreground, verbose, min_size, + # blocking shapes for parallel computation + block_shape=(128, 256, 256), +): + + # get the segmentation via seeded watershed + t0 = time.time() + seg = parallel.label(foreground > 0.5, block_shape=block_shape, verbose=verbose) + if verbose: + print("Compute connected components in", time.time() - t0, "s") + + # size filter + t0 = time.time() + ids, sizes = parallel.unique(seg, return_counts=True, block_shape=block_shape, verbose=verbose) + filter_ids = ids[sizes < min_size] + seg[np.isin(seg, filter_ids)] = 0 + if verbose: + print("Size filter in", time.time() - t0, "s") + seg = np.where(seg > 0, 1, 0) + return seg + +def segment_AZ( + input_volume: np.ndarray, + model_path: Optional[str] = None, + model: Optional[torch.nn.Module] = None, + tiling: Optional[Dict[str, Dict[str, int]]] = None, + min_size: int = 500, + verbose: bool = True, + return_predictions: bool = False, + scale: Optional[List[float]] = None, + mask: Optional[np.ndarray] = None, +) -> Union[np.ndarray, 
Tuple[np.ndarray, np.ndarray]]: + """ + Segment mitochondria in an input volume. + + Args: + input_volume: The input volume to segment. + model_path: The path to the model checkpoint if `model` is not provided. + model: Pre-loaded model. Either `model_path` or `model` is required. + tiling: The tiling configuration for the prediction. + verbose: Whether to print timing information. + scale: The scale factor to use for rescaling the input volume before prediction. + mask: An optional mask that is used to restrict the segmentation. + + Returns: + The foreground mask as a numpy array. + """ + if verbose: + print("Segmenting AZ in volume of shape", input_volume.shape) + # Create the scaler to handle prediction with a different scaling factor. + scaler = _Scaler(scale, verbose) + input_volume = scaler.scale_input(input_volume) + + # Rescale the mask if it was given and run prediction. + if mask is not None: + mask = scaler.scale_input(mask, is_segmentation=True) + pred = get_prediction(input_volume, model_path=model_path, model=model, tiling=tiling, mask=mask, verbose=verbose) + + # Run segmentation and rescale the result if necessary. + foreground = pred[0] + #print(f"shape {foreground.shape}") + #foreground = pred[0, :, :, :] + print(f"shape {foreground.shape}") + + segmentation = _run_segmentation(foreground, verbose=verbose, min_size=min_size) + + if return_predictions: + pred = scaler.rescale_output(pred, is_segmentation=False) + return segmentation, pred + return segmentation + From a0f713f80ce3c75b0534cbdc2a79214c6dd52c6f Mon Sep 17 00:00:00 2001 From: SarahMuth Date: Mon, 28 Oct 2024 13:43:56 +0100 Subject: [PATCH 02/35] updates --- .gitignore | 4 +++- scripts/cooper/training/train_AZ.py | 11 ++++++---- scripts/rizzoli/2D_vesicle_segmentation.py | 20 +++++++++++++------ scripts/rizzoli/train_2D_domain_adaptation.py | 16 ++++++++++----- 4 files changed, 35 insertions(+), 16 deletions(-) diff --git a/.gitignore b/.gitignore index d040431..ca03577 100644 --- a/.gitignore +++ b/.gitignore @@ -10,4 +10,6 @@ slurm/ scripts/cooper/evaluation_results/ scripts/cooper/training/copy_testset.py scripts/rizzoli/upsample_data.py -scripts/cooper/training/find_rec_testset.py \ No newline at end of file +scripts/cooper/training/find_rec_testset.py +scripts/rizzoli/combine_2D_slices.py +scripts/rizzoli/combine_2D_slices_raw.py \ No newline at end of file diff --git a/scripts/cooper/training/train_AZ.py b/scripts/cooper/training/train_AZ.py index 1468eaf..9d7d283 100644 --- a/scripts/cooper/training/train_AZ.py +++ b/scripts/cooper/training/train_AZ.py @@ -12,7 +12,7 @@ from synaptic_reconstruction.training import semisupervised_training TRAIN_ROOT = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/exported_imod_objects" -OUTPUT_ROOT = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/training_AZ_v1" +OUTPUT_ROOT = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/training_AZ_v2" def _require_train_val_test_split(datasets): @@ -80,8 +80,11 @@ def get_paths(split, datasets, testset=True): def train(key, ignore_label = None, training_2D = False, testset = True): + os.makedirs(OUTPUT_ROOT, exist_ok=True) + datasets = [ "01_hoi_maus_2020_incomplete", + "04_hoi_stem_examples", "06_hoi_wt_stem750_fm", "12_chemical_fix_cryopreparation" ] @@ -93,7 +96,7 @@ def train(key, ignore_label = None, training_2D = False, testset = True): print(len(val_paths), "tomograms for validation") patch_shape = [48, 256, 256] - model_name=f"3D-AZ-model-v1" + model_name=f"3D-AZ-model-v3" 
#checking for 2D training if training_2D: @@ -109,11 +112,11 @@ def train(key, ignore_label = None, training_2D = False, testset = True): val_paths=val_paths, label_key=f"/labels/{key}", patch_shape=patch_shape, batch_size=batch_size, - sampler = torch_em.data.sampler.MinInstanceSampler(min_num_instances=1), + sampler = torch_em.data.sampler.MinInstanceSampler(min_num_instances=1, p_reject = 0.95), n_samples_train=None, n_samples_val=25, check=check, save_root="/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/AZ_models", - n_iterations=int(5e3), + n_iterations=int(5e4), ignore_label= ignore_label, label_transform=torch_em.transform.label.labels_to_binary, out_channels = 1, diff --git a/scripts/rizzoli/2D_vesicle_segmentation.py b/scripts/rizzoli/2D_vesicle_segmentation.py index 7974e3b..ddfdab7 100644 --- a/scripts/rizzoli/2D_vesicle_segmentation.py +++ b/scripts/rizzoli/2D_vesicle_segmentation.py @@ -57,7 +57,7 @@ def get_volume(input_path): input_volume = seg_file["raw"][:] return input_volume -def run_vesicle_segmentation(input_path, output_path, model_path, tile_shape, halo, include_boundary, key_label): +def run_vesicle_segmentation(input_path, output_path, model_path, tile_shape, halo, include_boundary, key_label, scale): tiling = get_2D_tiling() @@ -72,20 +72,24 @@ def run_vesicle_segmentation(input_path, output_path, model_path, tile_shape, ha device = "cuda" if torch.cuda.is_available() else "cpu" model = torch_em.util.load_model(checkpoint=model_path, device=device) - def process_slices(input_volume): + def process_slices(input_volume, scale): processed_slices = [] foreground = [] boundaries = [] for z in range(input_volume.shape[0]): slice_ = input_volume[z, :, :] - segmented_slice, prediction_slice = segment_vesicles(input_volume=slice_, model=model, verbose=False, tiling=tiling, return_predictions=True, exclude_boundary=not include_boundary) + segmented_slice, prediction_slice = segment_vesicles(input_volume=slice_, model=model, verbose=False, tiling=tiling, return_predictions=True, scale = scale, exclude_boundary=not include_boundary) processed_slices.append(segmented_slice) foreground_pred_slice, boundaries_pred_slice = prediction_slice[:2] foreground.append(foreground_pred_slice) boundaries.append(boundaries_pred_slice) return processed_slices, foreground, boundaries - segmentation, foreground, boundaries = process_slices(input) + if input.ndim == 2: + segmentation, prediction = segment_vesicles(input_volume=input, model=model, verbose=False, tiling=tiling, return_predictions=True, scale = scale, exclude_boundary=not include_boundary) + foreground, boundaries = prediction[:2] + else: + segmentation, foreground, boundaries = process_slices(input, scale) seg_output = _require_output_folders(output_path) file_name = Path(input_path).stem @@ -121,7 +125,7 @@ def segment_folder(args): print(input_files) pbar = tqdm(input_files, desc="Run segmentation") for input_path in pbar: - run_vesicle_segmentation(input_path, args.output_path, args.model_path, args.tile_shape, args.halo, args.include_boundary, args.key_label) + run_vesicle_segmentation(input_path, args.output_path, args.model_path, args.tile_shape, args.halo, args.include_boundary, args.key_label, args.scale) def main(): parser = argparse.ArgumentParser(description="Segment vesicles in EM tomograms.") @@ -152,6 +156,10 @@ def main(): "--key_label", "-k", default = "combined_vesicles", help="Give the key name for saving the segmentation in h5." 
) + parser.add_argument( + "--scale", "-s", type=float, nargs=2, + help="Scales the input data." + ) args = parser.parse_args() input_ = args.input_path @@ -159,7 +167,7 @@ def main(): if os.path.isdir(input_): segment_folder(args) else: - run_vesicle_segmentation(input_, args.output_path, args.model_path, args.tile_shape, args.halo, args.include_boundary, args.key_label) + run_vesicle_segmentation(input_, args.output_path, args.model_path, args.tile_shape, args.halo, args.include_boundary, args.key_label, args.scale) print("Finished segmenting!") diff --git a/scripts/rizzoli/train_2D_domain_adaptation.py b/scripts/rizzoli/train_2D_domain_adaptation.py index 86eedd1..c8a9419 100644 --- a/scripts/rizzoli/train_2D_domain_adaptation.py +++ b/scripts/rizzoli/train_2D_domain_adaptation.py @@ -6,11 +6,13 @@ from sklearn.model_selection import train_test_split from synaptic_reconstruction.training.domain_adaptation import mean_teacher_adaptation -TRAIN_ROOT = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/rizzoli/extracted" -OUTPUT_ROOT = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/2D_DA_training_rizzoli" +TRAIN_ROOT = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/2D_data" +OUTPUT_ROOT = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/2D_DA_training_rizzoli_v4" def _require_train_val_test_split(datasets): train_ratio, val_ratio, test_ratio = 0.8, 0.1, 0.1 + if len(datasets) < 10: + train_ratio, val_ratio, test_ratio = 0.5, 0.25, 0.25 def _train_val_test_split(names): train, test = train_test_split(names, test_size=1 - train_ratio, shuffle=True) @@ -71,8 +73,12 @@ def get_paths(split, datasets, testset=True): return paths def vesicle_domain_adaptation(teacher_model, testset = True): + + os.makedirs(OUTPUT_ROOT, exist_ok=True) + datasets = [ - "upsampled_by2" + "maus_2020_tem2d_wt_unt_div14_exported_scaled_grouped", + "20241021_imig_2014_data_transfer_exported_grouped" ] train_paths = get_paths("train", datasets=datasets, testset=testset) val_paths = get_paths("val", datasets=datasets, testset=testset) @@ -83,7 +89,7 @@ def vesicle_domain_adaptation(teacher_model, testset = True): #adjustable parameters patch_shape = [1, 256, 256] #2D - model_name = "2D-vesicle-DA-rizzoli-v3" + model_name = "2D-vesicle-DA-rizzoli-v5" model_root = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/models_v2/checkpoints/" checkpoint_path = os.path.join(model_root, teacher_model) @@ -97,7 +103,7 @@ def vesicle_domain_adaptation(teacher_model, testset = True): save_root="/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/DA_models", source_checkpoint=checkpoint_path, confidence_threshold=0.75, - n_iterations=int(5e4), + n_iterations=int(5e5), ) From ac1ac0082b0154e04729c8e55802b73a81ab3afd Mon Sep 17 00:00:00 2001 From: SarahMuth Date: Mon, 28 Oct 2024 16:59:33 +0100 Subject: [PATCH 03/35] update 2D DA --- scripts/rizzoli/train_2D_domain_adaptation.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/scripts/rizzoli/train_2D_domain_adaptation.py b/scripts/rizzoli/train_2D_domain_adaptation.py index c8a9419..3beb487 100644 --- a/scripts/rizzoli/train_2D_domain_adaptation.py +++ b/scripts/rizzoli/train_2D_domain_adaptation.py @@ -7,7 +7,7 @@ from synaptic_reconstruction.training.domain_adaptation import mean_teacher_adaptation TRAIN_ROOT = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/2D_data" -OUTPUT_ROOT = 
"/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/2D_DA_training_rizzoli_v4" +OUTPUT_ROOT = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/2D_DA_training_2Dcooper_v1" def _require_train_val_test_split(datasets): train_ratio, val_ratio, test_ratio = 0.8, 0.1, 0.1 @@ -77,9 +77,8 @@ def vesicle_domain_adaptation(teacher_model, testset = True): os.makedirs(OUTPUT_ROOT, exist_ok=True) datasets = [ - "maus_2020_tem2d_wt_unt_div14_exported_scaled_grouped", "20241021_imig_2014_data_transfer_exported_grouped" -] +]#"maus_2020_tem2d_wt_unt_div14_exported_scaled_grouped", train_paths = get_paths("train", datasets=datasets, testset=testset) val_paths = get_paths("val", datasets=datasets, testset=testset) @@ -89,7 +88,7 @@ def vesicle_domain_adaptation(teacher_model, testset = True): #adjustable parameters patch_shape = [1, 256, 256] #2D - model_name = "2D-vesicle-DA-rizzoli-v5" + model_name = "2D-vesicle-DA-2Dcooper-imig-v1" model_root = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/models_v2/checkpoints/" checkpoint_path = os.path.join(model_root, teacher_model) From 61c57faf1de13b2e29176a1f7896274dc7ae01ae Mon Sep 17 00:00:00 2001 From: SarahMuth Date: Thu, 7 Nov 2024 10:52:06 +0100 Subject: [PATCH 04/35] small updates, compartment segmentation --- scripts/cooper/AZ_segmentation_h5.py | 8 +- scripts/cooper/compartment_segmentation_h5.py | 116 ++++++++++++++++++ scripts/cooper/training/evaluation.py | 18 ++- scripts/cooper/vesicle_segmentation_h5.py | 30 ++++- scripts/rizzoli/evaluation_2D.py | 30 ++++- scripts/rizzoli/train_2D_domain_adaptation.py | 11 +- synaptic_reconstruction/inference/vesicles.py | 4 +- 7 files changed, 194 insertions(+), 23 deletions(-) create mode 100644 scripts/cooper/compartment_segmentation_h5.py diff --git a/scripts/cooper/AZ_segmentation_h5.py b/scripts/cooper/AZ_segmentation_h5.py index 07ff718..4deadc8 100644 --- a/scripts/cooper/AZ_segmentation_h5.py +++ b/scripts/cooper/AZ_segmentation_h5.py @@ -81,7 +81,7 @@ def segment_folder(args): input_files = [] for root, dirs, files in os.walk(args.input_path): input_files.extend([ - os.path.join(root, name) for name in files if name.endswith(".h5") + os.path.join(root, name) for name in files if name.endswith(args.data_ext) ]) print(input_files) pbar = tqdm(input_files, desc="Run segmentation") @@ -127,6 +127,10 @@ def main(): "--key_label", "-k", default = "combined_vesicles", help="Give the key name for saving the segmentation in h5." ) + parser.add_argument( + "--data_ext", "-d", default = ".h5", + help="Format extension of data to be segmented, default is .h5." 
+ ) args = parser.parse_args() input_ = args.input_path @@ -134,7 +138,7 @@ def main(): if os.path.isdir(input_): segment_folder(args) else: - run_AZ_segmentation(input_, args.output_path, args.model_path, args.mask_path, args.mask_key, args.tile_shape, args.halo, args.key_label) + run_AZ_segmentation(input_, args.output_path, args.model_path, args.mask_path, args.mask_key, args.tile_shape, args.halo, args.key_label, args.data_ext) print("Finished segmenting!") diff --git a/scripts/cooper/compartment_segmentation_h5.py b/scripts/cooper/compartment_segmentation_h5.py new file mode 100644 index 0000000..1d0020a --- /dev/null +++ b/scripts/cooper/compartment_segmentation_h5.py @@ -0,0 +1,116 @@ +import argparse +import h5py +import os +from pathlib import Path + +from tqdm import tqdm +from elf.io import open_file + +from synaptic_reconstruction.inference.compartments import segment_compartments +from synaptic_reconstruction.inference.util import parse_tiling + +def _require_output_folders(output_folder): + #seg_output = os.path.join(output_folder, "segmentations") + seg_output = output_folder + os.makedirs(seg_output, exist_ok=True) + return seg_output + +def get_volume(input_path): + + with open_file(input_path, "r") as f: + + # Try to automatically derive the key with the raw data. + keys = list(f.keys()) + if len(keys) == 1: + key = keys[0] + elif "data" in keys: + key = "data" + elif "raw" in keys: + key = "raw" + + input_volume = f[key][:] + return input_volume + +def run_compartment_segmentation(input_path, output_path, model_path, tile_shape, halo, key_label): + tiling = parse_tiling(tile_shape, halo) + print(f"using tiling {tiling}") + input = get_volume(input_path) + + segmentation, prediction = segment_compartments(input_volume=input, model_path=model_path, verbose=False, tiling=tiling, return_predictions=True, scale=[0.25, 0.25, 0.25]) + + seg_output = _require_output_folders(output_path) + file_name = Path(input_path).stem + seg_path = os.path.join(seg_output, f"{file_name}.h5") + + #check + os.makedirs(Path(seg_path).parent, exist_ok=True) + + print(f"Saving results in {seg_path}") + with h5py.File(seg_path, "a") as f: + if "raw" in f: + print("raw image already saved") + else: + f.create_dataset("raw", data=input, compression="gzip") + + key=f"compartments/segment_from_{key_label}" + if key in f: + print("Skipping", input_path, "because", key, "exists") + else: + f.create_dataset(key, data=segmentation, compression="gzip") + f.create_dataset(f"compartment_pred_{key_label}/foreground", data = prediction, compression="gzip") + + + + +def segment_folder(args): + input_files = [] + for root, dirs, files in os.walk(args.input_path): + input_files.extend([ + os.path.join(root, name) for name in files if name.endswith(args.data_ext) + ]) + print(input_files) + pbar = tqdm(input_files, desc="Run segmentation") + for input_path in pbar: + run_compartment_segmentation(input_path, args.output_path, args.model_path, args.tile_shape, args.halo, args.key_label) + +def main(): + parser = argparse.ArgumentParser(description="Segment vesicles in EM tomograms.") + parser.add_argument( + "--input_path", "-i", required=True, + help="The filepath to the mrc file or the directory containing the tomogram data." + ) + parser.add_argument( + "--output_path", "-o", required=True, + help="The filepath to directory where the segmentations will be saved." + ) + parser.add_argument( + "--model_path", "-m", required=True, help="The filepath to the vesicle model." 
+ ) + parser.add_argument( + "--tile_shape", type=int, nargs=3, + help="The tile shape for prediction. Lower the tile shape if GPU memory is insufficient." + ) + parser.add_argument( + "--halo", type=int, nargs=3, + help="The halo for prediction. Increase the halo to minimize boundary artifacts." + ) + parser.add_argument( + "--data_ext", "-d", default=".h5", help="The extension of the tomogram data. By default .h5." + ) + parser.add_argument( + "--key_label", "-k", default = "3Dmodel_v1", + help="Give the key name for saving the segmentation in h5." + ) + args = parser.parse_args() + + input_ = args.input_path + + if os.path.isdir(input_): + segment_folder(args) + else: + run_compartment_segmentation(input_, args.output_path, args.model_path, args.tile_shape, args.halo, args.key_label) + + print("Finished segmenting!") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/scripts/cooper/training/evaluation.py b/scripts/cooper/training/evaluation.py index d7aaf6e..68fa863 100644 --- a/scripts/cooper/training/evaluation.py +++ b/scripts/cooper/training/evaluation.py @@ -21,7 +21,7 @@ def summarize_eval(results): table = summary.to_markdown(index=False) print(table) -def evaluate_file(labels_path, vesicles_path, model_name, segment_key, anno_key): +def evaluate_file(labels_path, vesicles_path, model_name, segment_key, anno_key, mask_key = None): print(f"Evaluate labels {labels_path} and vesicles {vesicles_path}") ds_name = os.path.basename(os.path.dirname(labels_path)) @@ -33,11 +33,16 @@ def evaluate_file(labels_path, vesicles_path, model_name, segment_key, anno_key) #vesicles = labels["vesicles"] gt = labels[anno_key][:] + if mask_key is not None: + mask = labels[mask_key][:] + with h5py.File(vesicles_path) as seg_file: segmentation = seg_file["vesicles"] vesicles = segmentation[segment_key][:] - + if mask_key is not None: + gt[mask == 0] = 0 + vesicles[mask == 0] = 0 #evaluate the match of ground truth and vesicles scores = evaluate(gt, vesicles) @@ -65,7 +70,7 @@ def evaluate_file(labels_path, vesicles_path, model_name, segment_key, anno_key) summarize_eval(results) -def evaluate_folder(labels_path, vesicles_path, model_name, segment_key, anno_key): +def evaluate_folder(labels_path, vesicles_path, model_name, segment_key, anno_key, mask_key = None): print(f"Evaluating folder {vesicles_path}") print(f"Using labels stored in {labels_path}") @@ -75,7 +80,7 @@ def evaluate_folder(labels_path, vesicles_path, model_name, segment_key, anno_ke for vesicle_file in vesicles_files: if vesicle_file in label_files: - evaluate_file(os.path.join(labels_path, vesicle_file), os.path.join(vesicles_path, vesicle_file), model_name, segment_key, anno_key) + evaluate_file(os.path.join(labels_path, vesicle_file), os.path.join(vesicles_path, vesicle_file), model_name, segment_key, anno_key, mask_key) @@ -87,13 +92,14 @@ def main(): parser.add_argument("-n", "--model_name", required=True) parser.add_argument("-sk", "--segment_key", required=True) parser.add_argument("-ak", "--anno_key", required=True) + parser.add_argument("-m", "--mask_key") args = parser.parse_args() vesicles_path = args.vesicles_path if os.path.isdir(vesicles_path): - evaluate_folder(args.labels_path, vesicles_path, args.model_name, args.segment_key, args.anno_key) + evaluate_folder(args.labels_path, vesicles_path, args.model_name, args.segment_key, args.anno_key, args.mask_key) else: - evaluate_file(args.labels_path, vesicles_path, args.model_name, args.segment_key, args.anno_key) + evaluate_file(args.labels_path, 
vesicles_path, args.model_name, args.segment_key, args.anno_key, args.mask_key) diff --git a/scripts/cooper/vesicle_segmentation_h5.py b/scripts/cooper/vesicle_segmentation_h5.py index 9c8b1d1..1136f18 100644 --- a/scripts/cooper/vesicle_segmentation_h5.py +++ b/scripts/cooper/vesicle_segmentation_h5.py @@ -34,7 +34,7 @@ def get_volume(input_path): input_volume = f[key][:] return input_volume -def run_vesicle_segmentation(input_path, output_path, model_path, mask_path, mask_key,tile_shape, halo, include_boundary, key_label): +def run_vesicle_segmentation(input_path, output_path, model_path, mask_path, mask_key,tile_shape, halo, include_boundary, key_label, distance_threshold = None): tiling = parse_tiling(tile_shape, halo) print(f"using tiling {tiling}") input = get_volume(input_path) @@ -45,8 +45,17 @@ def run_vesicle_segmentation(input_path, output_path, model_path, mask_path, mas mask = f[mask_key][:] else: mask = None + if distance_threshold is not None: + segmentation, prediction = segment_vesicles( + input_volume=input, model_path=model_path, verbose=False, tiling=tiling, return_predictions=True, + exclude_boundary=not include_boundary, mask = mask, distance_threshold = distance_threshold + ) + else: + segmentation, prediction = segment_vesicles( + input_volume=input, model_path=model_path, verbose=False, tiling=tiling, return_predictions=True, + exclude_boundary=not include_boundary, mask = mask + ) - segmentation, prediction = segment_vesicles(input_volume=input, model_path=model_path, verbose=False, tiling=tiling, return_predictions=True, exclude_boundary=not include_boundary, mask = mask) foreground, boundaries = prediction[:2] seg_output = _require_output_folders(output_path) @@ -84,7 +93,7 @@ def segment_folder(args): input_files = [] for root, dirs, files in os.walk(args.input_path): input_files.extend([ - os.path.join(root, name) for name in files if name.endswith(".h5") + os.path.join(root, name) for name in files if name.endswith(args.data_ext) ]) print(input_files) pbar = tqdm(input_files, desc="Run segmentation") @@ -97,7 +106,10 @@ def segment_folder(args): print(f"Mask file not found for {input_path}") mask_path = None - run_vesicle_segmentation(input_path, args.output_path, args.model_path, mask_path, args.mask_key, args.tile_shape, args.halo, args.include_boundary, args.key_label) + run_vesicle_segmentation( + input_path, args.output_path, args.model_path, mask_path, args.mask_key, + args.tile_shape, args.halo, args.include_boundary, args.key_label, args.distance_threshold + ) def main(): parser = argparse.ArgumentParser(description="Segment vesicles in EM tomograms.") @@ -134,6 +146,14 @@ def main(): "--key_label", "-k", default = "combined_vesicles", help="Give the key name for saving the segmentation in h5." ) + parser.add_argument( + "--distance_threshold", "-t", type=int, + help="Used for distance based segmentation." + ) + parser.add_argument( + "--data_ext", "-d", default = ".h5", + help="Format extension of data to be segmented, default is .h5." 
+ ) args = parser.parse_args() input_ = args.input_path @@ -141,7 +161,7 @@ def main(): if os.path.isdir(input_): segment_folder(args) else: - run_vesicle_segmentation(input_, args.output_path, args.model_path, args.mask_path, args.mask_key, args.tile_shape, args.halo, args.include_boundary, args.key_label) + run_vesicle_segmentation(input_, args.output_path, args.model_path, args.mask_path, args.mask_key, args.tile_shape, args.halo, args.include_boundary, args.key_label, args.distance_threshold) print("Finished segmenting!") diff --git a/scripts/rizzoli/evaluation_2D.py b/scripts/rizzoli/evaluation_2D.py index 18fd4f1..9f918df 100644 --- a/scripts/rizzoli/evaluation_2D.py +++ b/scripts/rizzoli/evaluation_2D.py @@ -6,8 +6,13 @@ import numpy as np from elf.evaluation import matching +from skimage.transform import rescale - +def transpose_tomo(tomogram): + data0 = np.swapaxes(tomogram, 0, -1) + data1 = np.fliplr(data0) + transposed_data = np.swapaxes(data1, 0, -1) + return transposed_data def evaluate(labels, vesicles): assert labels.shape == vesicles.shape @@ -54,21 +59,34 @@ def evaluate_file(labels_path, vesicles_path, model_name, segment_key, anno_key) ds_name = os.path.basename(os.path.dirname(labels_path)) tomo = os.path.basename(labels_path) - + use_mask = True #get the labels and vesicles with h5py.File(labels_path) as label_file: labels = label_file["labels"] - vesicles = labels["vesicles"] - gt = vesicles[anno_key][:] + #vesicles = labels["vesicles"] + gt = labels[anno_key][:] + gt = rescale(gt, scale=0.5, order=0, anti_aliasing=False, preserve_range=True).astype(gt.dtype) + gt = transpose_tomo(gt) + + if use_mask: + mask = labels["mask"][:] + mask = rescale(mask, scale=0.5, order=0, anti_aliasing=False, preserve_range=True).astype(mask.dtype) + mask = transpose_tomo(mask) with h5py.File(vesicles_path) as seg_file: segmentation = seg_file["vesicles"] vesicles = segmentation[segment_key][:] + if use_mask: + gt[mask == 0] = 0 + vesicles[mask == 0] = 0 - #evaluate the match of ground truth and vesicles - scores = evaluate_slices(gt, vesicles) + #evaluate the match of ground truth and vesicles + if len(vesicles.shape) == 3: + scores = evaluate_slices(gt, vesicles) + else: + scores = evaluate(gt,vesicles) #store results result_folder ="/user/muth9/u12095/synaptic-reconstruction/scripts/cooper/evaluation_results" os.makedirs(result_folder, exist_ok=True) diff --git a/scripts/rizzoli/train_2D_domain_adaptation.py b/scripts/rizzoli/train_2D_domain_adaptation.py index 3beb487..ac2a28f 100644 --- a/scripts/rizzoli/train_2D_domain_adaptation.py +++ b/scripts/rizzoli/train_2D_domain_adaptation.py @@ -78,7 +78,7 @@ def vesicle_domain_adaptation(teacher_model, testset = True): datasets = [ "20241021_imig_2014_data_transfer_exported_grouped" -]#"maus_2020_tem2d_wt_unt_div14_exported_scaled_grouped", +] train_paths = get_paths("train", datasets=datasets, testset=testset) val_paths = get_paths("val", datasets=datasets, testset=testset) @@ -88,11 +88,13 @@ def vesicle_domain_adaptation(teacher_model, testset = True): #adjustable parameters patch_shape = [1, 256, 256] #2D - model_name = "2D-vesicle-DA-2Dcooper-imig-v1" + model_name = "2D-vesicle-DA-2Dcooper-imig-v2" model_root = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/models_v2/checkpoints/" checkpoint_path = os.path.join(model_root, teacher_model) + patch_shape = [256, 256] if any("maus" in dataset for dataset in datasets) else [1, 256, 256] + mean_teacher_adaptation( name=model_name, unsupervised_train_paths=train_paths, @@ -102,7 
+104,10 @@ def vesicle_domain_adaptation(teacher_model, testset = True): save_root="/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/DA_models", source_checkpoint=checkpoint_path, confidence_threshold=0.75, - n_iterations=int(5e5), + batch_size=8, + n_iterations=int(1.5e4), + n_samples_train=8000, + n_samples_val=50, ) diff --git a/synaptic_reconstruction/inference/vesicles.py b/synaptic_reconstruction/inference/vesicles.py index 237d95a..4a56b0f 100644 --- a/synaptic_reconstruction/inference/vesicles.py +++ b/synaptic_reconstruction/inference/vesicles.py @@ -49,6 +49,7 @@ def distance_based_vesicle_segmentation( # Get the segmentation via seeded watershed of components in the boundary distances. t0 = time.time() + print(f"using a distance thresholf of {distance_threshold} for distance based segmentation") seeds = parallel.label(bd_dist > distance_threshold, block_shape=block_shape, verbose=verbose) if verbose: print("Compute connected components in", time.time() - t0, "s") @@ -129,6 +130,7 @@ def segment_vesicles( min_size: int = 500, verbose: bool = True, distance_based_segmentation: bool = True, + distance_threshold: int = 8, return_predictions: bool = False, scale: Optional[List[float]] = None, exclude_boundary: bool = False, @@ -174,7 +176,7 @@ def segment_vesicles( if distance_based_segmentation: seg = distance_based_vesicle_segmentation( - foreground, boundaries, verbose=verbose, min_size=min_size, **kwargs + foreground, boundaries, verbose=verbose, min_size=min_size, distance_threshold = distance_threshold, **kwargs ) else: seg = simple_vesicle_segmentation( From 40e965ed72c87250b5c2ecf02e8ba1bb06b1e2e9 Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Thu, 7 Nov 2024 17:24:28 +0100 Subject: [PATCH 05/35] Implement code for first analysis --- scripts/cooper/analysis/run_analysis_1.py | 66 +++++++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 scripts/cooper/analysis/run_analysis_1.py diff --git a/scripts/cooper/analysis/run_analysis_1.py b/scripts/cooper/analysis/run_analysis_1.py new file mode 100644 index 0000000..5518189 --- /dev/null +++ b/scripts/cooper/analysis/run_analysis_1.py @@ -0,0 +1,66 @@ +# This is the code for the first analysis for the cooper data. +# Here, we only compute the vesicle numbers and size distributions for the STEM tomograms +# in the 04 dataset. + +import os +from glob import glob + +import pandas as pd +import h5py +from tqdm import tqdm +from synaptic_reconstruction.imod.to_imod import convert_segmentation_to_spheres + +DATA_ROOT = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/ground_truth/04Dataset_for_vesicle_eval" # noqa +PREDICTION_ROOT = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/04Dataset_for_vesicle_eval/model_segmentation" # noqa +RESULT_FOLDER = "./analysis_results/analysis_1" + + +# We compute the sizes for all vesicles in the compartment masks. +# We use the same logic in the size computation as for the vesicle extraction to IMOD, +# including the radius correction factor. +# The number of vesicles is automatically computed as the length of the size list. 
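For context on the size logic described in the comment above: it amounts to a sphere-equivalent radius per labelled vesicle. A minimal sketch of that idea, assuming a labelled 3D array and an isotropic voxel resolution in nm (the patch itself relies on convert_segmentation_to_spheres, whose actual radius estimation, e.g. with estimate_radius_2d, may differ):

import numpy as np

def sphere_equivalent_radii(segmentation, resolution=(0.8681, 0.8681, 0.8681), radius_factor=1.3):
    # Count the voxels belonging to each vesicle id (0 is background).
    ids, counts = np.unique(segmentation[segmentation > 0], return_counts=True)
    # Convert voxel counts to physical volumes.
    volumes = counts * np.prod(resolution)
    # Radius of the sphere with the same volume, scaled by the radius correction factor.
    radii = radius_factor * np.cbrt(3.0 * volumes / (4.0 * np.pi))
    return ids, radii

The number of vesicles then falls out as len(radii), matching the comment above.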
+def compute_sizes_for_all_tomorams(): + os.makedirs(RESULT_FOLDER, exist_ok=True) + + resolution = (0.8681,) * 3 + radius_factor = 1.3 + estimate_radius_2d = True + + tomograms = sorted(glob(os.path.join(PREDICTION_ROOT, "**/*.h5"), recursive=True)) + for tomo in tqdm(tomograms): + ds_name, fname = os.path.split(tomo) + ds_name = os.path.split(ds_name)[1] + fname = os.path.splitext(fname)[0] + output_path = os.path.join(RESULT_FOLDER, f"{ds_name}_{fname}.csv") + if os.path.exists(output_path): + continue + + # Load the vesicle segmentation from the predictions. + with h5py.File(tomo, "r") as f: + segmentation = f["/vesicles/segment_from_combined_vesicles"][:] + + input_path = os.path.join(DATA_ROOT, ds_name, f"{fname}.h5") + assert os.path.exists(input_path), input_path + # Load the compartment mask from the tomogram + with h5py.File(input_path, "r") as f: + mask = f["labels/compartment"][:] + + segmentation[mask == 0] = 0 + _, sizes = convert_segmentation_to_spheres( + segmentation, resolution=resolution, radius_factor=radius_factor, estimate_radius_2d=estimate_radius_2d + ) + + result = pd.DataFrame({ + "dataset": [ds_name] * len(sizes), + "tomogram": [fname] * len(sizes), + "sizes": sizes + }) + result.to_csv(output_path, index=False) + + +def main(): + compute_sizes_for_all_tomorams() + + +if __name__ == "__main__": + main() From 7be9ee8fd74d0667e61c84f0d3db104427527581 Mon Sep 17 00:00:00 2001 From: SarahMuth Date: Mon, 11 Nov 2024 20:21:47 +0100 Subject: [PATCH 06/35] 2D seg with mask --- .gitignore | 3 +- scripts/cooper/AZ_segmentation_h5.py | 2 +- scripts/rizzoli/2D_vesicle_segmentation.py | 37 ++++++++++++++++++---- scripts/rizzoli/evaluation_2D.py | 6 ++-- 4 files changed, 37 insertions(+), 11 deletions(-) diff --git a/.gitignore b/.gitignore index ca03577..4db569e 100644 --- a/.gitignore +++ b/.gitignore @@ -12,4 +12,5 @@ scripts/cooper/training/copy_testset.py scripts/rizzoli/upsample_data.py scripts/cooper/training/find_rec_testset.py scripts/rizzoli/combine_2D_slices.py -scripts/rizzoli/combine_2D_slices_raw.py \ No newline at end of file +scripts/rizzoli/combine_2D_slices_raw.py +scripts/cooper/remove_h5key.py \ No newline at end of file diff --git a/scripts/cooper/AZ_segmentation_h5.py b/scripts/cooper/AZ_segmentation_h5.py index 4deadc8..2fb7045 100644 --- a/scripts/cooper/AZ_segmentation_h5.py +++ b/scripts/cooper/AZ_segmentation_h5.py @@ -138,7 +138,7 @@ def main(): if os.path.isdir(input_): segment_folder(args) else: - run_AZ_segmentation(input_, args.output_path, args.model_path, args.mask_path, args.mask_key, args.tile_shape, args.halo, args.key_label, args.data_ext) + run_AZ_segmentation(input_, args.output_path, args.model_path, args.mask_path, args.mask_key, args.tile_shape, args.halo, args.key_label) print("Finished segmenting!") diff --git a/scripts/rizzoli/2D_vesicle_segmentation.py b/scripts/rizzoli/2D_vesicle_segmentation.py index ddfdab7..159be28 100644 --- a/scripts/rizzoli/2D_vesicle_segmentation.py +++ b/scripts/rizzoli/2D_vesicle_segmentation.py @@ -7,6 +7,7 @@ import torch import torch_em import numpy as np +from elf.io import open_file from synaptic_reconstruction.inference.vesicles import segment_vesicles from synaptic_reconstruction.inference.util import parse_tiling @@ -57,7 +58,7 @@ def get_volume(input_path): input_volume = seg_file["raw"][:] return input_volume -def run_vesicle_segmentation(input_path, output_path, model_path, tile_shape, halo, include_boundary, key_label, scale): +def run_vesicle_segmentation(input_path, output_path, model_path, 
tile_shape, halo, include_boundary, key_label, scale, mask_path, mask_key): tiling = get_2D_tiling() @@ -69,16 +70,29 @@ def run_vesicle_segmentation(input_path, output_path, model_path, tile_shape, ha tiling = parse_tiling(tile_shape, halo) input = get_volume(input_path) + #check if we have a restricting mask for the segmentation + if mask_path is not None: + with open_file(mask_path, "r") as f: + mask = f[mask_key][:] + else: + mask = None + device = "cuda" if torch.cuda.is_available() else "cpu" model = torch_em.util.load_model(checkpoint=model_path, device=device) - def process_slices(input_volume, scale): + def process_slices(input_volume, scale, mask): processed_slices = [] foreground = [] boundaries = [] for z in range(input_volume.shape[0]): slice_ = input_volume[z, :, :] - segmented_slice, prediction_slice = segment_vesicles(input_volume=slice_, model=model, verbose=False, tiling=tiling, return_predictions=True, scale = scale, exclude_boundary=not include_boundary) + #check if we have a restricting mask for the segmentation + if mask is not None: + mask_slice = mask[z, :, :] + segmented_slice, prediction_slice = segment_vesicles(input_volume=slice_, model=model, verbose=False, tiling=tiling, return_predictions=True, scale = scale, exclude_boundary=not include_boundary, mask = mask_slice) + else: + segmented_slice, prediction_slice = segment_vesicles(input_volume=slice_, model=model, verbose=False, tiling=tiling, return_predictions=True, scale = scale, exclude_boundary=not include_boundary) + processed_slices.append(segmented_slice) foreground_pred_slice, boundaries_pred_slice = prediction_slice[:2] foreground.append(foreground_pred_slice) @@ -86,10 +100,11 @@ def process_slices(input_volume, scale): return processed_slices, foreground, boundaries if input.ndim == 2: + #TODO: check if we have a restricting mask for the segmentation segmentation, prediction = segment_vesicles(input_volume=input, model=model, verbose=False, tiling=tiling, return_predictions=True, scale = scale, exclude_boundary=not include_boundary) foreground, boundaries = prediction[:2] else: - segmentation, foreground, boundaries = process_slices(input, scale) + segmentation, foreground, boundaries = process_slices(input, scale, mask) seg_output = _require_output_folders(output_path) file_name = Path(input_path).stem @@ -125,7 +140,11 @@ def segment_folder(args): print(input_files) pbar = tqdm(input_files, desc="Run segmentation") for input_path in pbar: - run_vesicle_segmentation(input_path, args.output_path, args.model_path, args.tile_shape, args.halo, args.include_boundary, args.key_label, args.scale) + if args.mask_path is not None: + mask_path_for_file = os.path.join(args.mask_path, os.path.basename(input_path)) + else: + mask_path_for_file = None + run_vesicle_segmentation(input_path, args.output_path, args.model_path, args.tile_shape, args.halo, args.include_boundary, args.key_label, args.scale, mask_path_for_file, args.mask_key) def main(): parser = argparse.ArgumentParser(description="Segment vesicles in EM tomograms.") @@ -160,6 +179,12 @@ def main(): "--scale", "-s", type=float, nargs=2, help="Scales the input data." ) + parser.add_argument( + "--mask_path", help="The filepath to a h5 file with a mask that will be used to restrict the segmentation. Needs to be in combination with mask_key." 
+ ) + parser.add_argument( + "--mask_key", help="Key name that holds the mask segmentation" + ) args = parser.parse_args() input_ = args.input_path @@ -167,7 +192,7 @@ def main(): if os.path.isdir(input_): segment_folder(args) else: - run_vesicle_segmentation(input_, args.output_path, args.model_path, args.tile_shape, args.halo, args.include_boundary, args.key_label, args.scale) + run_vesicle_segmentation(input_, args.output_path, args.model_path, args.tile_shape, args.halo, args.include_boundary, args.key_label, args.scale, args.mask_path, args.mask_key) print("Finished segmenting!") diff --git a/scripts/rizzoli/evaluation_2D.py b/scripts/rizzoli/evaluation_2D.py index 9f918df..5b5bbbd 100644 --- a/scripts/rizzoli/evaluation_2D.py +++ b/scripts/rizzoli/evaluation_2D.py @@ -59,14 +59,14 @@ def evaluate_file(labels_path, vesicles_path, model_name, segment_key, anno_key) ds_name = os.path.basename(os.path.dirname(labels_path)) tomo = os.path.basename(labels_path) - use_mask = True + use_mask = False #get the labels and vesicles with h5py.File(labels_path) as label_file: labels = label_file["labels"] #vesicles = labels["vesicles"] gt = labels[anno_key][:] - gt = rescale(gt, scale=0.5, order=0, anti_aliasing=False, preserve_range=True).astype(gt.dtype) - gt = transpose_tomo(gt) + #gt = rescale(gt, scale=0.5, order=0, anti_aliasing=False, preserve_range=True).astype(gt.dtype) + #gt = transpose_tomo(gt) if use_mask: mask = labels["mask"][:] From f85e4452bd3c32132a2cd94ff139ea47334e4566 Mon Sep 17 00:00:00 2001 From: SarahMuth Date: Mon, 11 Nov 2024 22:47:38 +0100 Subject: [PATCH 07/35] spatial distribution analysis --- scripts/cooper/analysis/run_analysis_1.py | 6 +- .../run_spatial_distribution_analysis.py | 75 +++++++++++++++++++ 2 files changed, 78 insertions(+), 3 deletions(-) create mode 100644 scripts/cooper/analysis/run_spatial_distribution_analysis.py diff --git a/scripts/cooper/analysis/run_analysis_1.py b/scripts/cooper/analysis/run_analysis_1.py index 5518189..523aba1 100644 --- a/scripts/cooper/analysis/run_analysis_1.py +++ b/scripts/cooper/analysis/run_analysis_1.py @@ -10,8 +10,8 @@ from tqdm import tqdm from synaptic_reconstruction.imod.to_imod import convert_segmentation_to_spheres -DATA_ROOT = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/ground_truth/04Dataset_for_vesicle_eval" # noqa -PREDICTION_ROOT = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/04Dataset_for_vesicle_eval/model_segmentation" # noqa +DATA_ROOT = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/20241102_TOMO_DATA_Imig2014" # noqa +PREDICTION_ROOT = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/segmentation/for_spatial_distribution_analysis/20241102_TOMO_DATA_Imig2014" # noqa RESULT_FOLDER = "./analysis_results/analysis_1" @@ -22,7 +22,7 @@ def compute_sizes_for_all_tomorams(): os.makedirs(RESULT_FOLDER, exist_ok=True) - resolution = (0.8681,) * 3 + resolution = (0.8681,) * 3 #change for each dataset radius_factor = 1.3 estimate_radius_2d = True diff --git a/scripts/cooper/analysis/run_spatial_distribution_analysis.py b/scripts/cooper/analysis/run_spatial_distribution_analysis.py new file mode 100644 index 0000000..2002431 --- /dev/null +++ b/scripts/cooper/analysis/run_spatial_distribution_analysis.py @@ -0,0 +1,75 @@ +import os +from glob import glob +import pandas as pd +import h5py +from tqdm import tqdm +from synaptic_reconstruction.distance_measurements import measure_segmentation_to_object_distances + 
+DATA_ROOT = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/20241102_TOMO_DATA_Imig2014" # noqa +PREDICTION_ROOT = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/segmentation/for_spatial_distribution_analysis/20241102_TOMO_DATA_Imig2014" # noqa +RESULT_FOLDER = "./analysis_results" + + +# We compute the distances for all vesicles in the compartment masks to the AZ. +# We use the same different resolution, depending on dataset. +# The closest distance is calculated, i.e., the closest point on the outer membrane of the vesicle to the AZ. +def compute_sizes_for_all_tomorams(): + os.makedirs(RESULT_FOLDER, exist_ok=True) + + resolution = (0.8681,) * 3 # Change for each dataset + + # Dictionary to hold the results for each dataset + dataset_results = {} + + tomograms = sorted(glob(os.path.join(PREDICTION_ROOT, "**/*.h5"), recursive=True)) + for tomo in tqdm(tomograms): + ds_name, fname = os.path.split(tomo) + ds_name = os.path.split(ds_name)[1] + fname = os.path.splitext(fname)[0] + + # Initialize a new dictionary entry for each dataset if not already present + if ds_name not in dataset_results: + dataset_results[ds_name] = {} + + # Skip if this tomogram already exists in the dataset dictionary + if fname in dataset_results[ds_name]: + continue + + # Load the vesicle segmentation from the predictions + with h5py.File(tomo, "r") as f: + segmentation = f["/vesicles/segment_from_combined_vesicles"][:] + segmented_object = f["/AZ/segment_from_AZmodel_v3"][:] + + input_path = os.path.join(DATA_ROOT, ds_name, f"{fname}.h5") + assert os.path.exists(input_path), input_path + + # Load the compartment mask from the tomogram + with h5py.File(input_path, "r") as f: + mask = f["labels/compartment"][:] + + segmentation[mask == 0] = 0 + distances, _, _, _ = measure_segmentation_to_object_distances( + segmentation, segmented_object=segmented_object, resolution=resolution + ) + + # Add distances to the dataset dictionary under the tomogram name + dataset_results[ds_name][fname] = distances + + # Save each dataset's results to a single CSV file + for ds_name, tomogram_data in dataset_results.items(): + # Create a DataFrame where each column is a tomogram's distances + result_df = pd.DataFrame.from_dict(tomogram_data, orient='index').transpose() + + # Define the output file path + output_path = os.path.join(RESULT_FOLDER, f"spatial_distribution_analysis_for_{ds_name}.csv") + + # Save the DataFrame to CSV + result_df.to_csv(output_path, index=False) + + +def main(): + compute_sizes_for_all_tomorams() + + +if __name__ == "__main__": + main() From 8ef16bcafd4289b1793e1275d826a386a2cf8c87 Mon Sep 17 00:00:00 2001 From: SarahMuth Date: Tue, 12 Nov 2024 13:46:23 +0100 Subject: [PATCH 08/35] intersection between compartment boundary and AZ segmentaiton --- scripts/cooper/AZ_segmentation_h5.py | 35 ++++++++++++++++--- .../run_spatial_distribution_analysis.py | 2 +- synaptic_reconstruction/inference/AZ.py | 9 ++++- .../postprocessing/postprocess_AZ.py | 25 +++++++++++++ 4 files changed, 65 insertions(+), 6 deletions(-) create mode 100644 synaptic_reconstruction/inference/postprocessing/postprocess_AZ.py diff --git a/scripts/cooper/AZ_segmentation_h5.py b/scripts/cooper/AZ_segmentation_h5.py index 2fb7045..baa7225 100644 --- a/scripts/cooper/AZ_segmentation_h5.py +++ b/scripts/cooper/AZ_segmentation_h5.py @@ -34,7 +34,7 @@ def get_volume(input_path): input_volume = f[key][:] return input_volume -def run_AZ_segmentation(input_path, output_path, model_path, mask_path, 
mask_key,tile_shape, halo, key_label): +def run_AZ_segmentation(input_path, output_path, model_path, mask_path, mask_key,tile_shape, halo, key_label, compartment_seg): tiling = parse_tiling(tile_shape, halo) print(f"using tiling {tiling}") input = get_volume(input_path) @@ -46,7 +46,14 @@ def run_AZ_segmentation(input_path, output_path, model_path, mask_path, mask_key else: mask = None - foreground = segment_AZ(input_volume=input, model_path=model_path, verbose=False, tiling=tiling, mask = mask) + #check if intersection with compartment is necessary + if compartment_seg is None: + foreground = segment_AZ(input_volume=input, model_path=model_path, verbose=False, tiling=tiling, mask = mask) + intersection = None + else: + with open_file(compartment_seg, "r") as f: + compartment = f["/compartments/segment_from_3Dmodel_v1"][:] + foreground, intersection = segment_AZ(input_volume=input, model_path=model_path, verbose=False, tiling=tiling, mask = mask, compartment=compartment) seg_output = _require_output_folders(output_path) file_name = Path(input_path).stem @@ -73,6 +80,13 @@ def run_AZ_segmentation(input_path, output_path, model_path, mask_path, mask_key print("mask image already saved") else: f.create_dataset(mask_key, data = mask, compression = "gzip") + + if intersection is not None: + intersection_key = "AZ/compartment_AZ_intersection" + if intersection_key in f: + print("intersection already saved") + else: + f.create_dataset(intersection_key, data = intersection, compression = "gzip") @@ -93,8 +107,15 @@ def segment_folder(args): except: print(f"Mask file not found for {input_path}") mask_path = None + + if args.compartment_seg is not None: + try: + compartment_seg = os.path.join(args.compartment_seg, os.path.splitext(filename)[0] + '.h5') + except: + print(f"compartment file not found for {input_path}") + compartment_seg = None - run_AZ_segmentation(input_path, args.output_path, args.model_path, mask_path, args.mask_key, args.tile_shape, args.halo, args.key_label) + run_AZ_segmentation(input_path, args.output_path, args.model_path, mask_path, args.mask_key, args.tile_shape, args.halo, args.key_label, compartment_seg) def main(): parser = argparse.ArgumentParser(description="Segment vesicles in EM tomograms.") @@ -131,6 +152,12 @@ def main(): "--data_ext", "-d", default = ".h5", help="Format extension of data to be segmented, default is .h5." ) + parser.add_argument( + "--compartment_seg", "-c", + help="Path to compartment segmentation." + "If the compartment segmentation was executed before, this will add a key to output file that stores the intersection between compartment boundary and AZ." 
+ "Maybe need to adjust the compartment key that the segmentation is stored under" + ) args = parser.parse_args() input_ = args.input_path @@ -138,7 +165,7 @@ def main(): if os.path.isdir(input_): segment_folder(args) else: - run_AZ_segmentation(input_, args.output_path, args.model_path, args.mask_path, args.mask_key, args.tile_shape, args.halo, args.key_label) + run_AZ_segmentation(input_, args.output_path, args.model_path, args.mask_path, args.mask_key, args.tile_shape, args.halo, args.key_label, args.compartment_seg) print("Finished segmenting!") diff --git a/scripts/cooper/analysis/run_spatial_distribution_analysis.py b/scripts/cooper/analysis/run_spatial_distribution_analysis.py index 2002431..f8d0708 100644 --- a/scripts/cooper/analysis/run_spatial_distribution_analysis.py +++ b/scripts/cooper/analysis/run_spatial_distribution_analysis.py @@ -16,7 +16,7 @@ def compute_sizes_for_all_tomorams(): os.makedirs(RESULT_FOLDER, exist_ok=True) - resolution = (0.8681,) * 3 # Change for each dataset + resolution = (1.554,) * 3 # Change for each dataset #1.554 for Munc and snap #0.8681 for 04 dataset # Dictionary to hold the results for each dataset dataset_results = {} diff --git a/synaptic_reconstruction/inference/AZ.py b/synaptic_reconstruction/inference/AZ.py index b93218f..a1c9da8 100644 --- a/synaptic_reconstruction/inference/AZ.py +++ b/synaptic_reconstruction/inference/AZ.py @@ -6,7 +6,7 @@ import torch from synaptic_reconstruction.inference.util import get_prediction, _Scaler - +from synaptic_reconstruction.inference.postprocessing.postprocess_AZ import find_intersection_boundary def _run_segmentation( foreground, verbose, min_size, @@ -40,6 +40,7 @@ def segment_AZ( return_predictions: bool = False, scale: Optional[List[float]] = None, mask: Optional[np.ndarray] = None, + compartment: Optional[np.ndarray] = None, ) -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]: """ Segment mitochondria in an input volume. @@ -75,8 +76,14 @@ def segment_AZ( segmentation = _run_segmentation(foreground, verbose=verbose, min_size=min_size) + #returning prediciton and intersection not possible atm, but currently do not need prediction anyways if return_predictions: pred = scaler.rescale_output(pred, is_segmentation=False) return segmentation, pred + + if compartment is not None: + intersection = find_intersection_boundary(segmentation, compartment) + return segmentation, intersection + return segmentation diff --git a/synaptic_reconstruction/inference/postprocessing/postprocess_AZ.py b/synaptic_reconstruction/inference/postprocessing/postprocess_AZ.py new file mode 100644 index 0000000..54d9e2c --- /dev/null +++ b/synaptic_reconstruction/inference/postprocessing/postprocess_AZ.py @@ -0,0 +1,25 @@ +import numpy as np +from scipy.ndimage import binary_erosion + +def find_intersection_boundary(segmented_AZ, segmented_compartment): + """ + Find the intersection of the boundary of segmented_compartment with segmented_AZ. + + Parameters: + segmented_AZ (numpy.ndarray): 3D array representing the active zone (AZ). + segmented_compartment (numpy.ndarray): 3D array representing the compartment. + + Returns: + numpy.ndarray: 3D array with the intersection of the boundary of segmented_compartment and segmented_AZ. 
+ """ + # Step 0: Binarize the segmented_compartment + binarized_compartment = (segmented_compartment > 0).astype(int) + + # Step 1: Create a binary mask of the compartment boundary + eroded_compartment = binary_erosion(binarized_compartment) + boundary_compartment = binarized_compartment - eroded_compartment + + # Step 2: Find the intersection with the AZ + intersection = np.logical_and(boundary_compartment, segmented_AZ) + + return intersection.astype(int) # Convert boolean array to int (1 for intersecting points, 0 elsewhere) From 09f6c846cfc391096ee6c86fca49d4d5f56d8799 Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Tue, 12 Nov 2024 17:20:26 +0100 Subject: [PATCH 09/35] Update compartment postprocessing --- .../inference/compartments.py | 22 ++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/synaptic_reconstruction/inference/compartments.py b/synaptic_reconstruction/inference/compartments.py index a822d9f..701c222 100644 --- a/synaptic_reconstruction/inference/compartments.py +++ b/synaptic_reconstruction/inference/compartments.py @@ -77,6 +77,12 @@ def _segment_compartments_2d( mask = np.logical_or(binary_closing(mask, iterations=4), mask) segmentation[bb][mask] = prop.label + # import napari + # v = napari.Viewer() + # v.add_image(boundaries) + # v.add_labels(segmentation) + # napari.run() + return segmentation @@ -117,6 +123,7 @@ def _segment_compartments_3d( boundary_threshold=0.4, n_slices_exclude=0, min_z_extent=10, + postprocess_segments=False, ): distances = distance_transform_edt(prediction < boundary_threshold).astype("float32") seg_2d = np.zeros(prediction.shape, dtype="uint32") @@ -132,7 +139,8 @@ def _segment_compartments_3d( seg_2d[z] = seg_z seg = _merge_segmentation_3d(seg_2d, min_z_extent) - seg = _postprocess_seg_3d(seg) + if postprocess_segments: + seg = _postprocess_seg_3d(seg) # import napari # v = napari.Viewer() @@ -155,6 +163,9 @@ def segment_compartments( scale: Optional[List[float]] = None, mask: Optional[np.ndarray] = None, n_slices_exclude: int = 0, + boundary_threshold: float = 0.4, + min_z_extent: int = 10, + postprocess_segments: bool = False, **kwargs, ) -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]: """ @@ -194,9 +205,14 @@ def segment_compartments( # We may want to expose some of the parameters here. 
t0 = time.time() if input_volume.ndim == 2: - seg = _segment_compartments_2d(pred) + seg = _segment_compartments_2d(pred, boundary_threshold=boundary_threshold) else: - seg = _segment_compartments_3d(pred, n_slices_exclude=n_slices_exclude) + seg = _segment_compartments_3d( + pred, + boundary_threshold=boundary_threshold, + n_slices_exclude=n_slices_exclude, + postprocess_segments=postprocess_segments, + ) if verbose: print("Run segmentation in", time.time() - t0, "s") From f893d2300fab14a737b3d85adecb72af04644e22 Mon Sep 17 00:00:00 2001 From: SarahMuth Date: Wed, 13 Nov 2024 12:05:09 +0100 Subject: [PATCH 10/35] updating data analysis on smaller details --- .gitignore | 1 + scripts/cooper/AZ_segmentation_h5.py | 2 +- scripts/cooper/analysis/run_analysis_1.py | 106 +++++++++++++++++- .../run_spatial_distribution_analysis.py | 64 +++++++++-- scripts/cooper/compartment_segmentation_h5.py | 2 +- .../postprocessing/postprocess_AZ.py | 36 +++--- 6 files changed, 185 insertions(+), 26 deletions(-) diff --git a/.gitignore b/.gitignore index 4db569e..0377c4a 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,7 @@ models/*/ run_sbatch.sbatch slurm/ scripts/cooper/evaluation_results/ +analysis_results/ scripts/cooper/training/copy_testset.py scripts/rizzoli/upsample_data.py scripts/cooper/training/find_rec_testset.py diff --git a/scripts/cooper/AZ_segmentation_h5.py b/scripts/cooper/AZ_segmentation_h5.py index baa7225..da694c1 100644 --- a/scripts/cooper/AZ_segmentation_h5.py +++ b/scripts/cooper/AZ_segmentation_h5.py @@ -52,7 +52,7 @@ def run_AZ_segmentation(input_path, output_path, model_path, mask_path, mask_key intersection = None else: with open_file(compartment_seg, "r") as f: - compartment = f["/compartments/segment_from_3Dmodel_v1"][:] + compartment = f["/labels/compartment"][:] foreground, intersection = segment_AZ(input_volume=input, model_path=model_path, verbose=False, tiling=tiling, mask = mask, compartment=compartment) seg_output = _require_output_folders(output_path) diff --git a/scripts/cooper/analysis/run_analysis_1.py b/scripts/cooper/analysis/run_analysis_1.py index 523aba1..b166a71 100644 --- a/scripts/cooper/analysis/run_analysis_1.py +++ b/scripts/cooper/analysis/run_analysis_1.py @@ -5,15 +5,56 @@ import os from glob import glob +import numpy as np import pandas as pd import h5py from tqdm import tqdm from synaptic_reconstruction.imod.to_imod import convert_segmentation_to_spheres -DATA_ROOT = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/20241102_TOMO_DATA_Imig2014" # noqa -PREDICTION_ROOT = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/segmentation/for_spatial_distribution_analysis/20241102_TOMO_DATA_Imig2014" # noqa -RESULT_FOLDER = "./analysis_results/analysis_1" +DATA_ROOT = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/segmentation/for_spatial_distribution_analysis/final_Imig2014_seg/" # noqa +PREDICTION_ROOT = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/segmentation/for_spatial_distribution_analysis/final_Imig2014_seg/" # noqa +RESULT_FOLDER = "./analysis_results/AZ_intersect_autoCompartment" +def get_compartment_with_max_overlap(compartments, vesicles): + """ + Given 3D numpy arrays of compartments and vesicles, this function returns a binary mask + of the compartment with the most overlap with vesicles based on the number of overlapping voxels. + + Parameters: + compartments (numpy.ndarray): 3D array of compartment labels. + vesicles (numpy.ndarray): 3D array of vesicle labels or binary mask. 
+ + Returns: + numpy.ndarray: Binary mask of the compartment with the most overlap with vesicles. + """ + + unique_compartments = np.unique(compartments) + if 0 in unique_compartments: + unique_compartments = unique_compartments[unique_compartments != 0] + + max_overlap_count = 0 + best_compartment = None + + # Iterate over each compartment and calculate the overlap with vesicles + for compartment_label in unique_compartments: + # Create a binary mask for the current compartment + compartment_mask = compartments == compartment_label + vesicle_mask = vesicles > 0 + + intersection = np.logical_and(compartment_mask, vesicle_mask) + + # Calculate the number of overlapping voxels + overlap_count = np.sum(intersection) + + # Track the compartment with the most overlap in terms of voxel count + if overlap_count > max_overlap_count: + max_overlap_count = overlap_count + best_compartment = compartment_label + + # Create the final mask for the compartment with the most overlap + final_mask = compartments == best_compartment + + return final_mask # We compute the sizes for all vesicles in the compartment masks. # We use the same logic in the size computation as for the vesicle extraction to IMOD, @@ -57,9 +98,66 @@ def compute_sizes_for_all_tomorams(): }) result.to_csv(output_path, index=False) +def compute_sizes_for_all_tomorams_manComp(): + os.makedirs(RESULT_FOLDER, exist_ok=True) + + resolution = (1.554,) * 3 # Change for each dataset #1.554 for Munc and snap #0.8681 for 04 dataset + radius_factor = 1.3 + estimate_radius_2d = True + + # Dictionary to hold the results for each dataset + dataset_results = {} + + tomograms = sorted(glob(os.path.join(PREDICTION_ROOT, "**/*.h5"), recursive=True)) + for tomo in tqdm(tomograms): + ds_name, fname = os.path.split(tomo) + ds_name = os.path.split(ds_name)[1] + fname = os.path.splitext(fname)[0] + # Initialize a new dictionary entry for each dataset if not already present + if ds_name not in dataset_results: + dataset_results[ds_name] = {} + + # Skip if this tomogram already exists in the dataset dictionary + if fname in dataset_results[ds_name]: + continue + + # Load the vesicle segmentation from the predictions. 
+ with h5py.File(tomo, "r") as f: + segmentation = f["/vesicles/segment_from_combined_vesicles"][:] + + input_path = os.path.join(DATA_ROOT, ds_name, f"{fname}.h5") + assert os.path.exists(input_path), input_path + # Load the compartment mask from the tomogram + with h5py.File(input_path, "r") as f: + compartments = f["/compartments/segment_from_3Dmodel_v2"][:] + mask = get_compartment_with_max_overlap(compartments, segmentation) + + #if more than half of the vesicles (approximation, its checking pixel and not label) would get filtered by mask it means the compartment seg didn't work and thus we won't use the mask + if np.sum(segmentation[mask == 0] > 0) > (0.5 * np.sum(segmentation > 0)): + print("using no mask") + else: + segmentation[mask == 0] = 0 + _, sizes = convert_segmentation_to_spheres( + segmentation, resolution=resolution, radius_factor=radius_factor, estimate_radius_2d=estimate_radius_2d + ) + + # Add sizes to the dataset dictionary under the tomogram name + dataset_results[ds_name][fname] = sizes + + # Save each dataset's results to a single CSV file + for ds_name, tomogram_data in dataset_results.items(): + # Create a DataFrame where each column is a tomogram's sizes + result_df = pd.DataFrame.from_dict(tomogram_data, orient='index').transpose() + + # Define the output file path + output_path = os.path.join(RESULT_FOLDER, f"size_analysis_for_{ds_name}.csv") + + # Save the DataFrame to CSV + result_df.to_csv(output_path, index=False) def main(): - compute_sizes_for_all_tomorams() + #compute_sizes_for_all_tomorams() + compute_sizes_for_all_tomorams_manComp() if __name__ == "__main__": diff --git a/scripts/cooper/analysis/run_spatial_distribution_analysis.py b/scripts/cooper/analysis/run_spatial_distribution_analysis.py index f8d0708..fca7eed 100644 --- a/scripts/cooper/analysis/run_spatial_distribution_analysis.py +++ b/scripts/cooper/analysis/run_spatial_distribution_analysis.py @@ -4,12 +4,54 @@ import h5py from tqdm import tqdm from synaptic_reconstruction.distance_measurements import measure_segmentation_to_object_distances +import numpy as np -DATA_ROOT = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/20241102_TOMO_DATA_Imig2014" # noqa -PREDICTION_ROOT = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/segmentation/for_spatial_distribution_analysis/20241102_TOMO_DATA_Imig2014" # noqa -RESULT_FOLDER = "./analysis_results" +DATA_ROOT = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/segmentation/for_spatial_distribution_analysis/final_Imig2014_seg/" # noqa +PREDICTION_ROOT = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/segmentation/for_spatial_distribution_analysis/final_Imig2014_seg/" # noqa +RESULT_FOLDER = "./analysis_results/AZ_intersect_autoCompartment" +def get_compartment_with_max_overlap(compartments, vesicles): + """ + Given 3D numpy arrays of compartments and vesicles, this function returns a binary mask + of the compartment with the most overlap with vesicles based on the number of overlapping voxels. + + Parameters: + compartments (numpy.ndarray): 3D array of compartment labels. + vesicles (numpy.ndarray): 3D array of vesicle labels or binary mask. + + Returns: + numpy.ndarray: Binary mask of the compartment with the most overlap with vesicles. 
+ """ + + unique_compartments = np.unique(compartments) + if 0 in unique_compartments: + unique_compartments = unique_compartments[unique_compartments != 0] + + max_overlap_count = 0 + best_compartment = None + + # Iterate over each compartment and calculate the overlap with vesicles + for compartment_label in unique_compartments: + # Create a binary mask for the current compartment + compartment_mask = compartments == compartment_label + vesicle_mask = vesicles > 0 + + intersection = np.logical_and(compartment_mask, vesicle_mask) + + # Calculate the number of overlapping voxels + overlap_count = np.sum(intersection) + + # Track the compartment with the most overlap in terms of voxel count + if overlap_count > max_overlap_count: + max_overlap_count = overlap_count + best_compartment = compartment_label + + # Create the final mask for the compartment with the most overlap + final_mask = compartments == best_compartment + + return final_mask + # We compute the distances for all vesicles in the compartment masks to the AZ. # We use the same different resolution, depending on dataset. # The closest distance is calculated, i.e., the closest point on the outer membrane of the vesicle to the AZ. @@ -38,16 +80,24 @@ def compute_sizes_for_all_tomorams(): # Load the vesicle segmentation from the predictions with h5py.File(tomo, "r") as f: segmentation = f["/vesicles/segment_from_combined_vesicles"][:] - segmented_object = f["/AZ/segment_from_AZmodel_v3"][:] + segmented_object = f["/AZ/compartment_AZ_intersection"][:] + #if AZ intersect is small, compartment seg didn't align with AZ so we use the normal AZ and not intersect + if (segmented_object == 0).all() or np.sum(segmented_object == 1) < 2000: + segmented_object = f["/AZ/segment_from_AZmodel_v3"][:] input_path = os.path.join(DATA_ROOT, ds_name, f"{fname}.h5") assert os.path.exists(input_path), input_path # Load the compartment mask from the tomogram with h5py.File(input_path, "r") as f: - mask = f["labels/compartment"][:] - - segmentation[mask == 0] = 0 + compartments = f["/compartments/segment_from_3Dmodel_v2"][:] + mask = get_compartment_with_max_overlap(compartments, segmentation) + + #if more than half of the vesicles (approximation, its checking pixel and not label) would get filtered by mask it means the compartment seg didn't work and thus we won't use the mask + if np.sum(segmentation[mask == 0] > 0) > (0.5 * np.sum(segmentation > 0)): + print("using no mask") + else: + segmentation[mask == 0] = 0 distances, _, _, _ = measure_segmentation_to_object_distances( segmentation, segmented_object=segmented_object, resolution=resolution ) diff --git a/scripts/cooper/compartment_segmentation_h5.py b/scripts/cooper/compartment_segmentation_h5.py index 1d0020a..573ac48 100644 --- a/scripts/cooper/compartment_segmentation_h5.py +++ b/scripts/cooper/compartment_segmentation_h5.py @@ -36,7 +36,7 @@ def run_compartment_segmentation(input_path, output_path, model_path, tile_shape print(f"using tiling {tiling}") input = get_volume(input_path) - segmentation, prediction = segment_compartments(input_volume=input, model_path=model_path, verbose=False, tiling=tiling, return_predictions=True, scale=[0.25, 0.25, 0.25]) + segmentation, prediction = segment_compartments(input_volume=input, model_path=model_path, verbose=False, tiling=tiling, return_predictions=True, scale=[0.25, 0.25, 0.25],boundary_threshold=0.2, postprocess_segments=False) seg_output = _require_output_folders(output_path) file_name = Path(input_path).stem diff --git 
a/synaptic_reconstruction/inference/postprocessing/postprocess_AZ.py b/synaptic_reconstruction/inference/postprocessing/postprocess_AZ.py index 54d9e2c..8ef2cd3 100644 --- a/synaptic_reconstruction/inference/postprocessing/postprocess_AZ.py +++ b/synaptic_reconstruction/inference/postprocessing/postprocess_AZ.py @@ -1,25 +1,35 @@ import numpy as np -from scipy.ndimage import binary_erosion +from skimage.segmentation import find_boundaries def find_intersection_boundary(segmented_AZ, segmented_compartment): """ - Find the intersection of the boundary of segmented_compartment with segmented_AZ. + Find the cumulative intersection of the boundary of each label in segmented_compartment with segmented_AZ. Parameters: segmented_AZ (numpy.ndarray): 3D array representing the active zone (AZ). - segmented_compartment (numpy.ndarray): 3D array representing the compartment. + segmented_compartment (numpy.ndarray): 3D array representing the compartment, with multiple labels. Returns: - numpy.ndarray: 3D array with the intersection of the boundary of segmented_compartment and segmented_AZ. + numpy.ndarray: 3D array with the cumulative intersection of all boundaries of segmented_compartment labels with segmented_AZ. """ - # Step 0: Binarize the segmented_compartment - binarized_compartment = (segmented_compartment > 0).astype(int) + # Step 0: Initialize an empty array to accumulate intersections + cumulative_intersection = np.zeros_like(segmented_AZ, dtype=bool) - # Step 1: Create a binary mask of the compartment boundary - eroded_compartment = binary_erosion(binarized_compartment) - boundary_compartment = binarized_compartment - eroded_compartment - - # Step 2: Find the intersection with the AZ - intersection = np.logical_and(boundary_compartment, segmented_AZ) + # Step 1: Loop through each unique label in segmented_compartment (excluding 0 if it represents background) + labels = np.unique(segmented_compartment) + labels = labels[labels != 0] # Exclude background label (0) if necessary + + for label in labels: + # Step 2: Create a binary mask for the current label + label_mask = (segmented_compartment == label) + + # Step 3: Find the boundary of the current label's compartment + boundary_compartment = find_boundaries(label_mask, mode='outer') + + # Step 4: Find the intersection with the AZ for this label's boundary + intersection = np.logical_and(boundary_compartment, segmented_AZ) + + # Step 5: Accumulate intersections for each label + cumulative_intersection = np.logical_or(cumulative_intersection, intersection) - return intersection.astype(int) # Convert boolean array to int (1 for intersecting points, 0 elsewhere) + return cumulative_intersection.astype(int) # Convert boolean array to int (1 for intersecting points, 0 elsewhere) From 08c56b9c08bb244869ad77d689efcc5ca294dfa8 Mon Sep 17 00:00:00 2001 From: SarahMuth Date: Wed, 13 Nov 2024 13:03:23 +0100 Subject: [PATCH 11/35] minor updates data analysis --- scripts/cooper/analysis/run_analysis_1.py | 44 ++++++++----- .../run_spatial_distribution_analysis.py | 62 +++++++++++++++++-- 2 files changed, 87 insertions(+), 19 deletions(-) diff --git a/scripts/cooper/analysis/run_analysis_1.py b/scripts/cooper/analysis/run_analysis_1.py index b166a71..459d490 100644 --- a/scripts/cooper/analysis/run_analysis_1.py +++ b/scripts/cooper/analysis/run_analysis_1.py @@ -11,9 +11,9 @@ from tqdm import tqdm from synaptic_reconstruction.imod.to_imod import convert_segmentation_to_spheres -DATA_ROOT = 
"/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/segmentation/for_spatial_distribution_analysis/final_Imig2014_seg/" # noqa -PREDICTION_ROOT = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/segmentation/for_spatial_distribution_analysis/final_Imig2014_seg/" # noqa -RESULT_FOLDER = "./analysis_results/AZ_intersect_autoCompartment" +DATA_ROOT = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/20241102_TOMO_DATA_Imig2014/exported/" # noqa +PREDICTION_ROOT = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/segmentation/for_spatial_distribution_analysis/final_Imig2014_seg_manComp" # noqa +RESULT_FOLDER = "./analysis_results/AZ_intersect_manualCompartment" def get_compartment_with_max_overlap(compartments, vesicles): """ @@ -60,20 +60,27 @@ def get_compartment_with_max_overlap(compartments, vesicles): # We use the same logic in the size computation as for the vesicle extraction to IMOD, # including the radius correction factor. # The number of vesicles is automatically computed as the length of the size list. -def compute_sizes_for_all_tomorams(): +def compute_sizes_for_all_tomorams_manComp(): os.makedirs(RESULT_FOLDER, exist_ok=True) - resolution = (0.8681,) * 3 #change for each dataset + resolution = (1.554,) * 3 # Change for each dataset #1.554 for Munc and snap #0.8681 for 04 dataset radius_factor = 1.3 estimate_radius_2d = True + # Dictionary to hold the results for each dataset + dataset_results = {} + tomograms = sorted(glob(os.path.join(PREDICTION_ROOT, "**/*.h5"), recursive=True)) for tomo in tqdm(tomograms): ds_name, fname = os.path.split(tomo) ds_name = os.path.split(ds_name)[1] fname = os.path.splitext(fname)[0] - output_path = os.path.join(RESULT_FOLDER, f"{ds_name}_{fname}.csv") - if os.path.exists(output_path): + # Initialize a new dictionary entry for each dataset if not already present + if ds_name not in dataset_results: + dataset_results[ds_name] = {} + + # Skip if this tomogram already exists in the dataset dictionary + if fname in dataset_results[ds_name]: continue # Load the vesicle segmentation from the predictions. 
@@ -91,14 +98,21 @@ def compute_sizes_for_all_tomorams(): segmentation, resolution=resolution, radius_factor=radius_factor, estimate_radius_2d=estimate_radius_2d ) - result = pd.DataFrame({ - "dataset": [ds_name] * len(sizes), - "tomogram": [fname] * len(sizes), - "sizes": sizes - }) - result.to_csv(output_path, index=False) + # Add sizes to the dataset dictionary under the tomogram name + dataset_results[ds_name][fname] = sizes -def compute_sizes_for_all_tomorams_manComp(): + # Save each dataset's results to a single CSV file + for ds_name, tomogram_data in dataset_results.items(): + # Create a DataFrame where each column is a tomogram's sizes + result_df = pd.DataFrame.from_dict(tomogram_data, orient='index').transpose() + + # Define the output file path + output_path = os.path.join(RESULT_FOLDER, f"size_analysis_for_{ds_name}.csv") + + # Save the DataFrame to CSV + result_df.to_csv(output_path, index=False) + +def compute_sizes_for_all_tomorams_autoComp(): os.makedirs(RESULT_FOLDER, exist_ok=True) resolution = (1.554,) * 3 # Change for each dataset #1.554 for Munc and snap #0.8681 for 04 dataset @@ -156,8 +170,8 @@ def compute_sizes_for_all_tomorams_manComp(): result_df.to_csv(output_path, index=False) def main(): - #compute_sizes_for_all_tomorams() compute_sizes_for_all_tomorams_manComp() + #compute_sizes_for_all_tomorams_autoComp() if __name__ == "__main__": diff --git a/scripts/cooper/analysis/run_spatial_distribution_analysis.py b/scripts/cooper/analysis/run_spatial_distribution_analysis.py index fca7eed..9a890a1 100644 --- a/scripts/cooper/analysis/run_spatial_distribution_analysis.py +++ b/scripts/cooper/analysis/run_spatial_distribution_analysis.py @@ -6,9 +6,9 @@ from synaptic_reconstruction.distance_measurements import measure_segmentation_to_object_distances import numpy as np -DATA_ROOT = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/segmentation/for_spatial_distribution_analysis/final_Imig2014_seg/" # noqa -PREDICTION_ROOT = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/segmentation/for_spatial_distribution_analysis/final_Imig2014_seg/" # noqa -RESULT_FOLDER = "./analysis_results/AZ_intersect_autoCompartment" +DATA_ROOT = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/20241102_TOMO_DATA_Imig2014/exported/" # noqa +PREDICTION_ROOT = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/segmentation/for_spatial_distribution_analysis/final_Imig2014_seg_manComp" # noqa +RESULT_FOLDER = "./analysis_results/AZ_intersect_manualCompartment" def get_compartment_with_max_overlap(compartments, vesicles): @@ -116,9 +116,63 @@ def compute_sizes_for_all_tomorams(): # Save the DataFrame to CSV result_df.to_csv(output_path, index=False) + +def compute_sizes_for_all_tomorams_manComp(): + os.makedirs(RESULT_FOLDER, exist_ok=True) + + resolution = (1.554,) * 3 # Change for each dataset #1.554 for Munc and snap #0.8681 for 04 dataset + + # Dictionary to hold the results for each dataset + dataset_results = {} + + tomograms = sorted(glob(os.path.join(PREDICTION_ROOT, "**/*.h5"), recursive=True)) + for tomo in tqdm(tomograms): + ds_name, fname = os.path.split(tomo) + ds_name = os.path.split(ds_name)[1] + fname = os.path.splitext(fname)[0] + # Initialize a new dictionary entry for each dataset if not already present + if ds_name not in dataset_results: + dataset_results[ds_name] = {} + + # Skip if this tomogram already exists in the dataset dictionary + if fname in dataset_results[ds_name]: + continue + + # Load the vesicle segmentation from 
the predictions + with h5py.File(tomo, "r") as f: + segmentation = f["/vesicles/segment_from_combined_vesicles"][:] + segmented_object = f["/AZ/compartment_AZ_intersection_manComp"][:] + + input_path = os.path.join(DATA_ROOT, ds_name, f"{fname}.h5") + assert os.path.exists(input_path), input_path + + # Load the compartment mask from the tomogram + with h5py.File(input_path, "r") as f: + mask = f["/labels/compartment"][:] + + segmentation[mask == 0] = 0 + distances, _, _, _ = measure_segmentation_to_object_distances( + segmentation, segmented_object=segmented_object, resolution=resolution + ) + + # Add distances to the dataset dictionary under the tomogram name + dataset_results[ds_name][fname] = distances + + # Save each dataset's results to a single CSV file + for ds_name, tomogram_data in dataset_results.items(): + # Create a DataFrame where each column is a tomogram's distances + result_df = pd.DataFrame.from_dict(tomogram_data, orient='index').transpose() + + # Define the output file path + output_path = os.path.join(RESULT_FOLDER, f"spatial_distribution_analysis_for_{ds_name}.csv") + + # Save the DataFrame to CSV + result_df.to_csv(output_path, index=False) + def main(): - compute_sizes_for_all_tomorams() + #compute_sizes_for_all_tomorams() + compute_sizes_for_all_tomorams_manComp() if __name__ == "__main__": From 36d834fd4a92571819be466ddccf405e311fffc2 Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Thu, 14 Nov 2024 15:25:32 +0100 Subject: [PATCH 12/35] Implement inner ear analysis WIP --- .../inner_ear/analysis/analyze_distances.py | 0 .../analysis/analyze_vesicle_pools.py | 73 +++++++++++++++++++ scripts/inner_ear/processing/run_analyis.py | 41 +++++++---- 3 files changed, 100 insertions(+), 14 deletions(-) create mode 100644 scripts/inner_ear/analysis/analyze_distances.py create mode 100644 scripts/inner_ear/analysis/analyze_vesicle_pools.py diff --git a/scripts/inner_ear/analysis/analyze_distances.py b/scripts/inner_ear/analysis/analyze_distances.py new file mode 100644 index 0000000..e69de29 diff --git a/scripts/inner_ear/analysis/analyze_vesicle_pools.py b/scripts/inner_ear/analysis/analyze_vesicle_pools.py new file mode 100644 index 0000000..b53a9f6 --- /dev/null +++ b/scripts/inner_ear/analysis/analyze_vesicle_pools.py @@ -0,0 +1,73 @@ +import sys + +import numpy as np +import pandas as pd + +sys.path.append("..") +sys.path.append("../processing") + +from combine_measurements import combine_manual_results, combine_automatic_results # noqa +# from compare_pool_assignments import create_manual_assignment +from parse_table import parse_table, get_data_root # noqa + + +def get_manual_assignments(): + result_path = "../results/20240917_1/fully_manual_analysis_results.xlsx" + results = pd.read_excel(result_path) + return results + + +def get_automatic_assignments(tomograms): + result_path = "../results/20240917_1/automatic_analysis_results.xlsx" + results = pd.read_excel(result_path) + results = results[results["tomogram"].isin(tomograms)] + return results + + +def plot_confusion_matrix(manual_assignments, automatic_assignments): + pass + + +def for_tomos_with_annotation(): + manual_assignments = get_manual_assignments() + manual_tomograms = pd.unique(manual_assignments["tomogram"]) + automatic_assignments = get_automatic_assignments(manual_tomograms) + + tomograms = pd.unique(automatic_assignments["tomogram"]) + manual_assignments = manual_assignments[manual_assignments["tomogram"].isin(tomograms)] + assert len(pd.unique(manual_assignments["tomogram"])) == 
len(pd.unique(automatic_assignments["tomogram"])) + + n_tomograms = len(tomograms) + pool_names, manual_pool_counts = np.unique(manual_assignments["pool"].values, return_counts=True) + _, automatic_pool_counts = np.unique(automatic_assignments["pool"].values, return_counts=True) + + manual_pool_counts = manual_pool_counts.astype("float32") + manual_pool_counts /= n_tomograms + automatic_pool_counts = automatic_pool_counts.astype("float32") + automatic_pool_counts /= n_tomograms + + print(pool_names) + print(manual_pool_counts) + print(automatic_pool_counts) + + # TODO plot as a bar chart + # TODO save excel + # TODO add 'more automatic' results + + breakpoint() + + +# TODO +def for_all_tomos(): + pass + + +def main(): + # data_root = get_data_root() + # table_path = os.path.join(data_root, "Electron-Microscopy-Susi", "Übersicht.xlsx") + # table = parse_table(table_path, data_root) + for_tomos_with_annotation() + + +if __name__ == "__main__": + main() diff --git a/scripts/inner_ear/processing/run_analyis.py b/scripts/inner_ear/processing/run_analyis.py index baeade1..8508673 100644 --- a/scripts/inner_ear/processing/run_analyis.py +++ b/scripts/inner_ear/processing/run_analyis.py @@ -334,8 +334,7 @@ def _insert_missing_vesicles(vesicle_path, original_vesicle_path, pool_correctio imageio.imwrite(vesicle_path, vesicles) -# TODO adapt to segmentation without PD -def analyze_folder(folder, version, n_ribbons, force): +def analyze_folder(folder, version, n_ribbons, force, use_corrected_vesicles): data_path = get_data_path(folder) output_folder = os.path.join(folder, "automatisch", f"v{version}") @@ -352,12 +351,20 @@ def analyze_folder(folder, version, n_ribbons, force): correction_folder = _match_correction_folder(folder) if os.path.exists(correction_folder): output_folder = correction_folder - result_path = os.path.join(output_folder, "measurements.xlsx") + + if use_corrected_vesicles: + result_path = os.path.join(output_folder, "measurements.xlsx") + else: + result_path = os.path.join(output_folder, "measurements_uncorrected_assignments.xlsx") + if os.path.exists(result_path) and not force: return print("Analyse the corrected segmentations from", correction_folder) for seg_name in segmentation_names: + if seg_name == "vesicles" and not use_corrected_vesicles: + continue + seg_path = _match_correction_file(correction_folder, seg_name) if os.path.exists(seg_path): @@ -371,7 +378,10 @@ def analyze_folder(folder, version, n_ribbons, force): segmentation_paths[seg_name] = seg_path - result_path = os.path.join(output_folder, "measurements.xlsx") + if use_corrected_vesicles: + result_path = os.path.join(output_folder, "measurements.xlsx") + else: + result_path = os.path.join(output_folder, "measurements_uncorrected_assignments.xlsx") if os.path.exists(result_path) and not force: return @@ -398,7 +408,7 @@ def analyze_folder(folder, version, n_ribbons, force): ) -def run_analysis(table, version, force=False, val_table=None): +def run_analysis(table, version, force=False, val_table=None, use_corrected_vesicles=True): for i, row in tqdm(table.iterrows(), total=len(table)): folder = row["Local Path"] if folder == "": @@ -426,19 +436,19 @@ def run_analysis(table, version, force=False, val_table=None): micro = row["EM alt vs. 
Neu"] if micro == "beides": - analyze_folder(folder, version, n_ribbons, force=force) + analyze_folder(folder, version, n_ribbons, force=force, use_corrected_vesicles=use_corrected_vesicles) folder_new = os.path.join(folder, "Tomo neues EM") if not os.path.exists(folder_new): folder_new = os.path.join(folder, "neues EM") assert os.path.exists(folder_new), folder_new - analyze_folder(folder_new, version, n_ribbons, force=force) + analyze_folder(folder_new, version, n_ribbons, force=force, use_corrected_vesicles=use_corrected_vesicles) elif micro == "alt": - analyze_folder(folder, version, n_ribbons, force=force) + analyze_folder(folder, version, n_ribbons, force=force, use_corrected_vesicles=use_corrected_vesicles) elif micro == "neu": - analyze_folder(folder, version, n_ribbons, force=force) + analyze_folder(folder, version, n_ribbons, force=force, use_corrected_vesicles=use_corrected_vesicles) def main(): @@ -447,13 +457,16 @@ def main(): table = parse_table(table_path, data_root) version = 2 - force = True + force = False + use_corrected_vesicles = False - val_table_path = os.path.join(data_root, "Electron-Microscopy-Susi", "Validierungs-Tabelle-v3.xlsx") - val_table = pandas.read_excel(val_table_path) - # val_table = None + # val_table_path = os.path.join(data_root, "Electron-Microscopy-Susi", "Validierungs-Tabelle-v3.xlsx") + # val_table = pandas.read_excel(val_table_path) + val_table = None - run_analysis(table, version, force=force, val_table=val_table) + run_analysis( + table, version, force=force, val_table=val_table, use_corrected_vesicles=use_corrected_vesicles + ) if __name__ == "__main__": From 49d1b7ce8834dd9e81c594f7a3e96f91f6b2208e Mon Sep 17 00:00:00 2001 From: SarahMuth Date: Thu, 14 Nov 2024 16:52:48 +0100 Subject: [PATCH 13/35] calculation of AZ area --- scripts/cooper/analysis/calc_AZ_area.py | 227 ++++++++++++++++++++++++ 1 file changed, 227 insertions(+) create mode 100644 scripts/cooper/analysis/calc_AZ_area.py diff --git a/scripts/cooper/analysis/calc_AZ_area.py b/scripts/cooper/analysis/calc_AZ_area.py new file mode 100644 index 0000000..e9fcb52 --- /dev/null +++ b/scripts/cooper/analysis/calc_AZ_area.py @@ -0,0 +1,227 @@ +import h5py +import numpy as np +import os +import csv +from scipy.ndimage import binary_opening, median_filter,zoom, binary_closing +from skimage.measure import label, regionprops +from synaptic_reconstruction.morphology import compute_object_morphology +from skimage.morphology import ball +from scipy.spatial import ConvexHull +from skimage.draw import polygon + +def calculate_AZ_area_per_slice(AZ_slice, pixel_size_nm=1.554): + """ + Calculate the area of the AZ in a single 2D slice after applying error-reducing processing. + + Parameters: + - AZ_slice (numpy array): 2D array representing a single slice of the AZ segmentation. + - pixel_size_nm (float): Size of a pixel in nanometers. + + Returns: + - slice_area_nm2 (float): The area of the AZ in the slice in square nanometers. + """ + # Apply binary opening or median filter to reduce small segmentation errors + AZ_slice_filtered = binary_opening(AZ_slice, structure=np.ones((3, 3))).astype(int) + + # Calculate area in this slice + num_AZ_pixels = np.sum(AZ_slice_filtered == 1) + slice_area_nm2 = num_AZ_pixels * (pixel_size_nm ** 2) + + return slice_area_nm2 + +def calculate_total_AZ_area(tomo_path, pixel_size_nm=1.554): + """ + Calculate the total area of the AZ across all slices in a 3D tomogram file. + + Parameters: + - tomo_path (str): Path to the tomogram file (HDF5 format). 
+ - pixel_size_nm (float): Size of a pixel in nanometers. + + Returns: + - total_AZ_area_nm2 (float): The total area of the AZ in square nanometers. + """ + with h5py.File(tomo_path, "r") as f: + AZ_intersect_seg = f["/AZ/compartment_AZ_intersection_manComp"][:] + + # Calculate the AZ area for each slice along the z-axis + total_AZ_area_nm2 = 0 + for z_slice in AZ_intersect_seg: + slice_area_nm2 = calculate_AZ_area_per_slice(z_slice, pixel_size_nm) + total_AZ_area_nm2 += slice_area_nm2 + + return total_AZ_area_nm2 + +def calculate_AZ_area_simple(tomo_path, pixel_size_nm=1.554): + """ + Calculate the area of the AZ (active zone) in a 3D tomogram file by counting labeled voxels. + + Parameters: + - tomo_path (str): Path to the tomogram file (HDF5 format). + - pixel_size_nm (float): Size of a pixel in nanometers (default is 1.554 nm). + + Returns: + - AZ_area_nm2 (float): The area of the AZ in square nanometers. + """ + # Open the file and read the AZ intersection segmentation data + with h5py.File(tomo_path, "r") as f: + AZ_intersect_seg = f["/AZ/compartment_AZ_intersection_manComp"][:] + + # Count voxels with label = 1 + num_AZ_voxels = np.sum(AZ_intersect_seg == 1) + + # Calculate the area in square nanometers + AZ_area_nm2 = num_AZ_voxels * (pixel_size_nm ** 2) + + return AZ_area_nm2 + +def calculate_AZ_surface(tomo_path, pixel_size_nm=1.554): + with h5py.File(tomo_path, "r") as f: + AZ_seg = f["/AZ/segment_from_AZmodel_v3"][:] + + # Apply binary closing to smooth the segmented regions + struct_elem = ball(1) # Use a small 3D structuring element + AZ_seg_smoothed = binary_closing(AZ_seg > 0, structure=struct_elem, iterations=20) + + labeled_seg = label(AZ_seg_smoothed) + + regions = regionprops(labeled_seg) + if regions: + # Sort regions by area and get the label of the largest region + largest_region = max(regions, key=lambda r: r.area) + largest_label = largest_region.label + + largest_component_mask = (labeled_seg == largest_label) + AZ_seg_filtered = largest_component_mask.astype(np.uint8) + + else: + # If no regions found, return an empty array + AZ_seg_filtered = np.zeros_like(AZ_seg, dtype=np.uint8) + + morphology_data = compute_object_morphology(AZ_seg_filtered, "AZ Structure", resolution=(pixel_size_nm, pixel_size_nm, pixel_size_nm)) + surface_column = "surface [nm^2]" #if resolution is not None else "surface [pixel^2]" + surface_area = morphology_data[surface_column].iloc[0] + + return surface_area + +def calculate_AZ_surface_convexHull(tomo_path, pixel_size_nm=1.554): + with h5py.File(tomo_path, "r") as f: + AZ_seg = f["/AZ/segment_from_AZmodel_v3"][:] + + # Apply binary closing to smooth the segmented regions + struct_elem = ball(1) # Use a small 3D structuring element + AZ_seg_smoothed = binary_closing(AZ_seg > 0, structure=struct_elem, iterations=10) + + labeled_seg = label(AZ_seg_smoothed) + + regions = regionprops(labeled_seg) + if regions: + # Sort regions by area and get the label of the largest region + largest_region = max(regions, key=lambda r: r.area) + largest_label = largest_region.label + + largest_component_mask = (labeled_seg == largest_label) + AZ_seg_filtered = largest_component_mask.astype(np.uint8) + AZ_seg = AZ_seg_filtered + # Extract coordinates of non-zero points + points = np.argwhere(AZ_seg > 0) # Get the coordinates of non-zero (foreground) pixels + + if points.shape[0] < 4: + # ConvexHull requires at least 4 points in 3D to form a valid hull + AZ_seg_filtered = np.zeros_like(AZ_seg, dtype=np.uint8) + else: + # Apply ConvexHull to the points + hull =
ConvexHull(points) + + # Create a binary mask for the convex hull + convex_hull_mask = np.zeros_like(AZ_seg, dtype=bool) + + # Iterate over each simplex (facet) of the convex hull and fill in the polygon + for simplex in hull.simplices: + # For each face of the convex hull, extract the vertices and convert to a 2D polygon + polygon_coords = points[simplex] + rr, cc = polygon(polygon_coords[:, 0], polygon_coords[:, 1]) + convex_hull_mask[rr, cc] = True + + # Optional: Label the convex hull mask + labeled_seg = label(convex_hull_mask) + regions = regionprops(labeled_seg) + + if regions: + # Sort regions by area and get the label of the largest region + largest_region = max(regions, key=lambda r: r.area) + largest_label = largest_region.label + + largest_component_mask = (labeled_seg == largest_label) + AZ_seg_filtered = largest_component_mask.astype(np.uint8) + + else: + AZ_seg_filtered = np.zeros_like(AZ_seg, dtype=np.uint8) + + # Calculate surface area + morphology_data = compute_object_morphology(AZ_seg_filtered, "AZ Structure", resolution=(pixel_size_nm, pixel_size_nm, pixel_size_nm)) + surface_column = "surface [nm^2]" + surface_area = morphology_data[surface_column].iloc[0] + + return surface_area + +def process_datasets(folder_path, output_csv="AZ_areas.csv", pixel_size_nm=1.554): + """ + Process all tomograms in multiple datasets within a folder and save results to a CSV. + + Parameters: + - folder_path (str): Path to the folder containing dataset folders with tomograms. + - output_csv (str): Filename for the output CSV file. + - pixel_size_nm (float): Size of a pixel in nanometers. + """ + results = [] + + # Iterate over each dataset folder + for dataset_name in os.listdir(folder_path): + dataset_path = os.path.join(folder_path, dataset_name) + + # Check if it's a directory (skip files in the main folder) + if not os.path.isdir(dataset_path): + continue + + # Iterate over each tomogram file in the dataset folder + for tomo_file in os.listdir(dataset_path): + tomo_path = os.path.join(dataset_path, tomo_file) + + # Check if the file is an HDF5 file (optional) + if tomo_file.endswith(".h5") or tomo_file.endswith(".hdf5"): + try: + # Calculate AZ area + #AZ_area = calculate_total_AZ_area(tomo_path, pixel_size_nm) + #AZ_area = calculate_AZ_area_simple(tomo_path, pixel_size_nm) + #AZ_surface_area = calculate_AZ_surface(tomo_path, pixel_size_nm) + AZ_surface_area = calculate_AZ_surface_convexHull(tomo_path, pixel_size_nm) + # Append results to list + results.append({ + "Dataset": dataset_name, + "Tomogram": tomo_file, + "AZ_surface_area": AZ_surface_area + }) + except Exception as e: + print(f"Error processing {tomo_file} in {dataset_name}: {e}") + + # Write results to a CSV file + with open(output_csv, mode="w", newline="") as csvfile: + fieldnames = ["Dataset", "Tomogram", "AZ_surface_area"] + writer = csv.DictWriter(csvfile, fieldnames=fieldnames) + + writer.writeheader() + for result in results: + writer.writerow(result) + + print(f"Results saved to {output_csv}") + +def main(): + # Define the path to the folder containing dataset folders + folder_path = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/segmentation/for_spatial_distribution_analysis/final_Imig2014_seg_manComp" + output_csv = "./analysis_results/AZ_intersect_manualCompartment/AZ_surface_area.csv" + # Call the function to process datasets and save results + process_datasets(folder_path, output_csv = output_csv) + +# Call main +if __name__ == "__main__": + main() From 8a515d1704409f328d2fffd7cbc99e9d63986ba5 Mon 
Sep 17 00:00:00 2001 From: SarahMuth Date: Thu, 14 Nov 2024 18:01:29 +0100 Subject: [PATCH 14/35] corrected radius factor --- scripts/cooper/analysis/run_analysis_1.py | 53 ++++++++++++++--------- 1 file changed, 33 insertions(+), 20 deletions(-) diff --git a/scripts/cooper/analysis/run_analysis_1.py b/scripts/cooper/analysis/run_analysis_1.py index 459d490..4077ea3 100644 --- a/scripts/cooper/analysis/run_analysis_1.py +++ b/scripts/cooper/analysis/run_analysis_1.py @@ -11,9 +11,9 @@ from tqdm import tqdm from synaptic_reconstruction.imod.to_imod import convert_segmentation_to_spheres -DATA_ROOT = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/20241102_TOMO_DATA_Imig2014/exported/" # noqa -PREDICTION_ROOT = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/segmentation/for_spatial_distribution_analysis/final_Imig2014_seg_manComp" # noqa -RESULT_FOLDER = "./analysis_results/AZ_intersect_manualCompartment" +DATA_ROOT = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/segmentation/for_spatial_distribution_analysis/final_Imig2014_seg/" # noqa +PREDICTION_ROOT = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/segmentation/for_spatial_distribution_analysis/final_Imig2014_seg/" # noqa +RESULT_FOLDER = "./analysis_results/AZ_intersect_autoCompartment" def get_compartment_with_max_overlap(compartments, vesicles): """ @@ -64,7 +64,7 @@ def compute_sizes_for_all_tomorams_manComp(): os.makedirs(RESULT_FOLDER, exist_ok=True) resolution = (1.554,) * 3 # Change for each dataset #1.554 for Munc and snap #0.8681 for 04 dataset - radius_factor = 1.3 + radius_factor = 0.7 estimate_radius_2d = True # Dictionary to hold the results for each dataset @@ -112,14 +112,21 @@ def compute_sizes_for_all_tomorams_manComp(): # Save the DataFrame to CSV result_df.to_csv(output_path, index=False) +import os +import pandas as pd +import numpy as np +from glob import glob +import h5py +from tqdm import tqdm + def compute_sizes_for_all_tomorams_autoComp(): os.makedirs(RESULT_FOLDER, exist_ok=True) resolution = (1.554,) * 3 # Change for each dataset #1.554 for Munc and snap #0.8681 for 04 dataset - radius_factor = 1.3 + radius_factor = 0.7 estimate_radius_2d = True - # Dictionary to hold the results for each dataset + # Dictionary to hold the results for each dataset and category (CTRL or DKO) dataset_results = {} tomograms = sorted(glob(os.path.join(PREDICTION_ROOT, "**/*.h5"), recursive=True)) @@ -127,12 +134,16 @@ def compute_sizes_for_all_tomorams_autoComp(): ds_name, fname = os.path.split(tomo) ds_name = os.path.split(ds_name)[1] fname = os.path.splitext(fname)[0] - # Initialize a new dictionary entry for each dataset if not already present + + # Determine if the tomogram is 'CTRL' or 'DKO' + category = "CTRL" if "CTRL" in fname else "DKO" + + # Initialize a new dictionary entry for each dataset and category if not already present if ds_name not in dataset_results: - dataset_results[ds_name] = {} + dataset_results[ds_name] = {'CTRL': {}, 'DKO': {}} # Skip if this tomogram already exists in the dataset dictionary - if fname in dataset_results[ds_name]: + if fname in dataset_results[ds_name][category]: continue # Load the vesicle segmentation from the predictions. 
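The masking applied in the next hunk uses get_compartment_with_max_overlap, defined earlier in this patch series. A minimal toy sketch of the underlying criterion, with made-up arrays (the compartment whose voxels overlap most with the vesicle mask is kept):

import numpy as np

compartments = np.zeros((1, 4, 4), dtype=int)
compartments[0, :2, :] = 1   # compartment 1 fills the upper half
compartments[0, 2:, :] = 2   # compartment 2 fills the lower half
vesicles = np.zeros_like(compartments)
vesicles[0, 2:, :2] = 1      # all vesicles lie inside compartment 2

labels = [int(lab) for lab in np.unique(compartments) if lab != 0]
overlaps = {lab: int(np.logical_and(compartments == lab, vesicles > 0).sum()) for lab in labels}
best = max(overlaps, key=overlaps.get)
mask = compartments == best
print(best, overlaps)  # 2 {1: 0, 2: 4}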
@@ -146,32 +157,34 @@ def compute_sizes_for_all_tomorams_autoComp(): compartments = f["/compartments/segment_from_3Dmodel_v2"][:] mask = get_compartment_with_max_overlap(compartments, segmentation) - #if more than half of the vesicles (approximation, its checking pixel and not label) would get filtered by mask it means the compartment seg didn't work and thus we won't use the mask + # if more than half of the vesicles (approximation, its checking pixel and not label) would get filtered by mask it means the compartment seg didn't work and thus we won't use the mask if np.sum(segmentation[mask == 0] > 0) > (0.5 * np.sum(segmentation > 0)): - print("using no mask") + print(f"using no mask for {tomo}") else: segmentation[mask == 0] = 0 _, sizes = convert_segmentation_to_spheres( segmentation, resolution=resolution, radius_factor=radius_factor, estimate_radius_2d=estimate_radius_2d ) - # Add sizes to the dataset dictionary under the tomogram name - dataset_results[ds_name][fname] = sizes + # Add sizes to the dataset dictionary under the appropriate category + dataset_results[ds_name][category][fname] = sizes - # Save each dataset's results to a single CSV file - for ds_name, tomogram_data in dataset_results.items(): - # Create a DataFrame where each column is a tomogram's sizes - result_df = pd.DataFrame.from_dict(tomogram_data, orient='index').transpose() + # Save each dataset's results into separate CSV files for CTRL and DKO tomograms + for ds_name, categories in dataset_results.items(): + for category, tomogram_data in categories.items(): + # Sort tomograms by name within the category + sorted_data = dict(sorted(tomogram_data.items())) # Sort by tomogram names + result_df = pd.DataFrame.from_dict(sorted_data, orient='index').transpose() # Define the output file path - output_path = os.path.join(RESULT_FOLDER, f"size_analysis_for_{ds_name}.csv") + output_path = os.path.join(RESULT_FOLDER, f"size_analysis_for_{ds_name}_{category}.csv") # Save the DataFrame to CSV result_df.to_csv(output_path, index=False) def main(): - compute_sizes_for_all_tomorams_manComp() - #compute_sizes_for_all_tomorams_autoComp() + #compute_sizes_for_all_tomorams_manComp() + compute_sizes_for_all_tomorams_autoComp() if __name__ == "__main__": From 0f40d3c331b99431b6251e3090935dcf5fb9d00d Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Fri, 15 Nov 2024 20:54:23 +0100 Subject: [PATCH 15/35] Update inner ear analysis --- .../inner_ear/analysis/analyze_distances.py | 23 ++++ .../analysis/analyze_vesicle_pools.py | 126 +++++++++++------- scripts/inner_ear/analysis/common.py | 53 ++++++++ 3 files changed, 155 insertions(+), 47 deletions(-) create mode 100644 scripts/inner_ear/analysis/common.py diff --git a/scripts/inner_ear/analysis/analyze_distances.py b/scripts/inner_ear/analysis/analyze_distances.py index e69de29..e8a77c1 100644 --- a/scripts/inner_ear/analysis/analyze_distances.py +++ b/scripts/inner_ear/analysis/analyze_distances.py @@ -0,0 +1,23 @@ +import matplotlib.pyplot as plt +import pandas as pd +import seaborn as sns + +from common import get_all_measurements, get_measurements_with_annotation + + +def for_tomos_with_annotation(): + manual_assignments, automatic_assignments = get_measurements_with_annotation() + breakpoint() + + +# def for_all_tomos(): +# automatic_assignments = get_all_measurements() + + +def main(): + for_tomos_with_annotation() + # for_all_tomos() + + +if __name__ == "__main__": + main() diff --git a/scripts/inner_ear/analysis/analyze_vesicle_pools.py 
b/scripts/inner_ear/analysis/analyze_vesicle_pools.py index b53a9f6..6273277 100644 --- a/scripts/inner_ear/analysis/analyze_vesicle_pools.py +++ b/scripts/inner_ear/analysis/analyze_vesicle_pools.py @@ -1,72 +1,104 @@ -import sys - -import numpy as np +import matplotlib.pyplot as plt import pandas as pd +import seaborn as sns -sys.path.append("..") -sys.path.append("../processing") - -from combine_measurements import combine_manual_results, combine_automatic_results # noqa -# from compare_pool_assignments import create_manual_assignment -from parse_table import parse_table, get_data_root # noqa +from common import get_all_measurements, get_measurements_with_annotation -def get_manual_assignments(): - result_path = "../results/20240917_1/fully_manual_analysis_results.xlsx" - results = pd.read_excel(result_path) - return results +def plot_pools(data, errors): + data_for_plot = pd.melt(data, id_vars="Pool", var_name="Method", value_name="Measurement") + # Plot using seaborn + plt.figure(figsize=(8, 6)) + sns.barplot(data=data_for_plot, x="Pool", y="Measurement", hue="Method") -def get_automatic_assignments(tomograms): - result_path = "../results/20240917_1/automatic_analysis_results.xlsx" - results = pd.read_excel(result_path) - results = results[results["tomogram"].isin(tomograms)] - return results + # FIXME + # error_for_plot = pd.melt(errors, id_vars="Pool", var_name="Method", value_name="Error") + # # Add error bars manually + # for i, bar in enumerate(plt.gca().patches): + # # Get Standard Deviation for the current bar + # err = error_for_plot.iloc[i % len(error_for_plot)]["Error"] + # bar_x = bar.get_x() + bar.get_width() / 2 + # bar_y = bar.get_height() + # plt.errorbar(bar_x, bar_y, yerr=err, fmt="none", c="black", capsize=4) + # Customize the chart + plt.title("Different measurements for vesicles per pool") + plt.xlabel("Vesicle Pools") + plt.ylabel("Vesicles per Tomogram") + plt.grid(axis="y", linestyle="--", alpha=0.7) + plt.legend(title="Approaches") -def plot_confusion_matrix(manual_assignments, automatic_assignments): - pass + # Show the plot + plt.tight_layout() + plt.show() +# TODO use the actual results without vesicle post-processing. 
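# Note on the counting pattern used below (illustrative values, hypothetical pool names):
#   assignments.groupby(["tomogram", "pool"]).size().unstack(fill_value=0)
# yields one row per tomogram and one column per pool, e.g.
#   pool      pool_A  pool_B
#   tomo_01        5     120
#   tomo_02        7      98
# and .agg(["mean", "std"]).transpose().reset_index() then reduces this to the
# per-pool mean and standard deviation across tomograms, which is what gets
# plotted and written to the Excel output.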
def for_tomos_with_annotation(): - manual_assignments = get_manual_assignments() - manual_tomograms = pd.unique(manual_assignments["tomogram"]) - automatic_assignments = get_automatic_assignments(manual_tomograms) + manual_assignments, automatic_assignments = get_measurements_with_annotation() - tomograms = pd.unique(automatic_assignments["tomogram"]) - manual_assignments = manual_assignments[manual_assignments["tomogram"].isin(tomograms)] - assert len(pd.unique(manual_assignments["tomogram"])) == len(pd.unique(automatic_assignments["tomogram"])) + manual_counts = manual_assignments.groupby(["tomogram", "pool"]).size().unstack(fill_value=0) + automatic_counts = automatic_assignments.groupby(["tomogram", "pool"]).size().unstack(fill_value=0) - n_tomograms = len(tomograms) - pool_names, manual_pool_counts = np.unique(manual_assignments["pool"].values, return_counts=True) - _, automatic_pool_counts = np.unique(automatic_assignments["pool"].values, return_counts=True) + manual_stats = manual_counts.agg(["mean", "std"]).transpose().reset_index() + automatic_stats = automatic_counts.agg(["mean", "std"]).transpose().reset_index() - manual_pool_counts = manual_pool_counts.astype("float32") - manual_pool_counts /= n_tomograms - automatic_pool_counts = automatic_pool_counts.astype("float32") - automatic_pool_counts /= n_tomograms + data = pd.DataFrame({ + "Pool": manual_stats["pool"], + "Manual": manual_stats["mean"], + "Semi-automatic": automatic_stats["mean"], + "Automatic": automatic_stats["mean"], + }) + errors = pd.DataFrame({ + "Pool": manual_stats["pool"], + "Manual": manual_stats["std"], + "Semi-automatic": automatic_stats["std"], + "Automatic": automatic_stats["std"], + }) - print(pool_names) - print(manual_pool_counts) - print(automatic_pool_counts) + plot_pools(data, errors) - # TODO plot as a bar chart - # TODO save excel - # TODO add 'more automatic' results + output_path = "./vesicle_pools_small.xlsx" + data.to_excel(output_path, index=False, sheet_name="Average") + with pd.ExcelWriter(output_path, engine="openpyxl", mode="a") as writer: + errors.to_excel(writer, sheet_name="StandardDeviation", index=False) - breakpoint() - -# TODO +# TODO use the actual results without vesicle post-processing. def for_all_tomos(): - pass + + automatic_assignments = get_all_measurements() + # TODO double check why this number is so different! (64 vs. 
81) + # tomos = pd.unique(automatic_assignments["tomogram"]) + # print(len(tomos), n_tomos) + # assert len(tomos) == n_tomos + + automatic_counts = automatic_assignments.groupby(["tomogram", "pool"]).size().unstack(fill_value=0) + automatic_stats = automatic_counts.agg(["mean", "std"]).transpose().reset_index() + + data = pd.DataFrame({ + "Pool": automatic_stats["pool"], + "Semi-automatic": automatic_stats["mean"], + "Automatic": automatic_stats["mean"], + }) + errors = pd.DataFrame({ + "Pool": automatic_stats["pool"], + "Semi-automatic": automatic_stats["std"], + "Automatic": automatic_stats["std"], + }) + + plot_pools(data, errors) + + output_path = "./vesicle_pools_large.xlsx" + data.to_excel(output_path, index=False, sheet_name="Average") + with pd.ExcelWriter(output_path, engine="openpyxl", mode="a") as writer: + errors.to_excel(writer, sheet_name="StandardDeviation", index=False) def main(): - # data_root = get_data_root() - # table_path = os.path.join(data_root, "Electron-Microscopy-Susi", "Übersicht.xlsx") - # table = parse_table(table_path, data_root) - for_tomos_with_annotation() + # for_tomos_with_annotation() + for_all_tomos() if __name__ == "__main__": diff --git a/scripts/inner_ear/analysis/common.py b/scripts/inner_ear/analysis/common.py new file mode 100644 index 0000000..cea779c --- /dev/null +++ b/scripts/inner_ear/analysis/common.py @@ -0,0 +1,53 @@ +import os +import sys +import pandas as pd + +sys.path.append("../processing") + +from parse_table import get_data_root # noqa + + +def get_manual_assignments(): + result_path = "../results/20240917_1/fully_manual_analysis_results.xlsx" + results = pd.read_excel(result_path) + return results + + +def get_automatic_assignments(tomograms): + result_path = "../results/20240917_1/automatic_analysis_results.xlsx" + results = pd.read_excel(result_path) + results = results[results["tomogram"].isin(tomograms)] + return results + + +def get_measurements_with_annotation(): + manual_assignments = get_manual_assignments() + manual_tomograms = pd.unique(manual_assignments["tomogram"]) + automatic_assignments = get_automatic_assignments(manual_tomograms) + + tomograms = pd.unique(automatic_assignments["tomogram"]) + manual_assignments = manual_assignments[manual_assignments["tomogram"].isin(tomograms)] + assert len(pd.unique(manual_assignments["tomogram"])) == len(pd.unique(automatic_assignments["tomogram"])) + + return manual_assignments, automatic_assignments + + +def get_all_measurements(): + data_root = get_data_root() + val_table = os.path.join(data_root, "Electron-Microscopy-Susi", "Validierungs-Tabelle-v3.xlsx") + val_table = pd.read_excel(val_table) + + val_table = val_table[val_table["Kommentar 27-10-24"] == "passt"] + n_tomos = len(val_table) + assert n_tomos > 0 + tomo_names = [] + for _, row in val_table.iterrows(): + name = "/".join([ + row.Bedingung, f"Mouse {int(row.Maus)}", + row["Ribbon-Orientierung"].lower().rstrip("?"), + str(int(row["OwnCloud-Unterordner"]))] + ) + tomo_names.append(name) + + automatic_assignments = get_automatic_assignments(tomo_names) + return automatic_assignments From ad4741b72b3a36041d341f55a6bf0269c20ed3d5 Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Sun, 17 Nov 2024 14:03:01 +0100 Subject: [PATCH 16/35] Update inner ear analysis --- scripts/inner_ear/analysis/.gitignore | 2 + .../inner_ear/analysis/analyze_distances.py | 84 +++++++++++++++++-- .../analysis/analyze_vesicle_pools.py | 32 ++++--- .../combine_fully_automatic_results.py | 69 +++++++++++++++ 
scripts/inner_ear/analysis/common.py | 40 +++++++-- 5 files changed, 199 insertions(+), 28 deletions(-) create mode 100644 scripts/inner_ear/analysis/.gitignore create mode 100644 scripts/inner_ear/analysis/combine_fully_automatic_results.py diff --git a/scripts/inner_ear/analysis/.gitignore b/scripts/inner_ear/analysis/.gitignore new file mode 100644 index 0000000..383f264 --- /dev/null +++ b/scripts/inner_ear/analysis/.gitignore @@ -0,0 +1,2 @@ +panels/ +*.zip diff --git a/scripts/inner_ear/analysis/analyze_distances.py b/scripts/inner_ear/analysis/analyze_distances.py index e8a77c1..029dc63 100644 --- a/scripts/inner_ear/analysis/analyze_distances.py +++ b/scripts/inner_ear/analysis/analyze_distances.py @@ -6,17 +6,91 @@ def for_tomos_with_annotation(): - manual_assignments, automatic_assignments = get_measurements_with_annotation() - breakpoint() + manual_assignments, semi_automatic_assignments, automatic_assignments = get_measurements_with_annotation() + manual_distances = manual_assignments[ + ["pool", "ribbon_distance [nm]", "pd_distance [nm]", "boundary_distance [nm]"] + ] + manual_distances["approach"] = ["manual"] * len(manual_distances) -# def for_all_tomos(): -# automatic_assignments = get_all_measurements() + semi_automatic_distances = semi_automatic_assignments[ + ["pool", "ribbon_distance [nm]", "pd_distance [nm]", "boundary_distance [nm]"] + ] + semi_automatic_distances["approach"] = ["semi_automatic"] * len(semi_automatic_distances) + + automatic_distances = automatic_assignments[ + ["pool", "ribbon_distance [nm]", "pd_distance [nm]", "boundary_distance [nm]"] + ] + automatic_distances["approach"] = ["automatic"] * len(automatic_distances) + + distances = pd.concat([manual_distances, semi_automatic_distances, automatic_distances]) + distances.to_excel("./results/distances_with_manual_annotations.xlsx", index=False) + + pools = pd.unique(distances["pool"]) + dist_cols = ["ribbon_distance [nm]", "pd_distance [nm]", "boundary_distance [nm]"] + + fig, axes = plt.subplots(3, 3) + + # multiple = "stack" + multiple = "layer" + + structures = ["Ribbon", "PD", "Boundary"] + for i, pool in enumerate(pools): + pool_distances = distances[distances["pool"] == pool] + for j, dist_col in enumerate(dist_cols): + ax = axes[i, j] + ax.set_title(f"{pool} to {structures[j]}") + sns.histplot( + data=pool_distances, x=dist_col, hue="approach", multiple=multiple, kde=False, ax=ax + ) + ax.set_xlabel("distance [nm]") + + fig.tight_layout() + plt.show() + + +def for_all_tomos(): + semi_automatic_assignments, automatic_assignments = get_all_measurements() + + semi_automatic_distances = semi_automatic_assignments[ + ["pool", "ribbon_distance [nm]", "pd_distance [nm]", "boundary_distance [nm]"] + ] + semi_automatic_distances["approach"] = ["semi_automatic"] * len(semi_automatic_distances) + + automatic_distances = automatic_assignments[ + ["pool", "ribbon_distance [nm]", "pd_distance [nm]", "boundary_distance [nm]"] + ] + automatic_distances["approach"] = ["automatic"] * len(automatic_distances) + + distances = pd.concat([semi_automatic_distances, automatic_distances]) + distances.to_excel("./results/distances_all_tomograms.xlsx", index=False) + + pools = pd.unique(distances["pool"]) + dist_cols = ["ribbon_distance [nm]", "pd_distance [nm]", "boundary_distance [nm]"] + + fig, axes = plt.subplots(3, 3) + + # multiple = "stack" + multiple = "layer" + + structures = ["Ribbon", "PD", "Boundary"] + for i, pool in enumerate(pools): + pool_distances = distances[distances["pool"] == pool] + for j, dist_col 
in enumerate(dist_cols): + ax = axes[i, j] + ax.set_title(f"{pool} to {structures[j]}") + sns.histplot( + data=pool_distances, x=dist_col, hue="approach", multiple=multiple, kde=False, ax=ax + ) + ax.set_xlabel("distance [nm]") + + fig.tight_layout() + plt.show() def main(): for_tomos_with_annotation() - # for_all_tomos() + for_all_tomos() if __name__ == "__main__": diff --git a/scripts/inner_ear/analysis/analyze_vesicle_pools.py b/scripts/inner_ear/analysis/analyze_vesicle_pools.py index 6273277..1bd5fe1 100644 --- a/scripts/inner_ear/analysis/analyze_vesicle_pools.py +++ b/scripts/inner_ear/analysis/analyze_vesicle_pools.py @@ -34,63 +34,61 @@ def plot_pools(data, errors): plt.show() -# TODO use the actual results without vesicle post-processing. def for_tomos_with_annotation(): - manual_assignments, automatic_assignments = get_measurements_with_annotation() + manual_assignments, semi_automatic_assignments, automatic_assignments = get_measurements_with_annotation() manual_counts = manual_assignments.groupby(["tomogram", "pool"]).size().unstack(fill_value=0) + semi_automatic_counts = semi_automatic_assignments.groupby(["tomogram", "pool"]).size().unstack(fill_value=0) automatic_counts = automatic_assignments.groupby(["tomogram", "pool"]).size().unstack(fill_value=0) manual_stats = manual_counts.agg(["mean", "std"]).transpose().reset_index() + semi_automatic_stats = semi_automatic_counts.agg(["mean", "std"]).transpose().reset_index() automatic_stats = automatic_counts.agg(["mean", "std"]).transpose().reset_index() data = pd.DataFrame({ "Pool": manual_stats["pool"], - "Manual": manual_stats["mean"], - "Semi-automatic": automatic_stats["mean"], + "Semi-automatic": semi_automatic_stats["mean"], "Automatic": automatic_stats["mean"], + "Manual": manual_stats["mean"], }) errors = pd.DataFrame({ "Pool": manual_stats["pool"], - "Manual": manual_stats["std"], - "Semi-automatic": automatic_stats["std"], + "Semi-automatic": semi_automatic_stats["std"], "Automatic": automatic_stats["std"], + "Manual": manual_stats["std"], }) plot_pools(data, errors) - output_path = "./vesicle_pools_small.xlsx" + output_path = "./results/vesicle_pools_with_manual_annotations.xlsx" data.to_excel(output_path, index=False, sheet_name="Average") with pd.ExcelWriter(output_path, engine="openpyxl", mode="a") as writer: errors.to_excel(writer, sheet_name="StandardDeviation", index=False) -# TODO use the actual results without vesicle post-processing. def for_all_tomos(): - - automatic_assignments = get_all_measurements() - # TODO double check why this number is so different! (64 vs. 
81) - # tomos = pd.unique(automatic_assignments["tomogram"]) - # print(len(tomos), n_tomos) - # assert len(tomos) == n_tomos + semi_automatic_assignments, automatic_assignments = get_all_measurements() automatic_counts = automatic_assignments.groupby(["tomogram", "pool"]).size().unstack(fill_value=0) automatic_stats = automatic_counts.agg(["mean", "std"]).transpose().reset_index() + semi_automatic_counts = semi_automatic_assignments.groupby(["tomogram", "pool"]).size().unstack(fill_value=0) + semi_automatic_stats = semi_automatic_counts.agg(["mean", "std"]).transpose().reset_index() + data = pd.DataFrame({ "Pool": automatic_stats["pool"], - "Semi-automatic": automatic_stats["mean"], + "Semi-automatic": semi_automatic_stats["mean"], "Automatic": automatic_stats["mean"], }) errors = pd.DataFrame({ "Pool": automatic_stats["pool"], - "Semi-automatic": automatic_stats["std"], + "Semi-automatic": semi_automatic_stats["std"], "Automatic": automatic_stats["std"], }) plot_pools(data, errors) - output_path = "./vesicle_pools_large.xlsx" + output_path = "./results/vesicle_pools_all_tomograms.xlsx" data.to_excel(output_path, index=False, sheet_name="Average") with pd.ExcelWriter(output_path, engine="openpyxl", mode="a") as writer: errors.to_excel(writer, sheet_name="StandardDeviation", index=False) diff --git a/scripts/inner_ear/analysis/combine_fully_automatic_results.py b/scripts/inner_ear/analysis/combine_fully_automatic_results.py new file mode 100644 index 0000000..54bdbc1 --- /dev/null +++ b/scripts/inner_ear/analysis/combine_fully_automatic_results.py @@ -0,0 +1,69 @@ +import os +import sys + +import pandas as pd + +sys.path.append("..") +sys.path.append("../processing") + + +def combine_fully_auto_results(table, data_root, output_path): + from combine_measurements import combine_results + + val_table_path = os.path.join(data_root, "Electron-Microscopy-Susi", "Validierungs-Tabelle-v3.xlsx") + val_table = pd.read_excel(val_table_path) + + results = {} + for _, row in table.iterrows(): + folder = row["Local Path"] + if folder == "": + continue + + row_selection = (val_table.Bedingung == row.Bedingung) &\ + (val_table.Maus == row.Maus) &\ + (val_table["Ribbon-Orientierung"] == row["Ribbon-Orientierung"]) &\ + (val_table["OwnCloud-Unterordner"] == row["OwnCloud-Unterordner"]) + complete_vals = val_table[row_selection]["Fertig!"].values + is_complete = (complete_vals == "ja").all() + if not is_complete: + continue + + micro = row["EM alt vs. 
Neu"] + + tomo_name = os.path.relpath(folder, os.path.join(data_root, "Electron-Microscopy-Susi/Analyse")) + tab_name = "measurements_uncorrected_assignments.xlsx" + res_path = os.path.join(folder, "korrektur", tab_name) + if not os.path.exists(res_path): + res_path = os.path.join(folder, "Korrektur", tab_name) + assert os.path.exists(res_path), res_path + results[tomo_name] = (res_path, "alt" if micro == "beides" else micro) + + if micro == "beides": + micro = "neu" + + new_root = os.path.join(folder, "neues EM") + if not os.path.exists(new_root): + new_root = os.path.join(folder, "Tomo neues EM") + assert os.path.exists(new_root) + + res_path = os.path.join(new_root, "korrektur", "measurements.xlsx") + if not os.path.exists(res_path): + res_path = os.path.join(new_root, "Korrektur", "measurements.xlsx") + assert os.path.exists(res_path), res_path + results[tomo_name] = (res_path, "alt" if micro == "beides" else micro) + + combine_results(results, output_path, sheet_name="vesicles") + + +def main(): + from parse_table import parse_table, get_data_root + + data_root = get_data_root() + table_path = os.path.join(data_root, "Electron-Microscopy-Susi", "Übersicht.xlsx") + table = parse_table(table_path, data_root) + + res_path = "../results/fully_automatic_analysis_results.xlsx" + combine_fully_auto_results(table, data_root, output_path=res_path) + + +main() diff --git a/scripts/inner_ear/analysis/common.py b/scripts/inner_ear/analysis/common.py index cea779c..c3622d5 100644 --- a/scripts/inner_ear/analysis/common.py +++ b/scripts/inner_ear/analysis/common.py @@ -1,5 +1,6 @@ import os import sys + import pandas as pd sys.path.append("../processing") @@ -13,23 +14,35 @@ def get_manual_assignments(): return results -def get_automatic_assignments(tomograms): +def get_semi_automatic_assignments(tomograms): result_path = "../results/20240917_1/automatic_analysis_results.xlsx" results = pd.read_excel(result_path) results = results[results["tomogram"].isin(tomograms)] return results +def get_automatic_assignments(tomograms): + result_path = "../results/fully_automatic_analysis_results.xlsx" + results = pd.read_excel(result_path) + results = results[results["tomogram"].isin(tomograms)] + return results + + def get_measurements_with_annotation(): manual_assignments = get_manual_assignments() manual_tomograms = pd.unique(manual_assignments["tomogram"]) - automatic_assignments = get_automatic_assignments(manual_tomograms) + semi_automatic_assignments = get_semi_automatic_assignments(manual_tomograms) - tomograms = pd.unique(automatic_assignments["tomogram"]) + tomograms = pd.unique(semi_automatic_assignments["tomogram"]) manual_assignments = manual_assignments[manual_assignments["tomogram"].isin(tomograms)] - assert len(pd.unique(manual_assignments["tomogram"])) == len(pd.unique(automatic_assignments["tomogram"])) + assert len(pd.unique(manual_assignments["tomogram"])) == len(pd.unique(semi_automatic_assignments["tomogram"])) - return manual_assignments, automatic_assignments + automatic_assignments = get_automatic_assignments(tomograms) + filtered_tomograms = pd.unique(manual_assignments["tomogram"]) + assert len(filtered_tomograms) == len(pd.unique(automatic_assignments["tomogram"])) + + print("Tomograms with manual annotations:", len(filtered_tomograms)) + return manual_assignments, semi_automatic_assignments, automatic_assignments def get_all_measurements(): @@ -39,6 +52,7 @@ def get_all_measurements(): val_table = val_table[val_table["Kommentar 27-10-24"] == "passt"] n_tomos = len(val_table) + 
print("All tomograms:", n_tomos) assert n_tomos > 0 tomo_names = [] for _, row in val_table.iterrows(): @@ -49,5 +63,19 @@ def get_all_measurements(): ) tomo_names.append(name) + semi_automatic_assignments = get_semi_automatic_assignments(tomo_names) + filtered_tomo_names = pd.unique(semi_automatic_assignments["tomogram"]).tolist() + automatic_assignments = get_automatic_assignments(tomo_names) - return automatic_assignments + assert len(filtered_tomo_names) == len(pd.unique(automatic_assignments["tomogram"])) + + return semi_automatic_assignments, automatic_assignments + + +def main(): + get_measurements_with_annotation() + get_all_measurements() + + +if __name__ == "__main__": + main() From 305a80b74950e3f00bbdc9f168670fb4cf0b2126 Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Sun, 17 Nov 2024 21:16:46 +0100 Subject: [PATCH 17/35] Updates to inner ear training and eval --- scripts/inner_ear/processing/run_analyis.py | 19 ++++++++++++++----- .../training/postprocessing_and_evaluation.py | 8 ++++---- .../structure_prediction_and_evaluation.py | 8 ++++---- 3 files changed, 22 insertions(+), 13 deletions(-) diff --git a/scripts/inner_ear/processing/run_analyis.py b/scripts/inner_ear/processing/run_analyis.py index 8508673..ca5ea0b 100644 --- a/scripts/inner_ear/processing/run_analyis.py +++ b/scripts/inner_ear/processing/run_analyis.py @@ -52,7 +52,7 @@ def _load_segmentation(seg_path, tomo_shape): return seg -def compute_distances(segmentation_paths, save_folder, resolution, force, tomo_shape): +def compute_distances(segmentation_paths, save_folder, resolution, force, tomo_shape, use_corrected_vesicles=True): os.makedirs(save_folder, exist_ok=True) vesicles = None @@ -61,9 +61,10 @@ def _require_vesicles(): vesicle_path = segmentation_paths["vesicles"] if vesicles is None: - vesicle_pool_path = os.path.join(os.path.split(save_folder)[0], "vesicle_pools.tif") - if os.path.exists(vesicle_pool_path): - vesicle_path = vesicle_pool_path + if use_corrected_vesicles: + vesicle_pool_path = os.path.join(os.path.split(save_folder)[0], "vesicle_pools.tif") + if os.path.exists(vesicle_pool_path): + vesicle_path = vesicle_pool_path return _load_segmentation(vesicle_path, tomo_shape) else: @@ -394,14 +395,22 @@ def analyze_folder(folder, version, n_ribbons, force, use_corrected_vesicles): with open_file(data_path, "r") as f: tomo_shape = f["data"].shape - out_distance_folder = os.path.join(output_folder, "distances") + if use_corrected_vesicles: + out_distance_folder = os.path.join(output_folder, "distances") + else: + out_distance_folder = os.path.join(output_folder, "distances_uncorrected") distance_paths, skip = compute_distances( segmentation_paths, out_distance_folder, resolution, force=force, tomo_shape=tomo_shape, + use_corrected_vesicles=use_corrected_vesicles ) if skip: return if force or not os.path.exists(result_path): + + if not use_corrected_vesicles: + pool_correction_path = None + analyze_distances( segmentation_paths, distance_paths, resolution, result_path, tomo_shape, pool_correction_path=pool_correction_path diff --git a/scripts/inner_ear/training/postprocessing_and_evaluation.py b/scripts/inner_ear/training/postprocessing_and_evaluation.py index 30c9e42..30c1313 100644 --- a/scripts/inner_ear/training/postprocessing_and_evaluation.py +++ b/scripts/inner_ear/training/postprocessing_and_evaluation.py @@ -13,8 +13,8 @@ from train_structure_segmentation import get_train_val_test_split -ROOT = "/home/pape/Work/data/synaptic_reconstruction/moser" -# ROOT = 
"/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/moser" +# ROOT = "/home/pape/Work/data/synaptic_reconstruction/moser" +ROOT = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/moser" MODEL_PATH = "/mnt/lustre-emmy-hdd/projects/nim00007/models/synaptic-reconstruction/vesicle-DA-inner_ear-v2" OUTPUT_ROOT = "./predictions" @@ -187,8 +187,8 @@ def segment_train_domain(): name = "train_domain" run_vesicle_segmentation(paths, MODEL_PATH, name, is_nested=True) postprocess_structures(paths, name, is_nested=True) - visualize(paths, name, is_nested=True) - results = evaluate(paths, name, is_nested=True, save_path="./results/train_domain_postprocessed.csv") + # visualize(paths, name, is_nested=True) + results = evaluate(paths, name, is_nested=True, save_path="./results/train_domain_postprocessed_v2.csv") print(results) print("Ribbon segmentation:", results["ribbon"].mean(), "+-", results["ribbon"].std()) print("PD segmentation:", results["PD"].mean(), "+-", results["PD"].std()) diff --git a/scripts/inner_ear/training/structure_prediction_and_evaluation.py b/scripts/inner_ear/training/structure_prediction_and_evaluation.py index cb174c7..7ed89a9 100644 --- a/scripts/inner_ear/training/structure_prediction_and_evaluation.py +++ b/scripts/inner_ear/training/structure_prediction_and_evaluation.py @@ -143,10 +143,10 @@ def predict_and_evaluate_train_domain(): print("Run evaluation on", len(paths), "tomos") name = "train_domain" - model_path = "./checkpoints/inner_ear_structure_model" + model_path = "./checkpoints/inner_ear_structure_model_v2" run_prediction(paths, model_path, name, is_nested=True) - evaluate(paths, name, is_nested=True, save_path="./results/train_domain.csv") + evaluate(paths, name, is_nested=True, save_path="./results/train_domain_v2.csv") visualize(paths, name, is_nested=True) @@ -187,9 +187,9 @@ def predict_and_evaluate_rat(): def main(): - # predict_and_evaluate_train_domain() + predict_and_evaluate_train_domain() # predict_and_evaluate_vesicle_pools() - predict_and_evaluate_rat() + # predict_and_evaluate_rat() if __name__ == "__main__": From 903e59ec13d099e16a8e71063c0621da62b66116 Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Mon, 18 Nov 2024 19:50:18 +0100 Subject: [PATCH 18/35] Update inner ear analysis --- .../inner_ear/analysis/analyze_distances.py | 131 ++++++++++++------ .../analysis/analyze_vesicle_pools.py | 2 + 2 files changed, 89 insertions(+), 44 deletions(-) diff --git a/scripts/inner_ear/analysis/analyze_distances.py b/scripts/inner_ear/analysis/analyze_distances.py index 029dc63..534f90b 100644 --- a/scripts/inner_ear/analysis/analyze_distances.py +++ b/scripts/inner_ear/analysis/analyze_distances.py @@ -1,31 +1,14 @@ +import os + import matplotlib.pyplot as plt +import numpy as np import pandas as pd import seaborn as sns from common import get_all_measurements, get_measurements_with_annotation -def for_tomos_with_annotation(): - manual_assignments, semi_automatic_assignments, automatic_assignments = get_measurements_with_annotation() - - manual_distances = manual_assignments[ - ["pool", "ribbon_distance [nm]", "pd_distance [nm]", "boundary_distance [nm]"] - ] - manual_distances["approach"] = ["manual"] * len(manual_distances) - - semi_automatic_distances = semi_automatic_assignments[ - ["pool", "ribbon_distance [nm]", "pd_distance [nm]", "boundary_distance [nm]"] - ] - semi_automatic_distances["approach"] = ["semi_automatic"] * len(semi_automatic_distances) - - automatic_distances = automatic_assignments[ - ["pool", 
"ribbon_distance [nm]", "pd_distance [nm]", "boundary_distance [nm]"] - ] - automatic_distances["approach"] = ["automatic"] * len(automatic_distances) - - distances = pd.concat([manual_distances, semi_automatic_distances, automatic_distances]) - distances.to_excel("./results/distances_with_manual_annotations.xlsx", index=False) - +def _plot_all(distances): pools = pd.unique(distances["pool"]) dist_cols = ["ribbon_distance [nm]", "pd_distance [nm]", "boundary_distance [nm]"] @@ -49,8 +32,64 @@ def for_tomos_with_annotation(): plt.show() -def for_all_tomos(): - semi_automatic_assignments, automatic_assignments = get_all_measurements() +# TODO rename the method names. +# We only care about the following distances: +# - MP-V -> PD, AZ (Boundary) +# - Docked-V -> PD +# - RA-V -> Ribbon +def _plot_selected(distances, save_path=None): + fig, axes = plt.subplots(2, 2) + multiple = "layer" + + if save_path is not None and os.path.exists(save_path): + os.remove(save_path) + + def _plot(pool_name, distance_col, structure_name, ax): + + this_distances = distances[distances["pool"] == pool_name][["approach", distance_col]] + + ax.set_title(f"{pool_name} to {structure_name}") + sns.histplot( + data=this_distances, x=distance_col, hue="approach", multiple=multiple, kde=False, ax=ax + ) + ax.set_xlabel("distance [nm]") + + if save_path is not None: + approaches = pd.unique(this_distances["approach"]) + dist_values = [ + this_distances[this_distances["approach"] == approach][distance_col].values.tolist() + for approach in approaches + ] + max_len = max([len(vals) for vals in dist_values]) + save_distances = { + approach: dists + [np.nan] * (max_len - len(dists)) + for approach, dists in zip(approaches, dist_values) + } + save_distances = pd.DataFrame(save_distances) + + sheet_name = f"{pool_name}_{structure_name}" + if os.path.exists(save_path): + with pd.ExcelWriter(save_path, engine="openpyxl", mode="a") as writer: + save_distances.to_excel(writer, sheet_name=sheet_name, index=False) + else: + save_distances.to_excel(save_path, index=False, sheet_name=sheet_name) + + _plot("MP-V", "pd_distance [nm]", "PD", axes[0, 0]) + _plot("MP-V", "boundary_distance [nm]", "AZ Membrane", axes[0, 1]) + _plot("Docked-V", "pd_distance [nm]", "PD", axes[1, 0]) + _plot("RA-V", "ribbon_distance [nm]", "Ribbon", axes[1, 1]) + + fig.tight_layout() + plt.show() + + +def for_tomos_with_annotation(plot_all=True): + manual_assignments, semi_automatic_assignments, automatic_assignments = get_measurements_with_annotation() + + manual_distances = manual_assignments[ + ["pool", "ribbon_distance [nm]", "pd_distance [nm]", "boundary_distance [nm]"] + ] + manual_distances["approach"] = ["manual"] * len(manual_distances) semi_automatic_distances = semi_automatic_assignments[ ["pool", "ribbon_distance [nm]", "pd_distance [nm]", "boundary_distance [nm]"] @@ -62,35 +101,39 @@ def for_all_tomos(): ] automatic_distances["approach"] = ["automatic"] * len(automatic_distances) - distances = pd.concat([semi_automatic_distances, automatic_distances]) - distances.to_excel("./results/distances_all_tomograms.xlsx", index=False) + distances = pd.concat([manual_distances, semi_automatic_distances, automatic_distances]) + if plot_all: + distances.to_excel("./results/distances_with_manual_annotations.xlsx", index=False) + _plot_all(distances) + else: + _plot_selected(distances, save_path="./results/selected_distances_manual_annotations.xlsx") - pools = pd.unique(distances["pool"]) - dist_cols = ["ribbon_distance [nm]", "pd_distance [nm]", 
"boundary_distance [nm]"] - fig, axes = plt.subplots(3, 3) +def for_all_tomos(plot_all=True): + semi_automatic_assignments, automatic_assignments = get_all_measurements() - # multiple = "stack" - multiple = "layer" + semi_automatic_distances = semi_automatic_assignments[ + ["pool", "ribbon_distance [nm]", "pd_distance [nm]", "boundary_distance [nm]"] + ] + semi_automatic_distances["approach"] = ["semi_automatic"] * len(semi_automatic_distances) - structures = ["Ribbon", "PD", "Boundary"] - for i, pool in enumerate(pools): - pool_distances = distances[distances["pool"] == pool] - for j, dist_col in enumerate(dist_cols): - ax = axes[i, j] - ax.set_title(f"{pool} to {structures[j]}") - sns.histplot( - data=pool_distances, x=dist_col, hue="approach", multiple=multiple, kde=False, ax=ax - ) - ax.set_xlabel("distance [nm]") + automatic_distances = automatic_assignments[ + ["pool", "ribbon_distance [nm]", "pd_distance [nm]", "boundary_distance [nm]"] + ] + automatic_distances["approach"] = ["automatic"] * len(automatic_distances) - fig.tight_layout() - plt.show() + distances = pd.concat([semi_automatic_distances, automatic_distances]) + if plot_all: + distances.to_excel("./results/distances_all_tomograms.xlsx", index=False) + _plot_all(distances) + else: + _plot_selected(distances, save_path="./results/selected_distances_all_tomograms.xlsx") def main(): - for_tomos_with_annotation() - for_all_tomos() + plot_all = False + for_tomos_with_annotation(plot_all=plot_all) + for_all_tomos(plot_all=plot_all) if __name__ == "__main__": diff --git a/scripts/inner_ear/analysis/analyze_vesicle_pools.py b/scripts/inner_ear/analysis/analyze_vesicle_pools.py index 1bd5fe1..e9ad651 100644 --- a/scripts/inner_ear/analysis/analyze_vesicle_pools.py +++ b/scripts/inner_ear/analysis/analyze_vesicle_pools.py @@ -94,6 +94,8 @@ def for_all_tomos(): errors.to_excel(writer, sheet_name="StandardDeviation", index=False) +# TODO: export the vesicle diameters +# TODO: export the ribbon and pd stats def main(): # for_tomos_with_annotation() for_all_tomos() From b1449d23ff090a8b8c8c2ebd5845ea578e7f9be1 Mon Sep 17 00:00:00 2001 From: SarahMuth Date: Tue, 19 Nov 2024 17:57:53 +0100 Subject: [PATCH 19/35] minor changes --- scripts/cooper/analysis/calc_AZ_area.py | 20 ++++- scripts/cooper/analysis/run_analysis_1.py | 53 ++++++----- .../run_spatial_distribution_analysis.py | 87 +++++++++++++++++-- 3 files changed, 122 insertions(+), 38 deletions(-) diff --git a/scripts/cooper/analysis/calc_AZ_area.py b/scripts/cooper/analysis/calc_AZ_area.py index e9fcb52..592b043 100644 --- a/scripts/cooper/analysis/calc_AZ_area.py +++ b/scripts/cooper/analysis/calc_AZ_area.py @@ -76,7 +76,8 @@ def calculate_AZ_area_simple(tomo_path, pixel_size_nm=1.554): def calculate_AZ_surface(tomo_path, pixel_size_nm=1.554): with h5py.File(tomo_path, "r") as f: - AZ_seg = f["/AZ/segment_from_AZmodel_v3"][:] + #AZ_seg = f["/AZ/segment_from_AZmodel_v3"][:] + AZ_seg = f["/filtered_az"][:] # Apply binary closing to smooth the segmented regions struct_elem = ball(1) # Use a small 3D structuring element @@ -103,6 +104,16 @@ def calculate_AZ_surface(tomo_path, pixel_size_nm=1.554): return surface_area +def calculate_AZ_surface_simple(tomo_path, pixel_size_nm=1.554): + with h5py.File(tomo_path, "r") as f: + AZ_seg = f["/labels/AZ"][:] + + morphology_data = compute_object_morphology(AZ_seg, "AZ Structure", resolution=(pixel_size_nm, pixel_size_nm, pixel_size_nm)) + surface_column = "surface [nm^2]" #if resolution is not None else "surface [pixel^2]" + surface_area = 
morphology_data[surface_column].iloc[0] + + return surface_area + def calculate_AZ_surface_convexHull(tomo_path, pixel_size_nm=1.554): with h5py.File(tomo_path, "r") as f: AZ_seg = f["/AZ/segment_from_AZmodel_v3"][:] @@ -194,7 +205,8 @@ def process_datasets(folder_path, output_csv="AZ_areas.csv", pixel_size_nm=1.554 #AZ_area = calculate_total_AZ_area(tomo_path, pixel_size_nm) #AZ_area = calculate_AZ_area_simple(tomo_path, pixel_size_nm) #AZ_surface_area = calculate_AZ_surface(tomo_path, pixel_size_nm) - AZ_surface_area = calculate_AZ_surface_convexHull(tomo_path, pixel_size_nm) + #AZ_surface_area = calculate_AZ_surface_convexHull(tomo_path, pixel_size_nm) + AZ_surface_area = calculate_AZ_surface_simple(tomo_path, pixel_size_nm) # Append results to list results.append({ "Dataset": dataset_name, @@ -217,8 +229,8 @@ def process_datasets(folder_path, output_csv="AZ_areas.csv", pixel_size_nm=1.554 def main(): # Define the path to the folder containing dataset folders - folder_path = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/segmentation/for_spatial_distribution_analysis/final_Imig2014_seg_manComp" - output_csv = "./analysis_results/AZ_intersect_manualCompartment/AZ_surface_area.csv" + folder_path = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/20241102_TOMO_DATA_Imig2014/exported/" + output_csv = "./analysis_results/manual_AZ_exported/AZ_surface_area.csv" # Call the function to process datasets and save results process_datasets(folder_path, output_csv = output_csv) diff --git a/scripts/cooper/analysis/run_analysis_1.py b/scripts/cooper/analysis/run_analysis_1.py index 4077ea3..3afde5d 100644 --- a/scripts/cooper/analysis/run_analysis_1.py +++ b/scripts/cooper/analysis/run_analysis_1.py @@ -11,9 +11,9 @@ from tqdm import tqdm from synaptic_reconstruction.imod.to_imod import convert_segmentation_to_spheres -DATA_ROOT = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/segmentation/for_spatial_distribution_analysis/final_Imig2014_seg/" # noqa -PREDICTION_ROOT = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/segmentation/for_spatial_distribution_analysis/final_Imig2014_seg/" # noqa -RESULT_FOLDER = "./analysis_results/AZ_intersect_autoCompartment" +DATA_ROOT = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/20241102_TOMO_DATA_Imig2014/exported/" # noqa +PREDICTION_ROOT = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/segmentation/for_spatial_distribution_analysis/final_Imig2014_seg_manComp" # noqa +RESULT_FOLDER = "./analysis_results/AZ_intersect_manualCompartment" def get_compartment_with_max_overlap(compartments, vesicles): """ @@ -64,10 +64,10 @@ def compute_sizes_for_all_tomorams_manComp(): os.makedirs(RESULT_FOLDER, exist_ok=True) resolution = (1.554,) * 3 # Change for each dataset #1.554 for Munc and snap #0.8681 for 04 dataset - radius_factor = 0.7 + radius_factor = 1 estimate_radius_2d = True - # Dictionary to hold the results for each dataset + # Dictionary to hold the results for each dataset and category (CTRL or DKO) dataset_results = {} tomograms = sorted(glob(os.path.join(PREDICTION_ROOT, "**/*.h5"), recursive=True)) @@ -75,14 +75,18 @@ def compute_sizes_for_all_tomorams_manComp(): ds_name, fname = os.path.split(tomo) ds_name = os.path.split(ds_name)[1] fname = os.path.splitext(fname)[0] - # Initialize a new dictionary entry for each dataset if not already present + + # Determine if the tomogram is 'CTRL' or 'DKO' + category = "CTRL" if "CTRL" in fname else "DKO" + + # Initialize a new 
dictionary entry for each dataset and category if not already present if ds_name not in dataset_results: - dataset_results[ds_name] = {} + dataset_results[ds_name] = {'CTRL': {}, 'DKO': {}} # Skip if this tomogram already exists in the dataset dictionary - if fname in dataset_results[ds_name]: + if fname in dataset_results[ds_name][category]: continue - + # Load the vesicle segmentation from the predictions. with h5py.File(tomo, "r") as f: segmentation = f["/vesicles/segment_from_combined_vesicles"][:] @@ -98,32 +102,27 @@ def compute_sizes_for_all_tomorams_manComp(): segmentation, resolution=resolution, radius_factor=radius_factor, estimate_radius_2d=estimate_radius_2d ) - # Add sizes to the dataset dictionary under the tomogram name - dataset_results[ds_name][fname] = sizes + # Add sizes to the dataset dictionary under the appropriate category + dataset_results[ds_name][category][fname] = sizes - # Save each dataset's results to a single CSV file - for ds_name, tomogram_data in dataset_results.items(): - # Create a DataFrame where each column is a tomogram's sizes - result_df = pd.DataFrame.from_dict(tomogram_data, orient='index').transpose() + # Save each dataset's results into separate CSV files for CTRL and DKO tomograms + for ds_name, categories in dataset_results.items(): + for category, tomogram_data in categories.items(): + # Sort tomograms by name within the category + sorted_data = dict(sorted(tomogram_data.items())) # Sort by tomogram names + result_df = pd.DataFrame.from_dict(sorted_data, orient='index').transpose() # Define the output file path - output_path = os.path.join(RESULT_FOLDER, f"size_analysis_for_{ds_name}.csv") + output_path = os.path.join(RESULT_FOLDER, f"size_analysis_for_{ds_name}_{category}_rf1.csv") # Save the DataFrame to CSV result_df.to_csv(output_path, index=False) -import os -import pandas as pd -import numpy as np -from glob import glob -import h5py -from tqdm import tqdm - def compute_sizes_for_all_tomorams_autoComp(): os.makedirs(RESULT_FOLDER, exist_ok=True) resolution = (1.554,) * 3 # Change for each dataset #1.554 for Munc and snap #0.8681 for 04 dataset - radius_factor = 0.7 + radius_factor = 1 estimate_radius_2d = True # Dictionary to hold the results for each dataset and category (CTRL or DKO) @@ -177,14 +176,14 @@ def compute_sizes_for_all_tomorams_autoComp(): result_df = pd.DataFrame.from_dict(sorted_data, orient='index').transpose() # Define the output file path - output_path = os.path.join(RESULT_FOLDER, f"size_analysis_for_{ds_name}_{category}.csv") + output_path = os.path.join(RESULT_FOLDER, f"size_analysis_for_{ds_name}_{category}_rf1.csv") # Save the DataFrame to CSV result_df.to_csv(output_path, index=False) def main(): - #compute_sizes_for_all_tomorams_manComp() - compute_sizes_for_all_tomorams_autoComp() + compute_sizes_for_all_tomorams_manComp() + #compute_sizes_for_all_tomorams_autoComp() if __name__ == "__main__": diff --git a/scripts/cooper/analysis/run_spatial_distribution_analysis.py b/scripts/cooper/analysis/run_spatial_distribution_analysis.py index 9a890a1..cdc4c0d 100644 --- a/scripts/cooper/analysis/run_spatial_distribution_analysis.py +++ b/scripts/cooper/analysis/run_spatial_distribution_analysis.py @@ -6,9 +6,9 @@ from synaptic_reconstruction.distance_measurements import measure_segmentation_to_object_distances import numpy as np -DATA_ROOT = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/20241102_TOMO_DATA_Imig2014/exported/" # noqa -PREDICTION_ROOT = 
"/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/segmentation/for_spatial_distribution_analysis/final_Imig2014_seg_manComp" # noqa -RESULT_FOLDER = "./analysis_results/AZ_intersect_manualCompartment" +DATA_ROOT = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/segmentation/for_spatial_distribution_analysis/final_Imig2014_seg/" # noqa +PREDICTION_ROOT = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/segmentation/for_spatial_distribution_analysis/final_Imig2014_seg/" # noqa +RESULT_FOLDER = "./analysis_results/AZ_filtered_autoComp" def get_compartment_with_max_overlap(compartments, vesicles): @@ -55,7 +55,7 @@ def get_compartment_with_max_overlap(compartments, vesicles): # We compute the distances for all vesicles in the compartment masks to the AZ. # We use the same different resolution, depending on dataset. # The closest distance is calculated, i.e., the closest point on the outer membrane of the vesicle to the AZ. -def compute_sizes_for_all_tomorams(): +def compute_per_vesicle_distance_to_AZ(): os.makedirs(RESULT_FOLDER, exist_ok=True) resolution = (1.554,) * 3 # Change for each dataset #1.554 for Munc and snap #0.8681 for 04 dataset @@ -116,8 +116,80 @@ def compute_sizes_for_all_tomorams(): # Save the DataFrame to CSV result_df.to_csv(output_path, index=False) +def compute_per_vesicle_distance_to_filteredAZ(): + filtered_AZ_path = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/20241102_TOMO_DATA_Imig2014/az_seg_filtered" + os.makedirs(RESULT_FOLDER, exist_ok=True) + + resolution = (1.554,) * 3 # Change for each dataset #1.554 for Munc and snap #0.8681 for 04 dataset + + # Dictionary to hold the results for each dataset and category (CTRL or DKO) + dataset_results = {} + + tomograms = sorted(glob(os.path.join(PREDICTION_ROOT, "**/*.h5"), recursive=True)) + for tomo in tqdm(tomograms): + ds_name, fname = os.path.split(tomo) + ds_name = os.path.split(ds_name)[1] + fname = os.path.splitext(fname)[0] + + # Determine if the tomogram is 'CTRL' or 'DKO' + category = "CTRL" if "CTRL" in fname else "DKO" + + # Initialize a new dictionary entry for each dataset and category if not already present + if ds_name not in dataset_results: + dataset_results[ds_name] = {'CTRL': {}, 'DKO': {}} + + # Skip if this tomogram already exists in the dataset dictionary + if fname in dataset_results[ds_name][category]: + continue + + #Load the AZ segmentations + AZ_path = os.path.join(filtered_AZ_path, ds_name, f"{fname}.h5") + with h5py.File(AZ_path, "r") as f: + segmented_object = f["/filtered_az"][:] + + # Load the vesicle segmentation from the predictions + with h5py.File(tomo, "r") as f: + segmentation = f["/vesicles/segment_from_combined_vesicles"][:] + + #if AZ intersect is small, compartment seg didn't align with AZ so we use the normal AZ and not intersect + if (segmented_object == 0).all() or np.sum(segmented_object == 1) < 2000: + segmented_object = f["/AZ/segment_from_AZmodel_v3"][:] + + input_path = os.path.join(DATA_ROOT, ds_name, f"{fname}.h5") + assert os.path.exists(input_path), input_path + + # Load the compartment mask from the tomogram + with h5py.File(input_path, "r") as f: + compartments = f["/compartments/segment_from_3Dmodel_v2"][:] + mask = get_compartment_with_max_overlap(compartments, segmentation) + + #if more than half of the vesicles (approximation, its checking pixel and not label) would get filtered by mask it means the compartment seg didn't work and thus we won't use the mask + if np.sum(segmentation[mask == 0] > 0) > (0.5 * 
np.sum(segmentation > 0)): + print("using no mask") + else: + segmentation[mask == 0] = 0 + distances, _, _, _ = measure_segmentation_to_object_distances( + segmentation, segmented_object=segmented_object, resolution=resolution + ) + + # Add distances to the dataset dictionary under the appropriate category + dataset_results[ds_name][category][fname] = distances + + # Save each dataset's results into separate CSV files for CTRL and DKO tomograms + for ds_name, categories in dataset_results.items(): + for category, tomogram_data in categories.items(): + # Sort tomograms by name within the category + sorted_data = dict(sorted(tomogram_data.items())) # Sort by tomogram names + result_df = pd.DataFrame.from_dict(sorted_data, orient='index').transpose() + + # Define the output file path + output_path = os.path.join(RESULT_FOLDER, f"spatial_distribution_analysis_for_{ds_name}_{category}.csv") + + # Save the DataFrame to CSV + result_df.to_csv(output_path, index=False) + -def compute_sizes_for_all_tomorams_manComp(): +def compute_per_vesicle_distance_to_AZ_manComp(): os.makedirs(RESULT_FOLDER, exist_ok=True) resolution = (1.554,) * 3 # Change for each dataset #1.554 for Munc and snap #0.8681 for 04 dataset @@ -171,8 +243,9 @@ def compute_sizes_for_all_tomorams_manComp(): result_df.to_csv(output_path, index=False) def main(): - #compute_sizes_for_all_tomorams() - compute_sizes_for_all_tomorams_manComp() + #compute_per_vesicle_distance_to_AZ() + #compute_per_vesicle_distance_to_AZ_manComp() + compute_per_vesicle_distance_to_filteredAZ() if __name__ == "__main__": From 186c92dc282ca32a485bb669b42bc7fe33abb5e7 Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Wed, 20 Nov 2024 08:11:12 +0100 Subject: [PATCH 20/35] Update inner ear analysis scripts --- .../full_reconstruction/visualize_results.py | 23 +++++++--- .../inner_ear/analysis/analyze_distances.py | 46 ++++++++++++------- .../analysis/analyze_vesicle_pools.py | 3 +- scripts/summarize_data.py | 27 +++++++++++ 4 files changed, 74 insertions(+), 25 deletions(-) create mode 100644 scripts/summarize_data.py diff --git a/scripts/cooper/full_reconstruction/visualize_results.py b/scripts/cooper/full_reconstruction/visualize_results.py index 5e3f596..839626b 100644 --- a/scripts/cooper/full_reconstruction/visualize_results.py +++ b/scripts/cooper/full_reconstruction/visualize_results.py @@ -6,11 +6,14 @@ import numpy as np import pandas as pd +from skimage.filters import gaussian + ROOT = "./04_full_reconstruction" TABLE = "/home/pape/Desktop/sfb1286/mboc_synapse/draft_figures/full_reconstruction.xlsx" # Skip datasets for which all figures were already done. 
-SKIP_DS = ["20241019_Tomo-eval_MF_Synapse"] +SKIP_DS = ["20241019_Tomo-eval_MF_Synapse", "20241019_Tomo-eval_PS_Synapse"] +# SKIP_DS = [] def _get_name_and_row(path, table): @@ -46,13 +49,12 @@ def visualize_result(path, table): if ds_name in SKIP_DS: return - # if row["Use for vis"].values[0] == "yes": - if row["Use for vis"].values[0] in ("yes", "no"): + if row["Use for Vis"].values[0] == "no": return compartment_ids = _get_compartment_ids(row) # access = np.s_[:] - access = np.s_[::2, ::2, ::2] + access = np.s_[::3, ::3, ::3] with h5py.File(path, "r") as f: raw = f["raw"][access] @@ -60,6 +62,10 @@ def visualize_result(path, table): active_zone = f["labels/active_zone"][access] mitos = f["labels/mitochondria"][access] compartments = f["labels/compartments"][access] + print("Loading done") + + raw = gaussian(raw) + print("Gaussian done") if any(comp_ids is not None for comp_ids in compartment_ids): mask = np.zeros(raw.shape, dtype="bool") @@ -78,12 +84,14 @@ def visualize_result(path, table): mitos[~mask] = 0 compartments = compartments_new + vesicle_ids = np.unique(vesicles)[1:] + v = napari.Viewer() v.add_image(raw) v.add_labels(mitos) - v.add_labels(vesicles) - v.add_labels(compartments) - v.add_labels(active_zone) + v.add_labels(vesicles, colormap={ves_id: "orange" for ves_id in vesicle_ids}) + v.add_labels(compartments, colormap={1: "red", 2: "green", 3: "orange"}) + v.add_labels(active_zone, colormap={1: "blue"}) v.title = f"{ds_name}/{name}" napari.run() @@ -115,6 +123,7 @@ def main(): paths = sorted(glob(os.path.join(ROOT, "**/*.h5"), recursive=True)) table = pd.read_excel(TABLE) for path in paths: + print(path) visualize_result(path, table) # visualize_only_compartment(path, table) diff --git a/scripts/inner_ear/analysis/analyze_distances.py b/scripts/inner_ear/analysis/analyze_distances.py index 534f90b..473d6b8 100644 --- a/scripts/inner_ear/analysis/analyze_distances.py +++ b/scripts/inner_ear/analysis/analyze_distances.py @@ -35,7 +35,7 @@ def _plot_all(distances): # TODO rename the method names. 
# We only care about the following distances: # - MP-V -> PD, AZ (Boundary) -# - Docked-V -> PD +# - Docked-V -> PD, AZ # - RA-V -> Ribbon def _plot_selected(distances, save_path=None): fig, axes = plt.subplots(2, 2) @@ -46,7 +46,7 @@ def _plot_selected(distances, save_path=None): def _plot(pool_name, distance_col, structure_name, ax): - this_distances = distances[distances["pool"] == pool_name][["approach", distance_col]] + this_distances = distances[distances["pool"] == pool_name][["tomogram", "approach", distance_col]] ax.set_title(f"{pool_name} to {structure_name}") sns.histplot( @@ -56,15 +56,27 @@ def _plot(pool_name, distance_col, structure_name, ax): if save_path is not None: approaches = pd.unique(this_distances["approach"]) - dist_values = [ - this_distances[this_distances["approach"] == approach][distance_col].values.tolist() - for approach in approaches - ] - max_len = max([len(vals) for vals in dist_values]) - save_distances = { - approach: dists + [np.nan] * (max_len - len(dists)) - for approach, dists in zip(approaches, dist_values) - } + tomo_names = pd.unique(this_distances["tomogram"]) + + tomograms = [] + distance_values = {approach: [] for approach in approaches} + + for tomo in tomo_names: + tomo_dists = this_distances[this_distances["tomogram"] == tomo] + max_vesicles = 0 + for approach in approaches: + n_vesicles = len(tomo_dists[tomo_dists["approach"] == approach].values) + if n_vesicles > max_vesicles: + max_vesicles = n_vesicles + + for approach in approaches: + app_dists = tomo_dists[tomo_dists["approach"] == approach][distance_col].values.tolist() + app_dists = app_dists + [np.nan] * (max_vesicles - len(app_dists)) + distance_values[approach].extend(app_dists) + tomograms.extend([tomo] * max_vesicles) + + save_distances = {"tomograms": tomograms} + save_distances.update(distance_values) save_distances = pd.DataFrame(save_distances) sheet_name = f"{pool_name}_{structure_name}" @@ -74,9 +86,11 @@ def _plot(pool_name, distance_col, structure_name, ax): else: save_distances.to_excel(save_path, index=False, sheet_name=sheet_name) + # NOTE: we over-ride a plot here, should not do this in the actual version _plot("MP-V", "pd_distance [nm]", "PD", axes[0, 0]) _plot("MP-V", "boundary_distance [nm]", "AZ Membrane", axes[0, 1]) _plot("Docked-V", "pd_distance [nm]", "PD", axes[1, 0]) + _plot("Docked-V", "boundary_distance [nm]", "AZ Membrane", axes[1, 0]) _plot("RA-V", "ribbon_distance [nm]", "Ribbon", axes[1, 1]) fig.tight_layout() @@ -87,17 +101,17 @@ def for_tomos_with_annotation(plot_all=True): manual_assignments, semi_automatic_assignments, automatic_assignments = get_measurements_with_annotation() manual_distances = manual_assignments[ - ["pool", "ribbon_distance [nm]", "pd_distance [nm]", "boundary_distance [nm]"] + ["tomogram", "pool", "ribbon_distance [nm]", "pd_distance [nm]", "boundary_distance [nm]"] ] manual_distances["approach"] = ["manual"] * len(manual_distances) semi_automatic_distances = semi_automatic_assignments[ - ["pool", "ribbon_distance [nm]", "pd_distance [nm]", "boundary_distance [nm]"] + ["tomogram", "pool", "ribbon_distance [nm]", "pd_distance [nm]", "boundary_distance [nm]"] ] semi_automatic_distances["approach"] = ["semi_automatic"] * len(semi_automatic_distances) automatic_distances = automatic_assignments[ - ["pool", "ribbon_distance [nm]", "pd_distance [nm]", "boundary_distance [nm]"] + ["tomogram", "pool", "ribbon_distance [nm]", "pd_distance [nm]", "boundary_distance [nm]"] ] automatic_distances["approach"] = ["automatic"] * 
len(automatic_distances) @@ -113,12 +127,12 @@ def for_all_tomos(plot_all=True): semi_automatic_assignments, automatic_assignments = get_all_measurements() semi_automatic_distances = semi_automatic_assignments[ - ["pool", "ribbon_distance [nm]", "pd_distance [nm]", "boundary_distance [nm]"] + ["tomogram", "pool", "ribbon_distance [nm]", "pd_distance [nm]", "boundary_distance [nm]"] ] semi_automatic_distances["approach"] = ["semi_automatic"] * len(semi_automatic_distances) automatic_distances = automatic_assignments[ - ["pool", "ribbon_distance [nm]", "pd_distance [nm]", "boundary_distance [nm]"] + ["tomogram", "pool", "ribbon_distance [nm]", "pd_distance [nm]", "boundary_distance [nm]"] ] automatic_distances["approach"] = ["automatic"] * len(automatic_distances) diff --git a/scripts/inner_ear/analysis/analyze_vesicle_pools.py b/scripts/inner_ear/analysis/analyze_vesicle_pools.py index e9ad651..7b67c99 100644 --- a/scripts/inner_ear/analysis/analyze_vesicle_pools.py +++ b/scripts/inner_ear/analysis/analyze_vesicle_pools.py @@ -94,8 +94,7 @@ def for_all_tomos(): errors.to_excel(writer, sheet_name="StandardDeviation", index=False) -# TODO: export the vesicle diameters -# TODO: export the ribbon and pd stats +# TODO: export the ribbon and pd stats (first need to discuss this with Fid) def main(): # for_tomos_with_annotation() for_all_tomos() diff --git a/scripts/summarize_data.py b/scripts/summarize_data.py new file mode 100644 index 0000000..3658354 --- /dev/null +++ b/scripts/summarize_data.py @@ -0,0 +1,27 @@ +import numpy as np +import pandas as pd + + +az_train = pd.read_excel("data_summary/active_zone_training_data.xlsx") +compartment_train = pd.read_excel("data_summary/compartment_training_data.xlsx") +vesicle_train = pd.read_excel("data_summary/vesicle_training_data.xlsx") +vesicle_da = pd.read_excel("data_summary/vesicle_domain_adaptation_data.xlsx", sheet_name="cryo") + + +def training_resolutions(): + res_az = np.round(az_train["resolution"].mean(), 2) + res_compartment = np.round(compartment_train["resolution"].mean(), 2) + res_cryo = np.round(vesicle_da["resolution"].mean(), 2) + res_vesicles = np.round(vesicle_train["resolution"].mean(), 2) + + print("Training resolutions for models:") + print("active_zone:", res_az) + print("compartments:", res_compartment) + # TODO + print("mitochondria:", 1.0) + print("vesicles_2d:", res_vesicles) + print("vesicles_3d:", res_vesicles) + print("vesicles_cryo:", res_cryo) + + +training_resolutions() From 2ccf3404d318097c59d6eb5cc1f7cf6e682b1c58 Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Wed, 20 Nov 2024 13:49:29 +0100 Subject: [PATCH 21/35] Add script to extract vesicle diameters for inner ear data --- .../analysis/analyze_vesicle_radii.py | 132 ++++++++++++++++++ 1 file changed, 132 insertions(+) create mode 100644 scripts/inner_ear/analysis/analyze_vesicle_radii.py diff --git a/scripts/inner_ear/analysis/analyze_vesicle_radii.py b/scripts/inner_ear/analysis/analyze_vesicle_radii.py new file mode 100644 index 0000000..8fa5d9e --- /dev/null +++ b/scripts/inner_ear/analysis/analyze_vesicle_radii.py @@ -0,0 +1,132 @@ +import os +import sys + +from glob import glob + +import mrcfile +import pandas as pd +from tqdm import tqdm + +from synaptic_reconstruction.imod.export import load_points_from_imodinfo +from synaptic_reconstruction.file_utils import get_data_path + +sys.path.append("../processing") + + +def aggregate_radii(data_root, table, save_path, get_tab): + if os.path.exists(save_path): + return + + radius_table = [] + for _, row in 
tqdm(table.iterrows(), total=len(table), desc="Collect tomo information"): + folder = row["Local Path"] + if folder == "": + continue + + tomo_name = os.path.relpath(folder, os.path.join(data_root, "Electron-Microscopy-Susi/Analyse")) + tab_path = get_tab(folder) + if tab_path is None: + continue + + tab = pd.read_excel(tab_path) + this_tab = tab[["pool", "radius [nm]"]] + this_tab.insert(0, "tomogram", [tomo_name] * len(this_tab)) + radius_table.append(this_tab) + + radius_table = pd.concat(radius_table) + print("Saving table for", len(radius_table), "vesicles to", save_path) + radius_table.to_excel(save_path, index=False) + + +def aggregate_radii_imod(data_root, table, save_path): + if os.path.exists(save_path): + return + + radius_table = [] + for _, row in tqdm(table.iterrows(), total=len(table), desc="Collect tomo information"): + folder = row["Local Path"] + if folder == "": + continue + + tomo_name = os.path.relpath(folder, os.path.join(data_root, "Electron-Microscopy-Susi/Analyse")) + annotation_folder = os.path.join(folder, "manuell") + if not os.path.exists(annotation_folder): + annotation_folder = os.path.join(folder, "Manuell") + if not os.path.exists(annotation_folder): + continue + + annotations = glob(os.path.join(annotation_folder, "*.mod")) + annotation_file = [ann for ann in annotations if ("vesikel" in ann.lower()) or ("vesicle" in ann.lower())] + if len(annotation_file) != 1: + continue + annotation_file = annotation_file[0] + + tomo_file = get_data_path(folder) + with mrcfile.open(tomo_file) as f: + shape = f.data.shape + resolution = list(f.voxel_size.item()) + resolution = [res / 10 for res in resolution][0] + + try: + _, radii, labels, label_names = load_points_from_imodinfo(annotation_file, shape, resolution=resolution) + except AssertionError: + continue + + this_tab = pd.DataFrame({ + "tomogram": [tomo_name] * len(radii), + "pool": [label_names[label_id] for label_id in labels], + "radius [nm]": radii, + }) + radius_table.append(this_tab) + + radius_table = pd.concat(radius_table) + print("Saving table for", len(radius_table), "vesicles to", save_path) + radius_table.to_excel(save_path, index=False) + + +def get_tab_automatic(folder): + tab_name = "measurements_uncorrected_assignments.xlsx" + res_path = os.path.join(folder, "korrektur", tab_name) + if not os.path.exists(res_path): + res_path = os.path.join(folder, "Korrektur", tab_name) + if not os.path.exists(res_path): + res_path = None + return res_path + + +def get_tab_semi_automatic(folder): + tab_name = "measurements.xlsx" + res_path = os.path.join(folder, "korrektur", tab_name) + if not os.path.exists(res_path): + res_path = os.path.join(folder, "Korrektur", tab_name) + if not os.path.exists(res_path): + res_path = None + return res_path + + +def get_tab_manual(folder): + tab_name = "measurements.xlsx" + res_path = os.path.join(folder, "manuell", tab_name) + if not os.path.exists(res_path): + res_path = os.path.join(folder, "Manuell", tab_name) + if not os.path.exists(res_path): + res_path = None + return res_path + + +def main(): + from parse_table import parse_table, get_data_root + + data_root = get_data_root() + table_path = os.path.join(data_root, "Electron-Microscopy-Susi", "Übersicht.xlsx") + table = parse_table(table_path, data_root) + + # TODO get the radii from imod + aggregate_radii(data_root, table, save_path="./results/vesicle_radii_automatic.xlsx", get_tab=get_tab_automatic) + aggregate_radii(data_root, table, save_path="./results/vesicle_radii_semi_automatic.xlsx", 
get_tab=get_tab_semi_automatic) # noqa + aggregate_radii(data_root, table, save_path="./results/vesicle_radii_manual.xlsx", get_tab=get_tab_manual) + aggregate_radii_imod(data_root, table, save_path="./results/vesicle_radii_imod.xlsx") + + +if __name__ == "__main__": + main() From 5feff6a2b43c07af16bf2d3d2d579cacb77a08df Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Thu, 21 Nov 2024 12:38:36 +0100 Subject: [PATCH 22/35] Update active zone analysis for SNAP/MUNC data --- .../cooper/analysis/active_zone_analysis.py | 197 +++++++++++++++++- .../cooper/analysis/compute_skeleton_area.py | 44 ++++ scripts/summarize_data.py | 129 +++++++++++- synaptic_reconstruction/morphology.py | 112 +++++++++- 4 files changed, 476 insertions(+), 6 deletions(-) create mode 100644 scripts/cooper/analysis/compute_skeleton_area.py diff --git a/scripts/cooper/analysis/active_zone_analysis.py b/scripts/cooper/analysis/active_zone_analysis.py index d2234c9..bb13ac5 100644 --- a/scripts/cooper/analysis/active_zone_analysis.py +++ b/scripts/cooper/analysis/active_zone_analysis.py @@ -3,15 +3,22 @@ import h5py import numpy as np +import napari +import pandas as pd from scipy.ndimage import binary_closing from skimage.measure import label from synaptic_reconstruction.ground_truth.shape_refinement import edge_filter +from synaptic_reconstruction.morphology import skeletonize_object +from synaptic_reconstruction.distance_measurements import measure_segmentation_to_object_distances from tqdm import tqdm -ROOT = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/20241102_TOMO_DATA_Imig2014/final_Imig2014_seg_autoComp" # noqa +from compute_skeleton_area import calculate_surface_area -OUTPUT_AZ = "./boundary_az" +ROOT = "./imig_data" # noqa +OUTPUT_AZ = "./az_segmentation" + +RESOLUTION = (1.554,) * 3 def filter_az(path): @@ -20,6 +27,7 @@ def filter_az(path): ds = os.path.basename(ds) out_path = os.path.join(OUTPUT_AZ, ds, fname) os.makedirs(os.path.join(OUTPUT_AZ, ds), exist_ok=True) + if os.path.exists(out_path): return @@ -56,11 +64,192 @@ def filter_az(path): f.create_dataset("filtered_az", data=az_filtered, compression="gzip") -def main(): +def filter_all_azs(): files = sorted(glob(os.path.join(ROOT, "**/*.h5"), recursive=True)) - for ff in tqdm(files): + for ff in tqdm(files, desc="Filter AZ segmentations."): filter_az(ff) +def process_az(path, view=True): + key = "thin_az" + + with h5py.File(path, "r") as f: + if key in f and not view: + return + az_seg = f["filtered_az"][:] + + az_thin = skeletonize_object(az_seg) + + if view: + ds, fname = os.path.split(path) + ds = os.path.basename(ds) + raw_path = os.path.join(ROOT, ds, fname) + with h5py.File(raw_path, "r") as f: + raw = f["raw"][:] + v = napari.Viewer() + v.add_image(raw) + v.add_labels(az_seg) + v.add_labels(az_thin) + napari.run() + else: + with h5py.File(path, "a") as f: + f.create_dataset(key, data=az_thin, compression="gzip") + + +# Apply thinning to all active zones to obtain 1d surface. +def process_all_azs(): + files = sorted(glob(os.path.join(OUTPUT_AZ, "**/*.h5"), recursive=True)) + for ff in tqdm(files, desc="Thin AZ segmentations."): + process_az(ff, view=False) + + +def measure_az_area(path): + from skimage import measure + + with h5py.File(path, "r") as f: + seg = f["thin_az"][:] + + # Try via surface mesh. + verts, faces, normals, values = measure.marching_cubes(seg, spacing=RESOLUTION) + surface_area1 = measure.mesh_surface_area(verts, faces) + + # Try via custom function. 
+ surface_area2 = calculate_surface_area(seg, voxel_size=RESOLUTION) + + ds, fname = os.path.split(path) + ds = os.path.basename(ds) + + return pd.DataFrame({ + "Dataset": [ds], + "Tomogram": [fname], + "surface_mesh [nm^2]": [surface_area1], + "surface_custom [nm^2]": [surface_area2], + }) + + +# Measure the AZ surface areas. +def measure_all_areas(): + save_path = "./results/area_measurements.xlsx" + if os.path.exists(save_path): + return + + files = sorted(glob(os.path.join(OUTPUT_AZ, "**/*.h5"), recursive=True)) + area_table = [] + for ff in tqdm(files, desc="Measure AZ areas."): + area = measure_az_area(ff) + area_table.append(area) + area_table = pd.concat(area_table) + area_table.to_excel(save_path, index=False) + + manual_results = "/home/pape/Work/my_projects/synaptic-reconstruction/scripts/cooper/debug/surface/manualAZ_surface_area.xlsx" # noqa + manual_results = pd.read_excel(manual_results)[["Dataset", "Tomogram", "manual"]] + comparison_table = pd.merge(area_table, manual_results, on=["Dataset", "Tomogram"], how="inner") + comparison_table.to_excel("./results/area_comparison.xlsx", index=False) + + +def analyze_areas(): + import seaborn as sns + import matplotlib.pyplot as plt + + table = pd.read_excel("./results/area_comparison.xlsx") + + fig, axes = plt.subplots(2) + sns.scatterplot(data=table, x="manual", y="surface_mesh [nm^2]", ax=axes[0]) + sns.scatterplot(data=table, x="manual", y="surface_custom [nm^2]", ax=axes[1]) + plt.show() + + +def measure_distances(ves_path, az_path): + with h5py.File(az_path, "r") as f: + az = f["thin_az"][:] + + with h5py.File(ves_path, "r") as f: + vesicles = f["vesicles/segment_from_combined_vesicles"][:] + + distances, _, _, _ = measure_segmentation_to_object_distances(vesicles, az, resolution=RESOLUTION) + + ds, fname = os.path.split(az_path) + ds = os.path.basename(ds) + + return pd.DataFrame({ + "Dataset": [ds] * len(distances), + "Tomogram": [fname] * len(distances), + "Distance": distances, + }) + + +# Measure the AZ vesicle distances for all vesicles. 
+def measure_all_distances(): + save_path = "./results/vesicle_az_distances.xlsx" + if os.path.exists(save_path): + return + + ves_files = sorted(glob(os.path.join(ROOT, "**/*.h5"), recursive=True)) + az_files = sorted(glob(os.path.join(OUTPUT_AZ, "**/*.h5"), recursive=True)) + assert len(ves_files) == len(az_files) + + dist_table = [] + for ves_file, az_file in tqdm(zip(ves_files, az_files), total=len(az_files), desc="Measure distances."): + dist = measure_distances(ves_file, az_file) + dist_table.append(dist) + dist_table = pd.concat(dist_table) + + dist_table.to_excel(save_path, index=False) + + +def reformat_distances(): + tab = pd.read_excel("./results/vesicle_az_distances.xlsx") + + munc_ko = {} + munc_ctrl = {} + + snap_ko = {} + snap_ctrl = {} + + for _, row in tab.iterrows(): + ds = row.Dataset + tomo = row.Tomogram + + if ds == "Munc13DKO": + if "CTRL" in tomo: + group = munc_ctrl + else: + group = munc_ko + else: + assert ds == "SNAP25" + if "CTRL" in tomo: + group = snap_ctrl + else: + group = snap_ko + + name = os.path.splitext(tomo)[0] + val = row["Distance"] + if name in group: + group[name].append(val) + else: + group[name] = [val] + + def save_tab(group, path): + n_ves_max = max(len(v) for v in group.values()) + group = {k: v + [np.nan] * (n_ves_max - len(v)) for k, v in group.items()} + group_tab = pd.DataFrame(group) + group_tab.to_excel(path, index=False) + + os.makedirs("./results/distances_formatted", exist_ok=True) + save_tab(munc_ko, "./results/distances_formatted/munc_ko.xlsx") + save_tab(munc_ctrl, "./results/distances_formatted/munc_ctrl.xlsx") + save_tab(snap_ko, "./results/distances_formatted/snap_ko.xlsx") + save_tab(snap_ctrl, "./results/distances_formatted/snap_ctrl.xlsx") + + +def main(): + # filter_all_azs() + # process_all_azs() + # measure_all_areas() + # analyze_areas() + # measure_all_distances() + reformat_distances() + + +if __name__ == "__main__": + main() diff --git a/scripts/cooper/analysis/compute_skeleton_area.py b/scripts/cooper/analysis/compute_skeleton_area.py new file mode 100644 index 0000000..6fb05d0 --- /dev/null +++ b/scripts/cooper/analysis/compute_skeleton_area.py @@ -0,0 +1,44 @@ +import numpy as np + + +def calculate_surface_area(skeleton, voxel_size=(1.0, 1.0, 1.0)): + """ + Calculate the surface area of a 3D skeletonized object. + + Parameters: + skeleton (3D array): Binary 3D skeletonized array. + voxel_size (tuple): Physical size of voxels (z, y, x). + + Returns: + float: Approximate surface area of the skeleton.
+ """ + # Define the voxel dimensions + voxel_area = ( + voxel_size[1] * voxel_size[2], # yz-face area + voxel_size[0] * voxel_size[2], # xz-face area + voxel_size[0] * voxel_size[1], # xy-face area + ) + + # Compute the number of exposed faces for each voxel + exposed_faces = 0 + directions = [ + (1, 0, 0), (-1, 0, 0), # x-axis neighbors + (0, 1, 0), (0, -1, 0), # y-axis neighbors + (0, 0, 1), (0, 0, -1), # z-axis neighbors + ] + + # Iterate over all voxels in the skeleton + for z, y, x in np.argwhere(skeleton): + for i, (dz, dy, dx) in enumerate(directions): + neighbor = (z + dz, y + dy, x + dx) + # Check if the neighbor is outside the volume or not part of the skeleton + if ( + 0 <= neighbor[0] < skeleton.shape[0] and + 0 <= neighbor[1] < skeleton.shape[1] and + 0 <= neighbor[2] < skeleton.shape[2] and + skeleton[neighbor] == 1 + ): + continue + exposed_faces += voxel_area[i // 2] + + return exposed_faces diff --git a/scripts/summarize_data.py b/scripts/summarize_data.py index 3658354..df57059 100644 --- a/scripts/summarize_data.py +++ b/scripts/summarize_data.py @@ -1,12 +1,21 @@ +import matplotlib.pyplot as plt import numpy as np import pandas as pd +# TODO inner ear train data and mito training data are missing az_train = pd.read_excel("data_summary/active_zone_training_data.xlsx") compartment_train = pd.read_excel("data_summary/compartment_training_data.xlsx") vesicle_train = pd.read_excel("data_summary/vesicle_training_data.xlsx") vesicle_da = pd.read_excel("data_summary/vesicle_domain_adaptation_data.xlsx", sheet_name="cryo") +# Inner ear trainign data: +# Sophia: 92 +# Rat: 19 +# Tether: 3 +# Ves Pools: 6 +# Total = 120 + def training_resolutions(): res_az = np.round(az_train["resolution"].mean(), 2) @@ -22,6 +31,124 @@ def training_resolutions(): print("vesicles_2d:", res_vesicles) print("vesicles_3d:", res_vesicles) print("vesicles_cryo:", res_cryo) + # TODO inner ear + + +def pie_chart(data, count_col, title): + # Plot the pie chart + plt.figure(figsize=(8, 6)) + wedges, texts, autotexts = plt.pie( + data[count_col], + labels=data["Condition"], + autopct="%1.1f%%", # Display percentages + startangle=90, # Start at the top + colors=plt.cm.Paired.colors[:len(data)], # Optional: Custom color palette + textprops={"fontsize": 14} + ) + + for autot in autotexts: + autot.set_fontsize(18) + + plt.title(title, fontsize=18) + plt.tight_layout() + plt.show() + + +def summarize_vesicle_train_data(): + condition_summary = { + "Condition": [], + "Tomograms": [], + "Vesicles": [], + } + + conditions = pd.unique(vesicle_train.condition) + for condition in conditions: + ctab = vesicle_train[vesicle_train.condition == condition] + n_tomos = len(ctab) + n_vesicles = ctab["vesicle_count"].sum() + print(condition) + print("Tomograms:", n_tomos) + print("Vesicles:", n_vesicles) + print() + condition_summary["Condition"].append(condition) + condition_summary["Tomograms"].append(n_tomos) + condition_summary["Vesicles"].append(n_vesicles) + condition_summary = pd.DataFrame(condition_summary) + + print("Total:") + print("Tomograms:", len(vesicle_train)) + print("Vesicles:", vesicle_train["vesicle_count"].sum()) + print() + + train_tomos = vesicle_train[vesicle_train.used_for == "train/val"] + print("Training:") + print("Tomograms:", len(train_tomos)) + print("Vesicles:", train_tomos["vesicle_count"].sum()) + print() + + test_tomos = vesicle_train[vesicle_train.used_for == "test"] + print("Test:") + print("Tomograms:", len(test_tomos)) + print("Vesicles:", test_tomos["vesicle_count"].sum()) + + 
pie_chart(condition_summary, "Tomograms", "Tomograms per Condition") + pie_chart(condition_summary, "Vesicles", "Vesicles per Condition") + + +def summarize_vesicle_da(): + for name in ("inner_ear", "endbulb", "cryo", "frog", "maus_2d"): + tab = pd.read_excel("data_summary/vesicle_domain_adaptation_data.xlsx", sheet_name=name) + print(name) + print("N-tomograms:", len(tab)) + print("N-test:", (tab["used_for"] == "test").sum()) + print("N-vesicles:", tab["vesicle_count"].sum()) + print() + + +def summarize_az_train(): + conditions = pd.unique(az_train.condition) + print(conditions) + + print("Total:") + print("Tomograms:", len(az_train)) + print("Active Zones:", az_train["az_count"].sum()) + print() + + train_tomos = az_train[az_train.used_for == "train/val"] + print("Training:") + print("Tomograms:", len(train_tomos)) + print("Active Zones:", train_tomos["az_count"].sum()) + print() + + test_tomos = az_train[az_train.used_for == "test"] + print("Test:") + print("Tomograms:", len(test_tomos)) + print("Active Zones:", test_tomos["az_count"].sum()) + + +def summarize_compartment_train(): + conditions = pd.unique(compartment_train.condition) + print(conditions) + + print("Total:") + print("Tomograms:", len(compartment_train)) + print("Compartments:", compartment_train["compartment_count"].sum()) + print() + + train_tomos = compartment_train[compartment_train.used_for == "train/val"] + print("Training:") + print("Tomograms:", len(train_tomos)) + print("Compartments:", train_tomos["compartment_count"].sum()) + print() + + test_tomos = compartment_train[compartment_train.used_for == "test"] + print("Test:") + print("Tomograms:", len(test_tomos)) + print("Compartments:", test_tomos["compartment_count"].sum()) -training_resolutions() +# training_resolutions() +# summarize_vesicle_train_data() +# summarize_vesicle_da() +summarize_az_train() +# summarize_compartment_train() diff --git a/synaptic_reconstruction/morphology.py b/synaptic_reconstruction/morphology.py index 8afea3d..126042f 100644 --- a/synaptic_reconstruction/morphology.py +++ b/synaptic_reconstruction/morphology.py @@ -6,8 +6,11 @@ import numpy as np import pandas as pd -from scipy.ndimage import distance_transform_edt + +from scipy.ndimage import distance_transform_edt, convolve +from skimage.graph import MCP from skimage.measure import regionprops, marching_cubes +from skimage.morphology import skeletonize, medial_axis, label from skimage.segmentation import find_boundaries @@ -87,3 +90,110 @@ def compute_object_morphology(object_, structure_name, resolution=None): "surface [pixel^2]" if resolution is None else "surface [nm^2]": [surface], }) return morphology + + +def _find_endpoints(component): + # Define a 3x3 kernel to count neighbors + kernel = np.ones((3, 3), dtype=int) + neighbor_count = convolve(component.astype(int), kernel, mode="constant", cval=0) + endpoints = np.argwhere((component == 1) & (neighbor_count == 2)) # Degree = 1 + return endpoints + + +def _compute_longest_path(component, endpoints): + # Use the first endpoint as the source + src = tuple(endpoints[0]) + cost = np.where(component, 1, np.inf) # Cost map: 1 for skeleton, inf for background + mcp = MCP(cost) + _, traceback = mcp.find_costs([src]) + + # Use the second endpoint as the destination + dst = tuple(endpoints[-1]) + + # Trace back the path + path = np.zeros_like(component, dtype=bool) + current = dst + + # Extract offsets from the MCP object + offsets = np.array(mcp.offsets) + nrows, ncols = component.shape + + while current != src: + path[current] = 
True
+            current_offset_index = traceback[current]
+            if current_offset_index < 0:
+                # No valid path found
+                break
+            offset = offsets[current_offset_index]
+            # Move to the predecessor
+            current = (current[0] - offset[0], current[1] - offset[1])
+            # Ensure indices are within bounds
+            if not (0 <= current[0] < nrows and 0 <= current[1] < ncols):
+                break
+
+    path[src] = True  # Include the source
+    return path
+
+
+def _prune_skeleton_longest_path(skeleton):
+    pruned_skeleton = np.zeros_like(skeleton, dtype=bool)
+
+    # Label connected components in the skeleton
+    labeled_skeleton, num_labels = label(skeleton, return_num=True)
+
+    for label_id in range(1, num_labels + 1):
+        # Isolate the current connected component
+        component = (labeled_skeleton == label_id)
+
+        # Find the endpoints of the component
+        endpoints = _find_endpoints(component)
+        if len(endpoints) < 2:
+            continue  # Skip if there are no valid endpoints
+        elif len(endpoints) == 2:  # Nothing to prune
+            pruned_skeleton |= component
+            continue
+
+        # Compute the longest path using MCP
+        longest_path = _compute_longest_path(component, endpoints)
+
+        # import napari
+        # v = napari.Viewer()
+        # v.add_labels(component)
+        # v.add_labels(longest_path)
+        # v.add_points(endpoints)
+        # napari.run()
+
+        pruned_skeleton |= longest_path
+
+    return pruned_skeleton.astype(skeleton.dtype)
+
+
+def skeletonize_object(
+    segmentation: np.ndarray,
+    method: str = "skeletonize",
+    prune: bool = True,
+    min_prune_size: int = 10,
+):
+    """Skeletonize a 3D object by individually skeletonizing each slice.
+
+    Args:
+        segmentation: The binary segmentation of the object.
+        method: The 2D skeletonization method, either "skeletonize" or "medial_axis".
+        prune: Whether to prune each 2D skeleton to its longest path.
+        min_prune_size: The minimal size of connected skeleton pieces kept after pruning.
+
+    Returns:
+        The skeletonized object, with the same shape as the input segmentation.
+    """
+    assert method in ("skeletonize", "medial_axis")
+    seg_thin = np.zeros_like(segmentation)
+    skeletor = skeletonize if method == "skeletonize" else medial_axis
+    # Parallelize?
+    for z in range(segmentation.shape[0]):
+        skeleton = skeletor(segmentation[z])
+
+        if prune:
+            skeleton = _prune_skeleton_longest_path(skeleton)
+            if min_prune_size > 0:
+                skeleton = label(skeleton)
+                ids, sizes = np.unique(skeleton, return_counts=True)
+                ids, sizes = ids[1:], sizes[1:]
+                skeleton = np.isin(skeleton, ids[sizes >= min_prune_size])
+
+        seg_thin[z] = skeleton
+    return seg_thin

From 9b8c7a21b9178efc48bfa4805745af8f3683df55 Mon Sep 17 00:00:00 2001
From: Constantin Pape
Date: Thu, 21 Nov 2024 22:16:39 +0100
Subject: [PATCH 23/35] Add more inner ear analysis code

---
 .../analysis/analyze_vesicle_diameters.py | 132 ++++++++++++++++++
 .../analysis/extract_ribbon_stats.py      |  36 +++++
 2 files changed, 168 insertions(+)
 create mode 100644 scripts/inner_ear/analysis/analyze_vesicle_diameters.py
 create mode 100644 scripts/inner_ear/analysis/extract_ribbon_stats.py

diff --git a/scripts/inner_ear/analysis/analyze_vesicle_diameters.py b/scripts/inner_ear/analysis/analyze_vesicle_diameters.py
new file mode 100644
index 0000000..8fa5d9e
--- /dev/null
+++ b/scripts/inner_ear/analysis/analyze_vesicle_diameters.py
@@ -0,0 +1,132 @@
+import os
+import sys
+
+from glob import glob
+
+import mrcfile
+import pandas as pd
+from tqdm import tqdm
+
+from synaptic_reconstruction.imod.export import load_points_from_imodinfo
+from synaptic_reconstruction.file_utils import get_data_path
+
+sys.path.append("../processing")
+
+
+def aggregate_radii(data_root, table, save_path, get_tab):
+    if os.path.exists(save_path):
+        return
+
+    radius_table = []
+    for _, row in tqdm(table.iterrows(), total=len(table), desc="Collect tomo information"):
+        folder = row["Local Path"]
+        if folder == "":
+            continue
+
+        tomo_name = os.path.relpath(folder, os.path.join(data_root,
"Electron-Microscopy-Susi/Analyse")) + tab_path = get_tab(folder) + if tab_path is None: + continue + + tab = pd.read_excel(tab_path) + this_tab = tab[["pool", "radius [nm]"]] + this_tab.insert(0, "tomogram", [tomo_name] * len(this_tab)) + radius_table.append(this_tab) + + radius_table = pd.concat(radius_table) + print("Saving table for", len(radius_table), "vesicles to", save_path) + radius_table.to_excel(save_path, index=False) + + +def aggregate_radii_imod(data_root, table, save_path): + if os.path.exists(save_path): + return + + radius_table = [] + for _, row in tqdm(table.iterrows(), total=len(table), desc="Collect tomo information"): + folder = row["Local Path"] + if folder == "": + continue + + tomo_name = os.path.relpath(folder, os.path.join(data_root, "Electron-Microscopy-Susi/Analyse")) + annotation_folder = os.path.join(folder, "manuell") + if not os.path.exists(annotation_folder): + annotation_folder = os.path.join(folder, "Manuell") + if not os.path.exists(annotation_folder): + continue + + annotations = glob(os.path.join(annotation_folder, "*.mod")) + annotation_file = [ann for ann in annotations if ("vesikel" in ann.lower()) or ("vesicle" in ann.lower())] + if len(annotation_file) != 1: + continue + annotation_file = annotation_file[0] + + tomo_file = get_data_path(folder) + with mrcfile.open(tomo_file) as f: + shape = f.data.shape + resolution = list(f.voxel_size.item()) + resolution = [res / 10 for res in resolution][0] + + try: + _, radii, labels, label_names = load_points_from_imodinfo(annotation_file, shape, resolution=resolution) + except AssertionError: + continue + + this_tab = pd.DataFrame({ + "tomogram": [tomo_name] * len(radii), + "pool": [label_names[label_id] for label_id in labels], + "radius [nm]": radii, + }) + radius_table.append(this_tab) + + radius_table = pd.concat(radius_table) + print("Saving table for", len(radius_table), "vesicles to", save_path) + radius_table.to_excel(save_path, index=False) + + +def get_tab_automatic(folder): + tab_name = "measurements_uncorrected_assignments.xlsx" + res_path = os.path.join(folder, "korrektur", tab_name) + if not os.path.exists(res_path): + res_path = os.path.join(folder, "Korrektur", tab_name) + if not os.path.exists(res_path): + res_path = None + return res_path + + +def get_tab_semi_automatic(folder): + tab_name = "measurements.xlsx" + res_path = os.path.join(folder, "korrektur", tab_name) + if not os.path.exists(res_path): + res_path = os.path.join(folder, "Korrektur", tab_name) + if not os.path.exists(res_path): + res_path = None + return res_path + + +def get_tab_manual(folder): + tab_name = "measurements.xlsx" + res_path = os.path.join(folder, "manuell", tab_name) + if not os.path.exists(res_path): + res_path = os.path.join(folder, "Manuell", tab_name) + if not os.path.exists(res_path): + res_path = None + return res_path + + +def main(): + from parse_table import parse_table, get_data_root + + data_root = get_data_root() + table_path = os.path.join(data_root, "Electron-Microscopy-Susi", "Übersicht.xlsx") + table = parse_table(table_path, data_root) + + # TODO get the radii from imod + aggregate_radii(data_root, table, save_path="./results/vesicle_radii_automatic.xlsx", get_tab=get_tab_automatic) + aggregate_radii(data_root, table, save_path="./results/vesicle_radii_semi_automatic.xlsx", get_tab=get_tab_semi_automatic) # noqa + aggregate_radii(data_root, table, save_path="./results/vesicle_radii_manual.xlsx", get_tab=get_tab_manual) + aggregate_radii_imod(data_root, table, 
save_path="./results/vesicle_radii_imod.xlsx") + + +if __name__ == "__main__": + main() diff --git a/scripts/inner_ear/analysis/extract_ribbon_stats.py b/scripts/inner_ear/analysis/extract_ribbon_stats.py new file mode 100644 index 0000000..8ee9e12 --- /dev/null +++ b/scripts/inner_ear/analysis/extract_ribbon_stats.py @@ -0,0 +1,36 @@ +import numpy as np +import pandas as pd + + +def main(): + man_path = "../results/20240917_1/fully_manual_analysis_results.xlsx" + auto_path = "../results/20240917_1/automatic_analysis_results.xlsx" + + man_measurements = pd.read_excel(man_path, sheet_name="morphology") + man_measurements = man_measurements[man_measurements.structure == "ribbon"][ + ["tomogram", "surface [nm^2]", "volume [nm^3]"] + ] + + auto_measurements = pd.read_excel(auto_path, sheet_name="morphology") + auto_measurements = auto_measurements[auto_measurements.structure == "ribbon"][ + ["tomogram", "surface [nm^2]", "volume [nm^3]"] + ] + + # save all the automatic measurements + auto_measurements.to_excel("./results/ribbon_morphology_auto.xlsx", index=False) + + man_tomograms = pd.unique(man_measurements["tomogram"]) + auto_tomograms = pd.unique(auto_measurements["tomogram"]) + tomos = np.intersect1d(man_tomograms, auto_tomograms) + + man_measurements = man_measurements[man_measurements.tomogram.isin(tomos)] + auto_measurements = auto_measurements[auto_measurements.tomogram.isin(tomos)] + + save_path = "./results/ribbon_morphology_man-v-auto.xlsx" + man_measurements.to_excel(save_path, sheet_name="manual", index=False) + with pd.ExcelWriter(save_path, engine="openpyxl", mode="a") as writer: + auto_measurements.to_excel(writer, sheet_name="auto", index=False) + + +if __name__ == "__main__": + main() From db89b441b25770ec298bbe51eb8d82233b92a0f6 Mon Sep 17 00:00:00 2001 From: SarahMuth Date: Sat, 23 Nov 2024 14:43:53 +0100 Subject: [PATCH 24/35] evaluation of AZ seg --- scripts/cooper/analysis/run_analysis_1.py | 11 +- .../run_spatial_distribution_analysis.py | 2 +- scripts/cooper/training/evaluate_AZ.py | 107 ++++++++++++++++++ synaptic_reconstruction/imod/to_imod.py | 31 +++++ 4 files changed, 144 insertions(+), 7 deletions(-) create mode 100644 scripts/cooper/training/evaluate_AZ.py diff --git a/scripts/cooper/analysis/run_analysis_1.py b/scripts/cooper/analysis/run_analysis_1.py index 3afde5d..94d0a62 100644 --- a/scripts/cooper/analysis/run_analysis_1.py +++ b/scripts/cooper/analysis/run_analysis_1.py @@ -11,9 +11,9 @@ from tqdm import tqdm from synaptic_reconstruction.imod.to_imod import convert_segmentation_to_spheres -DATA_ROOT = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/20241102_TOMO_DATA_Imig2014/exported/" # noqa -PREDICTION_ROOT = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/segmentation/for_spatial_distribution_analysis/final_Imig2014_seg_manComp" # noqa -RESULT_FOLDER = "./analysis_results/AZ_intersect_manualCompartment" +DATA_ROOT = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/segmentation/for_spatial_distribution_analysis/final_Imig2014_seg/" # noqa +PREDICTION_ROOT = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/segmentation/for_spatial_distribution_analysis/final_Imig2014_seg/" # noqa +RESULT_FOLDER = "./analysis_results/AZ_filtered_autoComp" def get_compartment_with_max_overlap(compartments, vesicles): """ @@ -182,9 +182,8 @@ def compute_sizes_for_all_tomorams_autoComp(): result_df.to_csv(output_path, index=False) def main(): - compute_sizes_for_all_tomorams_manComp() - 
#compute_sizes_for_all_tomorams_autoComp() - + #compute_sizes_for_all_tomorams_manComp() + compute_sizes_for_all_tomorams_autoComp() if __name__ == "__main__": main() diff --git a/scripts/cooper/analysis/run_spatial_distribution_analysis.py b/scripts/cooper/analysis/run_spatial_distribution_analysis.py index cdc4c0d..edd8308 100644 --- a/scripts/cooper/analysis/run_spatial_distribution_analysis.py +++ b/scripts/cooper/analysis/run_spatial_distribution_analysis.py @@ -188,7 +188,6 @@ def compute_per_vesicle_distance_to_filteredAZ(): # Save the DataFrame to CSV result_df.to_csv(output_path, index=False) - def compute_per_vesicle_distance_to_AZ_manComp(): os.makedirs(RESULT_FOLDER, exist_ok=True) @@ -248,5 +247,6 @@ def main(): compute_per_vesicle_distance_to_filteredAZ() + if __name__ == "__main__": main() diff --git a/scripts/cooper/training/evaluate_AZ.py b/scripts/cooper/training/evaluate_AZ.py new file mode 100644 index 0000000..fc32214 --- /dev/null +++ b/scripts/cooper/training/evaluate_AZ.py @@ -0,0 +1,107 @@ +import argparse +import os + +import h5py +import pandas as pd +import numpy as np + +from elf.evaluation.dice import dice_score + +def extract_gt_bounding_box(segmentation, gt, halo=[20, 320, 320]): + # Find the bounding box for the ground truth + bb = np.where(gt > 0) + bb = tuple(slice( + max(int(b.min() - ha), 0), # Ensure indices are not below 0 + min(int(b.max() + ha), sh) # Ensure indices do not exceed shape dimensions + ) for b, sh, ha in zip(bb, gt.shape, halo)) + + # Apply the bounding box to both segmentations + segmentation_cropped = segmentation[bb] + gt_cropped = gt[bb] + + return segmentation_cropped, gt_cropped + +def evaluate(labels, segmentation): + assert labels.shape == segmentation.shape + score = dice_score(segmentation, labels) + return score + +def evaluate_file(labels_path, segmentation_path, model_name, crop= False): + print(f"Evaluate labels {labels_path} and vesicles {segmentation_path}") + + ds_name = os.path.basename(os.path.dirname(labels_path)) + tomo = os.path.basename(labels_path) + + #get the labels and segmentation + with h5py.File(labels_path) as label_file: + gt = label_file["/labels/AZ"][:] + + with h5py.File(segmentation_path) as seg_file: + segmentation = seg_file["/AZ/segment_from_AZmodel_v3"][:] + + if crop: + print("cropping the annotation and segmentation") + segmentation, gt = extract_gt_bounding_box(segmentation, gt) + + # Evaluate the match of ground truth and segmentation + dice_score = evaluate(gt, segmentation) + + # Store results + result_folder = "/user/muth9/u12095/synaptic-reconstruction/scripts/cooper/evaluation_results" + os.makedirs(result_folder, exist_ok=True) + result_path = os.path.join(result_folder, f"evaluation_{model_name}.csv") + print("Evaluation results are saved to:", result_path) + + # Load existing results if the file exists + if os.path.exists(result_path): + results = pd.read_csv(result_path) + else: + results = None + + # Create a new DataFrame for the current evaluation + res = pd.DataFrame( + [[ds_name, tomo, dice_score]], columns=["dataset", "tomogram", "dice_score"] + ) + + # Combine with existing results or initialize with the new results + if results is None: + results = res + else: + results = pd.concat([results, res]) + + # Save the results to the CSV file + results.to_csv(result_path, index=False) + +def evaluate_folder(labels_path, segmentation_path, model_name, crop = False): + print(f"Evaluating folder {segmentation_path}") + print(f"Using labels stored in {labels_path}") + + label_files = 
os.listdir(labels_path) + vesicles_files = os.listdir(segmentation_path) + + for vesicle_file in vesicles_files: + if vesicle_file in label_files: + + evaluate_file(os.path.join(labels_path, vesicle_file), os.path.join(segmentation_path, vesicle_file), model_name, crop) + + + +def main(): + + parser = argparse.ArgumentParser() + parser.add_argument("-l", "--labels_path", required=True) + parser.add_argument("-v", "--segmentation_path", required=True) + parser.add_argument("-n", "--model_name", required=True) + parser.add_argument("--crop", action="store_true", help="Crop around the annotation.") + args = parser.parse_args() + + segmentation_path = args.segmentation_path + if os.path.isdir(segmentation_path): + evaluate_folder(args.labels_path, segmentation_path, args.model_name, args.crop) + else: + evaluate_file(args.labels_path, segmentation_path, args.model_name, args.crop) + + + +if __name__ == "__main__": + main() diff --git a/synaptic_reconstruction/imod/to_imod.py b/synaptic_reconstruction/imod/to_imod.py index 7a98469..6b217aa 100644 --- a/synaptic_reconstruction/imod/to_imod.py +++ b/synaptic_reconstruction/imod/to_imod.py @@ -121,6 +121,37 @@ def coords_and_rads(prop): rads = [re[1] for re in res] return np.array(coords), np.array(rads) + def coords_and_rads(prop): + seg_id = prop.label + + bbox = prop.bbox + bb = np.s_[bbox[0]:bbox[3], bbox[1]:bbox[4], bbox[2]:bbox[5]] + mask = segmentation[bb] == seg_id + + if estimate_radius_2d: + dists = np.array([distance_transform_edt(ma, sampling=resolution[1:]) for ma in mask]) + else: + dists = distance_transform_edt(mask, sampling=resolution) + + max_coord = np.unravel_index(np.argmax(dists), mask.shape) + radius = dists[max_coord] * radius_factor + + offset = np.array(bbox[:3]) + coord = np.array(max_coord) + offset + return coord, radius, seg_id + + with futures.ThreadPoolExecutor(num_workers) as tp: + res = list(tqdm( + tp.map(coords_and_rads, props), disable=not verbose, total=len(props), + desc="Compute coordinates and radii" + )) + + coords = [re[0] for re in res] + rads = [re[1] for re in res] + label_indxes = [re[2] for re in res] + return np.array(coords), np.array(rads), np.array(label_indxes) + + def write_points_to_imod( coordinates: np.ndarray, From 51165a5699a65e34ed3ca9ef87dcacea93549c3d Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Sat, 23 Nov 2024 15:30:07 +0100 Subject: [PATCH 25/35] Fix issues with the segmentation export to IMOD --- scripts/cooper/export_mask_to_imod.py | 8 -- scripts/inner_ear/analysis/.gitignore | 1 + .../inner_ear/analysis/export_seg_to_imod.py | 128 ++++++++++++++++++ synaptic_reconstruction/imod/to_imod.py | 57 ++++---- 4 files changed, 162 insertions(+), 32 deletions(-) create mode 100644 scripts/inner_ear/analysis/export_seg_to_imod.py diff --git a/scripts/cooper/export_mask_to_imod.py b/scripts/cooper/export_mask_to_imod.py index 98b4b2f..4273707 100644 --- a/scripts/cooper/export_mask_to_imod.py +++ b/scripts/cooper/export_mask_to_imod.py @@ -4,19 +4,11 @@ def export_mask_to_imod(args): - # Test script - # write_segmentation_to_imod( - # "synapse-examples/36859_J1_66K_TS_CA3_PS_26_rec_2Kb1dawbp_crop.mrc", - # "synapse-examples/36859_J1_66K_TS_CA3_PS_26_rec_2Kb1dawbp_crop_mitos.tif", - # "synapse-examples/mito.mod" - # ) write_segmentation_to_imod(args.input_path, args.segmentation_path, args.output_path) def main(): parser = argparse.ArgumentParser() - - args = parser.parse_args() parser.add_argument( "-i", "--input_path", required=True, help="The filepath to the mrc file containing 
the data." diff --git a/scripts/inner_ear/analysis/.gitignore b/scripts/inner_ear/analysis/.gitignore index 383f264..cbad005 100644 --- a/scripts/inner_ear/analysis/.gitignore +++ b/scripts/inner_ear/analysis/.gitignore @@ -1,2 +1,3 @@ panels/ +auto_seg_export/ *.zip diff --git a/scripts/inner_ear/analysis/export_seg_to_imod.py b/scripts/inner_ear/analysis/export_seg_to_imod.py new file mode 100644 index 0000000..eea4b14 --- /dev/null +++ b/scripts/inner_ear/analysis/export_seg_to_imod.py @@ -0,0 +1,128 @@ +import os +from shutil import copyfile +from subprocess import run + +import imageio.v3 as imageio +import mrcfile +import napari +import numpy as np +import pandas as pd +from elf.io import open_file +from skimage.transform import resize +from synaptic_reconstruction.imod.to_imod import write_segmentation_to_imod, write_segmentation_to_imod_as_points + +out_folder = "./auto_seg_export" +os.makedirs(out_folder, exist_ok=True) + + +def _resize(seg, tomo_path): + with open_file(tomo_path, "r") as f: + shape = f["data"].shape + + if shape != seg.shape: + seg = resize(seg, shape, order=0, anti_aliasing=False, preserve_range=True).astype(seg.dtype) + assert seg.shape == shape + return seg + + +def check_imod(tomo_path, mod_path): + run(["imod", tomo_path, mod_path]) + + +def export_pool(pool_name, pool_seg, tomo_path): + seg_path = f"./auto_seg_export/{pool_name}.tif" + pool_seg = _resize(pool_seg, tomo_path) + imageio.imwrite(seg_path, pool_seg, compression="zlib") + + output_path = f"./auto_seg_export/{pool_name}.mod" + write_segmentation_to_imod_as_points(tomo_path, seg_path, output_path, min_radius=5) + + check_imod(tomo_path, output_path) + + +def export_vesicles(folder, tomo_path): + vesicle_pool_path = os.path.join(folder, "Korrektur", "vesicle_pools.tif") + # pool_correction_path = os.path.join(folder, "Korrektur", "pool_correction.tif") + # pool_correction = imageio.imread(pool_correction_path) + + assignment_path = os.path.join(folder, "Korrektur", "measurements.xlsx") + assignments = pd.read_excel(assignment_path) + + vesicles = imageio.imread(vesicle_pool_path) + + pools = {} + for pool_name in pd.unique(assignments.pool): + pool_ids = assignments[assignments.pool == pool_name].id.values + pool_seg = vesicles.copy() + pool_seg[~np.isin(vesicles, pool_ids)] = 0 + pools[pool_name] = pool_seg + + view = False + if view: + v = napari.Viewer() + v.add_labels(vesicles, visible=False) + for pool_name, pool_seg in pools.items(): + v.add_labels(pool_seg, name=pool_name) + napari.run() + else: + for pool_name, pool_seg in pools.items(): + export_pool(pool_name, pool_seg, tomo_path) + + +def export_structure(folder, tomo, name, view=False): + path = os.path.join(folder, "Korrektur", f"{name}.tif") + seg = imageio.imread(path) + seg = _resize(seg, tomo) + + if view: + with open_file(tomo, "r") as f: + raw = f["data"][:] + + v = napari.Viewer() + v.add_image(raw) + v.add_labels(seg) + napari.run() + + return + + seg_path = f"./auto_seg_export/{name}.tif" + imageio.imwrite(seg_path, seg, compression="zlib") + output_path = f"./auto_seg_export/{name}.mod" + write_segmentation_to_imod(tomo, seg_path, output_path) + check_imod(tomo, output_path) + + +def remove_scale(tomo): + new_path = "./auto_seg_export/Emb71M1aGridA1sec1mod7.rec.rec" + if os.path.exists(new_path): + return new_path + + copyfile(tomo, new_path) + + with mrcfile.open(new_path, "r+") as f: + # Set the origin to (0, 0, 0) + f.header.nxstart = 0 + f.header.nystart = 0 + f.header.nzstart = 0 + f.header.origin = (0.0, 0.0, 0.0) + + # 
Save changes
+        f.flush()
+
+    return new_path
+
+
+def main():
+    folder = "/home/pape/Work/data/moser/em-synapses/Electron-Microscopy-Susi/Analyse/WT strong stim/Mouse 1/modiolar/1"
+    tomo = os.path.join(folder, "Emb71M1aGridA1sec1mod7.rec.rec")
+
+    tomo = remove_scale(tomo)
+
+    # export_vesicles(folder, tomo)
+    # export_structure(folder, tomo, "ribbon", view=False)
+    # export_structure(folder, tomo, "membrane", view=False)
+    export_structure(folder, tomo, "PD", view=False)
+
+
+if __name__ == "__main__":
+    main()

diff --git a/synaptic_reconstruction/imod/to_imod.py b/synaptic_reconstruction/imod/to_imod.py
index 7a98469..307e645 100644
--- a/synaptic_reconstruction/imod/to_imod.py
+++ b/synaptic_reconstruction/imod/to_imod.py
@@ -16,51 +16,60 @@ from tqdm import tqdm
 
 
-# FIXME how to bring the data to the IMOD axis convention?
-def _to_imod_order(data):
-    # data = np.swapaxes(data, 0, -1)
-    # data = np.fliplr(data)
-    # data = np.swapaxes(data, 0, -1)
-    return data
-
-
+# TODO: this still has some issues with some tomograms that have offset information.
+# For now, this occurs for the inner ear data tomograms; it works for Fidi's STEM tomograms.
+# Ben's theory is that this might be due to data from JEOL vs. ThermoFisher microscopes.
+# To test this I can check how it works for data from Maus et al. / Imig et al., which were taken on a JEOL.
+# Can also check out the mrc documentation here: https://www.ccpem.ac.uk/mrc_format/mrc2014.php
 def write_segmentation_to_imod(
     mrc_path: str,
-    segmentation_path: str,
+    segmentation: Union[str, np.ndarray],
     output_path: str,
 ) -> None:
-    """Write a segmentation to a mod file as contours.
+    """Write a segmentation to a mod file as closed contour objects.
 
     Args:
-        mrc_path: a
-        segmentation_path: a
-        output_path: a
+        mrc_path: The filepath to the mrc file from which the segmentation was derived.
+        segmentation: The segmentation (either as numpy array or filepath to a .tif file).
+        output_path: The output path where the mod file will be saved.
     """
     cmd = "imodauto"
    cmd_path = shutil.which(cmd)
     assert cmd_path is not None, f"Could not find the {cmd} imod command."
 
+    # Load the segmentation from a tif file in case a filepath was passed.
+    if isinstance(segmentation, str):
+        assert os.path.exists(segmentation)
+        segmentation = imageio.imread(segmentation)
+
+    # Binarize the segmentation and flip its axes to match the IMOD axis convention.
+    segmentation = (segmentation > 0).astype("uint8")
+    segmentation = np.flip(segmentation, axis=1)
+
+    # Read the voxel size and origin information from the mrc file.
     assert os.path.exists(mrc_path)
-    with mrcfile.open(mrc_path, mode="r+") as f:
+    with mrcfile.open(mrc_path, mode="r") as f:
         voxel_size = f.voxel_size
+        nx, ny, nz = f.header.nxstart, f.header.nystart, f.header.nzstart
+        origin = f.header.origin
+
+    # Write the input for imodauto to a temporary mrc file.
     with tempfile.NamedTemporaryFile(suffix=".mrc") as f:
         tmp_path = f.name
 
-        seg = (imageio.imread(segmentation_path) > 0).astype("uint8")
-        seg_ = _to_imod_order(seg)
-
-        # import napari
-        # v = napari.Viewer()
-        # v.add_image(seg)
-        # v.add_labels(seg_)
-        # napari.run()
-
-        mrcfile.new(tmp_path, data=seg_, overwrite=True)
+        mrcfile.new(tmp_path, data=segmentation, overwrite=True)
+        # Write the voxel_size and origin information.
with mrcfile.open(tmp_path, mode="r+") as f: f.voxel_size = voxel_size + + f.header.nxstart = nx + f.header.nystart = ny + f.header.nzstart = nz + f.header.origin = (0.0, 0.0, 0.0) * 3 if origin is None else origin + f.update_header_from_data() + # Run the command. cmd_list = [cmd, "-E", "1", "-u", tmp_path, output_path] run(cmd_list) From aa5d78e2c3257a653a576c5adb006b686aedcb7f Mon Sep 17 00:00:00 2001 From: SarahMuth Date: Sat, 23 Nov 2024 16:38:35 +0100 Subject: [PATCH 26/35] clean up --- scripts/cooper/analysis/calc_AZ_area.py | 239 ------------------ scripts/cooper/analysis/run_analysis_1.py | 35 +-- .../run_spatial_distribution_analysis.py | 149 +++-------- 3 files changed, 52 insertions(+), 371 deletions(-) delete mode 100644 scripts/cooper/analysis/calc_AZ_area.py diff --git a/scripts/cooper/analysis/calc_AZ_area.py b/scripts/cooper/analysis/calc_AZ_area.py deleted file mode 100644 index 592b043..0000000 --- a/scripts/cooper/analysis/calc_AZ_area.py +++ /dev/null @@ -1,239 +0,0 @@ -import h5py -import numpy as np -import os -import csv -from scipy.ndimage import binary_opening, median_filter,zoom, binary_closing -from skimage.measure import label, regionprops -from synaptic_reconstruction.morphology import compute_object_morphology -from skimage.morphology import ball -from scipy.spatial import ConvexHull -from skimage.draw import polygon - -def calculate_AZ_area_per_slice(AZ_slice, pixel_size_nm=1.554): - """ - Calculate the area of the AZ in a single 2D slice after applying error-reducing processing. - - Parameters: - - AZ_slice (numpy array): 2D array representing a single slice of the AZ segmentation. - - pixel_size_nm (float): Size of a pixel in nanometers. - - Returns: - - slice_area_nm2 (float): The area of the AZ in the slice in square nanometers. - """ - # Apply binary opening or median filter to reduce small segmentation errors - AZ_slice_filtered = binary_opening(AZ_slice, structure=np.ones((3, 3))).astype(int) - - # Calculate area in this slice - num_AZ_pixels = np.sum(AZ_slice_filtered == 1) - slice_area_nm2 = num_AZ_pixels * (pixel_size_nm ** 2) - - return slice_area_nm2 - -def calculate_total_AZ_area(tomo_path, pixel_size_nm=1.554): - """ - Calculate the total area of the AZ across all slices in a 3D tomogram file. - - Parameters: - - tomo_path (str): Path to the tomogram file (HDF5 format). - - pixel_size_nm (float): Size of a pixel in nanometers. - - Returns: - - total_AZ_area_nm2 (float): The total area of the AZ in square nanometers. - """ - with h5py.File(tomo_path, "r") as f: - AZ_intersect_seg = f["/AZ/compartment_AZ_intersection_manComp"][:] - - # Calculate the AZ area for each slice along the z-axis - total_AZ_area_nm2 = 0 - for z_slice in AZ_intersect_seg: - slice_area_nm2 = calculate_AZ_area_per_slice(z_slice, pixel_size_nm) - total_AZ_area_nm2 += slice_area_nm2 - - return total_AZ_area_nm2 - -def calculate_AZ_area_simple(tomo_path, pixel_size_nm=1.554): - """ - Calculate the volume of the AZ (active zone) in a 3D tomogram file. - - Parameters: - - tomo_path (str): Path to the tomogram file (HDF5 format). - - pixel_size_nm (float): Size of a pixel in nanometers (default is 1.554 nm). - - Returns: - - AZ_volume_nm3 (float): The volume of the AZ in cubic nanometers. 
- """ - # Open the file and read the AZ intersection segmentation data - with h5py.File(tomo_path, "r") as f: - AZ_intersect_seg = f["/AZ/compartment_AZ_intersection_manComp"][:] - - # Count voxels with label = 1 - num_AZ_voxels = np.sum(AZ_intersect_seg == 1) - - # Calculate the volume in cubic nanometers - AZ_area_nm2 = num_AZ_voxels * (pixel_size_nm ** 2) - - return AZ_area_nm2 - -def calculate_AZ_surface(tomo_path, pixel_size_nm=1.554): - with h5py.File(tomo_path, "r") as f: - #AZ_seg = f["/AZ/segment_from_AZmodel_v3"][:] - AZ_seg = f["/filtered_az"][:] - - # Apply binary closing to smooth the segmented regions - struct_elem = ball(1) # Use a small 3D structuring element - AZ_seg_smoothed = binary_closing(AZ_seg > 0, structure=struct_elem, iterations=20) - - labeled_seg = label(AZ_seg_smoothed) - - regions = regionprops(labeled_seg) - if regions: - # Sort regions by area and get the label of the largest region - largest_region = max(regions, key=lambda r: r.area) - largest_label = largest_region.label - - largest_component_mask = (labeled_seg == largest_label) - AZ_seg_filtered = largest_component_mask.astype(np.uint8) - - else: - # If no regions found, return an empty array - AZ_seg_filtered = np.zeros_like(AZ_seg_interp, dtype=np.uint8) - - morphology_data = compute_object_morphology(AZ_seg_filtered, "AZ Structure", resolution=(pixel_size_nm, pixel_size_nm, pixel_size_nm)) - surface_column = "surface [nm^2]" #if resolution is not None else "surface [pixel^2]" - surface_area = morphology_data[surface_column].iloc[0] - - return surface_area - -def calculate_AZ_surface_simple(tomo_path, pixel_size_nm=1.554): - with h5py.File(tomo_path, "r") as f: - AZ_seg = f["/labels/AZ"][:] - - morphology_data = compute_object_morphology(AZ_seg, "AZ Structure", resolution=(pixel_size_nm, pixel_size_nm, pixel_size_nm)) - surface_column = "surface [nm^2]" #if resolution is not None else "surface [pixel^2]" - surface_area = morphology_data[surface_column].iloc[0] - - return surface_area - -def calculate_AZ_surface_convexHull(tomo_path, pixel_size_nm=1.554): - with h5py.File(tomo_path, "r") as f: - AZ_seg = f["/AZ/segment_from_AZmodel_v3"][:] - - # Apply binary closing to smooth the segmented regions - struct_elem = ball(1) # Use a small 3D structuring element - AZ_seg_smoothed = binary_closing(AZ_seg > 0, structure=struct_elem, iterations=10) - - labeled_seg = label(AZ_seg_smoothed) - - regions = regionprops(labeled_seg) - if regions: - # Sort regions by area and get the label of the largest region - largest_region = max(regions, key=lambda r: r.area) - largest_label = largest_region.label - - largest_component_mask = (labeled_seg == largest_label) - AZ_seg_filtered = largest_component_mask.astype(np.uint8) - AZ_seg = AZ_seg_filtered - # Extract coordinates of non-zero points - points = np.argwhere(AZ_seg > 0) # Get the coordinates of non-zero (foreground) pixels - - if points.shape[0] < 4: - # ConvexHull requires at least 4 points in 3D to form a valid hull - AZ_seg_filtered = np.zeros_like(AZ_seg, dtype=np.uint8) - else: - # Apply ConvexHull to the points - hull = ConvexHull(points) - - # Create a binary mask for the convex hull - convex_hull_mask = np.zeros_like(AZ_seg, dtype=bool) - - # Iterate over each simplex (facet) of the convex hull and fill in the polygon - for simplex in hull.simplices: - # For each face of the convex hull, extract the vertices and convert to a 2D polygon - polygon_coords = points[simplex] - rr, cc = polygon(polygon_coords[:, 0], polygon_coords[:, 1]) - convex_hull_mask[rr, 
cc] = True - - # Optional: Label the convex hull mask - labeled_seg = label(convex_hull_mask) - regions = regionprops(labeled_seg) - - if regions: - # Sort regions by area and get the label of the largest region - largest_region = max(regions, key=lambda r: r.area) - largest_label = largest_region.label - - largest_component_mask = (labeled_seg == largest_label) - AZ_seg_filtered = largest_component_mask.astype(np.uint8) - - else: - AZ_seg_filtered = np.zeros_like(AZ_seg, dtype=np.uint8) - - # Calculate surface area - morphology_data = compute_object_morphology(AZ_seg_filtered, "AZ Structure", resolution=(pixel_size_nm, pixel_size_nm, pixel_size_nm)) - surface_column = "surface [nm^2]" - surface_area = morphology_data[surface_column].iloc[0] - - return surface_area - -def process_datasets(folder_path, output_csv="AZ_areas.csv", pixel_size_nm=1.554): - """ - Process all tomograms in multiple datasets within a folder and save results to a CSV. - - Parameters: - - folder_path (str): Path to the folder containing dataset folders with tomograms. - - output_csv (str): Filename for the output CSV file. - - pixel_size_nm (float): Size of a pixel in nanometers. - """ - results = [] - - # Iterate over each dataset folder - for dataset_name in os.listdir(folder_path): - dataset_path = os.path.join(folder_path, dataset_name) - - # Check if it's a directory (skip files in the main folder) - if not os.path.isdir(dataset_path): - continue - - # Iterate over each tomogram file in the dataset folder - for tomo_file in os.listdir(dataset_path): - tomo_path = os.path.join(dataset_path, tomo_file) - - # Check if the file is an HDF5 file (optional) - if tomo_file.endswith(".h5") or tomo_file.endswith(".hdf5"): - try: - # Calculate AZ area - #AZ_area = calculate_total_AZ_area(tomo_path, pixel_size_nm) - #AZ_area = calculate_AZ_area_simple(tomo_path, pixel_size_nm) - #AZ_surface_area = calculate_AZ_surface(tomo_path, pixel_size_nm) - #AZ_surface_area = calculate_AZ_surface_convexHull(tomo_path, pixel_size_nm) - AZ_surface_area = calculate_AZ_surface_simple(tomo_path, pixel_size_nm) - # Append results to list - results.append({ - "Dataset": dataset_name, - "Tomogram": tomo_file, - "AZ_surface_area": AZ_surface_area - }) - except Exception as e: - print(f"Error processing {tomo_file} in {dataset_name}: {e}") - - # Write results to a CSV file - with open(output_csv, mode="w", newline="") as csvfile: - fieldnames = ["Dataset", "Tomogram", "AZ_surface_area"] - writer = csv.DictWriter(csvfile, fieldnames=fieldnames) - - writer.writeheader() - for result in results: - writer.writerow(result) - - print(f"Results saved to {output_csv}") - -def main(): - # Define the path to the folder containing dataset folders - folder_path = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/20241102_TOMO_DATA_Imig2014/exported/" - output_csv = "./analysis_results/manual_AZ_exported/AZ_surface_area.csv" - # Call the function to process datasets and save results - process_datasets(folder_path, output_csv = output_csv) - -# Call main -if __name__ == "__main__": - main() diff --git a/scripts/cooper/analysis/run_analysis_1.py b/scripts/cooper/analysis/run_analysis_1.py index 94d0a62..abad440 100644 --- a/scripts/cooper/analysis/run_analysis_1.py +++ b/scripts/cooper/analysis/run_analysis_1.py @@ -1,7 +1,3 @@ -# This is the code for the first analysis for the cooper data. -# Here, we only compute the vesicle numbers and size distributions for the STEM tomograms -# in the 04 dataset. 
- import os from glob import glob @@ -37,13 +33,10 @@ def get_compartment_with_max_overlap(compartments, vesicles): # Iterate over each compartment and calculate the overlap with vesicles for compartment_label in unique_compartments: - # Create a binary mask for the current compartment compartment_mask = compartments == compartment_label vesicle_mask = vesicles > 0 intersection = np.logical_and(compartment_mask, vesicle_mask) - - # Calculate the number of overlapping voxels overlap_count = np.sum(intersection) # Track the compartment with the most overlap in terms of voxel count @@ -51,14 +44,13 @@ def get_compartment_with_max_overlap(compartments, vesicles): max_overlap_count = overlap_count best_compartment = compartment_label - # Create the final mask for the compartment with the most overlap final_mask = compartments == best_compartment return final_mask -# We compute the sizes for all vesicles in the compartment masks. +# We compute the sizes for all vesicles in the MANUALLY ANNOTATED compartment masks. # We use the same logic in the size computation as for the vesicle extraction to IMOD, -# including the radius correction factor. +# including the radius correction factor. --> not needed here # The number of vesicles is automatically computed as the length of the size list. def compute_sizes_for_all_tomorams_manComp(): os.makedirs(RESULT_FOLDER, exist_ok=True) @@ -66,8 +58,6 @@ def compute_sizes_for_all_tomorams_manComp(): resolution = (1.554,) * 3 # Change for each dataset #1.554 for Munc and snap #0.8681 for 04 dataset radius_factor = 1 estimate_radius_2d = True - - # Dictionary to hold the results for each dataset and category (CTRL or DKO) dataset_results = {} tomograms = sorted(glob(os.path.join(PREDICTION_ROOT, "**/*.h5"), recursive=True)) @@ -79,11 +69,9 @@ def compute_sizes_for_all_tomorams_manComp(): # Determine if the tomogram is 'CTRL' or 'DKO' category = "CTRL" if "CTRL" in fname else "DKO" - # Initialize a new dictionary entry for each dataset and category if not already present if ds_name not in dataset_results: dataset_results[ds_name] = {'CTRL': {}, 'DKO': {}} - - # Skip if this tomogram already exists in the dataset dictionary + if fname in dataset_results[ds_name][category]: continue @@ -93,6 +81,7 @@ def compute_sizes_for_all_tomorams_manComp(): input_path = os.path.join(DATA_ROOT, ds_name, f"{fname}.h5") assert os.path.exists(input_path), input_path + # Load the compartment mask from the tomogram with h5py.File(input_path, "r") as f: mask = f["labels/compartment"][:] @@ -102,30 +91,30 @@ def compute_sizes_for_all_tomorams_manComp(): segmentation, resolution=resolution, radius_factor=radius_factor, estimate_radius_2d=estimate_radius_2d ) - # Add sizes to the dataset dictionary under the appropriate category + dataset_results[ds_name][category][fname] = sizes # Save each dataset's results into separate CSV files for CTRL and DKO tomograms for ds_name, categories in dataset_results.items(): for category, tomogram_data in categories.items(): - # Sort tomograms by name within the category sorted_data = dict(sorted(tomogram_data.items())) # Sort by tomogram names result_df = pd.DataFrame.from_dict(sorted_data, orient='index').transpose() - # Define the output file path output_path = os.path.join(RESULT_FOLDER, f"size_analysis_for_{ds_name}_{category}_rf1.csv") # Save the DataFrame to CSV result_df.to_csv(output_path, index=False) +# We compute the sizes for all vesicles in the AUTOMATIC SEGMENTED compartment masks. 
+# We use the same logic in the size computation as for the vesicle extraction to IMOD, +# including the radius correction factor. --> not needed here +# The number of vesicles is automatically computed as the length of the size list. def compute_sizes_for_all_tomorams_autoComp(): os.makedirs(RESULT_FOLDER, exist_ok=True) resolution = (1.554,) * 3 # Change for each dataset #1.554 for Munc and snap #0.8681 for 04 dataset radius_factor = 1 estimate_radius_2d = True - - # Dictionary to hold the results for each dataset and category (CTRL or DKO) dataset_results = {} tomograms = sorted(glob(os.path.join(PREDICTION_ROOT, "**/*.h5"), recursive=True)) @@ -137,11 +126,9 @@ def compute_sizes_for_all_tomorams_autoComp(): # Determine if the tomogram is 'CTRL' or 'DKO' category = "CTRL" if "CTRL" in fname else "DKO" - # Initialize a new dictionary entry for each dataset and category if not already present if ds_name not in dataset_results: dataset_results[ds_name] = {'CTRL': {}, 'DKO': {}} - # Skip if this tomogram already exists in the dataset dictionary if fname in dataset_results[ds_name][category]: continue @@ -151,6 +138,7 @@ def compute_sizes_for_all_tomorams_autoComp(): input_path = os.path.join(DATA_ROOT, ds_name, f"{fname}.h5") assert os.path.exists(input_path), input_path + # Load the compartment mask from the tomogram with h5py.File(input_path, "r") as f: compartments = f["/compartments/segment_from_3Dmodel_v2"][:] @@ -165,17 +153,14 @@ def compute_sizes_for_all_tomorams_autoComp(): segmentation, resolution=resolution, radius_factor=radius_factor, estimate_radius_2d=estimate_radius_2d ) - # Add sizes to the dataset dictionary under the appropriate category dataset_results[ds_name][category][fname] = sizes # Save each dataset's results into separate CSV files for CTRL and DKO tomograms for ds_name, categories in dataset_results.items(): for category, tomogram_data in categories.items(): - # Sort tomograms by name within the category sorted_data = dict(sorted(tomogram_data.items())) # Sort by tomogram names result_df = pd.DataFrame.from_dict(sorted_data, orient='index').transpose() - # Define the output file path output_path = os.path.join(RESULT_FOLDER, f"size_analysis_for_{ds_name}_{category}_rf1.csv") # Save the DataFrame to CSV diff --git a/scripts/cooper/analysis/run_spatial_distribution_analysis.py b/scripts/cooper/analysis/run_spatial_distribution_analysis.py index edd8308..6943484 100644 --- a/scripts/cooper/analysis/run_spatial_distribution_analysis.py +++ b/scripts/cooper/analysis/run_spatial_distribution_analysis.py @@ -9,7 +9,8 @@ DATA_ROOT = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/segmentation/for_spatial_distribution_analysis/final_Imig2014_seg/" # noqa PREDICTION_ROOT = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/segmentation/for_spatial_distribution_analysis/final_Imig2014_seg/" # noqa RESULT_FOLDER = "./analysis_results/AZ_filtered_autoComp" - +AZ_PATH = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/20241102_TOMO_DATA_Imig2014/az_seg_filtered" + def get_compartment_with_max_overlap(compartments, vesicles): """ @@ -33,13 +34,10 @@ def get_compartment_with_max_overlap(compartments, vesicles): # Iterate over each compartment and calculate the overlap with vesicles for compartment_label in unique_compartments: - # Create a binary mask for the current compartment compartment_mask = compartments == compartment_label vesicle_mask = vesicles > 0 intersection = np.logical_and(compartment_mask, vesicle_mask) - - # Calculate 
the number of overlapping voxels overlap_count = np.sum(intersection) # Track the compartment with the most overlap in terms of voxel count @@ -47,85 +45,20 @@ def get_compartment_with_max_overlap(compartments, vesicles): max_overlap_count = overlap_count best_compartment = compartment_label - # Create the final mask for the compartment with the most overlap final_mask = compartments == best_compartment return final_mask -# We compute the distances for all vesicles in the compartment masks to the AZ. -# We use the same different resolution, depending on dataset. +# We compute the distances for all vesicles in the AUTOMATIC SEGMENTED compartment masks to the AZ. +# We use different resolution, depending on dataset. # The closest distance is calculated, i.e., the closest point on the outer membrane of the vesicle to the AZ. -def compute_per_vesicle_distance_to_AZ(): - os.makedirs(RESULT_FOLDER, exist_ok=True) - - resolution = (1.554,) * 3 # Change for each dataset #1.554 for Munc and snap #0.8681 for 04 dataset - - # Dictionary to hold the results for each dataset - dataset_results = {} - - tomograms = sorted(glob(os.path.join(PREDICTION_ROOT, "**/*.h5"), recursive=True)) - for tomo in tqdm(tomograms): - ds_name, fname = os.path.split(tomo) - ds_name = os.path.split(ds_name)[1] - fname = os.path.splitext(fname)[0] - - # Initialize a new dictionary entry for each dataset if not already present - if ds_name not in dataset_results: - dataset_results[ds_name] = {} - - # Skip if this tomogram already exists in the dataset dictionary - if fname in dataset_results[ds_name]: - continue - - # Load the vesicle segmentation from the predictions - with h5py.File(tomo, "r") as f: - segmentation = f["/vesicles/segment_from_combined_vesicles"][:] - segmented_object = f["/AZ/compartment_AZ_intersection"][:] - #if AZ intersect is small, compartment seg didn't align with AZ so we use the normal AZ and not intersect - if (segmented_object == 0).all() or np.sum(segmented_object == 1) < 2000: - segmented_object = f["/AZ/segment_from_AZmodel_v3"][:] - - input_path = os.path.join(DATA_ROOT, ds_name, f"{fname}.h5") - assert os.path.exists(input_path), input_path - - # Load the compartment mask from the tomogram - with h5py.File(input_path, "r") as f: - compartments = f["/compartments/segment_from_3Dmodel_v2"][:] - mask = get_compartment_with_max_overlap(compartments, segmentation) - - #if more than half of the vesicles (approximation, its checking pixel and not label) would get filtered by mask it means the compartment seg didn't work and thus we won't use the mask - if np.sum(segmentation[mask == 0] > 0) > (0.5 * np.sum(segmentation > 0)): - print("using no mask") - else: - segmentation[mask == 0] = 0 - distances, _, _, _ = measure_segmentation_to_object_distances( - segmentation, segmented_object=segmented_object, resolution=resolution - ) +def compute_per_vesicle_distance_to_AZ_autoComp(separate_AZseg=False): - # Add distances to the dataset dictionary under the tomogram name - dataset_results[ds_name][fname] = distances - - # Save each dataset's results to a single CSV file - for ds_name, tomogram_data in dataset_results.items(): - # Create a DataFrame where each column is a tomogram's distances - result_df = pd.DataFrame.from_dict(tomogram_data, orient='index').transpose() - - # Define the output file path - output_path = os.path.join(RESULT_FOLDER, f"spatial_distribution_analysis_for_{ds_name}.csv") - - # Save the DataFrame to CSV - result_df.to_csv(output_path, index=False) - -def 
compute_per_vesicle_distance_to_filteredAZ(): - filtered_AZ_path = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/20241102_TOMO_DATA_Imig2014/az_seg_filtered" os.makedirs(RESULT_FOLDER, exist_ok=True) - resolution = (1.554,) * 3 # Change for each dataset #1.554 for Munc and snap #0.8681 for 04 dataset - - # Dictionary to hold the results for each dataset and category (CTRL or DKO) dataset_results = {} - tomograms = sorted(glob(os.path.join(PREDICTION_ROOT, "**/*.h5"), recursive=True)) + for tomo in tqdm(tomograms): ds_name, fname = os.path.split(tomo) ds_name = os.path.split(ds_name)[1] @@ -134,23 +67,27 @@ def compute_per_vesicle_distance_to_filteredAZ(): # Determine if the tomogram is 'CTRL' or 'DKO' category = "CTRL" if "CTRL" in fname else "DKO" - # Initialize a new dictionary entry for each dataset and category if not already present if ds_name not in dataset_results: dataset_results[ds_name] = {'CTRL': {}, 'DKO': {}} - - # Skip if this tomogram already exists in the dataset dictionary + if fname in dataset_results[ds_name][category]: continue - #Load the AZ segmentations - AZ_path = os.path.join(filtered_AZ_path, ds_name, f"{fname}.h5") - with h5py.File(AZ_path, "r") as f: - segmented_object = f["/filtered_az"][:] # Load the vesicle segmentation from the predictions with h5py.File(tomo, "r") as f: segmentation = f["/vesicles/segment_from_combined_vesicles"][:] - + + #Check if AZ seg is stored in a different tomo or same + if separate_AZseg: + print(f"using AZ segmentation from {AZ_PATH}") + #Load the AZ segmentations + AZ_path = os.path.join(AZ_PATH, ds_name, f"{fname}.h5") + with h5py.File(AZ_path, "r") as f_AZ: + segmented_object = f_AZ["/thin_az"][:] + else: + segmented_object = f["/AZ/compartment_AZ_intersection"][:] + #if AZ intersect is small, compartment seg didn't align with AZ so we use the normal AZ and not intersect if (segmented_object == 0).all() or np.sum(segmented_object == 1) < 2000: segmented_object = f["/AZ/segment_from_AZmodel_v3"][:] @@ -168,6 +105,7 @@ def compute_per_vesicle_distance_to_filteredAZ(): print("using no mask") else: segmentation[mask == 0] = 0 + distances, _, _, _ = measure_segmentation_to_object_distances( segmentation, segmented_object=segmented_object, resolution=resolution ) @@ -178,36 +116,35 @@ def compute_per_vesicle_distance_to_filteredAZ(): # Save each dataset's results into separate CSV files for CTRL and DKO tomograms for ds_name, categories in dataset_results.items(): for category, tomogram_data in categories.items(): - # Sort tomograms by name within the category sorted_data = dict(sorted(tomogram_data.items())) # Sort by tomogram names result_df = pd.DataFrame.from_dict(sorted_data, orient='index').transpose() - - # Define the output file path output_path = os.path.join(RESULT_FOLDER, f"spatial_distribution_analysis_for_{ds_name}_{category}.csv") # Save the DataFrame to CSV result_df.to_csv(output_path, index=False) +# We compute the distances for all vesicles in the MANUALLY ANNOTATED compartment masks to the AZ. +# We use different resolution, depending on dataset. +# The closest distance is calculated, i.e., the closest point on the outer membrane of the vesicle to the AZ. 
def compute_per_vesicle_distance_to_AZ_manComp(): os.makedirs(RESULT_FOLDER, exist_ok=True) resolution = (1.554,) * 3 # Change for each dataset #1.554 for Munc and snap #0.8681 for 04 dataset - - # Dictionary to hold the results for each dataset dataset_results = {} - tomograms = sorted(glob(os.path.join(PREDICTION_ROOT, "**/*.h5"), recursive=True)) + for tomo in tqdm(tomograms): ds_name, fname = os.path.split(tomo) ds_name = os.path.split(ds_name)[1] fname = os.path.splitext(fname)[0] - # Initialize a new dictionary entry for each dataset if not already present - if ds_name not in dataset_results: - dataset_results[ds_name] = {} + # Determine if the tomogram is 'CTRL' or 'DKO' + category = "CTRL" if "CTRL" in fname else "DKO" - # Skip if this tomogram already exists in the dataset dictionary - if fname in dataset_results[ds_name]: + if ds_name not in dataset_results: + dataset_results[ds_name] = {'CTRL': {}, 'DKO': {}} + + if fname in dataset_results[ds_name][category]: continue # Load the vesicle segmentation from the predictions @@ -223,28 +160,26 @@ def compute_per_vesicle_distance_to_AZ_manComp(): mask = f["/labels/compartment"][:] segmentation[mask == 0] = 0 + distances, _, _, _ = measure_segmentation_to_object_distances( segmentation, segmented_object=segmented_object, resolution=resolution ) - # Add distances to the dataset dictionary under the tomogram name - dataset_results[ds_name][fname] = distances - - # Save each dataset's results to a single CSV file - for ds_name, tomogram_data in dataset_results.items(): - # Create a DataFrame where each column is a tomogram's distances - result_df = pd.DataFrame.from_dict(tomogram_data, orient='index').transpose() - - # Define the output file path - output_path = os.path.join(RESULT_FOLDER, f"spatial_distribution_analysis_for_{ds_name}.csv") - - # Save the DataFrame to CSV - result_df.to_csv(output_path, index=False) + # Add distances to the dataset dictionary under the appropriate category + dataset_results[ds_name][category][fname] = distances + # Save each dataset's results into separate CSV files for CTRL and DKO tomograms + for ds_name, categories in dataset_results.items(): + for category, tomogram_data in categories.items(): + sorted_data = dict(sorted(tomogram_data.items())) # Sort by tomogram names + result_df = pd.DataFrame.from_dict(sorted_data, orient='index').transpose() + output_path = os.path.join(RESULT_FOLDER, f"spatial_distribution_analysis_for_{ds_name}_{category}.csv") + + # Save the DataFrame to CSV + result_df.to_csv(output_path, index=False) def main(): - #compute_per_vesicle_distance_to_AZ() + compute_per_vesicle_distance_to_AZ_autoComp(separate_AZseg=False) #compute_per_vesicle_distance_to_AZ_manComp() - compute_per_vesicle_distance_to_filteredAZ() From 20e429b3b7b5e000a3aeb48c603326ceb2be1c5c Mon Sep 17 00:00:00 2001 From: SarahMuth Date: Sat, 23 Nov 2024 16:40:26 +0100 Subject: [PATCH 27/35] clean up --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 0377c4a..d955053 100644 --- a/.gitignore +++ b/.gitignore @@ -14,4 +14,5 @@ scripts/rizzoli/upsample_data.py scripts/cooper/training/find_rec_testset.py scripts/rizzoli/combine_2D_slices.py scripts/rizzoli/combine_2D_slices_raw.py -scripts/cooper/remove_h5key.py \ No newline at end of file +scripts/cooper/remove_h5key.py +scripts/cooper/analysis/calc_AZ_area.py \ No newline at end of file From 19f618e388a510884c82475622b66074237c46ba Mon Sep 17 00:00:00 2001 From: SarahMuth Date: Sat, 23 Nov 2024 16:42:00 
+0100 Subject: [PATCH 28/35] clean up --- .../cooper/analysis/{run_analysis_1.py => run_size_analysis.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename scripts/cooper/analysis/{run_analysis_1.py => run_size_analysis.py} (100%) diff --git a/scripts/cooper/analysis/run_analysis_1.py b/scripts/cooper/analysis/run_size_analysis.py similarity index 100% rename from scripts/cooper/analysis/run_analysis_1.py rename to scripts/cooper/analysis/run_size_analysis.py From cb693b13a14f96bfccd62f117eced546e4b476d3 Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Sun, 24 Nov 2024 15:03:00 +0100 Subject: [PATCH 29/35] Update data summaries --- scripts/aggregate_data_information.py | 78 +++++++++++++----- .../active_zone_training_data.xlsx | Bin 0 -> 9607 bytes .../compartment_training_data.xlsx | Bin 0 -> 5747 bytes .../vesicle_domain_adaptation_data.xlsx | Bin 0 -> 11526 bytes .../data_summary/vesicle_training_data.xlsx | Bin 0 -> 13832 bytes scripts/summarize_data.py | 2 +- 6 files changed, 60 insertions(+), 20 deletions(-) create mode 100644 scripts/data_summary/active_zone_training_data.xlsx create mode 100644 scripts/data_summary/compartment_training_data.xlsx create mode 100644 scripts/data_summary/vesicle_domain_adaptation_data.xlsx create mode 100644 scripts/data_summary/vesicle_training_data.xlsx diff --git a/scripts/aggregate_data_information.py b/scripts/aggregate_data_information.py index d90ec8c..03ca0af 100644 --- a/scripts/aggregate_data_information.py +++ b/scripts/aggregate_data_information.py @@ -14,7 +14,7 @@ def aggregate_vesicle_train_data(roots, test_tomograms, conditions, resolutions): tomo_names = [] - tomo_vesicles = [] + tomo_vesicles_all, tomo_vesicles_imod = [], [] tomo_condition = [] tomo_resolution = [] tomo_train = [] @@ -43,14 +43,19 @@ def aggregate_vesicle_train_data(roots, test_tomograms, conditions, resolutions) except KeyError: tomo_name = fname - n_label_sets = len(f["labels"]) - if n_label_sets > 2: - print(tomo_path, "contains the following labels:", list(f["labels"].keys())) - seg = f["labels/vesicles"][:] - n_vesicles = len(np.unique(seg)) - 1 + if "labels/vesicles/combined_vesicles" in f: + all_vesicles = f["labels/vesicles/combined_vesicles"][:] + imod_vesicles = f["labels/vesicles/masked_vesicles"][:] + n_vesicles_all = len(np.unique(all_vesicles)) - 1 + n_vesicles_imod = len(np.unique(imod_vesicles)) - 2 + else: + vesicles = f["labels/vesicles"][:] + n_vesicles_all = len(np.unique(vesicles)) - 1 + n_vesicles_imod = n_vesicles_all tomo_names.append(tomo_name) - tomo_vesicles.append(n_vesicles) + tomo_vesicles_all.append(n_vesicles_all) + tomo_vesicles_imod.append(n_vesicles_imod) tomo_condition.append(this_condition) tomo_resolution.append(this_resolution) tomo_train.append("test" if fname in this_test_tomograms else "train/val") @@ -60,7 +65,8 @@ def aggregate_vesicle_train_data(roots, test_tomograms, conditions, resolutions) "condition": tomo_condition, "resolution": tomo_resolution, "used_for": tomo_train, - "vesicle_count": tomo_vesicles, + "vesicle_count_all": tomo_vesicles_all, + "vesicle_count_imod": tomo_vesicles_imod, }) os.makedirs("data_summary", exist_ok=True) @@ -70,15 +76,15 @@ def aggregate_vesicle_train_data(roots, test_tomograms, conditions, resolutions) def vesicle_train_data(): roots = { "01": { - "train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/extracted/20240909_cp_datatransfer/01_hoi_maus_2020_incomplete", # noqa + "train": 
"/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/01_hoi_maus_2020_incomplete", # noqa "test": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/testsets/01_hoi_maus_2020_incomplete", # noqa }, "02": { - "train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/extracted/20240909_cp_datatransfer/02_hcc_nanogold", # noqa + "train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/02_hcc_nanogold", # noqa "test": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/testsets/02_hcc_nanogold", # noqa }, "03": { - "train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/extracted/20240909_cp_datatransfer/03_hog_cs1sy7", # noqa + "train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/03_hog_cs1sy7", # noqa "test": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/testsets/03_hog_cs1sy7", # noqa }, "04": { @@ -86,27 +92,27 @@ def vesicle_train_data(): "test": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/ground_truth/04Dataset_for_vesicle_eval/", # noqa }, "05": { - "train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/extracted/20240909_cp_datatransfer/05_stem750_sv_training", # noqa + "train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/05_stem750_sv_training", # noqa "test": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/testsets/05_stem750_sv_training", # noqa }, "07": { - "train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/extracted/20240909_cp_datatransfer/07_hoi_s1sy7_tem250_ihgp", # noqa + "train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/07_hoi_s1sy7_tem250_ihgp", # noqa "test": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/testsets/07_hoi_s1sy7_tem250_ihgp", # noqa }, "09": { - "train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/extracted/20240909_cp_datatransfer/09_stem750_66k", # noqa + "train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/09_stem750_66k", # noqa "test": "", }, "10": { - "train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/extracted/20240909_cp_datatransfer/10_tem_single_release", # noqa + "train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/10_tem_single_release", # noqa "test": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/testsets/10_tem_single_release", # noqa }, "11": { - "train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/extracted/20240909_cp_datatransfer/11_tem_multiple_release", # noqa + "train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/11_tem_multiple_release", # noqa "test": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/testsets/11_tem_multiple_release", # noqa }, "12": { - "train": 
"/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/extracted/20240909_cp_datatransfer/12_chemical_fix_cryopreparation", # noqa + "train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/12_chemical_fix_cryopreparation", # noqa "test": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/testsets/12_chemical_fix_cryopreparation", # noqa }, } @@ -397,6 +403,11 @@ def vesicle_domain_adaptation_data(): "MF_05649_P-09175-E_06.h5", "MF_05646_C-09175-B_001B.h5", "MF_05649_P-09175-E_07.h5", "MF_05649_G-09175-C_001.h5", "MF_05646_C-09175-B_002.h5", "MF_05649_G-09175-C_04.h5", "MF_05649_P-09175-E_05.h5", "MF_05646_C-09175-B_000.h5", "MF_05646_C-09175-B_001.h5" + ], + "frog": [ + "block10U3A_three.h5", "block30UB_one_two.h5", "block30UB_two.h5", "block10U3A_one.h5", + "block184B_one.h5", "block30UB_three.h5", "block10U3A_two.h5", "block30UB_four.h5", + "block30UB_one.h5", "block10U3A_five.h5", ] } @@ -439,13 +450,42 @@ def vesicle_domain_adaptation_data(): aggregate_da(roots, train_tomograms, test_tomograms, resolutions) +def get_n_images_frog(): + root = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/rizzoli/extracted/upsampled_by2" + tomos = ["block10U3A_three.h5", "block30UB_one_two.h5", "block30UB_two.h5", "block10U3A_one.h5", + "block184B_one.h5", "block30UB_three.h5", "block10U3A_two.h5", "block30UB_four.h5", + "block30UB_one.h5", "block10U3A_five.h5"] + + n_images = 0 + for tomo in tomos: + path = os.path.join(root, tomo) + with h5py.File(path, "r") as f: + n_images += f["raw"].shape[0] + print(n_images) + + +def get_image_sizes_tem_2d(): + root = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/2D_data/maus_2020_tem2d_wt_unt_div14_exported_scaled/good_for_DAtraining/maus_2020_tem2d_wt_unt_div14_exported_scaled" # noqa + tomos = [ + "MF_05649_P-09175-E_06.h5", "MF_05646_C-09175-B_001B.h5", "MF_05649_P-09175-E_07.h5", + "MF_05649_G-09175-C_001.h5", "MF_05646_C-09175-B_002.h5", "MF_05649_G-09175-C_04.h5", + "MF_05649_P-09175-E_05.h5", "MF_05646_C-09175-B_000.h5", "MF_05646_C-09175-B_001.h5" + ] + for tomo in tomos: + path = os.path.join(root, tomo) + with h5py.File(path, "r") as f: + print(f["raw"].shape) + + def main(): # active_zone_train_data() # compartment_train_data() # mito_train_data() - # vesicle_train_data() + vesicle_train_data() - vesicle_domain_adaptation_data() + # vesicle_domain_adaptation_data() + # get_n_images_frog() + # get_image_sizes_tem_2d() main() diff --git a/scripts/data_summary/active_zone_training_data.xlsx b/scripts/data_summary/active_zone_training_data.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..b19365309eac854d7d9ace55088d8c6518d6c387 GIT binary patch literal 9607 zcmZ{K1z1~8vp&VOxD+X_1xj#lf#R-(;1DFZLy=;^ic4{KhnCSw+14rLqP2J3D`V@PVITZ#>UlQZxv0_h7)Jq$#5ZrhN!>l4`n|P$17fL69g< z6s6I!&SQUykt?<#UNmB1xziO;Ca<6v^yw>P!_ZKdV|6w^YAq z+3v-m0C@R$`=%w2M=LU9bt~}#or%>GV$IrEGH)9K9s9m>-&O>81C-X>o;HnD5AD0u zEsKHj*(zEyKV?>V*!Ag+9_?RETogPoL5(unuftB*@Nn*K1WqvilbJMwOnN+21cXdx z1O!4jGhTKt-K;I_E&jOz9=SOH>bouo61_jF`RZkLX^+*9qPU-8v#AI!vjUu|2sOJXf4v$^k0KwQ85dWr@? 
[GIT binary patch payload omitted: contents of the new Excel summaries scripts/data_summary/active_zone_training_data.xlsx, compartment_training_data.xlsx, vesicle_domain_adaptation_data.xlsx and vesicle_training_data.xlsx]
zZ&}~g2RIei12NT}>($Eb>_rC4VtOXBtmOtu-+O)pL2}L1HmC$os>{xY0yK8_OvP?rAC3YmM&10O=+H=}>oA-e36V7*`(qldU_n z%SLG@opS<9FAFTaaF+DkMBaAIke&TU2Y1EDmD6iP5&V6J&UIhXM5NQGd1186J^)_)8@sK&u z!(wBQh|uYV=^;-&Qw#CZBGcSozUPKbOh`B@E7kZ&G_eCaQo4wu9Z+qmPL+EBz-}d^ zgsrG_;UUq)@w8h(Fd`OHOLE>b2V@c)Uvfjk@Se`bCo%?4WGY-4?WQSt#((f*%P4)c zmuTX8a;gMznw>P{a>^mqZJ<~usdVw>UzjokJ0LQBS$fl8V5{sqU|{&zjgJa%E)j-W z3@!uy7AjD17#PTm*(09>sy8s4?@mDhK;Q`t1bIS(56e(G4kQ8pjxO4tu0!Y3425iw zC7a#ue7o`CeIi7dX`)I9RDJzqq1vyIJ`_GZ_5I0z@uMBF+_7)BQ9FM{h4h-#D?=8@ zDpo*Nkz&q5Ff*u>`3DwRWX!=^C%*r*oT@fPDYz0l8UooF7)p0{a==PFY%U((Pwfrt z882(zpPoGIZg!Z8si{gI42)Z+9J))~WFgGLmV@>)S?7pxN$D%oLP) zaAvh%y-{KlQo7KSXv*(iC&^~Xn#?U%)NQm8@}=UL19ckVc6X|}#V2P~`9|N^*HBt4 zf8E}T^uyo*$KYe5&x07)g+<+1QZqUi>!a;;Icsy)>No`A1(a&EC7LP$hq*u~^o54s zdM#2MVdNU{{=NO4!GTyp@yY87{8Anj>Xv$^XwEO z%nQzPNR_xNI)7GjRFIIkfiTAg-2G}O(_E|_lRNN>lKEwk9v@%b1q7m4pCs>aSZQNp z-e$8y<4ER4N0+Y!!eH#$91d?~!K*j=NlJU8lM!~z5i>A&1^0b|0 zka<&3<5A%fk!{*Au!~YeuPmfU!#OfPNSR8?qA@1lBco5$0Xksx1MYIO;OEDJf+obW zc-fd=?>k|SY}neuPuK+5*G;7Oi(-Amy|s^@+<1@p7Pf{pWXp~na45GT<-|j(>TJI8 z^>q|^@B2F}SVyLiEqe$y{tJRB&RXb9eBp3(MDYyfZ?{GzcVuS|q(Fn%k#Rp-elR6) z%wUEW3*50zY*+`@O2mhA@j6;Tmx&iR@;G{H`Hh45LS(0rlJHO5B={|^8p*b9!2Ovxulm;OtMgO; zq{VR7OXN|W`{TvqlcD2_V`${#G%Ksa*1LgVe}p?nyW7*;;HDd||Gr>!c@Ykm`y2|& z5&;Sd>CXitJ9`%kLp#Xb)J3KFuq1kvd(2MQXh|8_Jie@t<6<#C&9WE$TM~7ANVW4A z3JqFYm}<}?_g^nwlz#^u$eEVzj63^%!j@*ko~{#p*S6otHuAXhHf|dq@;;jPsrw^R zU8GCOP_Rfn^=Z?G-V>`@ zYyWama}|s$Lc~Z^BL6errH7gmCTM!5k6!Ue6$6#}{5f1q83z4KF~R|Pgtq;W*ihqG zw!JwK=)SxtEjE*{lv4H6`d;8wj0{8qgxY)U(AuFn zp~1R3?@6M!Rs>y7c?P78>Md@*HD4qBcSY9wv0ajp)PPMGocf#-+6WsI{V|j399&GYAQ`BGV^ywJiGa9 zi*EW(jX5QjVyFsp3Z2w187h=4;-6{KnIlHeXHlp(Pf2@eI;^H^CG74*&)~s(4R%$A zPVgA#&#p>35{bq5)#ko@WfFfJS{Q+!OiU>)+P}+X`JFR(dj38lLs-y;`V5>U5$qUU z%9Fa-l69J`uUF-~y#07Onuo!C-I_w59XLKC2uEgJcKz3_`wNF>1@yp;dkP>?FLXms?PwLL#)nQ9lxb8BO7ozxjF8Hw(A1W;CYe>*f(n*=qTJOm zDwEqxcx_v{h6}wf2x5=0?GMzU%8fp&hDy<3WmYN=pA3Y(D7cbW$gB`#yI%I4H4;9n zo(KjQu=r2mxosi>;K+^@gUBl?!2Z9rk?O{PE3p)JD13h=o` z^%f{EY(KzlgA&1SpjRDo@G#e+QMtiH9jTy@Ej&dY-xZ z-UC5uhqwBbDc*7$!P|=7FbP?j*N=ohb9PvGiao>Gj*;oziwN@g4Jar#)YmoIEiOK` zsYQN>$o{aWaZ@3c^>qLZmF}f!c;!x%muvPXf6avh<$9J8;kshNUuS!(eU;NRMeoW{*G&`zW8Yy6fyYv%qQ}@|( z4Cw#+tsS6ho<6<;3jhD_@ceE4_d7IyY@wjOJ}diA>wmpb^SAlmZ<_pL-Uo#LU*0?U z+ri&AjsNYy8~)Xkga7s3@!wwlK2H4G%QJK8ColgTF8((Cd*A-Yl$-9qcJaRr|K11x zF(jk^H}ik(ihmpb-RJ)pe`9zu{y)0@Z$E#x(m#Go7@?s4m&TG2hXGy;f`WnrK6!v0 Kyow3()Bgk5PWEE} literal 0 HcmV?d00001 diff --git a/scripts/summarize_data.py b/scripts/summarize_data.py index df57059..7615641 100644 --- a/scripts/summarize_data.py +++ b/scripts/summarize_data.py @@ -151,4 +151,4 @@ def summarize_compartment_train(): # summarize_vesicle_train_data() # summarize_vesicle_da() summarize_az_train() -# summarize_compartment_train() +summarize_compartment_train() From a0c31a8ce9e507569c437d4cae3df2ef92ae29a3 Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Sun, 24 Nov 2024 16:15:32 +0100 Subject: [PATCH 30/35] Fix issue in data aggregation --- scripts/aggregate_data_information.py | 44 +++++++----------- .../data_summary/vesicle_training_data.xlsx | Bin 13832 -> 14745 bytes 2 files changed, 17 insertions(+), 27 deletions(-) diff --git a/scripts/aggregate_data_information.py b/scripts/aggregate_data_information.py index 03ca0af..7086b23 100644 --- a/scripts/aggregate_data_information.py +++ b/scripts/aggregate_data_information.py @@ -12,30 +12,24 @@ stem = "STEM" -def aggregate_vesicle_train_data(roots, test_tomograms, conditions, resolutions): +def aggregate_vesicle_train_data(roots, conditions, resolutions): tomo_names = 
[] tomo_vesicles_all, tomo_vesicles_imod = [], [] tomo_condition = [] tomo_resolution = [] tomo_train = [] - for ds, root in roots.items(): - print("Aggregate data for", ds) - train_root = root["train"] - if train_root == "": - test_root = root["test"] - tomograms = sorted(glob(os.path.join(test_root, "2024**", "*.h5"), recursive=True)) - this_test_tomograms = [os.path.basename(tomo) for tomo in tomograms] + def aggregate_split(ds, split_root, split): + if ds.startswith("04"): + tomograms = sorted(glob(os.path.join(split_root, "2024**", "*.h5"), recursive=True)) else: - # This is only the case for 04, which is also nested - tomograms = sorted(glob(os.path.join(train_root, "*.h5"))) - this_test_tomograms = test_tomograms[ds] + tomograms = sorted(glob(os.path.join(split_root, "*.h5"))) assert len(tomograms) > 0, ds this_condition = conditions[ds] this_resolution = resolutions[ds][0] - for tomo_path in tqdm(tomograms): + for tomo_path in tqdm(tomograms, desc=f"Aggregate {split}"): fname = os.path.basename(tomo_path) with h5py.File(tomo_path, "r") as f: try: @@ -58,7 +52,16 @@ def aggregate_vesicle_train_data(roots, test_tomograms, conditions, resolutions) tomo_vesicles_imod.append(n_vesicles_imod) tomo_condition.append(this_condition) tomo_resolution.append(this_resolution) - tomo_train.append("test" if fname in this_test_tomograms else "train/val") + tomo_train.append(split) + + for ds, root in roots.items(): + print("Aggregate data for", ds) + train_root = root["train"] + if train_root != "": + aggregate_split(ds, train_root, "train/val") + test_root = root["test"] + if test_root != "": + aggregate_split(ds, test_root, "test") df = pd.DataFrame({ "tomogram": tomo_names, @@ -117,19 +120,6 @@ def vesicle_train_data(): }, } - test_tomograms = { - "01": ["tomogram-009.h5", "tomogram-038.h5", "tomogram-049.h5", "tomogram-052.h5", "tomogram-057.h5", "tomogram-060.h5", "tomogram-067.h5", "tomogram-074.h5", "tomogram-076.h5", "tomogram-083.h5", "tomogram-133.h5", "tomogram-136.h5", "tomogram-145.h5", "tomogram-149.h5", "tomogram-150.h5"], # noqa - "02": ["tomogram-004.h5", "tomogram-008.h5"], - "03": ["tomogram-003.h5", "tomogram-004.h5", "tomogram-008.h5",], - "04": [], # all used for test - "05": ["tomogram-003.h5", "tomogram-005.h5",], - "07": ["tomogram-006.h5", "tomogram-017.h5",], - "09": [], # no test data - "10": ["tomogram-001.h5", "tomogram-002.h5", "tomogram-007.h5"], - "11": ["tomogram-001.h5 tomogram-007.h5 tomogram-008.h5"], - "12": ["tomogram-004.h5", "tomogram-021.h5", "tomogram-022.h5",], - } - conditions = { "01": single_ax_tem, "02": dual_ax_tem, @@ -156,7 +146,7 @@ def vesicle_train_data(): "12": (1.554, 1.554, 1.554) } - aggregate_vesicle_train_data(roots, test_tomograms, conditions, resolutions) + aggregate_vesicle_train_data(roots, conditions, resolutions) def aggregate_az_train_data(roots, test_tomograms, conditions, resolutions): diff --git a/scripts/data_summary/vesicle_training_data.xlsx b/scripts/data_summary/vesicle_training_data.xlsx index 8cceb3e101a6d6e2aef5b70072270dce310e3eaa..0f9ee1e82eeb9566bdfd8f76fc9b951258dc404a 100644 GIT binary patch delta 10819 zcmZ8{bzD?y)Gna{f}kKE!q5zYbT^7f4&6$p2uQ=Az<`2u4l&fw-Gb7fATc!3E#2LH z_i)bn?sqT$@Z0n3=UMTtwcdC3>_FQDX9874>^qM!Ffed2@_nNSN^tXip?aooH+7?$ znPHs`u7iY(8EoVanIj_+cjn2MX0!+R)J%uAx8%D6cIvNgGIH~H}_V&2B-ao#%@vLt=+uB%JJBx>%whvFbpQK*TwHC(H zaL!W0gl29|+5`Loni`=62g~bMdo9z$Jpoo#?Wv94*SUx7I|_OdOP=QmH#H*HZp_!0 zn|q^Yd&|o!?XafH_N3BO8tOU`LmH!5NwONx)4tMn`|#1xi0{-N&zkxyQtjitx?^D1 
z>eqEDcc`CU?(J`%_VgUx7&g?vPL2)&EN7@-wUq?m*?kRInf_*Zgm?)xa*Xb zMv#}eHqlC6adr@1N>UL9l+J|ZUel* z7y>Tk!e9o|;ukAM*WT+lzjpE?X5);EvP;_>p=a|tD2lpTuhZ~`)1&>zsZAXjHd=e4 z6v;WcX&LIsyvL~uSgbKm)6}V0$FxF%*=ng+S$Tv&${G~NJOYVW0?lXqFDxALjCI&q zzpe77U9XpiVqRJCpFYgs|cE2ES#3EC3I$R z5o_oQ<6q1BW_D%%XO7?`sWN|Nv1>*ei^!P)DmDjTdwJgizPDw@a1V?R+3zpe)29d2 zJQ6`-EVm`P#B{>=9n4)4F?+$E;iaERP~w2KJr^#)5x>k%vV$J=xjV1?Dud=03+JS3 zZxw+X(25oXdk}}w5vYJxvZr3M9ERzKI}yyGHRWLHZ10``+1I>3$A3HS4?EI>0&5-# zAteA~4w`zO7S!f^k!wi`%8d}^XE=TRC=`sOb4r&af~_vmK92a$?l>w*ySGtW`MJ_g zN_uQXZfs5YGC?Y9lJjfX01~l32RU&SrE$Ey-pKU0irhG~J-rc#=IS%Cif;&t<5nkd zc_?6Sd(mNbvy83oi4czh{H+JPb3DE?A&X~O<}Xw|fzqZq$|B&rBr}ssDOnJfo$QsJ zEYD*0;ZAsN3?N9wuct3rrs)C}N>cA_RPW7-;TFVd*Fbw2&Q^!eBImav7$o-?M2(N4 z5?pRIMbMf9A~nd*BFSq(-&vM6pD#fa zDA0?OjZP&j?;Faq${d2QM$fBy1r+6vivi% z59X**biSMiPWnG*X5d=EQLXVPfAd=}c+g&`RkWEsV5Wz6f#D&XaQtmjp;usX6|l3J zl2E~BdfaE$`p*+ArVRe2aQroMz>p*CME%e%N7bTdS3kgdKf_j`=2W33QoU^oms*7d zpox9kd?VSXXm0d+JjQ7+)`>0~onWLodO>5<1w2GA}DRAzJ&NYM98&{xGn(!oQR;kOGdkdzMsN&M8Y8S~r!i@)SsNxLlLhWSwnhi8&TFhZo$v(Bwe>vndE$>I;xYhI>PXS=EBs2Rvaz5O}N;| z-tncDcl4lB#8Vfe4;+tU#6gsWizdSVB`W6c!;`w;DM$aEl=_|Tc5H1HN}WXxEO z+qn1^{734%8gF@kyqlp>!uhK)wfL`YSQ!6Y)?m}Ml@O0eGq26m;r{BzQq*H2bgz88 zttbnPhNQRtI@Ql3lJu#)YQ&=d{+hxCsc-lt6&uWnX9g^00r5+<4+5g_a5kGCrg_h{#8L#li=Dmqpl|qsZk;#i);ZjC6i4*y zWXHmitTnperEyk?^FUguRoKxzS70kJTeNp5t2p?`%HY)011t>GoyfK$xz~z8$2Z|o z6&Ek9kIrrrL;H(`O9_iWAM8Tkqsw5xY0R@qs-kN!Cyv>Oy^j$Gop5x7BoXRe%MKt= zZ(3Zg61z&{8>HHPa(W!I3hW<)o5s6&4xjOcJO;ZeO5?5G)c%_4NZ>&l0>0*Uc6{?F zAWbJEuRiHcGDdp+5`(Ya`pSj&Ro<&+{cQyy15$~*uhA4Ke{2kF1-S@x$7+piz8E%{ zNENk@_Bdubkoty$USiZgI|$0NW+>>11I!93*dun4=s`CQWJd&P30QPF{7P^zHxT_a z?)8ciZ(pOOq1->J3IC`PRJ%Yk-u8hLt&I3M`33-<81fyu(JS8xK*80#)?)KnLI4ES zoD#NbtqEv7N+HY5LG;KgpBEfX^wb9}kCXS{D;=Rdg)t|hMYDr@*5e2>S*N5AHba*0 zYRyq(cpOUu9r-)6Gg0G@3*xJO7dVj|5Y4*JE$FnEhNdm2e<`rdC%`;UTl^)&lHM754%A1p{zg{@A3EC#q?1GuGv8mPB9-_!8{ZjP^W3)006M4r z@{Xx{9lI}F{#EP&CiTCH=Hug2@xx$C-ArA9*80<652n`JF>bmH1B%gHk2f|w0~@#b zHC6fM#u90tuEa{D#YxOkqgmV$_0pS%1zdKzdIB}|r%oPBHMgqz=w&h=ADD7EBsjDd z+5pGt7_pTy#sRq@{>}_+%*%R1Z`eiy%<^_|3)eStVs(t3muAIsqk99`z|g%hQ>!*W zfDg;u$cfR>VPUQ$2WpDycFzHJ2;nh}*Z5P>qe{<8OH`Ftc|M~%M-?#YP)S^3k79+W z9Jmgm3Jc*RwPCm2h~6SVdTv~TpzI91-kfM19VTWz(5)mBkT{`&BH<2wf9uS_+Wf^~ zEfijZEV@rMP%yw5{A6AMS7y2-OLLUZrsPfyF zSw()vT(Dls=|?4xMJ`gw5|2h_^}(fM#h>U9XQ$77I!ZG{?uuQHFSWEWS_#8XM;KFP z7|*3G7MjxM->En~yoNF2=y2p>qb!sOUplcXR96Ww>W$g14$OU~$zpqQR*;~i&n5i3 zgP(P7W+HURaOs>Pr^TPpOA@y*`U@v=yz?&nRj7Uy@pf^KT+QC_2?HpnSe&R?hHIaR z@1n-oDKKh7uesqn1;0flq17c7-fZ-j(yZ~}Th<1e1(QQ9ef%XAu%|mrJP9C0D{2czE|17dpybBep0wu3ol zjg@yl>Ey4=lhmtDoc_`A)yrlSolGOCaV~kus7nIijEGEBsZAe)06$lRW%MJYrC8ij@4sz4Jz2ydKkA!O4^n-`ZW%sXZ6Z ztny+PqQfn0l8t{*Cjm7@yNK zfM%^E?UF$nYmB87DU=D-ocP^So67`A0&WAWyP(1?Y+fEtV@2*#FuPRkNFnaqWIMZ5ZM+&= zEmo4>c+cBkoP2+88^#WRGKhZ5|0LyfNq?9LfpQJJ<*SLovvFMMllPfrpVhSah0P?! 
zxXjgVx@%E2IW{A?&aVCNlyq)WJ>x~@Ut1<@{x$&RB7_&;yg2z7TxmE56swNMi7=_Q zf0(-pX)XyI?(iJaXka3RLx#>vtb3{%ESL)}-`(%uoS5s93NUl7 z3lYxwC7A3_6OhzKI5^=(Kd-wWc;Gr`dlI{j&&hWM)_(xEw?Xv7EWV(5&x;d7Wi{~i<=$n_g?%EQ{dakA}I3x5k(Xd<{RU*63c_PEi7r$o0 zCm>~^b;A!_jGe%Sye7Jbd--ALq8 zTT*4@AZAy~T`Q^HJuyhhnn|YMovinXEt&;F92WR}EsIIiIn6%3YJW<$ZlT!9yIHY( zJwCXjEsK-X5~2Qh{lbj_D1HgsLm3~g zt{_YS$||k-%PjZIdiI)YPj!Lnre*Odt-!}uFBDj4x(^c?!FpC;6CZ>p&TqfA!elC} zMON$W2bgQ{<8GIL@Ikdt^18G+J~;G9$rjY?wUfuz-vJR3N+GXJ-#)BkoKv#w3ou0VRkoV$KW0X{CL1kd2{wK6s9yhAWsES zSo;NJx4Mz>kXSj0#W=P@{Sh2H(2__kHF3msi|ih0F7IkImm&nSN;)nWK}A~$^*bM` zED#-AXgPGXl4cxo`N>_LPI@uyvV?$Qsih`syUIF0gE_hyOldGdNHl)(N43&}F9KOd zr9<-i_|-<)3lqGCN1158GKK=EaFj}V%OXa`BHcR4P{DS!u5>xxdlMuAYdgpj7X-$6 zXhEbl8@{e*zszOCM{2b939RY*^F>rd8bC^)f3!ZSRJ=~;wvf@t@!rNH1A)a6?1=@? zU=KYeYWrTRjf%TIS0C*OlzX?~1E;5X5dcJu{ITttim>#laOWVV@58h}YL(Ed$Z7T8{f$3eD==cg0B0 zHiaRC5BIZNQ*%wV+{+?(`Jt!cSAoEi3Gtly zkM<78$sR{QBgYw>dG0$9c}TrW1Uf4v%WdxlUd6JFdwUNt?Ja%!7u5fX!W2Q~EVo65 zi}ZaIFyrx(PwpnS3}N~EqhrGYL6x?Evi?A@mIp5)uV~UYA|`Tr{2>GnX7He^GIXOA zmJ?!qKG`Y>t3F*;S(@LOHON|;KR!LDzu9U(T;8vuku}yrcr*B9y1%84jzhRB)y&y>Zg9aSLc7kiBNi z&Aw~IJ3NHj_x5s1?Qq_?}iuwV$FKjaM#Gz%BDQfbs z)8tinmOu)XHp4dH#RsYBOmYTCN&u~T7B#u*G+BJE(;(jp={2s;0BcHqCrlCG=5lycyY#LAJE<##L`bM{Xf2|V| zjKvVEp%S}-k|<#&H8Qfc?lXL%hSEa*KDEI{KOBFfI$;eZFpGqoRyC?UIXBxyotmc z-U1Jzf(9u`8nN^~eu`KTtumxttHa&*57Q>-Z`@VacJVYx`V7TWiIB*Z#L!%EoxZ*M z{m(GI4_{in5Ji5}k5cm>Mgpq*+^I63zBJS187pCFzx{plT`p+X8F$2R2}y)>T1g}* z^vZ(?wm;XbcriS(m8ur@Np(wWTIkdMZvM7S;>6 zG=ZbR#u}`Xr|sK0u-cUQ<~VJlaNywA;3^R4+5_(6U$Ty23e|d+E}@!vBcQyo`Dj%p zoy>|-ru09C39VCwT!!hyPeEY}Ot6AJdb+^(RWd3iGFG1yrL*0Z5#y5KI(dFFvNs*M z)M;3EGQV((z=#&bGIm$G!)&wss)6?R-usy+!J-7K&Pzl7I2H4{t!vP)Z2H0#JDo68dzu4Jren5uDrqjq@Uf zgIxJ|2!=YxS@mCGV-u*bFGtzGbKq%lyz7=)n!_RXbmtw2Mq7%(lfPo85Y^t~z>~pJ z61dGfIOJa;j8hssIQLQyVQo1Jfk$}FnaeEPUy{I4GJtkly6Lv!=(CbiCyt-=FD@9O z?U`2h32Bfcw-(;EGRrVLF1Jrc*%Lbf-or?_1*I46Xz@-mSB3ME(tY(Rgh=#+0qJ`F zwT3P$L&dpV^Dt_G@yL(g^0=6rGLsqNHcQpr>akHdpnI{AC76(enbXM8a4+7U)AHC0 zxZxRal5jEc=V3e5}+GAxGYqZBiw1O31&TC!FUy%I(;1HCV`u{@1 z=>%6Wv`4cU>~)KmhZehVZfGwRDf4L<&-F{*jdhnCtzoEah|iE zO8=L6{BF!H3rGR6F&Rg~xI1bQTTuA+R5PVZ8Fl=NFGH?%&3}*0mZ!oVKCb(s7444R z&T+>OQ}J7AS9$&2N7MNBFa|ZElna47>*G_^FPg# zf6+{XV3b~sh}zwj$Z2nfA)?T{EPp}EaHttivNw~#C$k9^<;Of{7=D$9~xz>jq@ai&(;D( z!K`cE?%n!zW^bf`1MZ~J#LI?8j+tOTam#SnnAto-<197r=rQ7+TB>AD$=l7486f&g zeX?tW@O(s&dhIL?@8~sxOf5AAI4qSfh=ebc%RgIV;gSzicnTcv#h7}+zT4^gRSlw;xSVQZM%rNRDK~7eRRysl{AHh@I*geFL3(Xf0YoJ?(N-3(c8GQ5$v=LNr@PEFF{=G-OutadblgB!pC9niXO-f ze-AmTK(>LUq*QUbxq0uedpmb;CYYgf@@DeH+Ewp79M7hks;q6XO60s4;DwF93)Oz3 zg(BJLKaZ{^WVZ!clMHUOx?0uRjbyjXP#&A9sv0|7|C1@AMc(jgi7>J>y4Q#`+zJLHikAAkG&CRRFV*7Lau#3KEV-7#tr2*&&*m55d~b0ZqINQFnWS0 zpeKmB!bBsuWFGoC9vE4V%pi}0ui6mfylu98BVfi2m~v)9RBld9xd6kFWrkOimQlPdZUt9Ew=F$| zZfKi1&A|GaHSl~Ls6ZLstAf%H8QkW-JQXJ9#{5911d6Evo$?4(3-^M*G9ov^B(scz zB{a}sZw#PEJB!T+V6vlZHpDYZMgNj3b=U}eCSa2@0|qQ}DPWS`D<*&RPg}3ZUq<6u^hkvJ z9sys4G4^ACDS0oe0roERoAzFi8`u31RCiq1E%@*j1ZTxmh?}ano;h;*@n7)XBg;bX zA|xYIKv9eAwIClCm45n36xBbx+4V#>strAqbbz5Ot)TOE=b_j19p|vu)iPwqW{2@v z!sD+s3ByZ1@Qp}iZNzD+2(1gHXut8(GdH1x$-v|jfgwVrH;wLG&{wNQD=jLd&DY{4B{=#Lw3KiZPu=GDmVkV$t<7*lA14X!;7bGaDtIh zj;qtr+#7OTzz39o@V8o^L!+XH&BU#W7qAJ$g3J8bY^zr}6`g^uVJM`80i%m$+Lpk~ zB{}(Z$nIyh&2RsLnT3EZ9?MRAHg&!BC{z3dy%;Ti-SZEDa=K`<3`MtQ55&5q6M-i% zA7ATAk_l){;YO7$0~>;@t{~7ne`MFj+Y`QdO3PQ?m-3ya$tpxgGjJQtkRx?wtTYcy zBC3c?Cc>ZnaVjop%rIh;L;+a@{LbK~Iqk|XNKcw-Vyo@fWM;DRJM3ZSd-PXmb70}X zS>_d_VXTcmF`97Swl{-=DCqz$u;8fo=mj6<$7xwV#o7R}C`9HaeJH4RJ0l9*-|}lS zk@Mmg!f44aG4NjZ^ajALCFqq2|L#3L`k*Ldu>TwS$|ySA=2>%edD*Y@n{Xh9tI+o! 
zs}3?agsIs;uJUhKo;Q1xmqnyE`3J(AJls8DN9X(JqKNVU8Jn{00c2Hv5eEFo>-pnafT>=_Y zT?b||7*l?`A!Jw2+6*o)TT5#K`N6*=M|b6`SZp*`oC&Q7rK}qGaRR1Z!r0>g=ow%F%|*guI~Ei(3b`qJ4M0^TP{LL z3Y*(HA?>K@I^5ac#Jft1*#O7X$pV@hlmX+YpRfYeAM2_S^N1OzcXTlzsInG2cO@iW z8;nuC$!dYfckKGa)0aj9YSGUDHPXUnx5|t#a`{KNMy$62cu`y&?ae z$yJfib#+~~IGeiQCfe!u0w0f4EzR%@E4v(HMa{$=_vde4w%b~|TKebhw;OMdi{tf{>57d76(YTWQfi{Bnud=T+iRRB zf;3Iu*J-TY^))cs%hzk1oKL7Zb|-tb`cgUe*JuCtYlIc< zZ2eeR`Mz*@&K2Na1rOI-O!Q?0_X7ITg$Htb*~}~UKd)NC@9_zuZ;|AaB>aEhHiU=r z35laWIysKO%f-ULu*1i|cmTYb_}`xo<|QWv2Go|**~P=w!kGhVXRE4+iS-!!zgIfp znEX^&y*zLlehRb+5#Xwd7p}_>qD9N{ZY@L!VcmIV@BKV`?{oG!tefT;?o6Voa_u@b9v>g z?yelFg9}*y;GIBArj^{!seXr5h_H)KxwX=G^htzPOCuMb2|+O9=c9|i)A|KF9e=Lm zJpLu5`T98w-4NAr<%;a4L=rckXLUZZM(o`6QeEnYpq@@gH z@W*eH*eM&|Rehuus5vTsF4X|}Nb}S7z~IUjRE@&xplPtG*QtJb`?hemiup?P&(`@G z<=N+2lfM!+6fV?OU(9S~0QB#!ax6@?m1_L2cweJ5)6BaR_;`4kTzGg?U`py%uJGYQ za0!uR>dxn3$HQAQy@E%Nmmna{02F4Kc`|)Q$zKq?{zY3{*2YYI*SVMIm#es}lb;U@ zC4~eV z0|N6F=7GNF>pzPx4$ij9Q^eY4Ei3}XgMib`?Uch`0&W+-hCXj(eE%3^o_%r7avrpQ zFn3mb@q2N+KQ6^B?jh5!i;bnn9i43-kF-}XHL#}ZT1JC|y{EYif8n&i0 zSUmLiaD1mxa%aUp4#;A0a}5H5{-yx_d;5Ex)&`$9mNrg5v!#$<9PJ-rc1})eCLRl1 zoHlRnoz=#r?GCl2Tx?Ul3d09lTP1@$JoQ1_%1o|TPJI}Tr|5*nXUL3yLcANj+ z`r$*s#cuc6V}ZDft?rGXG~htQ;(LIv=l)U31Jp@O&I|w6mge&t9&P)cK(pk>X5XXu z%rAeVa*j84w%7OfcNX_C0xa7C{O{grBpz*NLJWSn&95>D++GEaEi~Ky3Y1*=`;dt& z?A^Rt@i~j;I=KPMygADS@Vgi=pL=ln+2S0re$n2h%>wYXeS})5jUMfGzl#?^c%S}e z^xxU|$t?|_l$<_NkN|0EG0COgdfC4>7Y8Qws_FQ^tZw{bgKcLxJc-WXKlQ~=)_S& ztbiewzW=7-J=iN-vg)S>w`q;M%o(A=;_WfE$FjB&rK*ubN^|<7=Cl=*STlF*dJ^B;U^72mo3Hg}RbPGxXBBCawLAl)x=i(d z1UC~3=6I%&h+%WJY*!Al%k#Yaxn;E!tWL6%Gz+D~LM7`pIGL;5+qhfS`D!-#*nbO(z2<^^oK1~VNP6JRce^k; zVz4Smg%=wC06coG0HPSwtO01B?JRqPF zLQF#0#~-Rv5lXx!J%ITU@dq?p)Upk0?iT4ZGg+F}dmdr?ob^h0^DQZqAhyR;O8G2Z zATCn>WnI(~qHF0{Z5JoIDvQd@nRFTXx7$hyjH>t?y zY0ZDzXhKzY^`2B+S?aCWZV!%{=;mAE3(M}`H>-JD7{rB@6HL5Q`e4a@Nd>}KyB~7e zX<#D1)LQ#be$`6l1gcuJt7^2X3KE4t-P(OP-Dh?99g(%D0NV`xG`5Fl?6QDW$8Qg#xldkvNHUZr-(0G z7O31;*GMF+qBAMm@W0VsUq%DK;_zV`IXW2w{SdjP*14ui_pHI1;*ydQ3+a18WQW|X zCjQAv+n0m{`I{dG%AhWHwnCiCMD6=L&utQu7fD*QZ(9{gz0vNO zCCOCe2J6CPgm6+=ffMELSx)vou1Fvl=}paeMYU$*IuLTVKV2v}a-=-n5--ZKfTI5MD=k4=bP3^;z>KDvSiDW*FU|C$x1P zFe51GbFHKwvor>Twzd7D3wISA!PfI-E5#My&q>t5On%3EFZ64Wq9IH zfIgTs`1?`vO5}U7%^dmf&k?}vf59J@;KC7AKipJ?Dz?Suwso)6$QU=yEFNceuUD$! 
zu~y(X4gUHPf7)k5VT}^PoI7cs6@_u@pfCc%<{CIo>N+sWpH>Wl1%yIkb@J@-1GA|u zN^%&!#zR}0Lb|n4{?uYaL5-$BEMc%85P4k$Y;5l0i<6v%FRRywDjX*Jbt+Q0w52Yq z1xg9Y?@LC&8j0N84YCoSol6DhUm!k6b-(cIQ8=OcvrD}OvMH=9aSQWvE-mLr+FvrYPhYu=CxDChpxgPN zd!5@Z^;OP4mQOHy*FQ+fzqrhLJ#A=g^oV%i{P-?Y`OjNoWDjKkz9Ez`wrBQ3XW8ad z-_KjLj?dtr4C`j81T5$_>rw`@-o(JGQo)$_STlj48uM(W-KV&1HwCvuvmf36&g@-u z<6gGkWWQ%ciajoIxm1@_!8R|Xhbr@EteEi))fi_h{d($(uU?Q0?su`-4_pbTPRf8k zSi67dgcSMWHf-(7u1l)Krogh!9Ub6{6;Xz~0_+4rVvo6`w)1|q5*sy497I&TQ{nUyQP#--c1Pao{3Z|p&cu_1Eg z|LJAw{Ut{?3_RKw(cRh=>Bl+^_J$P73Kz1h!su05=X+_NX^uUJ3O32+((1C6OKQ9S zHjPEl1Mc?b2uy2Nx*rG55RS6LHPs85(Bs`_N~G`PQ_MsJYV6FF4#3Q56r|xM1=I=O z6fQMdX-|hy;Y~M4U%f29D6af`>7?1WwpAJ8#JTxtF+8_v4ioDZJ31Fn)T4BC2C(d0 zfqLH%b(?v95%PP=IuF3b3Q$GZfvYuNx5lrfu(O27=WA5t~r=_w37$ymNoY?~}%_Qb;DF8g+j#SGtbL+3I@lf3Epm<3}58()y!cCv1 z`_0MkF+^cwbT2o6OQ);016!3wA!3?9$VOG#q@jpg0THgkBj8$9QAmQ-J%CFMr+^Bb zc+bRuIe%V*_!^xCuis7l*fQuRhW9@X|zJ9LSCH{7B(@!=wOGN@}l7K z9zogCd0Oa4r7kdcqj|6}{Q*X95d0KDEdpFlHH>a(WbB=r2G6@ZDpJIxtdF_Fc*BZ$ z51H#KChs){Ylb+q_o4fL!jk(Ld%e|+iSr)f>edTY)!1{H>@_J#AV$Yr zuEhl0TI7{X&xqCjdfnatgyM)Z=p8&YRZ0vE{XYKs#c}Y;&JZiheNMl45hZ7} zku|kE4c+_ajL_Hvc7g>Bz~kmm4~aLX*k$U1;XI`O9&2xholG1V`H_5CuvSI-bk~gp zooLy7$Tr5lU;^4WB*=Vwb@=dgHB_Q&F|E>6P@?;u7tg|cz>~h1cOO{R1o^i7DDDSq z@#ansU0M#|ESLM9F;^WufPCZ#GgPW8C*~}s51HVY%@|SydVd~-gpiGvqRE971S9uF zGg_W{L~5;CbbL6D9vi5$W$!FtWEqQnO5N{>@NHkF&L8dqS>EE4zFSzi)65yIN>+om z%b-(L7b5zg+9Z~P+C8m1yvtFulg&2^VqfeUFELUz5c`V#9%tkGvN?{86wjLAQ%ACZ z22hI^{Qyr=4NuRqj$A!#XZl?Lo^@8tpg?A(6}8C~#EUD?ESF4(Ug|BvEasUjwC{lh zkb|bZ5Bw^vVa1MQ{)-c*2|);4^58n}>ebF(?kBl}+wX7ahG1rdn6bZ5!_ze)Ph~{t z)j@vVh+>JiP*ZzirEUiqD-_wQ8u_Pyi~)#g5hWptRo+4a7Gv2vuTB3@%J?+JRsB|i zgbPfRa?2=Glcs8dRKp_mz8~EhE&a?5TCWI@N1>*Z4;+8J0}(9u*`ZJq)$ppu?Ws)N zrNePVvG-g4Rt6s_=A38FJagBqPd^1`3uR{JQLk^Q6UdkybzM`}2mtb3YBgtT;-1R< z0?j12!XjISlr`xZ8abGYRx*A=cx%$0$`Iee(Wmjr$$87~Y|pOeo97>2P0d?V{8WYu z2SxHVNj_+@U2xR*y-%0Ddd%5+L&cfja9BGuK_^ibGq)m8LOf%=njxN*WQOd08exV{ zY~1he5vg5*22P{#y)FHeXMQD)g{;|3u+8GDIExt$%Z1=f$LChWLDNbbOi1r;?#-EY z$SE1Em(HbhpF<<^^$u^*QP-0W;xfG47VFdY;6c_)f!opq9YxjC3}aoy(o4wgShJh` zpN!YI(UN~yyZ;$8664I!Ekg~Obke!^_CwYvM%h}4Rh-QYhh34loXpLwSWHpRM-vV2q&-1*bX9lp zhu#QO5SNhW9-9k%5~^nDxtkZeM4_H~D|rf2{r2=GSMi;Y+LobVO}Z+CS?R~$%A;-_ z zhRlkYh4w`n&tFoNjvo|0w$D9OON7Klktcsiy{FrGdB0koWD&mZjCU9B3N*^h{GoGI zI#AXmrJje=i>a_dP=c2*_hjA=;hclwYC1jqF+}tO0)yr} zNAw2rq^q9aNgd;|c*LMYe>)ku#OhYxzI;7b0*qDRJ#F>tv9a%{rhXF$ZLyZKs;X0r zpaHaZ?Ae6FfI>mC&AWpq1qP@eL#kiak^8xK!|qAEdKN>m@8GECV~CM-2{bdU<(nl} z={O(Ok~>RB*F2PZRidF=j$R_fl&mzb>9aFYsB~OKu^go`-F4)p;LDRH?kcKcPAe66 zspRXxuVD|MIDQoeisxV(D=ycY`??)D227e!poYf@#pHq(tv#-4f$<2P^n8TvD@7-} zOW78}4I#(s86x)el%89zZ3c1J4Y?A%0#TL zNWa^__&IwtM5Tip=af0e4Jc2<*#S`ZP+BoOd;D}YT^@GvOTy9-Gw!2&wNX2p+z7D@>a#?Zjgw1?$A#TnkQgE%sYH$602t4wnB8$F}MfT9M zyv_6U)H2`)f`0H*4{Z!m&4@j_t7kA8sW#5kXr*7jL%`rHo05Nx?PpFD; z)%W|6Vq?f4h0ZRW!v+OM`@k9ZpJLjKvW_JiC}V4R>zV}u23Ohf{3JGa#tn&CcbK>b z<73y715^gXqv#COoxiU_GloxAYZPaTJew9y+?Vf%%(|1joK5`OSS<9kH5cGqerPNJ zsjmwvY%*sm#jwJdXz##2MB54yAs-IAQ@y_W$3|-Z-r0GW_F#J`&lk!Wh*f9JRV$q^ z%5U7v(PG10f>*K_u6#49-P+XZfW$(kj3BwCO9XWO*7?5I)S@Ye+jcC5&y59wj?Nd| zCk{6@U$sNXNTzHdTD|ET5gEX(ZWwey$Y{7$hNfQx zk?u>)XtUi8jEwYKw`{$_J|>m^5c+6EYJaL6j0(x-gHk4C=jLpZ5bvI=^)TwnHs*^I$ilK-uRGU&40X&pK zVt8F*cu;@aq-1uQiz;_*RDS?-dx`XZ?%dZ{NDb-AQ%RGrZn=5(*VMW=ovIMIgguPT zvS0JHIIO^CM>L?n%X$w@q=&0j`nwr82YlYa&lo;feHt+}sAc2)F=9$KYbZM-TrF$Z zYNb&Sn14zIXZgTKIU}(i?IJ;GQgS>ULse560)PF$C;j2@OR6%KKmo#8zpKlel%Hs0 z@uo_t+DmQsdcxyDMnYE~DX4ZKlJO+`MXN*duN?fAb3SpVJLN5G>d+5)Z7-h!qCItq zdRkS0!R9g%%(GUX?rJ?n2Tkdd4NPv;q*qO>;&>`WR&0tOq}MV-TSMNVe?OZSzyJK3 
zoO#HCeGKWt<7#|$J5kh_55w-))jfL5Rl&1Y!m)4L%iK>YYDT$OMnCnEa6$g4KbB?! z3ctl{yopV@wJb}6p98ypB*paXE*qCfy5f6D(&J}iBWcs;1-b#B>}S}9*<-_Vx4bh4 zp0QbOi!SGX71^%*GWI;c!6GEfK4wemSn4wB9|Rb+WUKlBS$A;lJzpIW%Min<@9kld z?1WqD?A2MJ0phj4ZK5T;0@zQhpunc2lk@l9FkN@yJnr1^?Y+&hkd1GHRHDz}iv>iJ zgA6T*-34kerflx4{dg&{Mtv6Z#16_lN#An9$huBlIx#rmmfF40Qzp3gLbvosy#PB= zS<271vtrqLmfckh`P%1oi-lNF-!dEIy8lQCuxKjyo3&NMMBQFy3qQbLjCo*T;rA~Mlc zb1f65DFcb+g3A>(ti>XOpTIHrW0nr5Gu`SwmnLkou=97>@1=?wmSU0lOZ3Mj3Xhs) zYJ9uoHDW?zo(Vfx^a=%-Q#0>SJ8VebRX3Z={xe}pxAF=}X&)0Kek>@Z{?Du@F0S%l z_bIAU;rgJr&|zdvr!Gwrmp~QH<}zpBUb^uuVcsiK{Y81ltGXVt%mCWkT;O;eK@SD# zOxAOKfK7ID%UiY?S)Z!?%gSMH<-_z?eP;ko@wH^2ta`qpW}h=B1Wb)As&;#CZp?d2 z0+(>6I%t5H(`HMr!iVR$pYD&}qGkFV!x27g^fG>T!!aNX5%%VfICzy|tk0)6Hw zb~{kH(}ZTg?$3I0GEKWvwIfHz0{Ju0zdM`yOIXid5&nGR{ta9NMFhnbI^?J#V7qR5 z%v&RR;E(rne7Py-i7C6$G=)ywxkC5Q&v_kPOtJRbU{#CL^uE((?b<-TC5}qjDeryn zImeZ+x&fUt?}x`~I*UYNLU96$w(BPUBuQ?5rWZzJC1u%aeloD#_H950QWYnw-!ClU_Z(Eb%_b1oQ2iqp@rt+IW%= zOB1wIu7pHZs82cWG8wm3+PS#E_;RI8LxV8!lZN%sE_G^zZAX64r#ig7{{wf6}>aK|ZK;PBXS9gWB{4pA@`<}-ykb?7A& z)1R8boR-VhM6uUKRHO#_$!Kz(LG)7{Cy=F_LxKkxjuT&WM;!*Z3J~|wvvT14W7>}u z@2w+?In$Ng!6Rrv$*B}91Km?6|CsuXDK;IKrWh8mF(cHKZbIvrS;*aHIm2QZ$-Mt~ilZmS&;5 zqb$9z=j$*5q5_y-|3>2!$Lqu9VJttw(uP5wYC*+QZbe3SMbwIga!?}!*v)M$+Ut%t z4ai}ksP`B3>71z3auJx$yYb%TbGAi`zIfq+qZK$y;J>CR!xhD)n9ZC}tHyGp-B~P{y)iI*lxwg%2Vq6_bvt34%ibMY2^Jaql3@9@!15~;XiVQ1aezuE z@It`fFO>AY!<;aKjRU#`uL~-VHnmuwv7>i7H0x>~+Ah}L!X5j+r8Djk_M#E5!-$p~ zA1q`7M$_TKI};=cR4>_VF)_z46KikB)avh~O&Afz4fJIvL^Rr^GEo`hz-FZGoTEmG z@l$vUD?1W4qrz#PVTrr);8O7Bph2`vIB@P-Pz18dj;puJUj%nm@&d0eceT;qm>$fV zHV*e2EO;7C-xuCl1OtY%nPFmp5)&5=`{iU`(CQ8QkP!!ab62EFn)V*^&T6& zCN=$o=l{kGaM%``1OB@cw2g;BNWYEkMGrG2k&|jHWhJT_{I988*>*&PD9vY`3}my7 z?MzP=1qgzdHze-zMiiT;zZ1cZ`kh#@InhKVYZimsEUU0RK!EIeYFw`(8%>UqJ~2;k zA7_lkP11Yimp8(HGB9`F^RD(zNRM5|z0q~|5}dM+f$SsDgVMCJ#B0?db;2Qs^{3bowMohgFMUB#5p+*d zGnaKAs|VJa&@|LdL!({ftKyyzc;*8tDxF3!4)wbG z_TWkZkx5Ian#LgbzoUgcfnRpo4II-?R5=z{O z3$s}ogb2}dbq6&qh;Zf^In`(rm(ahJ#$Wi428F5jvO&|K9pB^4Wqwe8= zSJP-#m=fK6##67SZjI(iu_mtwHw z+;O6=rHBl82=eW@qe>5iAqkfr5oIPMq_W-E_FA#l-caG?xOMy6>0#96J3WmlhbiyO zgIWXp|IYl}nkgsu8LEnV_<)`H;|tSF4}b7AoD`tB?f7VaFDzp&PVA6;So&;dZf6&> zbFg!bd?&}%b-V3ve^eOpUxerBUU5|G3GlyfE7jc;9KL!r8IkZ!cm@1Z*C}Fh1^0Ld zwuF1UabNZFQAbSbKW}s;q=<>(eljffknI1y)Re$1E^dPR>hK_ru=pw-o(nM^9x3<+ z)W1J3uQ>7I;rTiXczXFc+j{c*x;Sg9T)9ep?cZ;X`5HAUY(#zne@_dVgVz7~yu9*2 zAfa0P7A|tXIMY>5!Gtw&X57 Date: Sun, 24 Nov 2024 18:37:52 +0100 Subject: [PATCH 31/35] Update data summary --- scripts/inner_ear/analysis/common.py | 10 ++++++---- scripts/summarize_data.py | 25 ++++++++++++++++--------- 2 files changed, 22 insertions(+), 13 deletions(-) diff --git a/scripts/inner_ear/analysis/common.py b/scripts/inner_ear/analysis/common.py index c3622d5..0d4a46a 100644 --- a/scripts/inner_ear/analysis/common.py +++ b/scripts/inner_ear/analysis/common.py @@ -1,4 +1,4 @@ -import os +# import os import sys import pandas as pd @@ -46,11 +46,13 @@ def get_measurements_with_annotation(): def get_all_measurements(): - data_root = get_data_root() - val_table = os.path.join(data_root, "Electron-Microscopy-Susi", "Validierungs-Tabelle-v3.xlsx") + # data_root = get_data_root() + # val_table = os.path.join(data_root, "Electron-Microscopy-Susi", "Validierungs-Tabelle-v3.xlsx") + + val_table = "/home/pape/Desktop/sfb1286/mboc_synapse/misc/Validierungs-Tabelle-v3-passt.xlsx" val_table = pd.read_excel(val_table) - val_table = val_table[val_table["Kommentar 27-10-24"] == "passt"] + val_table = val_table[val_table["Kommentar 22.11.24"] == "passt"] n_tomos = len(val_table) 
print("All tomograms:", n_tomos) assert n_tomos > 0 diff --git a/scripts/summarize_data.py b/scripts/summarize_data.py index 7615641..6642db1 100644 --- a/scripts/summarize_data.py +++ b/scripts/summarize_data.py @@ -65,31 +65,38 @@ def summarize_vesicle_train_data(): for condition in conditions: ctab = vesicle_train[vesicle_train.condition == condition] n_tomos = len(ctab) - n_vesicles = ctab["vesicle_count"].sum() + n_vesicles_all = ctab["vesicle_count_all"].sum() + n_vesicles_imod = ctab["vesicle_count_imod"].sum() print(condition) print("Tomograms:", n_tomos) - print("Vesicles:", n_vesicles) + print("All-Vesicles:", n_vesicles_all) + print("Vesicles-From-Manual:", n_vesicles_imod) print() condition_summary["Condition"].append(condition) condition_summary["Tomograms"].append(n_tomos) - condition_summary["Vesicles"].append(n_vesicles) + condition_summary["Vesicles"].append(n_vesicles_all) condition_summary = pd.DataFrame(condition_summary) + print() + print() print("Total:") print("Tomograms:", len(vesicle_train)) - print("Vesicles:", vesicle_train["vesicle_count"].sum()) + print("All-Vesicles:", vesicle_train["vesicle_count_all"].sum()) + print("Vesicles-From-Manual:", vesicle_train["vesicle_count_imod"].sum()) print() train_tomos = vesicle_train[vesicle_train.used_for == "train/val"] print("Training:") print("Tomograms:", len(train_tomos)) - print("Vesicles:", train_tomos["vesicle_count"].sum()) + print("All-Vesicles:", train_tomos["vesicle_count_all"].sum()) + print("Vesicles-From-Manual:", train_tomos["vesicle_count_imod"].sum()) print() test_tomos = vesicle_train[vesicle_train.used_for == "test"] print("Test:") print("Tomograms:", len(test_tomos)) - print("Vesicles:", test_tomos["vesicle_count"].sum()) + print("All-Vesicles:", test_tomos["vesicle_count_all"].sum()) + print("Vesicles-From-Manual:", test_tomos["vesicle_count_imod"].sum()) pie_chart(condition_summary, "Tomograms", "Tomograms per Condition") pie_chart(condition_summary, "Vesicles", "Vesicles per Condition") @@ -148,7 +155,7 @@ def summarize_compartment_train(): # training_resolutions() -# summarize_vesicle_train_data() +summarize_vesicle_train_data() # summarize_vesicle_da() -summarize_az_train() -summarize_compartment_train() +# summarize_az_train() +# summarize_compartment_train() From 59a38dbd1bff4aaa14298cc09a45f53a7618a2c9 Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Sun, 24 Nov 2024 22:27:24 +0100 Subject: [PATCH 32/35] Update all measurements for the inner ear analysis --- .../inner_ear/analysis/analyze_distances.py | 25 +++--- .../analysis/analyze_vesicle_diameters.py | 48 ++++++++--- .../analysis/analyze_vesicle_pools.py | 31 ++++--- scripts/inner_ear/analysis/common.py | 81 ++++++++++--------- scripts/summarize_data.py | 30 +++++-- 5 files changed, 131 insertions(+), 84 deletions(-) diff --git a/scripts/inner_ear/analysis/analyze_distances.py b/scripts/inner_ear/analysis/analyze_distances.py index 473d6b8..c98de9c 100644 --- a/scripts/inner_ear/analysis/analyze_distances.py +++ b/scripts/inner_ear/analysis/analyze_distances.py @@ -32,7 +32,6 @@ def _plot_all(distances): plt.show() -# TODO rename the method names. 
# We only care about the following distances: # - MP-V -> PD, AZ (Boundary) # - Docked-V -> PD, AZ @@ -98,7 +97,7 @@ def _plot(pool_name, distance_col, structure_name, ax): def for_tomos_with_annotation(plot_all=True): - manual_assignments, semi_automatic_assignments, automatic_assignments = get_measurements_with_annotation() + manual_assignments, semi_automatic_assignments, proofread_assignments = get_measurements_with_annotation() manual_distances = manual_assignments[ ["tomogram", "pool", "ribbon_distance [nm]", "pd_distance [nm]", "boundary_distance [nm]"] @@ -110,38 +109,38 @@ def for_tomos_with_annotation(plot_all=True): ] semi_automatic_distances["approach"] = ["semi_automatic"] * len(semi_automatic_distances) - automatic_distances = automatic_assignments[ + proofread_distances = proofread_assignments[ ["tomogram", "pool", "ribbon_distance [nm]", "pd_distance [nm]", "boundary_distance [nm]"] ] - automatic_distances["approach"] = ["automatic"] * len(automatic_distances) + proofread_distances["approach"] = ["proofread"] * len(proofread_distances) - distances = pd.concat([manual_distances, semi_automatic_distances, automatic_distances]) + distances = pd.concat([manual_distances, semi_automatic_distances, proofread_distances]) if plot_all: - distances.to_excel("./results/distances_with_manual_annotations.xlsx", index=False) + distances.to_excel("./results/distances_tomos_with_manual_annotations.xlsx", index=False) _plot_all(distances) else: - _plot_selected(distances, save_path="./results/selected_distances_manual_annotations.xlsx") + _plot_selected(distances, save_path="./results/selected_distances_tomos_with_manual_annotations.xlsx") def for_all_tomos(plot_all=True): - semi_automatic_assignments, automatic_assignments = get_all_measurements() + semi_automatic_assignments, proofread_assignments = get_all_measurements() semi_automatic_distances = semi_automatic_assignments[ ["tomogram", "pool", "ribbon_distance [nm]", "pd_distance [nm]", "boundary_distance [nm]"] ] semi_automatic_distances["approach"] = ["semi_automatic"] * len(semi_automatic_distances) - automatic_distances = automatic_assignments[ + proofread_distances = proofread_assignments[ ["tomogram", "pool", "ribbon_distance [nm]", "pd_distance [nm]", "boundary_distance [nm]"] ] - automatic_distances["approach"] = ["automatic"] * len(automatic_distances) + proofread_distances["approach"] = ["proofread"] * len(proofread_distances) - distances = pd.concat([semi_automatic_distances, automatic_distances]) + distances = pd.concat([semi_automatic_distances, proofread_distances]) if plot_all: - distances.to_excel("./results/distances_all_tomograms.xlsx", index=False) + distances.to_excel("./results/distances_all_tomos.xlsx", index=False) _plot_all(distances) else: - _plot_selected(distances, save_path="./results/selected_distances_all_tomograms.xlsx") + _plot_selected(distances, save_path="./results/selected_distances_all_tomos.xlsx") def main(): diff --git a/scripts/inner_ear/analysis/analyze_vesicle_diameters.py b/scripts/inner_ear/analysis/analyze_vesicle_diameters.py index 8fa5d9e..439fa0f 100644 --- a/scripts/inner_ear/analysis/analyze_vesicle_diameters.py +++ b/scripts/inner_ear/analysis/analyze_vesicle_diameters.py @@ -10,10 +10,12 @@ from synaptic_reconstruction.imod.export import load_points_from_imodinfo from synaptic_reconstruction.file_utils import get_data_path +from common import get_finished_tomos + sys.path.append("../processing") -def aggregate_radii(data_root, table, save_path, get_tab): +def 
aggregate_radii(data_root, table, save_path, get_tab, include_names): if os.path.exists(save_path): return @@ -24,6 +26,14 @@ def aggregate_radii(data_root, table, save_path, get_tab): continue tomo_name = os.path.relpath(folder, os.path.join(data_root, "Electron-Microscopy-Susi/Analyse")) + if ( + tomo_name in ("WT strong stim/Mouse 1/modiolar/1", "WT strong stim/Mouse 1/modiolar/2") and + (row["EM alt vs. Neu"] == "neu") + ): + continue + if tomo_name not in include_names: + continue + tab_path = get_tab(folder) if tab_path is None: continue @@ -38,7 +48,7 @@ def aggregate_radii(data_root, table, save_path, get_tab): radius_table.to_excel(save_path, index=False) -def aggregate_radii_imod(data_root, table, save_path): +def aggregate_radii_imod(data_root, table, save_path, include_names): if os.path.exists(save_path): return @@ -49,6 +59,15 @@ def aggregate_radii_imod(data_root, table, save_path): continue tomo_name = os.path.relpath(folder, os.path.join(data_root, "Electron-Microscopy-Susi/Analyse")) + tomo_name = os.path.relpath(folder, os.path.join(data_root, "Electron-Microscopy-Susi/Analyse")) + if ( + tomo_name in ("WT strong stim/Mouse 1/modiolar/1", "WT strong stim/Mouse 1/modiolar/2") and + (row["EM alt vs. Neu"] == "neu") + ): + continue + if tomo_name not in include_names: + continue + annotation_folder = os.path.join(folder, "manuell") if not os.path.exists(annotation_folder): annotation_folder = os.path.join(folder, "Manuell") @@ -84,7 +103,7 @@ def aggregate_radii_imod(data_root, table, save_path): radius_table.to_excel(save_path, index=False) -def get_tab_automatic(folder): +def get_tab_semi_automatic(folder): tab_name = "measurements_uncorrected_assignments.xlsx" res_path = os.path.join(folder, "korrektur", tab_name) if not os.path.exists(res_path): @@ -94,7 +113,7 @@ def get_tab_automatic(folder): return res_path -def get_tab_semi_automatic(folder): +def get_tab_proofread(folder): tab_name = "measurements.xlsx" res_path = os.path.join(folder, "korrektur", tab_name) if not os.path.exists(res_path): @@ -121,11 +140,22 @@ def main(): table_path = os.path.join(data_root, "Electron-Microscopy-Susi", "Übersicht.xlsx") table = parse_table(table_path, data_root) - # TODO get the radii from imod - aggregate_radii(data_root, table, save_path="./results/vesicle_radii_automatic.xlsx", get_tab=get_tab_automatic) - aggregate_radii(data_root, table, save_path="./results/vesicle_radii_semi_automatic.xlsx", get_tab=get_tab_semi_automatic) # noqa - aggregate_radii(data_root, table, save_path="./results/vesicle_radii_manual.xlsx", get_tab=get_tab_manual) - aggregate_radii_imod(data_root, table, save_path="./results/vesicle_radii_imod.xlsx") + all_tomos = get_finished_tomos() + aggregate_radii( + data_root, table, save_path="./results/vesicle_radii_semi_automatic.xlsx", get_tab=get_tab_semi_automatic, + include_names=all_tomos + ) + + aggregate_radii( + data_root, table, save_path="./results/vesicle_radii_proofread.xlsx", get_tab=get_tab_proofread, + include_names=all_tomos + ) + + # aggregate_radii(data_root, table, save_path="./results/vesicle_radii_manual.xlsx", get_tab=get_tab_manual) + aggregate_radii_imod( + data_root, table, save_path="./results/vesicle_radii_manual.xlsx", + include_names=all_tomos + ) if __name__ == "__main__": diff --git a/scripts/inner_ear/analysis/analyze_vesicle_pools.py b/scripts/inner_ear/analysis/analyze_vesicle_pools.py index 7b67c99..f27a5c2 100644 --- a/scripts/inner_ear/analysis/analyze_vesicle_pools.py +++ 
b/scripts/inner_ear/analysis/analyze_vesicle_pools.py @@ -35,68 +35,67 @@ def plot_pools(data, errors): def for_tomos_with_annotation(): - manual_assignments, semi_automatic_assignments, automatic_assignments = get_measurements_with_annotation() + manual_assignments, semi_automatic_assignments, proofread_assignments = get_measurements_with_annotation() manual_counts = manual_assignments.groupby(["tomogram", "pool"]).size().unstack(fill_value=0) semi_automatic_counts = semi_automatic_assignments.groupby(["tomogram", "pool"]).size().unstack(fill_value=0) - automatic_counts = automatic_assignments.groupby(["tomogram", "pool"]).size().unstack(fill_value=0) + proofread_counts = proofread_assignments.groupby(["tomogram", "pool"]).size().unstack(fill_value=0) manual_stats = manual_counts.agg(["mean", "std"]).transpose().reset_index() semi_automatic_stats = semi_automatic_counts.agg(["mean", "std"]).transpose().reset_index() - automatic_stats = automatic_counts.agg(["mean", "std"]).transpose().reset_index() + proofread_stats = proofread_counts.agg(["mean", "std"]).transpose().reset_index() data = pd.DataFrame({ "Pool": manual_stats["pool"], "Semi-automatic": semi_automatic_stats["mean"], - "Automatic": automatic_stats["mean"], + "Proofread": proofread_stats["mean"], "Manual": manual_stats["mean"], }) errors = pd.DataFrame({ "Pool": manual_stats["pool"], "Semi-automatic": semi_automatic_stats["std"], - "Automatic": automatic_stats["std"], + "Proofread": proofread_stats["std"], "Manual": manual_stats["std"], }) plot_pools(data, errors) - output_path = "./results/vesicle_pools_with_manual_annotations.xlsx" + output_path = "./results/vesicle_pools_tomos_with_manual_annotations.xlsx" data.to_excel(output_path, index=False, sheet_name="Average") with pd.ExcelWriter(output_path, engine="openpyxl", mode="a") as writer: errors.to_excel(writer, sheet_name="StandardDeviation", index=False) def for_all_tomos(): - semi_automatic_assignments, automatic_assignments = get_all_measurements() + semi_automatic_assignments, proofread_assignments = get_all_measurements() - automatic_counts = automatic_assignments.groupby(["tomogram", "pool"]).size().unstack(fill_value=0) - automatic_stats = automatic_counts.agg(["mean", "std"]).transpose().reset_index() + proofread_counts = proofread_assignments.groupby(["tomogram", "pool"]).size().unstack(fill_value=0) + proofread_stats = proofread_counts.agg(["mean", "std"]).transpose().reset_index() semi_automatic_counts = semi_automatic_assignments.groupby(["tomogram", "pool"]).size().unstack(fill_value=0) semi_automatic_stats = semi_automatic_counts.agg(["mean", "std"]).transpose().reset_index() data = pd.DataFrame({ - "Pool": automatic_stats["pool"], + "Pool": proofread_stats["pool"], "Semi-automatic": semi_automatic_stats["mean"], - "Automatic": automatic_stats["mean"], + "Proofread": proofread_stats["mean"], }) errors = pd.DataFrame({ - "Pool": automatic_stats["pool"], + "Pool": proofread_stats["pool"], "Semi-automatic": semi_automatic_stats["std"], - "Automatic": automatic_stats["std"], + "Proofread": proofread_stats["std"], }) plot_pools(data, errors) - output_path = "./results/vesicle_pools_all_tomograms.xlsx" + output_path = "./results/vesicle_pools_all_tomos.xlsx" data.to_excel(output_path, index=False, sheet_name="Average") with pd.ExcelWriter(output_path, engine="openpyxl", mode="a") as writer: errors.to_excel(writer, sheet_name="StandardDeviation", index=False) -# TODO: export the ribbon and pd stats (first need to discuss this with Fid) def main(): - # 
for_tomos_with_annotation() + for_tomos_with_annotation() for_all_tomos() diff --git a/scripts/inner_ear/analysis/common.py b/scripts/inner_ear/analysis/common.py index 0d4a46a..772cd31 100644 --- a/scripts/inner_ear/analysis/common.py +++ b/scripts/inner_ear/analysis/common.py @@ -1,6 +1,7 @@ # import os import sys +import numpy as np import pandas as pd sys.path.append("../processing") @@ -8,20 +9,43 @@ from parse_table import get_data_root # noqa +def get_finished_tomos(): + # data_root = get_data_root() + # val_table = os.path.join(data_root, "Electron-Microscopy-Susi", "Validierungs-Tabelle-v3.xlsx") + + val_table = "/home/pape/Desktop/sfb1286/mboc_synapse/misc/Validierungs-Tabelle-v3-passt.xlsx" + val_table = pd.read_excel(val_table) + + val_table = val_table[val_table["Kommentar 22.11.24"] == "passt"] + n_tomos = len(val_table) + assert n_tomos > 0 + + tomo_names = [] + for _, row in val_table.iterrows(): + name = "/".join([ + row.Bedingung, f"Mouse {int(row.Maus)}", + row["Ribbon-Orientierung"].lower().rstrip("?"), + str(int(row["OwnCloud-Unterordner"]))] + ) + tomo_names.append(name) + + return tomo_names + + def get_manual_assignments(): - result_path = "../results/20240917_1/fully_manual_analysis_results.xlsx" + result_path = "../results/20241124_1/fully_manual_analysis_results.xlsx" results = pd.read_excel(result_path) return results -def get_semi_automatic_assignments(tomograms): - result_path = "../results/20240917_1/automatic_analysis_results.xlsx" +def get_proofread_assignments(tomograms): + result_path = "../results/20241124_1/automatic_analysis_results.xlsx" results = pd.read_excel(result_path) results = results[results["tomogram"].isin(tomograms)] return results -def get_automatic_assignments(tomograms): +def get_semi_automatic_assignments(tomograms): result_path = "../results/fully_automatic_analysis_results.xlsx" results = pd.read_excel(result_path) results = results[results["tomogram"].isin(tomograms)] @@ -30,48 +54,29 @@ def get_automatic_assignments(tomograms): def get_measurements_with_annotation(): manual_assignments = get_manual_assignments() - manual_tomograms = pd.unique(manual_assignments["tomogram"]) - semi_automatic_assignments = get_semi_automatic_assignments(manual_tomograms) - tomograms = pd.unique(semi_automatic_assignments["tomogram"]) - manual_assignments = manual_assignments[manual_assignments["tomogram"].isin(tomograms)] - assert len(pd.unique(manual_assignments["tomogram"])) == len(pd.unique(semi_automatic_assignments["tomogram"])) + # Get the tomos with manual annotations and the ones which are fully done in proofreading. + manual_tomos = pd.unique(manual_assignments["tomogram"]) + finished_tomos = get_finished_tomos() + # Intersect them to get the tomos we are using. 
+ tomos = np.intersect1d(manual_tomos, finished_tomos) - automatic_assignments = get_automatic_assignments(tomograms) - filtered_tomograms = pd.unique(manual_assignments["tomogram"]) - assert len(filtered_tomograms) == len(pd.unique(automatic_assignments["tomogram"])) + manual_assignments = manual_assignments[manual_assignments["tomogram"].isin(tomos)] + semi_automatic_assignments = get_semi_automatic_assignments(tomos) + proofread_assignments = get_proofread_assignments(tomos) - print("Tomograms with manual annotations:", len(filtered_tomograms)) - return manual_assignments, semi_automatic_assignments, automatic_assignments + print("Tomograms with manual annotations:", len(tomos)) + return manual_assignments, semi_automatic_assignments, proofread_assignments def get_all_measurements(): - # data_root = get_data_root() - # val_table = os.path.join(data_root, "Electron-Microscopy-Susi", "Validierungs-Tabelle-v3.xlsx") - - val_table = "/home/pape/Desktop/sfb1286/mboc_synapse/misc/Validierungs-Tabelle-v3-passt.xlsx" - val_table = pd.read_excel(val_table) - - val_table = val_table[val_table["Kommentar 22.11.24"] == "passt"] - n_tomos = len(val_table) - print("All tomograms:", n_tomos) - assert n_tomos > 0 - tomo_names = [] - for _, row in val_table.iterrows(): - name = "/".join([ - row.Bedingung, f"Mouse {int(row.Maus)}", - row["Ribbon-Orientierung"].lower().rstrip("?"), - str(int(row["OwnCloud-Unterordner"]))] - ) - tomo_names.append(name) - - semi_automatic_assignments = get_semi_automatic_assignments(tomo_names) - filtered_tomo_names = pd.unique(semi_automatic_assignments["tomogram"]).tolist() + tomos = get_finished_tomos() + print("All tomograms:", len(tomos)) - automatic_assignments = get_automatic_assignments(tomo_names) - assert len(filtered_tomo_names) == len(pd.unique(automatic_assignments["tomogram"])) + semi_automatic_assignments = get_semi_automatic_assignments(tomos) + proofread_assignments = get_proofread_assignments(tomos) - return semi_automatic_assignments, automatic_assignments + return semi_automatic_assignments, proofread_assignments def main(): diff --git a/scripts/summarize_data.py b/scripts/summarize_data.py index 6642db1..66fe321 100644 --- a/scripts/summarize_data.py +++ b/scripts/summarize_data.py @@ -9,13 +9,6 @@ vesicle_train = pd.read_excel("data_summary/vesicle_training_data.xlsx") vesicle_da = pd.read_excel("data_summary/vesicle_domain_adaptation_data.xlsx", sheet_name="cryo") -# Inner ear trainign data: -# Sophia: 92 -# Rat: 19 -# Tether: 3 -# Ves Pools: 6 -# Total = 120 - def training_resolutions(): res_az = np.round(az_train["resolution"].mean(), 2) @@ -154,8 +147,29 @@ def summarize_compartment_train(): print("Compartments:", test_tomos["compartment_count"].sum()) +def summarize_inner_ear_data(): + # NOTE: this is not all trainig data, but the data on which we run the analysis + # New tomograms from Sophia. 
+ n_tomos_sophia_tot = 87 + n_tomos_sophia_manual = 33 # noqa + # This is the training data + n_tomos_sohphia_train = "" # TODO # noqa + + # Published tomograms + n_tomos_rat = 19 + n_tomos_tether = 3 + n_tomos_ves_pool = 6 + + # 28 + print("Total published:", n_tomos_rat + n_tomos_tether + n_tomos_ves_pool) + # 115 + print("Total:", n_tomos_rat + n_tomos_tether + n_tomos_ves_pool + n_tomos_sophia_tot) + + # training_resolutions() -summarize_vesicle_train_data() +# summarize_vesicle_train_data() # summarize_vesicle_da() # summarize_az_train() # summarize_compartment_train() +# summarize_inner_ear_data() +summarize_inner_ear_data() From 97289510790ad1c14192a1d6610cadcc703b8c05 Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Sun, 24 Nov 2024 23:15:12 +0100 Subject: [PATCH 33/35] Update vesicle diameter analysis --- .../analysis/analyze_vesicle_diameters.py | 60 +++++--- .../analysis/analyze_vesicle_radii.py | 132 ------------------ 2 files changed, 38 insertions(+), 154 deletions(-) delete mode 100644 scripts/inner_ear/analysis/analyze_vesicle_radii.py diff --git a/scripts/inner_ear/analysis/analyze_vesicle_diameters.py b/scripts/inner_ear/analysis/analyze_vesicle_diameters.py index 439fa0f..1f0b3a0 100644 --- a/scripts/inner_ear/analysis/analyze_vesicle_diameters.py +++ b/scripts/inner_ear/analysis/analyze_vesicle_diameters.py @@ -15,10 +15,7 @@ sys.path.append("../processing") -def aggregate_radii(data_root, table, save_path, get_tab, include_names): - if os.path.exists(save_path): - return - +def aggregate_diameters(data_root, table, save_path, get_tab, include_names, sheet_name): radius_table = [] for _, row in tqdm(table.iterrows(), total=len(table), desc="Collect tomo information"): folder = row["Local Path"] @@ -41,17 +38,20 @@ def aggregate_radii(data_root, table, save_path, get_tab, include_names): tab = pd.read_excel(tab_path) this_tab = tab[["pool", "radius [nm]"]] this_tab.insert(0, "tomogram", [tomo_name] * len(this_tab)) + this_tab.insert(3, "diameter [nm]", this_tab["radius [nm]"] * 2) radius_table.append(this_tab) radius_table = pd.concat(radius_table) - print("Saving table for", len(radius_table), "vesicles to", save_path) - radius_table.to_excel(save_path, index=False) - -def aggregate_radii_imod(data_root, table, save_path, include_names): + print("Saving table for", len(radius_table), "vesicles to", save_path, sheet_name) if os.path.exists(save_path): - return + with pd.ExcelWriter(save_path, engine="openpyxl", mode="a") as writer: + radius_table.to_excel(writer, sheet_name=sheet_name, index=False) + else: + radius_table.to_excel(save_path, sheet_name=sheet_name, index=False) + +def aggregate_diameters_imod(data_root, table, save_path, include_names, sheet_name): radius_table = [] for _, row in tqdm(table.iterrows(), total=len(table), desc="Collect tomo information"): folder = row["Local Path"] @@ -95,12 +95,16 @@ def aggregate_radii_imod(data_root, table, save_path, include_names): "tomogram": [tomo_name] * len(radii), "pool": [label_names[label_id] for label_id in labels], "radius [nm]": radii, + "diameter [nm]": 2 * radii, }) radius_table.append(this_tab) radius_table = pd.concat(radius_table) - print("Saving table for", len(radius_table), "vesicles to", save_path) - radius_table.to_excel(save_path, index=False) + print("Saving table for", len(radius_table), "vesicles to", save_path, sheet_name) + radius_table.to_excel(save_path, index=False, sheet_name=sheet_name) + + man_tomos = pd.unique(radius_table.tomogram) + return man_tomos def 
get_tab_semi_automatic(folder): @@ -141,20 +145,32 @@ def main(): table = parse_table(table_path, data_root) all_tomos = get_finished_tomos() - aggregate_radii( - data_root, table, save_path="./results/vesicle_radii_semi_automatic.xlsx", get_tab=get_tab_semi_automatic, - include_names=all_tomos - ) - aggregate_radii( - data_root, table, save_path="./results/vesicle_radii_proofread.xlsx", get_tab=get_tab_proofread, - include_names=all_tomos + print("All tomograms") + save_path = "./results/vesicle_diameters_all_tomos.xlsx" + aggregate_diameters( + data_root, table, save_path=save_path, get_tab=get_tab_semi_automatic, include_names=all_tomos, + sheet_name="Semi-automatic", + ) + aggregate_diameters( + data_root, table, save_path=save_path, get_tab=get_tab_proofread, include_names=all_tomos, + sheet_name="Proofread", ) - # aggregate_radii(data_root, table, save_path="./results/vesicle_radii_manual.xlsx", get_tab=get_tab_manual) - aggregate_radii_imod( - data_root, table, save_path="./results/vesicle_radii_manual.xlsx", - include_names=all_tomos + print() + print("Tomograms with manual annotations") + # aggregate_diameters(data_root, table, save_path="./results/vesicle_radii_manual.xlsx", get_tab=get_tab_manual) + save_path = "./results/vesicle_diameters_tomos_with_manual_annotations.xlsx" + man_tomos = aggregate_diameters_imod( + data_root, table, save_path=save_path, include_names=all_tomos, sheet_name="Manual", + ) + aggregate_diameters( + data_root, table, save_path=save_path, get_tab=get_tab_semi_automatic, include_names=man_tomos, + sheet_name="Semi-automatic", + ) + aggregate_diameters( + data_root, table, save_path=save_path, get_tab=get_tab_proofread, include_names=man_tomos, + sheet_name="Proofread", ) diff --git a/scripts/inner_ear/analysis/analyze_vesicle_radii.py b/scripts/inner_ear/analysis/analyze_vesicle_radii.py deleted file mode 100644 index 8fa5d9e..0000000 --- a/scripts/inner_ear/analysis/analyze_vesicle_radii.py +++ /dev/null @@ -1,132 +0,0 @@ -import os -import sys - -from glob import glob - -import mrcfile -import pandas as pd -from tqdm import tqdm - -from synaptic_reconstruction.imod.export import load_points_from_imodinfo -from synaptic_reconstruction.file_utils import get_data_path - -sys.path.append("../processing") - - -def aggregate_radii(data_root, table, save_path, get_tab): - if os.path.exists(save_path): - return - - radius_table = [] - for _, row in tqdm(table.iterrows(), total=len(table), desc="Collect tomo information"): - folder = row["Local Path"] - if folder == "": - continue - - tomo_name = os.path.relpath(folder, os.path.join(data_root, "Electron-Microscopy-Susi/Analyse")) - tab_path = get_tab(folder) - if tab_path is None: - continue - - tab = pd.read_excel(tab_path) - this_tab = tab[["pool", "radius [nm]"]] - this_tab.insert(0, "tomogram", [tomo_name] * len(this_tab)) - radius_table.append(this_tab) - - radius_table = pd.concat(radius_table) - print("Saving table for", len(radius_table), "vesicles to", save_path) - radius_table.to_excel(save_path, index=False) - - -def aggregate_radii_imod(data_root, table, save_path): - if os.path.exists(save_path): - return - - radius_table = [] - for _, row in tqdm(table.iterrows(), total=len(table), desc="Collect tomo information"): - folder = row["Local Path"] - if folder == "": - continue - - tomo_name = os.path.relpath(folder, os.path.join(data_root, "Electron-Microscopy-Susi/Analyse")) - annotation_folder = os.path.join(folder, "manuell") - if not os.path.exists(annotation_folder): - annotation_folder = 
os.path.join(folder, "Manuell") - if not os.path.exists(annotation_folder): - continue - - annotations = glob(os.path.join(annotation_folder, "*.mod")) - annotation_file = [ann for ann in annotations if ("vesikel" in ann.lower()) or ("vesicle" in ann.lower())] - if len(annotation_file) != 1: - continue - annotation_file = annotation_file[0] - - tomo_file = get_data_path(folder) - with mrcfile.open(tomo_file) as f: - shape = f.data.shape - resolution = list(f.voxel_size.item()) - resolution = [res / 10 for res in resolution][0] - - try: - _, radii, labels, label_names = load_points_from_imodinfo(annotation_file, shape, resolution=resolution) - except AssertionError: - continue - - this_tab = pd.DataFrame({ - "tomogram": [tomo_name] * len(radii), - "pool": [label_names[label_id] for label_id in labels], - "radius [nm]": radii, - }) - radius_table.append(this_tab) - - radius_table = pd.concat(radius_table) - print("Saving table for", len(radius_table), "vesicles to", save_path) - radius_table.to_excel(save_path, index=False) - - -def get_tab_automatic(folder): - tab_name = "measurements_uncorrected_assignments.xlsx" - res_path = os.path.join(folder, "korrektur", tab_name) - if not os.path.exists(res_path): - res_path = os.path.join(folder, "Korrektur", tab_name) - if not os.path.exists(res_path): - res_path = None - return res_path - - -def get_tab_semi_automatic(folder): - tab_name = "measurements.xlsx" - res_path = os.path.join(folder, "korrektur", tab_name) - if not os.path.exists(res_path): - res_path = os.path.join(folder, "Korrektur", tab_name) - if not os.path.exists(res_path): - res_path = None - return res_path - - -def get_tab_manual(folder): - tab_name = "measurements.xlsx" - res_path = os.path.join(folder, "manuell", tab_name) - if not os.path.exists(res_path): - res_path = os.path.join(folder, "Manuell", tab_name) - if not os.path.exists(res_path): - res_path = None - return res_path - - -def main(): - from parse_table import parse_table, get_data_root - - data_root = get_data_root() - table_path = os.path.join(data_root, "Electron-Microscopy-Susi", "Übersicht.xlsx") - table = parse_table(table_path, data_root) - - # TODO get the radii from imod - aggregate_radii(data_root, table, save_path="./results/vesicle_radii_automatic.xlsx", get_tab=get_tab_automatic) - aggregate_radii(data_root, table, save_path="./results/vesicle_radii_semi_automatic.xlsx", get_tab=get_tab_semi_automatic) # noqa - aggregate_radii(data_root, table, save_path="./results/vesicle_radii_manual.xlsx", get_tab=get_tab_manual) - aggregate_radii_imod(data_root, table, save_path="./results/vesicle_radii_imod.xlsx") - - -if __name__ == "__main__": - main() From 622da1e618953d385e6a93285e7e167a049bddc5 Mon Sep 17 00:00:00 2001 From: SarahMuth Date: Wed, 27 Nov 2024 12:00:34 +0100 Subject: [PATCH 34/35] update AZ evaluation --- scripts/cooper/training/evaluate_AZ.py | 63 ++++++++++--- scripts/cooper/training/postprocess_AZ.py | 107 ++++++++++++++++++++++ 2 files changed, 158 insertions(+), 12 deletions(-) create mode 100644 scripts/cooper/training/postprocess_AZ.py diff --git a/scripts/cooper/training/evaluate_AZ.py b/scripts/cooper/training/evaluate_AZ.py index fc32214..dbf8d67 100644 --- a/scripts/cooper/training/evaluate_AZ.py +++ b/scripts/cooper/training/evaluate_AZ.py @@ -26,7 +26,37 @@ def evaluate(labels, segmentation): score = dice_score(segmentation, labels) return score -def evaluate_file(labels_path, segmentation_path, model_name, crop= False): +def compute_precision(ground_truth, segmentation): + """ + 
Computes the Precision score for 3D arrays representing the ground truth and segmentation. + + Parameters: + - ground_truth (np.ndarray): 3D binary array where 1 represents the ground truth region. + - segmentation (np.ndarray): 3D binary array where 1 represents the predicted segmentation region. + + Returns: + - precision (float): The precision score, or 0 if the segmentation is empty. + """ + assert ground_truth.shape == segmentation.shape + # Ensure inputs are binary arrays + ground_truth = (ground_truth > 0).astype(np.int32) + segmentation = (segmentation > 0).astype(np.int32) + + # Compute intersection: overlap between segmentation and ground truth + intersection = np.sum(segmentation * ground_truth) + + # Compute total predicted (segmentation region) + total_predicted = np.sum(segmentation) + + # Handle case where there are no predictions + if total_predicted == 0: + return 0.0 # Precision is undefined; returning 0 + + # Calculate precision + precision = intersection / total_predicted + return precision + +def evaluate_file(labels_path, segmentation_path, model_name, crop= False, precision_score=False): print(f"Evaluate labels {labels_path} and vesicles {segmentation_path}") ds_name = os.path.basename(os.path.dirname(labels_path)) @@ -34,22 +64,25 @@ def evaluate_file(labels_path, segmentation_path, model_name, crop= False): #get the labels and segmentation with h5py.File(labels_path) as label_file: - gt = label_file["/labels/AZ"][:] + gt = label_file["/labels/thin_az"][:] with h5py.File(segmentation_path) as seg_file: - segmentation = seg_file["/AZ/segment_from_AZmodel_v3"][:] + segmentation = seg_file["/AZ/thin_az"][:] if crop: print("cropping the annotation and segmentation") segmentation, gt = extract_gt_bounding_box(segmentation, gt) # Evaluate the match of ground truth and segmentation - dice_score = evaluate(gt, segmentation) + if precision_score: + precision = compute_precision(gt, segmentation) + else: + dice_score = evaluate(gt, segmentation) # Store results result_folder = "/user/muth9/u12095/synaptic-reconstruction/scripts/cooper/evaluation_results" os.makedirs(result_folder, exist_ok=True) - result_path = os.path.join(result_folder, f"evaluation_{model_name}.csv") + result_path = os.path.join(result_folder, f"evaluation_{model_name}_dice_thinpred_thinanno.csv") print("Evaluation results are saved to:", result_path) # Load existing results if the file exists @@ -59,9 +92,14 @@ def evaluate_file(labels_path, segmentation_path, model_name, crop= False): results = None # Create a new DataFrame for the current evaluation - res = pd.DataFrame( - [[ds_name, tomo, dice_score]], columns=["dataset", "tomogram", "dice_score"] - ) + if precision_score: + res = pd.DataFrame( + [[ds_name, tomo, precision]], columns=["dataset", "tomogram", "precision"] + ) + else: + res = pd.DataFrame( + [[ds_name, tomo, dice_score]], columns=["dataset", "tomogram", "dice_score"] + ) # Combine with existing results or initialize with the new results if results is None: @@ -72,7 +110,7 @@ def evaluate_file(labels_path, segmentation_path, model_name, crop= False): # Save the results to the CSV file results.to_csv(result_path, index=False) -def evaluate_folder(labels_path, segmentation_path, model_name, crop = False): +def evaluate_folder(labels_path, segmentation_path, model_name, crop = False, precision_score=False): print(f"Evaluating folder {segmentation_path}") print(f"Using labels stored in {labels_path}") @@ -82,7 +120,7 @@ def evaluate_folder(labels_path, segmentation_path, model_name, crop = 
False): for vesicle_file in vesicles_files: if vesicle_file in label_files: - evaluate_file(os.path.join(labels_path, vesicle_file), os.path.join(segmentation_path, vesicle_file), model_name, crop) + evaluate_file(os.path.join(labels_path, vesicle_file), os.path.join(segmentation_path, vesicle_file), model_name, crop, precision_score) @@ -93,13 +131,14 @@ def main(): parser.add_argument("-v", "--segmentation_path", required=True) parser.add_argument("-n", "--model_name", required=True) parser.add_argument("--crop", action="store_true", help="Crop around the annotation.") + parser.add_argument("--precision", action="store_true", help="Calculate precision score.") args = parser.parse_args() segmentation_path = args.segmentation_path if os.path.isdir(segmentation_path): - evaluate_folder(args.labels_path, segmentation_path, args.model_name, args.crop) + evaluate_folder(args.labels_path, segmentation_path, args.model_name, args.crop, args.precision) else: - evaluate_file(args.labels_path, segmentation_path, args.model_name, args.crop) + evaluate_file(args.labels_path, segmentation_path, args.model_name, args.crop, args.precision) diff --git a/scripts/cooper/training/postprocess_AZ.py b/scripts/cooper/training/postprocess_AZ.py new file mode 100644 index 0000000..e2b849e --- /dev/null +++ b/scripts/cooper/training/postprocess_AZ.py @@ -0,0 +1,107 @@ +import os +from glob import glob +import argparse + +import h5py +import numpy as np +from tqdm import tqdm +from scipy.ndimage import binary_closing +from skimage.measure import label +from synaptic_reconstruction.ground_truth.shape_refinement import edge_filter +from synaptic_reconstruction.morphology import skeletonize_object + + + +def filter_az(path, output_path): + """Filter the active zone (AZ) data from the HDF5 file.""" + ds, fname = os.path.split(path) + dataset_name = os.path.basename(ds) + out_file_path = os.path.join(output_path, "postprocessed_AZ", dataset_name, fname) + + os.makedirs(os.path.dirname(out_file_path), exist_ok=True) + + if os.path.exists(out_file_path): + return + + with h5py.File(path, "r") as f: + raw = f["raw"][:] + az = f["AZ/segment_from_AZmodel_v3"][:] + + hmap = edge_filter(raw, sigma=1.0, method="sato", per_slice=True, n_threads=8) + + # Filter the active zone by combining a bunch of things: + # 1. Find a mask with high values in the ridge filter. + threshold_hmap = 0.5 + az_filtered = hmap > threshold_hmap + # 2. Intersect it with the active zone predictions. + az_filtered = np.logical_and(az_filtered, az) + + # Postprocessing of the filtered active zone: + # 1. Apply connected components and only keep the largest component. + az_filtered = label(az_filtered) + ids, sizes = np.unique(az_filtered, return_counts=True) + ids, sizes = ids[1:], sizes[1:] + az_filtered = (az_filtered == ids[np.argmax(sizes)]).astype("uint8") + # 2. Apply binary closing. + az_filtered = np.logical_or(az_filtered, binary_closing(az_filtered, iterations=4)).astype("uint8") + + # Save the result. 
+ with h5py.File(out_file_path, "a") as f: + f.create_dataset("AZ/filtered_az", data=az_filtered, compression="gzip") + + +def process_az(path, view=False): + """Skeletonize the filtered AZ data to obtain a 1D representation.""" + key = "AZ/thin_az" + with h5py.File(path, "r") as f: + if key in f and not view: + return + az_seg = f["AZ/filtered_az"][:] + + az_thin = skeletonize_object(az_seg) + + if view: + import napari + ds, fname = os.path.split(path) + raw_path = os.path.join(ROOT, ds, fname) + with h5py.File(raw_path, "r") as f: + raw = f["raw"][:] + v = napari.Viewer() + v.add_image(raw) + v.add_labels(az_seg) + v.add_labels(az_thin) + napari.run() + else: + with h5py.File(path, "a") as f: + f.create_dataset(key, data=az_thin, compression="gzip") + + +def filter_all_azs(input_path, output_path): + """Apply filtering to all AZ data in the specified directory.""" + files = sorted(glob(os.path.join(input_path, "**/*.h5"), recursive=True)) + for ff in tqdm(files, desc="Filtering AZ segmentations"): + filter_az(ff, output_path) + + +def process_all_azs(output_path): + """Apply skeletonization to all filtered AZ data.""" + files = sorted(glob(os.path.join(output_path, "postprocessed_AZ", "**/*.h5"), recursive=True)) + for ff in tqdm(files, desc="Thinning AZ segmentations"): + process_az(ff, view=False) + + +def main(): + parser = argparse.ArgumentParser(description="Filter and process AZ data.") + parser.add_argument("input_path", type=str, help="Path to the root directory containing datasets.") + parser.add_argument("output_path", type=str, help="Path to the root directory for saving processed data.") + args = parser.parse_args() + + input_path = args.input_path + output_path = args.output_path + + filter_all_azs(input_path, output_path) + process_all_azs(output_path) + + +if __name__ == "__main__": + main() From 686b018fc4a2af32e8b7a9e8747e9708ce9d2d66 Mon Sep 17 00:00:00 2001 From: SarahMuth Date: Thu, 28 Nov 2024 21:28:44 +0100 Subject: [PATCH 35/35] erosion dilation filtering of AZ --- scripts/cooper/training/filter_AZ.py | 67 ++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) create mode 100644 scripts/cooper/training/filter_AZ.py diff --git a/scripts/cooper/training/filter_AZ.py b/scripts/cooper/training/filter_AZ.py new file mode 100644 index 0000000..78b8ba7 --- /dev/null +++ b/scripts/cooper/training/filter_AZ.py @@ -0,0 +1,67 @@ +import os +import h5py +import numpy as np +from scipy.ndimage import binary_erosion, binary_dilation, label + +def process_labels(label_file_path, erosion_structure=None, dilation_structure=None): + """ + Process the labels: perform erosion, find the largest connected component, + and perform dilation on it. + + Args: + label_file_path (str): Path to the HDF5 file containing the label data. + erosion_structure (ndarray, optional): Structuring element for erosion. + dilation_structure (ndarray, optional): Structuring element for dilation. + + Returns: + None: The processed data is saved back into the HDF5 file under a new key. 
+ """ + with h5py.File(label_file_path, "r+") as label_file: + # Read the ground truth data + gt = label_file["/labels/filtered_az"][:] + + # Perform binary erosion + eroded = binary_erosion(gt, structure=erosion_structure) + + # Label connected components + labeled_array, num_features = label(eroded) + + # Identify the largest connected component + if num_features > 0: + largest_component_label = np.argmax(np.bincount(labeled_array.flat, weights=eroded.flat)[1:]) + 1 + largest_component = (labeled_array == largest_component_label) + else: + largest_component = np.zeros_like(gt, dtype=bool) + + # Perform binary dilation on the largest connected component + dilated = binary_dilation(largest_component, structure=dilation_structure) + + # Save the result back into the HDF5 file + if "labels/erosion_filtered_az" in label_file: + del label_file["labels/erosion_filtered_az"] # Remove if it already exists + label_file.create_dataset("labels/erosion_filtered_az", data=dilated.astype(np.uint8), compression="gzip") + +def process_folder(folder_path, erosion_structure=None, dilation_structure=None): + """ + Process all HDF5 files in a folder. + + Args: + folder_path (str): Path to the folder containing HDF5 files. + erosion_structure (ndarray, optional): Structuring element for erosion. + dilation_structure (ndarray, optional): Structuring element for dilation. + + Returns: + None + """ + for file_name in os.listdir(folder_path): + if file_name.endswith(".h5") or file_name.endswith(".hdf5"): + label_file_path = os.path.join(folder_path, file_name) + print(f"Processing {label_file_path}...") + process_labels(label_file_path, erosion_structure, dilation_structure) + +# Example usage +if __name__ == "__main__": + folder_path = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/training_AZ_v2/postprocessed_AZ/12_chemical_fix_cryopreparation" # Replace with the path to your folder + erosion_structure = np.ones((3, 3, 3)) # Example structuring element + dilation_structure = np.ones((3, 3, 3)) # Example structuring element + process_folder(folder_path, erosion_structure, dilation_structure)