From e2c4f4b122b6c517615a2a24e7dc7ae567bc787a Mon Sep 17 00:00:00 2001 From: SarahMuth Date: Wed, 23 Oct 2024 20:00:05 +0200 Subject: [PATCH 01/35] AZ segmentation --- scripts/cooper/AZ_segmentation_h5.py | 142 ++++++++++++++++++++++++ scripts/rizzoli/evaluation_2D.py | 4 +- synaptic_reconstruction/inference/AZ.py | 82 ++++++++++++++ 3 files changed, 226 insertions(+), 2 deletions(-) create mode 100644 scripts/cooper/AZ_segmentation_h5.py create mode 100644 synaptic_reconstruction/inference/AZ.py diff --git a/scripts/cooper/AZ_segmentation_h5.py b/scripts/cooper/AZ_segmentation_h5.py new file mode 100644 index 0000000..07ff718 --- /dev/null +++ b/scripts/cooper/AZ_segmentation_h5.py @@ -0,0 +1,142 @@ +import argparse +import h5py +import os +from pathlib import Path + +from tqdm import tqdm +from elf.io import open_file + +from synaptic_reconstruction.inference.AZ import segment_AZ +from synaptic_reconstruction.inference.util import parse_tiling + +def _require_output_folders(output_folder): + #seg_output = os.path.join(output_folder, "segmentations") + seg_output = output_folder + os.makedirs(seg_output, exist_ok=True) + return seg_output + +def get_volume(input_path): + ''' + with h5py.File(input_path) as seg_file: + input_volume = seg_file["raw"][:] + ''' + with open_file(input_path, "r") as f: + + # Try to automatically derive the key with the raw data. + keys = list(f.keys()) + if len(keys) == 1: + key = keys[0] + elif "data" in keys: + key = "data" + elif "raw" in keys: + key = "raw" + + input_volume = f[key][:] + return input_volume + +def run_AZ_segmentation(input_path, output_path, model_path, mask_path, mask_key,tile_shape, halo, key_label): + tiling = parse_tiling(tile_shape, halo) + print(f"using tiling {tiling}") + input = get_volume(input_path) + + #check if we have a restricting mask for the segmentation + if mask_path is not None: + with open_file(mask_path, "r") as f: + mask = f[mask_key][:] + else: + mask = None + + foreground = segment_AZ(input_volume=input, model_path=model_path, verbose=False, tiling=tiling, mask = mask) + + seg_output = _require_output_folders(output_path) + file_name = Path(input_path).stem + seg_path = os.path.join(seg_output, f"{file_name}.h5") + + #check + os.makedirs(Path(seg_path).parent, exist_ok=True) + + print(f"Saving results in {seg_path}") + with h5py.File(seg_path, "a") as f: + if "raw" in f: + print("raw image already saved") + else: + f.create_dataset("raw", data=input, compression="gzip") + + key=f"AZ/segment_from_{key_label}" + if key in f: + print("Skipping", input_path, "because", key, "exists") + else: + f.create_dataset(key, data=foreground, compression="gzip") + + if mask is not None: + if mask_key in f: + print("mask image already saved") + else: + f.create_dataset(mask_key, data = mask, compression = "gzip") + + + + +def segment_folder(args): + input_files = [] + for root, dirs, files in os.walk(args.input_path): + input_files.extend([ + os.path.join(root, name) for name in files if name.endswith(".h5") + ]) + print(input_files) + pbar = tqdm(input_files, desc="Run segmentation") + for input_path in pbar: + + filename = os.path.basename(input_path) + try: + mask_path = os.path.join(args.mask_path, filename) + except: + print(f"Mask file not found for {input_path}") + mask_path = None + + run_AZ_segmentation(input_path, args.output_path, args.model_path, mask_path, args.mask_key, args.tile_shape, args.halo, args.key_label) + +def main(): + parser = argparse.ArgumentParser(description="Segment vesicles in EM tomograms.") + 
parser.add_argument( + "--input_path", "-i", required=True, + help="The filepath to the mrc file or the directory containing the tomogram data." + ) + parser.add_argument( + "--output_path", "-o", required=True, + help="The filepath to directory where the segmentations will be saved." + ) + parser.add_argument( + "--model_path", "-m", required=True, help="The filepath to the vesicle model." + ) + parser.add_argument( + "--mask_path", help="The filepath to a h5 file with a mask that will be used to restrict the segmentation. Needs to be in combination with mask_key." + ) + parser.add_argument( + "--mask_key", help="Key name that holds the mask segmentation" + ) + parser.add_argument( + "--tile_shape", type=int, nargs=3, + help="The tile shape for prediction. Lower the tile shape if GPU memory is insufficient." + ) + parser.add_argument( + "--halo", type=int, nargs=3, + help="The halo for prediction. Increase the halo to minimize boundary artifacts." + ) + parser.add_argument( + "--key_label", "-k", default = "combined_vesicles", + help="Give the key name for saving the segmentation in h5." + ) + args = parser.parse_args() + + input_ = args.input_path + + if os.path.isdir(input_): + segment_folder(args) + else: + run_AZ_segmentation(input_, args.output_path, args.model_path, args.mask_path, args.mask_key, args.tile_shape, args.halo, args.key_label) + + print("Finished segmenting!") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/scripts/rizzoli/evaluation_2D.py b/scripts/rizzoli/evaluation_2D.py index 1cae666..18fd4f1 100644 --- a/scripts/rizzoli/evaluation_2D.py +++ b/scripts/rizzoli/evaluation_2D.py @@ -58,8 +58,8 @@ def evaluate_file(labels_path, vesicles_path, model_name, segment_key, anno_key) #get the labels and vesicles with h5py.File(labels_path) as label_file: labels = label_file["labels"] - #vesicles = labels["vesicles"] - gt = labels[anno_key][:] + vesicles = labels["vesicles"] + gt = vesicles[anno_key][:] with h5py.File(vesicles_path) as seg_file: segmentation = seg_file["vesicles"] diff --git a/synaptic_reconstruction/inference/AZ.py b/synaptic_reconstruction/inference/AZ.py new file mode 100644 index 0000000..b93218f --- /dev/null +++ b/synaptic_reconstruction/inference/AZ.py @@ -0,0 +1,82 @@ +import time +from typing import Dict, List, Optional, Tuple, Union + +import elf.parallel as parallel +import numpy as np +import torch + +from synaptic_reconstruction.inference.util import get_prediction, _Scaler + + +def _run_segmentation( + foreground, verbose, min_size, + # blocking shapes for parallel computation + block_shape=(128, 256, 256), +): + + # get the segmentation via seeded watershed + t0 = time.time() + seg = parallel.label(foreground > 0.5, block_shape=block_shape, verbose=verbose) + if verbose: + print("Compute connected components in", time.time() - t0, "s") + + # size filter + t0 = time.time() + ids, sizes = parallel.unique(seg, return_counts=True, block_shape=block_shape, verbose=verbose) + filter_ids = ids[sizes < min_size] + seg[np.isin(seg, filter_ids)] = 0 + if verbose: + print("Size filter in", time.time() - t0, "s") + seg = np.where(seg > 0, 1, 0) + return seg + +def segment_AZ( + input_volume: np.ndarray, + model_path: Optional[str] = None, + model: Optional[torch.nn.Module] = None, + tiling: Optional[Dict[str, Dict[str, int]]] = None, + min_size: int = 500, + verbose: bool = True, + return_predictions: bool = False, + scale: Optional[List[float]] = None, + mask: Optional[np.ndarray] = None, +) -> Union[np.ndarray, 
Tuple[np.ndarray, np.ndarray]]: + """ + Segment mitochondria in an input volume. + + Args: + input_volume: The input volume to segment. + model_path: The path to the model checkpoint if `model` is not provided. + model: Pre-loaded model. Either `model_path` or `model` is required. + tiling: The tiling configuration for the prediction. + verbose: Whether to print timing information. + scale: The scale factor to use for rescaling the input volume before prediction. + mask: An optional mask that is used to restrict the segmentation. + + Returns: + The foreground mask as a numpy array. + """ + if verbose: + print("Segmenting AZ in volume of shape", input_volume.shape) + # Create the scaler to handle prediction with a different scaling factor. + scaler = _Scaler(scale, verbose) + input_volume = scaler.scale_input(input_volume) + + # Rescale the mask if it was given and run prediction. + if mask is not None: + mask = scaler.scale_input(mask, is_segmentation=True) + pred = get_prediction(input_volume, model_path=model_path, model=model, tiling=tiling, mask=mask, verbose=verbose) + + # Run segmentation and rescale the result if necessary. + foreground = pred[0] + #print(f"shape {foreground.shape}") + #foreground = pred[0, :, :, :] + print(f"shape {foreground.shape}") + + segmentation = _run_segmentation(foreground, verbose=verbose, min_size=min_size) + + if return_predictions: + pred = scaler.rescale_output(pred, is_segmentation=False) + return segmentation, pred + return segmentation + From a0f713f80ce3c75b0534cbdc2a79214c6dd52c6f Mon Sep 17 00:00:00 2001 From: SarahMuth Date: Mon, 28 Oct 2024 13:43:56 +0100 Subject: [PATCH 02/35] updates --- .gitignore | 4 +++- scripts/cooper/training/train_AZ.py | 11 ++++++---- scripts/rizzoli/2D_vesicle_segmentation.py | 20 +++++++++++++------ scripts/rizzoli/train_2D_domain_adaptation.py | 16 ++++++++++----- 4 files changed, 35 insertions(+), 16 deletions(-) diff --git a/.gitignore b/.gitignore index d040431..ca03577 100644 --- a/.gitignore +++ b/.gitignore @@ -10,4 +10,6 @@ slurm/ scripts/cooper/evaluation_results/ scripts/cooper/training/copy_testset.py scripts/rizzoli/upsample_data.py -scripts/cooper/training/find_rec_testset.py \ No newline at end of file +scripts/cooper/training/find_rec_testset.py +scripts/rizzoli/combine_2D_slices.py +scripts/rizzoli/combine_2D_slices_raw.py \ No newline at end of file diff --git a/scripts/cooper/training/train_AZ.py b/scripts/cooper/training/train_AZ.py index 1468eaf..9d7d283 100644 --- a/scripts/cooper/training/train_AZ.py +++ b/scripts/cooper/training/train_AZ.py @@ -12,7 +12,7 @@ from synaptic_reconstruction.training import semisupervised_training TRAIN_ROOT = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/exported_imod_objects" -OUTPUT_ROOT = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/training_AZ_v1" +OUTPUT_ROOT = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/training_AZ_v2" def _require_train_val_test_split(datasets): @@ -80,8 +80,11 @@ def get_paths(split, datasets, testset=True): def train(key, ignore_label = None, training_2D = False, testset = True): + os.makedirs(OUTPUT_ROOT, exist_ok=True) + datasets = [ "01_hoi_maus_2020_incomplete", + "04_hoi_stem_examples", "06_hoi_wt_stem750_fm", "12_chemical_fix_cryopreparation" ] @@ -93,7 +96,7 @@ def train(key, ignore_label = None, training_2D = False, testset = True): print(len(val_paths), "tomograms for validation") patch_shape = [48, 256, 256] - model_name=f"3D-AZ-model-v1" + model_name=f"3D-AZ-model-v3" 
#checking for 2D training if training_2D: @@ -109,11 +112,11 @@ def train(key, ignore_label = None, training_2D = False, testset = True): val_paths=val_paths, label_key=f"/labels/{key}", patch_shape=patch_shape, batch_size=batch_size, - sampler = torch_em.data.sampler.MinInstanceSampler(min_num_instances=1), + sampler = torch_em.data.sampler.MinInstanceSampler(min_num_instances=1, p_reject = 0.95), n_samples_train=None, n_samples_val=25, check=check, save_root="/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/AZ_models", - n_iterations=int(5e3), + n_iterations=int(5e4), ignore_label= ignore_label, label_transform=torch_em.transform.label.labels_to_binary, out_channels = 1, diff --git a/scripts/rizzoli/2D_vesicle_segmentation.py b/scripts/rizzoli/2D_vesicle_segmentation.py index 7974e3b..ddfdab7 100644 --- a/scripts/rizzoli/2D_vesicle_segmentation.py +++ b/scripts/rizzoli/2D_vesicle_segmentation.py @@ -57,7 +57,7 @@ def get_volume(input_path): input_volume = seg_file["raw"][:] return input_volume -def run_vesicle_segmentation(input_path, output_path, model_path, tile_shape, halo, include_boundary, key_label): +def run_vesicle_segmentation(input_path, output_path, model_path, tile_shape, halo, include_boundary, key_label, scale): tiling = get_2D_tiling() @@ -72,20 +72,24 @@ def run_vesicle_segmentation(input_path, output_path, model_path, tile_shape, ha device = "cuda" if torch.cuda.is_available() else "cpu" model = torch_em.util.load_model(checkpoint=model_path, device=device) - def process_slices(input_volume): + def process_slices(input_volume, scale): processed_slices = [] foreground = [] boundaries = [] for z in range(input_volume.shape[0]): slice_ = input_volume[z, :, :] - segmented_slice, prediction_slice = segment_vesicles(input_volume=slice_, model=model, verbose=False, tiling=tiling, return_predictions=True, exclude_boundary=not include_boundary) + segmented_slice, prediction_slice = segment_vesicles(input_volume=slice_, model=model, verbose=False, tiling=tiling, return_predictions=True, scale = scale, exclude_boundary=not include_boundary) processed_slices.append(segmented_slice) foreground_pred_slice, boundaries_pred_slice = prediction_slice[:2] foreground.append(foreground_pred_slice) boundaries.append(boundaries_pred_slice) return processed_slices, foreground, boundaries - segmentation, foreground, boundaries = process_slices(input) + if input.ndim == 2: + segmentation, prediction = segment_vesicles(input_volume=input, model=model, verbose=False, tiling=tiling, return_predictions=True, scale = scale, exclude_boundary=not include_boundary) + foreground, boundaries = prediction[:2] + else: + segmentation, foreground, boundaries = process_slices(input, scale) seg_output = _require_output_folders(output_path) file_name = Path(input_path).stem @@ -121,7 +125,7 @@ def segment_folder(args): print(input_files) pbar = tqdm(input_files, desc="Run segmentation") for input_path in pbar: - run_vesicle_segmentation(input_path, args.output_path, args.model_path, args.tile_shape, args.halo, args.include_boundary, args.key_label) + run_vesicle_segmentation(input_path, args.output_path, args.model_path, args.tile_shape, args.halo, args.include_boundary, args.key_label, args.scale) def main(): parser = argparse.ArgumentParser(description="Segment vesicles in EM tomograms.") @@ -152,6 +156,10 @@ def main(): "--key_label", "-k", default = "combined_vesicles", help="Give the key name for saving the segmentation in h5." 
) + parser.add_argument( + "--scale", "-s", type=float, nargs=2, + help="Scales the input data." + ) args = parser.parse_args() input_ = args.input_path @@ -159,7 +167,7 @@ def main(): if os.path.isdir(input_): segment_folder(args) else: - run_vesicle_segmentation(input_, args.output_path, args.model_path, args.tile_shape, args.halo, args.include_boundary, args.key_label) + run_vesicle_segmentation(input_, args.output_path, args.model_path, args.tile_shape, args.halo, args.include_boundary, args.key_label, args.scale) print("Finished segmenting!") diff --git a/scripts/rizzoli/train_2D_domain_adaptation.py b/scripts/rizzoli/train_2D_domain_adaptation.py index 86eedd1..c8a9419 100644 --- a/scripts/rizzoli/train_2D_domain_adaptation.py +++ b/scripts/rizzoli/train_2D_domain_adaptation.py @@ -6,11 +6,13 @@ from sklearn.model_selection import train_test_split from synaptic_reconstruction.training.domain_adaptation import mean_teacher_adaptation -TRAIN_ROOT = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/rizzoli/extracted" -OUTPUT_ROOT = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/2D_DA_training_rizzoli" +TRAIN_ROOT = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/2D_data" +OUTPUT_ROOT = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/2D_DA_training_rizzoli_v4" def _require_train_val_test_split(datasets): train_ratio, val_ratio, test_ratio = 0.8, 0.1, 0.1 + if len(datasets) < 10: + train_ratio, val_ratio, test_ratio = 0.5, 0.25, 0.25 def _train_val_test_split(names): train, test = train_test_split(names, test_size=1 - train_ratio, shuffle=True) @@ -71,8 +73,12 @@ def get_paths(split, datasets, testset=True): return paths def vesicle_domain_adaptation(teacher_model, testset = True): + + os.makedirs(OUTPUT_ROOT, exist_ok=True) + datasets = [ - "upsampled_by2" + "maus_2020_tem2d_wt_unt_div14_exported_scaled_grouped", + "20241021_imig_2014_data_transfer_exported_grouped" ] train_paths = get_paths("train", datasets=datasets, testset=testset) val_paths = get_paths("val", datasets=datasets, testset=testset) @@ -83,7 +89,7 @@ def vesicle_domain_adaptation(teacher_model, testset = True): #adjustable parameters patch_shape = [1, 256, 256] #2D - model_name = "2D-vesicle-DA-rizzoli-v3" + model_name = "2D-vesicle-DA-rizzoli-v5" model_root = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/models_v2/checkpoints/" checkpoint_path = os.path.join(model_root, teacher_model) @@ -97,7 +103,7 @@ def vesicle_domain_adaptation(teacher_model, testset = True): save_root="/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/DA_models", source_checkpoint=checkpoint_path, confidence_threshold=0.75, - n_iterations=int(5e4), + n_iterations=int(5e5), ) From ac1ac0082b0154e04729c8e55802b73a81ab3afd Mon Sep 17 00:00:00 2001 From: SarahMuth Date: Mon, 28 Oct 2024 16:59:33 +0100 Subject: [PATCH 03/35] update 2D DA --- scripts/rizzoli/train_2D_domain_adaptation.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/scripts/rizzoli/train_2D_domain_adaptation.py b/scripts/rizzoli/train_2D_domain_adaptation.py index c8a9419..3beb487 100644 --- a/scripts/rizzoli/train_2D_domain_adaptation.py +++ b/scripts/rizzoli/train_2D_domain_adaptation.py @@ -7,7 +7,7 @@ from synaptic_reconstruction.training.domain_adaptation import mean_teacher_adaptation TRAIN_ROOT = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/2D_data" -OUTPUT_ROOT = 
"/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/2D_DA_training_rizzoli_v4" +OUTPUT_ROOT = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/2D_DA_training_2Dcooper_v1" def _require_train_val_test_split(datasets): train_ratio, val_ratio, test_ratio = 0.8, 0.1, 0.1 @@ -77,9 +77,8 @@ def vesicle_domain_adaptation(teacher_model, testset = True): os.makedirs(OUTPUT_ROOT, exist_ok=True) datasets = [ - "maus_2020_tem2d_wt_unt_div14_exported_scaled_grouped", "20241021_imig_2014_data_transfer_exported_grouped" -] +]#"maus_2020_tem2d_wt_unt_div14_exported_scaled_grouped", train_paths = get_paths("train", datasets=datasets, testset=testset) val_paths = get_paths("val", datasets=datasets, testset=testset) @@ -89,7 +88,7 @@ def vesicle_domain_adaptation(teacher_model, testset = True): #adjustable parameters patch_shape = [1, 256, 256] #2D - model_name = "2D-vesicle-DA-rizzoli-v5" + model_name = "2D-vesicle-DA-2Dcooper-imig-v1" model_root = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/models_v2/checkpoints/" checkpoint_path = os.path.join(model_root, teacher_model) From 61c57faf1de13b2e29176a1f7896274dc7ae01ae Mon Sep 17 00:00:00 2001 From: SarahMuth Date: Thu, 7 Nov 2024 10:52:06 +0100 Subject: [PATCH 04/35] small updates, compartment segmentation --- scripts/cooper/AZ_segmentation_h5.py | 8 +- scripts/cooper/compartment_segmentation_h5.py | 116 ++++++++++++++++++ scripts/cooper/training/evaluation.py | 18 ++- scripts/cooper/vesicle_segmentation_h5.py | 30 ++++- scripts/rizzoli/evaluation_2D.py | 30 ++++- scripts/rizzoli/train_2D_domain_adaptation.py | 11 +- synaptic_reconstruction/inference/vesicles.py | 4 +- 7 files changed, 194 insertions(+), 23 deletions(-) create mode 100644 scripts/cooper/compartment_segmentation_h5.py diff --git a/scripts/cooper/AZ_segmentation_h5.py b/scripts/cooper/AZ_segmentation_h5.py index 07ff718..4deadc8 100644 --- a/scripts/cooper/AZ_segmentation_h5.py +++ b/scripts/cooper/AZ_segmentation_h5.py @@ -81,7 +81,7 @@ def segment_folder(args): input_files = [] for root, dirs, files in os.walk(args.input_path): input_files.extend([ - os.path.join(root, name) for name in files if name.endswith(".h5") + os.path.join(root, name) for name in files if name.endswith(args.data_ext) ]) print(input_files) pbar = tqdm(input_files, desc="Run segmentation") @@ -127,6 +127,10 @@ def main(): "--key_label", "-k", default = "combined_vesicles", help="Give the key name for saving the segmentation in h5." ) + parser.add_argument( + "--data_ext", "-d", default = ".h5", + help="Format extension of data to be segmented, default is .h5." 
+ ) args = parser.parse_args() input_ = args.input_path @@ -134,7 +138,7 @@ def main(): if os.path.isdir(input_): segment_folder(args) else: - run_AZ_segmentation(input_, args.output_path, args.model_path, args.mask_path, args.mask_key, args.tile_shape, args.halo, args.key_label) + run_AZ_segmentation(input_, args.output_path, args.model_path, args.mask_path, args.mask_key, args.tile_shape, args.halo, args.key_label, args.data_ext) print("Finished segmenting!") diff --git a/scripts/cooper/compartment_segmentation_h5.py b/scripts/cooper/compartment_segmentation_h5.py new file mode 100644 index 0000000..1d0020a --- /dev/null +++ b/scripts/cooper/compartment_segmentation_h5.py @@ -0,0 +1,116 @@ +import argparse +import h5py +import os +from pathlib import Path + +from tqdm import tqdm +from elf.io import open_file + +from synaptic_reconstruction.inference.compartments import segment_compartments +from synaptic_reconstruction.inference.util import parse_tiling + +def _require_output_folders(output_folder): + #seg_output = os.path.join(output_folder, "segmentations") + seg_output = output_folder + os.makedirs(seg_output, exist_ok=True) + return seg_output + +def get_volume(input_path): + + with open_file(input_path, "r") as f: + + # Try to automatically derive the key with the raw data. + keys = list(f.keys()) + if len(keys) == 1: + key = keys[0] + elif "data" in keys: + key = "data" + elif "raw" in keys: + key = "raw" + + input_volume = f[key][:] + return input_volume + +def run_compartment_segmentation(input_path, output_path, model_path, tile_shape, halo, key_label): + tiling = parse_tiling(tile_shape, halo) + print(f"using tiling {tiling}") + input = get_volume(input_path) + + segmentation, prediction = segment_compartments(input_volume=input, model_path=model_path, verbose=False, tiling=tiling, return_predictions=True, scale=[0.25, 0.25, 0.25]) + + seg_output = _require_output_folders(output_path) + file_name = Path(input_path).stem + seg_path = os.path.join(seg_output, f"{file_name}.h5") + + #check + os.makedirs(Path(seg_path).parent, exist_ok=True) + + print(f"Saving results in {seg_path}") + with h5py.File(seg_path, "a") as f: + if "raw" in f: + print("raw image already saved") + else: + f.create_dataset("raw", data=input, compression="gzip") + + key=f"compartments/segment_from_{key_label}" + if key in f: + print("Skipping", input_path, "because", key, "exists") + else: + f.create_dataset(key, data=segmentation, compression="gzip") + f.create_dataset(f"compartment_pred_{key_label}/foreground", data = prediction, compression="gzip") + + + + +def segment_folder(args): + input_files = [] + for root, dirs, files in os.walk(args.input_path): + input_files.extend([ + os.path.join(root, name) for name in files if name.endswith(args.data_ext) + ]) + print(input_files) + pbar = tqdm(input_files, desc="Run segmentation") + for input_path in pbar: + run_compartment_segmentation(input_path, args.output_path, args.model_path, args.tile_shape, args.halo, args.key_label) + +def main(): + parser = argparse.ArgumentParser(description="Segment vesicles in EM tomograms.") + parser.add_argument( + "--input_path", "-i", required=True, + help="The filepath to the mrc file or the directory containing the tomogram data." + ) + parser.add_argument( + "--output_path", "-o", required=True, + help="The filepath to directory where the segmentations will be saved." + ) + parser.add_argument( + "--model_path", "-m", required=True, help="The filepath to the vesicle model." 
+ ) + parser.add_argument( + "--tile_shape", type=int, nargs=3, + help="The tile shape for prediction. Lower the tile shape if GPU memory is insufficient." + ) + parser.add_argument( + "--halo", type=int, nargs=3, + help="The halo for prediction. Increase the halo to minimize boundary artifacts." + ) + parser.add_argument( + "--data_ext", "-d", default=".h5", help="The extension of the tomogram data. By default .h5." + ) + parser.add_argument( + "--key_label", "-k", default = "3Dmodel_v1", + help="Give the key name for saving the segmentation in h5." + ) + args = parser.parse_args() + + input_ = args.input_path + + if os.path.isdir(input_): + segment_folder(args) + else: + run_compartment_segmentation(input_, args.output_path, args.model_path, args.tile_shape, args.halo, args.key_label) + + print("Finished segmenting!") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/scripts/cooper/training/evaluation.py b/scripts/cooper/training/evaluation.py index d7aaf6e..68fa863 100644 --- a/scripts/cooper/training/evaluation.py +++ b/scripts/cooper/training/evaluation.py @@ -21,7 +21,7 @@ def summarize_eval(results): table = summary.to_markdown(index=False) print(table) -def evaluate_file(labels_path, vesicles_path, model_name, segment_key, anno_key): +def evaluate_file(labels_path, vesicles_path, model_name, segment_key, anno_key, mask_key = None): print(f"Evaluate labels {labels_path} and vesicles {vesicles_path}") ds_name = os.path.basename(os.path.dirname(labels_path)) @@ -33,11 +33,16 @@ def evaluate_file(labels_path, vesicles_path, model_name, segment_key, anno_key) #vesicles = labels["vesicles"] gt = labels[anno_key][:] + if mask_key is not None: + mask = labels[mask_key][:] + with h5py.File(vesicles_path) as seg_file: segmentation = seg_file["vesicles"] vesicles = segmentation[segment_key][:] - + if mask_key is not None: + gt[mask == 0] = 0 + vesicles[mask == 0] = 0 #evaluate the match of ground truth and vesicles scores = evaluate(gt, vesicles) @@ -65,7 +70,7 @@ def evaluate_file(labels_path, vesicles_path, model_name, segment_key, anno_key) summarize_eval(results) -def evaluate_folder(labels_path, vesicles_path, model_name, segment_key, anno_key): +def evaluate_folder(labels_path, vesicles_path, model_name, segment_key, anno_key, mask_key = None): print(f"Evaluating folder {vesicles_path}") print(f"Using labels stored in {labels_path}") @@ -75,7 +80,7 @@ def evaluate_folder(labels_path, vesicles_path, model_name, segment_key, anno_ke for vesicle_file in vesicles_files: if vesicle_file in label_files: - evaluate_file(os.path.join(labels_path, vesicle_file), os.path.join(vesicles_path, vesicle_file), model_name, segment_key, anno_key) + evaluate_file(os.path.join(labels_path, vesicle_file), os.path.join(vesicles_path, vesicle_file), model_name, segment_key, anno_key, mask_key) @@ -87,13 +92,14 @@ def main(): parser.add_argument("-n", "--model_name", required=True) parser.add_argument("-sk", "--segment_key", required=True) parser.add_argument("-ak", "--anno_key", required=True) + parser.add_argument("-m", "--mask_key") args = parser.parse_args() vesicles_path = args.vesicles_path if os.path.isdir(vesicles_path): - evaluate_folder(args.labels_path, vesicles_path, args.model_name, args.segment_key, args.anno_key) + evaluate_folder(args.labels_path, vesicles_path, args.model_name, args.segment_key, args.anno_key, args.mask_key) else: - evaluate_file(args.labels_path, vesicles_path, args.model_name, args.segment_key, args.anno_key) + evaluate_file(args.labels_path, 
vesicles_path, args.model_name, args.segment_key, args.anno_key, args.mask_key) diff --git a/scripts/cooper/vesicle_segmentation_h5.py b/scripts/cooper/vesicle_segmentation_h5.py index 9c8b1d1..1136f18 100644 --- a/scripts/cooper/vesicle_segmentation_h5.py +++ b/scripts/cooper/vesicle_segmentation_h5.py @@ -34,7 +34,7 @@ def get_volume(input_path): input_volume = f[key][:] return input_volume -def run_vesicle_segmentation(input_path, output_path, model_path, mask_path, mask_key,tile_shape, halo, include_boundary, key_label): +def run_vesicle_segmentation(input_path, output_path, model_path, mask_path, mask_key,tile_shape, halo, include_boundary, key_label, distance_threshold = None): tiling = parse_tiling(tile_shape, halo) print(f"using tiling {tiling}") input = get_volume(input_path) @@ -45,8 +45,17 @@ def run_vesicle_segmentation(input_path, output_path, model_path, mask_path, mas mask = f[mask_key][:] else: mask = None + if distance_threshold is not None: + segmentation, prediction = segment_vesicles( + input_volume=input, model_path=model_path, verbose=False, tiling=tiling, return_predictions=True, + exclude_boundary=not include_boundary, mask = mask, distance_threshold = distance_threshold + ) + else: + segmentation, prediction = segment_vesicles( + input_volume=input, model_path=model_path, verbose=False, tiling=tiling, return_predictions=True, + exclude_boundary=not include_boundary, mask = mask + ) - segmentation, prediction = segment_vesicles(input_volume=input, model_path=model_path, verbose=False, tiling=tiling, return_predictions=True, exclude_boundary=not include_boundary, mask = mask) foreground, boundaries = prediction[:2] seg_output = _require_output_folders(output_path) @@ -84,7 +93,7 @@ def segment_folder(args): input_files = [] for root, dirs, files in os.walk(args.input_path): input_files.extend([ - os.path.join(root, name) for name in files if name.endswith(".h5") + os.path.join(root, name) for name in files if name.endswith(args.data_ext) ]) print(input_files) pbar = tqdm(input_files, desc="Run segmentation") @@ -97,7 +106,10 @@ def segment_folder(args): print(f"Mask file not found for {input_path}") mask_path = None - run_vesicle_segmentation(input_path, args.output_path, args.model_path, mask_path, args.mask_key, args.tile_shape, args.halo, args.include_boundary, args.key_label) + run_vesicle_segmentation( + input_path, args.output_path, args.model_path, mask_path, args.mask_key, + args.tile_shape, args.halo, args.include_boundary, args.key_label, args.distance_threshold + ) def main(): parser = argparse.ArgumentParser(description="Segment vesicles in EM tomograms.") @@ -134,6 +146,14 @@ def main(): "--key_label", "-k", default = "combined_vesicles", help="Give the key name for saving the segmentation in h5." ) + parser.add_argument( + "--distance_threshold", "-t", type=int, + help="Used for distance based segmentation." + ) + parser.add_argument( + "--data_ext", "-d", default = ".h5", + help="Format extension of data to be segmented, default is .h5." 
+ ) args = parser.parse_args() input_ = args.input_path @@ -141,7 +161,7 @@ def main(): if os.path.isdir(input_): segment_folder(args) else: - run_vesicle_segmentation(input_, args.output_path, args.model_path, args.mask_path, args.mask_key, args.tile_shape, args.halo, args.include_boundary, args.key_label) + run_vesicle_segmentation(input_, args.output_path, args.model_path, args.mask_path, args.mask_key, args.tile_shape, args.halo, args.include_boundary, args.key_label, args.distance_threshold) print("Finished segmenting!") diff --git a/scripts/rizzoli/evaluation_2D.py b/scripts/rizzoli/evaluation_2D.py index 18fd4f1..9f918df 100644 --- a/scripts/rizzoli/evaluation_2D.py +++ b/scripts/rizzoli/evaluation_2D.py @@ -6,8 +6,13 @@ import numpy as np from elf.evaluation import matching +from skimage.transform import rescale - +def transpose_tomo(tomogram): + data0 = np.swapaxes(tomogram, 0, -1) + data1 = np.fliplr(data0) + transposed_data = np.swapaxes(data1, 0, -1) + return transposed_data def evaluate(labels, vesicles): assert labels.shape == vesicles.shape @@ -54,21 +59,34 @@ def evaluate_file(labels_path, vesicles_path, model_name, segment_key, anno_key) ds_name = os.path.basename(os.path.dirname(labels_path)) tomo = os.path.basename(labels_path) - + use_mask = True #get the labels and vesicles with h5py.File(labels_path) as label_file: labels = label_file["labels"] - vesicles = labels["vesicles"] - gt = vesicles[anno_key][:] + #vesicles = labels["vesicles"] + gt = labels[anno_key][:] + gt = rescale(gt, scale=0.5, order=0, anti_aliasing=False, preserve_range=True).astype(gt.dtype) + gt = transpose_tomo(gt) + + if use_mask: + mask = labels["mask"][:] + mask = rescale(mask, scale=0.5, order=0, anti_aliasing=False, preserve_range=True).astype(mask.dtype) + mask = transpose_tomo(mask) with h5py.File(vesicles_path) as seg_file: segmentation = seg_file["vesicles"] vesicles = segmentation[segment_key][:] + if use_mask: + gt[mask == 0] = 0 + vesicles[mask == 0] = 0 - #evaluate the match of ground truth and vesicles - scores = evaluate_slices(gt, vesicles) + #evaluate the match of ground truth and vesicles + if len(vesicles.shape) == 3: + scores = evaluate_slices(gt, vesicles) + else: + scores = evaluate(gt,vesicles) #store results result_folder ="/user/muth9/u12095/synaptic-reconstruction/scripts/cooper/evaluation_results" os.makedirs(result_folder, exist_ok=True) diff --git a/scripts/rizzoli/train_2D_domain_adaptation.py b/scripts/rizzoli/train_2D_domain_adaptation.py index 3beb487..ac2a28f 100644 --- a/scripts/rizzoli/train_2D_domain_adaptation.py +++ b/scripts/rizzoli/train_2D_domain_adaptation.py @@ -78,7 +78,7 @@ def vesicle_domain_adaptation(teacher_model, testset = True): datasets = [ "20241021_imig_2014_data_transfer_exported_grouped" -]#"maus_2020_tem2d_wt_unt_div14_exported_scaled_grouped", +] train_paths = get_paths("train", datasets=datasets, testset=testset) val_paths = get_paths("val", datasets=datasets, testset=testset) @@ -88,11 +88,13 @@ def vesicle_domain_adaptation(teacher_model, testset = True): #adjustable parameters patch_shape = [1, 256, 256] #2D - model_name = "2D-vesicle-DA-2Dcooper-imig-v1" + model_name = "2D-vesicle-DA-2Dcooper-imig-v2" model_root = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/models_v2/checkpoints/" checkpoint_path = os.path.join(model_root, teacher_model) + patch_shape = [256, 256] if any("maus" in dataset for dataset in datasets) else [1, 256, 256] + mean_teacher_adaptation( name=model_name, unsupervised_train_paths=train_paths, @@ -102,7 
+104,10 @@ def vesicle_domain_adaptation(teacher_model, testset = True): save_root="/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/DA_models", source_checkpoint=checkpoint_path, confidence_threshold=0.75, - n_iterations=int(5e5), + batch_size=8, + n_iterations=int(1.5e4), + n_samples_train=8000, + n_samples_val=50, ) diff --git a/synaptic_reconstruction/inference/vesicles.py b/synaptic_reconstruction/inference/vesicles.py index 237d95a..4a56b0f 100644 --- a/synaptic_reconstruction/inference/vesicles.py +++ b/synaptic_reconstruction/inference/vesicles.py @@ -49,6 +49,7 @@ def distance_based_vesicle_segmentation( # Get the segmentation via seeded watershed of components in the boundary distances. t0 = time.time() + print(f"using a distance thresholf of {distance_threshold} for distance based segmentation") seeds = parallel.label(bd_dist > distance_threshold, block_shape=block_shape, verbose=verbose) if verbose: print("Compute connected components in", time.time() - t0, "s") @@ -129,6 +130,7 @@ def segment_vesicles( min_size: int = 500, verbose: bool = True, distance_based_segmentation: bool = True, + distance_threshold: int = 8, return_predictions: bool = False, scale: Optional[List[float]] = None, exclude_boundary: bool = False, @@ -174,7 +176,7 @@ def segment_vesicles( if distance_based_segmentation: seg = distance_based_vesicle_segmentation( - foreground, boundaries, verbose=verbose, min_size=min_size, **kwargs + foreground, boundaries, verbose=verbose, min_size=min_size, distance_threshold = distance_threshold, **kwargs ) else: seg = simple_vesicle_segmentation( From 40e965ed72c87250b5c2ecf02e8ba1bb06b1e2e9 Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Thu, 7 Nov 2024 17:24:28 +0100 Subject: [PATCH 05/35] Implement code for first analysis --- scripts/cooper/analysis/run_analysis_1.py | 66 +++++++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 scripts/cooper/analysis/run_analysis_1.py diff --git a/scripts/cooper/analysis/run_analysis_1.py b/scripts/cooper/analysis/run_analysis_1.py new file mode 100644 index 0000000..5518189 --- /dev/null +++ b/scripts/cooper/analysis/run_analysis_1.py @@ -0,0 +1,66 @@ +# This is the code for the first analysis for the cooper data. +# Here, we only compute the vesicle numbers and size distributions for the STEM tomograms +# in the 04 dataset. + +import os +from glob import glob + +import pandas as pd +import h5py +from tqdm import tqdm +from synaptic_reconstruction.imod.to_imod import convert_segmentation_to_spheres + +DATA_ROOT = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/ground_truth/04Dataset_for_vesicle_eval" # noqa +PREDICTION_ROOT = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/04Dataset_for_vesicle_eval/model_segmentation" # noqa +RESULT_FOLDER = "./analysis_results/analysis_1" + + +# We compute the sizes for all vesicles in the compartment masks. +# We use the same logic in the size computation as for the vesicle extraction to IMOD, +# including the radius correction factor. +# The number of vesicles is automatically computed as the length of the size list. 
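For context on the size logic described in the comment above: it amounts to a sphere-equivalent radius per labelled vesicle. A minimal sketch of that idea, assuming a labelled 3D array and an isotropic voxel resolution in nm (the patch itself relies on convert_segmentation_to_spheres, whose actual radius estimation, e.g. with estimate_radius_2d, may differ):

import numpy as np

def sphere_equivalent_radii(segmentation, resolution=(0.8681, 0.8681, 0.8681), radius_factor=1.3):
    # Count the voxels belonging to each vesicle id (0 is background).
    ids, counts = np.unique(segmentation[segmentation > 0], return_counts=True)
    # Convert voxel counts to physical volumes.
    volumes = counts * np.prod(resolution)
    # Radius of the sphere with the same volume, scaled by the radius correction factor.
    radii = radius_factor * np.cbrt(3.0 * volumes / (4.0 * np.pi))
    return ids, radii

The number of vesicles then falls out as len(radii), matching the comment above.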
+def compute_sizes_for_all_tomorams(): + os.makedirs(RESULT_FOLDER, exist_ok=True) + + resolution = (0.8681,) * 3 + radius_factor = 1.3 + estimate_radius_2d = True + + tomograms = sorted(glob(os.path.join(PREDICTION_ROOT, "**/*.h5"), recursive=True)) + for tomo in tqdm(tomograms): + ds_name, fname = os.path.split(tomo) + ds_name = os.path.split(ds_name)[1] + fname = os.path.splitext(fname)[0] + output_path = os.path.join(RESULT_FOLDER, f"{ds_name}_{fname}.csv") + if os.path.exists(output_path): + continue + + # Load the vesicle segmentation from the predictions. + with h5py.File(tomo, "r") as f: + segmentation = f["/vesicles/segment_from_combined_vesicles"][:] + + input_path = os.path.join(DATA_ROOT, ds_name, f"{fname}.h5") + assert os.path.exists(input_path), input_path + # Load the compartment mask from the tomogram + with h5py.File(input_path, "r") as f: + mask = f["labels/compartment"][:] + + segmentation[mask == 0] = 0 + _, sizes = convert_segmentation_to_spheres( + segmentation, resolution=resolution, radius_factor=radius_factor, estimate_radius_2d=estimate_radius_2d + ) + + result = pd.DataFrame({ + "dataset": [ds_name] * len(sizes), + "tomogram": [fname] * len(sizes), + "sizes": sizes + }) + result.to_csv(output_path, index=False) + + +def main(): + compute_sizes_for_all_tomorams() + + +if __name__ == "__main__": + main() From 7be9ee8fd74d0667e61c84f0d3db104427527581 Mon Sep 17 00:00:00 2001 From: SarahMuth Date: Mon, 11 Nov 2024 20:21:47 +0100 Subject: [PATCH 06/35] 2D seg with mask --- .gitignore | 3 +- scripts/cooper/AZ_segmentation_h5.py | 2 +- scripts/rizzoli/2D_vesicle_segmentation.py | 37 ++++++++++++++++++---- scripts/rizzoli/evaluation_2D.py | 6 ++-- 4 files changed, 37 insertions(+), 11 deletions(-) diff --git a/.gitignore b/.gitignore index ca03577..4db569e 100644 --- a/.gitignore +++ b/.gitignore @@ -12,4 +12,5 @@ scripts/cooper/training/copy_testset.py scripts/rizzoli/upsample_data.py scripts/cooper/training/find_rec_testset.py scripts/rizzoli/combine_2D_slices.py -scripts/rizzoli/combine_2D_slices_raw.py \ No newline at end of file +scripts/rizzoli/combine_2D_slices_raw.py +scripts/cooper/remove_h5key.py \ No newline at end of file diff --git a/scripts/cooper/AZ_segmentation_h5.py b/scripts/cooper/AZ_segmentation_h5.py index 4deadc8..2fb7045 100644 --- a/scripts/cooper/AZ_segmentation_h5.py +++ b/scripts/cooper/AZ_segmentation_h5.py @@ -138,7 +138,7 @@ def main(): if os.path.isdir(input_): segment_folder(args) else: - run_AZ_segmentation(input_, args.output_path, args.model_path, args.mask_path, args.mask_key, args.tile_shape, args.halo, args.key_label, args.data_ext) + run_AZ_segmentation(input_, args.output_path, args.model_path, args.mask_path, args.mask_key, args.tile_shape, args.halo, args.key_label) print("Finished segmenting!") diff --git a/scripts/rizzoli/2D_vesicle_segmentation.py b/scripts/rizzoli/2D_vesicle_segmentation.py index ddfdab7..159be28 100644 --- a/scripts/rizzoli/2D_vesicle_segmentation.py +++ b/scripts/rizzoli/2D_vesicle_segmentation.py @@ -7,6 +7,7 @@ import torch import torch_em import numpy as np +from elf.io import open_file from synaptic_reconstruction.inference.vesicles import segment_vesicles from synaptic_reconstruction.inference.util import parse_tiling @@ -57,7 +58,7 @@ def get_volume(input_path): input_volume = seg_file["raw"][:] return input_volume -def run_vesicle_segmentation(input_path, output_path, model_path, tile_shape, halo, include_boundary, key_label, scale): +def run_vesicle_segmentation(input_path, output_path, model_path, 
tile_shape, halo, include_boundary, key_label, scale, mask_path, mask_key): tiling = get_2D_tiling() @@ -69,16 +70,29 @@ def run_vesicle_segmentation(input_path, output_path, model_path, tile_shape, ha tiling = parse_tiling(tile_shape, halo) input = get_volume(input_path) + #check if we have a restricting mask for the segmentation + if mask_path is not None: + with open_file(mask_path, "r") as f: + mask = f[mask_key][:] + else: + mask = None + device = "cuda" if torch.cuda.is_available() else "cpu" model = torch_em.util.load_model(checkpoint=model_path, device=device) - def process_slices(input_volume, scale): + def process_slices(input_volume, scale, mask): processed_slices = [] foreground = [] boundaries = [] for z in range(input_volume.shape[0]): slice_ = input_volume[z, :, :] - segmented_slice, prediction_slice = segment_vesicles(input_volume=slice_, model=model, verbose=False, tiling=tiling, return_predictions=True, scale = scale, exclude_boundary=not include_boundary) + #check if we have a restricting mask for the segmentation + if mask is not None: + mask_slice = mask[z, :, :] + segmented_slice, prediction_slice = segment_vesicles(input_volume=slice_, model=model, verbose=False, tiling=tiling, return_predictions=True, scale = scale, exclude_boundary=not include_boundary, mask = mask_slice) + else: + segmented_slice, prediction_slice = segment_vesicles(input_volume=slice_, model=model, verbose=False, tiling=tiling, return_predictions=True, scale = scale, exclude_boundary=not include_boundary) + processed_slices.append(segmented_slice) foreground_pred_slice, boundaries_pred_slice = prediction_slice[:2] foreground.append(foreground_pred_slice) @@ -86,10 +100,11 @@ def process_slices(input_volume, scale): return processed_slices, foreground, boundaries if input.ndim == 2: + #TODO: check if we have a restricting mask for the segmentation segmentation, prediction = segment_vesicles(input_volume=input, model=model, verbose=False, tiling=tiling, return_predictions=True, scale = scale, exclude_boundary=not include_boundary) foreground, boundaries = prediction[:2] else: - segmentation, foreground, boundaries = process_slices(input, scale) + segmentation, foreground, boundaries = process_slices(input, scale, mask) seg_output = _require_output_folders(output_path) file_name = Path(input_path).stem @@ -125,7 +140,11 @@ def segment_folder(args): print(input_files) pbar = tqdm(input_files, desc="Run segmentation") for input_path in pbar: - run_vesicle_segmentation(input_path, args.output_path, args.model_path, args.tile_shape, args.halo, args.include_boundary, args.key_label, args.scale) + if args.mask_path is not None: + mask_path_for_file = os.path.join(args.mask_path, os.path.basename(input_path)) + else: + mask_path_for_file = None + run_vesicle_segmentation(input_path, args.output_path, args.model_path, args.tile_shape, args.halo, args.include_boundary, args.key_label, args.scale, mask_path_for_file, args.mask_key) def main(): parser = argparse.ArgumentParser(description="Segment vesicles in EM tomograms.") @@ -160,6 +179,12 @@ def main(): "--scale", "-s", type=float, nargs=2, help="Scales the input data." ) + parser.add_argument( + "--mask_path", help="The filepath to a h5 file with a mask that will be used to restrict the segmentation. Needs to be in combination with mask_key." 
+ ) + parser.add_argument( + "--mask_key", help="Key name that holds the mask segmentation" + ) args = parser.parse_args() input_ = args.input_path @@ -167,7 +192,7 @@ def main(): if os.path.isdir(input_): segment_folder(args) else: - run_vesicle_segmentation(input_, args.output_path, args.model_path, args.tile_shape, args.halo, args.include_boundary, args.key_label, args.scale) + run_vesicle_segmentation(input_, args.output_path, args.model_path, args.tile_shape, args.halo, args.include_boundary, args.key_label, args.scale, args.mask_path, args.mask_key) print("Finished segmenting!") diff --git a/scripts/rizzoli/evaluation_2D.py b/scripts/rizzoli/evaluation_2D.py index 9f918df..5b5bbbd 100644 --- a/scripts/rizzoli/evaluation_2D.py +++ b/scripts/rizzoli/evaluation_2D.py @@ -59,14 +59,14 @@ def evaluate_file(labels_path, vesicles_path, model_name, segment_key, anno_key) ds_name = os.path.basename(os.path.dirname(labels_path)) tomo = os.path.basename(labels_path) - use_mask = True + use_mask = False #get the labels and vesicles with h5py.File(labels_path) as label_file: labels = label_file["labels"] #vesicles = labels["vesicles"] gt = labels[anno_key][:] - gt = rescale(gt, scale=0.5, order=0, anti_aliasing=False, preserve_range=True).astype(gt.dtype) - gt = transpose_tomo(gt) + #gt = rescale(gt, scale=0.5, order=0, anti_aliasing=False, preserve_range=True).astype(gt.dtype) + #gt = transpose_tomo(gt) if use_mask: mask = labels["mask"][:] From f85e4452bd3c32132a2cd94ff139ea47334e4566 Mon Sep 17 00:00:00 2001 From: SarahMuth Date: Mon, 11 Nov 2024 22:47:38 +0100 Subject: [PATCH 07/35] spatial distribution analysis --- scripts/cooper/analysis/run_analysis_1.py | 6 +- .../run_spatial_distribution_analysis.py | 75 +++++++++++++++++++ 2 files changed, 78 insertions(+), 3 deletions(-) create mode 100644 scripts/cooper/analysis/run_spatial_distribution_analysis.py diff --git a/scripts/cooper/analysis/run_analysis_1.py b/scripts/cooper/analysis/run_analysis_1.py index 5518189..523aba1 100644 --- a/scripts/cooper/analysis/run_analysis_1.py +++ b/scripts/cooper/analysis/run_analysis_1.py @@ -10,8 +10,8 @@ from tqdm import tqdm from synaptic_reconstruction.imod.to_imod import convert_segmentation_to_spheres -DATA_ROOT = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/ground_truth/04Dataset_for_vesicle_eval" # noqa -PREDICTION_ROOT = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/04Dataset_for_vesicle_eval/model_segmentation" # noqa +DATA_ROOT = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/20241102_TOMO_DATA_Imig2014" # noqa +PREDICTION_ROOT = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/segmentation/for_spatial_distribution_analysis/20241102_TOMO_DATA_Imig2014" # noqa RESULT_FOLDER = "./analysis_results/analysis_1" @@ -22,7 +22,7 @@ def compute_sizes_for_all_tomorams(): os.makedirs(RESULT_FOLDER, exist_ok=True) - resolution = (0.8681,) * 3 + resolution = (0.8681,) * 3 #change for each dataset radius_factor = 1.3 estimate_radius_2d = True diff --git a/scripts/cooper/analysis/run_spatial_distribution_analysis.py b/scripts/cooper/analysis/run_spatial_distribution_analysis.py new file mode 100644 index 0000000..2002431 --- /dev/null +++ b/scripts/cooper/analysis/run_spatial_distribution_analysis.py @@ -0,0 +1,75 @@ +import os +from glob import glob +import pandas as pd +import h5py +from tqdm import tqdm +from synaptic_reconstruction.distance_measurements import measure_segmentation_to_object_distances + 
+DATA_ROOT = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/20241102_TOMO_DATA_Imig2014" # noqa +PREDICTION_ROOT = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/segmentation/for_spatial_distribution_analysis/20241102_TOMO_DATA_Imig2014" # noqa +RESULT_FOLDER = "./analysis_results" + + +# We compute the distances for all vesicles in the compartment masks to the AZ. +# We use the same different resolution, depending on dataset. +# The closest distance is calculated, i.e., the closest point on the outer membrane of the vesicle to the AZ. +def compute_sizes_for_all_tomorams(): + os.makedirs(RESULT_FOLDER, exist_ok=True) + + resolution = (0.8681,) * 3 # Change for each dataset + + # Dictionary to hold the results for each dataset + dataset_results = {} + + tomograms = sorted(glob(os.path.join(PREDICTION_ROOT, "**/*.h5"), recursive=True)) + for tomo in tqdm(tomograms): + ds_name, fname = os.path.split(tomo) + ds_name = os.path.split(ds_name)[1] + fname = os.path.splitext(fname)[0] + + # Initialize a new dictionary entry for each dataset if not already present + if ds_name not in dataset_results: + dataset_results[ds_name] = {} + + # Skip if this tomogram already exists in the dataset dictionary + if fname in dataset_results[ds_name]: + continue + + # Load the vesicle segmentation from the predictions + with h5py.File(tomo, "r") as f: + segmentation = f["/vesicles/segment_from_combined_vesicles"][:] + segmented_object = f["/AZ/segment_from_AZmodel_v3"][:] + + input_path = os.path.join(DATA_ROOT, ds_name, f"{fname}.h5") + assert os.path.exists(input_path), input_path + + # Load the compartment mask from the tomogram + with h5py.File(input_path, "r") as f: + mask = f["labels/compartment"][:] + + segmentation[mask == 0] = 0 + distances, _, _, _ = measure_segmentation_to_object_distances( + segmentation, segmented_object=segmented_object, resolution=resolution + ) + + # Add distances to the dataset dictionary under the tomogram name + dataset_results[ds_name][fname] = distances + + # Save each dataset's results to a single CSV file + for ds_name, tomogram_data in dataset_results.items(): + # Create a DataFrame where each column is a tomogram's distances + result_df = pd.DataFrame.from_dict(tomogram_data, orient='index').transpose() + + # Define the output file path + output_path = os.path.join(RESULT_FOLDER, f"spatial_distribution_analysis_for_{ds_name}.csv") + + # Save the DataFrame to CSV + result_df.to_csv(output_path, index=False) + + +def main(): + compute_sizes_for_all_tomorams() + + +if __name__ == "__main__": + main() From 8ef16bcafd4289b1793e1275d826a386a2cf8c87 Mon Sep 17 00:00:00 2001 From: SarahMuth Date: Tue, 12 Nov 2024 13:46:23 +0100 Subject: [PATCH 08/35] intersection between compartment boundary and AZ segmentaiton --- scripts/cooper/AZ_segmentation_h5.py | 35 ++++++++++++++++--- .../run_spatial_distribution_analysis.py | 2 +- synaptic_reconstruction/inference/AZ.py | 9 ++++- .../postprocessing/postprocess_AZ.py | 25 +++++++++++++ 4 files changed, 65 insertions(+), 6 deletions(-) create mode 100644 synaptic_reconstruction/inference/postprocessing/postprocess_AZ.py diff --git a/scripts/cooper/AZ_segmentation_h5.py b/scripts/cooper/AZ_segmentation_h5.py index 2fb7045..baa7225 100644 --- a/scripts/cooper/AZ_segmentation_h5.py +++ b/scripts/cooper/AZ_segmentation_h5.py @@ -34,7 +34,7 @@ def get_volume(input_path): input_volume = f[key][:] return input_volume -def run_AZ_segmentation(input_path, output_path, model_path, mask_path, 
mask_key,tile_shape, halo, key_label): +def run_AZ_segmentation(input_path, output_path, model_path, mask_path, mask_key,tile_shape, halo, key_label, compartment_seg): tiling = parse_tiling(tile_shape, halo) print(f"using tiling {tiling}") input = get_volume(input_path) @@ -46,7 +46,14 @@ def run_AZ_segmentation(input_path, output_path, model_path, mask_path, mask_key else: mask = None - foreground = segment_AZ(input_volume=input, model_path=model_path, verbose=False, tiling=tiling, mask = mask) + #check if intersection with compartment is necessary + if compartment_seg is None: + foreground = segment_AZ(input_volume=input, model_path=model_path, verbose=False, tiling=tiling, mask = mask) + intersection = None + else: + with open_file(compartment_seg, "r") as f: + compartment = f["/compartments/segment_from_3Dmodel_v1"][:] + foreground, intersection = segment_AZ(input_volume=input, model_path=model_path, verbose=False, tiling=tiling, mask = mask, compartment=compartment) seg_output = _require_output_folders(output_path) file_name = Path(input_path).stem @@ -73,6 +80,13 @@ def run_AZ_segmentation(input_path, output_path, model_path, mask_path, mask_key print("mask image already saved") else: f.create_dataset(mask_key, data = mask, compression = "gzip") + + if intersection is not None: + intersection_key = "AZ/compartment_AZ_intersection" + if intersection_key in f: + print("intersection already saved") + else: + f.create_dataset(intersection_key, data = intersection, compression = "gzip") @@ -93,8 +107,15 @@ def segment_folder(args): except: print(f"Mask file not found for {input_path}") mask_path = None + + if args.compartment_seg is not None: + try: + compartment_seg = os.path.join(args.compartment_seg, os.path.splitext(filename)[0] + '.h5') + except: + print(f"compartment file not found for {input_path}") + compartment_seg = None - run_AZ_segmentation(input_path, args.output_path, args.model_path, mask_path, args.mask_key, args.tile_shape, args.halo, args.key_label) + run_AZ_segmentation(input_path, args.output_path, args.model_path, mask_path, args.mask_key, args.tile_shape, args.halo, args.key_label, compartment_seg) def main(): parser = argparse.ArgumentParser(description="Segment vesicles in EM tomograms.") @@ -131,6 +152,12 @@ def main(): "--data_ext", "-d", default = ".h5", help="Format extension of data to be segmented, default is .h5." ) + parser.add_argument( + "--compartment_seg", "-c", + help="Path to compartment segmentation." + "If the compartment segmentation was executed before, this will add a key to output file that stores the intersection between compartment boundary and AZ." 
+ "Maybe need to adjust the compartment key that the segmentation is stored under" + ) args = parser.parse_args() input_ = args.input_path @@ -138,7 +165,7 @@ def main(): if os.path.isdir(input_): segment_folder(args) else: - run_AZ_segmentation(input_, args.output_path, args.model_path, args.mask_path, args.mask_key, args.tile_shape, args.halo, args.key_label) + run_AZ_segmentation(input_, args.output_path, args.model_path, args.mask_path, args.mask_key, args.tile_shape, args.halo, args.key_label, args.compartment_seg) print("Finished segmenting!") diff --git a/scripts/cooper/analysis/run_spatial_distribution_analysis.py b/scripts/cooper/analysis/run_spatial_distribution_analysis.py index 2002431..f8d0708 100644 --- a/scripts/cooper/analysis/run_spatial_distribution_analysis.py +++ b/scripts/cooper/analysis/run_spatial_distribution_analysis.py @@ -16,7 +16,7 @@ def compute_sizes_for_all_tomorams(): os.makedirs(RESULT_FOLDER, exist_ok=True) - resolution = (0.8681,) * 3 # Change for each dataset + resolution = (1.554,) * 3 # Change for each dataset #1.554 for Munc and snap #0.8681 for 04 dataset # Dictionary to hold the results for each dataset dataset_results = {} diff --git a/synaptic_reconstruction/inference/AZ.py b/synaptic_reconstruction/inference/AZ.py index b93218f..a1c9da8 100644 --- a/synaptic_reconstruction/inference/AZ.py +++ b/synaptic_reconstruction/inference/AZ.py @@ -6,7 +6,7 @@ import torch from synaptic_reconstruction.inference.util import get_prediction, _Scaler - +from synaptic_reconstruction.inference.postprocessing.postprocess_AZ import find_intersection_boundary def _run_segmentation( foreground, verbose, min_size, @@ -40,6 +40,7 @@ def segment_AZ( return_predictions: bool = False, scale: Optional[List[float]] = None, mask: Optional[np.ndarray] = None, + compartment: Optional[np.ndarray] = None, ) -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]: """ Segment mitochondria in an input volume. @@ -75,8 +76,14 @@ def segment_AZ( segmentation = _run_segmentation(foreground, verbose=verbose, min_size=min_size) + #returning prediciton and intersection not possible atm, but currently do not need prediction anyways if return_predictions: pred = scaler.rescale_output(pred, is_segmentation=False) return segmentation, pred + + if compartment is not None: + intersection = find_intersection_boundary(segmentation, compartment) + return segmentation, intersection + return segmentation diff --git a/synaptic_reconstruction/inference/postprocessing/postprocess_AZ.py b/synaptic_reconstruction/inference/postprocessing/postprocess_AZ.py new file mode 100644 index 0000000..54d9e2c --- /dev/null +++ b/synaptic_reconstruction/inference/postprocessing/postprocess_AZ.py @@ -0,0 +1,25 @@ +import numpy as np +from scipy.ndimage import binary_erosion + +def find_intersection_boundary(segmented_AZ, segmented_compartment): + """ + Find the intersection of the boundary of segmented_compartment with segmented_AZ. + + Parameters: + segmented_AZ (numpy.ndarray): 3D array representing the active zone (AZ). + segmented_compartment (numpy.ndarray): 3D array representing the compartment. + + Returns: + numpy.ndarray: 3D array with the intersection of the boundary of segmented_compartment and segmented_AZ. 
+ """ + # Step 0: Binarize the segmented_compartment + binarized_compartment = (segmented_compartment > 0).astype(int) + + # Step 1: Create a binary mask of the compartment boundary + eroded_compartment = binary_erosion(binarized_compartment) + boundary_compartment = binarized_compartment - eroded_compartment + + # Step 2: Find the intersection with the AZ + intersection = np.logical_and(boundary_compartment, segmented_AZ) + + return intersection.astype(int) # Convert boolean array to int (1 for intersecting points, 0 elsewhere) From 09f6c846cfc391096ee6c86fca49d4d5f56d8799 Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Tue, 12 Nov 2024 17:20:26 +0100 Subject: [PATCH 09/35] Update compartment postprocessing --- .../inference/compartments.py | 22 ++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/synaptic_reconstruction/inference/compartments.py b/synaptic_reconstruction/inference/compartments.py index a822d9f..701c222 100644 --- a/synaptic_reconstruction/inference/compartments.py +++ b/synaptic_reconstruction/inference/compartments.py @@ -77,6 +77,12 @@ def _segment_compartments_2d( mask = np.logical_or(binary_closing(mask, iterations=4), mask) segmentation[bb][mask] = prop.label + # import napari + # v = napari.Viewer() + # v.add_image(boundaries) + # v.add_labels(segmentation) + # napari.run() + return segmentation @@ -117,6 +123,7 @@ def _segment_compartments_3d( boundary_threshold=0.4, n_slices_exclude=0, min_z_extent=10, + postprocess_segments=False, ): distances = distance_transform_edt(prediction < boundary_threshold).astype("float32") seg_2d = np.zeros(prediction.shape, dtype="uint32") @@ -132,7 +139,8 @@ def _segment_compartments_3d( seg_2d[z] = seg_z seg = _merge_segmentation_3d(seg_2d, min_z_extent) - seg = _postprocess_seg_3d(seg) + if postprocess_segments: + seg = _postprocess_seg_3d(seg) # import napari # v = napari.Viewer() @@ -155,6 +163,9 @@ def segment_compartments( scale: Optional[List[float]] = None, mask: Optional[np.ndarray] = None, n_slices_exclude: int = 0, + boundary_threshold: float = 0.4, + min_z_extent: int = 10, + postprocess_segments: bool = False, **kwargs, ) -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]: """ @@ -194,9 +205,14 @@ def segment_compartments( # We may want to expose some of the parameters here. 
t0 = time.time() if input_volume.ndim == 2: - seg = _segment_compartments_2d(pred) + seg = _segment_compartments_2d(pred, boundary_threshold=boundary_threshold) else: - seg = _segment_compartments_3d(pred, n_slices_exclude=n_slices_exclude) + seg = _segment_compartments_3d( + pred, + boundary_threshold=boundary_threshold, + n_slices_exclude=n_slices_exclude, + postprocess_segments=postprocess_segments, + ) if verbose: print("Run segmentation in", time.time() - t0, "s") From f893d2300fab14a737b3d85adecb72af04644e22 Mon Sep 17 00:00:00 2001 From: SarahMuth Date: Wed, 13 Nov 2024 12:05:09 +0100 Subject: [PATCH 10/35] updating data analysis on smaller details --- .gitignore | 1 + scripts/cooper/AZ_segmentation_h5.py | 2 +- scripts/cooper/analysis/run_analysis_1.py | 106 +++++++++++++++++- .../run_spatial_distribution_analysis.py | 64 +++++++++-- scripts/cooper/compartment_segmentation_h5.py | 2 +- .../postprocessing/postprocess_AZ.py | 36 +++--- 6 files changed, 185 insertions(+), 26 deletions(-) diff --git a/.gitignore b/.gitignore index 4db569e..0377c4a 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,7 @@ models/*/ run_sbatch.sbatch slurm/ scripts/cooper/evaluation_results/ +analysis_results/ scripts/cooper/training/copy_testset.py scripts/rizzoli/upsample_data.py scripts/cooper/training/find_rec_testset.py diff --git a/scripts/cooper/AZ_segmentation_h5.py b/scripts/cooper/AZ_segmentation_h5.py index baa7225..da694c1 100644 --- a/scripts/cooper/AZ_segmentation_h5.py +++ b/scripts/cooper/AZ_segmentation_h5.py @@ -52,7 +52,7 @@ def run_AZ_segmentation(input_path, output_path, model_path, mask_path, mask_key intersection = None else: with open_file(compartment_seg, "r") as f: - compartment = f["/compartments/segment_from_3Dmodel_v1"][:] + compartment = f["/labels/compartment"][:] foreground, intersection = segment_AZ(input_volume=input, model_path=model_path, verbose=False, tiling=tiling, mask = mask, compartment=compartment) seg_output = _require_output_folders(output_path) diff --git a/scripts/cooper/analysis/run_analysis_1.py b/scripts/cooper/analysis/run_analysis_1.py index 523aba1..b166a71 100644 --- a/scripts/cooper/analysis/run_analysis_1.py +++ b/scripts/cooper/analysis/run_analysis_1.py @@ -5,15 +5,56 @@ import os from glob import glob +import numpy as np import pandas as pd import h5py from tqdm import tqdm from synaptic_reconstruction.imod.to_imod import convert_segmentation_to_spheres -DATA_ROOT = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/20241102_TOMO_DATA_Imig2014" # noqa -PREDICTION_ROOT = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/segmentation/for_spatial_distribution_analysis/20241102_TOMO_DATA_Imig2014" # noqa -RESULT_FOLDER = "./analysis_results/analysis_1" +DATA_ROOT = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/segmentation/for_spatial_distribution_analysis/final_Imig2014_seg/" # noqa +PREDICTION_ROOT = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/segmentation/for_spatial_distribution_analysis/final_Imig2014_seg/" # noqa +RESULT_FOLDER = "./analysis_results/AZ_intersect_autoCompartment" +def get_compartment_with_max_overlap(compartments, vesicles): + """ + Given 3D numpy arrays of compartments and vesicles, this function returns a binary mask + of the compartment with the most overlap with vesicles based on the number of overlapping voxels. + + Parameters: + compartments (numpy.ndarray): 3D array of compartment labels. + vesicles (numpy.ndarray): 3D array of vesicle labels or binary mask. 
+ + Returns: + numpy.ndarray: Binary mask of the compartment with the most overlap with vesicles. + """ + + unique_compartments = np.unique(compartments) + if 0 in unique_compartments: + unique_compartments = unique_compartments[unique_compartments != 0] + + max_overlap_count = 0 + best_compartment = None + + # Iterate over each compartment and calculate the overlap with vesicles + for compartment_label in unique_compartments: + # Create a binary mask for the current compartment + compartment_mask = compartments == compartment_label + vesicle_mask = vesicles > 0 + + intersection = np.logical_and(compartment_mask, vesicle_mask) + + # Calculate the number of overlapping voxels + overlap_count = np.sum(intersection) + + # Track the compartment with the most overlap in terms of voxel count + if overlap_count > max_overlap_count: + max_overlap_count = overlap_count + best_compartment = compartment_label + + # Create the final mask for the compartment with the most overlap + final_mask = compartments == best_compartment + + return final_mask # We compute the sizes for all vesicles in the compartment masks. # We use the same logic in the size computation as for the vesicle extraction to IMOD, @@ -57,9 +98,66 @@ def compute_sizes_for_all_tomorams(): }) result.to_csv(output_path, index=False) +def compute_sizes_for_all_tomorams_manComp(): + os.makedirs(RESULT_FOLDER, exist_ok=True) + + resolution = (1.554,) * 3 # Change for each dataset #1.554 for Munc and snap #0.8681 for 04 dataset + radius_factor = 1.3 + estimate_radius_2d = True + + # Dictionary to hold the results for each dataset + dataset_results = {} + + tomograms = sorted(glob(os.path.join(PREDICTION_ROOT, "**/*.h5"), recursive=True)) + for tomo in tqdm(tomograms): + ds_name, fname = os.path.split(tomo) + ds_name = os.path.split(ds_name)[1] + fname = os.path.splitext(fname)[0] + # Initialize a new dictionary entry for each dataset if not already present + if ds_name not in dataset_results: + dataset_results[ds_name] = {} + + # Skip if this tomogram already exists in the dataset dictionary + if fname in dataset_results[ds_name]: + continue + + # Load the vesicle segmentation from the predictions. 
+ with h5py.File(tomo, "r") as f: + segmentation = f["/vesicles/segment_from_combined_vesicles"][:] + + input_path = os.path.join(DATA_ROOT, ds_name, f"{fname}.h5") + assert os.path.exists(input_path), input_path + # Load the compartment mask from the tomogram + with h5py.File(input_path, "r") as f: + compartments = f["/compartments/segment_from_3Dmodel_v2"][:] + mask = get_compartment_with_max_overlap(compartments, segmentation) + + #if more than half of the vesicles (approximation, its checking pixel and not label) would get filtered by mask it means the compartment seg didn't work and thus we won't use the mask + if np.sum(segmentation[mask == 0] > 0) > (0.5 * np.sum(segmentation > 0)): + print("using no mask") + else: + segmentation[mask == 0] = 0 + _, sizes = convert_segmentation_to_spheres( + segmentation, resolution=resolution, radius_factor=radius_factor, estimate_radius_2d=estimate_radius_2d + ) + + # Add sizes to the dataset dictionary under the tomogram name + dataset_results[ds_name][fname] = sizes + + # Save each dataset's results to a single CSV file + for ds_name, tomogram_data in dataset_results.items(): + # Create a DataFrame where each column is a tomogram's sizes + result_df = pd.DataFrame.from_dict(tomogram_data, orient='index').transpose() + + # Define the output file path + output_path = os.path.join(RESULT_FOLDER, f"size_analysis_for_{ds_name}.csv") + + # Save the DataFrame to CSV + result_df.to_csv(output_path, index=False) def main(): - compute_sizes_for_all_tomorams() + #compute_sizes_for_all_tomorams() + compute_sizes_for_all_tomorams_manComp() if __name__ == "__main__": diff --git a/scripts/cooper/analysis/run_spatial_distribution_analysis.py b/scripts/cooper/analysis/run_spatial_distribution_analysis.py index f8d0708..fca7eed 100644 --- a/scripts/cooper/analysis/run_spatial_distribution_analysis.py +++ b/scripts/cooper/analysis/run_spatial_distribution_analysis.py @@ -4,12 +4,54 @@ import h5py from tqdm import tqdm from synaptic_reconstruction.distance_measurements import measure_segmentation_to_object_distances +import numpy as np -DATA_ROOT = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/20241102_TOMO_DATA_Imig2014" # noqa -PREDICTION_ROOT = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/segmentation/for_spatial_distribution_analysis/20241102_TOMO_DATA_Imig2014" # noqa -RESULT_FOLDER = "./analysis_results" +DATA_ROOT = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/segmentation/for_spatial_distribution_analysis/final_Imig2014_seg/" # noqa +PREDICTION_ROOT = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/segmentation/for_spatial_distribution_analysis/final_Imig2014_seg/" # noqa +RESULT_FOLDER = "./analysis_results/AZ_intersect_autoCompartment" +def get_compartment_with_max_overlap(compartments, vesicles): + """ + Given 3D numpy arrays of compartments and vesicles, this function returns a binary mask + of the compartment with the most overlap with vesicles based on the number of overlapping voxels. + + Parameters: + compartments (numpy.ndarray): 3D array of compartment labels. + vesicles (numpy.ndarray): 3D array of vesicle labels or binary mask. + + Returns: + numpy.ndarray: Binary mask of the compartment with the most overlap with vesicles. 
+ """ + + unique_compartments = np.unique(compartments) + if 0 in unique_compartments: + unique_compartments = unique_compartments[unique_compartments != 0] + + max_overlap_count = 0 + best_compartment = None + + # Iterate over each compartment and calculate the overlap with vesicles + for compartment_label in unique_compartments: + # Create a binary mask for the current compartment + compartment_mask = compartments == compartment_label + vesicle_mask = vesicles > 0 + + intersection = np.logical_and(compartment_mask, vesicle_mask) + + # Calculate the number of overlapping voxels + overlap_count = np.sum(intersection) + + # Track the compartment with the most overlap in terms of voxel count + if overlap_count > max_overlap_count: + max_overlap_count = overlap_count + best_compartment = compartment_label + + # Create the final mask for the compartment with the most overlap + final_mask = compartments == best_compartment + + return final_mask + # We compute the distances for all vesicles in the compartment masks to the AZ. # We use the same different resolution, depending on dataset. # The closest distance is calculated, i.e., the closest point on the outer membrane of the vesicle to the AZ. @@ -38,16 +80,24 @@ def compute_sizes_for_all_tomorams(): # Load the vesicle segmentation from the predictions with h5py.File(tomo, "r") as f: segmentation = f["/vesicles/segment_from_combined_vesicles"][:] - segmented_object = f["/AZ/segment_from_AZmodel_v3"][:] + segmented_object = f["/AZ/compartment_AZ_intersection"][:] + #if AZ intersect is small, compartment seg didn't align with AZ so we use the normal AZ and not intersect + if (segmented_object == 0).all() or np.sum(segmented_object == 1) < 2000: + segmented_object = f["/AZ/segment_from_AZmodel_v3"][:] input_path = os.path.join(DATA_ROOT, ds_name, f"{fname}.h5") assert os.path.exists(input_path), input_path # Load the compartment mask from the tomogram with h5py.File(input_path, "r") as f: - mask = f["labels/compartment"][:] - - segmentation[mask == 0] = 0 + compartments = f["/compartments/segment_from_3Dmodel_v2"][:] + mask = get_compartment_with_max_overlap(compartments, segmentation) + + #if more than half of the vesicles (approximation, its checking pixel and not label) would get filtered by mask it means the compartment seg didn't work and thus we won't use the mask + if np.sum(segmentation[mask == 0] > 0) > (0.5 * np.sum(segmentation > 0)): + print("using no mask") + else: + segmentation[mask == 0] = 0 distances, _, _, _ = measure_segmentation_to_object_distances( segmentation, segmented_object=segmented_object, resolution=resolution ) diff --git a/scripts/cooper/compartment_segmentation_h5.py b/scripts/cooper/compartment_segmentation_h5.py index 1d0020a..573ac48 100644 --- a/scripts/cooper/compartment_segmentation_h5.py +++ b/scripts/cooper/compartment_segmentation_h5.py @@ -36,7 +36,7 @@ def run_compartment_segmentation(input_path, output_path, model_path, tile_shape print(f"using tiling {tiling}") input = get_volume(input_path) - segmentation, prediction = segment_compartments(input_volume=input, model_path=model_path, verbose=False, tiling=tiling, return_predictions=True, scale=[0.25, 0.25, 0.25]) + segmentation, prediction = segment_compartments(input_volume=input, model_path=model_path, verbose=False, tiling=tiling, return_predictions=True, scale=[0.25, 0.25, 0.25],boundary_threshold=0.2, postprocess_segments=False) seg_output = _require_output_folders(output_path) file_name = Path(input_path).stem diff --git 
a/synaptic_reconstruction/inference/postprocessing/postprocess_AZ.py b/synaptic_reconstruction/inference/postprocessing/postprocess_AZ.py index 54d9e2c..8ef2cd3 100644 --- a/synaptic_reconstruction/inference/postprocessing/postprocess_AZ.py +++ b/synaptic_reconstruction/inference/postprocessing/postprocess_AZ.py @@ -1,25 +1,35 @@ import numpy as np -from scipy.ndimage import binary_erosion +from skimage.segmentation import find_boundaries def find_intersection_boundary(segmented_AZ, segmented_compartment): """ - Find the intersection of the boundary of segmented_compartment with segmented_AZ. + Find the cumulative intersection of the boundary of each label in segmented_compartment with segmented_AZ. Parameters: segmented_AZ (numpy.ndarray): 3D array representing the active zone (AZ). - segmented_compartment (numpy.ndarray): 3D array representing the compartment. + segmented_compartment (numpy.ndarray): 3D array representing the compartment, with multiple labels. Returns: - numpy.ndarray: 3D array with the intersection of the boundary of segmented_compartment and segmented_AZ. + numpy.ndarray: 3D array with the cumulative intersection of all boundaries of segmented_compartment labels with segmented_AZ. """ - # Step 0: Binarize the segmented_compartment - binarized_compartment = (segmented_compartment > 0).astype(int) + # Step 0: Initialize an empty array to accumulate intersections + cumulative_intersection = np.zeros_like(segmented_AZ, dtype=bool) - # Step 1: Create a binary mask of the compartment boundary - eroded_compartment = binary_erosion(binarized_compartment) - boundary_compartment = binarized_compartment - eroded_compartment - - # Step 2: Find the intersection with the AZ - intersection = np.logical_and(boundary_compartment, segmented_AZ) + # Step 1: Loop through each unique label in segmented_compartment (excluding 0 if it represents background) + labels = np.unique(segmented_compartment) + labels = labels[labels != 0] # Exclude background label (0) if necessary + + for label in labels: + # Step 2: Create a binary mask for the current label + label_mask = (segmented_compartment == label) + + # Step 3: Find the boundary of the current label's compartment + boundary_compartment = find_boundaries(label_mask, mode='outer') + + # Step 4: Find the intersection with the AZ for this label's boundary + intersection = np.logical_and(boundary_compartment, segmented_AZ) + + # Step 5: Accumulate intersections for each label + cumulative_intersection = np.logical_or(cumulative_intersection, intersection) - return intersection.astype(int) # Convert boolean array to int (1 for intersecting points, 0 elsewhere) + return cumulative_intersection.astype(int) # Convert boolean array to int (1 for intersecting points, 0 elsewhere) From 08c56b9c08bb244869ad77d689efcc5ca294dfa8 Mon Sep 17 00:00:00 2001 From: SarahMuth Date: Wed, 13 Nov 2024 13:03:23 +0100 Subject: [PATCH 11/35] minor updates data analysis --- scripts/cooper/analysis/run_analysis_1.py | 44 ++++++++----- .../run_spatial_distribution_analysis.py | 62 +++++++++++++++++-- 2 files changed, 87 insertions(+), 19 deletions(-) diff --git a/scripts/cooper/analysis/run_analysis_1.py b/scripts/cooper/analysis/run_analysis_1.py index b166a71..459d490 100644 --- a/scripts/cooper/analysis/run_analysis_1.py +++ b/scripts/cooper/analysis/run_analysis_1.py @@ -11,9 +11,9 @@ from tqdm import tqdm from synaptic_reconstruction.imod.to_imod import convert_segmentation_to_spheres -DATA_ROOT = 
"/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/segmentation/for_spatial_distribution_analysis/final_Imig2014_seg/" # noqa -PREDICTION_ROOT = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/segmentation/for_spatial_distribution_analysis/final_Imig2014_seg/" # noqa -RESULT_FOLDER = "./analysis_results/AZ_intersect_autoCompartment" +DATA_ROOT = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/20241102_TOMO_DATA_Imig2014/exported/" # noqa +PREDICTION_ROOT = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/segmentation/for_spatial_distribution_analysis/final_Imig2014_seg_manComp" # noqa +RESULT_FOLDER = "./analysis_results/AZ_intersect_manualCompartment" def get_compartment_with_max_overlap(compartments, vesicles): """ @@ -60,20 +60,27 @@ def get_compartment_with_max_overlap(compartments, vesicles): # We use the same logic in the size computation as for the vesicle extraction to IMOD, # including the radius correction factor. # The number of vesicles is automatically computed as the length of the size list. -def compute_sizes_for_all_tomorams(): +def compute_sizes_for_all_tomorams_manComp(): os.makedirs(RESULT_FOLDER, exist_ok=True) - resolution = (0.8681,) * 3 #change for each dataset + resolution = (1.554,) * 3 # Change for each dataset #1.554 for Munc and snap #0.8681 for 04 dataset radius_factor = 1.3 estimate_radius_2d = True + # Dictionary to hold the results for each dataset + dataset_results = {} + tomograms = sorted(glob(os.path.join(PREDICTION_ROOT, "**/*.h5"), recursive=True)) for tomo in tqdm(tomograms): ds_name, fname = os.path.split(tomo) ds_name = os.path.split(ds_name)[1] fname = os.path.splitext(fname)[0] - output_path = os.path.join(RESULT_FOLDER, f"{ds_name}_{fname}.csv") - if os.path.exists(output_path): + # Initialize a new dictionary entry for each dataset if not already present + if ds_name not in dataset_results: + dataset_results[ds_name] = {} + + # Skip if this tomogram already exists in the dataset dictionary + if fname in dataset_results[ds_name]: continue # Load the vesicle segmentation from the predictions. 
@@ -91,14 +98,21 @@ def compute_sizes_for_all_tomorams(): segmentation, resolution=resolution, radius_factor=radius_factor, estimate_radius_2d=estimate_radius_2d ) - result = pd.DataFrame({ - "dataset": [ds_name] * len(sizes), - "tomogram": [fname] * len(sizes), - "sizes": sizes - }) - result.to_csv(output_path, index=False) + # Add sizes to the dataset dictionary under the tomogram name + dataset_results[ds_name][fname] = sizes -def compute_sizes_for_all_tomorams_manComp(): + # Save each dataset's results to a single CSV file + for ds_name, tomogram_data in dataset_results.items(): + # Create a DataFrame where each column is a tomogram's sizes + result_df = pd.DataFrame.from_dict(tomogram_data, orient='index').transpose() + + # Define the output file path + output_path = os.path.join(RESULT_FOLDER, f"size_analysis_for_{ds_name}.csv") + + # Save the DataFrame to CSV + result_df.to_csv(output_path, index=False) + +def compute_sizes_for_all_tomorams_autoComp(): os.makedirs(RESULT_FOLDER, exist_ok=True) resolution = (1.554,) * 3 # Change for each dataset #1.554 for Munc and snap #0.8681 for 04 dataset @@ -156,8 +170,8 @@ def compute_sizes_for_all_tomorams_manComp(): result_df.to_csv(output_path, index=False) def main(): - #compute_sizes_for_all_tomorams() compute_sizes_for_all_tomorams_manComp() + #compute_sizes_for_all_tomorams_autoComp() if __name__ == "__main__": diff --git a/scripts/cooper/analysis/run_spatial_distribution_analysis.py b/scripts/cooper/analysis/run_spatial_distribution_analysis.py index fca7eed..9a890a1 100644 --- a/scripts/cooper/analysis/run_spatial_distribution_analysis.py +++ b/scripts/cooper/analysis/run_spatial_distribution_analysis.py @@ -6,9 +6,9 @@ from synaptic_reconstruction.distance_measurements import measure_segmentation_to_object_distances import numpy as np -DATA_ROOT = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/segmentation/for_spatial_distribution_analysis/final_Imig2014_seg/" # noqa -PREDICTION_ROOT = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/segmentation/for_spatial_distribution_analysis/final_Imig2014_seg/" # noqa -RESULT_FOLDER = "./analysis_results/AZ_intersect_autoCompartment" +DATA_ROOT = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/20241102_TOMO_DATA_Imig2014/exported/" # noqa +PREDICTION_ROOT = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/segmentation/for_spatial_distribution_analysis/final_Imig2014_seg_manComp" # noqa +RESULT_FOLDER = "./analysis_results/AZ_intersect_manualCompartment" def get_compartment_with_max_overlap(compartments, vesicles): @@ -116,9 +116,63 @@ def compute_sizes_for_all_tomorams(): # Save the DataFrame to CSV result_df.to_csv(output_path, index=False) + +def compute_sizes_for_all_tomorams_manComp(): + os.makedirs(RESULT_FOLDER, exist_ok=True) + + resolution = (1.554,) * 3 # Change for each dataset #1.554 for Munc and snap #0.8681 for 04 dataset + + # Dictionary to hold the results for each dataset + dataset_results = {} + + tomograms = sorted(glob(os.path.join(PREDICTION_ROOT, "**/*.h5"), recursive=True)) + for tomo in tqdm(tomograms): + ds_name, fname = os.path.split(tomo) + ds_name = os.path.split(ds_name)[1] + fname = os.path.splitext(fname)[0] + # Initialize a new dictionary entry for each dataset if not already present + if ds_name not in dataset_results: + dataset_results[ds_name] = {} + + # Skip if this tomogram already exists in the dataset dictionary + if fname in dataset_results[ds_name]: + continue + + # Load the vesicle segmentation from 
the predictions + with h5py.File(tomo, "r") as f: + segmentation = f["/vesicles/segment_from_combined_vesicles"][:] + segmented_object = f["/AZ/compartment_AZ_intersection_manComp"][:] + + input_path = os.path.join(DATA_ROOT, ds_name, f"{fname}.h5") + assert os.path.exists(input_path), input_path + + # Load the compartment mask from the tomogram + with h5py.File(input_path, "r") as f: + mask = f["/labels/compartment"][:] + + segmentation[mask == 0] = 0 + distances, _, _, _ = measure_segmentation_to_object_distances( + segmentation, segmented_object=segmented_object, resolution=resolution + ) + + # Add distances to the dataset dictionary under the tomogram name + dataset_results[ds_name][fname] = distances + + # Save each dataset's results to a single CSV file + for ds_name, tomogram_data in dataset_results.items(): + # Create a DataFrame where each column is a tomogram's distances + result_df = pd.DataFrame.from_dict(tomogram_data, orient='index').transpose() + + # Define the output file path + output_path = os.path.join(RESULT_FOLDER, f"spatial_distribution_analysis_for_{ds_name}.csv") + + # Save the DataFrame to CSV + result_df.to_csv(output_path, index=False) + def main(): - compute_sizes_for_all_tomorams() + #compute_sizes_for_all_tomorams() + compute_sizes_for_all_tomorams_manComp() if __name__ == "__main__": From 36d834fd4a92571819be466ddccf405e311fffc2 Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Thu, 14 Nov 2024 15:25:32 +0100 Subject: [PATCH 12/35] Implement inner ear analysis WIP --- .../inner_ear/analysis/analyze_distances.py | 0 .../analysis/analyze_vesicle_pools.py | 73 +++++++++++++++++++ scripts/inner_ear/processing/run_analyis.py | 41 +++++++---- 3 files changed, 100 insertions(+), 14 deletions(-) create mode 100644 scripts/inner_ear/analysis/analyze_distances.py create mode 100644 scripts/inner_ear/analysis/analyze_vesicle_pools.py diff --git a/scripts/inner_ear/analysis/analyze_distances.py b/scripts/inner_ear/analysis/analyze_distances.py new file mode 100644 index 0000000..e69de29 diff --git a/scripts/inner_ear/analysis/analyze_vesicle_pools.py b/scripts/inner_ear/analysis/analyze_vesicle_pools.py new file mode 100644 index 0000000..b53a9f6 --- /dev/null +++ b/scripts/inner_ear/analysis/analyze_vesicle_pools.py @@ -0,0 +1,73 @@ +import sys + +import numpy as np +import pandas as pd + +sys.path.append("..") +sys.path.append("../processing") + +from combine_measurements import combine_manual_results, combine_automatic_results # noqa +# from compare_pool_assignments import create_manual_assignment +from parse_table import parse_table, get_data_root # noqa + + +def get_manual_assignments(): + result_path = "../results/20240917_1/fully_manual_analysis_results.xlsx" + results = pd.read_excel(result_path) + return results + + +def get_automatic_assignments(tomograms): + result_path = "../results/20240917_1/automatic_analysis_results.xlsx" + results = pd.read_excel(result_path) + results = results[results["tomogram"].isin(tomograms)] + return results + + +def plot_confusion_matrix(manual_assignments, automatic_assignments): + pass + + +def for_tomos_with_annotation(): + manual_assignments = get_manual_assignments() + manual_tomograms = pd.unique(manual_assignments["tomogram"]) + automatic_assignments = get_automatic_assignments(manual_tomograms) + + tomograms = pd.unique(automatic_assignments["tomogram"]) + manual_assignments = manual_assignments[manual_assignments["tomogram"].isin(tomograms)] + assert len(pd.unique(manual_assignments["tomogram"])) == 
len(pd.unique(automatic_assignments["tomogram"])) + + n_tomograms = len(tomograms) + pool_names, manual_pool_counts = np.unique(manual_assignments["pool"].values, return_counts=True) + _, automatic_pool_counts = np.unique(automatic_assignments["pool"].values, return_counts=True) + + manual_pool_counts = manual_pool_counts.astype("float32") + manual_pool_counts /= n_tomograms + automatic_pool_counts = automatic_pool_counts.astype("float32") + automatic_pool_counts /= n_tomograms + + print(pool_names) + print(manual_pool_counts) + print(automatic_pool_counts) + + # TODO plot as a bar chart + # TODO save excel + # TODO add 'more automatic' results + + breakpoint() + + +# TODO +def for_all_tomos(): + pass + + +def main(): + # data_root = get_data_root() + # table_path = os.path.join(data_root, "Electron-Microscopy-Susi", "Übersicht.xlsx") + # table = parse_table(table_path, data_root) + for_tomos_with_annotation() + + +if __name__ == "__main__": + main() diff --git a/scripts/inner_ear/processing/run_analyis.py b/scripts/inner_ear/processing/run_analyis.py index baeade1..8508673 100644 --- a/scripts/inner_ear/processing/run_analyis.py +++ b/scripts/inner_ear/processing/run_analyis.py @@ -334,8 +334,7 @@ def _insert_missing_vesicles(vesicle_path, original_vesicle_path, pool_correctio imageio.imwrite(vesicle_path, vesicles) -# TODO adapt to segmentation without PD -def analyze_folder(folder, version, n_ribbons, force): +def analyze_folder(folder, version, n_ribbons, force, use_corrected_vesicles): data_path = get_data_path(folder) output_folder = os.path.join(folder, "automatisch", f"v{version}") @@ -352,12 +351,20 @@ def analyze_folder(folder, version, n_ribbons, force): correction_folder = _match_correction_folder(folder) if os.path.exists(correction_folder): output_folder = correction_folder - result_path = os.path.join(output_folder, "measurements.xlsx") + + if use_corrected_vesicles: + result_path = os.path.join(output_folder, "measurements.xlsx") + else: + result_path = os.path.join(output_folder, "measurements_uncorrected_assignments.xlsx") + if os.path.exists(result_path) and not force: return print("Analyse the corrected segmentations from", correction_folder) for seg_name in segmentation_names: + if seg_name == "vesicles" and not use_corrected_vesicles: + continue + seg_path = _match_correction_file(correction_folder, seg_name) if os.path.exists(seg_path): @@ -371,7 +378,10 @@ def analyze_folder(folder, version, n_ribbons, force): segmentation_paths[seg_name] = seg_path - result_path = os.path.join(output_folder, "measurements.xlsx") + if use_corrected_vesicles: + result_path = os.path.join(output_folder, "measurements.xlsx") + else: + result_path = os.path.join(output_folder, "measurements_uncorrected_assignments.xlsx") if os.path.exists(result_path) and not force: return @@ -398,7 +408,7 @@ def analyze_folder(folder, version, n_ribbons, force): ) -def run_analysis(table, version, force=False, val_table=None): +def run_analysis(table, version, force=False, val_table=None, use_corrected_vesicles=True): for i, row in tqdm(table.iterrows(), total=len(table)): folder = row["Local Path"] if folder == "": @@ -426,19 +436,19 @@ def run_analysis(table, version, force=False, val_table=None): micro = row["EM alt vs. 
Neu"] if micro == "beides": - analyze_folder(folder, version, n_ribbons, force=force) + analyze_folder(folder, version, n_ribbons, force=force, use_corrected_vesicles=use_corrected_vesicles) folder_new = os.path.join(folder, "Tomo neues EM") if not os.path.exists(folder_new): folder_new = os.path.join(folder, "neues EM") assert os.path.exists(folder_new), folder_new - analyze_folder(folder_new, version, n_ribbons, force=force) + analyze_folder(folder_new, version, n_ribbons, force=force, use_corrected_vesicles=use_corrected_vesicles) elif micro == "alt": - analyze_folder(folder, version, n_ribbons, force=force) + analyze_folder(folder, version, n_ribbons, force=force, use_corrected_vesicles=use_corrected_vesicles) elif micro == "neu": - analyze_folder(folder, version, n_ribbons, force=force) + analyze_folder(folder, version, n_ribbons, force=force, use_corrected_vesicles=use_corrected_vesicles) def main(): @@ -447,13 +457,16 @@ def main(): table = parse_table(table_path, data_root) version = 2 - force = True + force = False + use_corrected_vesicles = False - val_table_path = os.path.join(data_root, "Electron-Microscopy-Susi", "Validierungs-Tabelle-v3.xlsx") - val_table = pandas.read_excel(val_table_path) - # val_table = None + # val_table_path = os.path.join(data_root, "Electron-Microscopy-Susi", "Validierungs-Tabelle-v3.xlsx") + # val_table = pandas.read_excel(val_table_path) + val_table = None - run_analysis(table, version, force=force, val_table=val_table) + run_analysis( + table, version, force=force, val_table=val_table, use_corrected_vesicles=use_corrected_vesicles + ) if __name__ == "__main__": From 49d1b7ce8834dd9e81c594f7a3e96f91f6b2208e Mon Sep 17 00:00:00 2001 From: SarahMuth Date: Thu, 14 Nov 2024 16:52:48 +0100 Subject: [PATCH 13/35] calculation of AZ area --- scripts/cooper/analysis/calc_AZ_area.py | 227 ++++++++++++++++++++++++ 1 file changed, 227 insertions(+) create mode 100644 scripts/cooper/analysis/calc_AZ_area.py diff --git a/scripts/cooper/analysis/calc_AZ_area.py b/scripts/cooper/analysis/calc_AZ_area.py new file mode 100644 index 0000000..e9fcb52 --- /dev/null +++ b/scripts/cooper/analysis/calc_AZ_area.py @@ -0,0 +1,227 @@ +import h5py +import numpy as np +import os +import csv +from scipy.ndimage import binary_opening, median_filter,zoom, binary_closing +from skimage.measure import label, regionprops +from synaptic_reconstruction.morphology import compute_object_morphology +from skimage.morphology import ball +from scipy.spatial import ConvexHull +from skimage.draw import polygon + +def calculate_AZ_area_per_slice(AZ_slice, pixel_size_nm=1.554): + """ + Calculate the area of the AZ in a single 2D slice after applying error-reducing processing. + + Parameters: + - AZ_slice (numpy array): 2D array representing a single slice of the AZ segmentation. + - pixel_size_nm (float): Size of a pixel in nanometers. + + Returns: + - slice_area_nm2 (float): The area of the AZ in the slice in square nanometers. + """ + # Apply binary opening or median filter to reduce small segmentation errors + AZ_slice_filtered = binary_opening(AZ_slice, structure=np.ones((3, 3))).astype(int) + + # Calculate area in this slice + num_AZ_pixels = np.sum(AZ_slice_filtered == 1) + slice_area_nm2 = num_AZ_pixels * (pixel_size_nm ** 2) + + return slice_area_nm2 + +def calculate_total_AZ_area(tomo_path, pixel_size_nm=1.554): + """ + Calculate the total area of the AZ across all slices in a 3D tomogram file. + + Parameters: + - tomo_path (str): Path to the tomogram file (HDF5 format). 
+ - pixel_size_nm (float): Size of a pixel in nanometers. + + Returns: + - total_AZ_area_nm2 (float): The total area of the AZ in square nanometers. + """ + with h5py.File(tomo_path, "r") as f: + AZ_intersect_seg = f["/AZ/compartment_AZ_intersection_manComp"][:] + + # Calculate the AZ area for each slice along the z-axis + total_AZ_area_nm2 = 0 + for z_slice in AZ_intersect_seg: + slice_area_nm2 = calculate_AZ_area_per_slice(z_slice, pixel_size_nm) + total_AZ_area_nm2 += slice_area_nm2 + + return total_AZ_area_nm2 + +def calculate_AZ_area_simple(tomo_path, pixel_size_nm=1.554): + """ + Calculate the area of the AZ (active zone) in a 3D tomogram file by counting labeled voxels. + + Parameters: + - tomo_path (str): Path to the tomogram file (HDF5 format). + - pixel_size_nm (float): Size of a pixel in nanometers (default is 1.554 nm). + + Returns: + - AZ_area_nm2 (float): The area of the AZ in square nanometers. + """ + # Open the file and read the AZ intersection segmentation data + with h5py.File(tomo_path, "r") as f: + AZ_intersect_seg = f["/AZ/compartment_AZ_intersection_manComp"][:] + + # Count voxels with label = 1 + num_AZ_voxels = np.sum(AZ_intersect_seg == 1) + + # Calculate the area in square nanometers + AZ_area_nm2 = num_AZ_voxels * (pixel_size_nm ** 2) + + return AZ_area_nm2 + +def calculate_AZ_surface(tomo_path, pixel_size_nm=1.554): + with h5py.File(tomo_path, "r") as f: + AZ_seg = f["/AZ/segment_from_AZmodel_v3"][:] + + # Apply binary closing to smooth the segmented regions + struct_elem = ball(1) # Use a small 3D structuring element + AZ_seg_smoothed = binary_closing(AZ_seg > 0, structure=struct_elem, iterations=20) + + labeled_seg = label(AZ_seg_smoothed) + + regions = regionprops(labeled_seg) + if regions: + # Sort regions by area and get the label of the largest region + largest_region = max(regions, key=lambda r: r.area) + largest_label = largest_region.label + + largest_component_mask = (labeled_seg == largest_label) + AZ_seg_filtered = largest_component_mask.astype(np.uint8) + + else: + # If no regions found, return an empty array + AZ_seg_filtered = np.zeros_like(AZ_seg, dtype=np.uint8) + + morphology_data = compute_object_morphology(AZ_seg_filtered, "AZ Structure", resolution=(pixel_size_nm, pixel_size_nm, pixel_size_nm)) + surface_column = "surface [nm^2]" #if resolution is not None else "surface [pixel^2]" + surface_area = morphology_data[surface_column].iloc[0] + + return surface_area + +def calculate_AZ_surface_convexHull(tomo_path, pixel_size_nm=1.554): + with h5py.File(tomo_path, "r") as f: + AZ_seg = f["/AZ/segment_from_AZmodel_v3"][:] + + # Apply binary closing to smooth the segmented regions + struct_elem = ball(1) # Use a small 3D structuring element + AZ_seg_smoothed = binary_closing(AZ_seg > 0, structure=struct_elem, iterations=10) + + labeled_seg = label(AZ_seg_smoothed) + + regions = regionprops(labeled_seg) + if regions: + # Sort regions by area and get the label of the largest region + largest_region = max(regions, key=lambda r: r.area) + largest_label = largest_region.label + + largest_component_mask = (labeled_seg == largest_label) + AZ_seg_filtered = largest_component_mask.astype(np.uint8) + AZ_seg = AZ_seg_filtered + # Extract coordinates of non-zero points + points = np.argwhere(AZ_seg > 0) # Get the coordinates of non-zero (foreground) pixels + + if points.shape[0] < 4: + # ConvexHull requires at least 4 points in 3D to form a valid hull + AZ_seg_filtered = np.zeros_like(AZ_seg, dtype=np.uint8) + else: + # Apply ConvexHull to the points + hull =
ConvexHull(points) + + # Create a binary mask for the convex hull + convex_hull_mask = np.zeros_like(AZ_seg, dtype=bool) + + # Iterate over each simplex (facet) of the convex hull and fill in the polygon + for simplex in hull.simplices: + # For each face of the convex hull, extract the vertices and convert to a 2D polygon + polygon_coords = points[simplex] + rr, cc = polygon(polygon_coords[:, 0], polygon_coords[:, 1]) + convex_hull_mask[rr, cc] = True + + # Optional: Label the convex hull mask + labeled_seg = label(convex_hull_mask) + regions = regionprops(labeled_seg) + + if regions: + # Sort regions by area and get the label of the largest region + largest_region = max(regions, key=lambda r: r.area) + largest_label = largest_region.label + + largest_component_mask = (labeled_seg == largest_label) + AZ_seg_filtered = largest_component_mask.astype(np.uint8) + + else: + AZ_seg_filtered = np.zeros_like(AZ_seg, dtype=np.uint8) + + # Calculate surface area + morphology_data = compute_object_morphology(AZ_seg_filtered, "AZ Structure", resolution=(pixel_size_nm, pixel_size_nm, pixel_size_nm)) + surface_column = "surface [nm^2]" + surface_area = morphology_data[surface_column].iloc[0] + + return surface_area + +def process_datasets(folder_path, output_csv="AZ_areas.csv", pixel_size_nm=1.554): + """ + Process all tomograms in multiple datasets within a folder and save results to a CSV. + + Parameters: + - folder_path (str): Path to the folder containing dataset folders with tomograms. + - output_csv (str): Filename for the output CSV file. + - pixel_size_nm (float): Size of a pixel in nanometers. + """ + results = [] + + # Iterate over each dataset folder + for dataset_name in os.listdir(folder_path): + dataset_path = os.path.join(folder_path, dataset_name) + + # Check if it's a directory (skip files in the main folder) + if not os.path.isdir(dataset_path): + continue + + # Iterate over each tomogram file in the dataset folder + for tomo_file in os.listdir(dataset_path): + tomo_path = os.path.join(dataset_path, tomo_file) + + # Check if the file is an HDF5 file (optional) + if tomo_file.endswith(".h5") or tomo_file.endswith(".hdf5"): + try: + # Calculate AZ area + #AZ_area = calculate_total_AZ_area(tomo_path, pixel_size_nm) + #AZ_area = calculate_AZ_area_simple(tomo_path, pixel_size_nm) + #AZ_surface_area = calculate_AZ_surface(tomo_path, pixel_size_nm) + AZ_surface_area = calculate_AZ_surface_convexHull(tomo_path, pixel_size_nm) + # Append results to list + results.append({ + "Dataset": dataset_name, + "Tomogram": tomo_file, + "AZ_surface_area": AZ_surface_area + }) + except Exception as e: + print(f"Error processing {tomo_file} in {dataset_name}: {e}") + + # Write results to a CSV file + with open(output_csv, mode="w", newline="") as csvfile: + fieldnames = ["Dataset", "Tomogram", "AZ_surface_area"] + writer = csv.DictWriter(csvfile, fieldnames=fieldnames) + + writer.writeheader() + for result in results: + writer.writerow(result) + + print(f"Results saved to {output_csv}") + +def main(): + # Define the path to the folder containing dataset folders + folder_path = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/segmentation/for_spatial_distribution_analysis/final_Imig2014_seg_manComp" + output_csv = "./analysis_results/AZ_intersect_manualCompartment/AZ_surface_area.csv" + # Call the function to process datasets and save results + process_datasets(folder_path, output_csv = output_csv) + +# Call main +if __name__ == "__main__": + main() From 8a515d1704409f328d2fffd7cbc99e9d63986ba5 Mon 
Sep 17 00:00:00 2001 From: SarahMuth Date: Thu, 14 Nov 2024 18:01:29 +0100 Subject: [PATCH 14/35] corrected radius factor --- scripts/cooper/analysis/run_analysis_1.py | 53 ++++++++++++++--------- 1 file changed, 33 insertions(+), 20 deletions(-) diff --git a/scripts/cooper/analysis/run_analysis_1.py b/scripts/cooper/analysis/run_analysis_1.py index 459d490..4077ea3 100644 --- a/scripts/cooper/analysis/run_analysis_1.py +++ b/scripts/cooper/analysis/run_analysis_1.py @@ -11,9 +11,9 @@ from tqdm import tqdm from synaptic_reconstruction.imod.to_imod import convert_segmentation_to_spheres -DATA_ROOT = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/20241102_TOMO_DATA_Imig2014/exported/" # noqa -PREDICTION_ROOT = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/segmentation/for_spatial_distribution_analysis/final_Imig2014_seg_manComp" # noqa -RESULT_FOLDER = "./analysis_results/AZ_intersect_manualCompartment" +DATA_ROOT = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/segmentation/for_spatial_distribution_analysis/final_Imig2014_seg/" # noqa +PREDICTION_ROOT = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/segmentation/for_spatial_distribution_analysis/final_Imig2014_seg/" # noqa +RESULT_FOLDER = "./analysis_results/AZ_intersect_autoCompartment" def get_compartment_with_max_overlap(compartments, vesicles): """ @@ -64,7 +64,7 @@ def compute_sizes_for_all_tomorams_manComp(): os.makedirs(RESULT_FOLDER, exist_ok=True) resolution = (1.554,) * 3 # Change for each dataset #1.554 for Munc and snap #0.8681 for 04 dataset - radius_factor = 1.3 + radius_factor = 0.7 estimate_radius_2d = True # Dictionary to hold the results for each dataset @@ -112,14 +112,21 @@ def compute_sizes_for_all_tomorams_manComp(): # Save the DataFrame to CSV result_df.to_csv(output_path, index=False) +import os +import pandas as pd +import numpy as np +from glob import glob +import h5py +from tqdm import tqdm + def compute_sizes_for_all_tomorams_autoComp(): os.makedirs(RESULT_FOLDER, exist_ok=True) resolution = (1.554,) * 3 # Change for each dataset #1.554 for Munc and snap #0.8681 for 04 dataset - radius_factor = 1.3 + radius_factor = 0.7 estimate_radius_2d = True - # Dictionary to hold the results for each dataset + # Dictionary to hold the results for each dataset and category (CTRL or DKO) dataset_results = {} tomograms = sorted(glob(os.path.join(PREDICTION_ROOT, "**/*.h5"), recursive=True)) @@ -127,12 +134,16 @@ def compute_sizes_for_all_tomorams_autoComp(): ds_name, fname = os.path.split(tomo) ds_name = os.path.split(ds_name)[1] fname = os.path.splitext(fname)[0] - # Initialize a new dictionary entry for each dataset if not already present + + # Determine if the tomogram is 'CTRL' or 'DKO' + category = "CTRL" if "CTRL" in fname else "DKO" + + # Initialize a new dictionary entry for each dataset and category if not already present if ds_name not in dataset_results: - dataset_results[ds_name] = {} + dataset_results[ds_name] = {'CTRL': {}, 'DKO': {}} # Skip if this tomogram already exists in the dataset dictionary - if fname in dataset_results[ds_name]: + if fname in dataset_results[ds_name][category]: continue # Load the vesicle segmentation from the predictions. 
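The masking applied in the next hunk uses get_compartment_with_max_overlap, defined earlier in this patch series. A minimal toy sketch of the underlying criterion, with made-up arrays (the compartment whose voxels overlap most with the vesicle mask is kept):

import numpy as np

compartments = np.zeros((1, 4, 4), dtype=int)
compartments[0, :2, :] = 1   # compartment 1 fills the upper half
compartments[0, 2:, :] = 2   # compartment 2 fills the lower half
vesicles = np.zeros_like(compartments)
vesicles[0, 2:, :2] = 1      # all vesicles lie inside compartment 2

labels = [int(lab) for lab in np.unique(compartments) if lab != 0]
overlaps = {lab: int(np.logical_and(compartments == lab, vesicles > 0).sum()) for lab in labels}
best = max(overlaps, key=overlaps.get)
mask = compartments == best
print(best, overlaps)  # 2 {1: 0, 2: 4}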
@@ -146,32 +157,34 @@ def compute_sizes_for_all_tomorams_autoComp(): compartments = f["/compartments/segment_from_3Dmodel_v2"][:] mask = get_compartment_with_max_overlap(compartments, segmentation) - #if more than half of the vesicles (approximation, its checking pixel and not label) would get filtered by mask it means the compartment seg didn't work and thus we won't use the mask + # if more than half of the vesicles (approximation, its checking pixel and not label) would get filtered by mask it means the compartment seg didn't work and thus we won't use the mask if np.sum(segmentation[mask == 0] > 0) > (0.5 * np.sum(segmentation > 0)): - print("using no mask") + print(f"using no mask for {tomo}") else: segmentation[mask == 0] = 0 _, sizes = convert_segmentation_to_spheres( segmentation, resolution=resolution, radius_factor=radius_factor, estimate_radius_2d=estimate_radius_2d ) - # Add sizes to the dataset dictionary under the tomogram name - dataset_results[ds_name][fname] = sizes + # Add sizes to the dataset dictionary under the appropriate category + dataset_results[ds_name][category][fname] = sizes - # Save each dataset's results to a single CSV file - for ds_name, tomogram_data in dataset_results.items(): - # Create a DataFrame where each column is a tomogram's sizes - result_df = pd.DataFrame.from_dict(tomogram_data, orient='index').transpose() + # Save each dataset's results into separate CSV files for CTRL and DKO tomograms + for ds_name, categories in dataset_results.items(): + for category, tomogram_data in categories.items(): + # Sort tomograms by name within the category + sorted_data = dict(sorted(tomogram_data.items())) # Sort by tomogram names + result_df = pd.DataFrame.from_dict(sorted_data, orient='index').transpose() # Define the output file path - output_path = os.path.join(RESULT_FOLDER, f"size_analysis_for_{ds_name}.csv") + output_path = os.path.join(RESULT_FOLDER, f"size_analysis_for_{ds_name}_{category}.csv") # Save the DataFrame to CSV result_df.to_csv(output_path, index=False) def main(): - compute_sizes_for_all_tomorams_manComp() - #compute_sizes_for_all_tomorams_autoComp() + #compute_sizes_for_all_tomorams_manComp() + compute_sizes_for_all_tomorams_autoComp() if __name__ == "__main__": From 0f40d3c331b99431b6251e3090935dcf5fb9d00d Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Fri, 15 Nov 2024 20:54:23 +0100 Subject: [PATCH 15/35] Update inner ear analysis --- .../inner_ear/analysis/analyze_distances.py | 23 ++++ .../analysis/analyze_vesicle_pools.py | 126 +++++++++++------- scripts/inner_ear/analysis/common.py | 53 ++++++++ 3 files changed, 155 insertions(+), 47 deletions(-) create mode 100644 scripts/inner_ear/analysis/common.py diff --git a/scripts/inner_ear/analysis/analyze_distances.py b/scripts/inner_ear/analysis/analyze_distances.py index e69de29..e8a77c1 100644 --- a/scripts/inner_ear/analysis/analyze_distances.py +++ b/scripts/inner_ear/analysis/analyze_distances.py @@ -0,0 +1,23 @@ +import matplotlib.pyplot as plt +import pandas as pd +import seaborn as sns + +from common import get_all_measurements, get_measurements_with_annotation + + +def for_tomos_with_annotation(): + manual_assignments, automatic_assignments = get_measurements_with_annotation() + breakpoint() + + +# def for_all_tomos(): +# automatic_assignments = get_all_measurements() + + +def main(): + for_tomos_with_annotation() + # for_all_tomos() + + +if __name__ == "__main__": + main() diff --git a/scripts/inner_ear/analysis/analyze_vesicle_pools.py 
b/scripts/inner_ear/analysis/analyze_vesicle_pools.py index b53a9f6..6273277 100644 --- a/scripts/inner_ear/analysis/analyze_vesicle_pools.py +++ b/scripts/inner_ear/analysis/analyze_vesicle_pools.py @@ -1,72 +1,104 @@ -import sys - -import numpy as np +import matplotlib.pyplot as plt import pandas as pd +import seaborn as sns -sys.path.append("..") -sys.path.append("../processing") - -from combine_measurements import combine_manual_results, combine_automatic_results # noqa -# from compare_pool_assignments import create_manual_assignment -from parse_table import parse_table, get_data_root # noqa +from common import get_all_measurements, get_measurements_with_annotation -def get_manual_assignments(): - result_path = "../results/20240917_1/fully_manual_analysis_results.xlsx" - results = pd.read_excel(result_path) - return results +def plot_pools(data, errors): + data_for_plot = pd.melt(data, id_vars="Pool", var_name="Method", value_name="Measurement") + # Plot using seaborn + plt.figure(figsize=(8, 6)) + sns.barplot(data=data_for_plot, x="Pool", y="Measurement", hue="Method") -def get_automatic_assignments(tomograms): - result_path = "../results/20240917_1/automatic_analysis_results.xlsx" - results = pd.read_excel(result_path) - results = results[results["tomogram"].isin(tomograms)] - return results + # FIXME + # error_for_plot = pd.melt(errors, id_vars="Pool", var_name="Method", value_name="Error") + # # Add error bars manually + # for i, bar in enumerate(plt.gca().patches): + # # Get Standard Deviation for the current bar + # err = error_for_plot.iloc[i % len(error_for_plot)]["Error"] + # bar_x = bar.get_x() + bar.get_width() / 2 + # bar_y = bar.get_height() + # plt.errorbar(bar_x, bar_y, yerr=err, fmt="none", c="black", capsize=4) + # Customize the chart + plt.title("Different measurements for vesicles per pool") + plt.xlabel("Vesicle Pools") + plt.ylabel("Vesicles per Tomogram") + plt.grid(axis="y", linestyle="--", alpha=0.7) + plt.legend(title="Approaches") -def plot_confusion_matrix(manual_assignments, automatic_assignments): - pass + # Show the plot + plt.tight_layout() + plt.show() +# TODO use the actual results without vesicle post-processing. 
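# Note on the counting pattern used below (illustrative values, hypothetical pool names):
#   assignments.groupby(["tomogram", "pool"]).size().unstack(fill_value=0)
# yields one row per tomogram and one column per pool, e.g.
#   pool      pool_A  pool_B
#   tomo_01        5     120
#   tomo_02        7      98
# and .agg(["mean", "std"]).transpose().reset_index() then reduces this to the
# per-pool mean and standard deviation across tomograms, which is what gets
# plotted and written to the Excel output.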
def for_tomos_with_annotation(): - manual_assignments = get_manual_assignments() - manual_tomograms = pd.unique(manual_assignments["tomogram"]) - automatic_assignments = get_automatic_assignments(manual_tomograms) + manual_assignments, automatic_assignments = get_measurements_with_annotation() - tomograms = pd.unique(automatic_assignments["tomogram"]) - manual_assignments = manual_assignments[manual_assignments["tomogram"].isin(tomograms)] - assert len(pd.unique(manual_assignments["tomogram"])) == len(pd.unique(automatic_assignments["tomogram"])) + manual_counts = manual_assignments.groupby(["tomogram", "pool"]).size().unstack(fill_value=0) + automatic_counts = automatic_assignments.groupby(["tomogram", "pool"]).size().unstack(fill_value=0) - n_tomograms = len(tomograms) - pool_names, manual_pool_counts = np.unique(manual_assignments["pool"].values, return_counts=True) - _, automatic_pool_counts = np.unique(automatic_assignments["pool"].values, return_counts=True) + manual_stats = manual_counts.agg(["mean", "std"]).transpose().reset_index() + automatic_stats = automatic_counts.agg(["mean", "std"]).transpose().reset_index() - manual_pool_counts = manual_pool_counts.astype("float32") - manual_pool_counts /= n_tomograms - automatic_pool_counts = automatic_pool_counts.astype("float32") - automatic_pool_counts /= n_tomograms + data = pd.DataFrame({ + "Pool": manual_stats["pool"], + "Manual": manual_stats["mean"], + "Semi-automatic": automatic_stats["mean"], + "Automatic": automatic_stats["mean"], + }) + errors = pd.DataFrame({ + "Pool": manual_stats["pool"], + "Manual": manual_stats["std"], + "Semi-automatic": automatic_stats["std"], + "Automatic": automatic_stats["std"], + }) - print(pool_names) - print(manual_pool_counts) - print(automatic_pool_counts) + plot_pools(data, errors) - # TODO plot as a bar chart - # TODO save excel - # TODO add 'more automatic' results + output_path = "./vesicle_pools_small.xlsx" + data.to_excel(output_path, index=False, sheet_name="Average") + with pd.ExcelWriter(output_path, engine="openpyxl", mode="a") as writer: + errors.to_excel(writer, sheet_name="StandardDeviation", index=False) - breakpoint() - -# TODO +# TODO use the actual results without vesicle post-processing. def for_all_tomos(): - pass + + automatic_assignments = get_all_measurements() + # TODO double check why this number is so different! (64 vs. 
81) + # tomos = pd.unique(automatic_assignments["tomogram"]) + # print(len(tomos), n_tomos) + # assert len(tomos) == n_tomos + + automatic_counts = automatic_assignments.groupby(["tomogram", "pool"]).size().unstack(fill_value=0) + automatic_stats = automatic_counts.agg(["mean", "std"]).transpose().reset_index() + + data = pd.DataFrame({ + "Pool": automatic_stats["pool"], + "Semi-automatic": automatic_stats["mean"], + "Automatic": automatic_stats["mean"], + }) + errors = pd.DataFrame({ + "Pool": automatic_stats["pool"], + "Semi-automatic": automatic_stats["std"], + "Automatic": automatic_stats["std"], + }) + + plot_pools(data, errors) + + output_path = "./vesicle_pools_large.xlsx" + data.to_excel(output_path, index=False, sheet_name="Average") + with pd.ExcelWriter(output_path, engine="openpyxl", mode="a") as writer: + errors.to_excel(writer, sheet_name="StandardDeviation", index=False) def main(): - # data_root = get_data_root() - # table_path = os.path.join(data_root, "Electron-Microscopy-Susi", "Übersicht.xlsx") - # table = parse_table(table_path, data_root) - for_tomos_with_annotation() + # for_tomos_with_annotation() + for_all_tomos() if __name__ == "__main__": diff --git a/scripts/inner_ear/analysis/common.py b/scripts/inner_ear/analysis/common.py new file mode 100644 index 0000000..cea779c --- /dev/null +++ b/scripts/inner_ear/analysis/common.py @@ -0,0 +1,53 @@ +import os +import sys +import pandas as pd + +sys.path.append("../processing") + +from parse_table import get_data_root # noqa + + +def get_manual_assignments(): + result_path = "../results/20240917_1/fully_manual_analysis_results.xlsx" + results = pd.read_excel(result_path) + return results + + +def get_automatic_assignments(tomograms): + result_path = "../results/20240917_1/automatic_analysis_results.xlsx" + results = pd.read_excel(result_path) + results = results[results["tomogram"].isin(tomograms)] + return results + + +def get_measurements_with_annotation(): + manual_assignments = get_manual_assignments() + manual_tomograms = pd.unique(manual_assignments["tomogram"]) + automatic_assignments = get_automatic_assignments(manual_tomograms) + + tomograms = pd.unique(automatic_assignments["tomogram"]) + manual_assignments = manual_assignments[manual_assignments["tomogram"].isin(tomograms)] + assert len(pd.unique(manual_assignments["tomogram"])) == len(pd.unique(automatic_assignments["tomogram"])) + + return manual_assignments, automatic_assignments + + +def get_all_measurements(): + data_root = get_data_root() + val_table = os.path.join(data_root, "Electron-Microscopy-Susi", "Validierungs-Tabelle-v3.xlsx") + val_table = pd.read_excel(val_table) + + val_table = val_table[val_table["Kommentar 27-10-24"] == "passt"] + n_tomos = len(val_table) + assert n_tomos > 0 + tomo_names = [] + for _, row in val_table.iterrows(): + name = "/".join([ + row.Bedingung, f"Mouse {int(row.Maus)}", + row["Ribbon-Orientierung"].lower().rstrip("?"), + str(int(row["OwnCloud-Unterordner"]))] + ) + tomo_names.append(name) + + automatic_assignments = get_automatic_assignments(tomo_names) + return automatic_assignments From ad4741b72b3a36041d341f55a6bf0269c20ed3d5 Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Sun, 17 Nov 2024 14:03:01 +0100 Subject: [PATCH 16/35] Update inner ear analysis --- scripts/inner_ear/analysis/.gitignore | 2 + .../inner_ear/analysis/analyze_distances.py | 84 +++++++++++++++++-- .../analysis/analyze_vesicle_pools.py | 32 ++++--- .../combine_fully_automatic_results.py | 69 +++++++++++++++ 
scripts/inner_ear/analysis/common.py | 40 +++++++-- 5 files changed, 199 insertions(+), 28 deletions(-) create mode 100644 scripts/inner_ear/analysis/.gitignore create mode 100644 scripts/inner_ear/analysis/combine_fully_automatic_results.py diff --git a/scripts/inner_ear/analysis/.gitignore b/scripts/inner_ear/analysis/.gitignore new file mode 100644 index 0000000..383f264 --- /dev/null +++ b/scripts/inner_ear/analysis/.gitignore @@ -0,0 +1,2 @@ +panels/ +*.zip diff --git a/scripts/inner_ear/analysis/analyze_distances.py b/scripts/inner_ear/analysis/analyze_distances.py index e8a77c1..029dc63 100644 --- a/scripts/inner_ear/analysis/analyze_distances.py +++ b/scripts/inner_ear/analysis/analyze_distances.py @@ -6,17 +6,91 @@ def for_tomos_with_annotation(): - manual_assignments, automatic_assignments = get_measurements_with_annotation() - breakpoint() + manual_assignments, semi_automatic_assignments, automatic_assignments = get_measurements_with_annotation() + manual_distances = manual_assignments[ + ["pool", "ribbon_distance [nm]", "pd_distance [nm]", "boundary_distance [nm]"] + ] + manual_distances["approach"] = ["manual"] * len(manual_distances) -# def for_all_tomos(): -# automatic_assignments = get_all_measurements() + semi_automatic_distances = semi_automatic_assignments[ + ["pool", "ribbon_distance [nm]", "pd_distance [nm]", "boundary_distance [nm]"] + ] + semi_automatic_distances["approach"] = ["semi_automatic"] * len(semi_automatic_distances) + + automatic_distances = automatic_assignments[ + ["pool", "ribbon_distance [nm]", "pd_distance [nm]", "boundary_distance [nm]"] + ] + automatic_distances["approach"] = ["automatic"] * len(automatic_distances) + + distances = pd.concat([manual_distances, semi_automatic_distances, automatic_distances]) + distances.to_excel("./results/distances_with_manual_annotations.xlsx", index=False) + + pools = pd.unique(distances["pool"]) + dist_cols = ["ribbon_distance [nm]", "pd_distance [nm]", "boundary_distance [nm]"] + + fig, axes = plt.subplots(3, 3) + + # multiple = "stack" + multiple = "layer" + + structures = ["Ribbon", "PD", "Boundary"] + for i, pool in enumerate(pools): + pool_distances = distances[distances["pool"] == pool] + for j, dist_col in enumerate(dist_cols): + ax = axes[i, j] + ax.set_title(f"{pool} to {structures[j]}") + sns.histplot( + data=pool_distances, x=dist_col, hue="approach", multiple=multiple, kde=False, ax=ax + ) + ax.set_xlabel("distance [nm]") + + fig.tight_layout() + plt.show() + + +def for_all_tomos(): + semi_automatic_assignments, automatic_assignments = get_all_measurements() + + semi_automatic_distances = semi_automatic_assignments[ + ["pool", "ribbon_distance [nm]", "pd_distance [nm]", "boundary_distance [nm]"] + ] + semi_automatic_distances["approach"] = ["semi_automatic"] * len(semi_automatic_distances) + + automatic_distances = automatic_assignments[ + ["pool", "ribbon_distance [nm]", "pd_distance [nm]", "boundary_distance [nm]"] + ] + automatic_distances["approach"] = ["automatic"] * len(automatic_distances) + + distances = pd.concat([semi_automatic_distances, automatic_distances]) + distances.to_excel("./results/distances_all_tomograms.xlsx", index=False) + + pools = pd.unique(distances["pool"]) + dist_cols = ["ribbon_distance [nm]", "pd_distance [nm]", "boundary_distance [nm]"] + + fig, axes = plt.subplots(3, 3) + + # multiple = "stack" + multiple = "layer" + + structures = ["Ribbon", "PD", "Boundary"] + for i, pool in enumerate(pools): + pool_distances = distances[distances["pool"] == pool] + for j, dist_col 
in enumerate(dist_cols): + ax = axes[i, j] + ax.set_title(f"{pool} to {structures[j]}") + sns.histplot( + data=pool_distances, x=dist_col, hue="approach", multiple=multiple, kde=False, ax=ax + ) + ax.set_xlabel("distance [nm]") + + fig.tight_layout() + plt.show() def main(): for_tomos_with_annotation() - # for_all_tomos() + for_all_tomos() if __name__ == "__main__": diff --git a/scripts/inner_ear/analysis/analyze_vesicle_pools.py b/scripts/inner_ear/analysis/analyze_vesicle_pools.py index 6273277..1bd5fe1 100644 --- a/scripts/inner_ear/analysis/analyze_vesicle_pools.py +++ b/scripts/inner_ear/analysis/analyze_vesicle_pools.py @@ -34,63 +34,61 @@ def plot_pools(data, errors): plt.show() -# TODO use the actual results without vesicle post-processing. def for_tomos_with_annotation(): - manual_assignments, automatic_assignments = get_measurements_with_annotation() + manual_assignments, semi_automatic_assignments, automatic_assignments = get_measurements_with_annotation() manual_counts = manual_assignments.groupby(["tomogram", "pool"]).size().unstack(fill_value=0) + semi_automatic_counts = semi_automatic_assignments.groupby(["tomogram", "pool"]).size().unstack(fill_value=0) automatic_counts = automatic_assignments.groupby(["tomogram", "pool"]).size().unstack(fill_value=0) manual_stats = manual_counts.agg(["mean", "std"]).transpose().reset_index() + semi_automatic_stats = semi_automatic_counts.agg(["mean", "std"]).transpose().reset_index() automatic_stats = automatic_counts.agg(["mean", "std"]).transpose().reset_index() data = pd.DataFrame({ "Pool": manual_stats["pool"], - "Manual": manual_stats["mean"], - "Semi-automatic": automatic_stats["mean"], + "Semi-automatic": semi_automatic_stats["mean"], "Automatic": automatic_stats["mean"], + "Manual": manual_stats["mean"], }) errors = pd.DataFrame({ "Pool": manual_stats["pool"], - "Manual": manual_stats["std"], - "Semi-automatic": automatic_stats["std"], + "Semi-automatic": semi_automatic_stats["std"], "Automatic": automatic_stats["std"], + "Manual": manual_stats["std"], }) plot_pools(data, errors) - output_path = "./vesicle_pools_small.xlsx" + output_path = "./results/vesicle_pools_with_manual_annotations.xlsx" data.to_excel(output_path, index=False, sheet_name="Average") with pd.ExcelWriter(output_path, engine="openpyxl", mode="a") as writer: errors.to_excel(writer, sheet_name="StandardDeviation", index=False) -# TODO use the actual results without vesicle post-processing. def for_all_tomos(): - - automatic_assignments = get_all_measurements() - # TODO double check why this number is so different! (64 vs. 
81) - # tomos = pd.unique(automatic_assignments["tomogram"]) - # print(len(tomos), n_tomos) - # assert len(tomos) == n_tomos + semi_automatic_assignments, automatic_assignments = get_all_measurements() automatic_counts = automatic_assignments.groupby(["tomogram", "pool"]).size().unstack(fill_value=0) automatic_stats = automatic_counts.agg(["mean", "std"]).transpose().reset_index() + semi_automatic_counts = semi_automatic_assignments.groupby(["tomogram", "pool"]).size().unstack(fill_value=0) + semi_automatic_stats = semi_automatic_counts.agg(["mean", "std"]).transpose().reset_index() + data = pd.DataFrame({ "Pool": automatic_stats["pool"], - "Semi-automatic": automatic_stats["mean"], + "Semi-automatic": semi_automatic_stats["mean"], "Automatic": automatic_stats["mean"], }) errors = pd.DataFrame({ "Pool": automatic_stats["pool"], - "Semi-automatic": automatic_stats["std"], + "Semi-automatic": semi_automatic_stats["std"], "Automatic": automatic_stats["std"], }) plot_pools(data, errors) - output_path = "./vesicle_pools_large.xlsx" + output_path = "./results/vesicle_pools_all_tomograms.xlsx" data.to_excel(output_path, index=False, sheet_name="Average") with pd.ExcelWriter(output_path, engine="openpyxl", mode="a") as writer: errors.to_excel(writer, sheet_name="StandardDeviation", index=False) diff --git a/scripts/inner_ear/analysis/combine_fully_automatic_results.py b/scripts/inner_ear/analysis/combine_fully_automatic_results.py new file mode 100644 index 0000000..54bdbc1 --- /dev/null +++ b/scripts/inner_ear/analysis/combine_fully_automatic_results.py @@ -0,0 +1,69 @@ +import os +import sys + +import pandas as pd + +sys.path.append("..") +sys.path.append("../processing") + + +def combine_fully_auto_results(table, data_root, output_path): + from combine_measurements import combine_results + + val_table_path = os.path.join(data_root, "Electron-Microscopy-Susi", "Validierungs-Tabelle-v3.xlsx") + val_table = pd.read_excel(val_table_path) + + results = {} + for _, row in table.iterrows(): + folder = row["Local Path"] + if folder == "": + continue + + row_selection = (val_table.Bedingung == row.Bedingung) &\ + (val_table.Maus == row.Maus) &\ + (val_table["Ribbon-Orientierung"] == row["Ribbon-Orientierung"]) &\ + (val_table["OwnCloud-Unterordner"] == row["OwnCloud-Unterordner"]) + complete_vals = val_table[row_selection]["Fertig!"].values + is_complete = (complete_vals == "ja").all() + if not is_complete: + continue + + micro = row["EM alt vs. 
Neu"] + + tomo_name = os.path.relpath(folder, os.path.join(data_root, "Electron-Microscopy-Susi/Analyse")) + tab_name = "measurements_uncorrected_assignments.xlsx" + res_path = os.path.join(folder, "korrektur", tab_name) + if not os.path.exists(res_path): + res_path = os.path.join(folder, "Korrektur", tab_name) + assert os.path.exists(res_path), res_path + results[tomo_name] = (res_path, "alt" if micro == "beides" else micro) + + if micro == "beides": + micro = "neu" + + new_root = os.path.join(folder, "neues EM") + if not os.path.exists(new_root): + new_root = os.path.join(folder, "Tomo neues EM") + assert os.path.exists(new_root) + + res_path = os.path.join(new_root, "korrektur", "measurements.xlsx") + if not os.path.exists(res_path): + res_path = os.path.join(new_root, "Korrektur", "measurements.xlsx") + assert os.path.exists(res_path), res_path + results[tomo_name] = (res_path, "alt" if micro == "beides" else micro) + + combine_results(results, output_path, sheet_name="vesicles") + + +def main(): + from parse_table import parse_table, get_data_root + + data_root = get_data_root() + table_path = os.path.join(data_root, "Electron-Microscopy-Susi", "Übersicht.xlsx") + table = parse_table(table_path, data_root) + + res_path = "../results/fully_automatic_analysis_results.xlsx" + combine_fully_auto_results(table, data_root, output_path=res_path) + + +main() diff --git a/scripts/inner_ear/analysis/common.py b/scripts/inner_ear/analysis/common.py index cea779c..c3622d5 100644 --- a/scripts/inner_ear/analysis/common.py +++ b/scripts/inner_ear/analysis/common.py @@ -1,5 +1,6 @@ import os import sys + import pandas as pd sys.path.append("../processing") @@ -13,23 +14,35 @@ def get_manual_assignments(): return results -def get_automatic_assignments(tomograms): +def get_semi_automatic_assignments(tomograms): result_path = "../results/20240917_1/automatic_analysis_results.xlsx" results = pd.read_excel(result_path) results = results[results["tomogram"].isin(tomograms)] return results +def get_automatic_assignments(tomograms): + result_path = "../results/fully_automatic_analysis_results.xlsx" + results = pd.read_excel(result_path) + results = results[results["tomogram"].isin(tomograms)] + return results + + def get_measurements_with_annotation(): manual_assignments = get_manual_assignments() manual_tomograms = pd.unique(manual_assignments["tomogram"]) - automatic_assignments = get_automatic_assignments(manual_tomograms) + semi_automatic_assignments = get_semi_automatic_assignments(manual_tomograms) - tomograms = pd.unique(automatic_assignments["tomogram"]) + tomograms = pd.unique(semi_automatic_assignments["tomogram"]) manual_assignments = manual_assignments[manual_assignments["tomogram"].isin(tomograms)] - assert len(pd.unique(manual_assignments["tomogram"])) == len(pd.unique(automatic_assignments["tomogram"])) + assert len(pd.unique(manual_assignments["tomogram"])) == len(pd.unique(semi_automatic_assignments["tomogram"])) - return manual_assignments, automatic_assignments + automatic_assignments = get_automatic_assignments(tomograms) + filtered_tomograms = pd.unique(manual_assignments["tomogram"]) + assert len(filtered_tomograms) == len(pd.unique(automatic_assignments["tomogram"])) + + print("Tomograms with manual annotations:", len(filtered_tomograms)) + return manual_assignments, semi_automatic_assignments, automatic_assignments def get_all_measurements(): @@ -39,6 +52,7 @@ def get_all_measurements(): val_table = val_table[val_table["Kommentar 27-10-24"] == "passt"] n_tomos = len(val_table) + 
print("All tomograms:", n_tomos) assert n_tomos > 0 tomo_names = [] for _, row in val_table.iterrows(): @@ -49,5 +63,19 @@ def get_all_measurements(): ) tomo_names.append(name) + semi_automatic_assignments = get_semi_automatic_assignments(tomo_names) + filtered_tomo_names = pd.unique(semi_automatic_assignments["tomogram"]).tolist() + automatic_assignments = get_automatic_assignments(tomo_names) - return automatic_assignments + assert len(filtered_tomo_names) == len(pd.unique(automatic_assignments["tomogram"])) + + return semi_automatic_assignments, automatic_assignments + + +def main(): + get_measurements_with_annotation() + get_all_measurements() + + +if __name__ == "__main__": + main() From 305a80b74950e3f00bbdc9f168670fb4cf0b2126 Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Sun, 17 Nov 2024 21:16:46 +0100 Subject: [PATCH 17/35] Updates to inner ear training and eval --- scripts/inner_ear/processing/run_analyis.py | 19 ++++++++++++++----- .../training/postprocessing_and_evaluation.py | 8 ++++---- .../structure_prediction_and_evaluation.py | 8 ++++---- 3 files changed, 22 insertions(+), 13 deletions(-) diff --git a/scripts/inner_ear/processing/run_analyis.py b/scripts/inner_ear/processing/run_analyis.py index 8508673..ca5ea0b 100644 --- a/scripts/inner_ear/processing/run_analyis.py +++ b/scripts/inner_ear/processing/run_analyis.py @@ -52,7 +52,7 @@ def _load_segmentation(seg_path, tomo_shape): return seg -def compute_distances(segmentation_paths, save_folder, resolution, force, tomo_shape): +def compute_distances(segmentation_paths, save_folder, resolution, force, tomo_shape, use_corrected_vesicles=True): os.makedirs(save_folder, exist_ok=True) vesicles = None @@ -61,9 +61,10 @@ def _require_vesicles(): vesicle_path = segmentation_paths["vesicles"] if vesicles is None: - vesicle_pool_path = os.path.join(os.path.split(save_folder)[0], "vesicle_pools.tif") - if os.path.exists(vesicle_pool_path): - vesicle_path = vesicle_pool_path + if use_corrected_vesicles: + vesicle_pool_path = os.path.join(os.path.split(save_folder)[0], "vesicle_pools.tif") + if os.path.exists(vesicle_pool_path): + vesicle_path = vesicle_pool_path return _load_segmentation(vesicle_path, tomo_shape) else: @@ -394,14 +395,22 @@ def analyze_folder(folder, version, n_ribbons, force, use_corrected_vesicles): with open_file(data_path, "r") as f: tomo_shape = f["data"].shape - out_distance_folder = os.path.join(output_folder, "distances") + if use_corrected_vesicles: + out_distance_folder = os.path.join(output_folder, "distances") + else: + out_distance_folder = os.path.join(output_folder, "distances_uncorrected") distance_paths, skip = compute_distances( segmentation_paths, out_distance_folder, resolution, force=force, tomo_shape=tomo_shape, + use_corrected_vesicles=use_corrected_vesicles ) if skip: return if force or not os.path.exists(result_path): + + if not use_corrected_vesicles: + pool_correction_path = None + analyze_distances( segmentation_paths, distance_paths, resolution, result_path, tomo_shape, pool_correction_path=pool_correction_path diff --git a/scripts/inner_ear/training/postprocessing_and_evaluation.py b/scripts/inner_ear/training/postprocessing_and_evaluation.py index 30c9e42..30c1313 100644 --- a/scripts/inner_ear/training/postprocessing_and_evaluation.py +++ b/scripts/inner_ear/training/postprocessing_and_evaluation.py @@ -13,8 +13,8 @@ from train_structure_segmentation import get_train_val_test_split -ROOT = "/home/pape/Work/data/synaptic_reconstruction/moser" -# ROOT = 
"/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/moser" +# ROOT = "/home/pape/Work/data/synaptic_reconstruction/moser" +ROOT = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/moser" MODEL_PATH = "/mnt/lustre-emmy-hdd/projects/nim00007/models/synaptic-reconstruction/vesicle-DA-inner_ear-v2" OUTPUT_ROOT = "./predictions" @@ -187,8 +187,8 @@ def segment_train_domain(): name = "train_domain" run_vesicle_segmentation(paths, MODEL_PATH, name, is_nested=True) postprocess_structures(paths, name, is_nested=True) - visualize(paths, name, is_nested=True) - results = evaluate(paths, name, is_nested=True, save_path="./results/train_domain_postprocessed.csv") + # visualize(paths, name, is_nested=True) + results = evaluate(paths, name, is_nested=True, save_path="./results/train_domain_postprocessed_v2.csv") print(results) print("Ribbon segmentation:", results["ribbon"].mean(), "+-", results["ribbon"].std()) print("PD segmentation:", results["PD"].mean(), "+-", results["PD"].std()) diff --git a/scripts/inner_ear/training/structure_prediction_and_evaluation.py b/scripts/inner_ear/training/structure_prediction_and_evaluation.py index cb174c7..7ed89a9 100644 --- a/scripts/inner_ear/training/structure_prediction_and_evaluation.py +++ b/scripts/inner_ear/training/structure_prediction_and_evaluation.py @@ -143,10 +143,10 @@ def predict_and_evaluate_train_domain(): print("Run evaluation on", len(paths), "tomos") name = "train_domain" - model_path = "./checkpoints/inner_ear_structure_model" + model_path = "./checkpoints/inner_ear_structure_model_v2" run_prediction(paths, model_path, name, is_nested=True) - evaluate(paths, name, is_nested=True, save_path="./results/train_domain.csv") + evaluate(paths, name, is_nested=True, save_path="./results/train_domain_v2.csv") visualize(paths, name, is_nested=True) @@ -187,9 +187,9 @@ def predict_and_evaluate_rat(): def main(): - # predict_and_evaluate_train_domain() + predict_and_evaluate_train_domain() # predict_and_evaluate_vesicle_pools() - predict_and_evaluate_rat() + # predict_and_evaluate_rat() if __name__ == "__main__": From 903e59ec13d099e16a8e71063c0621da62b66116 Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Mon, 18 Nov 2024 19:50:18 +0100 Subject: [PATCH 18/35] Update inner ear analysis --- .../inner_ear/analysis/analyze_distances.py | 131 ++++++++++++------ .../analysis/analyze_vesicle_pools.py | 2 + 2 files changed, 89 insertions(+), 44 deletions(-) diff --git a/scripts/inner_ear/analysis/analyze_distances.py b/scripts/inner_ear/analysis/analyze_distances.py index 029dc63..534f90b 100644 --- a/scripts/inner_ear/analysis/analyze_distances.py +++ b/scripts/inner_ear/analysis/analyze_distances.py @@ -1,31 +1,14 @@ +import os + import matplotlib.pyplot as plt +import numpy as np import pandas as pd import seaborn as sns from common import get_all_measurements, get_measurements_with_annotation -def for_tomos_with_annotation(): - manual_assignments, semi_automatic_assignments, automatic_assignments = get_measurements_with_annotation() - - manual_distances = manual_assignments[ - ["pool", "ribbon_distance [nm]", "pd_distance [nm]", "boundary_distance [nm]"] - ] - manual_distances["approach"] = ["manual"] * len(manual_distances) - - semi_automatic_distances = semi_automatic_assignments[ - ["pool", "ribbon_distance [nm]", "pd_distance [nm]", "boundary_distance [nm]"] - ] - semi_automatic_distances["approach"] = ["semi_automatic"] * len(semi_automatic_distances) - - automatic_distances = automatic_assignments[ - ["pool", 
"ribbon_distance [nm]", "pd_distance [nm]", "boundary_distance [nm]"] - ] - automatic_distances["approach"] = ["automatic"] * len(automatic_distances) - - distances = pd.concat([manual_distances, semi_automatic_distances, automatic_distances]) - distances.to_excel("./results/distances_with_manual_annotations.xlsx", index=False) - +def _plot_all(distances): pools = pd.unique(distances["pool"]) dist_cols = ["ribbon_distance [nm]", "pd_distance [nm]", "boundary_distance [nm]"] @@ -49,8 +32,64 @@ def for_tomos_with_annotation(): plt.show() -def for_all_tomos(): - semi_automatic_assignments, automatic_assignments = get_all_measurements() +# TODO rename the method names. +# We only care about the following distances: +# - MP-V -> PD, AZ (Boundary) +# - Docked-V -> PD +# - RA-V -> Ribbon +def _plot_selected(distances, save_path=None): + fig, axes = plt.subplots(2, 2) + multiple = "layer" + + if save_path is not None and os.path.exists(save_path): + os.remove(save_path) + + def _plot(pool_name, distance_col, structure_name, ax): + + this_distances = distances[distances["pool"] == pool_name][["approach", distance_col]] + + ax.set_title(f"{pool_name} to {structure_name}") + sns.histplot( + data=this_distances, x=distance_col, hue="approach", multiple=multiple, kde=False, ax=ax + ) + ax.set_xlabel("distance [nm]") + + if save_path is not None: + approaches = pd.unique(this_distances["approach"]) + dist_values = [ + this_distances[this_distances["approach"] == approach][distance_col].values.tolist() + for approach in approaches + ] + max_len = max([len(vals) for vals in dist_values]) + save_distances = { + approach: dists + [np.nan] * (max_len - len(dists)) + for approach, dists in zip(approaches, dist_values) + } + save_distances = pd.DataFrame(save_distances) + + sheet_name = f"{pool_name}_{structure_name}" + if os.path.exists(save_path): + with pd.ExcelWriter(save_path, engine="openpyxl", mode="a") as writer: + save_distances.to_excel(writer, sheet_name=sheet_name, index=False) + else: + save_distances.to_excel(save_path, index=False, sheet_name=sheet_name) + + _plot("MP-V", "pd_distance [nm]", "PD", axes[0, 0]) + _plot("MP-V", "boundary_distance [nm]", "AZ Membrane", axes[0, 1]) + _plot("Docked-V", "pd_distance [nm]", "PD", axes[1, 0]) + _plot("RA-V", "ribbon_distance [nm]", "Ribbon", axes[1, 1]) + + fig.tight_layout() + plt.show() + + +def for_tomos_with_annotation(plot_all=True): + manual_assignments, semi_automatic_assignments, automatic_assignments = get_measurements_with_annotation() + + manual_distances = manual_assignments[ + ["pool", "ribbon_distance [nm]", "pd_distance [nm]", "boundary_distance [nm]"] + ] + manual_distances["approach"] = ["manual"] * len(manual_distances) semi_automatic_distances = semi_automatic_assignments[ ["pool", "ribbon_distance [nm]", "pd_distance [nm]", "boundary_distance [nm]"] @@ -62,35 +101,39 @@ def for_all_tomos(): ] automatic_distances["approach"] = ["automatic"] * len(automatic_distances) - distances = pd.concat([semi_automatic_distances, automatic_distances]) - distances.to_excel("./results/distances_all_tomograms.xlsx", index=False) + distances = pd.concat([manual_distances, semi_automatic_distances, automatic_distances]) + if plot_all: + distances.to_excel("./results/distances_with_manual_annotations.xlsx", index=False) + _plot_all(distances) + else: + _plot_selected(distances, save_path="./results/selected_distances_manual_annotations.xlsx") - pools = pd.unique(distances["pool"]) - dist_cols = ["ribbon_distance [nm]", "pd_distance [nm]", 
"boundary_distance [nm]"] - fig, axes = plt.subplots(3, 3) +def for_all_tomos(plot_all=True): + semi_automatic_assignments, automatic_assignments = get_all_measurements() - # multiple = "stack" - multiple = "layer" + semi_automatic_distances = semi_automatic_assignments[ + ["pool", "ribbon_distance [nm]", "pd_distance [nm]", "boundary_distance [nm]"] + ] + semi_automatic_distances["approach"] = ["semi_automatic"] * len(semi_automatic_distances) - structures = ["Ribbon", "PD", "Boundary"] - for i, pool in enumerate(pools): - pool_distances = distances[distances["pool"] == pool] - for j, dist_col in enumerate(dist_cols): - ax = axes[i, j] - ax.set_title(f"{pool} to {structures[j]}") - sns.histplot( - data=pool_distances, x=dist_col, hue="approach", multiple=multiple, kde=False, ax=ax - ) - ax.set_xlabel("distance [nm]") + automatic_distances = automatic_assignments[ + ["pool", "ribbon_distance [nm]", "pd_distance [nm]", "boundary_distance [nm]"] + ] + automatic_distances["approach"] = ["automatic"] * len(automatic_distances) - fig.tight_layout() - plt.show() + distances = pd.concat([semi_automatic_distances, automatic_distances]) + if plot_all: + distances.to_excel("./results/distances_all_tomograms.xlsx", index=False) + _plot_all(distances) + else: + _plot_selected(distances, save_path="./results/selected_distances_all_tomograms.xlsx") def main(): - for_tomos_with_annotation() - for_all_tomos() + plot_all = False + for_tomos_with_annotation(plot_all=plot_all) + for_all_tomos(plot_all=plot_all) if __name__ == "__main__": diff --git a/scripts/inner_ear/analysis/analyze_vesicle_pools.py b/scripts/inner_ear/analysis/analyze_vesicle_pools.py index 1bd5fe1..e9ad651 100644 --- a/scripts/inner_ear/analysis/analyze_vesicle_pools.py +++ b/scripts/inner_ear/analysis/analyze_vesicle_pools.py @@ -94,6 +94,8 @@ def for_all_tomos(): errors.to_excel(writer, sheet_name="StandardDeviation", index=False) +# TODO: export the vesicle diameters +# TODO: export the ribbon and pd stats def main(): # for_tomos_with_annotation() for_all_tomos() From b1449d23ff090a8b8c8c2ebd5845ea578e7f9be1 Mon Sep 17 00:00:00 2001 From: SarahMuth Date: Tue, 19 Nov 2024 17:57:53 +0100 Subject: [PATCH 19/35] minor changes --- scripts/cooper/analysis/calc_AZ_area.py | 20 ++++- scripts/cooper/analysis/run_analysis_1.py | 53 ++++++----- .../run_spatial_distribution_analysis.py | 87 +++++++++++++++++-- 3 files changed, 122 insertions(+), 38 deletions(-) diff --git a/scripts/cooper/analysis/calc_AZ_area.py b/scripts/cooper/analysis/calc_AZ_area.py index e9fcb52..592b043 100644 --- a/scripts/cooper/analysis/calc_AZ_area.py +++ b/scripts/cooper/analysis/calc_AZ_area.py @@ -76,7 +76,8 @@ def calculate_AZ_area_simple(tomo_path, pixel_size_nm=1.554): def calculate_AZ_surface(tomo_path, pixel_size_nm=1.554): with h5py.File(tomo_path, "r") as f: - AZ_seg = f["/AZ/segment_from_AZmodel_v3"][:] + #AZ_seg = f["/AZ/segment_from_AZmodel_v3"][:] + AZ_seg = f["/filtered_az"][:] # Apply binary closing to smooth the segmented regions struct_elem = ball(1) # Use a small 3D structuring element @@ -103,6 +104,16 @@ def calculate_AZ_surface(tomo_path, pixel_size_nm=1.554): return surface_area +def calculate_AZ_surface_simple(tomo_path, pixel_size_nm=1.554): + with h5py.File(tomo_path, "r") as f: + AZ_seg = f["/labels/AZ"][:] + + morphology_data = compute_object_morphology(AZ_seg, "AZ Structure", resolution=(pixel_size_nm, pixel_size_nm, pixel_size_nm)) + surface_column = "surface [nm^2]" #if resolution is not None else "surface [pixel^2]" + surface_area = 
morphology_data[surface_column].iloc[0] + + return surface_area + def calculate_AZ_surface_convexHull(tomo_path, pixel_size_nm=1.554): with h5py.File(tomo_path, "r") as f: AZ_seg = f["/AZ/segment_from_AZmodel_v3"][:] @@ -194,7 +205,8 @@ def process_datasets(folder_path, output_csv="AZ_areas.csv", pixel_size_nm=1.554 #AZ_area = calculate_total_AZ_area(tomo_path, pixel_size_nm) #AZ_area = calculate_AZ_area_simple(tomo_path, pixel_size_nm) #AZ_surface_area = calculate_AZ_surface(tomo_path, pixel_size_nm) - AZ_surface_area = calculate_AZ_surface_convexHull(tomo_path, pixel_size_nm) + #AZ_surface_area = calculate_AZ_surface_convexHull(tomo_path, pixel_size_nm) + AZ_surface_area = calculate_AZ_surface_simple(tomo_path, pixel_size_nm) # Append results to list results.append({ "Dataset": dataset_name, @@ -217,8 +229,8 @@ def process_datasets(folder_path, output_csv="AZ_areas.csv", pixel_size_nm=1.554 def main(): # Define the path to the folder containing dataset folders - folder_path = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/segmentation/for_spatial_distribution_analysis/final_Imig2014_seg_manComp" - output_csv = "./analysis_results/AZ_intersect_manualCompartment/AZ_surface_area.csv" + folder_path = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/20241102_TOMO_DATA_Imig2014/exported/" + output_csv = "./analysis_results/manual_AZ_exported/AZ_surface_area.csv" # Call the function to process datasets and save results process_datasets(folder_path, output_csv = output_csv) diff --git a/scripts/cooper/analysis/run_analysis_1.py b/scripts/cooper/analysis/run_analysis_1.py index 4077ea3..3afde5d 100644 --- a/scripts/cooper/analysis/run_analysis_1.py +++ b/scripts/cooper/analysis/run_analysis_1.py @@ -11,9 +11,9 @@ from tqdm import tqdm from synaptic_reconstruction.imod.to_imod import convert_segmentation_to_spheres -DATA_ROOT = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/segmentation/for_spatial_distribution_analysis/final_Imig2014_seg/" # noqa -PREDICTION_ROOT = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/segmentation/for_spatial_distribution_analysis/final_Imig2014_seg/" # noqa -RESULT_FOLDER = "./analysis_results/AZ_intersect_autoCompartment" +DATA_ROOT = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/20241102_TOMO_DATA_Imig2014/exported/" # noqa +PREDICTION_ROOT = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/segmentation/for_spatial_distribution_analysis/final_Imig2014_seg_manComp" # noqa +RESULT_FOLDER = "./analysis_results/AZ_intersect_manualCompartment" def get_compartment_with_max_overlap(compartments, vesicles): """ @@ -64,10 +64,10 @@ def compute_sizes_for_all_tomorams_manComp(): os.makedirs(RESULT_FOLDER, exist_ok=True) resolution = (1.554,) * 3 # Change for each dataset #1.554 for Munc and snap #0.8681 for 04 dataset - radius_factor = 0.7 + radius_factor = 1 estimate_radius_2d = True - # Dictionary to hold the results for each dataset + # Dictionary to hold the results for each dataset and category (CTRL or DKO) dataset_results = {} tomograms = sorted(glob(os.path.join(PREDICTION_ROOT, "**/*.h5"), recursive=True)) @@ -75,14 +75,18 @@ def compute_sizes_for_all_tomorams_manComp(): ds_name, fname = os.path.split(tomo) ds_name = os.path.split(ds_name)[1] fname = os.path.splitext(fname)[0] - # Initialize a new dictionary entry for each dataset if not already present + + # Determine if the tomogram is 'CTRL' or 'DKO' + category = "CTRL" if "CTRL" in fname else "DKO" + + # Initialize a new 
dictionary entry for each dataset and category if not already present if ds_name not in dataset_results: - dataset_results[ds_name] = {} + dataset_results[ds_name] = {'CTRL': {}, 'DKO': {}} # Skip if this tomogram already exists in the dataset dictionary - if fname in dataset_results[ds_name]: + if fname in dataset_results[ds_name][category]: continue - + # Load the vesicle segmentation from the predictions. with h5py.File(tomo, "r") as f: segmentation = f["/vesicles/segment_from_combined_vesicles"][:] @@ -98,32 +102,27 @@ def compute_sizes_for_all_tomorams_manComp(): segmentation, resolution=resolution, radius_factor=radius_factor, estimate_radius_2d=estimate_radius_2d ) - # Add sizes to the dataset dictionary under the tomogram name - dataset_results[ds_name][fname] = sizes + # Add sizes to the dataset dictionary under the appropriate category + dataset_results[ds_name][category][fname] = sizes - # Save each dataset's results to a single CSV file - for ds_name, tomogram_data in dataset_results.items(): - # Create a DataFrame where each column is a tomogram's sizes - result_df = pd.DataFrame.from_dict(tomogram_data, orient='index').transpose() + # Save each dataset's results into separate CSV files for CTRL and DKO tomograms + for ds_name, categories in dataset_results.items(): + for category, tomogram_data in categories.items(): + # Sort tomograms by name within the category + sorted_data = dict(sorted(tomogram_data.items())) # Sort by tomogram names + result_df = pd.DataFrame.from_dict(sorted_data, orient='index').transpose() # Define the output file path - output_path = os.path.join(RESULT_FOLDER, f"size_analysis_for_{ds_name}.csv") + output_path = os.path.join(RESULT_FOLDER, f"size_analysis_for_{ds_name}_{category}_rf1.csv") # Save the DataFrame to CSV result_df.to_csv(output_path, index=False) -import os -import pandas as pd -import numpy as np -from glob import glob -import h5py -from tqdm import tqdm - def compute_sizes_for_all_tomorams_autoComp(): os.makedirs(RESULT_FOLDER, exist_ok=True) resolution = (1.554,) * 3 # Change for each dataset #1.554 for Munc and snap #0.8681 for 04 dataset - radius_factor = 0.7 + radius_factor = 1 estimate_radius_2d = True # Dictionary to hold the results for each dataset and category (CTRL or DKO) @@ -177,14 +176,14 @@ def compute_sizes_for_all_tomorams_autoComp(): result_df = pd.DataFrame.from_dict(sorted_data, orient='index').transpose() # Define the output file path - output_path = os.path.join(RESULT_FOLDER, f"size_analysis_for_{ds_name}_{category}.csv") + output_path = os.path.join(RESULT_FOLDER, f"size_analysis_for_{ds_name}_{category}_rf1.csv") # Save the DataFrame to CSV result_df.to_csv(output_path, index=False) def main(): - #compute_sizes_for_all_tomorams_manComp() - compute_sizes_for_all_tomorams_autoComp() + compute_sizes_for_all_tomorams_manComp() + #compute_sizes_for_all_tomorams_autoComp() if __name__ == "__main__": diff --git a/scripts/cooper/analysis/run_spatial_distribution_analysis.py b/scripts/cooper/analysis/run_spatial_distribution_analysis.py index 9a890a1..cdc4c0d 100644 --- a/scripts/cooper/analysis/run_spatial_distribution_analysis.py +++ b/scripts/cooper/analysis/run_spatial_distribution_analysis.py @@ -6,9 +6,9 @@ from synaptic_reconstruction.distance_measurements import measure_segmentation_to_object_distances import numpy as np -DATA_ROOT = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/20241102_TOMO_DATA_Imig2014/exported/" # noqa -PREDICTION_ROOT = 
"/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/segmentation/for_spatial_distribution_analysis/final_Imig2014_seg_manComp" # noqa -RESULT_FOLDER = "./analysis_results/AZ_intersect_manualCompartment" +DATA_ROOT = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/segmentation/for_spatial_distribution_analysis/final_Imig2014_seg/" # noqa +PREDICTION_ROOT = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/segmentation/for_spatial_distribution_analysis/final_Imig2014_seg/" # noqa +RESULT_FOLDER = "./analysis_results/AZ_filtered_autoComp" def get_compartment_with_max_overlap(compartments, vesicles): @@ -55,7 +55,7 @@ def get_compartment_with_max_overlap(compartments, vesicles): # We compute the distances for all vesicles in the compartment masks to the AZ. # We use the same different resolution, depending on dataset. # The closest distance is calculated, i.e., the closest point on the outer membrane of the vesicle to the AZ. -def compute_sizes_for_all_tomorams(): +def compute_per_vesicle_distance_to_AZ(): os.makedirs(RESULT_FOLDER, exist_ok=True) resolution = (1.554,) * 3 # Change for each dataset #1.554 for Munc and snap #0.8681 for 04 dataset @@ -116,8 +116,80 @@ def compute_sizes_for_all_tomorams(): # Save the DataFrame to CSV result_df.to_csv(output_path, index=False) +def compute_per_vesicle_distance_to_filteredAZ(): + filtered_AZ_path = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/20241102_TOMO_DATA_Imig2014/az_seg_filtered" + os.makedirs(RESULT_FOLDER, exist_ok=True) + + resolution = (1.554,) * 3 # Change for each dataset #1.554 for Munc and snap #0.8681 for 04 dataset + + # Dictionary to hold the results for each dataset and category (CTRL or DKO) + dataset_results = {} + + tomograms = sorted(glob(os.path.join(PREDICTION_ROOT, "**/*.h5"), recursive=True)) + for tomo in tqdm(tomograms): + ds_name, fname = os.path.split(tomo) + ds_name = os.path.split(ds_name)[1] + fname = os.path.splitext(fname)[0] + + # Determine if the tomogram is 'CTRL' or 'DKO' + category = "CTRL" if "CTRL" in fname else "DKO" + + # Initialize a new dictionary entry for each dataset and category if not already present + if ds_name not in dataset_results: + dataset_results[ds_name] = {'CTRL': {}, 'DKO': {}} + + # Skip if this tomogram already exists in the dataset dictionary + if fname in dataset_results[ds_name][category]: + continue + + #Load the AZ segmentations + AZ_path = os.path.join(filtered_AZ_path, ds_name, f"{fname}.h5") + with h5py.File(AZ_path, "r") as f: + segmented_object = f["/filtered_az"][:] + + # Load the vesicle segmentation from the predictions + with h5py.File(tomo, "r") as f: + segmentation = f["/vesicles/segment_from_combined_vesicles"][:] + + #if AZ intersect is small, compartment seg didn't align with AZ so we use the normal AZ and not intersect + if (segmented_object == 0).all() or np.sum(segmented_object == 1) < 2000: + segmented_object = f["/AZ/segment_from_AZmodel_v3"][:] + + input_path = os.path.join(DATA_ROOT, ds_name, f"{fname}.h5") + assert os.path.exists(input_path), input_path + + # Load the compartment mask from the tomogram + with h5py.File(input_path, "r") as f: + compartments = f["/compartments/segment_from_3Dmodel_v2"][:] + mask = get_compartment_with_max_overlap(compartments, segmentation) + + #if more than half of the vesicles (approximation, its checking pixel and not label) would get filtered by mask it means the compartment seg didn't work and thus we won't use the mask + if np.sum(segmentation[mask == 0] > 0) > (0.5 * 
np.sum(segmentation > 0)): + print("using no mask") + else: + segmentation[mask == 0] = 0 + distances, _, _, _ = measure_segmentation_to_object_distances( + segmentation, segmented_object=segmented_object, resolution=resolution + ) + + # Add distances to the dataset dictionary under the appropriate category + dataset_results[ds_name][category][fname] = distances + + # Save each dataset's results into separate CSV files for CTRL and DKO tomograms + for ds_name, categories in dataset_results.items(): + for category, tomogram_data in categories.items(): + # Sort tomograms by name within the category + sorted_data = dict(sorted(tomogram_data.items())) # Sort by tomogram names + result_df = pd.DataFrame.from_dict(sorted_data, orient='index').transpose() + + # Define the output file path + output_path = os.path.join(RESULT_FOLDER, f"spatial_distribution_analysis_for_{ds_name}_{category}.csv") + + # Save the DataFrame to CSV + result_df.to_csv(output_path, index=False) + -def compute_sizes_for_all_tomorams_manComp(): +def compute_per_vesicle_distance_to_AZ_manComp(): os.makedirs(RESULT_FOLDER, exist_ok=True) resolution = (1.554,) * 3 # Change for each dataset #1.554 for Munc and snap #0.8681 for 04 dataset @@ -171,8 +243,9 @@ def compute_sizes_for_all_tomorams_manComp(): result_df.to_csv(output_path, index=False) def main(): - #compute_sizes_for_all_tomorams() - compute_sizes_for_all_tomorams_manComp() + #compute_per_vesicle_distance_to_AZ() + #compute_per_vesicle_distance_to_AZ_manComp() + compute_per_vesicle_distance_to_filteredAZ() if __name__ == "__main__": From 186c92dc282ca32a485bb669b42bc7fe33abb5e7 Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Wed, 20 Nov 2024 08:11:12 +0100 Subject: [PATCH 20/35] Update inner ear analysis scripts --- .../full_reconstruction/visualize_results.py | 23 +++++++--- .../inner_ear/analysis/analyze_distances.py | 46 ++++++++++++------- .../analysis/analyze_vesicle_pools.py | 3 +- scripts/summarize_data.py | 27 +++++++++++ 4 files changed, 74 insertions(+), 25 deletions(-) create mode 100644 scripts/summarize_data.py diff --git a/scripts/cooper/full_reconstruction/visualize_results.py b/scripts/cooper/full_reconstruction/visualize_results.py index 5e3f596..839626b 100644 --- a/scripts/cooper/full_reconstruction/visualize_results.py +++ b/scripts/cooper/full_reconstruction/visualize_results.py @@ -6,11 +6,14 @@ import numpy as np import pandas as pd +from skimage.filters import gaussian + ROOT = "./04_full_reconstruction" TABLE = "/home/pape/Desktop/sfb1286/mboc_synapse/draft_figures/full_reconstruction.xlsx" # Skip datasets for which all figures were already done. 
-SKIP_DS = ["20241019_Tomo-eval_MF_Synapse"] +SKIP_DS = ["20241019_Tomo-eval_MF_Synapse", "20241019_Tomo-eval_PS_Synapse"] +# SKIP_DS = [] def _get_name_and_row(path, table): @@ -46,13 +49,12 @@ def visualize_result(path, table): if ds_name in SKIP_DS: return - # if row["Use for vis"].values[0] == "yes": - if row["Use for vis"].values[0] in ("yes", "no"): + if row["Use for Vis"].values[0] == "no": return compartment_ids = _get_compartment_ids(row) # access = np.s_[:] - access = np.s_[::2, ::2, ::2] + access = np.s_[::3, ::3, ::3] with h5py.File(path, "r") as f: raw = f["raw"][access] @@ -60,6 +62,10 @@ def visualize_result(path, table): active_zone = f["labels/active_zone"][access] mitos = f["labels/mitochondria"][access] compartments = f["labels/compartments"][access] + print("Loading done") + + raw = gaussian(raw) + print("Gaussian done") if any(comp_ids is not None for comp_ids in compartment_ids): mask = np.zeros(raw.shape, dtype="bool") @@ -78,12 +84,14 @@ def visualize_result(path, table): mitos[~mask] = 0 compartments = compartments_new + vesicle_ids = np.unique(vesicles)[1:] + v = napari.Viewer() v.add_image(raw) v.add_labels(mitos) - v.add_labels(vesicles) - v.add_labels(compartments) - v.add_labels(active_zone) + v.add_labels(vesicles, colormap={ves_id: "orange" for ves_id in vesicle_ids}) + v.add_labels(compartments, colormap={1: "red", 2: "green", 3: "orange"}) + v.add_labels(active_zone, colormap={1: "blue"}) v.title = f"{ds_name}/{name}" napari.run() @@ -115,6 +123,7 @@ def main(): paths = sorted(glob(os.path.join(ROOT, "**/*.h5"), recursive=True)) table = pd.read_excel(TABLE) for path in paths: + print(path) visualize_result(path, table) # visualize_only_compartment(path, table) diff --git a/scripts/inner_ear/analysis/analyze_distances.py b/scripts/inner_ear/analysis/analyze_distances.py index 534f90b..473d6b8 100644 --- a/scripts/inner_ear/analysis/analyze_distances.py +++ b/scripts/inner_ear/analysis/analyze_distances.py @@ -35,7 +35,7 @@ def _plot_all(distances): # TODO rename the method names. 
# We only care about the following distances: # - MP-V -> PD, AZ (Boundary) -# - Docked-V -> PD +# - Docked-V -> PD, AZ # - RA-V -> Ribbon def _plot_selected(distances, save_path=None): fig, axes = plt.subplots(2, 2) @@ -46,7 +46,7 @@ def _plot_selected(distances, save_path=None): def _plot(pool_name, distance_col, structure_name, ax): - this_distances = distances[distances["pool"] == pool_name][["approach", distance_col]] + this_distances = distances[distances["pool"] == pool_name][["tomogram", "approach", distance_col]] ax.set_title(f"{pool_name} to {structure_name}") sns.histplot( @@ -56,15 +56,27 @@ def _plot(pool_name, distance_col, structure_name, ax): if save_path is not None: approaches = pd.unique(this_distances["approach"]) - dist_values = [ - this_distances[this_distances["approach"] == approach][distance_col].values.tolist() - for approach in approaches - ] - max_len = max([len(vals) for vals in dist_values]) - save_distances = { - approach: dists + [np.nan] * (max_len - len(dists)) - for approach, dists in zip(approaches, dist_values) - } + tomo_names = pd.unique(this_distances["tomogram"]) + + tomograms = [] + distance_values = {approach: [] for approach in approaches} + + for tomo in tomo_names: + tomo_dists = this_distances[this_distances["tomogram"] == tomo] + max_vesicles = 0 + for approach in approaches: + n_vesicles = len(tomo_dists[tomo_dists["approach"] == approach].values) + if n_vesicles > max_vesicles: + max_vesicles = n_vesicles + + for approach in approaches: + app_dists = tomo_dists[tomo_dists["approach"] == approach][distance_col].values.tolist() + app_dists = app_dists + [np.nan] * (max_vesicles - len(app_dists)) + distance_values[approach].extend(app_dists) + tomograms.extend([tomo] * max_vesicles) + + save_distances = {"tomograms": tomograms} + save_distances.update(distance_values) save_distances = pd.DataFrame(save_distances) sheet_name = f"{pool_name}_{structure_name}" @@ -74,9 +86,11 @@ def _plot(pool_name, distance_col, structure_name, ax): else: save_distances.to_excel(save_path, index=False, sheet_name=sheet_name) + # NOTE: we over-ride a plot here, should not do this in the actual version _plot("MP-V", "pd_distance [nm]", "PD", axes[0, 0]) _plot("MP-V", "boundary_distance [nm]", "AZ Membrane", axes[0, 1]) _plot("Docked-V", "pd_distance [nm]", "PD", axes[1, 0]) + _plot("Docked-V", "boundary_distance [nm]", "AZ Membrane", axes[1, 0]) _plot("RA-V", "ribbon_distance [nm]", "Ribbon", axes[1, 1]) fig.tight_layout() @@ -87,17 +101,17 @@ def for_tomos_with_annotation(plot_all=True): manual_assignments, semi_automatic_assignments, automatic_assignments = get_measurements_with_annotation() manual_distances = manual_assignments[ - ["pool", "ribbon_distance [nm]", "pd_distance [nm]", "boundary_distance [nm]"] + ["tomogram", "pool", "ribbon_distance [nm]", "pd_distance [nm]", "boundary_distance [nm]"] ] manual_distances["approach"] = ["manual"] * len(manual_distances) semi_automatic_distances = semi_automatic_assignments[ - ["pool", "ribbon_distance [nm]", "pd_distance [nm]", "boundary_distance [nm]"] + ["tomogram", "pool", "ribbon_distance [nm]", "pd_distance [nm]", "boundary_distance [nm]"] ] semi_automatic_distances["approach"] = ["semi_automatic"] * len(semi_automatic_distances) automatic_distances = automatic_assignments[ - ["pool", "ribbon_distance [nm]", "pd_distance [nm]", "boundary_distance [nm]"] + ["tomogram", "pool", "ribbon_distance [nm]", "pd_distance [nm]", "boundary_distance [nm]"] ] automatic_distances["approach"] = ["automatic"] * 
len(automatic_distances) @@ -113,12 +127,12 @@ def for_all_tomos(plot_all=True): semi_automatic_assignments, automatic_assignments = get_all_measurements() semi_automatic_distances = semi_automatic_assignments[ - ["pool", "ribbon_distance [nm]", "pd_distance [nm]", "boundary_distance [nm]"] + ["tomogram", "pool", "ribbon_distance [nm]", "pd_distance [nm]", "boundary_distance [nm]"] ] semi_automatic_distances["approach"] = ["semi_automatic"] * len(semi_automatic_distances) automatic_distances = automatic_assignments[ - ["pool", "ribbon_distance [nm]", "pd_distance [nm]", "boundary_distance [nm]"] + ["tomogram", "pool", "ribbon_distance [nm]", "pd_distance [nm]", "boundary_distance [nm]"] ] automatic_distances["approach"] = ["automatic"] * len(automatic_distances) diff --git a/scripts/inner_ear/analysis/analyze_vesicle_pools.py b/scripts/inner_ear/analysis/analyze_vesicle_pools.py index e9ad651..7b67c99 100644 --- a/scripts/inner_ear/analysis/analyze_vesicle_pools.py +++ b/scripts/inner_ear/analysis/analyze_vesicle_pools.py @@ -94,8 +94,7 @@ def for_all_tomos(): errors.to_excel(writer, sheet_name="StandardDeviation", index=False) -# TODO: export the vesicle diameters -# TODO: export the ribbon and pd stats +# TODO: export the ribbon and pd stats (first need to discuss this with Fid) def main(): # for_tomos_with_annotation() for_all_tomos() diff --git a/scripts/summarize_data.py b/scripts/summarize_data.py new file mode 100644 index 0000000..3658354 --- /dev/null +++ b/scripts/summarize_data.py @@ -0,0 +1,27 @@ +import numpy as np +import pandas as pd + + +az_train = pd.read_excel("data_summary/active_zone_training_data.xlsx") +compartment_train = pd.read_excel("data_summary/compartment_training_data.xlsx") +vesicle_train = pd.read_excel("data_summary/vesicle_training_data.xlsx") +vesicle_da = pd.read_excel("data_summary/vesicle_domain_adaptation_data.xlsx", sheet_name="cryo") + + +def training_resolutions(): + res_az = np.round(az_train["resolution"].mean(), 2) + res_compartment = np.round(compartment_train["resolution"].mean(), 2) + res_cryo = np.round(vesicle_da["resolution"].mean(), 2) + res_vesicles = np.round(vesicle_train["resolution"].mean(), 2) + + print("Training resolutions for models:") + print("active_zone:", res_az) + print("compartments:", res_compartment) + # TODO + print("mitochondria:", 1.0) + print("vesicles_2d:", res_vesicles) + print("vesicles_3d:", res_vesicles) + print("vesicles_cryo:", res_cryo) + + +training_resolutions() From 2ccf3404d318097c59d6eb5cc1f7cf6e682b1c58 Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Wed, 20 Nov 2024 13:49:29 +0100 Subject: [PATCH 21/35] Add script to extract vesicle diameters for inner ear data --- .../analysis/analyze_vesicle_radii.py | 132 ++++++++++++++++++ 1 file changed, 132 insertions(+) create mode 100644 scripts/inner_ear/analysis/analyze_vesicle_radii.py diff --git a/scripts/inner_ear/analysis/analyze_vesicle_radii.py b/scripts/inner_ear/analysis/analyze_vesicle_radii.py new file mode 100644 index 0000000..8fa5d9e --- /dev/null +++ b/scripts/inner_ear/analysis/analyze_vesicle_radii.py @@ -0,0 +1,132 @@ +import os +import sys + +from glob import glob + +import mrcfile +import pandas as pd +from tqdm import tqdm + +from synaptic_reconstruction.imod.export import load_points_from_imodinfo +from synaptic_reconstruction.file_utils import get_data_path + +sys.path.append("../processing") + + +def aggregate_radii(data_root, table, save_path, get_tab): + if os.path.exists(save_path): + return + + radius_table = [] + for _, row in 
tqdm(table.iterrows(), total=len(table), desc="Collect tomo information"): + folder = row["Local Path"] + if folder == "": + continue + + tomo_name = os.path.relpath(folder, os.path.join(data_root, "Electron-Microscopy-Susi/Analyse")) + tab_path = get_tab(folder) + if tab_path is None: + continue + + tab = pd.read_excel(tab_path) + this_tab = tab[["pool", "radius [nm]"]] + this_tab.insert(0, "tomogram", [tomo_name] * len(this_tab)) + radius_table.append(this_tab) + + radius_table = pd.concat(radius_table) + print("Saving table for", len(radius_table), "vesicles to", save_path) + radius_table.to_excel(save_path, index=False) + + +def aggregate_radii_imod(data_root, table, save_path): + if os.path.exists(save_path): + return + + radius_table = [] + for _, row in tqdm(table.iterrows(), total=len(table), desc="Collect tomo information"): + folder = row["Local Path"] + if folder == "": + continue + + tomo_name = os.path.relpath(folder, os.path.join(data_root, "Electron-Microscopy-Susi/Analyse")) + annotation_folder = os.path.join(folder, "manuell") + if not os.path.exists(annotation_folder): + annotation_folder = os.path.join(folder, "Manuell") + if not os.path.exists(annotation_folder): + continue + + annotations = glob(os.path.join(annotation_folder, "*.mod")) + annotation_file = [ann for ann in annotations if ("vesikel" in ann.lower()) or ("vesicle" in ann.lower())] + if len(annotation_file) != 1: + continue + annotation_file = annotation_file[0] + + tomo_file = get_data_path(folder) + with mrcfile.open(tomo_file) as f: + shape = f.data.shape + resolution = list(f.voxel_size.item()) + resolution = [res / 10 for res in resolution][0] + + try: + _, radii, labels, label_names = load_points_from_imodinfo(annotation_file, shape, resolution=resolution) + except AssertionError: + continue + + this_tab = pd.DataFrame({ + "tomogram": [tomo_name] * len(radii), + "pool": [label_names[label_id] for label_id in labels], + "radius [nm]": radii, + }) + radius_table.append(this_tab) + + radius_table = pd.concat(radius_table) + print("Saving table for", len(radius_table), "vesicles to", save_path) + radius_table.to_excel(save_path, index=False) + + +def get_tab_automatic(folder): + tab_name = "measurements_uncorrected_assignments.xlsx" + res_path = os.path.join(folder, "korrektur", tab_name) + if not os.path.exists(res_path): + res_path = os.path.join(folder, "Korrektur", tab_name) + if not os.path.exists(res_path): + res_path = None + return res_path + + +def get_tab_semi_automatic(folder): + tab_name = "measurements.xlsx" + res_path = os.path.join(folder, "korrektur", tab_name) + if not os.path.exists(res_path): + res_path = os.path.join(folder, "Korrektur", tab_name) + if not os.path.exists(res_path): + res_path = None + return res_path + + +def get_tab_manual(folder): + tab_name = "measurements.xlsx" + res_path = os.path.join(folder, "manuell", tab_name) + if not os.path.exists(res_path): + res_path = os.path.join(folder, "Manuell", tab_name) + if not os.path.exists(res_path): + res_path = None + return res_path + + +def main(): + from parse_table import parse_table, get_data_root + + data_root = get_data_root() + table_path = os.path.join(data_root, "Electron-Microscopy-Susi", "Übersicht.xlsx") + table = parse_table(table_path, data_root) + + # TODO get the radii from imod + aggregate_radii(data_root, table, save_path="./results/vesicle_radii_automatic.xlsx", get_tab=get_tab_automatic) + aggregate_radii(data_root, table, save_path="./results/vesicle_radii_semi_automatic.xlsx", 
get_tab=get_tab_semi_automatic) # noqa + aggregate_radii(data_root, table, save_path="./results/vesicle_radii_manual.xlsx", get_tab=get_tab_manual) + aggregate_radii_imod(data_root, table, save_path="./results/vesicle_radii_imod.xlsx") + + +if __name__ == "__main__": + main() From 5feff6a2b43c07af16bf2d3d2d579cacb77a08df Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Thu, 21 Nov 2024 12:38:36 +0100 Subject: [PATCH 22/35] Update active zone analysis for SNAP/MUNC data --- .../cooper/analysis/active_zone_analysis.py | 197 +++++++++++++++++- .../cooper/analysis/compute_skeleton_area.py | 44 ++++ scripts/summarize_data.py | 129 +++++++++++- synaptic_reconstruction/morphology.py | 112 +++++++++- 4 files changed, 476 insertions(+), 6 deletions(-) create mode 100644 scripts/cooper/analysis/compute_skeleton_area.py diff --git a/scripts/cooper/analysis/active_zone_analysis.py b/scripts/cooper/analysis/active_zone_analysis.py index d2234c9..bb13ac5 100644 --- a/scripts/cooper/analysis/active_zone_analysis.py +++ b/scripts/cooper/analysis/active_zone_analysis.py @@ -3,15 +3,22 @@ import h5py import numpy as np +import napari +import pandas as pd from scipy.ndimage import binary_closing from skimage.measure import label from synaptic_reconstruction.ground_truth.shape_refinement import edge_filter +from synaptic_reconstruction.morphology import skeletonize_object +from synaptic_reconstruction.distance_measurements import measure_segmentation_to_object_distances from tqdm import tqdm -ROOT = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/20241102_TOMO_DATA_Imig2014/final_Imig2014_seg_autoComp" # noqa +from compute_skeleton_area import calculate_surface_area -OUTPUT_AZ = "./boundary_az" +ROOT = "./imig_data" # noqa +OUTPUT_AZ = "./az_segmentation" + +RESOLUTION = (1.554,) * 3 def filter_az(path): @@ -20,6 +27,7 @@ def filter_az(path): ds = os.path.basename(ds) out_path = os.path.join(OUTPUT_AZ, ds, fname) os.makedirs(os.path.join(OUTPUT_AZ, ds), exist_ok=True) + if os.path.exists(out_path): return @@ -56,11 +64,192 @@ def filter_az(path): f.create_dataset("filtered_az", data=az_filtered, compression="gzip") -def main(): +def filter_all_azs(): files = sorted(glob(os.path.join(ROOT, "**/*.h5"), recursive=True)) - for ff in tqdm(files): + for ff in tqdm(files, desc="Filter AZ segmentations."): filter_az(ff) +def process_az(path, view=True): + key = "thin_az" + + with h5py.File(path, "r") as f: + if key in f and not view: + return + az_seg = f["filtered_az"][:] + + az_thin = skeletonize_object(az_seg) + + if view: + ds, fname = os.path.split(path) + ds = os.path.basename(ds) + raw_path = os.path.join(ROOT, ds, fname) + with h5py.File(raw_path, "r") as f: + raw = f["raw"][:] + v = napari.Viewer() + v.add_image(raw) + v.add_labels(az_seg) + v.add_labels(az_thin) + napari.run() + else: + with h5py.File(path, "a") as f: + f.create_dataset(key, data=az_thin, compression="gzip") + + +# Apply thinning to all active zones to obtain 1d surface. +def process_all_azs(): + files = sorted(glob(os.path.join(OUTPUT_AZ, "**/*.h5"), recursive=True)) + for ff in tqdm(files, desc="Thin AZ segmentations."): + process_az(ff, view=False) + + +def measure_az_area(path): + from skimage import measure + + with h5py.File(path, "r") as f: + seg = f["thin_az"][:] + + # Try via surface mesh. + verts, faces, normals, values = measure.marching_cubes(seg, spacing=RESOLUTION) + surface_area1 = measure.mesh_surface_area(verts, faces) + + # Try via custom function. 
+ surface_area2 = calculate_surface_area(seg, voxel_size=RESOLUTION) + + ds, fname = os.path.split(path) + ds = os.path.basename(ds) + + return pd.DataFrame({ + "Dataset": [ds], + "Tomogram": [fname], + "surface_mesh [nm^2]": [surface_area1], + "surface_custom [nm^2]": [surface_area2], + }) + + +# Measure the AZ surface areas. +def measure_all_areas(): + save_path = "./results/area_measurements.xlsx" + if os.path.exists(save_path): + return + + files = sorted(glob(os.path.join(OUTPUT_AZ, "**/*.h5"), recursive=True)) + area_table = [] + for ff in tqdm(files, desc="Measure AZ areas."): + area = measure_az_area(ff) + area_table.append(area) + area_table = pd.concat(area_table) + area_table.to_excel(save_path, index=False) + + manual_results = "/home/pape/Work/my_projects/synaptic-reconstruction/scripts/cooper/debug/surface/manualAZ_surface_area.xlsx" # noqa + manual_results = pd.read_excel(manual_results)[["Dataset", "Tomogram", "manual"]] + comparison_table = pd.merge(area_table, manual_results, on=["Dataset", "Tomogram"], how="inner") + comparison_table.to_excel("./results/area_comparison.xlsx", index=False) + + +def analyze_areas(): + import seaborn as sns + import matplotlib.pyplot as plt + + table = pd.read_excel("./results/area_comparison.xlsx") + + fig, axes = plt.subplots(2) + sns.scatterplot(data=table, x="manual", y="surface_mesh [nm^2]", ax=axes[0]) + sns.scatterplot(data=table, x="manual", y="surface_custom [nm^2]", ax=axes[1]) + plt.show() + + +def measure_distances(ves_path, az_path): + with h5py.File(az_path, "r") as f: + az = f["thin_az"][:] + + with h5py.File(ves_path, "r") as f: + vesicles = f["vesicles/segment_from_combined_vesicles"][:] + + distances, _, _, _ = measure_segmentation_to_object_distances(vesicles, az, resolution=RESOLUTION) + + ds, fname = os.path.split(az_path) + ds = os.path.basename(ds) + + return pd.DataFrame({ + "Dataset": [ds] * len(distances), + "Tomogram": [fname] * len(distances), + "Distance": distances, + }) + + +# Measure the AZ vesicle distances for all vesicles. 
+def measure_all_distances(): + save_path = "./results/vesicle_az_distances.xlsx" + if os.path.exists(save_path): + return + + ves_files = sorted(glob(os.path.join(ROOT, "**/*.h5"), recursive=True)) + az_files = sorted(glob(os.path.join(OUTPUT_AZ, "**/*.h5"), recursive=True)) + assert len(ves_files) == len(az_files) + + dist_table = [] + for ves_file, az_file in tqdm(zip(ves_files, az_files), total=len(az_files), desc="Measure distances."): + dist = measure_distances(ves_file, az_file) + dist_table.append(dist) + dist_table = pd.concat(dist_table) + + dist_table.to_excel(save_path, index=False) + + +def reformat_distances(): + tab = pd.read_excel("./results/vesicle_az_distances.xlsx") + + munc_ko = {} + munc_ctrl = {} + + snap_ko = {} + snap_ctrl = {} + + for _, row in tab.iterrows(): + ds = row.Dataset + tomo = row.Tomogram + + if ds == "Munc13DKO": + if "CTRL" in tomo: + group = munc_ctrl + else: + group = munc_ko + else: + assert ds == "SNAP25" + if "CTRL" in tomo: + group = snap_ctrl + else: + group = snap_ko + + name = os.path.splitext(tomo)[0] + val = row["Distance"] + if name in group: + group[name].append(val) + else: + group[name] = [val] + + def save_tab(group, path): + n_ves_max = max(len(v) for v in group.values()) + group = {k: v + [np.nan] * (n_ves_max - len(v)) for k, v in group.items()} + group_tab = pd.DataFrame(group) + group_tab.to_excel(path, index=False) + + os.makedirs("./results/distances_formatted", exist_ok=True) + save_tab(munc_ko, "./results/distances_formatted/munc_ko.xlsx") + save_tab(munc_ctrl, "./results/distances_formatted/munc_ctrl.xlsx") + save_tab(snap_ko, "./results/distances_formatted/snap_ko.xlsx") + save_tab(snap_ctrl, "./results/distances_formatted/snap_ctrl.xlsx") + + +def main(): + # filter_all_azs() + # process_all_azs() + # measure_all_areas() + # analyze_areas() + # measure_all_distances() + reformat_distances() + + +if __name__ == "__main__": + main() diff --git a/scripts/cooper/analysis/compute_skeleton_area.py b/scripts/cooper/analysis/compute_skeleton_area.py new file mode 100644 index 0000000..6fb05d0 --- /dev/null +++ b/scripts/cooper/analysis/compute_skeleton_area.py @@ -0,0 +1,44 @@ +import numpy as np + + +def calculate_surface_area(skeleton, voxel_size=(1.0, 1.0, 1.0)): + """ + Calculate the surface area of a 3D skeletonized object. + + Parameters: + skeleton (3D array): Binary 3D skeletonized array. + voxel_size (tuple): Physical size of voxels (z, y, x). + + Returns: + float: Approximate surface area of the skeleton.
+ """ + # Define the voxel dimensions + voxel_area = ( + voxel_size[1] * voxel_size[2], # yz-face area + voxel_size[0] * voxel_size[2], # xz-face area + voxel_size[0] * voxel_size[1], # xy-face area + ) + + # Compute the number of exposed faces for each voxel + exposed_faces = 0 + directions = [ + (1, 0, 0), (-1, 0, 0), # x-axis neighbors + (0, 1, 0), (0, -1, 0), # y-axis neighbors + (0, 0, 1), (0, 0, -1), # z-axis neighbors + ] + + # Iterate over all voxels in the skeleton + for z, y, x in np.argwhere(skeleton): + for i, (dz, dy, dx) in enumerate(directions): + neighbor = (z + dz, y + dy, x + dx) + # Check if the neighbor is outside the volume or not part of the skeleton + if ( + 0 <= neighbor[0] < skeleton.shape[0] and + 0 <= neighbor[1] < skeleton.shape[1] and + 0 <= neighbor[2] < skeleton.shape[2] and + skeleton[neighbor] == 1 + ): + continue + exposed_faces += voxel_area[i // 2] + + return exposed_faces diff --git a/scripts/summarize_data.py b/scripts/summarize_data.py index 3658354..df57059 100644 --- a/scripts/summarize_data.py +++ b/scripts/summarize_data.py @@ -1,12 +1,21 @@ +import matplotlib.pyplot as plt import numpy as np import pandas as pd +# TODO inner ear train data and mito training data are missing az_train = pd.read_excel("data_summary/active_zone_training_data.xlsx") compartment_train = pd.read_excel("data_summary/compartment_training_data.xlsx") vesicle_train = pd.read_excel("data_summary/vesicle_training_data.xlsx") vesicle_da = pd.read_excel("data_summary/vesicle_domain_adaptation_data.xlsx", sheet_name="cryo") +# Inner ear trainign data: +# Sophia: 92 +# Rat: 19 +# Tether: 3 +# Ves Pools: 6 +# Total = 120 + def training_resolutions(): res_az = np.round(az_train["resolution"].mean(), 2) @@ -22,6 +31,124 @@ def training_resolutions(): print("vesicles_2d:", res_vesicles) print("vesicles_3d:", res_vesicles) print("vesicles_cryo:", res_cryo) + # TODO inner ear + + +def pie_chart(data, count_col, title): + # Plot the pie chart + plt.figure(figsize=(8, 6)) + wedges, texts, autotexts = plt.pie( + data[count_col], + labels=data["Condition"], + autopct="%1.1f%%", # Display percentages + startangle=90, # Start at the top + colors=plt.cm.Paired.colors[:len(data)], # Optional: Custom color palette + textprops={"fontsize": 14} + ) + + for autot in autotexts: + autot.set_fontsize(18) + + plt.title(title, fontsize=18) + plt.tight_layout() + plt.show() + + +def summarize_vesicle_train_data(): + condition_summary = { + "Condition": [], + "Tomograms": [], + "Vesicles": [], + } + + conditions = pd.unique(vesicle_train.condition) + for condition in conditions: + ctab = vesicle_train[vesicle_train.condition == condition] + n_tomos = len(ctab) + n_vesicles = ctab["vesicle_count"].sum() + print(condition) + print("Tomograms:", n_tomos) + print("Vesicles:", n_vesicles) + print() + condition_summary["Condition"].append(condition) + condition_summary["Tomograms"].append(n_tomos) + condition_summary["Vesicles"].append(n_vesicles) + condition_summary = pd.DataFrame(condition_summary) + + print("Total:") + print("Tomograms:", len(vesicle_train)) + print("Vesicles:", vesicle_train["vesicle_count"].sum()) + print() + + train_tomos = vesicle_train[vesicle_train.used_for == "train/val"] + print("Training:") + print("Tomograms:", len(train_tomos)) + print("Vesicles:", train_tomos["vesicle_count"].sum()) + print() + + test_tomos = vesicle_train[vesicle_train.used_for == "test"] + print("Test:") + print("Tomograms:", len(test_tomos)) + print("Vesicles:", test_tomos["vesicle_count"].sum()) + + 
pie_chart(condition_summary, "Tomograms", "Tomograms per Condition") + pie_chart(condition_summary, "Vesicles", "Vesicles per Condition") + + +def summarize_vesicle_da(): + for name in ("inner_ear", "endbulb", "cryo", "frog", "maus_2d"): + tab = pd.read_excel("data_summary/vesicle_domain_adaptation_data.xlsx", sheet_name=name) + print(name) + print("N-tomograms:", len(tab)) + print("N-test:", (tab["used_for"] == "test").sum()) + print("N-vesicles:", tab["vesicle_count"].sum()) + print() + + +def summarize_az_train(): + conditions = pd.unique(az_train.condition) + print(conditions) + + print("Total:") + print("Tomograms:", len(az_train)) + print("Active Zones:", az_train["az_count"].sum()) + print() + + train_tomos = az_train[az_train.used_for == "train/val"] + print("Training:") + print("Tomograms:", len(train_tomos)) + print("Active Zones:", train_tomos["az_count"].sum()) + print() + + test_tomos = az_train[az_train.used_for == "test"] + print("Test:") + print("Tomograms:", len(test_tomos)) + print("Active Zones:", test_tomos["az_count"].sum()) + + +def summarize_compartment_train(): + conditions = pd.unique(compartment_train.condition) + print(conditions) + + print("Total:") + print("Tomograms:", len(compartment_train)) + print("Compartments:", compartment_train["compartment_count"].sum()) + print() + + train_tomos = compartment_train[compartment_train.used_for == "train/val"] + print("Training:") + print("Tomograms:", len(train_tomos)) + print("Compartments:", train_tomos["compartment_count"].sum()) + print() + + test_tomos = compartment_train[compartment_train.used_for == "test"] + print("Test:") + print("Tomograms:", len(test_tomos)) + print("Compartments:", test_tomos["compartment_count"].sum()) -training_resolutions() +# training_resolutions() +# summarize_vesicle_train_data() +# summarize_vesicle_da() +summarize_az_train() +# summarize_compartment_train() diff --git a/synaptic_reconstruction/morphology.py b/synaptic_reconstruction/morphology.py index 8afea3d..126042f 100644 --- a/synaptic_reconstruction/morphology.py +++ b/synaptic_reconstruction/morphology.py @@ -6,8 +6,11 @@ import numpy as np import pandas as pd -from scipy.ndimage import distance_transform_edt + +from scipy.ndimage import distance_transform_edt, convolve +from skimage.graph import MCP from skimage.measure import regionprops, marching_cubes +from skimage.morphology import skeletonize, medial_axis, label from skimage.segmentation import find_boundaries @@ -87,3 +90,110 @@ def compute_object_morphology(object_, structure_name, resolution=None): "surface [pixel^2]" if resolution is None else "surface [nm^2]": [surface], }) return morphology + + +def _find_endpoints(component): + # Define a 3x3 kernel to count neighbors + kernel = np.ones((3, 3), dtype=int) + neighbor_count = convolve(component.astype(int), kernel, mode="constant", cval=0) + endpoints = np.argwhere((component == 1) & (neighbor_count == 2)) # Degree = 1 + return endpoints + + +def _compute_longest_path(component, endpoints): + # Use the first endpoint as the source + src = tuple(endpoints[0]) + cost = np.where(component, 1, np.inf) # Cost map: 1 for skeleton, inf for background + mcp = MCP(cost) + _, traceback = mcp.find_costs([src]) + + # Use the second endpoint as the destination + dst = tuple(endpoints[-1]) + + # Trace back the path + path = np.zeros_like(component, dtype=bool) + current = dst + + # Extract offsets from the MCP object + offsets = np.array(mcp.offsets) + nrows, ncols = component.shape + + while current != src: + path[current] = 
True
+            current_offset_index = traceback[current]
+            if current_offset_index < 0:
+                # No valid path found
+                break
+            offset = offsets[current_offset_index]
+            # Move to the predecessor
+            current = (current[0] - offset[0], current[1] - offset[1])
+            # Ensure indices are within bounds
+            if not (0 <= current[0] < nrows and 0 <= current[1] < ncols):
+                break
+
+    path[src] = True  # Include the source
+    return path
+
+
+def _prune_skeleton_longest_path(skeleton):
+    pruned_skeleton = np.zeros_like(skeleton, dtype=bool)
+
+    # Label connected components in the skeleton
+    labeled_skeleton, num_labels = label(skeleton, return_num=True)
+
+    for label_id in range(1, num_labels + 1):
+        # Isolate the current connected component
+        component = (labeled_skeleton == label_id)
+
+        # Find the endpoints of the component
+        endpoints = _find_endpoints(component)
+        if len(endpoints) < 2:
+            continue  # Skip if there are no valid endpoints
+        elif len(endpoints) == 2:  # Nothing to prune
+            pruned_skeleton |= component
+            continue
+
+        # Compute the longest path using MCP
+        longest_path = _compute_longest_path(component, endpoints)
+
+        # import napari
+        # v = napari.Viewer()
+        # v.add_labels(component)
+        # v.add_labels(longest_path)
+        # v.add_points(endpoints)
+        # napari.run()
+
+        pruned_skeleton |= longest_path
+
+    return pruned_skeleton.astype(skeleton.dtype)
+
+
+def skeletonize_object(
+    segmentation: np.ndarray,
+    method: str = "skeletonize",
+    prune: bool = True,
+    min_prune_size: int = 10,
+):
+    """Skeletonize a 3D object by individually skeletonizing each slice.
+
+    Args:
+        segmentation: The binary segmentation of the object.
+        method: The 2D skeletonization method, either "skeletonize" or "medial_axis".
+        prune: Whether to prune each 2D skeleton to its longest path.
+        min_prune_size: The minimal size of connected skeleton pieces kept after pruning.
+
+    Returns:
+        The skeletonized object, with the same shape as the input segmentation.
+    """
+    assert method in ("skeletonize", "medial_axis")
+    seg_thin = np.zeros_like(segmentation)
+    skeletor = skeletonize if method == "skeletonize" else medial_axis
+    # Parallelize?
+    for z in range(segmentation.shape[0]):
+        skeleton = skeletor(segmentation[z])
+
+        if prune:
+            skeleton = _prune_skeleton_longest_path(skeleton)
+            if min_prune_size > 0:
+                skeleton = label(skeleton)
+                ids, sizes = np.unique(skeleton, return_counts=True)
+                ids, sizes = ids[1:], sizes[1:]
+                skeleton = np.isin(skeleton, ids[sizes >= min_prune_size])
+
+        seg_thin[z] = skeleton
+    return seg_thin

From 9b8c7a21b9178efc48bfa4805745af8f3683df55 Mon Sep 17 00:00:00 2001
From: Constantin Pape
Date: Thu, 21 Nov 2024 22:16:39 +0100
Subject: [PATCH 23/35] Add more inner ear analysis code

---
 .../analysis/analyze_vesicle_diameters.py | 132 ++++++++++++++++++
 .../analysis/extract_ribbon_stats.py      |  36 +++++
 2 files changed, 168 insertions(+)
 create mode 100644 scripts/inner_ear/analysis/analyze_vesicle_diameters.py
 create mode 100644 scripts/inner_ear/analysis/extract_ribbon_stats.py

diff --git a/scripts/inner_ear/analysis/analyze_vesicle_diameters.py b/scripts/inner_ear/analysis/analyze_vesicle_diameters.py
new file mode 100644
index 0000000..8fa5d9e
--- /dev/null
+++ b/scripts/inner_ear/analysis/analyze_vesicle_diameters.py
@@ -0,0 +1,132 @@
+import os
+import sys
+
+from glob import glob
+
+import mrcfile
+import pandas as pd
+from tqdm import tqdm
+
+from synaptic_reconstruction.imod.export import load_points_from_imodinfo
+from synaptic_reconstruction.file_utils import get_data_path
+
+sys.path.append("../processing")
+
+
+def aggregate_radii(data_root, table, save_path, get_tab):
+    if os.path.exists(save_path):
+        return
+
+    radius_table = []
+    for _, row in tqdm(table.iterrows(), total=len(table), desc="Collect tomo information"):
+        folder = row["Local Path"]
+        if folder == "":
+            continue
+
+        tomo_name = os.path.relpath(folder, os.path.join(data_root,
"Electron-Microscopy-Susi/Analyse")) + tab_path = get_tab(folder) + if tab_path is None: + continue + + tab = pd.read_excel(tab_path) + this_tab = tab[["pool", "radius [nm]"]] + this_tab.insert(0, "tomogram", [tomo_name] * len(this_tab)) + radius_table.append(this_tab) + + radius_table = pd.concat(radius_table) + print("Saving table for", len(radius_table), "vesicles to", save_path) + radius_table.to_excel(save_path, index=False) + + +def aggregate_radii_imod(data_root, table, save_path): + if os.path.exists(save_path): + return + + radius_table = [] + for _, row in tqdm(table.iterrows(), total=len(table), desc="Collect tomo information"): + folder = row["Local Path"] + if folder == "": + continue + + tomo_name = os.path.relpath(folder, os.path.join(data_root, "Electron-Microscopy-Susi/Analyse")) + annotation_folder = os.path.join(folder, "manuell") + if not os.path.exists(annotation_folder): + annotation_folder = os.path.join(folder, "Manuell") + if not os.path.exists(annotation_folder): + continue + + annotations = glob(os.path.join(annotation_folder, "*.mod")) + annotation_file = [ann for ann in annotations if ("vesikel" in ann.lower()) or ("vesicle" in ann.lower())] + if len(annotation_file) != 1: + continue + annotation_file = annotation_file[0] + + tomo_file = get_data_path(folder) + with mrcfile.open(tomo_file) as f: + shape = f.data.shape + resolution = list(f.voxel_size.item()) + resolution = [res / 10 for res in resolution][0] + + try: + _, radii, labels, label_names = load_points_from_imodinfo(annotation_file, shape, resolution=resolution) + except AssertionError: + continue + + this_tab = pd.DataFrame({ + "tomogram": [tomo_name] * len(radii), + "pool": [label_names[label_id] for label_id in labels], + "radius [nm]": radii, + }) + radius_table.append(this_tab) + + radius_table = pd.concat(radius_table) + print("Saving table for", len(radius_table), "vesicles to", save_path) + radius_table.to_excel(save_path, index=False) + + +def get_tab_automatic(folder): + tab_name = "measurements_uncorrected_assignments.xlsx" + res_path = os.path.join(folder, "korrektur", tab_name) + if not os.path.exists(res_path): + res_path = os.path.join(folder, "Korrektur", tab_name) + if not os.path.exists(res_path): + res_path = None + return res_path + + +def get_tab_semi_automatic(folder): + tab_name = "measurements.xlsx" + res_path = os.path.join(folder, "korrektur", tab_name) + if not os.path.exists(res_path): + res_path = os.path.join(folder, "Korrektur", tab_name) + if not os.path.exists(res_path): + res_path = None + return res_path + + +def get_tab_manual(folder): + tab_name = "measurements.xlsx" + res_path = os.path.join(folder, "manuell", tab_name) + if not os.path.exists(res_path): + res_path = os.path.join(folder, "Manuell", tab_name) + if not os.path.exists(res_path): + res_path = None + return res_path + + +def main(): + from parse_table import parse_table, get_data_root + + data_root = get_data_root() + table_path = os.path.join(data_root, "Electron-Microscopy-Susi", "Übersicht.xlsx") + table = parse_table(table_path, data_root) + + # TODO get the radii from imod + aggregate_radii(data_root, table, save_path="./results/vesicle_radii_automatic.xlsx", get_tab=get_tab_automatic) + aggregate_radii(data_root, table, save_path="./results/vesicle_radii_semi_automatic.xlsx", get_tab=get_tab_semi_automatic) # noqa + aggregate_radii(data_root, table, save_path="./results/vesicle_radii_manual.xlsx", get_tab=get_tab_manual) + aggregate_radii_imod(data_root, table, 
save_path="./results/vesicle_radii_imod.xlsx") + + +if __name__ == "__main__": + main() diff --git a/scripts/inner_ear/analysis/extract_ribbon_stats.py b/scripts/inner_ear/analysis/extract_ribbon_stats.py new file mode 100644 index 0000000..8ee9e12 --- /dev/null +++ b/scripts/inner_ear/analysis/extract_ribbon_stats.py @@ -0,0 +1,36 @@ +import numpy as np +import pandas as pd + + +def main(): + man_path = "../results/20240917_1/fully_manual_analysis_results.xlsx" + auto_path = "../results/20240917_1/automatic_analysis_results.xlsx" + + man_measurements = pd.read_excel(man_path, sheet_name="morphology") + man_measurements = man_measurements[man_measurements.structure == "ribbon"][ + ["tomogram", "surface [nm^2]", "volume [nm^3]"] + ] + + auto_measurements = pd.read_excel(auto_path, sheet_name="morphology") + auto_measurements = auto_measurements[auto_measurements.structure == "ribbon"][ + ["tomogram", "surface [nm^2]", "volume [nm^3]"] + ] + + # save all the automatic measurements + auto_measurements.to_excel("./results/ribbon_morphology_auto.xlsx", index=False) + + man_tomograms = pd.unique(man_measurements["tomogram"]) + auto_tomograms = pd.unique(auto_measurements["tomogram"]) + tomos = np.intersect1d(man_tomograms, auto_tomograms) + + man_measurements = man_measurements[man_measurements.tomogram.isin(tomos)] + auto_measurements = auto_measurements[auto_measurements.tomogram.isin(tomos)] + + save_path = "./results/ribbon_morphology_man-v-auto.xlsx" + man_measurements.to_excel(save_path, sheet_name="manual", index=False) + with pd.ExcelWriter(save_path, engine="openpyxl", mode="a") as writer: + auto_measurements.to_excel(writer, sheet_name="auto", index=False) + + +if __name__ == "__main__": + main() From db89b441b25770ec298bbe51eb8d82233b92a0f6 Mon Sep 17 00:00:00 2001 From: SarahMuth Date: Sat, 23 Nov 2024 14:43:53 +0100 Subject: [PATCH 24/35] evaluation of AZ seg --- scripts/cooper/analysis/run_analysis_1.py | 11 +- .../run_spatial_distribution_analysis.py | 2 +- scripts/cooper/training/evaluate_AZ.py | 107 ++++++++++++++++++ synaptic_reconstruction/imod/to_imod.py | 31 +++++ 4 files changed, 144 insertions(+), 7 deletions(-) create mode 100644 scripts/cooper/training/evaluate_AZ.py diff --git a/scripts/cooper/analysis/run_analysis_1.py b/scripts/cooper/analysis/run_analysis_1.py index 3afde5d..94d0a62 100644 --- a/scripts/cooper/analysis/run_analysis_1.py +++ b/scripts/cooper/analysis/run_analysis_1.py @@ -11,9 +11,9 @@ from tqdm import tqdm from synaptic_reconstruction.imod.to_imod import convert_segmentation_to_spheres -DATA_ROOT = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/20241102_TOMO_DATA_Imig2014/exported/" # noqa -PREDICTION_ROOT = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/segmentation/for_spatial_distribution_analysis/final_Imig2014_seg_manComp" # noqa -RESULT_FOLDER = "./analysis_results/AZ_intersect_manualCompartment" +DATA_ROOT = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/segmentation/for_spatial_distribution_analysis/final_Imig2014_seg/" # noqa +PREDICTION_ROOT = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/segmentation/for_spatial_distribution_analysis/final_Imig2014_seg/" # noqa +RESULT_FOLDER = "./analysis_results/AZ_filtered_autoComp" def get_compartment_with_max_overlap(compartments, vesicles): """ @@ -182,9 +182,8 @@ def compute_sizes_for_all_tomorams_autoComp(): result_df.to_csv(output_path, index=False) def main(): - compute_sizes_for_all_tomorams_manComp() - 
#compute_sizes_for_all_tomorams_autoComp() - + #compute_sizes_for_all_tomorams_manComp() + compute_sizes_for_all_tomorams_autoComp() if __name__ == "__main__": main() diff --git a/scripts/cooper/analysis/run_spatial_distribution_analysis.py b/scripts/cooper/analysis/run_spatial_distribution_analysis.py index cdc4c0d..edd8308 100644 --- a/scripts/cooper/analysis/run_spatial_distribution_analysis.py +++ b/scripts/cooper/analysis/run_spatial_distribution_analysis.py @@ -188,7 +188,6 @@ def compute_per_vesicle_distance_to_filteredAZ(): # Save the DataFrame to CSV result_df.to_csv(output_path, index=False) - def compute_per_vesicle_distance_to_AZ_manComp(): os.makedirs(RESULT_FOLDER, exist_ok=True) @@ -248,5 +247,6 @@ def main(): compute_per_vesicle_distance_to_filteredAZ() + if __name__ == "__main__": main() diff --git a/scripts/cooper/training/evaluate_AZ.py b/scripts/cooper/training/evaluate_AZ.py new file mode 100644 index 0000000..fc32214 --- /dev/null +++ b/scripts/cooper/training/evaluate_AZ.py @@ -0,0 +1,107 @@ +import argparse +import os + +import h5py +import pandas as pd +import numpy as np + +from elf.evaluation.dice import dice_score + +def extract_gt_bounding_box(segmentation, gt, halo=[20, 320, 320]): + # Find the bounding box for the ground truth + bb = np.where(gt > 0) + bb = tuple(slice( + max(int(b.min() - ha), 0), # Ensure indices are not below 0 + min(int(b.max() + ha), sh) # Ensure indices do not exceed shape dimensions + ) for b, sh, ha in zip(bb, gt.shape, halo)) + + # Apply the bounding box to both segmentations + segmentation_cropped = segmentation[bb] + gt_cropped = gt[bb] + + return segmentation_cropped, gt_cropped + +def evaluate(labels, segmentation): + assert labels.shape == segmentation.shape + score = dice_score(segmentation, labels) + return score + +def evaluate_file(labels_path, segmentation_path, model_name, crop= False): + print(f"Evaluate labels {labels_path} and vesicles {segmentation_path}") + + ds_name = os.path.basename(os.path.dirname(labels_path)) + tomo = os.path.basename(labels_path) + + #get the labels and segmentation + with h5py.File(labels_path) as label_file: + gt = label_file["/labels/AZ"][:] + + with h5py.File(segmentation_path) as seg_file: + segmentation = seg_file["/AZ/segment_from_AZmodel_v3"][:] + + if crop: + print("cropping the annotation and segmentation") + segmentation, gt = extract_gt_bounding_box(segmentation, gt) + + # Evaluate the match of ground truth and segmentation + dice_score = evaluate(gt, segmentation) + + # Store results + result_folder = "/user/muth9/u12095/synaptic-reconstruction/scripts/cooper/evaluation_results" + os.makedirs(result_folder, exist_ok=True) + result_path = os.path.join(result_folder, f"evaluation_{model_name}.csv") + print("Evaluation results are saved to:", result_path) + + # Load existing results if the file exists + if os.path.exists(result_path): + results = pd.read_csv(result_path) + else: + results = None + + # Create a new DataFrame for the current evaluation + res = pd.DataFrame( + [[ds_name, tomo, dice_score]], columns=["dataset", "tomogram", "dice_score"] + ) + + # Combine with existing results or initialize with the new results + if results is None: + results = res + else: + results = pd.concat([results, res]) + + # Save the results to the CSV file + results.to_csv(result_path, index=False) + +def evaluate_folder(labels_path, segmentation_path, model_name, crop = False): + print(f"Evaluating folder {segmentation_path}") + print(f"Using labels stored in {labels_path}") + + label_files = 
os.listdir(labels_path) + vesicles_files = os.listdir(segmentation_path) + + for vesicle_file in vesicles_files: + if vesicle_file in label_files: + + evaluate_file(os.path.join(labels_path, vesicle_file), os.path.join(segmentation_path, vesicle_file), model_name, crop) + + + +def main(): + + parser = argparse.ArgumentParser() + parser.add_argument("-l", "--labels_path", required=True) + parser.add_argument("-v", "--segmentation_path", required=True) + parser.add_argument("-n", "--model_name", required=True) + parser.add_argument("--crop", action="store_true", help="Crop around the annotation.") + args = parser.parse_args() + + segmentation_path = args.segmentation_path + if os.path.isdir(segmentation_path): + evaluate_folder(args.labels_path, segmentation_path, args.model_name, args.crop) + else: + evaluate_file(args.labels_path, segmentation_path, args.model_name, args.crop) + + + +if __name__ == "__main__": + main() diff --git a/synaptic_reconstruction/imod/to_imod.py b/synaptic_reconstruction/imod/to_imod.py index 7a98469..6b217aa 100644 --- a/synaptic_reconstruction/imod/to_imod.py +++ b/synaptic_reconstruction/imod/to_imod.py @@ -121,6 +121,37 @@ def coords_and_rads(prop): rads = [re[1] for re in res] return np.array(coords), np.array(rads) + def coords_and_rads(prop): + seg_id = prop.label + + bbox = prop.bbox + bb = np.s_[bbox[0]:bbox[3], bbox[1]:bbox[4], bbox[2]:bbox[5]] + mask = segmentation[bb] == seg_id + + if estimate_radius_2d: + dists = np.array([distance_transform_edt(ma, sampling=resolution[1:]) for ma in mask]) + else: + dists = distance_transform_edt(mask, sampling=resolution) + + max_coord = np.unravel_index(np.argmax(dists), mask.shape) + radius = dists[max_coord] * radius_factor + + offset = np.array(bbox[:3]) + coord = np.array(max_coord) + offset + return coord, radius, seg_id + + with futures.ThreadPoolExecutor(num_workers) as tp: + res = list(tqdm( + tp.map(coords_and_rads, props), disable=not verbose, total=len(props), + desc="Compute coordinates and radii" + )) + + coords = [re[0] for re in res] + rads = [re[1] for re in res] + label_indxes = [re[2] for re in res] + return np.array(coords), np.array(rads), np.array(label_indxes) + + def write_points_to_imod( coordinates: np.ndarray, From 51165a5699a65e34ed3ca9ef87dcacea93549c3d Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Sat, 23 Nov 2024 15:30:07 +0100 Subject: [PATCH 25/35] Fix issues with the segmentation export to IMOD --- scripts/cooper/export_mask_to_imod.py | 8 -- scripts/inner_ear/analysis/.gitignore | 1 + .../inner_ear/analysis/export_seg_to_imod.py | 128 ++++++++++++++++++ synaptic_reconstruction/imod/to_imod.py | 57 ++++---- 4 files changed, 162 insertions(+), 32 deletions(-) create mode 100644 scripts/inner_ear/analysis/export_seg_to_imod.py diff --git a/scripts/cooper/export_mask_to_imod.py b/scripts/cooper/export_mask_to_imod.py index 98b4b2f..4273707 100644 --- a/scripts/cooper/export_mask_to_imod.py +++ b/scripts/cooper/export_mask_to_imod.py @@ -4,19 +4,11 @@ def export_mask_to_imod(args): - # Test script - # write_segmentation_to_imod( - # "synapse-examples/36859_J1_66K_TS_CA3_PS_26_rec_2Kb1dawbp_crop.mrc", - # "synapse-examples/36859_J1_66K_TS_CA3_PS_26_rec_2Kb1dawbp_crop_mitos.tif", - # "synapse-examples/mito.mod" - # ) write_segmentation_to_imod(args.input_path, args.segmentation_path, args.output_path) def main(): parser = argparse.ArgumentParser() - - args = parser.parse_args() parser.add_argument( "-i", "--input_path", required=True, help="The filepath to the mrc file containing 
the data." diff --git a/scripts/inner_ear/analysis/.gitignore b/scripts/inner_ear/analysis/.gitignore index 383f264..cbad005 100644 --- a/scripts/inner_ear/analysis/.gitignore +++ b/scripts/inner_ear/analysis/.gitignore @@ -1,2 +1,3 @@ panels/ +auto_seg_export/ *.zip diff --git a/scripts/inner_ear/analysis/export_seg_to_imod.py b/scripts/inner_ear/analysis/export_seg_to_imod.py new file mode 100644 index 0000000..eea4b14 --- /dev/null +++ b/scripts/inner_ear/analysis/export_seg_to_imod.py @@ -0,0 +1,128 @@ +import os +from shutil import copyfile +from subprocess import run + +import imageio.v3 as imageio +import mrcfile +import napari +import numpy as np +import pandas as pd +from elf.io import open_file +from skimage.transform import resize +from synaptic_reconstruction.imod.to_imod import write_segmentation_to_imod, write_segmentation_to_imod_as_points + +out_folder = "./auto_seg_export" +os.makedirs(out_folder, exist_ok=True) + + +def _resize(seg, tomo_path): + with open_file(tomo_path, "r") as f: + shape = f["data"].shape + + if shape != seg.shape: + seg = resize(seg, shape, order=0, anti_aliasing=False, preserve_range=True).astype(seg.dtype) + assert seg.shape == shape + return seg + + +def check_imod(tomo_path, mod_path): + run(["imod", tomo_path, mod_path]) + + +def export_pool(pool_name, pool_seg, tomo_path): + seg_path = f"./auto_seg_export/{pool_name}.tif" + pool_seg = _resize(pool_seg, tomo_path) + imageio.imwrite(seg_path, pool_seg, compression="zlib") + + output_path = f"./auto_seg_export/{pool_name}.mod" + write_segmentation_to_imod_as_points(tomo_path, seg_path, output_path, min_radius=5) + + check_imod(tomo_path, output_path) + + +def export_vesicles(folder, tomo_path): + vesicle_pool_path = os.path.join(folder, "Korrektur", "vesicle_pools.tif") + # pool_correction_path = os.path.join(folder, "Korrektur", "pool_correction.tif") + # pool_correction = imageio.imread(pool_correction_path) + + assignment_path = os.path.join(folder, "Korrektur", "measurements.xlsx") + assignments = pd.read_excel(assignment_path) + + vesicles = imageio.imread(vesicle_pool_path) + + pools = {} + for pool_name in pd.unique(assignments.pool): + pool_ids = assignments[assignments.pool == pool_name].id.values + pool_seg = vesicles.copy() + pool_seg[~np.isin(vesicles, pool_ids)] = 0 + pools[pool_name] = pool_seg + + view = False + if view: + v = napari.Viewer() + v.add_labels(vesicles, visible=False) + for pool_name, pool_seg in pools.items(): + v.add_labels(pool_seg, name=pool_name) + napari.run() + else: + for pool_name, pool_seg in pools.items(): + export_pool(pool_name, pool_seg, tomo_path) + + +def export_structure(folder, tomo, name, view=False): + path = os.path.join(folder, "Korrektur", f"{name}.tif") + seg = imageio.imread(path) + seg = _resize(seg, tomo) + + if view: + with open_file(tomo, "r") as f: + raw = f["data"][:] + + v = napari.Viewer() + v.add_image(raw) + v.add_labels(seg) + napari.run() + + return + + seg_path = f"./auto_seg_export/{name}.tif" + imageio.imwrite(seg_path, seg, compression="zlib") + output_path = f"./auto_seg_export/{name}.mod" + write_segmentation_to_imod(tomo, seg_path, output_path) + check_imod(tomo, output_path) + + +def remove_scale(tomo): + new_path = "./auto_seg_export/Emb71M1aGridA1sec1mod7.rec.rec" + if os.path.exists(new_path): + return new_path + + copyfile(tomo, new_path) + + with mrcfile.open(new_path, "r+") as f: + # Set the origin to (0, 0, 0) + f.header.nxstart = 0 + f.header.nystart = 0 + f.header.nzstart = 0 + f.header.origin = (0.0, 0.0, 0.0) + + # 
Save changes
+        f.flush()
+
+    return new_path
+
+
+def main():
+    folder = "/home/pape/Work/data/moser/em-synapses/Electron-Microscopy-Susi/Analyse/WT strong stim/Mouse 1/modiolar/1"
+    tomo = os.path.join(folder, "Emb71M1aGridA1sec1mod7.rec.rec")
+
+    tomo = remove_scale(tomo)
+
+    # export_vesicles(folder, tomo)
+    # export_structure(folder, tomo, "ribbon", view=False)
+    # export_structure(folder, tomo, "membrane", view=False)
+    export_structure(folder, tomo, "PD", view=False)
+
+
+if __name__ == "__main__":
+    main()

diff --git a/synaptic_reconstruction/imod/to_imod.py b/synaptic_reconstruction/imod/to_imod.py
index 7a98469..307e645 100644
--- a/synaptic_reconstruction/imod/to_imod.py
+++ b/synaptic_reconstruction/imod/to_imod.py
@@ -16,51 +16,60 @@ from tqdm import tqdm
 
 
-# FIXME how to bring the data to the IMOD axis convention?
-def _to_imod_order(data):
-    # data = np.swapaxes(data, 0, -1)
-    # data = np.fliplr(data)
-    # data = np.swapaxes(data, 0, -1)
-    return data
-
-
+# TODO: this still has some issues with some tomograms that have offset information.
+# For now, this occurs for the inner ear data tomograms; it works for Fidi's STEM tomograms.
+# Ben's theory is that this might be due to data from JEOL vs. ThermoFisher microscopes.
+# To test this I can check how it works for data from Maus et al. / Imig et al., which were taken on a JEOL.
+# Can also check out the mrc documentation here: https://www.ccpem.ac.uk/mrc_format/mrc2014.php
 def write_segmentation_to_imod(
     mrc_path: str,
-    segmentation_path: str,
+    segmentation: Union[str, np.ndarray],
     output_path: str,
 ) -> None:
-    """Write a segmentation to a mod file as contours.
+    """Write a segmentation to a mod file as closed contour objects.
 
     Args:
-        mrc_path: a
-        segmentation_path: a
-        output_path: a
+        mrc_path: The filepath to the mrc file from which the segmentation was derived.
+        segmentation: The segmentation (either as numpy array or filepath to a .tif file).
+        output_path: The output path where the mod file will be saved.
     """
     cmd = "imodauto"
    cmd_path = shutil.which(cmd)
     assert cmd_path is not None, f"Could not find the {cmd} imod command."
 
+    # Load the segmentation from a tif file in case a filepath was passed.
+    if isinstance(segmentation, str):
+        assert os.path.exists(segmentation)
+        segmentation = imageio.imread(segmentation)
+
+    # Binarize the segmentation and flip its axes to match the IMOD axis convention.
+    segmentation = (segmentation > 0).astype("uint8")
+    segmentation = np.flip(segmentation, axis=1)
+
+    # Read the voxel size and origin information from the mrc file.
     assert os.path.exists(mrc_path)
-    with mrcfile.open(mrc_path, mode="r+") as f:
+    with mrcfile.open(mrc_path, mode="r") as f:
         voxel_size = f.voxel_size
+        nx, ny, nz = f.header.nxstart, f.header.nystart, f.header.nzstart
+        origin = f.header.origin
+
+    # Write the input for imodauto to a temporary mrc file.
     with tempfile.NamedTemporaryFile(suffix=".mrc") as f:
         tmp_path = f.name
 
-        seg = (imageio.imread(segmentation_path) > 0).astype("uint8")
-        seg_ = _to_imod_order(seg)
-
-        # import napari
-        # v = napari.Viewer()
-        # v.add_image(seg)
-        # v.add_labels(seg_)
-        # napari.run()
-
-        mrcfile.new(tmp_path, data=seg_, overwrite=True)
+        mrcfile.new(tmp_path, data=segmentation, overwrite=True)
+        # Write the voxel_size and origin information.
with mrcfile.open(tmp_path, mode="r+") as f: f.voxel_size = voxel_size + + f.header.nxstart = nx + f.header.nystart = ny + f.header.nzstart = nz + f.header.origin = (0.0, 0.0, 0.0) * 3 if origin is None else origin + f.update_header_from_data() + # Run the command. cmd_list = [cmd, "-E", "1", "-u", tmp_path, output_path] run(cmd_list) From aa5d78e2c3257a653a576c5adb006b686aedcb7f Mon Sep 17 00:00:00 2001 From: SarahMuth Date: Sat, 23 Nov 2024 16:38:35 +0100 Subject: [PATCH 26/35] clean up --- scripts/cooper/analysis/calc_AZ_area.py | 239 ------------------ scripts/cooper/analysis/run_analysis_1.py | 35 +-- .../run_spatial_distribution_analysis.py | 149 +++-------- 3 files changed, 52 insertions(+), 371 deletions(-) delete mode 100644 scripts/cooper/analysis/calc_AZ_area.py diff --git a/scripts/cooper/analysis/calc_AZ_area.py b/scripts/cooper/analysis/calc_AZ_area.py deleted file mode 100644 index 592b043..0000000 --- a/scripts/cooper/analysis/calc_AZ_area.py +++ /dev/null @@ -1,239 +0,0 @@ -import h5py -import numpy as np -import os -import csv -from scipy.ndimage import binary_opening, median_filter,zoom, binary_closing -from skimage.measure import label, regionprops -from synaptic_reconstruction.morphology import compute_object_morphology -from skimage.morphology import ball -from scipy.spatial import ConvexHull -from skimage.draw import polygon - -def calculate_AZ_area_per_slice(AZ_slice, pixel_size_nm=1.554): - """ - Calculate the area of the AZ in a single 2D slice after applying error-reducing processing. - - Parameters: - - AZ_slice (numpy array): 2D array representing a single slice of the AZ segmentation. - - pixel_size_nm (float): Size of a pixel in nanometers. - - Returns: - - slice_area_nm2 (float): The area of the AZ in the slice in square nanometers. - """ - # Apply binary opening or median filter to reduce small segmentation errors - AZ_slice_filtered = binary_opening(AZ_slice, structure=np.ones((3, 3))).astype(int) - - # Calculate area in this slice - num_AZ_pixels = np.sum(AZ_slice_filtered == 1) - slice_area_nm2 = num_AZ_pixels * (pixel_size_nm ** 2) - - return slice_area_nm2 - -def calculate_total_AZ_area(tomo_path, pixel_size_nm=1.554): - """ - Calculate the total area of the AZ across all slices in a 3D tomogram file. - - Parameters: - - tomo_path (str): Path to the tomogram file (HDF5 format). - - pixel_size_nm (float): Size of a pixel in nanometers. - - Returns: - - total_AZ_area_nm2 (float): The total area of the AZ in square nanometers. - """ - with h5py.File(tomo_path, "r") as f: - AZ_intersect_seg = f["/AZ/compartment_AZ_intersection_manComp"][:] - - # Calculate the AZ area for each slice along the z-axis - total_AZ_area_nm2 = 0 - for z_slice in AZ_intersect_seg: - slice_area_nm2 = calculate_AZ_area_per_slice(z_slice, pixel_size_nm) - total_AZ_area_nm2 += slice_area_nm2 - - return total_AZ_area_nm2 - -def calculate_AZ_area_simple(tomo_path, pixel_size_nm=1.554): - """ - Calculate the volume of the AZ (active zone) in a 3D tomogram file. - - Parameters: - - tomo_path (str): Path to the tomogram file (HDF5 format). - - pixel_size_nm (float): Size of a pixel in nanometers (default is 1.554 nm). - - Returns: - - AZ_volume_nm3 (float): The volume of the AZ in cubic nanometers. 
- """ - # Open the file and read the AZ intersection segmentation data - with h5py.File(tomo_path, "r") as f: - AZ_intersect_seg = f["/AZ/compartment_AZ_intersection_manComp"][:] - - # Count voxels with label = 1 - num_AZ_voxels = np.sum(AZ_intersect_seg == 1) - - # Calculate the volume in cubic nanometers - AZ_area_nm2 = num_AZ_voxels * (pixel_size_nm ** 2) - - return AZ_area_nm2 - -def calculate_AZ_surface(tomo_path, pixel_size_nm=1.554): - with h5py.File(tomo_path, "r") as f: - #AZ_seg = f["/AZ/segment_from_AZmodel_v3"][:] - AZ_seg = f["/filtered_az"][:] - - # Apply binary closing to smooth the segmented regions - struct_elem = ball(1) # Use a small 3D structuring element - AZ_seg_smoothed = binary_closing(AZ_seg > 0, structure=struct_elem, iterations=20) - - labeled_seg = label(AZ_seg_smoothed) - - regions = regionprops(labeled_seg) - if regions: - # Sort regions by area and get the label of the largest region - largest_region = max(regions, key=lambda r: r.area) - largest_label = largest_region.label - - largest_component_mask = (labeled_seg == largest_label) - AZ_seg_filtered = largest_component_mask.astype(np.uint8) - - else: - # If no regions found, return an empty array - AZ_seg_filtered = np.zeros_like(AZ_seg_interp, dtype=np.uint8) - - morphology_data = compute_object_morphology(AZ_seg_filtered, "AZ Structure", resolution=(pixel_size_nm, pixel_size_nm, pixel_size_nm)) - surface_column = "surface [nm^2]" #if resolution is not None else "surface [pixel^2]" - surface_area = morphology_data[surface_column].iloc[0] - - return surface_area - -def calculate_AZ_surface_simple(tomo_path, pixel_size_nm=1.554): - with h5py.File(tomo_path, "r") as f: - AZ_seg = f["/labels/AZ"][:] - - morphology_data = compute_object_morphology(AZ_seg, "AZ Structure", resolution=(pixel_size_nm, pixel_size_nm, pixel_size_nm)) - surface_column = "surface [nm^2]" #if resolution is not None else "surface [pixel^2]" - surface_area = morphology_data[surface_column].iloc[0] - - return surface_area - -def calculate_AZ_surface_convexHull(tomo_path, pixel_size_nm=1.554): - with h5py.File(tomo_path, "r") as f: - AZ_seg = f["/AZ/segment_from_AZmodel_v3"][:] - - # Apply binary closing to smooth the segmented regions - struct_elem = ball(1) # Use a small 3D structuring element - AZ_seg_smoothed = binary_closing(AZ_seg > 0, structure=struct_elem, iterations=10) - - labeled_seg = label(AZ_seg_smoothed) - - regions = regionprops(labeled_seg) - if regions: - # Sort regions by area and get the label of the largest region - largest_region = max(regions, key=lambda r: r.area) - largest_label = largest_region.label - - largest_component_mask = (labeled_seg == largest_label) - AZ_seg_filtered = largest_component_mask.astype(np.uint8) - AZ_seg = AZ_seg_filtered - # Extract coordinates of non-zero points - points = np.argwhere(AZ_seg > 0) # Get the coordinates of non-zero (foreground) pixels - - if points.shape[0] < 4: - # ConvexHull requires at least 4 points in 3D to form a valid hull - AZ_seg_filtered = np.zeros_like(AZ_seg, dtype=np.uint8) - else: - # Apply ConvexHull to the points - hull = ConvexHull(points) - - # Create a binary mask for the convex hull - convex_hull_mask = np.zeros_like(AZ_seg, dtype=bool) - - # Iterate over each simplex (facet) of the convex hull and fill in the polygon - for simplex in hull.simplices: - # For each face of the convex hull, extract the vertices and convert to a 2D polygon - polygon_coords = points[simplex] - rr, cc = polygon(polygon_coords[:, 0], polygon_coords[:, 1]) - convex_hull_mask[rr, 
cc] = True - - # Optional: Label the convex hull mask - labeled_seg = label(convex_hull_mask) - regions = regionprops(labeled_seg) - - if regions: - # Sort regions by area and get the label of the largest region - largest_region = max(regions, key=lambda r: r.area) - largest_label = largest_region.label - - largest_component_mask = (labeled_seg == largest_label) - AZ_seg_filtered = largest_component_mask.astype(np.uint8) - - else: - AZ_seg_filtered = np.zeros_like(AZ_seg, dtype=np.uint8) - - # Calculate surface area - morphology_data = compute_object_morphology(AZ_seg_filtered, "AZ Structure", resolution=(pixel_size_nm, pixel_size_nm, pixel_size_nm)) - surface_column = "surface [nm^2]" - surface_area = morphology_data[surface_column].iloc[0] - - return surface_area - -def process_datasets(folder_path, output_csv="AZ_areas.csv", pixel_size_nm=1.554): - """ - Process all tomograms in multiple datasets within a folder and save results to a CSV. - - Parameters: - - folder_path (str): Path to the folder containing dataset folders with tomograms. - - output_csv (str): Filename for the output CSV file. - - pixel_size_nm (float): Size of a pixel in nanometers. - """ - results = [] - - # Iterate over each dataset folder - for dataset_name in os.listdir(folder_path): - dataset_path = os.path.join(folder_path, dataset_name) - - # Check if it's a directory (skip files in the main folder) - if not os.path.isdir(dataset_path): - continue - - # Iterate over each tomogram file in the dataset folder - for tomo_file in os.listdir(dataset_path): - tomo_path = os.path.join(dataset_path, tomo_file) - - # Check if the file is an HDF5 file (optional) - if tomo_file.endswith(".h5") or tomo_file.endswith(".hdf5"): - try: - # Calculate AZ area - #AZ_area = calculate_total_AZ_area(tomo_path, pixel_size_nm) - #AZ_area = calculate_AZ_area_simple(tomo_path, pixel_size_nm) - #AZ_surface_area = calculate_AZ_surface(tomo_path, pixel_size_nm) - #AZ_surface_area = calculate_AZ_surface_convexHull(tomo_path, pixel_size_nm) - AZ_surface_area = calculate_AZ_surface_simple(tomo_path, pixel_size_nm) - # Append results to list - results.append({ - "Dataset": dataset_name, - "Tomogram": tomo_file, - "AZ_surface_area": AZ_surface_area - }) - except Exception as e: - print(f"Error processing {tomo_file} in {dataset_name}: {e}") - - # Write results to a CSV file - with open(output_csv, mode="w", newline="") as csvfile: - fieldnames = ["Dataset", "Tomogram", "AZ_surface_area"] - writer = csv.DictWriter(csvfile, fieldnames=fieldnames) - - writer.writeheader() - for result in results: - writer.writerow(result) - - print(f"Results saved to {output_csv}") - -def main(): - # Define the path to the folder containing dataset folders - folder_path = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/20241102_TOMO_DATA_Imig2014/exported/" - output_csv = "./analysis_results/manual_AZ_exported/AZ_surface_area.csv" - # Call the function to process datasets and save results - process_datasets(folder_path, output_csv = output_csv) - -# Call main -if __name__ == "__main__": - main() diff --git a/scripts/cooper/analysis/run_analysis_1.py b/scripts/cooper/analysis/run_analysis_1.py index 94d0a62..abad440 100644 --- a/scripts/cooper/analysis/run_analysis_1.py +++ b/scripts/cooper/analysis/run_analysis_1.py @@ -1,7 +1,3 @@ -# This is the code for the first analysis for the cooper data. -# Here, we only compute the vesicle numbers and size distributions for the STEM tomograms -# in the 04 dataset. 
- import os from glob import glob @@ -37,13 +33,10 @@ def get_compartment_with_max_overlap(compartments, vesicles): # Iterate over each compartment and calculate the overlap with vesicles for compartment_label in unique_compartments: - # Create a binary mask for the current compartment compartment_mask = compartments == compartment_label vesicle_mask = vesicles > 0 intersection = np.logical_and(compartment_mask, vesicle_mask) - - # Calculate the number of overlapping voxels overlap_count = np.sum(intersection) # Track the compartment with the most overlap in terms of voxel count @@ -51,14 +44,13 @@ def get_compartment_with_max_overlap(compartments, vesicles): max_overlap_count = overlap_count best_compartment = compartment_label - # Create the final mask for the compartment with the most overlap final_mask = compartments == best_compartment return final_mask -# We compute the sizes for all vesicles in the compartment masks. +# We compute the sizes for all vesicles in the MANUALLY ANNOTATED compartment masks. # We use the same logic in the size computation as for the vesicle extraction to IMOD, -# including the radius correction factor. +# including the radius correction factor. --> not needed here # The number of vesicles is automatically computed as the length of the size list. def compute_sizes_for_all_tomorams_manComp(): os.makedirs(RESULT_FOLDER, exist_ok=True) @@ -66,8 +58,6 @@ def compute_sizes_for_all_tomorams_manComp(): resolution = (1.554,) * 3 # Change for each dataset #1.554 for Munc and snap #0.8681 for 04 dataset radius_factor = 1 estimate_radius_2d = True - - # Dictionary to hold the results for each dataset and category (CTRL or DKO) dataset_results = {} tomograms = sorted(glob(os.path.join(PREDICTION_ROOT, "**/*.h5"), recursive=True)) @@ -79,11 +69,9 @@ def compute_sizes_for_all_tomorams_manComp(): # Determine if the tomogram is 'CTRL' or 'DKO' category = "CTRL" if "CTRL" in fname else "DKO" - # Initialize a new dictionary entry for each dataset and category if not already present if ds_name not in dataset_results: dataset_results[ds_name] = {'CTRL': {}, 'DKO': {}} - - # Skip if this tomogram already exists in the dataset dictionary + if fname in dataset_results[ds_name][category]: continue @@ -93,6 +81,7 @@ def compute_sizes_for_all_tomorams_manComp(): input_path = os.path.join(DATA_ROOT, ds_name, f"{fname}.h5") assert os.path.exists(input_path), input_path + # Load the compartment mask from the tomogram with h5py.File(input_path, "r") as f: mask = f["labels/compartment"][:] @@ -102,30 +91,30 @@ def compute_sizes_for_all_tomorams_manComp(): segmentation, resolution=resolution, radius_factor=radius_factor, estimate_radius_2d=estimate_radius_2d ) - # Add sizes to the dataset dictionary under the appropriate category + dataset_results[ds_name][category][fname] = sizes # Save each dataset's results into separate CSV files for CTRL and DKO tomograms for ds_name, categories in dataset_results.items(): for category, tomogram_data in categories.items(): - # Sort tomograms by name within the category sorted_data = dict(sorted(tomogram_data.items())) # Sort by tomogram names result_df = pd.DataFrame.from_dict(sorted_data, orient='index').transpose() - # Define the output file path output_path = os.path.join(RESULT_FOLDER, f"size_analysis_for_{ds_name}_{category}_rf1.csv") # Save the DataFrame to CSV result_df.to_csv(output_path, index=False) +# We compute the sizes for all vesicles in the AUTOMATIC SEGMENTED compartment masks. 
+# We use the same logic in the size computation as for the vesicle extraction to IMOD, +# including the radius correction factor. --> not needed here +# The number of vesicles is automatically computed as the length of the size list. def compute_sizes_for_all_tomorams_autoComp(): os.makedirs(RESULT_FOLDER, exist_ok=True) resolution = (1.554,) * 3 # Change for each dataset #1.554 for Munc and snap #0.8681 for 04 dataset radius_factor = 1 estimate_radius_2d = True - - # Dictionary to hold the results for each dataset and category (CTRL or DKO) dataset_results = {} tomograms = sorted(glob(os.path.join(PREDICTION_ROOT, "**/*.h5"), recursive=True)) @@ -137,11 +126,9 @@ def compute_sizes_for_all_tomorams_autoComp(): # Determine if the tomogram is 'CTRL' or 'DKO' category = "CTRL" if "CTRL" in fname else "DKO" - # Initialize a new dictionary entry for each dataset and category if not already present if ds_name not in dataset_results: dataset_results[ds_name] = {'CTRL': {}, 'DKO': {}} - # Skip if this tomogram already exists in the dataset dictionary if fname in dataset_results[ds_name][category]: continue @@ -151,6 +138,7 @@ def compute_sizes_for_all_tomorams_autoComp(): input_path = os.path.join(DATA_ROOT, ds_name, f"{fname}.h5") assert os.path.exists(input_path), input_path + # Load the compartment mask from the tomogram with h5py.File(input_path, "r") as f: compartments = f["/compartments/segment_from_3Dmodel_v2"][:] @@ -165,17 +153,14 @@ def compute_sizes_for_all_tomorams_autoComp(): segmentation, resolution=resolution, radius_factor=radius_factor, estimate_radius_2d=estimate_radius_2d ) - # Add sizes to the dataset dictionary under the appropriate category dataset_results[ds_name][category][fname] = sizes # Save each dataset's results into separate CSV files for CTRL and DKO tomograms for ds_name, categories in dataset_results.items(): for category, tomogram_data in categories.items(): - # Sort tomograms by name within the category sorted_data = dict(sorted(tomogram_data.items())) # Sort by tomogram names result_df = pd.DataFrame.from_dict(sorted_data, orient='index').transpose() - # Define the output file path output_path = os.path.join(RESULT_FOLDER, f"size_analysis_for_{ds_name}_{category}_rf1.csv") # Save the DataFrame to CSV diff --git a/scripts/cooper/analysis/run_spatial_distribution_analysis.py b/scripts/cooper/analysis/run_spatial_distribution_analysis.py index edd8308..6943484 100644 --- a/scripts/cooper/analysis/run_spatial_distribution_analysis.py +++ b/scripts/cooper/analysis/run_spatial_distribution_analysis.py @@ -9,7 +9,8 @@ DATA_ROOT = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/segmentation/for_spatial_distribution_analysis/final_Imig2014_seg/" # noqa PREDICTION_ROOT = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/segmentation/for_spatial_distribution_analysis/final_Imig2014_seg/" # noqa RESULT_FOLDER = "./analysis_results/AZ_filtered_autoComp" - +AZ_PATH = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/20241102_TOMO_DATA_Imig2014/az_seg_filtered" + def get_compartment_with_max_overlap(compartments, vesicles): """ @@ -33,13 +34,10 @@ def get_compartment_with_max_overlap(compartments, vesicles): # Iterate over each compartment and calculate the overlap with vesicles for compartment_label in unique_compartments: - # Create a binary mask for the current compartment compartment_mask = compartments == compartment_label vesicle_mask = vesicles > 0 intersection = np.logical_and(compartment_mask, vesicle_mask) - - # Calculate 
the number of overlapping voxels overlap_count = np.sum(intersection) # Track the compartment with the most overlap in terms of voxel count @@ -47,85 +45,20 @@ def get_compartment_with_max_overlap(compartments, vesicles): max_overlap_count = overlap_count best_compartment = compartment_label - # Create the final mask for the compartment with the most overlap final_mask = compartments == best_compartment return final_mask -# We compute the distances for all vesicles in the compartment masks to the AZ. -# We use the same different resolution, depending on dataset. +# We compute the distances for all vesicles in the AUTOMATIC SEGMENTED compartment masks to the AZ. +# We use different resolution, depending on dataset. # The closest distance is calculated, i.e., the closest point on the outer membrane of the vesicle to the AZ. -def compute_per_vesicle_distance_to_AZ(): - os.makedirs(RESULT_FOLDER, exist_ok=True) - - resolution = (1.554,) * 3 # Change for each dataset #1.554 for Munc and snap #0.8681 for 04 dataset - - # Dictionary to hold the results for each dataset - dataset_results = {} - - tomograms = sorted(glob(os.path.join(PREDICTION_ROOT, "**/*.h5"), recursive=True)) - for tomo in tqdm(tomograms): - ds_name, fname = os.path.split(tomo) - ds_name = os.path.split(ds_name)[1] - fname = os.path.splitext(fname)[0] - - # Initialize a new dictionary entry for each dataset if not already present - if ds_name not in dataset_results: - dataset_results[ds_name] = {} - - # Skip if this tomogram already exists in the dataset dictionary - if fname in dataset_results[ds_name]: - continue - - # Load the vesicle segmentation from the predictions - with h5py.File(tomo, "r") as f: - segmentation = f["/vesicles/segment_from_combined_vesicles"][:] - segmented_object = f["/AZ/compartment_AZ_intersection"][:] - #if AZ intersect is small, compartment seg didn't align with AZ so we use the normal AZ and not intersect - if (segmented_object == 0).all() or np.sum(segmented_object == 1) < 2000: - segmented_object = f["/AZ/segment_from_AZmodel_v3"][:] - - input_path = os.path.join(DATA_ROOT, ds_name, f"{fname}.h5") - assert os.path.exists(input_path), input_path - - # Load the compartment mask from the tomogram - with h5py.File(input_path, "r") as f: - compartments = f["/compartments/segment_from_3Dmodel_v2"][:] - mask = get_compartment_with_max_overlap(compartments, segmentation) - - #if more than half of the vesicles (approximation, its checking pixel and not label) would get filtered by mask it means the compartment seg didn't work and thus we won't use the mask - if np.sum(segmentation[mask == 0] > 0) > (0.5 * np.sum(segmentation > 0)): - print("using no mask") - else: - segmentation[mask == 0] = 0 - distances, _, _, _ = measure_segmentation_to_object_distances( - segmentation, segmented_object=segmented_object, resolution=resolution - ) +def compute_per_vesicle_distance_to_AZ_autoComp(separate_AZseg=False): - # Add distances to the dataset dictionary under the tomogram name - dataset_results[ds_name][fname] = distances - - # Save each dataset's results to a single CSV file - for ds_name, tomogram_data in dataset_results.items(): - # Create a DataFrame where each column is a tomogram's distances - result_df = pd.DataFrame.from_dict(tomogram_data, orient='index').transpose() - - # Define the output file path - output_path = os.path.join(RESULT_FOLDER, f"spatial_distribution_analysis_for_{ds_name}.csv") - - # Save the DataFrame to CSV - result_df.to_csv(output_path, index=False) - -def 
compute_per_vesicle_distance_to_filteredAZ(): - filtered_AZ_path = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/20241102_TOMO_DATA_Imig2014/az_seg_filtered" os.makedirs(RESULT_FOLDER, exist_ok=True) - resolution = (1.554,) * 3 # Change for each dataset #1.554 for Munc and snap #0.8681 for 04 dataset - - # Dictionary to hold the results for each dataset and category (CTRL or DKO) dataset_results = {} - tomograms = sorted(glob(os.path.join(PREDICTION_ROOT, "**/*.h5"), recursive=True)) + for tomo in tqdm(tomograms): ds_name, fname = os.path.split(tomo) ds_name = os.path.split(ds_name)[1] @@ -134,23 +67,27 @@ def compute_per_vesicle_distance_to_filteredAZ(): # Determine if the tomogram is 'CTRL' or 'DKO' category = "CTRL" if "CTRL" in fname else "DKO" - # Initialize a new dictionary entry for each dataset and category if not already present if ds_name not in dataset_results: dataset_results[ds_name] = {'CTRL': {}, 'DKO': {}} - - # Skip if this tomogram already exists in the dataset dictionary + if fname in dataset_results[ds_name][category]: continue - #Load the AZ segmentations - AZ_path = os.path.join(filtered_AZ_path, ds_name, f"{fname}.h5") - with h5py.File(AZ_path, "r") as f: - segmented_object = f["/filtered_az"][:] # Load the vesicle segmentation from the predictions with h5py.File(tomo, "r") as f: segmentation = f["/vesicles/segment_from_combined_vesicles"][:] - + + #Check if AZ seg is stored in a different tomo or same + if separate_AZseg: + print(f"using AZ segmentation from {AZ_PATH}") + #Load the AZ segmentations + AZ_path = os.path.join(AZ_PATH, ds_name, f"{fname}.h5") + with h5py.File(AZ_path, "r") as f_AZ: + segmented_object = f_AZ["/thin_az"][:] + else: + segmented_object = f["/AZ/compartment_AZ_intersection"][:] + #if AZ intersect is small, compartment seg didn't align with AZ so we use the normal AZ and not intersect if (segmented_object == 0).all() or np.sum(segmented_object == 1) < 2000: segmented_object = f["/AZ/segment_from_AZmodel_v3"][:] @@ -168,6 +105,7 @@ def compute_per_vesicle_distance_to_filteredAZ(): print("using no mask") else: segmentation[mask == 0] = 0 + distances, _, _, _ = measure_segmentation_to_object_distances( segmentation, segmented_object=segmented_object, resolution=resolution ) @@ -178,36 +116,35 @@ def compute_per_vesicle_distance_to_filteredAZ(): # Save each dataset's results into separate CSV files for CTRL and DKO tomograms for ds_name, categories in dataset_results.items(): for category, tomogram_data in categories.items(): - # Sort tomograms by name within the category sorted_data = dict(sorted(tomogram_data.items())) # Sort by tomogram names result_df = pd.DataFrame.from_dict(sorted_data, orient='index').transpose() - - # Define the output file path output_path = os.path.join(RESULT_FOLDER, f"spatial_distribution_analysis_for_{ds_name}_{category}.csv") # Save the DataFrame to CSV result_df.to_csv(output_path, index=False) +# We compute the distances for all vesicles in the MANUALLY ANNOTATED compartment masks to the AZ. +# We use different resolution, depending on dataset. +# The closest distance is calculated, i.e., the closest point on the outer membrane of the vesicle to the AZ. 
def compute_per_vesicle_distance_to_AZ_manComp(): os.makedirs(RESULT_FOLDER, exist_ok=True) resolution = (1.554,) * 3 # Change for each dataset #1.554 for Munc and snap #0.8681 for 04 dataset - - # Dictionary to hold the results for each dataset dataset_results = {} - tomograms = sorted(glob(os.path.join(PREDICTION_ROOT, "**/*.h5"), recursive=True)) + for tomo in tqdm(tomograms): ds_name, fname = os.path.split(tomo) ds_name = os.path.split(ds_name)[1] fname = os.path.splitext(fname)[0] - # Initialize a new dictionary entry for each dataset if not already present - if ds_name not in dataset_results: - dataset_results[ds_name] = {} + # Determine if the tomogram is 'CTRL' or 'DKO' + category = "CTRL" if "CTRL" in fname else "DKO" - # Skip if this tomogram already exists in the dataset dictionary - if fname in dataset_results[ds_name]: + if ds_name not in dataset_results: + dataset_results[ds_name] = {'CTRL': {}, 'DKO': {}} + + if fname in dataset_results[ds_name][category]: continue # Load the vesicle segmentation from the predictions @@ -223,28 +160,26 @@ def compute_per_vesicle_distance_to_AZ_manComp(): mask = f["/labels/compartment"][:] segmentation[mask == 0] = 0 + distances, _, _, _ = measure_segmentation_to_object_distances( segmentation, segmented_object=segmented_object, resolution=resolution ) - # Add distances to the dataset dictionary under the tomogram name - dataset_results[ds_name][fname] = distances - - # Save each dataset's results to a single CSV file - for ds_name, tomogram_data in dataset_results.items(): - # Create a DataFrame where each column is a tomogram's distances - result_df = pd.DataFrame.from_dict(tomogram_data, orient='index').transpose() - - # Define the output file path - output_path = os.path.join(RESULT_FOLDER, f"spatial_distribution_analysis_for_{ds_name}.csv") - - # Save the DataFrame to CSV - result_df.to_csv(output_path, index=False) + # Add distances to the dataset dictionary under the appropriate category + dataset_results[ds_name][category][fname] = distances + # Save each dataset's results into separate CSV files for CTRL and DKO tomograms + for ds_name, categories in dataset_results.items(): + for category, tomogram_data in categories.items(): + sorted_data = dict(sorted(tomogram_data.items())) # Sort by tomogram names + result_df = pd.DataFrame.from_dict(sorted_data, orient='index').transpose() + output_path = os.path.join(RESULT_FOLDER, f"spatial_distribution_analysis_for_{ds_name}_{category}.csv") + + # Save the DataFrame to CSV + result_df.to_csv(output_path, index=False) def main(): - #compute_per_vesicle_distance_to_AZ() + compute_per_vesicle_distance_to_AZ_autoComp(separate_AZseg=False) #compute_per_vesicle_distance_to_AZ_manComp() - compute_per_vesicle_distance_to_filteredAZ() From 20e429b3b7b5e000a3aeb48c603326ceb2be1c5c Mon Sep 17 00:00:00 2001 From: SarahMuth Date: Sat, 23 Nov 2024 16:40:26 +0100 Subject: [PATCH 27/35] clean up --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 0377c4a..d955053 100644 --- a/.gitignore +++ b/.gitignore @@ -14,4 +14,5 @@ scripts/rizzoli/upsample_data.py scripts/cooper/training/find_rec_testset.py scripts/rizzoli/combine_2D_slices.py scripts/rizzoli/combine_2D_slices_raw.py -scripts/cooper/remove_h5key.py \ No newline at end of file +scripts/cooper/remove_h5key.py +scripts/cooper/analysis/calc_AZ_area.py \ No newline at end of file From 19f618e388a510884c82475622b66074237c46ba Mon Sep 17 00:00:00 2001 From: SarahMuth Date: Sat, 23 Nov 2024 16:42:00 
+0100 Subject: [PATCH 28/35] clean up --- .../cooper/analysis/{run_analysis_1.py => run_size_analysis.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename scripts/cooper/analysis/{run_analysis_1.py => run_size_analysis.py} (100%) diff --git a/scripts/cooper/analysis/run_analysis_1.py b/scripts/cooper/analysis/run_size_analysis.py similarity index 100% rename from scripts/cooper/analysis/run_analysis_1.py rename to scripts/cooper/analysis/run_size_analysis.py From cb693b13a14f96bfccd62f117eced546e4b476d3 Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Sun, 24 Nov 2024 15:03:00 +0100 Subject: [PATCH 29/35] Update data summaries --- scripts/aggregate_data_information.py | 78 +++++++++++++----- .../active_zone_training_data.xlsx | Bin 0 -> 9607 bytes .../compartment_training_data.xlsx | Bin 0 -> 5747 bytes .../vesicle_domain_adaptation_data.xlsx | Bin 0 -> 11526 bytes .../data_summary/vesicle_training_data.xlsx | Bin 0 -> 13832 bytes scripts/summarize_data.py | 2 +- 6 files changed, 60 insertions(+), 20 deletions(-) create mode 100644 scripts/data_summary/active_zone_training_data.xlsx create mode 100644 scripts/data_summary/compartment_training_data.xlsx create mode 100644 scripts/data_summary/vesicle_domain_adaptation_data.xlsx create mode 100644 scripts/data_summary/vesicle_training_data.xlsx diff --git a/scripts/aggregate_data_information.py b/scripts/aggregate_data_information.py index d90ec8c..03ca0af 100644 --- a/scripts/aggregate_data_information.py +++ b/scripts/aggregate_data_information.py @@ -14,7 +14,7 @@ def aggregate_vesicle_train_data(roots, test_tomograms, conditions, resolutions): tomo_names = [] - tomo_vesicles = [] + tomo_vesicles_all, tomo_vesicles_imod = [], [] tomo_condition = [] tomo_resolution = [] tomo_train = [] @@ -43,14 +43,19 @@ def aggregate_vesicle_train_data(roots, test_tomograms, conditions, resolutions) except KeyError: tomo_name = fname - n_label_sets = len(f["labels"]) - if n_label_sets > 2: - print(tomo_path, "contains the following labels:", list(f["labels"].keys())) - seg = f["labels/vesicles"][:] - n_vesicles = len(np.unique(seg)) - 1 + if "labels/vesicles/combined_vesicles" in f: + all_vesicles = f["labels/vesicles/combined_vesicles"][:] + imod_vesicles = f["labels/vesicles/masked_vesicles"][:] + n_vesicles_all = len(np.unique(all_vesicles)) - 1 + n_vesicles_imod = len(np.unique(imod_vesicles)) - 2 + else: + vesicles = f["labels/vesicles"][:] + n_vesicles_all = len(np.unique(vesicles)) - 1 + n_vesicles_imod = n_vesicles_all tomo_names.append(tomo_name) - tomo_vesicles.append(n_vesicles) + tomo_vesicles_all.append(n_vesicles_all) + tomo_vesicles_imod.append(n_vesicles_imod) tomo_condition.append(this_condition) tomo_resolution.append(this_resolution) tomo_train.append("test" if fname in this_test_tomograms else "train/val") @@ -60,7 +65,8 @@ def aggregate_vesicle_train_data(roots, test_tomograms, conditions, resolutions) "condition": tomo_condition, "resolution": tomo_resolution, "used_for": tomo_train, - "vesicle_count": tomo_vesicles, + "vesicle_count_all": tomo_vesicles_all, + "vesicle_count_imod": tomo_vesicles_imod, }) os.makedirs("data_summary", exist_ok=True) @@ -70,15 +76,15 @@ def aggregate_vesicle_train_data(roots, test_tomograms, conditions, resolutions) def vesicle_train_data(): roots = { "01": { - "train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/extracted/20240909_cp_datatransfer/01_hoi_maus_2020_incomplete", # noqa + "train": 
"/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/01_hoi_maus_2020_incomplete", # noqa "test": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/testsets/01_hoi_maus_2020_incomplete", # noqa }, "02": { - "train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/extracted/20240909_cp_datatransfer/02_hcc_nanogold", # noqa + "train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/02_hcc_nanogold", # noqa "test": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/testsets/02_hcc_nanogold", # noqa }, "03": { - "train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/extracted/20240909_cp_datatransfer/03_hog_cs1sy7", # noqa + "train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/03_hog_cs1sy7", # noqa "test": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/testsets/03_hog_cs1sy7", # noqa }, "04": { @@ -86,27 +92,27 @@ def vesicle_train_data(): "test": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/ground_truth/04Dataset_for_vesicle_eval/", # noqa }, "05": { - "train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/extracted/20240909_cp_datatransfer/05_stem750_sv_training", # noqa + "train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/05_stem750_sv_training", # noqa "test": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/testsets/05_stem750_sv_training", # noqa }, "07": { - "train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/extracted/20240909_cp_datatransfer/07_hoi_s1sy7_tem250_ihgp", # noqa + "train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/07_hoi_s1sy7_tem250_ihgp", # noqa "test": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/testsets/07_hoi_s1sy7_tem250_ihgp", # noqa }, "09": { - "train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/extracted/20240909_cp_datatransfer/09_stem750_66k", # noqa + "train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/09_stem750_66k", # noqa "test": "", }, "10": { - "train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/extracted/20240909_cp_datatransfer/10_tem_single_release", # noqa + "train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/10_tem_single_release", # noqa "test": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/testsets/10_tem_single_release", # noqa }, "11": { - "train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/extracted/20240909_cp_datatransfer/11_tem_multiple_release", # noqa + "train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/11_tem_multiple_release", # noqa "test": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/testsets/11_tem_multiple_release", # noqa }, "12": { - "train": 
"/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/extracted/20240909_cp_datatransfer/12_chemical_fix_cryopreparation", # noqa + "train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/12_chemical_fix_cryopreparation", # noqa "test": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/testsets/12_chemical_fix_cryopreparation", # noqa }, } @@ -397,6 +403,11 @@ def vesicle_domain_adaptation_data(): "MF_05649_P-09175-E_06.h5", "MF_05646_C-09175-B_001B.h5", "MF_05649_P-09175-E_07.h5", "MF_05649_G-09175-C_001.h5", "MF_05646_C-09175-B_002.h5", "MF_05649_G-09175-C_04.h5", "MF_05649_P-09175-E_05.h5", "MF_05646_C-09175-B_000.h5", "MF_05646_C-09175-B_001.h5" + ], + "frog": [ + "block10U3A_three.h5", "block30UB_one_two.h5", "block30UB_two.h5", "block10U3A_one.h5", + "block184B_one.h5", "block30UB_three.h5", "block10U3A_two.h5", "block30UB_four.h5", + "block30UB_one.h5", "block10U3A_five.h5", ] } @@ -439,13 +450,42 @@ def vesicle_domain_adaptation_data(): aggregate_da(roots, train_tomograms, test_tomograms, resolutions) +def get_n_images_frog(): + root = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/rizzoli/extracted/upsampled_by2" + tomos = ["block10U3A_three.h5", "block30UB_one_two.h5", "block30UB_two.h5", "block10U3A_one.h5", + "block184B_one.h5", "block30UB_three.h5", "block10U3A_two.h5", "block30UB_four.h5", + "block30UB_one.h5", "block10U3A_five.h5"] + + n_images = 0 + for tomo in tomos: + path = os.path.join(root, tomo) + with h5py.File(path, "r") as f: + n_images += f["raw"].shape[0] + print(n_images) + + +def get_image_sizes_tem_2d(): + root = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/2D_data/maus_2020_tem2d_wt_unt_div14_exported_scaled/good_for_DAtraining/maus_2020_tem2d_wt_unt_div14_exported_scaled" # noqa + tomos = [ + "MF_05649_P-09175-E_06.h5", "MF_05646_C-09175-B_001B.h5", "MF_05649_P-09175-E_07.h5", + "MF_05649_G-09175-C_001.h5", "MF_05646_C-09175-B_002.h5", "MF_05649_G-09175-C_04.h5", + "MF_05649_P-09175-E_05.h5", "MF_05646_C-09175-B_000.h5", "MF_05646_C-09175-B_001.h5" + ] + for tomo in tomos: + path = os.path.join(root, tomo) + with h5py.File(path, "r") as f: + print(f["raw"].shape) + + def main(): # active_zone_train_data() # compartment_train_data() # mito_train_data() - # vesicle_train_data() + vesicle_train_data() - vesicle_domain_adaptation_data() + # vesicle_domain_adaptation_data() + # get_n_images_frog() + # get_image_sizes_tem_2d() main() diff --git a/scripts/data_summary/active_zone_training_data.xlsx b/scripts/data_summary/active_zone_training_data.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..b19365309eac854d7d9ace55088d8c6518d6c387 GIT binary patch literal 9607 zcmZ{K1z1~8vp&VOxD+X_1xj#lf#R-(;1DFZLy=;^ic4{KhnCSw+14rLqP2J3D`V@PVITZ#>UlQZxv0_h7)Jq$#5ZrhN!>l4`n|P$17fL69g< z6s6I!&SQUykt?<#UNmB1xziO;Ca<6v^yw>P!_ZKdV|6w^YAq z+3v-m0C@R$`=%w2M=LU9bt~}#or%>GV$IrEGH)9K9s9m>-&O>81C-X>o;HnD5AD0u zEsKHj*(zEyKV?>V*!Ag+9_?RETogPoL5(unuftB*@Nn*K1WqvilbJMwOnN+21cXdx z1O!4jGhTKt-K;I_E&jOz9=SOH>bouo61_jF`RZkLX^+*9qPU-8v#AI!vjUu|2sOJXf4v$^k0KwQ85dWr@? 
[GIT binary patch payload omitted: contents of the new Excel summaries scripts/data_summary/active_zone_training_data.xlsx, compartment_training_data.xlsx, vesicle_domain_adaptation_data.xlsx and vesicle_training_data.xlsx]
zZ&}~g2RIei12NT}>($Eb>_rC4VtOXBtmOtu-+O)pL2}L1HmC$os>{xY0yK8_OvP?rAC3YmM&10O=+H=}>oA-e36V7*`(qldU_n z%SLG@opS<9FAFTaaF+DkMBaAIke&TU2Y1EDmD6iP5&V6J&UIhXM5NQGd1186J^)_)8@sK&u z!(wBQh|uYV=^;-&Qw#CZBGcSozUPKbOh`B@E7kZ&G_eCaQo4wu9Z+qmPL+EBz-}d^ zgsrG_;UUq)@w8h(Fd`OHOLE>b2V@c)Uvfjk@Se`bCo%?4WGY-4?WQSt#((f*%P4)c zmuTX8a;gMznw>P{a>^mqZJ<~usdVw>UzjokJ0LQBS$fl8V5{sqU|{&zjgJa%E)j-W z3@!uy7AjD17#PTm*(09>sy8s4?@mDhK;Q`t1bIS(56e(G4kQ8pjxO4tu0!Y3425iw zC7a#ue7o`CeIi7dX`)I9RDJzqq1vyIJ`_GZ_5I0z@uMBF+_7)BQ9FM{h4h-#D?=8@ zDpo*Nkz&q5Ff*u>`3DwRWX!=^C%*r*oT@fPDYz0l8UooF7)p0{a==PFY%U((Pwfrt z882(zpPoGIZg!Z8si{gI42)Z+9J))~WFgGLmV@>)S?7pxN$D%oLP) zaAvh%y-{KlQo7KSXv*(iC&^~Xn#?U%)NQm8@}=UL19ckVc6X|}#V2P~`9|N^*HBt4 zf8E}T^uyo*$KYe5&x07)g+<+1QZqUi>!a;;Icsy)>No`A1(a&EC7LP$hq*u~^o54s zdM#2MVdNU{{=NO4!GTyp@yY87{8Anj>Xv$^XwEO z%nQzPNR_xNI)7GjRFIIkfiTAg-2G}O(_E|_lRNN>lKEwk9v@%b1q7m4pCs>aSZQNp z-e$8y<4ER4N0+Y!!eH#$91d?~!K*j=NlJU8lM!~z5i>A&1^0b|0 zka<&3<5A%fk!{*Au!~YeuPmfU!#OfPNSR8?qA@1lBco5$0Xksx1MYIO;OEDJf+obW zc-fd=?>k|SY}neuPuK+5*G;7Oi(-Amy|s^@+<1@p7Pf{pWXp~na45GT<-|j(>TJI8 z^>q|^@B2F}SVyLiEqe$y{tJRB&RXb9eBp3(MDYyfZ?{GzcVuS|q(Fn%k#Rp-elR6) z%wUEW3*50zY*+`@O2mhA@j6;Tmx&iR@;G{H`Hh45LS(0rlJHO5B={|^8p*b9!2Ovxulm;OtMgO; zq{VR7OXN|W`{TvqlcD2_V`${#G%Ksa*1LgVe}p?nyW7*;;HDd||Gr>!c@Ykm`y2|& z5&;Sd>CXitJ9`%kLp#Xb)J3KFuq1kvd(2MQXh|8_Jie@t<6<#C&9WE$TM~7ANVW4A z3JqFYm}<}?_g^nwlz#^u$eEVzj63^%!j@*ko~{#p*S6otHuAXhHf|dq@;;jPsrw^R zU8GCOP_Rfn^=Z?G-V>`@ zYyWama}|s$Lc~Z^BL6errH7gmCTM!5k6!Ue6$6#}{5f1q83z4KF~R|Pgtq;W*ihqG zw!JwK=)SxtEjE*{lv4H6`d;8wj0{8qgxY)U(AuFn zp~1R3?@6M!Rs>y7c?P78>Md@*HD4qBcSY9wv0ajp)PPMGocf#-+6WsI{V|j399&GYAQ`BGV^ywJiGa9 zi*EW(jX5QjVyFsp3Z2w187h=4;-6{KnIlHeXHlp(Pf2@eI;^H^CG74*&)~s(4R%$A zPVgA#&#p>35{bq5)#ko@WfFfJS{Q+!OiU>)+P}+X`JFR(dj38lLs-y;`V5>U5$qUU z%9Fa-l69J`uUF-~y#07Onuo!C-I_w59XLKC2uEgJcKz3_`wNF>1@yp;dkP>?FLXms?PwLL#)nQ9lxb8BO7ozxjF8Hw(A1W;CYe>*f(n*=qTJOm zDwEqxcx_v{h6}wf2x5=0?GMzU%8fp&hDy<3WmYN=pA3Y(D7cbW$gB`#yI%I4H4;9n zo(KjQu=r2mxosi>;K+^@gUBl?!2Z9rk?O{PE3p)JD13h=o` z^%f{EY(KzlgA&1SpjRDo@G#e+QMtiH9jTy@Ej&dY-xZ z-UC5uhqwBbDc*7$!P|=7FbP?j*N=ohb9PvGiao>Gj*;oziwN@g4Jar#)YmoIEiOK` zsYQN>$o{aWaZ@3c^>qLZmF}f!c;!x%muvPXf6avh<$9J8;kshNUuS!(eU;NRMeoW{*G&`zW8Yy6fyYv%qQ}@|( z4Cw#+tsS6ho<6<;3jhD_@ceE4_d7IyY@wjOJ}diA>wmpb^SAlmZ<_pL-Uo#LU*0?U z+ri&AjsNYy8~)Xkga7s3@!wwlK2H4G%QJK8ColgTF8((Cd*A-Yl$-9qcJaRr|K11x zF(jk^H}ik(ihmpb-RJ)pe`9zu{y)0@Z$E#x(m#Go7@?s4m&TG2hXGy;f`WnrK6!v0 Kyow3()Bgk5PWEE} literal 0 HcmV?d00001 diff --git a/scripts/summarize_data.py b/scripts/summarize_data.py index df57059..7615641 100644 --- a/scripts/summarize_data.py +++ b/scripts/summarize_data.py @@ -151,4 +151,4 @@ def summarize_compartment_train(): # summarize_vesicle_train_data() # summarize_vesicle_da() summarize_az_train() -# summarize_compartment_train() +summarize_compartment_train() From a0c31a8ce9e507569c437d4cae3df2ef92ae29a3 Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Sun, 24 Nov 2024 16:15:32 +0100 Subject: [PATCH 30/35] Fix issue in data aggregation --- scripts/aggregate_data_information.py | 44 +++++++----------- .../data_summary/vesicle_training_data.xlsx | Bin 13832 -> 14745 bytes 2 files changed, 17 insertions(+), 27 deletions(-) diff --git a/scripts/aggregate_data_information.py b/scripts/aggregate_data_information.py index 03ca0af..7086b23 100644 --- a/scripts/aggregate_data_information.py +++ b/scripts/aggregate_data_information.py @@ -12,30 +12,24 @@ stem = "STEM" -def aggregate_vesicle_train_data(roots, test_tomograms, conditions, resolutions): +def aggregate_vesicle_train_data(roots, conditions, resolutions): tomo_names = 
[] tomo_vesicles_all, tomo_vesicles_imod = [], [] tomo_condition = [] tomo_resolution = [] tomo_train = [] - for ds, root in roots.items(): - print("Aggregate data for", ds) - train_root = root["train"] - if train_root == "": - test_root = root["test"] - tomograms = sorted(glob(os.path.join(test_root, "2024**", "*.h5"), recursive=True)) - this_test_tomograms = [os.path.basename(tomo) for tomo in tomograms] + def aggregate_split(ds, split_root, split): + if ds.startswith("04"): + tomograms = sorted(glob(os.path.join(split_root, "2024**", "*.h5"), recursive=True)) else: - # This is only the case for 04, which is also nested - tomograms = sorted(glob(os.path.join(train_root, "*.h5"))) - this_test_tomograms = test_tomograms[ds] + tomograms = sorted(glob(os.path.join(split_root, "*.h5"))) assert len(tomograms) > 0, ds this_condition = conditions[ds] this_resolution = resolutions[ds][0] - for tomo_path in tqdm(tomograms): + for tomo_path in tqdm(tomograms, desc=f"Aggregate {split}"): fname = os.path.basename(tomo_path) with h5py.File(tomo_path, "r") as f: try: @@ -58,7 +52,16 @@ def aggregate_vesicle_train_data(roots, test_tomograms, conditions, resolutions) tomo_vesicles_imod.append(n_vesicles_imod) tomo_condition.append(this_condition) tomo_resolution.append(this_resolution) - tomo_train.append("test" if fname in this_test_tomograms else "train/val") + tomo_train.append(split) + + for ds, root in roots.items(): + print("Aggregate data for", ds) + train_root = root["train"] + if train_root != "": + aggregate_split(ds, train_root, "train/val") + test_root = root["test"] + if test_root != "": + aggregate_split(ds, test_root, "test") df = pd.DataFrame({ "tomogram": tomo_names, @@ -117,19 +120,6 @@ def vesicle_train_data(): }, } - test_tomograms = { - "01": ["tomogram-009.h5", "tomogram-038.h5", "tomogram-049.h5", "tomogram-052.h5", "tomogram-057.h5", "tomogram-060.h5", "tomogram-067.h5", "tomogram-074.h5", "tomogram-076.h5", "tomogram-083.h5", "tomogram-133.h5", "tomogram-136.h5", "tomogram-145.h5", "tomogram-149.h5", "tomogram-150.h5"], # noqa - "02": ["tomogram-004.h5", "tomogram-008.h5"], - "03": ["tomogram-003.h5", "tomogram-004.h5", "tomogram-008.h5",], - "04": [], # all used for test - "05": ["tomogram-003.h5", "tomogram-005.h5",], - "07": ["tomogram-006.h5", "tomogram-017.h5",], - "09": [], # no test data - "10": ["tomogram-001.h5", "tomogram-002.h5", "tomogram-007.h5"], - "11": ["tomogram-001.h5 tomogram-007.h5 tomogram-008.h5"], - "12": ["tomogram-004.h5", "tomogram-021.h5", "tomogram-022.h5",], - } - conditions = { "01": single_ax_tem, "02": dual_ax_tem, @@ -156,7 +146,7 @@ def vesicle_train_data(): "12": (1.554, 1.554, 1.554) } - aggregate_vesicle_train_data(roots, test_tomograms, conditions, resolutions) + aggregate_vesicle_train_data(roots, conditions, resolutions) def aggregate_az_train_data(roots, test_tomograms, conditions, resolutions): diff --git a/scripts/data_summary/vesicle_training_data.xlsx b/scripts/data_summary/vesicle_training_data.xlsx index 8cceb3e101a6d6e2aef5b70072270dce310e3eaa..0f9ee1e82eeb9566bdfd8f76fc9b951258dc404a 100644 GIT binary patch delta 10819 zcmZ8{bzD?y)Gna{f}kKE!q5zYbT^7f4&6$p2uQ=Az<`2u4l&fw-Gb7fATc!3E#2LH z_i)bn?sqT$@Z0n3=UMTtwcdC3>_FQDX9874>^qM!Ffed2@_nNSN^tXip?aooH+7?$ znPHs`u7iY(8EoVanIj_+cjn2MX0!+R)J%uAx8%D6cIvNgGIH~H}_V&2B-ao#%@vLt=+uB%JJBx>%whvFbpQK*TwHC(H zaL!W0gl29|+5`Loni`=62g~bMdo9z$Jpoo#?Wv94*SUx7I|_OdOP=QmH#H*HZp_!0 zn|q^Yd&|o!?XafH_N3BO8tOU`LmH!5NwONx)4tMn`|#1xi0{-N&zkxyQtjitx?^D1 
z>eqEDcc`CU?(J`%_VgUx7&g?vPL2)&EN7@-wUq?m*?kRInf_*Zgm?)xa*Xb zMv#}eHqlC6adr@1N>UL9l+J|ZUel* z7y>Tk!e9o|;ukAM*WT+lzjpE?X5);EvP;_>p=a|tD2lpTuhZ~`)1&>zsZAXjHd=e4 z6v;WcX&LIsyvL~uSgbKm)6}V0$FxF%*=ng+S$Tv&${G~NJOYVW0?lXqFDxALjCI&q zzpe77U9XpiVqRJCpFYgs|cE2ES#3EC3I$R z5o_oQ<6q1BW_D%%XO7?`sWN|Nv1>*ei^!P)DmDjTdwJgizPDw@a1V?R+3zpe)29d2 zJQ6`-EVm`P#B{>=9n4)4F?+$E;iaERP~w2KJr^#)5x>k%vV$J=xjV1?Dud=03+JS3 zZxw+X(25oXdk}}w5vYJxvZr3M9ERzKI}yyGHRWLHZ10``+1I>3$A3HS4?EI>0&5-# zAteA~4w`zO7S!f^k!wi`%8d}^XE=TRC=`sOb4r&af~_vmK92a$?l>w*ySGtW`MJ_g zN_uQXZfs5YGC?Y9lJjfX01~l32RU&SrE$Ey-pKU0irhG~J-rc#=IS%Cif;&t<5nkd zc_?6Sd(mNbvy83oi4czh{H+JPb3DE?A&X~O<}Xw|fzqZq$|B&rBr}ssDOnJfo$QsJ zEYD*0;ZAsN3?N9wuct3rrs)C}N>cA_RPW7-;TFVd*Fbw2&Q^!eBImav7$o-?M2(N4 z5?pRIMbMf9A~nd*BFSq(-&vM6pD#fa zDA0?OjZP&j?;Faq${d2QM$fBy1r+6vivi% z59X**biSMiPWnG*X5d=EQLXVPfAd=}c+g&`RkWEsV5Wz6f#D&XaQtmjp;usX6|l3J zl2E~BdfaE$`p*+ArVRe2aQroMz>p*CME%e%N7bTdS3kgdKf_j`=2W33QoU^oms*7d zpox9kd?VSXXm0d+JjQ7+)`>0~onWLodO>5<1w2GA}DRAzJ&NYM98&{xGn(!oQR;kOGdkdzMsN&M8Y8S~r!i@)SsNxLlLhWSwnhi8&TFhZo$v(Bwe>vndE$>I;xYhI>PXS=EBs2Rvaz5O}N;| z-tncDcl4lB#8Vfe4;+tU#6gsWizdSVB`W6c!;`w;DM$aEl=_|Tc5H1HN}WXxEO z+qn1^{734%8gF@kyqlp>!uhK)wfL`YSQ!6Y)?m}Ml@O0eGq26m;r{BzQq*H2bgz88 zttbnPhNQRtI@Ql3lJu#)YQ&=d{+hxCsc-lt6&uWnX9g^00r5+<4+5g_a5kGCrg_h{#8L#li=Dmqpl|qsZk;#i);ZjC6i4*y zWXHmitTnperEyk?^FUguRoKxzS70kJTeNp5t2p?`%HY)011t>GoyfK$xz~z8$2Z|o z6&Ek9kIrrrL;H(`O9_iWAM8Tkqsw5xY0R@qs-kN!Cyv>Oy^j$Gop5x7BoXRe%MKt= zZ(3Zg61z&{8>HHPa(W!I3hW<)o5s6&4xjOcJO;ZeO5?5G)c%_4NZ>&l0>0*Uc6{?F zAWbJEuRiHcGDdp+5`(Ya`pSj&Ro<&+{cQyy15$~*uhA4Ke{2kF1-S@x$7+piz8E%{ zNENk@_Bdubkoty$USiZgI|$0NW+>>11I!93*dun4=s`CQWJd&P30QPF{7P^zHxT_a z?)8ciZ(pOOq1->J3IC`PRJ%Yk-u8hLt&I3M`33-<81fyu(JS8xK*80#)?)KnLI4ES zoD#NbtqEv7N+HY5LG;KgpBEfX^wb9}kCXS{D;=Rdg)t|hMYDr@*5e2>S*N5AHba*0 zYRyq(cpOUu9r-)6Gg0G@3*xJO7dVj|5Y4*JE$FnEhNdm2e<`rdC%`;UTl^)&lHM754%A1p{zg{@A3EC#q?1GuGv8mPB9-_!8{ZjP^W3)006M4r z@{Xx{9lI}F{#EP&CiTCH=Hug2@xx$C-ArA9*80<652n`JF>bmH1B%gHk2f|w0~@#b zHC6fM#u90tuEa{D#YxOkqgmV$_0pS%1zdKzdIB}|r%oPBHMgqz=w&h=ADD7EBsjDd z+5pGt7_pTy#sRq@{>}_+%*%R1Z`eiy%<^_|3)eStVs(t3muAIsqk99`z|g%hQ>!*W zfDg;u$cfR>VPUQ$2WpDycFzHJ2;nh}*Z5P>qe{<8OH`Ftc|M~%M-?#YP)S^3k79+W z9Jmgm3Jc*RwPCm2h~6SVdTv~TpzI91-kfM19VTWz(5)mBkT{`&BH<2wf9uS_+Wf^~ zEfijZEV@rMP%yw5{A6AMS7y2-OLLUZrsPfyF zSw()vT(Dls=|?4xMJ`gw5|2h_^}(fM#h>U9XQ$77I!ZG{?uuQHFSWEWS_#8XM;KFP z7|*3G7MjxM->En~yoNF2=y2p>qb!sOUplcXR96Ww>W$g14$OU~$zpqQR*;~i&n5i3 zgP(P7W+HURaOs>Pr^TPpOA@y*`U@v=yz?&nRj7Uy@pf^KT+QC_2?HpnSe&R?hHIaR z@1n-oDKKh7uesqn1;0flq17c7-fZ-j(yZ~}Th<1e1(QQ9ef%XAu%|mrJP9C0D{2czE|17dpybBep0wu3ol zjg@yl>Ey4=lhmtDoc_`A)yrlSolGOCaV~kus7nIijEGEBsZAe)06$lRW%MJYrC8ij@4sz4Jz2ydKkA!O4^n-`ZW%sXZ6Z ztny+PqQfn0l8t{*Cjm7@yNK zfM%^E?UF$nYmB87DU=D-ocP^So67`A0&WAWyP(1?Y+fEtV@2*#FuPRkNFnaqWIMZ5ZM+&= zEmo4>c+cBkoP2+88^#WRGKhZ5|0LyfNq?9LfpQJJ<*SLovvFMMllPfrpVhSah0P?! 
zxXjgVx@%E2IW{A?&aVCNlyq)WJ>x~@Ut1<@{x$&RB7_&;yg2z7TxmE56swNMi7=_Q zf0(-pX)XyI?(iJaXka3RLx#>vtb3{%ESL)}-`(%uoS5s93NUl7 z3lYxwC7A3_6OhzKI5^=(Kd-wWc;Gr`dlI{j&&hWM)_(xEw?Xv7EWV(5&x;d7Wi{~i<=$n_g?%EQ{dakA}I3x5k(Xd<{RU*63c_PEi7r$o0 zCm>~^b;A!_jGe%Sye7Jbd--ALq8 zTT*4@AZAy~T`Q^HJuyhhnn|YMovinXEt&;F92WR}EsIIiIn6%3YJW<$ZlT!9yIHY( zJwCXjEsK-X5~2Qh{lbj_D1HgsLm3~g zt{_YS$||k-%PjZIdiI)YPj!Lnre*Odt-!}uFBDj4x(^c?!FpC;6CZ>p&TqfA!elC} zMON$W2bgQ{<8GIL@Ikdt^18G+J~;G9$rjY?wUfuz-vJR3N+GXJ-#)BkoKv#w3ou0VRkoV$KW0X{CL1kd2{wK6s9yhAWsES zSo;NJx4Mz>kXSj0#W=P@{Sh2H(2__kHF3msi|ih0F7IkImm&nSN;)nWK}A~$^*bM` zED#-AXgPGXl4cxo`N>_LPI@uyvV?$Qsih`syUIF0gE_hyOldGdNHl)(N43&}F9KOd zr9<-i_|-<)3lqGCN1158GKK=EaFj}V%OXa`BHcR4P{DS!u5>xxdlMuAYdgpj7X-$6 zXhEbl8@{e*zszOCM{2b939RY*^F>rd8bC^)f3!ZSRJ=~;wvf@t@!rNH1A)a6?1=@? zU=KYeYWrTRjf%TIS0C*OlzX?~1E;5X5dcJu{ITttim>#laOWVV@58h}YL(Ed$Z7T8{f$3eD==cg0B0 zHiaRC5BIZNQ*%wV+{+?(`Jt!cSAoEi3Gtly zkM<78$sR{QBgYw>dG0$9c}TrW1Uf4v%WdxlUd6JFdwUNt?Ja%!7u5fX!W2Q~EVo65 zi}ZaIFyrx(PwpnS3}N~EqhrGYL6x?Evi?A@mIp5)uV~UYA|`Tr{2>GnX7He^GIXOA zmJ?!qKG`Y>t3F*;S(@LOHON|;KR!LDzu9U(T;8vuku}yrcr*B9y1%84jzhRB)y&y>Zg9aSLc7kiBNi z&Aw~IJ3NHj_x5s1?Qq_?}iuwV$FKjaM#Gz%BDQfbs z)8tinmOu)XHp4dH#RsYBOmYTCN&u~T7B#u*G+BJE(;(jp={2s;0BcHqCrlCG=5lycyY#LAJE<##L`bM{Xf2|V| zjKvVEp%S}-k|<#&H8Qfc?lXL%hSEa*KDEI{KOBFfI$;eZFpGqoRyC?UIXBxyotmc z-U1Jzf(9u`8nN^~eu`KTtumxttHa&*57Q>-Z`@VacJVYx`V7TWiIB*Z#L!%EoxZ*M z{m(GI4_{in5Ji5}k5cm>Mgpq*+^I63zBJS187pCFzx{plT`p+X8F$2R2}y)>T1g}* z^vZ(?wm;XbcriS(m8ur@Np(wWTIkdMZvM7S;>6 zG=ZbR#u}`Xr|sK0u-cUQ<~VJlaNywA;3^R4+5_(6U$Ty23e|d+E}@!vBcQyo`Dj%p zoy>|-ru09C39VCwT!!hyPeEY}Ot6AJdb+^(RWd3iGFG1yrL*0Z5#y5KI(dFFvNs*M z)M;3EGQV((z=#&bGIm$G!)&wss)6?R-usy+!J-7K&Pzl7I2H4{t!vP)Z2H0#JDo68dzu4Jren5uDrqjq@Uf zgIxJ|2!=YxS@mCGV-u*bFGtzGbKq%lyz7=)n!_RXbmtw2Mq7%(lfPo85Y^t~z>~pJ z61dGfIOJa;j8hssIQLQyVQo1Jfk$}FnaeEPUy{I4GJtkly6Lv!=(CbiCyt-=FD@9O z?U`2h32Bfcw-(;EGRrVLF1Jrc*%Lbf-or?_1*I46Xz@-mSB3ME(tY(Rgh=#+0qJ`F zwT3P$L&dpV^Dt_G@yL(g^0=6rGLsqNHcQpr>akHdpnI{AC76(enbXM8a4+7U)AHC0 zxZxRal5jEc=V3e5}+GAxGYqZBiw1O31&TC!FUy%I(;1HCV`u{@1 z=>%6Wv`4cU>~)KmhZehVZfGwRDf4L<&-F{*jdhnCtzoEah|iE zO8=L6{BF!H3rGR6F&Rg~xI1bQTTuA+R5PVZ8Fl=NFGH?%&3}*0mZ!oVKCb(s7444R z&T+>OQ}J7AS9$&2N7MNBFa|ZElna47>*G_^FPg# zf6+{XV3b~sh}zwj$Z2nfA)?T{EPp}EaHttivNw~#C$k9^<;Of{7=D$9~xz>jq@ai&(;D( z!K`cE?%n!zW^bf`1MZ~J#LI?8j+tOTam#SnnAto-<197r=rQ7+TB>AD$=l7486f&g zeX?tW@O(s&dhIL?@8~sxOf5AAI4qSfh=ebc%RgIV;gSzicnTcv#h7}+zT4^gRSlw;xSVQZM%rNRDK~7eRRysl{AHh@I*geFL3(Xf0YoJ?(N-3(c8GQ5$v=LNr@PEFF{=G-OutadblgB!pC9niXO-f ze-AmTK(>LUq*QUbxq0uedpmb;CYYgf@@DeH+Ewp79M7hks;q6XO60s4;DwF93)Oz3 zg(BJLKaZ{^WVZ!clMHUOx?0uRjbyjXP#&A9sv0|7|C1@AMc(jgi7>J>y4Q#`+zJLHikAAkG&CRRFV*7Lau#3KEV-7#tr2*&&*m55d~b0ZqINQFnWS0 zpeKmB!bBsuWFGoC9vE4V%pi}0ui6mfylu98BVfi2m~v)9RBld9xd6kFWrkOimQlPdZUt9Ew=F$| zZfKi1&A|GaHSl~Ls6ZLstAf%H8QkW-JQXJ9#{5911d6Evo$?4(3-^M*G9ov^B(scz zB{a}sZw#PEJB!T+V6vlZHpDYZMgNj3b=U}eCSa2@0|qQ}DPWS`D<*&RPg}3ZUq<6u^hkvJ z9sys4G4^ACDS0oe0roERoAzFi8`u31RCiq1E%@*j1ZTxmh?}ano;h;*@n7)XBg;bX zA|xYIKv9eAwIClCm45n36xBbx+4V#>strAqbbz5Ot)TOE=b_j19p|vu)iPwqW{2@v z!sD+s3ByZ1@Qp}iZNzD+2(1gHXut8(GdH1x$-v|jfgwVrH;wLG&{wNQD=jLd&DY{4B{=#Lw3KiZPu=GDmVkV$t<7*lA14X!;7bGaDtIh zj;qtr+#7OTzz39o@V8o^L!+XH&BU#W7qAJ$g3J8bY^zr}6`g^uVJM`80i%m$+Lpk~ zB{}(Z$nIyh&2RsLnT3EZ9?MRAHg&!BC{z3dy%;Ti-SZEDa=K`<3`MtQ55&5q6M-i% zA7ATAk_l){;YO7$0~>;@t{~7ne`MFj+Y`QdO3PQ?m-3ya$tpxgGjJQtkRx?wtTYcy zBC3c?Cc>ZnaVjop%rIh;L;+a@{LbK~Iqk|XNKcw-Vyo@fWM;DRJM3ZSd-PXmb70}X zS>_d_VXTcmF`97Swl{-=DCqz$u;8fo=mj6<$7xwV#o7R}C`9HaeJH4RJ0l9*-|}lS zk@Mmg!f44aG4NjZ^ajALCFqq2|L#3L`k*Ldu>TwS$|ySA=2>%edD*Y@n{Xh9tI+o! 
zs}3?agsIs;uJUhKo;Q1xmqnyE`3J(AJls8DN9X(JqKNVU8Jn{00c2Hv5eEFo>-pnafT>=_Y zT?b||7*l?`A!Jw2+6*o)TT5#K`N6*=M|b6`SZp*`oC&Q7rK}qGaRR1Z!r0>g=ow%F%|*guI~Ei(3b`qJ4M0^TP{LL z3Y*(HA?>K@I^5ac#Jft1*#O7X$pV@hlmX+YpRfYeAM2_S^N1OzcXTlzsInG2cO@iW z8;nuC$!dYfckKGa)0aj9YSGUDHPXUnx5|t#a`{KNMy$62cu`y&?ae z$yJfib#+~~IGeiQCfe!u0w0f4EzR%@E4v(HMa{$=_vde4w%b~|TKebhw;OMdi{tf{>57d76(YTWQfi{Bnud=T+iRRB zf;3Iu*J-TY^))cs%hzk1oKL7Zb|-tb`cgUe*JuCtYlIc< zZ2eeR`Mz*@&K2Na1rOI-O!Q?0_X7ITg$Htb*~}~UKd)NC@9_zuZ;|AaB>aEhHiU=r z35laWIysKO%f-ULu*1i|cmTYb_}`xo<|QWv2Go|**~P=w!kGhVXRE4+iS-!!zgIfp znEX^&y*zLlehRb+5#Xwd7p}_>qD9N{ZY@L!VcmIV@BKV`?{oG!tefT;?o6Voa_u@b9v>g z?yelFg9}*y;GIBArj^{!seXr5h_H)KxwX=G^htzPOCuMb2|+O9=c9|i)A|KF9e=Lm zJpLu5`T98w-4NAr<%;a4L=rckXLUZZM(o`6QeEnYpq@@gH z@W*eH*eM&|Rehuus5vTsF4X|}Nb}S7z~IUjRE@&xplPtG*QtJb`?hemiup?P&(`@G z<=N+2lfM!+6fV?OU(9S~0QB#!ax6@?m1_L2cweJ5)6BaR_;`4kTzGg?U`py%uJGYQ za0!uR>dxn3$HQAQy@E%Nmmna{02F4Kc`|)Q$zKq?{zY3{*2YYI*SVMIm#es}lb;U@ zC4~eV z0|N6F=7GNF>pzPx4$ij9Q^eY4Ei3}XgMib`?Uch`0&W+-hCXj(eE%3^o_%r7avrpQ zFn3mb@q2N+KQ6^B?jh5!i;bnn9i43-kF-}XHL#}ZT1JC|y{EYif8n&i0 zSUmLiaD1mxa%aUp4#;A0a}5H5{-yx_d;5Ex)&`$9mNrg5v!#$<9PJ-rc1})eCLRl1 zoHlRnoz=#r?GCl2Tx?Ul3d09lTP1@$JoQ1_%1o|TPJI}Tr|5*nXUL3yLcANj+ z`r$*s#cuc6V}ZDft?rGXG~htQ;(LIv=l)U31Jp@O&I|w6mge&t9&P)cK(pk>X5XXu z%rAeVa*j84w%7OfcNX_C0xa7C{O{grBpz*NLJWSn&95>D++GEaEi~Ky3Y1*=`;dt& z?A^Rt@i~j;I=KPMygADS@Vgi=pL=ln+2S0re$n2h%>wYXeS})5jUMfGzl#?^c%S}e z^xxU|$t?|_l$<_NkN|0EG0COgdfC4>7Y8Qws_FQ^tZw{bgKcLxJc-WXKlQ~=)_S& ztbiewzW=7-J=iN-vg)S>w`q;M%o(A=;_WfE$FjB&rK*ubN^|<7=Cl=*STlF*dJ^B;U^72mo3Hg}RbPGxXBBCawLAl)x=i(d z1UC~3=6I%&h+%WJY*!Al%k#Yaxn;E!tWL6%Gz+D~LM7`pIGL;5+qhfS`D!-#*nbO(z2<^^oK1~VNP6JRce^k; zVz4Smg%=wC06coG0HPSwtO01B?JRqPF zLQF#0#~-Rv5lXx!J%ITU@dq?p)Upk0?iT4ZGg+F}dmdr?ob^h0^DQZqAhyR;O8G2Z zATCn>WnI(~qHF0{Z5JoIDvQd@nRFTXx7$hyjH>t?y zY0ZDzXhKzY^`2B+S?aCWZV!%{=;mAE3(M}`H>-JD7{rB@6HL5Q`e4a@Nd>}KyB~7e zX<#D1)LQ#be$`6l1gcuJt7^2X3KE4t-P(OP-Dh?99g(%D0NV`xG`5Fl?6QDW$8Qg#xldkvNHUZr-(0G z7O31;*GMF+qBAMm@W0VsUq%DK;_zV`IXW2w{SdjP*14ui_pHI1;*ydQ3+a18WQW|X zCjQAv+n0m{`I{dG%AhWHwnCiCMD6=L&utQu7fD*QZ(9{gz0vNO zCCOCe2J6CPgm6+=ffMELSx)vou1Fvl=}paeMYU$*IuLTVKV2v}a-=-n5--ZKfTI5MD=k4=bP3^;z>KDvSiDW*FU|C$x1P zFe51GbFHKwvor>Twzd7D3wISA!PfI-E5#My&q>t5On%3EFZ64Wq9IH zfIgTs`1?`vO5}U7%^dmf&k?}vf59J@;KC7AKipJ?Dz?Suwso)6$QU=yEFNceuUD$! 
zu~y(X4gUHPf7)k5VT}^PoI7cs6@_u@pfCc%<{CIo>N+sWpH>Wl1%yIkb@J@-1GA|u zN^%&!#zR}0Lb|n4{?uYaL5-$BEMc%85P4k$Y;5l0i<6v%FRRywDjX*Jbt+Q0w52Yq z1xg9Y?@LC&8j0N84YCoSol6DhUm!k6b-(cIQ8=OcvrD}OvMH=9aSQWvE-mLr+FvrYPhYu=CxDChpxgPN zd!5@Z^;OP4mQOHy*FQ+fzqrhLJ#A=g^oV%i{P-?Y`OjNoWDjKkz9Ez`wrBQ3XW8ad z-_KjLj?dtr4C`j81T5$_>rw`@-o(JGQo)$_STlj48uM(W-KV&1HwCvuvmf36&g@-u z<6gGkWWQ%ciajoIxm1@_!8R|Xhbr@EteEi))fi_h{d($(uU?Q0?su`-4_pbTPRf8k zSi67dgcSMWHf-(7u1l)Krogh!9Ub6{6;Xz~0_+4rVvo6`w)1|q5*sy497I&TQ{nUyQP#--c1Pao{3Z|p&cu_1Eg z|LJAw{Ut{?3_RKw(cRh=>Bl+^_J$P73Kz1h!su05=X+_NX^uUJ3O32+((1C6OKQ9S zHjPEl1Mc?b2uy2Nx*rG55RS6LHPs85(Bs`_N~G`PQ_MsJYV6FF4#3Q56r|xM1=I=O z6fQMdX-|hy;Y~M4U%f29D6af`>7?1WwpAJ8#JTxtF+8_v4ioDZJ31Fn)T4BC2C(d0 zfqLH%b(?v95%PP=IuF3b3Q$GZfvYuNx5lrfu(O27=WA5t~r=_w37$ymNoY?~}%_Qb;DF8g+j#SGtbL+3I@lf3Epm<3}58()y!cCv1 z`_0MkF+^cwbT2o6OQ);016!3wA!3?9$VOG#q@jpg0THgkBj8$9QAmQ-J%CFMr+^Bb zc+bRuIe%V*_!^xCuis7l*fQuRhW9@X|zJ9LSCH{7B(@!=wOGN@}l7K z9zogCd0Oa4r7kdcqj|6}{Q*X95d0KDEdpFlHH>a(WbB=r2G6@ZDpJIxtdF_Fc*BZ$ z51H#KChs){Ylb+q_o4fL!jk(Ld%e|+iSr)f>edTY)!1{H>@_J#AV$Yr zuEhl0TI7{X&xqCjdfnatgyM)Z=p8&YRZ0vE{XYKs#c}Y;&JZiheNMl45hZ7} zku|kE4c+_ajL_Hvc7g>Bz~kmm4~aLX*k$U1;XI`O9&2xholG1V`H_5CuvSI-bk~gp zooLy7$Tr5lU;^4WB*=Vwb@=dgHB_Q&F|E>6P@?;u7tg|cz>~h1cOO{R1o^i7DDDSq z@#ansU0M#|ESLM9F;^WufPCZ#GgPW8C*~}s51HVY%@|SydVd~-gpiGvqRE971S9uF zGg_W{L~5;CbbL6D9vi5$W$!FtWEqQnO5N{>@NHkF&L8dqS>EE4zFSzi)65yIN>+om z%b-(L7b5zg+9Z~P+C8m1yvtFulg&2^VqfeUFELUz5c`V#9%tkGvN?{86wjLAQ%ACZ z22hI^{Qyr=4NuRqj$A!#XZl?Lo^@8tpg?A(6}8C~#EUD?ESF4(Ug|BvEasUjwC{lh zkb|bZ5Bw^vVa1MQ{)-c*2|);4^58n}>ebF(?kBl}+wX7ahG1rdn6bZ5!_ze)Ph~{t z)j@vVh+>JiP*ZzirEUiqD-_wQ8u_Pyi~)#g5hWptRo+4a7Gv2vuTB3@%J?+JRsB|i zgbPfRa?2=Glcs8dRKp_mz8~EhE&a?5TCWI@N1>*Z4;+8J0}(9u*`ZJq)$ppu?Ws)N zrNePVvG-g4Rt6s_=A38FJagBqPd^1`3uR{JQLk^Q6UdkybzM`}2mtb3YBgtT;-1R< z0?j12!XjISlr`xZ8abGYRx*A=cx%$0$`Iee(Wmjr$$87~Y|pOeo97>2P0d?V{8WYu z2SxHVNj_+@U2xR*y-%0Ddd%5+L&cfja9BGuK_^ibGq)m8LOf%=njxN*WQOd08exV{ zY~1he5vg5*22P{#y)FHeXMQD)g{;|3u+8GDIExt$%Z1=f$LChWLDNbbOi1r;?#-EY z$SE1Em(HbhpF<<^^$u^*QP-0W;xfG47VFdY;6c_)f!opq9YxjC3}aoy(o4wgShJh` zpN!YI(UN~yyZ;$8664I!Ekg~Obke!^_CwYvM%h}4Rh-QYhh34loXpLwSWHpRM-vV2q&-1*bX9lp zhu#QO5SNhW9-9k%5~^nDxtkZeM4_H~D|rf2{r2=GSMi;Y+LobVO}Z+CS?R~$%A;-_ z zhRlkYh4w`n&tFoNjvo|0w$D9OON7Klktcsiy{FrGdB0koWD&mZjCU9B3N*^h{GoGI zI#AXmrJje=i>a_dP=c2*_hjA=;hclwYC1jqF+}tO0)yr} zNAw2rq^q9aNgd;|c*LMYe>)ku#OhYxzI;7b0*qDRJ#F>tv9a%{rhXF$ZLyZKs;X0r zpaHaZ?Ae6FfI>mC&AWpq1qP@eL#kiak^8xK!|qAEdKN>m@8GECV~CM-2{bdU<(nl} z={O(Ok~>RB*F2PZRidF=j$R_fl&mzb>9aFYsB~OKu^go`-F4)p;LDRH?kcKcPAe66 zspRXxuVD|MIDQoeisxV(D=ycY`??)D227e!poYf@#pHq(tv#-4f$<2P^n8TvD@7-} zOW78}4I#(s86x)el%89zZ3c1J4Y?A%0#TL zNWa^__&IwtM5Tip=af0e4Jc2<*#S`ZP+BoOd;D}YT^@GvOTy9-Gw!2&wNX2p+z7D@>a#?Zjgw1?$A#TnkQgE%sYH$602t4wnB8$F}MfT9M zyv_6U)H2`)f`0H*4{Z!m&4@j_t7kA8sW#5kXr*7jL%`rHo05Nx?PpFD; z)%W|6Vq?f4h0ZRW!v+OM`@k9ZpJLjKvW_JiC}V4R>zV}u23Ohf{3JGa#tn&CcbK>b z<73y715^gXqv#COoxiU_GloxAYZPaTJew9y+?Vf%%(|1joK5`OSS<9kH5cGqerPNJ zsjmwvY%*sm#jwJdXz##2MB54yAs-IAQ@y_W$3|-Z-r0GW_F#J`&lk!Wh*f9JRV$q^ z%5U7v(PG10f>*K_u6#49-P+XZfW$(kj3BwCO9XWO*7?5I)S@Ye+jcC5&y59wj?Nd| zCk{6@U$sNXNTzHdTD|ET5gEX(ZWwey$Y{7$hNfQx zk?u>)XtUi8jEwYKw`{$_J|>m^5c+6EYJaL6j0(x-gHk4C=jLpZ5bvI=^)TwnHs*^I$ilK-uRGU&40X&pK zVt8F*cu;@aq-1uQiz;_*RDS?-dx`XZ?%dZ{NDb-AQ%RGrZn=5(*VMW=ovIMIgguPT zvS0JHIIO^CM>L?n%X$w@q=&0j`nwr82YlYa&lo;feHt+}sAc2)F=9$KYbZM-TrF$Z zYNb&Sn14zIXZgTKIU}(i?IJ;GQgS>ULse560)PF$C;j2@OR6%KKmo#8zpKlel%Hs0 z@uo_t+DmQsdcxyDMnYE~DX4ZKlJO+`MXN*duN?fAb3SpVJLN5G>d+5)Z7-h!qCItq zdRkS0!R9g%%(GUX?rJ?n2Tkdd4NPv;q*qO>;&>`WR&0tOq}MV-TSMNVe?OZSzyJK3 
zoO#HCeGKWt<7#|$J5kh_55w-))jfL5Rl&1Y!m)4L%iK>YYDT$OMnCnEa6$g4KbB?! z3ctl{yopV@wJb}6p98ypB*paXE*qCfy5f6D(&J}iBWcs;1-b#B>}S}9*<-_Vx4bh4 zp0QbOi!SGX71^%*GWI;c!6GEfK4wemSn4wB9|Rb+WUKlBS$A;lJzpIW%Min<@9kld z?1WqD?A2MJ0phj4ZK5T;0@zQhpunc2lk@l9FkN@yJnr1^?Y+&hkd1GHRHDz}iv>iJ zgA6T*-34kerflx4{dg&{Mtv6Z#16_lN#An9$huBlIx#rmmfF40Qzp3gLbvosy#PB= zS<271vtrqLmfckh`P%1oi-lNF-!dEIy8lQCuxKjyo3&NMMBQFy3qQbLjCo*T;rA~Mlc zb1f65DFcb+g3A>(ti>XOpTIHrW0nr5Gu`SwmnLkou=97>@1=?wmSU0lOZ3Mj3Xhs) zYJ9uoHDW?zo(Vfx^a=%-Q#0>SJ8VebRX3Z={xe}pxAF=}X&)0Kek>@Z{?Du@F0S%l z_bIAU;rgJr&|zdvr!Gwrmp~QH<}zpBUb^uuVcsiK{Y81ltGXVt%mCWkT;O;eK@SD# zOxAOKfK7ID%UiY?S)Z!?%gSMH<-_z?eP;ko@wH^2ta`qpW}h=B1Wb)As&;#CZp?d2 z0+(>6I%t5H(`HMr!iVR$pYD&}qGkFV!x27g^fG>T!!aNX5%%VfICzy|tk0)6Hw zb~{kH(}ZTg?$3I0GEKWvwIfHz0{Ju0zdM`yOIXid5&nGR{ta9NMFhnbI^?J#V7qR5 z%v&RR;E(rne7Py-i7C6$G=)ywxkC5Q&v_kPOtJRbU{#CL^uE((?b<-TC5}qjDeryn zImeZ+x&fUt?}x`~I*UYNLU96$w(BPUBuQ?5rWZzJC1u%aeloD#_H950QWYnw-!ClU_Z(Eb%_b1oQ2iqp@rt+IW%= zOB1wIu7pHZs82cWG8wm3+PS#E_;RI8LxV8!lZN%sE_G^zZAX64r#ig7{{wf6}>aK|ZK;PBXS9gWB{4pA@`<}-ykb?7A& z)1R8boR-VhM6uUKRHO#_$!Kz(LG)7{Cy=F_LxKkxjuT&WM;!*Z3J~|wvvT14W7>}u z@2w+?In$Ng!6Rrv$*B}91Km?6|CsuXDK;IKrWh8mF(cHKZbIvrS;*aHIm2QZ$-Mt~ilZmS&;5 zqb$9z=j$*5q5_y-|3>2!$Lqu9VJttw(uP5wYC*+QZbe3SMbwIga!?}!*v)M$+Ut%t z4ai}ksP`B3>71z3auJx$yYb%TbGAi`zIfq+qZK$y;J>CR!xhD)n9ZC}tHyGp-B~P{y)iI*lxwg%2Vq6_bvt34%ibMY2^Jaql3@9@!15~;XiVQ1aezuE z@It`fFO>AY!<;aKjRU#`uL~-VHnmuwv7>i7H0x>~+Ah}L!X5j+r8Djk_M#E5!-$p~ zA1q`7M$_TKI};=cR4>_VF)_z46KikB)avh~O&Afz4fJIvL^Rr^GEo`hz-FZGoTEmG z@l$vUD?1W4qrz#PVTrr);8O7Bph2`vIB@P-Pz18dj;puJUj%nm@&d0eceT;qm>$fV zHV*e2EO;7C-xuCl1OtY%nPFmp5)&5=`{iU`(CQ8QkP!!ab62EFn)V*^&T6& zCN=$o=l{kGaM%``1OB@cw2g;BNWYEkMGrG2k&|jHWhJT_{I988*>*&PD9vY`3}my7 z?MzP=1qgzdHze-zMiiT;zZ1cZ`kh#@InhKVYZimsEUU0RK!EIeYFw`(8%>UqJ~2;k zA7_lkP11Yimp8(HGB9`F^RD(zNRM5|z0q~|5}dM+f$SsDgVMCJ#B0?db;2Qs^{3bowMohgFMUB#5p+*d zGnaKAs|VJa&@|LdL!({ftKyyzc;*8tDxF3!4)wbG z_TWkZkx5Ian#LgbzoUgcfnRpo4II-?R5=z{O z3$s}ogb2}dbq6&qh;Zf^In`(rm(ahJ#$Wi428F5jvO&|K9pB^4Wqwe8= zSJP-#m=fK6##67SZjI(iu_mtwHw z+;O6=rHBl82=eW@qe>5iAqkfr5oIPMq_W-E_FA#l-caG?xOMy6>0#96J3WmlhbiyO zgIWXp|IYl}nkgsu8LEnV_<)`H;|tSF4}b7AoD`tB?f7VaFDzp&PVA6;So&;dZf6&> zbFg!bd?&}%b-V3ve^eOpUxerBUU5|G3GlyfE7jc;9KL!r8IkZ!cm@1Z*C}Fh1^0Ld zwuF1UabNZFQAbSbKW}s;q=<>(eljffknI1y)Re$1E^dPR>hK_ru=pw-o(nM^9x3<+ z)W1J3uQ>7I;rTiXczXFc+j{c*x;Sg9T)9ep?cZ;X`5HAUY(#zne@_dVgVz7~yu9*2 zAfa0P7A|tXIMY>5!Gtw&X57 Date: Sun, 24 Nov 2024 18:37:52 +0100 Subject: [PATCH 31/35] Update data summary --- scripts/inner_ear/analysis/common.py | 10 ++++++---- scripts/summarize_data.py | 25 ++++++++++++++++--------- 2 files changed, 22 insertions(+), 13 deletions(-) diff --git a/scripts/inner_ear/analysis/common.py b/scripts/inner_ear/analysis/common.py index c3622d5..0d4a46a 100644 --- a/scripts/inner_ear/analysis/common.py +++ b/scripts/inner_ear/analysis/common.py @@ -1,4 +1,4 @@ -import os +# import os import sys import pandas as pd @@ -46,11 +46,13 @@ def get_measurements_with_annotation(): def get_all_measurements(): - data_root = get_data_root() - val_table = os.path.join(data_root, "Electron-Microscopy-Susi", "Validierungs-Tabelle-v3.xlsx") + # data_root = get_data_root() + # val_table = os.path.join(data_root, "Electron-Microscopy-Susi", "Validierungs-Tabelle-v3.xlsx") + + val_table = "/home/pape/Desktop/sfb1286/mboc_synapse/misc/Validierungs-Tabelle-v3-passt.xlsx" val_table = pd.read_excel(val_table) - val_table = val_table[val_table["Kommentar 27-10-24"] == "passt"] + val_table = val_table[val_table["Kommentar 22.11.24"] == "passt"] n_tomos = len(val_table) 
print("All tomograms:", n_tomos) assert n_tomos > 0 diff --git a/scripts/summarize_data.py b/scripts/summarize_data.py index 7615641..6642db1 100644 --- a/scripts/summarize_data.py +++ b/scripts/summarize_data.py @@ -65,31 +65,38 @@ def summarize_vesicle_train_data(): for condition in conditions: ctab = vesicle_train[vesicle_train.condition == condition] n_tomos = len(ctab) - n_vesicles = ctab["vesicle_count"].sum() + n_vesicles_all = ctab["vesicle_count_all"].sum() + n_vesicles_imod = ctab["vesicle_count_imod"].sum() print(condition) print("Tomograms:", n_tomos) - print("Vesicles:", n_vesicles) + print("All-Vesicles:", n_vesicles_all) + print("Vesicles-From-Manual:", n_vesicles_imod) print() condition_summary["Condition"].append(condition) condition_summary["Tomograms"].append(n_tomos) - condition_summary["Vesicles"].append(n_vesicles) + condition_summary["Vesicles"].append(n_vesicles_all) condition_summary = pd.DataFrame(condition_summary) + print() + print() print("Total:") print("Tomograms:", len(vesicle_train)) - print("Vesicles:", vesicle_train["vesicle_count"].sum()) + print("All-Vesicles:", vesicle_train["vesicle_count_all"].sum()) + print("Vesicles-From-Manual:", vesicle_train["vesicle_count_imod"].sum()) print() train_tomos = vesicle_train[vesicle_train.used_for == "train/val"] print("Training:") print("Tomograms:", len(train_tomos)) - print("Vesicles:", train_tomos["vesicle_count"].sum()) + print("All-Vesicles:", train_tomos["vesicle_count_all"].sum()) + print("Vesicles-From-Manual:", train_tomos["vesicle_count_imod"].sum()) print() test_tomos = vesicle_train[vesicle_train.used_for == "test"] print("Test:") print("Tomograms:", len(test_tomos)) - print("Vesicles:", test_tomos["vesicle_count"].sum()) + print("All-Vesicles:", test_tomos["vesicle_count_all"].sum()) + print("Vesicles-From-Manual:", test_tomos["vesicle_count_imod"].sum()) pie_chart(condition_summary, "Tomograms", "Tomograms per Condition") pie_chart(condition_summary, "Vesicles", "Vesicles per Condition") @@ -148,7 +155,7 @@ def summarize_compartment_train(): # training_resolutions() -# summarize_vesicle_train_data() +summarize_vesicle_train_data() # summarize_vesicle_da() -summarize_az_train() -summarize_compartment_train() +# summarize_az_train() +# summarize_compartment_train() From 59a38dbd1bff4aaa14298cc09a45f53a7618a2c9 Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Sun, 24 Nov 2024 22:27:24 +0100 Subject: [PATCH 32/35] Update all measurements for the inner ear analysis --- .../inner_ear/analysis/analyze_distances.py | 25 +++--- .../analysis/analyze_vesicle_diameters.py | 48 ++++++++--- .../analysis/analyze_vesicle_pools.py | 31 ++++--- scripts/inner_ear/analysis/common.py | 81 ++++++++++--------- scripts/summarize_data.py | 30 +++++-- 5 files changed, 131 insertions(+), 84 deletions(-) diff --git a/scripts/inner_ear/analysis/analyze_distances.py b/scripts/inner_ear/analysis/analyze_distances.py index 473d6b8..c98de9c 100644 --- a/scripts/inner_ear/analysis/analyze_distances.py +++ b/scripts/inner_ear/analysis/analyze_distances.py @@ -32,7 +32,6 @@ def _plot_all(distances): plt.show() -# TODO rename the method names. 
# We only care about the following distances: # - MP-V -> PD, AZ (Boundary) # - Docked-V -> PD, AZ @@ -98,7 +97,7 @@ def _plot(pool_name, distance_col, structure_name, ax): def for_tomos_with_annotation(plot_all=True): - manual_assignments, semi_automatic_assignments, automatic_assignments = get_measurements_with_annotation() + manual_assignments, semi_automatic_assignments, proofread_assignments = get_measurements_with_annotation() manual_distances = manual_assignments[ ["tomogram", "pool", "ribbon_distance [nm]", "pd_distance [nm]", "boundary_distance [nm]"] @@ -110,38 +109,38 @@ def for_tomos_with_annotation(plot_all=True): ] semi_automatic_distances["approach"] = ["semi_automatic"] * len(semi_automatic_distances) - automatic_distances = automatic_assignments[ + proofread_distances = proofread_assignments[ ["tomogram", "pool", "ribbon_distance [nm]", "pd_distance [nm]", "boundary_distance [nm]"] ] - automatic_distances["approach"] = ["automatic"] * len(automatic_distances) + proofread_distances["approach"] = ["proofread"] * len(proofread_distances) - distances = pd.concat([manual_distances, semi_automatic_distances, automatic_distances]) + distances = pd.concat([manual_distances, semi_automatic_distances, proofread_distances]) if plot_all: - distances.to_excel("./results/distances_with_manual_annotations.xlsx", index=False) + distances.to_excel("./results/distances_tomos_with_manual_annotations.xlsx", index=False) _plot_all(distances) else: - _plot_selected(distances, save_path="./results/selected_distances_manual_annotations.xlsx") + _plot_selected(distances, save_path="./results/selected_distances_tomos_with_manual_annotations.xlsx") def for_all_tomos(plot_all=True): - semi_automatic_assignments, automatic_assignments = get_all_measurements() + semi_automatic_assignments, proofread_assignments = get_all_measurements() semi_automatic_distances = semi_automatic_assignments[ ["tomogram", "pool", "ribbon_distance [nm]", "pd_distance [nm]", "boundary_distance [nm]"] ] semi_automatic_distances["approach"] = ["semi_automatic"] * len(semi_automatic_distances) - automatic_distances = automatic_assignments[ + proofread_distances = proofread_assignments[ ["tomogram", "pool", "ribbon_distance [nm]", "pd_distance [nm]", "boundary_distance [nm]"] ] - automatic_distances["approach"] = ["automatic"] * len(automatic_distances) + proofread_distances["approach"] = ["proofread"] * len(proofread_distances) - distances = pd.concat([semi_automatic_distances, automatic_distances]) + distances = pd.concat([semi_automatic_distances, proofread_distances]) if plot_all: - distances.to_excel("./results/distances_all_tomograms.xlsx", index=False) + distances.to_excel("./results/distances_all_tomos.xlsx", index=False) _plot_all(distances) else: - _plot_selected(distances, save_path="./results/selected_distances_all_tomograms.xlsx") + _plot_selected(distances, save_path="./results/selected_distances_all_tomos.xlsx") def main(): diff --git a/scripts/inner_ear/analysis/analyze_vesicle_diameters.py b/scripts/inner_ear/analysis/analyze_vesicle_diameters.py index 8fa5d9e..439fa0f 100644 --- a/scripts/inner_ear/analysis/analyze_vesicle_diameters.py +++ b/scripts/inner_ear/analysis/analyze_vesicle_diameters.py @@ -10,10 +10,12 @@ from synaptic_reconstruction.imod.export import load_points_from_imodinfo from synaptic_reconstruction.file_utils import get_data_path +from common import get_finished_tomos + sys.path.append("../processing") -def aggregate_radii(data_root, table, save_path, get_tab): +def 
aggregate_radii(data_root, table, save_path, get_tab, include_names): if os.path.exists(save_path): return @@ -24,6 +26,14 @@ def aggregate_radii(data_root, table, save_path, get_tab): continue tomo_name = os.path.relpath(folder, os.path.join(data_root, "Electron-Microscopy-Susi/Analyse")) + if ( + tomo_name in ("WT strong stim/Mouse 1/modiolar/1", "WT strong stim/Mouse 1/modiolar/2") and + (row["EM alt vs. Neu"] == "neu") + ): + continue + if tomo_name not in include_names: + continue + tab_path = get_tab(folder) if tab_path is None: continue @@ -38,7 +48,7 @@ def aggregate_radii(data_root, table, save_path, get_tab): radius_table.to_excel(save_path, index=False) -def aggregate_radii_imod(data_root, table, save_path): +def aggregate_radii_imod(data_root, table, save_path, include_names): if os.path.exists(save_path): return @@ -49,6 +59,15 @@ def aggregate_radii_imod(data_root, table, save_path): continue tomo_name = os.path.relpath(folder, os.path.join(data_root, "Electron-Microscopy-Susi/Analyse")) + tomo_name = os.path.relpath(folder, os.path.join(data_root, "Electron-Microscopy-Susi/Analyse")) + if ( + tomo_name in ("WT strong stim/Mouse 1/modiolar/1", "WT strong stim/Mouse 1/modiolar/2") and + (row["EM alt vs. Neu"] == "neu") + ): + continue + if tomo_name not in include_names: + continue + annotation_folder = os.path.join(folder, "manuell") if not os.path.exists(annotation_folder): annotation_folder = os.path.join(folder, "Manuell") @@ -84,7 +103,7 @@ def aggregate_radii_imod(data_root, table, save_path): radius_table.to_excel(save_path, index=False) -def get_tab_automatic(folder): +def get_tab_semi_automatic(folder): tab_name = "measurements_uncorrected_assignments.xlsx" res_path = os.path.join(folder, "korrektur", tab_name) if not os.path.exists(res_path): @@ -94,7 +113,7 @@ def get_tab_automatic(folder): return res_path -def get_tab_semi_automatic(folder): +def get_tab_proofread(folder): tab_name = "measurements.xlsx" res_path = os.path.join(folder, "korrektur", tab_name) if not os.path.exists(res_path): @@ -121,11 +140,22 @@ def main(): table_path = os.path.join(data_root, "Electron-Microscopy-Susi", "Übersicht.xlsx") table = parse_table(table_path, data_root) - # TODO get the radii from imod - aggregate_radii(data_root, table, save_path="./results/vesicle_radii_automatic.xlsx", get_tab=get_tab_automatic) - aggregate_radii(data_root, table, save_path="./results/vesicle_radii_semi_automatic.xlsx", get_tab=get_tab_semi_automatic) # noqa - aggregate_radii(data_root, table, save_path="./results/vesicle_radii_manual.xlsx", get_tab=get_tab_manual) - aggregate_radii_imod(data_root, table, save_path="./results/vesicle_radii_imod.xlsx") + all_tomos = get_finished_tomos() + aggregate_radii( + data_root, table, save_path="./results/vesicle_radii_semi_automatic.xlsx", get_tab=get_tab_semi_automatic, + include_names=all_tomos + ) + + aggregate_radii( + data_root, table, save_path="./results/vesicle_radii_proofread.xlsx", get_tab=get_tab_proofread, + include_names=all_tomos + ) + + # aggregate_radii(data_root, table, save_path="./results/vesicle_radii_manual.xlsx", get_tab=get_tab_manual) + aggregate_radii_imod( + data_root, table, save_path="./results/vesicle_radii_manual.xlsx", + include_names=all_tomos + ) if __name__ == "__main__": diff --git a/scripts/inner_ear/analysis/analyze_vesicle_pools.py b/scripts/inner_ear/analysis/analyze_vesicle_pools.py index 7b67c99..f27a5c2 100644 --- a/scripts/inner_ear/analysis/analyze_vesicle_pools.py +++ 
b/scripts/inner_ear/analysis/analyze_vesicle_pools.py @@ -35,68 +35,67 @@ def plot_pools(data, errors): def for_tomos_with_annotation(): - manual_assignments, semi_automatic_assignments, automatic_assignments = get_measurements_with_annotation() + manual_assignments, semi_automatic_assignments, proofread_assignments = get_measurements_with_annotation() manual_counts = manual_assignments.groupby(["tomogram", "pool"]).size().unstack(fill_value=0) semi_automatic_counts = semi_automatic_assignments.groupby(["tomogram", "pool"]).size().unstack(fill_value=0) - automatic_counts = automatic_assignments.groupby(["tomogram", "pool"]).size().unstack(fill_value=0) + proofread_counts = proofread_assignments.groupby(["tomogram", "pool"]).size().unstack(fill_value=0) manual_stats = manual_counts.agg(["mean", "std"]).transpose().reset_index() semi_automatic_stats = semi_automatic_counts.agg(["mean", "std"]).transpose().reset_index() - automatic_stats = automatic_counts.agg(["mean", "std"]).transpose().reset_index() + proofread_stats = proofread_counts.agg(["mean", "std"]).transpose().reset_index() data = pd.DataFrame({ "Pool": manual_stats["pool"], "Semi-automatic": semi_automatic_stats["mean"], - "Automatic": automatic_stats["mean"], + "Proofread": proofread_stats["mean"], "Manual": manual_stats["mean"], }) errors = pd.DataFrame({ "Pool": manual_stats["pool"], "Semi-automatic": semi_automatic_stats["std"], - "Automatic": automatic_stats["std"], + "Proofread": proofread_stats["std"], "Manual": manual_stats["std"], }) plot_pools(data, errors) - output_path = "./results/vesicle_pools_with_manual_annotations.xlsx" + output_path = "./results/vesicle_pools_tomos_with_manual_annotations.xlsx" data.to_excel(output_path, index=False, sheet_name="Average") with pd.ExcelWriter(output_path, engine="openpyxl", mode="a") as writer: errors.to_excel(writer, sheet_name="StandardDeviation", index=False) def for_all_tomos(): - semi_automatic_assignments, automatic_assignments = get_all_measurements() + semi_automatic_assignments, proofread_assignments = get_all_measurements() - automatic_counts = automatic_assignments.groupby(["tomogram", "pool"]).size().unstack(fill_value=0) - automatic_stats = automatic_counts.agg(["mean", "std"]).transpose().reset_index() + proofread_counts = proofread_assignments.groupby(["tomogram", "pool"]).size().unstack(fill_value=0) + proofread_stats = proofread_counts.agg(["mean", "std"]).transpose().reset_index() semi_automatic_counts = semi_automatic_assignments.groupby(["tomogram", "pool"]).size().unstack(fill_value=0) semi_automatic_stats = semi_automatic_counts.agg(["mean", "std"]).transpose().reset_index() data = pd.DataFrame({ - "Pool": automatic_stats["pool"], + "Pool": proofread_stats["pool"], "Semi-automatic": semi_automatic_stats["mean"], - "Automatic": automatic_stats["mean"], + "Proofread": proofread_stats["mean"], }) errors = pd.DataFrame({ - "Pool": automatic_stats["pool"], + "Pool": proofread_stats["pool"], "Semi-automatic": semi_automatic_stats["std"], - "Automatic": automatic_stats["std"], + "Proofread": proofread_stats["std"], }) plot_pools(data, errors) - output_path = "./results/vesicle_pools_all_tomograms.xlsx" + output_path = "./results/vesicle_pools_all_tomos.xlsx" data.to_excel(output_path, index=False, sheet_name="Average") with pd.ExcelWriter(output_path, engine="openpyxl", mode="a") as writer: errors.to_excel(writer, sheet_name="StandardDeviation", index=False) -# TODO: export the ribbon and pd stats (first need to discuss this with Fid) def main(): - # 
for_tomos_with_annotation() + for_tomos_with_annotation() for_all_tomos() diff --git a/scripts/inner_ear/analysis/common.py b/scripts/inner_ear/analysis/common.py index 0d4a46a..772cd31 100644 --- a/scripts/inner_ear/analysis/common.py +++ b/scripts/inner_ear/analysis/common.py @@ -1,6 +1,7 @@ # import os import sys +import numpy as np import pandas as pd sys.path.append("../processing") @@ -8,20 +9,43 @@ from parse_table import get_data_root # noqa +def get_finished_tomos(): + # data_root = get_data_root() + # val_table = os.path.join(data_root, "Electron-Microscopy-Susi", "Validierungs-Tabelle-v3.xlsx") + + val_table = "/home/pape/Desktop/sfb1286/mboc_synapse/misc/Validierungs-Tabelle-v3-passt.xlsx" + val_table = pd.read_excel(val_table) + + val_table = val_table[val_table["Kommentar 22.11.24"] == "passt"] + n_tomos = len(val_table) + assert n_tomos > 0 + + tomo_names = [] + for _, row in val_table.iterrows(): + name = "/".join([ + row.Bedingung, f"Mouse {int(row.Maus)}", + row["Ribbon-Orientierung"].lower().rstrip("?"), + str(int(row["OwnCloud-Unterordner"]))] + ) + tomo_names.append(name) + + return tomo_names + + def get_manual_assignments(): - result_path = "../results/20240917_1/fully_manual_analysis_results.xlsx" + result_path = "../results/20241124_1/fully_manual_analysis_results.xlsx" results = pd.read_excel(result_path) return results -def get_semi_automatic_assignments(tomograms): - result_path = "../results/20240917_1/automatic_analysis_results.xlsx" +def get_proofread_assignments(tomograms): + result_path = "../results/20241124_1/automatic_analysis_results.xlsx" results = pd.read_excel(result_path) results = results[results["tomogram"].isin(tomograms)] return results -def get_automatic_assignments(tomograms): +def get_semi_automatic_assignments(tomograms): result_path = "../results/fully_automatic_analysis_results.xlsx" results = pd.read_excel(result_path) results = results[results["tomogram"].isin(tomograms)] @@ -30,48 +54,29 @@ def get_automatic_assignments(tomograms): def get_measurements_with_annotation(): manual_assignments = get_manual_assignments() - manual_tomograms = pd.unique(manual_assignments["tomogram"]) - semi_automatic_assignments = get_semi_automatic_assignments(manual_tomograms) - tomograms = pd.unique(semi_automatic_assignments["tomogram"]) - manual_assignments = manual_assignments[manual_assignments["tomogram"].isin(tomograms)] - assert len(pd.unique(manual_assignments["tomogram"])) == len(pd.unique(semi_automatic_assignments["tomogram"])) + # Get the tomos with manual annotations and the ones which are fully done in proofreading. + manual_tomos = pd.unique(manual_assignments["tomogram"]) + finished_tomos = get_finished_tomos() + # Intersect them to get the tomos we are using. 
+ tomos = np.intersect1d(manual_tomos, finished_tomos) - automatic_assignments = get_automatic_assignments(tomograms) - filtered_tomograms = pd.unique(manual_assignments["tomogram"]) - assert len(filtered_tomograms) == len(pd.unique(automatic_assignments["tomogram"])) + manual_assignments = manual_assignments[manual_assignments["tomogram"].isin(tomos)] + semi_automatic_assignments = get_semi_automatic_assignments(tomos) + proofread_assignments = get_proofread_assignments(tomos) - print("Tomograms with manual annotations:", len(filtered_tomograms)) - return manual_assignments, semi_automatic_assignments, automatic_assignments + print("Tomograms with manual annotations:", len(tomos)) + return manual_assignments, semi_automatic_assignments, proofread_assignments def get_all_measurements(): - # data_root = get_data_root() - # val_table = os.path.join(data_root, "Electron-Microscopy-Susi", "Validierungs-Tabelle-v3.xlsx") - - val_table = "/home/pape/Desktop/sfb1286/mboc_synapse/misc/Validierungs-Tabelle-v3-passt.xlsx" - val_table = pd.read_excel(val_table) - - val_table = val_table[val_table["Kommentar 22.11.24"] == "passt"] - n_tomos = len(val_table) - print("All tomograms:", n_tomos) - assert n_tomos > 0 - tomo_names = [] - for _, row in val_table.iterrows(): - name = "/".join([ - row.Bedingung, f"Mouse {int(row.Maus)}", - row["Ribbon-Orientierung"].lower().rstrip("?"), - str(int(row["OwnCloud-Unterordner"]))] - ) - tomo_names.append(name) - - semi_automatic_assignments = get_semi_automatic_assignments(tomo_names) - filtered_tomo_names = pd.unique(semi_automatic_assignments["tomogram"]).tolist() + tomos = get_finished_tomos() + print("All tomograms:", len(tomos)) - automatic_assignments = get_automatic_assignments(tomo_names) - assert len(filtered_tomo_names) == len(pd.unique(automatic_assignments["tomogram"])) + semi_automatic_assignments = get_semi_automatic_assignments(tomos) + proofread_assignments = get_proofread_assignments(tomos) - return semi_automatic_assignments, automatic_assignments + return semi_automatic_assignments, proofread_assignments def main(): diff --git a/scripts/summarize_data.py b/scripts/summarize_data.py index 6642db1..66fe321 100644 --- a/scripts/summarize_data.py +++ b/scripts/summarize_data.py @@ -9,13 +9,6 @@ vesicle_train = pd.read_excel("data_summary/vesicle_training_data.xlsx") vesicle_da = pd.read_excel("data_summary/vesicle_domain_adaptation_data.xlsx", sheet_name="cryo") -# Inner ear trainign data: -# Sophia: 92 -# Rat: 19 -# Tether: 3 -# Ves Pools: 6 -# Total = 120 - def training_resolutions(): res_az = np.round(az_train["resolution"].mean(), 2) @@ -154,8 +147,29 @@ def summarize_compartment_train(): print("Compartments:", test_tomos["compartment_count"].sum()) +def summarize_inner_ear_data(): + # NOTE: this is not all trainig data, but the data on which we run the analysis + # New tomograms from Sophia. 
+ n_tomos_sophia_tot = 87 + n_tomos_sophia_manual = 33 # noqa + # This is the training data + n_tomos_sohphia_train = "" # TODO # noqa + + # Published tomograms + n_tomos_rat = 19 + n_tomos_tether = 3 + n_tomos_ves_pool = 6 + + # 28 + print("Total published:", n_tomos_rat + n_tomos_tether + n_tomos_ves_pool) + # 115 + print("Total:", n_tomos_rat + n_tomos_tether + n_tomos_ves_pool + n_tomos_sophia_tot) + + # training_resolutions() -summarize_vesicle_train_data() +# summarize_vesicle_train_data() # summarize_vesicle_da() # summarize_az_train() # summarize_compartment_train() +# summarize_inner_ear_data() +summarize_inner_ear_data() From 97289510790ad1c14192a1d6610cadcc703b8c05 Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Sun, 24 Nov 2024 23:15:12 +0100 Subject: [PATCH 33/35] Update vesicle diameter analysis --- .../analysis/analyze_vesicle_diameters.py | 60 +++++--- .../analysis/analyze_vesicle_radii.py | 132 ------------------ 2 files changed, 38 insertions(+), 154 deletions(-) delete mode 100644 scripts/inner_ear/analysis/analyze_vesicle_radii.py diff --git a/scripts/inner_ear/analysis/analyze_vesicle_diameters.py b/scripts/inner_ear/analysis/analyze_vesicle_diameters.py index 439fa0f..1f0b3a0 100644 --- a/scripts/inner_ear/analysis/analyze_vesicle_diameters.py +++ b/scripts/inner_ear/analysis/analyze_vesicle_diameters.py @@ -15,10 +15,7 @@ sys.path.append("../processing") -def aggregate_radii(data_root, table, save_path, get_tab, include_names): - if os.path.exists(save_path): - return - +def aggregate_diameters(data_root, table, save_path, get_tab, include_names, sheet_name): radius_table = [] for _, row in tqdm(table.iterrows(), total=len(table), desc="Collect tomo information"): folder = row["Local Path"] @@ -41,17 +38,20 @@ def aggregate_radii(data_root, table, save_path, get_tab, include_names): tab = pd.read_excel(tab_path) this_tab = tab[["pool", "radius [nm]"]] this_tab.insert(0, "tomogram", [tomo_name] * len(this_tab)) + this_tab.insert(3, "diameter [nm]", this_tab["radius [nm]"] * 2) radius_table.append(this_tab) radius_table = pd.concat(radius_table) - print("Saving table for", len(radius_table), "vesicles to", save_path) - radius_table.to_excel(save_path, index=False) - -def aggregate_radii_imod(data_root, table, save_path, include_names): + print("Saving table for", len(radius_table), "vesicles to", save_path, sheet_name) if os.path.exists(save_path): - return + with pd.ExcelWriter(save_path, engine="openpyxl", mode="a") as writer: + radius_table.to_excel(writer, sheet_name=sheet_name, index=False) + else: + radius_table.to_excel(save_path, sheet_name=sheet_name, index=False) + +def aggregate_diameters_imod(data_root, table, save_path, include_names, sheet_name): radius_table = [] for _, row in tqdm(table.iterrows(), total=len(table), desc="Collect tomo information"): folder = row["Local Path"] @@ -95,12 +95,16 @@ def aggregate_radii_imod(data_root, table, save_path, include_names): "tomogram": [tomo_name] * len(radii), "pool": [label_names[label_id] for label_id in labels], "radius [nm]": radii, + "diameter [nm]": 2 * radii, }) radius_table.append(this_tab) radius_table = pd.concat(radius_table) - print("Saving table for", len(radius_table), "vesicles to", save_path) - radius_table.to_excel(save_path, index=False) + print("Saving table for", len(radius_table), "vesicles to", save_path, sheet_name) + radius_table.to_excel(save_path, index=False, sheet_name=sheet_name) + + man_tomos = pd.unique(radius_table.tomogram) + return man_tomos def 
get_tab_semi_automatic(folder): @@ -141,20 +145,32 @@ def main(): table = parse_table(table_path, data_root) all_tomos = get_finished_tomos() - aggregate_radii( - data_root, table, save_path="./results/vesicle_radii_semi_automatic.xlsx", get_tab=get_tab_semi_automatic, - include_names=all_tomos - ) - aggregate_radii( - data_root, table, save_path="./results/vesicle_radii_proofread.xlsx", get_tab=get_tab_proofread, - include_names=all_tomos + print("All tomograms") + save_path = "./results/vesicle_diameters_all_tomos.xlsx" + aggregate_diameters( + data_root, table, save_path=save_path, get_tab=get_tab_semi_automatic, include_names=all_tomos, + sheet_name="Semi-automatic", + ) + aggregate_diameters( + data_root, table, save_path=save_path, get_tab=get_tab_proofread, include_names=all_tomos, + sheet_name="Proofread", ) - # aggregate_radii(data_root, table, save_path="./results/vesicle_radii_manual.xlsx", get_tab=get_tab_manual) - aggregate_radii_imod( - data_root, table, save_path="./results/vesicle_radii_manual.xlsx", - include_names=all_tomos + print() + print("Tomograms with manual annotations") + # aggregate_diameters(data_root, table, save_path="./results/vesicle_radii_manual.xlsx", get_tab=get_tab_manual) + save_path = "./results/vesicle_diameters_tomos_with_manual_annotations.xlsx" + man_tomos = aggregate_diameters_imod( + data_root, table, save_path=save_path, include_names=all_tomos, sheet_name="Manual", + ) + aggregate_diameters( + data_root, table, save_path=save_path, get_tab=get_tab_semi_automatic, include_names=man_tomos, + sheet_name="Semi-automatic", + ) + aggregate_diameters( + data_root, table, save_path=save_path, get_tab=get_tab_proofread, include_names=man_tomos, + sheet_name="Proofread", ) diff --git a/scripts/inner_ear/analysis/analyze_vesicle_radii.py b/scripts/inner_ear/analysis/analyze_vesicle_radii.py deleted file mode 100644 index 8fa5d9e..0000000 --- a/scripts/inner_ear/analysis/analyze_vesicle_radii.py +++ /dev/null @@ -1,132 +0,0 @@ -import os -import sys - -from glob import glob - -import mrcfile -import pandas as pd -from tqdm import tqdm - -from synaptic_reconstruction.imod.export import load_points_from_imodinfo -from synaptic_reconstruction.file_utils import get_data_path - -sys.path.append("../processing") - - -def aggregate_radii(data_root, table, save_path, get_tab): - if os.path.exists(save_path): - return - - radius_table = [] - for _, row in tqdm(table.iterrows(), total=len(table), desc="Collect tomo information"): - folder = row["Local Path"] - if folder == "": - continue - - tomo_name = os.path.relpath(folder, os.path.join(data_root, "Electron-Microscopy-Susi/Analyse")) - tab_path = get_tab(folder) - if tab_path is None: - continue - - tab = pd.read_excel(tab_path) - this_tab = tab[["pool", "radius [nm]"]] - this_tab.insert(0, "tomogram", [tomo_name] * len(this_tab)) - radius_table.append(this_tab) - - radius_table = pd.concat(radius_table) - print("Saving table for", len(radius_table), "vesicles to", save_path) - radius_table.to_excel(save_path, index=False) - - -def aggregate_radii_imod(data_root, table, save_path): - if os.path.exists(save_path): - return - - radius_table = [] - for _, row in tqdm(table.iterrows(), total=len(table), desc="Collect tomo information"): - folder = row["Local Path"] - if folder == "": - continue - - tomo_name = os.path.relpath(folder, os.path.join(data_root, "Electron-Microscopy-Susi/Analyse")) - annotation_folder = os.path.join(folder, "manuell") - if not os.path.exists(annotation_folder): - annotation_folder = 
os.path.join(folder, "Manuell") - if not os.path.exists(annotation_folder): - continue - - annotations = glob(os.path.join(annotation_folder, "*.mod")) - annotation_file = [ann for ann in annotations if ("vesikel" in ann.lower()) or ("vesicle" in ann.lower())] - if len(annotation_file) != 1: - continue - annotation_file = annotation_file[0] - - tomo_file = get_data_path(folder) - with mrcfile.open(tomo_file) as f: - shape = f.data.shape - resolution = list(f.voxel_size.item()) - resolution = [res / 10 for res in resolution][0] - - try: - _, radii, labels, label_names = load_points_from_imodinfo(annotation_file, shape, resolution=resolution) - except AssertionError: - continue - - this_tab = pd.DataFrame({ - "tomogram": [tomo_name] * len(radii), - "pool": [label_names[label_id] for label_id in labels], - "radius [nm]": radii, - }) - radius_table.append(this_tab) - - radius_table = pd.concat(radius_table) - print("Saving table for", len(radius_table), "vesicles to", save_path) - radius_table.to_excel(save_path, index=False) - - -def get_tab_automatic(folder): - tab_name = "measurements_uncorrected_assignments.xlsx" - res_path = os.path.join(folder, "korrektur", tab_name) - if not os.path.exists(res_path): - res_path = os.path.join(folder, "Korrektur", tab_name) - if not os.path.exists(res_path): - res_path = None - return res_path - - -def get_tab_semi_automatic(folder): - tab_name = "measurements.xlsx" - res_path = os.path.join(folder, "korrektur", tab_name) - if not os.path.exists(res_path): - res_path = os.path.join(folder, "Korrektur", tab_name) - if not os.path.exists(res_path): - res_path = None - return res_path - - -def get_tab_manual(folder): - tab_name = "measurements.xlsx" - res_path = os.path.join(folder, "manuell", tab_name) - if not os.path.exists(res_path): - res_path = os.path.join(folder, "Manuell", tab_name) - if not os.path.exists(res_path): - res_path = None - return res_path - - -def main(): - from parse_table import parse_table, get_data_root - - data_root = get_data_root() - table_path = os.path.join(data_root, "Electron-Microscopy-Susi", "Übersicht.xlsx") - table = parse_table(table_path, data_root) - - # TODO get the radii from imod - aggregate_radii(data_root, table, save_path="./results/vesicle_radii_automatic.xlsx", get_tab=get_tab_automatic) - aggregate_radii(data_root, table, save_path="./results/vesicle_radii_semi_automatic.xlsx", get_tab=get_tab_semi_automatic) # noqa - aggregate_radii(data_root, table, save_path="./results/vesicle_radii_manual.xlsx", get_tab=get_tab_manual) - aggregate_radii_imod(data_root, table, save_path="./results/vesicle_radii_imod.xlsx") - - -if __name__ == "__main__": - main() From 622da1e618953d385e6a93285e7e167a049bddc5 Mon Sep 17 00:00:00 2001 From: SarahMuth Date: Wed, 27 Nov 2024 12:00:34 +0100 Subject: [PATCH 34/35] update AZ evaluation --- scripts/cooper/training/evaluate_AZ.py | 63 ++++++++++--- scripts/cooper/training/postprocess_AZ.py | 107 ++++++++++++++++++++++ 2 files changed, 158 insertions(+), 12 deletions(-) create mode 100644 scripts/cooper/training/postprocess_AZ.py diff --git a/scripts/cooper/training/evaluate_AZ.py b/scripts/cooper/training/evaluate_AZ.py index fc32214..dbf8d67 100644 --- a/scripts/cooper/training/evaluate_AZ.py +++ b/scripts/cooper/training/evaluate_AZ.py @@ -26,7 +26,37 @@ def evaluate(labels, segmentation): score = dice_score(segmentation, labels) return score -def evaluate_file(labels_path, segmentation_path, model_name, crop= False): +def compute_precision(ground_truth, segmentation): + """ + 
Computes the Precision score for 3D arrays representing the ground truth and segmentation. + + Parameters: + - ground_truth (np.ndarray): 3D binary array where 1 represents the ground truth region. + - segmentation (np.ndarray): 3D binary array where 1 represents the predicted segmentation region. + + Returns: + - precision (float): The precision score, or 0 if the segmentation is empty. + """ + assert ground_truth.shape == segmentation.shape + # Ensure inputs are binary arrays + ground_truth = (ground_truth > 0).astype(np.int32) + segmentation = (segmentation > 0).astype(np.int32) + + # Compute intersection: overlap between segmentation and ground truth + intersection = np.sum(segmentation * ground_truth) + + # Compute total predicted (segmentation region) + total_predicted = np.sum(segmentation) + + # Handle case where there are no predictions + if total_predicted == 0: + return 0.0 # Precision is undefined; returning 0 + + # Calculate precision + precision = intersection / total_predicted + return precision + +def evaluate_file(labels_path, segmentation_path, model_name, crop= False, precision_score=False): print(f"Evaluate labels {labels_path} and vesicles {segmentation_path}") ds_name = os.path.basename(os.path.dirname(labels_path)) @@ -34,22 +64,25 @@ def evaluate_file(labels_path, segmentation_path, model_name, crop= False): #get the labels and segmentation with h5py.File(labels_path) as label_file: - gt = label_file["/labels/AZ"][:] + gt = label_file["/labels/thin_az"][:] with h5py.File(segmentation_path) as seg_file: - segmentation = seg_file["/AZ/segment_from_AZmodel_v3"][:] + segmentation = seg_file["/AZ/thin_az"][:] if crop: print("cropping the annotation and segmentation") segmentation, gt = extract_gt_bounding_box(segmentation, gt) # Evaluate the match of ground truth and segmentation - dice_score = evaluate(gt, segmentation) + if precision_score: + precision = compute_precision(gt, segmentation) + else: + dice_score = evaluate(gt, segmentation) # Store results result_folder = "/user/muth9/u12095/synaptic-reconstruction/scripts/cooper/evaluation_results" os.makedirs(result_folder, exist_ok=True) - result_path = os.path.join(result_folder, f"evaluation_{model_name}.csv") + result_path = os.path.join(result_folder, f"evaluation_{model_name}_dice_thinpred_thinanno.csv") print("Evaluation results are saved to:", result_path) # Load existing results if the file exists @@ -59,9 +92,14 @@ def evaluate_file(labels_path, segmentation_path, model_name, crop= False): results = None # Create a new DataFrame for the current evaluation - res = pd.DataFrame( - [[ds_name, tomo, dice_score]], columns=["dataset", "tomogram", "dice_score"] - ) + if precision_score: + res = pd.DataFrame( + [[ds_name, tomo, precision]], columns=["dataset", "tomogram", "precision"] + ) + else: + res = pd.DataFrame( + [[ds_name, tomo, dice_score]], columns=["dataset", "tomogram", "dice_score"] + ) # Combine with existing results or initialize with the new results if results is None: @@ -72,7 +110,7 @@ def evaluate_file(labels_path, segmentation_path, model_name, crop= False): # Save the results to the CSV file results.to_csv(result_path, index=False) -def evaluate_folder(labels_path, segmentation_path, model_name, crop = False): +def evaluate_folder(labels_path, segmentation_path, model_name, crop = False, precision_score=False): print(f"Evaluating folder {segmentation_path}") print(f"Using labels stored in {labels_path}") @@ -82,7 +120,7 @@ def evaluate_folder(labels_path, segmentation_path, model_name, crop = 
False): for vesicle_file in vesicles_files: if vesicle_file in label_files: - evaluate_file(os.path.join(labels_path, vesicle_file), os.path.join(segmentation_path, vesicle_file), model_name, crop) + evaluate_file(os.path.join(labels_path, vesicle_file), os.path.join(segmentation_path, vesicle_file), model_name, crop, precision_score) @@ -93,13 +131,14 @@ def main(): parser.add_argument("-v", "--segmentation_path", required=True) parser.add_argument("-n", "--model_name", required=True) parser.add_argument("--crop", action="store_true", help="Crop around the annotation.") + parser.add_argument("--precision", action="store_true", help="Calculate precision score.") args = parser.parse_args() segmentation_path = args.segmentation_path if os.path.isdir(segmentation_path): - evaluate_folder(args.labels_path, segmentation_path, args.model_name, args.crop) + evaluate_folder(args.labels_path, segmentation_path, args.model_name, args.crop, args.precision) else: - evaluate_file(args.labels_path, segmentation_path, args.model_name, args.crop) + evaluate_file(args.labels_path, segmentation_path, args.model_name, args.crop, args.precision) diff --git a/scripts/cooper/training/postprocess_AZ.py b/scripts/cooper/training/postprocess_AZ.py new file mode 100644 index 0000000..e2b849e --- /dev/null +++ b/scripts/cooper/training/postprocess_AZ.py @@ -0,0 +1,107 @@ +import os +from glob import glob +import argparse + +import h5py +import numpy as np +from tqdm import tqdm +from scipy.ndimage import binary_closing +from skimage.measure import label +from synaptic_reconstruction.ground_truth.shape_refinement import edge_filter +from synaptic_reconstruction.morphology import skeletonize_object + + + +def filter_az(path, output_path): + """Filter the active zone (AZ) data from the HDF5 file.""" + ds, fname = os.path.split(path) + dataset_name = os.path.basename(ds) + out_file_path = os.path.join(output_path, "postprocessed_AZ", dataset_name, fname) + + os.makedirs(os.path.dirname(out_file_path), exist_ok=True) + + if os.path.exists(out_file_path): + return + + with h5py.File(path, "r") as f: + raw = f["raw"][:] + az = f["AZ/segment_from_AZmodel_v3"][:] + + hmap = edge_filter(raw, sigma=1.0, method="sato", per_slice=True, n_threads=8) + + # Filter the active zone by combining a bunch of things: + # 1. Find a mask with high values in the ridge filter. + threshold_hmap = 0.5 + az_filtered = hmap > threshold_hmap + # 2. Intersect it with the active zone predictions. + az_filtered = np.logical_and(az_filtered, az) + + # Postprocessing of the filtered active zone: + # 1. Apply connected components and only keep the largest component. + az_filtered = label(az_filtered) + ids, sizes = np.unique(az_filtered, return_counts=True) + ids, sizes = ids[1:], sizes[1:] + az_filtered = (az_filtered == ids[np.argmax(sizes)]).astype("uint8") + # 2. Apply binary closing. + az_filtered = np.logical_or(az_filtered, binary_closing(az_filtered, iterations=4)).astype("uint8") + + # Save the result. 
+ with h5py.File(out_file_path, "a") as f: + f.create_dataset("AZ/filtered_az", data=az_filtered, compression="gzip") + + +def process_az(path, view=False): + """Skeletonize the filtered AZ data to obtain a 1D representation.""" + key = "AZ/thin_az" + with h5py.File(path, "r") as f: + if key in f and not view: + return + az_seg = f["AZ/filtered_az"][:] + + az_thin = skeletonize_object(az_seg) + + if view: + import napari + ds, fname = os.path.split(path) + raw_path = os.path.join(ROOT, ds, fname) + with h5py.File(raw_path, "r") as f: + raw = f["raw"][:] + v = napari.Viewer() + v.add_image(raw) + v.add_labels(az_seg) + v.add_labels(az_thin) + napari.run() + else: + with h5py.File(path, "a") as f: + f.create_dataset(key, data=az_thin, compression="gzip") + + +def filter_all_azs(input_path, output_path): + """Apply filtering to all AZ data in the specified directory.""" + files = sorted(glob(os.path.join(input_path, "**/*.h5"), recursive=True)) + for ff in tqdm(files, desc="Filtering AZ segmentations"): + filter_az(ff, output_path) + + +def process_all_azs(output_path): + """Apply skeletonization to all filtered AZ data.""" + files = sorted(glob(os.path.join(output_path, "postprocessed_AZ", "**/*.h5"), recursive=True)) + for ff in tqdm(files, desc="Thinning AZ segmentations"): + process_az(ff, view=False) + + +def main(): + parser = argparse.ArgumentParser(description="Filter and process AZ data.") + parser.add_argument("input_path", type=str, help="Path to the root directory containing datasets.") + parser.add_argument("output_path", type=str, help="Path to the root directory for saving processed data.") + args = parser.parse_args() + + input_path = args.input_path + output_path = args.output_path + + filter_all_azs(input_path, output_path) + process_all_azs(output_path) + + +if __name__ == "__main__": + main() From 686b018fc4a2af32e8b7a9e8747e9708ce9d2d66 Mon Sep 17 00:00:00 2001 From: SarahMuth Date: Thu, 28 Nov 2024 21:28:44 +0100 Subject: [PATCH 35/35] erosion dilation filtering of AZ --- scripts/cooper/training/filter_AZ.py | 67 ++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) create mode 100644 scripts/cooper/training/filter_AZ.py diff --git a/scripts/cooper/training/filter_AZ.py b/scripts/cooper/training/filter_AZ.py new file mode 100644 index 0000000..78b8ba7 --- /dev/null +++ b/scripts/cooper/training/filter_AZ.py @@ -0,0 +1,67 @@ +import os +import h5py +import numpy as np +from scipy.ndimage import binary_erosion, binary_dilation, label + +def process_labels(label_file_path, erosion_structure=None, dilation_structure=None): + """ + Process the labels: perform erosion, find the largest connected component, + and perform dilation on it. + + Args: + label_file_path (str): Path to the HDF5 file containing the label data. + erosion_structure (ndarray, optional): Structuring element for erosion. + dilation_structure (ndarray, optional): Structuring element for dilation. + + Returns: + None: The processed data is saved back into the HDF5 file under a new key. 
+ """ + with h5py.File(label_file_path, "r+") as label_file: + # Read the ground truth data + gt = label_file["/labels/filtered_az"][:] + + # Perform binary erosion + eroded = binary_erosion(gt, structure=erosion_structure) + + # Label connected components + labeled_array, num_features = label(eroded) + + # Identify the largest connected component + if num_features > 0: + largest_component_label = np.argmax(np.bincount(labeled_array.flat, weights=eroded.flat)[1:]) + 1 + largest_component = (labeled_array == largest_component_label) + else: + largest_component = np.zeros_like(gt, dtype=bool) + + # Perform binary dilation on the largest connected component + dilated = binary_dilation(largest_component, structure=dilation_structure) + + # Save the result back into the HDF5 file + if "labels/erosion_filtered_az" in label_file: + del label_file["labels/erosion_filtered_az"] # Remove if it already exists + label_file.create_dataset("labels/erosion_filtered_az", data=dilated.astype(np.uint8), compression="gzip") + +def process_folder(folder_path, erosion_structure=None, dilation_structure=None): + """ + Process all HDF5 files in a folder. + + Args: + folder_path (str): Path to the folder containing HDF5 files. + erosion_structure (ndarray, optional): Structuring element for erosion. + dilation_structure (ndarray, optional): Structuring element for dilation. + + Returns: + None + """ + for file_name in os.listdir(folder_path): + if file_name.endswith(".h5") or file_name.endswith(".hdf5"): + label_file_path = os.path.join(folder_path, file_name) + print(f"Processing {label_file_path}...") + process_labels(label_file_path, erosion_structure, dilation_structure) + +# Example usage +if __name__ == "__main__": + folder_path = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/training_AZ_v2/postprocessed_AZ/12_chemical_fix_cryopreparation" # Replace with the path to your folder + erosion_structure = np.ones((3, 3, 3)) # Example structuring element + dilation_structure = np.ones((3, 3, 3)) # Example structuring element + process_folder(folder_path, erosion_structure, dilation_structure)