Sm dev #62

Open. Wants to merge 44 commits into base: main.

44 commits
e2c4f4b
AZ segmentation
SarahMuth Oct 23, 2024
a0f713f
updates
SarahMuth Oct 28, 2024
94f9121
Merge branch 'main' of https://github.com/computational-cell-analytic…
SarahMuth Oct 28, 2024
ac1ac00
update 2D DA
SarahMuth Oct 28, 2024
37de75d
Merge branch 'main' of https://github.com/computational-cell-analytic…
SarahMuth Oct 29, 2024
61c57fa
small updates, compartment segmentation
SarahMuth Nov 7, 2024
40e965e
Implement code for first analysis
constantinpape Nov 7, 2024
7be9ee8
2D seg with mask
SarahMuth Nov 11, 2024
b1bef7e
Merge branch 'analysis' of https://github.com/computational-cell-anal…
SarahMuth Nov 11, 2024
f85e445
spatial distribution analysis
SarahMuth Nov 11, 2024
8ef16bc
intersection between compartment boundary and AZ segmentation
SarahMuth Nov 12, 2024
e625ef7
Merge branch 'main' of https://github.com/computational-cell-analytic…
SarahMuth Nov 12, 2024
09f6c84
Update compartment postprocessing
constantinpape Nov 12, 2024
d7dbb39
Merge branch 'more-comp-seg-updates' of https://github.com/computatio…
SarahMuth Nov 12, 2024
f893d23
updating data analysis on smaller details
SarahMuth Nov 13, 2024
08c56b9
minor updates data analysis
SarahMuth Nov 13, 2024
36d834f
Implement inner ear analysis WIP
constantinpape Nov 14, 2024
49d1b7c
calculation of AZ area
SarahMuth Nov 14, 2024
8a515d1
corrected radius factor
SarahMuth Nov 14, 2024
0f40d3c
Update inner ear analysis
constantinpape Nov 15, 2024
ad4741b
Update inner ear analysis
constantinpape Nov 17, 2024
305a80b
Updates to inner ear training and eval
constantinpape Nov 17, 2024
903e59e
Update inner ear analysis
constantinpape Nov 18, 2024
b1449d2
minor changes
SarahMuth Nov 19, 2024
0b7884d
Merge branch 'main' of https://github.com/computational-cell-analytic…
constantinpape Nov 19, 2024
186c92d
Update inner ear analysis scripts
constantinpape Nov 20, 2024
186df5b
Merge branch 'more-inner-ear-analysis' of https://github.com/computat…
constantinpape Nov 20, 2024
2ccf340
Add script to extract vesicle diameters for inner ear data
constantinpape Nov 20, 2024
5feff6a
Update active zone analysis for SNAP/MUNC data
constantinpape Nov 21, 2024
9b8c7a2
Add more inner ear analysis code
constantinpape Nov 21, 2024
db89b44
evaluation of AZ seg
SarahMuth Nov 23, 2024
51165a5
Fix issues with the segmentation export to IMOD
constantinpape Nov 23, 2024
aa5d78e
clean up
SarahMuth Nov 23, 2024
20e429b
clean up
SarahMuth Nov 23, 2024
19f618e
clean up
SarahMuth Nov 23, 2024
cb693b1
Update data summaries
constantinpape Nov 24, 2024
a0c31a8
Fix issue in data aggregation
constantinpape Nov 24, 2024
93a66c1
Update data summary
constantinpape Nov 24, 2024
e0dfda6
Merge branch 'main' into more-inner-ear-analysis
constantinpape Nov 24, 2024
59a38db
Update all measurements for the inner ear analysis
constantinpape Nov 24, 2024
9728951
Update vesicle diameter analysis
constantinpape Nov 24, 2024
84d3ec7
Merge branch 'more-inner-ear-analysis' of https://github.com/computat…
SarahMuth Nov 25, 2024
622da1e
update AZ evaluation
SarahMuth Nov 27, 2024
686b018
erosion dilation filtering of AZ
SarahMuth Nov 28, 2024
7 changes: 6 additions & 1 deletion .gitignore
@@ -8,6 +8,11 @@ models/*/
run_sbatch.sbatch
slurm/
scripts/cooper/evaluation_results/
analysis_results/
scripts/cooper/training/copy_testset.py
scripts/rizzoli/upsample_data.py
scripts/cooper/training/find_rec_testset.py
scripts/cooper/training/find_rec_testset.py
scripts/rizzoli/combine_2D_slices.py
scripts/rizzoli/combine_2D_slices_raw.py
scripts/cooper/remove_h5key.py
scripts/cooper/analysis/calc_AZ_area.py
122 changes: 76 additions & 46 deletions scripts/aggregate_data_information.py
@@ -12,55 +12,64 @@
stem = "STEM"


def aggregate_vesicle_train_data(roots, test_tomograms, conditions, resolutions):
def aggregate_vesicle_train_data(roots, conditions, resolutions):
tomo_names = []
tomo_vesicles = []
tomo_vesicles_all, tomo_vesicles_imod = [], []
tomo_condition = []
tomo_resolution = []
tomo_train = []

for ds, root in roots.items():
print("Aggregate data for", ds)
train_root = root["train"]
if train_root == "":
test_root = root["test"]
tomograms = sorted(glob(os.path.join(test_root, "2024**", "*.h5"), recursive=True))
this_test_tomograms = [os.path.basename(tomo) for tomo in tomograms]
def aggregate_split(ds, split_root, split):
if ds.startswith("04"):
tomograms = sorted(glob(os.path.join(split_root, "2024**", "*.h5"), recursive=True))
else:
# This is only the case for 04, which is also nested
tomograms = sorted(glob(os.path.join(train_root, "*.h5")))
this_test_tomograms = test_tomograms[ds]
tomograms = sorted(glob(os.path.join(split_root, "*.h5")))

assert len(tomograms) > 0, ds
this_condition = conditions[ds]
this_resolution = resolutions[ds][0]

for tomo_path in tqdm(tomograms):
for tomo_path in tqdm(tomograms, desc=f"Aggregate {split}"):
fname = os.path.basename(tomo_path)
with h5py.File(tomo_path, "r") as f:
try:
tomo_name = f.attrs["filename"]
except KeyError:
tomo_name = fname

n_label_sets = len(f["labels"])
if n_label_sets > 2:
print(tomo_path, "contains the following labels:", list(f["labels"].keys()))
seg = f["labels/vesicles"][:]
n_vesicles = len(np.unique(seg)) - 1
if "labels/vesicles/combined_vesicles" in f:
all_vesicles = f["labels/vesicles/combined_vesicles"][:]
imod_vesicles = f["labels/vesicles/masked_vesicles"][:]
n_vesicles_all = len(np.unique(all_vesicles)) - 1
n_vesicles_imod = len(np.unique(imod_vesicles)) - 2
else:
vesicles = f["labels/vesicles"][:]
n_vesicles_all = len(np.unique(vesicles)) - 1
n_vesicles_imod = n_vesicles_all

tomo_names.append(tomo_name)
tomo_vesicles.append(n_vesicles)
tomo_vesicles_all.append(n_vesicles_all)
tomo_vesicles_imod.append(n_vesicles_imod)
tomo_condition.append(this_condition)
tomo_resolution.append(this_resolution)
tomo_train.append("test" if fname in this_test_tomograms else "train/val")
tomo_train.append(split)

for ds, root in roots.items():
print("Aggregate data for", ds)
train_root = root["train"]
if train_root != "":
aggregate_split(ds, train_root, "train/val")
test_root = root["test"]
if test_root != "":
aggregate_split(ds, test_root, "test")

df = pd.DataFrame({
"tomogram": tomo_names,
"condition": tomo_condition,
"resolution": tomo_resolution,
"used_for": tomo_train,
"vesicle_count": tomo_vesicles,
"vesicle_count_all": tomo_vesicles_all,
"vesicle_count_imod": tomo_vesicles_imod,
})

os.makedirs("data_summary", exist_ok=True)
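
The counting convention in the new code relies on np.unique including the background id 0, so one label is subtracted for the combined vesicles and two for the IMOD-masked vesicles, presumably because the masked volume carries one extra non-vesicle id besides background. A minimal sketch of the same arithmetic on synthetic arrays (values are illustrative, not taken from the data):

import numpy as np

# Synthetic label images: 0 = background; the masked variant also contains a mask id (here 1).
combined_vesicles = np.array([[0, 2, 2], [0, 3, 4], [5, 0, 0]])
masked_vesicles = np.array([[0, 1, 1], [0, 3, 4], [5, 0, 0]])

n_vesicles_all = len(np.unique(combined_vesicles)) - 1   # 4 vesicle ids: 2, 3, 4, 5
n_vesicles_imod = len(np.unique(masked_vesicles)) - 2    # 3 vesicle ids: 3, 4, 5
print(n_vesicles_all, n_vesicles_imod)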
@@ -70,60 +79,47 @@ def aggregate_vesicle_train_data(roots, test_tomograms, conditions, resolutions)
def vesicle_train_data():
roots = {
"01": {
"train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/extracted/20240909_cp_datatransfer/01_hoi_maus_2020_incomplete", # noqa
"train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/01_hoi_maus_2020_incomplete", # noqa
"test": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/testsets/01_hoi_maus_2020_incomplete", # noqa
},
"02": {
"train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/extracted/20240909_cp_datatransfer/02_hcc_nanogold", # noqa
"train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/02_hcc_nanogold", # noqa
"test": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/testsets/02_hcc_nanogold", # noqa
},
"03": {
"train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/extracted/20240909_cp_datatransfer/03_hog_cs1sy7", # noqa
"train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/03_hog_cs1sy7", # noqa
"test": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/testsets/03_hog_cs1sy7", # noqa
},
"04": {
"train": "",
"test": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/ground_truth/04Dataset_for_vesicle_eval/", # noqa
},
"05": {
"train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/extracted/20240909_cp_datatransfer/05_stem750_sv_training", # noqa
"train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/05_stem750_sv_training", # noqa
"test": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/testsets/05_stem750_sv_training", # noqa
},
"07": {
"train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/extracted/20240909_cp_datatransfer/07_hoi_s1sy7_tem250_ihgp", # noqa
"train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/07_hoi_s1sy7_tem250_ihgp", # noqa
"test": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/testsets/07_hoi_s1sy7_tem250_ihgp", # noqa
},
"09": {
"train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/extracted/20240909_cp_datatransfer/09_stem750_66k", # noqa
"train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/09_stem750_66k", # noqa
"test": "",
},
"10": {
"train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/extracted/20240909_cp_datatransfer/10_tem_single_release", # noqa
"train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/10_tem_single_release", # noqa
"test": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/testsets/10_tem_single_release", # noqa
},
"11": {
"train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/extracted/20240909_cp_datatransfer/11_tem_multiple_release", # noqa
"train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/11_tem_multiple_release", # noqa
"test": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/testsets/11_tem_multiple_release", # noqa
},
"12": {
"train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/extracted/20240909_cp_datatransfer/12_chemical_fix_cryopreparation", # noqa
"train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/12_chemical_fix_cryopreparation", # noqa
"test": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/testsets/12_chemical_fix_cryopreparation", # noqa
},
}

test_tomograms = {
"01": ["tomogram-009.h5", "tomogram-038.h5", "tomogram-049.h5", "tomogram-052.h5", "tomogram-057.h5", "tomogram-060.h5", "tomogram-067.h5", "tomogram-074.h5", "tomogram-076.h5", "tomogram-083.h5", "tomogram-133.h5", "tomogram-136.h5", "tomogram-145.h5", "tomogram-149.h5", "tomogram-150.h5"], # noqa
"02": ["tomogram-004.h5", "tomogram-008.h5"],
"03": ["tomogram-003.h5", "tomogram-004.h5", "tomogram-008.h5",],
"04": [], # all used for test
"05": ["tomogram-003.h5", "tomogram-005.h5",],
"07": ["tomogram-006.h5", "tomogram-017.h5",],
"09": [], # no test data
"10": ["tomogram-001.h5", "tomogram-002.h5", "tomogram-007.h5"],
"11": ["tomogram-001.h5 tomogram-007.h5 tomogram-008.h5"],
"12": ["tomogram-004.h5", "tomogram-021.h5", "tomogram-022.h5",],
}

conditions = {
"01": single_ax_tem,
"02": dual_ax_tem,
Expand All @@ -150,7 +146,7 @@ def vesicle_train_data():
"12": (1.554, 1.554, 1.554)
}

aggregate_vesicle_train_data(roots, test_tomograms, conditions, resolutions)
aggregate_vesicle_train_data(roots, conditions, resolutions)


def aggregate_az_train_data(roots, test_tomograms, conditions, resolutions):
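
The refactored entry point takes per-dataset train and test directories instead of the old test_tomograms filename lists; an empty string for either split skips it, and dataset "04" is globbed recursively because its tomograms are nested. A minimal calling sketch with placeholder paths, condition strings, and resolution values (the real ones are defined in vesicle_train_data):

# Hypothetical inputs; the actual roots/conditions/resolutions live in vesicle_train_data().
roots = {
    "01": {"train": "/path/to/01/train", "test": "/path/to/01/testset"},
    "09": {"train": "/path/to/09/train", "test": ""},  # "" -> this split is skipped
}
conditions = {"01": "single axis TEM", "09": "STEM"}
resolutions = {"01": (1.554, 1.554, 1.554), "09": (0.868, 0.868, 0.868)}

aggregate_vesicle_train_data(roots, conditions, resolutions)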
@@ -397,6 +393,11 @@ def vesicle_domain_adaptation_data():
"MF_05649_P-09175-E_06.h5", "MF_05646_C-09175-B_001B.h5", "MF_05649_P-09175-E_07.h5",
"MF_05649_G-09175-C_001.h5", "MF_05646_C-09175-B_002.h5", "MF_05649_G-09175-C_04.h5",
"MF_05649_P-09175-E_05.h5", "MF_05646_C-09175-B_000.h5", "MF_05646_C-09175-B_001.h5"
],
"frog": [
"block10U3A_three.h5", "block30UB_one_two.h5", "block30UB_two.h5", "block10U3A_one.h5",
"block184B_one.h5", "block30UB_three.h5", "block10U3A_two.h5", "block30UB_four.h5",
"block30UB_one.h5", "block10U3A_five.h5",
]
}

@@ -439,13 +440,42 @@ def vesicle_domain_adaptation_data():
aggregate_da(roots, train_tomograms, test_tomograms, resolutions)


def get_n_images_frog():
root = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/rizzoli/extracted/upsampled_by2"
tomos = ["block10U3A_three.h5", "block30UB_one_two.h5", "block30UB_two.h5", "block10U3A_one.h5",
"block184B_one.h5", "block30UB_three.h5", "block10U3A_two.h5", "block30UB_four.h5",
"block30UB_one.h5", "block10U3A_five.h5"]

n_images = 0
for tomo in tomos:
path = os.path.join(root, tomo)
with h5py.File(path, "r") as f:
n_images += f["raw"].shape[0]
print(n_images)


def get_image_sizes_tem_2d():
root = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/2D_data/maus_2020_tem2d_wt_unt_div14_exported_scaled/good_for_DAtraining/maus_2020_tem2d_wt_unt_div14_exported_scaled" # noqa
tomos = [
"MF_05649_P-09175-E_06.h5", "MF_05646_C-09175-B_001B.h5", "MF_05649_P-09175-E_07.h5",
"MF_05649_G-09175-C_001.h5", "MF_05646_C-09175-B_002.h5", "MF_05649_G-09175-C_04.h5",
"MF_05649_P-09175-E_05.h5", "MF_05646_C-09175-B_000.h5", "MF_05646_C-09175-B_001.h5"
]
for tomo in tomos:
path = os.path.join(root, tomo)
with h5py.File(path, "r") as f:
print(f["raw"].shape)


def main():
# active_zone_train_data()
# compartment_train_data()
# mito_train_data()
# vesicle_train_data()
vesicle_train_data()

vesicle_domain_adaptation_data()
# vesicle_domain_adaptation_data()
# get_n_images_frog()
# get_image_sizes_tem_2d()


main()
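
Both helpers added at the end of the script follow the same HDF5 inspection pattern: open each file read-only and look at the "raw" dataset, either summing the leading axis to count 2D slices or printing shapes. A generic version of that pattern, with an illustrative function name and arguments:

import os
import h5py

def summarize_raw_datasets(root, filenames, key="raw"):
    # Print each dataset's shape and return the total number of 2D slices (leading axis).
    n_images = 0
    for fname in filenames:
        with h5py.File(os.path.join(root, fname), "r") as f:
            shape = f[key].shape
            print(fname, shape)
            n_images += shape[0]
    return n_images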