-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathwrapper.py
221 lines (190 loc) · 9.63 KB
/
wrapper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
import argparse
import os
import re
import pandas as pd
import subprocess
import shlex
import shutil
import glob
from pathlib import Path
from collections import defaultdict
from collections import namedtuple
from modules.parse_yaml import parse_yaml
from modules.parse_htd import parse_htd
from modules.get_wells import get_wells
from modules.get_image_paths import get_image_paths
from modules.convert_video import convert_video
from modules.dense_flow import dense_flow
from modules.segment_worms import segment_worms
from modules.generate_thumbnails import generate_thumbnails
from modules.parse_htd import parse_htd
from modules.crop_wells import auto_crop, grid_crop
from modules.parse_yaml import parse_yaml
from modules.fecundity import fecundity
if __name__ == "__main__":
# create the class that will instantiate the namedtuple
g_class = namedtuple(
'g_class', 'mode file_structure well_detection image_n_row image_n_col species stages input work output plate_dir plate plate_short desc time_points columns rows x_sites y_sites n_waves wave_names wells plate_paths')
############################################
######### 1. GET THE YAML CONFIGS #########
############################################
arg_parser = argparse.ArgumentParser()
g, modules = parse_yaml(arg_parser, g_class)
#########################################################
######### 2. GET THE HTD CONFIGS OR CROP WELLS #########
#########################################################
if g.file_structure == 'imagexpress':
g = parse_htd(g, g_class)
else:
# crops_wells will write images in IX format to input/ and create an HTD
if g.well_detection == 'auto':
auto_crop(g)
elif g.well_detection == 'grid':
grid_crop(g)
else:
raise ValueError('Incompatible well detection mode selected (or none selected with multi-well mode.')
g = parse_htd(g, g_class)
#########################################
######### 3. GET WELLS & PATHS #########
#########################################
try:
if 'All' in g.wells:
wells = get_wells(g)
plate_paths = get_image_paths(g, wells)
else:
wells = g.wells
plate_paths = get_image_paths(g, g.wells)
# remove files that aren't going to be processed
print('Removing unselected wells.')
all_wells = get_wells(g)
all_paths = get_image_paths(g, all_wells)
all_paths = [item for sublist in all_paths for item in sublist]
all_paths = [i.as_posix() for i in all_paths]
plate_paths = [item for sublist in plate_paths for item in sublist]
plate_paths = [i.as_posix() for i in plate_paths]
rm_paths = set(all_paths).difference(plate_paths)
for rm in rm_paths:
os.remove(rm)
except TypeError:
print("ERROR: YAML parameter \"wells\" improperly formated (or none provided) or failure to retrieve image paths.")
# update g with wells & plate_paths and print contents (except for plate_paths)
g = g._replace(wells=wells, plate_paths=plate_paths)
########################################
######### 4. RUN CELLPROFILER #########
########################################
if 'cellprofiler' in modules.keys():
pipeline = modules['cellprofiler']['pipeline'][0]
if 'cellpose' in pipeline:
# rename TIF to tif to work with cellpose
for filepath in Path('input/{}/TimePoint_1'.format(g.plate)).glob('**/*'):
os.rename(filepath, str(filepath).replace('TIF', 'tif'))
wells = [well.replace('TIF', 'tif') for well in wells]
g = g._replace(wells=wells)
cellpose_command = 'python -m cellpose --dir {}/{}/TimePoint_1 --pretrained_model wrmXpress/cp_pipelines/cellpose_models/20220830_all --diameter 0 --save_png --no_npy --verbose'.format(g.input, g.plate)
cellpose_command_split = shlex.split(cellpose_command)
subprocess.run(cellpose_command_split)
os.mkdir("{}/cellpose_masks".format(g.output))
for file in glob.glob("{}/{}/TimePoint_1/*.png".format(g.input, g.plate)):
shutil.copy(file, "{}/cellpose_masks".format(g.output))
fl_command = 'Rscript wrmXpress/scripts/cp/generate_filelist_{}.R {} {}'.format(
pipeline, g.plate, g.wells)
fl_command_split = shlex.split(fl_command)
print('Generating file list for CellProfiler.')
subprocess.run(fl_command_split)
cellprofiler_command = 'cellprofiler -c -r -p wrmXpress/cp_pipelines/pipelines/{}.cppipe --data-file=input/image_paths_{}.csv'.format(
pipeline, pipeline)
cellprofiler_command_split = shlex.split(cellprofiler_command)
print('Starting CellProfiler.')
subprocess.run(cellprofiler_command_split)
md_command = 'Rscript wrmXpress/scripts/metadata_join_master.R {} {} {}'.format(
g.plate, g.rows, g.columns)
md_command_split = shlex.split(md_command)
print('Joining experiment metadata and tidying.')
subprocess.run(md_command_split)
######################################
######### 5. RUN PY MODULES #########
######################################
# Each module will give a single phenotypic value, which is then written
# out to a CSV. out_dict is a dictionary of well: [phenotype1, phenotype2, etc.]
# that will later be converted to a DataFrame and written to a csv.
# cols includes all the column names that will be in the output
# DataFrame. When the modules are run below, they need to append the column
# name to cols and the phenotypic value to out_dict['well'].
if 'cellprofiler' not in modules.keys():
out_dict = defaultdict(list)
cols = []
# start the well-by-well iterator
for well, well_paths in zip(wells, plate_paths):
print("Running well {}".format(well))
if 'convert' in modules.keys():
# get the value of reorganize and pass it to the module
reorganize = modules.get('convert').get('save_video')
multiplier = float(modules.get(
'convert').get('rescale_multiplier'))
video = convert_video(g, well, well_paths, reorganize, multiplier)
print('{}: module \'convert\' finished'.format(well))
if 'segment' in modules.keys():
if g.n_waves != 1:
wave_length = modules.get('segment').get('wavelength')
# filter for the paths to the wavelengths to be segmented
well_paths = [
path for path in well_paths if wave_length in str(path)]
worm_area = segment_worms(g, well, well_paths)
if 'worm_area' not in cols:
cols.append('worm_area')
out_dict[well].append(worm_area)
print('{}: module \'segment\' finished'.format(well))
if 'motility' in modules.keys():
# don't use a rescaled video for flow
video = convert_video(g, well, well_paths, False, 1)
flow = dense_flow(g, well, video)
if 'optical_flow' not in cols:
cols.append('optical_flow')
out_dict[well].append(flow)
print('{}: module \'motility\' finished'.format(well))
if 'fecundity' in modules.keys():
progeny_area = fecundity(g, well, well_paths)
if 'progeny_area' not in cols:
cols.append('progeny_area')
out_dict[well].append(progeny_area)
print('{}: module \'fecundity\' finished'.format(well))
##################################
######### 6. WRITE DATA #########
##################################
df = pd.DataFrame.from_dict(out_dict, orient='index', columns=cols)
g.output.joinpath('data').mkdir(parents=True, exist_ok=True)
outpath = g.output.joinpath('data', g.plate + '_data' + ".csv")
df.to_csv(path_or_buf=outpath, index_label='well')
md_command = 'Rscript wrmXpress/scripts/metadata_join_master.R {} {} {}'.format(g.plate, g.rows, g.columns)
md_command_split = shlex.split(md_command)
subprocess.run(md_command_split)
###########################################
######### 7. GENERATE THUMBNAILS #########
###########################################
if 'dx' in modules.keys():
# one for each wavelength (TimePoint_1)
# this if/else is required because of an IX nomenclature quirk:
# if there is only one wavelength, there is no _w1 in the file name
# if there is > 1, each image has _w1, _w2, etc...
if g.n_waves == 1:
type = ''
print("Generating w1 thumbnails")
generate_thumbnails(g, type)
else:
for i in range(1, g.n_waves + 1):
type = 'w' + str(i)
print("Generating {} thumbnails".format(type))
generate_thumbnails(g, type)
# one for each specific module
dx_types = []
if 'segment' in modules:
dx_types.append('binary')
if 'motility' in modules:
dx_types.append('motility')
if 'fecundity' in modules and g.species == 'Sma':
dx_types.append('filtered')
elif 'fecundity' in modules and g.species == 'Bma':
dx_types.append('binary')
for type in dx_types:
print("Generating {} thumbnails".format(type))
generate_thumbnails(g, type)