-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path.yoUMAP_vocalizations.bds
executable file
·232 lines (172 loc) · 8.06 KB
/
.yoUMAP_vocalizations.bds
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
#!/usr/bin/env bds
#-------------------------------------------------------------------
# Default values for all incoming flags.
# They are automatically overwritten by values supplied at launch.
#-------------------------------------------------------------------
string e = "Not_A_Dir"                  # experiment directory
int n = 1                               # default number of cores per task
string c = ppwd + '/.Defaults.config'   # path to the config file
#-------------------------------------------------------------------
# Save all the useful paths in one place to prevent grief later
#-------------------------------------------------------------------
# Top level
experiment := e
# Every path below is built by plain string concatenation, so the
# experiment directory must end with a slash.
if( !experiment.endsWith('/') ) { experiment = experiment + '/' }
# Fail fast instead of silently processing nothing when the directory
# is missing or the sentinel default was never replaced.
if( !experiment.isDir() ) { error "Experiment directory not found: $experiment" }
pipe_modules := ppwd + '/.Pipe_Modules'
default_config := ppwd + '/.Defaults.config'
# Input folders
inputs := experiment + 'Raw_Inputs/'
# Output folders; these are created on the fly by the stages below
segmented_songs := experiment + 'Segmented_Songs/'
segmented_syllables := experiment + 'Segmented_Syllables/'
clustered_syllables := experiment + 'Clustered_Syllables/'
sequenced_syllables := experiment + 'Sequenced_Syllables/'
#-------------------------------------------------------------------
# Stage 1: song segmentation.
# List every input animal and queue one segmentation task per animal.
#-------------------------------------------------------------------
string[] input_samples = inputs.dir()   # list of sample names (not paths)
par{
    for( string sample : input_samples ){
        #---------------------------------------------------------------
        # Handle the paths for this sample
        #---------------------------------------------------------------
        sample_path := inputs + sample + '/'
        # The directory listing can contain plain files as well; only
        # directories are samples, so skip everything else before any
        # output folders get created for it.
        if( !sample_path.isDir() ) { continue }
        # Collect the recordings; the module receives them as a single
        # '__SPLIT__'-delimited argument.
        wav_list := sample_path.dirPath('*.wav')
        wav_file := wav_list.join('__SPLIT__')
        # Intermediate output directories for this sample
        seg_song_dest := segmented_songs + sample + '/'
        animal_dest := seg_song_dest + sample + '/'
        seg_song_wav := animal_dest + 'wavs/'
        seg_song_csv := animal_dest + 'csv/'
        if (!animal_dest.isDir()) { animal_dest.mkdir() }
        if (!seg_song_wav.isDir()) { seg_song_wav.mkdir() }
        if (!seg_song_csv.isDir()) { seg_song_csv.mkdir() }
        #---------------------------------------------------------------
        # Run the task
        #---------------------------------------------------------------
        task_seg_song := 'Segmenting songs: ' + sample
        # Check file used as the task's output in the dependency below
        song_chk := seg_song_dest + 'song_chk.txt'
        string Song_tid = task(taskName := task_seg_song, song_chk <- wav_list, cpus := n){
            # Provide some feedback
            sys echo $task_seg_song
            # Run the module
            sys python3 "$pipe_modules/segment_songs.py" -i "$wav_file" -o "$seg_song_dest" -s $sample -n $n -c "$c"
            # Stamp the check file only after the module has run, so the
            # file is not written ahead of the work it stands for.
            sys date >> "$song_chk"
        }
    }
}
# Let all tasks finish
wait
#-------------------------------------------------------------------
# Stage 2: syllable segmentation.
# Walk every animal that has a folder under Segmented_Songs/ and queue
# one syllable-segmentation task per animal.
#-------------------------------------------------------------------
string[] song_samples = segmented_songs.dir()
par{
    for( string sample : song_samples ){
        #---------------------------------------------------------------
        # Inputs: the segmented song wavs of this animal
        #---------------------------------------------------------------
        wav_dir := segmented_songs + sample + '/' + sample + '/wavs/'
        wav_list := wav_dir.dirPath('*.wav')
        # All wav paths are handed to the module as one delimited string
        joined_wavs := wav_list.join('__SPLIT__')
        #---------------------------------------------------------------
        # Outputs: one hdf5 per animal inside its own folder
        #---------------------------------------------------------------
        out_dest := segmented_syllables + sample+ '/'
        out_hdf5 := segmented_syllables + sample + '/' + sample + '_segmented_syllables.hdf5'
        # Only the destination folder is created here; the hdf5 path is
        # passed to the module via -o.
        if (!out_dest.isDir()) {
            out_dest.mkdir()
        }
        #---------------------------------------------------------------
        # Queue the task
        #---------------------------------------------------------------
        task_label := 'Segmenting syllables: ' + sample
        string Syll_tid = task(taskName := task_label, out_hdf5 <- wav_list, cpus := n){
            # Progress message
            sys echo $task_label
            # Segmentation module
            sys python3 $pipe_modules/segment_syllables.py -i "$joined_wavs" -o $out_hdf5 -s $sample -n $n -c $c
        }
    }
}
# Block until every queued task is done
wait
#-------------------------------------------------------------------
# Stage 3: embedding, clustering, and sequencing.
# List all the syllable hdf5s and queue one animal-level clustering
# task per sample. The hdf5 paths are also collected in hdf5_list for
# the experiment-level clustering.
#-------------------------------------------------------------------
# NOTE(review): hdf5_list is not read anywhere in this file, and it is
# appended to from inside a par block - confirm the appends are visible
# to the main thread before relying on it.
string[] hdf5_list   # init list
string[] fully_segmeted_samples = segmented_syllables.dir()
par{
    for( string sample : fully_segmeted_samples ){
        #---------------------------------------------------------------
        # Handle the paths for this sample
        #---------------------------------------------------------------
        sample_path := segmented_syllables + sample
        # Find the hdf5 produced by syllable segmentation
        hdf5_files := sample_path.dirPath('*.hdf5')
        # Without an hdf5 there is nothing to cluster; skip the sample
        # rather than queue a task with an empty input path.
        if( hdf5_files.size() == 0 ) {
            warning "No hdf5 found for sample $sample - skipping clustering"
            continue
        }
        hdf5_file := hdf5_files.join()   # get file
        hdf5_list += hdf5_file           # append to list
        # Output csv for this sample
        syl_dir := clustered_syllables + sample + '/'
        syl_csv := syl_dir + sample + '_clustered_syllables.csv'
        # Make the output location
        if (!syl_dir.isDir()) { syl_dir.mkdir() }
        #---------------------------------------------------------------
        # Run the task
        #---------------------------------------------------------------
        task_cluster := 'Clustering and sequencing: ' + sample
        # canFail is declared with ':=' like the other task options.
        # NOTE(review): the intent is to keep the setting scoped to this
        # task instead of assigning an outer canFail - confirm against
        # the bds task-option documentation.
        string Cluster_tid = task(taskName := task_cluster, syl_csv <- hdf5_file, cpus := n, canFail := true){
            # Provide some feedback
            sys echo $task_cluster
            # Run the module
            sys python3 $pipe_modules/cluster_syllables.py -s $sample -i $hdf5_file -o $syl_csv -n $n -c $c
        }
    }
}
# Let finish
wait
#-------------------------------------------------------------------
# Stage 4: aggregation.
# Collect every per-sample clustered-syllable csv and hand them to the
# R module, which writes a single .rds file in the experiment folder.
#-------------------------------------------------------------------
# All csv paths found
string[] syl_csv_list
# Sample names, passed to the R module alongside the csv paths
string[] sample_list
# List all samples that have a folder under Clustered_Syllables/
string[] clustered_samples = clustered_syllables.dir()
for (string sample : clustered_samples) {
    syl_path := clustered_syllables + sample + '/'
    # Glob once and reuse the result for both lists
    sample_csvs := syl_path.dirPath('*_clustered_syllables.csv')
    # Only samples that actually produced a csv are recorded
    if( sample_csvs.size() > 0 ) {
        syl_csv_list += sample_csvs
        sample_list += sample
    }
}
# Join the lists into single delimited arguments
syl_csv_file := syl_csv_list.join('__SPLIT__')
samples := sample_list.join('__SPLIT__')
# Output path
syl_rds := experiment + 'yoUMAPped_Syllables.rds'
# Info for the task
task_agg_syl := 'Aggregating Syllable Datasets'
if( syl_csv_list.size() == 0 ) {
    # With nothing to aggregate the R call would receive an empty -i
    # value; report it instead of queueing the task.
    warning "No clustered syllable csv files found in $clustered_syllables - skipping aggregation"
} else {
    # Aggregate the data in R
    string Agg_syl_tid = task(taskName := task_agg_syl, syl_rds <- syl_csv_list, cpus := n){
        # Provide some feedback
        sys echo $task_agg_syl
        sys Rscript "$pipe_modules/aggregate_outputs.R" -i "$syl_csv_file" -o "$syl_rds" -n $n -s "$samples"
    }
}