-
Notifications
You must be signed in to change notification settings - Fork 6
/
collectFile-tuple.nf
executable file
·60 lines (48 loc) · 1.76 KB
/
collectFile-tuple.nf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
#!/usr/bin/env nextflow
// How to deal with a process that has a large number of input files, say
// hundreds or thousands, for example when merging the outputs of previous tasks.
//
// - Use the Nextflow collectFile() operator.
// - Nextflow will not link the input files into the work execution directory.
// - Nextflow will create a metafile with in each line the full path name of an input file.
// - We can control the naming of the metafile.
// - The application in the process can use this metafile to know its inputs.
//
// This example is more complicated as it features collectFile() into separate groups
// by keys.
// Produce 15 small text files, all tagged with the grouping key 'star'.
// Stands in for a real task that would run many times in parallel.
process star {
    output:
    set val('star'), file('*.txt') into ch_f

    script:
    'for i in {00..14}; do echo "some F $i content" > f$i.txt; done'
}
// Produce 15 small text files, all tagged with the grouping key 'moon'.
// Stands in for a real task that would run many times in parallel.
process moon {
    output:
    set val('moon'), file('*.txt') into ch_g

    script:
    'for i in {00..14}; do echo "some G $i content" > g$i.txt; done'
}
// In a more realistic example the files in ch_f would come from
// a process executed many times in parallel, similar for ch_g.
// We mimic this by using the transpose() operator below.
// This can be observed by running with -dump-channels input
//
// Pipeline: combine both keyed channels, flatten each (key, [files]) tuple
// into per-file (key, file) items, regroup by key, then write one metafile
// per key whose lines are the paths of that key's files.
ch_g.mix(ch_f)
.view()
.transpose()
.dump(tag: 'input')
.groupTuple()
.collectFile { id, files -> [ id, files.collect{ it.toString() }.join('\n') + '\n' ] }
.into{ch_report; ch_merge}
ch_report.println()
// Notes on the chain above:
// - mix(): merges the 'moon' and 'star' tuples onto a single channel.
// - view(): NOTE(review): prints each tuple to the console; looks like leftover
//   debugging, since dump(tag:'input') below is the documented inspection hook.
// - transpose(): turns (key, [f0, f1, ...]) into one (key, file) item per file,
//   mimicking many parallel task outputs.
// - groupTuple(): collects the per-file items back into (key, [files]) by key.
// - collectFile { id, files -> [name, content] }: writes a file named after the
//   key ('star' or 'moon') containing one input-file path per line.
// - into{}: duplicates the metafile channel — one copy printed via
//   ch_report.println(), one consumed by the merge process.
// Concatenate all files listed in a metafile into a single <key>.output file.
// Receives one metafile per grouping key ('star' or 'moon'); the process is
// therefore executed once per key. Results are copied into results/.
process merge {
    publishDir "$baseDir/results", mode: 'copy'

    input:
    file metafile from ch_merge.view()

    output:
    file('*.output')

    script:
    // The metafile's base name is the grouping key, so each group
    // yields its own distinctly named output file.
    basename = metafile.baseName
    """
    # Read one input-file path per line from the metafile and concatenate.
    # IFS= and -r keep paths with leading blanks or backslashes intact;
    # quoting \$f protects paths containing spaces. Redirecting the metafile
    # into the loop avoids the needless 'cat | while' pipeline.
    while IFS= read -r f; do
        cat "\$f"
    done < $metafile > ${basename}.output
    # more realistically, a script/program would e.g. execute
    # my_merge_program -I $metafile
    """
}