forked from h3abionet/h3agwas
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathvcf_in_bgen_merge_chro.nf
executable file
·132 lines (116 loc) · 3.78 KB
/
vcf_in_bgen_merge_chro.nf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
#!/usr/bin/env nextflow
/*
* Authors :
*
*
* Scott Hazelhurst
* jean-tristan Brandenburg
*
* On behalf of the H3ABionet Consortium
* 2015-2022
*
*
* Description : Nextflow pipeline to filter VCF files, convert each one to bgen format and merge them into a single bgen file
*
*(C) University of the Witwatersrand, Johannesburg, 2016-2019 on behalf of the H3ABioNet Consortium
*This is licensed under the MIT Licence. See the "LICENSE" file for details
*/
//---- General definitions --------------------------------------------------//
import java.nio.file.Paths;
import sun.nio.fs.UnixPath;
import java.security.MessageDigest;
nextflow.enable.dsl = 1
def helps = [ 'help' : 'help' ]
// Names of the parameters that receive a default value in this script.
// NOTE(review): params.big_time is read by every process below but has no default
// here -- presumably it is supplied by the pipeline configuration; confirm.
allowed_params = ['file_listvcf', 'min_scoreinfo', "output_dir", "max_cores", "output", "bgen_bits", "mem_req", "genotype_field", "qctoolsv2_bin", "bcftools_bin", "score_imp", "bgen_type", "other_opt", "cut_hwe"]
params.mem_req = '10GB' // memory requested by every process of this script
params.output_dir="bgen/" // root directory used by publishDir
params.output="bgen" // prefix of the per-chromosome and merged output files
params.file_listvcf="" // text file listing the input VCF files, one path per line (mandatory)
params.min_scoreinfo=0.6 // variants are kept when <score_imp> > min_scoreinfo
params.max_cores = 8 // cpus requested by the processes, also given to bcftools --threads
params.genotype_field="GP" // value given to qctool -vcf-genotype-field
params.qctoolsv2_bin="qctool"
params.bcftools_bin="bcftools"
params.score_imp="INFO" // name of the field compared with min_scoreinfo in the bcftools filter
params.bgen_type="bgen" // value given to qctool -ofiletype
params.other_opt="" // extra options appended to the qctool conversion command
params.bgen_bits=8 // value given to qctool -bgen-bits
params.cut_hwe = 0 // when > 0, value given to vcftools --hwe before the score filter
if(params.file_listvcf==""){
    error('params.file_listvcf : file contains list vcf not found')
}
def listvcf_file = file(params.file_listvcf)
// Fail with an explicit message instead of an exception raised by readLines().
if(!listvcf_file.exists()){
    error("params.file_listvcf : file ${params.file_listvcf} not found")
}
// One VCF path per line; surrounding whitespace and blank lines are ignored so that
// an empty line in the list is not turned into an empty path.
def vcf_paths = listvcf_file.readLines().collect{ it.trim() }.findAll{ it }
if(vcf_paths.isEmpty()){
    error("params.file_listvcf : file ${params.file_listvcf} does not contain any vcf path")
}
// checkIfExists makes a missing VCF fail at channel creation rather than inside a task.
list_vcf=Channel.fromPath(vcf_paths, checkIfExists:true)
/*
 * Filtering step, run once per VCF of list_vcf.
 * Exactly one of the two processes below is defined, depending on params.cut_hwe:
 *   - filter_vcf      (cut_hwe > 0) : vcftools --hwe filter, then bcftools score filter
 *   - filter_vcfnohwe (otherwise)   : bcftools score filter only
 * Both emit into list_vcf_filt a tuple (chro, filtered vcf, index of the filtered vcf),
 * where chro is the first column of the first non-header line of the input VCF.
 */
if(params.cut_hwe>0){
    process filter_vcf{
        label 'py3utils'
        cpus params.max_cores
        memory params.mem_req
        time params.big_time
        input :
            file(vcf) from list_vcf
        output :
            set env(chro), file("${Ent}"), file("${Ent}.csi") into list_vcf_filt
        script :
            // name of the filtered, bgzip-compressed VCF written by the task
            Ent=vcf.baseName+"_filter.vcf.gz"
            // header lines are removed with an anchored pattern ("^#") so that a data
            // line that merely contains a '#' is not skipped when reading the chromosome
            """
            vcftools --gzvcf $vcf --hwe ${params.cut_hwe} --recode --recode-INFO-all --stdout | ${params.bcftools_bin} view -i '${params.score_imp}>${params.min_scoreinfo}' -Oz --threads ${params.max_cores} > $Ent
            ${params.bcftools_bin} index $Ent
            chro=`zcat $vcf|grep -v "^#"|head -1|awk '{print \$1}'`
            """
    }
}else{
    process filter_vcfnohwe{
        label 'py3utils'
        cpus params.max_cores
        memory params.mem_req
        time params.big_time
        input :
            file(vcf) from list_vcf
        output :
            set env(chro), file("${Ent}"), file("${Ent}.csi") into list_vcf_filt
        script :
            // name of the filtered, bgzip-compressed VCF written by the task
            Ent=vcf.baseName+"_filter.vcf.gz"
            // header lines are removed with an anchored pattern ("^#") so that a data
            // line that merely contains a '#' is not skipped when reading the chromosome
            """
            ${params.bcftools_bin} view -i '${params.score_imp}>${params.min_scoreinfo}' $vcf -Oz --threads ${params.max_cores} > $Ent
            ${params.bcftools_bin} index $Ent
            chro=`zcat $vcf|grep -v "^#"|head -1|awk '{print \$1}'`
            """
    }
}
/*
 * Convert one filtered VCF into bgen format with qctool.
 * Input  : tuple (chro, filtered vcf, index of the vcf) from list_vcf_filt
 * Output : <params.output>_<chro>.bgen   into list_bgen
 *          <params.output>_<chro>.sample into list_bgen_sample
 * Both files are also copied to <params.output_dir>/bgen_chro.
 */
process formatvcfinbgen{
    label 'py3utils'
    time params.big_time
    memory params.mem_req
    cpus params.max_cores
    publishDir "${params.output_dir}/bgen_chro", overwrite:true, mode:'copy'
    input :
        // The index is received through a plain variable: the previous declaration
        // file("${Ent}.csi") referenced 'Ent', which is not defined in this process.
        // The index is not used in the command below; it is only staged with the VCF.
        tuple val(chro), file(vcf), file(vcfindex) from list_vcf_filt
    output :
        file("${out}.bgen") into list_bgen
        file("${out}.sample") into list_bgen_sample
    script :
        // per-chromosome output prefix; the merge step looks for <params.output>_#.bgen
        out="${params.output}_${chro}"
        """
        ${params.qctoolsv2_bin} -g $vcf -vcf-genotype-field ${params.genotype_field} -ofiletype ${params.bgen_type} -og ${out}.bgen -filetype vcf -os ${out}.sample ${params.other_opt} -bgen-bits ${params.bgen_bits}
        """
}
// Gather every per-chromosome bgen / sample file so that the merge runs once, on all of them.
lbgen=list_bgen.collect()
lsample=list_bgen_sample.collect()
/*
 * Merge the per-chromosome bgen files into a single bgen file with qctool.
 * Input  : all the bgen files (lbgen) and all the sample files (lsample)
 * Output : <params.output>.bgen and <params.output>.sample, copied to params.output_dir
 */
process format_mergebgen{
    memory params.mem_req
    time params.big_time
    label 'py3utils'
    // directive grouped with the other directives, ahead of the input declarations
    publishDir "${params.output_dir}/", overwrite:true, mode:'copy'
    input :
        // distinct names: both inputs were previously declared as 'lbgen',
        // the second declaration shadowing the first one
        path(bgenfiles) from lbgen
        path(samplefiles) from lsample
    output :
        file("${Ent}.bgen")
        file("${Ent}.sample")
    script :
        Ent="${params.output}"
        // The first staged sample file is given to qctool with -s.
        // NOTE(review): the '#' in the -g pattern is presumably qctool's wildcard for
        // chromosome numbers, so non numeric chromosome names may not be matched -- confirm.
        """
        filesample=`ls *.sample|head -1`
        ${params.qctoolsv2_bin} -g ${params.output}_#.bgen -ofiletype ${params.bgen_type} -og ${Ent}.bgen -filetype bgen -bgen-bits ${params.bgen_bits} -s \$filesample -os ${Ent}.sample
        """
}