forked from h3abionet/h3agwas
-
Notifications
You must be signed in to change notification settings - Fork 0
/
convert_posversiongenome.nf
executable file
·158 lines (143 loc) · 4.75 KB
/
convert_posversiongenome.nf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
#!/usr/bin/env nextflow
/*
* Authors :
*
*
* Scott Hazelhurst
* jean-tristan Brandenburg
*
* On behalf of the H3ABionet Consortium
* 2015-2022
*
*
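* Description : Nextflow pipeline to convert the positions of a file (by default the GWAS Catalog) between genome builds, using rs identifiers looked up in a reference VCF and CrossMap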
*
*(C) University of the Witwatersrand, Johannesburg, 2016-2019 on behalf of the H3ABioNet Consortium
*This is licensed under the MIT Licence. See the "LICENSE" file for details
*/
nextflow.enable.dsl = 1
import java.nio.file.Paths;
import sun.nio.fs.UnixPath;
import java.security.MessageDigest;
// Checks if the file exists
allowed_params = ['file_toconvert','file_ref_gzip', "output_dir","output", "input_dir", "input_pat"]
/*file to convert if*/
params.file_toconvert=""
params.link_gwas_cat="https://www.ebi.ac.uk/gwas/api/search/downloads/alternative"
params.head_rs="SNPS"
params.head_chr="CHR_ID"
params.head_bp="CHR_POS"
params.output_dir='gwascat'
params.sep="TAB"
params.rs_info=""
params.poshead_rs_inforef=2
params.poshead_bp_inforef=3
params.file_ref_gzip=""
params.link_rs_info="ftp://ftp.ncbi.nlm.nih.gov/snp/organisms/human_9606_b151_GRCh37p13/VCF/All_20180423.vcf.gz"
params.bin_crossmap="/usr/local/bin/CrossMap.py"
params.data_crossmap=''
params.link_data_crossmap='http://hgdownload.soe.ucsc.edu/goldenPath/hg38/liftOver/hg38ToHg19.over.chain.gz'
//wget ftp://ftp.ncbi.nih.gov/snp/organisms/human_9606_b151_GRCh38p7/VCF/All_20180418.vcf.gz
if(params.file_toconvert==""){
process DlGwasCT{
publishDir "${params.output_dir}/datai/", overwrite:true, mode:'copy'
output :
file("${fileconvert}err")
file(fileconvert) into (file_convert_ch_ext, file_convert_ch)
script :
fileconverti="gwas_catalog_i.tsv"
fileconvert="gwas_catalog.tsv"
"""
wget -O $fileconverti ${params.link_gwas_cat} --no-check-certificate
check_colfile.py $fileconverti $fileconvert
"""
}
}else{
file_convert_ch=Channel.fromPath(params.file_toconvert)
file_convert_ch_ext=Channel.fromPath(params.file_toconvert)
}
if(params.file_ref_gzip==""){
process DlInfoRs{
output :
file(file_rsinfo) into (file_rsinfo_ch)
publishDir "${params.output_dir}/datai/", overwrite:true, mode:'copy'
script :
file_rsinfo="All_rs.vcf.gz"
"""
wget -O $file_rsinfo ${params.link_rs_info} --no-check-certificate
"""
}
}else{
file_rsinfo_ch=Channel.fromPath(params.file_ref_gzip)
}
process ExtractInfo{
label 'R'
input :
file(fileconvert) from file_convert_ch_ext
output :
file("${headout}.rs") into rs_convert
file("${headout}.pos") into pos_convert
script :
headout="search"
"""
cp_extractpos.r --file $fileconvert --out $headout --head_rs ${params.head_rs} --head_bp ${params.head_bp} --head_chr ${params.head_chr} --sep ${params.sep}
"""
}
process SearchPosWithRs{
input :
file(rsinfo) from file_rsinfo_ch
file(rs_convert) from rs_convert
publishDir "${params.output_dir}/tmpi/", overwrite:true, mode:'copy'
output :
file(outinfors) into outinfors_ch
script :
outinfors='info_extract.info'
"""
zcat $rsinfo | cp_searchposwithrs.py $rs_convert ${outinfors}.tmp ${params.poshead_rs_inforef}
awk '{if(NF>6){for(Cmt=7;Cmt<=NF;Cmt++)\$6=\$6";"\$Cmt};print \$1"\\t"\$2"\\t"\$3"\\t"\$4"\\t"\$5"\\t"\$6}' ${outinfors}.tmp > $outinfors
"""
}
if(params.data_crossmap==""){
process DlDataCrossMap{
publishDir "${params.output_dir}/datai/", overwrite:true, mode:'copy'
output :
file(fileout) into CrossMap_data_ch
script :
fileout=params.link_data_crossmap.split('/').last()
"""
wget -c ${params.link_data_crossmap} --no-check-certificate
"""
}
}else{
CrossMap_data_ch=Channel.fromPath(params.data_crossmap)
}
process CrossMapLaunch{
label 'py3utils'
input :
file(CrossMapRef) from CrossMap_data_ch
file(posI) from pos_convert
publishDir "${params.output_dir}/tmpi/", overwrite:true, mode:'copy'
output :
file(poscrossmap) into result_crossmap
script :
poscrossmap='convert_crossmap.cross'
"""
${params.bin_crossmap} bed $CrossMapRef $posI $poscrossmap".tmp"
awk '{if(NF>5){for(Cmt=6;Cmt<=NF;Cmt++)\$5=\$5";"\$Cmt};print \$1"\\t"\$2"\\t"\$3"\\t"\$4"\\t"\$5}' $poscrossmap".tmp" > $poscrossmap
"""
}
process MergeRes{
label 'R'
input :
file(crossmap) from result_crossmap
file(outinfors) from outinfors_ch
file(filetoconvert) from file_convert_ch
publishDir "${params.output_dir}/", overwrite:true, mode:'copy'
output :
file("$headout*")
script :
headout=params.output
"""
cp_mergepos.r --file $filetoconvert --out $headout --head_rs ${params.head_rs} --head_bp ${params.head_bp} --head_chr ${params.head_chr} --sep ${params.sep} --file_rsres $outinfors --file_cross $crossmap
"""
}