-
Notifications
You must be signed in to change notification settings - Fork 0
/
variant_call.sh
executable file
·157 lines (130 loc) · 4.71 KB
/
variant_call.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
if [ $# -ne 2 ] ; then
echo "Error !!! Requires 2 arguments. $# argument(s) provided. Exiting ... (ERR_CODE: 1000)"
exit 1000
fi
declare freebayes_path
declare samtools_path
declare bwa_path
declare trim_galore_path
declare trim_galore_cores
declare bwa_mem_cores
declare samtools_cores
declare script_path
declare input_folder
declare input_id
declare input_count
declare output_folder
declare output_path
declare -a files
declare -a input_ids
script_path="$( cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )/"
source $script_path"config.sh"
input_folder=$1
output_folder=$2
if [ ! -d "$input_folder" ] ; then
echo "Error !!! Invalid 'input_folder' given. Exiting ... (ERR_CODE: 1001)"
exit 1001
fi
if [ ! -d "$output_folder" ] ; then
echo "Error !!! Invalid 'output_folder' given. Exiting ... (ERR_CODE: 1002)"
exit 1002
fi
if [ "${input_folder[@]: -1: 1}" != "/" ] ; then
input_folder=$input_folder"/"
fi
if [ "${output_folder[@]: -1: 1}" != "/" ] ; then
output_folder=$output_folder"/"
fi
# files=(`ls $input_folder*.fastq.gz`)
files=(`find $input_folder -maxdepth 1 -type f -name "*.fastq.gz"`)
for ((i=0,j=0; i<${#files[@]}; ++i)) ; do
declare temp
temp=${files[$i]##*/}
temp=${temp%%.*}
temp=${temp%_*}
if [[ ! " ${input_ids[*]} " =~ " $temp " ]]; then
input_ids[$j]=$temp
((j=j+1))
fi
# echo $j" => "$temp" => "${input_ids[j-1]}
unset temp
done
input_count=${#input_ids[@]}
if [[ $input_count -eq 0 ]] ; then
echo "Error !!! No input .fastq files. Exiting ... (ERR_CODE: 1001)"
exit 1001
fi
echo "Running with following arguments:"
echo "script_path = "$script_path
echo "input_folder = "$input_folder
echo "Sample IDs = {"${input_ids[@]}"}"
echo "output_folder = "$output_folder
echo "reference_genome = "$script_path"data/GCF_000195955.2_ASM19595v2_genomic.fna"
echo "freebayes_path = "$freebayes_path
echo "samtools_path = "$samtools_path
echo "bwa_path = "$bwa_path
echo "trim_galore_path = "$trim_galore_path
echo "trim_galore_cores = "$trim_galore_cores
echo "bwa_mem_cores = "$bwa_mem_cores
echo "samtools_cores = "$samtools_cores
echo ""
((k=1))
for input_id in ${input_ids[@]} ; do
output_path=$output_folder$input_id"/"
echo "($k / $input_count) Running for $input_id (output_path = $output_path) ..."
echo ""
echo "Doing mkdir(s) ..."
mkdir $output_path
mkdir $output_path"reference"
cp $script_path"data/GCF_000195955.2_ASM19595v2_genomic.fna" $output_path"reference"
echo "Done mkdir(s)"
echo ""
#quality preprocess
echo "Doing trim-galore ..."
$trim_galore_path --cores "$trim_galore_cores" --paired $input_folder$input_id\_1.fastq.gz $input_folder$input_id\_2.fastq.gz --output_dir $output_path >/dev/null 2>&1
echo "Done trim-galore. Exit status $?"
echo ""
#indexing reference
echo "Doing bwa index ..."
$bwa_path index $output_path"reference/GCF_000195955.2_ASM19595v2_genomic.fna" >/dev/null 2>&1
echo "Done bwa index. Exit status $?"
echo ""
#alignment
echo "Doing bwa mem ..."
$bwa_path mem -t "$bwa_mem_cores" $output_path"reference/GCF_000195955.2_ASM19595v2_genomic.fna" -R "@RG\tID:$input_id\_\tSM:$input_id\_\tPL:ILLUMINA\tLB:$input_id\_" $output_path$input_id\_1_val_1.fq.gz $output_path$input_id\_2_val_2.fq.gz | samtools sort "-@"$samtools_cores -o $output_path$input_id.bam
echo "Done bwa mem. Exit status $?"
echo ""
#indexing the aligned bam
echo "Doing samtools index ..."
$samtools_path index $output_path$input_id.bam >/dev/null 2>&1
echo "Done samtools index. Exit status $?"
echo ""
#sorting the bam on names of the reads
echo "Doing samtools sort ..."
$samtools_path sort -n "-@"$samtools_cores -o $output_path$input_id\_namesort.bam $output_path$input_id.bam >/dev/null 2>&1
echo "Done samtools sort. Exit status $?"
echo ""
#fix the paired mates
echo "Doing samtools fixmate ..."
$samtools_path fixmate -m "-@"$samtools_cores $output_path$input_id\_namesort.bam $output_path$input_id\_fix.bam >/dev/null 2>&1
echo "Done samtools fixmate. Exit status $?"
echo ""
#sorting the bam on positions
echo "Doing samtools sort ..."
$samtools_path sort "-@"$samtools_cores -o $output_path$input_id\_positionsort.bam $output_path$input_id\_fix.bam >/dev/null 2>&1
echo "Done samtools sort. Exit status $?"
echo ""
#marking the duplicates
echo "Doing samtools markdup ..."
$samtools_path markdup "-@"$samtools_cores $output_path$input_id\_positionsort.bam $output_path$input_id\_markdup.bam >/dev/null 2>&1
echo "Done markdup. Exit status $?"
echo ""
#variant calling (ploidy 1)
echo "Doing freebayes ..."
$freebayes_path -f $output_path"reference/GCF_000195955.2_ASM19595v2_genomic.fna" -p 1 $output_path$input_id\_markdup.bam > $output_path$input_id.vcf
echo "Done freebayes. Exit status $?"
echo ""
echo "($k / $input_count) Done for $input_id ..."
echo ""
((k=$k+1))
done