forked from molgenis/NGS_DNA
-
Notifications
You must be signed in to change notification settings - Fork 0
/
startFromVcf.sh
executable file
·125 lines (105 loc) · 4.83 KB
/
startFromVcf.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
#!/bin/bash
#
# Abort on the first failing command (-e) and on use of an unset variable (-u).
#
set -eu

#
# Load the pipeline module and print the loaded modules for the log.
# NOTE(review): EBROOTNGS_DNA, used further down, is presumably exported by this module -- confirm.
#
module load NGS_DNA/3.5.2
module list

#
# The short hostname selects the environment-specific parameters file (parameters_<host>.csv).
#
host="$(hostname -s)"
environmentParameters="parameters_${host}"
function showHelp() {
    #
    # Display commandline help on STDOUT and terminate the script.
    #
    # Arguments: $1 - optional exit status (default: 0), so callers can
    #                 report a usage error with a non-zero status.
    # Outputs:   the usage text on STDOUT.
    # Returns:   never returns; clears any EXIT trap and exits with the requested status.
    #
    cat <<EOH
===============================================================================================================
Script to generate the job scripts for the NGS_DNA startFromVcf workflow from an existing VCF file.
Usage:
$(basename "$0") OPTIONS
Options:
-h Show this help.
-g group (default=basename of ../../../ )
-f filePrefix (default=basename of this directory)
-r runID (default=run01)
-t tmpDirectory (default=basename of ../../ )
-w workdir (default=/groups/\${group}/\${tmpDirectory})
-c capturingKit (required)
-v inputfile (vcf or vcf.gz) (required)
===============================================================================================================
EOH
    # Disable any EXIT trap so that showing the help is not followed by trap output.
    trap - EXIT
    exit "${1:-0}"
}
#
# Parse the commandline options; every option except -h takes an argument.
#
while getopts "t:g:w:f:r:c:v:h" opt; do
    case "${opt}" in
        h) showHelp ;;
        t) tmpDirectory="${OPTARG}" ;;
        g) group="${OPTARG}" ;;
        w) workDir="${OPTARG}" ;;
        f) filePrefix="${OPTARG}" ;;
        c) capturingKit="${OPTARG}" ;;
        v) vcfFile="${OPTARG}" ;;
        r) runID="${OPTARG}" ;;
        *)
            # Unknown option or missing option argument: getopts has already
            # printed a diagnostic. Show the help in a subshell (showHelp
            # calls exit itself) and stop with a failure status instead of
            # silently continuing with partially parsed options.
            (showHelp) >&2
            exit 1
            ;;
    esac
done
#
# Mandatory arguments: capturingKit (-c) and the input VCF (-v).
# showHelp terminates the shell it runs in, so it is called in a subshell
# here; that keeps the 'exit 1' below reachable and makes a missing argument
# end the script with a failure status.
#
if [[ -z "${capturingKit:-}" ]]; then
    echo -e '\nERROR: Must specify an capturingKit\n' >&2
    (showHelp) >&2
    exit 1
fi
if [[ -z "${vcfFile:-}" ]]; then
    echo -e '\nERROR: Must specify an inputFile (vcf)\n' >&2
    (showHelp) >&2
    exit 1
fi

#
# Optional arguments: when not supplied, derive them from the location the
# script is started from (<group>/<tmpDirectory>/.../<filePrefix>).
#
if [[ -z "${tmpDirectory:-}" ]]; then
    tmpDirectory=$(basename "$(cd ../../ && pwd)")
fi
echo "tmpDirectory=${tmpDirectory}"

if [[ -z "${group:-}" ]]; then
    group=$(basename "$(cd ../../../ && pwd)")
fi
echo "group=${group}"

if [[ -z "${workDir:-}" ]]; then
    workDir="/groups/${group}/${tmpDirectory}"
fi
echo "workDir=${workDir}"

if [[ -z "${filePrefix:-}" ]]; then
    filePrefix=$(basename "$(pwd)")
fi
echo "filePrefix=${filePrefix}"

if [[ -z "${runID:-}" ]]; then
    runID="run01"
fi
echo "runID=${runID}"
#
# Validate the input file.
#  * Only the extensions 'vcf' and 'gz' are accepted.
#  * A gzipped file is decompressed next to the original (same name without
#    the last extension); inputVcf always points to the uncompressed file.
#  * The first line of the uncompressed file must contain 'fileformat=VCF'.
# Validation failures are reported on STDERR and end the script with a
# non-zero exit status so callers can detect them.
#
vcfExtension=${vcfFile##*.}
case "${vcfExtension}" in
    gz)
        inputVcf=${vcfFile%.*}
        gzip -c -d "${vcfFile}" > "${inputVcf}"
        ;;
    vcf)
        inputVcf=${vcfFile}
        ;;
    *)
        echo "ERROR:the extension of the file is not vcf, please select only vcf files" >&2
        exit 1
        ;;
esac

header=$(head -n 1 "${inputVcf}")
if [[ "${header}" == *fileformat=VCF* ]]; then
    echo "valid vcf Format"
else
    echo "ERROR:the header of the file does not contain vcf header, is the format correct?" >&2
    exit 1
fi
## make samplesheet
sh "${EBROOTNGS_DNA}/scripts/convertVcfToSamplesheet.sh" -i "${inputVcf}" -p "${filePrefix}" -c "${capturingKit}"

genScripts="${workDir}/generatedscripts/${filePrefix}/"
samplesheet="${genScripts}/${filePrefix}.csv"

# Defaults, optionally overridden by non-empty build.txt / species.txt in the
# current directory. NOTE(review): build and species are not referenced again
# in this script -- confirm whether they are still needed.
build="b37"
species="homo_sapiens"
if [[ -s build.txt ]]; then build=$(<build.txt); fi
if [[ -s species.txt ]]; then species=$(<species.txt); fi

# Line count of the samplesheet (counts every line of the file).
sampleSize=$(wc -l < "${samplesheet}")
echo "Samplesize is ${sampleSize}"
batching="_chr"

## convert the parameter files into the format molgenis_compute expects
echo "tmpName,${tmpDirectory}" > "${genScripts}/tmpdir_parameters.csv"
convertParameters="${EBROOTNGS_DNA}/scripts/convertParametersGitToMolgenis.pl"
perl "${convertParameters}" "${genScripts}/tmpdir_parameters.csv" > "${genScripts}/parameters_tmpdir_converted.csv"
perl "${convertParameters}" "${EBROOTNGS_DNA}/parameters.csv" > "${genScripts}/parameters_converted.csv"
perl "${convertParameters}" "${EBROOTNGS_DNA}/parameters_${group}.csv" > "${genScripts}/parameters_group_converted.csv"
perl "${convertParameters}" "${EBROOTNGS_DNA}/${environmentParameters}.csv" > "${genScripts}/parameters_environment_converted.csv"
echo "BATCHIDLIST=${EBROOTNGS_DNA}/batchIDList${batching}.csv"

# 'module list' may print to STDERR depending on the modules implementation
# and configuration; merge both streams so the loaded NGS_DNA version is found
# either way (an empty grep result would abort the script under 'set -e').
ngsversion=$(module list 2>&1 | grep -o -P 'NGS_DNA(.+)')

## create the project directory structure
projectJobsDir="${workDir}/projects/${filePrefix}/${runID}/jobs/"
projectResultsDir="${workDir}/projects/${filePrefix}/${runID}/results/"
intermediateDir="${workDir}/tmp/${filePrefix}/${runID}/"
projectLogsDir="${workDir}/logs/${filePrefix}/"
mkdir -p "${projectJobsDir}"
mkdir -p "${projectLogsDir}"
mkdir -p "${intermediateDir}"
mkdir -p "${projectResultsDir}"

cp "${samplesheet}" "${projectJobsDir}/${filePrefix}.csv"
cp "${vcfFile}" "${intermediateDir}"
# cp places the file in intermediateDir under its basename, so the path handed
# to the workflow must not contain the directory part of the -v argument.
vcfBasename=$(basename "${vcfFile}")

## generate the jobs
# The converted parameter files were written to genScripts above; reference
# them there instead of relying on the current directory being genScripts.
"${EBROOTMOLGENISMINCOMPUTE}/molgenis_compute.sh" \
    -p "${genScripts}/parameters_converted.csv" \
    -p "${EBROOTNGS_DNA}/batchIDList${batching}.csv" \
    -p "${projectJobsDir}/${filePrefix}.csv" \
    -p "${genScripts}/parameters_environment_converted.csv" \
    -p "${genScripts}/parameters_group_converted.csv" \
    -p "${genScripts}/parameters_tmpdir_converted.csv" \
    -rundir "${projectJobsDir}" \
    --header "${EBROOTNGS_DNA}/templates/slurm/header.ftl" \
    --footer "${EBROOTNGS_DNA}/templates/slurm/footer.ftl" \
    --submit "${EBROOTNGS_DNA}/templates/slurm/submit.ftl" \
    -w "${EBROOTNGS_DNA}/workflow_startFromVcf.csv" \
    -b slurm \
    -g \
    -weave \
    -runid "${runID}" \
    -o "ngsversion=${ngsversion};groupname=${group};inputVcf=${intermediateDir}/${vcfBasename}"