Skip to content

Commit

Permalink
Merge pull request #152 from RoanKanninga/master
Browse files Browse the repository at this point in the history
new style generate_template
  • Loading branch information
Gerbenvandervries authored Jul 25, 2017
2 parents 0fd57fd + ca8e5b2 commit a630a01
Show file tree
Hide file tree
Showing 4 changed files with 104 additions and 82 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
<h1>NGS_DNA pipeline</h1>

<h2>Manual</h2>
Find manual on installation and use at https://molgenis.gitbooks.io/molgenis-pipelines/

Expand All @@ -7,6 +8,7 @@ The sequencer is producing reads (in FastQ format) and are aligned to the hg19 r
Sambamba (Tarasov et al.<sup>2</sup>) processes the aligned reads; we then apply GATK (McKenna et al. <sup>3</sup>) duplicate removal
and perform SNP and INDEL discovery and genotyping using standard hard filtering parameters according to GATK Best Practices recommendations (Van der Auwera et al.<sup>4</sup>)


<h3>References</h3>
1. Li and Durbin, Fast and accurate short read alignment with Burrows-Wheeler transform.
2. Sambamba: Fast processing of NGS alignment formats
Expand Down
157 changes: 86 additions & 71 deletions generate_template.sh
Original file line number Diff line number Diff line change
@@ -1,94 +1,109 @@
#!/bin/bash

module load NGS_DNA/3.4.1
module list
HOST=$(hostname -s)
thisDir=$(pwd)
module list
host=$(hostname -s)
environmentParameters="parameters_${host}"

function showHelp() {
	#
	# Display commandline help on STDOUT and terminate the script.
	#
	# Arguments: none.
	# Outputs:   the usage text below, written to STDOUT.
	# Exits:     always ends the calling shell with status 0; any EXIT trap is
	#            cleared first so it does not fire for a plain help request.
	#
	# The heredoc body is deliberately not indented: the delimiter is unquoted and
	# has no '-' modifier, so leading whitespace would end up in the output.
	#
	cat <<EOH
===============================================================================================================
Script to generate the scripts for an NGS_DNA project with Molgenis Compute (molgenis_compute.sh --generate).
Usage:
$(basename -- "$0") OPTIONS
Options:
-h Show this help.
-a sampleType (DNA or RNA) (default=DNA)
-b build (default=b37)
-c batch (_chr or _small) (default=_chr)
-g group (default=basename of ../../../ )
-p project (default=basename of this directory)
-r runID (default=run01)
-s species (default=homo_sapiens)
-t tmpDirectory (default=basename of ../../ )
-w workdir (default=/groups/\${group}/\${tmpDirectory})
===============================================================================================================
EOH
	trap - EXIT
	exit 0
}


#
# Parse the commandline options. Anything not supplied here keeps its variable
# unset. The leading ':' in the optstring puts getopts in silent mode so that
# invalid input is reported by the ':' and '?' branches below and aborts the
# run instead of being ignored.
#
while getopts ":a:t:g:w:p:r:s:b:c:h" opt; do
	case "${opt}" in
		h) showHelp ;;
		# -a is advertised by the help text; the value is stored in sampleType.
		a) sampleType="${OPTARG}" ;;
		t) tmpDirectory="${OPTARG}" ;;
		g) group="${OPTARG}" ;;
		w) workDir="${OPTARG}" ;;
		p) project="${OPTARG}" ;;
		r) runID="${OPTARG}" ;;
		s) species="${OPTARG}" ;;
		b) build="${OPTARG}" ;;
		c) batch="${OPTARG}" ;;
		:)
			echo "ERROR: option -${OPTARG} requires an argument. Use -h for help." >&2
			exit 1
			;;
		\?)
			echo "ERROR: unknown option -${OPTARG}. Use -h for help." >&2
			exit 1
			;;
	esac
done

#
# Fill in a default for every setting that was not given on the commandline
# and echo the effective value of each one. The directory-derived defaults are
# taken relative to the current working directory; the nested command
# substitutions are quoted so paths containing whitespace are not word-split.
#
if [[ -z "${tmpDirectory:-}" ]]; then
	tmpDirectory="$(basename "$(cd ../../ && pwd)")"
fi
echo "tmpDirectory=${tmpDirectory}"

if [[ -z "${group:-}" ]]; then
	group="$(basename "$(cd ../../../ && pwd)")"
fi
echo "group=${group}"

# workDir is derived from group and tmpDirectory, so it must come after them.
if [[ -z "${workDir:-}" ]]; then
	workDir="/groups/${group}/${tmpDirectory}"
fi
echo "workDir=${workDir}"

if [[ -z "${project:-}" ]]; then
	project="$(basename "$(pwd)")"
fi
echo "project=${project}"

if [[ -z "${runID:-}" ]]; then
	runID="run01"
fi
echo "runID=${runID}"

if [[ -z "${species:-}" ]]; then
	species="homo_sapiens"
fi
echo "species=${species}"

if [[ -z "${build:-}" ]]; then
	build="b37"
fi
echo "build=${build}"

if [[ -z "${batch:-}" ]]; then
	batch="_chr"
fi
echo "batch=${batch}"

ENVIRONMENT_PARAMETERS="parameters_${HOST}.csv"
TMPDIRECTORY=$(basename $(cd ../../ && pwd ))
GROUP=$(basename $(cd ../../../ && pwd ))

PROJECT=projectXX
WORKDIR="/groups/${GROUP}/${TMPDIRECTORY}"
RUNID=runXX
SPECIES="homo_sapiens"
BUILD="b37"

## Normal user, please leave BATCH at _chr
## For expert modus: small batchsize (6) fill in '_small' or per chromosome fill in _chr
BATCH="_chr"

GENSCRIPTS="${WORKDIR}/generatedscripts/${PROJECT}"

samplesheet=${GENSCRIPTS}/${PROJECT}.csv
mac2unix $samplesheet
genScripts="${workDir}/generatedscripts/${project}/"
samplesheet="${genScripts}/${project}.csv" ; mac2unix "${samplesheet}"

python ${EBROOTNGS_DNA}/scripts/samplesize.py ${samplesheet} $thisDir
SAMPLESIZE=$(cat externalSampleIDs.txt | uniq | wc -l)
python "${EBROOTNGS_DNA}/scripts/samplesize.py" "${samplesheet}" $(pwd)
sampleSize=$(cat externalSampleIDs.txt | uniq | wc -l)
echo "Samplesize is ${sampleSize}"

python ${EBROOTNGS_DNA}/scripts/gender.py $samplesheet
var=$(cat ${samplesheet}.tmp | wc -l)
python "${EBROOTNGS_DNA}/scripts/gender.py" "${samplesheet}"
var=$(cat "${samplesheet}.tmp" | wc -l)


if [ $var != 0 ]
if [ "${var}" != 0 ]
then
mv ${samplesheet}.tmp ${samplesheet}
mv "${samplesheet}.tmp" "${samplesheet}"
echo "samplesheet updated with Gender column"
fi
echo "Samplesize is $SAMPLESIZE"

if [ $SAMPLESIZE -gt 199 ]
if [ $sampleSize -gt 199 ]
then
WORKFLOW=${EBROOTNGS_DNA}/workflow_samplesize_bigger_than_200.csv
workflow=${EBROOTNGS_DNA}/workflow_samplesize_bigger_than_200.csv
else
WORKFLOW=${EBROOTNGS_DNA}/workflow.csv
fi

if [ -f .compute.properties ];
then
rm .compute.properties
fi

if [ -f ${GENSCRIPTS}/out.csv ];
then
rm -rf ${GENSCRIPTS}/out.csv
workflow=${EBROOTNGS_DNA}/workflow.csv
fi

echo "tmpName,${TMPDIRECTORY}" > ${GENSCRIPTS}/tmpdir_parameters.csv

perl ${EBROOTNGS_DNA}/scripts/convertParametersGitToMolgenis.pl ${GENSCRIPTS}/tmpdir_parameters.csv > \
${GENSCRIPTS}/tmpdir_parameters_converted.csv

perl ${EBROOTNGS_DNA}/scripts/convertParametersGitToMolgenis.pl ${EBROOTNGS_DNA}/parameters.csv > \
${GENSCRIPTS}/out.csv

perl ${EBROOTNGS_DNA}/scripts/convertParametersGitToMolgenis.pl ${EBROOTNGS_DNA}/parameters_${GROUP}.csv > \
${GENSCRIPTS}/group_parameters.csv

perl ${EBROOTNGS_DNA}/scripts/convertParametersGitToMolgenis.pl ${EBROOTNGS_DNA}/${ENVIRONMENT_PARAMETERS} > \
${GENSCRIPTS}/environment_parameters.csv


sh $EBROOTMOLGENISMINCOMPUTE/molgenis_compute.sh \
-p ${GENSCRIPTS}/out.csv \
-p ${GENSCRIPTS}/group_parameters.csv \
-p ${GENSCRIPTS}/environment_parameters.csv \
-p ${GENSCRIPTS}/tmpdir_parameters_converted.csv \
-p ${EBROOTNGS_DNA}/batchIDList${BATCH}.csv \
-p ${GENSCRIPTS}/${PROJECT}.csv \
-w ${EBROOTNGS_DNA}/create_in-house_ngs_projects_workflow.csv \
-rundir ${GENSCRIPTS}/scripts \
--runid ${RUNID} \
-o "workflowpath=${WORKFLOW};\
outputdir=scripts/jobs;mainParameters=${GENSCRIPTS}/out.csv;\
group_parameters=${GENSCRIPTS}/group_parameters.csv;\
groupname=${GROUP};\
if [ -f "${genScripts}/out.csv" ];then rm -rf "${genScripts}/out.csv" ; fi

echo "tmpName,${tmpDirectory}" > ${genScripts}/tmpdir_parameters.csv
perl "${EBROOTNGS_DNA}/scripts/convertParametersGitToMolgenis.pl" "${genScripts}/tmpdir_parameters.csv" > "${genScripts}/tmpdir_parameters_converted.csv"
perl "${EBROOTNGS_DNA}/scripts/convertParametersGitToMolgenis.pl" "${EBROOTNGS_DNA}/parameters.csv" > "${genScripts}/out.csv"
perl "${EBROOTNGS_DNA}/scripts/convertParametersGitToMolgenis.pl" "${EBROOTNGS_DNA}/parameters_${group}.csv" > "${genScripts}/group_parameters.csv"
perl "${EBROOTNGS_DNA}/scripts/convertParametersGitToMolgenis.pl" "${EBROOTNGS_DNA}/${environmentParameters}.csv" > "${genScripts}/environment_parameters.csv"

echo "BATCHIDLIST=${EBROOTNGS_DNA}/batchIDList${batch}.csv"

sh "${EBROOTMOLGENISMINCOMPUTE}/molgenis_compute.sh" \
-p "${genScripts}/out.csv" \
-p "${genScripts}/group_parameters.csv" \
-p "${genScripts}/environment_parameters.csv" \
-p "${genScripts}/tmpdir_parameters_converted.csv" \
-p "${EBROOTNGS_DNA}/batchIDList${batch}.csv" \
-p "${genScripts}/${project}.csv" \
-w "${EBROOTNGS_DNA}/create_in-house_ngs_projects_workflow.csv" \
-rundir "${genScripts}/scripts" \
--runid "${runID}" \
-o workflowpath="${workflow};\
outputdir=scripts/jobs;mainParameters=${genScripts}/out.csv;\
group_parameters=${genScripts}/group_parameters.csv;\
groupname=${group};\
ngsversion=$(module list | grep -o -P 'NGS_DNA(.+)');\
environment_parameters=${GENSCRIPTS}/environment_parameters.csv;\
tmpdir_parameters=${GENSCRIPTS}/tmpdir_parameters_converted.csv;\
batchIDList=${EBROOTNGS_DNA}/batchIDList${BATCH}.csv;\
worksheet=${GENSCRIPTS}/${PROJECT}.csv" \
environment_parameters=${genScripts}/environment_parameters.csv;\
tmpdir_parameters=${genScripts}/tmpdir_parameters_converted.csv;\
batchIDList=${EBROOTNGS_DNA}/batchIDList${batch}.csv;\
worksheet=${genScripts}/${project}.csv" \
-weave \
--generate

4 changes: 2 additions & 2 deletions protocols/CreateExternSamplesProjects.sh
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@ set -e
set -u

umask 0007
module load $ngsUtilsVersion
module load $ngsversion
module load ${ngsUtilsVersion}
module load ${ngsversion}

module list
#
Expand Down
23 changes: 14 additions & 9 deletions test/test_pipeline.sh
Original file line number Diff line number Diff line change
Expand Up @@ -57,23 +57,28 @@ cp generate_template.sh ${workfolder}/generatedscripts/PlatinumSubset/generate_t
fgrep "computeVersion," parameters.csv > ${workfolder}/generatedscripts/PlatinumSubset/mcVersion.txt

NGS_DNA_VERSION=NGS_DNA/3.4.1
module load $NGS_DNA_VERSION
perl -pi -e "s|module load $NGS_DNA_VERSION|EBROOTNGS_DNA=/groups/umcg-gaf/tmp04/tmp/NGS_DNA/|" ${workfolder}/generatedscripts/PlatinumSubset/generate_template.sh
perl -pi -e 's|PROJECT=projectXX|PROJECT=PlatinumSubset|' ${workfolder}/generatedscripts/PlatinumSubset/generate_template.sh
perl -pi -e 's|RUNID=runXX|RUNID=run01|' ${workfolder}/generatedscripts/PlatinumSubset/generate_template.sh
module load ${NGS_DNA_VERSION}
EBROOTNGS_DNA=/groups/umcg-gaf/tmp04/tmp/NGS_DNA/

perl -pi -e "s|module load ${NGS_DNA_VERSION}|EBROOTNGS_DNA=/groups/umcg-gaf/tmp04/tmp/NGS_DNA/|" ${workfolder}/generatedscripts/PlatinumSubset/generate_template.sh
echo "perl -pi -e |module load ${NGS_DNA_VERSION}|EBROOTNGS_DNA=/groups/umcg-gaf/tmp04/tmp/NGS_DNA/| ${workfolder}/generatedscripts/PlatinumSubset/generate_template.sh"
perl -pi -e 's|ngsversion=.*|ngsversion="test";\\|' ${workfolder}/generatedscripts/PlatinumSubset/generate_template.sh
perl -pi -e 's|create_in-house_ngs_projects_workflow.csv|create_external_samples_ngs_projects_workflow.csv|' ${workfolder}/generatedscripts/PlatinumSubset/generate_template.sh
# Put a "module load Molgenis-Compute/dummy" line in front of the
# molgenis_compute.sh call in the copied template. The pattern accepts both the
# bare form (sh $EBROOTMOLGENISMINCOMPUTE/...) and the quoted, braced form
# (sh "${EBROOTMOLGENISMINCOMPUTE}/...") so the substitution applies whichever
# one the template uses; $& re-emits the matched call unchanged.
perl -pi -e 's|sh "?\$\{?EBROOTMOLGENISMINCOMPUTE\}?/molgenis_compute\.sh"?|module load Molgenis-Compute/dummy\n$&|' "${workfolder}/generatedscripts/PlatinumSubset/generate_template.sh"
perl -pi -e "s|module load Molgenis-Compute/dummy|module load Molgenis-Compute/\$mcVersion|" ${workfolder}/generatedscripts/PlatinumSubset/generate_template.sh
perl -pi -e 's|WORKFLOW=\${EBROOTNGS_DNA}/workflow.csv|WORKFLOW=\${EBROOTNGS_DNA}/test_workflow.csv|' ${workfolder}/generatedscripts/PlatinumSubset/generate_template.sh

# Point the copied template at test_workflow.csv instead of workflow.csv.
# The whole perl expression is single-quoted (it previously opened with ' and
# closed with "), and the '$' is escaped on both sides so perl treats
# ${EBROOTNGS_DNA} as literal text rather than interpolating a perl variable.
perl -pi -e 's|workflow=\$\{EBROOTNGS_DNA\}/workflow\.csv|workflow=\${EBROOTNGS_DNA}/test_workflow.csv|' "${workfolder}/generatedscripts/PlatinumSubset/generate_template.sh"
cp test/PlatinumSubset.csv ${workfolder}/generatedscripts/PlatinumSubset/
cd ${workfolder}/generatedscripts/PlatinumSubset/
sh generate_template.sh
sh generate_template.sh
cd scripts
perl -pi -e 's|module load \$ngsversion|EBROOTNGS_DNA=/groups/umcg-gaf/tmp04/tmp/NGS_DNA/\n|' *.sh
###### Load a version of molgenis compute
# Replace the "module load test" placeholder with the real NGS_DNA module.
# The command previously lacked its closing quote and file argument.
# NOTE(review): *.sh is assumed as the target, matching the neighbouring
# substitutions in this directory; confirm.
perl -pi -e "s|module load test|module load ${NGS_DNA_VERSION}|" *.sh
######
perl -pi -e "s|/apps/software/${NGS_DNA_VERSION}/|/groups/umcg-gaf/tmp04/tmp/NGS_DNA/|g" *.sh
sh submit.sh
cd ${workfolder}/projects/PlatinumSubset/run01/jobs/
Expand All @@ -88,8 +93,8 @@ for i in $(ls s*_GenderCheck_1.sh); do touch $i.finished ; touch ${i%.*}.env; ch
for i in $(ls s*_GenderCalculate_1.sh); do touch $i.finished ; touch ${i%.*}.env; chmod 755 ${i%.*}.env ;done
printf "This is a male\n" > //groups/umcg-gaf//tmp04//tmp//PlatinumSubset/run01//PlatinumSample_NA12891.chosenSex.txt
printf "Male\n" >> //groups/umcg-gaf//tmp04//tmp//PlatinumSubset/run01//PlatinumSample_NA12891.chosenSex.txt
perl -pi -e 's|module load test|EBROOTNGS_DNA=/groups/umcg-gaf/tmp04/tmp/NGS_DNA/|' s*_QCStats_*.sh
perl -pi -e 's|module load test|EBROOTNGS_DNA=/groups/umcg-gaf/tmp04/tmp/NGS_DNA/|' s*_DecisionTree_*.sh
perl -pi -e 's|module load test|EBROOTNGS_DNA=/groups/umcg-gaf/tmp04/tmp/NGS_DNA/|' s*_QCStats_*.sh
perl -pi -e 's|module load test|EBROOTNGS_DNA=/groups/umcg-gaf/tmp04/tmp/NGS_DNA/|' s*_DecisionTree_*.sh
perl -pi -e 's|module load test|#|' s*_QCReport_0.sh
perl -pi -e 's|countShScripts-3\)\)|countShScripts-4))|' s*_CountAllFinishedFiles_0.sh
perl -pi -e 's|--time=16:00:00|--time=05:59:00|' *.sh
Expand Down

0 comments on commit a630a01

Please sign in to comment.