From 1afdc0ba3c199260d85b22ded0b0f0a7089ac5aa Mon Sep 17 00:00:00 2001 From: RoanKanninga Date: Mon, 26 Mar 2018 10:31:05 +0200 Subject: [PATCH 1/7] fix coverageCalculations issue --- protocols/CoverageCalculations.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/protocols/CoverageCalculations.sh b/protocols/CoverageCalculations.sh index eda80c2a..1fc7dfd3 100755 --- a/protocols/CoverageCalculations.sh +++ b/protocols/CoverageCalculations.sh @@ -78,7 +78,7 @@ then awk -v OFS='\t' '{print $1,$3}' "${sampleNameID}.${perTarget}.coveragePerTarget.sample_interval_summary" | sed '1d' > "${sampleNameID}.${perTarget}.coveragePerTarget.coveragePerTarget.txt.tmp.tmp" sort -V "${sampleNameID}.${perTarget}.coveragePerTarget.coveragePerTarget.txt.tmp.tmp" > "${sampleNameID}.${perTarget}.coveragePerTarget.coveragePerTarget.txt.tmp" - perl -pi -e 's|-|\^|' "${perTargetDir}/${perTarget}.genesOnly" > "${sampleNameID}.${perTarget}.coveragePerTarget.genesOnly.tmp" + perl -p -e 's|-|\^|' "${perTargetDir}/${perTarget}.genesOnly" > "${sampleNameID}.${perTarget}.coveragePerTarget.genesOnly.tmp" paste "${sampleNameID}.${perTarget}.coveragePerTarget.coveragePerTarget.txt.tmp" "${sampleNameID}.${perTarget}.coveragePerTarget.genesOnly.tmp" > "${sampleNameID}.${perTarget}.coveragePerTarget_inclGenes.txt" ##Paste command produces ^M character From 5f7269305133798a40f8ee8752e2ae380ba49c3d Mon Sep 17 00:00:00 2001 From: RoanKanninga Date: Mon, 26 Mar 2018 10:51:01 +0200 Subject: [PATCH 2/7] added check if bedfile exists on capturingkit part as the coverageper(base/target) part --- protocols/CreateInhouseProjects.sh | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/protocols/CreateInhouseProjects.sh b/protocols/CreateInhouseProjects.sh index ce22f771..152fbb2a 100755 --- a/protocols/CreateInhouseProjects.sh +++ b/protocols/CreateInhouseProjects.sh @@ -31,6 +31,9 @@ #string ngsversion #string ngsUtilsVersion +#string dataDir +#string inhouseIntervalsDir 
+ #string project #string logsDir @@ -119,11 +122,31 @@ fi batching="_small" capturingKitProject=$(python ${EBROOTNGS_DNA}/scripts/getCapturingKit.py "${projectJobsDir}/${project}.csv") +captKit=$(echo "${capturingKitProject}" | awk 'BEGIN {FS="/"}{print $2}') + +if [ ! -d "${dataDir}/${capturingKitProject}" ] +then + echo "Bedfile does not exist! Exiting" + exit 1 +fi + if [[ "${capturingKitProject}" == *"Exoom"* || "${capturingKitProject}" == *"All_Exon_v1"* || "${capturingKitProject}" == *"wgs"* || "${capturingKitProject}" == *"WGS"* ]] then batching="_chr" + if [ ! -r "${coveragePerTargetDir}/${captKit}/${captKit}" ] + then + echo "Bedfile in ${coveragePerTargetDir} does not exist! Exiting" + exit 1 + fi +else + if [ ! -r "${coveragePerBaseDir}/${captKit}/${captKit}" ] + then + echo "Bedfile in ${coveragePerBaseDir} does not exist! Exiting" + exit 1 + fi fi + echo "BATCHIDLIST=${EBROOTNGS_DNA}/batchIDList${batching}.csv" sh "${EBROOTMOLGENISMINCOMPUTE}/molgenis_compute.sh" \ From ca5fc7c8b24f570eb8abc700015f6d3187499534 Mon Sep 17 00:00:00 2001 From: RoanKanninga Date: Mon, 26 Mar 2018 10:53:45 +0200 Subject: [PATCH 3/7] added bedfile check to external samples too --- protocols/CreateExternSamplesProjects.sh | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/protocols/CreateExternSamplesProjects.sh b/protocols/CreateExternSamplesProjects.sh index 51c46623..79b31a86 100755 --- a/protocols/CreateExternSamplesProjects.sh +++ b/protocols/CreateExternSamplesProjects.sh @@ -33,6 +33,9 @@ #list lane #string ngsUtilsVersion +#string dataDir +#string inhouseIntervalsDir + set -e set -u @@ -114,9 +117,28 @@ extract_samples_from_GAF_list.pl --i "${worksheet}" --o "${projectJobsDir}/${pro batching="_small" capturingKitProject=$(python ${EBROOTNGS_DNA}/scripts/getCapturingKit.py "${projectJobsDir}/${project}.csv") +captKit=$(echo "${capturingKitProject}" | awk 'BEGIN {FS="/"}{print $2}') + +if [ ! 
-d "${dataDir}/${capturingKitProject}" ] +then + echo "Bedfile does not exist! Exiting" + exit 1 +fi + if [[ "${capturingKitProject}" == *"Exoom"* || "${capturingKitProject}" == *"All_Exon_v1"* || "${capturingKitProject}" == *"wgs"* || "${capturingKitProject}" == *"WGS"* ]] then - batching="_chr" + batching="_chr" + if [ ! -r "${coveragePerTargetDir}/${captKit}/${captKit}" ] + then + echo "Bedfile in ${coveragePerTargetDir} does not exist! Exiting" + exit 1 + fi +else + if [ ! -r "${coveragePerBaseDir}/${captKit}/${captKit}" ] + then + echo "Bedfile in ${coveragePerBaseDir} does not exist! Exiting" + exit 1 + fi fi if [ -f .compute.properties ]; From 511671dcd893286e177ea86667bbe17e273398e4 Mon Sep 17 00:00:00 2001 From: RoanKanninga Date: Mon, 26 Mar 2018 11:01:19 +0200 Subject: [PATCH 4/7] trigger --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 7155bf42..c359de52 100755 --- a/README.md +++ b/README.md @@ -16,6 +16,7 @@ The bwa-mem command from Burrows-Wheeler Aligner(BWA) [[2]](#r2) is used to alig The GATK [[4]](#r4) HaplotypeCaller estimates the most likely genotypes and allele frequencies in an alignment using a Bayesian likelihood model for every position of the genome regardless of whether a variant was detected at that site or not. This information can later be used in the project based genotyping step. A joint analysis has been performed of all the samples in the project. This leads to a posterior probability of a variant allele at a site. SNPs and small Indels are written to a VCF file, along with information such as genotype quality, allele frequency, strand bias and read depth for that SNP/Indel. Based on quality thresholds from the GATK "best practices" [[5]](#r5). The SNPs and indels are filtered and marked as Lowqual or Pass resulting in a final VCF file. + ### References 1. Andrews S. (2010). FastQC: a quality control tool for high throughput sequence data. 
Available online at:http://www.bioinformatics.babraham.ac.uk/projects/fastqc From 58f17393502ab9ac1f188c179995987f2676c0f5 Mon Sep 17 00:00:00 2001 From: RoanKanninga Date: Mon, 26 Mar 2018 11:28:19 +0200 Subject: [PATCH 5/7] bugfix in checking the bedfile --- protocols/CreateExternSamplesProjects.sh | 2 +- protocols/CreateInhouseProjects.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/protocols/CreateExternSamplesProjects.sh b/protocols/CreateExternSamplesProjects.sh index 79b31a86..116f8611 100755 --- a/protocols/CreateExternSamplesProjects.sh +++ b/protocols/CreateExternSamplesProjects.sh @@ -116,7 +116,7 @@ extract_samples_from_GAF_list.pl --i "${worksheet}" --o "${projectJobsDir}/${pro batching="_small" -capturingKitProject=$(python ${EBROOTNGS_DNA}/scripts/getCapturingKit.py "${projectJobsDir}/${project}.csv") +capturingKitProject=$(python ${EBROOTNGS_DNA}/scripts/getCapturingKit.py "${projectJobsDir}/${project}.csv" | sed 's|\\||' ) captKit=$(echo "${capturingKitProject}" | awk 'BEGIN {FS="/"}{print $2}') if [ ! -d "${dataDir}/${capturingKitProject}" ] diff --git a/protocols/CreateInhouseProjects.sh b/protocols/CreateInhouseProjects.sh index 152fbb2a..7e2e6bd2 100755 --- a/protocols/CreateInhouseProjects.sh +++ b/protocols/CreateInhouseProjects.sh @@ -121,7 +121,7 @@ fi batching="_small" -capturingKitProject=$(python ${EBROOTNGS_DNA}/scripts/getCapturingKit.py "${projectJobsDir}/${project}.csv") +capturingKitProject=$(python ${EBROOTNGS_DNA}/scripts/getCapturingKit.py "${projectJobsDir}/${project}.csv" | sed 's|\\||') captKit=$(echo "${capturingKitProject}" | awk 'BEGIN {FS="/"}{print $2}') if [ ! 
-d "${dataDir}/${capturingKitProject}" ] From db794eb066c6c3f645a8e748d5f3b834f928da82 Mon Sep 17 00:00:00 2001 From: RoanKanninga Date: Mon, 26 Mar 2018 11:34:59 +0200 Subject: [PATCH 6/7] added missing variables --- protocols/CreateExternSamplesProjects.sh | 3 ++- protocols/CreateInhouseProjects.sh | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/protocols/CreateExternSamplesProjects.sh b/protocols/CreateExternSamplesProjects.sh index 116f8611..03be3525 100755 --- a/protocols/CreateExternSamplesProjects.sh +++ b/protocols/CreateExternSamplesProjects.sh @@ -34,7 +34,8 @@ #string ngsUtilsVersion #string dataDir -#string inhouseIntervalsDir +#string coveragePerBaseDir +#string coveragePerTargetDir set -e set -u diff --git a/protocols/CreateInhouseProjects.sh b/protocols/CreateInhouseProjects.sh index 7e2e6bd2..0409ad7e 100755 --- a/protocols/CreateInhouseProjects.sh +++ b/protocols/CreateInhouseProjects.sh @@ -32,7 +32,9 @@ #string ngsUtilsVersion #string dataDir -#string inhouseIntervalsDir + +#string coveragePerBaseDir +#string coveragePerTargetDir #string project #string logsDir From ad5d8077cf78fcf3813603a882b820c18fb60234 Mon Sep 17 00:00:00 2001 From: RoanKanninga Date: Mon, 26 Mar 2018 12:18:38 +0200 Subject: [PATCH 7/7] check if exists --- protocols/CreateExternSamplesProjects.sh | 4 ++-- protocols/CreateInhouseProjects.sh | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/protocols/CreateExternSamplesProjects.sh b/protocols/CreateExternSamplesProjects.sh index 03be3525..9bfa8881 100755 --- a/protocols/CreateExternSamplesProjects.sh +++ b/protocols/CreateExternSamplesProjects.sh @@ -129,13 +129,13 @@ fi if [[ "${capturingKitProject}" == *"Exoom"* || "${capturingKitProject}" == *"All_Exon_v1"* || "${capturingKitProject}" == *"wgs"* || "${capturingKitProject}" == *"WGS"* ]] then batching="_chr" - if [ ! -r "${coveragePerTargetDir}/${captKit}/${captKit}" ] + if [ ! 
-e "${coveragePerTargetDir}/${captKit}/${captKit}" ] then echo "Bedfile in ${coveragePerTargetDir} does not exist! Exiting" exit 1 fi else - if [ ! -r "${coveragePerBaseDir}/${captKit}/${captKit}" ] + if [ ! -e "${coveragePerBaseDir}/${captKit}/${captKit}" ] then echo "Bedfile in ${coveragePerBaseDir} does not exist! Exiting" exit 1 diff --git a/protocols/CreateInhouseProjects.sh b/protocols/CreateInhouseProjects.sh index 0409ad7e..6b4193c6 100755 --- a/protocols/CreateInhouseProjects.sh +++ b/protocols/CreateInhouseProjects.sh @@ -135,13 +135,13 @@ fi if [[ "${capturingKitProject}" == *"Exoom"* || "${capturingKitProject}" == *"All_Exon_v1"* || "${capturingKitProject}" == *"wgs"* || "${capturingKitProject}" == *"WGS"* ]] then batching="_chr" - if [ ! -r "${coveragePerTargetDir}/${captKit}/${captKit}" ] + if [ ! -e "${coveragePerTargetDir}/${captKit}/${captKit}" ] then echo "Bedfile in ${coveragePerTargetDir} does not exist! Exiting" exit 1 fi else - if [ ! -r "${coveragePerBaseDir}/${captKit}/${captKit}" ] + if [ ! -e "${coveragePerBaseDir}/${captKit}/${captKit}" ] then echo "Bedfile in ${coveragePerBaseDir} does not exist! Exiting" exit 1