Skip to content

Commit

Permalink
Merge pull request #386 from molgenis/3.2.1
Browse files Browse the repository at this point in the history
3.2.1 to master
  • Loading branch information
freerkvandijk committed Dec 22, 2015
2 parents 06f79b0 + 8958849 commit dcb4894
Show file tree
Hide file tree
Showing 32 changed files with 430 additions and 226 deletions.
2 changes: 2 additions & 0 deletions compute5/NGS_DNA/batchIDList_NO.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
batchID
1
24 changes: 24 additions & 0 deletions compute5/NGS_DNA/batchIDList_chr.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
batchID
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
X
2 changes: 1 addition & 1 deletion compute5/NGS_DNA/generate_template.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ PROJECT=projectXX
TMPDIR=tmpXX
WORKDIR="/groups/umcg-gaf/${TMPDIR}"
RUNID=runXX
## For small batchsize (6) leave BATCH empty, else choose _exome (10 batches) or _wgs (20 batches)
## For small batchsize (6) leave BATCH empty, else choose _chr (per chromosome), _NO (1 batch), _exome (10 batches) or _wgs (20 batches)
BATCH=""

SAMPLESIZE=$(cat ${WORKDIR}/generatedscripts/${PROJECT}/${PROJECT}.csv | wc -l)
Expand Down
63 changes: 37 additions & 26 deletions compute5/NGS_DNA/parameters.csv
Original file line number Diff line number Diff line change
Expand Up @@ -9,23 +9,24 @@ checkStage,module list

### Tool versions ####
bwaVersion,BWA/0.7.12-goolf-1.7.20
computeVersion,v15.11.1-Java-1.8.0_45
computeVersion,v15.12.4-Java-1.8.0_45
cutadaptVersion,1.8.1-goolf-1.7.20-Python-2.7.9
dbNSFPVersion,2.7
dellyVersion,v0.7.1
fastqcVersion,FastQC/0.11.3-Java-1.7.0_80
gatkVersion,GATK/3.4-0-Java-1.7.0_80
javaVersion,Java/1.7.0_80
javaVersion,Java/1.8.0_45
picardVersion,picard/1.130-Java-1.7.0_80
rVersion,R/3.2.1-goolf-1.7.20
sambambaVersion,sambamba_v0.5.9
sambambaVersion,sambamba/v0.5.9-goolf-1.7.20
samtoolsVersion,SAMtools/1.2-goolf-1.7.20
snpEffVersion,snpEff/4.1g-Java-1.7.0_80
tabixVersion,tabix/0.2.6-goolf-1.7.20
molgenisAnnotatorVersion,CmdLineAnnotator/1.9.0-Java-1.8.0_45
hpoVersion,90
gatkJar,GenomeAnalysisTK.jar
picardJar,picard.jar
sambambaTool,sambamba_v0.5.9

##### GENERAL DIRECTORIES #####
dataDir,${root}/data/
Expand Down Expand Up @@ -59,7 +60,7 @@ indexFileDictionary,${indicesDir}/${indexFileIDPhiX}.dict
#### Prefixes ####
runPrefix,${sequencingStartDate}_${sequencer}_${run}_${flowcell}
filePrefix,${runPrefix}_L${lane}
sample,${intermediateDir}/${externalSampleID}
sampleNameID,${intermediateDir}/${externalSampleID}

#### INTERVALS ####
nameBed,captured
Expand Down Expand Up @@ -96,7 +97,7 @@ phiXPrefix,150504_WGSIM_9999_ZZZZZZZZXX
phiXEnd1Gz,${humanPhiXdir}/${phiXPrefix}/${phiXPrefix}_L9_ZZZZZZ_1.${rawFileExt}
phiXEnd2Gz,${humanPhiXdir}/${phiXPrefix}/${phiXPrefix}_L9_ZZZZZZ_2.${rawFileExt}

### Protocols 5, 6, 7, 8 and 9 (SamToBam, SortBam, MergeBam, MarkDuplicates, IndelRealignment) ###
### Protocols 5, 6, 7 and 8 (SamToBam, SortBam, MergeBam, MarkDuplicates) ###
samToBamJar,SamFormatConverter
sortSamJar,SortSam
mergeSamFilesJar,MergeSamFiles
Expand All @@ -105,35 +106,45 @@ alignedSam,${fileWithIndexId}.sam
alignedBam,${fileWithIndexId}.bam
alignedSortedBam,${fileWithIndexId}.sorted.bam
alignedSortedBamIdx,${fileWithIndexId}.sorted.bai
sampleMergedBam,${sample}.merged.bam
sampleMergedBamIdx,${sample}.merged.bai
dedupBam,${sample}.merged.dedup.bam
dedupBamIdx,${sample}.merged.dedup.bam.bai
dedupMetrics,${sample}.merged.dedup.metrics,
sampleMergedBam,${sampleNameID}.merged.bam
sampleMergedBai,${sampleNameID}.merged.bai
sampleMergedBamIdx,${sampleNameID}.merged.bam.bai
dedupBam,${sampleNameID}.merged.dedup.bam
dedupBamIdx,${sampleNameID}.merged.dedup.bam.bai
dedupMetrics,${sampleNameID}.merged.dedup.metrics,
dedupBamCram,${sampleNameID}.merged.dedup.bam.cram
dedupBamCramIdx,${sampleNameID}.merged.dedup.bam.cram.bai
dedupBamCramBam,${sampleNameID}.merged.dedup.bam.cram.bam
KGPhase1IndelsVcf,${indicesDir}/indels/1000G_phase1.indels.b37.vcf
KGPhase1IndelsVcfIdx,${KGPhase1IndelsVcf}.idx
MillsGoldStandardDir,${indicesDir}/Mills_and_1000G_gold_standard/
MillsGoldStandardIndelsVcf,${MillsGoldStandardDir}/1000G_phase1.indels_Mills_and_1000G_gold_standard.indels.b37.human_g1k_v37.vcf
MillsGoldStandardChr1Intervals,${MillsGoldStandardDir}/1000G_phase1.indels_Mills_and_1000G_gold_standard.indels.b37.human_g1k_v37.chr1.intervals

### Protocols 11, 12, 13 and 14 (CheckSex, Delly, CoveragePerBase, SequonomConcordanceCheck, CollectBamMetrics) ###
whichSex,${sample}.chosenSex.txt
checkSexMeanCoverage,${sample}.checkSex.filter.meancoverage.txt
whichSex,${sampleNameID}.chosenSex.txt
checkSexMeanCoverage,${sampleNameID}.checkSex.filter.meancoverage.txt
capturedIntervals_nonAutoChrX,${intermediateDir}/${nameBed}.nonAutosomalChrX.interval_list
familyList,${sample}.familylist.txt
arrayMapFile,${sample}.concordance.map
sampleConcordanceFile,${sample}.concordance.ngsVSarray.txt
familyList,${sampleNameID}.familylist.txt
arrayMapFile,${sampleNameID}.concordance.map
sampleConcordanceFile,${sampleNameID}.concordance.ngsVSarray.txt
sequenomReport,${tmpDataDir}/rawdata/array/${project}_Sequenom_Report.txt
sequenomInfo,${sequenomDir}/Sequonome_SNPinfo.txt
collectMultipleMetricsJar,CollectMultipleMetrics
bamIndexStats,${dedupBam}.bam_index_stats
bamIndexStatsJar,BamIndexStats
gcBiasMetrics,${dedupBam}.gc_bias_metrics
gcBiasMetricsJar,CollectGcBiasMetrics
insertSizeMetrics,${dedupBam}.insert_size_metrics
hsMetricsJar,CalculateHsMetrics
hsMetrics,${dedupBam}.hs_metrics
hsMetricsNonAutosomalRegionChrX,${dedupBam}.nonAutosomalRegionChrX_hs_metrics
recreateInsertSizePdfR,createInsertSizePlot_c5.R
bamIndexStatsJar,BamIndexStats
projectDellyAnnotatorOutputVcf,${intermediateDir}/${project}.delly.snpeff.hpo.vcf
projectDellyAnnotatorOutputVcf,${sampleNameID}.delly.snpeff.hpo.vcf
collectBamMetricsPrefix,${intermediateDir}/${externalSampleID}.merged.dedup
hpoTerms,${hpoDir}/build.${hpoVersion}/ALL_SOURCES_TYPICAL_FEATURES_diseases_to_genes_to_phenotypes.txt
dellyVcf,${intermediateDir}/${project}.delly.vcf
dellyVcf,${sampleNameID}.delly.vcf
dellySnpEffVcf,${sampleNameID}.delly.snpeff.vcf
dellyTypeDEL,DEL
dellyTypeDUP,DUP
dellyTypeINV,INV
Expand All @@ -144,11 +155,11 @@ dbSNP137Vcf,${dbSNPDir}/dbsnp_137.b37.vcf
dbSNP137VcfIdx,${dbSNP137Vcf}.idx
dbSNPExSiteAfter129Vcf,${dbSNPDir}/dbsnp_137.b37.excluding_sites_after_129.vcf
dbSNPExSiteAfter129VcfIdx,${dbSNPExSiteAfter129Vcf}.idx
sampleBatchVariantCalls,${sample}.batch-${batchID}.variant.calls.g.vcf
sampleBatchVariantCalls,${sampleNameID}.batch-${batchID}.variant.calls.g.vcf
sampleBatchVariantCallsIdx,${sampleBatchVariantCalls}.idx
sampleBatchVariantCallsFemale,${sample}.batch-${batchID}.chrX.female.variant.calls.g.vcf
sampleBatchVariantCallsFemale,${sampleNameID}.batch-${batchID}.chrX.female.variant.calls.g.vcf
sampleBatchVariantCallsFemaleIdx,${sampleBatchVariantCallsFemale}.idx
sampleBatchVariantCallsMaleNONPAR,${sample}.batch-${batchID}.chrX.male.NONPAR.variant.calls.g.vcf
sampleBatchVariantCallsMaleNONPAR,${sampleNameID}.batch-${batchID}.chrX.male.NONPAR.variant.calls.g.vcf
sampleBatchVariantCallsMaleNONPARIdx,${sampleBatchVariantCallsMaleNONPAR}.idx
projectBatchCombinedVariantCalls,${projectPrefix}.batch-${batchID}.variant.calls.combined.g.vcf
projectBatchGenotypedVariantCalls,${projectPrefix}.batch-${batchID}.variant.calls.genotyped.vcf
Expand All @@ -163,12 +174,12 @@ snpEffGenesTxt,${projectPrefix}.snpEff.calls.genes.txt
snpEffCallsVcf,${projectPrefix}.calls.snpEff.vcf
snpEffCallsSortedVcf,${projectPrefix}.calls.snpEff.sorted.vcf
dbNSFP,${dbNSFPDir}/${dbNSFPVersion}/dbNSFP${dbNSFPVersion}.txt.gz
dbNSFPSampleVcf,${sample}.snpEff.annotated.snps.dbnsfp.vcf
dbNSFPSampleVcf,${sampleNameID}.snpEff.annotated.snps.dbnsfp.vcf
variantAnnotatorOutputVcf,${projectPrefix}.snpEff.annotated.vcf
variantAnnotatorSampleOutputIndelsVcf,${sample}.snpEff.annotated.indels.vcf
variantAnnotatorSampleOutputSnpsVcf,${sample}.snpEff.annotated.snps.vcf
variantAnnotatorSampleOutputIndelsFilteredVcf,${sample}.snpEff.annotated.filtered.indels.vcf
variantAnnotatorSampleOutputSnpsFilteredVcf,${sample}.snpEff.annotated.filtered.snps.vcf
variantAnnotatorSampleOutputIndelsVcf,${sampleNameID}.snpEff.annotated.indels.vcf
variantAnnotatorSampleOutputSnpsVcf,${sampleNameID}.snpEff.annotated.snps.vcf
variantAnnotatorSampleOutputIndelsFilteredVcf,${sampleNameID}.snpEff.annotated.filtered.indels.vcf
variantAnnotatorSampleOutputSnpsFilteredVcf,${sampleNameID}.snpEff.annotated.filtered.snps.vcf

### Protocols 24 and 25 (VcfToTable, QCReport) ###
variantsFinalProjectVcfTable,${projectPrefix}.final.vcf.table
Expand Down
13 changes: 7 additions & 6 deletions compute5/NGS_DNA/protocols/CollectBamIndexMetrics.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,25 +8,26 @@
#string bamIndexStatsJar
#string dedupBam
#string dedupBamIdx
#string collectBamMetricsPrefix
#string tempDir
#string capturingKit
#string picardJar
#string bamIndexStats
#string project

#Load Picard module
${stage} ${picardVersion}

makeTmpDir ${collectBamMetricsPrefix}
tmpCollectBamMetricsPrefix=${MC_tmpFile}
makeTmpDir ${bamIndexStats}
tmpBamIndexStats=${MC_tmpFile}


#Run Picard BamIndexStats
java -jar -Xmx4g ${EBROOTPICARD}/${picardJar} ${bamIndexStatsJar} \
INPUT=${dedupBam} \
VALIDATION_STRINGENCY=LENIENT \
TMP_DIR=${tempDir} \
> ${tmpCollectBamMetricsPrefix}.bam_index_stats
> ${tmpBamIndexStats}

echo -e "\nBamIndexStats finished succesfull. Moving temp files to final.\n\n"
mv ${tmpCollectBamMetricsPrefix}.bam_index_stats ${dedupBam}.bam_index_stats
mv ${tmpBamIndexStats} ${bamIndexStats}
echo "moved ${tmpBamIndexStats} to ${bamIndexStats}"

21 changes: 12 additions & 9 deletions compute5/NGS_DNA/protocols/CollectGCBiasMetrics.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@
#string capturingKit
#string seqType
#string picardJar
#string insertSizeMetrics
#string gcBiasMetrics
#string project

#Load Picard module
${stage} ${picardVersion}
Expand All @@ -25,30 +28,30 @@ ${stage} ${rVersion}
${stage} ngs-utils
${checkStage}

makeTmpDir ${collectBamMetricsPrefix}
tmpCollectBamMetricsPrefix=${MC_tmpFile}
makeTmpDir ${gcBiasMetrics}
tmpGcBiasMetrics=${MC_tmpFile}

#Run Picard GcBiasMetrics
java -XX:ParallelGCThreads=4 -jar -Xmx4g ${EBROOTPICARD}/${picardJar} ${gcBiasMetricsJar} \
R=${indexFile} \
I=${dedupBam} \
O=${tmpCollectBamMetricsPrefix}.gc_bias_metrics \
CHART=${tmpCollectBamMetricsPrefix}.gc_bias_metrics.pdf \
O=${tmpGcBiasMetrics} \
CHART=${tmpGcBiasMetrics}.pdf \
VALIDATION_STRINGENCY=LENIENT \
TMP_DIR=${tempDir}

echo -e "\nGcBiasMetrics finished succesfull. Moving temp files to final.\n\n"
mv ${tmpCollectBamMetricsPrefix}.gc_bias_metrics ${dedupBam}.gc_bias_metrics
mv ${tmpCollectBamMetricsPrefix}.gc_bias_metrics.pdf ${dedupBam}.gc_bias_metrics.pdf
mv ${tmpGcBiasMetrics} ${gcBiasMetrics}
mv ${tmpGcBiasMetrics}.pdf ${gcBiasMetrics}.pdf

######IS THIS STILL NEEDED, IMPROVEMENTS/UPDATES TO BE DONE?#####
#Create nicer insertsize plots if seqType is PE
#if [ "${seqType}" == "PE" ]
#then
# Overwrite the PDFs that were just created by nicer ones:
${recreateInsertSizePdfR} \
--insertSizeMetrics ${dedupBam}.insert_size_metrics \
--pdf ${dedupBam}.insert_size_histogram.pdf
${recreateInsertSizePdfR} \
--insertSizeMetrics ${insertSizeMetrics} \
--pdf ${insertSizeMetrics}.pdf

#else
# Don't do insert size analysis because seqType != "PE"
Expand Down
30 changes: 16 additions & 14 deletions compute5/NGS_DNA/protocols/CollectHSMetrics.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,43 +6,45 @@
#string checkStage
#string picardVersion
#string hsMetricsJar
#string hsMetrics
#string dedupBam
#string dedupBamIdx
#string collectBamMetricsPrefix
#string tempDir
#string recreateInsertSizePdfR
#string capturedIntervals
#string capturingKit
#string picardJar
#string project

#Load Picard module
${stage} ${picardVersion}

makeTmpDir ${collectBamMetricsPrefix}
tmpCollectBamMetricsPrefix=${MC_tmpFile}
makeTmpDir ${hsMetrics}
tmpHsMetrics=${MC_tmpFile}

#Run Picard HsMetrics if capturingKit was used
if [ "${capturingKit}" != "None" ]
then
java -jar -Xmx4g ${EBROOTPICARD}/${picardJar} ${hsMetricsJar} \
INPUT=${dedupBam} \
OUTPUT=${tmpCollectBamMetricsPrefix}.hs_metrics \
OUTPUT=${tmpHsMetrics} \
BAIT_INTERVALS=${capturedIntervals} \
TARGET_INTERVALS=${capturedIntervals} \
VALIDATION_STRINGENCY=LENIENT \
TMP_DIR=${tempDir}

else
echo "## net.sf.picard.metrics.StringHeader" > ${tmpCollectBamMetricsPrefix}.hs_metrics
echo "#" >> ${tmpCollectBamMetricsPrefix}.hs_metrics
echo "## net.sf.picard.metrics.StringHeader" >> ${tmpCollectBamMetricsPrefix}.hs_metrics
echo "#" >> ${tmpCollectBamMetricsPrefix}.hs_metrics
echo "" >> ${tmpCollectBamMetricsPrefix}.hs_metrics
echo "## METRICS CLASS net.sf.picard.analysis.directed.HsMetrics" >> ${tmpCollectBamMetricsPrefix}.hs_metrics
echo "BAIT_SET GENOME_SIZE BAIT_TERRITORY TARGET_TERRITORY BAIT_DESIGN_EFFICIENCY TOTAL_READS PF_READS PF_UNIQUE_READS PCT_PF_READS PCT_PF_UQ_READS PF_UQ_READS_ALIGNED PCT_PF_UQ_READS_ALIGNED PF_UQ_BASES_ALIGNED ON_BAIT_BASES NEAR_BAIT_BASES OFF_BAIT_BASES ON_TARGET_BASES PCT_SELECTED_BASES PCT_OFF_BAIT ON_BAIT_VS_SELECTED MEAN_BAIT_COVERAGE MEAN_TARGET_COVERAGE PCT_USABLE_BASES_ON_BAIT PCT_USABLE_BASES_ON_TARGET FOLD_ENRICHMENT ZERO_CVG_TARGETS_PCT FOLD_80_BASE_PENALTY PCT_TARGET_BASES_2X PCT_TARGET_BASES_10X PCT_TARGET_BASES_20X PCT_TARGET_BASES_30X HS_LIBRARY_SIZE HS_PENALTY_10X HS_PENALTY_20X HS_PENALTY_30X AT_DROPOUT GC_DROPOUT SAMPLE LIBRARY READ_GROUP" >> ${tmpCollectBamMetricsPrefix}.hs_metrics
echo "NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA" >> ${tmpCollectBamMetricsPrefix}.hs_metrics
echo "## net.sf.picard.metrics.StringHeader" > ${tmpHsMetrics}
echo "#" >> ${tmpHsMetrics}
echo "## net.sf.picard.metrics.StringHeader" >> ${tmpHsMetrics}
echo "#" >> ${tmpHsMetrics}
echo "" >> ${tmpHsMetrics}
echo "## METRICS CLASS net.sf.picard.analysis.directed.HsMetrics" >> ${tmpHsMetrics}
echo "BAIT_SET GENOME_SIZE BAIT_TERRITORY TARGET_TERRITORY BAIT_DESIGN_EFFICIENCY TOTAL_READS PF_READS PF_UNIQUE_READS PCT_PF_READS PCT_PF_UQ_READS PF_UQ_READS_ALIGNED PCT_PF_UQ_READS_ALIGNED PF_UQ_BASES_ALIGNED ON_BAIT_BASES NEAR_BAIT_BASES OFF_BAIT_BASES ON_TARGET_BASES PCT_SELECTED_BASES PCT_OFF_BAIT ON_BAIT_VS_SELECTED MEAN_BAIT_COVERAGE MEAN_TARGET_COVERAGE PCT_USABLE_BASES_ON_BAIT PCT_USABLE_BASES_ON_TARGET FOLD_ENRICHMENT ZERO_CVG_TARGETS_PCT FOLD_80_BASE_PENALTY PCT_TARGET_BASES_2X PCT_TARGET_BASES_10X PCT_TARGET_BASES_20X PCT_TARGET_BASES_30X HS_LIBRARY_SIZE HS_PENALTY_10X HS_PENALTY_20X HS_PENALTY_30X AT_DROPOUT GC_DROPOUT SAMPLE LIBRARY READ_GROUP" >> ${tmpHsMetrics}
echo "NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA" >> ${tmpHsMetrics}

fi
echo -e "\nHsMetrics finished succesfull. Moving temp files to final.\n\n"
mv ${tmpCollectBamMetricsPrefix}.hs_metrics ${dedupBam}.hs_metrics

mv ${tmpHsMetrics} ${hsMetrics}
echo "moved ${tmpHsMetrics} to ${hsMetrics}"

1 change: 1 addition & 0 deletions compute5/NGS_DNA/protocols/CollectMultipleMetrics.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#string tempDir
#string seqType
#string picardJar
#string project

#Load Picard module
${stage} ${picardVersion}
Expand Down
10 changes: 10 additions & 0 deletions compute5/NGS_DNA/protocols/CopyToResultsDir.sh
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,16 @@ do
then
cp ${intermediateDir}/${sample}.coveragePerBase.txt ${projectResultsDir}/coverage/
fi
if [ -f ${intermediateDir}/${sample}.coveragePerGene.txt ]
then
cp ${intermediateDir}/${sample}.coveragePerGene.txt ${projectResultsDir}/coverage/
fi
if [ -f ${intermediateDir}/${sample}.coveragePerTarget.txt ]
then
cp ${intermediateDir}/${sample}.coveragePerTarget.txt ${projectResultsDir}/coverage/
fi


done
echo "Copied vcf file + coveragePerBase.txt (8/11)"

Expand Down
2 changes: 1 addition & 1 deletion compute5/NGS_DNA/protocols/CountAllFinishedFiles.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#MOLGENIS walltime=00:01:00 mem=1gb
#string projectJobsDir
#string intermediateDir

#string project
cd $projectJobsDir

countShScripts=`ls *.sh | wc -l`
Expand Down
Loading

0 comments on commit dcb4894

Please sign in to comment.