Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Docker #13

Merged
merged 13 commits into from
Nov 15, 2023
40 changes: 31 additions & 9 deletions docker/logan_base/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -52,15 +52,9 @@ RUN wget https://github.com/broadinstitute/gatk/releases/download/4.3.0.0/gatk-4
&& /opt2/gatk-4.3.0.0/gatk --list
ENV PATH="/opt2/gatk-4.3.0.0:$PATH"

# Install last release of GATK3 (GATK/3.8-1)
# Only being used for the CombineVariants
# command that is not available in GATK4
# Available via env variable: $GATK_JAR
# Requires Java8 or 1.8
RUN wget https://storage.googleapis.com/gatk-software/package-archive/gatk/GenomeAnalysisTK-3.8-1-0-gf15c1c3ef.tar.bz2 \
&& tar -xvjf /opt2/GenomeAnalysisTK-3.8-1-0-gf15c1c3ef.tar.bz2 \
&& rm /opt2/GenomeAnalysisTK-3.8-1-0-gf15c1c3ef.tar.bz2
ENV GATK_JAR="/opt2/GenomeAnalysisTK-3.8-1-0-gf15c1c3ef/GenomeAnalysisTK.jar"
# Download the DISCVRSeq 1.3.61 jar, used as the replacement for the
# CombineVariants command.
# Jar location is exported via env variable: $DISCVRSeq_JAR
# NOTE(review): wget saves into the current working directory; the ENV path
# below assumes that directory is /opt2 -- confirm against WORKDIR.
RUN wget https://github.com/BimberLab/DISCVRSeq/releases/download/1.3.61/DISCVRSeq-1.3.61.jar
ENV DISCVRSeq_JAR="/opt2/DISCVRSeq-1.3.61.jar"

# Install dependencies needed to add a new repository over HTTPS
RUN DEBIAN_FRONTEND=noninteractive apt-get install -y \
Expand Down Expand Up @@ -164,6 +158,15 @@ RUN wget https://github.com/AstraZeneca-NGS/VarDictJava/releases/download/v1.8.3
&& rm /opt2/VarDict-1.8.3.tar
ENV PATH="/opt2/VarDict-1.8.3/bin:$PATH"

# Install Octopus/v0.7.4
#RUN wget https://github.com/luntergroup/octopus/archive/refs/tags/v0.7.4.tar.gz \
# && tar -xvzf /opt2/v0.7.4.tar.gz \
# && rm /opt2/v0.7.4.tar.gz \
# && cd /opt2/octopus-0.7.4 \
# && cmake .
#ENV PATH="/opt2/octopus-0.7.4/bin:$PATH"


# Fastp From Opengene github
RUN wget http://opengene.org/fastp/fastp.0.23.2 \
&& mkdir fastp \
Expand Down Expand Up @@ -193,6 +196,25 @@ RUN wget -O svaba_1.2.0 https://github.com/walaj/svaba/releases/download/v1.2.0/

ENV PATH="/opt2/svaba:$PATH"

# LOFREQ
# Download the prebuilt LoFreq 2.1.5 linux x86-64 bundle, unpack it, make the
# lofreq binary executable, and delete the tarball so it does not stay in the
# image layer (same clean-up the other install steps perform).
# NOTE(review): relative paths assume the working directory is /opt2, which is
# what the ENV PATH entry below expects -- confirm against WORKDIR.
RUN wget https://github.com/CSB5/lofreq/raw/master/dist/lofreq_star-2.1.5_linux-x86-64.tgz \
    && tar -xzf lofreq_star-2.1.5_linux-x86-64.tgz \
    && chmod a+x lofreq_star-2.1.5_linux-x86-64/bin/lofreq \
    && rm lofreq_star-2.1.5_linux-x86-64.tgz

ENV PATH="/opt2/lofreq_star-2.1.5_linux-x86-64/bin/:$PATH"
# MUSE
# Fetch the MuSE 2.0.4 source archive, build it with the bundled
# install_muse.sh, move the resulting MuSE executable to /opt2/MuSE, and
# remove the source tree and the archive.
RUN wget -O muse_2.0.4.tar.gz https://github.com/wwylab/MuSE/archive/refs/tags/v2.0.4.tar.gz \
    && tar -xzf muse_2.0.4.tar.gz \
    && cd MuSE-2.0.4 \
    && ./install_muse.sh \
    && mv MuSE /opt2/ \
    && chmod a+x /opt2/MuSE \
    && rm -R /opt2/MuSE-2.0.4 \
    && rm /opt2/muse_2.0.4.tar.gz

# /opt2/MuSE is the executable itself (see mv/chmod above), not a directory,
# so the directory that contains it is what has to be on PATH.
ENV PATH="/opt2:$PATH"



# Add Dockerfile and argparse.bash script
# and export environment variables
Expand Down
2 changes: 1 addition & 1 deletion docker/logan_base/meta.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
dockerhub_namespace: dnousome
image_name: ccbr_logan_base
version: v0.3.1
version: v0.3.3
container: "$(dockerhub_namespace)/$(image_name):$(version)"
2 changes: 1 addition & 1 deletion nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ profiles {
}
withName: 'octopus_tn|octopus_tonly' {
container = 'docker://dancooke/octopus:latest'
memory=70.GB
memory=72.GB
time=24.h
cpus=16
}
Expand Down
172 changes: 141 additions & 31 deletions workflow/modules/variant_calling.nf
Original file line number Diff line number Diff line change
Expand Up @@ -224,17 +224,17 @@ process mutect2filter {
publishDir(path: "${outdir}/vcfs/mutect2", mode: 'copy')

input:
tuple val(sample), path(mutvcfs), path(stats), path(obs), path(pileups), path(normal_pileups),path(tumorcontamination),path(normalcontamination)
tuple val(sample), path(mutvcfs), path(stats), path(obs),
path(pileups), path(normal_pileups),path(tumorcontamination),path(normalcontamination)
output:
tuple val(sample), path("${sample}.mut2.marked.vcf.gz"),
path("${sample}.mut2.norm.vcf.gz"),
tuple val(sample),
path("${sample}.mut2.marked.vcf.gz"), path("${sample}.mut2.marked.vcf.gz.tbi"),
path("${sample}.mut2.norm.vcf.gz"), path("${sample}.mut2.norm.vcf.gz.tbi"),
path("${sample}.mut2.marked.vcf.gz.filteringStats.tsv")

script:
//Include the stats and concat ${mutvcfs} -Oz -o ${sample}.concat.vcf.gz
mut2in = mutvcfs.join(" -I ")


"""
gatk GatherVcfs -I ${mut2in} -O ${sample}.concat.vcf.gz
gatk IndexFeatureFile -I ${sample}.concat.vcf.gz
Expand All @@ -258,12 +258,13 @@ process mutect2filter {
awk '{{gsub(/\\y[W|K|Y|R|S|M]\\y/,"N",\$4); OFS = "\\t"; print}}' |\
sed '/^\$/d' > ${sample}.mut2.norm.vcf |\
bcftools view - -Oz -o ${sample}.mut2.norm.vcf.gz
bcftools index -t ${sample}.mut2.norm.vcf.gz
"""

stub:
"""
touch ${sample}.mut2.marked.vcf.gz
touch ${sample}.mut2.norm.vcf.gz
touch ${sample}.mut2.marked.vcf.gz ${sample}.mut2.marked.vcf.gz.tbi
touch ${sample}.mut2.norm.vcf.gz ${sample}.mut2.norm.vcf.gz.tbi
touch ${sample}.mut2.marked.vcf.gz.filteringStats.tsv
"""

Expand Down Expand Up @@ -395,7 +396,7 @@ process octopus_tn {


output:
tuple val(tumorname),
tuple val("${tumorname}_vs_${normalname}"),
path("${tumorname}_vs_${normalname}_${bed.simpleName}.octopus.vcf.gz")

script:
Expand Down Expand Up @@ -506,8 +507,11 @@ process combineVariants {

output:
tuple val(sample),
path("${vc}/${sample}.${vc}.marked.vcf.gz"), path("${vc}/${sample}.${vc}.norm.vcf.gz")

path("${vc}/${sample}.${vc}.marked.vcf.gz"),
path("${vc}/${sample}.${vc}.marked.vcf.gz.tbi"),
path("${vc}/${sample}.${vc}.norm.vcf.gz"),
path("${vc}/${sample}.${vc}.norm.vcf.gz.tbi")

script:
vcfin = inputvcf.join(" -I ")

Expand All @@ -518,13 +522,87 @@ process combineVariants {
-D $GENOMEDICT \
-I $vcfin
bcftools sort ${sample}.${vc}.temp.vcf.gz -Oz -o ${sample}.${vc}.marked.vcf.gz
bcftools norm ${sample}.${vc}.marked.vcf.gz --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\
bcftools norm ${sample}.${vc}.marked.vcf.gz -m- --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\
awk '{{gsub(/\\y[W|K|Y|R|S|M]\\y/,"N",\$4); OFS = "\\t"; print}}' |\
sed '/^\$/d' > ${sample}.${vc}.temp.vcf

bcftools view ${sample}.${vc}.temp.vcf -f PASS -Oz -o ${vc}/${sample}.${vc}.norm.vcf.gz

mv ${sample}.${vc}.marked.vcf.gz ${vc}

bcftools index ${vc}/${sample}.${vc}.marked.vcf.gz -t
bcftools index ${vc}/${sample}.${vc}.norm.vcf.gz -t
"""

stub:

"""
mkdir ${vc}
touch ${vc}/${sample}.${vc}.marked.vcf.gz
touch ${vc}/${sample}.${vc}.norm.vcf.gz
touch ${vc}/${sample}.${vc}.marked.vcf.gz.tbi
touch ${vc}/${sample}.${vc}.norm.vcf.gz.tbi
"""

}



// Index a single VCF with `bcftools index -t` and emit the VCF together with
// the `<vcf>.tbi` file that is expected next to it.
//   input : sample label, VCF file
//   output: sample label, the same VCF, its .tbi index
process bcftools_index_octopus {
    label 'process_low'

    input:
        tuple val(sample), path(vcf)

    output:
        tuple val(sample), path(vcf), path("${vcf}.tbi")

    script:
    """
    bcftools index -t ${vcf}
    """

    // Stub run: only create placeholder files with the expected names.
    stub:
    """
    touch ${vcf} ${vcf}.tbi
    """

}

process combineVariants_octopus {
label 'process_highmem'
publishDir(path: "${outdir}/vcfs/", mode: 'copy')

input:
tuple val(sample), path(vcfs), path(vcfsindex), val(vc)

output:
tuple val(sample),
path("${vc}/${sample}.${vc}.marked.vcf.gz"),
path("${vc}/${sample}.${vc}.marked.vcf.gz.tbi"),
path("${vc}/${sample}.${vc}.norm.vcf.gz"),
path("${vc}/${sample}.${vc}.norm.vcf.gz.tbi")

script:
vcfin = vcfs.join(" ")

"""
mkdir ${vc}
bcftools concat $vcfin -a -Oz -o ${sample}.${vc}.temp.vcf.gz
bcftools sort ${sample}.${vc}.temp.vcf.gz -Oz -o ${sample}.${vc}.marked.vcf.gz
bcftools norm ${sample}.${vc}.marked.vcf.gz -m- --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\
awk '{{gsub(/\\y[W|K|Y|R|S|M]\\y/,"N",\$4); OFS = "\\t"; print}}' |\
sed '/^\$/d' > ${sample}.${vc}.temp.vcf

bcftools view ${sample}.${vc}.temp.vcf -f PASS -Oz -o ${vc}/${sample}.${vc}.norm.vcf.gz

mv ${sample}.${vc}.marked.vcf.gz ${vc}

bcftools index ${vc}/${sample}.${vc}.marked.vcf.gz -t
bcftools index ${vc}/${sample}.${vc}.norm.vcf.gz -t
"""

stub:
Expand All @@ -533,6 +611,8 @@ process combineVariants {
mkdir ${vc}
touch ${vc}/${sample}.${vc}.marked.vcf.gz
touch ${vc}/${sample}.${vc}.norm.vcf.gz
touch ${vc}/${sample}.${vc}.marked.vcf.gz.tbi
touch ${vc}/${sample}.${vc}.norm.vcf.gz.tbi

"""

Expand Down Expand Up @@ -617,7 +697,9 @@ process combineVariants_strelka {
tuple val(sample), path(strelkasnvs), path(strelkaindels)

output:
tuple val(sample), path("${sample}.strelka.vcf.gz"),path("${sample}.filtered.strelka.vcf.gz")
tuple val(sample),
path("${sample}.strelka.vcf.gz"),path("${sample}.strelka.vcf.gz.tbi"),
path("${sample}.filtered.strelka.vcf.gz"),path("${sample}.filtered.strelka.vcf.gz.tbi")


script:
Expand All @@ -628,29 +710,72 @@ process combineVariants_strelka {

"""
bcftools concat $vcfin $indelsin --threads $task.cpus -Oz -o ${sample}.temp.strelka.vcf.gz
bcftools sort ${sample}.temp.strelka.vcf.gz -Oz -o ${sample}.strelka.vcf.gz
bcftools norm ${sample}.temp.strelka.vcf.gz -m- --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\
awk '{{gsub(/\\y[W|K|Y|R|S|M]\\y/,"N",\$4); OFS = "\\t"; print}}' |\
sed '/^\$/d' > ${sample}.temp1.strelka.vcf.gz

bcftools sort ${sample}.temp1.strelka.vcf.gz -Oz -o ${sample}.strelka.vcf.gz

bcftools view ${sample}.strelka.vcf.gz --threads $task.cpus -f PASS -Oz -o ${sample}.filtered.strelka.vcf.gz

bcftools index ${sample}.strelka.vcf.gz -t
bcftools index ${sample}.filtered.strelka.vcf.gz -t
"""

stub:

"""
touch ${sample}.strelka.vcf.gz
touch ${sample}.filtered.strelka.vcf.gz
touch ${sample}.strelka.vcf.gz ${sample}.strelka.vcf.gz.tbi
touch ${sample}.filtered.strelka.vcf.gz ${sample}.filtered.strelka.vcf.gz.tbi

"""

}

// Merge the per-caller somatic VCFs of one tumor/normal pair into a single
// VCF using DISCVRSeq MergeVcfsAndGenotypes.
//
// Inputs:
//   tumorsample, normal - passed through unchanged to the output tuple
//   callers             - caller labels, paired positionally with `vcfs`
//   vcfs                - one VCF per caller
//   vcfindex            - indexes of those VCFs (staged only, not referenced
//                         on the command line)
// Output:
//   tumorsample, normal, "${tumorsample}_combined.vcf.gz" and its ".tbi"
//   NOTE(review): no explicit indexing step is run here; the .tbi is expected
//   to be written by the merge tool itself -- confirm.
process somaticcombine {
    label 'process_mid'
    publishDir(path: "${outdir}/vcfs/combined", mode: 'copy')

    input:
        tuple val(tumorsample), val(normal),
        val(callers),
        path(vcfs), path(vcfindex)

    output:
        tuple val(tumorsample), val(normal),
        path("${tumorsample}_combined.vcf.gz"),
        path("${tumorsample}_combined.vcf.gz.tbi")

    script:
    // Pair every caller label with its VCF and render one
    // "-V:<caller> <vcf>" argument per pair.
    vcfin1 = [callers, vcfs].transpose().collect { a, b -> a + " " + b }
    vcfin2 = "-V:" + vcfin1.join(" -V:")

    // \$DISCVRSeq_JAR is escaped so it is expanded by the shell inside the
    // container (the logan_base image exports it), not by Groovy.
    // NOTE(review): assumes this process runs in an image that sets
    // DISCVRSeq_JAR -- confirm the container assigned to this process.
    // Every option line must end in a backslash so the whole invocation stays
    // one shell command.
    """
    java -jar \$DISCVRSeq_JAR MergeVcfsAndGenotypes \
        -R $GENOMEREF \
        --genotypeMergeOption PRIORITIZE \
        --priority_list mutect2,strelka,octopus,muse,lofreq,vardict,varscan \
        --filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED \
        -O ${tumorsample}_combined.vcf.gz \
        $vcfin2
    """

    // Stub run: only create placeholder files with the expected names.
    stub:

    """
    touch ${tumorsample}_combined.vcf.gz
    touch ${tumorsample}_combined.vcf.gz.tbi
    """

}

process annotvep_tn {
publishDir(path: "${outdir}/mafs/", mode: 'copy')

input:
tuple val(tumorsample), val(normalsample),
val(vc), path(tumorvcf)
val(vc), path(tumorvcf),path(vcfindex)

output:
path("paired/${vc}/${tumorsample}.maf")
Expand Down Expand Up @@ -739,18 +864,3 @@ process combinemafs_tn {
}



/*
process combineVariants_allcallers {

publishDir(path: "${outdir}/vcfs/", mode: 'copy')

input:
tuple val(sample), path(inputvcf), val(vc)

output:
tuple val(sample),
path("${vc}/${sample}.${vc}.marked.vcf.gz"), path("${vc}/${sample}.${vc}.norm.vcf.gz")

}
*/
Loading
Loading