Skip to content

Commit

Permalink
Merge pull request #43 from CCBR/feat_hg19
Browse files Browse the repository at this point in the history
Feat hg19
  • Loading branch information
samarth8392 authored May 7, 2024
2 parents 09293c6 + fe3d57f commit 97ab644
Show file tree
Hide file tree
Showing 8 changed files with 367 additions and 246 deletions.
7 changes: 5 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# LOGAN 🔬 [![Docker Pulls](https://img.shields.io/docker/pulls/nciccbr/ccbr_wes_base)](https://hub.docker.com/r/nciccbr/ccbr_wes_base) [![GitHub issues](https://img.shields.io/github/issues/ccbr/LOGAN?color=brightgreen)](https://github.com/ccbr/LOGAN/issues) [![GitHub license](https://img.shields.io/github/license/ccbr/LOGAN)](https://github.com/ccbr/LOGAN/blob/master/LICENSE)

> **_LOGAN-whoLe genOme-sequencinG Analysis pipeliNe_**. This is the home of the LOGAN Pipeline. Accurately call germline and somatic variants, CNVs, and SVs and annotate variants!
> **_LOGAN-whoLe genOme-sequencinG Analysis pipeliNe_**. Call germline and somatic variants, CNVs, and SVs and annotate variants!
## Overview
Welcome to LOGAN! Before getting started, we highly recommend reading through [LOGAN's documentation](https://ccbr.github.io/LOGAN).
Expand Down Expand Up @@ -72,7 +72,10 @@ Adding flags determines SNV (germline and/or somatic), SV, and/or CNV calling mo

`--cnv`- Enables somatic CNV calling using FREEC, Sequenza, and Purple (hg38 only)

#### Optional Arguments
`--indelrealign` - Enables indel realignment when running alignment steps. May be helpful for certain callers (VarScan, VarDict)

`--callers` - Comma-separated list of variant callers to run; by default all available callers are used. Example: `--callers mutect2,octopus,vardict,varscan`

## Running LOGAN
Example of Tumor only calling mode
Expand All @@ -87,7 +90,7 @@ logan run --mode local -profile ci_stub --genome hg38 --outdir out --fastq_input
logan run --mode slurm -profile biowulf,slurm --genome hg38 --outdir out --fastq_input "*R{1,2}.fastq.gz" --vc --sv --cnv
```

We currently support the hg38 and mm10 genomes.
We currently support the hg38, hg19 (in progress), and mm10 genomes.



Expand Down
35 changes: 31 additions & 4 deletions conf/genomes.config
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@ params {
genomedict= "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/genome/Homo_sapiens_assembly38.dict"
wgsregion = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/resources_broad_hg38_v0_wgs_calling_regions.hg38.interval_list"
intervals= "${projectDir}/assets/hg38_v0_wgs_calling_regions.hg38.bed"
millsindel = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz" //Mills_and_1000G_gold_standard.indels.hg38.vcf.gz"
shapeitindel = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz" //ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz" //file(params.gold_indels2) //
INDELREF = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz" //ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz"
KNOWNINDELS = "-known /data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz -known /data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz"
KNOWNRECAL = '--known-sites /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GATK_resource_bundle/dbsnp_138.hg38.vcf.gz --known-sites /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GATK_resource_bundle/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz --known-sites /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GATK_resource_bundle/ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz'
Expand All @@ -31,10 +29,39 @@ params {
SEQUENZAGC = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/SEQUENZA/hg38_gc50Base.txt.gz"
chromosomes = ['chr1','chr2','chr3','chr4','chr5','chr6','chr7','chr8','chr9','chr10','chr11','chr12','chr13','chr14','chr15','chr16','chr17','chr18','chr19','chr20','chr21','chr22','chrX','chrY','chrM']
}

// Resource paths and annotation settings used when the hg19 genome key is selected.
// NOTE(review): several entries below still reference files under hg38 directories;
// each one is flagged inline and should be confirmed or replaced with an hg19/b37 resource.
'hg19' {
// Reference FASTA and its .fai index.
genome = "/data/CCBR_Pipeliner/db/PipeDB/lib/hg19.with_extra.fa"
genomefai = "/data/CCBR_Pipeliner/db/PipeDB/lib/hg19.with_extra.fa.fai"
// NOTE(review): bwagenome/genomedict point at hs37d5 while genome/genomefai point at
// hg19.with_extra — confirm the two references are meant to be mixed and that their
// contig names agree with the chr-prefixed `chromosomes` list at the end of this block.
bwagenome= "/data/CCBR_Pipeliner/db/PipeDB/lib/hs37d5.fa"
genomedict= "/data/CCBR_Pipeliner/db/PipeDB/lib/hs37d5.dict"
// BED of calling intervals (file name suggests blacklist-subtracted and sorted — TODO confirm).
intervals= "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg19/hg19_noblacklistsort_vc.bed"
// Known-indel VCFs from the b37 GATK resource bundle; KNOWNINDELS is pre-formatted as `-known <vcf>` arguments.
INDELREF = "/fdb/GATK_resource_bundle/b37/Mills_and_1000G_gold_standard.indels.b37.vcf" //ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz"
KNOWNINDELS = "-known /fdb/GATK_resource_bundle/b37/Mills_and_1000G_gold_standard.indels.b37.vcf -known /fdb/GATK_resource_bundle/b37/1000G_phase1.indels.b37.vcf"
// NOTE(review): every --known-sites path here is an hg38 VCF — confirm/replace for hg19.
KNOWNRECAL = '--known-sites /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GATK_resource_bundle/dbsnp_138.hg38.vcf.gz --known-sites /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GATK_resource_bundle/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz --known-sites /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GATK_resource_bundle/ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz'
dbsnp = "/fdb/GATK_resource_bundle/hg19-2.8/dbsnp_138.hg19.vcf.gz"
// NOTE(review): --germline-resource is the hg38 gnomAD file — confirm/replace for hg19.
gnomad = '--germline-resource /data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GNOMAD/somatic-hg38-af-only-gnomad.hg38.vcf.gz' // /data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GNOMAD/somatic-hg38-af-only-gnomad.hg38.vcf.gz
pon = "/data/CCBR_Pipeliner/db/PipeDB/lib/GRCh37.noCOSMIC_ClinVar.pon.vcf.gz"
// NOTE(review): kgp is set to the same file as dbsnp above — confirm this is intended.
kgp = "/fdb/GATK_resource_bundle/hg19-2.8/dbsnp_138.hg19.vcf.gz"
// NOTE(review): path sits under an hg38 directory; presumably the Kraken DB is genome-independent — verify.
KRAKENBACDB = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/kraken/20180907_standard_kraken2"
// snpEff settings.
snpeff_genome = "GRCh37.75"
snpeff_config = "/usr/local/apps/snpEff/4.3t/snpEff.config"
// NOTE(review): value ends in "snpEff.confi" — possibly a truncated "snpEff.config"; confirm against the consumer of snpeff_bundle.
snpeff_bundle = "/usr/local/apps/snpEff/4.3t/snpEff.confi"
// NOTE(review): somalier sites VCF and ancestry DB are under hg38 paths — confirm/replace for hg19.
sites_vcf= "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/somalier/sites.hg38.vcf.gz"
somalier_ancestrydb="/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/somalier/1kg-somalier"
// VEP cache location, species and assembly; AnnotSV genome build.
vepcache = "/fdb/VEP/102/cache"
vepspecies = "homo_sapiens"
vepbuild = "GRCh37"
annotsvgenome = "GRCh37"
// Octopus forest files, pre-formatted as command-line arguments.
// NOTE(review): both live under resources/hg38 — verify they are valid for hg19 runs.
octopus_sforest= "--somatic-forest /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/octopus/somatic.v0.7.4.forest"
octopus_gforest= "--forest /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/octopus/germline.v0.7.4.forest"
// NOTE(review): Sequenza GC file is the hg38 one — confirm/replace for hg19.
SEQUENZAGC = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/SEQUENZA/hg38_gc50Base.txt.gz"
// Contig names, chr-prefixed: autosomes 1-22, X, Y and M.
chromosomes = ['chr1','chr2','chr3','chr4','chr5','chr6','chr7','chr8','chr9','chr10','chr11','chr12','chr13','chr14','chr15','chr16','chr17','chr18','chr19','chr20','chr21','chr22','chrX','chrY','chrM']
}

'mm10' {
genome = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/genome/bwamem2index/genome.fa" // file(params.genome)
genomefai = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/genome/bwamem2index/genome.fa.fai" // file(params.genome)
genome = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/genome/bwamem2index/genome.fa"
genomefai = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/genome/bwamem2index/genome.fa.fai"
bwagenome= "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/genome/bwaindex/genome.fa"
genomedict= "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/genome/bwamem2index/genome.dict"
intervals="/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/genome/bwamem2index/mm10_wgsregions.bed"
Expand Down
6 changes: 6 additions & 0 deletions modules/local/splitbed.nf
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,10 @@ bedtools subtract -a GRCh38.primary_assembly.genome.bed -b ../hg38.blacklist.bed
gatk BedToIntervalList -I GRCh38.primary_assembly.genome.interval.bed -O \
GRCh38.primary_assembly.genome.interval_list -SD GRCh38.primary_assembly.genome.dict
#hg19
awk -F '\t' '{printf("%s\t0\t%s\n",$1,$2);}' /data/CCBR_Pipeliner/db/PipeDB/lib/hg19.with_extra.fa.fai >hg19_all.bed
bedtools subtract -a hg19_all.bed -b hg19-blacklist.v2.bed > hg19_noblacklist.bed
bedtools sort -i hg19_noblacklist.bed -chrThenSizeD >hg19_noblacklistsort.bed
awk '/^chr[0-9,X,Y,M]*\t/ {printf("%s\t%s\t%s\n",$1,$2,$3);}' hg19_noblacklistsort.bed > hg19_noblacklistsort_vc.bed
*/
12 changes: 8 additions & 4 deletions modules/local/variant_calling.nf
Original file line number Diff line number Diff line change
Expand Up @@ -472,6 +472,7 @@ process octopus_tn {
$GERMLINE_FOREST \
$SOMATIC_FOREST \
--target-working-memory 64Gb \
-B 64Gb \
-o ${tumorname}_vs_${normalname}_${bed.simpleName}.octopus.vcf.gz
"""

Expand Down Expand Up @@ -759,7 +760,7 @@ process somaticcombine {

input:
tuple val(tumorsample), val(normal),
val(callers),
val(caller),
path(vcfs), path(vcfindex)

output:
Expand All @@ -768,24 +769,27 @@ process somaticcombine {
path("${tumorsample}_vs_${normal}_combined.vcf.gz.tbi")

script:
vcfin1=[callers, vcfs].transpose().collect { a, b -> a + " " + b }
vcfin1=[caller, vcfs].transpose().collect { a, b -> a + " " + b }
vcfin2="-V:" + vcfin1.join(" -V:")

callerin=caller.join(",")
"""
/usr/lib/jvm/java-8-openjdk-amd64/bin/java -jar \$GATK_JAR -T CombineVariants \
-R $GENOMEREF \
--genotypemergeoption PRIORITIZE \
--rod_priority_list mutect2,strelka,octopus,muse,lofreq,vardict,varscan \
--rod_priority_list $callerin \
--filteredrecordsmergetype KEEP_IF_ANY_UNFILTERED \
-o ${tumorsample}_vs_${normal}_combined.vcf.gz \
$vcfin2
"""

stub:
vcfin1=[callers, vcfs].transpose().collect { a, b -> a + " " + b }
vcfin1=[caller, vcfs].transpose().collect { a, b -> a + " " + b }
vcfin2="-V:" + vcfin1.join(" -V:")

callerin=caller.join(",")

"""
touch ${tumorsample}_vs_${normal}_combined.vcf.gz
touch ${tumorsample}_vs_${normal}_combined.vcf.gz.tbi
Expand Down
17 changes: 13 additions & 4 deletions modules/local/variant_calling_tonly.nf
Original file line number Diff line number Diff line change
Expand Up @@ -339,9 +339,11 @@ process octopus_tonly {
octopus -R $GENOMEREF -C cancer -I ${tumor} \
--annotations AC AD DP \
--target-working-memory 64Gb \
-B 64Gb \
-t ${bed} \
--threads ${task.cpus}\
$SOMATIC_FOREST \
-o ${tumorname}_${bed.simpleName}.tonly.octopus.vcf.gz --threads ${task.cpus}
-o ${tumorname}_${bed.simpleName}.tonly.octopus.vcf.gz
"""

stub:
Expand Down Expand Up @@ -385,7 +387,7 @@ process somaticcombine_tonly {

input:
tuple val(tumorsample),
val(callers),
val(caller),
path(vcfs), path(vcfindex)

output:
Expand All @@ -394,20 +396,27 @@ process somaticcombine_tonly {
path("${tumorsample}_combined_tonly.vcf.gz.tbi")

script:
vcfin1=[callers, vcfs].transpose().collect { a, b -> a + " " + b }
vcfin1=[caller, vcfs].transpose().collect { a, b -> a + " " + b }
vcfin2="-V:" + vcfin1.join(" -V:")

callerin=caller.join(",").replaceAll("_tonly","")

"""
/usr/lib/jvm/java-8-openjdk-amd64/bin/java -jar \$GATK_JAR -T CombineVariants \
-R $GENOMEREF \
--genotypemergeoption PRIORITIZE \
--rod_priority_list mutect2_tonly,octopus_tonly,vardict_tonly,varscan_tonly \
--rod_priority_list $callerin \
--filteredrecordsmergetype KEEP_IF_ANY_UNFILTERED \
-o ${tumorsample}_combined_tonly.vcf.gz \
$vcfin2
"""

stub:

vcfin1=[caller, vcfs].transpose().collect { a, b -> a + " " + b }
vcfin2="-V:" + vcfin1.join(" -V:")

callerin=caller.join(",").replaceAll("_tonly","")
"""
touch ${tumorsample}_combined_tonly.vcf.gz ${tumorsample}_combined_tonly.vcf.gz.tbi
"""
Expand Down
6 changes: 3 additions & 3 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,10 @@ params {
script_freecpaired = "${projectDir}/bin/make_freec_genome_paired.pl"
freec_significance = "${projectDir}/bin/assess_significance.R"
freec_plot = "${projectDir}/bin/makeGraph.R"

lofreq_convert = "${projectDir}/bin/add_gt_lofreq.sh"
split_regions = "24" //Number of regions to split by

vep_cache = "/fdb/VEP/102/cache"

//SUB WORKFLOWS to SPLIT
Expand All @@ -40,16 +41,15 @@ params {
qc=null
bam=null
indelrealign=null

//Set all Inputs to null
sample_sheet=null

fastq_file_input=null
bam_file_input=null
file_input=null

fastq_input=null
bam_input=null

BAMINPUT=null

publish_dir_mode = 'symlink'
Expand Down
Loading

0 comments on commit 97ab644

Please sign in to comment.