# Patch notes (text before the first "diff --git" header is skipped by git apply / patch):
# - Adds FilterMutectCalls (09) and CNVkit (10) steps, switches GetPileupSummaries (07)
#   to the chr6/chr17 gnomAD subset, and fixes the contamination table filename (08).
# - 09_filter_variants.sh passes the table produced by step 08 via --contamination-table.
# - NOTE(review): indentation of the context lines in Docker/environment.yml and
#   scripts/07_get_pileups.sh was reconstructed; confirm against the working tree or
#   apply with --ignore-whitespace.
# - NOTE(review): the "index" blob ids for scripts/09 and scripts/10 predate the added
#   comment/option lines and are informational only.
diff --git a/.gitignore b/.gitignore
index 75940ac..a30c937 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,13 @@
 .DS_Store
 site
 env/
+data/
+.cache/
+.bash_history
+.bashrc
+.conda/
+.config/
+.lesshst
+.local/
+.profile
+extensions/
\ No newline at end of file
diff --git a/Docker/environment.yml b/Docker/environment.yml
index b01b79f..cca6d55 100644
--- a/Docker/environment.yml
+++ b/Docker/environment.yml
@@ -8,5 +8,5 @@ dependencies:
   - snpeff>=5.0
   - gatk4=4.5
   - r-base
-  - cnvkit
+  - cnvkit>=0.9.10
   - bcftools
diff --git a/scripts/01_download_data.sh b/scripts/01_download_data.sh
index ef26d2e..8c9876e 100644
--- a/scripts/01_download_data.sh
+++ b/scripts/01_download_data.sh
@@ -59,4 +59,6 @@ bcftools view -Oz -r chr6,chr17 af-only-gnomad.hg38.vcf.gz > af-only-gnomad.hg38
 bcftools index --tbi af-only-gnomad.hg38.subset.vcf.gz
 
 bcftools view -Oz -r chr6,chr17 1000g_pon.hg38.vcf.gz > 1000g_pon.hg38.subset.vcf.gz
-bcftools index --tbi 1000g_pon.hg38.subset.vcf.gz
\ No newline at end of file
+bcftools index --tbi 1000g_pon.hg38.subset.vcf.gz
+
+wget https://hgdownload.soe.ucsc.edu/goldenPath/hg38/database/refFlat.txt.gz
\ No newline at end of file
diff --git a/scripts/07_get_pileups.sh b/scripts/07_get_pileups.sh
index 5e9bda1..b77cd62 100644
--- a/scripts/07_get_pileups.sh
+++ b/scripts/07_get_pileups.sh
@@ -8,7 +8,7 @@ for sample in tumor normal
 do
     gatk GetPileupSummaries \
     -I "$ALIGNDIR"/"$sample".rg.md.bam \
-    -V "$RESOURCEDIR"/af-only-gnomad.hg38.vcf.gz \
-    -L "$RESOURCEDIR"/af-only-gnomad.hg38.vcf.gz \
+    -V "$RESOURCEDIR"/af-only-gnomad.hg38.subset.vcf.gz \
+    -L "$RESOURCEDIR"/af-only-gnomad.hg38.subset.vcf.gz \
     -O "$VARIANTDIR"/"$sample".pileups.table
 done
diff --git a/scripts/08_calculate_contamination.sh b/scripts/08_calculate_contamination.sh
index f020cf3..c1afa9d 100644
--- a/scripts/08_calculate_contamination.sh
+++ b/scripts/08_calculate_contamination.sh
@@ -7,4 +7,4 @@ RESOURCEDIR=/config/data/resources
 gatk CalculateContamination \
 -I "$VARIANTDIR"/tumor.pileups.table \
 -matched "$VARIANTDIR"/normal.pileups.table \
--O "$VARIANTDIR"/ontamination.table
+-O "$VARIANTDIR"/contamination.table
diff --git a/scripts/09_filter_variants.sh b/scripts/09_filter_variants.sh
new file mode 100644
index 0000000..c57fd35
--- /dev/null
+++ b/scripts/09_filter_variants.sh
@@ -0,0 +1,15 @@
+#!/usr/bin/env bash
+
+# Filter the calls in somatic.vcf.gz with FilterMutectCalls, applying the
+# contamination estimate written by 08_calculate_contamination.sh.
+
+ALIGNDIR=/config/data/alignments
+REFDIR=/config/data/reference
+RESOURCEDIR=/config/data/resources
+VARIANTDIR=/config/data/variants
+
+gatk FilterMutectCalls \
+-R "$REFDIR"/ref_genome.fa \
+-V "$VARIANTDIR"/somatic.vcf.gz \
+--contamination-table "$VARIANTDIR"/contamination.table \
+-O "$VARIANTDIR"/somatic.filtered.vcf.gz
\ No newline at end of file
diff --git a/scripts/10_run_cnvkit.sh b/scripts/10_run_cnvkit.sh
new file mode 100644
index 0000000..c97e84d
--- /dev/null
+++ b/scripts/10_run_cnvkit.sh
@@ -0,0 +1,17 @@
+#!/usr/bin/env bash
+
+# Run "cnvkit.py batch" on the tumor BAM with the normal BAM passed as
+# --normal; outputs are written to "$VARIANTDIR"/cnvkit/.
+
+ALIGNDIR=/config/data/alignments
+REFDIR=/config/data/reference
+RESOURCEDIR=/config/data/resources
+VARIANTDIR=/config/data/variants
+
+cnvkit.py batch "$ALIGNDIR"/tumor.rg.md.bam \
+--normal "$ALIGNDIR"/normal.rg.md.bam \
+--targets "$REFDIR"/exome_regions.bed.interval_list \
+--fasta "$REFDIR"/ref_genome.fa \
+--annotate "$RESOURCEDIR"/refFlat.txt.gz \
+--output-reference "$VARIANTDIR"/reference.cnn \
+--output-dir "$VARIANTDIR"/cnvkit/
\ No newline at end of file