From 011524dd31bbc23e6964d742b71e0e229f545127 Mon Sep 17 00:00:00 2001 From: LaraFuhrmann <55209716+LaraFuhrmann@users.noreply.github.com> Date: Wed, 8 May 2024 11:30:30 +0200 Subject: [PATCH] [add virus config] config + resources for h3n2 ha segment - thx anika --- config/h3n2_ha.yaml | 75 +++++++++++++++++++++++++++++ resources/h3n2_ha/gffs/h3n2_ha.gff3 | 5 ++ resources/h3n2_ha/h3n2_ha.fasta | 23 +++++++++ 3 files changed, 103 insertions(+) create mode 100644 config/h3n2_ha.yaml create mode 100644 resources/h3n2_ha/gffs/h3n2_ha.gff3 create mode 100644 resources/h3n2_ha/h3n2_ha.fasta diff --git a/config/h3n2_ha.yaml b/config/h3n2_ha.yaml new file mode 100644 index 00000000..5ac13ede --- /dev/null +++ b/config/h3n2_ha.yaml @@ -0,0 +1,75 @@ +# config-file used for the analysis of H3N2 segment HA +# config file was used to analyze data from SRA available +# through the SRA Run accession: SRP385331 + +general: + aligner: bwa + primers_trimmer: samtools + threads: 6 + snv_caller: lofreq + temp_prefix: ./temp + preprocessor: skip + +input: + datadir: samples/ + samples_file: samples.tsv + reference: "{VPIPE_BASEDIR}/../resources/h3n2_ha/h3n2_ha.fasta" + genes_gff: "{VPIPE_BASEDIR}/../resources/h3n2_ha/gffs/h3n2_ha.gff3" + paired: true + read_length: 151 + +output: + datadir: results/ + snv: True + local: True + global: False + visualization: False + QA: False + diversity: False + +gunzip: + mem: 100000 + +extract: + mem: 100000 + +preprocessing: + mem: 10000 + +sam2bam: + mem: 5000 + +ref_bwa_index: + mem: 65536 + +bwa_align: + mem: 40690 + threads: 8 + +bowtie_align: + mem: 12288 + threads: 6 + +coverage: + mem: 131072 + threads: 32 + time: 60 + +minor_variants: + mem: 16384 + threads: 64 + +coverage_intervals: + coverage: 0 + mem: 2000 + threads: 1 + +lofreq: + consensus: false + +snv: + consensus: false + localscratch: $TMPDIR + time: 240 + mem: 1024 + threads: 64 diff --git a/resources/h3n2_ha/gffs/h3n2_ha.gff3 b/resources/h3n2_ha/gffs/h3n2_ha.gff3 new file mode 
100644 index 00000000..3cd57f2a --- /dev/null +++ b/resources/h3n2_ha/gffs/h3n2_ha.gff3 @@ -0,0 +1,5 @@ +##gff-version 3 +##sequence-region EPI1857216 1 1718 +EPI1857216 feature gene 1 48 . + . gene_name="SigPep" +EPI1857216 feature gene 49 1035 . + . gene_name="HA1" +EPI1857216 feature gene 1036 1698 . + . gene_name="HA2" diff --git a/resources/h3n2_ha/h3n2_ha.fasta b/resources/h3n2_ha/h3n2_ha.fasta new file mode 100644 index 00000000..427694bc --- /dev/null +++ b/resources/h3n2_ha/h3n2_ha.fasta @@ -0,0 +1,23 @@ +>EPI_ISL_1563628 | A/Darwin/6/2021 | A / H3N2 | | 2021-03-16 +ATGAAGACTATCATTGCTTTGAGCAACATTCTATGTCTTGTTTTCGCTCAAAAAATACCTGGAAATGACAATAGCACGGC +AACGCTGTGCCTTGGGCACCATGCAGTACCAAACGGAACGATAGTGAAAACAATCACAAATGACCGAATTGAAGTTACTA +ATGCTACTGAGTTGGTTCAGAATTCATCAATAGGTGAAATATGCGGCAGTCCTCATCAGATCCTTGATGGAGGGAACTGC +ACACTAATAGATGCTCTATTGGGGGACCCTCAGTGTGACGGCTTTCAAAATAAGGAATGGGACCTTTTTGTTGAAAGAAG +CAGAGCCAACAGCAACTGTTACCCTTATGATGTGCCGGATTATGCCTCCCTTAGGTCACTAGTTGCCTCATCCGGCACAC +TGGAGTTTAAAAATGAAAGCTTCAATTGGACTGGAGTCAAACAAAACGGAACAAGTTCTGCGTGCATAAGGGGATCTAGT +AGTAGTTTTTTTAGTAGATTAAATTGGTTGACCAGCTTAAACAACATATATCCAGCACAGAACGTGACTATGCCAAACAA +GGAACAATTTGACAAATTGTACATTTGGGGGGTTCACCACCCGGATACGGACAAGAACCAAATCTCCCTGTTTGCTCAAT +CATCAGGAAGAATCACAGTATCTACCAAAAGAAGCCAACAAGCTGTAATCCCAAATATCGGATCTAGACCCAGAATAAGG +GATATCCCTAGCAGAATAAGCATCTATTGGACAATAGTAAAACCGGGAGACATACTTTTGATTAACAGCACAGGGAATCT +AATTGCTCCTAGGGGTTACTTCAAAATACGAAGTGGGAAAAGCTCAATAATGAGATCAGATGCACCCATTGGCAAATGTA +AGTCTGAATGCATCACTCCAAATGGAAGCATTCCCAATGACAAACCGTTCCAAAATGTAAACAGGATCACATACGGGGCC +TGTCCCAGATATGTTAAGCAAAGCACCCTGAAATTGGCAACAGGAATGCGAAATGTACCAGAGAAACAAACCAGAGGCAT +ATTTGGCGCAATAGCGGGTTTCATAGAAAATGGATGGGAGGGAATGGTGGATGGTTGGTACGGTTTCAGGCATCAAAATT +CTGAGGGAAGAGGACAAGCAGCAGATCTCAAAAGCACTCAAGCAGCAATCGATCAAATCAATGGGAAGCTGAATCGATTG +ATCGGAAAAACCAACGAGAAATTCCATCAGATTGAAAAAGAATTCTCAGAAGTAGAAGGAAGAGTTCAAGACCTTGAGAA +ATATGTTGAGGACACTAAAATAGATCTCTGGTCATACAACGCGGAGCTTCTTGTTGCCCTGGAGAACCAACATACGATTG 
+ACCTAACTGACTCAGAAATGAACAAACTGTTTGAAAAAACAAAGAAGCAACTGAGGGAAAATGCTGAGGATATGGGAAAT +GGTTGTTTCAAAATATACCACAAATGTGACAATGCCTGCATAGGATCAATAAGAAATGAAACTTATGACCACAATGTGTA +CAGGGATGAAGCATTAAACAACCGGTTCCAGATCAAGGGAGTTGAGCTGAAGTCAGGGTACAAAGATTGGATCCTATGGA +TTTCCTTTGCCATGTCATGTTTTTTGCTTTGTATTGCTTTGTTGGGGTTCATCATGTGGGCCTGCCAAAAGGGCAACATT +AGATGCAACATTTGCATTTGAGTGCATTAATTAAAAAC