Skip to content

Commit

Permalink
Merge branch 'add_virus_configs'
Browse files Browse the repository at this point in the history
- Virus definitions
- More documentation to address V-pipe 3.0 review
  • Loading branch information
DrYak committed May 28, 2024
2 parents 9bd42ce + 449fb91 commit 86136a9
Show file tree
Hide file tree
Showing 17 changed files with 3,103 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .github/workflows/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
This directory stores workflow scripts that are picked up by GitHub Actions to automatically perform test installations of V-pipe on Mac OS and Linux systems and run end-to-end tests by executing tutorials with real example data.
This ensures successful installation and reproducible execution on different systems.
For each update of V-pipe, these workflow scripts are executed automatically and report any installation problems or failures on the test data.
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,11 @@ Also see [snakemake's documentation](https://snakemake.readthedocs.io/en/stable/
Tutorials introducing usage of V-pipe are available in the [docs/](docs/README.md) subdirectory.
### Tutorials
Tutorials for your first steps with V-pipe for different scenarios are available in the [docs/](docs/README.md) subdirectory.
### Using quick install script
To deploy V-pipe, use the [installation script](utils/README.md#quick-installer) with the following parameters:
Expand All @@ -50,6 +55,8 @@ cd work
./vpipe --jobs 4 --printshellcmds --dry-run
```

Test data to test your installation is available with the tutorials provided in the [docs/](docs/README.md) subdirectory.

### Using Docker

Note: the [docker image](https://github.com/cbg-ethz/V-pipe/pkgs/container/v-pipe) is only set up with the components needed to run the workflow for the HIV and SARS-CoV-2 virus base configurations.
Expand Down
32 changes: 32 additions & 0 deletions config/drosophila_c_virus.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Config file used for the analysis of Drosophila C virus Illumina samples
# Lezcano et al., Virus Evolution, 2023, https://doi.org/10.1093/ve/vead074
# NCBI BioProject accession number PRJNA993483

# Tool selection for the alignment and SNV-calling steps.
general:
  aligner: "bwa"
  snv_caller: "shorah"

# Reference genome and sample layout.
input:
  # The directory name must match the resource folder that ships the
  # reference (resources/drosophila_c_virus/).
  reference: resources/drosophila_c_virus/NC_001834.1.fasta
  datadir: resources/samples/
  read_length: 100
  samples_file: samples.tsv
  paired: true

consensus_bcftools:
  max_coverage: 150000

# Settings for the SNV-calling step.
# NOTE(review): the key names suggest MB and minutes as units — confirm them
# against the V-pipe configuration schema.
snv:
  consensus: false
  disk_mb: 1250
  mem_mb: 35000
  time_min: 6000
  threads: 64

# `snv` and `local` outputs are enabled; every other output is switched off.
output:
  snv: true
  local: true
  global: false
  visualization: false
  diversity: false
  QA: false
75 changes: 75 additions & 0 deletions config/h3n2_ha.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
# Config file used for the analysis of H3N2 segment HA.
# This config file was used to analyse wastewater Illumina data from SRA,
# available through the SRA Run accession: SRP385331

# Tool selection and workflow-wide settings.
general:
  aligner: bwa
  primers_trimmer: samtools
  threads: 6
  snv_caller: lofreq
  temp_prefix: ./temp
  preprocessor: skip

# Sample layout plus reference and annotation, given relative to
# {VPIPE_BASEDIR}.
input:
  datadir: samples/
  samples_file: samples.tsv
  reference: "{VPIPE_BASEDIR}/../resources/h3n2_ha/h3n2_ha.fasta"
  genes_gff: "{VPIPE_BASEDIR}/../resources/h3n2_ha/gffs/h3n2_ha.gff3"
  paired: true
  read_length: 151

# `snv` and `local` outputs are enabled; every other output is switched off.
output:
  datadir: results/
  snv: true
  local: true
  global: false
  visualization: false
  QA: false
  diversity: false

# Per-step resource settings follow.
# NOTE(review): the units of `mem` and `time` are not stated in this file —
# presumably MB and minutes; confirm against the V-pipe configuration schema.
gunzip:
  mem: 100000

extract:
  mem: 100000

preprocessing:
  mem: 10000

sam2bam:
  mem: 5000

ref_bwa_index:
  mem: 65536

bwa_align:
  mem: 40690
  threads: 8

# NOTE(review): `general.aligner` is bwa, so this section is presumably
# unused unless the aligner is switched — confirm.
bowtie_align:
  mem: 12288
  threads: 6

coverage:
  mem: 131072
  threads: 32
  time: 60

minor_variants:
  mem: 16384
  threads: 64

coverage_intervals:
  coverage: 0
  mem: 2000
  threads: 1

lofreq:
  consensus: false

snv:
  consensus: false
  localscratch: $TMPDIR
  time: 240
  mem: 1024
  threads: 64
34 changes: 34 additions & 0 deletions config/herpes_simplex_virus_2.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Config file used for the analysis of herpes_simplex_virus_2 Illumina samples
# Lezcano et al., Virus Evolution, 2023, https://doi.org/10.1093/ve/vead074
# deletion analysis
# The analysed sample is from López-Muñoz AD, Rastrojo A, Kropp KA,
# Viejo-Borbolla A, Alcamí A. Combination of long- and short-read sequencing
# fully resolves complex repeats of herpes simplex virus 2 strain MS complete
# genome. Microb Genom. 2021 Jun;7(6).
# Sample accession number: ERR3278849

# Tool selection for the alignment and SNV-calling steps.
general:
  aligner: "bwa"
  snv_caller: "shorah"

# Reference genome and sample layout.
input:
  reference: resources/herpes_simplex_virus_2/MK855052.1.fasta
  datadir: resources/samples/
  read_length: 250
  samples_file: samples.tsv
  paired: true

consensus_bcftools:
  max_coverage: 150000

# Settings for the SNV-calling step.
# NOTE(review): the key names suggest MB and minutes as units — confirm them
# against the V-pipe configuration schema.
snv:
  consensus: false
  disk_mb: 1250
  mem_mb: 35000
  time_min: 6000
  threads: 64

# `snv` and `local` outputs are enabled; every other output is switched off.
output:
  snv: true
  local: true
  global: false
  visualization: false
  diversity: false
  QA: false
25 changes: 25 additions & 0 deletions config/polio.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Config file used for the analysis of poliovirus MinION samples
# Sample accession number: ERR4027774
# (Shaw et al., 2020, DOI: https://doi.org/10.1128/jcm.00920-20)

general:
  # Left as an empty string here.
  # NOTE(review): presumably means "do not load a predefined virus base
  # config" — confirm against the V-pipe configuration schema.
  virus_base_config: ""
  aligner: minimap
  preprocessor: skip

# Reference genome and sample layout; reads are unpaired.
input:
  reference: resources/polio/AY560657.1.fasta
  datadir: resources/samples/
  samples_file: config/samples.tsv
  paired: false

# Every optional output listed here is switched off.
output:
  trim_primers: false
  snv: false
  local: false
  global: false
  visualization: false
  QA: false
  diversity: false

# Preset handed to the minimap aligner step.
minimap_align:
  preset: map-ont
27 changes: 27 additions & 0 deletions config/rsvb.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Config file for Human respiratory syncytial virus B
# This config file is used to process Illumina RSV samples

general:
  # Left as an empty string here.
  # NOTE(review): presumably means "do not load a predefined virus base
  # config" — confirm against the V-pipe configuration schema.
  virus_base_config: ""
  preprocessor: "prinseq"
  aligner: "bwa"
  primers_trimmer: "samtools"

# Sample layout plus primer/insert BED files and reference, given relative to
# {VPIPE_BASEDIR}.
input:
  datadir: "samples/"
  samples_file: "samples.tsv"
  read_length: 251
  primers_bedfile: "{VPIPE_BASEDIR}/../resources/rsvb/RSVB_primers_400_V2.1.bed"
  inserts_bedfile: "{VPIPE_BASEDIR}/../resources/rsvb/RSVB_inserts_400_V2.1.bed"
  reference: "{VPIPE_BASEDIR}/../resources/rsvb/MT107528.1.fasta"

# Primer trimming is enabled; the `snv`, `local` and `global` outputs are off.
output:
  datadir: "results"
  trim_primers: true
  snv: false
  local: false
  global: false

# Top-level per-step sections (distinct from the `output.snv` switch above).
snv:
  consensus: false

lofreq:
  consensus: false
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
This repository contains the Snakemake workflow to reproduce the benchmarking study for the global haplotype reconstruction methods presented in https://doi.org/10.1101/2023.10.16.562462.

The notebooks in the directory `workflow/notebooks/` can be used to reproduce the panels of Figure 4.

Here is a step-by-step guide on how to run this workflow.
1. Clone the repository of V-pipe 3.0 into your working directory: `git clone https://github.com/cbg-ethz/V-pipe.git`
2. Go into the directory of the benchmarking study for global haplotype reconstruction: `cd V-pipe/resources/auxiliary_workflows/benchmark/resources/multi_setup`
3. The parameters to reproduce the synthetic dataset of varying coverage are in `config_varycoverage/params.csv`; the accompanying configuration file `config_varycoverage/config.yaml` defines the simulation mode, the replicate number and the methods to be executed.
4. The parameters to reproduce the synthetic dataset of varying distance pattern are in `config_varyparams/params.csv`; the accompanying configuration file `config_varyparams/config.yaml` defines the simulation mode, the replicate number and the methods to be executed.
5. The parameters to reproduce the real dataset are in `config_realdata/params.csv`; the accompanying configuration file `config_realdata/config.yaml` defines the replicate number and the methods to be executed.
6. Each method to be executed must be defined in a Python script in this directory: `V-pipe/resources/auxiliary_workflows/benchmark/resources/method_definitions`
- Haploclique: `V-pipe/resources/auxiliary_workflows/benchmark/resources/method_definitions/haploclique.py`
- PredictHaplo: `V-pipe/resources/auxiliary_workflows/benchmark/resources/method_definitions/predicthaplo.py`
- HaploConduct: `V-pipe/resources/auxiliary_workflows/benchmark/resources/method_definitions/haploconduct.py`
- CliqueSNV: `V-pipe/resources/auxiliary_workflows/benchmark/resources/method_definitions/cliquesnv.py`
7. Now the workflow is ready, go back to the directory `V-pipe/resources/auxiliary_workflows/benchmark/resources/multi_setup`.
8. To install the needed Conda environments execute: `snakemake --conda-create-envs-only --use-conda -c1`.
9. To submit the workflow to an LSF cluster, execute `./run_workflow.sh`; otherwise, execute the workflow with `snakemake --use-conda -c1`.
10. The workflow will provide the results in the directory `results`.
11. When the workflow has terminated and all result files have been generated, the panels of Figure 4 of the manuscript can be generated by executing the notebooks in `workflow/notebooks/`.
134 changes: 134 additions & 0 deletions resources/drosophila_c_virus/NC_001834.1.fasta
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
>NC_001834.1 Drosophila C virus strain EB, complete genome
TTTATATCGTGTGTACATATAAATATGTACACACGGCTTTTAGGTAGAATATTGTTTTCAATGTTGATTT
TAAAGGTAACTTTGGTTATTATGCTTTACGGTTTTCATTGTTGATGGTATTTGTGGCCTGCGGTCCCTAA
TTGTTGAATTATTTATTCTGATACGTTGTTTTCATTGTTGATGGTAAGGATTCTTATTTTGAAGTGGTTT
TTCAGAAGATAACTCTAAATATGAATTATGCCTTATTGTTTTCAATGTTGATGGCCTTCGTTTAAATACT
CTTTGTTAATGACGGTAATCAAAGATTACATCTCAAACTTAGATTAATATTTTTAAGTAGGGTATACTGA
GTTAGTCCTCTCTCTTTACTGATTTTGATATCTGGTAATTGACTTCGAAGAAAGATGCGTCTTTTGGATT
TGTAATGACTGGGCCTTAAGTTCATAGGTGTTATTACATGGAGGAACACATTACTTTGGTTGATGATGAT
GTTTTGATGATGACTTTCAATGTATGTGCTTATGTTAAGCCTGACATAAGAACTTACTAGTTTGCATAAT
GCAAAGGGTTAGTATATGATTTTTAGTATGTGGATTTTGACACTGCCTTTGATTAGGATGTGTGAATGAT
TTTGAAACATATTAAGATGTTTATACGAGCGTGTTGTTTACTATTTTCAGGATATGTGGAAGCGGTTGTG
TATGATCTATACGCACATTTAGTTCCCAGAGGGCGTTGTCGTCTCCCCCTAAGCAAGGGAGAAACACGTG
GCACATGATCTTGCGCTTAACGATAAAAATGGAATCTGATAAAAGTATGGCCTGTTTAAATAGAATTTTG
ATGAATAAGATGATGTTTGTGGAAGATAAGATCTCTACCCTTAAGATGGTTGCTGATTATTATCAAAAAG
AAGTAAAGTATGATTTTGATGCAGTTGAATCTCCCCGTGAGGCACCTGTATTTAGATGTACTTGTCGATT
CCTTGGTTATACCATTATGACTCAAGGCATCGGTAAGAAGAATCCGAAACAGGAAGCTGCACGTCAGATG
TTGCTCTTGTTATCAGGAGATGTTGAGACTAACCCTGGACCCGTTCAATCGCGCCCCGTGTATTATCGTT
ACAACGACCCTAGATATACACGGTTGGAAAAAGCTATTGAACGTCGAGACGATAAAATTAAAACATTAAT
TAAAGAGTTGCGTCGACAAATCAAAAATAGGAAAATTTATTCCCAAGGAATGTTTGATAAATTAACTAAA
CAAATTTCTGATGGGATAAAAGATGGTGTTGGCTCTGAACAGATGAATGGAAATTTGACTCGTATTTGTG
ATTTCCTAGAGAACACTCTTCCTGGGTTACAAGCAAATATTCAAGCCACTGTGATTGATACAACAGACAA
ATATGTTTCTTTAAAAGAGGATATTATGAAGATTGTTTTAGTGATATTGCTTGTTCGTCTTTTAATGGTT
TGGAAGAAGTATCGTGCTTCTCTGTGTGTTATTTTAATCTTTATTTTTAAATTTTATGGATTCGATCAAA
AGTTGATTGATTTAATTATGGATTTGAAGAATAAAATATTTTCACAGGGTGCATTGGAAGATACAGTTGA
GGAGGTTGTATATCATCCTTGGTTCCATACGTGTGGAAAAATCATCTTTGCGGTTATGGCTTTCTTAACA
ATTAAGAAAATTCCTGGTAAACAGGATTGGGATAGTTACATAACACGTTTAGATCGTATCCCAAAATCTA
TTGAGGGAGCTAAAAAGATCACTGATTACTGTTCAGAATATTTTAATATTGCTAATGATCAGATCAAGAT
GATGGTTCTTGGAAAGACTAAAGAAGAATTGCAACGTGCTAATGGACTATATGGAGAAATTCAAGCTTGG
GCTCAAGAGGTTCGCCAGTATTTGGAATTGGATCAACGGAATAAAATTGATCTAGATACTGAAACCGCAA
ATCGTGTTGAACAACTTTGGATAAAGGGCTTGAAATTCAAGAGTGAACCCCTTTTGAGTAAGGAAATGTC
AGCTTTAGTTCATACAACTCTTTTACCAGCTAAGCAATTGTACGAGTATGTATCGTGTTCTCCTGTTAAA
GGGGGAGGACCACGTATGCGTCCAATTTGTTTATGGTTGGTAGGTGAATCAGGAGTTGGTAAGACTGAAA
TGGTATATCCATTGTGCATTGATGTTCTTCGGGAAATGGGGATGATTAAGAAAGATGATTTTCATCATCA
AGTTTATGGTCGTCAAGTTGAAACTGAATTCTGGGATGGTTATAAAGGACAGAAAATTGTCATTTATGAT
GATGCATTTCAGAAGAAAGATGACAAAACAGCAGCTAACCCAGAAATTTTTGAGGTTATTCGCTCTTGCA
ACACTTTTCCTCAGCATTTACATATGGCAGCTCTTCATGATAAAAATACTTTTTCTGCTGCTGAATTACT
CTTATATACCACTAATGATTATAATGTTAAGCTGGAATCTATTACTTTTCCCGATGCTTTCTTTAATCGT
ATGGGCGATATGGCTTATAAAGTTAGTCCTAAGAAAGAGTATGGTATTGAAACCGAGAAAGGGAATTCAG
GTAAAACTTATTTAAAATTGGATAAGAGTAAATTGGACAAAACAAAAGCTATTGACCTTTCAGTGTATGA
ATTCCAAAAAATTGTACGTGACGAGAAAAGTGATGCAGGTTGGATTGATTCTGGATCACCCTTGGACTAT
GAAGATTTTGCTAAATTAGTGTGTTCAAAATGGAAAGAAGCGAAACAATCTTCAATGAATAAATTGAAAT
TTTTGGAAGAATATGCTATTCGTGCTCAGGTTGGATCAGAAGAAAATTCTGAATATGGTGATTGTATAGA
TTTTGTCGATGATATTGCCAAACGCTTACAAAAAGGTGAAACTCTTGAAGAAATAGAGTTTGATTATGCC
TCAGATCCAGAGATGTTTACTCAATACTATCATTTTAAATCTACAATTAAACCGGCATCGCGTTGGCAGA
AGTATAAGGATCGGATGGACATTTGTTTGAGCGACTGTAAGACTTATTTAGCAAAGAAATACGAAGAAAT
TAAGAAAATTCTTGCCGAACATCCTATCTTGACGATTTTAGGAATGATAGGGGTTGCCTTATCTGCTCTG
GCAATGTACTATTGGTTTTCTAAATCGTTGGATCCTGTAGAAGCCGAGGTTGCTCCTTCTGGTGACGCTA
AAACAGTGCGCTTACCAAGGAAACTCGTTGAGATTGGTGCTTCTGGAGATGTTAAAACACAGAAGATTGT
GAAACCCGTTGTAGAGACCGAATGGCATCGTAACAATAAAGGAGAGATTGAAATTTCTTGTGATGAATGT
GGTATGCATAGGATGTCTGCATTTAACAATATGACAGATGAAGAATTTGATAACTGTACATATGAAGATT
TGAATAAGGACCAGAAACGTGAACTTGCCCAGTGGTCTACTAAAGATTCTTGGTTAGGTCGATTCTTTTT
GAGTCGAGATCGCAAGAATAAGGTTGGAATTTGGGCCGAAGTGGGACAATCAGGTGATGTTAAAACAAAT
AAAGCTCAGATTAAACGTGTTGAAGCTGGAGCCGAAGAATTAGTTACTGTTGCTTTAACTCAAGGTTGTT
CTGATGATGCTGCACACAATTTGATGATTGACGTTTTCCAAAAAAATACATATAGAATGTCATACTTCCG
TGGAGACAAGCGTTATCAACTTGGAAATTGTACATTTGTTCGTGGTTGGTCTTTTATTATGCCATATCAT
TTTGTACAGGCTGTGTTTGCGCGAAGATTGCCACCTAACACGATTATTTCTTTGTCCCAACAGATGTCTG
AAGATTTAATGCAAATTCCATTATCACACTTCTTTTCTGCTGGTGTTGACAATTTTTATTTAACAGATAA
TTGTGTGCGCTTGCCATTTAAAAATGGAGATTTTCGCGATTGTGTTATGGTTAATTTACATTCACGAATG
TGCACGCCACATCGTGATTTGGTTCGGCATTTTATTTTAACTTCTGATCAAGGTAAATTGAAGGGATCTT
TTAGTGGTGCAATGGCAACTTTCCATGTTAACAATATGGGTTTATATCGTGTTTATAATTGGCTAAATGC
AGTTCGTCCTTGCGATAAAAAGATAGAAATTTTCCACCCTGAAGATGGTTTTGAGTATCCCGAAGAATCA
TATATTCAACGTGACTGTTATGAATATAATGCACCGACTCGTACTGGGGATTGTGGATCTATTATTGGAT
TGTATAATAAATATTTAGAAAGGAAAATCATCGGTATGCACATTGCTGGAAATGATGCAGAAGAGCATGG
TTATGCGTGCCCTTTGACACAAGAGTGTCTTGAGACTGCTTTTTCTGCTTTAGTAAATAAAAATAAGAAG
AATATTTCCTCACAATTTTATTATGAAATACCCAATATGGTTGATCCACTCGGTGATAGTAGTGTTCCTG
AAGGTAAGTTTTACGCTTTAGGAAAGTCATCTATTCGTGTGGGACAGGCAGTTAATTCGTCCATAATTCC
TTCTCGAATTTATGGGAAGTTGTCTGTTCCTACAATGAAACCAGCACTACTCAAGCCAACGATTCTGAAT
AACAAAGTACATAATCCTTTATTGTCGGGACTTAAGAAATGTGGTGTAGACACTGCGGTCTTGAGTGATG
ATGAAGTTTTGAGTGCTTCACAAGATGTTTGTCGTGTTATGTTGAACCAATATAATAAAAATTTGAATAA
AACAAAGTATCAACGCATTTTAACATATGAGGAAGCTATTCGTGGAACCCAAGATGATGAATTTATGTGT
GCTATTAATCGTACAACGTCACCAGGATTTCCTTATGCACAAATGAAAAGAAATGCTCCAGGTAAACAGC
AATGGATGGGTTTTGGTGAAGAATTTGATTTTACAAGTAATTATGCACTAGCTTTGCGGAAAGATGTTGA
ACAACTCATTGAAGATTGTGCTAGCGGAAAAATATCTAATGTCATTTTTGTAGATACATTGAAAGATGAG
CGACGCGATATAGCTAAAGTAAATGTAGGTAAAACGCGTGTATTTTCTGCAGGTCCTCAACATTTTGTAG
TTGCATTTCGCCAATATTTCCTACCTTTTGCTGCTTGGTTGATGCATAATCGCATTTCGAACGAAGTAGC
AGTTGGCACTAATGTTTATTCATCTGATTGGGAACGTATTGCAAAACGTCTTAAAACAAAAGGTAGTCAC
GTCATTGCGGGGGACTTTGGAAATTTCGATGGATCTTTAGTAGCACAAATTTTGTGGGCCATATTTTGGG
AAATTTTTGTTGTATGGCTTAAGCAATTTATTGATATAGAGAATTCAGAAGGAAAACGTATTTTATGTAT
CTGTCTTGGTTTGTGGTCACATTTAGTTCACTCTGTTCATATTTATGAAGATAATGTATATATGTGGACT
CATTCTCAACCTTCTGGCAATCCTTTCACTGTTATTATTAATTGCTTGTATAATTCGATTATTATGCGAC
TGTCATGGATTCGTGTGATGGAGAAATTTCAACCTAGACTTAAGTCCATGAAGTGGTTCAACGAATATGT
CGCCTTGATAACATATGGTGACGACAATGTTTTAAACATTGATGCAAAGGTTGTGGAATGGTTTAATCAG
ATTAACATTAGTGAGGTTATGACTGAAATGCGACATGAATATACGGACGAAGCTAAAACTGGTGATATTG
TTAAATCTCGTAAATTAGAAGATATTTTCTTTTTGAAGAGAAAATTTCGTTTTAGCCCAGAATTACAACG
CCATGTTGCTCCATTGAAGATCGAAGTTATTTATGAAATGTTGAATTGGTCTCGCCGCTCTATAGATCCA
GATGAAATCTTGATGTCGAACATTGAAACGGCTTTTCGTGAAGTAGTTTACCACGGAAAAGAAGAATACG
ATAAACTAAGGTCAGCGGTATTGGCGTTGAAGGTACCCCAGGAACTTCCTGAAAACCCTCAGATTTTGAC
GTACAACCAATATTTGCACGATATTGAATATCTTGCGGACCCTTTGTACGACTTTTAGTTAAGATGTGAT
CTTGCTTCCTTATACAATTTTGAGAGGTTAATAAGAAGGAAGTAGTGCTATCTTAATAATTAGGTTAACT
ATTTAGTTTTACTGTTCAGGATGCCTATTGGCAGCCCCATAATATCCAGGACACCCTCTCTGCTTCTTAT
ATGATTAGGTTGTCATTTAGAATAAGAAAATAACCTGCTAACTTTCAAACAAATAATAATAACATTGAAA
ATGAAGATCGGAAAATTACTTCCGAGCAAAAAGAGATTGTACACTTTTCTAGTGAAGGAGTTACCCCTAG
TACCACTGCGGTGCCTGATATCGTTAGTCTTTCAACAGATTATTTGTCTATGACTACTCGTGAAGATCGT
ATCCACACGATTAAAGATTTTCTTTCTCGTCCAATTATAATTCAAACTGGTCTTTGGTCTTCCGCTACAA
CTGCCGAAACTCAATTGTATACTGCTAATTTCCCTGAAGTGTTCATTTCTAATACTATGTATCAAGAAAA
GTTGCGTGGGTTCGTGGGTTTGCGAGCAACTTTAGTCATTAAAGTGCAAGTGAATTCCCAACCTTTCCAG
CAAGGACGATTGATGCTACAGTATTATCCGTATGCACAGTATATGCCTAACCGTGTTTCTTTGGTGAATT
CCACTCTCCAAGGACGCTCTGGTTGTCCTCGAACAGATTTGGATTTGAGCGTTGGTACGGAAGTTGAAAT
GCGAATTCCTTATGTGTCCCCTCATGTATATTACAATCTTATTACTGGACAAGGATCATTTGGCGCTATA
TATTTGGTTGTATATAGCCAACTAAGAGATCAAGTTACAGGAACAGGTTCTGTTGAATATACTGTTTGGG
CTCATTTGGAAGATGTAGATGTGCAATACCCGACCGGTGCAAACATTTTCACGGGTAGCTCTCCAAATTT
TGCCTCTTTGGGTCAGAAAATGAGTGATGGAAAATTCACTGAAAAAGACTTGAGAGATATTTGGACTTCA
AAAGCGTACAATAAACAACCAGACAAAATTTTCGCACAAGTGGCTTCTGAAATAACACAACTCAAAGAAT
CAGGAACAATTAGTTCTGGAATTGGACAAGTTTCTGAAGGTCTTTCTACCATGTCTAAAATCCCTATACT
CGGAAATATGTTTACAAAACCCGCCTGGATTTCAGCTCAAGTATCTAATATCTTCAAGATGCTTGGTTTT
TCAAAACCCACTGTTCAAGGTCTTCCTTGTGAATCGAAACTGCGTGGTCAAGTTCGAATGGCGAATTTTG
ATGGCGCTGATACATCACATAAATTGGCTTTGTCTGCCCAAAACGAAATTGAAACAAAATCTGGACTTTC
TGGAACTTCTCCTGATGAAATGGATTTATCACACGTCCTTTCCATACCAAATTTTTGGGATCGTTTTACT
TGGAACACAACCGATGCCACTAGTTCTATTTTATGGGATAATTATGTTACACCAATGAAAATTAAACCAT
ATTCCTCTACAATATTAGATAGATTTAGATGCACTCATATGGGTTTTGTAGCCAACACACACGGTTATTG
GTGTGGATCAATAGTTTATACTTTTAAATTTGTTAAGACTCAATTTCATTCTGGACGTTTACGCATTAGT
TTTATTCCATTTTATTATAATACGACTATATCTGCAGGAGTTCCCGATGTTTCTCGTACCCAAAAAGTAA
TCGTTGATCTGCGCACCTCTACAGAAGTCTCTTTCACTATTCCGTATGTGTCTTCACGACCTTGGATGTA
CTGTATTCGTCCTGAAGCTTCGTGGCTTGGAACCGATAATGCTTTGATGTACAACGCCGTTACGGGTATA
GTGAGAGTTGAGGTTCTTAACCAGTTGGTTGCCGCTAACAACGTGTTTCAATCTATAGACACTATTGTTG
AAGTTAGTGGTGGTCCTGATTTAACTTTTGCAGCACCAATGGCTCCCTCTTATGTTCCTTATTCTGGAGG
TTTTACTTTAGCAGATGATGCGGCAGCAAAGAAACAGCGTGAGGAGGAGTATGACAACAACATACCTCAA
ACTATTTCTAATCGTGGAAAACGTGAGGTTGAAGATGCTCGTATTGTTGCGCAAGTAATGGGTGAAGATT
TAGCTATTCAAAGAAACGATGCTCAACATGGTGTTCATCCAATGACTATAGACACTCATAAGATCGACTC
AAATTGGTCTCCGGAAGCGCATTGTATTGGTGAAAAGATTATGTCTATTCGCCAATTGATTAAGCGTTTT
GGCATGGCTTTGAACTCCTTGAATTTGATAAGTGATGCACCAAACACCTTGATAGCACCATTTTCAGTTC
AGCACCCAACTCCTGTTGTTGCCCCTGCTGAACCCATGTCCCTTTTTGAATATTATTATTTCATTTATGG
ATTTTGGAGAGGTGGCATGAGATTTAAACTTCAGGCAGTACGTACAAACTCAGCAGAAACATCAGTTAAA
ACCGACACAACTTGGACTGTAAATTTGTGGAATTCTGTACAAGATTCTTTTAATTCTCTAATTAATGTAT
TTAGTACTACTGATTACCCTATAAAATCCACAGGAGCACTTCCAGCCGGAACAAGCGGTTTTGGCAATTC
GATGACGTATATAGATCCTGAGGTTGAAGGTTTTATGGAATTTGAGATTCCATATTATAATATCTCCCAT
ATTTCTCCAGCTACAACCTATGTTCGTGGTACTGAATCTCCTATTACAATTAATAGTGTCTTGCGTGGAC
ATTTGCCACCACAAATTGTGGCTGTTGCACCACAGGGCACTATTGCCACTACAGATGTAGTGAACGCTCA
ATTTGCTCGTGCTCCTTCTGACGACTTTTCATTTATGTATCTCGTTGGTGTTCCACCACTTACCAACGTC
GCTCGTCCCTAACTCCCTTACTATTCTGGATCCTTTAAAATTTATTAGGATAGACAAAAATTAACTCTAT
ATTAGATAGTATTAGATTAAGTTTCTTTTTGGTTTTGGGTTTTATTCAGTAACTATCTGCCCTGCTTACA
CGGGTATTATTTTTAATTCTTGTCCCTTCTGGACTCTTTTATTTTGTATTTTCAAAATTTTTACTAATTT
TTAGTCAGAGTCCTTAGGGGCTACCAGGTTTTTCGCAATTTTCCTGCTTACTGACAGTAATTGCAATTTC
GAATTAAAATAATAGTTGTTTTCT
5 changes: 5 additions & 0 deletions resources/h3n2_ha/gffs/h3n2_ha.gff3
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
##gff-version 3
##sequence-region EPI1857216 1 1718
EPI1857216 feature gene 1 48 . + . gene_name="SigPep"
EPI1857216 feature gene 49 1035 . + . gene_name="HA1"
EPI1857216 feature gene 1036 1698 . + . gene_name="HA2"
Loading

0 comments on commit 86136a9

Please sign in to comment.