diff --git a/q2_moshpit/kraken2/classification.py b/q2_moshpit/kraken2/classification.py index 98675e87..a9a37fc7 100644 --- a/q2_moshpit/kraken2/classification.py +++ b/q2_moshpit/kraken2/classification.py @@ -20,6 +20,7 @@ from q2_moshpit._utils import run_command, _process_common_input_params from q2_moshpit.kraken2.utils import _process_kraken2_arg from q2_types_genomics.feature_data import MAGSequencesDirFmt +from q2_types_genomics.per_sample_data import ContigSequencesDirFmt from q2_types_genomics.kraken2 import ( Kraken2ReportDirectoryFormat, Kraken2OutputDirectoryFormat, @@ -50,7 +51,7 @@ def _construct_output_paths( def _classify_kraken2( seqs, common_args ) -> (Kraken2ReportDirectoryFormat, Kraken2OutputDirectoryFormat): - if isinstance(seqs, MAGSequencesDirFmt): + if isinstance(seqs, (MAGSequencesDirFmt, ContigSequencesDirFmt)): manifest = None else: manifest: Optional[pd.DataFrame] = seqs.manifest.view(pd.DataFrame) @@ -68,16 +69,29 @@ def get_paths_for_reads(index, row): def get_paths_for_mags(mag_id, fp): return mag_id, [fp] + def get_paths_for_contigs(contig_id, fp): + # HACK: remove after adding manifest or other solution, see + # https://github.com/bokulich-lab/q2-types-genomics/issues/56 + # NOTE: str.rstrip() strips a set of characters, not a suffix, so + # the suffix is cut explicitly ('sons_contigs' -> 'sons', not ''). + suffix = '_contigs' + if contig_id.endswith(suffix): + contig_id = contig_id[:-len(suffix)] + return contig_id, [fp] + try: if manifest is not None: # we got reads - use the manifest iterate_over = manifest.iterrows() path_function = get_paths_for_reads - else: # we got MAGs - use the filenames directly + else: iterate_over = ( (os.path.basename(fp).split(".")[0], fp) for fp in sorted(glob.glob(os.path.join(seqs.path, "*.fasta"))) ) - path_function = get_paths_for_mags + if type(seqs) is MAGSequencesDirFmt: + path_function = get_paths_for_mags + elif type(seqs) is ContigSequencesDirFmt: + path_function = get_paths_for_contigs for args in iterate_over: _sample, fn = path_function(*args) @@ -103,6 +117,7 @@ def classify_kraken2( seqs: Union[ SingleLanePerSamplePairedEndFastqDirFmt, SingleLanePerSampleSingleEndFastqDirFmt, +
ContigSequencesDirFmt, MAGSequencesDirFmt, ], kraken2_db: Kraken2DBDirectoryFormat, diff --git a/q2_moshpit/kraken2/select.py b/q2_moshpit/kraken2/select.py index 148fce14..9504375d 100644 --- a/q2_moshpit/kraken2/select.py +++ b/q2_moshpit/kraken2/select.py @@ -137,7 +137,7 @@ def _kraken_to_ncbi_tree(df): for _, row in df.iterrows(): r = row['rank'] label = row['name'] - otu = str(row['ncbi_tax_id']) + otu = str(row['taxon_id']) if r in ('U', 'R'): continue # unclassified or root @@ -174,7 +174,6 @@ def _kraken_to_ncbi_tree(df): if parent_node.children: parent_node.children[0].is_actual_tip = True - print(tree) return tree diff --git a/q2_moshpit/kraken2/tests/data/contigs/README.md b/q2_moshpit/kraken2/tests/data/contigs/README.md new file mode 100644 index 00000000..2715396a --- /dev/null +++ b/q2_moshpit/kraken2/tests/data/contigs/README.md @@ -0,0 +1,8 @@ +## samples/ +simulated contigs taken from reference genomes from refseq for each of the +species Bacillus anthracis, Mus musculus, Staphylococcus aureus, and +Staphylococcus epidermidis + +## small-kraken2-db/ +kraken2 database created using ~20k nt of each of the above mentioned genomes, +the ncbi taxonomy, and all default parameters (kraken2 version 2.1.3) diff --git a/q2_moshpit/kraken2/tests/data/contigs/samples/ba_contigs.fasta b/q2_moshpit/kraken2/tests/data/contigs/samples/ba_contigs.fasta new file mode 100644 index 00000000..177ba50b --- /dev/null +++ b/q2_moshpit/kraken2/tests/data/contigs/samples/ba_contigs.fasta @@ -0,0 +1,40 @@ +>contig0 +TCGGGAAAGTAGTAGAAGAAGTTCAATGTGAAAAAGTAGATGGAGAAGAGTTAAAAATATCTTTTAGTGCAAAATATATGATGGATGCACTAAAGGCATTAGATAGTACTGAAATTAAGATTAGCTTTACTGGAGCAATGAGACCATTCTTAATTCGTACGGTAAATGATGAATCCATTATTCAATTAATTTTACCGGTTCGTACTTACTAAGTAAGAAATAAGGGTTGCTAGTTTTCAGATGCTAGTAGCCCTTATTTGATTTTTGGGTATTACTTTCCTAATGCTAGTTTATTTAGTACAATGAAAGAATGA +>contig1
+TCGTTATATGCGTCAAATTATTGAACATGGTTATATCTATATTGCACAGCCACCGTTGTTTAAAGTACAACAAGGTAAAAAAATTCAATATGCTTATAATGAGAAAGAGCTTGAAAAGATTTTAGCTGAATTACCAGCTCAACCTAAACCTGGAATCCAACGTTACAAAGGTTTAGGAGAAATGAATCCAACTCAGCTTTGGGAAACAACAATGGACCCAGAAGTACGTTCATTACT +>contig2 +AAAGTGGCGTTATTTCAGACCCATTCTTTTTAACTCCAGAACATCAAGTGTATGATGCAGAGCATCTTATGGGAAAATACCGTATCTCAGGTGTACCGGTTGTAAATAATTTAGATGAGCGAAAATTAGTTGGTATTATTACAAACCGTGATATGCGTTTTATCCAAGACTACTCAATCAAAATTTCCGACGTAATGACAAAAGAACAGCTAATTACAGCTCCAGTTGGTACAACGCTAAGTGAAGCTGAAAAGATCCTACAAAAGTATAAAATTGAAAAACTCCCTCTTGTTGATAACAACGGTGTATTACAAGGGCTTATTACAATAAAAGATATTGAAAAAGTAATTGAATTCCCAAATTCTGCGAAGGATAAGCAAGGGCGCTTATTAGTTGGAGCAGCAGTTGGTGTAACGGCTGATGCTATGACTCGTATCGACGCATTAGTAAAAGCTAGCGTAGATGCAATCGTACTTGATACAGCTCACGGACATTCTCAAGGTGTTATTGATAAAGTAAAAGAAGTTCGTGCAAAGTATCCATCATTAAATATTATCGCTGGAAATGTTGCTACTGCTGAAGCAACAAAAGCATTAATTGAAGCAGGTGCAAACGTAGTTAAAGTTGGTATTGGACCAGGTTCTATCTGTACAACACGTGTTGTAGCCGGCGTTGGTGTACCACAATTAACAGCGGTTTATGATTGTGCAACAGAAGCTCGTAAACACGGTATTCCAGTTATTGCTGATGGTGG +>contig3 +TTACACTAGGGCAATTTTTAAAGTTAGCCGATGTAATTGATACAGGTGGCGCTGTAAAATGGTTTTTACAAGAATATGAAGTGTACGTGAATCAAGAACTTGAAAATAGAAGAGGGCGCAAGCTATATGCGAACGATATTATTGAAATTCCAGGAAGCGGAAGTTTCCAAGTTCAGTCATAAAGGGGGAGCCCTTTGTTTATTTCAGAAATACAATTAAAAAACTATCGCAATTATGAAAAATTAGAGCTTTCCTTTGAAGATAAAGTAAATGTAATTATCGGCGAGAACGCGCAAGGGAAAACGAATTTGATGGAAGCTATTTATGTTTTGGCGATGGCGAAATCTCATAGAACCTCTAATGATCGTGAGCTTATCCGTTGGGATGAAGATTTCGGTCAAATTAAAGGAAAGTTACAAAAGAGAAATAGTT +>contig4 +TTTTCCGTTTACGAGTCGTAACACCATGAGCTAAAACAGTAGTGGATTCAGGATAGTTCGGATCAATAGTTAATAAGCACGCTTTAAAAAGTTGGCTTATTCCATAAAACAATAACATTGGTTGAATTGAAAAGGGAGCTACCTTATATAATTCGTAATAATTTTTCCCATGTTCTAAGTAATAAATAAAGGGATAACAATTTTCGAAACTTTTTTTCAGCATCTTGTATGGAGGATTTTTCATAACAACGGGCAAGATAACGTTGTACATTTTGAGATGAAAAGAAGAAACTTAATTGCTGCCAAGTACAATGTGTTTGATGCATATATTACGCTCCTTTTATTGTCTAAAAATTCTAACATATAAATACGTCCTTGACAGTATTTTAACCAATTGATAAGCTACTAATAATAATTTCTGGTATCATGGGG +>contig5 
+TGTGGCATTAGAAAATGGTGGTTCTTATACGGTAAAAGAGTTGTACGAGGCAATGGCAATCTTCTCTGCAAACGGTGCAACGATTGCATTAGCAGAAGCAATTGCAGGTAAAGAAGTAGATTTCGTAAAAATGATGAATGATAAATCGAAAGAGCTAGGGTTGAAAAATTATAAATTTGTCAATTCTACAGGTTTAACGAATAAGGATTTAAAGGGAATGCATCCAGAAGGAACAACAGCAGATGAAGAAAATAAAATGTCTGCAAAGGATGTTGCAACTTTAGCACAACATTTAATTAAAGATTATCCGAAAGTGTTAGATACAGCAAAAATCCCGAAAAAAGAATTCCGTCCAGAAAAAGAGAAGTTTGCAATGTCGAACTGGAACTGGATGTTAAAAGGTTTAGTTAAAGAATATGATGGCGTAGATGGCCTAAAAACAGGTTCAACTCCAGAAGCAGGAGATTGCTTCACTGGTACGGTTGAAAGAAACGGTATGCGTTTTATTTCTGTAGTAATTAAAACAAGTTCTCATACAGCACGTTTTGATGAAACAAAGAAGCTATATGATTATGGATTTGCTAACTTTGAAATGAAACAAATGTATAAAAAAGGTTCTTCAGTAAAAGGACAAGAAACAGTACGAGTAGAAAATGCGAAAGATAAGGATGTAGCAGTTCAAACGAAACAAGCCATTTCACTTCCAGTACCAAAAGGAAGTAAAGAAGTTTATAAAACAGAATTAAAAGAATCAAGTAAAGGACAAGAAGCAC +>contig6 +GGAACACGTGAAATTCCGTCGGAATCTGGGAGGACCATCTCCCAAGGCTAAATACTCCCTAGTGATCGATAGTGAACCAGTACCGTGAGGGAAAGGTGAAAAGCACCCCGGAAGGGGAGTGAAAGAGATCCTGAAACCGTGTGCCTACAAATAGTCAGAGCCCGTTAACGGGTGATGGCGTGCCTTTTGTAGAATGAACCGGCGAGTTACGATCCCGTGCGAGGTTAAGCTGAAGAGGCGGAGCCGCAGCGAAAGCGAGTCTGAATAGGGCGTTTAGTACGTGGTCGTAGACCCGAAACCAGGTGATCTACCCATGTCCAGGGTGAAGTTCAGGTAACACTGAATGGAGGCCCGAACCCACGCACGTTGAAAAGTGCGGGGATGAGGTGTGGGTAGCGGAGAAATTCCAATCGAACCTGGAGATAGCTGGTTCTCCCCGAAATAGCTTTAGGGCTAGCCTTAAGTGTAAGAGTCTTGGAGGTAGAGCACTGATTGGACTAGGGGTCCTCATCGGATTACCGAATTCAGTCAAACTCCGAATGCCAATGACTTATCCTTAGG +>contig7 
+TCTTGACATCCTCTGACAACCCTAGAGATAGGGCTTCTCCTTCGGGAGCAGAGTGACAGGTGGTGCATGGTTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTTGATCTTAGTTGCCATCATTTAGTTGGGCACTCTAAGGTGACTGCCGGTGACAAACCGGAGGAAGGTGGGGATGACGTCAAATCATCATGCCCCTTATGACCTGGGCTACACACGTGCTACAATGGACGGTACAAAGAGCTGCAAGACCGCGAGGTGGAGCTAATCTCATAAAACCGTTCTCAGTTCGGATTGTAGGCTGCAACTCGCCTACATGAAGCTGGAATCGCTAGTAATCGCGGATCAGCATGCCGCGGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCACACCACGAGAGTTTGTAACACCCGAAGTCGGTGGGGTAACCTTTTTGGAGCCAGCCGCCTAAGGTGGGACAGATGATTGGGGTGAAGTCGTAACAAGGTAGCCGTATCGGAAGGTGCGGCTGGATCACCTCCTTTCTATGGAGAATTGATGAACGCTGTTCATCAATATAAGTTTCCGTGTTTCGTTTTCGTTTAGTTTTGAGAGTTCAATAAAAAGTATTGACTCTTAAATGAGGATATGATAT +>contig8 +AGCAATTACCTATAAGATATGCTGCATTTAGCTCTTGTTTCCGTTCTGAAGCAGGTTCAGCTGGCCGTGATACACGTGGTTTAATTCGTCAGCATCAGTTCAATAAAGTAGAGCTTGTAAAGTTCGTAAAACCAGAAGATTCTTACGAAGAGTTAGAAAAACTAACAAATGATGCAGAACGCGTGTTACAATTATTAGAGTTGCCATATCGCGTTATGAGCATGTGCACAGGCGATTTAGGATTTACAGCAGCGAAGAAATACGATATCGAAGTATGGATTCCAAGCTATGGCACATATCGTGAAATCTCTTCTTGTAGTAATTTCGAGGCTTTCCAAGCGAGACGTGCAAATATCCGTTTCCGTCGTGAGCCAAACGGCAAACCAGAACATGTTCATACATTAAATGGATCTGGTCTTGCAATTGGACGTACGGTAGCAGCTATTTTAGAGAACTACCAACAAGAAGATGGTACAATTATAATTCCAGAAGTTCTTCGCCCTTATATGGGAGGAAAAACAGTTATTAAGTAAATTTATAAACATTCATCGGTATGAGTGATTGGTAATTATGAGCGTTGTCAGTACTATAATGTAGGAGGGGAAAAGTAAAATTTTCCTTTCCTCATAATTTATTTTAGTAGGGTTGACTAACTGTTTTTCTTTTGATATTATATTTGATGTCAATATGGAGGTATACCCAAGTCTGGCTGAAGGGATCGGTCTTGAAAA +>contig9 +CTAAGGTGTGCGAGAGAACTCTGGTTAAGGAACTCGGCAAAATGACCCCGTAACTTCGGGAGAAGGGGTGCTTTCTTAACGGAAAGCCGCAGTGAATAGGCCCAAGCGACTGTTTAGCAAAAACACAGCTCTCTGCGAAGCCGTAAGGCGAAGTATAGGGGGTGACACCTGCCCGGTGCTGGAAGGTTAAGGAGAGGGGTTAGCGTAAGCGAAGCTCTGAACTGAAGCCCCAGTAAACGGCGGCCGTAACTATAACGGTCCTAAGGTAGCGAAATTCCTTGTCGGGTAAGTTCCGACCCGCACG +>contig10 
+TTGGTGAAGTAATCGGTAAGTATCACCCTCATGGTGATTCAGCTGTTTATGAAACGATGGTACGTATGGCGCAAGATTTCAGTCAACGTTATATGCTTGTTGATGGGCATGGTAACTTTGGATCTGTCGATGGAGATTCAGCGGCAGCAATGCGTTATACAGAAGCAAGAATGTCTAAAATCTCTATGGAATTAATACGTGATATTTCAAAAAATACAATTGATTATCAAGATAACTATGATGGTTCTGAAAGAGAGCCGATTGTGTTACCAGCGCGTTTTCCTAACTTACTAGTAAATGGTACGACAGGTATTGCAGTTGGTATGGCAACAAATATTCCGCCGCATCAACTTGGTGAAGTAATTGATGGCGTATTGGCATTAAGTCATAATCCCGATATTACTATTGCAGAATTAATGGAGTGCATTCCAGGACCAGATTTTCCGACGGCAGGTTTAATTTTAGGAAGAAGTGGTATTCGTAGAGCTTATGAAACAGGACGCGGGTCTATTATACTTCGTGCTAAAGTTGAAATTGAAGAGAAGTCAAATGGCAAACAATCTATTATCGTAACGGAATTACCTTATCAAGTGAATAAGGCGCGATTGATTGAAAAAATTGCAGAATTAGTTCGCGATAAGAAAATTGAAGGTATTACAGATTTACGTGATGAATCAGATCGAAATGGTATGCGTATTGTTATGGAAGTACGTCGTGATGCCAATGCTAATGTATTATTAAATAACTTATATAAACATACAGCACTTCAAACAAGTTTTGGTATTAACATGCTGTCTCTTGTAAATGGAGAACCACAAGTACTGAATTTAAAACAAAATTTATATCATTACTTGGAACATCAAAAGGTAGTAATTCGTAGACGTACTGCTTATGAACTTGAAAAGGCAGAAGCACGTGCTCATATCTTAGAAGGATTACGAAT +>contig11 +GGTTGTAAACTTTGCAGCAGGTGGTGTAGCAACACCAGCAGATGCAGCGTTAATGATGCAATTAGGTGCGGATGGTGTATTTGTTGGATCTGGTATCTTTAAATCAGAGAACCCAGCGAAATTTGCACGTGCAATCGTTGAAGCAACAACTCATTATGAAGATTACGAACTAATTGCAAGCCTTTCAAAAGGATTAGGTAATGCGATGAAGGGTATCGAAATTTCAACGTTATTACCAGAACAACGCATGCAAGAGCGTGGATGGTAATTGAAGGAGAACTTTAAAATGGTGAAAATTGGTGTACTAGGTCTTCAAGGTGCAGTTCGTGAACATGTAAAATCAGTTGAAGCAAGTGGTGCAGAAGCTGTTGTTGTAAAGCGTATAGAACAACTTGAAGAGATTGATGGTCTTATTTTACCAGGCGGTGAAAGTACAACTATGCGCCGTCTTATTGATAAGTATGATTTCATGGAACCACTTCGTACATTCGC +>contig12 +ATCTTTGTGTCAATGGAACAAAAGCAAATGCAAGAAAATTCATATGATGAAAGTCAAATACAGGTACTTGAAGGACTAGAAGCAGTTCGAAAACGCCCTGGTATGTATATTGGATCTACAAGTGGAAAAGGACTTCACCATCTTGTATGGGAAATCGTTGATAATAGTATCGATGAAGCACTTGCAGGGTACTGTGACGAAATTAACGTTAGTATCGAAGAAGATAATAGTATTCGTGTAACAGATAATGGACGTGGTATTCCAGTTGGTATACAAGAAAAAATGGGACGTCCTGCTGTAGAAGTTATTATGACCGTTCTTCATGCTGGTGGTAAGTTTGGCGGCGGCGGTTATAAAGTTTCTGGTGGTTTGCATGGTGTTGGGGCATCTGTAGTAAATGCTCTATCAACAGAACTAGAGGTATTTGT +>contig13 
+CTATTTTTTATCTTTATTAATTAAATAAAATCTTATACTTACCGGAGGTTCTTCTTTATGCGTTTTTCAATTCAAAAAGACTATCTTGTAAGAAGTGTACAAGATGTAATGAAGGCTGTTTCTTTTCGTACAACAATTCCGATCCTTACAGGAATTAAAGTTGTCGCTACGGAAGAAGGAGTTACATTAACAGGAAGCGATGCTGATATTTCGATTGAATCTTTTATCCCAGTTGAAGAGGATGGAAAAGAAATTGTAGAAGTAAAACAATCAGGAAGTATTGTTTTACAGGCTAAATATTTTAGTGAAATTGTAAAAAAATTGCCGAAAGAAACTGTAGAAATTTCTGTCGAAAATCATTTAATGACAAAAATAACTTCTGGGAAATCAGAATTTAATTTAAATGGTTTAGATTCTGCAGAATATCCATTGTTACCACAAATTGAAGAACATCATGTTTTTAAGATTCCAACAGATTTACTTAAACATATGATCAGACAAACTGTATTTGCAGTCTCCACTTCTGAAACAAGACCAATCTTGACAGGTGTAAACTGGAAGGTATATAACAGCGAACTAACTTGTATTGCTACAGATAGTCACAGGTTAGCTCTTCGAAAAGCAAAAATTGAAGGTATTGTAGATGAATTCCAGGCAAATGTTGTTATTCC +>contig14 +TAATTACGCCAAAAGATACAAACGACCCTGGATTTTTAACTGGTAAGTCGTTGCAAGTGGATCTTGTAACAACATCTGAAGTAGAAGAAGCAAACTGGTTTACTCGTGCTATGCGCGGAATTGGTTCTTTCTTTAGTGGTATATGGAATAGTGCTGTTGATACAGTAAAAGGTTGGTTTTAAAAGCTCCTCATTGTAGGGGCTTTTTCTTATTCCTATTTTTCATACCGACTTTATGAAAAAGTAGTAGACAAGCATCTGATAGTTAGTGGTAGAATGTAAGAGTATTCTTAATTTTCGCCTTTAACGGGGAAAAGCAATTCACCTAGGGGGGGTTTTGTAACATGACAAATGTAACAGGGACAGAACGTGTAAAACGTGGAATGGCAGAAATGCAAAAAGGCGGCGTTATTATGGACGTAATTAACGCTGAACAAGCAAAAATTGCAGAAGAGGCAGGCGCAGTTGCCATTATGGCATTAGAGCGCGTACCAGCAGATATTCGTGCAGCAGGTGGCGTTTCTCGTATGGCAGATCCAACAATCGTTGAAGAAGTTATGGGTGCTGTGTCAATTCCGGTTATGGCAAAATGCCGTATCGGTCACCTTGTAGAAGCACGTGTATTAGAATCATTAGGGGT +>contig15 +TTCTGCTAGTAAAAAAGACGAAATGTAGTAAGGAACTTTTTGAGCAGAAATAGTAAGGAATTGATAAATAAAGGGGTAATCTTCAGATCCATAGCTTCCAAGTAAATCATACCACCAATCTGTTTCGTATCTAGAAATCATACTTAAATTGTAGAGCAATAAATAATGAACAAGAAGTTCAGGTAATGCGGGTTTAGGATTTCTAGGGTCAGTTGTAAGTGGTAAGTAATAAGTATCAGCAAGATATTCGTAGTATAAAGGCGTACTATATA +>contig16 
+AAACCAGATCCAGAAATTTTTCAGGAAACAACAGTATACGAATTTGATACACTAGCAACTCGTATGCGTGAATTAGCATTTTTAAATCGTAATATTAAACTGACGATTGAAGATAAACGTGAACATAAGCAAAAAAAAGAATTCCATTATGAAGGTGGAATTAAATCATATGTTGAGCATTTAAACCGCTCAAAACAACCAATCCATGAAGAGCCTGTATATGTAGAAGGATCAAAAGATGGTATTCAAGTTGAAGTTTCCTTACAGTATAACGAAGGATATACAAATAATATTTACTCATTTACGAACAACATTCACACGTATGAAGGTGGAACACATGAAGTAGGGTTTAAAACAGCTTTAACTCGTGTGATTAACGATTATGGGCGTAAAAATAGTATTCTAAAAGATGCAGACAGTAATTTAACTGGTGAGGACGTTCGTGAAGGTTTAAC +>contig17 +CTTGCTCTTATGAAGTTAGCGGCGGACGGGTGAGTAACACGTGGGTAACCTGCCCATAAGACTGGGATAACTCCGGGAAACCGGGGCTAATACCGGATAACATTTTGAACCGCATGGTTCGAAATTGAAAGGCGGCTTCGGCTGTCACTTATGGATGGACCCGCGTCGCATTAGCTAGTTGGTGAGGTAACGGCTCACCAAGGCAACGATGCGTAGCCGACCTGAGAGGGTGATCGGCCACACTGGGACTGAGACACGGCCCAGACTCCTACGGGAGGCAGCAGTAGGGAATCTTCCGCAATGGACGAAAGTCTGACGGAGCAACGCCGCGTGAGTGATGAAGGCTTTCGGGTCGTAAAACTCTGTTGTTAGGGAAGAACAAGTGCTAGTTGAATAAGCTGGCACCTTGACGGTACCTAACCAGAAAGCCACGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGTGGCAAGCGTTATCCGGAATTATTGGGCGTAAAGCGCGCGCAGGTGGTTTCTTAAGTCTGATGTGAAAGCCCACGGCTCAACCGTGGAGGGTCATTGGAAACTGGGAGACTTGAGTGCAGAAGAGGAAAGTGGAATTCCAT +>contig18 
+GCTAGCTTCGGTGGAGGCGCTGGTGGGATACCGCCCTGACTGTATTGAAATTCTAACCTACGGGTCTTATCGACCCGGGAGACAGTGTCAGGTGGGCAGTTTGACTGGGGCGGTCGCCTCCTAAAGTGTAACGGAGGCGCCCAAAGGTTCCCTCAGAATGGTTGGAAATCATTCGTAGAGTGCAAAGGCATAAGGGAGCTTGACTGCGAGACCTACAAGTCGAGCAGGGACGAAAGTCGGGCTTAGTGATCCGGTGGTTCCGCATGGAAGGGCCATCGCTCAACGGATAAAAGCTACCCCGGGGATAACAGGCTTATCTCCCCCAAGAGTCCACATCGACGGGGAGGTTTGGCACCTCGATGTCGGCTCATCGCATCCTGGGGCTGTAGTCGGTCCCAAGGGTTGGGCTGTTCGCCCATTAAAGCGGTACGCGAGCTGGGTTCAGAACGTCGTGAGACAGTTCGGTCCCTATCCGTCGTGGGCGTAGGAAATTTGAGAGGAGCTGTCCTTAGTACGAGAGGACCGGGATGGACGCACCGCTGGTGTACCAGTTGTTCTGCCAAGGGCATAGCTGGGTAGCTATGTGCGGAAGGGATAAGTGCTGAAAGCATCTAAGCATGAAGCCCCCCTCAAGATGAGATTTCCCATAGCGTAAGCTAGTAAGATCCCTGAAAGATGATCAGGTTGATAGGTTCGAGGTGGAAGCATGGTGACATGTGGAGCTGACGAATACTAATAGATCGAGGACTTAACCATATAATATGTAGCAATGTTATCTAGTTTTGAAGGAATATGCCTTCATAGTTTGGTGATGATGGCAGAGAGGTCACACCCGTTCCCATACCGAACACGGAAGTTAAGCTCTCTAGCGCCGATGGTAGTTGGGACCTTGTCCCTGTGAGAGTAGGACGTCGCCAAGCAACTAAAAACACAAGTC +>contig19 +CGGTAAAGCAAGGGCCGTTTTTAGCTGCTTCTTTCCATCCAGAATTAACGGATGATCATCGTGTAACAGCATACTTTGTAGAAATGGTAAAAGAAGCGAAAATGAAAAAAGTTGTATAAGTAACTTGCAACTTGTATAAGATTATAGTAAATTGATGGTAACAATTTTATAAAATAAGCGTGTTGATAGGAAGTAGTAACAAATGTCGTTTCTTATAGAGAGTCGATGGTTGGTGGAAATCGATAGAAACAGTTTGTGAATCCATCCTGGAATGGAATGTGGAATATCTTTATGATTAGTAAACATTCCCGGTGAAGAGCCGTTATTTCTACTTGAGAGGAAGGCGGTAATGCTTTCAACTAGG diff --git a/q2_moshpit/kraken2/tests/data/contigs/samples/mm_contigs.fasta b/q2_moshpit/kraken2/tests/data/contigs/samples/mm_contigs.fasta new file mode 100644 index 00000000..0e986284 --- /dev/null +++ b/q2_moshpit/kraken2/tests/data/contigs/samples/mm_contigs.fasta @@ -0,0 +1,40 @@ +>contig0 
+TATTTTTGGAACTTAGCTGACCACAAGTAGCTGGAAAGGGAAATCATGGCTAGCCTTGTGCCAGAAGTTTCTAGAGAAGAGTGTCCAAAACTTGCTGAAACCAAACAAGTGACAGATGAAAACTGTACATGGAGGTGGCGCTAAAAGCGAACTAATTCAAAATCGTAGAAGAAAAAATCAGAAAGCAGTAGAGGCTCTCATAAGCTGGGCGCTTTATACCTAGGTAGGAACTAGTGTTTTTCATCATTACACCAAGAAACAGATTCTGGATGCAGGGCAGAGGGCAAACCAGAGCGTTCTGTTGCTCAGACTACTCCCTGGGGGCTCTGTGGCTCCCACGTGGGGGTCGCAGACAGTGTGGTGTGTTCTTAGCTGGTTCTGAGGTAGAGGATTGTCACACTCTCACTGACTCTTTCTCTCCCACTACCTATCTAGACACCCCAGTAGCCAAGAAGTCTTGCTGAGCTAAGGGTGAGGAGCCATTAGAATCTCAAGCGTTTAAACTCTGCCTTGTCTTCTTGGGTCCCAGCTGAGGTCATTCCATACCCCGAGCCATTTAACCCAACACACAGGGCCTGACCTCTTAACAGTGCAGAGGCATGGAGTGCCAGCTATGTCCTCTCAGTGTGTCCCAAGAGGATAGTTTTACAGTGTTCTTCCCTTTGCCCATTTTTCCCTTTCACAT +>contig1 +GTTTAAGAACTCCCAAGGCAGAGTATCTTAGCACAAAAATGCTTATTTGGATGAAAAAAAAAAAAAAAGAAGAAGAAGAAGAAGAAGGTGGTTGGGGTCCAGGCTTCCTGAAGCTCATTGATCACTGAAGTTGCTTGGCTGTCTTGGCTGTTTGTAGCAGTGTTCTAGTGTGTGTGTGTATGTGTGTGCTTGTGTCCGGGGGTAGGAGATGGGGAGGGATAAGGAGGGAGGAAGGGAGAGAGGTAGTCTACTGCCTTCAGCTCAACTGGGATCTTTTGAAAGCCAAACACACAGCTTCTCTAGGACTGTAATCATTACTCACTTTGTTGTTACACCTTATAGATGTACCTGTAAGATGCACAGTTAGCCACTCCTATAAACATACAAAGCACAGGATATCATCAGAAAAGTGTTTGGCTGGGGTCTAGAAGGCCATCTGGATTAGTCTGTAACTGGCTGCCATAAGTTCAAGTCCTCTTCTAAGCCATGCCTTACTGCATCATAACAGATAACTTCAAAGGT +>contig2 +ACTCAAAAATATATTTACAAACACCTTGGTGAGAAGCTTTGGTTGGTTCCCCAATTAGCCCCTATGTTAATCATCCTGTTTATTCTCATCGAGGTTCTGCCACTTGGCTGGTTACCTCTGCGTAGTCCCATGTTTCTGTCCTCCTCCATGGTCTCAGGCAAATCTTCCACCTATTGTCTATCCCAGAATCCAATCTCCCTACCACACGTCCTGCCTTCTATTTCCTGCCTAGCCTTATAGGCCAATCAGCATTTGATTGGCAGGTCCTGTGACTATACAGTACATAAGAGAATCTCTCTATAGTAAGAGTTAAAATAGATTTGTGCATATATAATGGGGCAGAGGGAGAAAGCTTGGCTTGGGGAAAGGCTGATCTAGTAAAGAGCACAGGGCAAGATGAGAAACTGTAGAAGGACCCAAAGTCCCCAGAGAAGCTGAAATAAAGGCTCAAGATCTCGGGGACAGAGCAGGTTGAGTGTTGACTTAGCTTAACAAATGTCTTGGGAATGGGTAGATGAGGTGGAAGCAGAGTTCCCAGAGCTGTGAGTGGGAGGGCCCAAGGTCAGAGGTGCAGAACGGAGTCAGGCAACTCTGAGAATGTCCAGCTATGGTGTGTGCCCTCTGGGAACATGGTGCATAGCTCACTGATTGGTCAAGTGTGCTTACTGGGCACCTGTGCTACACCAAACCATGCTCTAGAGTCAAAGTCCCCACTCCCTGGGAATTCTGATAGCGTGAAGGAGTAA +>contig3 
+AATTATACCAAATTTTATGTGACAGCACTGACTATGTCATTGAATTACATCAAATTGTGTTATAATTTCTCTGAAACCCTTAAAAACACTTTACAAAAAACGCCTTATAAACTGCAAGACAAGTCCCAGAGGATGGGGCTGAGGAGGTAGTCTGGTCCTTGCCTTTGAGTTTTAAAAGTATTTTATTTTTTCTCTATCTTGTATTTGTTTGTTTATTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATGATTATCTTGTTTTAAGACAGCATTTCATTTTATAGCCCAGGCTGGCCTCACATACTCCAGGAAATTCTCCTGCCTCAGCAGGATTACAGGTGTGAGCCAGCCTGGCTTGTCTTTGCCTTCCTAAATAAAAAGTTTGAAGCCCAGGAAACTTGGTCATTCCTCACCCCGTGGAGAAGCAAAGAAGTGATCATTTTCTTAAAGACCCTGGTGTAGTTGTTTAAAACTAC +>contig4 +GCCTGCAGCACCAAACAGGAGGAGGCAGAAAAGGAAACCCCAGACCTTGTTATCAGGAAGGGGAGGTGTTTTCTCACAGCTGCCTTTGGGCTCCTTTTCTAAGGGCCATATATCTGAAGTGACAGGATCACCTCATACTGTGTAAGAGGTTATGTTGTTCTTAAAGCCACAGAAAACAGCAAACCCTCTGAAGTGTAGCTTGACATGAAACGCCTACTACATGCCACTAAAGTACTGACAGCAGGCTGAGGGAAGTCACCAAGGAGAAGAGCCGAGTGTGCTCCCAAACGCTCTCTAATTGATTTACATTATATAACAAGGTAAAAGCTTACTAGACAGGTGGCCAGTTGTCACCTGACCT +>contig5 +GTGGTCATCCCAGATGAGGACCTCATCACACTACTCAGGATGGTAAATAACTCTAAACAGGCACATCAATTATTCACCAAGTTGACATTTAATAGTTTCAGCCATGACTGACCAAGAGCAACTAAAACCATAGGAAGCAAAACCAAGAACAATGGGAAATGGTTCATCATATATCATCATACATTTCCCTTGAGATATAGCCATTGGGGGGAAATCACCTGCCAATAGAGGGGTGGAACTATTGCTTTATAGAGCTTGGATACACCTTCGCAAAGGAAAAAGTCAGTGAGTCCTGACCAGGAGTAACAAGTGGAGAACCACTTGCATAAAGAGGTTGTGATTCTTTGAAA +>contig6 
+TTGAGAATCACATTTTTTTTAAGACATCATAAATTGAAATACATTTTGTGGTATACTCTGTGAGTTCTGTTAGTAACAAATGCCACCATGTGTTGATAGCACAGAATCCTGGTCTTCATCGGCCTTTATTGTATTTCAAAAACATTAAGGATTAGAGAGGGCCTGCAATCACAGTGAAGAAGGAATCCTTCCCTTGAGCTATCTCCATTCAAGACATAGATATAGGTAATTAGCCTTTTTCTAAGGGTCACTGACAGATTTTGGTACAAGTCAAGTCCATATTGCTTAAAAATGAAATGGGTATACCCTCAAAAACAATATTTCTTCAGTGGCTCTAATGTATTGTTTCCCAAACTAGAAAGGCAACAAATCTATTCAGTTACAGGGACCAAGCCTCTGGGGTTACGAGTGTCACATGACGAGCTCTGAAGACGCTCAAGGATGCTGTGGAGTCCTCTGATGGACTTGAGGGAGGCAGACAGCATCTCTAAACCACCTTCTCTGCCAGTCTCAAGTCTTTGAGAGCTCATGTGTTAAAGAAAGAAAGAAGCAATACCCAGAGTGTGTTTGCTCCATTAAATGAGTCGACGACCTTAAAGCCTCTCAGCAGCTGGAACCCAGGGCCATGGAAGGGATGCTATGCCGCCGTGTGAGGACAGAGACCTGCACTGAGTATTTGTAAGCATTCAGATGCTTCCAATGCAGAATGCCACCCTGACAGCTGGTCTACAGCACCTTTCCCCTGGGTGGCACTTGAAATCTATAAAACACTTTCCCTTATAATTTATCTTTTGTCATTGTGAGGAGAAGAGAATGTTCTTCTCTACGTGTGATGCATAGGGAACCTAGATTAGGGAGGTTATGTGGTCTTAACTTCCCTACCTGTCACTTGGGGTTGAAGTTTGAACCTTAGTTAGCAGAATGTTATTCAGACCCAGCTGAACCAAGGTCTCCTAAAAAGTTGAGAATCATATATTTTTTAAGACATCATAAATTGAAA +>contig7 +CTATATATAAAAAAAATGGTCATAAATTAAGTCCTTTGGACTATGGTGTGGACTGTCTTGGACCTGCTGAAAAGTGTTCTTTTTCTCTCAGTCTCAATGTATCGTTGCCATAGTGACAGGAGGGTGGGGCAAATGTGCTGCTGTTACTGCTGCCACTAATCAGATAGCTGTGTGCAAGTTCCAATGTACTTTTTAAAGCAGACTTGGAAGACTTAAGTAATGATCTTGTAGCAGGCACCCCGCAACCTATTATCATAACCACTAAAAGTTCCTTGTGAAAAATAGACTTTCAGGAACTGGAGAGATGGCTCAGTGGTTACGAGTACTGGCTGCTCTTGCAGGGGACCCAGGTTCAGTTCACAGCACTCACATAGCAGCTCACAATCTTTTGTAAATCCAGTTCCAAGGGATCCAACGCCCTCTTCTGGCCCACATGAACACTGCAATGCGTGTAGTTCACATCCATAAACACGGGTAAAACGCACATACACATAAAATTAAAATAAATTAATATTTACATCTAAATGTTAAATAAACTTTTCCCCCAGAAGTTAGCT +>contig8 
+TCCTGTTTTCCAGCAAGAAAGAGATGAAAAAAATTACTTATGGGGCAAGCATACGAACATTTCCATTGCTGAATTTAGCTAGGAGCTGTTCCATGGTCAAAAGATGAAAGAAAGCGAGTCTGAAGGAAGACTCGGGAAGGGTGTTTATAAGGATGCATGATGTACCCTGAGCGCTGGGTCCTGCTGGGTCCTGCTGGGTCCTGCTGGGTCCTGCTGGGTCCTGCTGGGTCCTGCTGGGTCCTGCTGGGTCCTGCTGGGTCCTGCTGGGACCTGCTGGTCTCTGCCGCTAGAGACATCAGTGAGCCCATAGCTGTGAGGCTTTAACAAGGAAGAGCTTGTGGGTTTTCGCAGTTTCTGAATCGTGTCGTCTGGCTATTGGATGGTCCAGGAAAGCACCAGAAGCTTGCCTTTGCAGTCTTTTTGTTTCTACCCCCTTCAGCGCTCTGACTTATTTTTAGTTCTATGGTTTTCTCCCACAAGTTACCATCAGGTCACGGAGTCATATGGGTTGTACTCTGCTGTCACGGTGTCCTCTGTACACAGAGCGTAATGGAAACTCTGATCACCCTAGATGGTGCCATTTGAAAGGATTACATTAAAACTCTGTTGAGCCATTGCCGCAGTGTCCTAACAAAGGGTCTGTTTATGCTATATATATATATATATTTTTTAACAATAGACATTCAGACAAGAGAAAGTCATTTGGAAGGCGCTTGAAATTGCCTTCCTGATGTCAATTTTATTCTGAATTGTAAAACAGTTCAAAAGAATGAGTCAAATAGAAATTTTAGTGCAAAACATTACTACTTCATGACTGTAGTTCACCAACAGAATT +>contig9 +TGACACAGCAGCTGAAATTCAAATTAGGTAAAATGATGTAAGCAATTTTTCATGCTGTTTTAATTCTTGCGCTTCTCTGAAACTAAGCAAACCAACAGACATTATTTCCAGCTATTCAAACCAGAGGCATGACATTGTCATTTGAAATTCAATGGAAGAAAAAGCTTCCTACAATACTCAAATTCATTCCTACTTCCAGGAGAGTCCAAAGTGGTAAGCATAATTACTAGTACTAGATTAATTCAAAAGAGATGACTTGGTTGACAGTTAGCCGTGCCAGGTCCTGTTCCAGCCACTTTGTGACTATGAGCTTGTTTGGTCTCATAATCCTGAGAGGTGAGAGAAGATTATTACTCCTGTTTTACAGACAAGAACAGAACCTCCCAGGAAGCATCTCTAGCCTAGCAGAGCCCAACTGAGCAAGATCAGAGGCGTGTGTAT +>contig10 
+TGCTGCCATGAGAGAAGCTAATGGAATTACTTCTATTGAGCATCGGGATAGTTGAGTTAGAGGGACTCAGGAAAGATTTTGGCCAGGACAGTGATGTAATAATATAGTATGGTTGGCAGCACTGAGTGAATGCTTTCCATGTGACAAGTGGCTTATGTCATTTCATCCATAGCCCCATCCTCGAGAGAGTTAATGTCCAGGGGCAGAAACATGAGTCAACGGGGTCTCAGCCTCTGGCCACATCTTGAATCCATATATTCCCTTTCTGCACCAACTTGAACCTTGAGACTAGCCATGTGGAGCCTTATGTAAGAGGATAGGGTCTACACGAGCCTGTTGGAGCGTGATGAGATCGTGCCAGGAGGAATGGGCTGGTCAGCATCCAGTAGCCCCACATGCTCCAGAAAGGACAACAATGGACTGACCAAGATGATAATTAGAGGTGACATGAACAGGGTACTTAGCGATTTAGTGAGGGGCAAGAGGGTAGTACTGATTAGAGAGGCCTATGTTCCAAGATAAAGGGAGTAAGGAACTGAGCTTTTCTGTGGGTGCTTTTCTTGGATTTTGGCGTCAGTTAAAGAAGGATGGCAGTTGGGGGTCAAACCTACAGAGAGGCAGACTTACAAGAAATAACAACACCCACGGATTTCCCAAGACACAAGACAGGCCAGAGCACAGAGCAAGCACAGGCCAGGGAATGGATCCCCGGCAGGAACAGCACCCTGTCTTCTGATGTGATGAGGGAGCAGAAAGT +>contig11 +TGTTGCTCAAGGATTAATGGGTTTACCCCCAAAGTAACCCCGTTACAAAGGCCAGTTTGACTCATCTCACCATCTGGATCCCTTTCACCATGTAATGCTTAGGCCATCTTGAGACTCATCAGCAATAACGCCCACAAGATACAAACTCCTCACCTTGGACTCTCTAACCACCAGTCCTCTATTTTCTTTATAAATCCCCCAGCCTATGGTACCACTCTGGCATTAGAAAATGGGCCAGAAGACCCTCCAGTTGGAGACTGGGGTGTCTCTTGTCTGCTAACCCATACATTCTTCCTGTGACGCTTGTGAGCTGCCTTCTCTGTCCACAGCCTCTACCTAATTGTGTCCAGAATGGCTCTCATTATCAAAGCTTTCTCTATTCTTAAGACTTTTCACAACACGGGTTCCTTAATACTTCTGTTGCTATCCCCAGAACCACGTGGAGAACCCAGGAAATCTTGGATGTCTAATAAACATTTACTGTGT +>contig12 +TAGCTGCATATGTATCAAAAGATGGCCTAGTCAGCCATCACTGGAAAGAGAGGCCCATTGGACATGCAAACTTTATATGCCCCAGTAGAGGGGAACGCCAGGGCCAAAAAGTGGGAATGGGTGGGTAGGGGAGTGGGGGGGGGGAAGGGTATGGGGGACTTTTGGGATAGCATTGGAAATGTAATTGAGGAAAATACATAATATAAAAATATTTTTTTAAAAAAAGGAATTATGTCCTTGAGTATAAGGAGCTTATTGTATTCCTTTTCCATTCTTGGCCCTTAGCAGAGAACATGGCACTAAATGCCAATAAATGGTGTTCAAATGAAGTGGAACCAAACCTTCCCCCAGCTCATTGTGACATCCTCTGATTACATAAACGCTGCCATAGCAGAGTTAGTTAATGTACATGCCCAGGACTAGAGTTGCTACATCATATGTGTTGTGAATCTACTTTTGGGGAGGAGGTCTTTACTGCCTTTCATAGTGACTAAACCATTTTATACTCTCACTATGCATGCATAGAATTCCAGCTTTATCACACCCTCACCAACACTTACCATCTTCTGATTTTAATAATAGTAGTTGAGTGCATGAGGCCATTATGGGTTTTACCTGAATTTTTGTAG +>contig13 
+AACTTCCTTGAACGTGAGGGCTTTATTGGAGTAGGCCCGCCCATGTGTGCGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGATGTCAAGTGCACTGAAACAGAATCAAAATAATTTTGAAGGTATTTTAAAGGAAAAAAAAATGAGTGAGAGAGGTGTGCTTCATTGCCAAGGGCTAGCTTTATTTTAGCTCTGGAGATAAAATGGGTAGGATTGCATCTGCATCTGCATTTGCATCTGCGTCATCTGCAACACAGAGAAGAGCAGAGTGCCTGCCAAGCAGCAGCGCAGTGTTTGCAGAAGCTAGTAGACGTGGAGGAAAGTCCTTCAAGACTGTGCAGCTGAAGGGAAGCAAAGGTGCCCGTCCATCTCAGCATGATCAGATGAAAATGCAGAGTGTCAGTACAAATGTAGTGTGCATATGTAAAGTGAAAGTATTTATTTACTGTAGACTCCAGTTAAATCAGGC +>contig14 +GATGAAGGATTCTTCACTAACAACACACATGTATTGGTCTGCCTTACGTTGTGCAGTTTAGCAGCATTTGTTGGGACTCTATAGAGAGAAATGAACCAAAAAACTTCTGGGAGTGTGCTGCAGTTTCTTGCCACTTCTACAGACTCAGGCTGACTGGCAGAGTGATGTCAGCTGAGACAGACACATGTGGTGAAGCAAGACCCGTGGAGGACATGTGATGTTTCGAGGGTATAAATAATAGGACTTGATGGACAGAGATGGAGGGTGAGCTAGGGGTGCCTATAGTGCTCGCTGTGCAATACTTGTGAGTCTCACATCTTTGCTGATCTTCGCTTCTCCTGGCGCTTCTCCCAGGCCCCTCCTGCTGACTCATGCCGAGGCTGAGGCCTGGCTCTCTCTGCTAGGCAGTGCCACCACCGCTGATTCCTGTTTGCTATCCCGACCCTACCAGACTGGACTGCTGGTGCATCCACGAAGTGTTTGTTTATGAGTGGATCAAACTGCTGCTGCTGACCTGTGAACTCAACTGCTGATTTCCTGACAATACAGAGGGGAGTTACTCCAAAGAACTATTTCTAAATAGAAACAGCCCCCCCCCCCACACACACACACCCAACTCCTTACTTTTCCTCTACTTCTGGTGGGTGGTGGGTTACAAGCAAGGTTAAAGCACTAAGAAACATCATTAAAAGTAGGCATTTGAAAGAAAGTCACAATCACCATCATTTTACAGGTGCTGAGGTTGGGTAAATGAAAAGAGAAGTAATTTGTTATGATCGCAAAGCAGGTCAGTCTGTGAAAGTTCCCTCCAATCCTGTACTCCAAGGGTAGGCTGGCTTTCACAGCCTAACACCACTGATGTTCATTGCCCTATCTGAGTATCTCCCAAAGTACTG +>contig15 +ATGCCCCTCAACAGAGGAATGGATACAAAAAATGTGGTACATTTACACAATGGAGTACTACTCAGCTATTAAAAGGAATGAATTTATGAAATTTCTAGGCAAATGGTTGGACCTGGAGGGCATCATCCTGAGTGAGGTAACACAATCACAAAAGAACTCAAATGATATGTACTCACTGATAAGTGGATATTGGCCCAGAAACTTA +>contig16 
+AATCCAAGCAGATATGGGAAGGGAGGAGGCTTGCTTTTTCCGTAGTGCACACTCAGATATACCAGAGTTACCAGACAAGCTTTGATAGCACTGAAAAATGTCCATGATCAGGGCACTGCTGTTGCAGAAGGGAGATGGCCTGTTGGGAAGCCATCTGATAGTGGTTATCATATTTCATGACCAGTTTATCTAAACGTGGAGTGAAACTTTTGGACAGATATTTATATGAAACCATTTCTTTGGGATTATTCTCATAATGGTTTAAGATATCACATATGAAGAGATACCATGACCACAGCATCTCTTATAAAGAAAAGCATTTAATTGGGGCTGGCTTACAGTTCAGAGGTTTAGTCTATTATCATCATGGTGAGAAGCACAGTAGTATGCAAGTAGACATAGTGCTAGATAGGGGCTGAGTTCTACATCTGTTTCCTCAGACAGCAGAAGAGTGAGCTACTGGGCCTAGGT +>contig17 +GATGCTGCAGTAGAGCGGTCCCTGATCCCTGAGTTCTTACCCTTGGCTACCCCTGGAAAGCTACATAATGTCAGGTCAGCCCATCAAACTCACTATCACTCAGTGTTTTCTTCCTTTGCAATGAGAAACATGACAGAACTTGTATGGTTTCAAATATATTTTCACGGTGGAGGTCCCAATTTCAATTTAAATTTGTACATACTGATATTTTCTATTTCCAAGCTAAAAATTGAGATGTTGTAGTCGTAAAGACTTTATCTCCATTCAAATTCCAGTTAAAGCTCAAAGAAACGTAAAACAAA +>contig18 +AGAGAGAGAGAGAGGGACTTGATTTCTGACAAGATTAGCTTTTGAAATATGAAATATATATTCTAGTGAGGGGGAATTTCTAACACTTTGAAGAACCAACCTGATTAAATACTTCAAAACGTTATCCGCCGCACTCCCCACCAATGTATTGCTGTTCTGAAATGCCCTTCCATTTGTTGGTTATTTCATCAGTTATAATGTAAAACTTACATGTCTTTGGCACATTCACTGGTTTCTGTCCCTTTGATGGCTTGTTCAGTTCTTCAACACTTGAGCAGGGAACAACAGCTCCCAACACTCACTCTATTGTCCATTTTTTCAATTCATCAGTTGATTATAGACTGGACACGTGTAATATAGACTGTCCTACTTTCCGTGGGAAATATACAACAAAGATCCCACCCCAAGTAGTTGGCTGAAAGCATGAATAAATAGGTCATAAACATACATGATTGTGTATATTAACTATGTATTATAATAAAAGTTATGTAAATGTGTGCTCTCTCTCCTGTGTATGTTTCAGAACCCTGTTATACTGTACTTGCCATTGTCATGATGTGTTGATGGTAACACACTTAGGTGACGAAACAAAGTGAGGTGGATTGCAGTAGTTAGGTATCGTGATG +>contig19 +GAGATCCAGTAATATGAACCATGGGGTTTATCTTGACAGGTAAGTCAACTTATTGCAGAGAAAAACAGCCAGACAATACCGTCCTTGTTACAGAACCACAGGTGTTTAAAAGGCTAAGCACACTAAAGGGAGAGACTACAGCATCCGACAGGAGTGGATGGGGACAAGGAAGCAGAGTGAGGAGGGAGGGGGTGGGGGATGTGTAGACGGCAAGGCAGGTGCTTAGGAAATCAAAAAGAGAAATAGAAATCTACAGTGAAAAGCAGCTGCCAGGTAAGTTTTTATGGCCCCTCTAGAATGAGACTTCCAACTTCCTCTTAGT diff --git a/q2_moshpit/kraken2/tests/data/contigs/samples/sa_contigs.fasta b/q2_moshpit/kraken2/tests/data/contigs/samples/sa_contigs.fasta new file mode 100644 index 00000000..cf4a02e2 --- /dev/null +++ 
b/q2_moshpit/kraken2/tests/data/contigs/samples/sa_contigs.fasta @@ -0,0 +1,40 @@ +>contig0 +AGAAAGCAAAAGCGAAAGATGAGAAATTAAAGCAACAAGAAGAGAACGAAAAGAAACAAAAAGAACAAGCACAAAAAGATAAAGAAAAACGTCAAGAGGCTGAAAGAAAAAAATAGTATAGGACTGAGGCAAAGACAATGCATAAATTGATTATAAAATATAACAAACAATTGAAGATGCTCAATTTGCGAGATGGTAAGACATATACTATTAGCGAAGACGAGCGTGCAGATATTACGTTGAAATCGTTAGGCGAAGTCATTCATTTAGAACAAAATAATCAAGGTACTTGGCAAGCGAATCATACTTCTATTAATAAGGTGCTTGTTAGAAAAGGTGACCTTGATGACATTACATTACAGCTTTATACAGAAGCTGATTATGCATCATTTGCGTATCCTTCAATTCAAGATACGATGACAATTGGACCAAATGCGTATGATGATATGGTTATTCAAAGCTTGATGAATGCCATCATTATTAAAGATTTTCAATCAATACAAGAATCACAATACGTACGCATTGTGCACGATAAAAATACAGATGTGTATATTAACTATGAACTACAAGAGCAACTAACGAACAAAGCTTACATTGGTGATCATATTTATGTTGAAGGGATATGGCTCGAAGTACAAGCTGATGGTTTAAATGTATTGAGTCAGAATACAGTGGCATCGTCATTAATTCGCTTAACACAAGAGATGCCACATGCACAGGCAGATGATTACAATACGTACCATCGTTCGCCAAGGATTATTCACCGTGAACCGACCGATGATATTAAGATTGAAAGACCACCACAGCCAATACAGAAGAACAATACAGTGATATGGCGTTCCATTATACCGCCATTAGTAATGATTGCTTTAACTGTTGTCATCTTTTTAGTGAGACCAATTGGTATTTATATTTTAATGATGATTGGTATGAGTACAGTAACGATAGTATTTGGTATTACAAC +>contig1 +GTCATGATTGTTATCCCAGAAAACTTTTCAAAATTGGCAATGCAATTAGACGCTAAAACACCATCGAAAATATCGCTACAGTATAAAACAGCTGTAGGACAAAAAGAAGAAGTAGCTAAAAACACAGAAAAAGTTGTAAGTAATGTACTTAACGACTTTAACAAAAACTTAGTCGAAATTTATTTAACAAGCATCATTGATAATTTACATAATGCACAAAAAAATGTTGGCGCTATTATGACGCGTGAACATGGTGTGAATAGTAAATTCTCGAATTACTTATTAAATCCAATTAACGACTTCCCGGAATTATTTACAGATACGCTTGTAAATTCAATTTCTGCAAACAAAGATATTACAAAATGGTTCCAAACATACAATAAATCATTATTGAGTGCGAATTCAGATACGTTCAGAGTGAACACAGATTATAATGTTTCGACTTTAATTGAAAAACAAAATTCATTATTTGACGAGCACAATACAGCGATGGATAAAATGTTACAAGATTATAAATCGCAAAAAGATAGCGTGGAACTTGATAACTATATCAATGCATT +>contig2 
+GTTATTGTGGGATATACTTTGCCATTGCTTTTGGCATCTATTTATGTTTTTGGGGTAGCTGGTTTTGGATTTGACGTTTTTAATTATTGGCTAGGTATAGTGATGATGCTATTTATTTCTTGGTTAGGCTTATTTTTATTTTATAAAAATAAATTTGATAGTGAAAATCCGAATAAAGCAGTTAATGTCATAGCAATTATAATAAAATTATTTGCATTTGGTGGATTATTCTATATTAGTACAATTGTACCTAACACTGCAGATGAAGAAAAATTTATATATACAAGTATTCTAATAAATTTAGCTTCTGATGCTCTTCTTGTTAGGTCATATTTTAATTACGCGTTATACAAGAGTATTAAGAAAGATATCGAAAAAGTTTAGATAATTAAAGATTTTTGTATAGGGGGGATAAGTGTAATGGAGATAAAGGAAATTGTAGCAAATATAAAAAGGCCGTACTTAACACCTTTTGTAATATTTACGATTTTGCTCTCATTGTTTTTTGATGCGATTATGTTTTTCAATAGTAAACTTTATGATAAGTTGCCATTATATTTAGTCGTATTTTTAATTTTTGCAATAG +>contig3 +TTATTGATCATGTCATTTTAGAATATGTAAACCAAGATTTATCAGAATATGGTATTTCATTAATCTTTGTTGAAGATGTGATTGAAAGTTTGCCAGAGCATGTAGATACCATTATTGATATCAAGTCTCGTACTGAAGGCGAACTGATTACGAAAGAAAAAGAATTAGTTCAATTGAAATTTACACCTGAAAATATTGATAACGTTGATAAAGAATATATCGCGAGACGTTTGGCGAATTTGATACACGTCGAACATTTGAAAAATGCAATTCCTGATAGTATTACATTTTTAGAGATGTATAACGTGAAAGAAGTAGATCAGCTTGATGTGGTTAATCGATGGAGACAAAACGAAACATACAAAACGATGGCAGTACCTTTAGGTGTAAGAGGTAAAGATGATATTTTATCATTGAACTTACATGAAAAAGCACACGGGCCACATGGTTTAGTTGCTGGTACCACTGGTTCAGGGAAATCTGAGATTATCCAATCATACATTTTATCTTTAGCTATTAATTTTCACCCTCATGAAGTTGCATTCCTATTGATTGACTATAAAGGTGGGGGTATGGCGAACTTATTTAAAGATTTAGTCCATTTAGTTGGTACGATTACAAACT +>contig4 +TTGAGGCGGTGTGTGTGAATCAATTGCGTCATTAATGTGCGTATACTGACCACTTAATGAAGAATGGTACTGATTGTTGTTAAGATCACGTTGATTTGCGTGTTGATGATTGTCGTTTGTACGTGACTGGTTTTGATGATTGTTGTTTGGCGTGTTGTTTTTGTCATATGTATAAGTATACGCGCCGGTGTCTTTATTCACTTTGAACTGTGCGTTTGGGTGTGCTTTCTTTGCTTCTTCTAATGTTTTGCTATCATTCGTATATGCTTGAGCCGAGTTAGGCGACATACTAAATAAAGTAAGAGTTGTCATCGTCAGTAAAATTGTTTTCTTCATAATAACCATTTAATCCTTTATGTATTTAATTTAATTTTAGTATACACATTTATATTACAAAAATGAATGGTTAATTAAAAATATATGGGTATATTCAATATATTTATTTAAAAAAAGCTAAAAATACTTAAAAAACTATATACATATACTAATAATTTATATATTATTTGAGTAAGGAGCACTTTCTCAAAAAATAGTGTCCCTAAAAAGTTTTGATAAACTTAAAATATTCAGGAGGTTTCTAGTTATGGCAATGATTAAGATGAGTCCAGAGGAAATCAGAGCAAAATCGCAATCTTACGGGCAAGGTTCAGACCAAATCCGTCAAATTTTATCTGATTTAACACGTGCACAAGGTGAAATTGCAGCGAACTGGGAAGGTCAAGCTTTCAGCCGTTTCGAAGAGCAATTCCAACAACTTAGT 
+>contig5 +GATGCAAAAGAAGTGGAATTAACATTAGGACTTAAAGACGTACCAGAAGAACAATACCAAGGACCGATGGTATTGCAATTGAAAAAAGCTGGTCACATCGCGTTAATCGGAAGTCCAGGATATGGTAGAACAACGTTCTTACACAACATTATTTTCGATGTTGCAAGACACCATCGTCCTGATCAAGCACACATGTACTTGTTCGATTTCGGTACTAATGGTTTGATGCCAGTCACAGATATACCACATGTCGCTGATTACTTTACAGTAGATCAAGAAGACAAGATTGCTAAGGCGATACGGAAGATACATGATATTATTTCTGAGCGAAAAAGACTATTAAGTCAAGAGAGAGTAGTTAATATAGAGCAATATAATAAAGAAACTGGAAATTCAATTCCAAATGTTTTCTTAATTATCGATAACTACGACACGGTGAAAGAATCACCATTTATGGAAGAATATGAAGAAATGATGTCTAAGGTGACACGTGAAGGTTT +>contig6 +ATTGATGTATTTACTATCAATGATTACAGCATATATTTTCTATAGTTATGAACGTGCCAAAGGACAAATGAATTTCATTAAAGATGATTATAGTAGTAAAAACCATCTTTGGAATAATGTCATTACGTCAGGTGTTATTGGTACAACTGGTTTGGTAGAAGGATTAATTGTCGGTTTAATTGCAATGAATAATTTCCATGTATTAGCTGGCTATAGAGCGAAATTCATCTTAATGGTGATTTTAACTATGATGGTCTTCGTACTTATTAATACGTATTTACTAAGACAGGTAAAATCTATCGGCATGTTCTTAATGATTGCTGCATTGGGTCTATACTTTGTAGCTATGAATAATTTGAAAGCGGCTGGACAAGGTGTGACTAATAAAATTTCACCATTGTCTTATATCGATAACATGTTCTTCAATTATTTAAATGCAGAGCATCCTATAGGCTTGGCGCTAGTAATATTAACAGTACTTGTGATTATTGGTTTTGTACTGAACATGTTTATAAAACACTTTAAGAAAGAGAGATTAATCTAATGTTGATGAATAGCGTGATTGCTTTAACTTTTTTAACAGCATCTAGCAATAATGGCGGACTTAATATTGACGTGCAACAAGAAGAGGAAAAGCGAATCAATAATGATT +>contig7 
+ATTTACTAAAAATGAAAAATTACGTTACTTACTCAATATCAAAAATTTAGAAGAAGTAAATCGTACACGCTACACATTTGTGTTGGCACCAGATGAATTATTTTTCACAAGAGATGGATTACCAATTGCTAAAACAAGAGGGTTACAAAATGTTGTTGATCCATTACCGGTGTCAGAAGCTGAATTTTTAACAAGATATAAAGCACTGGTTATCTGTGCATTCAATGAGAAACAATCATTTGATGCTTTAGTTGAAGGAAACTTAGAACTACATAAAGGAACGCCATTTGAAACTAAAGTTATTGAAGCGGCAACTTTAGATTTACTAACGGCATTTTTAGATGAACAGTATCAGAAACAAGAACAAGATTATAGTCAAAATTATGCATATGTACGCAAAGTAGGACATACCGTTTTTAAATGGGTCGCTATCGGTATGACAACGTTAAGTGTTTTATTAATTGCATTCTTAGCCTTTTTATATTTTTCAGTAATGAAGCATAATGAGCGCATTGAAAAAGGATACCAAGCATTTGTAAAGGATGATTATACGCAAGTACTAAATACGTATGATGATTTAGATGGAAAAAAATTAGATAAAGAGGCACTTTACATTTATGCCAAAAGTTATATCCAAACAAATAAACAAGGTTTAGAAAAAGATAAGAAAGAAAATTTACTTAATAACGTGACACCAAATTCAAACAAAGACTACTTATTATATTGGATGGAATTAGGACAAGGACATCTTGATGAAGCGATTAATATAGCCACTTATTTAGATGATAACGATATTACAAAGTTAGCGTTGATTAATAAGTTAAATGAGATTAAAAATAACGGAGATTTATCGAATGATAAACGTTCTGAAGAAACGAAAAAGTATAACGATAAATTGCAAGATATTTTAGACAAAGAAAAA +>contig8 +TATAATTCAAAGGTATTCGGAATACTGAACACAATCCAAACTACACCAACGATACTCCATAACATAGTATAAAACCATGTAATCAACGCACGAATGATTAATAAACGCTCTTTAGAAAAATGAAACATTTTCAATCGCGCTTTCAATACAGTATCTTGATTCATTTTCAAAACTGTAAATAAAGATAGTGCAAAGATGAATACCGTTGCTAAAAATCCTGTAATTGCATAATAACTGCCCGTATCGTATAAATGAATCGGTTCTAAGTTAAATGCACCTGAACGGTTTAATCCTGTAATCAGCAAATCAGTCATAACATTGATACTGTCAGAATGTGATGCTTTCGGTGCTAAGTCTTGAAAAGCTAAGATGCCACCCATTGATCGCATAAGACGTTGGTAAACAGAATCTGTTAGCTGAGATAGCACGACACTTTTCATGGATTGTTGATCATATGTATATACTGAAATTGGTAGTTCGCCTTGTTTATAAAATGCCTTGGTCATACCTTTATCAAAAACAAAAT +>contig9 +ATGTGCTACACGGTAACCGATACCAATAAGTACGAGTGCGATGACAATGGTTGTTACGAGCAAGATGTATTGTAACCATTGCTTGAACACAACAAGTTGTATATAAGGCTTCATTGACGATACCTCCAAACCAATACAGCTAAATTAATTATCAAAAGTGCGATGAAGCTAAGGTAGAAACTAGGATGCAGTTCTAAAATGTAGTTGTTTAAAATAATTTCTAACAATTGATTTGTTACAACTGCGAACGGTTGAATATTGAAAACGCCATTTGCTATATGTTGTAAAAAAATCGTAGGTATTGTTAAACCAGATAA +>contig10 
+CTCTTCATTTATTGGATACTTATTTAAAGAGTTTGATAATAAAAATAAACATTATGATTATGCAGTAATTTTGGCTACTGGATATTACTTATTTGGCGATAATTTTGAAGTGCACAAGTCTAGAAAAGAAATAGGTATTTTAGGTAGGGAAGATGAATGTATAAATAATATAAAAAAAATAGAAAATTACAACGTTGGAAAATTTGAAGATTTTAAAAATAATATCATGGAAAGTATTAGGCGATTATATGAAAAAGATAGAAAAGAATTTTACGAATTGCTATCTAAACTGAATGGTGATGTGTTGGATTTGAATGAAATAATAAAATATATGAAAACTTTAACCGTAGGTGATGTTATAAAA +>contig11 +CAAGTAACGATTAAAGAAGCGAACAGTAATAACTACCAATGGTATATGCATAATAATCGTTTAACTGTTTCAGCTGGTGATAAAGTCAAAGCTGGTGACCAAATTGCATATTCAGGTAGTACGGGTAATTCAACAGCACCTCACGTACACTTCCAACGTATGTCTGGTGGCATCGGTAATCAATATGCAGTAGACCCAACGTCATACTTGCAAAGTAGATAATACAGTAAATCCCAAGTTACGATATCATACGCAACTTGGGATTTTTTCGTTTTAATAATAGGTATAAGCCCATTGCTTGTCCTCTAAAACATGTTGATAAAAAGGATCTTGGCCAATTAACTTGATATCATCTGCAAGTGCTTCCACTTCATCTAAATGATGGGTAGTTAATATAATTAAACATTTAGATTTCATGATGTTAAGTAGTTGGTGGATGTCATGTCTAGATTTTAAATCAATACCAACTGTCGGTTCATCTAAAATGAGAATTCGAGGTTGACCTAGTAAACCTACTAATATATTAATTTTACGTTTATTTCCACCGGACAATGTAGATACTTTGGCAGACGTATCATCAAAGTTTAATTGCTGTAAATATTCGTTGATAGTTGTATCGTTAATTGGATTTTTACAAAGTGATTTAAAAAATTTAATGTTTTCAGCCACTGTCATGTGTTCAAATAACGCAATGTCTTGTGGCA +>contig12 +AACTAAAGAAAAACTTAAGAATATGGATCAAACAGGTAAGCAATCACTAGAAAGTCTAAGCAATGATGTAAGAGGATTAATAACTCAAATAAATAAATTGAATAATGAATATCATGATAAAAATGGAATTATCGTATCAAAGATAAACTCCATTACAAAAGAAAATTGGTATAAAAATGAAACTAACAAGGGTGAATTTAAAAAAATGAAAGCGGATCATCCTGTTGCTTATGCAGAAGGAGATGAGACAATAGGTAGAACTCAAGCAATTTCTAAAGATGGACATATAGTTGGTACAGCTGAAATAGGTAAAGTTAGTGGAATAGCATATCGAAGCGACGATTCGATGGGGGTTAAAGGAGATGCTAAAGTAGCTGGATTAGATGGTAAGGTAAAGTACGAGAATGTTGAAGGTAAGGCAGGACTCGAAGTAGGGAGTGTAAAAGGTTCAGCTGAAATTAGTAACTCGCATATATCTGCTATTGCAGAAGCGCAATTAATTGATGCGGAAGCACAATTAAGATTTGGATCAGAAGATAATAATTTGCATGTAAAAGGAGATGCAAAAGTTTTGTCAGCAAGTGCGAGTGGATCTTTAGGTGCTGATGGAGTAGAAGCGAAAGCTGAAGCAGTCGGAGCTAATACAGGGGTTGAAGGTGGTTTTGCACTTAGTGGATTAAATTTTGATTTTGGCGCGAGTATTGGGGCTCAAGCTGGGGGAGGTGTAAATGTTGGCAAAAAAGGAATTGAAGTTGATGCTAAATTTATACTTGGAGGAAAAATTAAAATTACAT +>contig13 
+TTACCATTAGGATTAGACTTTGAGGATGTGGAACTAGTTGGTTTCGACTTAAGTCAAACAAATATTTTCACTTCAGTTAAACCTGTTGATATAGATAATGGTTTAACTTTATTAGAGAAACAGCTAAATATAATTTCAAATGAATATGAAATTGCAACTTTAGACACAAAAGGCATATTAAAAAATACAGGGTATGAAGATTATTTGTACTGTGGAGATAAGAAAGAAATTATTAGTTTTAAAAATGAATTAGTGAGTTTTATAAAAAATGTAGAACCACAAAAAAATGGATAGTAGTTATATCGGATTTTAAAGAATTTATAAATATTGCATCACCAAATAATGATGAAATAAAAACTATATTTTTAGACGGACCAAAGAATAATGTCTTTCCAATTATTTACGGATTGTATCAAGAAACAATTGGAGGATTTAGTTCTCAAATAAAGTTACTTAAGGAAATAGTCAGTAGCGCTTTTGTAGGAATTAGTATTAGCGAACAAGAATTAATAAAAGTTCGTTATAAAGTAAATGAAAAAAACCTTAAGAATAATGAAATGTATTATATATATAATTATGAATATAAAAAAATTAAATTATTTGAATAAAAGGAGAGTATTATGAGATTTGATTTTAATGCGAAAAATGGTGAGATTTCTGACAATAATATTTATACAAATATAATTATAGGTACTGGAGTTGGACATTCTAAAGTTATATTTGATAACGGGAAAGAAATTAATTTTTCTAATGCAACGCCAATTATCTTCGCACAATATTTGAAAATTGAATTGAAAAAGAGAAATATAATTGTAGATATTAATAATTATTTTAATAGCACTGTTATTTCATTTGCGCATGATAAAAAATATAGATATAAGGATATGTATATTGAAAAATACAAT +>contig14 +CTGTAGGGGCTTTTTTGACTTTAGGATTTGTCATTTTTTCAATTCATAAAGGGAGACGAACGAAAAATGAATCAGCACGTAAAAGTAACATTTGATTTTACTAATTATAATTACGGCACATATGACTTAGCAGTACCAGCATATTTACCAATAAAAAATTTAATAGCTTTAGTATTGGATAGTTTGGACATTTCAATATTTGATGTCAATACACAAATTAAAGTGATGACGAAAGGTCAATTACTTGTTGAAAATGATCGACTTATTGATTATCAAATCGCTGATGGAGATATTTTGAAGTTACTATAGGAGGAAAAATAGATGGTTAAAAATCATAACCCTAAAAATGAAATGCAAGATATGTTAACGCCTTTAGATGCTGAAGAAGCAGCTAAAACAAAATTACGCTTAGATATGAGAGAGATTCCTAAGTCTTCAATTAAACCAGAAC +>contig15 
+TGAATTTGATAGTGAAAATCCTAATCCTGCAGTTAACGCCATAGCAATTATTATAAAGTTATTTGCTTTAGCAACTATATTTTATATAAGTAAAGTTGCGCCTAGTATTGCGGATGAAAAGAACTTTATATTTGTAAGTATACTAATAAATTTAGGTGTTGATGCTCTACTTGTTAGGTCATATTTTAATTACGCGTTATATAAGAGTATTAAGAAAGATATCGAAAATGAAGGAAAGACAATGTAAACGAAAGACATAAAAAGTAATAGGAGTCTAACCATGATAATTTTTATTTTAATAACAATATTTGCTATTTATTATATAGCAATGATTGCTAGTTTATTTAAAAGTGAAGGTTTTTCAATAATAGGTTTAATATTAGATGTTGTTATAATGGGGACATTGATTTTTTACTATTTTATAGGCGCTCGCTTTGTTGATCATGATTTAAGCAACTTTTTAATGTTCATGGATACTGGATCATATATTTTTATGTACTTTGCAATTAAGTGTTTATGGGTGAAACCTAAGGTAGTAAATTATTTGATTGCAAAAGAATTGGGTGAATCTAAAGAGGTCATTGAAGAGCAGGAATTAGATTTACAGACATCAAAGATAAGAGGTATCTACTTTTTCATTATTTCAGTGGTGATGTTAATTATCACTAAATTAAGAATGCAACCTGAGTTACAGGCAGATGCTATATCGATGAATCCTGTATTTATTTTTGTTGGTGTTATAATCATTTTAATTTGGCTAGTACTGGATATATATCGCAAGAAAAAATACGGTATATTCTTATTCAAGACGATAGTGCCATTAGTTGTTACGATTTGGATTATTATAGCTACAATTATACTTTCGTAAATGATTTTGATTAATGGGGAATGTAGAGAACATTTCTTTGTAATTTTATAATCGAATAACATACAAAATAAAGCGACCAA +>contig16 +ATTCCATTTTTCGCCGAAATAGTTGATAGTTCCATCATTAGCTTTTTCAATACCAGCAATCATACGAAGTAATGTTGATTTTCCAGCACCATTATCACCAAGTAATACGGTTAAACGATTACTATCAAAGGACATAGTTAAATGATTGAAAATCTGTTTGTTACGGTAACGCTTTGAAAGGTTATTAATTTCTATCATCAGTTACGCTCCTTTACATGAAAATAATCAAGTATACGATACCCATAGCAAGTGCATATATAAATGTCATGAATAATCGATGACTTATTGTTTGAATATGGAATAAGATAAAGACGATACCTATCTCATAAATCAATATAAGTAACAATGATTTTAAGTAAAATATTAAGCTGAGTGGTTGAGACAAATATAGACTAACTGCCAATAGT +>contig17 +TTTTAGCTTCTTCTAATACAGCTAATGTAATTTTTTTAGTGTCAGCTCTGCTAAGTGTTAACGTGACAGGTCTAACTTTGTACTTTTCACCATTAACCTTAATTTCTTCTTTTTTACCTTTATCAAAATTATCGTCATCTAATTTGTCGACAATAAGTTCGGAATATTTTTCGGCAATTTTGCTGTAGTCACTTTGTTGTGCTTGAGCATTACTAAAAAGAGTATTTAAATTTAGTTGTTGGTTTGTAATACCATTTTCTTTTGCTGTTTCTTCATCTTCACCTGTAAGTTTTGAATACGTTGATAATAAATCAGAATTATTAACACTATATTTCCCTTTAAATAATGGTGATTCGAAATAATGCTTATCTTTATCTGCAGCTAACTGGAATTTCCCTAATGCAGAGTCTGCGATTGTTGGTTCAAGATTAATCATTGATTTCTCTTTTTTAGGATCATGTCCATATGACATTTTAATTTTCGAAGCATTAACAACAGATTTAGGAATACCAAGCC +>contig18 
+AAATACGAAGCAATTGTCAAAGAAGCAAAACGTTATAAAAATGAATATAATTTGAAAAAACCGTTAGCAGAACACATTAATTTAACAGATTACGATAACCAAGTTGCGCAAGACACAAGTAGTTTGATTAATGATGGTGTCAAAGTGCAACGTACTGAAACGATTAAAAGTAATGATATTAATCAATTAACTGTTGCAACAGATCCTCATTTTAATTTTGAAGGCGACATTAAAATTAATGGTAAAAAATATGACATTAAGGATCAAAGTGTTCAACTCGATACATCTAACAAGGAATATAAAGTTGAAGTCAATGGCGTTGCTAAATTGAAAAAGGATGCTGAGAAAGATTTCTTA +>contig19 +ATTATAAAATGTACACAGATACAAAAGGCAGAGAAGATAAACCATTCGATAACAAACGTTTAATTGATATGATGGTTGACCAAGTTATCAATGACATGGAAAGTTTCAAAGACGATAAAGTAGCTGTGTTACATCAAATTGATTCAATGGAAGAAAACTCAGACAAACTGATTGATGACATTTTAAATAACAAAAAGAATACAACAAAAAATAAAGAAGATATTTCTAAGCTGATTGATCAGTTAGAAAACGTTAAAAAGACTTTTGCTGAAGAGCCACAAGAACCAAAAATTGATAAAGGCAAAAATGATGAATTTAATACGATGTCTTCAAATTTAGATAAAGAAATTAGTAGAATTTCTGAGAAAAGTACGCAATTGCTATCAGATACACAAGAATCAAAAACAATTGCAGATTCAGTTAGTGGACAATTAAATCAATTAGATAATAATGTGAATAAACTACATGCGACAGGTCGAGCATTAGGCGTAAGAGCGAATGATTTGAACCGTCAAATGGCTAAAAACGATAAAGATAATGAGTTGTTCGCTAAAGAGTTTAAAAAAGTATTACAAAATTCTAAAGATGGCGACAG diff --git a/q2_moshpit/kraken2/tests/data/contigs/samples/se_contigs.fasta b/q2_moshpit/kraken2/tests/data/contigs/samples/se_contigs.fasta new file mode 100644 index 00000000..07de7bce --- /dev/null +++ b/q2_moshpit/kraken2/tests/data/contigs/samples/se_contigs.fasta @@ -0,0 +1,40 @@ +>contig0 +CACAAATAAACAAATTACCTACACTTTTACAGATTATGTAGATAAATATGAAAATATTAAAGCGCACCTTAAATTAACATCATACATTGATAAATCAAAGGTTCCAAATAATAACACTAAGTTAGATGTAGAATATAAGACGGCCCTTTCATCAGTAAATAAAACAATTACGGTTGAATATCAAAAACCTAACGAAAATCGGACTGCTAACCTTCAAAGTATGTTCACAAACATAGATACGAAAAACCATACAGTTGAGCAAACGATTTATATTAACCCTCTTCGTTATTCAGCCAAAGAAACAAATGTAAATATTTCAGGGAATGGCGATGAAGGTTCAACAATTATCGACGATAGTACAATCATTAAAGTTTATAAGGTTGGAGATAATCAAAATTTACCAGATAGTAACAGAATTTATGATTACAGTGAATATGAAGATGTCACAAATGATGATTATGCCCAATTAGGAAATAATAATGACGTGAATATTAATTTTGGTAATATAGATTCACCATATATTATTAAAGTTATTAGTAAATATGACCCTAATAAGGACGATTACACGACGATACAGCAAACTGTGACAATGCAAACGACTATAAATGAGTATACTGGTGAGTTTAGAACAGCATCCTATGATAATACAATTGCTTTCTCTAC +>contig1 
+GATAGCACCCACAATAAACGATTTTAGTTGTTTATCATGATTACTCCACTTAGTATATTTCTGTTATTGTAGTTTCACTCATCAGTATGATAGCGAAGTATGTTGTTAAATTTGTTGAAAAGTATAAATAATAGTGAGTTTTAAGTTTTGAGTTAAGTTACACTCTATGAGCAAATGTTTCGATCGCTGCTTTAATCTCATCTCTTACACGTTGAAATTCTGACCAATCTTTACCAGCAGGATCATCAAATCCCCAATGTTCTTTAGTGACGTTTTTAGGTAAAACTGGACAGTTTGCATCTGCATCACTACACAATGTGACAACTATATCAGACGCTATTAAAATGTCATTGTTTATCAGGTCAGAAGTGTGATGTGAAATATCAATACCGACTTCTTTCATAGCTTTTATAGCATGCGGATTAACACCATGTGTTTCAATGCCAGCAGAATATACATTCCATTTATCACCTAATATTAATTTTCCAAAACCTTCAGCCATTTGACTGCGACAAGAATTACCAGTACAAATAAAATAAATAGTTTTCTTTTGCATAAAAAATCTCCTAATTTAAAAGATAATAAGTGTTAGGTAAAGACCTAACAAAGTAAAGAATAATACAGGTATAGTAATTACAATACCCGTTTTAAAGTAAGTACCCCAAGAAATTTTAACTCCTTTTTGAACTAAAACATGTAACCATAATAATGTAGCAAGAGATCCAATGGGTGTGATTTTTGGTCCTAAATCTGCTCCAATCACATTGGCATAAATCATTCCTTCTTTTATAGAACTTATAGCATGCGATTGATCGATGGCTATCGCATCAATAAGTACGGTAGGCATATTATTCATAATTGCTGAAAGAAATGCGGATACAAATCCCATGCCCATAATACTGCTAAAAAGT +>contig2 +AAAATTTTATTTTTATAATCATCGATGACGTCATTATTTTTATCTAAATATGCTTTAACTGTAACGATACCACGTTGATTATGTGCAGAATATTCACTAATTTCTTTTGAGCAAGGACATAGTGTTGTTACTTTGGCTTGAATAGTTAATTCTTTACGTGTAACGGTATGATTTTCAATAGCTAAACCATAAGTAACATCTGCATGACCTACAGCTTTAATATGTGTAACAGGACTATAACGATCAAAGAACCATTTACCTGACACATCAACACCTGCAGCATTTTGATTCATCTTATCTTGTAGCGTACGCAACAATTGATGTAGT +>contig3 +AACTTGTCATTCATTCCATTTTTAAACCTAATATAGCAAATAGGAGATCGTTTATTGATAAATTAAAATTCCCATATTTTTTAGGAAAATCTAAATATATTTTGGTTGATGATTATCATCCGATGATATATAAACTTCAATTTAGAGAAAACCAAGAAATAGTTCAAGTATGGCATGCCGTGGGTGCTTTCAAGACTGTAGGATTTAGTAGAACTGGGAAAAAGGGAGGACCTTTCATAGATTCTATTGGCCATAGGAATTATAGTAAAGCTTATGTTTCGTCAAATAATGATATTCTTTACTATGCTGAAGCTTTTGGAATTGAAGAACATAAGGTTATTCCAACAGGTGTTCCACGTACG +>contig4 
+AGAGTGTGCAGAACTATTGAGAAGTTTTTTTAAGGAAATACGTGAAAAGAAAAAAGCAGAAAAACAGGGGAAGATACAAAAGGATATAAATTTGTTAAAATGATGTCGTACATAAATAAACGTATTTTTGAAGTCCAGTAAATCGTTAAAGATTGTGTGATAACATATAACAATGATGAAGTTAAGACACATGCTACACACATTAATTTTAAATCAATTAAAAAAATTCATATAGCATCACTTTAAAATAAGTGGAAACGAGGTCAACTAATGATAAAGCTAATAGCAACTGATATGGATGGAACATTACTTAACGCTGGTCATGAAATCACCACTATGAATCAAGAAGCGATAAAATTTGCGCAAGCGAACGGCATTACAGTAGTTATTGCAACTGGTAGAGCATTTTATGAGGCACAAACACCTGTTGCTGAAACAGACTTAAAAGTACCTTACATATGTTTGAATGGTGCAGAAGTTCGTGATGAATCATTTAATATAATGAGTACATCGCATCTTAATCATGATTTAGTACACAAAATTACTACAGC +>contig5 +TAAAGATGGTAAATCGAGTGAACGATTAGTTAAAGATTTATTTAACAAATTTTTTCAGTAAAAGAATTTAAAGGTTCTAATTTATGCTAATTGTAACAATAGATAAATAACAAGTTAGAAAATTTATATCATAGTGAAACAACTCTAAATCTAGTCATGTCTAGTAAATTTTCAAAGTATAGTTAAGTCATTTTGTTTTTAGCAAGTTCTAGATTAGTGATGTACAAAACGTATTATGAATAGAGTCTTTTAAAGTATGTATTAATTAATAAAAGTACCTAAAGAAAATAGCCATTTTTAATTTTGAACCCAATTTTAGTTTTTATACTTAAATAAAGTTCAGTTTCCTATAATTGTCATGTAATCTATAATACAATAGTTTATGTAACTATAATTAATATAAAAATCATTTTCAGGAGAGAATTAAAATTGGAATGGATATTATTTGATAAAGATGGTACATTGATTGAGTTTGACAGTAGTTGGGAAAAAATTGGCGTACGTCTCGTTGATCAACTTTTAGAAACCTTTCCAGTACATGATAAGGAAGCGGCACATCGCCAATTAGGCATTATCGATAAAAAAATTAAGCCAGATTCTGTTATGGGATCGGGTTCATTAGGTGAAATTATAGAGTCTTTTAATGGTGTAACAGGTAAAGAAACATCAGATTGGACAAAGGATACGAGTCAGGAACTCATTGATTCAAGAGTGCCTGAAAACAATTGGATTGATGGTGTA +>contig6 +AACAGGTTTTATAGTATAGCCACATTCATTTCTCCAGTAATGAATCCAATTTTTACTGAACACTGATGTTACGTTACTTGCTTCTTGAAAATAATCATACATGTTTATATAATCTTCGCTATTAATATATTGCGATGCTGTTTGAGTTAAATAGCGATTCCATGCTAGTAATAAGATTTTAGCTTGATAATGATATGCTTTAAATAGGTTTAATCGATTAATCATTGCTTTTTCTATACCAGCAGTTTTATGACCGATGTTATTACCTACAAAATAG +>contig7 
+ACCAGCTGTTTGATACTGATCAATTAAAACTGGATAAGTTAAATTTTTAATGCCTACTTCCTCTATTTCAAATAAGAAATTTTTGTGAGTGCTTTGTAAATCGGTCATTTCATTTTTAGTAGTTGGTTTCGTACCTTTGACAGGGTCAACAGAACCGAAATGTTTCCAACGACCCTCTCTAGTGGATAAGTCAAATTCTGTCATGTTTTACCTCCATTTAAGATTCAAAGTGATATGTCCAAAATGGCTCTATTTTTAAGAAACTTTGTGCTTGACCATCTATTTCGGGACTAGCAAGTTGTTTTAAAAATGGTCCTGTTTGTGAAGCATGCGCTTCGAAAGCTTTGAGTTTTATATCACTATATTGCGATATGTCATTTTGAATATCAGGTTCTCCAAGAATTTCCGGTGCATCATTGCTAAACGCTACAAGTGTAAGACGTGGGCGATCTGATACATGCATGCGCCCAACTGTACGTACTACAGCTTCTGCAGTTGCTTCGTGATCAGGGTGGACTGCAAATTTAGGATAGAACGAAATAATTAATGATGGATTTGTTTCGTCAATAAGTGATTGAATCATTTGATCCATTTGATCGTAAGGTTCAAATTCAACAGTTTTATCTCTTAACCCCATTTTCCTTAAATCTGTAATCCCAATTGCTTTGCATGCTTCTTCTAACTCACGTTCACGTATAAGTGGTAAAGATTCTCTTGTTGCAAAAGGAGGGTTACCTAGATTACGTCCCATTTGTCCTAGGGTGAGACATGCATATGTGACAGGAATACCTTTTTCAATGTAACTTGCGATAGTTCCAGCAGACGAAAAGGTTTCATCATCAGGATGAGGGAAAATCACAAGTACGTGTCTTTCATCAGTCATGATTAGCAACCTC +>contig8 +ACAGTGACTCAGACTCAGATAGTGATTCAGATTCAGACAGCGATTCCGACTCAGACAGTGACTCAGGATTAGACAATAGCTCAGATAAGAATACAAAAGATAAATTACCGGATACAGGAGCTAATGAAGATCATGATTCTAAAGGCACATTACTTGGAGCTTTATTTGCAGGTTTAGGAGCGTTATTATTAGGGAAGCGTCGCAAAAATAGAAAAAATAAAAATTAAATTATTCAAATGAAATTAGTGAAAGAAGCAGATACGACATTTGAATAGAAAGTATATTTAGTCCAACAAATATAAGGTGTTGATTAAAACTATAATATAACTTTCACGTTTATCATATCTTGTGAAAAAGACGATGCAAACAAGGTCATTTTTATTAAAAATGACTGAAATGTATAATTTTTAGAGCAACAGATGTAACTCACAATCTGACAGTGATTTAATAGAGGAACCGTGAATTTTAAATGAATTCATGGTTCCTTTTTATCGAATGAATAAAAATTTTTTTATGTAATACAAAAAAATACTATGAAACCATTTAATGATAAATGACTTCATAGCAAAATTTGATTTTTGTTTTTATGCTTTAGTTAAAATGTTGAAGTAAATTTTTCCACAGTAACATGATTTGTGTTTGAGAATACTTTTGTGCAGTTTCAATACAATGGTGAGAAAAAGATTGT +>contig9 
+TTTTGCTAAAAATCACATTCATATTCCTGATGGCTATTCAGAAAATCTAGAAGCAGAAGCGGAACGATATAACAAATTATTAGATGAAAGAGGTCCGATTGATATTCAAATTTTAGGAATTGGAGAAAATGGTCACATTGGATTTAATGAACCAGGAACAGACTTCAATAGTGAAACACATGTGGTGAACTTAACAGAAAGCACTATAAAAGCAAATAGTCGATATTTTGACAATGAAGCGGATGTTCCTAGACAAGCAGTTTCAATGGGATTAGCAAGTATTTTAAAAGCTAAAAGGATTATACTACTTGCATTTGGTCCAAAGAAAAAAGAGGCCATAAGTAAACTGTTAAATGAACAGGTTACCGAAGATGTACCTGCGACCATTTTACACACACATCCTAATGTTGAAGTTTATGTAGATGATGATGCAGCACCAGATTGTTTATAATTATTTAGTAGTTTATATAGTAGTTTTTATATACAAAATATTTTATGTGTTTAATAAGTTTAGTGAAGGGTATAAAACTATAAGCAAAAGAGAATTTCTCAAAATGAAAGGATGAATTTCATTGGAACTACAATTAGCCATTGATTTATTAAATAAAGAAGAAGCAGCAAAATTAGCTCAAAAAGTTGAAGAATATGTAGATATTGTTGAAATCGGTACACCAATTGTAATTAACGAAGGCTTACCTGCAGTTCAACATTTAAATGAAAATATTAATAATGCTAAAGTATTAGCTGACTTGAAAATTATGGATGCAGCAGATTACGAAGTAAGCCAAGCAGTAAAATATGGTGCAGATATTGTTACAATTTTAGGTGTTGCTGAAGATGCTTCAATTAAAGCAGCAGTTGAAGAAGCGCATAAACATGGAAAAGCATTGCTTGTTGATATGATAGCAGTGCAAAACTTAGAGCAACGTGCTAAAGAACTAGATGAGATGGGTGCAG +>contig10 +GAAGGTGTTAGCTTTTGATGCAGATTTAAGTAAAATCGACCAAGTAGGTCAAGCGTTAGCATCATCACCAAATTTAGCAGTATCTTCATCATCAAGAGGTAATCTAGAAATTACTCATTCAAACGCTCAAAAAGGCATTGCCTTATCAGCTATTGCTCATCAATTGGGTATAGATTTAACAGATGTGATAGCAATAGGAGATAATTTAAATGATATTTCAATGCTAGAACGTGTAGGTTATCCAGTAGCAATGAATAATGCTACAGACGAAGTTAAACATATTGCTAAATATGTCACAGACACCAATGAAAATAGTGGAGTTGGAAAAGCAATCATGAAAATATTAAAAGAAGAAAATAATTTAGAGGTGTAAAATAATGAAAGGTTTAATTATTATAGGGAGTGCTCAAGTAGGGTCTCATACGAACGCTTTATCAAAATATTTAAAAGGTCAACTCGGCGAACATGATGTTGAGGTGGAAATCTTTGACCTAGCTGAGAAACCCATTCATCAATTGGATTTTGCTGGTACAACACAAGCAGTTGATGAAATTAAAAACAATGTCAAATCTTTACAGAGTAAAGCAATGGAAGCAGATTTCTTAATTTTAGGAACGCCAAATTATCATGGATCGTTTTCAGGTATTCTTAAAAATGCACTTGACCACCTTAATATGGACCATTTCAAAATGAAACCCGTGGGACTCATTTGCAATAGTGGAGGAATAGTAAGTTCTGAGCCATTATCACACTTAAGAGTCATCGTACGTAGTTTACTTGGTATTGCTGTACCAACGCAAATTGCTA +>contig11 +TAAACCAGATACTATCAAAGATATTGTTGCAGAAGATCCAGATTTAGTTATTGTTGGTGGCGGTATTGCGAATGCTGACGATCCTGTAGAAGCAGCAAAACAATGTCGAGCAGCTATTGAAGGTAAATAAGATGAGTGAATTTAATAATTATCGTCTTATTCTTGAAGAGTTAGATGCTACTTTATCCCAAGTAGATAAT 
+>contig12 +TAACAATGCATCCACTTTGTTGTTCTATAAATTGTGGTATGACGCTTTGAGTCACATTAAATGCTCCTTTTAAAGTACCATTAAGTTGTTGTTGATAATCTTCCCAAGTTAAATCTTTAAACGATTTTTGCTGATTTGGATCAAATTTAAAACCAACAAGTGCATTATTAATGACTATATCTATTTTTCCAAAATGTTGTGTGGCAGTTTGAATCATTCGATTCACATCGTTTCTGTCTGTCACATCAGCTTGAATAGCTATAGCCCTACTTTGATTATAATTTCTAATTAATTTTTCAGCTTTATCCTTATTTTGTTTGTAATTAACAACAACATTGTAGCCTTTTTCTAGTAAAGTTATTGCAATTTTAGCGCCTAATCCACGACTACTTCCCGTGAT +>contig13 +TTAGATATATTATCTTGTATCCACTTGTCGGTATGCTTTAATAACTTGAGCAATTTTATCAATGACTGAATCAAGGGAGTCTACGTCTTCGTGTATGTCATATTCATTAATATTAAGTCGAACAACAGGACATGCATTAAAATTGTTAATCCAATTTTCATACCGTTTAAATAGTTTTTTCCAATATTCTGGATCTGTATTGATTTCCATGTCACGTCCCCGCTGTTGAATACGATCAATTACTTCATCATAATCACATTCTAGATAAATCAGAACATCAGGCTTTGGAAAATAAGGTGTCATAACCATTGCATTAAATAATTCGTAATACGTGTGGTAGTCGTCAGCACTCATTGTTCCTTGTTCTTCGTGCATTTTTGCAAAAATATCCACATCTTCATAAATTGAACGATCTTGAACGAATCCGCCACCATACTCAAACATGCGTTTTTGTTCCTTAAAACGTTCAGCAAGAAAGTAAATTTGAAGATGAAAGCTCCAACGTTCAAAATTATGATAAAATTTATCTAAATAAGGATTATGGTCTACATTCTCAAAGGAAG +>contig14 +AAAATTACATTGAAATAGTCAAAGATAAGGAGTTTTTATGATTAAAAAAAAATAATTTACTAACTAAAAAGAAACCTATAGCAAATAAATCCAATAAATATGCAATTAGAAAATTCACAGTAGGTACAGCGTCTATTGTAATAGGTGCAACATTATTGTTTGGTTTAGGTCATAATGAGGCCAAAGCTGAGGAGAATACAGTACAAGACGTTAAAGATTCGAATATGGATGATGAATTATCAGATAGCAATGATCAGTCCAGTAATGAAGAAAAGAATG +>contig15 
+AAATATCTGCTAAAATCAATGTCATCCCTACATTCTTTAAGCCAAAAATAACAAGATACATACCGATAGAAAATATCACGATATTCCACGGAGCCCCTTTGATCACTTGTTTCGTATGCACTACATTAGACTGACGCGCTAATAATATAAAAATCACAGCTATAGCTCCAGTGATAAATGATACAGGTATATGTATGAACTCACTCACTAAGTAACCTATAAGTAATAAAGCTAAAATAATCCATGAAATTTTAAATAATCGTGTGTCTCGTATAGCATCTTTAGGTTCAGAAATACTTGATATATCAAAAGTTTTAGGGATGGATTTTCTAAAGTAAAACCATAAAACTAAGATACTAGCTAATAGAGAAAATAAATTAGGTATAAACATACGCATTAAATATTCAACAAATTTAATACCAAAGTAATCTGCTGATACAATGTTAACTAAATTACTTACAACAAGTGGCAATGAGGTGGAATCTGCGATAAACCCACAAGCTATAATAAAAGGAAATACTAACTTATTATTGAAACCTAAATTTCTTATCATTGCTAAAACAATAGGCGTCAAAATTAAGGCTGCACCATCATTAGCAAAGAATGCTGCAATAAGCGCCCCTAATATCATGATATATATAAACATCTTTAATCCATGTCCATTAGATGCCTTAACCATATGAATAGCTGACCATTCAAAGAATCCTATCTCATCAAGAATCAATGATATAAGAATGACTGAGACAAAAGTTAGTGTAGCATTCCAAACAATTCCGATTACTTCTAATATATCTGTAAAATTTACTACACCCGTAATAATAACTAAAAGCGCCCCAATGATTGCTGAAATACCTATGTCTAACCCTTTAGGTTGCCAAATGATTAAGGTTAAAGTGATGAAAAATATTA +>contig16 +CGCACGTCTACGTCCTTTTTCTTTTAATAATACATAAAAAACATCGGTTTTATGAGTCGAGATTTAATAATGTACGTAGTGTAGATTGATTATAAGAATTTTTAAATAAAGCACGACGTTGTAGTTCTGGTATAATGTATTTTAAAAAATCTTCAAGGCTATGGGGTAATGAAGGTGGCATCAGATTAAAGCCGTCTGCAACGCCTTCATTAAACCACTGCTCCATCTCATCCACTATCTCCTCAGGTGTACCTATAAGAGTAAGATGACCACCACCGGCACTTAAGTAACCTAATAATTCTCTTACAGTCGGTTGTTTATCTTTTATAATGGCTAAAACTGTTTCATATCGACCAATCGGACCTTTGAAATTTTTAGCAGGAGGTAAGGGAGGAACTGGTTCATCTAATTCCCAATCTGAACAATCTTGTTGAACAAAAAAACTGAGTTGCTTTAAAGCATCTTCAATGGGTAATTTTTGATCAAGTTCTGCTTTTTTTGATAATGCTTTTTCATGGGTTTCACCTACATAAGTTACTAAACCAGGGAAAATTTTAATTTT +>contig17 +TTTTGACGCACCGGCTTGCATAGATACTGGTTTACCTTGAGGACTTTGTGGTGTTGTATGTGGTCCTCGTACTTGAAAATAGTCTCCCTCATGATAAAAAGGTTTAATATCTTTAGACTCAATAATTTTGTTGTTGTTTTTATCTGGTACATAAGATGCAGTTGACCAAGAAGTGAATAGCTGATTCATTACCGATGCAAATTCATCTGCTTTTTTGTAGCGCTCATGTCTTTCTGGTAAGTATTGAAGACTGTGATTTTGGGCTTCAATATCTGTCATGGATGTGACTAAATTCCAACCCACACGGCCATGAGATAAGTAGTCTAGACTTAATAATTGACGAGAAGCGGTGAATGGATTTGAAAATGTACTCGATATTGTTGGCACTAATCCTATATGTGATGTAACTTGTGCAATAGCTGATAAGTTAATTATAGGATCAAACCAAAAAGCGGGCA +>contig18 
+GCCAAATTAATCCAAAATTTTTGAATGATAAAAAAAAGCAAATGACTTTAGAGGAGTTAAGAGATTACAACTCACTTGAGACCCAATCGTTACAAAAAAGTTATTTATTAAAAAATTATGGTAAGAGTTTTCAAAGGTATAATAACAAAGAGATTAAATCTTACGTCATTGTTCCAGCAATTTCCCAAGAAATTAATGAGTTTATTTTTAAAGTTCAATATAAATCTGAAATAAATAAAATAAGTAAACTTAAGCATTTATCATTCATATTACATAAAGCTTTGAGGAAAATTAGTTTCAATGTAAGAGATAAAATCTATTTGTCGATATTTAACA +>contig19 +TGTGTTGAAACAATGATGGCTTTATACCTTTGTAAATGTTCAAAAACCCCTTTATAAACATTTTTGATTGGTGAATGCGTTAAATCAGTAACATCTTTAACATGAGTGCTATGTAACACTGCAATAACAGGGATGTAAGATGCAACTATGTTAAATATATGTGATGTAACGAGGTTACGATCGCTAAAGAATAAGTCACTACTTTGATATATTGTTTCAATAAAAAAAGCACCTAGCTCAGTTTCATTGTTTAAAAAGTGCGTTTTACCTAAATGATTTAATATAATCTTTTTTGCCACATAGGGATGATTTGAATTAATATCATAATACTTTTCAATTTTCACGTTTCCTTGAGGAGAAAGATAGTACTCTGCTTGTATTTTTTGGTCCGTTGATAAAAC diff --git a/q2_moshpit/kraken2/tests/data/contigs/small-kraken2-db/hash.k2d b/q2_moshpit/kraken2/tests/data/contigs/small-kraken2-db/hash.k2d new file mode 100644 index 00000000..0d374a36 Binary files /dev/null and b/q2_moshpit/kraken2/tests/data/contigs/small-kraken2-db/hash.k2d differ diff --git a/q2_moshpit/kraken2/tests/data/contigs/small-kraken2-db/opts.k2d b/q2_moshpit/kraken2/tests/data/contigs/small-kraken2-db/opts.k2d new file mode 100644 index 00000000..c343a957 Binary files /dev/null and b/q2_moshpit/kraken2/tests/data/contigs/small-kraken2-db/opts.k2d differ diff --git a/q2_moshpit/kraken2/tests/data/contigs/small-kraken2-db/taxo.k2d b/q2_moshpit/kraken2/tests/data/contigs/small-kraken2-db/taxo.k2d new file mode 100644 index 00000000..d0dcfdb2 Binary files /dev/null and b/q2_moshpit/kraken2/tests/data/contigs/small-kraken2-db/taxo.k2d differ diff --git a/q2_moshpit/kraken2/tests/test_classification.py b/q2_moshpit/kraken2/tests/test_classification.py index afbd99e0..c098828a 100644 --- a/q2_moshpit/kraken2/tests/test_classification.py +++ b/q2_moshpit/kraken2/tests/test_classification.py @@ -10,27 +10,27 @@ from subprocess import CalledProcessError import pandas as pd +from unittest.mock import patch, ANY, call + 
from q2_types.per_sample_sequences import ( SingleLanePerSampleSingleEndFastqDirFmt, SingleLanePerSamplePairedEndFastqDirFmt, ) -from qiime2 import Artifact - from q2_types_genomics.feature_data import MAGSequencesDirFmt +from q2_types_genomics.per_sample_data import ContigSequencesDirFmt from q2_types_genomics.kraken2 import ( Kraken2ReportDirectoryFormat, Kraken2OutputDirectoryFormat, Kraken2DBDirectoryFormat, ) +from q2_moshpit.kraken2.classification import ( + _get_seq_paths, _construct_output_paths, _classify_kraken2, + classify_kraken2 +) -from unittest.mock import patch, ANY, call - +from qiime2 import Artifact from qiime2.plugin.testing import TestPluginBase from qiime2.plugins import moshpit -from q2_moshpit.kraken2.classification import ( - _get_seq_paths, _construct_output_paths, _classify_kraken2 -) - class TestKraken2Classification(TestPluginBase): package = "q2_moshpit.kraken2.tests" @@ -91,12 +91,68 @@ def test_construct_output_paths(self): self.assertEqual(obs_rep_fp, exp_rep_fp) self.assertEqual(obs_out_fp, exp_out_fp) + @patch("q2_moshpit.kraken2.classification.Kraken2OutputDirectoryFormat") + @patch("q2_moshpit.kraken2.classification.Kraken2ReportDirectoryFormat") + @patch( + "q2_moshpit.kraken2.classification._get_seq_paths", + return_value=(1, 2, [3]) + ) + @patch( + "q2_moshpit.kraken2.classification._construct_output_paths", + return_value=(1, 2) + ) + @patch("q2_moshpit.kraken2.classification.run_command") + def test_classify_kraken_exception(self, p1, p2, p3, p4, p5): + seqs = MAGSequencesDirFmt(self.get_data_path("mags-derep"), "r") + common_args = ["--db", "/some/where/db", "--quick"] + + # run kraken2 + p1.side_effect = CalledProcessError(returncode=123, cmd="abc") + with self.assertRaisesRegex( + Exception, + r'error was encountered .* \(return code 123\)' + ): + _classify_kraken2(seqs, common_args) + + @patch("q2_moshpit.kraken2.classification._classify_kraken2") + def test_classify_kraken_action(self, p1): + seqs = 
Artifact.import_data( + 'FeatureData[MAG]', self.get_data_path("mags-derep") + ) + db = Artifact.import_data('Kraken2DB', self.get_data_path("db")) + p1.return_value = ( + Kraken2ReportDirectoryFormat( + self.get_data_path("reports-mags"), "r" + ), + Kraken2OutputDirectoryFormat( + self.get_data_path("outputs-mags"), "r" + ), + ) + + moshpit.actions.classify_kraken2( + seqs=seqs, kraken2_db=db, threads=3, confidence=0.9, quick=True + ) + + exp_args = [ + '--threads', '3', '--confidence', '0.9', + '--minimum-base-quality', '0', '--minimum-hit-groups', '2', + '--quick', '--db', str(db.view(Kraken2DBDirectoryFormat).path) + ] + p1.assert_called_with(ANY, exp_args) + + +class TestKraken2ClassifyMAGs(TestPluginBase): + package = "q2_moshpit.kraken2.tests" + + def setUp(self): + super().setUp() + @patch("q2_moshpit.kraken2.classification.Kraken2OutputDirectoryFormat") @patch("q2_moshpit.kraken2.classification.Kraken2ReportDirectoryFormat") @patch("q2_moshpit.kraken2.classification._get_seq_paths") @patch("q2_moshpit.kraken2.classification._construct_output_paths") @patch("q2_moshpit.kraken2.classification.run_command") - def test_classify_kraken_mags(self, p1, p2, p3, p4, p5): + def test_classify_kraken2_mags_has_correct_calls(self, p1, p2, p3, p4, p5): seqs = MAGSequencesDirFmt(self.get_data_path("mags-derep"), "r") common_args = ["--db", "/some/where/db", "--quick"] @@ -185,12 +241,25 @@ def test_classify_kraken_mags(self, p1, p2, p3, p4, p5): ) p3.assert_not_called() + # TODO + def test_classify_kraken2_MAGs(self): + pass + + +class TestKraken2ClassifyReads(TestPluginBase): + package = "q2_moshpit.kraken2.tests" + + def setUp(self): + super().setUp() + @patch("q2_moshpit.kraken2.classification.Kraken2OutputDirectoryFormat") @patch("q2_moshpit.kraken2.classification.Kraken2ReportDirectoryFormat") @patch("q2_moshpit.kraken2.classification._get_seq_paths") @patch("q2_moshpit.kraken2.classification._construct_output_paths") 
@patch("q2_moshpit.kraken2.classification.run_command") - def test_classify_kraken_reads(self, p1, p2, p3, p4, p5): + def test_classify_kraken2_reads_has_correct_calls( + self, p1, p2, p3, p4, p5 + ): seqs = SingleLanePerSamplePairedEndFastqDirFmt( self.get_data_path("paired-end"), "r" ) @@ -271,54 +340,126 @@ def test_classify_kraken_reads(self, p1, p2, p3, p4, p5): ] ) + # TODO + def test_classify_kraken2_reads(self): + pass + + +class TestKraken2ClassifyContigs(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.datadir = os.path.join( + os.path.dirname(os.path.abspath(__file__)), 'data' + ) + @patch("q2_moshpit.kraken2.classification.Kraken2OutputDirectoryFormat") @patch("q2_moshpit.kraken2.classification.Kraken2ReportDirectoryFormat") - @patch( - "q2_moshpit.kraken2.classification._get_seq_paths", - return_value=(1, 2, [3]) - ) - @patch( - "q2_moshpit.kraken2.classification._construct_output_paths", - return_value=(1, 2) - ) + @patch("q2_moshpit.kraken2.classification._get_seq_paths") @patch("q2_moshpit.kraken2.classification.run_command") - def test_classify_kraken_exception(self, p1, p2, p3, p4, p5): - seqs = MAGSequencesDirFmt(self.get_data_path("mags-derep"), "r") + def test_classify_kraken2_contigs_has_correct_calls( + self, + run_command_mock, + _get_seq_paths_mock, + report_format_mock, + output_format_mock + ): + samples_dir = os.path.join(self.datadir, 'contigs', 'samples') + contigs = ContigSequencesDirFmt(samples_dir, "r") + common_args = ["--db", "/some/where/db", "--quick"] - # run kraken2 - p1.side_effect = CalledProcessError(returncode=123, cmd="abc") - with self.assertRaisesRegex( - Exception, - r'error was encountered .* \(return code 123\)' - ): - _classify_kraken2(seqs, common_args) + fake_output_dir = Kraken2OutputDirectoryFormat() + fake_report_dir = Kraken2ReportDirectoryFormat() - @patch("q2_moshpit.kraken2.classification._classify_kraken2") - def test_classify_kraken_action(self, p1): - seqs = Artifact.import_data( - 
'FeatureData[MAG]', self.get_data_path("mags-derep") - ) - db = Artifact.import_data('Kraken2DB', self.get_data_path("db")) - p1.return_value = ( - Kraken2ReportDirectoryFormat( - self.get_data_path("reports-mags"), "r" - ), - Kraken2OutputDirectoryFormat( - self.get_data_path("outputs-mags"), "r" - ), - ) + samples = ('ba', 'mm', 'sa', 'se') + exp_output_fps = [] + exp_report_fps = [] + for sample in samples: + exp_output_fps.append( + os.path.join(fake_output_dir.path, f'{sample}.output.txt') + ) + exp_report_fps.append( + os.path.join(fake_report_dir.path, f'{sample}.report.txt') + ) - moshpit.actions.classify_kraken2( - seqs=seqs, kraken2_db=db, threads=3, confidence=0.9, quick=True - ) + output_format_mock.return_value = fake_output_dir + report_format_mock.return_value = fake_report_dir - exp_args = [ - '--threads', '3', '--confidence', '0.9', - '--minimum-base-quality', '0', '--minimum-hit-groups', '2', - '--quick', '--db', str(db.view(Kraken2DBDirectoryFormat).path) - ] - p1.assert_called_with(ANY, exp_args) + obs_reports, obs_outputs = _classify_kraken2(contigs, common_args) + self.assertIsInstance(obs_reports, Kraken2ReportDirectoryFormat) + self.assertIsInstance(obs_outputs, Kraken2OutputDirectoryFormat) + + calls = [] + for i, sample in enumerate(samples): + calls.append(call( + cmd=[ + "kraken2", + "--db", + "/some/where/db", + "--quick", + "--report", + exp_report_fps[i], + "--output", + exp_output_fps[i], + os.path.join( + contigs.path, + f'{sample}_contigs.fasta' + ) + ], + verbose=True + )) + run_command_mock.assert_has_calls(calls, any_order=True) + + _get_seq_paths_mock.assert_not_called() + + def test_classify_kraken2_contigs(self): + db_path = os.path.join(self.datadir, 'contigs', 'small-kraken2-db') + contigs_path = os.path.join(self.datadir, 'contigs', 'samples') + + db = Kraken2DBDirectoryFormat(db_path, 'r') + samples = ContigSequencesDirFmt(contigs_path, 'r') + + reports, outputs = classify_kraken2(samples, db) + + 
self.assertIsInstance(reports, Kraken2ReportDirectoryFormat) + self.assertIsInstance(outputs, Kraken2OutputDirectoryFormat) + + sample_id_to_ncbi_id = { + 'ba': 1392, # bacillus anthracis + 'mm': 10090, # mus musculus + 'sa': 1280, # staph aureus + 'se': 1282 # staph epidermidis + } + + output_views = outputs.reports.iter_views(pd.DataFrame) + for path, df in output_views: + sample_id = str(path).rsplit('.output.txt')[0] + + # the expected number of records are in the output + self.assertEqual(len(df), 20) + + # no sequences are unclassified + self.assertNotIn('U', list(df['classification'])) + + # all classifications are correct + self.assertEqual( + pd.unique(df['taxon_id']), [sample_id_to_ncbi_id[sample_id]] + ) + + report_views = reports.reports.iter_views(pd.DataFrame) + for path, df in report_views: + sample_id = str(path).rsplit('.report.txt')[0] + + # the dataframe is non-empty + self.assertGreater(len(df), 0) + + # the correct taxonomy id (feature id) is present somewhere in the + # classification tree, and none of the others are present + for current_sample_id, taxon_id in sample_id_to_ncbi_id.items(): + if current_sample_id == sample_id: + self.assertIn(taxon_id, list(df['taxon_id'])) + else: + self.assertNotIn(taxon_id, list(df['taxon_id'])) if __name__ == "__main__": diff --git a/q2_moshpit/kraken2/tests/test_selection.py b/q2_moshpit/kraken2/tests/test_selection.py index 11cacbc2..9cc08027 100644 --- a/q2_moshpit/kraken2/tests/test_selection.py +++ b/q2_moshpit/kraken2/tests/test_selection.py @@ -236,7 +236,7 @@ def make_dirfmt(self, string, coverage=False): pad1=0, pad2=0, rank=rank, - ncbi_tax_id=tax_id, + taxon_id=tax_id, name=taxonomy_fragment) ) df = pd.DataFrame(rows) diff --git a/q2_moshpit/plugin_setup.py b/q2_moshpit/plugin_setup.py index 7c8f5282..fe5d5722 100644 --- a/q2_moshpit/plugin_setup.py +++ b/q2_moshpit/plugin_setup.py @@ -139,6 +139,10 @@ SampleData[Kraken2Reports % Properties('reads')], SampleData[Kraken2Outputs % 
Properties('reads')] ), + SampleData[Contigs]: ( + SampleData[Kraken2Reports % Properties('contigs')], + SampleData[Kraken2Outputs % Properties('contigs')] + ), FeatureData[MAG]: ( FeatureData[Kraken2Reports % Properties('mags')], FeatureData[Kraken2Outputs % Properties('mags')] @@ -157,8 +161,8 @@ ('hits', T_kraken_out_hits), ], input_descriptions={ - "seqs": "Sequences to be classified. Both, single-/paired-end reads" - "and assembled MAGs, can be provided.", + "seqs": "The sequences to be classified. Single-end or paired-end " + "reads, contigs, or MAGs can be provided.", "kraken2_db": "Kraken 2 database.", }, parameter_descriptions=kraken2_param_descriptions, diff --git a/setup.py b/setup.py index 1e4e574e..60a9d4db 100644 --- a/setup.py +++ b/setup.py @@ -59,6 +59,7 @@ 'data/kraken2-reports-select/*/*', 'data/kraken2-to-ncbi-tree/*', 'data/kraken2-to-ncbi-tree/*/*/*', + 'data/contigs/*/*', ], 'q2_moshpit.dereplication.tests': [ 'data/*', 'data/mags/*', 'data/mags/*/*',