diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 01887be0..bc1fb7bb 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -40,16 +40,6 @@ jobs: # necessary for versioneer fetch-depth: 0 - - name: Update missing libs - if: matrix.os == 'macos-latest' - run: | - brew install gsl - ln -s $HOME/homebrew/Cellar/gsl/2.7.1/lib/libgsl.27.dylib /usr/local/lib/libgsl.0.dylib - if [[ ! -f /usr/local/lib/libgslcblas.0.dylib ]] - then - ln -s $HOME/homebrew/Cellar/gsl/2.7.1/lib/libgslcblas.0.dylib /usr/local/lib/libgslcblas.0.dylib - fi - - name: hack - template coverage output path run: echo "COV=coverage xml -o $GITHUB_WORKSPACE/coverage.xml" >> $GITHUB_ENV diff --git a/.gitignore b/.gitignore index 3360be47..6aff3029 100644 --- a/.gitignore +++ b/.gitignore @@ -130,3 +130,6 @@ dmypy.json # PyCharm configuration .idea/ + +# Mac OS +.DS_Store diff --git a/ci/recipe/meta.yaml b/ci/recipe/meta.yaml index a89a715d..aff0ac82 100644 --- a/ci/recipe/meta.yaml +++ b/ci/recipe/meta.yaml @@ -23,6 +23,8 @@ requirements: - samtools - qiime2 {{ qiime2_epoch }}.* - q2-types-genomics {{ qiime2_epoch }}.* + - eggnog-mapper >=2.1.10 + - diamond - tqdm - xmltodict diff --git a/q2_moshpit/__init__.py b/q2_moshpit/__init__.py index eda384d0..43318383 100644 --- a/q2_moshpit/__init__.py +++ b/q2_moshpit/__init__.py @@ -8,9 +8,11 @@ from .kraken2 import classification, database from .metabat2 import metabat2 +from . import eggnog + from ._version import get_versions __version__ = get_versions()['version'] del get_versions -__all__ = ['metabat2', 'classification', 'database'] +__all__ = ['metabat2', 'classification', 'database', 'eggnog'] diff --git a/q2_moshpit/eggnog/__init__.py b/q2_moshpit/eggnog/__init__.py new file mode 100644 index 00000000..0b1e70f0 --- /dev/null +++ b/q2_moshpit/eggnog/__init__.py @@ -0,0 +1,12 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2022, QIIME 2 development team. 
+#
+# Distributed under the terms of the Modified BSD License.
+#
+# The full license is in the file LICENSE, distributed with this software.
+# ----------------------------------------------------------------------------
+
+
+from ._method import (eggnog_diamond_search, eggnog_annotate)
+
+__all__ = ['eggnog_diamond_search', 'eggnog_annotate']
diff --git a/q2_moshpit/eggnog/_method.py b/q2_moshpit/eggnog/_method.py
new file mode 100644
index 00000000..55468e98
--- /dev/null
+++ b/q2_moshpit/eggnog/_method.py
@@ -0,0 +1,185 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2022, QIIME 2 development team.
+#
+# Distributed under the terms of the Modified BSD License.
+#
+# The full license is in the file LICENSE, distributed with this software.
+# ----------------------------------------------------------------------------
+
+import subprocess
+import os
+import tempfile
+import re
+import pandas as pd
+
+from q2_types_genomics.per_sample_data import ContigSequencesDirFmt
+from q2_types_genomics.genome_data import SeedOrthologDirFmt, OrthologFileFmt
+from q2_types_genomics.feature_data import OrthologAnnotationDirFmt
+from q2_types_genomics.reference_db import EggnogRefDirFmt
+from q2_types.feature_data import DNAFASTAFormat
+from q2_types_genomics.reference_db import DiamondDatabaseDirFmt
+import qiime2.util
+
+
+def eggnog_diamond_search(input_sequences: ContigSequencesDirFmt,
+                          diamond_db: DiamondDatabaseDirFmt,
+                          num_cpus: int = 1, db_in_memory: bool = False
+                          ) -> (SeedOrthologDirFmt, pd.DataFrame):
+    """Search each sample's contigs against a Diamond database.
+
+    Runs ``emapper.py`` in diamond mode with ``--no_annot`` once per FASTA
+    file of ``input_sequences``, keeps the ``*.seed_orthologs`` outputs and
+    tabulates, per sample, how often each database sequence was hit.
+
+    Parameters
+    ----------
+    input_sequences : ContigSequencesDirFmt
+        Contig FASTA files; a file's sample label is its relative path up
+        to the last underscore.
+    diamond_db : DiamondDatabaseDirFmt
+        Directory expected to contain the database as ``ref_db.dmnd``.
+    num_cpus : int
+        Value handed to ``emapper.py --cpu``.
+    db_in_memory : bool
+        When true, ``--dbmem`` is added to the ``emapper.py`` call.
+
+    Returns
+    -------
+    (SeedOrthologDirFmt, pd.DataFrame)
+        The collected seed ortholog files and the table built from them
+        by ``_eggnog_feature_table``.
+
+    Raises
+    ------
+    subprocess.CalledProcessError
+        If ``emapper.py`` exits with a non-zero status.
+    """
+    diamond_db_fp = os.path.join(str(diamond_db), 'ref_db.dmnd')
+
+    # emapper writes several files per sample; only the seed orthologs are
+    # kept, so everything goes to a scratch directory that the context
+    # manager removes again, even when a search fails.
+    with tempfile.TemporaryDirectory() as temp:
+        # run analysis
+        for relpath, obj_path in input_sequences.sequences.iter_views(
+                DNAFASTAFormat):
+            sample_label = str(relpath).rsplit(r'_', 1)[0]
+
+            _diamond_search_runner(input_path=obj_path,
+                                   diamond_db=diamond_db_fp,
+                                   sample_label=sample_label,
+                                   output_loc=temp,
+                                   num_cpus=num_cpus,
+                                   db_in_memory=db_in_memory)
+
+        result = SeedOrthologDirFmt()
+
+        for item in os.listdir(temp):
+            if re.match(r".*\.seed_orthologs", item):
+                qiime2.util.duplicate(os.path.join(temp, item),
+                                      os.path.join(result.path, item))
+
+    ft = _eggnog_feature_table(result)
+
+    return (result, ft)
+
+
+def _eggnog_feature_table(seed_orthologs: SeedOrthologDirFmt) -> pd.DataFrame:
+    """Count hits per subject sequence for every seed ortholog file.
+
+    Returns a frame with one row per file (named after the file with the
+    ``.emapper.seed_orthologs`` suffix removed) and one column per
+    ``sseqid`` value; combinations that never occur are filled with 0.
+    """
+    per_sample_counts = []
+
+    for sample_path, obj in seed_orthologs.seed_orthologs.iter_views(
+            OrthologFileFmt):
+        # TODO: put filename to sample name logic on OrthologFileFmt object
+        sample_name = str(sample_path).replace('.emapper.seed_orthologs', '')
+        sample_df = obj.view(pd.DataFrame)
+        sample_feature_counts = sample_df.value_counts('sseqid')
+        sample_feature_counts.name = str(sample_name)
+        per_sample_counts.append(sample_feature_counts)
+    df = pd.DataFrame(per_sample_counts)
+    df.fillna(0, inplace=True)
+    df.columns = df.columns.astype('str')
+
+    return df
+
+
+def _diamond_search_runner(input_path, diamond_db, sample_label, output_loc,
+                           num_cpus, db_in_memory):
+    """Run the diamond search step of ``emapper.py`` for one input file.
+
+    Raises ``subprocess.CalledProcessError`` when the command fails.
+    """
+    cmds = ['emapper.py', '-i', str(input_path), '-o', str(sample_label),
+            '-m', 'diamond', '--no_annot', '--dmnd_db', str(diamond_db),
+            '--itype', 'metagenome', '--output_dir', str(output_loc),
+            '--cpu', str(num_cpus)]
+    if db_in_memory:
+        cmds.append('--dbmem')
+
+    subprocess.run(cmds, check=True)
+
+
+def eggnog_annotate(hits_table: SeedOrthologDirFmt,
+                    eggnog_db: EggnogRefDirFmt,
+                    db_in_memory: bool = False) -> OrthologAnnotationDirFmt:
+    """Annotate seed orthologs with ``emapper.py`` in ``no_search`` mode.
+
+    Parameters
+    ----------
+    hits_table : SeedOrthologDirFmt
+        Seed ortholog files; each one is annotated separately and labelled
+        with its relative path minus the last two dot-separated parts.
+    eggnog_db : EggnogRefDirFmt
+        Its ``path`` is passed to ``emapper.py`` as ``--data_dir``.
+    db_in_memory : bool
+        When true, ``--dbmem`` is added to the ``emapper.py`` call.
+
+    Returns
+    -------
+    OrthologAnnotationDirFmt
+        The format object that was given to ``emapper.py`` as
+        ``--output_dir``.
+
+    Raises
+    ------
+    subprocess.CalledProcessError
+        If ``emapper.py`` exits with a non-zero status.
+    """
+    eggnog_db_fp = eggnog_db.path
+
+    result = OrthologAnnotationDirFmt()
+
+    # run analysis
+    for relpath, obj_path in hits_table.seed_orthologs.iter_views(
+            OrthologFileFmt):
+        sample_label = str(relpath).rsplit(r'.', 2)[0]
+
+        _annotate_seed_orthologs_runner(seed_ortholog=obj_path,
+                                        eggnog_db=eggnog_db_fp,
+                                        sample_label=sample_label,
+                                        output_loc=result,
+                                        db_in_memory=db_in_memory)
+
+    return result
+
+
+def _annotate_seed_orthologs_runner(seed_ortholog, eggnog_db, sample_label,
+                                    output_loc, db_in_memory):
+    """Run the annotation step of ``emapper.py`` for one hits table.
+
+    Raises ``subprocess.CalledProcessError`` when the command fails.
+    """
+    # at this point instead of being able to specify the type of target
+    # orthologs, we want to annotate _all_.
+
+    cmds = ['emapper.py', '-m', 'no_search', '--annotate_hits_table',
+            str(seed_ortholog), '--data_dir', str(eggnog_db),
+            '-o', str(sample_label), '--output_dir', str(output_loc)]
+    if db_in_memory:
+        cmds.append('--dbmem')
+
+    subprocess.run(cmds, check=True)
diff --git a/q2_moshpit/eggnog/tests/__init__.py b/q2_moshpit/eggnog/tests/__init__.py
new file mode 100644
index 00000000..61af993e
--- /dev/null
+++ b/q2_moshpit/eggnog/tests/__init__.py
@@ -0,0 +1,7 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2022, QIIME 2 development team.
+#
+# Distributed under the terms of the Modified BSD License.
+#
+# The full license is in the file LICENSE, distributed with this software.
+# ----------------------------------------------------------------------------
diff --git a/q2_moshpit/eggnog/tests/data/README.md b/q2_moshpit/eggnog/tests/data/README.md
new file mode 100644
index 00000000..7d5dcc2a
--- /dev/null
+++ b/q2_moshpit/eggnog/tests/data/README.md
@@ -0,0 +1,5 @@
+The `random-db-1.fa` file in this directory is the source for `random-db-1/ref_db.dmnd`. Construction of the database was performed using diamond version 2.1.7 with the following command.
+ +``` +diamond makedb --in random-db-1.fa --db random-db1 +``` \ No newline at end of file diff --git a/q2_moshpit/eggnog/tests/data/contig-sequences-1/s1_contigs.fa b/q2_moshpit/eggnog/tests/data/contig-sequences-1/s1_contigs.fa new file mode 100644 index 00000000..6a696e38 --- /dev/null +++ b/q2_moshpit/eggnog/tests/data/contig-sequences-1/s1_contigs.fa @@ -0,0 +1,6 @@ +>should-hit-seq-0 +AACGATACCGAAGCGGTGATTGCGCATAAAAACAGCCTGAACATGGCGATTCTGATTATGTTTGATGATGCGGTGGCGATTTTTGTGTATAGCAACCTGGTGTGCATTCGCTTTGATGAACATTATGGCTGGGGCAACGAAGCGAACCGCATTCCGCATATTCTGCCGGATATTCGCCGCTGCATGCCGTGGAGCCATAAAGGCGATCATAACCCGATGGGCAACATGAGCGCGCTGTGCAAATGGGCGACCTATCGCCCGATTAAAAGCTGGTGGAGCCCGTATAAAATTGCGCGCGTGTTTTTTTGCATTGTGCATTTTGTGAACACCCCGAAACCGAAATGGGGCATGGAATTTGATAAACGCGAAGGCATGGTGATTACCATGCGCATTTGGAAAAACTGCCTGGGCATGTGCCTGGAAAAAGCGAACATTTGCGAAGGCACCCGCAACTGGCGCATTAAAATGAGCATGTGGGCGGGCAGCTTTATTGCGCTGATGGATT +>should-hit-seq-2 +CTGGAAAGCGCGATGGGCGGCCCGCATATGAGCATTACCCCGGAAGAAAACGCGTTTGGCGGCTTTAACTTTTGCACCGGCGTGGTGACCGAACATATTCCGATGGATATTGTGGCGATTTGCTGGGCGCTGTTTAGCTGGGAAAACACCAAATTTGGCACCGTGAAAGATAACTGGCTGTATCGCTGGACCATTTGGTGGTGGTTTACCCTGGATACCGGCGCGAGCGTGGATTGCAAATGGGGCTGCAACCGCCGCGAACGCGCGATTTGGGTGTGCTGGAACCGCTTTATTACCATTAGCTTTCATAAACCGCGCGATTGCAAAACCAGCGCGTATCATACCGGCAAACCGGAAATGTATCTGGATCTGATGTGGATTTTTGTGAGCGTGCATGTGTTTATTATGACCCATCTGGCGGGCGATCATTTTCTGGGCCGCGTGCTGCTGCATCATAACAACGATGAAGATTATGATCGCAACTTTCCGATGCTGGATTTTAACTGCCATTGCTGGATTGCGATTTGGCATCGCGTGTGGTATCCGAGCAAAGTGCATGGCAGCGTGGATGCGCTGTTTGAATGGATTCCGCGCAACGGCGATTTTACCCTGCGCCGCAACGCGGGCGATCCGCGCTATACCAGCGCGAGCATGCGCTTTTTTGCGATGTGCGCGATGGAAATTATGCTGGCGCTGATGGGCGAAAGCATGAAACATGCGCTGGAAAGCGCGATGGGCGGCCCGCATATGAGCATTACCCCGGAAGAAAACGCGTTTGGCGGCTTTAACTTTTGCACCGGCGTGGTGACCGAACATATTCCGATGGATATTGTGGCGATTTGCTGGGCGCTGTTTAGCTGGGAAAACACCAAATTTGGCACCGTGAAAGATAACTGGCTGTATCGCTGGACCATTTGGTGGTGGTTTACCCTGGATACCGGCGCGAGCGTGGATTGCAAATGGGGCTGCAACCGCCGCGAACGCGCGATTTGGGTGTGCTGGAACCGCTTTATTACCATTAGCTTTCATAAACCGCGCGATTGCAAAACCAGCGCGTATCATACCGGCAAACCGGAAAT
GTATCTGGATCTGATGTGGATTTTTGTGAGCGTGCATGTGTTTATTATGACCCATCTGGCGGGCGATCATTTTCTGGGCCGCGTGCTGCTGCATCATAACAACGATGAAGATTATGATCGCAACTTTCCGATGCTGGATTTTAACTGCCATTGCTGGATTGCGATTTGGCATCGCGTGTGGTATCCGAGCAAAGTGCATGGCAGCGTGGATGCGCTGTTTGAATGGATTCCGCGCAACGGCGATTTTACCCTGCGCCGCAACGCGGGCGATCCGCGCTATACCAGCGCGAGCATGCGCTTTTTTGCGATGTGCGCGATGGAAATTATGCTGGCGCTGATGGGCGAAAGCATGAAACATGCGCATGGC +>shouldnt-hit +GCATTGAAGCTTTCTGACTGTTAAATAGTGTAGGCCCCAGCTGTTGATTTTTTAGACTAGAGGTGGGGCACTGTCCCGACACTTCTGGGTGTCCGCCACTGAGATGAACCCCACCGGGTCAAAGGATGTCAACGAAGTTCATTCAAGCTCACACGTCCAAGACCAGTGGTCAGGCTCTCTGTCATGCACCGTCCGCTTTGCAGCCGCGTCTCAGCGCCTCCCTACGCTCGAGATTGTCTGGCGCTCGGGTCATGGC diff --git a/q2_moshpit/eggnog/tests/data/contig-sequences-1/s2_contigs.fa b/q2_moshpit/eggnog/tests/data/contig-sequences-1/s2_contigs.fa new file mode 100644 index 00000000..cccc50c6 --- /dev/null +++ b/q2_moshpit/eggnog/tests/data/contig-sequences-1/s2_contigs.fa @@ -0,0 +1,10 @@ +>shouldnt-hit-0 +ATAAATTAGTTACACTCTCCGTGACTCGAGCTAACCTGAACTCGTAAGAGGGTCCCTTAGCTAGAGACTTGTCTTGACCCAAACTAGTAGTAACTGCAAAACGGAATCTTAACAAAGGTTGCTACTAATGGCACGTCGTCACTTTTCTGAATTCGCATATGGATCCACGAGGGGAAATTGGCTTTGGAGAGATACACATCTGCCGACCAGACGCGGAATCTCAGTGAGTGTCATTCATGGCCCCTACCCT +>shouldnt-hit-1 +ATGCGTTCGTCACGAGGTTGCAACGGGCCGCCTTGCTTCTTAGCTCGAGAGATAGTTACGGGTTTTAGTAGTAGGAGCGTATTCCATACCCACAATTCGGAACTGCCCATGAGCCGCCTAGTAAGGATAACCTTATGATAGCTATATGCTCTTCCTACTATCTAGCGGTGCTCAATTTGCCAATTTCCGGGTCCGACTACGAGGCCGGATCGCTGAGGAGTGACAACCCCTGTGCTATACATTACGGTCA +>should-hit-seq8a 
+TGCAAATGGAGCATGTGCACCTTTTGGAACTATCGCTTTTTTAGCCTGATTCTGTATTTTCGCACCACCAACGCGCTGACCGCGTGGACCTTTGTGTTTTGCTGGCCGAACCTGATTGGCCGCAGCATGAAACATGATGGCCTGTGCCATCATAGCGCGACCTATGTGTTTCATGCGATGATGGCGGAAATGGCGAAAGTGATGGATTTTTGCAGCGCGTGGGTGGAAGATGTGATGATGCCGATGCTGGGCTTTTATCATAACCTGTTTAACCCGCGCACCGGCAACGAAAACATTTGGAACGATAACTGCGAAGTGAACTGGACCGTGGTGATGAACGGCGGCATGATGTTTTTTGTGCTGTGGGATAAACTGATTATGGTGAGCGCGGAATGGAACAACTGGGCGCGCAAAATTGTGAAAGTGTATCGCGATAAAGATAACAAAATTGTGAGCCGCTGGTTTAGCTATCGCGATGGCGTGAGCTTTAACTTTAAAGGCTGCCTGCCGTGCTTTAAAAGCGGCATTATTCATCATTGGAACCATGATTTTGCGGCGTATAAAAACTGCGGCATGCCGGAAACCCGCCCGGATCTGTATTATGGCATGAGCTATGCGCTGTTTTATAGCCTGAAAGAAACCTTTGATGGCTTTCATATTGTGAACCCGGGCGAACTGAACGTGTGGCGCATTAAAAAACGCAAAGAAATGAACATGACCCTGGAACTGCATCATATGCATTGGTATGCGTGCAAATTTCTGGATCCGATTGGCAACGGCAAAATTGCGTGCAAATGCATTGATTATGTGTATCGCGTGCATGAAAGCTGCTGCGTGCGCCATCTGAGCCTGTGGGATTATTATTGCGAAGCGGATAACGAAGAAATGCTGGCGGCGACCGCGCCGAAAAACAAACGCCTGAGCGCGGCGCTGTATGATCGCTGCGGCTTTGATATGGATAAAGGCAGCGAACATGAATGCGAATTTAAACGCAAAGATATTGTGAAATATTTTATGATGATTGAAATGTATACCGGCACCCCGGTGCGCCCGTTTCGCCGCAAATGGCGCCTGTGCCATTGCCATAAAGAAAGCCGCTGGTGGTTTGTGTGCAGCCCGGGCCCGTTTTTTGGCTAT +>should-hit-seq8b 
+ACCGGCAACGAAAACATTTGGAACGATAACTGCGAAGTGAACTGGACCGTGGTGATGAACGGCGGCATGATGTTTTTTGTGCTGTGGGATAAACTGATTATGGTGAGCGCGGAATGGAACAACTGGGCGCGCAAAATTGTGAAAGTGTATCGCGATAAAGATAACAAAATTGTGAGCCGCTGGTTTAGCTATCGCGATGGCGTGAGCTTTAACTTTAAAGGCTGCCTGCCGTGCTTTAAAAGCGGCATTATTCATCATTGGAACCATGATTTTGCGGCGTATAAAAACTGCGGCATGCCGGAAACCCGCCCGGATCTGTATTATGGCATGAGCTATGCGCTGTTTTATAGCCTGAAAGAAACCTTTGATGGCTTTCATATTGTGAACCCGGGCGAACTGAACGTGTGGCGCATTAAAAAACGCAAAGAAATGAACATGACCCTGGAACTGCATCATATGCATTGGTATGCGTGCAAATTTCTGGATCCGATTGGCAACGGCAAAATTGCGTGCAAATGCATTGATTATGTGTATCGCGTGCATGAAAGCTGCTGCGTGCGCCATCTGAGCCTGTGGGATTATTATTGCGAAGCGGATAACGAAGAAATGCTGGCGGCGACCGCGCCGAAAAACAAACGCCTGAGCGCGGCGCTGTATGATCGCTGCGGCTTTGATATGGATAAAGGCAGCGAACATGAATGCGAATTTAAACGCAAAGATATTGTGAAATATTTTATGATGATTGAAATGTATACCGGCACCCCGGTGCGCCCGTTTCGCCGCAAATGGCGCCTGTGCCATTGCCATAAAGAAAGCCGCTGGTGGTTTGTGTGCAGCCCGGGCCCGTTTTTTGGCTAT +>should-hit-seq8c +TGCAAATGGAGCATGTGCACCTTTTGGAACTATCGCTTTTTTAGCCTGATTCTGTATTTTCGCACCACCAACGCGCTGACCGCGTGGACCTTTGTGTTTTGCTGGCCGAACCTGATTGGCCGCAGGAAACATGATGGCCTGTGCCATCATAGCGCGACCTATGTGTTTCATGCGATGATGGCGGAAATGGCGAAAGTGATGGATTTTTGCAGCGCGTGGGTGGAAGATGTGATGATGCCGATGCTGGGCTTTTATCATAACCTGTTTAACCCGCGCACCGGCAACGAAAACATTTGGAACGATAACTGCGAAGTGAACTGGACCGTGGTGATGAACGGCGGCATGATGTTTTTTGTGCTGTGGGATAAACTGATTATGGTGAGCGCGGAATGGAACAACTGGGCGCGCAAAATTGTGAAAGTGTATCGCGATAAAGATAACAAAATTGTGAGCCGCTGGTTTAGCTATCGCGATGGCGTGAGCTTTAACTTTAAAGGCTGCCTGCCGTGCTTTAAAAGCGGCATTATTCATCATTGGAACCATGATTTTGCGGCGTATAAAAACTGCGGCATGCCGGAAACCCGCCCGGATCTGTATTATGGCATGAGCTATGCGCTGTTTTATAGCCTGAAAGAAACCTTTGATGGCTTTCATATTGTGAACCCGGGCGAACTGAACGTGTGGCGCATTAAAAAACGCAAAGAAATGAACATGACCCTGGAACTGCATCATATGCATTGGTATGCGTGCAAATTTCTGGATCCGATTGGCAAC diff --git a/q2_moshpit/eggnog/tests/data/eggnog_db/eggnog.db b/q2_moshpit/eggnog/tests/data/eggnog_db/eggnog.db new file mode 100644 index 00000000..9ed31352 Binary files /dev/null and b/q2_moshpit/eggnog/tests/data/eggnog_db/eggnog.db differ diff --git a/q2_moshpit/eggnog/tests/data/eggnog_db/eggnog.taxa.db 
b/q2_moshpit/eggnog/tests/data/eggnog_db/eggnog.taxa.db new file mode 100644 index 00000000..136b907a Binary files /dev/null and b/q2_moshpit/eggnog/tests/data/eggnog_db/eggnog.taxa.db differ diff --git a/q2_moshpit/eggnog/tests/data/eggnog_db/eggnog.taxa.db.traverse.pkl b/q2_moshpit/eggnog/tests/data/eggnog_db/eggnog.taxa.db.traverse.pkl new file mode 100644 index 00000000..e1e3d659 Binary files /dev/null and b/q2_moshpit/eggnog/tests/data/eggnog_db/eggnog.taxa.db.traverse.pkl differ diff --git a/q2_moshpit/eggnog/tests/data/expected/test_output.emapper.annotations b/q2_moshpit/eggnog/tests/data/expected/test_output.emapper.annotations new file mode 100644 index 00000000..22aa1272 --- /dev/null +++ b/q2_moshpit/eggnog/tests/data/expected/test_output.emapper.annotations @@ -0,0 +1,2 @@ +1000565.METUNv1_03812 1000565.METUNv1_03812 4.71e-264 714.0 COG0012@1|root,COG0012@2|Bacteria,1MVM4@1224|Proteobacteria,2VJ1W@28216|Betaproteobacteria,2KUD2@206389|Rhodocyclales 206389|Rhodocyclales J ATPase that binds to both the 70S ribosome and the 50S ribosomal subunit in a nucleotide-independent manner ychF - - ko:K06942 - - - - ko00000,ko03009 - - - MMR_HSR1,YchF-GTPase_C +362663.ECP_0061 362663.ECP_0061 0.0 1624.0 COG0417@1|root,COG0417@2|Bacteria,1MVY9@1224|Proteobacteria,1RMQ1@1236|Gammaproteobacteria,3XPER@561|Escherichia 1236|Gammaproteobacteria L DNA polymerase polB 
GO:0003674,GO:0003824,GO:0003887,GO:0004518,GO:0004527,GO:0004529,GO:0004536,GO:0006139,GO:0006259,GO:0006260,GO:0006261,GO:0006281,GO:0006725,GO:0006807,GO:0006950,GO:0006974,GO:0007154,GO:0008150,GO:0008152,GO:0008296,GO:0008408,GO:0009058,GO:0009059,GO:0009432,GO:0009605,GO:0009987,GO:0009991,GO:0016740,GO:0016772,GO:0016779,GO:0016787,GO:0016788,GO:0016796,GO:0016895,GO:0018130,GO:0019438,GO:0031668,GO:0033554,GO:0034061,GO:0034641,GO:0034645,GO:0034654,GO:0043170,GO:0044237,GO:0044238,GO:0044249,GO:0044260,GO:0044271,GO:0045004,GO:0045005,GO:0046483,GO:0050896,GO:0051716,GO:0071496,GO:0071704,GO:0071897,GO:0090304,GO:0090305,GO:0140097,GO:1901360,GO:1901362,GO:1901576 2.7.7.7 ko:K02336 - - - - ko00000,ko01000,ko03400 - - - DNA_pol_B,DNA_pol_B_exo1 diff --git a/q2_moshpit/eggnog/tests/data/good_hits/test_tiny.emappper.seed_orthologs b/q2_moshpit/eggnog/tests/data/good_hits/test_tiny.emappper.seed_orthologs new file mode 100644 index 00000000..706cb4a8 --- /dev/null +++ b/q2_moshpit/eggnog/tests/data/good_hits/test_tiny.emappper.seed_orthologs @@ -0,0 +1,2 @@ +1000565.METUNv1_03812 1000565.METUNv1_03812 4.71e-264 714.0 1 363 1 363 100.0 100.0 100.0 +362663.ECP_0061 362663.ECP_0061 0.0 1624.0 1 783 1 783 100.0 100.0 100.0 diff --git a/q2_moshpit/eggnog/tests/data/random-db-1.fa b/q2_moshpit/eggnog/tests/data/random-db-1.fa new file mode 100644 index 00000000..491d13e8 --- /dev/null +++ b/q2_moshpit/eggnog/tests/data/random-db-1.fa @@ -0,0 +1,20 @@ +>0 +WDHEEAWAYDNCVSMGKHYGPVAISAKCASCPKNLFGMYSYECHHTRVDNMGMKIEDMNHCCDCGGPYKTCRCARYTPEIEMTPTSVVVYDGAVENRIFARHFCEVMNDTEAVIAHKNSLNMAILIMFDDAVAIFVYSNLVCIRFDEHYGWGNEANRIPHILPDIRRCMPWSHKGDHNPMGNMSALCKWATYRPIKSWWSPYKIARVFFCIVHFVNTPKPKWGMEFDKREGMVITMRIWKNCLGMCLEKANICEGTRNWRIKMSMWAGSFIALMDCRMWDVEIINPVWGRMHWATVRGVGEWVVGPLIYSEFKSHHVIIWLWGYCRAPWKEACMVRDNMSDIHHLEWTVIVDHIEPWPNWHPVRNGPFEGSSPLWCPLVIKRIRWKCNPREACHGTEGGAYMGKDCNKSIKDASTMMKEIERLKHCNKHPANPAGERWGGDYPEIKYKPILSPKAIMNHSIDIMTDWLRTFKFLSWYIPHPHEAISAERFGCPTLVKGKR +>1 
+SGGIYRVYGWTTLYSYNTYHGLCRGGERNSCYYEKIHCYSGIDDIEMPMPGICWEKGWCKTWKLLIPMYWNNYKGNAELHCLFGPTWLNCWVSEGSSAENAMRFPALRPAHYCARLIYMMWAYDVPFCVCIRDHDFRRHKNPKGHEVLVPTLKIEESINNISPDAVGYAFLEKCYPNMGEWPPPEMLEFERADTKNKHMCKYLSDGKCHKFHICDNRLIAERRASRLGYTELYDPCYSVDLIEGHESCDIWVSYSPYRINDGNFHARARTHILYTSMYFGEHTKHDMVIDYADWMALKALSDVLTSAHAKGLMVFAWMMPAEDMRTKPEIFYVHPYDDTFYCKGTAVHKLKWGYSVLNNCHTGAMHGWIFPMPMGVAHAYVKRNYEYEIKKECSPHLKYCVLYSIDVYLGPCMRFEWWTRSTFPCNWGFHWGDKVVNETKYHTGCCDHNTFYIGHHFPSWPGLWRDLKMMCCHDKPENGWCERYDVEIMYIVLEIMLGMY +>2 +WRFIIDYVRYFASCVKPEHHLWTCWGGVEVEFLVHSFKVLGFTHMFMYPPIHPDMACGVWDVRTDAYFANSFLIAVTVFKVFNLINNMHSPSRTRFFLSMWHMWEATVDGNTKIPGTDEEELVYDHKSSKLPCCEMCEMGSTLFEEVLICPHDVSYEGESEEWSGWTHKRMPKRCLNVIDVIVIDNTVHYCTNHKNMILWRWYTGIYKWYNSRSRCTLSHGHMYLRDWCDFHNTVCTVPWTGYCVANVEHHFGGIMRSLESAMGGPHMSITPEENAFGGFNFCTGVVTEHIPMDIVAICWALFSWENTKFGTVKDNWLYRWTIWWWFTLDTGASVDCKWGCNRRERAIWVCWNRFITISFHKPRDCKTSAYHTGKPEMYLDLMWIFVSVHVFIMTHLAGDHFLGRVLLHHNNDEDYDRNFPMLDFNCHCWIAIWHRVWYPSKVHGSVDALFEWIPRNGDFTLRRNAGDPRYTSASMRFFAMCAMEIMLALMGESMKHAHG +>3 +CPENGWFKCETHWMAEDPEKKLIVWNVNRRKCGYVKGGIRFCDEDITMIKYRDLGFTNFNFFRSWCHETNILFKFYAPMTHCLPPTSNTKVKDSTPTFMHAKTCWFMTLSRNIKYYCRDMIMYYASMPMIGLKINCLGTVHRRVGEATCTMDSTNHPGFNDIMMLDAGKNLWVMHFGGNHNGVLGLRHLVTMLWPMLGWEFPDSAWSFSTWADCDCHIGLSESWWVLLPADSSPGRLMHRHRMRKLNGTANNCPYHGLCLWHNVCESHIMRRWRMYKIWRKTGFVHYAGLYIFIGGTEYMLIVTRSTIWPYWVTRCTGIHKMICKCYERHWFFDLSRPVCWMNLYVLNKPWCGKLEGKCPIHIERIACKLKMLYKVHIVESDGLVPWIAVNPMDSGTCLIKSMMCIGCHALPDTYNMAGDCNGRESAVPYRGHSSTCMWVRHFWCGVMAHFETHGCESVRNAHDRPKNAGNARTFANADHPPVLYMGHYCVRHNPWHAEG +>4 
+FCYGVNDNEWCDRPTWYPKMYKRAHRSNDMPFYCHGDPGSYDNVINYLNCFMAYLSACMHMFEPHCFGVCMGMADWLNRAGIESNGEIMDGAMRKECWGYLVERARNGHEENVMTEIATMHAIWLTWEVSTCWYAPKVWKGLPGTPYSVVHLRCPGADGNHPLMIELTCKNACSTYFLTEVWNDVDEDYKLITLLCDEFVVYWPMKRGNTCNCAYLCWRMAYRRNASDCHGAKWPWIMAEIFAKWPKSAKYYLSCMRMATNCPIDDGSTVGVHRESMMEWTIPNRSHFEESLVKTETNFGSLWCPWFCIPHKTLENGSCTYWMGCPRRPAWTGWVLWEIAEPVAWFIGCSWDWAHMTLSLLEDVWIFDSKGKLHGLWRFYFVVCKPHVMMFNFFCFLNTGCWYCVLTHWWKHLSRLFLGSPRMAVIWLNLNLTLSFWENNEVHAFKCMTACVPCFRISFVTADHNDTSNALGPLFPWPCGVNLCLWMHSKNASMFIAPRW +>5 +CYHKMTTPCIRPGRRGRSSFILCRKLSDCRGEFYVNKRRWYYSTWKHYWYIFATGVIESGRKCYFFVGLGHEIPTFGFNYNLCISLFLIFYKIFLSHDCATFYYKYDWLYMVSIAHGHTWDVKWWWKAFPSNRDFCMFRTCVPTTFAACGAGCEHWWEHDSVHVAVSYKRVVPHDFDWFYIAVSTCGPIEGWHEELAHFLPNHVTCKMCPKFCPENRAVCIAHIMKHEENRWDLDDCYSWSSDKNVHIGVTVGEWSYIDTYCMITVYPLHVMNEYPTMINMEMDPRPPSRDRSRNGSVFNCPEKVWMNSNCYSYGKGIYHECKNYPWDSTCFRSHWVLMIVMRIWCNCRRGRLFPEELHFVIWWEHHDWVCHAPVFWANYFRANEIFFMKHFTKYTNRTKICNPSYSRCDSHVHERSSWIAKDKGIVMCTTSRHKSMWGAAMAMSAKWVKRREWSTDHFFDVPVTDPAFPIRMRDCDESWRTVVFATPPCCEMCTLAEAW +>6 +NDIERSISTSYFMPVLVAFWFDAEDISKGKTYERSETPIGSWTVTMKPLTDWRIFSEFWIEAGNIIKDGRILKVPREYNWWPGWEDCEVIPAPTYFVHLFETFRAHCHPHEDNPPYKAWAYPKYHWRATVCCMSDFDNNWLTEMFSRLSWIAAEEMKPCAPATINHMVLMMVAWEGWRAFPEAANEIKLTHYTWKMETIRGASHVYPLHSPICEMSGRAMNEWVGPTANHLAFCLSIDCLFGCFPHTAITSKTHEKCYEITGWYDWHCYLVMRKWHPTRWVTDGALNNGIMHFTEFYDGVKPTGLRHVMWGVIYSVSPHWEAFYLNDLDDLYNFGVDSELLVHWAYGHRYGMNIGIFAVPMGSPESGRYYYITAVIDKDCNRVNGFNSALIVRFTTEVNVGTKDLWNCNINCMVAIIRAPRRFHALEAFSHGVMEYMWECREPMWCYRKIYNNSKWAETNPDMCMWALFMKIAMPKYRPDPGFHAKFNSSYEPRMLWALH +>7 
+FMRTFHEWHITGNVHCDGKVWTYITVCEAILIHHTMNVPGIPPCSETGLWKIVTVMGEICLDFYMGAADICYMRFDGCTVILRMHWPLLAWKDIPYFVPIESPPGLVRPCLGMLTDMIKADMVRIHKWVHCEYKGLPDAPHRMKRKFWSKSKPWLHMYGIIWMGEEYRFDGHSLYEEIDSADPVHKIKFTRRKARAKEWVPSYMKKGCRFIWECCHTECLDCPMKAHDSEAEIKPNDYHFKWCCKMTGEMFTPLDRKVYLNPCFKPCFEEFTVNHTAWWNYSSVECALYWCETYNGAHWFIWTNDERGTLCFWRHCLTDMHCWVSYPRTRAASKYCYFSTWDGSHLGKHWPMNKVSCLETWYMAFYAVPGKDMFACNILPEKAWKWGHYDTSHGYLDGCSPTADIHSIEAWHEHEKAMYIKVCVEYKVERWKETLWEKKFWYMCLEESAALIIPPNRDSTVWVIMFMDDKLMAAPPVNYTEFNRRNKNLCNMSEAYIGVF +>8 +CKWSMCTFWNYRFFSLILYFRTTNALTAWTFVFCWPNLIGRSMKHDGLCHHSATYVFHAMMAEMAKVMDFCSAWVEDVMMPMLGFYHNLFNPRTGNENIWNDNCEVNWTVVMNGGMMFFVLWDKLIMVSAEWNNWARKIVKVYRDKDNKIVSRWFSYRDGVSFNFKGCLPCFKSGIIHHWNHDFAAYKNCGMPETRPDLYYGMSYALFYSLKETFDGFHIVNPGELNVWRIKKRKEMNMTLELHHMHWYACKFLDPIGNGKIACKCIDYVYRVHESCCVRHLSLWDYYCEADNEEMLAATAPKNKRLSAALYDRCGFDMDKGSEHECEFKRKDIVKYFMMIEMYTGTPVRPFRRKWRLCHCHKESRWWFVCSPGPFFGYMGHCWTYCHAKGMRACFRLIKEMASHCFFCMPLMHHNMRIHHMHTTRTPAKDVIIDILVGAASAYNDNEVSSMKTFYHCHAYSFWMRNVCPLTWTVHRMLPGWMACHLILEGSYCNGDMAD +>9 +FDFFKRVFIPFMETDDFMEHFYTSWHCGTIFIFRTYHILIWPGYRITNPFCIHGSPEFVAAHRAYLPAHPDDLKAWSDWFCMKEVVGKKWGYVAAKHNIHLVEKFKLDNGCDNCRRGMHSSGLKLVSGRNCNRHWSCVVLPCFCATLHHHHLWSESGPHLIFLWNYFKGGLHAFVADGMECHWIGSLTFFCMDPCVWKKDRGCWPWIFKLVVNYEKPKWMNFMVKNDCHMVRMPAMRMPKFNGYNSLAYSKCRVHHSAPVWHAVGRRMTVHPVGNLNSVIGYWDFRTNGYGTSGLLFRKFCCPHPADCFRACSFANAMHEGAEHPCDWSFYSKWEHICCSRMRRHECIREWDIRTDYDSDCEWMSVVSTLNPIKEWTHSELWSKSFWNWRVWDMFGTMYPGHRGPLECNLYDGVPRCPNTSSRSGMHEGIAYVLECESPHMAILGDAKFDLANREKYKATPHPPRTKYTRDERVRCNFEWEFSKHVGWHAGLSSVRIYCC diff --git a/q2_moshpit/eggnog/tests/data/random-db-1/ref_db.dmnd b/q2_moshpit/eggnog/tests/data/random-db-1/ref_db.dmnd new file mode 100644 index 00000000..e8009ce5 Binary files /dev/null and b/q2_moshpit/eggnog/tests/data/random-db-1/ref_db.dmnd differ diff --git a/q2_moshpit/eggnog/tests/test_method.py b/q2_moshpit/eggnog/tests/test_method.py new file mode 100644 index 00000000..ec323df8 --- /dev/null +++ b/q2_moshpit/eggnog/tests/test_method.py @@ -0,0 +1,63 @@ +# 
---------------------------------------------------------------------------- +# Copyright (c) 2022, QIIME 2 development team. +# +# Distributed under the terms of the Modified BSD License. +# +# The full license is in the file LICENSE, distributed with this software. +# ---------------------------------------------------------------------------- + +import pandas as pd +import pandas.testing as pdt + +import qiime2 +from qiime2.plugin.testing import TestPluginBase +from .._method import eggnog_diamond_search, eggnog_annotate +from q2_types_genomics.reference_db import ( + DiamondDatabaseDirFmt, EggnogRefDirFmt) +from q2_types_genomics.per_sample_data import ContigSequencesDirFmt +from q2_types_genomics.genome_data import SeedOrthologDirFmt, OrthologFileFmt + + +class TestDiamond(TestPluginBase): + package = 'q2_moshpit.eggnog.tests' + + def test_good_small_search(self): + input_sequences = qiime2.Artifact.import_data( + 'SampleData[Contigs]', + self.get_data_path('contig-sequences-1') + ).view(ContigSequencesDirFmt) + + diamond_db = qiime2.Artifact.import_data( + 'ReferenceDB[Diamond]', + self.get_data_path('random-db-1') + ).view(DiamondDatabaseDirFmt) + + _, obs = eggnog_diamond_search( + input_sequences=input_sequences, + diamond_db=diamond_db) + exp = pd.DataFrame({'0': [1.0, 0.0], '2': [1.0, 0.0], '8': [0.0, 3.0]}, + index=['s1', 's2']) + exp.columns.name = 'sseqid' + + pdt.assert_frame_equal(obs, exp) + + +class TestAnnotate(TestPluginBase): + package = 'q2_moshpit.eggnog.tests' + + def test_small_good_hits(self): + so_fp = self.get_data_path('good_hits/') + seed_orthologs = SeedOrthologDirFmt(so_fp, mode='r') + + egg_db_fp = self.get_data_path('eggnog_db/') + egg_db = EggnogRefDirFmt(egg_db_fp, mode='r') + + obs_obj = eggnog_annotate(hits_table=seed_orthologs, eggnog_db=egg_db) + + exp_fp = self.get_data_path('expected/test_output.emapper.annotations') + exp = OrthologFileFmt(exp_fp, mode='r').view(pd.DataFrame) + + objs = 
list(obs_obj.annotations.iter_views(OrthologFileFmt)) + self.assertEqual(len(objs), 1) + df = objs[0][1].view(pd.DataFrame) + pdt.assert_frame_equal(df, exp) diff --git a/q2_moshpit/metabat2/metabat2.py b/q2_moshpit/metabat2/metabat2.py index b11236f7..3feaad19 100644 --- a/q2_moshpit/metabat2/metabat2.py +++ b/q2_moshpit/metabat2/metabat2.py @@ -18,14 +18,16 @@ from q2_moshpit.metabat2.utils import _process_metabat2_arg -def _get_sample_name_from_path(fp): - return os.path.splitext(os.path.basename(fp))[0].split('_')[0] +def _get_sample_name_from_path(fp, suffix): + return os.path.basename(fp).rsplit(suffix, maxsplit=1)[0] def _assert_samples(contigs_fps, maps_fps) -> dict: contigs_fps, maps_fps = sorted(contigs_fps), sorted(maps_fps) - contig_samps = [_get_sample_name_from_path(x) for x in contigs_fps] - map_samps = [_get_sample_name_from_path(x) for x in maps_fps] + contig_samps = [_get_sample_name_from_path(x, '_contigs.fa') + for x in contigs_fps] + map_samps = [_get_sample_name_from_path(x, '_alignment.bam') + for x in maps_fps] if set(contig_samps) != set(map_samps): raise Exception('Contigs and alignment maps should belong to the ' 'same sample set. 
You provided contigs for ' diff --git a/q2_moshpit/metabat2/tests/test_metabat2.py b/q2_moshpit/metabat2/tests/test_metabat2.py index e22da164..15ef4c19 100644 --- a/q2_moshpit/metabat2/tests/test_metabat2.py +++ b/q2_moshpit/metabat2/tests/test_metabat2.py @@ -29,40 +29,52 @@ class TestMetabat2(TestPluginBase): package = 'q2_moshpit.metabat2.tests' def test_get_sample_name_from_path(self): - obs = _get_sample_name_from_path('/a/b/sampleX.fasta') + obs = _get_sample_name_from_path('/a/b/sampleX.fasta', + '.fasta') exp = 'sampleX' self.assertEqual(exp, obs) def test_get_sample_name_from_path_underscores(self): - obs = _get_sample_name_from_path('/a/b/sampleX_something.fasta') + obs = _get_sample_name_from_path('/a/b/sampleX_something.fasta', + '_something.fasta') exp = 'sampleX' self.assertEqual(exp, obs) def test_assert_samples_ok(self): - contigs = ['/a/b/s1_tigs.fa', '/a/b/s3_tigs.fa', '/a/b/s2_tigs.fa'] - maps = ['/a/b/s3_aln.bam', '/a/b/s2_aln.bam', '/a/b/s1_aln.bam'] + contigs = ['/a/b/s1_contigs.fa', '/a/b/s3_contigs.fa', + '/a/b/s2_contigs.fa'] + maps = ['/a/b/s3_alignment.bam', '/a/b/s2_alignment.bam', + '/a/b/s1_alignment.bam'] obs_samples = _assert_samples(contigs, maps) exp_samples = { - 's1': {'contigs': '/a/b/s1_tigs.fa', 'map': '/a/b/s1_aln.bam'}, - 's2': {'contigs': '/a/b/s2_tigs.fa', 'map': '/a/b/s2_aln.bam'}, - 's3': {'contigs': '/a/b/s3_tigs.fa', 'map': '/a/b/s3_aln.bam'} + 's1': {'contigs': '/a/b/s1_contigs.fa', + 'map': '/a/b/s1_alignment.bam'}, + 's2': {'contigs': '/a/b/s2_contigs.fa', + 'map': '/a/b/s2_alignment.bam'}, + 's3': {'contigs': '/a/b/s3_contigs.fa', + 'map': '/a/b/s3_alignment.bam'} } self.assertDictEqual(exp_samples, obs_samples) def test_assert_samples_uneven(self): - contigs = ['/a/b/s1_tigs.fa', '/a/b/s3_tigs.fa'] - maps = ['/a/b/s3_aln.bam', '/a/b/s2_aln.bam', '/a/b/s1_aln.bam'] + contigs = ['/a/b/s1_contigs.fa', '/a/b/s3_contigs.fa'] + maps = ['/a/b/s3_alignment.bam', '/a/b/s2_alignment.bam', + '/a/b/s1_alignment.bam'] with 
self.assertRaisesRegex( Exception, - 'contigs for samples: s1,s3 but maps for samples: s1,s2,s3' + 'Contigs and alignment maps should belong to the same sample' + ' set. You provided contigs for samples: s1,s3 but maps for' + ' samples: s1,s2,s3. Please check your inputs and try again.' ): _assert_samples(contigs, maps) def test_assert_samples_non_matching(self): - contigs = ['/a/b/s1_tigs.fa', '/a/b/s4_tigs.fa', '/a/b/s2_tigs.fa'] - maps = ['/a/b/s3_aln.bam', '/a/b/s2_aln.bam', '/a/b/s1_aln.bam'] + contigs = ['/a/b/s1_contigs.fa', '/a/b/s4_contigs.fa', + '/a/b/s2_contigs.fa'] + maps = ['/a/b/s3_alignment.bam', '/a/b/s2_alignment.bam', + '/a/b/s1_alignment.bam'] with self.assertRaisesRegex( Exception, diff --git a/q2_moshpit/plugin_setup.py b/q2_moshpit/plugin_setup.py index 38ae402f..8f152105 100644 --- a/q2_moshpit/plugin_setup.py +++ b/q2_moshpit/plugin_setup.py @@ -5,30 +5,40 @@ # # The full license is in the file LICENSE, distributed with this software. # ---------------------------------------------------------------------------- -from q2_types.feature_data import FeatureData, Sequence + from q2_types.per_sample_sequences import ( SequencesWithQuality, PairedEndSequencesWithQuality ) -from q2_types.sample_data import SampleData import q2_moshpit.kraken2.classification from q2_types_genomics.kraken2 import ( Kraken2Reports, Kraken2Outputs, Kraken2DB ) + +from qiime2.plugin import (Plugin, Citations) + +import importlib +import q2_moshpit + +from q2_types.sample_data import SampleData +from q2_types.feature_table import FeatureTable, Frequency +from q2_types.feature_data import FeatureData, Sequence + +from q2_types_genomics.reference_db import ReferenceDB, Diamond, Eggnog +from q2_types_genomics.feature_data import NOG +from q2_types_genomics.genome_data import GenomeData, BLAST6 from q2_types_genomics.kraken2._type import BrackenDB from q2_types_genomics.per_sample_data import MAGs, Contigs from q2_types_genomics.per_sample_data._type import AlignmentMap + 
from qiime2.core.type import Bool, Range, Int, Str, Float, List, Choices -from qiime2.plugin import (Plugin, Citations) -import q2_moshpit -from q2_moshpit import __version__ citations = Citations.load('citations.bib', package='q2_moshpit') plugin = Plugin( name='moshpit', - version=__version__, + version=q2_moshpit.__version__, website="https://github.com/bokulich-lab/q2-moshpit", package='q2_moshpit', description=( @@ -38,6 +48,9 @@ short_description='QIIME 2 plugin for metagenome analysis.', ) +importlib.import_module('q2_moshpit.eggnog') +importlib.import_module('q2_moshpit.metabat2') + plugin.methods.register_function( function=q2_moshpit.metabat2.bin_contigs_metabat, inputs={ @@ -203,3 +216,55 @@ 'versions from an online resource.', citations=[citations["wood2019"], citations["lu2017"]] ) + +plugin.methods.register_function( + function=q2_moshpit.eggnog.eggnog_diamond_search, + inputs={'input_sequences': SampleData[Contigs], + 'diamond_db': ReferenceDB[Diamond], + }, + parameters={ + 'num_cpus': Int, + 'db_in_memory': Bool, + }, + input_descriptions={ + 'input_sequences': 'Sequence data of the contigs we want to ' + 'search for hits using the Diamond Database', + 'diamond_db': 'The filepath to an artifact containing the ' + 'Diamond database', + }, + parameter_descriptions={ + 'num_cpus': 'Number of CPUs to utilize. \'0\' will ' + 'use all available.', + 'db_in_memory': 'Read database into memory. 
The ' + 'database can be very large, so this ' + 'option should only be used on clusters or other ' + 'machines with enough memory.', + }, + outputs=[('seed_ortholog', GenomeData[BLAST6]), + ('ortholog_counts', FeatureTable[Frequency]) + ], + name='Run eggNOG search using diamond aligner', + description="This method performs the steps by which we find our " + "possible target sequences to annotate using the diamond " + "search functionality from the eggnog `emapper.py` script", + ) + +plugin.methods.register_function( + function=q2_moshpit.eggnog.eggnog_annotate, + inputs={ + 'hits_table': GenomeData[BLAST6], + 'eggnog_db': ReferenceDB[Eggnog], + }, + parameters={ + 'db_in_memory': Bool, + }, + parameter_descriptions={ + 'db_in_memory': 'Read eggnog database into memory. The ' + 'eggnog database is very large(>44GB), so this ' + 'option should only be used on clusters or other ' + 'machines with enough memory.', + }, + outputs=[('ortholog_annotations', FeatureData[NOG])], + name='Annotate orthologs against eggNOG database', + description="Apply eggnog mapper to annotate seed orthologs.", + ) diff --git a/setup.py b/setup.py index 1c07d974..6196f722 100644 --- a/setup.py +++ b/setup.py @@ -16,7 +16,7 @@ cmdclass=versioneer.get_cmdclass(), license='BSD-3-Clause', packages=find_packages(), - author="Michal Ziemski", + author="Michal Ziemski, Keegan Evans", author_email="ziemski.michal@gmail.com", description="QIIME 2 plugin for metagenome analysis.", url="https://github.com/bokulich-lab/q2-moshpit", @@ -25,11 +25,26 @@ ['q2-moshpit=q2_moshpit.plugin_setup:plugin'] }, package_data={ - 'q2_moshpit': ['citations.bib'], + 'q2_moshpit': ['citations.bib', 'tests/data/*'], + 'q2_moshpit.usage_examples': ['tests/data/*'], 'q2_moshpit.metabat2.tests': [ 'data/*', 'data/bins/samp1/*', 'data/contigs/*', 'data/depth/*', 'data/maps/*' ], + 'q2_moshpit.checkm.tests': [ + 'data/*', 'data/bins/*', 'data/bins/*/*', + 'data/checkm_reports/*/*/*', 'data/plots/*/*/*' + ], + 
'q2_moshpit.eggnog': [ + 'tests/data/*', + 'tests/data/contig-sequences-1/*', + 'tests/data/random-db-1/*', + 'tests/data/good_hits/*', + 'tests/data/bad_hits/*', + 'tests/data/eggnog_db/*', + 'tests/data/expected/*', + 'citations.bib', + ], 'q2_moshpit.kraken2.tests': [ 'data/*', 'data/mags/*', 'data/mags/*/*', 'data/single-end/*', 'data/paired-end/*',