Skip to content

Commit

Permalink
ENH: allow SampleData[MAGs] as input to eggnog-diamond-search and `…
Browse files Browse the repository at this point in the history
…classify-kraken2` (#125)

Co-authored-by: Santiago Castro Dau <[email protected]>
  • Loading branch information
misialq and Sann5 authored May 10, 2024
1 parent 2b3b03d commit 06a920b
Show file tree
Hide file tree
Showing 24 changed files with 2,836 additions and 73 deletions.
17 changes: 9 additions & 8 deletions q2_moshpit/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,24 +6,25 @@
# The full license is in the file LICENSE, distributed with this software.
# ----------------------------------------------------------------------------

from . import busco
from . import eggnog
from . import partition
from . import prodigal
from ._version import get_versions
from .dereplication import dereplicate_mags
from .kaiju import classification as kaiju_class, database as kaiju_db
from .kraken2 import (
classification as kraken_class, database as kraken_db, bracken, helpers
classification as kraken_class,
database as kraken_db, bracken,
helpers as kraken_helpers
)
from .metabat2 import metabat2
from . import prodigal
from . import eggnog
from . import busco
from . import partition


from ._version import get_versions
__version__ = get_versions()['version']
del get_versions

__all__ = [
'metabat2', 'bracken', 'kraken_class', 'kraken_db',
'kaiju_class', 'kaiju_db', 'dereplicate_mags', 'eggnog',
'busco', 'prodigal', 'helpers', 'partition'
'busco', 'prodigal', 'kraken_helpers', 'partition'
]
49 changes: 33 additions & 16 deletions q2_moshpit/eggnog/_method.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,21 +6,25 @@
# The full license is in the file LICENSE, distributed with this software.
# ----------------------------------------------------------------------------
import glob
import subprocess
import os
import subprocess
import tempfile
import qiime2.util
import pandas as pd
from typing import Union
from q2_types.per_sample_sequences import ContigSequencesDirFmt, Contigs

import pandas as pd
import qiime2.util

from q2_types.feature_data import FeatureData, BLAST6
from q2_types.feature_data_mag import (
OrthologAnnotationDirFmt, MAGSequencesDirFmt, MAG
)
from q2_types.genome_data import SeedOrthologDirFmt, OrthologFileFmt
from q2_types.per_sample_sequences import (
ContigSequencesDirFmt, MultiMAGSequencesDirFmt, Contigs, MAGs
)
from q2_types.reference_db import (
EggnogRefDirFmt, DiamondDatabaseDirFmt
)
from q2_types.feature_data import DNAFASTAFormat, FeatureData, BLAST6
from q2_types.feature_data_mag import (
OrthologAnnotationDirFmt, MAGSequencesDirFmt, MAG
)
from q2_types.sample_data import SampleData


Expand All @@ -40,6 +44,10 @@ def eggnog_diamond_search(
"moshpit", "partition_feature_data_mags")
elif sequences.type <= SampleData[Contigs]:
partition_method = ctx.get_action("assembly", "partition_contigs")
elif sequences.type <= SampleData[MAGs]:
partition_method = ctx.get_action(
"moshpit", "partition_sample_data_mags"
)
else:
raise NotImplementedError()

Expand All @@ -61,7 +69,11 @@ def eggnog_diamond_search(


def _eggnog_diamond_search(
sequences: Union[ContigSequencesDirFmt, MAGSequencesDirFmt],
sequences: Union[
ContigSequencesDirFmt,
MultiMAGSequencesDirFmt,
MAGSequencesDirFmt
],
diamond_db: DiamondDatabaseDirFmt,
num_cpus: int = 1, db_in_memory: bool = False
) -> (SeedOrthologDirFmt, pd.DataFrame):
Expand All @@ -71,22 +83,27 @@ def _eggnog_diamond_search(

# run analysis
if isinstance(sequences, ContigSequencesDirFmt):
for relpath, obj_path in sequences.sequences.iter_views(
DNAFASTAFormat):
sample_id = str(relpath).rsplit(r'_', 1)[0]
for sample_id, contigs_fp in sequences.sample_dict().items():
_diamond_search_runner(
input_path=obj_path, diamond_db=diamond_db_fp,
input_path=contigs_fp, diamond_db=diamond_db_fp,
sample_label=sample_id, output_loc=temp.name,
num_cpus=num_cpus, db_in_memory=db_in_memory
)
elif isinstance(sequences, MAGSequencesDirFmt):
for mag_fp in glob.glob(f'{sequences.path}/*.fa*'):
sample_id = os.path.splitext(os.path.basename(mag_fp))[0]
for mag_id, mag_fp in sequences.feature_dict().items():
_diamond_search_runner(
input_path=mag_fp, diamond_db=diamond_db_fp,
sample_label=sample_id, output_loc=temp.name,
sample_label=mag_id, output_loc=temp.name,
num_cpus=num_cpus, db_in_memory=db_in_memory
)
elif isinstance(sequences, MultiMAGSequencesDirFmt):
for sample_id, mags in sequences.sample_dict().items():
for mag_id, mag_fp in mags.items():
_diamond_search_runner(
input_path=mag_fp, diamond_db=diamond_db_fp,
sample_label=mag_id, output_loc=temp.name,
num_cpus=num_cpus, db_in_memory=db_in_memory
)

result = SeedOrthologDirFmt()
ortholog_fps = [
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
sample-id,mag-id,filename
sample1,mag1,sample1/194b1aca-9373-4298-ba5c-05b382d1f553.fa
sample1,mag2,sample1/e1ba1d20-b466-4fef-ae19-6bf9c5c63d6f.fa
sample2,mag1,sample2/8f40a3bc-14f0-426b-b2ba-7fb3b1cd0c02.fa
sample2,mag2,sample2/c6bd5123-35cf-473a-bebf-85cf544bcd48.fa
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
>shouldnt-hit-0
ATAAATTAGTTACACTCTCCGTGACTCGAGCTAACCTGAACTCGTAAGAGGGTCCCTTAGCTAGAGACTTGTCTTGACCCAAACTAGTAGTAACTGCAAAACGGAATCTTAACAAAGGTTGCTACTAATGGCACGTCGTCACTTTTCTGAATTCGCATATGGATCCACGAGGGGAAATTGGCTTTGGAGAGATACACATCTGCCGACCAGACGCGGAATCTCAGTGAGTGTCATTCATGGCCCCTACCCT
>shouldnt-hit-1
ATGCGTTCGTCACGAGGTTGCAACGGGCCGCCTTGCTTCTTAGCTCGAGAGATAGTTACGGGTTTTAGTAGTAGGAGCGTATTCCATACCCACAATTCGGAACTGCCCATGAGCCGCCTAGTAAGGATAACCTTATGATAGCTATATGCTCTTCCTACTATCTAGCGGTGCTCAATTTGCCAATTTCCGGGTCCGACTACGAGGCCGGATCGCTGAGGAGTGACAACCCCTGTGCTATACATTACGGTCA
>should-hit-seq8a
TGCAAATGGAGCATGTGCACCTTTTGGAACTATCGCTTTTTTAGCCTGATTCTGTATTTTCGCACCACCAACGCGCTGACCGCGTGGACCTTTGTGTTTTGCTGGCCGAACCTGATTGGCCGCAGCATGAAACATGATGGCCTGTGCCATCATAGCGCGACCTATGTGTTTCATGCGATGATGGCGGAAATGGCGAAAGTGATGGATTTTTGCAGCGCGTGGGTGGAAGATGTGATGATGCCGATGCTGGGCTTTTATCATAACCTGTTTAACCCGCGCACCGGCAACGAAAACATTTGGAACGATAACTGCGAAGTGAACTGGACCGTGGTGATGAACGGCGGCATGATGTTTTTTGTGCTGTGGGATAAACTGATTATGGTGAGCGCGGAATGGAACAACTGGGCGCGCAAAATTGTGAAAGTGTATCGCGATAAAGATAACAAAATTGTGAGCCGCTGGTTTAGCTATCGCGATGGCGTGAGCTTTAACTTTAAAGGCTGCCTGCCGTGCTTTAAAAGCGGCATTATTCATCATTGGAACCATGATTTTGCGGCGTATAAAAACTGCGGCATGCCGGAAACCCGCCCGGATCTGTATTATGGCATGAGCTATGCGCTGTTTTATAGCCTGAAAGAAACCTTTGATGGCTTTCATATTGTGAACCCGGGCGAACTGAACGTGTGGCGCATTAAAAAACGCAAAGAAATGAACATGACCCTGGAACTGCATCATATGCATTGGTATGCGTGCAAATTTCTGGATCCGATTGGCAACGGCAAAATTGCGTGCAAATGCATTGATTATGTGTATCGCGTGCATGAAAGCTGCTGCGTGCGCCATCTGAGCCTGTGGGATTATTATTGCGAAGCGGATAACGAAGAAATGCTGGCGGCGACCGCGCCGAAAAACAAACGCCTGAGCGCGGCGCTGTATGATCGCTGCGGCTTTGATATGGATAAAGGCAGCGAACATGAATGCGAATTTAAACGCAAAGATATTGTGAAATATTTTATGATGATTGAAATGTATACCGGCACCCCGGTGCGCCCGTTTCGCCGCAAATGGCGCCTGTGCCATTGCCATAAAGAAAGCCGCTGGTGGTTTGTGTGCAGCCCGGGCCCGTTTTTTGGCTAT
>should-hit-seq8b
ACCGGCAACGAAAACATTTGGAACGATAACTGCGAAGTGAACTGGACCGTGGTGATGAACGGCGGCATGATGTTTTTTGTGCTGTGGGATAAACTGATTATGGTGAGCGCGGAATGGAACAACTGGGCGCGCAAAATTGTGAAAGTGTATCGCGATAAAGATAACAAAATTGTGAGCCGCTGGTTTAGCTATCGCGATGGCGTGAGCTTTAACTTTAAAGGCTGCCTGCCGTGCTTTAAAAGCGGCATTATTCATCATTGGAACCATGATTTTGCGGCGTATAAAAACTGCGGCATGCCGGAAACCCGCCCGGATCTGTATTATGGCATGAGCTATGCGCTGTTTTATAGCCTGAAAGAAACCTTTGATGGCTTTCATATTGTGAACCCGGGCGAACTGAACGTGTGGCGCATTAAAAAACGCAAAGAAATGAACATGACCCTGGAACTGCATCATATGCATTGGTATGCGTGCAAATTTCTGGATCCGATTGGCAACGGCAAAATTGCGTGCAAATGCATTGATTATGTGTATCGCGTGCATGAAAGCTGCTGCGTGCGCCATCTGAGCCTGTGGGATTATTATTGCGAAGCGGATAACGAAGAAATGCTGGCGGCGACCGCGCCGAAAAACAAACGCCTGAGCGCGGCGCTGTATGATCGCTGCGGCTTTGATATGGATAAAGGCAGCGAACATGAATGCGAATTTAAACGCAAAGATATTGTGAAATATTTTATGATGATTGAAATGTATACCGGCACCCCGGTGCGCCCGTTTCGCCGCAAATGGCGCCTGTGCCATTGCCATAAAGAAAGCCGCTGGTGGTTTGTGTGCAGCCCGGGCCCGTTTTTTGGCTAT
>should-hit-seq8c
TGCAAATGGAGCATGTGCACCTTTTGGAACTATCGCTTTTTTAGCCTGATTCTGTATTTTCGCACCACCAACGCGCTGACCGCGTGGACCTTTGTGTTTTGCTGGCCGAACCTGATTGGCCGCAGGAAACATGATGGCCTGTGCCATCATAGCGCGACCTATGTGTTTCATGCGATGATGGCGGAAATGGCGAAAGTGATGGATTTTTGCAGCGCGTGGGTGGAAGATGTGATGATGCCGATGCTGGGCTTTTATCATAACCTGTTTAACCCGCGCACCGGCAACGAAAACATTTGGAACGATAACTGCGAAGTGAACTGGACCGTGGTGATGAACGGCGGCATGATGTTTTTTGTGCTGTGGGATAAACTGATTATGGTGAGCGCGGAATGGAACAACTGGGCGCGCAAAATTGTGAAAGTGTATCGCGATAAAGATAACAAAATTGTGAGCCGCTGGTTTAGCTATCGCGATGGCGTGAGCTTTAACTTTAAAGGCTGCCTGCCGTGCTTTAAAAGCGGCATTATTCATCATTGGAACCATGATTTTGCGGCGTATAAAAACTGCGGCATGCCGGAAACCCGCCCGGATCTGTATTATGGCATGAGCTATGCGCTGTTTTATAGCCTGAAAGAAACCTTTGATGGCTTTCATATTGTGAACCCGGGCGAACTGAACGTGTGGCGCATTAAAAAACGCAAAGAAATGAACATGACCCTGGAACTGCATCATATGCATTGGTATGCGTGCAAATTTCTGGATCCGATTGGCAAC
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
>should-hit-seq-0
AACGATACCGAAGCGGTGATTGCGCATAAAAACAGCCTGAACATGGCGATTCTGATTATGTTTGATGATGCGGTGGCGATTTTTGTGTATAGCAACCTGGTGTGCATTCGCTTTGATGAACATTATGGCTGGGGCAACGAAGCGAACCGCATTCCGCATATTCTGCCGGATATTCGCCGCTGCATGCCGTGGAGCCATAAAGGCGATCATAACCCGATGGGCAACATGAGCGCGCTGTGCAAATGGGCGACCTATCGCCCGATTAAAAGCTGGTGGAGCCCGTATAAAATTGCGCGCGTGTTTTTTTGCATTGTGCATTTTGTGAACACCCCGAAACCGAAATGGGGCATGGAATTTGATAAACGCGAAGGCATGGTGATTACCATGCGCATTTGGAAAAACTGCCTGGGCATGTGCCTGGAAAAAGCGAACATTTGCGAAGGCACCCGCAACTGGCGCATTAAAATGAGCATGTGGGCGGGCAGCTTTATTGCGCTGATGGATT
>should-hit-seq-2
CTGGAAAGCGCGATGGGCGGCCCGCATATGAGCATTACCCCGGAAGAAAACGCGTTTGGCGGCTTTAACTTTTGCACCGGCGTGGTGACCGAACATATTCCGATGGATATTGTGGCGATTTGCTGGGCGCTGTTTAGCTGGGAAAACACCAAATTTGGCACCGTGAAAGATAACTGGCTGTATCGCTGGACCATTTGGTGGTGGTTTACCCTGGATACCGGCGCGAGCGTGGATTGCAAATGGGGCTGCAACCGCCGCGAACGCGCGATTTGGGTGTGCTGGAACCGCTTTATTACCATTAGCTTTCATAAACCGCGCGATTGCAAAACCAGCGCGTATCATACCGGCAAACCGGAAATGTATCTGGATCTGATGTGGATTTTTGTGAGCGTGCATGTGTTTATTATGACCCATCTGGCGGGCGATCATTTTCTGGGCCGCGTGCTGCTGCATCATAACAACGATGAAGATTATGATCGCAACTTTCCGATGCTGGATTTTAACTGCCATTGCTGGATTGCGATTTGGCATCGCGTGTGGTATCCGAGCAAAGTGCATGGCAGCGTGGATGCGCTGTTTGAATGGATTCCGCGCAACGGCGATTTTACCCTGCGCCGCAACGCGGGCGATCCGCGCTATACCAGCGCGAGCATGCGCTTTTTTGCGATGTGCGCGATGGAAATTATGCTGGCGCTGATGGGCGAAAGCATGAAACATGCGCTGGAAAGCGCGATGGGCGGCCCGCATATGAGCATTACCCCGGAAGAAAACGCGTTTGGCGGCTTTAACTTTTGCACCGGCGTGGTGACCGAACATATTCCGATGGATATTGTGGCGATTTGCTGGGCGCTGTTTAGCTGGGAAAACACCAAATTTGGCACCGTGAAAGATAACTGGCTGTATCGCTGGACCATTTGGTGGTGGTTTACCCTGGATACCGGCGCGAGCGTGGATTGCAAATGGGGCTGCAACCGCCGCGAACGCGCGATTTGGGTGTGCTGGAACCGCTTTATTACCATTAGCTTTCATAAACCGCGCGATTGCAAAACCAGCGCGTATCATACCGGCAAACCGGAAATGTATCTGGATCTGATGTGGATTTTTGTGAGCGTGCATGTGTTTATTATGACCCATCTGGCGGGCGATCATTTTCTGGGCCGCGTGCTGCTGCATCATAACAACGATGAAGATTATGATCGCAACTTTCCGATGCTGGATTTTAACTGCCATTGCTGGATTGCGATTTGGCATCGCGTGTGGTATCCGAGCAAAGTGCATGGCAGCGTGGATGCGCTGTTTGAATGGATTCCGCGCAACGGCGATTTTACCCTGCGCCGCAACGCGGGCGATCCGCGCTATACCAGCGCGAGCATGCGCTTTTTTGCGATGTGCGCGATGGAAATTATGCTGGCGCTGATGGGCGAAAGCATGAAACATGCGCATGGC
>shouldnt-hit
GCATTGAAGCTTTCTGACTGTTAAATAGTGTAGGCCCCAGCTGTTGATTTTTTAGACTAGAGGTGGGGCACTGTCCCGACACTTCTGGGTGTCCGCCACTGAGATGAACCCCACCGGGTCAAAGGATGTCAACGAAGTTCATTCAAGCTCACACGTCCAAGACCAGTGGTCAGGCTCTCTGTCATGCACCGTCCGCTTTGCAGCCGCGTCTCAGCGCCTCCCTACGCTCGAGATTGTCTGGCGCTCGGGTCATGGC
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
>shouldnt-hit-0
ATAAATTAGTTACACTCTCCGTGACTCGAGCTAACCTGAACTCGTAAGAGGGTCCCTTAGCTAGAGACTTGTCTTGACCCAAACTAGTAGTAACTGCAAAACGGAATCTTAACAAAGGTTGCTACTAATGGCACGTCGTCACTTTTCTGAATTCGCATATGGATCCACGAGGGGAAATTGGCTTTGGAGAGATACACATCTGCCGACCAGACGCGGAATCTCAGTGAGTGTCATTCATGGCCCCTACCCT
>shouldnt-hit-1
ATGCGTTCGTCACGAGGTTGCAACGGGCCGCCTTGCTTCTTAGCTCGAGAGATAGTTACGGGTTTTAGTAGTAGGAGCGTATTCCATACCCACAATTCGGAACTGCCCATGAGCCGCCTAGTAAGGATAACCTTATGATAGCTATATGCTCTTCCTACTATCTAGCGGTGCTCAATTTGCCAATTTCCGGGTCCGACTACGAGGCCGGATCGCTGAGGAGTGACAACCCCTGTGCTATACATTACGGTCA
>should-hit-seq8a
TGCAAATGGAGCATGTGCACCTTTTGGAACTATCGCTTTTTTAGCCTGATTCTGTATTTTCGCACCACCAACGCGCTGACCGCGTGGACCTTTGTGTTTTGCTGGCCGAACCTGATTGGCCGCAGCATGAAACATGATGGCCTGTGCCATCATAGCGCGACCTATGTGTTTCATGCGATGATGGCGGAAATGGCGAAAGTGATGGATTTTTGCAGCGCGTGGGTGGAAGATGTGATGATGCCGATGCTGGGCTTTTATCATAACCTGTTTAACCCGCGCACCGGCAACGAAAACATTTGGAACGATAACTGCGAAGTGAACTGGACCGTGGTGATGAACGGCGGCATGATGTTTTTTGTGCTGTGGGATAAACTGATTATGGTGAGCGCGGAATGGAACAACTGGGCGCGCAAAATTGTGAAAGTGTATCGCGATAAAGATAACAAAATTGTGAGCCGCTGGTTTAGCTATCGCGATGGCGTGAGCTTTAACTTTAAAGGCTGCCTGCCGTGCTTTAAAAGCGGCATTATTCATCATTGGAACCATGATTTTGCGGCGTATAAAAACTGCGGCATGCCGGAAACCCGCCCGGATCTGTATTATGGCATGAGCTATGCGCTGTTTTATAGCCTGAAAGAAACCTTTGATGGCTTTCATATTGTGAACCCGGGCGAACTGAACGTGTGGCGCATTAAAAAACGCAAAGAAATGAACATGACCCTGGAACTGCATCATATGCATTGGTATGCGTGCAAATTTCTGGATCCGATTGGCAACGGCAAAATTGCGTGCAAATGCATTGATTATGTGTATCGCGTGCATGAAAGCTGCTGCGTGCGCCATCTGAGCCTGTGGGATTATTATTGCGAAGCGGATAACGAAGAAATGCTGGCGGCGACCGCGCCGAAAAACAAACGCCTGAGCGCGGCGCTGTATGATCGCTGCGGCTTTGATATGGATAAAGGCAGCGAACATGAATGCGAATTTAAACGCAAAGATATTGTGAAATATTTTATGATGATTGAAATGTATACCGGCACCCCGGTGCGCCCGTTTCGCCGCAAATGGCGCCTGTGCCATTGCCATAAAGAAAGCCGCTGGTGGTTTGTGTGCAGCCCGGGCCCGTTTTTTGGCTAT
>should-hit-seq8b
ACCGGCAACGAAAACATTTGGAACGATAACTGCGAAGTGAACTGGACCGTGGTGATGAACGGCGGCATGATGTTTTTTGTGCTGTGGGATAAACTGATTATGGTGAGCGCGGAATGGAACAACTGGGCGCGCAAAATTGTGAAAGTGTATCGCGATAAAGATAACAAAATTGTGAGCCGCTGGTTTAGCTATCGCGATGGCGTGAGCTTTAACTTTAAAGGCTGCCTGCCGTGCTTTAAAAGCGGCATTATTCATCATTGGAACCATGATTTTGCGGCGTATAAAAACTGCGGCATGCCGGAAACCCGCCCGGATCTGTATTATGGCATGAGCTATGCGCTGTTTTATAGCCTGAAAGAAACCTTTGATGGCTTTCATATTGTGAACCCGGGCGAACTGAACGTGTGGCGCATTAAAAAACGCAAAGAAATGAACATGACCCTGGAACTGCATCATATGCATTGGTATGCGTGCAAATTTCTGGATCCGATTGGCAACGGCAAAATTGCGTGCAAATGCATTGATTATGTGTATCGCGTGCATGAAAGCTGCTGCGTGCGCCATCTGAGCCTGTGGGATTATTATTGCGAAGCGGATAACGAAGAAATGCTGGCGGCGACCGCGCCGAAAAACAAACGCCTGAGCGCGGCGCTGTATGATCGCTGCGGCTTTGATATGGATAAAGGCAGCGAACATGAATGCGAATTTAAACGCAAAGATATTGTGAAATATTTTATGATGATTGAAATGTATACCGGCACCCCGGTGCGCCCGTTTCGCCGCAAATGGCGCCTGTGCCATTGCCATAAAGAAAGCCGCTGGTGGTTTGTGTGCAGCCCGGGCCCGTTTTTTGGCTAT
>should-hit-seq8c
TGCAAATGGAGCATGTGCACCTTTTGGAACTATCGCTTTTTTAGCCTGATTCTGTATTTTCGCACCACCAACGCGCTGACCGCGTGGACCTTTGTGTTTTGCTGGCCGAACCTGATTGGCCGCAGGAAACATGATGGCCTGTGCCATCATAGCGCGACCTATGTGTTTCATGCGATGATGGCGGAAATGGCGAAAGTGATGGATTTTTGCAGCGCGTGGGTGGAAGATGTGATGATGCCGATGCTGGGCTTTTATCATAACCTGTTTAACCCGCGCACCGGCAACGAAAACATTTGGAACGATAACTGCGAAGTGAACTGGACCGTGGTGATGAACGGCGGCATGATGTTTTTTGTGCTGTGGGATAAACTGATTATGGTGAGCGCGGAATGGAACAACTGGGCGCGCAAAATTGTGAAAGTGTATCGCGATAAAGATAACAAAATTGTGAGCCGCTGGTTTAGCTATCGCGATGGCGTGAGCTTTAACTTTAAAGGCTGCCTGCCGTGCTTTAAAAGCGGCATTATTCATCATTGGAACCATGATTTTGCGGCGTATAAAAACTGCGGCATGCCGGAAACCCGCCCGGATCTGTATTATGGCATGAGCTATGCGCTGTTTTATAGCCTGAAAGAAACCTTTGATGGCTTTCATATTGTGAACCCGGGCGAACTGAACGTGTGGCGCATTAAAAAACGCAAAGAAATGAACATGACCCTGGAACTGCATCATATGCATTGGTATGCGTGCAAATTTCTGGATCCGATTGGCAAC
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
>should-hit-seq-0
AACGATACCGAAGCGGTGATTGCGCATAAAAACAGCCTGAACATGGCGATTCTGATTATGTTTGATGATGCGGTGGCGATTTTTGTGTATAGCAACCTGGTGTGCATTCGCTTTGATGAACATTATGGCTGGGGCAACGAAGCGAACCGCATTCCGCATATTCTGCCGGATATTCGCCGCTGCATGCCGTGGAGCCATAAAGGCGATCATAACCCGATGGGCAACATGAGCGCGCTGTGCAAATGGGCGACCTATCGCCCGATTAAAAGCTGGTGGAGCCCGTATAAAATTGCGCGCGTGTTTTTTTGCATTGTGCATTTTGTGAACACCCCGAAACCGAAATGGGGCATGGAATTTGATAAACGCGAAGGCATGGTGATTACCATGCGCATTTGGAAAAACTGCCTGGGCATGTGCCTGGAAAAAGCGAACATTTGCGAAGGCACCCGCAACTGGCGCATTAAAATGAGCATGTGGGCGGGCAGCTTTATTGCGCTGATGGATT
>should-hit-seq-2
CTGGAAAGCGCGATGGGCGGCCCGCATATGAGCATTACCCCGGAAGAAAACGCGTTTGGCGGCTTTAACTTTTGCACCGGCGTGGTGACCGAACATATTCCGATGGATATTGTGGCGATTTGCTGGGCGCTGTTTAGCTGGGAAAACACCAAATTTGGCACCGTGAAAGATAACTGGCTGTATCGCTGGACCATTTGGTGGTGGTTTACCCTGGATACCGGCGCGAGCGTGGATTGCAAATGGGGCTGCAACCGCCGCGAACGCGCGATTTGGGTGTGCTGGAACCGCTTTATTACCATTAGCTTTCATAAACCGCGCGATTGCAAAACCAGCGCGTATCATACCGGCAAACCGGAAATGTATCTGGATCTGATGTGGATTTTTGTGAGCGTGCATGTGTTTATTATGACCCATCTGGCGGGCGATCATTTTCTGGGCCGCGTGCTGCTGCATCATAACAACGATGAAGATTATGATCGCAACTTTCCGATGCTGGATTTTAACTGCCATTGCTGGATTGCGATTTGGCATCGCGTGTGGTATCCGAGCAAAGTGCATGGCAGCGTGGATGCGCTGTTTGAATGGATTCCGCGCAACGGCGATTTTACCCTGCGCCGCAACGCGGGCGATCCGCGCTATACCAGCGCGAGCATGCGCTTTTTTGCGATGTGCGCGATGGAAATTATGCTGGCGCTGATGGGCGAAAGCATGAAACATGCGCTGGAAAGCGCGATGGGCGGCCCGCATATGAGCATTACCCCGGAAGAAAACGCGTTTGGCGGCTTTAACTTTTGCACCGGCGTGGTGACCGAACATATTCCGATGGATATTGTGGCGATTTGCTGGGCGCTGTTTAGCTGGGAAAACACCAAATTTGGCACCGTGAAAGATAACTGGCTGTATCGCTGGACCATTTGGTGGTGGTTTACCCTGGATACCGGCGCGAGCGTGGATTGCAAATGGGGCTGCAACCGCCGCGAACGCGCGATTTGGGTGTGCTGGAACCGCTTTATTACCATTAGCTTTCATAAACCGCGCGATTGCAAAACCAGCGCGTATCATACCGGCAAACCGGAAATGTATCTGGATCTGATGTGGATTTTTGTGAGCGTGCATGTGTTTATTATGACCCATCTGGCGGGCGATCATTTTCTGGGCCGCGTGCTGCTGCATCATAACAACGATGAAGATTATGATCGCAACTTTCCGATGCTGGATTTTAACTGCCATTGCTGGATTGCGATTTGGCATCGCGTGTGGTATCCGAGCAAAGTGCATGGCAGCGTGGATGCGCTGTTTGAATGGATTCCGCGCAACGGCGATTTTACCCTGCGCCGCAACGCGGGCGATCCGCGCTATACCAGCGCGAGCATGCGCTTTTTTGCGATGTGCGCGATGGAAATTATGCTGGCGCTGATGGGCGAAAGCATGAAACATGCGCATGGC
>shouldnt-hit
GCATTGAAGCTTTCTGACTGTTAAATAGTGTAGGCCCCAGCTGTTGATTTTTTAGACTAGAGGTGGGGCACTGTCCCGACACTTCTGGGTGTCCGCCACTGAGATGAACCCCACCGGGTCAAAGGATGTCAACGAAGTTCATTCAAGCTCACACGTCCAAGACCAGTGGTCAGGCTCTCTGTCATGCACCGTCCGCTTTGCAGCCGCGTCTCAGCGCCTCCCTACGCTCGAGATTGTCTGGCGCTCGGGTCATGGC
60 changes: 56 additions & 4 deletions q2_moshpit/eggnog/tests/test_method.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,15 @@
import pandas.testing as pdt
from qiime2.plugin.testing import TestPluginBase
from qiime2.sdk.parallel_config import ParallelConfig

from q2_moshpit.eggnog import _eggnog_diamond_search, _eggnog_annotate
from q2_types.feature_data_mag import MAGSequencesDirFmt
from .._method import _eggnog_diamond_search, _eggnog_annotate
from q2_types.reference_db import (
DiamondDatabaseDirFmt, EggnogRefDirFmt
)
from q2_types.per_sample_sequences import ContigSequencesDirFmt
from q2_types.per_sample_sequences import (
ContigSequencesDirFmt, MultiMAGSequencesDirFmt
)
from q2_types.genome_data import SeedOrthologDirFmt, OrthologFileFmt
from q2_types.feature_data_mag import OrthologAnnotationDirFmt

Expand All @@ -36,6 +39,8 @@ def setUp(self):
self.plugin.pipelines["eggnog_diamond_search"]
self._eggnog_diamond_search = \
self.plugin.methods["_eggnog_diamond_search"]
self._eggnog_annotate = \
self.plugin.methods["_eggnog_annotate"]

def test_good_small_search_contigs(self):
contigs = qiime2.Artifact.import_data(
Expand All @@ -53,7 +58,7 @@ def test_good_small_search_contigs(self):

pdt.assert_frame_equal(obs, exp)

def test_good_small_search_mags(self):
def test_good_small_search_mags_derep(self):
mags = qiime2.Artifact.import_data(
'FeatureData[MAG]',
self.get_data_path('mag-sequences')
Expand All @@ -72,6 +77,31 @@ def test_good_small_search_mags(self):

pdt.assert_frame_equal(obs, exp)

def test_good_small_search_mags(self):
    """Search per-sample MAGs directly and check the resulting hit table.

    Imports the 'mag-sequences-per-sample' fixture as SampleData[MAGs],
    views it as MultiMAGSequencesDirFmt and passes it straight to
    _eggnog_diamond_search. The second returned value is compared with a
    hand-built table that has one row per MAG id and one column per
    subject sequence id.
    """
    fixture_dir = self.get_data_path('mag-sequences-per-sample')
    mags_artifact = qiime2.Artifact.import_data(
        'SampleData[MAGs]', fixture_dir
    )
    per_sample_mags = mags_artifact.view(MultiMAGSequencesDirFmt)

    _, observed = _eggnog_diamond_search(
        sequences=per_sample_mags, diamond_db=self.diamond_db
    )

    # Row labels of the expected table: the four MAG ids of the fixture.
    mag_ids = [
        '194b1aca-9373-4298-ba5c-05b382d1f553',
        '8f40a3bc-14f0-426b-b2ba-7fb3b1cd0c02',
        'c6bd5123-35cf-473a-bebf-85cf544bcd48',
        'e1ba1d20-b466-4fef-ae19-6bf9c5c63d6f',
    ]
    # Expected values per subject sequence id, in the same order as mag_ids.
    hit_counts = {
        '8': [3.0, 3.0, 0.0, 0.0],
        '0': [0.0, 0.0, 1.0, 1.0],
        '2': [0.0, 0.0, 1.0, 1.0],
    }
    expected = pd.DataFrame(hit_counts, index=mag_ids)
    expected.columns.name = 'sseqid'

    pdt.assert_frame_equal(observed, expected)

def test_eggnog_search_parallel_contigs(self):
contigs = qiime2.Artifact.import_data(
'SampleData[Contigs]',
Expand All @@ -94,7 +124,7 @@ def test_eggnog_search_parallel_contigs(self):

pdt.assert_frame_equal(parallel, single)

def test_eggnog_search_parallel_mags(self):
def test_eggnog_search_parallel_mags_derep(self):
mags = qiime2.Artifact.import_data(
'FeatureData[MAG]',
self.get_data_path('mag-sequences')
Expand All @@ -116,6 +146,28 @@ def test_eggnog_search_parallel_mags(self):

pdt.assert_frame_equal(parallel, single)

def test_eggnog_search_parallel_mags(self):
    """Check that parallel and direct searches agree for per-sample MAGs.

    Runs the eggnog_diamond_search pipeline through its parallel
    interface inside a ParallelConfig context, runs the
    _eggnog_diamond_search method directly on the same artifacts, and
    asserts that the second output of each, viewed as a DataFrame, is
    equal.
    """
    fixture_dir = self.get_data_path('mag-sequences-per-sample')
    mags_artifact = qiime2.Artifact.import_data(
        'SampleData[MAGs]', fixture_dir
    )

    # Pipeline run through the parallel interface; the future is resolved
    # while the parallel configuration is still active.
    with ParallelConfig():
        future = self.eggnog_diamond_search.parallel(
            mags_artifact, self.diamond_db_artifact
        )
        _, parallel_table = future._result()

    # Direct call of the underlying method on the same inputs.
    _, single_table = self._eggnog_diamond_search(
        sequences=mags_artifact,
        diamond_db=self.diamond_db_artifact,
    )

    pdt.assert_frame_equal(
        parallel_table.view(pd.DataFrame),
        single_table.view(pd.DataFrame),
    )


class TestAnnotate(TestPluginBase):
package = 'q2_moshpit.eggnog.tests'
Expand Down
Loading

0 comments on commit 06a920b

Please sign in to comment.