Skip to content

Commit

Permalink
ENH: eggnog_hmmer_search outputs gff files now
Browse files Browse the repository at this point in the history
  • Loading branch information
DorielaGrabocka committed Dec 13, 2024
1 parent e435d5d commit 0c7c6dd
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 24 deletions.
2 changes: 1 addition & 1 deletion q2_moshpit/eggnog/orthologs/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,8 @@ def _run_eggnog_search_pipeline(
partition_method = ctx.get_action(plugin, action_name)
_eggnog_search = ctx.get_action("moshpit", search_action)
collate_hits = ctx.get_action("types", "collate_orthologs")
collate_loci = ctx.get_action("types", "collate_loci")
_eggnog_feature_table = ctx.get_action("moshpit", "_eggnog_feature_table")
collate_loci = ctx.get_action("types", "collate_loci")
(partitioned_sequences,) = partition_method(sequences, num_partitions)

hits = []
Expand Down
25 changes: 13 additions & 12 deletions q2_moshpit/eggnog/orthologs/hmmer.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from q2_moshpit.eggnog.types import EggnogHmmerIdmapDirectoryFmt
from q2_types.feature_data_mag import MAGSequencesDirFmt
from q2_types.genome_data import (
ProteinsDirectoryFormat, SeedOrthologDirFmt
ProteinsDirectoryFormat, SeedOrthologDirFmt, LociDirectoryFormat
)
from q2_types.per_sample_sequences import (
ContigSequencesDirFmt, MultiMAGSequencesDirFmt
Expand All @@ -38,7 +38,7 @@ def _eggnog_hmmer_search(
seed_alignments: ProteinsDirectoryFormat,
num_cpus: int = 1,
db_in_memory: bool = False
) -> (SeedOrthologDirFmt, pd.DataFrame):
) -> (SeedOrthologDirFmt, pd.DataFrame, LociDirectoryFormat):
with tempfile.TemporaryDirectory() as output_loc:
taxon_id = os.listdir(idmap.path)[0].split(".")[0]
tmp_subdir = f"{output_loc}/hmmer/{taxon_id}"
Expand All @@ -54,17 +54,18 @@ def _eggnog_hmmer_search(
'--genepred', 'prodigal' # default incompatible with HMMER
]
)
result, ft = _eggnog_search(sequences, search_runner, output_loc)
return result, ft
result, ft, loci = _eggnog_search(sequences, search_runner, output_loc)
return result, ft, loci


def eggnog_hmmer_search(
ctx, sequences, pressed_hmm_db, idmap, seed_alignments,
num_cpus=1, db_in_memory=False, num_partitions=None
ctx, sequences, pressed_hmm_db, idmap, seed_alignments,
num_cpus=1, db_in_memory=False, num_partitions=None
):
collated_hits, collated_tables = _run_eggnog_search_pipeline(
ctx, sequences, [idmap, pressed_hmm_db, seed_alignments],
num_cpus, db_in_memory, num_partitions,
"_eggnog_hmmer_search"
)
return collated_hits, collated_tables
collated_hits, collated_tables, collated_loci = (
_run_eggnog_search_pipeline(
ctx, sequences, [idmap, pressed_hmm_db, seed_alignments],
num_cpus, db_in_memory, num_partitions,
"_eggnog_hmmer_search"
))
return collated_hits, collated_tables, collated_loci
18 changes: 10 additions & 8 deletions q2_moshpit/eggnog/tests/test_orthologs.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,11 @@ def setUp(self):
def test_eggnog_hmmer_search_pipeline(self):
mock_action = MagicMock(side_effect=[
lambda sequences, num_partitions: ({"mag1": {}, "mag2": {}},),
lambda seq, pressed, idmap, fastas, num_cpus, db_in_memory: (0, 0),
lambda seq, pressed, idmap, fastas, num_cpus, db_in_memory:
(0, 0, 0),
lambda hits: ("collated_hits",),
lambda collated_hits: ("collated_tables",),
lambda collated_loci: ("collated_loci",),
])
mock_ctx = MagicMock(get_action=mock_action)
obs = eggnog_hmmer_search(
Expand All @@ -85,7 +87,7 @@ def test_eggnog_hmmer_search_pipeline(self):
idmap=self.idmap_artifact,
seed_alignments=self.fastas_artifact
)
exp = ("collated_hits", "collated_tables")
exp = ("collated_hits", "collated_tables", "collated_loci")
self.assertTupleEqual(obs, exp)

def test_symlink_files_to_target_dir(self):
Expand Down Expand Up @@ -114,8 +116,8 @@ def test_eggnog_hmmer_search(
self, mock_eggnog_search, mock_symlink, mock_tmpdir, mock_makedirs
):
mock_tmpdir.return_value.__enter__.return_value = "tmp"
mock_eggnog_search.return_value = (0, 1)
result, ft = _eggnog_hmmer_search(
mock_eggnog_search.return_value = (0, 1, 2)
result, ft, loci = _eggnog_hmmer_search(
sequences=self.mags,
idmap=self.idmap,
pressed_hmm_db=self.pressed_hmm,
Expand All @@ -129,7 +131,7 @@ def test_eggnog_hmmer_search(
ANY, # partial() method not patchable or comparable
"tmp"
)
self.assertTupleEqual((result, ft), (0, 1))
self.assertTupleEqual((result, ft, loci), (0, 1, 2))

def test_eggnog_search_mags(self):
sequences = MultiMAGSequencesDirFmt(
Expand All @@ -138,7 +140,7 @@ def test_eggnog_search_mags(self):
output_loc = self.get_data_path('hits')
search_runner = MagicMock()

result, ft = _eggnog_search(sequences, search_runner, output_loc)
result, ft, _ = _eggnog_search(sequences, search_runner, output_loc)
result.validate()
self.assertIsInstance(ft, pd.DataFrame)

Expand All @@ -155,7 +157,7 @@ def test_eggnog_search_contigs(self):
output_loc = self.get_data_path('hits')
search_runner = MagicMock()

result, ft = _eggnog_search(sequences, search_runner, output_loc)
result, ft, _ = _eggnog_search(sequences, search_runner, output_loc)
result.validate()
self.assertIsInstance(ft, pd.DataFrame)

Expand All @@ -171,7 +173,7 @@ def test_eggnog_search_mags_derep(self):
output_loc = self.get_data_path('hits')
search_runner = MagicMock()

result, ft = _eggnog_search(sequences, search_runner, output_loc)
result, ft, _ = _eggnog_search(sequences, search_runner, output_loc)
result.validate()
self.assertIsInstance(ft, pd.DataFrame)

Expand Down
9 changes: 6 additions & 3 deletions q2_moshpit/plugin_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -736,7 +736,8 @@
},
outputs=[
('eggnog_hits', SampleData[Orthologs]),
('table', FeatureTable[Frequency])
('table', FeatureTable[Frequency]),
('loci', GenomeData[Loci]),
],
name='Run eggNOG search using HMMER aligner',
description="This method uses HMMER to find possible target sequences "
Expand Down Expand Up @@ -824,12 +825,14 @@
},
outputs=[
('eggnog_hits', SampleData[Orthologs]),
('table', FeatureTable[Frequency])
('table', FeatureTable[Frequency]),
('loci', GenomeData[Loci])
],
output_descriptions={
'eggnog_hits': 'BLAST6-like table(s) describing the identified '
'orthologs. One table per sample or MAG in the input.',
'table': 'Feature table with counts of orthologs per sample/MAG.'
'table': 'Feature table with counts of orthologs per sample/MAG.',
'loci': 'Loci of the identified orthologs.'
},
name='Run eggNOG search using HMMER aligner',
description='This method performs the steps by which we find our '
Expand Down

0 comments on commit 0c7c6dd

Please sign in to comment.