generated from bokulich-lab/q2-plugin-template
-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
ENH: Action to annotate MAGs and contigs with AMRFinderPlus (#88)
- Loading branch information
1 parent
38e48b5
commit 41bae5e
Showing
10 changed files
with
563 additions
and
19 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,123 @@ | ||
import os | ||
import shutil | ||
import tempfile | ||
from typing import Union | ||
|
||
import pandas as pd | ||
from q2_types.genome_data import GenesDirectoryFormat | ||
from q2_types.per_sample_sequences import ContigSequencesDirFmt, MultiMAGSequencesDirFmt | ||
|
||
from q2_amr.amrfinderplus.types import ( | ||
AMRFinderPlusAnnotationsDirFmt, | ||
AMRFinderPlusDatabaseDirFmt, | ||
) | ||
from q2_amr.amrfinderplus.utils import run_amrfinderplus_n | ||
from q2_amr.card.utils import create_count_table, read_in_txt | ||
|
||
|
||
def annotate_sample_data_amrfinderplus(
    sequences: Union[MultiMAGSequencesDirFmt, ContigSequencesDirFmt],
    amrfinderplus_db: AMRFinderPlusDatabaseDirFmt,
    organism: str = None,
    plus: bool = False,
    report_all_equal: bool = False,
    ident_min: float = None,
    curated_ident: bool = False,
    coverage_min: float = 0.5,
    translation_table: str = "11",
    threads: int = None,
) -> (
    AMRFinderPlusAnnotationsDirFmt,
    AMRFinderPlusAnnotationsDirFmt,
    GenesDirectoryFormat,
    pd.DataFrame,
):
    """Annotate every MAG or contig file in ``sequences`` with AMRFinderPlus.

    ``run_amrfinderplus_n`` is invoked once per input file inside a shared
    temporary working directory; after each run the produced files are moved
    into the output directory formats.

    Parameters
    ----------
    sequences
        MAGs (files and IDs are taken from the manifest) or contigs (every
        file found in the directory; the sample ID is the file stem without
        the ``_contigs`` suffix).
    amrfinderplus_db
        Database directory handed to ``run_amrfinderplus_n``.
    organism, plus, report_all_equal, ident_min, curated_ident, coverage_min,
    translation_table, threads
        Forwarded unchanged to ``run_amrfinderplus_n``. ``organism`` also
        decides whether a mutations file is collected (see below).

    Returns
    -------
    tuple
        ``(annotations, mutations, genes, feature_table)``:

        * annotations: ``<sample_id>/[<mag_id>_]amr_annotations.tsv``
        * mutations: ``<sample_id>/[<mag_id>_]amr_mutations.tsv``; an empty
          file is written when ``organism`` is not set
        * genes: ``<mag_id or sample_id>_amr_genes.fasta``
        * feature_table: result of ``create_count_table`` over the per-file
          frequency tables built by ``read_in_txt``
    """
    annotations = AMRFinderPlusAnnotationsDirFmt()
    mutations = AMRFinderPlusAnnotationsDirFmt()
    genes = GenesDirectoryFormat()
    frequency_list = []

    # Collect (sample_id, mag_id, path) for every input file up front.
    if isinstance(sequences, MultiMAGSequencesDirFmt):
        # The manifest index already holds (sample ID, MAG ID), so read the
        # IDs while iterating instead of re-querying the manifest per file.
        manifest = sequences.manifest.view(pd.DataFrame)
        inputs = [
            (index_value[0], index_value[1], file)
            for index_value, file in manifest["filename"].items()
        ]
    else:
        contigs_suffix = "_contigs"
        inputs = []
        # Sorted so the processing order does not depend on the file system.
        for name in sorted(os.listdir(str(sequences))):
            stem = os.path.splitext(name)[0]
            # Only strip the suffix when it is really there; a blind slice
            # would truncate unrelated file names.
            if stem.endswith(contigs_suffix):
                stem = stem[: -len(contigs_suffix)]
            inputs.append((stem, "", os.path.join(str(sequences), name)))

    with tempfile.TemporaryDirectory() as tmp:
        for sample_id, mag_id, file in inputs:
            # Run amrfinderplus on the nucleotide sequences only
            run_amrfinderplus_n(
                working_dir=tmp,
                amrfinderplus_db=amrfinderplus_db,
                dna_sequences=file,
                protein_sequences=None,
                gff=None,
                organism=organism,
                plus=plus,
                report_all_equal=report_all_equal,
                ident_min=ident_min,
                curated_ident=curated_ident,
                coverage_min=coverage_min,
                translation_table=translation_table,
                threads=threads,
            )

            # Contigs have no MAG ID: joining with "" would leave a trailing
            # path separator in the ID, so use the bare sample ID instead.
            if mag_id:
                samp_bin_name = str(os.path.join(sample_id, mag_id))
            else:
                samp_bin_name = str(sample_id)

            # Create frequency dataframe and append it to list
            frequency_df = read_in_txt(
                path=os.path.join(tmp, "amr_annotations.tsv"),
                samp_bin_name=samp_bin_name,
                data_type="mags",
                colname="Gene symbol",
            )
            frequency_list.append(frequency_df)

            # Per-sample output files are prefixed with the MAG ID, if any
            prefix = f"{mag_id}_" if mag_id else ""

            # Move mutations file. Without an organism no mutations file is
            # expected in the working directory, so create an empty one.
            des_path_mutations = os.path.join(
                str(mutations), sample_id, f"{prefix}amr_mutations.tsv"
            )
            os.makedirs(os.path.dirname(des_path_mutations), exist_ok=True)
            if organism:
                shutil.move(os.path.join(tmp, "amr_mutations.tsv"), des_path_mutations)
            else:
                with open(des_path_mutations, "w"):
                    pass

            # Move annotations file
            des_path_annotations = os.path.join(
                str(annotations), sample_id, f"{prefix}amr_annotations.tsv"
            )
            os.makedirs(os.path.dirname(des_path_annotations), exist_ok=True)
            shutil.move(os.path.join(tmp, "amr_annotations.tsv"), des_path_annotations)

            # Move genes file (flat directory, named after the MAG or sample)
            shutil.move(
                os.path.join(tmp, "amr_genes.fasta"),
                os.path.join(
                    str(genes), f"{mag_id if mag_id else sample_id}_amr_genes.fasta"
                ),
            )

    feature_table = create_count_table(df_list=frequency_list)
    return (
        annotations,
        mutations,
        genes,
        feature_table,
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
import os | ||
from unittest.mock import MagicMock, patch | ||
|
||
from q2_types.per_sample_sequences import ContigSequencesDirFmt, MultiMAGSequencesDirFmt | ||
from qiime2.plugin.testing import TestPluginBase | ||
|
||
from q2_amr.amrfinderplus.sample_data import annotate_sample_data_amrfinderplus | ||
from q2_amr.amrfinderplus.types import AMRFinderPlusDatabaseDirFmt | ||
|
||
|
||
class TestAnnotateSampleDataAMRFinderPlus(TestPluginBase):
    """Tests for ``annotate_sample_data_amrfinderplus``.

    The AMRFinderPlus run, ``read_in_txt`` and ``create_count_table`` are all
    patched, so only the placement of the output files is verified.
    """

    package = "q2_amr.amrfinderplus.tests"

    # Expected file names, in the order [annotations, mutations, genes]
    files_contigs = [
        "amr_annotations.tsv",
        "amr_mutations.tsv",
        "sample1_amr_genes.fasta",
    ]

    files_mags = [
        "mag1_amr_annotations.tsv",
        "mag1_amr_mutations.tsv",
        "mag1_amr_genes.fasta",
    ]

    def mock_run_amrfinderplus_n(
        self,
        working_dir,
        amrfinderplus_db,
        dna_sequences,
        protein_sequences,
        gff,
        organism,
        plus,
        report_all_equal,
        ident_min,
        curated_ident,
        coverage_min,
        translation_table,
        threads,
    ):
        """Stand-in for ``run_amrfinderplus_n`` that only creates empty outputs.

        The annotations file is always written; the mutations file only when
        an organism is given and the genes file only when DNA sequences are
        given.
        """
        produced = ["amr_annotations.tsv"]
        if organism:
            produced.append("amr_mutations.tsv")
        if dna_sequences:
            produced.append("amr_genes.fasta")

        for name in produced:
            with open(os.path.join(working_dir, name), "w"):
                pass

    def _mag_sequences(self):
        """Return a MAG directory whose manifest lists one MAG for sample1."""
        mags = MultiMAGSequencesDirFmt()
        with open(os.path.join(str(mags), "MANIFEST"), "w") as manifest:
            manifest.write(
                "sample-id,mag-id,filename\nsample1,mag1,sample1/mag1.fasta\n"
            )
        return mags

    def _contig_sequences(self):
        """Return a contigs directory holding one empty file for sample1."""
        contigs = ContigSequencesDirFmt()
        with open(os.path.join(str(contigs), "sample1_contigs.fasta"), "w"):
            pass
        return contigs

    def test_annotate_sample_data_amrfinderplus_mags(self):
        self._helper(
            sequences=self._mag_sequences(), organism=None, files=self.files_mags
        )

    def test_annotate_sample_data_amrfinderplus_mags_organism(self):
        self._helper(self._mag_sequences(), "Escherichia", files=self.files_mags)

    def test_annotate_sample_data_amrfinderplus_contigs(self):
        self._helper(
            sequences=self._contig_sequences(),
            organism=None,
            files=self.files_contigs,
        )

    def test_annotate_sample_data_amrfinderplus_contigs_organism(self):
        self._helper(
            sequences=self._contig_sequences(),
            organism="Escherichia",
            files=self.files_contigs,
        )

    def _helper(self, sequences, organism, files):
        """Run the action with mocks and assert the three output files exist.

        ``files`` holds the expected annotations, mutations and genes file
        names, in that order.
        """
        read_in_txt_mock = MagicMock()
        count_table_mock = MagicMock()
        database = AMRFinderPlusDatabaseDirFmt()

        with patch(
            "q2_amr.amrfinderplus.sample_data.run_amrfinderplus_n",
            side_effect=self.mock_run_amrfinderplus_n,
        ), patch(
            "q2_amr.amrfinderplus.sample_data.read_in_txt", read_in_txt_mock
        ), patch(
            "q2_amr.amrfinderplus.sample_data.create_count_table",
            count_table_mock,
        ):
            annotations, mutations, genes, _ = annotate_sample_data_amrfinderplus(
                sequences=sequences,
                amrfinderplus_db=database,
                organism=organism,
            )

            # Annotations and mutations live in a per-sample sub-directory,
            # the genes FASTA sits directly in its directory.
            expected_paths = [
                os.path.join(str(annotations), "sample1", files[0]),
                os.path.join(str(mutations), "sample1", files[1]),
                os.path.join(str(genes), files[2]),
            ]
            for expected in expected_paths:
                self.assertTrue(os.path.exists(expected))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,95 @@ | ||
from unittest.mock import patch | ||
|
||
from qiime2.plugin.testing import TestPluginBase | ||
|
||
from q2_amr.amrfinderplus.utils import run_amrfinderplus_n | ||
|
||
|
||
class TestAnnotateMagsCard(TestPluginBase):
    """Checks the ``amrfinder`` command assembled by ``run_amrfinderplus_n``.

    NOTE(review): despite its name this class does not test CARD MAG
    annotation; consider renaming it to match what it covers.
    """

    package = "q2_amr.amrfinderplus.tests"

    @patch("q2_amr.amrfinderplus.utils.run_command")
    def test_run_amrfinderplus_n(self, mock_run_command):
        """Every optional argument set: all flags appear in the command."""
        arguments = {
            "working_dir": "path_dir",
            "amrfinderplus_db": "amrfinderplus_db",
            "dna_sequences": "dna_sequences",
            "protein_sequences": "protein_sequences",
            "gff": "gff",
            "organism": "Escherichia",
            "plus": True,
            "report_all_equal": True,
            "ident_min": 1,
            "curated_ident": False,
            "coverage_min": 1,
            "translation_table": "11",
            "threads": 4,
        }
        expected_command = [
            "amrfinder",
            "--database",
            "amrfinderplus_db",
            "-o",
            "path_dir/amr_annotations.tsv",
            "--print_node",
            "-n",
            "dna_sequences",
            "--nucleotide_output",
            "path_dir/amr_genes.fasta",
            "-p",
            "protein_sequences",
            "--protein_output",
            "path_dir/amr_proteins.fasta",
            "-g",
            "gff",
            "--threads",
            "4",
            "--organism",
            "Escherichia",
            "--mutation_all",
            "path_dir/amr_mutations.tsv",
            "--plus",
            "--report_all_equal",
            "--ident_min",
            "1",
            "--coverage_min",
            "1",
            "--translation_table",
            "11",
        ]

        run_amrfinderplus_n(**arguments)

        mock_run_command.assert_called_once_with(
            expected_command, "path_dir", verbose=True
        )

    @patch("q2_amr.amrfinderplus.utils.run_command")
    def test_run_amrfinderplus_n_minimal(self, mock_run_command):
        """Only required arguments plus ``curated_ident``: minimal command."""
        arguments = {
            "working_dir": "path_dir",
            "amrfinderplus_db": "amrfinderplus_db",
            "dna_sequences": None,
            "protein_sequences": None,
            "gff": None,
            "organism": None,
            "plus": False,
            "report_all_equal": False,
            "ident_min": None,
            "curated_ident": True,
            "coverage_min": None,
            "translation_table": None,
            "threads": None,
        }
        # With curated_ident enabled the command is expected to carry
        # "--ident_min -1" even though ident_min itself is None.
        expected_command = [
            "amrfinder",
            "--database",
            "amrfinderplus_db",
            "-o",
            "path_dir/amr_annotations.tsv",
            "--print_node",
            "--ident_min",
            "-1",
        ]

        run_amrfinderplus_n(**arguments)

        mock_run_command.assert_called_once_with(
            expected_command, "path_dir", verbose=True
        )
Oops, something went wrong.