Skip to content

Commit

Permalink
ENH: Action to annotate MAGs and contigs with AMRFinderPlus (#88)
Browse files Browse the repository at this point in the history
  • Loading branch information
VinzentRisch authored Jul 17, 2024
1 parent 38e48b5 commit 41bae5e
Show file tree
Hide file tree
Showing 10 changed files with 563 additions and 19 deletions.
123 changes: 123 additions & 0 deletions q2_amr/amrfinderplus/sample_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
import os
import shutil
import tempfile
from typing import Union

import pandas as pd
from q2_types.genome_data import GenesDirectoryFormat
from q2_types.per_sample_sequences import ContigSequencesDirFmt, MultiMAGSequencesDirFmt

from q2_amr.amrfinderplus.types import (
AMRFinderPlusAnnotationsDirFmt,
AMRFinderPlusDatabaseDirFmt,
)
from q2_amr.amrfinderplus.utils import run_amrfinderplus_n
from q2_amr.card.utils import create_count_table, read_in_txt


def annotate_sample_data_amrfinderplus(
    sequences: Union[MultiMAGSequencesDirFmt, ContigSequencesDirFmt],
    amrfinderplus_db: AMRFinderPlusDatabaseDirFmt,
    organism: str = None,
    plus: bool = False,
    report_all_equal: bool = False,
    ident_min: float = None,
    curated_ident: bool = False,
    coverage_min: float = 0.5,
    translation_table: str = "11",
    threads: int = None,
) -> (
    AMRFinderPlusAnnotationsDirFmt,
    AMRFinderPlusAnnotationsDirFmt,
    GenesDirectoryFormat,
    pd.DataFrame,
):
    """Annotate every MAG or contig file in ``sequences`` with AMRFinderPlus.

    ``run_amrfinderplus_n`` is called once per input file in a shared temporary
    working directory; after each run the result files are moved into the
    output directory formats and a per-file frequency frame is collected.

    Parameters
    ----------
    sequences
        MAGs (looked up through the format's manifest) or contigs (every file
        found in the directory).
    amrfinderplus_db
        Database handed to ``run_amrfinderplus_n`` unchanged.
    organism
        Forwarded to ``run_amrfinderplus_n``. When truthy, the
        ``amr_mutations.tsv`` found in the working directory is moved to the
        mutations output; otherwise an empty mutations file is written.
    plus, report_all_equal, ident_min, curated_ident, coverage_min,
    translation_table, threads
        Forwarded to ``run_amrfinderplus_n`` unchanged.

    Returns
    -------
    tuple
        ``(annotations, mutations, genes, feature_table)`` where
        ``feature_table`` is the result of ``create_count_table`` over the
        per-file frequency frames.
    """
    annotations = AMRFinderPlusAnnotationsDirFmt()
    mutations = AMRFinderPlusAnnotationsDirFmt()
    genes = GenesDirectoryFormat()
    frequency_list = []

    # Resolve one (sample_id, mag_id, path) record per input file up front.
    if isinstance(sequences, MultiMAGSequencesDirFmt):
        manifest = sequences.manifest.view(pd.DataFrame)
        # Walk index and filename together instead of querying the manifest
        # again for every file (one pass, and duplicate filenames keep their
        # own index entry). index_value[0] / [1] are the sample and MAG IDs.
        records = [
            (index_value[0], index_value[1], file)
            for index_value, file in manifest["filename"].items()
        ]
    else:
        seq_dir = str(sequences)
        # os.listdir returns entries in arbitrary order; sort so that the
        # processing order (and the order of the frequency frames) is stable.
        # The sample ID is the file stem minus its last 8 characters --
        # presumably the "_contigs" suffix; verify against the directory format.
        records = [
            (os.path.splitext(name)[0][:-8], "", os.path.join(seq_dir, name))
            for name in sorted(os.listdir(seq_dir))
        ]

    with tempfile.TemporaryDirectory() as tmp:
        for sample_id, mag_id, file in records:
            # Run amrfinderplus
            run_amrfinderplus_n(
                working_dir=tmp,
                amrfinderplus_db=amrfinderplus_db,
                dna_sequences=file,
                protein_sequences=None,
                gff=None,
                organism=organism,
                plus=plus,
                report_all_equal=report_all_equal,
                ident_min=ident_min,
                curated_ident=curated_ident,
                coverage_min=coverage_min,
                translation_table=translation_table,
                threads=threads,
            )

            # Create frequency dataframe and append it to list.
            # NOTE(review): for contigs mag_id is "", so os.path.join yields
            # "<sample_id>" followed by a trailing separator, and data_type is
            # "mags" for both input kinds -- confirm this is what read_in_txt
            # expects.
            frequency_df = read_in_txt(
                path=os.path.join(tmp, "amr_annotations.tsv"),
                samp_bin_name=str(os.path.join(sample_id, mag_id)),
                data_type="mags",
                colname="Gene symbol",
            )
            frequency_list.append(frequency_df)

            # Per-sample output files are prefixed with the MAG ID when present.
            prefix = f"{mag_id}_" if mag_id else ""

            # Move mutations file. If it is not created, create an empty one
            des_path_mutations = os.path.join(
                str(mutations), sample_id, f"{prefix}amr_mutations.tsv"
            )
            os.makedirs(os.path.dirname(des_path_mutations), exist_ok=True)
            if organism:
                shutil.move(os.path.join(tmp, "amr_mutations.tsv"), des_path_mutations)
            else:
                with open(des_path_mutations, "w"):
                    pass

            # Move annotations file
            des_path_annotations = os.path.join(
                str(annotations), sample_id, f"{prefix}amr_annotations.tsv"
            )
            os.makedirs(os.path.dirname(des_path_annotations), exist_ok=True)
            shutil.move(os.path.join(tmp, "amr_annotations.tsv"), des_path_annotations)

            # Move genes file; it is stored flat, named after the MAG ID or,
            # without one, after the sample ID.
            shutil.move(
                os.path.join(tmp, "amr_genes.fasta"),
                os.path.join(
                    str(genes), f"{mag_id if mag_id else sample_id}_amr_genes.fasta"
                ),
            )

    feature_table = create_count_table(df_list=frequency_list)
    return (
        annotations,
        mutations,
        genes,
        feature_table,
    )
101 changes: 101 additions & 0 deletions q2_amr/amrfinderplus/tests/test_sample_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
import os
from unittest.mock import MagicMock, patch

from q2_types.per_sample_sequences import ContigSequencesDirFmt, MultiMAGSequencesDirFmt
from qiime2.plugin.testing import TestPluginBase

from q2_amr.amrfinderplus.sample_data import annotate_sample_data_amrfinderplus
from q2_amr.amrfinderplus.types import AMRFinderPlusDatabaseDirFmt


class TestAnnotateSampleDataAMRFinderPlus(TestPluginBase):
    """Tests ``annotate_sample_data_amrfinderplus`` with AMRFinderPlus mocked."""

    package = "q2_amr.amrfinderplus.tests"

    # Expected output file names, ordered [annotations, mutations, genes].
    files_contigs = [
        "amr_annotations.tsv",
        "amr_mutations.tsv",
        "sample1_amr_genes.fasta",
    ]

    files_mags = [
        "mag1_amr_annotations.tsv",
        "mag1_amr_mutations.tsv",
        "mag1_amr_genes.fasta",
    ]

    def mock_run_amrfinderplus_n(
        self,
        working_dir,
        amrfinderplus_db,
        dna_sequences,
        protein_sequences,
        gff,
        organism,
        plus,
        report_all_equal,
        ident_min,
        curated_ident,
        coverage_min,
        translation_table,
        threads,
    ):
        """Stand-in for ``run_amrfinderplus_n`` that only creates empty outputs.

        The annotations file is always written; the mutations file only when
        an organism is given and the genes file only when DNA input is given.
        """
        produced = ["amr_annotations.tsv"]
        if organism:
            produced.append("amr_mutations.tsv")
        if dna_sequences:
            produced.append("amr_genes.fasta")
        for name in produced:
            open(os.path.join(working_dir, name), "w").close()

    def _mag_sequences(self):
        """Build a MAG directory whose manifest lists a single MAG."""
        sequences = MultiMAGSequencesDirFmt()
        with open(os.path.join(str(sequences), "MANIFEST"), "w") as manifest:
            manifest.write(
                "sample-id,mag-id,filename\n" "sample1,mag1,sample1/mag1.fasta\n"
            )
        return sequences

    def _contig_sequences(self):
        """Build a contig directory holding one empty contigs file."""
        sequences = ContigSequencesDirFmt()
        open(os.path.join(str(sequences), "sample1_contigs.fasta"), "w").close()
        return sequences

    def test_annotate_sample_data_amrfinderplus_mags(self):
        self._helper(
            sequences=self._mag_sequences(), organism=None, files=self.files_mags
        )

    def test_annotate_sample_data_amrfinderplus_mags_organism(self):
        self._helper(self._mag_sequences(), "Escherichia", files=self.files_mags)

    def test_annotate_sample_data_amrfinderplus_contigs(self):
        self._helper(
            sequences=self._contig_sequences(),
            organism=None,
            files=self.files_contigs,
        )

    def test_annotate_sample_data_amrfinderplus_contigs_organism(self):
        self._helper(
            sequences=self._contig_sequences(),
            organism="Escherichia",
            files=self.files_contigs,
        )

    def _helper(self, sequences, organism, files):
        """Run the action with its helpers patched and check the output files.

        ``files`` holds the expected annotations, mutations and genes file
        names, in that order.
        """
        amrfinderplus_db = AMRFinderPlusDatabaseDirFmt()
        run_patch = patch(
            "q2_amr.amrfinderplus.sample_data.run_amrfinderplus_n",
            side_effect=self.mock_run_amrfinderplus_n,
        )
        read_patch = patch("q2_amr.amrfinderplus.sample_data.read_in_txt", MagicMock())
        count_patch = patch(
            "q2_amr.amrfinderplus.sample_data.create_count_table", MagicMock()
        )
        with run_patch, read_patch, count_patch:
            result = annotate_sample_data_amrfinderplus(
                sequences=sequences,
                amrfinderplus_db=amrfinderplus_db,
                organism=organism,
            )
            # Annotations and mutations live in a per-sample subdirectory,
            # the genes file sits directly in its output directory.
            expected_paths = [
                os.path.join(str(result[0]), "sample1", files[0]),
                os.path.join(str(result[1]), "sample1", files[1]),
                os.path.join(str(result[2]), files[2]),
            ]
            for expected in expected_paths:
                self.assertTrue(os.path.exists(expected))
95 changes: 95 additions & 0 deletions q2_amr/amrfinderplus/tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
from unittest.mock import patch

from qiime2.plugin.testing import TestPluginBase

from q2_amr.amrfinderplus.utils import run_amrfinderplus_n


class TestAnnotateMagsCard(TestPluginBase):
    """Checks the command ``run_amrfinderplus_n`` passes to ``run_command``."""

    package = "q2_amr.amrfinderplus.tests"

    @patch("q2_amr.amrfinderplus.utils.run_command")
    def test_run_amrfinderplus_n(self, mock_run_command):
        """All inputs and options supplied; ``curated_ident`` left off."""
        arguments = dict(
            working_dir="path_dir",
            amrfinderplus_db="amrfinderplus_db",
            dna_sequences="dna_sequences",
            protein_sequences="protein_sequences",
            gff="gff",
            organism="Escherichia",
            plus=True,
            report_all_equal=True,
            ident_min=1,
            curated_ident=False,
            coverage_min=1,
            translation_table="11",
            threads=4,
        )
        expected_command = [
            "amrfinder",
            "--database",
            "amrfinderplus_db",
            "-o",
            "path_dir/amr_annotations.tsv",
            "--print_node",
            "-n",
            "dna_sequences",
            "--nucleotide_output",
            "path_dir/amr_genes.fasta",
            "-p",
            "protein_sequences",
            "--protein_output",
            "path_dir/amr_proteins.fasta",
            "-g",
            "gff",
            "--threads",
            "4",
            "--organism",
            "Escherichia",
            "--mutation_all",
            "path_dir/amr_mutations.tsv",
            "--plus",
            "--report_all_equal",
            "--ident_min",
            "1",
            "--coverage_min",
            "1",
            "--translation_table",
            "11",
        ]

        run_amrfinderplus_n(**arguments)

        mock_run_command.assert_called_once_with(
            expected_command, "path_dir", verbose=True
        )

    @patch("q2_amr.amrfinderplus.utils.run_command")
    def test_run_amrfinderplus_n_minimal(self, mock_run_command):
        """No optional inputs; ``curated_ident`` is expected as ``--ident_min -1``."""
        arguments = dict(
            working_dir="path_dir",
            amrfinderplus_db="amrfinderplus_db",
            dna_sequences=None,
            protein_sequences=None,
            gff=None,
            organism=None,
            plus=False,
            report_all_equal=False,
            ident_min=None,
            curated_ident=True,
            coverage_min=None,
            translation_table=None,
            threads=None,
        )
        expected_command = [
            "amrfinder",
            "--database",
            "amrfinderplus_db",
            "-o",
            "path_dir/amr_annotations.tsv",
            "--print_node",
            "--ident_min",
            "-1",
        ]

        run_amrfinderplus_n(**arguments)

        mock_run_command.assert_called_once_with(
            expected_command, "path_dir", verbose=True
        )
Loading

0 comments on commit 41bae5e

Please sign in to comment.