Skip to content

Commit

Permalink
ENH: add AMRFinderPlusAnnotation type (#86)
Browse files Browse the repository at this point in the history
  • Loading branch information
VinzentRisch authored Jul 11, 2024
1 parent fb9e4b5 commit 0b5439a
Show file tree
Hide file tree
Showing 8 changed files with 200 additions and 3 deletions.
6 changes: 6 additions & 0 deletions q2_amr/amrfinderplus/types/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,19 @@
# The full license is in the file LICENSE, distributed with this software.
# ----------------------------------------------------------------------------
from q2_amr.amrfinderplus.types._format import (
AMRFinderPlusAnnotationDirFmt,
AMRFinderPlusAnnotationFormat,
AMRFinderPlusAnnotationsDirFmt,
AMRFinderPlusDatabaseDirFmt,
BinaryFormat,
TextFormat,
)

# Public API of this package: the format classes imported above from
# q2_amr.amrfinderplus.types._format.
__all__ = [
"AMRFinderPlusDatabaseDirFmt",
"AMRFinderPlusAnnotationFormat",
"AMRFinderPlusAnnotationsDirFmt",
"AMRFinderPlusAnnotationDirFmt",
"TextFormat",
"BinaryFormat",
]
67 changes: 67 additions & 0 deletions q2_amr/amrfinderplus/types/_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,10 @@
#
# The full license is in the file LICENSE, distributed with this software.
# ----------------------------------------------------------------------------
import pandas as pd
from q2_types.feature_data import MixedCaseDNAFASTAFormat, ProteinFASTAFormat
from q2_types.per_sample_sequences._format import MultiDirValidationMixin
from qiime2.core.exceptions import ValidationError
from qiime2.plugin import model


Expand Down Expand Up @@ -57,3 +60,67 @@ def amr_dna_comp_path_maker(self, species, extension):
@amr_dna_tab.set_path_maker
def amr_dna_tab_path_maker(self, species):
return "AMR_DNA-%s.tab" % species


class AMRFinderPlusAnnotationFormat(model.TextFileFormat):
    """Tab-separated AMRFinderPlus annotation table, validated by its header."""

    def _validate(self):
        """Check the header row against the two accepted column layouts.

        The header must be either the full list of columns or the same list
        without the four coordinate columns (Contig id, Start, Stop, Strand).
        A file with no content at all is accepted.

        Raises
        ------
        ValidationError
            If the observed header matches neither layout.
        """
        id_column = ["Protein identifier"]
        coordinate_columns = ["Contig id", "Start", "Stop", "Strand"]
        remaining_columns = [
            "Gene symbol",
            "Sequence name",
            "Scope",
            "Element type",
            "Element subtype",
            "Class",
            "Subclass",
            "Method",
            "Target length",
            "Reference sequence length",
            "% Coverage of reference sequence",
            "% Identity to reference sequence",
            "Alignment length",
            "Accession of closest sequence",
            "Name of closest sequence",
            "HMM id",
            "HMM description",
            "Hierarchy node",
        ]
        with_coordinates = id_column + coordinate_columns + remaining_columns
        without_coordinates = id_column + remaining_columns

        try:
            # nrows=0 parses only the header line, never the data rows.
            observed = pd.read_csv(str(self), sep="\t", nrows=0).columns.tolist()
        except pd.errors.EmptyDataError:
            # Nothing to parse: an empty file is treated as valid.
            return

        if observed in (without_coordinates, with_coordinates):
            return

        raise ValidationError(
            "Header line does not match AMRFinderPlusAnnotationFormat. Must "
            "consist of the following values: "
            + ", ".join(with_coordinates)
            + ".\n\nWhile Contig id, Start, Stop and Strand are optional."
            + "\n\nFound instead: "
            + ", ".join(observed)
        )

    def _validate_(self, level):
        """Run the header check; ``level`` is accepted but not used."""
        self._validate()


class AMRFinderPlusAnnotationsDirFmt(MultiDirValidationMixin, model.DirectoryFormat):
    """Directory format collecting multiple AMRFinderPlus annotation tables."""

    # Every member file is an AMRFinderPlusAnnotationFormat whose path ends in
    # amr_annotations.tsv or amr_mutations.tsv.
    annotation = model.FileCollection(
        r".*amr_(annotations|mutations)\.tsv$",
        format=AMRFinderPlusAnnotationFormat,
    )

    @annotation.set_path_maker
    def annotation_path_maker(self, sample_id, mag_id):
        """Build the relative path of an annotation file.

        The file is placed under ``sample_id/``; when ``mag_id`` is truthy it
        is additionally used as a ``<mag_id>_`` prefix of the file name.
        """
        file_name = "amr_annotations.tsv"
        if not mag_id:
            return f"{sample_id}/{file_name}"
        return f"{sample_id}/{mag_id}_{file_name}"


# Directory format wrapping a single AMRFinderPlusAnnotationFormat file.
# The path spec below is a regex covering "amr_annotations.tsv" and
# "amr_mutations.tsv".
AMRFinderPlusAnnotationDirFmt = model.SingleFileDirectoryFormat(
"AMRFinderPlusAnnotationDirFmt",
r"amr_(annotations|mutations)\.tsv$",
AMRFinderPlusAnnotationFormat,
)
8 changes: 8 additions & 0 deletions q2_amr/amrfinderplus/types/_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,14 @@
#
# The full license is in the file LICENSE, distributed with this software.
# ----------------------------------------------------------------------------
from q2_types.feature_data import FeatureData
from q2_types.sample_data import SampleData
from qiime2.core.type import SemanticType

# Stand-alone semantic type (no variant_of declared).
AMRFinderPlusDatabase = SemanticType("AMRFinderPlusDatabase")
# Variant of SampleData's "type" field, i.e. SampleData[AMRFinderPlusAnnotations].
AMRFinderPlusAnnotations = SemanticType(
"AMRFinderPlusAnnotations", variant_of=SampleData.field["type"]
)
# Variant of FeatureData's "type" field, i.e. FeatureData[AMRFinderPlusAnnotation].
AMRFinderPlusAnnotation = SemanticType(
"AMRFinderPlusAnnotation", variant_of=FeatureData.field["type"]
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Protein identifier Gene symbol Sequence name Scope Element type Element subtype Class Subclass Method Target length Reference sequence length % Coverage of reference sequence % Identity to reference sequence Alignment length Accession of closest sequence Name of closest sequence HMM id HMM description Hierarchy node
aph3pp-Ib_partial_5p_neg aph(3'')-Ib aminoglycoside O-phosphotransferase APH(3'')-Ib core AMR AMR AMINOGLYCOSIDE STREPTOMYCIN PARTIALP 225 267 81.27 100.00 217 WP_001082319.1 aminoglycoside O-phosphotransferase APH(3'')-Ib NF032896.1 APH(3'') family aminoglycoside O-phosphotransferase aph(3'')-Ib
blaOXA-436_partial blaOXA OXA-48 family class D beta-lactamase core AMR AMR BETA-LACTAM BETA-LACTAM PARTIALP 233 265 87.92 100.00 233 WP_058842180.1 OXA-48 family carbapenem-hydrolyzing class D beta-lactamase OXA-436 NF012161.0 class D beta-lactamase blaOXA-48_fam
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Protein identifier Gene symbol Sequence name Scope Element type Element subtype Class Subclass Method Target length Reference sequence length % Coverage of reference sequence % Identity to reference sequence Alignment length Accession of closest sequence Name of closest sequence HMM id HMM description Hierarchy node
aph3pp-Ib_partial_5p_neg aph(3'')-Ib aminoglycoside O-phosphotransferase APH(3'')-Ib core AMR AMR AMINOGLYCOSIDE STREPTOMYCIN PARTIALP 225 267 81.27 100.00 217 WP_001082319.1 aminoglycoside O-phosphotransferase APH(3'')-Ib NF032896.1 APH(3'') family aminoglycoside O-phosphotransferase aph(3'')-Ib
blaOXA-436_partial blaOXA OXA-48 family class D beta-lactamase core AMR AMR BETA-LACTAM BETA-LACTAM PARTIALP 233 265 87.92 100.00 233 WP_058842180.1 OXA-48 family carbapenem-hydrolyzing class D beta-lactamase OXA-436 NF012161.0 class D beta-lactamase blaOXA-48_fam
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Incorrect Header 1 Incorrect Header 2 Incorrect Header 3
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,101 @@
#
# The full license is in the file LICENSE, distributed with this software.
# ----------------------------------------------------------------------------
import os
import tempfile

from qiime2.core.exceptions import ValidationError
from qiime2.plugin.testing import TestPluginBase

from q2_amr.amrfinderplus.types._format import AMRFinderPlusDatabaseDirFmt
from q2_amr.amrfinderplus.types._format import (
AMRFinderPlusAnnotationDirFmt,
AMRFinderPlusAnnotationFormat,
AMRFinderPlusAnnotationsDirFmt,
AMRFinderPlusDatabaseDirFmt,
)


class TestAMRFinderPlusDatabaseTypesAndFormats(TestPluginBase):
class TestAMRFinderPlusTypesAndFormats(TestPluginBase):
    """Tests for the AMRFinderPlus file formats and directory formats."""

    package = "q2_amr.amrfinderplus.types.tests"

    def test_amrfinderplus_database_directory_format_validate_positive(self):
        """The bundled database fixture validates without error."""
        fmt = AMRFinderPlusDatabaseDirFmt(self.get_data_path("database"), mode="r")
        fmt.validate()

    def test_amrfinderplus_annotation_format_validate_positive(self):
        """A table whose header has no coordinate columns is accepted."""
        filepath = self.get_data_path(
            "annotation/no_coordinates/"
            "aa447c99-ecd9-4c4a-a53b-4df6999815dd_amr_annotations.tsv"
        )
        fmt = AMRFinderPlusAnnotationFormat(filepath, mode="r")
        fmt.validate()

    def test_amrfinderplus_annotation_format_validate_positive_coordinates(self):
        """Validation passes for the fixture under annotation/coordinates."""
        filepath = self.get_data_path(
            "annotation/coordinates/e026af61-d911-4de3-a957-7e8bf837f30d"
            "_amr_annotations.tsv"
        )
        fmt = AMRFinderPlusAnnotationFormat(filepath, mode="r")
        fmt.validate()

    def test_amrfinderplus_annotation_format_validate_positive_empty(self):
        """A zero-byte file is accepted by the format."""
        with tempfile.TemporaryDirectory() as temp_dir:
            temp_file_path = os.path.join(temp_dir, "amr_annotations.tsv")
            # Create the file without writing anything to it.
            with open(temp_file_path, "w"):
                pass
            fmt = AMRFinderPlusAnnotationFormat(temp_file_path, mode="r")
            fmt.validate()

    def test_amrfinderplus_annotation_format_validation_error(self):
        """A wrong header raises ValidationError carrying the full explanation."""
        path = self.get_data_path("annotation_wrong/amr_annotation.tsv")
        fmt = AMRFinderPlusAnnotationFormat(path, mode="r")

        # Only the raising call lives inside the context manager; any statement
        # placed after it in the same block would never execute.
        with self.assertRaises(ValidationError) as context:
            fmt.validate()

        # Must mirror the column list used by AMRFinderPlusAnnotationFormat.
        header_coordinates = [
            "Protein identifier",
            "Contig id",
            "Start",
            "Stop",
            "Strand",
            "Gene symbol",
            "Sequence name",
            "Scope",
            "Element type",
            "Element subtype",
            "Class",
            "Subclass",
            "Method",
            "Target length",
            "Reference sequence length",
            "% Coverage of reference sequence",
            "% Identity to reference sequence",
            "Alignment length",
            "Accession of closest sequence",
            "Name of closest sequence",
            "HMM id",
            "HMM description",
            "Hierarchy node",
        ]
        expected_message = (
            "Header line does not match AMRFinderPlusAnnotationFormat. Must "
            "consist of the following values: "
            + ", ".join(header_coordinates)
            + ".\n\nWhile Contig id, Start, Stop and Strand are optional."
            + "\n\nFound instead: "
            + "Incorrect Header 1, Incorrect Header 2, Incorrect Header 3"
        )

        # assertIn rather than assertEqual: the check stays valid even if
        # validate() adds surrounding context (e.g. the file path) to the
        # message raised by the format itself.
        self.assertIn(expected_message, str(context.exception))

    def test_amrfinderplus_annotation_directory_format(self):
        """The single-file directory format can be instantiated on a fixture."""
        dirpath = self.get_data_path(
            "annotation/coordinates/e026af61-d911-4de3-a957-7e8bf837f30d"
        )
        annotations = AMRFinderPlusAnnotationDirFmt(dirpath, mode="r")
        self.assertIsInstance(annotations, AMRFinderPlusAnnotationDirFmt)

    def test_amrfinderplus_annotations_directory_format(self):
        """The multi-file directory format can be instantiated on a fixture."""
        dirpath = self.get_data_path("annotation")
        annotations = AMRFinderPlusAnnotationsDirFmt(dirpath, mode="r")
        self.assertIsInstance(annotations, AMRFinderPlusAnnotationsDirFmt)
24 changes: 23 additions & 1 deletion q2_amr/plugin_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
# ----------------------------------------------------------------------------
import importlib

from q2_types.feature_data import FeatureData
from q2_types.feature_table import FeatureTable, Frequency
from q2_types.per_sample_sequences import (
MAGs,
Expand All @@ -29,11 +30,18 @@

from q2_amr import __version__
from q2_amr.amrfinderplus.types._format import (
AMRFinderPlusAnnotationDirFmt,
AMRFinderPlusAnnotationFormat,
AMRFinderPlusAnnotationsDirFmt,
AMRFinderPlusDatabaseDirFmt,
BinaryFormat,
TextFormat,
)
from q2_amr.amrfinderplus.types._type import AMRFinderPlusDatabase
from q2_amr.amrfinderplus.types._type import (
AMRFinderPlusAnnotation,
AMRFinderPlusAnnotations,
AMRFinderPlusDatabase,
)
from q2_amr.card.database import fetch_card_db
from q2_amr.card.heatmap import heatmap
from q2_amr.card.kmer import (
Expand Down Expand Up @@ -1084,6 +1092,8 @@
CARDReadsAlleleKmerAnalysis,
CARDMAGsKmerAnalysis,
AMRFinderPlusDatabase,
AMRFinderPlusAnnotations,
AMRFinderPlusAnnotation,
)

plugin.register_semantic_type_to_format(
Expand Down Expand Up @@ -1118,6 +1128,15 @@
AMRFinderPlusDatabase,
artifact_format=AMRFinderPlusDatabaseDirFmt,
)

# Bind each AMRFinderPlus annotation semantic type to the directory format
# used for its artifacts: the SampleData variant uses the multi-file
# AMRFinderPlusAnnotationsDirFmt, the FeatureData variant the single-file
# AMRFinderPlusAnnotationDirFmt.
plugin.register_semantic_type_to_format(
SampleData[AMRFinderPlusAnnotations],
artifact_format=AMRFinderPlusAnnotationsDirFmt,
)
plugin.register_semantic_type_to_format(
FeatureData[AMRFinderPlusAnnotation],
artifact_format=AMRFinderPlusAnnotationDirFmt,
)
plugin.register_formats(
CARDKmerDatabaseDirectoryFormat,
CARDKmerJSONFormat,
Expand Down Expand Up @@ -1145,6 +1164,9 @@
AMRFinderPlusDatabaseDirFmt,
TextFormat,
BinaryFormat,
AMRFinderPlusAnnotationFormat,
AMRFinderPlusAnnotationsDirFmt,
AMRFinderPlusAnnotationDirFmt,
)

importlib.import_module("q2_amr.card.types._transformer")

0 comments on commit 0b5439a

Please sign in to comment.