From 6844065f3e90358471c0fc67b041fdfd321773a9 Mon Sep 17 00:00:00 2001
From: VinzentRisch <100149044+VinzentRisch@users.noreply.github.com>
Date: Mon, 16 Oct 2023 16:41:51 +0200
Subject: [PATCH] FIX: Fixed q2-amr failing, if no ARGs are detected (#9)
-Fixed the problem of q2-amr failing if no ARGs are detected with annotate_reads_card or annotate_mags_card methods. Now the user is informed that no ARGs were detected and no further output is produced.
-Removed mapq, mapped and coverage parameters from annotate_reads_card method.
-Added CARDAlleleAnnotationDirectoryFormat/CARDGeneAnnotationDirectoryFormat -> qiime2.Metadata transformers.
-Fixed bug in heatmap method: the cat parameter cannot be used when clus is set to genes or both.
---
q2_amr/assets/rgi/heatmap/index.html | 22 ++-
q2_amr/card/heatmap.py | 102 ++++++++++++
q2_amr/card/mags.py | 39 +++--
q2_amr/card/reads.py | 70 ++------
q2_amr/card/utils.py | 32 ++++
q2_amr/plugin_setup.py | 99 ++++++-----
q2_amr/tests/card/test_database.py | 22 +--
q2_amr/tests/card/test_heatmap.py | 113 +++++++++++++
q2_amr/tests/card/test_mags.py | 94 +++++++----
q2_amr/tests/card/test_reads.py | 157 +++++-------------
q2_amr/tests/card/test_utils.py | 72 +++++++-
q2_amr/tests/data/output.mags.txt | 5 +
q2_amr/types/_transformer.py | 39 ++++-
q2_amr/types/_type.py | 8 +-
...mr_annotation.json => amr_annotation.json} | 0
..._amr_annotation.txt => amr_annotation.txt} | 0
...mr_annotation.json => amr_annotation.json} | 0
..._amr_annotation.txt => amr_annotation.txt} | 4 +-
...pping_data.txt => allele_mapping_data.txt} | 0
...mapping_data.txt => gene_mapping_data.txt} | 0
...ng_stats.txt => overall_mapping_stats.txt} | 0
...pping_data.txt => allele_mapping_data.txt} | 0
...mapping_data.txt => gene_mapping_data.txt} | 0
...ng_stats.txt => overall_mapping_stats.txt} | 0
.../tests/test_types_formats_transformers.py | 10 ++
25 files changed, 608 insertions(+), 280 deletions(-)
create mode 100644 q2_amr/card/heatmap.py
create mode 100644 q2_amr/tests/card/test_heatmap.py
create mode 100644 q2_amr/tests/data/output.mags.txt
rename q2_amr/types/tests/data/annotate_mags_output/sample1/bin1/{sample1_bin1_amr_annotation.json => amr_annotation.json} (100%)
rename q2_amr/types/tests/data/annotate_mags_output/sample1/bin1/{sample1_bin1_amr_annotation.txt => amr_annotation.txt} (100%)
rename q2_amr/types/tests/data/annotate_mags_output/sample2/bin1/{sample2_bin1_amr_annotation.json => amr_annotation.json} (100%)
rename q2_amr/types/tests/data/annotate_mags_output/sample2/bin1/{sample2_bin1_amr_annotation.txt => amr_annotation.txt} (96%)
rename q2_amr/types/tests/data/annotate_reads_output/sample1/{sample1.allele_mapping_data.txt => allele_mapping_data.txt} (100%)
rename q2_amr/types/tests/data/annotate_reads_output/sample1/{sample1.gene_mapping_data.txt => gene_mapping_data.txt} (100%)
rename q2_amr/types/tests/data/annotate_reads_output/sample1/{sample1.overall_mapping_stats.txt => overall_mapping_stats.txt} (100%)
rename q2_amr/types/tests/data/annotate_reads_output/sample2/{sample2.allele_mapping_data.txt => allele_mapping_data.txt} (100%)
rename q2_amr/types/tests/data/annotate_reads_output/sample2/{sample2.gene_mapping_data.txt => gene_mapping_data.txt} (100%)
rename q2_amr/types/tests/data/annotate_reads_output/sample2/{sample2.overall_mapping_stats.txt => overall_mapping_stats.txt} (100%)
diff --git a/q2_amr/assets/rgi/heatmap/index.html b/q2_amr/assets/rgi/heatmap/index.html
index 722c6f6..3e92f86 100644
--- a/q2_amr/assets/rgi/heatmap/index.html
+++ b/q2_amr/assets/rgi/heatmap/index.html
@@ -1,10 +1,10 @@
-{% extends 'tabbed.html' %}
+{% extends 'base.html' %}
-{% block tabcontent %}
+{% block content %}
@@ -16,15 +16,27 @@ Downloads
-
+
+
-
My Picture
+
CARD annotation: heatmap
+
+
+
+
+
Displayed is a heatmap of annotate-mags-card output.
+ Yellow represents a perfect hit, teal represents a
+ strict hit, purple represents no hit.
+
+
+
+
{% endblock %}
{% block footer %}
diff --git a/q2_amr/card/heatmap.py b/q2_amr/card/heatmap.py
new file mode 100644
index 0000000..02e583d
--- /dev/null
+++ b/q2_amr/card/heatmap.py
@@ -0,0 +1,102 @@
+import glob
+import os
+import shutil
+import subprocess
+import tempfile
+from distutils.dir_util import copy_tree
+
+import pkg_resources
+import q2templates
+
+from q2_amr.card.utils import run_command
+from q2_amr.types import CARDAnnotationDirectoryFormat
+
+
+def heatmap(
+    output_dir: str,
+    amr_annotation: CARDAnnotationDirectoryFormat,
+    clus: str = None,
+    cat: str = None,
+    display: str = "plain",
+    frequency: bool = False,
+):
+    """
+    Build the heatmap visualization for a CARD annotation directory.
+
+    Every "*.json" file found two directory levels below the annotation
+    directory is copied into a single temporary folder, "rgi heatmap" is run on
+    that folder, the result files are renamed and everything is rendered into
+    output_dir together with the packaged "rgi/heatmap" template.
+
+    Parameters:
+    - output_dir (str): Directory the visualization is written to.
+    - amr_annotation (CARDAnnotationDirectoryFormat): Annotations to plot.
+    - clus (str): Passed to rgi as "--clus" when set.
+    - cat (str): Passed to rgi as "--cat" when set.
+    - display (str): Passed to rgi as "--display".
+    - frequency (bool): Adds "--frequency" to the rgi call when True.
+    """
+    assets_dir = pkg_resources.resource_filename("q2_amr", "assets")
+    template_dir = os.path.join(assets_dir, "rgi", "heatmap")
+    json_pattern = os.path.join(str(amr_annotation), "*", "*", "*.json")
+    with tempfile.TemporaryDirectory() as tmp:
+        results_dir = os.path.join(tmp, "results")
+        json_files_dir = os.path.join(tmp, "json_files")
+        for work_dir in (results_dir, json_files_dir):
+            os.makedirs(work_dir)
+
+        # All JSON files end up in one folder that is handed to rgi as --input;
+        # the two parent directory names (sample and bin) become the file name.
+        for source_path in glob.glob(json_pattern):
+            sample, bin_name, _ = source_path.split(os.path.sep)[-3:]
+            target_path = os.path.join(json_files_dir, f"{sample}_{bin_name}")
+            shutil.copy(source_path, target_path)
+
+        run_rgi_heatmap(tmp, json_files_dir, clus, cat, display, frequency)
+        change_names(results_dir)
+        copy_tree(template_dir, output_dir)
+        copy_tree(results_dir, os.path.join(output_dir, "rgi_data"))
+        q2templates.render(
+            [os.path.join(template_dir, "index.html")],
+            output_dir,
+            context={"tabs": [{"title": "Heatmap", "url": "index.html"}]},
+        )
+
+
+class InvalidParameterCombinationError(Exception):
+    """Raised when heatmap parameters are combined in an unsupported way."""
+
+    def __init__(self, message="Invalid parameter combination"):
+        super().__init__(message)
+        # Kept as an attribute in addition to the exception args.
+        self.message = message
+
+
+def run_rgi_heatmap(tmp, json_files_dir, clus, cat, display, frequency):
+    """
+    Assemble the "rgi heatmap" command line and execute it with run_command.
+
+    Parameters:
+    - tmp (str): Working directory; results are written to "<tmp>/results".
+    - json_files_dir (str): Directory passed to rgi as "--input".
+    - clus (str): Value for "--clus"; the flag is omitted when falsy.
+    - cat (str): Value for "--cat"; the flag is omitted when falsy.
+    - display (str): Value for "--display".
+    - frequency (bool): Appends "--frequency" when True.
+
+    Raises:
+    - InvalidParameterCombinationError: If cat is given while clus is "genes"
+      or "both".
+    - Exception: If run_command raises subprocess.CalledProcessError; the
+      original error is attached as the cause.
+    """
+    # Reject the unsupported combination before any command is assembled.
+    if clus in ("both", "genes") and cat:
+        raise InvalidParameterCombinationError(
+            "If the parameter clus is set to genes "
+            "or both it is not possible to use the "
+            "cat parameter"
+        )
+    cmd = [
+        "rgi",
+        "heatmap",
+        "--input",
+        json_files_dir,
+        "--output",
+        f"{tmp}/results/heatmap",
+        "--display",
+        display,
+    ]
+    if clus:
+        cmd.extend(["--clus", clus])
+    if cat:
+        cmd.extend(["--cat", cat])
+    if frequency:
+        cmd.append("--frequency")
+    try:
+        run_command(cmd, tmp, verbose=True)
+    except subprocess.CalledProcessError as e:
+        # Chain the CalledProcessError so the failing command and its return
+        # code stay visible in the traceback.
+        raise Exception(
+            "An error was encountered while running rgi, "
+            f"(return code {e.returncode}), please inspect "
+            "stdout and stderr to learn more."
+        ) from e
+
+
+def change_names(results_dir):
+    """
+    Give the output files of "rgi heatmap" fixed names.
+
+    rgi names its files heatmap-*.extension, where * is the number of samples
+    in the heatmap. Every .eps, .csv and .png file in results_dir is renamed
+    to heatmap.extension so that index.html can link to it under a name that
+    does not depend on the sample count. Other files are left untouched.
+
+    Parameters:
+    - results_dir (str): The directory where the files are stored.
+    """
+    renamed_extensions = (".eps", ".csv", ".png")
+    for filename in os.listdir(results_dir):
+        extension = os.path.splitext(filename)[1]
+        if extension not in renamed_extensions:
+            continue
+        os.rename(
+            os.path.join(results_dir, filename),
+            os.path.join(results_dir, "heatmap" + extension),
+        )
diff --git a/q2_amr/card/mags.py b/q2_amr/card/mags.py
index 45057e0..3198775 100644
--- a/q2_amr/card/mags.py
+++ b/q2_amr/card/mags.py
@@ -6,7 +6,12 @@
import pandas as pd
from q2_types_genomics.per_sample_data import MultiMAGSequencesDirFmt
-from q2_amr.card.utils import load_preprocess_card_db, run_command
+from q2_amr.card.utils import (
+ create_count_table,
+ load_preprocess_card_db,
+ read_in_txt,
+ run_command,
+)
from q2_amr.types import CARDAnnotationDirectoryFormat, CARDDatabaseFormat
@@ -14,15 +19,15 @@ def annotate_mags_card(
mag: MultiMAGSequencesDirFmt,
card_db: CARDDatabaseFormat,
alignment_tool: str = "BLAST",
- input_type: str = "contig",
split_prodigal_jobs: bool = False,
include_loose: bool = False,
include_nudge: bool = False,
low_quality: bool = False,
- num_threads: int = 1,
-) -> CARDAnnotationDirectoryFormat:
+ threads: int = 1,
+) -> (CARDAnnotationDirectoryFormat, pd.DataFrame):
manifest = mag.manifest.view(pd.DataFrame)
amr_annotations = CARDAnnotationDirectoryFormat()
+ frequency_list = []
with tempfile.TemporaryDirectory() as tmp:
load_preprocess_card_db(tmp, card_db, "load")
for samp_bin in list(manifest.index):
@@ -33,24 +38,34 @@ def annotate_mags_card(
tmp,
input_sequence,
alignment_tool,
- input_type,
split_prodigal_jobs,
include_loose,
include_nudge,
low_quality,
- num_threads,
+ threads,
+ )
+ txt_path = os.path.join(bin_dir, "amr_annotation.txt")
+ json_path = os.path.join(bin_dir, "amr_annotation.json")
+
+ shutil.move(f"{tmp}/output.txt", txt_path)
+ shutil.move(f"{tmp}/output.json", json_path)
+ samp_bin_name = os.path.join(samp_bin[0], samp_bin[1])
+ frequency_df = read_in_txt(
+ path=txt_path, col_name="ARO", samp_bin_name=samp_bin_name
)
- shutil.move(f"{tmp}/output.txt", f"{bin_dir}/amr_annotation.txt")
- shutil.move(f"{tmp}/output.json", f"{bin_dir}/amr_annotation.json")
- print("a")
- return amr_annotations
+ if frequency_df is not None:
+ frequency_list.append(frequency_df)
+ feature_table = create_count_table(df_list=frequency_list)
+ return (
+ amr_annotations,
+ feature_table,
+ )
def run_rgi_main(
tmp,
input_sequence: str,
alignment_tool: str = "BLAST",
- input_type: str = "contig",
split_prodigal_jobs: bool = False,
include_loose: bool = False,
include_nudge: bool = False,
@@ -69,7 +84,7 @@ def run_rgi_main(
"--alignment_tool",
alignment_tool,
"--input_type",
- input_type,
+ "contig",
"--local",
]
if include_loose:
diff --git a/q2_amr/card/reads.py b/q2_amr/card/reads.py
index 1fc0dab..a1a5a65 100644
--- a/q2_amr/card/reads.py
+++ b/q2_amr/card/reads.py
@@ -3,7 +3,6 @@
import subprocess
import tempfile
from distutils.dir_util import copy_tree
-from functools import reduce
from typing import Union
import altair as alt
@@ -15,7 +14,12 @@
SingleLanePerSampleSingleEndFastqDirFmt,
)
-from q2_amr.card.utils import load_preprocess_card_db, run_command
+from q2_amr.card.utils import (
+ create_count_table,
+ load_preprocess_card_db,
+ read_in_txt,
+ run_command,
+)
from q2_amr.types import (
CARDAlleleAnnotationDirectoryFormat,
CARDDatabaseFormat,
@@ -30,10 +34,6 @@ def annotate_reads_card(
card_db: CARDDatabaseFormat,
aligner: str = "kma",
threads: int = 1,
- include_baits: bool = False,
- mapq: float = None,
- mapped: float = None,
- coverage: float = None,
) -> (
CARDAlleleAnnotationDirectoryFormat,
CARDGeneAnnotationDirectoryFormat,
@@ -65,15 +65,19 @@ def annotate_reads_card(
rev=rev,
aligner=aligner,
threads=threads,
- include_baits=include_baits,
- mapq=mapq,
- mapped=mapped,
- coverage=coverage,
)
- allele_frequency = read_in_txt(samp_input_dir, "allele")
- allele_frequency_list.append(allele_frequency)
- gene_frequency = read_in_txt(samp_input_dir, "gene")
- gene_frequency_list.append(gene_frequency)
+ path_allele = os.path.join(samp_input_dir, "output.allele_mapping_data.txt")
+ allele_frequency = read_in_txt(
+ path=path_allele, col_name="ARO Accession", samp_bin_name=samp
+ )
+ if allele_frequency is not None:
+ allele_frequency_list.append(allele_frequency)
+ path_gene = os.path.join(samp_input_dir, "output.gene_mapping_data.txt")
+ gene_frequency = read_in_txt(
+ path=path_gene, col_name="ARO Accession", samp_bin_name=samp
+ )
+ if gene_frequency is not None:
+ gene_frequency_list.append(gene_frequency)
move_files(samp_input_dir, samp_allele_dir, "allele")
move_files(samp_input_dir, samp_gene_dir, "gene")
@@ -98,32 +102,6 @@ def move_files(source_dir: str, des_dir: str, map_type: str):
)
-def create_count_table(df_list: list) -> pd.DataFrame:
- df_merged = reduce(
- lambda left, right: pd.merge(left, right, on="ARO Accession", how="outer"),
- df_list,
- )
- df_transposed = df_merged.transpose()
- df_transposed = df_transposed.fillna(0)
- df_transposed.columns = df_transposed.iloc[0]
- df_transposed = df_transposed.drop("ARO Accession")
- df_transposed.columns.name = None
- df_transposed.index.name = "sample_id"
- return df_transposed
-
-
-def read_in_txt(samp_dir: str, map_type: str):
- df = pd.read_csv(
- os.path.join(samp_dir, f"output.{map_type}_mapping_data.txt"), sep="\t"
- )
- df = df[["ARO Accession"]]
- df = df.astype(str)
- samp = os.path.basename(samp_dir)
- df[samp] = df.groupby("ARO Accession")["ARO Accession"].transform("count")
- df = df.drop_duplicates(subset=["ARO Accession"])
- return df
-
-
def run_rgi_bwt(
cwd: str,
samp: str,
@@ -131,10 +109,6 @@ def run_rgi_bwt(
rev: str,
aligner: str,
threads: int,
- include_baits: bool,
- mapq: float,
- mapped: float,
- coverage: float,
):
cmd = [
"rgi",
@@ -152,14 +126,6 @@ def run_rgi_bwt(
]
if rev:
cmd.extend(["--read_two", rev])
- if include_baits:
- cmd.append("--include_baits")
- if mapq:
- cmd.extend(["--mapq", str(mapq)])
- if mapped:
- cmd.extend(["--mapped", str(mapped)])
- if coverage:
- cmd.extend(["--coverage", str(coverage)])
try:
run_command(cmd, cwd, verbose=True)
except subprocess.CalledProcessError as e:
diff --git a/q2_amr/card/utils.py b/q2_amr/card/utils.py
index 602e0a8..99415d5 100644
--- a/q2_amr/card/utils.py
+++ b/q2_amr/card/utils.py
@@ -1,5 +1,8 @@
import json
import subprocess
+from functools import reduce
+
+import pandas as pd
EXTERNAL_CMD_WARNING = (
"Running external command line application(s). "
@@ -44,3 +47,32 @@ def load_preprocess_card_db(tmp, card_db, operation):
f"(return code {e.returncode}), please inspect "
"stdout and stderr to learn more."
)
+
+
+def read_in_txt(path: str, col_name: str, samp_bin_name: str):
+    """
+    Count how often each value of one column occurs in a tab-separated file.
+
+    Parameters:
+    - path (str): Path of the tab-separated file to read.
+    - col_name (str): Column whose values are counted.
+    - samp_bin_name (str): Name given to the column that holds the counts.
+
+    Returns:
+    - A DataFrame with one row per distinct value of col_name (as strings) and
+      its count in samp_bin_name, or None if the file has no data rows.
+    """
+    table = pd.read_csv(path, sep="\t")
+    if table.empty:
+        return None
+    counts = table[[col_name]].astype(str)
+    # Attach the group size to every row, then keep the first row per value.
+    counts[samp_bin_name] = counts.groupby(col_name)[col_name].transform("count")
+    return counts.drop_duplicates(subset=[col_name])
+
+
+def create_count_table(df_list: list) -> pd.DataFrame:
+    """
+    Combine per-sample count DataFrames into one table with samples as rows.
+
+    Parameters:
+    - df_list (list): DataFrames whose first column holds the feature
+      identifiers and whose remaining column holds the counts of one sample.
+
+    Returns:
+    - A DataFrame indexed by "sample_id" with one column per feature; features
+      missing from a sample are filled with 0.
+
+    Raises:
+    - ValueError: If df_list is empty.
+    """
+    if not df_list:
+        raise ValueError(
+            "RGI did not identify any AMR genes. No output can be created."
+        )
+
+    def _outer_merge(left, right):
+        # Join on the first column of the table accumulated so far.
+        return pd.merge(left, right, on=left.columns[0], how="outer")
+
+    table = reduce(_outer_merge, df_list).transpose().fillna(0)
+    # After transposing, the first row holds the feature identifiers: promote
+    # it to the column header and remove it from the data.
+    table.columns = table.iloc[0]
+    table = table.drop(table.index[0])
+    table.columns.name = None
+    table.index.name = "sample_id"
+    return table
diff --git a/q2_amr/plugin_setup.py b/q2_amr/plugin_setup.py
index 37d4db5..84efb88 100644
--- a/q2_amr/plugin_setup.py
+++ b/q2_amr/plugin_setup.py
@@ -7,18 +7,19 @@
# ----------------------------------------------------------------------------
import importlib
-from q2_types.feature_table import FeatureTable, Frequency
+from q2_types.feature_table import FeatureTable, PresenceAbsence
from q2_types.per_sample_sequences import (
PairedEndSequencesWithQuality,
SequencesWithQuality,
)
from q2_types.sample_data import SampleData
from q2_types_genomics.per_sample_data import MAGs
-from qiime2.core.type import Bool, Choices, Float, Int, Range, Str
+from qiime2.core.type import Bool, Choices, Int, Range, Str
from qiime2.plugin import Citations, Plugin
from q2_amr import __version__
from q2_amr.card.database import fetch_card_db
+from q2_amr.card.heatmap import heatmap
from q2_amr.card.mags import annotate_mags_card
from q2_amr.card.reads import annotate_reads_card, visualize_annotation_stats
from q2_amr.types import (
@@ -45,9 +46,9 @@
version=__version__,
website="https://github.com/bokulich-lab/q2-amr",
package="q2_amr",
- description="This is a QIIME 2 plugin that annotates microbiome sequence data with "
+ description="This is a QIIME 2 plugin that annotates sequence data with "
"antimicrobial resistance gene information from CARD.",
- short_description="This is a QIIME 2 plugin that annotates microbiome sequence "
+ short_description="This is a QIIME 2 plugin that annotates sequence "
"data with antimicrobial resistance gene information from CARD.",
)
plugin.methods.register_function(
@@ -62,7 +63,7 @@
"phenotypes."
},
name="Download CARD data.",
- description=("Downloads the CARD database from the CARD website."),
+ description=("Download the latest version of the CARD database."),
citations=[citations["alcock_card_2023"]],
)
@@ -71,32 +72,35 @@
inputs={"mag": SampleData[MAGs], "card_db": CARDDatabase},
parameters={
"alignment_tool": Str % Choices(["BLAST", "DIAMOND"]),
- "input_type": Str % Choices(["contig", "protein"]),
"split_prodigal_jobs": Bool,
"include_loose": Bool,
"include_nudge": Bool,
"low_quality": Bool,
- "num_threads": Int % Range(1, 9),
+ "threads": Int % Range(0, None, inclusive_start=False),
},
- outputs=[("amr_annotations", SampleData[CARDAnnotation])],
+ outputs=[
+ ("amr_annotations", SampleData[CARDAnnotation]),
+ ("feature_table", FeatureTable[PresenceAbsence]),
+ ],
input_descriptions={
- "mag": "MAG to be annotated with CARD.",
+ "mag": "MAGs to be annotated with CARD.",
"card_db": "CARD Database.",
},
parameter_descriptions={
"alignment_tool": "Specify alignment tool BLAST or DIAMOND.",
- "input_type": "Specify data input type contig or protein.",
- "split_prodigal_jobs": "Run multiple prodigal jobs simultaneously for contigs "
- "in a fasta file.",
+ "split_prodigal_jobs": "Run multiple prodigal jobs simultaneously for contigs"
+ " in one sample",
"include_loose": "Include loose hits in addition to strict and perfect hits .",
"include_nudge": "Include hits nudged from loose to strict hits.",
"low_quality": "Use for short contigs to predict partial genes.",
- "num_threads": "Number of threads (CPUs) to use in the BLAST search.",
+ "threads": "Number of threads (CPUs) to use in the BLAST search.",
},
- output_descriptions={"amr_annotations": "AMR Annotation as .txt and .json file."},
- name="Annotate MAGs with antimicrobial resistance gene information from CARD.",
- description="Annotate MAGs with antimicrobial resistance gene information from "
- "CARD.",
+ output_descriptions={
+ "amr_annotations": "AMR annotation as .txt and .json file.",
+ "feature_table": "Presence and absence table of ARGs in all samples.",
+ },
+ name="Annotate MAGs with antimicrobial resistance genes from CARD.",
+ description="Annotate MAGs with antimicrobial resistance genes from CARD.",
citations=[citations["alcock_card_2023"]],
)
@@ -109,40 +113,54 @@
},
parameters={
"aligner": Str % Choices(["kma", "bowtie2", "bwa"]),
- "include_baits": Bool,
- "mapq": Float % Range(0, None, inclusive_start=True),
- "mapped": Float % Range(0, None, inclusive_start=True),
- "coverage": Float % Range(0, None, inclusive_start=True),
"threads": Int % Range(0, None, inclusive_start=False),
},
outputs=[
- ("amr_allele_annotation", CARDAlleleAnnotation),
- ("amr_gene_annotation", CARDGeneAnnotation),
- ("allele_feature_table", FeatureTable[Frequency]),
- ("gene_feature_table", FeatureTable[Frequency]),
+ ("amr_allele_annotation", SampleData[CARDAlleleAnnotation]),
+ ("amr_gene_annotation", SampleData[CARDGeneAnnotation]),
+ ("allele_feature_table", FeatureTable[PresenceAbsence]),
+ ("gene_feature_table", FeatureTable[PresenceAbsence]),
],
input_descriptions={
- "reads": "Paired or single end metagenomic reads.",
+ "reads": "Paired or single end reads.",
"card_db": "CARD Database",
},
parameter_descriptions={
"aligner": "Specify alignment tool.",
- "include_baits": "Include baits.",
- "mapq": "Filter reads based on MAPQ score.",
- "mapped": "Filter reads based on mapped reads.",
- "coverage": "Filter reads based on coverage of reference sequence.",
"threads": "Number of threads (CPUs) to use.",
},
output_descriptions={
"amr_allele_annotation": "AMR annotation mapped on alleles.",
"amr_gene_annotation": "AMR annotation mapped on genes.",
- "allele_feature_table": "Samples combined into one frequency count table.",
- "gene_feature_table": "Samples combined into one frequency count table.",
+ "allele_feature_table": "Presence and absence table of ARGs in all samples for"
+ " allele mapping.",
+ "gene_feature_table": "Presence and absence table of ARGs in all samples for "
+ "gene mapping.",
+ },
+ name="Annotate reads with antimicrobial resistance genes from CARD.",
+ description="Annotate reads with antimicrobial resistance genes from CARD.",
+ citations=[citations["alcock_card_2023"]],
+)
+
+plugin.visualizers.register_function(
+ function=heatmap,
+ inputs={"amr_annotation": SampleData[CARDAnnotation]},
+ parameters={
+ "cat": Str % Choices(["drug_class", "resistance_mechanism", "gene_family"]),
+ "clus": Str % Choices(["samples", "genes", "both"]),
+ "display": Str % Choices(["plain", "fill", "text"]),
+ "frequency": Bool,
+ },
+ input_descriptions={"amr_annotation": "AMR Annotations from MAGs"},
+ parameter_descriptions={
+ "cat": "The option to organize resistance genes based on a category.",
+ "clus": "Option to use SciPy's hierarchical clustering algorithm to cluster "
+ "rows (AMR genes) or columns (samples).",
+ "display": "Specify display options for categories",
+ "frequency": "Represent samples based on resistance profile.",
},
- name="Annotate metagenomic reads with antimicrobial resistance gene information "
- "from CARD.",
- description="Annotate metagenomic reads with antimicrobial resistance gene "
- "information from CARD.",
+ name="Create heatmap from annotate-mags-card output.",
+ description=("Create heatmap from annotate-mags-card output."),
citations=[citations["alcock_card_2023"]],
)
@@ -155,7 +173,7 @@
},
parameter_descriptions={},
name="Visualize mapping statistics.",
- description="Visualize mapping statistics.",
+ description="Visualize mapping statistics of an annotate-reads-card output.",
citations=[citations["alcock_card_2023"]],
)
@@ -168,13 +186,14 @@
CARDDatabase, artifact_format=CARDDatabaseDirectoryFormat
)
plugin.register_semantic_type_to_format(
- CARDAnnotation, artifact_format=CARDAnnotationDirectoryFormat
+ SampleData[CARDAnnotation], artifact_format=CARDAnnotationDirectoryFormat
)
plugin.register_semantic_type_to_format(
- CARDAlleleAnnotation, artifact_format=CARDAlleleAnnotationDirectoryFormat
+ SampleData[CARDAlleleAnnotation],
+ artifact_format=CARDAlleleAnnotationDirectoryFormat,
)
plugin.register_semantic_type_to_format(
- CARDGeneAnnotation, artifact_format=CARDGeneAnnotationDirectoryFormat
+ SampleData[CARDGeneAnnotation], artifact_format=CARDGeneAnnotationDirectoryFormat
)
plugin.register_formats(
diff --git a/q2_amr/tests/card/test_database.py b/q2_amr/tests/card/test_database.py
index 8cae7af..e4ec9b7 100644
--- a/q2_amr/tests/card/test_database.py
+++ b/q2_amr/tests/card/test_database.py
@@ -12,26 +12,28 @@
class TestAnnotateMagsCard(TestPluginBase):
package = "q2_amr.tests"
- @patch("requests.get")
- def test_fetch_card_db(self, mock_requests):
+ def test_fetch_card_db(self):
f = open(self.get_data_path("card.tar.bz2"), "rb")
mock_response = MagicMock(raw=f)
- mock_requests.return_value = mock_response
- obs = fetch_card_db()
+ with patch("requests.get") as mock_requests:
+ mock_requests.return_value = mock_response
+ obs = fetch_card_db()
self.assertTrue(os.path.exists(os.path.join(str(obs), "card.json")))
self.assertIsInstance(obs, CARDDatabaseDirectoryFormat)
mock_requests.assert_called_once_with(
"https://card.mcmaster.ca/latest/data", stream=True
)
- @patch("requests.get", side_effect=requests.ConnectionError)
- def test_fetch_card_data_connection_error(self, mock_requests):
- with self.assertRaisesRegex(
+ def test_fetch_card_data_connection_error(self):
+ with patch(
+ "requests.get", side_effect=requests.ConnectionError
+ ), self.assertRaisesRegex(
requests.ConnectionError, "Network connectivity problems."
):
fetch_card_db()
- @patch("tarfile.open", side_effect=tarfile.ReadError)
- def test_fetch_card_data_tarfile_read_error(self, mock_requests):
- with self.assertRaisesRegex(tarfile.ReadError, "Tarfile is invalid."):
+ def test_fetch_card_data_tarfile_read_error(self):
+ with patch(
+ "tarfile.open", side_effect=tarfile.ReadError
+ ), self.assertRaisesRegex(tarfile.ReadError, "Tarfile is invalid."):
fetch_card_db()
diff --git a/q2_amr/tests/card/test_heatmap.py b/q2_amr/tests/card/test_heatmap.py
new file mode 100644
index 0000000..44aee61
--- /dev/null
+++ b/q2_amr/tests/card/test_heatmap.py
@@ -0,0 +1,113 @@
+import os
+import tempfile
+from unittest.mock import patch
+
+from qiime2.plugin.testing import TestPluginBase
+
+from q2_amr.card.heatmap import (
+ InvalidParameterCombinationError,
+ change_names,
+ heatmap,
+ run_rgi_heatmap,
+)
+from q2_amr.types import CARDAnnotationDirectoryFormat
+
+
+class TestHeatmap(TestPluginBase):
+    """Unit tests for the heatmap visualizer and its helper functions."""
+
+    package = "q2_amr.tests"
+
+    def test_heatmap(self):
+        """heatmap() puts the renamed rgi results and index.html in the output."""
+        amr_annotation = CARDAnnotationDirectoryFormat()
+        file_types = ("png", "eps", "csv")
+
+        def mock_run_rgi_heatmap(tmp, json_files_dir, clus, cat, display, frequency):
+            # Stand in for rgi: write "heatmap-3.<ext>" files into the results
+            # folder that heatmap() created inside its own temporary directory.
+            for file_type in file_types:
+                with open(
+                    os.path.join(tmp, "results", f"heatmap-3.{file_type}"), "w"
+                ) as file:
+                    file.write(file_type)
+
+        with patch(
+            "q2_amr.card.heatmap.run_rgi_heatmap", side_effect=mock_run_rgi_heatmap
+        ), tempfile.TemporaryDirectory() as tmp:
+            heatmap(tmp, amr_annotation)
+            for file_type in file_types:
+                self.assertTrue(
+                    os.path.exists(
+                        os.path.join(tmp, "rgi_data", f"heatmap.{file_type}")
+                    )
+                )
+            self.assertTrue(os.path.exists(os.path.join(tmp, "index.html")))
+            self.assertTrue(os.path.exists(os.path.join(tmp, "q2templateassets")))
+
+    def test_run_rgi_heatmap(self):
+        """All optional flags are appended to the rgi command in order."""
+        with patch("q2_amr.card.heatmap.run_command") as mock_run_command:
+            run_rgi_heatmap(
+                "path_tmp", "json_files_dir_path", "samples", "drug_class", "fill", True
+            )
+            mock_run_command.assert_called_once_with(
+                [
+                    "rgi",
+                    "heatmap",
+                    "--input",
+                    "json_files_dir_path",
+                    "--output",
+                    "path_tmp/results/heatmap",
+                    "--display",
+                    "fill",
+                    "--clus",
+                    "samples",
+                    "--cat",
+                    "drug_class",
+                    "--frequency",
+                ],
+                "path_tmp",
+                verbose=True,
+            )
+
+    def test_change_names(self):
+        """Each heatmap-N.<ext> file is renamed to heatmap.<ext>."""
+        with patch(
+            "q2_amr.card.heatmap.os.listdir",
+            return_value=["heatmap-7.eps", "heatmap-7.png", "heatmap-7.csv"],
+        ), patch("q2_amr.card.heatmap.os.rename") as mock_rename:
+            results_dir = "/path/to/results"
+            change_names(results_dir)
+            expected_calls = [
+                ("/path/to/results/heatmap-7.eps", "/path/to/results/heatmap.eps"),
+                ("/path/to/results/heatmap-7.png", "/path/to/results/heatmap.png"),
+                ("/path/to/results/heatmap-7.csv", "/path/to/results/heatmap.csv"),
+            ]
+            actual_calls = [call[0] for call in mock_rename.call_args_list]
+            self.assertEqual(expected_calls, actual_calls)
+
+    def test_change_names_empty(self):
+        """Nothing is renamed when the results directory is empty."""
+        with patch("q2_amr.card.heatmap.os.listdir", return_value=[]), patch(
+            "q2_amr.card.heatmap.os.rename"
+        ) as mock_rename:
+            results_dir = "/path/to/results"
+            change_names(results_dir)
+            expected_calls = []
+            actual_calls = [call[0] for call in mock_rename.call_args_list]
+            self.assertEqual(expected_calls, actual_calls)
+
+    def test_invalid_combination_raises_error(self):
+        """Using cat together with clus="both" is rejected before rgi runs."""
+        tmp = "path"
+        json_files_dir = "path"
+        clus = "both"
+        cat = "drug_class"
+        display = "text"
+        frequency = False
+
+        with self.assertRaises(InvalidParameterCombinationError) as cm:
+            run_rgi_heatmap(tmp, json_files_dir, clus, cat, display, frequency)
+
+        self.assertEqual(
+            str(cm.exception),
+            "If the parameter clus is set to genes"
+            " or both it is not possible to use "
+            "the cat parameter",
+        )
diff --git a/q2_amr/tests/card/test_mags.py b/q2_amr/tests/card/test_mags.py
index 912bf34..8573bda 100644
--- a/q2_amr/tests/card/test_mags.py
+++ b/q2_amr/tests/card/test_mags.py
@@ -1,8 +1,10 @@
import os
import shutil
import subprocess
-from unittest.mock import patch
+from copy import deepcopy
+from unittest.mock import MagicMock, patch
+import pandas as pd
from q2_types_genomics.per_sample_data import MultiMAGSequencesDirFmt
from qiime2.plugin.testing import TestPluginBase
@@ -13,49 +15,77 @@
class TestAnnotateMagsCard(TestPluginBase):
package = "q2_amr.tests"
- def test_annotate_mags_card(self):
+ table = pd.DataFrame(
+ {
+ "sample_id": ["sample1", "sample2"],
+ 3000796: [1, 0],
+ 3000815: [1, 1],
+ 3000805: [1, 1],
+ 3000026: [1, 2],
+ 3000797: [0, 1],
+ }
+ )
+
+ def mock_run_rgi_main(
+ self,
+ tmp,
+ input_sequence,
+ alignment_tool,
+ split_prodigal_jobs,
+ include_loose,
+ include_nudge,
+ low_quality,
+ num_threads,
+ ):
output_txt = self.get_data_path("rgi_output.txt")
output_json = self.get_data_path("rgi_output.json")
+ shutil.copy(output_txt, f"{tmp}/output.txt")
+ shutil.copy(output_json, f"{tmp}/output.json")
+
+ def return_count_table(self, df_list):
+ count_table = deepcopy(self.table)
+ count_table.set_index("sample_id", inplace=True)
+ count_table = count_table.astype(float)
+ count_table.columns = count_table.columns.astype(float)
+ return count_table
+
+ def test_annotate_mags_card(self):
+
manifest = self.get_data_path("MANIFEST_mags")
mag = MultiMAGSequencesDirFmt()
card_db = CARDDatabaseFormat()
shutil.copy(manifest, os.path.join(str(mag), "MANIFEST"))
- def mock_run_rgi_main(
- tmp,
- input_sequence,
- alignment_tool,
- input_type,
- split_prodigal_jobs,
- include_loose,
- include_nudge,
- low_quality,
- num_threads,
- ):
- shutil.copy(output_txt, f"{tmp}/output.txt")
- shutil.copy(output_json, f"{tmp}/output.json")
-
+ mock_create_count_table = MagicMock(side_effect=self.return_count_table)
+ mock_read_in_txt = MagicMock()
with patch(
- "q2_amr.card.mags.run_rgi_main", side_effect=mock_run_rgi_main
- ), patch("q2_amr.card.mags.load_preprocess_card_db"):
+ "q2_amr.card.mags.run_rgi_main", side_effect=self.mock_run_rgi_main
+ ), patch("q2_amr.card.mags.load_preprocess_card_db"), patch(
+ "q2_amr.card.mags.read_in_txt", mock_read_in_txt
+ ), patch(
+ "q2_amr.card.mags.create_count_table", mock_create_count_table
+ ):
result = annotate_mags_card(mag, card_db)
- self.assertIsInstance(result, CARDAnnotationDirectoryFormat)
+ self.assertIsInstance(result[0], CARDAnnotationDirectoryFormat)
+ self.assertIsInstance(result[1], pd.DataFrame)
self.assertTrue(
os.path.exists(
- os.path.join(str(result), "sample1", "bin1", "amr_annotation.txt")
+ os.path.join(
+ str(result[0]), "sample1", "bin1", "amr_annotation.txt"
+ )
)
)
self.assertTrue(
os.path.exists(
- os.path.join(str(result), "sample1", "bin1", "amr_annotation.json")
+ os.path.join(
+ str(result[0]), "sample1", "bin1", "amr_annotation.json"
+ )
)
)
def test_run_rgi_main(self):
with patch("q2_amr.card.mags.run_command") as mock_run_command:
- run_rgi_main(
- "path_tmp", "path_input", "DIAMOND", "contig", True, True, True, True, 8
- )
+ run_rgi_main("path_tmp", "path_input", "DIAMOND", True, True, True, True, 8)
mock_run_command.assert_called_once_with(
[
"rgi",
@@ -80,15 +110,15 @@ def test_run_rgi_main(self):
verbose=True,
)
- @patch("q2_amr.card.mags.run_command")
- def test_exception_raised(self, mock_run_command):
- mock_run_command.side_effect = subprocess.CalledProcessError(1, "cmd")
- tmp = "path/to/tmp"
- input_sequence = "path/to/input_sequence.fasta"
+ def test_exception_raised(self):
expected_message = (
"An error was encountered while running rgi, "
"(return code 1), please inspect stdout and stderr to learn more."
)
- with self.assertRaises(Exception) as cm:
- run_rgi_main(tmp, input_sequence)
- self.assertEqual(str(cm.exception), expected_message)
+ tmp = "path/to/tmp"
+ input_sequence = "path/to/input_sequence.fasta"
+ with patch("q2_amr.card.mags.run_command") as mock_run_command:
+ mock_run_command.side_effect = subprocess.CalledProcessError(1, "cmd")
+ with self.assertRaises(Exception) as cm:
+ run_rgi_main(tmp, input_sequence)
+ self.assertEqual(str(cm.exception), expected_message)
diff --git a/q2_amr/tests/card/test_reads.py b/q2_amr/tests/card/test_reads.py
index 25ff97a..1685454 100644
--- a/q2_amr/tests/card/test_reads.py
+++ b/q2_amr/tests/card/test_reads.py
@@ -2,7 +2,6 @@
import shutil
import subprocess
import tempfile
-from copy import deepcopy
from unittest.mock import ANY, MagicMock, call, patch
import pandas as pd
@@ -11,14 +10,13 @@
SingleLanePerSampleSingleEndFastqDirFmt,
)
from qiime2.plugin.testing import TestPluginBase
+from test_mags import TestAnnotateMagsCard
from q2_amr.card.reads import (
annotate_reads_card,
- create_count_table,
extract_sample_stats,
move_files,
plot_sample_stats,
- read_in_txt,
run_rgi_bwt,
visualize_annotation_stats,
)
@@ -38,38 +36,31 @@ def test_annotate_reads_card_single(self):
def test_annotate_reads_card_paired(self):
self.annotate_reads_card_test_body("paired")
+ def copy_needed_files(self, cwd, samp, **kwargs):
+ output_allele = self.get_data_path("output.allele_mapping_data.txt")
+ output_gene = self.get_data_path("output.gene_mapping_data.txt")
+ output_stats = self.get_data_path("output.overall_mapping_stats.txt")
+ samp_dir = os.path.join(cwd, samp)
+ shutil.copy(output_allele, samp_dir)
+ shutil.copy(output_gene, samp_dir)
+ shutil.copy(output_stats, samp_dir)
+
def annotate_reads_card_test_body(self, read_type):
manifest = self.get_data_path(f"MANIFEST_reads_{read_type}")
if read_type == "single":
reads = SingleLanePerSampleSingleEndFastqDirFmt()
shutil.copy(manifest, os.path.join(str(reads), "MANIFEST"))
-
else:
reads = SingleLanePerSamplePairedEndFastqDirFmt()
shutil.copy(manifest, os.path.join(str(reads), "MANIFEST"))
-
- output_allele = self.get_data_path("output.allele_mapping_data.txt")
- output_gene = self.get_data_path("output.gene_mapping_data.txt")
- output_stats = self.get_data_path("output.overall_mapping_stats.txt")
card_db = CARDDatabaseFormat()
-
- def copy_needed_files(cwd, samp, **kwargs):
- samp_dir = os.path.join(cwd, samp)
- shutil.copy(output_allele, samp_dir)
- shutil.copy(output_gene, samp_dir)
- shutil.copy(output_stats, samp_dir)
-
- def return_count_table(df_list):
- count_table = deepcopy(self.table)
- count_table.set_index("sample_id", inplace=True)
- count_table = count_table.astype(float)
- count_table.columns = count_table.columns.astype(float)
- return count_table
-
- mock_run_rgi_bwt = MagicMock(side_effect=copy_needed_files)
+ mock_run_rgi_bwt = MagicMock(side_effect=self.copy_needed_files)
mock_run_rgi_load = MagicMock()
mock_read_in_txt = MagicMock()
- mock_create_count_table = MagicMock(side_effect=return_count_table)
+ mag_test_class = TestAnnotateMagsCard()
+ mock_create_count_table = MagicMock(
+ side_effect=mag_test_class.return_count_table
+ )
with patch("q2_amr.card.reads.run_rgi_bwt", mock_run_rgi_bwt), patch(
"q2_amr.card.reads.load_preprocess_card_db", mock_run_rgi_load
), patch("q2_amr.card.reads.read_in_txt", mock_read_in_txt), patch(
@@ -87,10 +78,6 @@ def return_count_table(df_list):
aligner="kma",
rev=None,
threads=1,
- include_baits=False,
- mapq=None,
- mapped=None,
- coverage=None,
),
call(
cwd=tmp_dir,
@@ -99,10 +86,6 @@ def return_count_table(df_list):
aligner="kma",
rev=None,
threads=1,
- include_baits=False,
- mapq=None,
- mapped=None,
- coverage=None,
),
]
else:
@@ -114,10 +97,6 @@ def return_count_table(df_list):
rev=f"{reads}/sample1_00_L001_R2_001.fastq.gz",
aligner="kma",
threads=1,
- include_baits=False,
- mapq=None,
- mapped=None,
- coverage=None,
),
call(
cwd=tmp_dir,
@@ -126,10 +105,6 @@ def return_count_table(df_list):
rev=f"{reads}/sample2_00_L001_R2_001.fastq.gz",
aligner="kma",
threads=1,
- include_baits=False,
- mapq=None,
- mapped=None,
- coverage=None,
),
]
exp_calls_mock_load = [
@@ -138,10 +113,26 @@ def return_count_table(df_list):
call(tmp_dir, ANY, "load_fasta"),
]
exp_calls_mock_read = [
- call(f"{tmp_dir}/sample1", "allele"),
- call(f"{tmp_dir}/sample1", "gene"),
- call(f"{tmp_dir}/sample2", "allele"),
- call(f"{tmp_dir}/sample2", "gene"),
+ call(
+ path=f"{tmp_dir}/sample1/output.allele_mapping_data.txt",
+ col_name="ARO Accession",
+ samp_bin_name="sample1",
+ ),
+ call(
+ path=f"{tmp_dir}/sample1/output.gene_mapping_data.txt",
+ col_name="ARO Accession",
+ samp_bin_name="sample1",
+ ),
+ call(
+ path=f"{tmp_dir}/sample2/output.allele_mapping_data.txt",
+ col_name="ARO Accession",
+ samp_bin_name="sample2",
+ ),
+ call(
+ path=f"{tmp_dir}/sample2/output.gene_mapping_data.txt",
+ col_name="ARO Accession",
+ samp_bin_name="sample2",
+ ),
]
exp_calls_mock_count = [call([ANY, ANY]), call([ANY, ANY])]
mock_run_rgi_bwt.assert_has_calls(exp_calls_mock_run)
@@ -172,10 +163,6 @@ def test_run_rgi_bwt(self):
"path_rev",
"bowtie2",
8,
- True,
- 3,
- 5,
- 3.2,
)
mock_run_command.assert_called_once_with(
[
@@ -193,26 +180,21 @@ def test_run_rgi_bwt(self):
"bowtie2",
"--read_two",
"path_rev",
- "--include_baits",
- "--mapq",
- "3",
- "--mapped",
- "5",
- "--coverage",
- "3.2",
],
"path_tmp",
verbose=True,
)
- @patch("q2_amr.card.reads.run_command")
- def test_exception_raised(self, mock_run_command):
- mock_run_command.side_effect = subprocess.CalledProcessError(1, "cmd")
+ def test_exception_raised(self):
expected_message = (
"An error was encountered while running rgi, "
"(return code 1), please inspect stdout and stderr to learn more."
)
- with self.assertRaises(Exception) as cm:
+
+ with patch(
+ "q2_amr.card.reads.run_command"
+ ) as mock_run_command, self.assertRaises(Exception) as cm:
+ mock_run_command.side_effect = subprocess.CalledProcessError(1, "cmd")
run_rgi_bwt(
cwd="path/cwd",
samp="sample1",
@@ -220,12 +202,8 @@ def test_exception_raised(self, mock_run_command):
rev="path/rev",
aligner="bwa",
threads=1,
- include_baits=True,
- mapq=0.3,
- mapped=0.3,
- coverage=0.3,
)
- self.assertEqual(str(cm.exception), expected_message)
+ self.assertEqual(str(cm.exception), expected_message)
def test_move_files_allele(self):
self.move_files_test_body("allele")
@@ -312,54 +290,3 @@ def mock_plot_sample_stats(sample_stats, output_dir):
self.assertTrue(os.path.exists(os.path.join(tmp, "sample_stats_plot.html")))
self.assertTrue(os.path.exists(os.path.join(tmp, "index.html")))
self.assertTrue(os.path.exists(os.path.join(tmp, "q2templateassets")))
-
- mapping_data_sample1 = pd.DataFrame(
- {
- "ARO Accession": [3000796, 3000815, 3000805, 3000026],
- "sample1": [1, 1, 1, 1],
- }
- )
-
- mapping_data_sample2 = pd.DataFrame(
- {
- "ARO Accession": [3000797, 3000815, 3000805, 3000026],
- "sample2": [1, 1, 1, 2],
- }
- )
-
- table = pd.DataFrame(
- {
- "sample_id": ["sample1", "sample2"],
- 3000796: [1, 0],
- 3000815: [1, 1],
- 3000805: [1, 1],
- 3000026: [1, 2],
- 3000797: [0, 1],
- }
- )
-
- def test_read_in_txt_allele(self):
- self.read_in_txt_test_body("allele", self.mapping_data_sample1)
-
- def test_read_in_txt_gene(self):
- self.read_in_txt_test_body("gene", self.mapping_data_sample1)
-
- def read_in_txt_test_body(self, map_type, mapping_data):
- mapping_file = self.get_data_path(f"output.{map_type}_mapping_data.txt")
- exp = mapping_data
- with tempfile.TemporaryDirectory() as tmp:
- samp_dir = os.path.join(tmp, "sample1")
- os.mkdir(samp_dir)
- shutil.copy(mapping_file, samp_dir)
- obs = read_in_txt(samp_dir, map_type)
- obs["ARO Accession"] = obs["ARO Accession"].astype(int)
- pd.testing.assert_frame_equal(exp, obs)
-
- def test_create_count_table(self):
- df_list = [self.mapping_data_sample1, self.mapping_data_sample2]
- obs = create_count_table(df_list)
- exp = self.table
- exp.set_index("sample_id", inplace=True)
- exp = exp.astype(float)
- exp.columns = exp.columns.astype(float)
- pd.testing.assert_frame_equal(exp, obs)
diff --git a/q2_amr/tests/card/test_utils.py b/q2_amr/tests/card/test_utils.py
index 4fc49a8..10d3bc4 100644
--- a/q2_amr/tests/card/test_utils.py
+++ b/q2_amr/tests/card/test_utils.py
@@ -1,15 +1,38 @@
import subprocess
from unittest.mock import patch
+import pandas as pd
from qiime2.plugin.testing import TestPluginBase
+from test_mags import TestAnnotateMagsCard
-from q2_amr.card.utils import load_preprocess_card_db
+from q2_amr.card.utils import create_count_table, load_preprocess_card_db, read_in_txt
from q2_amr.types import CARDDatabaseFormat
class TestAnnotateReadsCARD(TestPluginBase):
package = "q2_amr.tests"
+ mapping_data_sample1 = pd.DataFrame(
+ {
+ "ARO Accession": [3000796, 3000815, 3000805, 3000026],
+ "sample1": [1, 1, 1, 1],
+ }
+ )
+
+ mags_mapping_data_sample1 = pd.DataFrame(
+ {
+ "ARO": [3000796, 3000815, 3000805, 3000026],
+ "sample1": [1, 1, 1, 1],
+ }
+ )
+
+ mapping_data_sample2 = pd.DataFrame(
+ {
+ "ARO Accession": [3000797, 3000815, 3000805, 3000026],
+ "sample2": [1, 1, 1, 2],
+ }
+ )
+
def test_load_card_db(self):
card_db = CARDDatabaseFormat()
with patch("q2_amr.card.utils.run_command") as mock_run_command:
@@ -46,9 +69,7 @@ def test_load_card_db_fasta(self):
verbose=True,
)
- @patch("q2_amr.card.utils.run_command")
- def test_exception_raised(self, mock_run_command):
- mock_run_command.side_effect = subprocess.CalledProcessError(1, "cmd")
+ def test_exception_raised(self):
tmp = "path/to/tmp"
card_db = "path/to/card_db.json"
expected_message = (
@@ -56,6 +77,45 @@ def test_exception_raised(self, mock_run_command):
"(return code 1), please inspect stdout and stderr to learn more."
)
operation = "load"
- with self.assertRaises(Exception) as cm:
+ with patch(
+ "q2_amr.card.utils.run_command"
+ ) as mock_run_command, self.assertRaises(Exception) as cm:
+ mock_run_command.side_effect = subprocess.CalledProcessError(1, "cmd")
load_preprocess_card_db(tmp, card_db, operation)
- self.assertEqual(str(cm.exception), expected_message)
+ self.assertEqual(str(cm.exception), expected_message)
+
+ def test_read_in_txt_mags(self):
+ path = self.get_data_path("output.mags.txt")
+ self.read_in_txt_test_body(
+ path, "ARO", "sample1", self.mags_mapping_data_sample1
+ )
+
+ def test_read_in_txt_allele(self):
+ path = self.get_data_path("output.allele_mapping_data.txt")
+ self.read_in_txt_test_body(
+ path, "ARO Accession", "sample1", self.mapping_data_sample1
+ )
+
+ def test_read_in_txt_gene(self):
+ path = self.get_data_path("output.gene_mapping_data.txt")
+ self.read_in_txt_test_body(
+ path, "ARO Accession", "sample1", self.mapping_data_sample1
+ )
+
+ def read_in_txt_test_body(self, path, col_name, samp_bin_name, mapping_data):
+ exp = mapping_data
+ obs = read_in_txt(path, col_name, samp_bin_name)
+ obs[col_name] = obs[col_name].astype(int)
+ pd.testing.assert_frame_equal(exp, obs)
+
+ def test_create_count_table(self):
+ df_list = [self.mapping_data_sample1, self.mapping_data_sample2]
+ obs = create_count_table(df_list)
+ mag_test_class = TestAnnotateMagsCard()
+ exp = mag_test_class.table
+ exp.set_index("sample_id", inplace=True)
+ exp = exp.astype(float)
+ exp.columns = exp.columns.astype(float)
+ pd.testing.assert_frame_equal(exp, obs)
+ df_list_empty = []
+ self.assertRaises(ValueError, create_count_table, df_list_empty)
diff --git a/q2_amr/tests/data/output.mags.txt b/q2_amr/tests/data/output.mags.txt
new file mode 100644
index 0000000..7d9fc51
--- /dev/null
+++ b/q2_amr/tests/data/output.mags.txt
@@ -0,0 +1,5 @@
+ORF_ID Contig Start Stop Orientation Cut_Off Pass_Bitscore Best_Hit_Bitscore Best_Hit_ARO Best_Identities ARO Model_type SNPs_in_Best_Hit_ARO Other_SNPs Drug Class Resistance Mechanism AMR Gene Family Predicted_DNA Predicted_Protein CARD_Protein_Sequence Percentage Length of Reference Sequence ID Model_ID Nudged Note
+NC_000962.3_689 # 759789 # 763325 # 1 # ID=1_689;partial=00;start_type=GTG;rbs_motif=None;rbs_spacer=None;gc_cont=0.643 NC_000962.3_689 759789 763325 + Strict 2300 2394.77 Mycobacterium tuberculosis rpoB mutants conferring resistance to rifampicin 99.91 3000796 protein variant model D516G, H526T, L511R n/a rifamycin antibiotic antibiotic target alteration; antibiotic target replacement rifamycin-resistant beta-subunit of RNA polymerase (rpoB) GTGCTGGAAGGATGCATCTTGGCAGATTCCCGCCAGAGCAAAACAGCCGCTAGTCCTAGTCCGAGTCGCCCGCAAAGTTCCTCGAATAACTCCGTACCCGGAGCGCCAAACCGGGTCTCCTTCGCTAAGCTGCGCGAACCACTTGAGGTTCCGGGACTCCTTGACGTCCAGACCGATTCGTTCGAGTGGCTGATCGGTTCGCCGCGCTGGCGCGAATCCGCCGCCGAGCGGGGTGATGTCAACCCAGTGGGTGGCCTGGAAGAGGTGCTCTACGAGCTGTCTCCGATCGAGGACTTCTCCGGGTCGATGTCGTTGTCGTTCTCTGACCCTCGTTTCGACGATGTCAAGGCACCCGTCGACGAGTGCAAAGACAAGGACATGACGTACGCGGCTCCACTGTTCGTCACCGCCGAGTTCATCAACAACAACACCGGTGAGATCAAGAGTCAGACGGTGTTCATGGGTGACTTCCCGATGATGACCGAGAAGGGCACGTTCATCATCAACGGGACCGAGCGTGTGGTGGTCAGCCAGCTGGTGCGGTCGCCCGGGGTGTACTTCGACGAGACCATTGACAAGTCCACCGACAAGACGCTGCACAGCGTCAAGGTGATCCCGAGCCGCGGCGCGTGGCTCGAGTTTGACGTCGACAAGCGCGACACCGTCGGCGTGCGCATCGACCGCAAACGCCGGCAACCGGTCACCGTGCTGCTCAAGGCGCTGGGCTGGACCAGCGAGCAGATTGTCGAGCGGTTCGGGTTCTCCGAGATCATGCGATCGACGCTGGAGAAGGACAACACCGTCGGCACCGACGAGGCGCTGTTGGACATCTACCGCAAGCTGCGTCCGGGCGAGCCCCCGACCAAAGAGTCAGCGCAGACGCTGTTGGAAAACTTGTTCTTCAAGGAGAAGCGCTACGACCTGGCCCGCGTCGGTCGCTATAAGGTCAACAAGAAGCTCGGGCTGCATGTCGGCGAGCCCATCACGTCGTCGACGCTGACCGAAGAAGACGTCGTGGCCACCATCGAATATCTGGTCCGCTTGCACGAGGGTCAGACCACGATGACCGTTCCGGGCGGCGTCGAGGTGCCGGTGGAAACCGACGACATCGACCACTTCGGCAACCGCCGCCTGCGTACGGTCGGCGAGCTGATCCAAAACCAGATCCGGGTCGGCATGTCGCGGATGGAGCGGGTGGTCCGGGAGCGGATGACCACCCAGGACGTGGAGGCGATCACACCGCAGACGTTGATCAACATCCGGCCGGTGGTCGCCGCGATCAAGGAGTTCTTCGGCACCAGCCAGCTGAGCCAATTCATGGACCAGAACAACCCGCTGTCGGGGTTGACCCACAAGCGCCGACTGTCGGCGCTGGGGCCCGGCGGTCTGTCACGTGAGCGTGCCGGGCTGGAGGTCCGCGACGTGCACCCGTCGCACTACGGCCGGATGTGCCCGATCGAAACCCCTGAGGGGCCCAACATCGGTCTGATCGGCTCGCTGTCGGTGTACGCGCGGGTCAACCCGTTCGGGTTCATCGAAACGCCGTACCGCA
AGGTGGTCGACGGCGTGGTTAGCGACGAGATCGTGTACCTGACCGCCGACGAGGAGGACCGCCACGTGGTGGCACAGGCCAATTCGCCGATCGATGCGGACGGTCGCTTCGTCGAGCCGCGCGTGCTGGTCCGCCGCAAGGCGGGCGAGGTGGAGTACGTGCCCTCGTCTGAGGTGGACTACATGGACGTCTCGCCCCGCCAGATGGTGTCGGTGGCCACCGCGATGATTCCCTTCCTGGAGCACGACGACGCCAACCGTGCCCTCATGGGGGCAAACATGCAGCGCCAGGCGGTGCCGCTGGTCCGTAGCGAGGCCCCGCTGGTGGGCACCGGGATGGAGCTGCGCGCGGCGATCGACGCCGGCGACGTCGTCGTCGCCGAAGAAAGCGGCGTCATCGAGGAGGTGTCGGCCGACTACATCACTGTGATGCACGACAACGGCACCCGGCGTACCTACCGGATGCGCAAGTTTGCCCGGTCCAACCACGGCACTTGCGCCAACCAGTGCCCCATCGTGGACGCGGGCGACCGAGTCGAGGCCGGTCAGGTGATCGCCGACGGTCCCTGTACTGACGACGGCGAGATGGCGCTGGGCAAGAACCTGCTGGTGGCCATCATGCCGTGGGAGGGCCACAACTACGAGGACGCGATCATCCTGTCCAACCGCCTGGTCGAAGAGGACGTGCTCACCTCGATCCACATCGAGGAGCATGAGATCGATGCTCGCGACACCAAGCTGGGTGCGGAGGAGATCACCCGCGACATCCCGAACATCTCCGACGAGGTGCTCGCCGACCTGGATGAGCGGGGCATCGTGCGCATCGGTGCCGAGGTTCGCGACGGGGACATCCTGGTCGGCAAGGTCACCCCGAAGGGTGAGACCGAGCTGACGCCGGAGGAGCGGCTGCTGCGTGCCATCTTCGGTGAGAAGGCCCGCGAGGTGCGCGACACTTCGCTGAAGGTGCCGCACGGCGAATCCGGCAAGGTGATCGGCATTCGGGTGTTTTCCCGCGAGGACGAGGACGAGTTGCCGGCCGGTGTCAACGAGCTGGTGCGTGTGTATGTGGCTCAGAAACGCAAGATCTCCGACGGTGACAAGCTGGCCGGCCGGCACGGCAACAAGGGCGTGATCGGCAAGATCCTGCCGGTTGAGGACATGCCGTTCCTTGCCGACGGCACCCCGGTGGACATTATTTTGAACACCCACGGCGTGCCGCGACGGATGAACATCGGCCAGATTTTGGAGACCCACCTGGGTTGGTGTGCCCACAGCGGCTGGAAGGTCGACGCCGCCAAGGGGGTTCCGGACTGGGCCGCCAGGCTGCCCGACGAACTGCTCGAGGCGCAGCCGAACGCCATTGTGTCGACGCCGGTGTTCGACGGCGCCCAGGAGGCCGAGCTGCAGGGCCTGTTGTCGTGCACGCTGCCCAACCGCGACGGTGACGTGCTGGTCGACGCCGACGGCAAGGCCATGCTCTTCGACGGGCGCAGCGGCGAGCCGTTCCCGTACCCGGTCACGGTTGGCTACATGTACATCATGAAGCTGCACCACCTGGTGGACGACAAGATCCACGCCCGCTCCACCGGGCCGTACTCGATGATCACCCAGCAGCCGCTGGGCGGTAAGGCGCAGTTCGGTGGCCAGCGGTTCGGGGAGATGGAGTGCTGGGCCATGCAGGCCTACGGTGCTGCCTACACCCTGCAGGAGCTGTTGACCATCAAGTCCGATGACACCGTCGGCCGCGTCAAGGTGTACGAGGCGATCGTCAAGGGTGAGAACATCCCGGAGCCGGGCATCCCCGAGTCGTTCAAGGTGCTGCTCAAAGAACTGCAGTCGCTGTGCCTCAACGTCGAGGTGCTATCGAGTGACGGTGCGGCGATCGAACTGCGCGAAGGTGAGGACGAGGACCTGGAGCGGGCCGCGGCCAACCTGGGAATCAATCTGTCCCGCAACGAATCCGCAAGTGTCGAGGATCTTGCGTAA 
MLEGCILADSRQSKTAASPSPSRPQSSSNNSVPGAPNRVSFAKLREPLEVPGLLDVQTDSFEWLIGSPRWRESAAERGDVNPVGGLEEVLYELSPIEDFSGSMSLSFSDPRFDDVKAPVDECKDKDMTYAAPLFVTAEFINNNTGEIKSQTVFMGDFPMMTEKGTFIINGTERVVVSQLVRSPGVYFDETIDKSTDKTLHSVKVIPSRGAWLEFDVDKRDTVGVRIDRKRRQPVTVLLKALGWTSEQIVERFGFSEIMRSTLEKDNTVGTDEALLDIYRKLRPGEPPTKESAQTLLENLFFKEKRYDLARVGRYKVNKKLGLHVGEPITSSTLTEEDVVATIEYLVRLHEGQTTMTVPGGVEVPVETDDIDHFGNRRLRTVGELIQNQIRVGMSRMERVVRERMTTQDVEAITPQTLINIRPVVAAIKEFFGTSQLSQFMDQNNPLSGLTHKRRLSALGPGGLSRERAGLEVRDVHPSHYGRMCPIETPEGPNIGLIGSLSVYARVNPFGFIETPYRKVVDGVVSDEIVYLTADEEDRHVVAQANSPIDADGRFVEPRVLVRRKAGEVEYVPSSEVDYMDVSPRQMVSVATAMIPFLEHDDANRALMGANMQRQAVPLVRSEAPLVGTGMELRAAIDAGDVVVAEESGVIEEVSADYITVMHDNGTRRTYRMRKFARSNHGTCANQCPIVDAGDRVEAGQVIADGPCTDDGEMALGKNLLVAIMPWEGHNYEDAIILSNRLVEEDVLTSIHIEEHEIDARDTKLGAEEITRDIPNISDEVLADLDERGIVRIGAEVRDGDILVGKVTPKGETELTPEERLLRAIFGEKAREVRDTSLKVPHGESGKVIGIRVFSREDEDELPAGVNELVRVYVAQKRKISDGDKLAGRHGNKGVIGKILPVEDMPFLADGTPVDIILNTHGVPRRMNIGQILETHLGWCAHSGWKVDAAKGVPDWAARLPDELLEAQPNAIVSTPVFDGAQEAELQGLLSCTLPNRDGDVLVDADGKAMLFDGRSGEPFPYPVTVGYMYIMKLHHLVDDKIHARSTGPYSMITQQPLGGKAQFGGQRFGEMECWAMQAYGAAYTLQELLTIKSDDTVGRVKVYEAIVKGENIPEPGIPESFKVLLKELQSLCLNVEVLSSDGAAIELREGEDEDLERAAANLGINLSRNESASVEDLA 
MADSRQSKTAASPSPSRPQSSSNNSVPGAPNRVSFAKLREPLEVPGLLDVQTDSFEWLIGSPRWRESAAERGDVNPVGGLEEVLYELSPIEDFSGSMSLSFSDPRFDDVKAPVDECKDKDMTYAAPLFVTAEFINNNTGEIKSQTVFMGDFPMMTEKGTFIINGTERVVVSQLVRSPGVYFDETIDKSTDKTLHSVKVIPSRGAWLEFDVDKRDTVGVRIDRKRRQPVTVLLKALGWTSEQIVERFGFSEIMRSTLEKDNTVGTDEALLDIYRKLRPGEPPTKESAQTLLENLFFKEKRYDLARVGRYKVNKKLGLHVGEPITSSTLTEEDVVATIEYLVRLHEGQTTMTVPGGVEVPVETDDIDHFGNRRLRTVGELIQNQIRVGMSRMERVVRERMTTQDVEAITPQTLINIRPVVAAIKEFFGTSQLSQFMDQNNPLSGLTHKRRLSALGPGGLSRERAGLEVRDVHPSHYGRMCPIETPEGPNIGLIGSLSVYARVNPFGFIETPYRKVVDGVVSDEIVYLTADEEDRHVVAQANSPIDADGRFVEPRVLVRRKAGEVEYVPSSEVDYMDVSPRQMVSVATAMIPFLEHDDANRALMGANMQRQAVPLVRSEAPLVGTGMELRAAIDAGDVVVAEESGVIEEVSADYITVMHDNGTRRTYRMRKFARSNHGTCANQCPIVDAGDRVEAGQVIADGPCTDDGEMALGKNLLVAIMPWEGHNYEDAIILSNRLVEEDVLTSIHIEEHEIDARDTKLGAEEITRDIPNISDEVLADLDERGIVRIGAEVRDGDILVGKVTPKGETELTPEERLLRAIFGEKAREVRDTSLKVPHGESGKVIGIRVFSREDEDELPAGVNELVRVYVAQKRKISDGDKLAGRHGNKGVIGKILPVEDMPFLADGTPVDIILNTHGVPRRMNIGQILETHLGWCAHSGWKVDAAKGVPDWAARLPDELLEAQPNAIVSTPVFDGAQEAELQGLLSCTLPNRDGDVLVDADGKAMLFDGRSGEPFPYPVTVGYMYIMKLHHLVDDKIHARSTGPYSMITQQPLGGKAQFGGQRFGEMECWAMQAYGAAYTLQELLTIKSDDTVGRVKVYEAIVKGENIPEPGIPESFKVLLKELQSLCLNVEVLSSDGAAIELREGEDEDLERAAANLGINLSRNESASVEDLA 100.51 gnl|BL_ORD_ID|2005|hsp_num:0 1237
+NC_000962.3_689 # 759789 # 763325 # 1 # ID=1_689;partial=00;start_type=GTG;rbs_motif=None;rbs_spacer=None;gc_cont=0.643 NC_000962.3_689 759789 763325 + Strict 2300 2394.77 Mycobacterium tuberculosis rpoB mutants conferring resistance to rifampicin 99.91 3000815 protein variant model D516G, H526T, L511R n/a rifamycin antibiotic antibiotic target alteration; antibiotic target replacement rifamycin-resistant beta-subunit of RNA polymerase (rpoB) GTGCTGGAAGGATGCATCTTGGCAGATTCCCGCCAGAGCAAAACAGCCGCTAGTCCTAGTCCGAGTCGCCCGCAAAGTTCCTCGAATAACTCCGTACCCGGAGCGCCAAACCGGGTCTCCTTCGCTAAGCTGCGCGAACCACTTGAGGTTCCGGGACTCCTTGACGTCCAGACCGATTCGTTCGAGTGGCTGATCGGTTCGCCGCGCTGGCGCGAATCCGCCGCCGAGCGGGGTGATGTCAACCCAGTGGGTGGCCTGGAAGAGGTGCTCTACGAGCTGTCTCCGATCGAGGACTTCTCCGGGTCGATGTCGTTGTCGTTCTCTGACCCTCGTTTCGACGATGTCAAGGCACCCGTCGACGAGTGCAAAGACAAGGACATGACGTACGCGGCTCCACTGTTCGTCACCGCCGAGTTCATCAACAACAACACCGGTGAGATCAAGAGTCAGACGGTGTTCATGGGTGACTTCCCGATGATGACCGAGAAGGGCACGTTCATCATCAACGGGACCGAGCGTGTGGTGGTCAGCCAGCTGGTGCGGTCGCCCGGGGTGTACTTCGACGAGACCATTGACAAGTCCACCGACAAGACGCTGCACAGCGTCAAGGTGATCCCGAGCCGCGGCGCGTGGCTCGAGTTTGACGTCGACAAGCGCGACACCGTCGGCGTGCGCATCGACCGCAAACGCCGGCAACCGGTCACCGTGCTGCTCAAGGCGCTGGGCTGGACCAGCGAGCAGATTGTCGAGCGGTTCGGGTTCTCCGAGATCATGCGATCGACGCTGGAGAAGGACAACACCGTCGGCACCGACGAGGCGCTGTTGGACATCTACCGCAAGCTGCGTCCGGGCGAGCCCCCGACCAAAGAGTCAGCGCAGACGCTGTTGGAAAACTTGTTCTTCAAGGAGAAGCGCTACGACCTGGCCCGCGTCGGTCGCTATAAGGTCAACAAGAAGCTCGGGCTGCATGTCGGCGAGCCCATCACGTCGTCGACGCTGACCGAAGAAGACGTCGTGGCCACCATCGAATATCTGGTCCGCTTGCACGAGGGTCAGACCACGATGACCGTTCCGGGCGGCGTCGAGGTGCCGGTGGAAACCGACGACATCGACCACTTCGGCAACCGCCGCCTGCGTACGGTCGGCGAGCTGATCCAAAACCAGATCCGGGTCGGCATGTCGCGGATGGAGCGGGTGGTCCGGGAGCGGATGACCACCCAGGACGTGGAGGCGATCACACCGCAGACGTTGATCAACATCCGGCCGGTGGTCGCCGCGATCAAGGAGTTCTTCGGCACCAGCCAGCTGAGCCAATTCATGGACCAGAACAACCCGCTGTCGGGGTTGACCCACAAGCGCCGACTGTCGGCGCTGGGGCCCGGCGGTCTGTCACGTGAGCGTGCCGGGCTGGAGGTCCGCGACGTGCACCCGTCGCACTACGGCCGGATGTGCCCGATCGAAACCCCTGAGGGGCCCAACATCGGTCTGATCGGCTCGCTGTCGGTGTACGCGCGGGTCAACCCGTTCGGGTTCATCGAAACGCCGTACCGCA
AGGTGGTCGACGGCGTGGTTAGCGACGAGATCGTGTACCTGACCGCCGACGAGGAGGACCGCCACGTGGTGGCACAGGCCAATTCGCCGATCGATGCGGACGGTCGCTTCGTCGAGCCGCGCGTGCTGGTCCGCCGCAAGGCGGGCGAGGTGGAGTACGTGCCCTCGTCTGAGGTGGACTACATGGACGTCTCGCCCCGCCAGATGGTGTCGGTGGCCACCGCGATGATTCCCTTCCTGGAGCACGACGACGCCAACCGTGCCCTCATGGGGGCAAACATGCAGCGCCAGGCGGTGCCGCTGGTCCGTAGCGAGGCCCCGCTGGTGGGCACCGGGATGGAGCTGCGCGCGGCGATCGACGCCGGCGACGTCGTCGTCGCCGAAGAAAGCGGCGTCATCGAGGAGGTGTCGGCCGACTACATCACTGTGATGCACGACAACGGCACCCGGCGTACCTACCGGATGCGCAAGTTTGCCCGGTCCAACCACGGCACTTGCGCCAACCAGTGCCCCATCGTGGACGCGGGCGACCGAGTCGAGGCCGGTCAGGTGATCGCCGACGGTCCCTGTACTGACGACGGCGAGATGGCGCTGGGCAAGAACCTGCTGGTGGCCATCATGCCGTGGGAGGGCCACAACTACGAGGACGCGATCATCCTGTCCAACCGCCTGGTCGAAGAGGACGTGCTCACCTCGATCCACATCGAGGAGCATGAGATCGATGCTCGCGACACCAAGCTGGGTGCGGAGGAGATCACCCGCGACATCCCGAACATCTCCGACGAGGTGCTCGCCGACCTGGATGAGCGGGGCATCGTGCGCATCGGTGCCGAGGTTCGCGACGGGGACATCCTGGTCGGCAAGGTCACCCCGAAGGGTGAGACCGAGCTGACGCCGGAGGAGCGGCTGCTGCGTGCCATCTTCGGTGAGAAGGCCCGCGAGGTGCGCGACACTTCGCTGAAGGTGCCGCACGGCGAATCCGGCAAGGTGATCGGCATTCGGGTGTTTTCCCGCGAGGACGAGGACGAGTTGCCGGCCGGTGTCAACGAGCTGGTGCGTGTGTATGTGGCTCAGAAACGCAAGATCTCCGACGGTGACAAGCTGGCCGGCCGGCACGGCAACAAGGGCGTGATCGGCAAGATCCTGCCGGTTGAGGACATGCCGTTCCTTGCCGACGGCACCCCGGTGGACATTATTTTGAACACCCACGGCGTGCCGCGACGGATGAACATCGGCCAGATTTTGGAGACCCACCTGGGTTGGTGTGCCCACAGCGGCTGGAAGGTCGACGCCGCCAAGGGGGTTCCGGACTGGGCCGCCAGGCTGCCCGACGAACTGCTCGAGGCGCAGCCGAACGCCATTGTGTCGACGCCGGTGTTCGACGGCGCCCAGGAGGCCGAGCTGCAGGGCCTGTTGTCGTGCACGCTGCCCAACCGCGACGGTGACGTGCTGGTCGACGCCGACGGCAAGGCCATGCTCTTCGACGGGCGCAGCGGCGAGCCGTTCCCGTACCCGGTCACGGTTGGCTACATGTACATCATGAAGCTGCACCACCTGGTGGACGACAAGATCCACGCCCGCTCCACCGGGCCGTACTCGATGATCACCCAGCAGCCGCTGGGCGGTAAGGCGCAGTTCGGTGGCCAGCGGTTCGGGGAGATGGAGTGCTGGGCCATGCAGGCCTACGGTGCTGCCTACACCCTGCAGGAGCTGTTGACCATCAAGTCCGATGACACCGTCGGCCGCGTCAAGGTGTACGAGGCGATCGTCAAGGGTGAGAACATCCCGGAGCCGGGCATCCCCGAGTCGTTCAAGGTGCTGCTCAAAGAACTGCAGTCGCTGTGCCTCAACGTCGAGGTGCTATCGAGTGACGGTGCGGCGATCGAACTGCGCGAAGGTGAGGACGAGGACCTGGAGCGGGCCGCGGCCAACCTGGGAATCAATCTGTCCCGCAACGAATCCGCAAGTGTCGAGGATCTTGCGTAA 
MLEGCILADSRQSKTAASPSPSRPQSSSNNSVPGAPNRVSFAKLREPLEVPGLLDVQTDSFEWLIGSPRWRESAAERGDVNPVGGLEEVLYELSPIEDFSGSMSLSFSDPRFDDVKAPVDECKDKDMTYAAPLFVTAEFINNNTGEIKSQTVFMGDFPMMTEKGTFIINGTERVVVSQLVRSPGVYFDETIDKSTDKTLHSVKVIPSRGAWLEFDVDKRDTVGVRIDRKRRQPVTVLLKALGWTSEQIVERFGFSEIMRSTLEKDNTVGTDEALLDIYRKLRPGEPPTKESAQTLLENLFFKEKRYDLARVGRYKVNKKLGLHVGEPITSSTLTEEDVVATIEYLVRLHEGQTTMTVPGGVEVPVETDDIDHFGNRRLRTVGELIQNQIRVGMSRMERVVRERMTTQDVEAITPQTLINIRPVVAAIKEFFGTSQLSQFMDQNNPLSGLTHKRRLSALGPGGLSRERAGLEVRDVHPSHYGRMCPIETPEGPNIGLIGSLSVYARVNPFGFIETPYRKVVDGVVSDEIVYLTADEEDRHVVAQANSPIDADGRFVEPRVLVRRKAGEVEYVPSSEVDYMDVSPRQMVSVATAMIPFLEHDDANRALMGANMQRQAVPLVRSEAPLVGTGMELRAAIDAGDVVVAEESGVIEEVSADYITVMHDNGTRRTYRMRKFARSNHGTCANQCPIVDAGDRVEAGQVIADGPCTDDGEMALGKNLLVAIMPWEGHNYEDAIILSNRLVEEDVLTSIHIEEHEIDARDTKLGAEEITRDIPNISDEVLADLDERGIVRIGAEVRDGDILVGKVTPKGETELTPEERLLRAIFGEKAREVRDTSLKVPHGESGKVIGIRVFSREDEDELPAGVNELVRVYVAQKRKISDGDKLAGRHGNKGVIGKILPVEDMPFLADGTPVDIILNTHGVPRRMNIGQILETHLGWCAHSGWKVDAAKGVPDWAARLPDELLEAQPNAIVSTPVFDGAQEAELQGLLSCTLPNRDGDVLVDADGKAMLFDGRSGEPFPYPVTVGYMYIMKLHHLVDDKIHARSTGPYSMITQQPLGGKAQFGGQRFGEMECWAMQAYGAAYTLQELLTIKSDDTVGRVKVYEAIVKGENIPEPGIPESFKVLLKELQSLCLNVEVLSSDGAAIELREGEDEDLERAAANLGINLSRNESASVEDLA 
MADSRQSKTAASPSPSRPQSSSNNSVPGAPNRVSFAKLREPLEVPGLLDVQTDSFEWLIGSPRWRESAAERGDVNPVGGLEEVLYELSPIEDFSGSMSLSFSDPRFDDVKAPVDECKDKDMTYAAPLFVTAEFINNNTGEIKSQTVFMGDFPMMTEKGTFIINGTERVVVSQLVRSPGVYFDETIDKSTDKTLHSVKVIPSRGAWLEFDVDKRDTVGVRIDRKRRQPVTVLLKALGWTSEQIVERFGFSEIMRSTLEKDNTVGTDEALLDIYRKLRPGEPPTKESAQTLLENLFFKEKRYDLARVGRYKVNKKLGLHVGEPITSSTLTEEDVVATIEYLVRLHEGQTTMTVPGGVEVPVETDDIDHFGNRRLRTVGELIQNQIRVGMSRMERVVRERMTTQDVEAITPQTLINIRPVVAAIKEFFGTSQLSQFMDQNNPLSGLTHKRRLSALGPGGLSRERAGLEVRDVHPSHYGRMCPIETPEGPNIGLIGSLSVYARVNPFGFIETPYRKVVDGVVSDEIVYLTADEEDRHVVAQANSPIDADGRFVEPRVLVRRKAGEVEYVPSSEVDYMDVSPRQMVSVATAMIPFLEHDDANRALMGANMQRQAVPLVRSEAPLVGTGMELRAAIDAGDVVVAEESGVIEEVSADYITVMHDNGTRRTYRMRKFARSNHGTCANQCPIVDAGDRVEAGQVIADGPCTDDGEMALGKNLLVAIMPWEGHNYEDAIILSNRLVEEDVLTSIHIEEHEIDARDTKLGAEEITRDIPNISDEVLADLDERGIVRIGAEVRDGDILVGKVTPKGETELTPEERLLRAIFGEKAREVRDTSLKVPHGESGKVIGIRVFSREDEDELPAGVNELVRVYVAQKRKISDGDKLAGRHGNKGVIGKILPVEDMPFLADGTPVDIILNTHGVPRRMNIGQILETHLGWCAHSGWKVDAAKGVPDWAARLPDELLEAQPNAIVSTPVFDGAQEAELQGLLSCTLPNRDGDVLVDADGKAMLFDGRSGEPFPYPVTVGYMYIMKLHHLVDDKIHARSTGPYSMITQQPLGGKAQFGGQRFGEMECWAMQAYGAAYTLQELLTIKSDDTVGRVKVYEAIVKGENIPEPGIPESFKVLLKELQSLCLNVEVLSSDGAAIELREGEDEDLERAAANLGINLSRNESASVEDLA 100.51 gnl|BL_ORD_ID|2005|hsp_num:0 1237
+NC_000962.3_689 # 759789 # 763325 # 1 # ID=1_689;partial=00;start_type=GTG;rbs_motif=None;rbs_spacer=None;gc_cont=0.643 NC_000962.3_689 759789 763325 + Strict 2300 2394.77 Mycobacterium tuberculosis rpoB mutants conferring resistance to rifampicin 99.91 3000805 protein variant model D516G, H526T, L511R n/a rifamycin antibiotic antibiotic target alteration; antibiotic target replacement rifamycin-resistant beta-subunit of RNA polymerase (rpoB) GTGCTGGAAGGATGCATCTTGGCAGATTCCCGCCAGAGCAAAACAGCCGCTAGTCCTAGTCCGAGTCGCCCGCAAAGTTCCTCGAATAACTCCGTACCCGGAGCGCCAAACCGGGTCTCCTTCGCTAAGCTGCGCGAACCACTTGAGGTTCCGGGACTCCTTGACGTCCAGACCGATTCGTTCGAGTGGCTGATCGGTTCGCCGCGCTGGCGCGAATCCGCCGCCGAGCGGGGTGATGTCAACCCAGTGGGTGGCCTGGAAGAGGTGCTCTACGAGCTGTCTCCGATCGAGGACTTCTCCGGGTCGATGTCGTTGTCGTTCTCTGACCCTCGTTTCGACGATGTCAAGGCACCCGTCGACGAGTGCAAAGACAAGGACATGACGTACGCGGCTCCACTGTTCGTCACCGCCGAGTTCATCAACAACAACACCGGTGAGATCAAGAGTCAGACGGTGTTCATGGGTGACTTCCCGATGATGACCGAGAAGGGCACGTTCATCATCAACGGGACCGAGCGTGTGGTGGTCAGCCAGCTGGTGCGGTCGCCCGGGGTGTACTTCGACGAGACCATTGACAAGTCCACCGACAAGACGCTGCACAGCGTCAAGGTGATCCCGAGCCGCGGCGCGTGGCTCGAGTTTGACGTCGACAAGCGCGACACCGTCGGCGTGCGCATCGACCGCAAACGCCGGCAACCGGTCACCGTGCTGCTCAAGGCGCTGGGCTGGACCAGCGAGCAGATTGTCGAGCGGTTCGGGTTCTCCGAGATCATGCGATCGACGCTGGAGAAGGACAACACCGTCGGCACCGACGAGGCGCTGTTGGACATCTACCGCAAGCTGCGTCCGGGCGAGCCCCCGACCAAAGAGTCAGCGCAGACGCTGTTGGAAAACTTGTTCTTCAAGGAGAAGCGCTACGACCTGGCCCGCGTCGGTCGCTATAAGGTCAACAAGAAGCTCGGGCTGCATGTCGGCGAGCCCATCACGTCGTCGACGCTGACCGAAGAAGACGTCGTGGCCACCATCGAATATCTGGTCCGCTTGCACGAGGGTCAGACCACGATGACCGTTCCGGGCGGCGTCGAGGTGCCGGTGGAAACCGACGACATCGACCACTTCGGCAACCGCCGCCTGCGTACGGTCGGCGAGCTGATCCAAAACCAGATCCGGGTCGGCATGTCGCGGATGGAGCGGGTGGTCCGGGAGCGGATGACCACCCAGGACGTGGAGGCGATCACACCGCAGACGTTGATCAACATCCGGCCGGTGGTCGCCGCGATCAAGGAGTTCTTCGGCACCAGCCAGCTGAGCCAATTCATGGACCAGAACAACCCGCTGTCGGGGTTGACCCACAAGCGCCGACTGTCGGCGCTGGGGCCCGGCGGTCTGTCACGTGAGCGTGCCGGGCTGGAGGTCCGCGACGTGCACCCGTCGCACTACGGCCGGATGTGCCCGATCGAAACCCCTGAGGGGCCCAACATCGGTCTGATCGGCTCGCTGTCGGTGTACGCGCGGGTCAACCCGTTCGGGTTCATCGAAACGCCGTACCGCA
AGGTGGTCGACGGCGTGGTTAGCGACGAGATCGTGTACCTGACCGCCGACGAGGAGGACCGCCACGTGGTGGCACAGGCCAATTCGCCGATCGATGCGGACGGTCGCTTCGTCGAGCCGCGCGTGCTGGTCCGCCGCAAGGCGGGCGAGGTGGAGTACGTGCCCTCGTCTGAGGTGGACTACATGGACGTCTCGCCCCGCCAGATGGTGTCGGTGGCCACCGCGATGATTCCCTTCCTGGAGCACGACGACGCCAACCGTGCCCTCATGGGGGCAAACATGCAGCGCCAGGCGGTGCCGCTGGTCCGTAGCGAGGCCCCGCTGGTGGGCACCGGGATGGAGCTGCGCGCGGCGATCGACGCCGGCGACGTCGTCGTCGCCGAAGAAAGCGGCGTCATCGAGGAGGTGTCGGCCGACTACATCACTGTGATGCACGACAACGGCACCCGGCGTACCTACCGGATGCGCAAGTTTGCCCGGTCCAACCACGGCACTTGCGCCAACCAGTGCCCCATCGTGGACGCGGGCGACCGAGTCGAGGCCGGTCAGGTGATCGCCGACGGTCCCTGTACTGACGACGGCGAGATGGCGCTGGGCAAGAACCTGCTGGTGGCCATCATGCCGTGGGAGGGCCACAACTACGAGGACGCGATCATCCTGTCCAACCGCCTGGTCGAAGAGGACGTGCTCACCTCGATCCACATCGAGGAGCATGAGATCGATGCTCGCGACACCAAGCTGGGTGCGGAGGAGATCACCCGCGACATCCCGAACATCTCCGACGAGGTGCTCGCCGACCTGGATGAGCGGGGCATCGTGCGCATCGGTGCCGAGGTTCGCGACGGGGACATCCTGGTCGGCAAGGTCACCCCGAAGGGTGAGACCGAGCTGACGCCGGAGGAGCGGCTGCTGCGTGCCATCTTCGGTGAGAAGGCCCGCGAGGTGCGCGACACTTCGCTGAAGGTGCCGCACGGCGAATCCGGCAAGGTGATCGGCATTCGGGTGTTTTCCCGCGAGGACGAGGACGAGTTGCCGGCCGGTGTCAACGAGCTGGTGCGTGTGTATGTGGCTCAGAAACGCAAGATCTCCGACGGTGACAAGCTGGCCGGCCGGCACGGCAACAAGGGCGTGATCGGCAAGATCCTGCCGGTTGAGGACATGCCGTTCCTTGCCGACGGCACCCCGGTGGACATTATTTTGAACACCCACGGCGTGCCGCGACGGATGAACATCGGCCAGATTTTGGAGACCCACCTGGGTTGGTGTGCCCACAGCGGCTGGAAGGTCGACGCCGCCAAGGGGGTTCCGGACTGGGCCGCCAGGCTGCCCGACGAACTGCTCGAGGCGCAGCCGAACGCCATTGTGTCGACGCCGGTGTTCGACGGCGCCCAGGAGGCCGAGCTGCAGGGCCTGTTGTCGTGCACGCTGCCCAACCGCGACGGTGACGTGCTGGTCGACGCCGACGGCAAGGCCATGCTCTTCGACGGGCGCAGCGGCGAGCCGTTCCCGTACCCGGTCACGGTTGGCTACATGTACATCATGAAGCTGCACCACCTGGTGGACGACAAGATCCACGCCCGCTCCACCGGGCCGTACTCGATGATCACCCAGCAGCCGCTGGGCGGTAAGGCGCAGTTCGGTGGCCAGCGGTTCGGGGAGATGGAGTGCTGGGCCATGCAGGCCTACGGTGCTGCCTACACCCTGCAGGAGCTGTTGACCATCAAGTCCGATGACACCGTCGGCCGCGTCAAGGTGTACGAGGCGATCGTCAAGGGTGAGAACATCCCGGAGCCGGGCATCCCCGAGTCGTTCAAGGTGCTGCTCAAAGAACTGCAGTCGCTGTGCCTCAACGTCGAGGTGCTATCGAGTGACGGTGCGGCGATCGAACTGCGCGAAGGTGAGGACGAGGACCTGGAGCGGGCCGCGGCCAACCTGGGAATCAATCTGTCCCGCAACGAATCCGCAAGTGTCGAGGATCTTGCGTAA 
MLEGCILADSRQSKTAASPSPSRPQSSSNNSVPGAPNRVSFAKLREPLEVPGLLDVQTDSFEWLIGSPRWRESAAERGDVNPVGGLEEVLYELSPIEDFSGSMSLSFSDPRFDDVKAPVDECKDKDMTYAAPLFVTAEFINNNTGEIKSQTVFMGDFPMMTEKGTFIINGTERVVVSQLVRSPGVYFDETIDKSTDKTLHSVKVIPSRGAWLEFDVDKRDTVGVRIDRKRRQPVTVLLKALGWTSEQIVERFGFSEIMRSTLEKDNTVGTDEALLDIYRKLRPGEPPTKESAQTLLENLFFKEKRYDLARVGRYKVNKKLGLHVGEPITSSTLTEEDVVATIEYLVRLHEGQTTMTVPGGVEVPVETDDIDHFGNRRLRTVGELIQNQIRVGMSRMERVVRERMTTQDVEAITPQTLINIRPVVAAIKEFFGTSQLSQFMDQNNPLSGLTHKRRLSALGPGGLSRERAGLEVRDVHPSHYGRMCPIETPEGPNIGLIGSLSVYARVNPFGFIETPYRKVVDGVVSDEIVYLTADEEDRHVVAQANSPIDADGRFVEPRVLVRRKAGEVEYVPSSEVDYMDVSPRQMVSVATAMIPFLEHDDANRALMGANMQRQAVPLVRSEAPLVGTGMELRAAIDAGDVVVAEESGVIEEVSADYITVMHDNGTRRTYRMRKFARSNHGTCANQCPIVDAGDRVEAGQVIADGPCTDDGEMALGKNLLVAIMPWEGHNYEDAIILSNRLVEEDVLTSIHIEEHEIDARDTKLGAEEITRDIPNISDEVLADLDERGIVRIGAEVRDGDILVGKVTPKGETELTPEERLLRAIFGEKAREVRDTSLKVPHGESGKVIGIRVFSREDEDELPAGVNELVRVYVAQKRKISDGDKLAGRHGNKGVIGKILPVEDMPFLADGTPVDIILNTHGVPRRMNIGQILETHLGWCAHSGWKVDAAKGVPDWAARLPDELLEAQPNAIVSTPVFDGAQEAELQGLLSCTLPNRDGDVLVDADGKAMLFDGRSGEPFPYPVTVGYMYIMKLHHLVDDKIHARSTGPYSMITQQPLGGKAQFGGQRFGEMECWAMQAYGAAYTLQELLTIKSDDTVGRVKVYEAIVKGENIPEPGIPESFKVLLKELQSLCLNVEVLSSDGAAIELREGEDEDLERAAANLGINLSRNESASVEDLA 
MADSRQSKTAASPSPSRPQSSSNNSVPGAPNRVSFAKLREPLEVPGLLDVQTDSFEWLIGSPRWRESAAERGDVNPVGGLEEVLYELSPIEDFSGSMSLSFSDPRFDDVKAPVDECKDKDMTYAAPLFVTAEFINNNTGEIKSQTVFMGDFPMMTEKGTFIINGTERVVVSQLVRSPGVYFDETIDKSTDKTLHSVKVIPSRGAWLEFDVDKRDTVGVRIDRKRRQPVTVLLKALGWTSEQIVERFGFSEIMRSTLEKDNTVGTDEALLDIYRKLRPGEPPTKESAQTLLENLFFKEKRYDLARVGRYKVNKKLGLHVGEPITSSTLTEEDVVATIEYLVRLHEGQTTMTVPGGVEVPVETDDIDHFGNRRLRTVGELIQNQIRVGMSRMERVVRERMTTQDVEAITPQTLINIRPVVAAIKEFFGTSQLSQFMDQNNPLSGLTHKRRLSALGPGGLSRERAGLEVRDVHPSHYGRMCPIETPEGPNIGLIGSLSVYARVNPFGFIETPYRKVVDGVVSDEIVYLTADEEDRHVVAQANSPIDADGRFVEPRVLVRRKAGEVEYVPSSEVDYMDVSPRQMVSVATAMIPFLEHDDANRALMGANMQRQAVPLVRSEAPLVGTGMELRAAIDAGDVVVAEESGVIEEVSADYITVMHDNGTRRTYRMRKFARSNHGTCANQCPIVDAGDRVEAGQVIADGPCTDDGEMALGKNLLVAIMPWEGHNYEDAIILSNRLVEEDVLTSIHIEEHEIDARDTKLGAEEITRDIPNISDEVLADLDERGIVRIGAEVRDGDILVGKVTPKGETELTPEERLLRAIFGEKAREVRDTSLKVPHGESGKVIGIRVFSREDEDELPAGVNELVRVYVAQKRKISDGDKLAGRHGNKGVIGKILPVEDMPFLADGTPVDIILNTHGVPRRMNIGQILETHLGWCAHSGWKVDAAKGVPDWAARLPDELLEAQPNAIVSTPVFDGAQEAELQGLLSCTLPNRDGDVLVDADGKAMLFDGRSGEPFPYPVTVGYMYIMKLHHLVDDKIHARSTGPYSMITQQPLGGKAQFGGQRFGEMECWAMQAYGAAYTLQELLTIKSDDTVGRVKVYEAIVKGENIPEPGIPESFKVLLKELQSLCLNVEVLSSDGAAIELREGEDEDLERAAANLGINLSRNESASVEDLA 100.51 gnl|BL_ORD_ID|2005|hsp_num:0 1237
+NC_000962.3_689 # 759789 # 763325 # 1 # ID=1_689;partial=00;start_type=GTG;rbs_motif=None;rbs_spacer=None;gc_cont=0.643 NC_000962.3_689 759789 763325 + Strict 2300 2394.77 Mycobacterium tuberculosis rpoB mutants conferring resistance to rifampicin 99.91 3000026 protein variant model D516G, H526T, L511R n/a rifamycin antibiotic antibiotic target alteration; antibiotic target replacement rifamycin-resistant beta-subunit of RNA polymerase (rpoB) GTGCTGGAAGGATGCATCTTGGCAGATTCCCGCCAGAGCAAAACAGCCGCTAGTCCTAGTCCGAGTCGCCCGCAAAGTTCCTCGAATAACTCCGTACCCGGAGCGCCAAACCGGGTCTCCTTCGCTAAGCTGCGCGAACCACTTGAGGTTCCGGGACTCCTTGACGTCCAGACCGATTCGTTCGAGTGGCTGATCGGTTCGCCGCGCTGGCGCGAATCCGCCGCCGAGCGGGGTGATGTCAACCCAGTGGGTGGCCTGGAAGAGGTGCTCTACGAGCTGTCTCCGATCGAGGACTTCTCCGGGTCGATGTCGTTGTCGTTCTCTGACCCTCGTTTCGACGATGTCAAGGCACCCGTCGACGAGTGCAAAGACAAGGACATGACGTACGCGGCTCCACTGTTCGTCACCGCCGAGTTCATCAACAACAACACCGGTGAGATCAAGAGTCAGACGGTGTTCATGGGTGACTTCCCGATGATGACCGAGAAGGGCACGTTCATCATCAACGGGACCGAGCGTGTGGTGGTCAGCCAGCTGGTGCGGTCGCCCGGGGTGTACTTCGACGAGACCATTGACAAGTCCACCGACAAGACGCTGCACAGCGTCAAGGTGATCCCGAGCCGCGGCGCGTGGCTCGAGTTTGACGTCGACAAGCGCGACACCGTCGGCGTGCGCATCGACCGCAAACGCCGGCAACCGGTCACCGTGCTGCTCAAGGCGCTGGGCTGGACCAGCGAGCAGATTGTCGAGCGGTTCGGGTTCTCCGAGATCATGCGATCGACGCTGGAGAAGGACAACACCGTCGGCACCGACGAGGCGCTGTTGGACATCTACCGCAAGCTGCGTCCGGGCGAGCCCCCGACCAAAGAGTCAGCGCAGACGCTGTTGGAAAACTTGTTCTTCAAGGAGAAGCGCTACGACCTGGCCCGCGTCGGTCGCTATAAGGTCAACAAGAAGCTCGGGCTGCATGTCGGCGAGCCCATCACGTCGTCGACGCTGACCGAAGAAGACGTCGTGGCCACCATCGAATATCTGGTCCGCTTGCACGAGGGTCAGACCACGATGACCGTTCCGGGCGGCGTCGAGGTGCCGGTGGAAACCGACGACATCGACCACTTCGGCAACCGCCGCCTGCGTACGGTCGGCGAGCTGATCCAAAACCAGATCCGGGTCGGCATGTCGCGGATGGAGCGGGTGGTCCGGGAGCGGATGACCACCCAGGACGTGGAGGCGATCACACCGCAGACGTTGATCAACATCCGGCCGGTGGTCGCCGCGATCAAGGAGTTCTTCGGCACCAGCCAGCTGAGCCAATTCATGGACCAGAACAACCCGCTGTCGGGGTTGACCCACAAGCGCCGACTGTCGGCGCTGGGGCCCGGCGGTCTGTCACGTGAGCGTGCCGGGCTGGAGGTCCGCGACGTGCACCCGTCGCACTACGGCCGGATGTGCCCGATCGAAACCCCTGAGGGGCCCAACATCGGTCTGATCGGCTCGCTGTCGGTGTACGCGCGGGTCAACCCGTTCGGGTTCATCGAAACGCCGTACCGCA
AGGTGGTCGACGGCGTGGTTAGCGACGAGATCGTGTACCTGACCGCCGACGAGGAGGACCGCCACGTGGTGGCACAGGCCAATTCGCCGATCGATGCGGACGGTCGCTTCGTCGAGCCGCGCGTGCTGGTCCGCCGCAAGGCGGGCGAGGTGGAGTACGTGCCCTCGTCTGAGGTGGACTACATGGACGTCTCGCCCCGCCAGATGGTGTCGGTGGCCACCGCGATGATTCCCTTCCTGGAGCACGACGACGCCAACCGTGCCCTCATGGGGGCAAACATGCAGCGCCAGGCGGTGCCGCTGGTCCGTAGCGAGGCCCCGCTGGTGGGCACCGGGATGGAGCTGCGCGCGGCGATCGACGCCGGCGACGTCGTCGTCGCCGAAGAAAGCGGCGTCATCGAGGAGGTGTCGGCCGACTACATCACTGTGATGCACGACAACGGCACCCGGCGTACCTACCGGATGCGCAAGTTTGCCCGGTCCAACCACGGCACTTGCGCCAACCAGTGCCCCATCGTGGACGCGGGCGACCGAGTCGAGGCCGGTCAGGTGATCGCCGACGGTCCCTGTACTGACGACGGCGAGATGGCGCTGGGCAAGAACCTGCTGGTGGCCATCATGCCGTGGGAGGGCCACAACTACGAGGACGCGATCATCCTGTCCAACCGCCTGGTCGAAGAGGACGTGCTCACCTCGATCCACATCGAGGAGCATGAGATCGATGCTCGCGACACCAAGCTGGGTGCGGAGGAGATCACCCGCGACATCCCGAACATCTCCGACGAGGTGCTCGCCGACCTGGATGAGCGGGGCATCGTGCGCATCGGTGCCGAGGTTCGCGACGGGGACATCCTGGTCGGCAAGGTCACCCCGAAGGGTGAGACCGAGCTGACGCCGGAGGAGCGGCTGCTGCGTGCCATCTTCGGTGAGAAGGCCCGCGAGGTGCGCGACACTTCGCTGAAGGTGCCGCACGGCGAATCCGGCAAGGTGATCGGCATTCGGGTGTTTTCCCGCGAGGACGAGGACGAGTTGCCGGCCGGTGTCAACGAGCTGGTGCGTGTGTATGTGGCTCAGAAACGCAAGATCTCCGACGGTGACAAGCTGGCCGGCCGGCACGGCAACAAGGGCGTGATCGGCAAGATCCTGCCGGTTGAGGACATGCCGTTCCTTGCCGACGGCACCCCGGTGGACATTATTTTGAACACCCACGGCGTGCCGCGACGGATGAACATCGGCCAGATTTTGGAGACCCACCTGGGTTGGTGTGCCCACAGCGGCTGGAAGGTCGACGCCGCCAAGGGGGTTCCGGACTGGGCCGCCAGGCTGCCCGACGAACTGCTCGAGGCGCAGCCGAACGCCATTGTGTCGACGCCGGTGTTCGACGGCGCCCAGGAGGCCGAGCTGCAGGGCCTGTTGTCGTGCACGCTGCCCAACCGCGACGGTGACGTGCTGGTCGACGCCGACGGCAAGGCCATGCTCTTCGACGGGCGCAGCGGCGAGCCGTTCCCGTACCCGGTCACGGTTGGCTACATGTACATCATGAAGCTGCACCACCTGGTGGACGACAAGATCCACGCCCGCTCCACCGGGCCGTACTCGATGATCACCCAGCAGCCGCTGGGCGGTAAGGCGCAGTTCGGTGGCCAGCGGTTCGGGGAGATGGAGTGCTGGGCCATGCAGGCCTACGGTGCTGCCTACACCCTGCAGGAGCTGTTGACCATCAAGTCCGATGACACCGTCGGCCGCGTCAAGGTGTACGAGGCGATCGTCAAGGGTGAGAACATCCCGGAGCCGGGCATCCCCGAGTCGTTCAAGGTGCTGCTCAAAGAACTGCAGTCGCTGTGCCTCAACGTCGAGGTGCTATCGAGTGACGGTGCGGCGATCGAACTGCGCGAAGGTGAGGACGAGGACCTGGAGCGGGCCGCGGCCAACCTGGGAATCAATCTGTCCCGCAACGAATCCGCAAGTGTCGAGGATCTTGCGTAA 
MLEGCILADSRQSKTAASPSPSRPQSSSNNSVPGAPNRVSFAKLREPLEVPGLLDVQTDSFEWLIGSPRWRESAAERGDVNPVGGLEEVLYELSPIEDFSGSMSLSFSDPRFDDVKAPVDECKDKDMTYAAPLFVTAEFINNNTGEIKSQTVFMGDFPMMTEKGTFIINGTERVVVSQLVRSPGVYFDETIDKSTDKTLHSVKVIPSRGAWLEFDVDKRDTVGVRIDRKRRQPVTVLLKALGWTSEQIVERFGFSEIMRSTLEKDNTVGTDEALLDIYRKLRPGEPPTKESAQTLLENLFFKEKRYDLARVGRYKVNKKLGLHVGEPITSSTLTEEDVVATIEYLVRLHEGQTTMTVPGGVEVPVETDDIDHFGNRRLRTVGELIQNQIRVGMSRMERVVRERMTTQDVEAITPQTLINIRPVVAAIKEFFGTSQLSQFMDQNNPLSGLTHKRRLSALGPGGLSRERAGLEVRDVHPSHYGRMCPIETPEGPNIGLIGSLSVYARVNPFGFIETPYRKVVDGVVSDEIVYLTADEEDRHVVAQANSPIDADGRFVEPRVLVRRKAGEVEYVPSSEVDYMDVSPRQMVSVATAMIPFLEHDDANRALMGANMQRQAVPLVRSEAPLVGTGMELRAAIDAGDVVVAEESGVIEEVSADYITVMHDNGTRRTYRMRKFARSNHGTCANQCPIVDAGDRVEAGQVIADGPCTDDGEMALGKNLLVAIMPWEGHNYEDAIILSNRLVEEDVLTSIHIEEHEIDARDTKLGAEEITRDIPNISDEVLADLDERGIVRIGAEVRDGDILVGKVTPKGETELTPEERLLRAIFGEKAREVRDTSLKVPHGESGKVIGIRVFSREDEDELPAGVNELVRVYVAQKRKISDGDKLAGRHGNKGVIGKILPVEDMPFLADGTPVDIILNTHGVPRRMNIGQILETHLGWCAHSGWKVDAAKGVPDWAARLPDELLEAQPNAIVSTPVFDGAQEAELQGLLSCTLPNRDGDVLVDADGKAMLFDGRSGEPFPYPVTVGYMYIMKLHHLVDDKIHARSTGPYSMITQQPLGGKAQFGGQRFGEMECWAMQAYGAAYTLQELLTIKSDDTVGRVKVYEAIVKGENIPEPGIPESFKVLLKELQSLCLNVEVLSSDGAAIELREGEDEDLERAAANLGINLSRNESASVEDLA 
MADSRQSKTAASPSPSRPQSSSNNSVPGAPNRVSFAKLREPLEVPGLLDVQTDSFEWLIGSPRWRESAAERGDVNPVGGLEEVLYELSPIEDFSGSMSLSFSDPRFDDVKAPVDECKDKDMTYAAPLFVTAEFINNNTGEIKSQTVFMGDFPMMTEKGTFIINGTERVVVSQLVRSPGVYFDETIDKSTDKTLHSVKVIPSRGAWLEFDVDKRDTVGVRIDRKRRQPVTVLLKALGWTSEQIVERFGFSEIMRSTLEKDNTVGTDEALLDIYRKLRPGEPPTKESAQTLLENLFFKEKRYDLARVGRYKVNKKLGLHVGEPITSSTLTEEDVVATIEYLVRLHEGQTTMTVPGGVEVPVETDDIDHFGNRRLRTVGELIQNQIRVGMSRMERVVRERMTTQDVEAITPQTLINIRPVVAAIKEFFGTSQLSQFMDQNNPLSGLTHKRRLSALGPGGLSRERAGLEVRDVHPSHYGRMCPIETPEGPNIGLIGSLSVYARVNPFGFIETPYRKVVDGVVSDEIVYLTADEEDRHVVAQANSPIDADGRFVEPRVLVRRKAGEVEYVPSSEVDYMDVSPRQMVSVATAMIPFLEHDDANRALMGANMQRQAVPLVRSEAPLVGTGMELRAAIDAGDVVVAEESGVIEEVSADYITVMHDNGTRRTYRMRKFARSNHGTCANQCPIVDAGDRVEAGQVIADGPCTDDGEMALGKNLLVAIMPWEGHNYEDAIILSNRLVEEDVLTSIHIEEHEIDARDTKLGAEEITRDIPNISDEVLADLDERGIVRIGAEVRDGDILVGKVTPKGETELTPEERLLRAIFGEKAREVRDTSLKVPHGESGKVIGIRVFSREDEDELPAGVNELVRVYVAQKRKISDGDKLAGRHGNKGVIGKILPVEDMPFLADGTPVDIILNTHGVPRRMNIGQILETHLGWCAHSGWKVDAAKGVPDWAARLPDELLEAQPNAIVSTPVFDGAQEAELQGLLSCTLPNRDGDVLVDADGKAMLFDGRSGEPFPYPVTVGYMYIMKLHHLVDDKIHARSTGPYSMITQQPLGGKAQFGGQRFGEMECWAMQAYGAAYTLQELLTIKSDDTVGRVKVYEAIVKGENIPEPGIPESFKVLLKELQSLCLNVEVLSSDGAAIELREGEDEDLERAAANLGINLSRNESASVEDLA 100.51 gnl|BL_ORD_ID|2005|hsp_num:0 1237
diff --git a/q2_amr/types/_transformer.py b/q2_amr/types/_transformer.py
index 163740a..0700715 100644
--- a/q2_amr/types/_transformer.py
+++ b/q2_amr/types/_transformer.py
@@ -175,8 +175,7 @@ def _11(data: CARDAnnotationDirectoryFormat) -> ProteinsDirectoryFormat:
return proteins_directory
-def create_dir_structure(data, seq_type, genes_protein_directory):
- annotation_dir = str(data)
+def create_dir_structure(annotation_dir, seq_type, genes_protein_directory):
for sample in os.listdir(annotation_dir):
for bin in os.listdir(os.path.join(annotation_dir, sample)):
for file in os.listdir(os.path.join(annotation_dir, sample, bin)):
@@ -233,9 +232,79 @@ def read_mapping_data(data_path, variant):
@plugin.register_transformer
def _12(data: CARDAlleleAnnotationDirectoryFormat) -> qiime2.Metadata:
- return read_mapping_data(data, "allele")
+ return tabulate_data(data, "allele")
@plugin.register_transformer
def _13(data: CARDGeneAnnotationDirectoryFormat) -> qiime2.Metadata:
- return read_mapping_data(data, "gene")
+ return tabulate_data(data, "gene")
+
+
+@plugin.register_transformer
+def _14(data: CARDAnnotationDirectoryFormat) -> qiime2.Metadata:
+ return tabulate_data(data, "mags")
+
+
+def tabulate_data(data_path, data_type):
+    """Combine per-sample CARD annotation tables into one qiime2.Metadata.
+
+    Every entry of ``data_path`` is treated as a sample directory. For
+    ``data_type="mags"`` each entry inside a sample is treated as a bin
+    directory holding ``amr_annotation.txt``; for "gene" and "allele" the
+    sample directory itself holds ``{data_type}_mapping_data.txt``. Each
+    table is read as tab-separated text, gets a leading "Sample Name"
+    column, and all tables are concatenated row-wise.
+
+    Parameters
+    ----------
+    data_path : path-like or directory format
+        Root of the annotation data; converted with ``str`` before use.
+    data_type : str
+        One of "mags", "gene" or "allele".
+
+    Returns
+    -------
+    qiime2.Metadata
+        Combined table whose index is named "id" and holds the row
+        numbers as strings.
+
+    Raises
+    ------
+    ValueError
+        If ``data_type`` is not supported, or from ``pd.concat`` when no
+        table was found under ``data_path``.
+    """
+    # Validate up front so an unknown type is never read as mapping data.
+    if data_type not in ("mags", "gene", "allele"):
+        raise ValueError(
+            f"Unsupported data_type {data_type!r}; "
+            'expected "mags", "gene" or "allele".'
+        )
+    root = str(data_path)
+    df_list = []
+    for samp in os.listdir(root):
+        if data_type == "mags":
+            for bin_name in os.listdir(os.path.join(root, samp)):
+                file_path = os.path.join(root, samp, bin_name, "amr_annotation.txt")
+                df = pd.read_csv(file_path, sep="\t")
+                df.insert(0, "Sample Name", f"{samp}/{bin_name}")
+                # NOTE(review): presumably cast so the column has one dtype
+                # that qiime2.Metadata accepts -- confirm.
+                df["Nudged"] = df["Nudged"].astype(str)
+                # Append per bin so no bin of the sample is dropped.
+                df_list.append(df)
+        else:
+            file_path = os.path.join(root, samp, f"{data_type}_mapping_data.txt")
+            df = pd.read_csv(file_path, sep="\t")
+            df.insert(0, "Sample Name", samp)
+            df_list.append(df)
+    df_combined = pd.concat(df_list, axis=0)
+    df_combined.reset_index(inplace=True, drop=True)
+    df_combined.index.name = "id"
+    df_combined.index = df_combined.index.astype(str)
+    if data_type == "mags":
+        # NOTE(review): "ID" presumably clashes with a reserved qiime2
+        # metadata header -- confirm.
+        df_combined.rename(columns={"ID": "HSP_Identifier"}, inplace=True)
+    return qiime2.Metadata(df_combined)
diff --git a/q2_amr/types/_type.py b/q2_amr/types/_type.py
index 6e74527..c281c4e 100644
--- a/q2_amr/types/_type.py
+++ b/q2_amr/types/_type.py
@@ -10,5 +10,9 @@
CARDDatabase = SemanticType("CARDDatabase")
CARDAnnotation = SemanticType("CARDAnnotation", variant_of=SampleData.field["type"])
-CARDAlleleAnnotation = SemanticType("CARDAlleleAnnotation")
-CARDGeneAnnotation = SemanticType("CARDGeneAnnotation")
+CARDAlleleAnnotation = SemanticType(
+ "CARDAlleleAnnotation", variant_of=SampleData.field["type"]
+)
+CARDGeneAnnotation = SemanticType(
+ "CARDGeneAnnotation", variant_of=SampleData.field["type"]
+)
diff --git a/q2_amr/types/tests/data/annotate_mags_output/sample1/bin1/sample1_bin1_amr_annotation.json b/q2_amr/types/tests/data/annotate_mags_output/sample1/bin1/amr_annotation.json
similarity index 100%
rename from q2_amr/types/tests/data/annotate_mags_output/sample1/bin1/sample1_bin1_amr_annotation.json
rename to q2_amr/types/tests/data/annotate_mags_output/sample1/bin1/amr_annotation.json
diff --git a/q2_amr/types/tests/data/annotate_mags_output/sample1/bin1/sample1_bin1_amr_annotation.txt b/q2_amr/types/tests/data/annotate_mags_output/sample1/bin1/amr_annotation.txt
similarity index 100%
rename from q2_amr/types/tests/data/annotate_mags_output/sample1/bin1/sample1_bin1_amr_annotation.txt
rename to q2_amr/types/tests/data/annotate_mags_output/sample1/bin1/amr_annotation.txt
diff --git a/q2_amr/types/tests/data/annotate_mags_output/sample2/bin1/sample2_bin1_amr_annotation.json b/q2_amr/types/tests/data/annotate_mags_output/sample2/bin1/amr_annotation.json
similarity index 100%
rename from q2_amr/types/tests/data/annotate_mags_output/sample2/bin1/sample2_bin1_amr_annotation.json
rename to q2_amr/types/tests/data/annotate_mags_output/sample2/bin1/amr_annotation.json
diff --git a/q2_amr/types/tests/data/annotate_mags_output/sample2/bin1/sample2_bin1_amr_annotation.txt b/q2_amr/types/tests/data/annotate_mags_output/sample2/bin1/amr_annotation.txt
similarity index 96%
rename from q2_amr/types/tests/data/annotate_mags_output/sample2/bin1/sample2_bin1_amr_annotation.txt
rename to q2_amr/types/tests/data/annotate_mags_output/sample2/bin1/amr_annotation.txt
index 1f16da6..dbf3c40 100644
--- a/q2_amr/types/tests/data/annotate_mags_output/sample2/bin1/sample2_bin1_amr_annotation.txt
+++ b/q2_amr/types/tests/data/annotate_mags_output/sample2/bin1/amr_annotation.txt
@@ -1,3 +1,3 @@
ORF_ID Contig Start Stop Orientation Cut_Off Pass_Bitscore Best_Hit_Bitscore Best_Hit_ARO Best_Identities ARO Model_type SNPs_in_Best_Hit_ARO Other_SNPs Drug Class Resistance Mechanism AMR Gene Family Predicted_DNA Predicted_Protein CARD_Protein_Sequence Percentage Length of Reference Sequence ID Model_ID Nudged Note
-k141_1197_2 # 683 # 1345 # 1 # ID=49_2;partial=00;start_type=ATG;rbs_motif=GGA/GAG/AGG;rbs_spacer=5-10bp;gc_cont=0.588 k141_1197_2 683 1345 + Strict 200 326.635 vanX gene in vanO cluster 75.25 3002954 protein homolog model n/a n/a glycopeptide antibiotic antibiotic target alteration vanX; glycopeptide resistance gene cluster ATGAAGGGTGACTTCGTTTTCGTTGATGAGTGTGTTCCAGGAGTCCGCTGGGATGCCAAATACGCCACATCGGACAACTTCACCGGCAAACCGGTGGAGGGATATCTGGCCAACCGGATTGTCGGGACCAGGGCTTTGTGCTCAGCGCTGGAAAGCGTGCGGCAACGGGCTGCATCCCGCGGTTTCGGGTTGCTGCTGTGGGACGGCTACCGCCCGCAGCGCGCCGTGGATTCGTTCCTGCACTGGGCGAAACAACCAGAGGACGGCGCAACTAAACGCCGCCACTATCCAAATATTTCCCGAGCGGAAATGTTCGAAAGAGGATACGTAGCCTCCAAGTCCGGCCACAGCCGGGGCAGCACCGTCGATTTGACCCTGTATGACCTGGTTACCGGTGACCTCGTTCCCATGGGCGGCGGCCACGACTTGATGGATGAAATTTCGCATCACGGAGCGCCCGGCATCACCCGGGCCGAGACCGGCAACCGCCACACGCTGCGTTCGCTCATGGAGGCCTGCGGTTTCAGTTCCTACGATTCTGAGTGGTGGCATTACACCCTGAAGAACGAACCCTATCCGGACACTTATTTCGATTTTCCCGTTACGGATCCGCTTCCATCAGACGCCGCAACGGCCAGGGACCTTGTCTTCCAGAATGCATAG MKGDFVFVDECVPGVRWDAKYATSDNFTGKPVEGYLANRIVGTRALCSALESVRQRAASRGFGLLLWDGYRPQRAVDSFLHWAKQPEDGATKRRHYPNISRAEMFERGYVASKSGHSRGSTVDLTLYDLVTGDLVPMGGGHDLMDEISHHGAPGITRAETGNRHTLRSLMEACGFSSYDSEWWHYTLKNEPYPDTYFDFPVTDPLPSDAATARDLVFQNA MNDDFVYVDDWVPGVRWDAKYATWDNFTGKPVDGYLANRIVGTRALCAALEQAREKAASLGFGLLLWDGYRPRRAVDSFLRWSEQPEDGQTKQRHYPNIDRPEMLEKGYVATQSGHSRGGAVDLTLYHLATGELAPMGGDHDLMDPISHHRARGIKPIESKNRELLRSIMEDCGFDRYDCEWWHYTLKREPYPDVYFDFPIT 108.91 gnl|BL_ORD_ID|1672|hsp_num:0 1699
-k141_10683_1 # 1 # 453 # 1 # ID=423_1;partial=10;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.658 k141_10683_1 1 453 + Strict 50 90.8929 vanY gene in vanM cluster 38.62 3002961 protein homolog model n/a n/a glycopeptide antibiotic antibiotic target alteration vanY; glycopeptide resistance gene cluster GAGGCTGCAGGGGCCTACCGGCAAATGGCCGCGGAAGCGGGCGCCGCCGGAGTTCCCATGTCCGCGGTGAGCGGCTTTCGGACCGGAGCAGAGCAGGACCAGCTGTACGTCTCCTACACGGAGAACTTTGGGCCGGAGGCAGCCGACGCCATTTCGGCCCGTCCCGGGTACAGCGAGCATCAGACGGGGCTGGCCATCGACATCGCCAACCCGGACGGAACCTGCGCCCTGGAATCCTGCTTCGCCGAAACCTTGGCGGGTTCGTGGGCGGCCGCCAATGCCCAGCACTACGGCTTCATCATCCGTTATCCGGCAGGAGCCGAGCACATCACCGGGTACGCCCATGAACCGTGGCATCTGCGGTACGTGGGGACGGAACATGCCCGGACAATGCACGACGCCGGCACCACCTTGGAAGAATATCTGGGACTTCCTGCCGCGCCGGGTTACTGA EAAGAYRQMAAEAGAAGVPMSAVSGFRTGAEQDQLYVSYTENFGPEAADAISARPGYSEHQTGLAIDIANPDGTCALESCFAETLAGSWAAANAQHYGFIIRYPAGAEHITGYAHEPWHLRYVGTEHARTMHDAGTTLEEYLGLPAAPGY MVFQGNLLLVNNEYPVLEESIKTDVVNLFKHDELTKGYELLNREIYLSEKVAREFSEMVDAAEKEGVRHFSINSGFRNFDEQNALYQEMGSDYALPAGYSEHNLGLALDIGSTQMEMSEAPEGKWLEDNAWEYGFILRYPMDKTAITGIQYEPWHFRYVGLPHSAIIEEKNFALEEYLDFLKEQKSISGTIHGENYEISYYPITEKTDIEMPANLHYEISGNNMDGVIVTVYR 64.38 gnl|BL_ORD_ID|1673|hsp_num:0 1713
+k141_1197_2 # 683 # 1345 # 1 # ID=49_2;partial=00;start_type=ATG;rbs_motif=GGA/GAG/AGG;rbs_spacer=5-10bp;gc_cont=0.588 k141_1197_2 683 1345 + Strict 200 326.635 vanX gene in vanO cluster 75.25 3002954 protein homolog model n/a n/a glycopeptide antibiotic antibiotic target alteration vanX; glycopeptide resistance gene cluster ATGAAGGGTGACTTCGTTTTCGTTGATGAGTGTGTTCCAGGAGTCCGCTGGGATGCCAAATACGCCACATCGGACAACTTCACCGGCAAACCGGTGGAGGGATATCTGGCCAACCGGATTGTCGGGACCAGGGCTTTGTGCTCAGCGCTGGAAAGCGTGCGGCAACGGGCTGCATCCCGCGGTTTCGGGTTGCTGCTGTGGGACGGCTACCGCCCGCAGCGCGCCGTGGATTCGTTCCTGCACTGGGCGAAACAACCAGAGGACGGCGCAACTAAACGCCGCCACTATCCAAATATTTCCCGAGCGGAAATGTTCGAAAGAGGATACGTAGCCTCCAAGTCCGGCCACAGCCGGGGCAGCACCGTCGATTTGACCCTGTATGACCTGGTTACCGGTGACCTCGTTCCCATGGGCGGCGGCCACGACTTGATGGATGAAATTTCGCATCACGGAGCGCCCGGCATCACCCGGGCCGAGACCGGCAACCGCCACACGCTGCGTTCGCTCATGGAGGCCTGCGGTTTCAGTTCCTACGATTCTGAGTGGTGGCATTACACCCTGAAGAACGAACCCTATCCGGACACTTATTTCGATTTTCCCGTTACGGATCCGCTTCCATCAGACGCCGCAACGGCCAGGGACCTTGTCTTCCAGAATGCATAG MKGDFVFVDECVPGVRWDAKYATSDNFTGKPVEGYLANRIVGTRALCSALESVRQRAASRGFGLLLWDGYRPQRAVDSFLHWAKQPEDGATKRRHYPNISRAEMFERGYVASKSGHSRGSTVDLTLYDLVTGDLVPMGGGHDLMDEISHHGAPGITRAETGNRHTLRSLMEACGFSSYDSEWWHYTLKNEPYPDTYFDFPVTDPLPSDAATARDLVFQNA MNDDFVYVDDWVPGVRWDAKYATWDNFTGKPVDGYLANRIVGTRALCAALEQAREKAASLGFGLLLWDGYRPRRAVDSFLRWSEQPEDGQTKQRHYPNIDRPEMLEKGYVATQSGHSRGGAVDLTLYHLATGELAPMGGDHDLMDPISHHRARGIKPIESKNRELLRSIMEDCGFDRYDCEWWHYTLKREPYPDVYFDFPIT 108.91 gnl|BL_ORD_ID|1674|hsp_num:0 1699
+k141_10683_1 # 1 # 453 # 1 # ID=423_1;partial=10;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.658 k141_10683_1 1 453 + Strict 50 90.8929 vanY gene in vanM cluster 38.62 3002961 protein homolog model n/a n/a glycopeptide antibiotic antibiotic target alteration vanY; glycopeptide resistance gene cluster GAGGCTGCAGGGGCCTACCGGCAAATGGCCGCGGAAGCGGGCGCCGCCGGAGTTCCCATGTCCGCGGTGAGCGGCTTTCGGACCGGAGCAGAGCAGGACCAGCTGTACGTCTCCTACACGGAGAACTTTGGGCCGGAGGCAGCCGACGCCATTTCGGCCCGTCCCGGGTACAGCGAGCATCAGACGGGGCTGGCCATCGACATCGCCAACCCGGACGGAACCTGCGCCCTGGAATCCTGCTTCGCCGAAACCTTGGCGGGTTCGTGGGCGGCCGCCAATGCCCAGCACTACGGCTTCATCATCCGTTATCCGGCAGGAGCCGAGCACATCACCGGGTACGCCCATGAACCGTGGCATCTGCGGTACGTGGGGACGGAACATGCCCGGACAATGCACGACGCCGGCACCACCTTGGAAGAATATCTGGGACTTCCTGCCGCGCCGGGTTACTGA EAAGAYRQMAAEAGAAGVPMSAVSGFRTGAEQDQLYVSYTENFGPEAADAISARPGYSEHQTGLAIDIANPDGTCALESCFAETLAGSWAAANAQHYGFIIRYPAGAEHITGYAHEPWHLRYVGTEHARTMHDAGTTLEEYLGLPAAPGY MVFQGNLLLVNNEYPVLEESIKTDVVNLFKHDELTKGYELLNREIYLSEKVAREFSEMVDAAEKEGVRHFSINSGFRNFDEQNALYQEMGSDYALPAGYSEHNLGLALDIGSTQMEMSEAPEGKWLEDNAWEYGFILRYPMDKTAITGIQYEPWHFRYVGLPHSAIIEEKNFALEEYLDFLKEQKSISGTIHGENYEISYYPITEKTDIEMPANLHYEISGNNMDGVIVTVYR 64.38 gnl|BL_ORD_ID|1675|hsp_num:0 1713
diff --git a/q2_amr/types/tests/data/annotate_reads_output/sample1/sample1.allele_mapping_data.txt b/q2_amr/types/tests/data/annotate_reads_output/sample1/allele_mapping_data.txt
similarity index 100%
rename from q2_amr/types/tests/data/annotate_reads_output/sample1/sample1.allele_mapping_data.txt
rename to q2_amr/types/tests/data/annotate_reads_output/sample1/allele_mapping_data.txt
diff --git a/q2_amr/types/tests/data/annotate_reads_output/sample1/sample1.gene_mapping_data.txt b/q2_amr/types/tests/data/annotate_reads_output/sample1/gene_mapping_data.txt
similarity index 100%
rename from q2_amr/types/tests/data/annotate_reads_output/sample1/sample1.gene_mapping_data.txt
rename to q2_amr/types/tests/data/annotate_reads_output/sample1/gene_mapping_data.txt
diff --git a/q2_amr/types/tests/data/annotate_reads_output/sample1/sample1.overall_mapping_stats.txt b/q2_amr/types/tests/data/annotate_reads_output/sample1/overall_mapping_stats.txt
similarity index 100%
rename from q2_amr/types/tests/data/annotate_reads_output/sample1/sample1.overall_mapping_stats.txt
rename to q2_amr/types/tests/data/annotate_reads_output/sample1/overall_mapping_stats.txt
diff --git a/q2_amr/types/tests/data/annotate_reads_output/sample2/sample2.allele_mapping_data.txt b/q2_amr/types/tests/data/annotate_reads_output/sample2/allele_mapping_data.txt
similarity index 100%
rename from q2_amr/types/tests/data/annotate_reads_output/sample2/sample2.allele_mapping_data.txt
rename to q2_amr/types/tests/data/annotate_reads_output/sample2/allele_mapping_data.txt
diff --git a/q2_amr/types/tests/data/annotate_reads_output/sample2/sample2.gene_mapping_data.txt b/q2_amr/types/tests/data/annotate_reads_output/sample2/gene_mapping_data.txt
similarity index 100%
rename from q2_amr/types/tests/data/annotate_reads_output/sample2/sample2.gene_mapping_data.txt
rename to q2_amr/types/tests/data/annotate_reads_output/sample2/gene_mapping_data.txt
diff --git a/q2_amr/types/tests/data/annotate_reads_output/sample2/sample2.overall_mapping_stats.txt b/q2_amr/types/tests/data/annotate_reads_output/sample2/overall_mapping_stats.txt
similarity index 100%
rename from q2_amr/types/tests/data/annotate_reads_output/sample2/sample2.overall_mapping_stats.txt
rename to q2_amr/types/tests/data/annotate_reads_output/sample2/overall_mapping_stats.txt
diff --git a/q2_amr/types/tests/test_types_formats_transformers.py b/q2_amr/types/tests/test_types_formats_transformers.py
index 363b67d..db5414b 100644
--- a/q2_amr/types/tests/test_types_formats_transformers.py
+++ b/q2_amr/types/tests/test_types_formats_transformers.py
@@ -224,6 +224,23 @@ def test_CARDAnnotationDirectoryFormat_to_ProteinsDirectoryFormat_transformer(se
os.path.exists(os.path.join(str(obs), "sample2", "bin1_proteins.fasta"))
)
+    def test_CARDAnnotationDirectoryFormat_to_qiime2_Metadata_transformer(self):
+        """The MAG annotation transformer returns populated qiime2.Metadata."""
+        transformer = self.get_transformer(
+            CARDAnnotationDirectoryFormat, qiime2.Metadata
+        )
+        annotation = CARDAnnotationDirectoryFormat(
+            self.get_data_path("annotate_mags_output"), "r"
+        )
+        metadata_obt = transformer(annotation)
+        self.assertIsInstance(metadata_obt, qiime2.Metadata)
+        # A type check alone passes for any table; also pin the columns that
+        # tabulate_data adds or renames for MAG annotations.
+        columns = metadata_obt.to_dataframe().columns
+        self.assertIn("Sample Name", columns)
+        self.assertIn("HSP_Identifier", columns)
+        self.assertNotIn("ID", columns)
+
class TestCARDReadsAnnotationTypesAndFormats(AMRTypesTestPluginBase):
def test_CARDGeneAnnotationDirectoryFormat_to_qiime2_Metadata_transformer(self):