From 6844065f3e90358471c0fc67b041fdfd321773a9 Mon Sep 17 00:00:00 2001 From: VinzentRisch <100149044+VinzentRisch@users.noreply.github.com> Date: Mon, 16 Oct 2023 16:41:51 +0200 Subject: [PATCH] FIX: Fixed q2-amr failing, if no ARGs are detected (#9) -Fixed the problem of q2-amr failing if no ARGs are detected with annotate_reads_card or annotate_mags_card methods. Now the user is informed that no ARGs were detected and no further output is produced. -Removed mapq, mapped and coverage parameters from annotate_reads_card method. -Added CARDGeneAnnotationDirectoryFormat/CARDGeneAnnotationDirectoryFormat -> qiime2.Metadata transformer. -Fixed bug in heatmap method. Parameters clus and cat cannot be used at the same time. --- q2_amr/assets/rgi/heatmap/index.html | 22 ++- q2_amr/card/heatmap.py | 102 ++++++++++++ q2_amr/card/mags.py | 39 +++-- q2_amr/card/reads.py | 70 ++------ q2_amr/card/utils.py | 32 ++++ q2_amr/plugin_setup.py | 99 ++++++----- q2_amr/tests/card/test_database.py | 22 +-- q2_amr/tests/card/test_heatmap.py | 113 +++++++++++++ q2_amr/tests/card/test_mags.py | 94 +++++++---- q2_amr/tests/card/test_reads.py | 157 +++++------------- q2_amr/tests/card/test_utils.py | 72 +++++++- q2_amr/tests/data/output.mags.txt | 5 + q2_amr/types/_transformer.py | 39 ++++- q2_amr/types/_type.py | 8 +- ...mr_annotation.json => amr_annotation.json} | 0 ..._amr_annotation.txt => amr_annotation.txt} | 0 ...mr_annotation.json => amr_annotation.json} | 0 ..._amr_annotation.txt => amr_annotation.txt} | 4 +- ...pping_data.txt => allele_mapping_data.txt} | 0 ...mapping_data.txt => gene_mapping_data.txt} | 0 ...ng_stats.txt => overall_mapping_stats.txt} | 0 ...pping_data.txt => allele_mapping_data.txt} | 0 ...mapping_data.txt => gene_mapping_data.txt} | 0 ...ng_stats.txt => overall_mapping_stats.txt} | 0 .../tests/test_types_formats_transformers.py | 10 ++ 25 files changed, 608 insertions(+), 280 deletions(-) create mode 100644 q2_amr/card/heatmap.py create mode 100644 
q2_amr/tests/card/test_heatmap.py create mode 100644 q2_amr/tests/data/output.mags.txt rename q2_amr/types/tests/data/annotate_mags_output/sample1/bin1/{sample1_bin1_amr_annotation.json => amr_annotation.json} (100%) rename q2_amr/types/tests/data/annotate_mags_output/sample1/bin1/{sample1_bin1_amr_annotation.txt => amr_annotation.txt} (100%) rename q2_amr/types/tests/data/annotate_mags_output/sample2/bin1/{sample2_bin1_amr_annotation.json => amr_annotation.json} (100%) rename q2_amr/types/tests/data/annotate_mags_output/sample2/bin1/{sample2_bin1_amr_annotation.txt => amr_annotation.txt} (96%) rename q2_amr/types/tests/data/annotate_reads_output/sample1/{sample1.allele_mapping_data.txt => allele_mapping_data.txt} (100%) rename q2_amr/types/tests/data/annotate_reads_output/sample1/{sample1.gene_mapping_data.txt => gene_mapping_data.txt} (100%) rename q2_amr/types/tests/data/annotate_reads_output/sample1/{sample1.overall_mapping_stats.txt => overall_mapping_stats.txt} (100%) rename q2_amr/types/tests/data/annotate_reads_output/sample2/{sample2.allele_mapping_data.txt => allele_mapping_data.txt} (100%) rename q2_amr/types/tests/data/annotate_reads_output/sample2/{sample2.gene_mapping_data.txt => gene_mapping_data.txt} (100%) rename q2_amr/types/tests/data/annotate_reads_output/sample2/{sample2.overall_mapping_stats.txt => overall_mapping_stats.txt} (100%) diff --git a/q2_amr/assets/rgi/heatmap/index.html b/q2_amr/assets/rgi/heatmap/index.html index 722c6f6..3e92f86 100644 --- a/q2_amr/assets/rgi/heatmap/index.html +++ b/q2_amr/assets/rgi/heatmap/index.html @@ -1,10 +1,10 @@ -{% extends 'tabbed.html' %} +{% extends 'base.html' %} -{% block tabcontent %} +{% block content %}
-

Downloads

+

Downloads

@@ -16,15 +16,27 @@

Downloads

-
+ +
-

My Picture

+

CARD annotation: heatmap

My Picture
+
+ +
+
+

Displayed is a heatmap of annotate-mags-card output.
+ Yellow represents a perfect hit, teal represents a
+ strict hit, purple represents no hit.

+
+
+ + {% endblock %} {% block footer %} diff --git a/q2_amr/card/heatmap.py b/q2_amr/card/heatmap.py new file mode 100644 index 0000000..02e583d --- /dev/null +++ b/q2_amr/card/heatmap.py @@ -0,0 +1,102 @@ +import glob +import os +import shutil +import subprocess +import tempfile +from distutils.dir_util import copy_tree + +import pkg_resources +import q2templates + +from q2_amr.card.utils import run_command +from q2_amr.types import CARDAnnotationDirectoryFormat + + +def heatmap( + output_dir: str, + amr_annotation: CARDAnnotationDirectoryFormat, + clus: str = None, + cat: str = None, + display: str = "plain", + frequency: bool = False, +): + TEMPLATES = pkg_resources.resource_filename("q2_amr", "assets") + annotation_dir = str(amr_annotation) + with tempfile.TemporaryDirectory() as tmp: + results_dir = os.path.join(tmp, "results") + json_files_dir = os.path.join(tmp, "json_files") + os.makedirs(results_dir) + os.makedirs(json_files_dir) + for json_file in glob.glob(os.path.join(annotation_dir, "*", "*", "*.json")): + sample, bin_name, _ = json_file.split(os.path.sep)[-3:] + destination_path = os.path.join(json_files_dir, f"{sample}_{bin_name}") + shutil.copy(json_file, destination_path) + + run_rgi_heatmap(tmp, json_files_dir, clus, cat, display, frequency) + change_names(results_dir) + copy_tree(os.path.join(TEMPLATES, "rgi", "heatmap"), output_dir) + copy_tree(results_dir, os.path.join(output_dir, "rgi_data")) + context = {"tabs": [{"title": "Heatmap", "url": "index.html"}]} + index = os.path.join(TEMPLATES, "rgi", "heatmap", "index.html") + templates = [index] + q2templates.render(templates, output_dir, context=context) + + +class InvalidParameterCombinationError(Exception): + def __init__(self, message="Invalid parameter combination"): + self.message = message + super().__init__(self.message) + + +def run_rgi_heatmap(tmp, json_files_dir, clus, cat, display, frequency): + cmd = [ + "rgi", + "heatmap", + "--input", + json_files_dir, + "--output", + 
f"{tmp}/results/heatmap", + "--display", + display, + ] + if (clus == "both" or clus == "genes") and cat: + raise InvalidParameterCombinationError( + "If the parameter clus is set to genes " + "or both it is not possible to use the " + "cat parameter" + ) + if clus: + cmd.extend(["--clus", clus]) + if cat: + cmd.extend(["--cat", cat]) + if frequency: + cmd.append("--frequency") + try: + run_command(cmd, tmp, verbose=True) + except subprocess.CalledProcessError as e: + raise Exception( + "An error was encountered while running rgi, " + f"(return code {e.returncode}), please inspect " + "stdout and stderr to learn more." + ) + + +def change_names(results_dir): + """ + This function changes the names of the output files of the "rgi heatmap" function. + The output files are called heatmap-*.extension with * being the number of samples + included in the heatmap. The files are changed to heatmap.extension so that they + can be accessed in the index.html file more easily. + + Parameters: + - results_dir (str): The directory where the files are stored. 
+ """ + extensions = [".eps", ".csv", ".png"] + files = os.listdir(results_dir) + for filename in files: + if os.path.splitext(filename)[1] in extensions: + file_ext = os.path.splitext(filename)[1] + new_filename = "heatmap" + file_ext + old_path = os.path.join(results_dir, filename) + new_path = os.path.join(results_dir, new_filename) + os.rename(old_path, new_path) diff --git a/q2_amr/card/mags.py b/q2_amr/card/mags.py index 45057e0..3198775 100644 --- a/q2_amr/card/mags.py +++ b/q2_amr/card/mags.py @@ -6,7 +6,12 @@ import pandas as pd from q2_types_genomics.per_sample_data import MultiMAGSequencesDirFmt -from q2_amr.card.utils import load_preprocess_card_db, run_command +from q2_amr.card.utils import ( + create_count_table, + load_preprocess_card_db, + read_in_txt, + run_command, +) from q2_amr.types import CARDAnnotationDirectoryFormat, CARDDatabaseFormat @@ -14,15 +19,15 @@ def annotate_mags_card( mag: MultiMAGSequencesDirFmt, card_db: CARDDatabaseFormat, alignment_tool: str = "BLAST", - input_type: str = "contig", split_prodigal_jobs: bool = False, include_loose: bool = False, include_nudge: bool = False, low_quality: bool = False, - num_threads: int = 1, -) -> CARDAnnotationDirectoryFormat: + threads: int = 1, +) -> (CARDAnnotationDirectoryFormat, pd.DataFrame): manifest = mag.manifest.view(pd.DataFrame) amr_annotations = CARDAnnotationDirectoryFormat() + frequency_list = [] with tempfile.TemporaryDirectory() as tmp: load_preprocess_card_db(tmp, card_db, "load") for samp_bin in list(manifest.index): @@ -33,24 +38,34 @@ def annotate_mags_card( tmp, input_sequence, alignment_tool, - input_type, split_prodigal_jobs, include_loose, include_nudge, low_quality, - num_threads, + threads, + ) + txt_path = os.path.join(bin_dir, "amr_annotation.txt") + json_path = os.path.join(bin_dir, "amr_annotation.json") + + shutil.move(f"{tmp}/output.txt", txt_path) + shutil.move(f"{tmp}/output.json", json_path) + samp_bin_name = os.path.join(samp_bin[0], samp_bin[1]) + 
frequency_df = read_in_txt( + path=txt_path, col_name="ARO", samp_bin_name=samp_bin_name ) - shutil.move(f"{tmp}/output.txt", f"{bin_dir}/amr_annotation.txt") - shutil.move(f"{tmp}/output.json", f"{bin_dir}/amr_annotation.json") - print("a") - return amr_annotations + if frequency_df is not None: + frequency_list.append(frequency_df) + feature_table = create_count_table(df_list=frequency_list) + return ( + amr_annotations, + feature_table, + ) def run_rgi_main( tmp, input_sequence: str, alignment_tool: str = "BLAST", - input_type: str = "contig", split_prodigal_jobs: bool = False, include_loose: bool = False, include_nudge: bool = False, @@ -69,7 +84,7 @@ def run_rgi_main( "--alignment_tool", alignment_tool, "--input_type", - input_type, + "contig", "--local", ] if include_loose: diff --git a/q2_amr/card/reads.py b/q2_amr/card/reads.py index 1fc0dab..a1a5a65 100644 --- a/q2_amr/card/reads.py +++ b/q2_amr/card/reads.py @@ -3,7 +3,6 @@ import subprocess import tempfile from distutils.dir_util import copy_tree -from functools import reduce from typing import Union import altair as alt @@ -15,7 +14,12 @@ SingleLanePerSampleSingleEndFastqDirFmt, ) -from q2_amr.card.utils import load_preprocess_card_db, run_command +from q2_amr.card.utils import ( + create_count_table, + load_preprocess_card_db, + read_in_txt, + run_command, +) from q2_amr.types import ( CARDAlleleAnnotationDirectoryFormat, CARDDatabaseFormat, @@ -30,10 +34,6 @@ def annotate_reads_card( card_db: CARDDatabaseFormat, aligner: str = "kma", threads: int = 1, - include_baits: bool = False, - mapq: float = None, - mapped: float = None, - coverage: float = None, ) -> ( CARDAlleleAnnotationDirectoryFormat, CARDGeneAnnotationDirectoryFormat, @@ -65,15 +65,19 @@ def annotate_reads_card( rev=rev, aligner=aligner, threads=threads, - include_baits=include_baits, - mapq=mapq, - mapped=mapped, - coverage=coverage, ) - allele_frequency = read_in_txt(samp_input_dir, "allele") - 
allele_frequency_list.append(allele_frequency) - gene_frequency = read_in_txt(samp_input_dir, "gene") - gene_frequency_list.append(gene_frequency) + path_allele = os.path.join(samp_input_dir, "output.allele_mapping_data.txt") + allele_frequency = read_in_txt( + path=path_allele, col_name="ARO Accession", samp_bin_name=samp + ) + if allele_frequency is not None: + allele_frequency_list.append(allele_frequency) + path_gene = os.path.join(samp_input_dir, "output.gene_mapping_data.txt") + gene_frequency = read_in_txt( + path=path_gene, col_name="ARO Accession", samp_bin_name=samp + ) + if gene_frequency is not None: + gene_frequency_list.append(gene_frequency) move_files(samp_input_dir, samp_allele_dir, "allele") move_files(samp_input_dir, samp_gene_dir, "gene") @@ -98,32 +102,6 @@ def move_files(source_dir: str, des_dir: str, map_type: str): ) -def create_count_table(df_list: list) -> pd.DataFrame: - df_merged = reduce( - lambda left, right: pd.merge(left, right, on="ARO Accession", how="outer"), - df_list, - ) - df_transposed = df_merged.transpose() - df_transposed = df_transposed.fillna(0) - df_transposed.columns = df_transposed.iloc[0] - df_transposed = df_transposed.drop("ARO Accession") - df_transposed.columns.name = None - df_transposed.index.name = "sample_id" - return df_transposed - - -def read_in_txt(samp_dir: str, map_type: str): - df = pd.read_csv( - os.path.join(samp_dir, f"output.{map_type}_mapping_data.txt"), sep="\t" - ) - df = df[["ARO Accession"]] - df = df.astype(str) - samp = os.path.basename(samp_dir) - df[samp] = df.groupby("ARO Accession")["ARO Accession"].transform("count") - df = df.drop_duplicates(subset=["ARO Accession"]) - return df - - def run_rgi_bwt( cwd: str, samp: str, @@ -131,10 +109,6 @@ def run_rgi_bwt( rev: str, aligner: str, threads: int, - include_baits: bool, - mapq: float, - mapped: float, - coverage: float, ): cmd = [ "rgi", @@ -152,14 +126,6 @@ def run_rgi_bwt( ] if rev: cmd.extend(["--read_two", rev]) - if include_baits: - 
cmd.append("--include_baits") - if mapq: - cmd.extend(["--mapq", str(mapq)]) - if mapped: - cmd.extend(["--mapped", str(mapped)]) - if coverage: - cmd.extend(["--coverage", str(coverage)]) try: run_command(cmd, cwd, verbose=True) except subprocess.CalledProcessError as e: diff --git a/q2_amr/card/utils.py b/q2_amr/card/utils.py index 602e0a8..99415d5 100644 --- a/q2_amr/card/utils.py +++ b/q2_amr/card/utils.py @@ -1,5 +1,8 @@ import json import subprocess +from functools import reduce + +import pandas as pd EXTERNAL_CMD_WARNING = ( "Running external command line application(s). " @@ -44,3 +47,32 @@ def load_preprocess_card_db(tmp, card_db, operation): f"(return code {e.returncode}), please inspect " "stdout and stderr to learn more." ) + + +def read_in_txt(path: str, col_name: str, samp_bin_name: str): + df = pd.read_csv(path, sep="\t") + if df.empty: + return None + df = df[[col_name]] + df = df.astype(str) + df[samp_bin_name] = df.groupby(col_name)[col_name].transform("count") + df = df.drop_duplicates(subset=[col_name]) + return df + + +def create_count_table(df_list: list) -> pd.DataFrame: + if not df_list: + raise ValueError( + "RGI did not identify any AMR genes. No output can be created." 
+ ) + df = reduce( + lambda left, right: pd.merge(left, right, on=left.columns[0], how="outer"), + df_list, + ) + df = df.transpose() + df = df.fillna(0) + df.columns = df.iloc[0] + df = df.drop(df.index[0]) + df.columns.name = None + df.index.name = "sample_id" + return df diff --git a/q2_amr/plugin_setup.py b/q2_amr/plugin_setup.py index 37d4db5..84efb88 100644 --- a/q2_amr/plugin_setup.py +++ b/q2_amr/plugin_setup.py @@ -7,18 +7,19 @@ # ---------------------------------------------------------------------------- import importlib -from q2_types.feature_table import FeatureTable, Frequency +from q2_types.feature_table import FeatureTable, PresenceAbsence from q2_types.per_sample_sequences import ( PairedEndSequencesWithQuality, SequencesWithQuality, ) from q2_types.sample_data import SampleData from q2_types_genomics.per_sample_data import MAGs -from qiime2.core.type import Bool, Choices, Float, Int, Range, Str +from qiime2.core.type import Bool, Choices, Int, Range, Str from qiime2.plugin import Citations, Plugin from q2_amr import __version__ from q2_amr.card.database import fetch_card_db +from q2_amr.card.heatmap import heatmap from q2_amr.card.mags import annotate_mags_card from q2_amr.card.reads import annotate_reads_card, visualize_annotation_stats from q2_amr.types import ( @@ -45,9 +46,9 @@ version=__version__, website="https://github.com/bokulich-lab/q2-amr", package="q2_amr", - description="This is a QIIME 2 plugin that annotates microbiome sequence data with " + description="This is a QIIME 2 plugin that annotates sequence data with " "antimicrobial resistance gene information from CARD.", - short_description="This is a QIIME 2 plugin that annotates microbiome sequence " + short_description="This is a QIIME 2 plugin that annotates sequence " "data with antimicrobial resistance gene information from CARD.", ) plugin.methods.register_function( @@ -62,7 +63,7 @@ "phenotypes." 
}, name="Download CARD data.", - description=("Downloads the CARD database from the CARD website."), + description=("Download the latest version of the CARD database."), citations=[citations["alcock_card_2023"]], ) @@ -71,32 +72,35 @@ inputs={"mag": SampleData[MAGs], "card_db": CARDDatabase}, parameters={ "alignment_tool": Str % Choices(["BLAST", "DIAMOND"]), - "input_type": Str % Choices(["contig", "protein"]), "split_prodigal_jobs": Bool, "include_loose": Bool, "include_nudge": Bool, "low_quality": Bool, - "num_threads": Int % Range(1, 9), + "threads": Int % Range(0, None, inclusive_start=False), }, - outputs=[("amr_annotations", SampleData[CARDAnnotation])], + outputs=[ + ("amr_annotations", SampleData[CARDAnnotation]), + ("feature_table", FeatureTable[PresenceAbsence]), + ], input_descriptions={ - "mag": "MAG to be annotated with CARD.", + "mag": "MAGs to be annotated with CARD.", "card_db": "CARD Database.", }, parameter_descriptions={ "alignment_tool": "Specify alignment tool BLAST or DIAMOND.", - "input_type": "Specify data input type contig or protein.", - "split_prodigal_jobs": "Run multiple prodigal jobs simultaneously for contigs " - "in a fasta file.", + "split_prodigal_jobs": "Run multiple prodigal jobs simultaneously for contigs" + " in one sample", "include_loose": "Include loose hits in addition to strict and perfect hits .", "include_nudge": "Include hits nudged from loose to strict hits.", "low_quality": "Use for short contigs to predict partial genes.", - "num_threads": "Number of threads (CPUs) to use in the BLAST search.", + "threads": "Number of threads (CPUs) to use in the BLAST search.", }, - output_descriptions={"amr_annotations": "AMR Annotation as .txt and .json file."}, - name="Annotate MAGs with antimicrobial resistance gene information from CARD.", - description="Annotate MAGs with antimicrobial resistance gene information from " - "CARD.", + output_descriptions={ + "amr_annotations": "AMR annotation as .txt and .json file.", + 
"feature_table": "Presence and absence table of ARGs in all samples.", + }, + name="Annotate MAGs with antimicrobial resistance genes from CARD.", + description="Annotate MAGs with antimicrobial resistance genes from CARD.", citations=[citations["alcock_card_2023"]], ) @@ -109,40 +113,54 @@ }, parameters={ "aligner": Str % Choices(["kma", "bowtie2", "bwa"]), - "include_baits": Bool, - "mapq": Float % Range(0, None, inclusive_start=True), - "mapped": Float % Range(0, None, inclusive_start=True), - "coverage": Float % Range(0, None, inclusive_start=True), "threads": Int % Range(0, None, inclusive_start=False), }, outputs=[ - ("amr_allele_annotation", CARDAlleleAnnotation), - ("amr_gene_annotation", CARDGeneAnnotation), - ("allele_feature_table", FeatureTable[Frequency]), - ("gene_feature_table", FeatureTable[Frequency]), + ("amr_allele_annotation", SampleData[CARDAlleleAnnotation]), + ("amr_gene_annotation", SampleData[CARDGeneAnnotation]), + ("allele_feature_table", FeatureTable[PresenceAbsence]), + ("gene_feature_table", FeatureTable[PresenceAbsence]), ], input_descriptions={ - "reads": "Paired or single end metagenomic reads.", + "reads": "Paired or single end reads.", "card_db": "CARD Database", }, parameter_descriptions={ "aligner": "Specify alignment tool.", - "include_baits": "Include baits.", - "mapq": "Filter reads based on MAPQ score.", - "mapped": "Filter reads based on mapped reads.", - "coverage": "Filter reads based on coverage of reference sequence.", "threads": "Number of threads (CPUs) to use.", }, output_descriptions={ "amr_allele_annotation": "AMR annotation mapped on alleles.", "amr_gene_annotation": "AMR annotation mapped on genes.", - "allele_feature_table": "Samples combined into one frequency count table.", - "gene_feature_table": "Samples combined into one frequency count table.", + "allele_feature_table": "Presence and absence table of ARGs in all samples for" + " allele mapping.", + "gene_feature_table": "Presence and absence table of ARGs 
in all samples for " + "gene mapping.", + }, + name="Annotate reads with antimicrobial resistance genes from CARD.", + description="Annotate reads with antimicrobial resistance genes from CARD.", + citations=[citations["alcock_card_2023"]], +) + +plugin.visualizers.register_function( + function=heatmap, + inputs={"amr_annotation": SampleData[CARDAnnotation]}, + parameters={ + "cat": Str % Choices(["drug_class", "resistance_mechanism", "gene_family"]), + "clus": Str % Choices(["samples", "genes", "both"]), + "display": Str % Choices(["plain", "fill", "text"]), + "frequency": Bool, + }, + input_descriptions={"amr_annotation": "AMR Annotations from MAGs"}, + parameter_descriptions={ + "cat": "The option to organize resistance genes based on a category.", + "clus": "Option to use SciPy's hierarchical clustering algorithm to cluster " + "rows (AMR genes) or columns (samples).", + "display": "Specify display options for categories", + "frequency": "Represent samples based on resistance profile.", }, - name="Annotate metagenomic reads with antimicrobial resistance gene information " - "from CARD.", - description="Annotate metagenomic reads with antimicrobial resistance gene " - "information from CARD.", + name="Create heatmap from annotate-mags-card output.", + description=("Create heatmap from annotate-mags-card output."), citations=[citations["alcock_card_2023"]], ) @@ -155,7 +173,7 @@ }, parameter_descriptions={}, name="Visualize mapping statistics.", - description="Visualize mapping statistics.", + description="Visualize mapping statistics of an annotate-reads-card output.", citations=[citations["alcock_card_2023"]], ) @@ -168,13 +186,14 @@ CARDDatabase, artifact_format=CARDDatabaseDirectoryFormat ) plugin.register_semantic_type_to_format( - CARDAnnotation, artifact_format=CARDAnnotationDirectoryFormat + SampleData[CARDAnnotation], artifact_format=CARDAnnotationDirectoryFormat ) plugin.register_semantic_type_to_format( - CARDAlleleAnnotation, 
artifact_format=CARDAlleleAnnotationDirectoryFormat + SampleData[CARDAlleleAnnotation], + artifact_format=CARDAlleleAnnotationDirectoryFormat, ) plugin.register_semantic_type_to_format( - CARDGeneAnnotation, artifact_format=CARDGeneAnnotationDirectoryFormat + SampleData[CARDGeneAnnotation], artifact_format=CARDGeneAnnotationDirectoryFormat ) plugin.register_formats( diff --git a/q2_amr/tests/card/test_database.py b/q2_amr/tests/card/test_database.py index 8cae7af..e4ec9b7 100644 --- a/q2_amr/tests/card/test_database.py +++ b/q2_amr/tests/card/test_database.py @@ -12,26 +12,28 @@ class TestAnnotateMagsCard(TestPluginBase): package = "q2_amr.tests" - @patch("requests.get") - def test_fetch_card_db(self, mock_requests): + def test_fetch_card_db(self): f = open(self.get_data_path("card.tar.bz2"), "rb") mock_response = MagicMock(raw=f) - mock_requests.return_value = mock_response - obs = fetch_card_db() + with patch("requests.get") as mock_requests: + mock_requests.return_value = mock_response + obs = fetch_card_db() self.assertTrue(os.path.exists(os.path.join(str(obs), "card.json"))) self.assertIsInstance(obs, CARDDatabaseDirectoryFormat) mock_requests.assert_called_once_with( "https://card.mcmaster.ca/latest/data", stream=True ) - @patch("requests.get", side_effect=requests.ConnectionError) - def test_fetch_card_data_connection_error(self, mock_requests): - with self.assertRaisesRegex( + def test_fetch_card_data_connection_error(self): + with patch( + "requests.get", side_effect=requests.ConnectionError + ), self.assertRaisesRegex( requests.ConnectionError, "Network connectivity problems." 
): fetch_card_db() - @patch("tarfile.open", side_effect=tarfile.ReadError) - def test_fetch_card_data_tarfile_read_error(self, mock_requests): - with self.assertRaisesRegex(tarfile.ReadError, "Tarfile is invalid."): + def test_fetch_card_data_tarfile_read_error(self): + with patch( + "tarfile.open", side_effect=tarfile.ReadError + ), self.assertRaisesRegex(tarfile.ReadError, "Tarfile is invalid."): fetch_card_db() diff --git a/q2_amr/tests/card/test_heatmap.py b/q2_amr/tests/card/test_heatmap.py new file mode 100644 index 0000000..44aee61 --- /dev/null +++ b/q2_amr/tests/card/test_heatmap.py @@ -0,0 +1,113 @@ +import os +import tempfile +from unittest.mock import patch + +from qiime2.plugin.testing import TestPluginBase + +from q2_amr.card.heatmap import ( + InvalidParameterCombinationError, + change_names, + heatmap, + run_rgi_heatmap, +) +from q2_amr.types import CARDAnnotationDirectoryFormat + + +class TestHeatmap(TestPluginBase): + package = "q2_amr.tests" + + def test_heatmap(self): + amr_annotation = CARDAnnotationDirectoryFormat() + + def mock_run_rgi_heatmap(tmp, json_files_dir, clus, cat, display, frequency): + file_types = [".png", ".eps", ".csv"] + for file_type in file_types: + with open( + os.path.join(tmp, "results", f"heatmap-3.{file_type}"), "w" + ) as file: + file.write(file_type) + + with patch( + "q2_amr.card.heatmap.run_rgi_heatmap", side_effect=mock_run_rgi_heatmap + ), tempfile.TemporaryDirectory() as tmp: + os.makedirs(os.path.join(tmp, "results")) + heatmap(tmp, amr_annotation) + self.assertTrue( + os.path.exists(os.path.join(tmp, "rgi_data", "heatmap.png")) + ) + self.assertTrue( + os.path.exists(os.path.join(tmp, "rgi_data", "heatmap.eps")) + ) + self.assertTrue( + os.path.exists(os.path.join(tmp, "rgi_data", "heatmap.csv")) + ) + self.assertTrue(os.path.exists(os.path.join(tmp, "index.html"))) + self.assertTrue(os.path.exists(os.path.join(tmp, "q2templateassets"))) + + def test_run_rgi_heatmap(self): + with 
patch("q2_amr.card.heatmap.run_command") as mock_run_command: + run_rgi_heatmap( + "path_tmp", "json_files_dir_path", "samples", "drug_class", "fill", True + ) + mock_run_command.assert_called_once_with( + [ + "rgi", + "heatmap", + "--input", + "json_files_dir_path", + "--output", + "path_tmp/results/heatmap", + "--display", + "fill", + "--clus", + "samples", + "--cat", + "drug_class", + "--frequency", + ], + "path_tmp", + verbose=True, + ) + + def test_change_names(self): + with patch( + "q2_amr.card.heatmap.os.listdir", + return_value=["heatmap-7.eps", "heatmap-7.png", "heatmap-7.csv"], + ), patch("q2_amr.card.heatmap.os.rename") as mock_rename: + results_dir = "/path/to/results" + change_names(results_dir) + expected_calls = [ + ("/path/to/results/heatmap-7.eps", "/path/to/results/heatmap.eps"), + ("/path/to/results/heatmap-7.png", "/path/to/results/heatmap.png"), + ("/path/to/results/heatmap-7.csv", "/path/to/results/heatmap.csv"), + ] + actual_calls = [call[0] for call in mock_rename.call_args_list] + self.assertEqual(expected_calls, actual_calls) + + def test_change_names_empty(self): + with patch("q2_amr.card.heatmap.os.listdir", return_value=[]), patch( + "q2_amr.card.heatmap.os.rename" + ) as mock_rename: + results_dir = "/path/to/results" + change_names(results_dir) + expected_calls = [] + actual_calls = [call[0] for call in mock_rename.call_args_list] + self.assertEqual(expected_calls, actual_calls) + + def test_invalid_combination_raises_error(self): + tmp = "path" + json_files_dir = "path" + clus = "both" + cat = "drug_class" + display = "text" + frequency = False + + with self.assertRaises(InvalidParameterCombinationError) as cm: + run_rgi_heatmap(tmp, json_files_dir, clus, cat, display, frequency) + + self.assertEqual( + str(cm.exception), + "If the parameter clus is set to genes" + " or both it is not possible to use " + "the cat parameter", + ) diff --git a/q2_amr/tests/card/test_mags.py b/q2_amr/tests/card/test_mags.py index 912bf34..8573bda 
100644 --- a/q2_amr/tests/card/test_mags.py +++ b/q2_amr/tests/card/test_mags.py @@ -1,8 +1,10 @@ import os import shutil import subprocess -from unittest.mock import patch +from copy import deepcopy +from unittest.mock import MagicMock, patch +import pandas as pd from q2_types_genomics.per_sample_data import MultiMAGSequencesDirFmt from qiime2.plugin.testing import TestPluginBase @@ -13,49 +15,77 @@ class TestAnnotateMagsCard(TestPluginBase): package = "q2_amr.tests" - def test_annotate_mags_card(self): + table = pd.DataFrame( + { + "sample_id": ["sample1", "sample2"], + 3000796: [1, 0], + 3000815: [1, 1], + 3000805: [1, 1], + 3000026: [1, 2], + 3000797: [0, 1], + } + ) + + def mock_run_rgi_main( + self, + tmp, + input_sequence, + alignment_tool, + split_prodigal_jobs, + include_loose, + include_nudge, + low_quality, + num_threads, + ): output_txt = self.get_data_path("rgi_output.txt") output_json = self.get_data_path("rgi_output.json") + shutil.copy(output_txt, f"{tmp}/output.txt") + shutil.copy(output_json, f"{tmp}/output.json") + + def return_count_table(self, df_list): + count_table = deepcopy(self.table) + count_table.set_index("sample_id", inplace=True) + count_table = count_table.astype(float) + count_table.columns = count_table.columns.astype(float) + return count_table + + def test_annotate_mags_card(self): + manifest = self.get_data_path("MANIFEST_mags") mag = MultiMAGSequencesDirFmt() card_db = CARDDatabaseFormat() shutil.copy(manifest, os.path.join(str(mag), "MANIFEST")) - def mock_run_rgi_main( - tmp, - input_sequence, - alignment_tool, - input_type, - split_prodigal_jobs, - include_loose, - include_nudge, - low_quality, - num_threads, - ): - shutil.copy(output_txt, f"{tmp}/output.txt") - shutil.copy(output_json, f"{tmp}/output.json") - + mock_create_count_table = MagicMock(side_effect=self.return_count_table) + mock_read_in_txt = MagicMock() with patch( - "q2_amr.card.mags.run_rgi_main", side_effect=mock_run_rgi_main - ), 
patch("q2_amr.card.mags.load_preprocess_card_db"): + "q2_amr.card.mags.run_rgi_main", side_effect=self.mock_run_rgi_main + ), patch("q2_amr.card.mags.load_preprocess_card_db"), patch( + "q2_amr.card.mags.read_in_txt", mock_read_in_txt + ), patch( + "q2_amr.card.mags.create_count_table", mock_create_count_table + ): result = annotate_mags_card(mag, card_db) - self.assertIsInstance(result, CARDAnnotationDirectoryFormat) + self.assertIsInstance(result[0], CARDAnnotationDirectoryFormat) + self.assertIsInstance(result[1], pd.DataFrame) self.assertTrue( os.path.exists( - os.path.join(str(result), "sample1", "bin1", "amr_annotation.txt") + os.path.join( + str(result[0]), "sample1", "bin1", "amr_annotation.txt" + ) ) ) self.assertTrue( os.path.exists( - os.path.join(str(result), "sample1", "bin1", "amr_annotation.json") + os.path.join( + str(result[0]), "sample1", "bin1", "amr_annotation.json" + ) ) ) def test_run_rgi_main(self): with patch("q2_amr.card.mags.run_command") as mock_run_command: - run_rgi_main( - "path_tmp", "path_input", "DIAMOND", "contig", True, True, True, True, 8 - ) + run_rgi_main("path_tmp", "path_input", "DIAMOND", True, True, True, True, 8) mock_run_command.assert_called_once_with( [ "rgi", @@ -80,15 +110,15 @@ def test_run_rgi_main(self): verbose=True, ) - @patch("q2_amr.card.mags.run_command") - def test_exception_raised(self, mock_run_command): - mock_run_command.side_effect = subprocess.CalledProcessError(1, "cmd") - tmp = "path/to/tmp" - input_sequence = "path/to/input_sequence.fasta" + def test_exception_raised(self): expected_message = ( "An error was encountered while running rgi, " "(return code 1), please inspect stdout and stderr to learn more." 
) - with self.assertRaises(Exception) as cm: - run_rgi_main(tmp, input_sequence) - self.assertEqual(str(cm.exception), expected_message) + tmp = "path/to/tmp" + input_sequence = "path/to/input_sequence.fasta" + with patch("q2_amr.card.mags.run_command") as mock_run_command: + mock_run_command.side_effect = subprocess.CalledProcessError(1, "cmd") + with self.assertRaises(Exception) as cm: + run_rgi_main(tmp, input_sequence) + self.assertEqual(str(cm.exception), expected_message) diff --git a/q2_amr/tests/card/test_reads.py b/q2_amr/tests/card/test_reads.py index 25ff97a..1685454 100644 --- a/q2_amr/tests/card/test_reads.py +++ b/q2_amr/tests/card/test_reads.py @@ -2,7 +2,6 @@ import shutil import subprocess import tempfile -from copy import deepcopy from unittest.mock import ANY, MagicMock, call, patch import pandas as pd @@ -11,14 +10,13 @@ SingleLanePerSampleSingleEndFastqDirFmt, ) from qiime2.plugin.testing import TestPluginBase +from test_mags import TestAnnotateMagsCard from q2_amr.card.reads import ( annotate_reads_card, - create_count_table, extract_sample_stats, move_files, plot_sample_stats, - read_in_txt, run_rgi_bwt, visualize_annotation_stats, ) @@ -38,38 +36,31 @@ def test_annotate_reads_card_single(self): def test_annotate_reads_card_paired(self): self.annotate_reads_card_test_body("paired") + def copy_needed_files(self, cwd, samp, **kwargs): + output_allele = self.get_data_path("output.allele_mapping_data.txt") + output_gene = self.get_data_path("output.gene_mapping_data.txt") + output_stats = self.get_data_path("output.overall_mapping_stats.txt") + samp_dir = os.path.join(cwd, samp) + shutil.copy(output_allele, samp_dir) + shutil.copy(output_gene, samp_dir) + shutil.copy(output_stats, samp_dir) + def annotate_reads_card_test_body(self, read_type): manifest = self.get_data_path(f"MANIFEST_reads_{read_type}") if read_type == "single": reads = SingleLanePerSampleSingleEndFastqDirFmt() shutil.copy(manifest, os.path.join(str(reads), "MANIFEST")) - else: 
reads = SingleLanePerSamplePairedEndFastqDirFmt() shutil.copy(manifest, os.path.join(str(reads), "MANIFEST")) - - output_allele = self.get_data_path("output.allele_mapping_data.txt") - output_gene = self.get_data_path("output.gene_mapping_data.txt") - output_stats = self.get_data_path("output.overall_mapping_stats.txt") card_db = CARDDatabaseFormat() - - def copy_needed_files(cwd, samp, **kwargs): - samp_dir = os.path.join(cwd, samp) - shutil.copy(output_allele, samp_dir) - shutil.copy(output_gene, samp_dir) - shutil.copy(output_stats, samp_dir) - - def return_count_table(df_list): - count_table = deepcopy(self.table) - count_table.set_index("sample_id", inplace=True) - count_table = count_table.astype(float) - count_table.columns = count_table.columns.astype(float) - return count_table - - mock_run_rgi_bwt = MagicMock(side_effect=copy_needed_files) + mock_run_rgi_bwt = MagicMock(side_effect=self.copy_needed_files) mock_run_rgi_load = MagicMock() mock_read_in_txt = MagicMock() - mock_create_count_table = MagicMock(side_effect=return_count_table) + mag_test_class = TestAnnotateMagsCard() + mock_create_count_table = MagicMock( + side_effect=mag_test_class.return_count_table + ) with patch("q2_amr.card.reads.run_rgi_bwt", mock_run_rgi_bwt), patch( "q2_amr.card.reads.load_preprocess_card_db", mock_run_rgi_load ), patch("q2_amr.card.reads.read_in_txt", mock_read_in_txt), patch( @@ -87,10 +78,6 @@ def return_count_table(df_list): aligner="kma", rev=None, threads=1, - include_baits=False, - mapq=None, - mapped=None, - coverage=None, ), call( cwd=tmp_dir, @@ -99,10 +86,6 @@ def return_count_table(df_list): aligner="kma", rev=None, threads=1, - include_baits=False, - mapq=None, - mapped=None, - coverage=None, ), ] else: @@ -114,10 +97,6 @@ def return_count_table(df_list): rev=f"{reads}/sample1_00_L001_R2_001.fastq.gz", aligner="kma", threads=1, - include_baits=False, - mapq=None, - mapped=None, - coverage=None, ), call( cwd=tmp_dir, @@ -126,10 +105,6 @@ def 
return_count_table(df_list): rev=f"{reads}/sample2_00_L001_R2_001.fastq.gz", aligner="kma", threads=1, - include_baits=False, - mapq=None, - mapped=None, - coverage=None, ), ] exp_calls_mock_load = [ @@ -138,10 +113,26 @@ def return_count_table(df_list): call(tmp_dir, ANY, "load_fasta"), ] exp_calls_mock_read = [ - call(f"{tmp_dir}/sample1", "allele"), - call(f"{tmp_dir}/sample1", "gene"), - call(f"{tmp_dir}/sample2", "allele"), - call(f"{tmp_dir}/sample2", "gene"), + call( + path=f"{tmp_dir}/sample1/output.allele_mapping_data.txt", + col_name="ARO Accession", + samp_bin_name="sample1", + ), + call( + path=f"{tmp_dir}/sample1/output.gene_mapping_data.txt", + col_name="ARO Accession", + samp_bin_name="sample1", + ), + call( + path=f"{tmp_dir}/sample2/output.allele_mapping_data.txt", + col_name="ARO Accession", + samp_bin_name="sample2", + ), + call( + path=f"{tmp_dir}/sample2/output.gene_mapping_data.txt", + col_name="ARO Accession", + samp_bin_name="sample2", + ), ] exp_calls_mock_count = [call([ANY, ANY]), call([ANY, ANY])] mock_run_rgi_bwt.assert_has_calls(exp_calls_mock_run) @@ -172,10 +163,6 @@ def test_run_rgi_bwt(self): "path_rev", "bowtie2", 8, - True, - 3, - 5, - 3.2, ) mock_run_command.assert_called_once_with( [ @@ -193,26 +180,21 @@ def test_run_rgi_bwt(self): "bowtie2", "--read_two", "path_rev", - "--include_baits", - "--mapq", - "3", - "--mapped", - "5", - "--coverage", - "3.2", ], "path_tmp", verbose=True, ) - @patch("q2_amr.card.reads.run_command") - def test_exception_raised(self, mock_run_command): - mock_run_command.side_effect = subprocess.CalledProcessError(1, "cmd") + def test_exception_raised(self): expected_message = ( "An error was encountered while running rgi, " "(return code 1), please inspect stdout and stderr to learn more." 
) - with self.assertRaises(Exception) as cm: + + with patch( + "q2_amr.card.reads.run_command" + ) as mock_run_command, self.assertRaises(Exception) as cm: + mock_run_command.side_effect = subprocess.CalledProcessError(1, "cmd") run_rgi_bwt( cwd="path/cwd", samp="sample1", @@ -220,12 +202,8 @@ def test_exception_raised(self, mock_run_command): rev="path/rev", aligner="bwa", threads=1, - include_baits=True, - mapq=0.3, - mapped=0.3, - coverage=0.3, ) - self.assertEqual(str(cm.exception), expected_message) + self.assertEqual(str(cm.exception), expected_message) def test_move_files_allele(self): self.move_files_test_body("allele") @@ -312,54 +290,3 @@ def mock_plot_sample_stats(sample_stats, output_dir): self.assertTrue(os.path.exists(os.path.join(tmp, "sample_stats_plot.html"))) self.assertTrue(os.path.exists(os.path.join(tmp, "index.html"))) self.assertTrue(os.path.exists(os.path.join(tmp, "q2templateassets"))) - - mapping_data_sample1 = pd.DataFrame( - { - "ARO Accession": [3000796, 3000815, 3000805, 3000026], - "sample1": [1, 1, 1, 1], - } - ) - - mapping_data_sample2 = pd.DataFrame( - { - "ARO Accession": [3000797, 3000815, 3000805, 3000026], - "sample2": [1, 1, 1, 2], - } - ) - - table = pd.DataFrame( - { - "sample_id": ["sample1", "sample2"], - 3000796: [1, 0], - 3000815: [1, 1], - 3000805: [1, 1], - 3000026: [1, 2], - 3000797: [0, 1], - } - ) - - def test_read_in_txt_allele(self): - self.read_in_txt_test_body("allele", self.mapping_data_sample1) - - def test_read_in_txt_gene(self): - self.read_in_txt_test_body("gene", self.mapping_data_sample1) - - def read_in_txt_test_body(self, map_type, mapping_data): - mapping_file = self.get_data_path(f"output.{map_type}_mapping_data.txt") - exp = mapping_data - with tempfile.TemporaryDirectory() as tmp: - samp_dir = os.path.join(tmp, "sample1") - os.mkdir(samp_dir) - shutil.copy(mapping_file, samp_dir) - obs = read_in_txt(samp_dir, map_type) - obs["ARO Accession"] = obs["ARO Accession"].astype(int) - 
pd.testing.assert_frame_equal(exp, obs) - - def test_create_count_table(self): - df_list = [self.mapping_data_sample1, self.mapping_data_sample2] - obs = create_count_table(df_list) - exp = self.table - exp.set_index("sample_id", inplace=True) - exp = exp.astype(float) - exp.columns = exp.columns.astype(float) - pd.testing.assert_frame_equal(exp, obs) diff --git a/q2_amr/tests/card/test_utils.py b/q2_amr/tests/card/test_utils.py index 4fc49a8..10d3bc4 100644 --- a/q2_amr/tests/card/test_utils.py +++ b/q2_amr/tests/card/test_utils.py @@ -1,15 +1,38 @@ import subprocess from unittest.mock import patch +import pandas as pd from qiime2.plugin.testing import TestPluginBase +from test_mags import TestAnnotateMagsCard -from q2_amr.card.utils import load_preprocess_card_db +from q2_amr.card.utils import create_count_table, load_preprocess_card_db, read_in_txt from q2_amr.types import CARDDatabaseFormat class TestAnnotateReadsCARD(TestPluginBase): package = "q2_amr.tests" + mapping_data_sample1 = pd.DataFrame( + { + "ARO Accession": [3000796, 3000815, 3000805, 3000026], + "sample1": [1, 1, 1, 1], + } + ) + + mags_mapping_data_sample1 = pd.DataFrame( + { + "ARO": [3000796, 3000815, 3000805, 3000026], + "sample1": [1, 1, 1, 1], + } + ) + + mapping_data_sample2 = pd.DataFrame( + { + "ARO Accession": [3000797, 3000815, 3000805, 3000026], + "sample2": [1, 1, 1, 2], + } + ) + def test_load_card_db(self): card_db = CARDDatabaseFormat() with patch("q2_amr.card.utils.run_command") as mock_run_command: @@ -46,9 +69,7 @@ def test_load_card_db_fasta(self): verbose=True, ) - @patch("q2_amr.card.utils.run_command") - def test_exception_raised(self, mock_run_command): - mock_run_command.side_effect = subprocess.CalledProcessError(1, "cmd") + def test_exception_raised(self): tmp = "path/to/tmp" card_db = "path/to/card_db.json" expected_message = ( @@ -56,6 +77,45 @@ def test_exception_raised(self, mock_run_command): "(return code 1), please inspect stdout and stderr to learn more." 
) operation = "load" - with self.assertRaises(Exception) as cm: + with patch( + "q2_amr.card.utils.run_command" + ) as mock_run_command, self.assertRaises(Exception) as cm: + mock_run_command.side_effect = subprocess.CalledProcessError(1, "cmd") load_preprocess_card_db(tmp, card_db, operation) - self.assertEqual(str(cm.exception), expected_message) + self.assertEqual(str(cm.exception), expected_message) + + def test_read_in_txt_mags(self): + path = self.get_data_path("output.mags.txt") + self.read_in_txt_test_body( + path, "ARO", "sample1", self.mags_mapping_data_sample1 + ) + + def test_read_in_txt_allele(self): + path = self.get_data_path("output.allele_mapping_data.txt") + self.read_in_txt_test_body( + path, "ARO Accession", "sample1", self.mapping_data_sample1 + ) + + def test_read_in_txt_gene(self): + path = self.get_data_path("output.gene_mapping_data.txt") + self.read_in_txt_test_body( + path, "ARO Accession", "sample1", self.mapping_data_sample1 + ) + + def read_in_txt_test_body(self, path, col_name, samp_bin_name, mapping_data): + exp = mapping_data + obs = read_in_txt(path, col_name, samp_bin_name) + obs[col_name] = obs[col_name].astype(int) + pd.testing.assert_frame_equal(exp, obs) + + def test_create_count_table(self): + df_list = [self.mapping_data_sample1, self.mapping_data_sample2] + obs = create_count_table(df_list) + mag_test_class = TestAnnotateMagsCard() + exp = mag_test_class.table + exp.set_index("sample_id", inplace=True) + exp = exp.astype(float) + exp.columns = exp.columns.astype(float) + pd.testing.assert_frame_equal(exp, obs) + df_list_empty = [] + self.assertRaises(ValueError, create_count_table, df_list_empty) diff --git a/q2_amr/tests/data/output.mags.txt b/q2_amr/tests/data/output.mags.txt new file mode 100644 index 0000000..7d9fc51 --- /dev/null +++ b/q2_amr/tests/data/output.mags.txt @@ -0,0 +1,5 @@ +ORF_ID Contig Start Stop Orientation Cut_Off Pass_Bitscore Best_Hit_Bitscore Best_Hit_ARO Best_Identities ARO Model_type 
SNPs_in_Best_Hit_ARO Other_SNPs Drug Class Resistance Mechanism AMR Gene Family Predicted_DNA Predicted_Protein CARD_Protein_Sequence Percentage Length of Reference Sequence ID Model_ID Nudged Note +NC_000962.3_689 # 759789 # 763325 # 1 # ID=1_689;partial=00;start_type=GTG;rbs_motif=None;rbs_spacer=None;gc_cont=0.643 NC_000962.3_689 759789 763325 + Strict 2300 2394.77 Mycobacterium tuberculosis rpoB mutants conferring resistance to rifampicin 99.91 3000796 protein variant model D516G, H526T, L511R n/a rifamycin antibiotic antibiotic target alteration; antibiotic target replacement rifamycin-resistant beta-subunit of RNA polymerase (rpoB) GTGCTGGAAGGATGCATCTTGGCAGATTCCCGCCAGAGCAAAACAGCCGCTAGTCCTAGTCCGAGTCGCCCGCAAAGTTCCTCGAATAACTCCGTACCCGGAGCGCCAAACCGGGTCTCCTTCGCTAAGCTGCGCGAACCACTTGAGGTTCCGGGACTCCTTGACGTCCAGACCGATTCGTTCGAGTGGCTGATCGGTTCGCCGCGCTGGCGCGAATCCGCCGCCGAGCGGGGTGATGTCAACCCAGTGGGTGGCCTGGAAGAGGTGCTCTACGAGCTGTCTCCGATCGAGGACTTCTCCGGGTCGATGTCGTTGTCGTTCTCTGACCCTCGTTTCGACGATGTCAAGGCACCCGTCGACGAGTGCAAAGACAAGGACATGACGTACGCGGCTCCACTGTTCGTCACCGCCGAGTTCATCAACAACAACACCGGTGAGATCAAGAGTCAGACGGTGTTCATGGGTGACTTCCCGATGATGACCGAGAAGGGCACGTTCATCATCAACGGGACCGAGCGTGTGGTGGTCAGCCAGCTGGTGCGGTCGCCCGGGGTGTACTTCGACGAGACCATTGACAAGTCCACCGACAAGACGCTGCACAGCGTCAAGGTGATCCCGAGCCGCGGCGCGTGGCTCGAGTTTGACGTCGACAAGCGCGACACCGTCGGCGTGCGCATCGACCGCAAACGCCGGCAACCGGTCACCGTGCTGCTCAAGGCGCTGGGCTGGACCAGCGAGCAGATTGTCGAGCGGTTCGGGTTCTCCGAGATCATGCGATCGACGCTGGAGAAGGACAACACCGTCGGCACCGACGAGGCGCTGTTGGACATCTACCGCAAGCTGCGTCCGGGCGAGCCCCCGACCAAAGAGTCAGCGCAGACGCTGTTGGAAAACTTGTTCTTCAAGGAGAAGCGCTACGACCTGGCCCGCGTCGGTCGCTATAAGGTCAACAAGAAGCTCGGGCTGCATGTCGGCGAGCCCATCACGTCGTCGACGCTGACCGAAGAAGACGTCGTGGCCACCATCGAATATCTGGTCCGCTTGCACGAGGGTCAGACCACGATGACCGTTCCGGGCGGCGTCGAGGTGCCGGTGGAAACCGACGACATCGACCACTTCGGCAACCGCCGCCTGCGTACGGTCGGCGAGCTGATCCAAAACCAGATCCGGGTCGGCATGTCGCGGATGGAGCGGGTGGTCCGGGAGCGGATGACCACCCAGGACGTGGAGGCGATCACACCGCAGACGTTGATCAACATCCGGCCGGTGGTCGCCGCGATCAAGGAGTTCTTCGGCACCAGCCAGCTGAGCCAATTCATGGACCAGAACAACCCGCTGTCGGGGTTGACCCACA
AGCGCCGACTGTCGGCGCTGGGGCCCGGCGGTCTGTCACGTGAGCGTGCCGGGCTGGAGGTCCGCGACGTGCACCCGTCGCACTACGGCCGGATGTGCCCGATCGAAACCCCTGAGGGGCCCAACATCGGTCTGATCGGCTCGCTGTCGGTGTACGCGCGGGTCAACCCGTTCGGGTTCATCGAAACGCCGTACCGCAAGGTGGTCGACGGCGTGGTTAGCGACGAGATCGTGTACCTGACCGCCGACGAGGAGGACCGCCACGTGGTGGCACAGGCCAATTCGCCGATCGATGCGGACGGTCGCTTCGTCGAGCCGCGCGTGCTGGTCCGCCGCAAGGCGGGCGAGGTGGAGTACGTGCCCTCGTCTGAGGTGGACTACATGGACGTCTCGCCCCGCCAGATGGTGTCGGTGGCCACCGCGATGATTCCCTTCCTGGAGCACGACGACGCCAACCGTGCCCTCATGGGGGCAAACATGCAGCGCCAGGCGGTGCCGCTGGTCCGTAGCGAGGCCCCGCTGGTGGGCACCGGGATGGAGCTGCGCGCGGCGATCGACGCCGGCGACGTCGTCGTCGCCGAAGAAAGCGGCGTCATCGAGGAGGTGTCGGCCGACTACATCACTGTGATGCACGACAACGGCACCCGGCGTACCTACCGGATGCGCAAGTTTGCCCGGTCCAACCACGGCACTTGCGCCAACCAGTGCCCCATCGTGGACGCGGGCGACCGAGTCGAGGCCGGTCAGGTGATCGCCGACGGTCCCTGTACTGACGACGGCGAGATGGCGCTGGGCAAGAACCTGCTGGTGGCCATCATGCCGTGGGAGGGCCACAACTACGAGGACGCGATCATCCTGTCCAACCGCCTGGTCGAAGAGGACGTGCTCACCTCGATCCACATCGAGGAGCATGAGATCGATGCTCGCGACACCAAGCTGGGTGCGGAGGAGATCACCCGCGACATCCCGAACATCTCCGACGAGGTGCTCGCCGACCTGGATGAGCGGGGCATCGTGCGCATCGGTGCCGAGGTTCGCGACGGGGACATCCTGGTCGGCAAGGTCACCCCGAAGGGTGAGACCGAGCTGACGCCGGAGGAGCGGCTGCTGCGTGCCATCTTCGGTGAGAAGGCCCGCGAGGTGCGCGACACTTCGCTGAAGGTGCCGCACGGCGAATCCGGCAAGGTGATCGGCATTCGGGTGTTTTCCCGCGAGGACGAGGACGAGTTGCCGGCCGGTGTCAACGAGCTGGTGCGTGTGTATGTGGCTCAGAAACGCAAGATCTCCGACGGTGACAAGCTGGCCGGCCGGCACGGCAACAAGGGCGTGATCGGCAAGATCCTGCCGGTTGAGGACATGCCGTTCCTTGCCGACGGCACCCCGGTGGACATTATTTTGAACACCCACGGCGTGCCGCGACGGATGAACATCGGCCAGATTTTGGAGACCCACCTGGGTTGGTGTGCCCACAGCGGCTGGAAGGTCGACGCCGCCAAGGGGGTTCCGGACTGGGCCGCCAGGCTGCCCGACGAACTGCTCGAGGCGCAGCCGAACGCCATTGTGTCGACGCCGGTGTTCGACGGCGCCCAGGAGGCCGAGCTGCAGGGCCTGTTGTCGTGCACGCTGCCCAACCGCGACGGTGACGTGCTGGTCGACGCCGACGGCAAGGCCATGCTCTTCGACGGGCGCAGCGGCGAGCCGTTCCCGTACCCGGTCACGGTTGGCTACATGTACATCATGAAGCTGCACCACCTGGTGGACGACAAGATCCACGCCCGCTCCACCGGGCCGTACTCGATGATCACCCAGCAGCCGCTGGGCGGTAAGGCGCAGTTCGGTGGCCAGCGGTTCGGGGAGATGGAGTGCTGGGCCATGCAGGCCTACGGTGCTGCCTACACCCTGCAGGAGCTGTTGACCATCAAGTCCGATGACACCGTCGGCCGCGTCAAGGTGTACGAGGCGATCGTCAAGGGTGAGAACATCCCGGAGCCGGGCATCCCC
GAGTCGTTCAAGGTGCTGCTCAAAGAACTGCAGTCGCTGTGCCTCAACGTCGAGGTGCTATCGAGTGACGGTGCGGCGATCGAACTGCGCGAAGGTGAGGACGAGGACCTGGAGCGGGCCGCGGCCAACCTGGGAATCAATCTGTCCCGCAACGAATCCGCAAGTGTCGAGGATCTTGCGTAA MLEGCILADSRQSKTAASPSPSRPQSSSNNSVPGAPNRVSFAKLREPLEVPGLLDVQTDSFEWLIGSPRWRESAAERGDVNPVGGLEEVLYELSPIEDFSGSMSLSFSDPRFDDVKAPVDECKDKDMTYAAPLFVTAEFINNNTGEIKSQTVFMGDFPMMTEKGTFIINGTERVVVSQLVRSPGVYFDETIDKSTDKTLHSVKVIPSRGAWLEFDVDKRDTVGVRIDRKRRQPVTVLLKALGWTSEQIVERFGFSEIMRSTLEKDNTVGTDEALLDIYRKLRPGEPPTKESAQTLLENLFFKEKRYDLARVGRYKVNKKLGLHVGEPITSSTLTEEDVVATIEYLVRLHEGQTTMTVPGGVEVPVETDDIDHFGNRRLRTVGELIQNQIRVGMSRMERVVRERMTTQDVEAITPQTLINIRPVVAAIKEFFGTSQLSQFMDQNNPLSGLTHKRRLSALGPGGLSRERAGLEVRDVHPSHYGRMCPIETPEGPNIGLIGSLSVYARVNPFGFIETPYRKVVDGVVSDEIVYLTADEEDRHVVAQANSPIDADGRFVEPRVLVRRKAGEVEYVPSSEVDYMDVSPRQMVSVATAMIPFLEHDDANRALMGANMQRQAVPLVRSEAPLVGTGMELRAAIDAGDVVVAEESGVIEEVSADYITVMHDNGTRRTYRMRKFARSNHGTCANQCPIVDAGDRVEAGQVIADGPCTDDGEMALGKNLLVAIMPWEGHNYEDAIILSNRLVEEDVLTSIHIEEHEIDARDTKLGAEEITRDIPNISDEVLADLDERGIVRIGAEVRDGDILVGKVTPKGETELTPEERLLRAIFGEKAREVRDTSLKVPHGESGKVIGIRVFSREDEDELPAGVNELVRVYVAQKRKISDGDKLAGRHGNKGVIGKILPVEDMPFLADGTPVDIILNTHGVPRRMNIGQILETHLGWCAHSGWKVDAAKGVPDWAARLPDELLEAQPNAIVSTPVFDGAQEAELQGLLSCTLPNRDGDVLVDADGKAMLFDGRSGEPFPYPVTVGYMYIMKLHHLVDDKIHARSTGPYSMITQQPLGGKAQFGGQRFGEMECWAMQAYGAAYTLQELLTIKSDDTVGRVKVYEAIVKGENIPEPGIPESFKVLLKELQSLCLNVEVLSSDGAAIELREGEDEDLERAAANLGINLSRNESASVEDLA 
MADSRQSKTAASPSPSRPQSSSNNSVPGAPNRVSFAKLREPLEVPGLLDVQTDSFEWLIGSPRWRESAAERGDVNPVGGLEEVLYELSPIEDFSGSMSLSFSDPRFDDVKAPVDECKDKDMTYAAPLFVTAEFINNNTGEIKSQTVFMGDFPMMTEKGTFIINGTERVVVSQLVRSPGVYFDETIDKSTDKTLHSVKVIPSRGAWLEFDVDKRDTVGVRIDRKRRQPVTVLLKALGWTSEQIVERFGFSEIMRSTLEKDNTVGTDEALLDIYRKLRPGEPPTKESAQTLLENLFFKEKRYDLARVGRYKVNKKLGLHVGEPITSSTLTEEDVVATIEYLVRLHEGQTTMTVPGGVEVPVETDDIDHFGNRRLRTVGELIQNQIRVGMSRMERVVRERMTTQDVEAITPQTLINIRPVVAAIKEFFGTSQLSQFMDQNNPLSGLTHKRRLSALGPGGLSRERAGLEVRDVHPSHYGRMCPIETPEGPNIGLIGSLSVYARVNPFGFIETPYRKVVDGVVSDEIVYLTADEEDRHVVAQANSPIDADGRFVEPRVLVRRKAGEVEYVPSSEVDYMDVSPRQMVSVATAMIPFLEHDDANRALMGANMQRQAVPLVRSEAPLVGTGMELRAAIDAGDVVVAEESGVIEEVSADYITVMHDNGTRRTYRMRKFARSNHGTCANQCPIVDAGDRVEAGQVIADGPCTDDGEMALGKNLLVAIMPWEGHNYEDAIILSNRLVEEDVLTSIHIEEHEIDARDTKLGAEEITRDIPNISDEVLADLDERGIVRIGAEVRDGDILVGKVTPKGETELTPEERLLRAIFGEKAREVRDTSLKVPHGESGKVIGIRVFSREDEDELPAGVNELVRVYVAQKRKISDGDKLAGRHGNKGVIGKILPVEDMPFLADGTPVDIILNTHGVPRRMNIGQILETHLGWCAHSGWKVDAAKGVPDWAARLPDELLEAQPNAIVSTPVFDGAQEAELQGLLSCTLPNRDGDVLVDADGKAMLFDGRSGEPFPYPVTVGYMYIMKLHHLVDDKIHARSTGPYSMITQQPLGGKAQFGGQRFGEMECWAMQAYGAAYTLQELLTIKSDDTVGRVKVYEAIVKGENIPEPGIPESFKVLLKELQSLCLNVEVLSSDGAAIELREGEDEDLERAAANLGINLSRNESASVEDLA 100.51 gnl|BL_ORD_ID|2005|hsp_num:0 1237 +NC_000962.3_689 # 759789 # 763325 # 1 # ID=1_689;partial=00;start_type=GTG;rbs_motif=None;rbs_spacer=None;gc_cont=0.643 NC_000962.3_689 759789 763325 + Strict 2300 2394.77 Mycobacterium tuberculosis rpoB mutants conferring resistance to rifampicin 99.91 3000815 protein variant model D516G, H526T, L511R n/a rifamycin antibiotic antibiotic target alteration; antibiotic target replacement rifamycin-resistant beta-subunit of RNA polymerase (rpoB) 
GTGCTGGAAGGATGCATCTTGGCAGATTCCCGCCAGAGCAAAACAGCCGCTAGTCCTAGTCCGAGTCGCCCGCAAAGTTCCTCGAATAACTCCGTACCCGGAGCGCCAAACCGGGTCTCCTTCGCTAAGCTGCGCGAACCACTTGAGGTTCCGGGACTCCTTGACGTCCAGACCGATTCGTTCGAGTGGCTGATCGGTTCGCCGCGCTGGCGCGAATCCGCCGCCGAGCGGGGTGATGTCAACCCAGTGGGTGGCCTGGAAGAGGTGCTCTACGAGCTGTCTCCGATCGAGGACTTCTCCGGGTCGATGTCGTTGTCGTTCTCTGACCCTCGTTTCGACGATGTCAAGGCACCCGTCGACGAGTGCAAAGACAAGGACATGACGTACGCGGCTCCACTGTTCGTCACCGCCGAGTTCATCAACAACAACACCGGTGAGATCAAGAGTCAGACGGTGTTCATGGGTGACTTCCCGATGATGACCGAGAAGGGCACGTTCATCATCAACGGGACCGAGCGTGTGGTGGTCAGCCAGCTGGTGCGGTCGCCCGGGGTGTACTTCGACGAGACCATTGACAAGTCCACCGACAAGACGCTGCACAGCGTCAAGGTGATCCCGAGCCGCGGCGCGTGGCTCGAGTTTGACGTCGACAAGCGCGACACCGTCGGCGTGCGCATCGACCGCAAACGCCGGCAACCGGTCACCGTGCTGCTCAAGGCGCTGGGCTGGACCAGCGAGCAGATTGTCGAGCGGTTCGGGTTCTCCGAGATCATGCGATCGACGCTGGAGAAGGACAACACCGTCGGCACCGACGAGGCGCTGTTGGACATCTACCGCAAGCTGCGTCCGGGCGAGCCCCCGACCAAAGAGTCAGCGCAGACGCTGTTGGAAAACTTGTTCTTCAAGGAGAAGCGCTACGACCTGGCCCGCGTCGGTCGCTATAAGGTCAACAAGAAGCTCGGGCTGCATGTCGGCGAGCCCATCACGTCGTCGACGCTGACCGAAGAAGACGTCGTGGCCACCATCGAATATCTGGTCCGCTTGCACGAGGGTCAGACCACGATGACCGTTCCGGGCGGCGTCGAGGTGCCGGTGGAAACCGACGACATCGACCACTTCGGCAACCGCCGCCTGCGTACGGTCGGCGAGCTGATCCAAAACCAGATCCGGGTCGGCATGTCGCGGATGGAGCGGGTGGTCCGGGAGCGGATGACCACCCAGGACGTGGAGGCGATCACACCGCAGACGTTGATCAACATCCGGCCGGTGGTCGCCGCGATCAAGGAGTTCTTCGGCACCAGCCAGCTGAGCCAATTCATGGACCAGAACAACCCGCTGTCGGGGTTGACCCACAAGCGCCGACTGTCGGCGCTGGGGCCCGGCGGTCTGTCACGTGAGCGTGCCGGGCTGGAGGTCCGCGACGTGCACCCGTCGCACTACGGCCGGATGTGCCCGATCGAAACCCCTGAGGGGCCCAACATCGGTCTGATCGGCTCGCTGTCGGTGTACGCGCGGGTCAACCCGTTCGGGTTCATCGAAACGCCGTACCGCAAGGTGGTCGACGGCGTGGTTAGCGACGAGATCGTGTACCTGACCGCCGACGAGGAGGACCGCCACGTGGTGGCACAGGCCAATTCGCCGATCGATGCGGACGGTCGCTTCGTCGAGCCGCGCGTGCTGGTCCGCCGCAAGGCGGGCGAGGTGGAGTACGTGCCCTCGTCTGAGGTGGACTACATGGACGTCTCGCCCCGCCAGATGGTGTCGGTGGCCACCGCGATGATTCCCTTCCTGGAGCACGACGACGCCAACCGTGCCCTCATGGGGGCAAACATGCAGCGCCAGGCGGTGCCGCTGGTCCGTAGCGAGGCCCCGCTGGTGGGCACCGGGATGGAGCTGCGCGCGGCGATCGACGCCGGCGACGTCGTCGTCGCCGAAGAAAGCGGCGTCATCGAGGAGGTGTCGGCCGACTACATCACTGTGATGCACGACAACGGCACCCG
GCGTACCTACCGGATGCGCAAGTTTGCCCGGTCCAACCACGGCACTTGCGCCAACCAGTGCCCCATCGTGGACGCGGGCGACCGAGTCGAGGCCGGTCAGGTGATCGCCGACGGTCCCTGTACTGACGACGGCGAGATGGCGCTGGGCAAGAACCTGCTGGTGGCCATCATGCCGTGGGAGGGCCACAACTACGAGGACGCGATCATCCTGTCCAACCGCCTGGTCGAAGAGGACGTGCTCACCTCGATCCACATCGAGGAGCATGAGATCGATGCTCGCGACACCAAGCTGGGTGCGGAGGAGATCACCCGCGACATCCCGAACATCTCCGACGAGGTGCTCGCCGACCTGGATGAGCGGGGCATCGTGCGCATCGGTGCCGAGGTTCGCGACGGGGACATCCTGGTCGGCAAGGTCACCCCGAAGGGTGAGACCGAGCTGACGCCGGAGGAGCGGCTGCTGCGTGCCATCTTCGGTGAGAAGGCCCGCGAGGTGCGCGACACTTCGCTGAAGGTGCCGCACGGCGAATCCGGCAAGGTGATCGGCATTCGGGTGTTTTCCCGCGAGGACGAGGACGAGTTGCCGGCCGGTGTCAACGAGCTGGTGCGTGTGTATGTGGCTCAGAAACGCAAGATCTCCGACGGTGACAAGCTGGCCGGCCGGCACGGCAACAAGGGCGTGATCGGCAAGATCCTGCCGGTTGAGGACATGCCGTTCCTTGCCGACGGCACCCCGGTGGACATTATTTTGAACACCCACGGCGTGCCGCGACGGATGAACATCGGCCAGATTTTGGAGACCCACCTGGGTTGGTGTGCCCACAGCGGCTGGAAGGTCGACGCCGCCAAGGGGGTTCCGGACTGGGCCGCCAGGCTGCCCGACGAACTGCTCGAGGCGCAGCCGAACGCCATTGTGTCGACGCCGGTGTTCGACGGCGCCCAGGAGGCCGAGCTGCAGGGCCTGTTGTCGTGCACGCTGCCCAACCGCGACGGTGACGTGCTGGTCGACGCCGACGGCAAGGCCATGCTCTTCGACGGGCGCAGCGGCGAGCCGTTCCCGTACCCGGTCACGGTTGGCTACATGTACATCATGAAGCTGCACCACCTGGTGGACGACAAGATCCACGCCCGCTCCACCGGGCCGTACTCGATGATCACCCAGCAGCCGCTGGGCGGTAAGGCGCAGTTCGGTGGCCAGCGGTTCGGGGAGATGGAGTGCTGGGCCATGCAGGCCTACGGTGCTGCCTACACCCTGCAGGAGCTGTTGACCATCAAGTCCGATGACACCGTCGGCCGCGTCAAGGTGTACGAGGCGATCGTCAAGGGTGAGAACATCCCGGAGCCGGGCATCCCCGAGTCGTTCAAGGTGCTGCTCAAAGAACTGCAGTCGCTGTGCCTCAACGTCGAGGTGCTATCGAGTGACGGTGCGGCGATCGAACTGCGCGAAGGTGAGGACGAGGACCTGGAGCGGGCCGCGGCCAACCTGGGAATCAATCTGTCCCGCAACGAATCCGCAAGTGTCGAGGATCTTGCGTAA 
MLEGCILADSRQSKTAASPSPSRPQSSSNNSVPGAPNRVSFAKLREPLEVPGLLDVQTDSFEWLIGSPRWRESAAERGDVNPVGGLEEVLYELSPIEDFSGSMSLSFSDPRFDDVKAPVDECKDKDMTYAAPLFVTAEFINNNTGEIKSQTVFMGDFPMMTEKGTFIINGTERVVVSQLVRSPGVYFDETIDKSTDKTLHSVKVIPSRGAWLEFDVDKRDTVGVRIDRKRRQPVTVLLKALGWTSEQIVERFGFSEIMRSTLEKDNTVGTDEALLDIYRKLRPGEPPTKESAQTLLENLFFKEKRYDLARVGRYKVNKKLGLHVGEPITSSTLTEEDVVATIEYLVRLHEGQTTMTVPGGVEVPVETDDIDHFGNRRLRTVGELIQNQIRVGMSRMERVVRERMTTQDVEAITPQTLINIRPVVAAIKEFFGTSQLSQFMDQNNPLSGLTHKRRLSALGPGGLSRERAGLEVRDVHPSHYGRMCPIETPEGPNIGLIGSLSVYARVNPFGFIETPYRKVVDGVVSDEIVYLTADEEDRHVVAQANSPIDADGRFVEPRVLVRRKAGEVEYVPSSEVDYMDVSPRQMVSVATAMIPFLEHDDANRALMGANMQRQAVPLVRSEAPLVGTGMELRAAIDAGDVVVAEESGVIEEVSADYITVMHDNGTRRTYRMRKFARSNHGTCANQCPIVDAGDRVEAGQVIADGPCTDDGEMALGKNLLVAIMPWEGHNYEDAIILSNRLVEEDVLTSIHIEEHEIDARDTKLGAEEITRDIPNISDEVLADLDERGIVRIGAEVRDGDILVGKVTPKGETELTPEERLLRAIFGEKAREVRDTSLKVPHGESGKVIGIRVFSREDEDELPAGVNELVRVYVAQKRKISDGDKLAGRHGNKGVIGKILPVEDMPFLADGTPVDIILNTHGVPRRMNIGQILETHLGWCAHSGWKVDAAKGVPDWAARLPDELLEAQPNAIVSTPVFDGAQEAELQGLLSCTLPNRDGDVLVDADGKAMLFDGRSGEPFPYPVTVGYMYIMKLHHLVDDKIHARSTGPYSMITQQPLGGKAQFGGQRFGEMECWAMQAYGAAYTLQELLTIKSDDTVGRVKVYEAIVKGENIPEPGIPESFKVLLKELQSLCLNVEVLSSDGAAIELREGEDEDLERAAANLGINLSRNESASVEDLA 
MADSRQSKTAASPSPSRPQSSSNNSVPGAPNRVSFAKLREPLEVPGLLDVQTDSFEWLIGSPRWRESAAERGDVNPVGGLEEVLYELSPIEDFSGSMSLSFSDPRFDDVKAPVDECKDKDMTYAAPLFVTAEFINNNTGEIKSQTVFMGDFPMMTEKGTFIINGTERVVVSQLVRSPGVYFDETIDKSTDKTLHSVKVIPSRGAWLEFDVDKRDTVGVRIDRKRRQPVTVLLKALGWTSEQIVERFGFSEIMRSTLEKDNTVGTDEALLDIYRKLRPGEPPTKESAQTLLENLFFKEKRYDLARVGRYKVNKKLGLHVGEPITSSTLTEEDVVATIEYLVRLHEGQTTMTVPGGVEVPVETDDIDHFGNRRLRTVGELIQNQIRVGMSRMERVVRERMTTQDVEAITPQTLINIRPVVAAIKEFFGTSQLSQFMDQNNPLSGLTHKRRLSALGPGGLSRERAGLEVRDVHPSHYGRMCPIETPEGPNIGLIGSLSVYARVNPFGFIETPYRKVVDGVVSDEIVYLTADEEDRHVVAQANSPIDADGRFVEPRVLVRRKAGEVEYVPSSEVDYMDVSPRQMVSVATAMIPFLEHDDANRALMGANMQRQAVPLVRSEAPLVGTGMELRAAIDAGDVVVAEESGVIEEVSADYITVMHDNGTRRTYRMRKFARSNHGTCANQCPIVDAGDRVEAGQVIADGPCTDDGEMALGKNLLVAIMPWEGHNYEDAIILSNRLVEEDVLTSIHIEEHEIDARDTKLGAEEITRDIPNISDEVLADLDERGIVRIGAEVRDGDILVGKVTPKGETELTPEERLLRAIFGEKAREVRDTSLKVPHGESGKVIGIRVFSREDEDELPAGVNELVRVYVAQKRKISDGDKLAGRHGNKGVIGKILPVEDMPFLADGTPVDIILNTHGVPRRMNIGQILETHLGWCAHSGWKVDAAKGVPDWAARLPDELLEAQPNAIVSTPVFDGAQEAELQGLLSCTLPNRDGDVLVDADGKAMLFDGRSGEPFPYPVTVGYMYIMKLHHLVDDKIHARSTGPYSMITQQPLGGKAQFGGQRFGEMECWAMQAYGAAYTLQELLTIKSDDTVGRVKVYEAIVKGENIPEPGIPESFKVLLKELQSLCLNVEVLSSDGAAIELREGEDEDLERAAANLGINLSRNESASVEDLA 100.51 gnl|BL_ORD_ID|2005|hsp_num:0 1237 +NC_000962.3_689 # 759789 # 763325 # 1 # ID=1_689;partial=00;start_type=GTG;rbs_motif=None;rbs_spacer=None;gc_cont=0.643 NC_000962.3_689 759789 763325 + Strict 2300 2394.77 Mycobacterium tuberculosis rpoB mutants conferring resistance to rifampicin 99.91 3000805 protein variant model D516G, H526T, L511R n/a rifamycin antibiotic antibiotic target alteration; antibiotic target replacement rifamycin-resistant beta-subunit of RNA polymerase (rpoB) 
GTGCTGGAAGGATGCATCTTGGCAGATTCCCGCCAGAGCAAAACAGCCGCTAGTCCTAGTCCGAGTCGCCCGCAAAGTTCCTCGAATAACTCCGTACCCGGAGCGCCAAACCGGGTCTCCTTCGCTAAGCTGCGCGAACCACTTGAGGTTCCGGGACTCCTTGACGTCCAGACCGATTCGTTCGAGTGGCTGATCGGTTCGCCGCGCTGGCGCGAATCCGCCGCCGAGCGGGGTGATGTCAACCCAGTGGGTGGCCTGGAAGAGGTGCTCTACGAGCTGTCTCCGATCGAGGACTTCTCCGGGTCGATGTCGTTGTCGTTCTCTGACCCTCGTTTCGACGATGTCAAGGCACCCGTCGACGAGTGCAAAGACAAGGACATGACGTACGCGGCTCCACTGTTCGTCACCGCCGAGTTCATCAACAACAACACCGGTGAGATCAAGAGTCAGACGGTGTTCATGGGTGACTTCCCGATGATGACCGAGAAGGGCACGTTCATCATCAACGGGACCGAGCGTGTGGTGGTCAGCCAGCTGGTGCGGTCGCCCGGGGTGTACTTCGACGAGACCATTGACAAGTCCACCGACAAGACGCTGCACAGCGTCAAGGTGATCCCGAGCCGCGGCGCGTGGCTCGAGTTTGACGTCGACAAGCGCGACACCGTCGGCGTGCGCATCGACCGCAAACGCCGGCAACCGGTCACCGTGCTGCTCAAGGCGCTGGGCTGGACCAGCGAGCAGATTGTCGAGCGGTTCGGGTTCTCCGAGATCATGCGATCGACGCTGGAGAAGGACAACACCGTCGGCACCGACGAGGCGCTGTTGGACATCTACCGCAAGCTGCGTCCGGGCGAGCCCCCGACCAAAGAGTCAGCGCAGACGCTGTTGGAAAACTTGTTCTTCAAGGAGAAGCGCTACGACCTGGCCCGCGTCGGTCGCTATAAGGTCAACAAGAAGCTCGGGCTGCATGTCGGCGAGCCCATCACGTCGTCGACGCTGACCGAAGAAGACGTCGTGGCCACCATCGAATATCTGGTCCGCTTGCACGAGGGTCAGACCACGATGACCGTTCCGGGCGGCGTCGAGGTGCCGGTGGAAACCGACGACATCGACCACTTCGGCAACCGCCGCCTGCGTACGGTCGGCGAGCTGATCCAAAACCAGATCCGGGTCGGCATGTCGCGGATGGAGCGGGTGGTCCGGGAGCGGATGACCACCCAGGACGTGGAGGCGATCACACCGCAGACGTTGATCAACATCCGGCCGGTGGTCGCCGCGATCAAGGAGTTCTTCGGCACCAGCCAGCTGAGCCAATTCATGGACCAGAACAACCCGCTGTCGGGGTTGACCCACAAGCGCCGACTGTCGGCGCTGGGGCCCGGCGGTCTGTCACGTGAGCGTGCCGGGCTGGAGGTCCGCGACGTGCACCCGTCGCACTACGGCCGGATGTGCCCGATCGAAACCCCTGAGGGGCCCAACATCGGTCTGATCGGCTCGCTGTCGGTGTACGCGCGGGTCAACCCGTTCGGGTTCATCGAAACGCCGTACCGCAAGGTGGTCGACGGCGTGGTTAGCGACGAGATCGTGTACCTGACCGCCGACGAGGAGGACCGCCACGTGGTGGCACAGGCCAATTCGCCGATCGATGCGGACGGTCGCTTCGTCGAGCCGCGCGTGCTGGTCCGCCGCAAGGCGGGCGAGGTGGAGTACGTGCCCTCGTCTGAGGTGGACTACATGGACGTCTCGCCCCGCCAGATGGTGTCGGTGGCCACCGCGATGATTCCCTTCCTGGAGCACGACGACGCCAACCGTGCCCTCATGGGGGCAAACATGCAGCGCCAGGCGGTGCCGCTGGTCCGTAGCGAGGCCCCGCTGGTGGGCACCGGGATGGAGCTGCGCGCGGCGATCGACGCCGGCGACGTCGTCGTCGCCGAAGAAAGCGGCGTCATCGAGGAGGTGTCGGCCGACTACATCACTGTGATGCACGACAACGGCACCCG
GCGTACCTACCGGATGCGCAAGTTTGCCCGGTCCAACCACGGCACTTGCGCCAACCAGTGCCCCATCGTGGACGCGGGCGACCGAGTCGAGGCCGGTCAGGTGATCGCCGACGGTCCCTGTACTGACGACGGCGAGATGGCGCTGGGCAAGAACCTGCTGGTGGCCATCATGCCGTGGGAGGGCCACAACTACGAGGACGCGATCATCCTGTCCAACCGCCTGGTCGAAGAGGACGTGCTCACCTCGATCCACATCGAGGAGCATGAGATCGATGCTCGCGACACCAAGCTGGGTGCGGAGGAGATCACCCGCGACATCCCGAACATCTCCGACGAGGTGCTCGCCGACCTGGATGAGCGGGGCATCGTGCGCATCGGTGCCGAGGTTCGCGACGGGGACATCCTGGTCGGCAAGGTCACCCCGAAGGGTGAGACCGAGCTGACGCCGGAGGAGCGGCTGCTGCGTGCCATCTTCGGTGAGAAGGCCCGCGAGGTGCGCGACACTTCGCTGAAGGTGCCGCACGGCGAATCCGGCAAGGTGATCGGCATTCGGGTGTTTTCCCGCGAGGACGAGGACGAGTTGCCGGCCGGTGTCAACGAGCTGGTGCGTGTGTATGTGGCTCAGAAACGCAAGATCTCCGACGGTGACAAGCTGGCCGGCCGGCACGGCAACAAGGGCGTGATCGGCAAGATCCTGCCGGTTGAGGACATGCCGTTCCTTGCCGACGGCACCCCGGTGGACATTATTTTGAACACCCACGGCGTGCCGCGACGGATGAACATCGGCCAGATTTTGGAGACCCACCTGGGTTGGTGTGCCCACAGCGGCTGGAAGGTCGACGCCGCCAAGGGGGTTCCGGACTGGGCCGCCAGGCTGCCCGACGAACTGCTCGAGGCGCAGCCGAACGCCATTGTGTCGACGCCGGTGTTCGACGGCGCCCAGGAGGCCGAGCTGCAGGGCCTGTTGTCGTGCACGCTGCCCAACCGCGACGGTGACGTGCTGGTCGACGCCGACGGCAAGGCCATGCTCTTCGACGGGCGCAGCGGCGAGCCGTTCCCGTACCCGGTCACGGTTGGCTACATGTACATCATGAAGCTGCACCACCTGGTGGACGACAAGATCCACGCCCGCTCCACCGGGCCGTACTCGATGATCACCCAGCAGCCGCTGGGCGGTAAGGCGCAGTTCGGTGGCCAGCGGTTCGGGGAGATGGAGTGCTGGGCCATGCAGGCCTACGGTGCTGCCTACACCCTGCAGGAGCTGTTGACCATCAAGTCCGATGACACCGTCGGCCGCGTCAAGGTGTACGAGGCGATCGTCAAGGGTGAGAACATCCCGGAGCCGGGCATCCCCGAGTCGTTCAAGGTGCTGCTCAAAGAACTGCAGTCGCTGTGCCTCAACGTCGAGGTGCTATCGAGTGACGGTGCGGCGATCGAACTGCGCGAAGGTGAGGACGAGGACCTGGAGCGGGCCGCGGCCAACCTGGGAATCAATCTGTCCCGCAACGAATCCGCAAGTGTCGAGGATCTTGCGTAA 
MLEGCILADSRQSKTAASPSPSRPQSSSNNSVPGAPNRVSFAKLREPLEVPGLLDVQTDSFEWLIGSPRWRESAAERGDVNPVGGLEEVLYELSPIEDFSGSMSLSFSDPRFDDVKAPVDECKDKDMTYAAPLFVTAEFINNNTGEIKSQTVFMGDFPMMTEKGTFIINGTERVVVSQLVRSPGVYFDETIDKSTDKTLHSVKVIPSRGAWLEFDVDKRDTVGVRIDRKRRQPVTVLLKALGWTSEQIVERFGFSEIMRSTLEKDNTVGTDEALLDIYRKLRPGEPPTKESAQTLLENLFFKEKRYDLARVGRYKVNKKLGLHVGEPITSSTLTEEDVVATIEYLVRLHEGQTTMTVPGGVEVPVETDDIDHFGNRRLRTVGELIQNQIRVGMSRMERVVRERMTTQDVEAITPQTLINIRPVVAAIKEFFGTSQLSQFMDQNNPLSGLTHKRRLSALGPGGLSRERAGLEVRDVHPSHYGRMCPIETPEGPNIGLIGSLSVYARVNPFGFIETPYRKVVDGVVSDEIVYLTADEEDRHVVAQANSPIDADGRFVEPRVLVRRKAGEVEYVPSSEVDYMDVSPRQMVSVATAMIPFLEHDDANRALMGANMQRQAVPLVRSEAPLVGTGMELRAAIDAGDVVVAEESGVIEEVSADYITVMHDNGTRRTYRMRKFARSNHGTCANQCPIVDAGDRVEAGQVIADGPCTDDGEMALGKNLLVAIMPWEGHNYEDAIILSNRLVEEDVLTSIHIEEHEIDARDTKLGAEEITRDIPNISDEVLADLDERGIVRIGAEVRDGDILVGKVTPKGETELTPEERLLRAIFGEKAREVRDTSLKVPHGESGKVIGIRVFSREDEDELPAGVNELVRVYVAQKRKISDGDKLAGRHGNKGVIGKILPVEDMPFLADGTPVDIILNTHGVPRRMNIGQILETHLGWCAHSGWKVDAAKGVPDWAARLPDELLEAQPNAIVSTPVFDGAQEAELQGLLSCTLPNRDGDVLVDADGKAMLFDGRSGEPFPYPVTVGYMYIMKLHHLVDDKIHARSTGPYSMITQQPLGGKAQFGGQRFGEMECWAMQAYGAAYTLQELLTIKSDDTVGRVKVYEAIVKGENIPEPGIPESFKVLLKELQSLCLNVEVLSSDGAAIELREGEDEDLERAAANLGINLSRNESASVEDLA 
MADSRQSKTAASPSPSRPQSSSNNSVPGAPNRVSFAKLREPLEVPGLLDVQTDSFEWLIGSPRWRESAAERGDVNPVGGLEEVLYELSPIEDFSGSMSLSFSDPRFDDVKAPVDECKDKDMTYAAPLFVTAEFINNNTGEIKSQTVFMGDFPMMTEKGTFIINGTERVVVSQLVRSPGVYFDETIDKSTDKTLHSVKVIPSRGAWLEFDVDKRDTVGVRIDRKRRQPVTVLLKALGWTSEQIVERFGFSEIMRSTLEKDNTVGTDEALLDIYRKLRPGEPPTKESAQTLLENLFFKEKRYDLARVGRYKVNKKLGLHVGEPITSSTLTEEDVVATIEYLVRLHEGQTTMTVPGGVEVPVETDDIDHFGNRRLRTVGELIQNQIRVGMSRMERVVRERMTTQDVEAITPQTLINIRPVVAAIKEFFGTSQLSQFMDQNNPLSGLTHKRRLSALGPGGLSRERAGLEVRDVHPSHYGRMCPIETPEGPNIGLIGSLSVYARVNPFGFIETPYRKVVDGVVSDEIVYLTADEEDRHVVAQANSPIDADGRFVEPRVLVRRKAGEVEYVPSSEVDYMDVSPRQMVSVATAMIPFLEHDDANRALMGANMQRQAVPLVRSEAPLVGTGMELRAAIDAGDVVVAEESGVIEEVSADYITVMHDNGTRRTYRMRKFARSNHGTCANQCPIVDAGDRVEAGQVIADGPCTDDGEMALGKNLLVAIMPWEGHNYEDAIILSNRLVEEDVLTSIHIEEHEIDARDTKLGAEEITRDIPNISDEVLADLDERGIVRIGAEVRDGDILVGKVTPKGETELTPEERLLRAIFGEKAREVRDTSLKVPHGESGKVIGIRVFSREDEDELPAGVNELVRVYVAQKRKISDGDKLAGRHGNKGVIGKILPVEDMPFLADGTPVDIILNTHGVPRRMNIGQILETHLGWCAHSGWKVDAAKGVPDWAARLPDELLEAQPNAIVSTPVFDGAQEAELQGLLSCTLPNRDGDVLVDADGKAMLFDGRSGEPFPYPVTVGYMYIMKLHHLVDDKIHARSTGPYSMITQQPLGGKAQFGGQRFGEMECWAMQAYGAAYTLQELLTIKSDDTVGRVKVYEAIVKGENIPEPGIPESFKVLLKELQSLCLNVEVLSSDGAAIELREGEDEDLERAAANLGINLSRNESASVEDLA 100.51 gnl|BL_ORD_ID|2005|hsp_num:0 1237 +NC_000962.3_689 # 759789 # 763325 # 1 # ID=1_689;partial=00;start_type=GTG;rbs_motif=None;rbs_spacer=None;gc_cont=0.643 NC_000962.3_689 759789 763325 + Strict 2300 2394.77 Mycobacterium tuberculosis rpoB mutants conferring resistance to rifampicin 99.91 3000026 protein variant model D516G, H526T, L511R n/a rifamycin antibiotic antibiotic target alteration; antibiotic target replacement rifamycin-resistant beta-subunit of RNA polymerase (rpoB) 
GTGCTGGAAGGATGCATCTTGGCAGATTCCCGCCAGAGCAAAACAGCCGCTAGTCCTAGTCCGAGTCGCCCGCAAAGTTCCTCGAATAACTCCGTACCCGGAGCGCCAAACCGGGTCTCCTTCGCTAAGCTGCGCGAACCACTTGAGGTTCCGGGACTCCTTGACGTCCAGACCGATTCGTTCGAGTGGCTGATCGGTTCGCCGCGCTGGCGCGAATCCGCCGCCGAGCGGGGTGATGTCAACCCAGTGGGTGGCCTGGAAGAGGTGCTCTACGAGCTGTCTCCGATCGAGGACTTCTCCGGGTCGATGTCGTTGTCGTTCTCTGACCCTCGTTTCGACGATGTCAAGGCACCCGTCGACGAGTGCAAAGACAAGGACATGACGTACGCGGCTCCACTGTTCGTCACCGCCGAGTTCATCAACAACAACACCGGTGAGATCAAGAGTCAGACGGTGTTCATGGGTGACTTCCCGATGATGACCGAGAAGGGCACGTTCATCATCAACGGGACCGAGCGTGTGGTGGTCAGCCAGCTGGTGCGGTCGCCCGGGGTGTACTTCGACGAGACCATTGACAAGTCCACCGACAAGACGCTGCACAGCGTCAAGGTGATCCCGAGCCGCGGCGCGTGGCTCGAGTTTGACGTCGACAAGCGCGACACCGTCGGCGTGCGCATCGACCGCAAACGCCGGCAACCGGTCACCGTGCTGCTCAAGGCGCTGGGCTGGACCAGCGAGCAGATTGTCGAGCGGTTCGGGTTCTCCGAGATCATGCGATCGACGCTGGAGAAGGACAACACCGTCGGCACCGACGAGGCGCTGTTGGACATCTACCGCAAGCTGCGTCCGGGCGAGCCCCCGACCAAAGAGTCAGCGCAGACGCTGTTGGAAAACTTGTTCTTCAAGGAGAAGCGCTACGACCTGGCCCGCGTCGGTCGCTATAAGGTCAACAAGAAGCTCGGGCTGCATGTCGGCGAGCCCATCACGTCGTCGACGCTGACCGAAGAAGACGTCGTGGCCACCATCGAATATCTGGTCCGCTTGCACGAGGGTCAGACCACGATGACCGTTCCGGGCGGCGTCGAGGTGCCGGTGGAAACCGACGACATCGACCACTTCGGCAACCGCCGCCTGCGTACGGTCGGCGAGCTGATCCAAAACCAGATCCGGGTCGGCATGTCGCGGATGGAGCGGGTGGTCCGGGAGCGGATGACCACCCAGGACGTGGAGGCGATCACACCGCAGACGTTGATCAACATCCGGCCGGTGGTCGCCGCGATCAAGGAGTTCTTCGGCACCAGCCAGCTGAGCCAATTCATGGACCAGAACAACCCGCTGTCGGGGTTGACCCACAAGCGCCGACTGTCGGCGCTGGGGCCCGGCGGTCTGTCACGTGAGCGTGCCGGGCTGGAGGTCCGCGACGTGCACCCGTCGCACTACGGCCGGATGTGCCCGATCGAAACCCCTGAGGGGCCCAACATCGGTCTGATCGGCTCGCTGTCGGTGTACGCGCGGGTCAACCCGTTCGGGTTCATCGAAACGCCGTACCGCAAGGTGGTCGACGGCGTGGTTAGCGACGAGATCGTGTACCTGACCGCCGACGAGGAGGACCGCCACGTGGTGGCACAGGCCAATTCGCCGATCGATGCGGACGGTCGCTTCGTCGAGCCGCGCGTGCTGGTCCGCCGCAAGGCGGGCGAGGTGGAGTACGTGCCCTCGTCTGAGGTGGACTACATGGACGTCTCGCCCCGCCAGATGGTGTCGGTGGCCACCGCGATGATTCCCTTCCTGGAGCACGACGACGCCAACCGTGCCCTCATGGGGGCAAACATGCAGCGCCAGGCGGTGCCGCTGGTCCGTAGCGAGGCCCCGCTGGTGGGCACCGGGATGGAGCTGCGCGCGGCGATCGACGCCGGCGACGTCGTCGTCGCCGAAGAAAGCGGCGTCATCGAGGAGGTGTCGGCCGACTACATCACTGTGATGCACGACAACGGCACCCG
GCGTACCTACCGGATGCGCAAGTTTGCCCGGTCCAACCACGGCACTTGCGCCAACCAGTGCCCCATCGTGGACGCGGGCGACCGAGTCGAGGCCGGTCAGGTGATCGCCGACGGTCCCTGTACTGACGACGGCGAGATGGCGCTGGGCAAGAACCTGCTGGTGGCCATCATGCCGTGGGAGGGCCACAACTACGAGGACGCGATCATCCTGTCCAACCGCCTGGTCGAAGAGGACGTGCTCACCTCGATCCACATCGAGGAGCATGAGATCGATGCTCGCGACACCAAGCTGGGTGCGGAGGAGATCACCCGCGACATCCCGAACATCTCCGACGAGGTGCTCGCCGACCTGGATGAGCGGGGCATCGTGCGCATCGGTGCCGAGGTTCGCGACGGGGACATCCTGGTCGGCAAGGTCACCCCGAAGGGTGAGACCGAGCTGACGCCGGAGGAGCGGCTGCTGCGTGCCATCTTCGGTGAGAAGGCCCGCGAGGTGCGCGACACTTCGCTGAAGGTGCCGCACGGCGAATCCGGCAAGGTGATCGGCATTCGGGTGTTTTCCCGCGAGGACGAGGACGAGTTGCCGGCCGGTGTCAACGAGCTGGTGCGTGTGTATGTGGCTCAGAAACGCAAGATCTCCGACGGTGACAAGCTGGCCGGCCGGCACGGCAACAAGGGCGTGATCGGCAAGATCCTGCCGGTTGAGGACATGCCGTTCCTTGCCGACGGCACCCCGGTGGACATTATTTTGAACACCCACGGCGTGCCGCGACGGATGAACATCGGCCAGATTTTGGAGACCCACCTGGGTTGGTGTGCCCACAGCGGCTGGAAGGTCGACGCCGCCAAGGGGGTTCCGGACTGGGCCGCCAGGCTGCCCGACGAACTGCTCGAGGCGCAGCCGAACGCCATTGTGTCGACGCCGGTGTTCGACGGCGCCCAGGAGGCCGAGCTGCAGGGCCTGTTGTCGTGCACGCTGCCCAACCGCGACGGTGACGTGCTGGTCGACGCCGACGGCAAGGCCATGCTCTTCGACGGGCGCAGCGGCGAGCCGTTCCCGTACCCGGTCACGGTTGGCTACATGTACATCATGAAGCTGCACCACCTGGTGGACGACAAGATCCACGCCCGCTCCACCGGGCCGTACTCGATGATCACCCAGCAGCCGCTGGGCGGTAAGGCGCAGTTCGGTGGCCAGCGGTTCGGGGAGATGGAGTGCTGGGCCATGCAGGCCTACGGTGCTGCCTACACCCTGCAGGAGCTGTTGACCATCAAGTCCGATGACACCGTCGGCCGCGTCAAGGTGTACGAGGCGATCGTCAAGGGTGAGAACATCCCGGAGCCGGGCATCCCCGAGTCGTTCAAGGTGCTGCTCAAAGAACTGCAGTCGCTGTGCCTCAACGTCGAGGTGCTATCGAGTGACGGTGCGGCGATCGAACTGCGCGAAGGTGAGGACGAGGACCTGGAGCGGGCCGCGGCCAACCTGGGAATCAATCTGTCCCGCAACGAATCCGCAAGTGTCGAGGATCTTGCGTAA 
MLEGCILADSRQSKTAASPSPSRPQSSSNNSVPGAPNRVSFAKLREPLEVPGLLDVQTDSFEWLIGSPRWRESAAERGDVNPVGGLEEVLYELSPIEDFSGSMSLSFSDPRFDDVKAPVDECKDKDMTYAAPLFVTAEFINNNTGEIKSQTVFMGDFPMMTEKGTFIINGTERVVVSQLVRSPGVYFDETIDKSTDKTLHSVKVIPSRGAWLEFDVDKRDTVGVRIDRKRRQPVTVLLKALGWTSEQIVERFGFSEIMRSTLEKDNTVGTDEALLDIYRKLRPGEPPTKESAQTLLENLFFKEKRYDLARVGRYKVNKKLGLHVGEPITSSTLTEEDVVATIEYLVRLHEGQTTMTVPGGVEVPVETDDIDHFGNRRLRTVGELIQNQIRVGMSRMERVVRERMTTQDVEAITPQTLINIRPVVAAIKEFFGTSQLSQFMDQNNPLSGLTHKRRLSALGPGGLSRERAGLEVRDVHPSHYGRMCPIETPEGPNIGLIGSLSVYARVNPFGFIETPYRKVVDGVVSDEIVYLTADEEDRHVVAQANSPIDADGRFVEPRVLVRRKAGEVEYVPSSEVDYMDVSPRQMVSVATAMIPFLEHDDANRALMGANMQRQAVPLVRSEAPLVGTGMELRAAIDAGDVVVAEESGVIEEVSADYITVMHDNGTRRTYRMRKFARSNHGTCANQCPIVDAGDRVEAGQVIADGPCTDDGEMALGKNLLVAIMPWEGHNYEDAIILSNRLVEEDVLTSIHIEEHEIDARDTKLGAEEITRDIPNISDEVLADLDERGIVRIGAEVRDGDILVGKVTPKGETELTPEERLLRAIFGEKAREVRDTSLKVPHGESGKVIGIRVFSREDEDELPAGVNELVRVYVAQKRKISDGDKLAGRHGNKGVIGKILPVEDMPFLADGTPVDIILNTHGVPRRMNIGQILETHLGWCAHSGWKVDAAKGVPDWAARLPDELLEAQPNAIVSTPVFDGAQEAELQGLLSCTLPNRDGDVLVDADGKAMLFDGRSGEPFPYPVTVGYMYIMKLHHLVDDKIHARSTGPYSMITQQPLGGKAQFGGQRFGEMECWAMQAYGAAYTLQELLTIKSDDTVGRVKVYEAIVKGENIPEPGIPESFKVLLKELQSLCLNVEVLSSDGAAIELREGEDEDLERAAANLGINLSRNESASVEDLA 
MADSRQSKTAASPSPSRPQSSSNNSVPGAPNRVSFAKLREPLEVPGLLDVQTDSFEWLIGSPRWRESAAERGDVNPVGGLEEVLYELSPIEDFSGSMSLSFSDPRFDDVKAPVDECKDKDMTYAAPLFVTAEFINNNTGEIKSQTVFMGDFPMMTEKGTFIINGTERVVVSQLVRSPGVYFDETIDKSTDKTLHSVKVIPSRGAWLEFDVDKRDTVGVRIDRKRRQPVTVLLKALGWTSEQIVERFGFSEIMRSTLEKDNTVGTDEALLDIYRKLRPGEPPTKESAQTLLENLFFKEKRYDLARVGRYKVNKKLGLHVGEPITSSTLTEEDVVATIEYLVRLHEGQTTMTVPGGVEVPVETDDIDHFGNRRLRTVGELIQNQIRVGMSRMERVVRERMTTQDVEAITPQTLINIRPVVAAIKEFFGTSQLSQFMDQNNPLSGLTHKRRLSALGPGGLSRERAGLEVRDVHPSHYGRMCPIETPEGPNIGLIGSLSVYARVNPFGFIETPYRKVVDGVVSDEIVYLTADEEDRHVVAQANSPIDADGRFVEPRVLVRRKAGEVEYVPSSEVDYMDVSPRQMVSVATAMIPFLEHDDANRALMGANMQRQAVPLVRSEAPLVGTGMELRAAIDAGDVVVAEESGVIEEVSADYITVMHDNGTRRTYRMRKFARSNHGTCANQCPIVDAGDRVEAGQVIADGPCTDDGEMALGKNLLVAIMPWEGHNYEDAIILSNRLVEEDVLTSIHIEEHEIDARDTKLGAEEITRDIPNISDEVLADLDERGIVRIGAEVRDGDILVGKVTPKGETELTPEERLLRAIFGEKAREVRDTSLKVPHGESGKVIGIRVFSREDEDELPAGVNELVRVYVAQKRKISDGDKLAGRHGNKGVIGKILPVEDMPFLADGTPVDIILNTHGVPRRMNIGQILETHLGWCAHSGWKVDAAKGVPDWAARLPDELLEAQPNAIVSTPVFDGAQEAELQGLLSCTLPNRDGDVLVDADGKAMLFDGRSGEPFPYPVTVGYMYIMKLHHLVDDKIHARSTGPYSMITQQPLGGKAQFGGQRFGEMECWAMQAYGAAYTLQELLTIKSDDTVGRVKVYEAIVKGENIPEPGIPESFKVLLKELQSLCLNVEVLSSDGAAIELREGEDEDLERAAANLGINLSRNESASVEDLA 100.51 gnl|BL_ORD_ID|2005|hsp_num:0 1237 diff --git a/q2_amr/types/_transformer.py b/q2_amr/types/_transformer.py index 163740a..0700715 100644 --- a/q2_amr/types/_transformer.py +++ b/q2_amr/types/_transformer.py @@ -175,8 +175,7 @@ def _11(data: CARDAnnotationDirectoryFormat) -> ProteinsDirectoryFormat: return proteins_directory -def create_dir_structure(data, seq_type, genes_protein_directory): - annotation_dir = str(data) +def create_dir_structure(annotation_dir, seq_type, genes_protein_directory): for sample in os.listdir(annotation_dir): for bin in os.listdir(os.path.join(annotation_dir, sample)): for file in os.listdir(os.path.join(annotation_dir, sample, bin)): @@ -233,9 +232,41 @@ def read_mapping_data(data_path, variant): @plugin.register_transformer def _12(data: CARDAlleleAnnotationDirectoryFormat) -> 
qiime2.Metadata: - return read_mapping_data(data, "allele") + return tabulate_data(data, "allele") @plugin.register_transformer def _13(data: CARDGeneAnnotationDirectoryFormat) -> qiime2.Metadata: - return read_mapping_data(data, "gene") + return tabulate_data(data, "gene") + + +@plugin.register_transformer +def _14(data: CARDAnnotationDirectoryFormat) -> qiime2.Metadata: + return tabulate_data(data, "mags") + + +def tabulate_data(data_path, data_type): + df_list = [] + for samp in os.listdir(str(data_path)): + if data_type == "mags": + for bin in os.listdir(os.path.join(str(data_path), samp)): + file_path = os.path.join( + str(data_path), samp, bin, "amr_annotation.txt" + ) + df = pd.read_csv(file_path, sep="\t") + df.insert(0, "Sample Name", f"{samp}/{bin}") + df["Nudged"] = df["Nudged"].astype(str) + elif data_type == "gene" or "allele": + file_path = os.path.join( + str(data_path), samp, f"{data_type}_mapping_data.txt" + ) + df = pd.read_csv(file_path, sep="\t") + df.insert(0, "Sample Name", samp) + df_list.append(df) + df_combined = pd.concat(df_list, axis=0) + df_combined.reset_index(inplace=True, drop=True) + df_combined.index.name = "id" + df_combined.index = df_combined.index.astype(str) + if data_type == "mags": + df_combined.rename(columns={"ID": "HSP_Identifier"}, inplace=True) + return qiime2.Metadata(df_combined) diff --git a/q2_amr/types/_type.py b/q2_amr/types/_type.py index 6e74527..c281c4e 100644 --- a/q2_amr/types/_type.py +++ b/q2_amr/types/_type.py @@ -10,5 +10,9 @@ CARDDatabase = SemanticType("CARDDatabase") CARDAnnotation = SemanticType("CARDAnnotation", variant_of=SampleData.field["type"]) -CARDAlleleAnnotation = SemanticType("CARDAlleleAnnotation") -CARDGeneAnnotation = SemanticType("CARDGeneAnnotation") +CARDAlleleAnnotation = SemanticType( + "CARDAlleleAnnotation", variant_of=SampleData.field["type"] +) +CARDGeneAnnotation = SemanticType( + "CARDGeneAnnotation", variant_of=SampleData.field["type"] +) diff --git 
a/q2_amr/types/tests/data/annotate_mags_output/sample1/bin1/sample1_bin1_amr_annotation.json b/q2_amr/types/tests/data/annotate_mags_output/sample1/bin1/amr_annotation.json similarity index 100% rename from q2_amr/types/tests/data/annotate_mags_output/sample1/bin1/sample1_bin1_amr_annotation.json rename to q2_amr/types/tests/data/annotate_mags_output/sample1/bin1/amr_annotation.json diff --git a/q2_amr/types/tests/data/annotate_mags_output/sample1/bin1/sample1_bin1_amr_annotation.txt b/q2_amr/types/tests/data/annotate_mags_output/sample1/bin1/amr_annotation.txt similarity index 100% rename from q2_amr/types/tests/data/annotate_mags_output/sample1/bin1/sample1_bin1_amr_annotation.txt rename to q2_amr/types/tests/data/annotate_mags_output/sample1/bin1/amr_annotation.txt diff --git a/q2_amr/types/tests/data/annotate_mags_output/sample2/bin1/sample2_bin1_amr_annotation.json b/q2_amr/types/tests/data/annotate_mags_output/sample2/bin1/amr_annotation.json similarity index 100% rename from q2_amr/types/tests/data/annotate_mags_output/sample2/bin1/sample2_bin1_amr_annotation.json rename to q2_amr/types/tests/data/annotate_mags_output/sample2/bin1/amr_annotation.json diff --git a/q2_amr/types/tests/data/annotate_mags_output/sample2/bin1/sample2_bin1_amr_annotation.txt b/q2_amr/types/tests/data/annotate_mags_output/sample2/bin1/amr_annotation.txt similarity index 96% rename from q2_amr/types/tests/data/annotate_mags_output/sample2/bin1/sample2_bin1_amr_annotation.txt rename to q2_amr/types/tests/data/annotate_mags_output/sample2/bin1/amr_annotation.txt index 1f16da6..dbf3c40 100644 --- a/q2_amr/types/tests/data/annotate_mags_output/sample2/bin1/sample2_bin1_amr_annotation.txt +++ b/q2_amr/types/tests/data/annotate_mags_output/sample2/bin1/amr_annotation.txt @@ -1,3 +1,3 @@ ORF_ID Contig Start Stop Orientation Cut_Off Pass_Bitscore Best_Hit_Bitscore Best_Hit_ARO Best_Identities ARO Model_type SNPs_in_Best_Hit_ARO Other_SNPs Drug Class Resistance Mechanism AMR Gene Family 
Predicted_DNA Predicted_Protein CARD_Protein_Sequence Percentage Length of Reference Sequence ID Model_ID Nudged Note -k141_1197_2 # 683 # 1345 # 1 # ID=49_2;partial=00;start_type=ATG;rbs_motif=GGA/GAG/AGG;rbs_spacer=5-10bp;gc_cont=0.588 k141_1197_2 683 1345 + Strict 200 326.635 vanX gene in vanO cluster 75.25 3002954 protein homolog model n/a n/a glycopeptide antibiotic antibiotic target alteration vanX; glycopeptide resistance gene cluster ATGAAGGGTGACTTCGTTTTCGTTGATGAGTGTGTTCCAGGAGTCCGCTGGGATGCCAAATACGCCACATCGGACAACTTCACCGGCAAACCGGTGGAGGGATATCTGGCCAACCGGATTGTCGGGACCAGGGCTTTGTGCTCAGCGCTGGAAAGCGTGCGGCAACGGGCTGCATCCCGCGGTTTCGGGTTGCTGCTGTGGGACGGCTACCGCCCGCAGCGCGCCGTGGATTCGTTCCTGCACTGGGCGAAACAACCAGAGGACGGCGCAACTAAACGCCGCCACTATCCAAATATTTCCCGAGCGGAAATGTTCGAAAGAGGATACGTAGCCTCCAAGTCCGGCCACAGCCGGGGCAGCACCGTCGATTTGACCCTGTATGACCTGGTTACCGGTGACCTCGTTCCCATGGGCGGCGGCCACGACTTGATGGATGAAATTTCGCATCACGGAGCGCCCGGCATCACCCGGGCCGAGACCGGCAACCGCCACACGCTGCGTTCGCTCATGGAGGCCTGCGGTTTCAGTTCCTACGATTCTGAGTGGTGGCATTACACCCTGAAGAACGAACCCTATCCGGACACTTATTTCGATTTTCCCGTTACGGATCCGCTTCCATCAGACGCCGCAACGGCCAGGGACCTTGTCTTCCAGAATGCATAG MKGDFVFVDECVPGVRWDAKYATSDNFTGKPVEGYLANRIVGTRALCSALESVRQRAASRGFGLLLWDGYRPQRAVDSFLHWAKQPEDGATKRRHYPNISRAEMFERGYVASKSGHSRGSTVDLTLYDLVTGDLVPMGGGHDLMDEISHHGAPGITRAETGNRHTLRSLMEACGFSSYDSEWWHYTLKNEPYPDTYFDFPVTDPLPSDAATARDLVFQNA MNDDFVYVDDWVPGVRWDAKYATWDNFTGKPVDGYLANRIVGTRALCAALEQAREKAASLGFGLLLWDGYRPRRAVDSFLRWSEQPEDGQTKQRHYPNIDRPEMLEKGYVATQSGHSRGGAVDLTLYHLATGELAPMGGDHDLMDPISHHRARGIKPIESKNRELLRSIMEDCGFDRYDCEWWHYTLKREPYPDVYFDFPIT 108.91 gnl|BL_ORD_ID|1672|hsp_num:0 1699 -k141_10683_1 # 1 # 453 # 1 # ID=423_1;partial=10;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.658 k141_10683_1 1 453 + Strict 50 90.8929 vanY gene in vanM cluster 38.62 3002961 protein homolog model n/a n/a glycopeptide antibiotic antibiotic target alteration vanY; glycopeptide resistance gene cluster 
GAGGCTGCAGGGGCCTACCGGCAAATGGCCGCGGAAGCGGGCGCCGCCGGAGTTCCCATGTCCGCGGTGAGCGGCTTTCGGACCGGAGCAGAGCAGGACCAGCTGTACGTCTCCTACACGGAGAACTTTGGGCCGGAGGCAGCCGACGCCATTTCGGCCCGTCCCGGGTACAGCGAGCATCAGACGGGGCTGGCCATCGACATCGCCAACCCGGACGGAACCTGCGCCCTGGAATCCTGCTTCGCCGAAACCTTGGCGGGTTCGTGGGCGGCCGCCAATGCCCAGCACTACGGCTTCATCATCCGTTATCCGGCAGGAGCCGAGCACATCACCGGGTACGCCCATGAACCGTGGCATCTGCGGTACGTGGGGACGGAACATGCCCGGACAATGCACGACGCCGGCACCACCTTGGAAGAATATCTGGGACTTCCTGCCGCGCCGGGTTACTGA EAAGAYRQMAAEAGAAGVPMSAVSGFRTGAEQDQLYVSYTENFGPEAADAISARPGYSEHQTGLAIDIANPDGTCALESCFAETLAGSWAAANAQHYGFIIRYPAGAEHITGYAHEPWHLRYVGTEHARTMHDAGTTLEEYLGLPAAPGY MVFQGNLLLVNNEYPVLEESIKTDVVNLFKHDELTKGYELLNREIYLSEKVAREFSEMVDAAEKEGVRHFSINSGFRNFDEQNALYQEMGSDYALPAGYSEHNLGLALDIGSTQMEMSEAPEGKWLEDNAWEYGFILRYPMDKTAITGIQYEPWHFRYVGLPHSAIIEEKNFALEEYLDFLKEQKSISGTIHGENYEISYYPITEKTDIEMPANLHYEISGNNMDGVIVTVYR 64.38 gnl|BL_ORD_ID|1673|hsp_num:0 1713 +k141_1197_2 # 683 # 1345 # 1 # ID=49_2;partial=00;start_type=ATG;rbs_motif=GGA/GAG/AGG;rbs_spacer=5-10bp;gc_cont=0.588 k141_1197_2 683 1345 + Strict 200 326.635 vanX gene in vanO cluster 75.25 3002954 protein homolog model n/a n/a glycopeptide antibiotic antibiotic target alteration vanX; glycopeptide resistance gene cluster ATGAAGGGTGACTTCGTTTTCGTTGATGAGTGTGTTCCAGGAGTCCGCTGGGATGCCAAATACGCCACATCGGACAACTTCACCGGCAAACCGGTGGAGGGATATCTGGCCAACCGGATTGTCGGGACCAGGGCTTTGTGCTCAGCGCTGGAAAGCGTGCGGCAACGGGCTGCATCCCGCGGTTTCGGGTTGCTGCTGTGGGACGGCTACCGCCCGCAGCGCGCCGTGGATTCGTTCCTGCACTGGGCGAAACAACCAGAGGACGGCGCAACTAAACGCCGCCACTATCCAAATATTTCCCGAGCGGAAATGTTCGAAAGAGGATACGTAGCCTCCAAGTCCGGCCACAGCCGGGGCAGCACCGTCGATTTGACCCTGTATGACCTGGTTACCGGTGACCTCGTTCCCATGGGCGGCGGCCACGACTTGATGGATGAAATTTCGCATCACGGAGCGCCCGGCATCACCCGGGCCGAGACCGGCAACCGCCACACGCTGCGTTCGCTCATGGAGGCCTGCGGTTTCAGTTCCTACGATTCTGAGTGGTGGCATTACACCCTGAAGAACGAACCCTATCCGGACACTTATTTCGATTTTCCCGTTACGGATCCGCTTCCATCAGACGCCGCAACGGCCAGGGACCTTGTCTTCCAGAATGCATAG 
MKGDFVFVDECVPGVRWDAKYATSDNFTGKPVEGYLANRIVGTRALCSALESVRQRAASRGFGLLLWDGYRPQRAVDSFLHWAKQPEDGATKRRHYPNISRAEMFERGYVASKSGHSRGSTVDLTLYDLVTGDLVPMGGGHDLMDEISHHGAPGITRAETGNRHTLRSLMEACGFSSYDSEWWHYTLKNEPYPDTYFDFPVTDPLPSDAATARDLVFQNA MNDDFVYVDDWVPGVRWDAKYATWDNFTGKPVDGYLANRIVGTRALCAALEQAREKAASLGFGLLLWDGYRPRRAVDSFLRWSEQPEDGQTKQRHYPNIDRPEMLEKGYVATQSGHSRGGAVDLTLYHLATGELAPMGGDHDLMDPISHHRARGIKPIESKNRELLRSIMEDCGFDRYDCEWWHYTLKREPYPDVYFDFPIT 108.91 gnl|BL_ORD_ID|1674|hsp_num:0 1699 +k141_10683_1 # 1 # 453 # 1 # ID=423_1;partial=10;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.658 k141_10683_1 1 453 + Strict 50 90.8929 vanY gene in vanM cluster 38.62 3002961 protein homolog model n/a n/a glycopeptide antibiotic antibiotic target alteration vanY; glycopeptide resistance gene cluster GAGGCTGCAGGGGCCTACCGGCAAATGGCCGCGGAAGCGGGCGCCGCCGGAGTTCCCATGTCCGCGGTGAGCGGCTTTCGGACCGGAGCAGAGCAGGACCAGCTGTACGTCTCCTACACGGAGAACTTTGGGCCGGAGGCAGCCGACGCCATTTCGGCCCGTCCCGGGTACAGCGAGCATCAGACGGGGCTGGCCATCGACATCGCCAACCCGGACGGAACCTGCGCCCTGGAATCCTGCTTCGCCGAAACCTTGGCGGGTTCGTGGGCGGCCGCCAATGCCCAGCACTACGGCTTCATCATCCGTTATCCGGCAGGAGCCGAGCACATCACCGGGTACGCCCATGAACCGTGGCATCTGCGGTACGTGGGGACGGAACATGCCCGGACAATGCACGACGCCGGCACCACCTTGGAAGAATATCTGGGACTTCCTGCCGCGCCGGGTTACTGA EAAGAYRQMAAEAGAAGVPMSAVSGFRTGAEQDQLYVSYTENFGPEAADAISARPGYSEHQTGLAIDIANPDGTCALESCFAETLAGSWAAANAQHYGFIIRYPAGAEHITGYAHEPWHLRYVGTEHARTMHDAGTTLEEYLGLPAAPGY MVFQGNLLLVNNEYPVLEESIKTDVVNLFKHDELTKGYELLNREIYLSEKVAREFSEMVDAAEKEGVRHFSINSGFRNFDEQNALYQEMGSDYALPAGYSEHNLGLALDIGSTQMEMSEAPEGKWLEDNAWEYGFILRYPMDKTAITGIQYEPWHFRYVGLPHSAIIEEKNFALEEYLDFLKEQKSISGTIHGENYEISYYPITEKTDIEMPANLHYEISGNNMDGVIVTVYR 64.38 gnl|BL_ORD_ID|1675|hsp_num:0 1713 diff --git a/q2_amr/types/tests/data/annotate_reads_output/sample1/sample1.allele_mapping_data.txt b/q2_amr/types/tests/data/annotate_reads_output/sample1/allele_mapping_data.txt similarity index 100% rename from q2_amr/types/tests/data/annotate_reads_output/sample1/sample1.allele_mapping_data.txt rename to 
q2_amr/types/tests/data/annotate_reads_output/sample1/allele_mapping_data.txt diff --git a/q2_amr/types/tests/data/annotate_reads_output/sample1/sample1.gene_mapping_data.txt b/q2_amr/types/tests/data/annotate_reads_output/sample1/gene_mapping_data.txt similarity index 100% rename from q2_amr/types/tests/data/annotate_reads_output/sample1/sample1.gene_mapping_data.txt rename to q2_amr/types/tests/data/annotate_reads_output/sample1/gene_mapping_data.txt diff --git a/q2_amr/types/tests/data/annotate_reads_output/sample1/sample1.overall_mapping_stats.txt b/q2_amr/types/tests/data/annotate_reads_output/sample1/overall_mapping_stats.txt similarity index 100% rename from q2_amr/types/tests/data/annotate_reads_output/sample1/sample1.overall_mapping_stats.txt rename to q2_amr/types/tests/data/annotate_reads_output/sample1/overall_mapping_stats.txt diff --git a/q2_amr/types/tests/data/annotate_reads_output/sample2/sample2.allele_mapping_data.txt b/q2_amr/types/tests/data/annotate_reads_output/sample2/allele_mapping_data.txt similarity index 100% rename from q2_amr/types/tests/data/annotate_reads_output/sample2/sample2.allele_mapping_data.txt rename to q2_amr/types/tests/data/annotate_reads_output/sample2/allele_mapping_data.txt diff --git a/q2_amr/types/tests/data/annotate_reads_output/sample2/sample2.gene_mapping_data.txt b/q2_amr/types/tests/data/annotate_reads_output/sample2/gene_mapping_data.txt similarity index 100% rename from q2_amr/types/tests/data/annotate_reads_output/sample2/sample2.gene_mapping_data.txt rename to q2_amr/types/tests/data/annotate_reads_output/sample2/gene_mapping_data.txt diff --git a/q2_amr/types/tests/data/annotate_reads_output/sample2/sample2.overall_mapping_stats.txt b/q2_amr/types/tests/data/annotate_reads_output/sample2/overall_mapping_stats.txt similarity index 100% rename from q2_amr/types/tests/data/annotate_reads_output/sample2/sample2.overall_mapping_stats.txt rename to 
q2_amr/types/tests/data/annotate_reads_output/sample2/overall_mapping_stats.txt diff --git a/q2_amr/types/tests/test_types_formats_transformers.py b/q2_amr/types/tests/test_types_formats_transformers.py index 363b67d..db5414b 100644 --- a/q2_amr/types/tests/test_types_formats_transformers.py +++ b/q2_amr/types/tests/test_types_formats_transformers.py @@ -224,6 +224,16 @@ def test_CARDAnnotationDirectoryFormat_to_ProteinsDirectoryFormat_transformer(se os.path.exists(os.path.join(str(obs), "sample2", "bin1_proteins.fasta")) ) + def test_CARDAnnotationDirectoryFormat_to_qiime2_Metadata_transformer(self): + transformer = self.get_transformer( + CARDAnnotationDirectoryFormat, qiime2.Metadata + ) + annotation = CARDAnnotationDirectoryFormat( + self.get_data_path("annotate_mags_output"), "r" + ) + metadata_obt = transformer(annotation) + self.assertIsInstance(metadata_obt, qiime2.Metadata) + class TestCARDReadsAnnotationTypesAndFormats(AMRTypesTestPluginBase): def test_CARDGeneAnnotationDirectoryFormat_to_qiime2_Metadata_transformer(self):