From b66134022f61decb8fe3bed4a48af26f85ad6bd5 Mon Sep 17 00:00:00 2001 From: VinzentRisch <100149044+VinzentRisch@users.noreply.github.com> Date: Thu, 25 Jul 2024 14:36:55 +0200 Subject: [PATCH] ENH: Removed `AMRFinderPlusAnnotation` type (#95) --- q2_amr/amrfinderplus/sample_data.py | 24 ++++++++++--------- .../amrfinderplus/tests/test_sample_data.py | 6 ++--- q2_amr/amrfinderplus/tests/test_utils.py | 2 +- q2_amr/amrfinderplus/types/__init__.py | 2 -- q2_amr/amrfinderplus/types/_format.py | 23 +++++++----------- q2_amr/amrfinderplus/types/_type.py | 6 ++--- .../tests/test_types_formats_transformers.py | 23 ++++++++++++++---- q2_amr/amrfinderplus/utils.py | 2 +- q2_amr/plugin_setup.py | 22 +++++++---------- 9 files changed, 55 insertions(+), 55 deletions(-) diff --git a/q2_amr/amrfinderplus/sample_data.py b/q2_amr/amrfinderplus/sample_data.py index 6a5265e..9a7c4fa 100644 --- a/q2_amr/amrfinderplus/sample_data.py +++ b/q2_amr/amrfinderplus/sample_data.py @@ -32,9 +32,9 @@ def annotate_sample_data_amrfinderplus( GenesDirectoryFormat, pd.DataFrame, ): - annotations = AMRFinderPlusAnnotationsDirFmt() - mutations = AMRFinderPlusAnnotationsDirFmt() - genes = GenesDirectoryFormat() + amr_annotations = AMRFinderPlusAnnotationsDirFmt() + amr_all_mutations = AMRFinderPlusAnnotationsDirFmt() + amr_genes = GenesDirectoryFormat() frequency_list = [] # Create list of paths to all mags or contigs @@ -86,20 +86,22 @@ def annotate_sample_data_amrfinderplus( # Move mutations file. If it is not created, create an empty mutations file des_path_mutations = os.path.join( - str(mutations), + str(amr_all_mutations), sample_id, - f"{mag_id + '_' if mag_id else ''}amr_mutations.tsv", + f"{mag_id + '_' if mag_id else ''}amr_all_mutations.tsv", ) os.makedirs(os.path.dirname(des_path_mutations), exist_ok=True) if organism: - shutil.move(os.path.join(tmp, "amr_mutations.tsv"), des_path_mutations) + shutil.move( + os.path.join(tmp, "amr_all_mutations.tsv"), des_path_mutations + ) else: with open(des_path_mutations, "w"): pass # Move annotations file des_path_annotations = os.path.join( - str(annotations), + str(amr_annotations), sample_id, f"{mag_id + '_' if mag_id else ''}amr_annotations.tsv", ) @@ -110,14 +112,14 @@ def annotate_sample_data_amrfinderplus( shutil.move( os.path.join(tmp, "amr_genes.fasta"), os.path.join( - str(genes), f"{mag_id if mag_id else sample_id}_amr_genes.fasta" + str(amr_genes), f"{mag_id if mag_id else sample_id}_amr_genes.fasta" ), ) feature_table = create_count_table(df_list=frequency_list) return ( - annotations, - mutations, - genes, + amr_annotations, + amr_all_mutations, + amr_genes, feature_table, ) diff --git a/q2_amr/amrfinderplus/tests/test_sample_data.py b/q2_amr/amrfinderplus/tests/test_sample_data.py index 9f705af..8c2ff05 100644 --- a/q2_amr/amrfinderplus/tests/test_sample_data.py +++ b/q2_amr/amrfinderplus/tests/test_sample_data.py @@ -30,7 +30,7 @@ def mock_run_amrfinderplus_n( with open(os.path.join(working_dir, "amr_annotations.tsv"), "w"): pass if organism: - with open(os.path.join(working_dir, "amr_mutations.tsv"), "w"): + with open(os.path.join(working_dir, "amr_all_mutations.tsv"), "w"): pass if dna_sequences: with open(os.path.join(working_dir, "amr_genes.fasta"), "w"): @@ -38,13 +38,13 @@ def mock_run_amrfinderplus_n( files_contigs = [ "amr_annotations.tsv", - "amr_mutations.tsv", + "amr_all_mutations.tsv", "sample1_amr_genes.fasta", ] files_mags = [ "mag1_amr_annotations.tsv", - "mag1_amr_mutations.tsv", + "mag1_amr_all_mutations.tsv", "mag1_amr_genes.fasta", ] diff --git a/q2_amr/amrfinderplus/tests/test_utils.py b/q2_amr/amrfinderplus/tests/test_utils.py index 1e7ae59..4b7f436 100644 --- a/q2_amr/amrfinderplus/tests/test_utils.py +++ b/q2_amr/amrfinderplus/tests/test_utils.py @@ -48,7 +48,7 @@ def test_run_amrfinderplus_n(self, mock_run_command): "--organism", "Escherichia", "--mutation_all", - "path_dir/amr_mutations.tsv", + "path_dir/amr_all_mutations.tsv", "--plus", "--report_all_equal", "--ident_min", diff --git a/q2_amr/amrfinderplus/types/__init__.py b/q2_amr/amrfinderplus/types/__init__.py index c84d73b..8dfb549 100644 --- a/q2_amr/amrfinderplus/types/__init__.py +++ b/q2_amr/amrfinderplus/types/__init__.py @@ -6,7 +6,6 @@ # The full license is in the file LICENSE, distributed with this software. # ---------------------------------------------------------------------------- from q2_amr.amrfinderplus.types._format import ( - AMRFinderPlusAnnotationDirFmt, AMRFinderPlusAnnotationFormat, AMRFinderPlusAnnotationsDirFmt, AMRFinderPlusDatabaseDirFmt, @@ -18,7 +17,6 @@ "AMRFinderPlusDatabaseDirFmt", "AMRFinderPlusAnnotationFormat", "AMRFinderPlusAnnotationsDirFmt", - "AMRFinderPlusAnnotationDirFmt", "TextFormat", "BinaryFormat", ] diff --git a/q2_amr/amrfinderplus/types/_format.py b/q2_amr/amrfinderplus/types/_format.py index c8feef3..bcda9e8 100644 --- a/q2_amr/amrfinderplus/types/_format.py +++ b/q2_amr/amrfinderplus/types/_format.py @@ -5,9 +5,10 @@ # # The full license is in the file LICENSE, distributed with this software. # ---------------------------------------------------------------------------- +import os + import pandas as pd from q2_types.feature_data import MixedCaseDNAFASTAFormat, ProteinFASTAFormat -from q2_types.per_sample_sequences._format import MultiDirValidationMixin from qiime2.core.exceptions import ValidationError from qiime2.plugin import model @@ -109,19 +110,11 @@ def _validate_(self, level): self._validate() -class AMRFinderPlusAnnotationsDirFmt(MultiDirValidationMixin, model.DirectoryFormat): - annotation = model.FileCollection( - r".*amr_(annotations|mutations)\.tsv$", format=AMRFinderPlusAnnotationFormat +class AMRFinderPlusAnnotationsDirFmt(model.DirectoryFormat): + annotations = model.FileCollection( + r".*amr_(annotations|all_mutations)\.tsv$", format=AMRFinderPlusAnnotationFormat ) - @annotation.set_path_maker - def annotation_path_maker(self, sample_id, mag_id): - prefix = f"{sample_id}/{mag_id}_" if mag_id else f"{sample_id}/" - return f"{prefix}amr_annotations.tsv" - - -AMRFinderPlusAnnotationDirFmt = model.SingleFileDirectoryFormat( - "AMRFinderPlusAnnotationDirFmt", - r"amr_(annotations|mutations)\.tsv$", - AMRFinderPlusAnnotationFormat, -) + @annotations.set_path_maker + def annotations_path_maker(self, name, id, dir_name=""): + return os.path.join(dir_name, f"{id}_amr_{name}.tsv") diff --git a/q2_amr/amrfinderplus/types/_type.py b/q2_amr/amrfinderplus/types/_type.py index 13d0e90..5cc2f5e 100644 --- a/q2_amr/amrfinderplus/types/_type.py +++ b/q2_amr/amrfinderplus/types/_type.py @@ -11,8 +11,6 @@ AMRFinderPlusDatabase = SemanticType("AMRFinderPlusDatabase") AMRFinderPlusAnnotations = SemanticType( - "AMRFinderPlusAnnotations", variant_of=SampleData.field["type"] -) -AMRFinderPlusAnnotation = SemanticType( - "AMRFinderPlusAnnotation", variant_of=FeatureData.field["type"] + "AMRFinderPlusAnnotations", + variant_of=[SampleData.field["type"], FeatureData.field["type"]], ) diff --git a/q2_amr/amrfinderplus/types/tests/test_types_formats_transformers.py b/q2_amr/amrfinderplus/types/tests/test_types_formats_transformers.py index 2b2ea6f..d905785 100644 --- a/q2_amr/amrfinderplus/types/tests/test_types_formats_transformers.py +++ b/q2_amr/amrfinderplus/types/tests/test_types_formats_transformers.py @@ -12,7 +12,6 @@ from qiime2.plugin.testing import TestPluginBase from q2_amr.amrfinderplus.types._format import ( - AMRFinderPlusAnnotationDirFmt, AMRFinderPlusAnnotationFormat, AMRFinderPlusAnnotationsDirFmt, AMRFinderPlusDatabaseDirFmt, @@ -92,14 +91,28 @@ def test_amrfinderplus_annotation_format_validation_error(self): self.assertEqual(str(context.exception), expected_message) - def test_amrfinderplus_annotation_directory_format(self): + def test_amrfinderplus_annotations_dir_fmt_feature(self): dirpath = self.get_data_path( "annotation/coordinates/e026af61-d911-4de3-a957-7e8bf837f30d" ) - annotations = AMRFinderPlusAnnotationDirFmt(dirpath, mode="r") - assert isinstance(annotations, AMRFinderPlusAnnotationDirFmt) + annotations = AMRFinderPlusAnnotationsDirFmt(dirpath, mode="r") + assert isinstance(annotations, AMRFinderPlusAnnotationsDirFmt) - def test_amrfinderplus_annotations_directory_format(self): + def test_amrfinderplus_annotations_dir_fmt_sample(self): dirpath = self.get_data_path("annotation") annotations = AMRFinderPlusAnnotationsDirFmt(dirpath, mode="r") assert isinstance(annotations, AMRFinderPlusAnnotationsDirFmt) + + def test_amrfinderplus_annotations_dir_fmt_path_maker_dir_name(self): + fmt = AMRFinderPlusAnnotationsDirFmt() + path = fmt.annotations_path_maker( + name="annotations", id="id", dir_name="dir_name" + ) + self.assertEqual( + str(path), os.path.join(str(fmt), "dir_name/id_amr_annotations.tsv") + ) + + def test_amrfinderplus_annotations_dir_fmt_path_maker(self): + fmt = AMRFinderPlusAnnotationsDirFmt() + path = fmt.annotations_path_maker(name="annotations", id="id") + self.assertEqual(str(path), os.path.join(str(fmt), "id_amr_annotations.tsv")) diff --git a/q2_amr/amrfinderplus/utils.py b/q2_amr/amrfinderplus/utils.py index 199957c..793ee37 100644 --- a/q2_amr/amrfinderplus/utils.py +++ b/q2_amr/amrfinderplus/utils.py @@ -57,7 +57,7 @@ def run_amrfinderplus_n( "--organism", organism, "--mutation_all", - f"{working_dir}/amr_mutations.tsv", + f"{working_dir}/amr_all_mutations.tsv", ] ) if plus: diff --git a/q2_amr/plugin_setup.py b/q2_amr/plugin_setup.py index ea20a55..703cecb 100644 --- a/q2_amr/plugin_setup.py +++ b/q2_amr/plugin_setup.py @@ -35,7 +35,6 @@ from q2_amr.amrfinderplus.database import fetch_amrfinderplus_db from q2_amr.amrfinderplus.sample_data import annotate_sample_data_amrfinderplus from q2_amr.amrfinderplus.types._format import ( - AMRFinderPlusAnnotationDirFmt, AMRFinderPlusAnnotationFormat, AMRFinderPlusAnnotationsDirFmt, AMRFinderPlusDatabaseDirFmt, @@ -43,7 +42,6 @@ TextFormat, ) from q2_amr.amrfinderplus.types._type import ( - AMRFinderPlusAnnotation, AMRFinderPlusAnnotations, AMRFinderPlusDatabase, ) @@ -1175,9 +1173,9 @@ "threads": Int % Range(0, None, inclusive_start=False), }, outputs=[ - ("annotations", SampleData[AMRFinderPlusAnnotations]), - ("mutations", SampleData[AMRFinderPlusAnnotations]), - ("genes", GenomeData[Genes]), + ("amr_annotations", SampleData[AMRFinderPlusAnnotations]), + ("amr_all_mutations", SampleData[AMRFinderPlusAnnotations]), + ("amr_genes", GenomeData[Genes]), ("feature_table", FeatureTable[Frequency]), ], input_descriptions={ @@ -1212,8 +1210,8 @@ "fail. Using more than 4 threads may speed up searches.", }, output_descriptions={ - "annotations": "Annotated AMR genes and mutations.", - "mutations": "Report of genotypes at all locations screened for point " + "amr_annotations": "Annotated AMR genes and mutations.", + "amr_all_mutations": "Report of genotypes at all locations screened for point " "mutations. These files allow you to distinguish between called " "point mutations that were the sensitive variant and the point " "mutations that could not be called because the sequence was not " @@ -1226,8 +1224,8 @@ "'Gene symbols' from known point-mutation sites have gene symbols " "that match the Pathogen Detection Reference Gene Catalog " "standardized nomenclature for point mutations.", - "genes": "Sequences that were identified by AMRFinderPlus as AMR genes. This " - "will include the entire region that aligns to the references for " + "amr_genes": "Sequences that were identified by AMRFinderPlus as AMR genes. " + "This will include the entire region that aligns to the references for " "point mutations.", "feature_table": "Presence/Absence table of ARGs in all samples.", }, @@ -1249,7 +1247,6 @@ CARDMAGsKmerAnalysis, AMRFinderPlusDatabase, AMRFinderPlusAnnotations, - AMRFinderPlusAnnotation, ) plugin.register_semantic_type_to_format( @@ -1289,8 +1286,8 @@ artifact_format=AMRFinderPlusAnnotationsDirFmt, ) plugin.register_semantic_type_to_format( - FeatureData[AMRFinderPlusAnnotation], - artifact_format=AMRFinderPlusAnnotationDirFmt, + FeatureData[AMRFinderPlusAnnotations], + artifact_format=AMRFinderPlusAnnotationsDirFmt, ) plugin.register_formats( CARDKmerDatabaseDirectoryFormat, @@ -1321,7 +1318,6 @@ BinaryFormat, AMRFinderPlusAnnotationFormat, AMRFinderPlusAnnotationsDirFmt, - AMRFinderPlusAnnotationDirFmt, ) importlib.import_module("q2_amr.card.types._transformer")