diff --git a/q2_amrfinderplus/plugin_setup.py b/q2_amrfinderplus/plugin_setup.py index 78063fc..5cc8e2c 100644 --- a/q2_amrfinderplus/plugin_setup.py +++ b/q2_amrfinderplus/plugin_setup.py @@ -5,6 +5,8 @@ # # The full license is in the file LICENSE, distributed with this software. # ---------------------------------------------------------------------------- +import importlib + from q2_types.feature_data import FeatureData from q2_types.feature_data_mag import MAG from q2_types.genome_data import Genes, GenomeData, Loci, Proteins @@ -270,3 +272,5 @@ AMRFinderPlusAnnotationFormat, AMRFinderPlusAnnotationsDirFmt, ) + +importlib.import_module("q2_amrfinderplus.types._transformer") diff --git a/q2_amrfinderplus/types/_transformer.py b/q2_amrfinderplus/types/_transformer.py new file mode 100644 index 0000000..ee2df89 --- /dev/null +++ b/q2_amrfinderplus/types/_transformer.py @@ -0,0 +1,55 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2019-2023, QIIME 2 development team. +# +# Distributed under the terms of the Modified BSD License. +# +# The full license is in the file LICENSE, distributed with this software. 
# ----------------------------------------------------------------------------
import pandas as pd
import qiime2

from q2_amrfinderplus.plugin_setup import plugin
from q2_amrfinderplus.types import AMRFinderPlusAnnotationsDirFmt


@plugin.register_transformer
def _1(data: AMRFinderPlusAnnotationsDirFmt) -> qiime2.Metadata:
    """Transform an AMRFinderPlus annotations directory into ``qiime2.Metadata``.

    The table handed to ``qiime2.Metadata`` is the combined DataFrame built by
    ``_metadata_transformer_helper``.
    """
    return qiime2.Metadata(_metadata_transformer_helper(data))


def _metadata_transformer_helper(data):
    """Build one combined annotations table from every TSV file in ``data``.

    ``data.annotation_dict()`` is consumed as a two-level mapping
    (outer ID -> inner ID -> file path) when the directory contains
    subdirectories, and as a one-level mapping (ID -> file path) otherwise.
    Each file is parsed as tab-separated text and receives a leading
    ``Sample/MAG_ID`` column holding ``"<outer>/<inner>"`` for the nested
    layout or ``"<inner>"`` for the flat layout.

    Parameters
    ----------
    data
        Directory format object exposing ``path`` and ``annotation_dict()``.

    Returns
    -------
    pandas.DataFrame
        The result of ``combine_dataframes`` applied to all tables read.

    Raises
    ------
    ValueError
        If ``data`` contains no annotation files.
    """
    annotation_dict = data.annotation_dict()

    # A flat directory yields a one-level mapping; wrap it under an empty
    # outer ID so both layouts can be walked by the same nested loop.
    if not any(item.is_dir() for item in data.path.iterdir()):
        annotation_dict = {"": annotation_dict}

    df_list = []
    for outer_id, files_dict in annotation_dict.items():
        for inner_id, file_fp in files_dict.items():
            # The empty outer ID marks the flat layout: use the inner ID alone.
            id_value = f"{outer_id}/{inner_id}" if outer_id else inner_id

            df = pd.read_csv(file_fp, sep="\t")
            df.insert(0, "Sample/MAG_ID", id_value)
            df_list.append(df)

    return combine_dataframes(df_list)


def combine_dataframes(df_list):
    """Concatenate tables and index them in a metadata-compatible way.

    The tables are stacked row-wise and sorted by their first column (the
    sample/MAG ID column). The sort is stable, so rows that share an ID keep
    the relative order they had in the input tables. The index is then
    replaced by a string-typed running number named ``"id"``.

    Parameters
    ----------
    df_list : list of pandas.DataFrame
        Tables to combine; must contain at least one table.

    Returns
    -------
    pandas.DataFrame
        A new combined DataFrame; the input tables are not modified.

    Raises
    ------
    ValueError
        If ``df_list`` is empty.
    """
    if not df_list:
        # pd.concat would raise a ValueError here as well, but its message
        # ("No objects to concatenate") does not explain the actual problem.
        raise ValueError(
            "No annotation tables were found to combine into a metadata table."
        )

    df_combined = pd.concat(df_list, axis=0)

    # "mergesort" is a stable algorithm: ties on the ID column keep file order.
    df_combined = df_combined.sort_values(
        by=df_combined.columns[0], kind="mergesort"
    )

    # Replace the index with "0", "1", ... as strings under the name "id".
    df_combined = df_combined.reset_index(drop=True)
    df_combined.index = df_combined.index.astype(str)
    df_combined.index.name = "id"

    return df_combined
type Element subtype Class Subclass Method Target length Reference sequence length % Coverage of reference sequence % Identity to reference sequence Alignment length Accession of closest sequence Name of closest sequence HMM id HMM description Hierarchy node +aph3pp-Ib_partial_5p_neg aph(3'')-Ib aminoglycoside O-phosphotransferase APH(3'')-Ib core AMR AMR AMINOGLYCOSIDE STREPTOMYCIN PARTIALP 225 267 81.27 100.00 217 WP_001082319.1 aminoglycoside O-phosphotransferase APH(3'')-Ib NF032896.1 APH(3'') family aminoglycoside O-phosphotransferase aph(3'')-Ib +blaOXA-436_partial blaOXA OXA-48 family class D beta-lactamase core AMR AMR BETA-LACTAM BETA-LACTAM PARTIALP 233 265 87.92 100.00 233 WP_058842180.1 OXA-48 family carbapenem-hydrolyzing class D beta-lactamase OXA-436 NF012161.0 class D beta-lactamase blaOXA-48_fam diff --git a/q2_amrfinderplus/types/tests/data/annotations_feature_data_mags/e026af61-d911-4de3-a957-7e8bf837f30d_amr_annotations.tsv b/q2_amrfinderplus/types/tests/data/annotations_feature_data_mags/e026af61-d911-4de3-a957-7e8bf837f30d_amr_annotations.tsv new file mode 100644 index 0000000..20e52d1 --- /dev/null +++ b/q2_amrfinderplus/types/tests/data/annotations_feature_data_mags/e026af61-d911-4de3-a957-7e8bf837f30d_amr_annotations.tsv @@ -0,0 +1,3 @@ +Protein identifier Gene symbol Sequence name Scope Element type Element subtype Class Subclass Method Target length Reference sequence length % Coverage of reference sequence % Identity to reference sequence Alignment length Accession of closest sequence Name of closest sequence HMM id HMM description Hierarchy node +aph3pp-Ib_partial_5p_neg aph(3'')-Ib aminoglycoside O-phosphotransferase APH(3'')-Ib core AMR AMR AMINOGLYCOSIDE STREPTOMYCIN PARTIALP 225 267 81.27 100.00 217 WP_001082319.1 aminoglycoside O-phosphotransferase APH(3'')-Ib NF032896.1 APH(3'') family aminoglycoside O-phosphotransferase aph(3'')-Ib +blaOXA-436_partial blaOXA OXA-48 family class D beta-lactamase core AMR AMR BETA-LACTAM 
BETA-LACTAM PARTIALP 233 265 87.92 100.00 233 WP_058842180.1 OXA-48 family carbapenem-hydrolyzing class D beta-lactamase OXA-436 NF012161.0 class D beta-lactamase blaOXA-48_fam diff --git a/q2_amrfinderplus/types/tests/data/annotations_sample_data_contigs/sample1_amr_annotations.tsv b/q2_amrfinderplus/types/tests/data/annotations_sample_data_contigs/sample1_amr_annotations.tsv new file mode 100644 index 0000000..20e52d1 --- /dev/null +++ b/q2_amrfinderplus/types/tests/data/annotations_sample_data_contigs/sample1_amr_annotations.tsv @@ -0,0 +1,3 @@ +Protein identifier Gene symbol Sequence name Scope Element type Element subtype Class Subclass Method Target length Reference sequence length % Coverage of reference sequence % Identity to reference sequence Alignment length Accession of closest sequence Name of closest sequence HMM id HMM description Hierarchy node +aph3pp-Ib_partial_5p_neg aph(3'')-Ib aminoglycoside O-phosphotransferase APH(3'')-Ib core AMR AMR AMINOGLYCOSIDE STREPTOMYCIN PARTIALP 225 267 81.27 100.00 217 WP_001082319.1 aminoglycoside O-phosphotransferase APH(3'')-Ib NF032896.1 APH(3'') family aminoglycoside O-phosphotransferase aph(3'')-Ib +blaOXA-436_partial blaOXA OXA-48 family class D beta-lactamase core AMR AMR BETA-LACTAM BETA-LACTAM PARTIALP 233 265 87.92 100.00 233 WP_058842180.1 OXA-48 family carbapenem-hydrolyzing class D beta-lactamase OXA-436 NF012161.0 class D beta-lactamase blaOXA-48_fam diff --git a/q2_amrfinderplus/types/tests/data/annotations_sample_data_contigs/sample2_amr_annotations.tsv b/q2_amrfinderplus/types/tests/data/annotations_sample_data_contigs/sample2_amr_annotations.tsv new file mode 100644 index 0000000..20e52d1 --- /dev/null +++ b/q2_amrfinderplus/types/tests/data/annotations_sample_data_contigs/sample2_amr_annotations.tsv @@ -0,0 +1,3 @@ +Protein identifier Gene symbol Sequence name Scope Element type Element subtype Class Subclass Method Target length Reference sequence length % Coverage of reference sequence % 
Identity to reference sequence Alignment length Accession of closest sequence Name of closest sequence HMM id HMM description Hierarchy node +aph3pp-Ib_partial_5p_neg aph(3'')-Ib aminoglycoside O-phosphotransferase APH(3'')-Ib core AMR AMR AMINOGLYCOSIDE STREPTOMYCIN PARTIALP 225 267 81.27 100.00 217 WP_001082319.1 aminoglycoside O-phosphotransferase APH(3'')-Ib NF032896.1 APH(3'') family aminoglycoside O-phosphotransferase aph(3'')-Ib +blaOXA-436_partial blaOXA OXA-48 family class D beta-lactamase core AMR AMR BETA-LACTAM BETA-LACTAM PARTIALP 233 265 87.92 100.00 233 WP_058842180.1 OXA-48 family carbapenem-hydrolyzing class D beta-lactamase OXA-436 NF012161.0 class D beta-lactamase blaOXA-48_fam diff --git a/q2_amrfinderplus/types/tests/data/annotations_sample_data_mags/sample1/e026af61-d911-4de3-a957-7e8bf837f30d_amr_annotations.tsv b/q2_amrfinderplus/types/tests/data/annotations_sample_data_mags/sample1/e026af61-d911-4de3-a957-7e8bf837f30d_amr_annotations.tsv new file mode 100644 index 0000000..20e52d1 --- /dev/null +++ b/q2_amrfinderplus/types/tests/data/annotations_sample_data_mags/sample1/e026af61-d911-4de3-a957-7e8bf837f30d_amr_annotations.tsv @@ -0,0 +1,3 @@ +Protein identifier Gene symbol Sequence name Scope Element type Element subtype Class Subclass Method Target length Reference sequence length % Coverage of reference sequence % Identity to reference sequence Alignment length Accession of closest sequence Name of closest sequence HMM id HMM description Hierarchy node +aph3pp-Ib_partial_5p_neg aph(3'')-Ib aminoglycoside O-phosphotransferase APH(3'')-Ib core AMR AMR AMINOGLYCOSIDE STREPTOMYCIN PARTIALP 225 267 81.27 100.00 217 WP_001082319.1 aminoglycoside O-phosphotransferase APH(3'')-Ib NF032896.1 APH(3'') family aminoglycoside O-phosphotransferase aph(3'')-Ib +blaOXA-436_partial blaOXA OXA-48 family class D beta-lactamase core AMR AMR BETA-LACTAM BETA-LACTAM PARTIALP 233 265 87.92 100.00 233 WP_058842180.1 OXA-48 family carbapenem-hydrolyzing class D 
beta-lactamase OXA-436 NF012161.0 class D beta-lactamase blaOXA-48_fam diff --git a/q2_amrfinderplus/types/tests/data/annotations_sample_data_mags/sample2/aa447c99-ecd9-4c4a-a53b-4df6999815dd_amr_annotations.tsv b/q2_amrfinderplus/types/tests/data/annotations_sample_data_mags/sample2/aa447c99-ecd9-4c4a-a53b-4df6999815dd_amr_annotations.tsv new file mode 100644 index 0000000..20e52d1 --- /dev/null +++ b/q2_amrfinderplus/types/tests/data/annotations_sample_data_mags/sample2/aa447c99-ecd9-4c4a-a53b-4df6999815dd_amr_annotations.tsv @@ -0,0 +1,3 @@ +Protein identifier Gene symbol Sequence name Scope Element type Element subtype Class Subclass Method Target length Reference sequence length % Coverage of reference sequence % Identity to reference sequence Alignment length Accession of closest sequence Name of closest sequence HMM id HMM description Hierarchy node +aph3pp-Ib_partial_5p_neg aph(3'')-Ib aminoglycoside O-phosphotransferase APH(3'')-Ib core AMR AMR AMINOGLYCOSIDE STREPTOMYCIN PARTIALP 225 267 81.27 100.00 217 WP_001082319.1 aminoglycoside O-phosphotransferase APH(3'')-Ib NF032896.1 APH(3'') family aminoglycoside O-phosphotransferase aph(3'')-Ib +blaOXA-436_partial blaOXA OXA-48 family class D beta-lactamase core AMR AMR BETA-LACTAM BETA-LACTAM PARTIALP 233 265 87.92 100.00 233 WP_058842180.1 OXA-48 family carbapenem-hydrolyzing class D beta-lactamase OXA-436 NF012161.0 class D beta-lactamase blaOXA-48_fam diff --git a/q2_amrfinderplus/types/tests/data/metadata_tables/feature_data.tsv b/q2_amrfinderplus/types/tests/data/metadata_tables/feature_data.tsv new file mode 100644 index 0000000..ceac9b1 --- /dev/null +++ b/q2_amrfinderplus/types/tests/data/metadata_tables/feature_data.tsv @@ -0,0 +1,5 @@ +Sample/MAG_ID Protein identifier Gene symbol Sequence name Scope Element type Element subtype Class Subclass Method Target length Reference sequence length % Coverage of reference sequence % Identity to reference sequence Alignment length Accession of closest sequence 
Name of closest sequence HMM id HMM description Hierarchy node +aa447c99-ecd9-4c4a-a53b-4df6999815dd aph3pp-Ib_partial_5p_neg aph(3'')-Ib aminoglycoside O-phosphotransferase APH(3'')-Ib core AMR AMR AMINOGLYCOSIDE STREPTOMYCIN PARTIALP 225 267 81.27 100.0 217 WP_001082319.1 aminoglycoside O-phosphotransferase APH(3'')-Ib NF032896.1 APH(3'') family aminoglycoside O-phosphotransferase aph(3'')-Ib +aa447c99-ecd9-4c4a-a53b-4df6999815dd blaOXA-436_partial blaOXA OXA-48 family class D beta-lactamase core AMR AMR BETA-LACTAM BETA-LACTAM PARTIALP 233 265 87.92 100.0 233 WP_058842180.1 OXA-48 family carbapenem-hydrolyzing class D beta-lactamase OXA-436 NF012161.0 class D beta-lactamase blaOXA-48_fam +e026af61-d911-4de3-a957-7e8bf837f30d aph3pp-Ib_partial_5p_neg aph(3'')-Ib aminoglycoside O-phosphotransferase APH(3'')-Ib core AMR AMR AMINOGLYCOSIDE STREPTOMYCIN PARTIALP 225 267 81.27 100.0 217 WP_001082319.1 aminoglycoside O-phosphotransferase APH(3'')-Ib NF032896.1 APH(3'') family aminoglycoside O-phosphotransferase aph(3'')-Ib +e026af61-d911-4de3-a957-7e8bf837f30d blaOXA-436_partial blaOXA OXA-48 family class D beta-lactamase core AMR AMR BETA-LACTAM BETA-LACTAM PARTIALP 233 265 87.92 100.0 233 WP_058842180.1 OXA-48 family carbapenem-hydrolyzing class D beta-lactamase OXA-436 NF012161.0 class D beta-lactamase blaOXA-48_fam diff --git a/q2_amrfinderplus/types/tests/data/metadata_tables/sample_data_contigs.tsv b/q2_amrfinderplus/types/tests/data/metadata_tables/sample_data_contigs.tsv new file mode 100644 index 0000000..a2dce71 --- /dev/null +++ b/q2_amrfinderplus/types/tests/data/metadata_tables/sample_data_contigs.tsv @@ -0,0 +1,5 @@ +Sample/MAG_ID Protein identifier Gene symbol Sequence name Scope Element type Element subtype Class Subclass Method Target length Reference sequence length % Coverage of reference sequence % Identity to reference sequence Alignment length Accession of closest sequence Name of closest sequence HMM id HMM description Hierarchy node +sample1 
aph3pp-Ib_partial_5p_neg aph(3'')-Ib aminoglycoside O-phosphotransferase APH(3'')-Ib core AMR AMR AMINOGLYCOSIDE STREPTOMYCIN PARTIALP 225 267 81.27 100.0 217 WP_001082319.1 aminoglycoside O-phosphotransferase APH(3'')-Ib NF032896.1 APH(3'') family aminoglycoside O-phosphotransferase aph(3'')-Ib +sample1 blaOXA-436_partial blaOXA OXA-48 family class D beta-lactamase core AMR AMR BETA-LACTAM BETA-LACTAM PARTIALP 233 265 87.92 100.0 233 WP_058842180.1 OXA-48 family carbapenem-hydrolyzing class D beta-lactamase OXA-436 NF012161.0 class D beta-lactamase blaOXA-48_fam +sample2 aph3pp-Ib_partial_5p_neg aph(3'')-Ib aminoglycoside O-phosphotransferase APH(3'')-Ib core AMR AMR AMINOGLYCOSIDE STREPTOMYCIN PARTIALP 225 267 81.27 100.0 217 WP_001082319.1 aminoglycoside O-phosphotransferase APH(3'')-Ib NF032896.1 APH(3'') family aminoglycoside O-phosphotransferase aph(3'')-Ib +sample2 blaOXA-436_partial blaOXA OXA-48 family class D beta-lactamase core AMR AMR BETA-LACTAM BETA-LACTAM PARTIALP 233 265 87.92 100.0 233 WP_058842180.1 OXA-48 family carbapenem-hydrolyzing class D beta-lactamase OXA-436 NF012161.0 class D beta-lactamase blaOXA-48_fam diff --git a/q2_amrfinderplus/types/tests/data/metadata_tables/sample_data_mags.tsv b/q2_amrfinderplus/types/tests/data/metadata_tables/sample_data_mags.tsv new file mode 100644 index 0000000..771d6df --- /dev/null +++ b/q2_amrfinderplus/types/tests/data/metadata_tables/sample_data_mags.tsv @@ -0,0 +1,5 @@ +Sample/MAG_ID Protein identifier Gene symbol Sequence name Scope Element type Element subtype Class Subclass Method Target length Reference sequence length % Coverage of reference sequence % Identity to reference sequence Alignment length Accession of closest sequence Name of closest sequence HMM id HMM description Hierarchy node +sample1/e026af61-d911-4de3-a957-7e8bf837f30d aph3pp-Ib_partial_5p_neg aph(3'')-Ib aminoglycoside O-phosphotransferase APH(3'')-Ib core AMR AMR AMINOGLYCOSIDE STREPTOMYCIN PARTIALP 225 267 81.27 100.0 217 
WP_001082319.1 aminoglycoside O-phosphotransferase APH(3'')-Ib NF032896.1 APH(3'') family aminoglycoside O-phosphotransferase aph(3'')-Ib +sample1/e026af61-d911-4de3-a957-7e8bf837f30d blaOXA-436_partial blaOXA OXA-48 family class D beta-lactamase core AMR AMR BETA-LACTAM BETA-LACTAM PARTIALP 233 265 87.92 100.0 233 WP_058842180.1 OXA-48 family carbapenem-hydrolyzing class D beta-lactamase OXA-436 NF012161.0 class D beta-lactamase blaOXA-48_fam +sample2/aa447c99-ecd9-4c4a-a53b-4df6999815dd aph3pp-Ib_partial_5p_neg aph(3'')-Ib aminoglycoside O-phosphotransferase APH(3'')-Ib core AMR AMR AMINOGLYCOSIDE STREPTOMYCIN PARTIALP 225 267 81.27 100.0 217 WP_001082319.1 aminoglycoside O-phosphotransferase APH(3'')-Ib NF032896.1 APH(3'') family aminoglycoside O-phosphotransferase aph(3'')-Ib +sample2/aa447c99-ecd9-4c4a-a53b-4df6999815dd blaOXA-436_partial blaOXA OXA-48 family class D beta-lactamase core AMR AMR BETA-LACTAM BETA-LACTAM PARTIALP 233 265 87.92 100.0 233 WP_058842180.1 OXA-48 family carbapenem-hydrolyzing class D beta-lactamase OXA-436 NF012161.0 class D beta-lactamase blaOXA-48_fam diff --git a/q2_amrfinderplus/types/tests/data/mutations_feature_data_mags/aa447c99-ecd9-4c4a-a53b-4df6999815dd_amr_all_mutations.tsv b/q2_amrfinderplus/types/tests/data/mutations_feature_data_mags/aa447c99-ecd9-4c4a-a53b-4df6999815dd_amr_all_mutations.tsv new file mode 100644 index 0000000..20e52d1 --- /dev/null +++ b/q2_amrfinderplus/types/tests/data/mutations_feature_data_mags/aa447c99-ecd9-4c4a-a53b-4df6999815dd_amr_all_mutations.tsv @@ -0,0 +1,3 @@ +Protein identifier Gene symbol Sequence name Scope Element type Element subtype Class Subclass Method Target length Reference sequence length % Coverage of reference sequence % Identity to reference sequence Alignment length Accession of closest sequence Name of closest sequence HMM id HMM description Hierarchy node +aph3pp-Ib_partial_5p_neg aph(3'')-Ib aminoglycoside O-phosphotransferase APH(3'')-Ib core AMR AMR AMINOGLYCOSIDE 
STREPTOMYCIN PARTIALP 225 267 81.27 100.00 217 WP_001082319.1 aminoglycoside O-phosphotransferase APH(3'')-Ib NF032896.1 APH(3'') family aminoglycoside O-phosphotransferase aph(3'')-Ib +blaOXA-436_partial blaOXA OXA-48 family class D beta-lactamase core AMR AMR BETA-LACTAM BETA-LACTAM PARTIALP 233 265 87.92 100.00 233 WP_058842180.1 OXA-48 family carbapenem-hydrolyzing class D beta-lactamase OXA-436 NF012161.0 class D beta-lactamase blaOXA-48_fam diff --git a/q2_amrfinderplus/types/tests/data/mutations_feature_data_mags/e026af61-d911-4de3-a957-7e8bf837f30d_amr_all_mutations.tsv b/q2_amrfinderplus/types/tests/data/mutations_feature_data_mags/e026af61-d911-4de3-a957-7e8bf837f30d_amr_all_mutations.tsv new file mode 100644 index 0000000..20e52d1 --- /dev/null +++ b/q2_amrfinderplus/types/tests/data/mutations_feature_data_mags/e026af61-d911-4de3-a957-7e8bf837f30d_amr_all_mutations.tsv @@ -0,0 +1,3 @@ +Protein identifier Gene symbol Sequence name Scope Element type Element subtype Class Subclass Method Target length Reference sequence length % Coverage of reference sequence % Identity to reference sequence Alignment length Accession of closest sequence Name of closest sequence HMM id HMM description Hierarchy node +aph3pp-Ib_partial_5p_neg aph(3'')-Ib aminoglycoside O-phosphotransferase APH(3'')-Ib core AMR AMR AMINOGLYCOSIDE STREPTOMYCIN PARTIALP 225 267 81.27 100.00 217 WP_001082319.1 aminoglycoside O-phosphotransferase APH(3'')-Ib NF032896.1 APH(3'') family aminoglycoside O-phosphotransferase aph(3'')-Ib +blaOXA-436_partial blaOXA OXA-48 family class D beta-lactamase core AMR AMR BETA-LACTAM BETA-LACTAM PARTIALP 233 265 87.92 100.00 233 WP_058842180.1 OXA-48 family carbapenem-hydrolyzing class D beta-lactamase OXA-436 NF012161.0 class D beta-lactamase blaOXA-48_fam diff --git a/q2_amrfinderplus/types/tests/data/mutations_sample_data_contigs/sample1_amr_all_mutations.tsv b/q2_amrfinderplus/types/tests/data/mutations_sample_data_contigs/sample1_amr_all_mutations.tsv new 
file mode 100644 index 0000000..20e52d1 --- /dev/null +++ b/q2_amrfinderplus/types/tests/data/mutations_sample_data_contigs/sample1_amr_all_mutations.tsv @@ -0,0 +1,3 @@ +Protein identifier Gene symbol Sequence name Scope Element type Element subtype Class Subclass Method Target length Reference sequence length % Coverage of reference sequence % Identity to reference sequence Alignment length Accession of closest sequence Name of closest sequence HMM id HMM description Hierarchy node +aph3pp-Ib_partial_5p_neg aph(3'')-Ib aminoglycoside O-phosphotransferase APH(3'')-Ib core AMR AMR AMINOGLYCOSIDE STREPTOMYCIN PARTIALP 225 267 81.27 100.00 217 WP_001082319.1 aminoglycoside O-phosphotransferase APH(3'')-Ib NF032896.1 APH(3'') family aminoglycoside O-phosphotransferase aph(3'')-Ib +blaOXA-436_partial blaOXA OXA-48 family class D beta-lactamase core AMR AMR BETA-LACTAM BETA-LACTAM PARTIALP 233 265 87.92 100.00 233 WP_058842180.1 OXA-48 family carbapenem-hydrolyzing class D beta-lactamase OXA-436 NF012161.0 class D beta-lactamase blaOXA-48_fam diff --git a/q2_amrfinderplus/types/tests/data/mutations_sample_data_contigs/sample2_amr_all_mutations.tsv b/q2_amrfinderplus/types/tests/data/mutations_sample_data_contigs/sample2_amr_all_mutations.tsv new file mode 100644 index 0000000..20e52d1 --- /dev/null +++ b/q2_amrfinderplus/types/tests/data/mutations_sample_data_contigs/sample2_amr_all_mutations.tsv @@ -0,0 +1,3 @@ +Protein identifier Gene symbol Sequence name Scope Element type Element subtype Class Subclass Method Target length Reference sequence length % Coverage of reference sequence % Identity to reference sequence Alignment length Accession of closest sequence Name of closest sequence HMM id HMM description Hierarchy node +aph3pp-Ib_partial_5p_neg aph(3'')-Ib aminoglycoside O-phosphotransferase APH(3'')-Ib core AMR AMR AMINOGLYCOSIDE STREPTOMYCIN PARTIALP 225 267 81.27 100.00 217 WP_001082319.1 aminoglycoside O-phosphotransferase APH(3'')-Ib NF032896.1 APH(3'') 
family aminoglycoside O-phosphotransferase aph(3'')-Ib +blaOXA-436_partial blaOXA OXA-48 family class D beta-lactamase core AMR AMR BETA-LACTAM BETA-LACTAM PARTIALP 233 265 87.92 100.00 233 WP_058842180.1 OXA-48 family carbapenem-hydrolyzing class D beta-lactamase OXA-436 NF012161.0 class D beta-lactamase blaOXA-48_fam diff --git a/q2_amrfinderplus/types/tests/data/mutations_sample_data_mags/sample1/e026af61-d911-4de3-a957-7e8bf837f30d_amr_all_mutations.tsv b/q2_amrfinderplus/types/tests/data/mutations_sample_data_mags/sample1/e026af61-d911-4de3-a957-7e8bf837f30d_amr_all_mutations.tsv new file mode 100644 index 0000000..20e52d1 --- /dev/null +++ b/q2_amrfinderplus/types/tests/data/mutations_sample_data_mags/sample1/e026af61-d911-4de3-a957-7e8bf837f30d_amr_all_mutations.tsv @@ -0,0 +1,3 @@ +Protein identifier Gene symbol Sequence name Scope Element type Element subtype Class Subclass Method Target length Reference sequence length % Coverage of reference sequence % Identity to reference sequence Alignment length Accession of closest sequence Name of closest sequence HMM id HMM description Hierarchy node +aph3pp-Ib_partial_5p_neg aph(3'')-Ib aminoglycoside O-phosphotransferase APH(3'')-Ib core AMR AMR AMINOGLYCOSIDE STREPTOMYCIN PARTIALP 225 267 81.27 100.00 217 WP_001082319.1 aminoglycoside O-phosphotransferase APH(3'')-Ib NF032896.1 APH(3'') family aminoglycoside O-phosphotransferase aph(3'')-Ib +blaOXA-436_partial blaOXA OXA-48 family class D beta-lactamase core AMR AMR BETA-LACTAM BETA-LACTAM PARTIALP 233 265 87.92 100.00 233 WP_058842180.1 OXA-48 family carbapenem-hydrolyzing class D beta-lactamase OXA-436 NF012161.0 class D beta-lactamase blaOXA-48_fam diff --git a/q2_amrfinderplus/types/tests/data/mutations_sample_data_mags/sample2/aa447c99-ecd9-4c4a-a53b-4df6999815dd_amr_all_mutations.tsv b/q2_amrfinderplus/types/tests/data/mutations_sample_data_mags/sample2/aa447c99-ecd9-4c4a-a53b-4df6999815dd_amr_all_mutations.tsv new file mode 100644 index 0000000..20e52d1 
--- /dev/null +++ b/q2_amrfinderplus/types/tests/data/mutations_sample_data_mags/sample2/aa447c99-ecd9-4c4a-a53b-4df6999815dd_amr_all_mutations.tsv @@ -0,0 +1,3 @@ +Protein identifier Gene symbol Sequence name Scope Element type Element subtype Class Subclass Method Target length Reference sequence length % Coverage of reference sequence % Identity to reference sequence Alignment length Accession of closest sequence Name of closest sequence HMM id HMM description Hierarchy node +aph3pp-Ib_partial_5p_neg aph(3'')-Ib aminoglycoside O-phosphotransferase APH(3'')-Ib core AMR AMR AMINOGLYCOSIDE STREPTOMYCIN PARTIALP 225 267 81.27 100.00 217 WP_001082319.1 aminoglycoside O-phosphotransferase APH(3'')-Ib NF032896.1 APH(3'') family aminoglycoside O-phosphotransferase aph(3'')-Ib +blaOXA-436_partial blaOXA OXA-48 family class D beta-lactamase core AMR AMR BETA-LACTAM BETA-LACTAM PARTIALP 233 265 87.92 100.00 233 WP_058842180.1 OXA-48 family carbapenem-hydrolyzing class D beta-lactamase OXA-436 NF012161.0 class D beta-lactamase blaOXA-48_fam diff --git a/q2_amrfinderplus/types/tests/test_types_formats_transformers.py b/q2_amrfinderplus/types/tests/test_types_formats_transformers.py index d1dc632..b078649 100644 --- a/q2_amrfinderplus/types/tests/test_types_formats_transformers.py +++ b/q2_amrfinderplus/types/tests/test_types_formats_transformers.py @@ -8,6 +8,9 @@ import os import tempfile +import pandas as pd +import qiime2 +from pandas._testing import assert_frame_equal from qiime2.core.exceptions import ValidationError from qiime2.plugin.testing import TestPluginBase @@ -17,6 +20,10 @@ AMRFinderPlusDatabaseDirFmt, _create_path, ) +from q2_amrfinderplus.types._transformer import ( + _metadata_transformer_helper, + combine_dataframes, +) class TestAMRFinderPlusTypesAndFormats(TestPluginBase): @@ -181,3 +188,93 @@ def test_amrfinderplus_annotation_dirfmt_annotation_dict(self): } self.assertDictEqual(obs, exp) + + +class MetadataTransformerUtilsTest(TestPluginBase): + package 
= "q2_amrfinderplus.types.tests" + + def setUp(self): + super().setUp() + # Setup test data + self.df1 = pd.DataFrame( + { + "Sample/MAG_ID": ["id_value_1", "id_value_2"], + "col1": ["val1", "val3"], + "col2": ["val2", "val4"], + } + ) + + self.df2 = pd.DataFrame( + { + "Sample/MAG_ID": ["id_value_3", "id_value_4"], + "col1": ["val5", "val7"], + "col2": ["val6", "val8"], + } + ) + + self.expected_combined_df = pd.DataFrame( + { + "Sample/MAG_ID": [ + "id_value_1", + "id_value_2", + "id_value_3", + "id_value_4", + ], + "col1": ["val1", "val3", "val5", "val7"], + "col2": ["val2", "val4", "val6", "val8"], + } + ) + + self.expected_combined_df.index = self.expected_combined_df.index.astype(str) + self.expected_combined_df.index.name = "id" + + def test_combine_dataframes(self): + # Test combine_dataframes function + df_list = [self.df1, self.df2] + combined_df = combine_dataframes(df_list) + pd.testing.assert_frame_equal(combined_df, self.expected_combined_df) + + +class TestAMRFinderPlusTransformers(TestPluginBase): + package = "q2_amrfinderplus.types.tests" + + def test_annotations_feature_data_mags_transformer_helper(self): + self._test_helper("annotations_feature_data_mags", "feature_data.tsv") + + def test_annotations_sample_data_contigs_transformer_helper(self): + self._test_helper("annotations_sample_data_contigs", "sample_data_contigs.tsv") + + def test_annotations_sample_data_mags_transformer_helper(self): + self._test_helper("annotations_sample_data_mags", "sample_data_mags.tsv") + + def test_mutations_feature_data_mags_transformer_helper(self): + self._test_helper("mutations_feature_data_mags", "feature_data.tsv") + + def test_mutations_sample_data_contigs_transformer_helper(self): + self._test_helper("mutations_sample_data_contigs", "sample_data_contigs.tsv") + + def test_mutations_sample_data_mags_transformer_helper(self): + self._test_helper("mutations_sample_data_mags", "sample_data_mags.tsv") + + def _test_helper(self, data, table_name): + df_expected = 
pd.read_csv( + self.get_data_path(f"metadata_tables/{table_name}"), + sep="\t", + ) + df_expected.index = df_expected.index.astype(str) + df_expected.index.name = "id" + dir_fmt = AMRFinderPlusAnnotationsDirFmt(self.get_data_path(data), "r") + df_obs = _metadata_transformer_helper(dir_fmt) + assert_frame_equal(df_expected, df_obs) + + def test_annotations_sample_data_mags_to_Metadata(self): + transformer = self.get_transformer( + AMRFinderPlusAnnotationsDirFmt, qiime2.Metadata + ) + fmt = AMRFinderPlusAnnotationsDirFmt( + self.get_data_path("annotations_sample_data_mags"), "r" + ) + + metadata_obt = transformer(fmt) + + self.assertIsInstance(metadata_obt, qiime2.Metadata) diff --git a/setup.py b/setup.py index 20ad82e..3ca0003 100644 --- a/setup.py +++ b/setup.py @@ -30,14 +30,7 @@ }, package_data={ "q2_amrfinderplus": ["citations.bib"], - "q2_amrfinderplus.types.tests": [ - "data/*" - "data/annotation/*" - "data/database/*" - "data/annotation_wrong/*" - "data/annotation/coordinates/*" - "data/annotation/no_coordinates/*" - ], + "q2_amrfinderplus.types.tests": ["data/*" "data/*/*" "data/*/*/*"], "q2_amrfinderplus.tests": ["data/*" "data/*/*" "data/*/*/*"], }, zip_safe=False,