From 46a2879fe9b1bb7f664512599bfdcd54d761681c Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Thu, 26 Sep 2024 16:09:07 +0200 Subject: [PATCH 1/7] added new transformer for annotations to metadata with tests --- q2_amrfinderplus/types/_transformer.py | 85 +++++++++++++ ...4c4a-a53b-4df6999815dd_amr_annotations.tsv | 3 + ...4de3-a957-7e8bf837f30d_amr_annotations.tsv | 3 + .../sample1_amr_annotations.tsv | 3 + .../sample2_amr_annotations.tsv | 3 + ...4de3-a957-7e8bf837f30d_amr_annotations.tsv | 3 + ...4c4a-a53b-4df6999815dd_amr_annotations.tsv | 3 + .../data/metadata_tables/feature_data.tsv | 5 + .../metadata_tables/sample_data_contigs.tsv | 5 + .../data/metadata_tables/sample_data_mags.tsv | 5 + ...4a-a53b-4df6999815dd_amr_all_mutations.tsv | 3 + ...e3-a957-7e8bf837f30d_amr_all_mutations.tsv | 3 + .../sample1_amr_all_mutations.tsv | 3 + .../sample2_amr_all_mutations.tsv | 3 + ...e3-a957-7e8bf837f30d_amr_all_mutations.tsv | 3 + ...4a-a53b-4df6999815dd_amr_all_mutations.tsv | 3 + .../tests/test_types_formats_transformers.py | 114 ++++++++++++++++++ 17 files changed, 250 insertions(+) create mode 100644 q2_amrfinderplus/types/_transformer.py create mode 100644 q2_amrfinderplus/types/tests/data/annotations_feature_data_mags/aa447c99-ecd9-4c4a-a53b-4df6999815dd_amr_annotations.tsv create mode 100644 q2_amrfinderplus/types/tests/data/annotations_feature_data_mags/e026af61-d911-4de3-a957-7e8bf837f30d_amr_annotations.tsv create mode 100644 q2_amrfinderplus/types/tests/data/annotations_sample_data_contigs/sample1_amr_annotations.tsv create mode 100644 q2_amrfinderplus/types/tests/data/annotations_sample_data_contigs/sample2_amr_annotations.tsv create mode 100644 q2_amrfinderplus/types/tests/data/annotations_sample_data_mags/sample1/e026af61-d911-4de3-a957-7e8bf837f30d_amr_annotations.tsv create mode 100644 q2_amrfinderplus/types/tests/data/annotations_sample_data_mags/sample2/aa447c99-ecd9-4c4a-a53b-4df6999815dd_amr_annotations.tsv create mode 100644 q2_amrfinderplus/types/tests/data/metadata_tables/feature_data.tsv create mode 100644 q2_amrfinderplus/types/tests/data/metadata_tables/sample_data_contigs.tsv create mode 100644 q2_amrfinderplus/types/tests/data/metadata_tables/sample_data_mags.tsv create mode 100644 q2_amrfinderplus/types/tests/data/mutations_feature_data_mags/aa447c99-ecd9-4c4a-a53b-4df6999815dd_amr_all_mutations.tsv create mode 100644 q2_amrfinderplus/types/tests/data/mutations_feature_data_mags/e026af61-d911-4de3-a957-7e8bf837f30d_amr_all_mutations.tsv create mode 100644 q2_amrfinderplus/types/tests/data/mutations_sample_data_contigs/sample1_amr_all_mutations.tsv create mode 100644 q2_amrfinderplus/types/tests/data/mutations_sample_data_contigs/sample2_amr_all_mutations.tsv create mode 100644 q2_amrfinderplus/types/tests/data/mutations_sample_data_mags/sample1/e026af61-d911-4de3-a957-7e8bf837f30d_amr_all_mutations.tsv create mode 100644 q2_amrfinderplus/types/tests/data/mutations_sample_data_mags/sample2/aa447c99-ecd9-4c4a-a53b-4df6999815dd_amr_all_mutations.tsv diff --git a/q2_amrfinderplus/types/_transformer.py b/q2_amrfinderplus/types/_transformer.py new file mode 100644 index 0000000..06b6900 --- /dev/null +++ b/q2_amrfinderplus/types/_transformer.py @@ -0,0 +1,85 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2019-2023, QIIME 2 development team. +# +# Distributed under the terms of the Modified BSD License. +# +# The full license is in the file LICENSE, distributed with this software. +# ---------------------------------------------------------------------------- +import glob +import os +from pathlib import Path + +import pandas as pd +import qiime2 + +from q2_amrfinderplus.plugin_setup import plugin +from q2_amrfinderplus.types import AMRFinderPlusAnnotationsDirFmt + + +@plugin.register_transformer +def _1(data: AMRFinderPlusAnnotationsDirFmt) -> qiime2.Metadata: + return qiime2.Metadata(_transfomer_helper(data)) + + +def _transfomer_helper(data): + df_list = [] + for file_dir_name in os.listdir(str(data)): + # Check the directory structure + if os.path.isdir(os.path.join(str(data), file_dir_name)): + for file in glob.glob(os.path.join(str(data), file_dir_name, "*")): + file_name = Path(file).stem + + # Annotations file from sample data mags + if file_name.endswith("_amr_annotations"): + id_value = file_dir_name + "/" + file_name[:-16] + + # Mutations file from sample data mags + else: + id_value = file_dir_name + "/" + file_name[:-18] + + create_append_df( + file_path=file, + df_list=df_list, + id_value=id_value, + ) + else: + # Annotations file from feature data mags or sample data contigs + if file_dir_name.endswith("_amr_annotations.tsv"): + id_value = file_dir_name[:-20] + # Mutations file from feature data mags + else: + id_value = file_dir_name[:-22] + + create_append_df( + file_path=os.path.join(str(data), file_dir_name), + df_list=df_list, + id_value=id_value, + ) + + return combine_dataframes(df_list) + + +def create_append_df(file_path, df_list, id_value): + # Read in df + df = pd.read_csv(file_path, sep="\t") + + # Insert column with sample or mag IDs + df.insert(0, "Sample/MAG_ID", id_value) + + # Append df to df list + df_list.append(df) + + +def combine_dataframes(df_list): + # Concat all dfs + df_combined = pd.concat(df_list, axis=0) + + # Sort all values by sample/mag ID column + df_combined.sort_values(by=df_combined.columns[0], inplace=True) + + # Reset and rename index and set it to string to conform to metadata format + df_combined.reset_index(inplace=True, drop=True) + df_combined.index.name = "id" + df_combined.index = df_combined.index.astype(str) + + return df_combined diff --git a/q2_amrfinderplus/types/tests/data/annotations_feature_data_mags/aa447c99-ecd9-4c4a-a53b-4df6999815dd_amr_annotations.tsv b/q2_amrfinderplus/types/tests/data/annotations_feature_data_mags/aa447c99-ecd9-4c4a-a53b-4df6999815dd_amr_annotations.tsv new file mode 100644 index 0000000..20e52d1 --- /dev/null +++ b/q2_amrfinderplus/types/tests/data/annotations_feature_data_mags/aa447c99-ecd9-4c4a-a53b-4df6999815dd_amr_annotations.tsv @@ -0,0 +1,3 @@ +Protein identifier Gene symbol Sequence name Scope Element type Element subtype Class Subclass Method Target length Reference sequence length % Coverage of reference sequence % Identity to reference sequence Alignment length Accession of closest sequence Name of closest sequence HMM id HMM description Hierarchy node +aph3pp-Ib_partial_5p_neg aph(3'')-Ib aminoglycoside O-phosphotransferase APH(3'')-Ib core AMR AMR AMINOGLYCOSIDE STREPTOMYCIN PARTIALP 225 267 81.27 100.00 217 WP_001082319.1 aminoglycoside O-phosphotransferase APH(3'')-Ib NF032896.1 APH(3'') family aminoglycoside O-phosphotransferase aph(3'')-Ib +blaOXA-436_partial blaOXA OXA-48 family class D beta-lactamase core AMR AMR BETA-LACTAM BETA-LACTAM PARTIALP 233 265 87.92 100.00 233 WP_058842180.1 OXA-48 family carbapenem-hydrolyzing class D beta-lactamase OXA-436 NF012161.0 class D beta-lactamase blaOXA-48_fam diff --git a/q2_amrfinderplus/types/tests/data/annotations_feature_data_mags/e026af61-d911-4de3-a957-7e8bf837f30d_amr_annotations.tsv b/q2_amrfinderplus/types/tests/data/annotations_feature_data_mags/e026af61-d911-4de3-a957-7e8bf837f30d_amr_annotations.tsv new file mode 100644 index 0000000..20e52d1 --- /dev/null +++ b/q2_amrfinderplus/types/tests/data/annotations_feature_data_mags/e026af61-d911-4de3-a957-7e8bf837f30d_amr_annotations.tsv @@ -0,0 +1,3 @@ +Protein identifier Gene symbol Sequence name Scope Element type Element subtype Class Subclass Method Target length Reference sequence length % Coverage of reference sequence % Identity to reference sequence Alignment length Accession of closest sequence Name of closest sequence HMM id HMM description Hierarchy node +aph3pp-Ib_partial_5p_neg aph(3'')-Ib aminoglycoside O-phosphotransferase APH(3'')-Ib core AMR AMR AMINOGLYCOSIDE STREPTOMYCIN PARTIALP 225 267 81.27 100.00 217 WP_001082319.1 aminoglycoside O-phosphotransferase APH(3'')-Ib NF032896.1 APH(3'') family aminoglycoside O-phosphotransferase aph(3'')-Ib +blaOXA-436_partial blaOXA OXA-48 family class D beta-lactamase core AMR AMR BETA-LACTAM BETA-LACTAM PARTIALP 233 265 87.92 100.00 233 WP_058842180.1 OXA-48 family carbapenem-hydrolyzing class D beta-lactamase OXA-436 NF012161.0 class D beta-lactamase blaOXA-48_fam diff --git a/q2_amrfinderplus/types/tests/data/annotations_sample_data_contigs/sample1_amr_annotations.tsv b/q2_amrfinderplus/types/tests/data/annotations_sample_data_contigs/sample1_amr_annotations.tsv new file mode 100644 index 0000000..20e52d1 --- /dev/null +++ b/q2_amrfinderplus/types/tests/data/annotations_sample_data_contigs/sample1_amr_annotations.tsv @@ -0,0 +1,3 @@ +Protein identifier Gene symbol Sequence name Scope Element type Element subtype Class Subclass Method Target length Reference sequence length % Coverage of reference sequence % Identity to reference sequence Alignment length Accession of closest sequence Name of closest sequence HMM id HMM description Hierarchy node +aph3pp-Ib_partial_5p_neg aph(3'')-Ib aminoglycoside O-phosphotransferase APH(3'')-Ib core AMR AMR AMINOGLYCOSIDE STREPTOMYCIN PARTIALP 225 267 81.27 100.00 217 WP_001082319.1 aminoglycoside O-phosphotransferase APH(3'')-Ib NF032896.1 APH(3'') family aminoglycoside O-phosphotransferase aph(3'')-Ib +blaOXA-436_partial blaOXA OXA-48 family class D beta-lactamase core AMR AMR BETA-LACTAM BETA-LACTAM PARTIALP 233 265 87.92 100.00 233 WP_058842180.1 OXA-48 family carbapenem-hydrolyzing class D beta-lactamase OXA-436 NF012161.0 class D beta-lactamase blaOXA-48_fam diff --git a/q2_amrfinderplus/types/tests/data/annotations_sample_data_contigs/sample2_amr_annotations.tsv b/q2_amrfinderplus/types/tests/data/annotations_sample_data_contigs/sample2_amr_annotations.tsv new file mode 100644 index 0000000..20e52d1 --- /dev/null +++ b/q2_amrfinderplus/types/tests/data/annotations_sample_data_contigs/sample2_amr_annotations.tsv @@ -0,0 +1,3 @@ +Protein identifier Gene symbol Sequence name Scope Element type Element subtype Class Subclass Method Target length Reference sequence length % Coverage of reference sequence % Identity to reference sequence Alignment length Accession of closest sequence Name of closest sequence HMM id HMM description Hierarchy node +aph3pp-Ib_partial_5p_neg aph(3'')-Ib aminoglycoside O-phosphotransferase APH(3'')-Ib core AMR AMR AMINOGLYCOSIDE STREPTOMYCIN PARTIALP 225 267 81.27 100.00 217 WP_001082319.1 aminoglycoside O-phosphotransferase APH(3'')-Ib NF032896.1 APH(3'') family aminoglycoside O-phosphotransferase aph(3'')-Ib +blaOXA-436_partial blaOXA OXA-48 family class D beta-lactamase core AMR AMR BETA-LACTAM BETA-LACTAM PARTIALP 233 265 87.92 100.00 233 WP_058842180.1 OXA-48 family carbapenem-hydrolyzing class D beta-lactamase OXA-436 NF012161.0 class D beta-lactamase blaOXA-48_fam diff --git a/q2_amrfinderplus/types/tests/data/annotations_sample_data_mags/sample1/e026af61-d911-4de3-a957-7e8bf837f30d_amr_annotations.tsv b/q2_amrfinderplus/types/tests/data/annotations_sample_data_mags/sample1/e026af61-d911-4de3-a957-7e8bf837f30d_amr_annotations.tsv new file mode 100644 index 0000000..20e52d1 --- /dev/null +++ b/q2_amrfinderplus/types/tests/data/annotations_sample_data_mags/sample1/e026af61-d911-4de3-a957-7e8bf837f30d_amr_annotations.tsv @@ -0,0 +1,3 @@ +Protein identifier Gene symbol Sequence name Scope Element type Element subtype Class Subclass Method Target length Reference sequence length % Coverage of reference sequence % Identity to reference sequence Alignment length Accession of closest sequence Name of closest sequence HMM id HMM description Hierarchy node +aph3pp-Ib_partial_5p_neg aph(3'')-Ib aminoglycoside O-phosphotransferase APH(3'')-Ib core AMR AMR AMINOGLYCOSIDE STREPTOMYCIN PARTIALP 225 267 81.27 100.00 217 WP_001082319.1 aminoglycoside O-phosphotransferase APH(3'')-Ib NF032896.1 APH(3'') family aminoglycoside O-phosphotransferase aph(3'')-Ib +blaOXA-436_partial blaOXA OXA-48 family class D beta-lactamase core AMR AMR BETA-LACTAM BETA-LACTAM PARTIALP 233 265 87.92 100.00 233 WP_058842180.1 OXA-48 family carbapenem-hydrolyzing class D beta-lactamase OXA-436 NF012161.0 class D beta-lactamase blaOXA-48_fam diff --git a/q2_amrfinderplus/types/tests/data/annotations_sample_data_mags/sample2/aa447c99-ecd9-4c4a-a53b-4df6999815dd_amr_annotations.tsv b/q2_amrfinderplus/types/tests/data/annotations_sample_data_mags/sample2/aa447c99-ecd9-4c4a-a53b-4df6999815dd_amr_annotations.tsv new file mode 100644 index 0000000..20e52d1 --- /dev/null +++ b/q2_amrfinderplus/types/tests/data/annotations_sample_data_mags/sample2/aa447c99-ecd9-4c4a-a53b-4df6999815dd_amr_annotations.tsv @@ -0,0 +1,3 @@ +Protein identifier Gene symbol Sequence name Scope Element type Element subtype Class Subclass Method Target length Reference sequence length % Coverage of reference sequence % Identity to reference sequence Alignment length Accession of closest sequence Name of closest sequence HMM id HMM description Hierarchy node +aph3pp-Ib_partial_5p_neg aph(3'')-Ib aminoglycoside O-phosphotransferase APH(3'')-Ib core AMR AMR AMINOGLYCOSIDE STREPTOMYCIN PARTIALP 225 267 81.27 100.00 217 WP_001082319.1 aminoglycoside O-phosphotransferase APH(3'')-Ib NF032896.1 APH(3'') family aminoglycoside O-phosphotransferase aph(3'')-Ib +blaOXA-436_partial blaOXA OXA-48 family class D beta-lactamase core AMR AMR BETA-LACTAM BETA-LACTAM PARTIALP 233 265 87.92 100.00 233 WP_058842180.1 OXA-48 family carbapenem-hydrolyzing class D beta-lactamase OXA-436 NF012161.0 class D beta-lactamase blaOXA-48_fam diff --git a/q2_amrfinderplus/types/tests/data/metadata_tables/feature_data.tsv b/q2_amrfinderplus/types/tests/data/metadata_tables/feature_data.tsv new file mode 100644 index 0000000..ceac9b1 --- /dev/null +++ b/q2_amrfinderplus/types/tests/data/metadata_tables/feature_data.tsv @@ -0,0 +1,5 @@ +Sample/MAG_ID Protein identifier Gene symbol Sequence name Scope Element type Element subtype Class Subclass Method Target length Reference sequence length % Coverage of reference sequence % Identity to reference sequence Alignment length Accession of closest sequence Name of closest sequence HMM id HMM description Hierarchy node +aa447c99-ecd9-4c4a-a53b-4df6999815dd aph3pp-Ib_partial_5p_neg aph(3'')-Ib aminoglycoside O-phosphotransferase APH(3'')-Ib core AMR AMR AMINOGLYCOSIDE STREPTOMYCIN PARTIALP 225 267 81.27 100.0 217 WP_001082319.1 aminoglycoside O-phosphotransferase APH(3'')-Ib NF032896.1 APH(3'') family aminoglycoside O-phosphotransferase aph(3'')-Ib +aa447c99-ecd9-4c4a-a53b-4df6999815dd blaOXA-436_partial blaOXA OXA-48 family class D beta-lactamase core AMR AMR BETA-LACTAM BETA-LACTAM PARTIALP 233 265 87.92 100.0 233 WP_058842180.1 OXA-48 family carbapenem-hydrolyzing class D beta-lactamase OXA-436 NF012161.0 class D beta-lactamase blaOXA-48_fam +e026af61-d911-4de3-a957-7e8bf837f30d aph3pp-Ib_partial_5p_neg aph(3'')-Ib aminoglycoside O-phosphotransferase APH(3'')-Ib core AMR AMR AMINOGLYCOSIDE STREPTOMYCIN PARTIALP 225 267 81.27 100.0 217 WP_001082319.1 aminoglycoside O-phosphotransferase APH(3'')-Ib NF032896.1 APH(3'') family aminoglycoside O-phosphotransferase aph(3'')-Ib +e026af61-d911-4de3-a957-7e8bf837f30d blaOXA-436_partial blaOXA OXA-48 family class D beta-lactamase core AMR AMR BETA-LACTAM BETA-LACTAM PARTIALP 233 265 87.92 100.0 233 WP_058842180.1 OXA-48 family carbapenem-hydrolyzing class D beta-lactamase OXA-436 NF012161.0 class D beta-lactamase blaOXA-48_fam diff --git a/q2_amrfinderplus/types/tests/data/metadata_tables/sample_data_contigs.tsv b/q2_amrfinderplus/types/tests/data/metadata_tables/sample_data_contigs.tsv new file mode 100644 index 0000000..a2dce71 --- /dev/null +++ b/q2_amrfinderplus/types/tests/data/metadata_tables/sample_data_contigs.tsv @@ -0,0 +1,5 @@ +Sample/MAG_ID Protein identifier Gene symbol Sequence name Scope Element type Element subtype Class Subclass Method Target length Reference sequence length % Coverage of reference sequence % Identity to reference sequence Alignment length Accession of closest sequence Name of closest sequence HMM id HMM description Hierarchy node +sample1 aph3pp-Ib_partial_5p_neg aph(3'')-Ib aminoglycoside O-phosphotransferase APH(3'')-Ib core AMR AMR AMINOGLYCOSIDE STREPTOMYCIN PARTIALP 225 267 81.27 100.0 217 WP_001082319.1 aminoglycoside O-phosphotransferase APH(3'')-Ib NF032896.1 APH(3'') family aminoglycoside O-phosphotransferase aph(3'')-Ib +sample1 blaOXA-436_partial blaOXA OXA-48 family class D beta-lactamase core AMR AMR BETA-LACTAM BETA-LACTAM PARTIALP 233 265 87.92 100.0 233 WP_058842180.1 OXA-48 family carbapenem-hydrolyzing class D beta-lactamase OXA-436 NF012161.0 class D beta-lactamase blaOXA-48_fam +sample2 aph3pp-Ib_partial_5p_neg aph(3'')-Ib aminoglycoside O-phosphotransferase APH(3'')-Ib core AMR AMR AMINOGLYCOSIDE STREPTOMYCIN PARTIALP 225 267 81.27 100.0 217 WP_001082319.1 aminoglycoside O-phosphotransferase APH(3'')-Ib NF032896.1 APH(3'') family aminoglycoside O-phosphotransferase aph(3'')-Ib +sample2 blaOXA-436_partial blaOXA OXA-48 family class D beta-lactamase core AMR AMR BETA-LACTAM BETA-LACTAM PARTIALP 233 265 87.92 100.0 233 WP_058842180.1 OXA-48 family carbapenem-hydrolyzing class D beta-lactamase OXA-436 NF012161.0 class D beta-lactamase blaOXA-48_fam diff --git a/q2_amrfinderplus/types/tests/data/metadata_tables/sample_data_mags.tsv b/q2_amrfinderplus/types/tests/data/metadata_tables/sample_data_mags.tsv new file mode 100644 index 0000000..771d6df --- /dev/null +++ b/q2_amrfinderplus/types/tests/data/metadata_tables/sample_data_mags.tsv @@ -0,0 +1,5 @@ +Sample/MAG_ID Protein identifier Gene symbol Sequence name Scope Element type Element subtype Class Subclass Method Target length Reference sequence length % Coverage of reference sequence % Identity to reference sequence Alignment length Accession of closest sequence Name of closest sequence HMM id HMM description Hierarchy node +sample1/e026af61-d911-4de3-a957-7e8bf837f30d aph3pp-Ib_partial_5p_neg aph(3'')-Ib aminoglycoside O-phosphotransferase APH(3'')-Ib core AMR AMR AMINOGLYCOSIDE STREPTOMYCIN PARTIALP 225 267 81.27 100.0 217 WP_001082319.1 aminoglycoside O-phosphotransferase APH(3'')-Ib NF032896.1 APH(3'') family aminoglycoside O-phosphotransferase aph(3'')-Ib +sample1/e026af61-d911-4de3-a957-7e8bf837f30d blaOXA-436_partial blaOXA OXA-48 family class D beta-lactamase core AMR AMR BETA-LACTAM BETA-LACTAM PARTIALP 233 265 87.92 100.0 233 WP_058842180.1 OXA-48 family carbapenem-hydrolyzing class D beta-lactamase OXA-436 NF012161.0 class D beta-lactamase blaOXA-48_fam +sample2/aa447c99-ecd9-4c4a-a53b-4df6999815dd aph3pp-Ib_partial_5p_neg aph(3'')-Ib aminoglycoside O-phosphotransferase APH(3'')-Ib core AMR AMR AMINOGLYCOSIDE STREPTOMYCIN PARTIALP 225 267 81.27 100.0 217 WP_001082319.1 aminoglycoside O-phosphotransferase APH(3'')-Ib NF032896.1 APH(3'') family aminoglycoside O-phosphotransferase aph(3'')-Ib +sample2/aa447c99-ecd9-4c4a-a53b-4df6999815dd blaOXA-436_partial blaOXA OXA-48 family class D beta-lactamase core AMR AMR BETA-LACTAM BETA-LACTAM PARTIALP 233 265 87.92 100.0 233 WP_058842180.1 OXA-48 family carbapenem-hydrolyzing class D beta-lactamase OXA-436 NF012161.0 class D beta-lactamase blaOXA-48_fam diff --git a/q2_amrfinderplus/types/tests/data/mutations_feature_data_mags/aa447c99-ecd9-4c4a-a53b-4df6999815dd_amr_all_mutations.tsv b/q2_amrfinderplus/types/tests/data/mutations_feature_data_mags/aa447c99-ecd9-4c4a-a53b-4df6999815dd_amr_all_mutations.tsv new file mode 100644 index 0000000..20e52d1 --- /dev/null +++ b/q2_amrfinderplus/types/tests/data/mutations_feature_data_mags/aa447c99-ecd9-4c4a-a53b-4df6999815dd_amr_all_mutations.tsv @@ -0,0 +1,3 @@ +Protein identifier Gene symbol Sequence name Scope Element type Element subtype Class Subclass Method Target length Reference sequence length % Coverage of reference sequence % Identity to reference sequence Alignment length Accession of closest sequence Name of closest sequence HMM id HMM description Hierarchy node +aph3pp-Ib_partial_5p_neg aph(3'')-Ib aminoglycoside O-phosphotransferase APH(3'')-Ib core AMR AMR AMINOGLYCOSIDE STREPTOMYCIN PARTIALP 225 267 81.27 100.00 217 WP_001082319.1 aminoglycoside O-phosphotransferase APH(3'')-Ib NF032896.1 APH(3'') family aminoglycoside O-phosphotransferase aph(3'')-Ib +blaOXA-436_partial blaOXA OXA-48 family class D beta-lactamase core AMR AMR BETA-LACTAM BETA-LACTAM PARTIALP 233 265 87.92 100.00 233 WP_058842180.1 OXA-48 family carbapenem-hydrolyzing class D beta-lactamase OXA-436 NF012161.0 class D beta-lactamase blaOXA-48_fam diff --git a/q2_amrfinderplus/types/tests/data/mutations_feature_data_mags/e026af61-d911-4de3-a957-7e8bf837f30d_amr_all_mutations.tsv b/q2_amrfinderplus/types/tests/data/mutations_feature_data_mags/e026af61-d911-4de3-a957-7e8bf837f30d_amr_all_mutations.tsv new file mode 100644 index 0000000..20e52d1 --- /dev/null +++ b/q2_amrfinderplus/types/tests/data/mutations_feature_data_mags/e026af61-d911-4de3-a957-7e8bf837f30d_amr_all_mutations.tsv @@ -0,0 +1,3 @@ +Protein identifier Gene symbol Sequence name Scope Element type Element subtype Class Subclass Method Target length Reference sequence length % Coverage of reference sequence % Identity to reference sequence Alignment length Accession of closest sequence Name of closest sequence HMM id HMM description Hierarchy node +aph3pp-Ib_partial_5p_neg aph(3'')-Ib aminoglycoside O-phosphotransferase APH(3'')-Ib core AMR AMR AMINOGLYCOSIDE STREPTOMYCIN PARTIALP 225 267 81.27 100.00 217 WP_001082319.1 aminoglycoside O-phosphotransferase APH(3'')-Ib NF032896.1 APH(3'') family aminoglycoside O-phosphotransferase aph(3'')-Ib +blaOXA-436_partial blaOXA OXA-48 family class D beta-lactamase core AMR AMR BETA-LACTAM BETA-LACTAM PARTIALP 233 265 87.92 100.00 233 WP_058842180.1 OXA-48 family carbapenem-hydrolyzing class D beta-lactamase OXA-436 NF012161.0 class D beta-lactamase blaOXA-48_fam diff --git a/q2_amrfinderplus/types/tests/data/mutations_sample_data_contigs/sample1_amr_all_mutations.tsv b/q2_amrfinderplus/types/tests/data/mutations_sample_data_contigs/sample1_amr_all_mutations.tsv new file mode 100644 index 0000000..20e52d1 --- /dev/null +++ b/q2_amrfinderplus/types/tests/data/mutations_sample_data_contigs/sample1_amr_all_mutations.tsv @@ -0,0 +1,3 @@ +Protein identifier Gene symbol Sequence name Scope Element type Element subtype Class Subclass Method Target length Reference sequence length % Coverage of reference sequence % Identity to reference sequence Alignment length Accession of closest sequence Name of closest sequence HMM id HMM description Hierarchy node +aph3pp-Ib_partial_5p_neg aph(3'')-Ib aminoglycoside O-phosphotransferase APH(3'')-Ib core AMR AMR AMINOGLYCOSIDE STREPTOMYCIN PARTIALP 225 267 81.27 100.00 217 WP_001082319.1 aminoglycoside O-phosphotransferase APH(3'')-Ib NF032896.1 APH(3'') family aminoglycoside O-phosphotransferase aph(3'')-Ib +blaOXA-436_partial blaOXA OXA-48 family class D beta-lactamase core AMR AMR BETA-LACTAM BETA-LACTAM PARTIALP 233 265 87.92 100.00 233 WP_058842180.1 OXA-48 family carbapenem-hydrolyzing class D beta-lactamase OXA-436 NF012161.0 class D beta-lactamase blaOXA-48_fam diff --git a/q2_amrfinderplus/types/tests/data/mutations_sample_data_contigs/sample2_amr_all_mutations.tsv b/q2_amrfinderplus/types/tests/data/mutations_sample_data_contigs/sample2_amr_all_mutations.tsv new file mode 100644 index 0000000..20e52d1 --- /dev/null +++ b/q2_amrfinderplus/types/tests/data/mutations_sample_data_contigs/sample2_amr_all_mutations.tsv @@ -0,0 +1,3 @@ +Protein identifier Gene symbol Sequence name Scope Element type Element subtype Class Subclass Method Target length Reference sequence length % Coverage of reference sequence % Identity to reference sequence Alignment length Accession of closest sequence Name of closest sequence HMM id HMM description Hierarchy node +aph3pp-Ib_partial_5p_neg aph(3'')-Ib aminoglycoside O-phosphotransferase APH(3'')-Ib core AMR AMR AMINOGLYCOSIDE STREPTOMYCIN PARTIALP 225 267 81.27 100.00 217 WP_001082319.1 aminoglycoside O-phosphotransferase APH(3'')-Ib NF032896.1 APH(3'') family aminoglycoside O-phosphotransferase aph(3'')-Ib +blaOXA-436_partial blaOXA OXA-48 family class D beta-lactamase core AMR AMR BETA-LACTAM BETA-LACTAM PARTIALP 233 265 87.92 100.00 233 WP_058842180.1 OXA-48 family carbapenem-hydrolyzing class D beta-lactamase OXA-436 NF012161.0 class D beta-lactamase blaOXA-48_fam diff --git a/q2_amrfinderplus/types/tests/data/mutations_sample_data_mags/sample1/e026af61-d911-4de3-a957-7e8bf837f30d_amr_all_mutations.tsv b/q2_amrfinderplus/types/tests/data/mutations_sample_data_mags/sample1/e026af61-d911-4de3-a957-7e8bf837f30d_amr_all_mutations.tsv new file mode 100644 index 0000000..20e52d1 --- /dev/null +++ b/q2_amrfinderplus/types/tests/data/mutations_sample_data_mags/sample1/e026af61-d911-4de3-a957-7e8bf837f30d_amr_all_mutations.tsv @@ -0,0 +1,3 @@ +Protein identifier Gene symbol Sequence name Scope Element type Element subtype Class Subclass Method Target length Reference sequence length % Coverage of reference sequence % Identity to reference sequence Alignment length Accession of closest sequence Name of closest sequence HMM id HMM description Hierarchy node +aph3pp-Ib_partial_5p_neg aph(3'')-Ib aminoglycoside O-phosphotransferase APH(3'')-Ib core AMR AMR AMINOGLYCOSIDE STREPTOMYCIN PARTIALP 225 267 81.27 100.00 217 WP_001082319.1 aminoglycoside O-phosphotransferase APH(3'')-Ib NF032896.1 APH(3'') family aminoglycoside O-phosphotransferase aph(3'')-Ib +blaOXA-436_partial blaOXA OXA-48 family class D beta-lactamase core AMR AMR BETA-LACTAM BETA-LACTAM PARTIALP 233 265 87.92 100.00 233 WP_058842180.1 OXA-48 family carbapenem-hydrolyzing class D beta-lactamase OXA-436 NF012161.0 class D beta-lactamase blaOXA-48_fam diff --git a/q2_amrfinderplus/types/tests/data/mutations_sample_data_mags/sample2/aa447c99-ecd9-4c4a-a53b-4df6999815dd_amr_all_mutations.tsv b/q2_amrfinderplus/types/tests/data/mutations_sample_data_mags/sample2/aa447c99-ecd9-4c4a-a53b-4df6999815dd_amr_all_mutations.tsv new file mode 100644 index 0000000..20e52d1 --- /dev/null +++ b/q2_amrfinderplus/types/tests/data/mutations_sample_data_mags/sample2/aa447c99-ecd9-4c4a-a53b-4df6999815dd_amr_all_mutations.tsv @@ -0,0 +1,3 @@ +Protein identifier Gene symbol Sequence name Scope Element type Element subtype Class Subclass Method Target length Reference sequence length % Coverage of reference sequence % Identity to reference sequence Alignment length Accession of closest sequence Name of closest sequence HMM id HMM description Hierarchy node +aph3pp-Ib_partial_5p_neg aph(3'')-Ib aminoglycoside O-phosphotransferase APH(3'')-Ib core AMR AMR AMINOGLYCOSIDE STREPTOMYCIN PARTIALP 225 267 81.27 100.00 217 WP_001082319.1 aminoglycoside O-phosphotransferase APH(3'')-Ib NF032896.1 APH(3'') family aminoglycoside O-phosphotransferase aph(3'')-Ib +blaOXA-436_partial blaOXA OXA-48 family class D beta-lactamase core AMR AMR BETA-LACTAM BETA-LACTAM PARTIALP 233 265 87.92 100.00 233 WP_058842180.1 OXA-48 family carbapenem-hydrolyzing class D beta-lactamase OXA-436 NF012161.0 class D beta-lactamase blaOXA-48_fam diff --git a/q2_amrfinderplus/types/tests/test_types_formats_transformers.py b/q2_amrfinderplus/types/tests/test_types_formats_transformers.py index e23536a..901ec63 100644 --- a/q2_amrfinderplus/types/tests/test_types_formats_transformers.py +++ b/q2_amrfinderplus/types/tests/test_types_formats_transformers.py @@ -7,7 +7,11 @@ # ---------------------------------------------------------------------------- import os import tempfile +from io import StringIO +import pandas as pd +import qiime2 +from pandas._testing import assert_frame_equal from qiime2.core.exceptions import ValidationError from qiime2.plugin.testing import TestPluginBase @@ -16,6 +20,11 @@ AMRFinderPlusAnnotationsDirFmt, AMRFinderPlusDatabaseDirFmt, ) +from q2_amrfinderplus.types._transformer import ( + _transfomer_helper, + combine_dataframes, + create_append_df, +) class TestAMRFinderPlusTypesAndFormats(TestPluginBase): @@ -116,3 +125,108 @@ def test_amrfinderplus_annotations_dir_fmt_path_maker(self): fmt = AMRFinderPlusAnnotationsDirFmt() path = fmt.annotations_path_maker(name="annotations", id="id") self.assertEqual(str(path), os.path.join(str(fmt), "id_amr_annotations.tsv")) + + +class MetadataUtilsTest(TestPluginBase): + package = "q2_amrfinderplus.types.tests" + + def setUp(self): + super().setUp() + # Setup test data + self.file_data_1 = "col1\tcol2\nval1\tval2\nval3\tval4" + self.file_data_2 = "col1\tcol2\nval5\tval6\nval7\tval8" + + self.df1 = pd.read_csv(StringIO(self.file_data_1), sep="\t") + self.df2 = pd.read_csv(StringIO(self.file_data_2), sep="\t") + + self.df_list = [] + + self.df1 = pd.DataFrame( + { + "Sample/MAG_ID": ["id_value_1", "id_value_1"], + "col1": ["val1", "val3"], + "col2": ["val2", "val4"], + } + ) + + self.df2 = pd.DataFrame( + { + "Sample/MAG_ID": ["id_value_2", "id_value_2"], + "col1": ["val5", "val7"], + "col2": ["val6", "val8"], + } + ) + + self.expected_combined_df = pd.DataFrame( + { + "Sample/MAG_ID": [ + "id_value_1", + "id_value_1", + "id_value_2", + "id_value_2", + ], + "col1": ["val1", "val3", "val5", "val7"], + "col2": ["val2", "val4", "val6", "val8"], + } + ) + + self.expected_combined_df.index = self.expected_combined_df.index.astype(str) + self.expected_combined_df.index.name = "id" + + def test_create_append_df(self): + # Test create_append_df function + create_append_df(StringIO(self.file_data_1), self.df_list, "id_value_1") + create_append_df(StringIO(self.file_data_2), self.df_list, "id_value_2") + + pd.testing.assert_frame_equal(self.df_list[0], self.df1) + pd.testing.assert_frame_equal(self.df_list[1], self.df2) + + def test_combine_dataframes(self): + # Test combine_dataframes function + df_list = [self.df1, self.df2] + combined_df = combine_dataframes(df_list) + pd.testing.assert_frame_equal(combined_df, self.expected_combined_df) + + +class TestAMRFinderPlusTransformers(TestPluginBase): + package = "q2_amrfinderplus.types.tests" + + def test_annotations_feature_data_mags_transformer_helper(self): + self._test_helper("annotations_feature_data_mags", "feature_data.tsv") + + def test_annotations_sample_data_contigs_transformer_helper(self): + self._test_helper("annotations_sample_data_contigs", "sample_data_contigs.tsv") + + def test_annotations_sample_data_mags_transformer_helper(self): + self._test_helper("annotations_sample_data_mags", "sample_data_mags.tsv") + + def test_mutations_feature_data_mags_transformer_helper(self): + self._test_helper("mutations_feature_data_mags", "feature_data.tsv") + + def test_mutations_sample_data_contigs_transformer_helper(self): + self._test_helper("mutations_sample_data_contigs", "sample_data_contigs.tsv") + + def test_mutations_sample_data_mags_transformer_helper(self): + self._test_helper("mutations_sample_data_mags", "sample_data_mags.tsv") + + def _test_helper(self, data, table_name): + df_expected = pd.read_csv( + self.get_data_path(f"metadata_tables/{table_name}"), + sep="\t", + ) + df_expected.index = df_expected.index.astype(str) + df_expected.index.name = "id" + df_obs = _transfomer_helper(self.get_data_path(data)) + assert_frame_equal(df_expected, df_obs) + + def test_annotations_sample_data_mags_to_Metadata(self): + transformer = self.get_transformer( + AMRFinderPlusAnnotationsDirFmt, qiime2.Metadata + ) + fmt = AMRFinderPlusAnnotationsDirFmt( + self.get_data_path("annotations_sample_data_mags"), "r" + ) + + metadata_obt = transformer(fmt) + + self.assertIsInstance(metadata_obt, qiime2.Metadata) From fa8702571f5793b3919016301bdd326f1561065c Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Thu, 26 Sep 2024 16:35:10 +0200 Subject: [PATCH 2/7] added transfomers import to plugin setup --- q2_amrfinderplus/plugin_setup.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/q2_amrfinderplus/plugin_setup.py b/q2_amrfinderplus/plugin_setup.py index 3a13792..cf7d1f6 100644 --- a/q2_amrfinderplus/plugin_setup.py +++ b/q2_amrfinderplus/plugin_setup.py @@ -5,6 +5,8 @@ # # The full license is in the file LICENSE, distributed with this software. # ---------------------------------------------------------------------------- +import importlib + from q2_types.sample_data import SampleData from qiime2.plugin import Citations, Plugin @@ -64,3 +66,5 @@ AMRFinderPlusAnnotationFormat, AMRFinderPlusAnnotationsDirFmt, ) + +importlib.import_module("q2_amrfinderplus.types._transformer") From 1026eec7f1af0116a61a4bf353869df1b605135c Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Thu, 26 Sep 2024 16:36:59 +0200 Subject: [PATCH 3/7] added files to package data --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 95ceace..58c9b2e 100644 --- a/setup.py +++ b/setup.py @@ -30,7 +30,7 @@ }, package_data={ "q2_amrfinderplus": ["citations.bib"], - "q2_amrfinderplus.tests": ["data/*"], + "q2_amrfinderplus.types.tests": ["data/*" "data/*/*" "data/*/*/*"], }, zip_safe=False, ) From f3865cffba1ecb76f2aceb1125a21e9616148ac9 Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Wed, 9 Oct 2024 12:53:31 +0200 Subject: [PATCH 4/7] removed create append df --- q2_amrfinderplus/types/_transformer.py | 30 ++++++------------- .../tests/test_types_formats_transformers.py | 25 ++-------------- 2 files changed, 11 insertions(+), 44 deletions(-) diff --git a/q2_amrfinderplus/types/_transformer.py b/q2_amrfinderplus/types/_transformer.py index 06b6900..3c2ad29 100644 --- a/q2_amrfinderplus/types/_transformer.py +++ b/q2_amrfinderplus/types/_transformer.py @@ -37,11 +37,11 @@ def _transfomer_helper(data): else: id_value = file_dir_name + "/" + file_name[:-18] - create_append_df( - file_path=file, - df_list=df_list, - id_value=id_value, - ) + # Create df and append it to df_list + df = pd.read_csv(file, sep="\t") + df.insert(0, "Sample/MAG_ID", id_value) + df_list.append(df) + else: # Annotations file from feature data mags or sample data contigs if file_dir_name.endswith("_amr_annotations.tsv"): @@ -50,26 +50,14 @@ def _transfomer_helper(data): else: id_value = file_dir_name[:-22] - create_append_df( - file_path=os.path.join(str(data), file_dir_name), - df_list=df_list, - id_value=id_value, - ) + # Create df and append it to df_list + df = pd.read_csv(os.path.join(str(data), file_dir_name), sep="\t") + df.insert(0, "Sample/MAG_ID", id_value) + df_list.append(df) return combine_dataframes(df_list) -def create_append_df(file_path, df_list, id_value): - # Read in df - df = pd.read_csv(file_path, sep="\t") - - # Insert column with sample or mag IDs - df.insert(0, "Sample/MAG_ID", id_value) - - # Append df to df list - df_list.append(df) - - def combine_dataframes(df_list): # Concat all dfs df_combined = pd.concat(df_list, axis=0) diff --git a/q2_amrfinderplus/types/tests/test_types_formats_transformers.py b/q2_amrfinderplus/types/tests/test_types_formats_transformers.py index 5120733..34bcfb1 100644 --- a/q2_amrfinderplus/types/tests/test_types_formats_transformers.py +++ b/q2_amrfinderplus/types/tests/test_types_formats_transformers.py @@ -7,7 +7,6 @@ # ---------------------------------------------------------------------------- import os import tempfile -from io import StringIO import pandas as pd import qiime2 @@ -20,11 +19,7 @@ AMRFinderPlusAnnotationsDirFmt, AMRFinderPlusDatabaseDirFmt, ) -from q2_amrfinderplus.types._transformer import ( - _transfomer_helper, - combine_dataframes, - create_append_df, -) +from q2_amrfinderplus.types._transformer import _transfomer_helper, combine_dataframes class TestAMRFinderPlusTypesAndFormats(TestPluginBase): @@ -125,20 +120,12 @@ def test_amrfinderplus_annotations_dir_fmt_path_maker(self): self.assertEqual(str(path), os.path.join(str(fmt), "id_amr_annotations.tsv")) -class MetadataUtilsTest(TestPluginBase): +class MetadataTransformerUtilsTest(TestPluginBase): package = "q2_amrfinderplus.types.tests" def setUp(self): super().setUp() # Setup test data - self.file_data_1 = "col1\tcol2\nval1\tval2\nval3\tval4" - self.file_data_2 = "col1\tcol2\nval5\tval6\nval7\tval8" - - self.df1 = pd.read_csv(StringIO(self.file_data_1), sep="\t") - self.df2 = pd.read_csv(StringIO(self.file_data_2), sep="\t") - - self.df_list = [] - self.df1 = pd.DataFrame( { "Sample/MAG_ID": ["id_value_1", "id_value_1"], @@ -171,14 +158,6 @@ def setUp(self): self.expected_combined_df.index = self.expected_combined_df.index.astype(str) self.expected_combined_df.index.name = "id" - def test_create_append_df(self): - # Test create_append_df function - create_append_df(StringIO(self.file_data_1), self.df_list, "id_value_1") - create_append_df(StringIO(self.file_data_2), self.df_list, "id_value_2") - - pd.testing.assert_frame_equal(self.df_list[0], self.df1) - pd.testing.assert_frame_equal(self.df_list[1], self.df2) - def test_combine_dataframes(self): # Test combine_dataframes function df_list = [self.df1, self.df2] From 4bf39774aa450da9ccd851230faa65c622bd5da2 Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Fri, 11 Oct 2024 10:34:35 +0200 Subject: [PATCH 5/7] typo --- q2_amrfinderplus/types/_transformer.py | 4 ++-- .../types/tests/test_types_formats_transformers.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/q2_amrfinderplus/types/_transformer.py b/q2_amrfinderplus/types/_transformer.py index 3c2ad29..cd69c21 100644 --- a/q2_amrfinderplus/types/_transformer.py +++ b/q2_amrfinderplus/types/_transformer.py @@ -18,10 +18,10 @@ @plugin.register_transformer def _1(data: AMRFinderPlusAnnotationsDirFmt) -> qiime2.Metadata: - return qiime2.Metadata(_transfomer_helper(data)) + return qiime2.Metadata(_transformer_helper(data)) -def _transfomer_helper(data): +def _transformer_helper(data): df_list = [] for file_dir_name in os.listdir(str(data)): # Check the directory structure diff --git a/q2_amrfinderplus/types/tests/test_types_formats_transformers.py b/q2_amrfinderplus/types/tests/test_types_formats_transformers.py index 8bf26eb..02fde8d 100644 --- a/q2_amrfinderplus/types/tests/test_types_formats_transformers.py +++ b/q2_amrfinderplus/types/tests/test_types_formats_transformers.py @@ -20,7 +20,7 @@ AMRFinderPlusDatabaseDirFmt, _create_path, ) -from q2_amrfinderplus.types._transformer import _transfomer_helper, combine_dataframes +from q2_amrfinderplus.types._transformer import _transformer_helper, combine_dataframes class TestAMRFinderPlusTypesAndFormats(TestPluginBase): @@ -260,7 +260,7 @@ def _test_helper(self, data, table_name): ) df_expected.index = df_expected.index.astype(str) df_expected.index.name = "id" - df_obs = _transfomer_helper(self.get_data_path(data)) + df_obs = _transformer_helper(self.get_data_path(data)) assert_frame_equal(df_expected, df_obs) def test_annotations_sample_data_mags_to_Metadata(self): From 7463e4a9d5d57fed65bf326035db5ea0b51623d4 Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Fri, 11 Oct 2024 11:29:42 +0200 Subject: [PATCH 6/7] modified transformer --- q2_amrfinderplus/types/_transformer.py | 44 ++++++------------- .../tests/test_types_formats_transformers.py | 8 +++- 2 files changed, 19 insertions(+), 33 deletions(-) diff --git a/q2_amrfinderplus/types/_transformer.py b/q2_amrfinderplus/types/_transformer.py index cd69c21..ee2df89 100644 --- a/q2_amrfinderplus/types/_transformer.py +++ b/q2_amrfinderplus/types/_transformer.py @@ -5,10 +5,6 @@ # # The full license is in the file LICENSE, distributed with this software. # ---------------------------------------------------------------------------- -import glob -import os -from pathlib import Path - import pandas as pd import qiime2 @@ -18,40 +14,26 @@ @plugin.register_transformer def _1(data: AMRFinderPlusAnnotationsDirFmt) -> qiime2.Metadata: - return qiime2.Metadata(_transformer_helper(data)) + return qiime2.Metadata(_metadata_transformer_helper(data)) -def _transformer_helper(data): +def _metadata_transformer_helper(data): df_list = [] - for file_dir_name in os.listdir(str(data)): - # Check the directory structure - if os.path.isdir(os.path.join(str(data), file_dir_name)): - for file in glob.glob(os.path.join(str(data), file_dir_name, "*")): - file_name = Path(file).stem - - # Annotations file from sample data mags - if file_name.endswith("_amr_annotations"): - id_value = file_dir_name + "/" + file_name[:-16] - # Mutations file from sample data mags - else: - id_value = file_dir_name + "/" + file_name[:-18] + if any(item.is_dir() for item in data.path.iterdir()): + annotation_dict = data.annotation_dict() - # Create df and append it to df_list - df = pd.read_csv(file, sep="\t") - df.insert(0, "Sample/MAG_ID", id_value) - df_list.append(df) + else: + file_dict = data.annotation_dict() + # Create annotation_dict with fake sample + annotation_dict = {"": file_dict} - else: - # Annotations file from feature data mags or sample data contigs - if file_dir_name.endswith("_amr_annotations.tsv"): - id_value = file_dir_name[:-20] - # Mutations file from feature data mags - else: - id_value = file_dir_name[:-22] + for outer_id, files_dict in annotation_dict.items(): + for inner_id, file_fp in files_dict.items(): + id_value = f"{outer_id}/{inner_id}" if outer_id else inner_id - # Create df and append it to df_list - df = pd.read_csv(os.path.join(str(data), file_dir_name), sep="\t") + # Create df and append to df_list + df = pd.read_csv(file_fp, sep="\t") df.insert(0, "Sample/MAG_ID", id_value) df_list.append(df) diff --git a/q2_amrfinderplus/types/tests/test_types_formats_transformers.py b/q2_amrfinderplus/types/tests/test_types_formats_transformers.py index 02fde8d..7c10c19 100644 --- a/q2_amrfinderplus/types/tests/test_types_formats_transformers.py +++ b/q2_amrfinderplus/types/tests/test_types_formats_transformers.py @@ -20,7 +20,10 @@ AMRFinderPlusDatabaseDirFmt, _create_path, ) -from q2_amrfinderplus.types._transformer import _transformer_helper, combine_dataframes +from q2_amrfinderplus.types._transformer import ( + _metadata_transformer_helper, + combine_dataframes, +) class TestAMRFinderPlusTypesAndFormats(TestPluginBase): @@ -260,7 +263,8 @@ def _test_helper(self, data, table_name): ) df_expected.index = df_expected.index.astype(str) df_expected.index.name = "id" - df_obs = _transformer_helper(self.get_data_path(data)) + dir_fmt = AMRFinderPlusAnnotationsDirFmt(self.get_data_path(data), "r") + df_obs = _metadata_transformer_helper(dir_fmt) assert_frame_equal(df_expected, df_obs) def test_annotations_sample_data_mags_to_Metadata(self): From f14afa29606c32c93958f67c3afa7d876a706a4c Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Fri, 11 Oct 2024 11:41:39 +0200 Subject: [PATCH 7/7] combine_index_test modification --- .../types/tests/test_types_formats_transformers.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/q2_amrfinderplus/types/tests/test_types_formats_transformers.py b/q2_amrfinderplus/types/tests/test_types_formats_transformers.py index 7c10c19..b078649 100644 --- a/q2_amrfinderplus/types/tests/test_types_formats_transformers.py +++ b/q2_amrfinderplus/types/tests/test_types_formats_transformers.py @@ -198,7 +198,7 @@ def setUp(self): # Setup test data self.df1 = pd.DataFrame( { - "Sample/MAG_ID": ["id_value_1", "id_value_1"], + "Sample/MAG_ID": ["id_value_1", "id_value_2"], "col1": ["val1", "val3"], "col2": ["val2", "val4"], } @@ -206,7 +206,7 @@ def setUp(self): self.df2 = pd.DataFrame( { - "Sample/MAG_ID": ["id_value_2", "id_value_2"], + "Sample/MAG_ID": ["id_value_3", "id_value_4"], "col1": ["val5", "val7"], "col2": ["val6", "val8"], } @@ -216,9 +216,9 @@ def setUp(self): { "Sample/MAG_ID": [ "id_value_1", - "id_value_1", - "id_value_2", "id_value_2", + "id_value_3", + "id_value_4", ], "col1": ["val1", "val3", "val5", "val7"], "col2": ["val2", "val4", "val6", "val8"],