From 47bfbc1dce8de0540b5bf57bf9d90bb39b487742 Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Thu, 21 Mar 2024 17:05:21 +0100 Subject: [PATCH 1/4] added sample_dict to mag and reads annotation dir formats --- q2_amr/types/_format.py | 36 +++++++++++ .../sample1/allele_mapping_data.txt | 0 .../sample1/overall_mapping_stats.txt | 0 .../sample1/sorted.length_100.bam | Bin 0 -> 564 bytes .../sample2/allele_mapping_data.txt | 0 .../sample2/overall_mapping_stats.txt | 0 .../sample2/sorted.length_100.bam | Bin 0 -> 564 bytes .../sample1/gene_mapping_data.txt | 0 .../sample2/gene_mapping_data.txt | 0 .../tests/test_types_formats_transformers.py | 56 +++++++++++++++++- setup.py | 3 +- 11 files changed, 91 insertions(+), 4 deletions(-) rename q2_amr/types/tests/data/{annotate_reads_output => card_allele_annotation}/sample1/allele_mapping_data.txt (100%) rename q2_amr/types/tests/data/{annotate_reads_output => card_allele_annotation}/sample1/overall_mapping_stats.txt (100%) create mode 100644 q2_amr/types/tests/data/card_allele_annotation/sample1/sorted.length_100.bam rename q2_amr/types/tests/data/{annotate_reads_output => card_allele_annotation}/sample2/allele_mapping_data.txt (100%) rename q2_amr/types/tests/data/{annotate_reads_output => card_allele_annotation}/sample2/overall_mapping_stats.txt (100%) create mode 100644 q2_amr/types/tests/data/card_allele_annotation/sample2/sorted.length_100.bam rename q2_amr/types/tests/data/{annotate_reads_output => card_gene_annotation}/sample1/gene_mapping_data.txt (100%) rename q2_amr/types/tests/data/{annotate_reads_output => card_gene_annotation}/sample2/gene_mapping_data.txt (100%) diff --git a/q2_amr/types/_format.py b/q2_amr/types/_format.py index f09628a..4276f21 100644 --- a/q2_amr/types/_format.py +++ b/q2_amr/types/_format.py @@ -6,6 +6,7 @@ # The full license is in the file LICENSE, distributed with this software. # ---------------------------------------------------------------------------- import json +import os import re from copy import copy @@ -277,6 +278,20 @@ def json_path_maker(self, sample_id, bin_id): def txt_path_maker(self, sample_id, bin_id): return f"{sample_id}/{bin_id}/amr_annotation.txt" + def sample_dict(self): + sample_dict = {} + for sample in self.path.iterdir(): + for mag in sample.iterdir(): + files = [ + os.path.join(mag, file) + for file in [ + "amr_annotation.json", + "amr_annotation.txt", + ] + ] + sample_dict[sample.name] = {mag.name: files} + return sample_dict + class CARDAlleleAnnotationFormat(model.TextFileFormat): def _validate(self, n_records=None): @@ -421,6 +436,20 @@ def stats_path_maker(self, sample_id): def bam_path_maker(self, sample_id): return "%s/sorted.length_100.bam" % sample_id + def sample_dict(self): + sample_dict = {} + for sample in self.path.iterdir(): + files = [ + os.path.join(sample, file) + for file in [ + "allele_mapping_data.txt", + "overall_mapping_stats.txt", + "sorted.length_100.bam", + ] + ] + sample_dict[sample.name] = files + return sample_dict + class CARDGeneAnnotationDirectoryFormat(MultiDirValidationMixin, model.DirectoryFormat): gene = model.FileCollection( @@ -431,6 +460,13 @@ class CARDGeneAnnotationDirectoryFormat(MultiDirValidationMixin, model.Directory def gene_path_maker(self, sample_id): return "%s/gene_mapping_data.txt" % sample_id + def sample_dict(self): + sample_dict = {} + for sample in self.path.iterdir(): + file = list(os.path.join(sample, "gene_mapping_data.txt")) + sample_dict[sample.name] = file + return sample_dict + class CARDMAGsKmerAnalysisFormat(model.TextFileFormat): def _validate(self, n_records=None): diff --git a/q2_amr/types/tests/data/annotate_reads_output/sample1/allele_mapping_data.txt b/q2_amr/types/tests/data/card_allele_annotation/sample1/allele_mapping_data.txt similarity index 100% rename from q2_amr/types/tests/data/annotate_reads_output/sample1/allele_mapping_data.txt rename to q2_amr/types/tests/data/card_allele_annotation/sample1/allele_mapping_data.txt diff --git a/q2_amr/types/tests/data/annotate_reads_output/sample1/overall_mapping_stats.txt b/q2_amr/types/tests/data/card_allele_annotation/sample1/overall_mapping_stats.txt similarity index 100% rename from q2_amr/types/tests/data/annotate_reads_output/sample1/overall_mapping_stats.txt rename to q2_amr/types/tests/data/card_allele_annotation/sample1/overall_mapping_stats.txt diff --git a/q2_amr/types/tests/data/card_allele_annotation/sample1/sorted.length_100.bam b/q2_amr/types/tests/data/card_allele_annotation/sample1/sorted.length_100.bam new file mode 100644 index 0000000000000000000000000000000000000000..9b257a6036a2dbe8b269d86aeba5fa1bf02935ce GIT binary patch literal 564 zcmb2|=3rp}f&Xj_PR>jW{ft{D-}XD?AkcPQLjP6p8}1T*-HzG%?`UcNxUfc{d4&c$FXz3pXKOzH|9>VXZcATWKjVtH>vMlk2o$lHYvw)W(o2o! zVROQ)&HdC<#JA@@6D|@9*;Bdr9Fx;anUqkvpHGV{H6q`uq1en+zHzGSy5$@H&+*&vaMKBvqggrK8(qUrFiGl7mwRdX z>DpGsUBPn$Z>?MTS}x>nQiI3B)&ClIZILUk{9%)vbaO72Ey$C%^xDu$xWDg`;);!rX<^%P#$me_8mvc4>K<;10Q&DHaDy zr}iv8vG$yG&w^Vouywp2fbHQdt_}m^Fg9(dosoZUCJmq8h ra>=f&Z?>i>oabKeT6%l)n@@`$aDTlcko*Uj9MIh%&A<#!CLjU;D@Xj7 literal 0 HcmV?d00001 diff --git a/q2_amr/types/tests/data/annotate_reads_output/sample2/allele_mapping_data.txt b/q2_amr/types/tests/data/card_allele_annotation/sample2/allele_mapping_data.txt similarity index 100% rename from q2_amr/types/tests/data/annotate_reads_output/sample2/allele_mapping_data.txt rename to q2_amr/types/tests/data/card_allele_annotation/sample2/allele_mapping_data.txt diff --git a/q2_amr/types/tests/data/annotate_reads_output/sample2/overall_mapping_stats.txt b/q2_amr/types/tests/data/card_allele_annotation/sample2/overall_mapping_stats.txt similarity index 100% rename from q2_amr/types/tests/data/annotate_reads_output/sample2/overall_mapping_stats.txt rename to q2_amr/types/tests/data/card_allele_annotation/sample2/overall_mapping_stats.txt diff --git a/q2_amr/types/tests/data/card_allele_annotation/sample2/sorted.length_100.bam b/q2_amr/types/tests/data/card_allele_annotation/sample2/sorted.length_100.bam new file mode 100644 index 0000000000000000000000000000000000000000..9b257a6036a2dbe8b269d86aeba5fa1bf02935ce GIT binary patch literal 564 zcmb2|=3rp}f&Xj_PR>jW{ft{D-}XD?AkcPQLjP6p8}1T*-HzG%?`UcNxUfc{d4&c$FXz3pXKOzH|9>VXZcATWKjVtH>vMlk2o$lHYvw)W(o2o! zVROQ)&HdC<#JA@@6D|@9*;Bdr9Fx;anUqkvpHGV{H6q`uq1en+zHzGSy5$@H&+*&vaMKBvqggrK8(qUrFiGl7mwRdX z>DpGsUBPn$Z>?MTS}x>nQiI3B)&ClIZILUk{9%)vbaO72Ey$C%^xDu$xWDg`;);!rX<^%P#$me_8mvc4>K<;10Q&DHaDy zr}iv8vG$yG&w^Vouywp2fbHQdt_}m^Fg9(dosoZUCJmq8h ra>=f&Z?>i>oabKeT6%l)n@@`$aDTlcko*Uj9MIh%&A<#!CLjU;D@Xj7 literal 0 HcmV?d00001 diff --git a/q2_amr/types/tests/data/annotate_reads_output/sample1/gene_mapping_data.txt b/q2_amr/types/tests/data/card_gene_annotation/sample1/gene_mapping_data.txt similarity index 100% rename from q2_amr/types/tests/data/annotate_reads_output/sample1/gene_mapping_data.txt rename to q2_amr/types/tests/data/card_gene_annotation/sample1/gene_mapping_data.txt diff --git a/q2_amr/types/tests/data/annotate_reads_output/sample2/gene_mapping_data.txt b/q2_amr/types/tests/data/card_gene_annotation/sample2/gene_mapping_data.txt similarity index 100% rename from q2_amr/types/tests/data/annotate_reads_output/sample2/gene_mapping_data.txt rename to q2_amr/types/tests/data/card_gene_annotation/sample2/gene_mapping_data.txt diff --git a/q2_amr/types/tests/test_types_formats_transformers.py b/q2_amr/types/tests/test_types_formats_transformers.py index d9cd204..c141801 100644 --- a/q2_amr/types/tests/test_types_formats_transformers.py +++ b/q2_amr/types/tests/test_types_formats_transformers.py @@ -315,6 +315,27 @@ def test_CARDAnnotationDirectoryFormat_to_qiime2_Metadata_transformer(self): metadata_obt = transformer(annotation) self.assertIsInstance(metadata_obt, qiime2.Metadata) + def test_card_annotation_directory_format_sample_dict(self): + dirpath = self.get_data_path("annotate_mags_output") + annotations = CARDAnnotationDirectoryFormat(dirpath, mode="r") + + obs = annotations.sample_dict() + exp = { + "sample1": { + "bin1": [ + os.path.join(dirpath, "sample1", "bin1", "amr_annotation.json"), + os.path.join(dirpath, "sample1", "bin1", "amr_annotation.txt"), + ] + }, + "sample2": { + "bin1": [ + os.path.join(dirpath, "sample2", "bin1", "amr_annotation.json"), + os.path.join(dirpath, "sample2", "bin1", "amr_annotation.txt"), + ] + }, + } + self.assertEqual(obs, exp) + class TestCARDReadsAnnotationTypesAndFormats(AMRTypesTestPluginBase): def test_CARDGeneAnnotationDirectoryFormat_to_qiime2_Metadata_transformer(self): @@ -322,21 +343,50 @@ def test_CARDGeneAnnotationDirectoryFormat_to_qiime2_Metadata_transformer(self): CARDGeneAnnotationDirectoryFormat, qiime2.Metadata ) annotation = CARDGeneAnnotationDirectoryFormat( - self.get_data_path("annotate_reads_output"), "r" + self.get_data_path("card_gene_annotation"), "r" ) metadata_obt = transformer(annotation) self.assertIsInstance(metadata_obt, qiime2.Metadata) def test_CARDAlleleAnnotationDirectoryFormat_to_qiime2_Metadata_transformer(self): transformer = self.get_transformer( - CARDGeneAnnotationDirectoryFormat, qiime2.Metadata + CARDAlleleAnnotationDirectoryFormat, qiime2.Metadata ) annotation = CARDAlleleAnnotationDirectoryFormat( - self.get_data_path("annotate_reads_output"), "r" + self.get_data_path("card_allele_annotation"), "r" ) metadata_obt = transformer(annotation) self.assertIsInstance(metadata_obt, qiime2.Metadata) + def test_card_allele_annotation_directory_format_sample_dict(self): + dirpath = self.get_data_path("card_allele_annotation") + annotations = CARDAlleleAnnotationDirectoryFormat(dirpath, mode="r") + + obs = annotations.sample_dict() + exp = { + sample: [ + os.path.join(dirpath, sample, file) + for file in [ + "allele_mapping_data.txt", + "overall_mapping_stats.txt", + "sorted.length_100.bam", + ] + ] + for sample in ["sample1", "sample2"] + } + self.assertEqual(obs, exp) + + def test_card_gene_annotation_directory_format_sample_dict(self): + dirpath = self.get_data_path("card_gene_annotation") + annotations = CARDGeneAnnotationDirectoryFormat(dirpath, mode="r") + + obs = annotations.sample_dict() + exp = { + "sample1": list(os.path.join(dirpath, "sample1", "gene_mapping_data.txt")), + "sample2": list(os.path.join(dirpath, "sample2", "gene_mapping_data.txt")), + } + self.assertEqual(obs, exp) + class TestKmerTypesAndFormats(AMRTypesTestPluginBase): def test_card_mags_kmer_analysis_validate_positive(self): diff --git a/setup.py b/setup.py index 0ea2e18..38ab567 100644 --- a/setup.py +++ b/setup.py @@ -31,7 +31,8 @@ "q2_amr.types.tests": [ "data/*", "data/annotate_mags_output/*/*/*", - "data/annotate_reads_output/*/*", + "data/card_allele_annotation/*/*", + "data/card_gene_annotation/*/*", ], "q2_amr.card.tests": ["data/*"], }, From df53c33debf82bc61d1f52c3cada5727850b5664 Mon Sep 17 00:00:00 2001 From: VinzentRisch <100149044+VinzentRisch@users.noreply.github.com> Date: Fri, 22 Mar 2024 09:21:51 +0100 Subject: [PATCH 2/4] Update q2_amr/types/_format.py Co-authored-by: Michal Ziemski --- q2_amr/types/_format.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/q2_amr/types/_format.py b/q2_amr/types/_format.py index 4276f21..b31a311 100644 --- a/q2_amr/types/_format.py +++ b/q2_amr/types/_format.py @@ -282,14 +282,11 @@ def sample_dict(self): sample_dict = {} for sample in self.path.iterdir(): for mag in sample.iterdir(): - files = [ - os.path.join(mag, file) - for file in [ - "amr_annotation.json", - "amr_annotation.txt", - ] - ] - sample_dict[sample.name] = {mag.name: files} + sample_dict[sample.name] = { + mag.name: [ + os.path.join(mag, "amr_annotation.json"), + os.path.join(mag, "amr_annotation.txt") + } return sample_dict From 4c45a8bf93cdd8cd9151087dc37c6aeaedb71635 Mon Sep 17 00:00:00 2001 From: VinzentRisch <100149044+VinzentRisch@users.noreply.github.com> Date: Fri, 22 Mar 2024 09:23:08 +0100 Subject: [PATCH 3/4] Update q2_amr/types/tests/test_types_formats_transformers.py Co-authored-by: Michal Ziemski --- q2_amr/types/tests/test_types_formats_transformers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/q2_amr/types/tests/test_types_formats_transformers.py b/q2_amr/types/tests/test_types_formats_transformers.py index c141801..216baa5 100644 --- a/q2_amr/types/tests/test_types_formats_transformers.py +++ b/q2_amr/types/tests/test_types_formats_transformers.py @@ -382,8 +382,8 @@ def test_card_gene_annotation_directory_format_sample_dict(self): obs = annotations.sample_dict() exp = { - "sample1": list(os.path.join(dirpath, "sample1", "gene_mapping_data.txt")), - "sample2": list(os.path.join(dirpath, "sample2", "gene_mapping_data.txt")), + "sample1": [os.path.join(dirpath, "sample1", "gene_mapping_data.txt"),], + "sample2": [os.path.join(dirpath, "sample2", "gene_mapping_data.txt"),], } self.assertEqual(obs, exp) From e7eb72c918048d8bc9342d00de622ee1b824d748 Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Fri, 22 Mar 2024 09:37:39 +0100 Subject: [PATCH 4/4] changed all the loops to spelled out dicts --- q2_amr/types/_format.py | 18 ++++++--------- .../tests/test_types_formats_transformers.py | 23 ++++++++++--------- 2 files changed, 19 insertions(+), 22 deletions(-) diff --git a/q2_amr/types/_format.py b/q2_amr/types/_format.py index b31a311..710a0d9 100644 --- a/q2_amr/types/_format.py +++ b/q2_amr/types/_format.py @@ -285,7 +285,8 @@ def sample_dict(self): sample_dict[sample.name] = { mag.name: [ os.path.join(mag, "amr_annotation.json"), - os.path.join(mag, "amr_annotation.txt") + os.path.join(mag, "amr_annotation.txt"), + ] } return sample_dict @@ -436,15 +437,11 @@ def bam_path_maker(self, sample_id): def sample_dict(self): sample_dict = {} for sample in self.path.iterdir(): - files = [ - os.path.join(sample, file) - for file in [ - "allele_mapping_data.txt", - "overall_mapping_stats.txt", - "sorted.length_100.bam", - ] + sample_dict[sample.name] = [ + os.path.join(sample, "allele_mapping_data.txt"), + os.path.join(sample, "overall_mapping_stats.txt"), + os.path.join(sample, "sorted.length_100.bam"), ] - sample_dict[sample.name] = files return sample_dict @@ -460,8 +457,7 @@ def gene_path_maker(self, sample_id): def sample_dict(self): sample_dict = {} for sample in self.path.iterdir(): - file = list(os.path.join(sample, "gene_mapping_data.txt")) - sample_dict[sample.name] = file + sample_dict[sample.name] = [os.path.join(sample, "gene_mapping_data.txt")] return sample_dict diff --git a/q2_amr/types/tests/test_types_formats_transformers.py b/q2_amr/types/tests/test_types_formats_transformers.py index 216baa5..6942a22 100644 --- a/q2_amr/types/tests/test_types_formats_transformers.py +++ b/q2_amr/types/tests/test_types_formats_transformers.py @@ -364,15 +364,16 @@ def test_card_allele_annotation_directory_format_sample_dict(self): obs = annotations.sample_dict() exp = { - sample: [ - os.path.join(dirpath, sample, file) - for file in [ - "allele_mapping_data.txt", - "overall_mapping_stats.txt", - "sorted.length_100.bam", - ] - ] - for sample in ["sample1", "sample2"] + "sample1": [ + os.path.join(dirpath, "sample1", "allele_mapping_data.txt"), + os.path.join(dirpath, "sample1", "overall_mapping_stats.txt"), + os.path.join(dirpath, "sample1", "sorted.length_100.bam"), + ], + "sample2": [ + os.path.join(dirpath, "sample2", "allele_mapping_data.txt"), + os.path.join(dirpath, "sample2", "overall_mapping_stats.txt"), + os.path.join(dirpath, "sample2", "sorted.length_100.bam"), + ], } self.assertEqual(obs, exp) @@ -382,8 +383,8 @@ def test_card_gene_annotation_directory_format_sample_dict(self): obs = annotations.sample_dict() exp = { - "sample1": [os.path.join(dirpath, "sample1", "gene_mapping_data.txt"),], - "sample2": [os.path.join(dirpath, "sample2", "gene_mapping_data.txt"),], + "sample1": [os.path.join(dirpath, "sample1", "gene_mapping_data.txt")], + "sample2": [os.path.join(dirpath, "sample2", "gene_mapping_data.txt")], } self.assertEqual(obs, exp)