From f2e389131ca89f9fc3a28fe81c14d126e5228f22 Mon Sep 17 00:00:00 2001 From: nbokulich Date: Fri, 7 Jun 2024 13:20:10 +0200 Subject: [PATCH 1/2] BUG: fix GTDB taxonomy parser --- rescript/get_gtdb.py | 2 +- rescript/tests/test_get_gtdb.py | 11 ++++++++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/rescript/get_gtdb.py b/rescript/get_gtdb.py index 602911d..bb7971d 100644 --- a/rescript/get_gtdb.py +++ b/rescript/get_gtdb.py @@ -96,7 +96,7 @@ def _assemble_queries(version='220.0', def parse_gtdb_taxonomy(tax_str): - tax = tax_str.split()[0] + tax = ' '.join(tax_str.split()[0:2]) return tax diff --git a/rescript/tests/test_get_gtdb.py b/rescript/tests/test_get_gtdb.py index ed2f7b8..de5204f 100644 --- a/rescript/tests/test_get_gtdb.py +++ b/rescript/tests/test_get_gtdb.py @@ -9,7 +9,7 @@ import pkg_resources from qiime2.plugin.testing import TestPluginBase from qiime2.plugins import rescript -from rescript.get_gtdb import _assemble_queries +from rescript.get_gtdb import _assemble_queries, parse_gtdb_taxonomy from q2_types.feature_data import (TSVTaxonomyFormat, DNAFASTAFormat) from urllib.request import urlopen @@ -155,3 +155,12 @@ def _makey_fakey_bact(faking_ignore_this): version='214.1', db_type='All') self.assertEqual(str(resc[0].type), 'FeatureData[Taxonomy]') self.assertEqual(str(resc[1].type), 'FeatureData[Sequence]') + + def test_parse_gtdb_taxonomy(self): + tax_in = ('d__Bacteria;p__Bacillota;c__Bacilli;o__Lactobacillales;' + 'f__Lactobacillaceae;g__Oenococcus;s__Oenococcus oeni ' + '[locus_tag=NZ_AQVA01000009.1] [location=77871..79431] ' + '[ssu_len=1561] [contig_len=79790]') + exp = ('d__Bacteria;p__Bacillota;c__Bacilli;o__Lactobacillales;' + 'f__Lactobacillaceae;g__Oenococcus;s__Oenococcus oeni') + self.assertEqual(parse_gtdb_taxonomy(tax_in), exp) \ No newline at end of file From 78d19fd61957925239ac756ddf0e603ef53a3d18 Mon Sep 17 00:00:00 2001 From: nbokulich Date: Fri, 7 Jun 2024 15:12:16 +0200 Subject: [PATCH 2/2] squash lint --- rescript/tests/test_get_gtdb.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rescript/tests/test_get_gtdb.py b/rescript/tests/test_get_gtdb.py index de5204f..2939ab4 100644 --- a/rescript/tests/test_get_gtdb.py +++ b/rescript/tests/test_get_gtdb.py @@ -163,4 +163,4 @@ def test_parse_gtdb_taxonomy(self): '[ssu_len=1561] [contig_len=79790]') exp = ('d__Bacteria;p__Bacillota;c__Bacilli;o__Lactobacillales;' 'f__Lactobacillaceae;g__Oenococcus;s__Oenococcus oeni') - self.assertEqual(parse_gtdb_taxonomy(tax_in), exp) \ No newline at end of file + self.assertEqual(parse_gtdb_taxonomy(tax_in), exp)