Skip to content

Commit

Permalink
first working build
Browse files (browse the repository at this point in the history)
  • Loading branch information
colinbrislawn committed Oct 26, 2023
1 parent 657fd2b commit b20c29a
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 17 deletions.
22 changes: 7 additions & 15 deletions rescript/get_unite.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@
import requests
from requests.exceptions import HTTPError

import qiime2
from q2_types.feature_data import HeaderlessTSVTaxonomyFormat, DNAFASTAFormat
from pandas import DataFrame
from q2_types.feature_data import TaxonomyFormat, DNAFASTAFormat, DNAIterator

# Source: https://unite.ut.ee/repository.php
UNITE_DOIS = {
Expand Down Expand Up @@ -103,7 +103,7 @@ def _unite_get_tgz(

def _unite_get_artifacts(
tgz_file: str = None, cluster_id: str = None
) -> (HeaderlessTSVTaxonomyFormat, DNAFASTAFormat):
) -> (DataFrame, DNAIterator):
"""
Find and import files with matching cluster_id from .tgz
Expand Down Expand Up @@ -136,26 +136,18 @@ def _unite_get_artifacts(
for file in filtered_files:
fp = os.path.join(root, file)
if file.endswith(".txt"):
tax_results = qiime2.Artifact.import_data(
"FeatureData[Taxonomy]",
fp,
"HeaderlessTSVTaxonomyFormat",
)
taxa = TaxonomyFormat(fp, mode="r").view(DataFrame)
elif file.endswith(".fasta"):
seq_results = qiime2.Artifact.import_data(
"FeatureData[Sequence]",
fp,
"MixedCaseDNAFASTAFormat",
)
return tax_results, seq_results
seqs = DNAFASTAFormat(fp, mode="r").view(DNAIterator)
return taxa, seqs


def get_unite_data(
version: str = None,
taxon_group: str = None,
cluster_id: str = "99",
singletons: bool = False,
) -> (HeaderlessTSVTaxonomyFormat, DNAFASTAFormat):
) -> (DataFrame, DNAIterator):
"""
Get Qiime2 artifacts for a given version of UNITE
Expand Down
12 changes: 10 additions & 2 deletions rescript/tests/test_get_unite.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,16 @@ def test_unite_get_artifacts(self):
res_one, res_two = _unite_get_artifacts(
self.unitefile, cluster_id="97"
)
self.assertEqual(str(res_one.type), "FeatureData[Taxonomy]")
self.assertEqual(str(res_two.type), "FeatureData[Sequence]")
# Column names and one feature from TaxonomyFormat
self.assertEqual(
res_one["Taxon"]["SH1140752.08FU_UDB013072_reps"],
"k__Fungi;p__Basidiomycota;c__Agaricomycetes;o__Thelephorales;"
"f__Thelephoraceae;g__Tomentella;s__unidentified",
)
self.assertEqual(
str(type(res_two)),
"<class 'q2_types.feature_data._transformer.DNAIterator'>",
)
# test missing files or misspelled cluster_id
with self.assertRaises(ValueError):
_unite_get_artifacts(self.unitefile, "nothing")
Expand Down

0 comments on commit b20c29a

Please sign in to comment.