broadened sample_type to be any metadata key

BenKaehler · May 31, 2018 · bb3d44f · bb3d44f
1 parent 8dfc29f
commit bb3d44f
Show file tree

Hide file tree

Showing 6 changed files with 38 additions and 31 deletions.
diff --git a/q2_clawback/__init__.py b/q2_clawback/__init__.py
@@ -7,17 +7,17 @@
 # ----------------------------------------------------------------------------
 
 from ._version import get_versions
-from ._clawback import (summarize_QIITA_sample_types_and_contexts,
+from ._clawback import (summarize_QIITA_metadata_category_and_contexts,
                         fetch_QIITA_samples,
                         sequence_variants_from_samples,
                         generate_class_weights,
-                        assemble_weights_from_QIITA_sample_types)
+                        assemble_weights_from_QIITA)
 
-__all__ = ['summarize_QIITA_sample_types_and_contexts',
+__all__ = ['summarize_QIITA_metadata_category_and_contexts',
            'sequence_variants_from_samples',
            'fetch_QIITA_samples',
            'generate_class_weights',
-           'assemble_weights_from_QIITA_sample_types']
+           'assemble_weights_from_QIITA']
 
 __version__ = get_versions()['version']
 del get_versions
diff --git a/q2_clawback/_clawback.py b/q2_clawback/_clawback.py
@@ -27,18 +27,21 @@ def sequence_variants_from_samples(samples: biom.Table) -> DNAIterator:
     return DNAIterator(seqs)
 
 
-def _fetch_QIITA_summaries():
-    md = redbiom.fetch.category_sample_values('sample_type')
+def _fetch_QIITA_summaries(category='sample_type'):
+    md = redbiom.fetch.category_sample_values(category)
     counts = md.value_counts(ascending=False)
     caches = redbiom.summarize.contexts()[['ContextName', 'SamplesWithData']]
     caches = caches.sort_values(by='SamplesWithData', ascending=False)
     return counts, caches
 
 
-def summarize_QIITA_sample_types_and_contexts(output_dir: str=None):
-    counts, caches = _fetch_QIITA_summaries()
-    sample_types = q2templates.df_to_html(
-        counts.to_frame(), bold_rows=False, header=False)
+def summarize_QIITA_metadata_category_and_contexts(
+        output_dir: str=None, category: str='sample_type'):
+    counts, caches = _fetch_QIITA_summaries(category=category)
+    counts = counts.to_frame()
+    counts = DataFrame({category: counts.index, 'count': counts.values.T[0]},
+                       columns=[category, 'count'])
+    sample_types = q2templates.df_to_html(counts, bold_rows=False, index=False)
     contexts = q2templates.df_to_html(caches, index=False)
     title = 'Available in QIITA'
     index = os.path.join(TEMPLATES, 'index.html')
@@ -48,9 +51,10 @@ def summarize_QIITA_sample_types_and_contexts(output_dir: str=None):
         'contexts': contexts})
 
 
-def fetch_QIITA_samples(sample_type: list, context: str) -> biom.Table:
-    query = "where sample_type == '"
-    query += "' or sample_type == '".join(sample_type)
+def fetch_QIITA_samples(metadata_value: list, context: str,
+                        metadata_key: str='sample_type') -> biom.Table:
+    query = "where " + metadata_key + " == '"
+    query += ("' or " + metadata_key + " == '").join(metadata_value)
     query += "'"
     sample_ids = redbiom.search.metadata_full(query, False)
     samples, ambig = redbiom.fetch.data_from_samples(context, sample_ids)
@@ -86,11 +90,13 @@ def generate_class_weights(
     return biom.Table(weights[None].T, taxa, sample_ids=['Weight'])
 
 
-def assemble_weights_from_QIITA_sample_types(
-        ctx, classifier, reference_taxonomy, reference_sequences, sample_type,
-        context, unobserved_weight=1e-6, normalise=False):
+def assemble_weights_from_QIITA(
+        ctx, classifier, reference_taxonomy, reference_sequences,
+        metadata_value, context, unobserved_weight=1e-6, normalise=False,
+        metadata_key='sample_type'):
     samples, = ctx.get_action('clawback', 'fetch_QIITA_samples')(
-        sample_type=sample_type, context=context)
+        metadata_value=metadata_value, context=context,
+        metadata_key=metadata_key)
 
     reads, = ctx.get_action('clawback', 'sequence_variants_from_samples')(
         samples=samples)

diff --git a/q2_clawback/assets/index.html b/q2_clawback/assets/index.html
@@ -5,11 +5,11 @@
 {% block content %}
 
 <div class="row">
-  <h1>Available Sample Types</h1>
+  <h1>Metadata Values</h1>
   <div class="col-lg-12">
     {{ sample_types }}
   </div>
-  <h1>Available Contexts</h1>
+  <h1>Contexts</h1>
   <div class="col-lg-12">
     {{ contexts }}
   </div>

diff --git a/q2_clawback/plugin_setup.py b/q2_clawback/plugin_setup.py
@@ -27,9 +27,9 @@
 )
 
 plugin.visualizers.register_function(
-    function=q2_clawback.summarize_QIITA_sample_types_and_contexts,
+    function=q2_clawback.summarize_QIITA_metadata_category_and_contexts,
     inputs={},
-    parameters={},
+    parameters={'category': Str},
     name='Fetch QIITA sample types and contexts',
     description='Display of counts of samples by sample type and context'
 )
@@ -61,23 +61,25 @@
 plugin.methods.register_function(
     function=q2_clawback.fetch_QIITA_samples,
     inputs={},
-    parameters={'sample_type': List[Str],
-                'context': Str},
+    parameters={'metadata_value': List[Str],
+                'context': Str,
+                'metadata_key': Str},
     outputs=[('samples', FeatureTable[Frequency])],
     name='Fetch feature counts for a collection of samples',
     description=('Fetch feature counts for a collection of samples, '
                  'preferebly with SVs for OTU ids')
 )
 
 plugin.pipelines.register_function(
-    function=q2_clawback.assemble_weights_from_QIITA_sample_types,
+    function=q2_clawback.assemble_weights_from_QIITA,
     inputs={'classifier': TaxonomicClassifier,
             'reference_taxonomy': FeatureData[Taxonomy],
             'reference_sequences': FeatureData[Sequence]},
-    parameters={'sample_type': List[Str],
+    parameters={'metadata_value': List[Str],
                 'context': Str,
                 'unobserved_weight': Float,
-                'normalise': Bool},
+                'normalise': Bool,
+                'metadata_key': Str},
     outputs=[('samples', FeatureTable[RelativeFrequency])],
     name='Assemble weights from QIITA for use with q2-feature-classifier',
     description=('Download SV results from QIITA, classify the SVs, use the '

diff --git a/q2_clawback/tests/__init__.py b/q2_clawback/tests/__init__.py
diff --git a/q2_clawback/tests/test_clawback.py b/q2_clawback/tests/test_clawback.py
@@ -13,8 +13,7 @@
 from qiime2.plugin.testing import TestPluginBase
 from qiime2.plugins.feature_classifier.methods import \
     fit_classifier_naive_bayes
-from qiime2.plugins.clawback.pipelines import \
-    assemble_weights_from_QIITA_sample_types
+from qiime2.plugins.clawback.pipelines import assemble_weights_from_QIITA
 from biom import Table
 from q2_types.feature_data import DNAIterator
 from pandas import DataFrame
@@ -43,18 +42,18 @@ def tearDown(self):
 
 
 class ClawbackTests(ClawbackTestPluginBase):
-    def test_assemble_weights_from_QIITA_sample_types(self):
+    def test_assemble_weights_from_QIITA(self):
         counts, caches = q2_clawback._clawback._fetch_QIITA_summaries()
 
         sample_type = 'Tears'
         self.assertTrue(hasattr(counts, sample_type))
         for context in caches.ContextName:
-            if context.startswith('Deblur-illumina-16S-v4'):
+            if context.startswith('Deblur-NA-illumina-16S-v4'):
                 break
         else:
             self.assertTrue(False)
 
-        weights = assemble_weights_from_QIITA_sample_types(
+        weights = assemble_weights_from_QIITA(
             self.classifier, self.taxonomy, self.reads, [sample_type], context)
         weights = weights[0].view(Table)