Made some slight alterations to the runtime notebooks

caporaso-lab · Jul 7, 2017 · aab2c59
1 parent 6a43bc2
commit aab2c59
Show file tree

Hide file tree

Showing 3 changed files with 158 additions and 145 deletions.
diff --git a/ipynb/runtime/analysis.ipynb b/ipynb/runtime/analysis.ipynb
diff --git a/ipynb/runtime/compute-runtimes.ipynb b/ipynb/runtime/compute-runtimes.ipynb
@@ -39,13 +39,13 @@
     "project_dir = '../..'\n",
     "data_dir = join(project_dir, \"data\")\n",
     "\n",
-    "results_dir = join(project_dir, 'temp_dir_runtime')\n",
+    "results_dir = join(project_dir, 'temp_results_runtime')\n",
     "runtime_results = join(results_dir, 'runtime_results.txt')\n",
     "tmpdir = join(results_dir, 'tmp')\n",
     "\n",
     "ref_db_dir = join(project_dir, 'data/ref_dbs/gg_13_8_otus')\n",
-    "ref_seqs = join(ref_db_dir, '99_otus.fasta')\n",
-    "ref_taxa = join(ref_db_dir, '99_otu_taxonomy.txt')\n",
+    "ref_seqs = join(ref_db_dir, '99_otus_clean.fasta')\n",
+    "ref_taxa = join(ref_db_dir, '99_otu_taxonomy_clean.tsv')\n",
     "\n",
     "num_iters = 1\n",
     "sampling_depths = [1] + list(range(2000,10001,2000))"
@@ -62,9 +62,7 @@
   {
    "cell_type": "code",
    "execution_count": 3,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "runtime_make_test_data(ref_seqs, tmpdir, sampling_depths)"
@@ -86,12 +84,13 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\u001b[32mSaved TaxonomicClassifier to: ../../temp_dir_runtime/tmp/1.fna.nb.qza\u001b[0m\n",
-      "\u001b[32mSaved TaxonomicClassifier to: ../../temp_dir_runtime/tmp/2000.fna.nb.qza\u001b[0m\n",
-      "\u001b[32mSaved TaxonomicClassifier to: ../../temp_dir_runtime/tmp/4000.fna.nb.qza\u001b[0m\n",
-      "\u001b[32mSaved TaxonomicClassifier to: ../../temp_dir_runtime/tmp/6000.fna.nb.qza\u001b[0m\n",
-      "\u001b[32mSaved TaxonomicClassifier to: ../../temp_dir_runtime/tmp/8000.fna.nb.qza\u001b[0m\n",
-      "\u001b[32mSaved TaxonomicClassifier to: ../../temp_dir_runtime/tmp/10000.fna.nb.qza\u001b[0m\n"
+      "\u001b[33mQIIME is caching your current deployment for improved performance. This may take a few moments and should only happen once per deployment.\u001b[0m\n",
+      "\u001b[32mSaved TaxonomicClassifier to: ../../temp_results_runtime/tmp/1.fna.nb.qza\u001b[0m\n",
+      "\u001b[32mSaved TaxonomicClassifier to: ../../temp_results_runtime/tmp/2000.fna.nb.qza\u001b[0m\n",
+      "\u001b[32mSaved TaxonomicClassifier to: ../../temp_results_runtime/tmp/4000.fna.nb.qza\u001b[0m\n",
+      "\u001b[32mSaved TaxonomicClassifier to: ../../temp_results_runtime/tmp/6000.fna.nb.qza\u001b[0m\n",
+      "\u001b[32mSaved TaxonomicClassifier to: ../../temp_results_runtime/tmp/8000.fna.nb.qza\u001b[0m\n",
+      "\u001b[32mSaved TaxonomicClassifier to: ../../temp_results_runtime/tmp/10000.fna.nb.qza\u001b[0m\n"
      ]
     }
    ],
@@ -124,15 +123,15 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 3,
    "metadata": {
     "collapsed": true
    },
    "outputs": [],
    "source": [
     "qiime1_setup = join(results_dir, '.bashrc')\n",
-    "qiime1_template = ('source activate qiime1; source ' + qiime1_setup + '; '\n",
-    "                   'assign_taxonomy.py -i {1} -o {0} -r {2} -t {3} -m {4} {5}')\n",
+    "qiime1_template = ('bash -c \"source activate qiime1; source ' + qiime1_setup + '; '\n",
+    "                   'assign_taxonomy.py -i {1} -o {0} -r {2} -t {3} -m {4} {5}\"')\n",
     "blast_template = ('qiime feature-classifier classify-consensus-blast --i-query {1}.qza --o-classification '\n",
     "                  '{0}/assign.tmp --i-reference-reads {2}.qza --i-reference-taxonomy {3}.qza {5}')\n",
     "vsearch_template = ('qiime feature-classifier classify-consensus-vsearch --i-query {1}.qza '\n",
@@ -146,7 +145,9 @@
     "    'uclust': (qiime1_template, '--min_consensus_fraction 0.51 --similarity 0.8 --uclust_max_accepts 3'),\n",
     "    'sortmerna': (qiime1_template, '--sortmerna_e_value 0.001 --min_consensus_fraction 0.51 --similarity 0.8 '\n",
     "                 '--sortmerna_best_N_alignments 3 --sortmerna_coverage 0.8'),\n",
-    "    'blast' : (qiime1_template, '-e 0.001'),\n",
+    "    'blast' : (qiime1_template, '-e 0.001')\n",
+    "          }\n",
+    "qiime2_methods = {\n",
     "    'blast+' : (blast_template, '--p-evalue 0.001'),\n",
     "    'vsearch' : (vsearch_template, '--p-perc-identity 0.90'),\n",
     "    'naive-bayes': (naive_bayes_template, '--p-confidence 0.7')\n",
@@ -169,7 +170,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 4,
    "metadata": {
     "collapsed": true
    },
@@ -188,7 +189,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 5,
    "metadata": {
     "collapsed": true
    },
@@ -207,16 +208,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 6,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "84\n",
-      "('qiime feature-classifier classify-sklearn  --o-classification ../../temp_dir_runtime/tmp/assign.tmp --i-classifier ../../temp_dir_runtime/tmp/2000.fna.nb.qza --i-reads ../../temp_dir_runtime/tmp/1.fna.qza --p-confidence 0.7', 'naive-bayes', '1', '2000', 0)\n",
-      "('source activate qiime1; source ../../temp_dir_runtime/.bashrc; assign_taxonomy.py -i ../../temp_dir_runtime/tmp/10000.fna -o ../../temp_dir_runtime/tmp -r ../../temp_dir_runtime/tmp/10000.fna -t ../../data/ref_dbs/gg_13_8_otus/99_otu_taxonomy.txt -m rdp --confidence 0.5 --rdp_max_memory 16000', 'rdp', '10000', '10000', 0)\n"
+      "48\n",
+      "('bash -c \"source activate qiime1; source ../../temp_results_runtime/.bashrc; assign_taxonomy.py -i ../../temp_results_runtime/tmp/1.fna -o ../../temp_results_runtime/tmp -r ../../temp_results_runtime/tmp/2000.fna -t ../../data/ref_dbs/gg_13_8_otus/99_otu_taxonomy_clean.tsv -m blast -e 0.001\"', 'blast', '1', '2000', 0)\n",
+      "('bash -c \"source activate qiime1; source ../../temp_results_runtime/.bashrc; assign_taxonomy.py -i ../../temp_results_runtime/tmp/10000.fna -o ../../temp_results_runtime/tmp -r ../../temp_results_runtime/tmp/10000.fna -t ../../data/ref_dbs/gg_13_8_otus/99_otu_taxonomy_clean.tsv -m sortmerna --sortmerna_e_value 0.001 --min_consensus_fraction 0.51 --similarity 0.8 --sortmerna_best_N_alignments 3 --sortmerna_coverage 0.8\"', 'sortmerna', '10000', '10000', 0)\n"
      ]
     }
    ],
@@ -234,17 +235,8 @@
    },
    "outputs": [],
    "source": [
-    "Parallel(n_jobs=1)(delayed(clock_runtime)(command, runtime_results, force=False) for command in (list(set(commands_a + commands_b))))"
+    "Parallel(n_jobs=21)(delayed(clock_runtime)(command, runtime_results, force=False) for command in (list(set(commands_a + commands_b))))"
    ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "collapsed": true
-   },
-   "outputs": [],
-   "source": []
   }
  ],
  "metadata": {

diff --git a/setup.py b/setup.py
@@ -17,7 +17,7 @@
     packages=find_packages(),
     install_requires=['biom-format', 'pandas', 'statsmodels', 'bokeh',
                       'scipy', 'jupyter', 'scikit-bio', 'seaborn',
-		              'scikit-learn'],
+		      'scikit-learn', 'joblib'],
     author="Nicholas Bokulich",
     author_email="[email protected]",
     description="Systematic benchmarking of taxonomic classification methods",