Skip to content

Commit

Permalink
Made some slight alterations to the runtime notebooks
Browse files — browse the repository at this point in the history
  • Loading branch information
BenKaehler committed Jul 7, 2017
1 parent 6a43bc2 commit aab2c59
Show file tree
Hide file tree
Showing 3 changed files with 158 additions and 145 deletions.
245 changes: 133 additions & 112 deletions ipynb/runtime/analysis.ipynb

Large diffs are not rendered by default.

56 changes: 24 additions & 32 deletions ipynb/runtime/compute-runtimes.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -39,13 +39,13 @@
"project_dir = '../..'\n",
"data_dir = join(project_dir, \"data\")\n",
"\n",
"results_dir = join(project_dir, 'temp_dir_runtime')\n",
"results_dir = join(project_dir, 'temp_results_runtime')\n",
"runtime_results = join(results_dir, 'runtime_results.txt')\n",
"tmpdir = join(results_dir, 'tmp')\n",
"\n",
"ref_db_dir = join(project_dir, 'data/ref_dbs/gg_13_8_otus')\n",
"ref_seqs = join(ref_db_dir, '99_otus.fasta')\n",
"ref_taxa = join(ref_db_dir, '99_otu_taxonomy.txt')\n",
"ref_seqs = join(ref_db_dir, '99_otus_clean.fasta')\n",
"ref_taxa = join(ref_db_dir, '99_otu_taxonomy_clean.tsv')\n",
"\n",
"num_iters = 1\n",
"sampling_depths = [1] + list(range(2000,10001,2000))"
Expand All @@ -62,9 +62,7 @@
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"runtime_make_test_data(ref_seqs, tmpdir, sampling_depths)"
Expand All @@ -86,12 +84,13 @@
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[32mSaved TaxonomicClassifier to: ../../temp_dir_runtime/tmp/1.fna.nb.qza\u001b[0m\n",
"\u001b[32mSaved TaxonomicClassifier to: ../../temp_dir_runtime/tmp/2000.fna.nb.qza\u001b[0m\n",
"\u001b[32mSaved TaxonomicClassifier to: ../../temp_dir_runtime/tmp/4000.fna.nb.qza\u001b[0m\n",
"\u001b[32mSaved TaxonomicClassifier to: ../../temp_dir_runtime/tmp/6000.fna.nb.qza\u001b[0m\n",
"\u001b[32mSaved TaxonomicClassifier to: ../../temp_dir_runtime/tmp/8000.fna.nb.qza\u001b[0m\n",
"\u001b[32mSaved TaxonomicClassifier to: ../../temp_dir_runtime/tmp/10000.fna.nb.qza\u001b[0m\n"
"\u001b[33mQIIME is caching your current deployment for improved performance. This may take a few moments and should only happen once per deployment.\u001b[0m\n",
"\u001b[32mSaved TaxonomicClassifier to: ../../temp_results_runtime/tmp/1.fna.nb.qza\u001b[0m\n",
"\u001b[32mSaved TaxonomicClassifier to: ../../temp_results_runtime/tmp/2000.fna.nb.qza\u001b[0m\n",
"\u001b[32mSaved TaxonomicClassifier to: ../../temp_results_runtime/tmp/4000.fna.nb.qza\u001b[0m\n",
"\u001b[32mSaved TaxonomicClassifier to: ../../temp_results_runtime/tmp/6000.fna.nb.qza\u001b[0m\n",
"\u001b[32mSaved TaxonomicClassifier to: ../../temp_results_runtime/tmp/8000.fna.nb.qza\u001b[0m\n",
"\u001b[32mSaved TaxonomicClassifier to: ../../temp_results_runtime/tmp/10000.fna.nb.qza\u001b[0m\n"
]
}
],
Expand Down Expand Up @@ -124,15 +123,15 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 3,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"qiime1_setup = join(results_dir, '.bashrc')\n",
"qiime1_template = ('source activate qiime1; source ' + qiime1_setup + '; '\n",
" 'assign_taxonomy.py -i {1} -o {0} -r {2} -t {3} -m {4} {5}')\n",
"qiime1_template = ('bash -c \"source activate qiime1; source ' + qiime1_setup + '; '\n",
" 'assign_taxonomy.py -i {1} -o {0} -r {2} -t {3} -m {4} {5}\"')\n",
"blast_template = ('qiime feature-classifier classify-consensus-blast --i-query {1}.qza --o-classification '\n",
" '{0}/assign.tmp --i-reference-reads {2}.qza --i-reference-taxonomy {3}.qza {5}')\n",
"vsearch_template = ('qiime feature-classifier classify-consensus-vsearch --i-query {1}.qza '\n",
Expand All @@ -146,7 +145,9 @@
" 'uclust': (qiime1_template, '--min_consensus_fraction 0.51 --similarity 0.8 --uclust_max_accepts 3'),\n",
" 'sortmerna': (qiime1_template, '--sortmerna_e_value 0.001 --min_consensus_fraction 0.51 --similarity 0.8 '\n",
" '--sortmerna_best_N_alignments 3 --sortmerna_coverage 0.8'),\n",
" 'blast' : (qiime1_template, '-e 0.001'),\n",
" 'blast' : (qiime1_template, '-e 0.001')\n",
" }\n",
"qiime2_methods = {\n",
" 'blast+' : (blast_template, '--p-evalue 0.001'),\n",
" 'vsearch' : (vsearch_template, '--p-perc-identity 0.90'),\n",
" 'naive-bayes': (naive_bayes_template, '--p-confidence 0.7')\n",
Expand All @@ -169,7 +170,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 4,
"metadata": {
"collapsed": true
},
Expand All @@ -188,7 +189,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 5,
"metadata": {
"collapsed": true
},
Expand All @@ -207,16 +208,16 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"84\n",
"('qiime feature-classifier classify-sklearn --o-classification ../../temp_dir_runtime/tmp/assign.tmp --i-classifier ../../temp_dir_runtime/tmp/2000.fna.nb.qza --i-reads ../../temp_dir_runtime/tmp/1.fna.qza --p-confidence 0.7', 'naive-bayes', '1', '2000', 0)\n",
"('source activate qiime1; source ../../temp_dir_runtime/.bashrc; assign_taxonomy.py -i ../../temp_dir_runtime/tmp/10000.fna -o ../../temp_dir_runtime/tmp -r ../../temp_dir_runtime/tmp/10000.fna -t ../../data/ref_dbs/gg_13_8_otus/99_otu_taxonomy.txt -m rdp --confidence 0.5 --rdp_max_memory 16000', 'rdp', '10000', '10000', 0)\n"
"48\n",
"('bash -c \"source activate qiime1; source ../../temp_results_runtime/.bashrc; assign_taxonomy.py -i ../../temp_results_runtime/tmp/1.fna -o ../../temp_results_runtime/tmp -r ../../temp_results_runtime/tmp/2000.fna -t ../../data/ref_dbs/gg_13_8_otus/99_otu_taxonomy_clean.tsv -m blast -e 0.001\"', 'blast', '1', '2000', 0)\n",
"('bash -c \"source activate qiime1; source ../../temp_results_runtime/.bashrc; assign_taxonomy.py -i ../../temp_results_runtime/tmp/10000.fna -o ../../temp_results_runtime/tmp -r ../../temp_results_runtime/tmp/10000.fna -t ../../data/ref_dbs/gg_13_8_otus/99_otu_taxonomy_clean.tsv -m sortmerna --sortmerna_e_value 0.001 --min_consensus_fraction 0.51 --similarity 0.8 --sortmerna_best_N_alignments 3 --sortmerna_coverage 0.8\"', 'sortmerna', '10000', '10000', 0)\n"
]
}
],
Expand All @@ -234,17 +235,8 @@
},
"outputs": [],
"source": [
"Parallel(n_jobs=1)(delayed(clock_runtime)(command, runtime_results, force=False) for command in (list(set(commands_a + commands_b))))"
"Parallel(n_jobs=21)(delayed(clock_runtime)(command, runtime_results, force=False) for command in (list(set(commands_a + commands_b))))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
packages=find_packages(),
install_requires=['biom-format', 'pandas', 'statsmodels', 'bokeh',
'scipy', 'jupyter', 'scikit-bio', 'seaborn',
'scikit-learn'],
'scikit-learn', 'joblib'],
author="Nicholas Bokulich",
author_email="[email protected]",
description="Systematic benchmarking of taxonomic classification methods",
Expand Down

0 comments on commit aab2c59

Please sign in to comment.