From fe0ddfdf1b592af2801b65a470b90a6f2ea11b95 Mon Sep 17 00:00:00 2001 From: cameronmartino Date: Tue, 26 Oct 2021 09:35:07 -0700 Subject: [PATCH] init restructure --- README.md | 4 + {scripts => ipynb}/NeuFit_Pipeline.ipynb | 1050 ++--------------- .../esophagus_cancer_2021-09-12_21:01:35.txt | 0 ...cer_2021-09-12_21:01:35_FullNonNeutral.csv | 0 ...cer_2021-09-12_21:01:35_NeutralFitPlot.png | Bin ...35_NeutralFitPlot_withNonNeutralColors.png | Bin ...021-09-12_21:01:35_NonNeutral_Outliers.csv | 0 .../esophagus_normal_2021-09-12_20:54:39.txt | 0 ...mal_2021-09-12_20:54:39_FullNonNeutral.csv | 0 ...mal_2021-09-12_20:54:39_NeutralFitPlot.png | Bin ...39_NeutralFitPlot_withNonNeutralColors.png | Bin ...021-09-12_20:54:39_NonNeutral_Outliers.csv | 0 .../headNeck_cancer_2021-09-12_20:57:10.txt | 0 ...cer_2021-09-12_20:57:10_FullNonNeutral.csv | 0 ...cer_2021-09-12_20:57:10_NeutralFitPlot.png | Bin ...10_NeutralFitPlot_withNonNeutralColors.png | Bin ...021-09-12_20:57:10_NonNeutral_Outliers.csv | 0 .../headNeck_normal_2021-09-12_21:02:45.txt | 0 ...mal_2021-09-12_21:02:45_FullNonNeutral.csv | 0 ...mal_2021-09-12_21:02:45_NeutralFitPlot.png | Bin ...45_NeutralFitPlot_withNonNeutralColors.png | Bin ...021-09-12_21:02:45_NonNeutral_Outliers.csv | 0 .../combined/combined_2021-09-11_01:05:08.txt | 0 ...ned_2021-09-11_01:05:08_FullNonNeutral.csv | 0 ...ned_2021-09-11_01:05:08_NeutralFitPlot.png | Bin ...08_NeutralFitPlot_withNonNeutralColors.png | Bin ...021-09-11_01:05:08_NonNeutral_Outliers.csv | 0 .../nonProgressors_2021-09-11_16:59:37.txt | 0 ...ors_2021-09-11_16:59:37_FullNonNeutral.csv | 0 ...ors_2021-09-11_16:59:37_NeutralFitPlot.png | Bin ...37_NeutralFitPlot_withNonNeutralColors.png | Bin ...021-09-11_16:59:37_NonNeutral_Outliers.csv | 0 .../nonProgressorsT1_2021-09-11_17:01:16.txt | 0 ...sT1_2021-09-11_17:01:16_FullNonNeutral.csv | 0 ...sT1_2021-09-11_17:01:16_NeutralFitPlot.png | Bin ...16_NeutralFitPlot_withNonNeutralColors.png | Bin ...021-09-11_17:01:16_NonNeutral_Outliers.csv | 0 .../nonProgressorsT2_2021-09-11_17:02:10.txt | 0 ...sT2_2021-09-11_17:02:10_FullNonNeutral.csv | 0 ...sT2_2021-09-11_17:02:10_NeutralFitPlot.png | Bin ...10_NeutralFitPlot_withNonNeutralColors.png | Bin ...021-09-11_17:02:10_NonNeutral_Outliers.csv | 0 .../progressors_2021-09-11_16:51:08.txt | 0 ...ors_2021-09-11_16:51:08_FullNonNeutral.csv | 0 ...ors_2021-09-11_16:51:08_NeutralFitPlot.png | Bin ...08_NeutralFitPlot_withNonNeutralColors.png | Bin ...021-09-11_16:51:08_NonNeutral_Outliers.csv | 0 .../progressorsT1_2021-09-11_16:56:12.txt | 0 ...sT1_2021-09-11_16:56:12_FullNonNeutral.csv | 0 ...sT1_2021-09-11_16:56:12_NeutralFitPlot.png | Bin ...12_NeutralFitPlot_withNonNeutralColors.png | Bin ...021-09-11_16:56:12_NonNeutral_Outliers.csv | 0 .../progressorsT2_2021-09-11_16:57:39.txt | 0 ...sT2_2021-09-11_16:57:39_FullNonNeutral.csv | 0 ...sT2_2021-09-11_16:57:39_NeutralFitPlot.png | Bin ...39_NeutralFitPlot_withNonNeutralColors.png | Bin ...021-09-11_16:57:39_NonNeutral_Outliers.csv | 0 neuevo/__init__.py | 1 + neuevo/neufit.py | 235 ++++ neuevo/neufit_utils.py | 59 + neuevo/neuplot.py | 90 ++ neuevo/plotting_helper.py | 78 ++ neuevo/scripts/__init__.py | 20 + neuevo/scripts/_neufit.py | 88 ++ neuevo/utils.py | 149 +++ setup.py | 107 ++ 66 files changed, 929 insertions(+), 952 deletions(-) rename {scripts => ipynb}/NeuFit_Pipeline.ipynb (85%) rename {neufit_output => ipynb/neufit_output}/TCGA_WGS/esophagus_cancer/esophagus_cancer_2021-09-12_21:01:35.txt (100%) rename {neufit_output => ipynb/neufit_output}/TCGA_WGS/esophagus_cancer/esophagus_cancer_2021-09-12_21:01:35_FullNonNeutral.csv (100%) rename {neufit_output => ipynb/neufit_output}/TCGA_WGS/esophagus_cancer/esophagus_cancer_2021-09-12_21:01:35_NeutralFitPlot.png (100%) rename {neufit_output => ipynb/neufit_output}/TCGA_WGS/esophagus_cancer/esophagus_cancer_2021-09-12_21:01:35_NeutralFitPlot_withNonNeutralColors.png (100%) rename {neufit_output => ipynb/neufit_output}/TCGA_WGS/esophagus_cancer/esophagus_cancer_2021-09-12_21:01:35_NonNeutral_Outliers.csv (100%) rename {neufit_output => ipynb/neufit_output}/TCGA_WGS/esophagus_normal/esophagus_normal_2021-09-12_20:54:39.txt (100%) rename {neufit_output => ipynb/neufit_output}/TCGA_WGS/esophagus_normal/esophagus_normal_2021-09-12_20:54:39_FullNonNeutral.csv (100%) rename {neufit_output => ipynb/neufit_output}/TCGA_WGS/esophagus_normal/esophagus_normal_2021-09-12_20:54:39_NeutralFitPlot.png (100%) rename {neufit_output => ipynb/neufit_output}/TCGA_WGS/esophagus_normal/esophagus_normal_2021-09-12_20:54:39_NeutralFitPlot_withNonNeutralColors.png (100%) rename {neufit_output => ipynb/neufit_output}/TCGA_WGS/esophagus_normal/esophagus_normal_2021-09-12_20:54:39_NonNeutral_Outliers.csv (100%) rename {neufit_output => ipynb/neufit_output}/TCGA_WGS/headNeck_cancer/headNeck_cancer_2021-09-12_20:57:10.txt (100%) rename {neufit_output => ipynb/neufit_output}/TCGA_WGS/headNeck_cancer/headNeck_cancer_2021-09-12_20:57:10_FullNonNeutral.csv (100%) rename {neufit_output => ipynb/neufit_output}/TCGA_WGS/headNeck_cancer/headNeck_cancer_2021-09-12_20:57:10_NeutralFitPlot.png (100%) rename {neufit_output => ipynb/neufit_output}/TCGA_WGS/headNeck_cancer/headNeck_cancer_2021-09-12_20:57:10_NeutralFitPlot_withNonNeutralColors.png (100%) rename {neufit_output => ipynb/neufit_output}/TCGA_WGS/headNeck_cancer/headNeck_cancer_2021-09-12_20:57:10_NonNeutral_Outliers.csv (100%) rename {neufit_output => ipynb/neufit_output}/TCGA_WGS/headNeck_normal/headNeck_normal_2021-09-12_21:02:45.txt (100%) rename {neufit_output => ipynb/neufit_output}/TCGA_WGS/headNeck_normal/headNeck_normal_2021-09-12_21:02:45_FullNonNeutral.csv (100%) rename {neufit_output => ipynb/neufit_output}/TCGA_WGS/headNeck_normal/headNeck_normal_2021-09-12_21:02:45_NeutralFitPlot.png (100%) rename {neufit_output => ipynb/neufit_output}/TCGA_WGS/headNeck_normal/headNeck_normal_2021-09-12_21:02:45_NeutralFitPlot_withNonNeutralColors.png (100%) rename {neufit_output => ipynb/neufit_output}/TCGA_WGS/headNeck_normal/headNeck_normal_2021-09-12_21:02:45_NonNeutral_Outliers.csv (100%) rename {neufit_output => ipynb/neufit_output}/hutchKraken/combined/combined_2021-09-11_01:05:08.txt (100%) rename {neufit_output => ipynb/neufit_output}/hutchKraken/combined/combined_2021-09-11_01:05:08_FullNonNeutral.csv (100%) rename {neufit_output => ipynb/neufit_output}/hutchKraken/combined/combined_2021-09-11_01:05:08_NeutralFitPlot.png (100%) rename {neufit_output => ipynb/neufit_output}/hutchKraken/combined/combined_2021-09-11_01:05:08_NeutralFitPlot_withNonNeutralColors.png (100%) rename {neufit_output => ipynb/neufit_output}/hutchKraken/combined/combined_2021-09-11_01:05:08_NonNeutral_Outliers.csv (100%) rename {neufit_output => ipynb/neufit_output}/hutchKraken/nonProgressors/nonProgressors_2021-09-11_16:59:37.txt (100%) rename {neufit_output => ipynb/neufit_output}/hutchKraken/nonProgressors/nonProgressors_2021-09-11_16:59:37_FullNonNeutral.csv (100%) rename {neufit_output => ipynb/neufit_output}/hutchKraken/nonProgressors/nonProgressors_2021-09-11_16:59:37_NeutralFitPlot.png (100%) rename {neufit_output => ipynb/neufit_output}/hutchKraken/nonProgressors/nonProgressors_2021-09-11_16:59:37_NeutralFitPlot_withNonNeutralColors.png (100%) rename {neufit_output => ipynb/neufit_output}/hutchKraken/nonProgressors/nonProgressors_2021-09-11_16:59:37_NonNeutral_Outliers.csv (100%) rename {neufit_output => ipynb/neufit_output}/hutchKraken/nonProgressorsT1/nonProgressorsT1_2021-09-11_17:01:16.txt (100%) rename {neufit_output => ipynb/neufit_output}/hutchKraken/nonProgressorsT1/nonProgressorsT1_2021-09-11_17:01:16_FullNonNeutral.csv (100%) rename {neufit_output => ipynb/neufit_output}/hutchKraken/nonProgressorsT1/nonProgressorsT1_2021-09-11_17:01:16_NeutralFitPlot.png (100%) rename {neufit_output => ipynb/neufit_output}/hutchKraken/nonProgressorsT1/nonProgressorsT1_2021-09-11_17:01:16_NeutralFitPlot_withNonNeutralColors.png (100%) rename {neufit_output => ipynb/neufit_output}/hutchKraken/nonProgressorsT1/nonProgressorsT1_2021-09-11_17:01:16_NonNeutral_Outliers.csv (100%) rename {neufit_output => ipynb/neufit_output}/hutchKraken/nonProgressorsT2/nonProgressorsT2_2021-09-11_17:02:10.txt (100%) rename {neufit_output => ipynb/neufit_output}/hutchKraken/nonProgressorsT2/nonProgressorsT2_2021-09-11_17:02:10_FullNonNeutral.csv (100%) rename {neufit_output => ipynb/neufit_output}/hutchKraken/nonProgressorsT2/nonProgressorsT2_2021-09-11_17:02:10_NeutralFitPlot.png (100%) rename {neufit_output => ipynb/neufit_output}/hutchKraken/nonProgressorsT2/nonProgressorsT2_2021-09-11_17:02:10_NeutralFitPlot_withNonNeutralColors.png (100%) rename {neufit_output => ipynb/neufit_output}/hutchKraken/nonProgressorsT2/nonProgressorsT2_2021-09-11_17:02:10_NonNeutral_Outliers.csv (100%) rename {neufit_output => ipynb/neufit_output}/hutchKraken/progressors/progressors_2021-09-11_16:51:08.txt (100%) rename {neufit_output => ipynb/neufit_output}/hutchKraken/progressors/progressors_2021-09-11_16:51:08_FullNonNeutral.csv (100%) rename {neufit_output => ipynb/neufit_output}/hutchKraken/progressors/progressors_2021-09-11_16:51:08_NeutralFitPlot.png (100%) rename {neufit_output => ipynb/neufit_output}/hutchKraken/progressors/progressors_2021-09-11_16:51:08_NeutralFitPlot_withNonNeutralColors.png (100%) rename {neufit_output => ipynb/neufit_output}/hutchKraken/progressors/progressors_2021-09-11_16:51:08_NonNeutral_Outliers.csv (100%) rename {neufit_output => ipynb/neufit_output}/hutchKraken/progressorsT1/progressorsT1_2021-09-11_16:56:12.txt (100%) rename {neufit_output => ipynb/neufit_output}/hutchKraken/progressorsT1/progressorsT1_2021-09-11_16:56:12_FullNonNeutral.csv (100%) rename {neufit_output => ipynb/neufit_output}/hutchKraken/progressorsT1/progressorsT1_2021-09-11_16:56:12_NeutralFitPlot.png (100%) rename {neufit_output => ipynb/neufit_output}/hutchKraken/progressorsT1/progressorsT1_2021-09-11_16:56:12_NeutralFitPlot_withNonNeutralColors.png (100%) rename {neufit_output => ipynb/neufit_output}/hutchKraken/progressorsT1/progressorsT1_2021-09-11_16:56:12_NonNeutral_Outliers.csv (100%) rename {neufit_output => ipynb/neufit_output}/hutchKraken/progressorsT2/progressorsT2_2021-09-11_16:57:39.txt (100%) rename {neufit_output => ipynb/neufit_output}/hutchKraken/progressorsT2/progressorsT2_2021-09-11_16:57:39_FullNonNeutral.csv (100%) rename {neufit_output => ipynb/neufit_output}/hutchKraken/progressorsT2/progressorsT2_2021-09-11_16:57:39_NeutralFitPlot.png (100%) rename {neufit_output => ipynb/neufit_output}/hutchKraken/progressorsT2/progressorsT2_2021-09-11_16:57:39_NeutralFitPlot_withNonNeutralColors.png (100%) rename {neufit_output => ipynb/neufit_output}/hutchKraken/progressorsT2/progressorsT2_2021-09-11_16:57:39_NonNeutral_Outliers.csv (100%) create mode 100644 neuevo/__init__.py create mode 100644 neuevo/neufit.py create mode 100644 neuevo/neufit_utils.py create mode 100644 neuevo/neuplot.py create mode 100644 neuevo/plotting_helper.py create mode 100644 neuevo/scripts/__init__.py create mode 100644 neuevo/scripts/_neufit.py create mode 100644 neuevo/utils.py create mode 100644 setup.py diff --git a/README.md b/README.md index f0594bc..b8f791e 100644 --- a/README.md +++ b/README.md @@ -1 +1,5 @@ # NeutralEvolutionModeling + +## install + +pip install -e. \ No newline at end of file diff --git a/scripts/NeuFit_Pipeline.ipynb b/ipynb/NeuFit_Pipeline.ipynb similarity index 85% rename from scripts/NeuFit_Pipeline.ipynb rename to ipynb/NeuFit_Pipeline.ipynb index fc5d830..0df4c3e 100644 --- a/scripts/NeuFit_Pipeline.ipynb +++ b/ipynb/NeuFit_Pipeline.ipynb @@ -80,14 +80,14 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Pipeline Dependencies + Functions" + "## Choose Dataset + Run Pipeline" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Dependencies" + "### Hutch Kraken Dataset: Ludmil Alexandrov" ] }, { @@ -96,7 +96,28 @@ "heading_collapsed": true }, "source": [ - "##### Import Packages" + "##### Dataset Info" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "hidden": true + }, + "source": [ + "**Background on Data**\n", + "- This data was orginally in the form of Kraken data that we recived from a rotation student in Ludmill Alexandrov's lab \n", + " - We didn't have access to the fastq files, only the Kraken data due to a limited number of keys\n", + "- This data is originally from Fread Hutch and consistets of progressor and non-progressor patients in reference to EAC\n", + "\n", + "**Information on How to Run Data**\n", + "- This dataset comes with biome files with the taxonomic data already imported into the files\n", + " - This allows us to simply run **biom2data_tax** which is all set in the neufit Function\n", + "- This pipeline assumes everything from Kraken (including this dataset) is already in the **biome** format \n", + " - In the future we will not be using Kraken but will have the full fastq files \n", + " - The pipeline used previously to create the kraken-biome files uses: 'pip install krakenplot' which \n", + " didnt' work on the cluster (1 hour attempt) so wasn't worth implementation\n", + " - More details on how to convert files from Kraken to biom on the command line found on this github: https://github.com/cguccione/BEProgression2EAC/tree/main/PipelineTesting/KrakenBiome_Pipeline" ] }, { @@ -107,22 +128,11 @@ }, "outputs": [], "source": [ - "#Imports needed for Caitlin Functions\n", - "import pandas as pd\n", - "import qiime2 as q2\n", - "from biom import load_table #https://biom-format.org/documentation/generated/biom.load_table.html\n", - "from datetime import datetime\n", - "import os\n", + "from neuevo.neufit import neufit\n", + "from neuevo.utils import biom2data_tax\n", "\n", - "#Imports needed for Neufit\n", - "import numpy as np\n", - "from lmfit import Parameters, Model, fit_report #conda-forge/lmfit\n", - "from scipy.stats import beta #[already installed]\n", - "from statsmodels.stats.proportion import proportion_confint #[already installed]\n", - "from os.path import splitext #[already installed]\n", - "from matplotlib import pyplot #[already installed]\n", - "from math import log10 #[already installed]\n", - "import scipy" + "import matplotlib\n", + "%matplotlib inline" ] }, { @@ -158,950 +168,86 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Functions" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Neufit Orginal Functions" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "heading_collapsed": true - }, - "source": [ - "##### PreSet Functions from Neufit" + "#### Combined Data (Everything in the dataset)" ] }, { "cell_type": "code", "execution_count": 3, - "metadata": { - "hidden": true - }, - "outputs": [], - "source": [ - "# neufit: Fit a neutral community model to species abundances, e.g. from an OTU table\n", - "#\n", - "# For the theory behind this see Sloan et al, Environ Microbiol 2006 8:732-740.\n", - "# To run on the example simulation data: python neufit.py sim_data.csv\n", - "# To link with the mock taxonomy use the -t sim_taxonomy.csv option\n", - "#\n", - "# Copyright (C) 2018 Michael Sieber (sieber.ecoevo.de)\n", - "# All rights reserved.\n", - "#\n", - "# Redistribution and use in source and binary forms, with or without\n", - "# modification, are permitted provided that the following conditions are met:\n", - "#\n", - "# 1. Redistributions of source code must retain the above copyright notice, this\n", - "# list of conditions and the following disclaimer.\n", - "# 2. Redistributions in binary form must reproduce the above copyright notice,\n", - "# this list of conditions and the following disclaimer in the documentation\n", - "# and/or other materials provided with the distribution.\n", - "#\n", - "# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\" AND\n", - "# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED\n", - "# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE\n", - "# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR\n", - "# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES\n", - "# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;\n", - "# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND\n", - "# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n", - "# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS\n", - "# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n", - "#\n", - "# Github: https://github.com/misieber/neufit\n", - "\n", - "def beta_cdf(p, N, m):\n", - " # Expected long term distribution under the neutral model (truncated cumulative beta-distribution)\n", - " return beta.cdf(1.0, N*m*p, N*m*(1.0-p)) - beta.cdf(1.0/N, N*m*p, N*m*(1.0-p))\n", - "def subsample(counts, depth):\n", - " # Subsamples counts to uniform depth, dropping all samples without enough depth\n", - " for sample in counts:\n", - " if counts[sample].sum() >= depth:\n", - " flattened = np.repeat(np.arange(counts[sample].size), counts[sample])\n", - " subsample = np.random.choice(flattened, depth, replace=False)\n", - " counts[sample] = np.bincount(subsample, minlength=counts[sample].size)\n", - " else:\n", - " #CG: changed the following print statment from Python2 to Python3 \n", - " print('dropping sample ' + sample + ' with ' + str(counts[sample].sum()) + ' reads < ' + str(depth))\n", - " counts = counts.drop(sample, axis=1)\n", - " return counts\n", - "\n", - "def non_negative_int(arg):\n", - " # Argparser type: non-negative int\n", - " nnint = int(arg)\n", - " if nnint < 0:\n", - " raise ArgumentTypeError(arg + ' < 0, must be non-negative')\n", - " return nnint" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "##### Main Neufit Section" - ] - }, - { - "cell_type": "code", - "execution_count": 4, "metadata": {}, - "outputs": [], - "source": [ - "'''\n", - "All the code in the function below was written by the orginal Neufit authors, not me and is from the neufit.py file.\n", - "neufit.py: https://github.com/misieber/neufit/blob/master/neufit.py\n", - "\n", - "Here are the following changes I made:\n", - "- Turned it into a function instead of using argv as input\n", - " -There are specific notes on what I changed in the 'Notes: Neufit Modifyed Args Input/Output Details' section.\n", - "-Changed was all syntax from Python2 to Python3.\n", - "-Changed all print statments to write to file statements\n", - " - Made any necissary edits to allow data to be printed to file instead of terminal \n", - "'''\n", - "def main_neufit(output_filename, dataset_type, _data_filename, _taxonomy_filename, full_non_neutral = False, arg_ignore_level = 0, arg_rarefaction_level = 0):\n", - " '''Inputs: output_filename : the name which will be at the front of all the files; ex. 'combined'\n", - " dataset_type : the dataset type we have from here: ('hutchKraken', 'gregTCGA')\n", - " _data_filename = path of []_data.csv file needed for Neufit to run\n", - " _taxonomy_filename = path of []_taxonomy.csv file needed for Neufit to run\n", - " arg_ignore_level = 0 ; default set from orginal Neufit program\n", - " arg_rarefaction_level = 0; default set from orginal Neufit program \n", - " Outputs: file_header = file path for neufit outpus: The path+dataGroup name + date stamp to be used for all parts of Neufit run: ex. /home/cguccion/NeutralEvolutionModeling/neufit_output/hutchKrakenAlex_combined_2021-08-26_13:24:22 \n", - " occurr_freqs = pandas df that Neufit created in the orginal program but didn't specifcally output orginally\n", - " - Headers of csv: otu_id, mean_abundance, occurrence, Kingdom, Phylum, Class, Order, Family, Genus, Species, predicted_occurrence, lower_conf_int, upper_conf_int\n", - " - I used this occur_freqs df in order to figure out which species is the most non-neutral\n", - " - I believe this is what Neufit uses to physical plot the dots on the graph as well \n", - " n_reads = Number of reads (from orginal program)\n", - " n_samples = Number of smaples (from orginal program)\n", - " r_square = R^2 value (from orginal program)\n", - " beta_fit = stats on the preformance of the model (from orginal program)\n", - " *Runs the main section of neufit'''\n", - " \n", - " ##Added by Caitlin ~ Push output to file instead of printing to screen\n", - " \n", - " #Grab and format data/time\n", - " time = datetime.time(datetime.now())\n", - " date = datetime.date(datetime.now())\n", - " h,s = str(time).split(\".\") #Split the string into hours/min and seconds\n", - " \n", - " #Create file_header which holds the path / location for all future Neufit outpus\n", - " file_header = str(neufit_output_path) + \"/\" + str(dataset_type) + '/' + str(output_filename) + '/' + str(output_filename) + '_' + str(date) + \"_\" + str(h)\n", - " \n", - " #Creates directory for all Neufit Outputs if it doesn't already exist \n", - " dir_name = str(neufit_output_path) + \"/\" + str(dataset_type) + '/' + str(output_filename)\n", - " os.makedirs(dir_name, exist_ok=True)\n", - " \n", - " #Create and open file for Neufit Output txt file\n", - " fn= str(file_header) + \".txt\"\n", - " file = open(fn, 'w')\n", - " \n", - " #Print statments with important info\n", - " print(\"Running dataset: \" + str(dataset_type) + \"Category:\" + str(output_filename) + '\\n')\n", - " ##\n", - " \n", - " # Writes dataset info to Neufit output file + calculates and writes the number of samples/ reads in the file\n", - " file.write('Corresponding csv file: ' + _data_filename + '\\n')\n", - " abundances = pd.read_table(_data_filename, header=0, index_col=0, sep='\\t').astype(int)\n", - " abundances = abundances[abundances.sum(1) > arg_ignore_level]\n", - " file.write ('Dataset contains ' + str(abundances.shape[1]) + ' samples (sample_id, reads): \\n')\n", - " ##Caitlin\n", - " #The following loop is used instead of 'print abundances.sum(0)' so that it can be written to a file\n", - " for index, col in abundances.iteritems():\n", - " col_sum = 0\n", - " for i in col:\n", - " col_sum += i\n", - " file.write (index + '\\t' + str(col_sum) + '\\n')\n", - " file.write ('\\n')\n", - " ##\n", - "\n", - " # Determine uniform read depth\n", - " if arg_rarefaction_level == 0 or arg_rarefaction_level > max(abundances.sum(0)):\n", - " arg_rarefaction_level = min(abundances.sum(0))\n", - " file.write ('rarefying to highest possible uniform read depth'),\n", - " else:\n", - " file.write ('rarefying to custom rarefaction level'),\n", - " file.write ('(' + str(arg_rarefaction_level) + ' reads per sample) \\n')\n", - "\n", - " # Optionally subsample the abundance table, unless all samples already have the required uniform read depth\n", - " if not all(n_reads == arg_rarefaction_level for n_reads in abundances.sum(0)):\n", - " abundances = subsample(abundances, arg_rarefaction_level)\n", - " abundances = abundances[abundances.sum(1) > 0]\n", - "\n", - " # Dataset shape\n", - " n_otus, n_samples = abundances.shape\n", - " n_reads = arg_rarefaction_level\n", - "\n", - " file.write ('fitting neutral expectation to dataset with ' + str(n_samples) + ' samples and ' + str(n_otus) + ' otus \\n \\n')\n", - " # Calculate mean relative abundances and occurrence frequencies\n", - " mean_relative_abundance = (1.0*abundances.sum(1))/n_reads/n_samples\n", - " occurrence_frequency = (1.0*np.count_nonzero(abundances, axis=1))/n_samples\n", - " \n", - " occurr_freqs = pd.DataFrame(mean_relative_abundance, columns=['mean_abundance'])\n", - " if dataset_type == 'TCGA_WGS':\n", - " occurr_freqs.index.name = 'gOTU' #This changes the name of the first column\n", - " else:\n", - " occurr_freqs.index.name = 'otu_id'\n", - " occurr_freqs['occurrence'] = occurrence_frequency\n", - " occurr_freqs = occurr_freqs.sort_values(by=['mean_abundance'])\n", - "\n", - " # Join with taxonomic information (optional)\n", - " if _taxonomy_filename != None: #Changed <> to !=\n", - " if dataset_type == 'TCGA_WGS':\n", - " taxonomy = pd.read_table(_taxonomy_filename, header=0, index_col=1, sep='\\t')\n", - " else:\n", - " taxonomy = pd.read_table(_taxonomy_filename, header=0, index_col=0, sep='\\t')\n", - " occurr_freqs = occurr_freqs.join(taxonomy)\n", - " \n", - " # Fit the neutral model\n", - " params = Parameters()\n", - " params.add('N', value=n_reads, vary=False)\n", - " params.add('m', value=0.5, min=0.0, max=1.0)\n", - " beta_model = Model(beta_cdf)\n", - " beta_fit = beta_model.fit(occurr_freqs['occurrence'], params, p=occurr_freqs['mean_abundance'])\n", - "\n", - " # Report fit statistics\n", - " r_square = 1.0 - np.sum(np.square(occurr_freqs['occurrence'] - beta_fit.best_fit))/np.sum(np.square(occurr_freqs['occurrence'] - np.mean(occurr_freqs['occurrence'])))\n", - " file.write (fit_report(beta_fit))\n", - " file.write ('\\n R^2 = ' + '{:1.2f}'.format(r_square))\n", - " print(fit_report(beta_fit))\n", - " print('\\n R^2 = ' + '{:1.2f}'.format(r_square))\n", - " print('=========================================================')\n", - "\n", - " # Adding the neutral prediction to results\n", - " occurr_freqs['predicted_occurrence'] = beta_fit.best_fit\n", - " occurr_freqs['lower_conf_int'], occurr_freqs['upper_conf_int'] = proportion_confint(occurr_freqs['predicted_occurrence']*n_samples, n_samples, alpha=0.05, method='wilson')\n", - "\n", - " # Save non-neutral otus (here simply determined by lying outside the confidence intervals)\n", - " above = occurr_freqs[occurr_freqs['occurrence'] > occurr_freqs['upper_conf_int']]\n", - " below = occurr_freqs[occurr_freqs['occurrence'] < occurr_freqs['lower_conf_int']]\n", - " \n", - " #Create orginal non neutral output file from Neufit\n", - " if full_non_neutral == True:\n", - " pd.concat((above, below)).to_csv(str(file_header) + '_FullNonNeutral.csv')\n", - " \n", - " file.close()\n", - " \n", - " return(occurr_freqs, n_reads, n_samples, r_square, beta_fit, file_header)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "heading_collapsed": true - }, - "source": [ - "##### Neufit Plotting" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "hidden": true - }, - "outputs": [], + "outputs": [ + { + "ename": "FileNotFoundError", + "evalue": "[Errno 2] No such file or directory: '/home/cguccion/rawData/01_11_2021_Hutch340_BE_Samples_LudmilAlexandrov/biom/combined_biome'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 13\u001b[0m \u001b[0;31m#Convert data from biom to csv files for Neufit\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 14\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mdataset_type\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'hutchKraken'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 15\u001b[0;31m \u001b[0mfnData\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfnTaxonomy\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mbiom2data_tax\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mhutchKrakenAlex_biom\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcustom_filename\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moutput_filename\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 16\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mdataset_type\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'TCGA_WGS'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 17\u001b[0m \u001b[0;31m#Determine which biom file for the run\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/Dropbox/bin/NeutralEvolutionModeling/neuevo/utils.py\u001b[0m in \u001b[0;36mbiom2data_tax\u001b[0;34m(datasetName, biomFilename, finalFilename)\u001b[0m\n\u001b[1;32m 53\u001b[0m \u001b[0;31m#Make filename and import data\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 54\u001b[0m \u001b[0mfullFilename\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdatasetName\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m'/'\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mbiomFilename\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 55\u001b[0;31m \u001b[0mfeatureTable\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mload_table\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfullFilename\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m#https://biom-format.org/documentation/generated/biom.load_table.html\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 56\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 57\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/miniconda3/envs/qiime2-2020.8/lib/python3.6/site-packages/biom/parse.py\u001b[0m in \u001b[0;36mload_table\u001b[0;34m(f)\u001b[0m\n\u001b[1;32m 668\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mTypeError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"%s does not appear to be a BIOM file!\"\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 669\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 670\u001b[0;31m \u001b[0;32mwith\u001b[0m \u001b[0mbiom_open\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mfp\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 671\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 672\u001b[0m \u001b[0mtable\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mparse_biom_table\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfp\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/miniconda3/envs/qiime2-2020.8/lib/python3.6/contextlib.py\u001b[0m in \u001b[0;36m__enter__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 79\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__enter__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 80\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 81\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mnext\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgen\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 82\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mStopIteration\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 83\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mRuntimeError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"generator didn't yield\"\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/miniconda3/envs/qiime2-2020.8/lib/python3.6/site-packages/biom/util.py\u001b[0m in \u001b[0;36mbiom_open\u001b[0;34m(fp, permission)\u001b[0m\n\u001b[1;32m 437\u001b[0m \u001b[0;31m# happen if we are reading a file\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 438\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mmode\u001b[0m \u001b[0;32min\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0;34m'r'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'rb'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'U'\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 439\u001b[0;31m \u001b[0;32mif\u001b[0m \u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgetsize\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfp\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 440\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"The file '%s' is empty and can't be parsed\"\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0mfp\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 441\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/miniconda3/envs/qiime2-2020.8/lib/python3.6/genericpath.py\u001b[0m in \u001b[0;36mgetsize\u001b[0;34m(filename)\u001b[0m\n\u001b[1;32m 48\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mgetsize\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfilename\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 49\u001b[0m \u001b[0;34m\"\"\"Return the size of a file, reported by os.stat().\"\"\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 50\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfilename\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mst_size\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 51\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 52\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: '/home/cguccion/rawData/01_11_2021_Hutch340_BE_Samples_LudmilAlexandrov/biom/combined_biome'" + ] + } + ], "source": [ + "'''Choose True or False for the following parameters:\n", + " norm_graph : True/False : Prints and saves the neutral evolution graph without any coloring\n", + " colored_graph : True/False : Prints and saves the neutral evolution graph without any coloring\n", + " non_neutral : True/False : Prints and saves the most non-neutral microbes in csv file\n", "'''\n", - "All the code in the function below was written by the orginal Neufit authors, not me and is from the neufit.py file.\n", - "neufit.py: https://github.com/misieber/neufit/blob/master/neufit.py\n", - "\n", - "Here are the following changes I made:\n", - "- Turned it into a function \n", - "- Added a plot clearing option to avoid double keys\n", - "'''\n", - "def neufit_plot(occurr_freqs, n_reads, n_samples, r_square, beta_fit, file_header):\n", - " '''Inputs: occurr_freqs = pandas df that Neufit created in the orginal program but didn't specifcally output orginally\n", - " - Headers of csv: otu_id, mean_abundance, occurrence, Kingdom, Phylum, Class, Order, Family, Genus, Species, predicted_occurrence, lower_conf_int, upper_conf_int\n", - " - I used this occur_freqs df in order to figure out which species is the most non-neutral\n", - " - I believe this is what Neufit uses to physical plot the dots on the graph as well \n", - " n_reads = Number of reads (from orginal program)\n", - " n_samples = Number of smaples (from orginal program)\n", - " r_square = R^2 value (from orginal program)\n", - " beta_fit = stats on the preformance of the model (from orginal program)\n", - " file_header = file path for all neufit outpus: The path+dataGroup name + date stamp to be used for all parts of Neufit run: ex. /home/cguccion/NeutralEvolutionModeling/neufit_output/hutchKrakenAlex_combined_2021-08-26_13:24:22 \n", - " Outputs: ! The orginal plot that Neufit outputs (just black dots) - will print to the screen but not save when run\n", - " fn = default filename for neufit plot : customName_NeutralFitPlot.png -- Will often not need this \n", - " *Creates the plot for Neufit '''\n", - " \n", - " \n", - " pyplot.cla() #Clears previous plot - to avoid double keys\n", - "\n", - " # Prepare results plot\n", - " pyplot.xlabel('Mean relative abundance across samples', fontsize=15)\n", - " pyplot.xscale('log')\n", - " x_range = np.logspace(log10(min(occurr_freqs['mean_abundance'])/10), 0, 1000)\n", - " pyplot.xlim(min(x_range), max(x_range))\n", - " pyplot.xticks(fontsize=16)\n", - " pyplot.ylabel('Occurrence frequency in samples', fontsize=15)\n", - " pyplot.ylim(-0.05, 1.05)\n", - " pyplot.yticks(fontsize=16)\n", - "\n", - " # Plot data points\n", - " pyplot.plot(occurr_freqs['mean_abundance'], occurr_freqs['occurrence'], 'o', markersize=6, fillstyle='full', color='black')\n", - "\n", - " # Plot best fit\n", - " pyplot.plot(x_range, beta_cdf(x_range, n_reads, beta_fit.best_values['m']), '-', lw=5, color='darkred')\n", - " lower, upper = proportion_confint(beta_cdf(x_range, n_reads, beta_fit.best_values['m'])*n_samples, n_samples, alpha=0.05, method='wilson')\n", - " pyplot.plot(x_range, lower, '--', lw=2, color='darkred')\n", - " pyplot.plot(x_range, upper, '--', lw=2, color='darkred')\n", - " pyplot.fill_between(x_range, lower, upper, color='lightgrey')\n", - "\n", - " pyplot.text(0.05, 0.9, '$R^2 = ' + '{:1.2f}'.format(r_square) + '$', fontsize=16, transform=pyplot.gca().transAxes)\n", - " pyplot.tight_layout()\n", - " \n", - " fn = file_header + '_NeutralFitPlot.png'\n", - " return(fn)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Caitlin Written Functions" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "##### biome2data_tax" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [], - "source": [ - "# Caitlin Guccione, 08-25-2021\n", - "\n", - "def biom2data_tax(datasetName, biomFilename, finalFilename):\n", - " '''Inputs: datasetName = filename of dataset ex. hutchKrakenAlex_biom -- found in set paths above\n", - " biomFilename = filename of biom file found in datasetName path above\n", - " finalFilename = filename to be used in _data.csv file for neuFit\n", - " Outputs: fnD = File location of [name]_data.csv\n", - " ![name]_data.csv = Creates a data.csv file to be input into Neufit located at 'neufit_input'\n", - " fnT = File location of [name]_taxonomy.csv\n", - " ![name]_taxonomy.csv = Creates a taxonomy.csv file to be input into Neufit located at 'neufit_input'\n", - " * Imports biom file -> pandas dataframe -> data.csv, taxonomy.csv as desired by neufit'''\n", - " \n", - " #Make filename and import data\n", - " fullFilename = datasetName + '/' + biomFilename\n", - " featureTable = load_table(fullFilename) #https://biom-format.org/documentation/generated/biom.load_table.html\n", - " \n", - " \n", - " '''\n", - " #Create file_header which holds the path / location for all _data and _csv outputs for this data\n", - " ###file_header = str(neufit_output_path) + \"/\" + str(dataset_type) + '/' + str(output_filename) + '/' + str(output_filename) + '_' + str(date) + \"_\" + str(h)\n", - " file_header = str(neufit_output_path) + \"/\" + str(datasetName) + '/' + str(finalFilename) + '/' + str(finalFilename) + '_' + str(date) + \"_\" + str(h)\n", - " \n", - " #Creates directory for all Neufit Outputs if it doesn't already exist \n", - " ###dir_name = str(neufit_output_path) + \"/\" + str(dataset_type) + '/' + str(output_filename)\n", - " dir_name = str(neufit_output_path) + \"/\" + str(datasetName) + '/' + str(finalFilename)\n", - " os.makedirs(dir_name, exist_ok=True)\n", - " '''\n", - " \n", - " #Create _data.csv\n", - " pandas_featureTable = pd.DataFrame(featureTable.matrix_data.toarray(), featureTable.ids('observation'), featureTable.ids())\n", - " fnD = neufit_input_path + '/' + finalFilename + '_data.csv'\n", - " pandas_featureTable.to_csv(fnD, sep='\\t')\n", - " \n", - " #Create _taxonomy.csv\n", - " pandas_TaxTable = pd.DataFrame(featureTable.metadata_to_dataframe('observation'))\n", - " pandas_TaxTable.set_axis(['Kingdom', 'Phylum', 'Class', 'Order', 'Family', 'Genus', 'Species'], axis=1)\n", - " fnT = neufit_input_path + '/' + finalFilename + '_taxonomy.csv'\n", - " pandas_TaxTable.to_csv(fnT, sep='\\t')\n", - " \n", - " '''\n", - " #Create Neufit string to run progam in terminal\n", - " #Can now run Neufit entirely from this notebook so don't need this command\n", - " py = 'python2 /Users/cguccione/Documents/Classes/Kit_Lab/Tools/neufit/neufit.py '\n", - " fnD = finalFilename + '_data.csv'\n", - " fnT = finalFilename + '_taxonomy.csv'\n", - " neufit_command = py + fnD + ' -t ' + fnT\n", - " \n", - " return(neufit_command)\n", - " Outputs: ![name]_data.csv = Creates a data.csv file to be input into Neufit located at 'neufit_input'\n", - " ![name]_taxonomy.csv = Creates a taxonomy.csv file to be input into Neufit located at 'neufit_input'\n", - " '''\n", - " \n", - " return(fnD, fnT)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "##### biom_addMetaTax_customTCGAehn" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "def biom_addMetaTax_customTCGAehn(datasetName, biomFilename, finalFilename):\n", - " '''Inputs: datasetName = filename of dataset ex. hutchKrakenAlex_biom -- found in set paths above\n", - " biomFilename = filename of biom file found in datasetName path above\n", - " finalFilename = filename to be used in _data.csv file for neuFit -- in this case will be normal or not\n", - " Outputs: fnD_hn = File location of head_neck_[name]_data.csv\n", - " fnD_e = File location of esophgous_[name]_data.csv\n", - " !head_neck_[name]_data.csv = Creates a data.csv file to be input into Neufit located at 'neufit_input'\n", - " !esophgous_[name]_data.csv = Creates a data.csv file to be input into Neufit located at 'neufit_input'\n", - " fnT = File location of [name]_taxonomy.csv\n", - " ![name]_taxonomy.csv = Creates a taxonomy.csv file to be input into Neufit located at 'neufit_input'\n", - " * Imports biom file -> pandas dataframe -> splits apart hn from esoph -> 2 data.csv, 1 taxonomy.csv as desired by neufit\n", - " * This custom because we need to seperate esophgous from head and neck patients from specific dataset'''\n", - " \n", - " #Make filename and import all data into pandas df \n", - " fullFilename = datasetName + '/' + biomFilename\n", - " featureTable = load_table(fullFilename) #Loads biom file: https://biom-format.org/documentation/generated/biom.load_table.html\n", - " pandas_featureTable = pd.DataFrame(featureTable.matrix_data.toarray(), featureTable.ids('observation'), featureTable.ids()) #Turn biom file into pandas df \n", - " meta = pd.read_csv(tcgaEhnWGSgreg_meta, sep = '\\t') #Import the metadata into a pandas df\n", - " pandas_TaxTable = pd.read_csv(tcgaEhnWGSgreg_taxa) #Import the taxonomy into a pandas df\n", - " \n", - " #Custom : Seperate the Head and Neck Samples from Esophgous Samples in the biom file\n", - " ##In case Kit asks for it: biomDF has both hn and esoph samples combined\n", - " hn_df = pd.DataFrame() #Biom file with just the head and neck samples\n", - " e_df = pd.DataFrame() #Biom file with just the esophagus samples\n", - " \n", - " for sample in pandas_featureTable.columns.values: #Loop through all the samples in the biom file\n", - " \n", - " #Use the meta data to determine what cancer type the sample correlates to \n", - " cancerType = meta[meta['sample_name'] == sample].reset_index().loc[0, 'primary_site'] #Finds the sample name which matches our i, then finds the corresponding primary site in that row\n", - " \n", - " #Sort each cancer type into approriate dataframes\n", - " if cancerType == 'Head and Neck':\n", - " hn_df[sample] = pandas_featureTable.loc[:, sample]\n", - " elif cancerType == 'Esophagus':\n", - " e_df[sample] = pandas_featureTable.loc[:, sample]\n", - " else:#This should never print - just a saftey check \n", - " print(\"something is off in the dataset -- not just Head and Neck and Esophagus\")\n", - " \n", - " #Create _data.csv and _taxonomy.csv files for head & neck and esophgous\n", - " \n", - " #Head and Neck\n", - " fnD_hn = neufit_input_path + '/' + 'head_neck_' + finalFilename + '_data.csv'\n", - " fnT_hn = neufit_input_path + '/' + 'head_neck_' + finalFilename + '_taxonomy.csv'\n", - " pandas_TaxTable.to_csv(fnT_hn, sep='\\t')\n", - " pandas_featureTable.to_csv(fnD_hn, sep='\\t')\n", - " \n", - " #Esophgous\n", - " fnD_e = neufit_input_path + '/' + 'esophagus_' + finalFilename + '_data.csv'\n", - " fnT_e = neufit_input_path + '/' + 'esophagus_' + finalFilename + '_taxonomy.csv'\n", - " pandas_TaxTable.to_csv(fnT_e, sep='\\t')\n", - " pandas_featureTable.to_csv(fnD_e, sep='\\t')\n", - " \n", - " \n", - " return(fnD_hn, fnD_e, fnT_hn, fnT_e)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "##### non_neutral_outliers" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [], - "source": [ - "def non_neutral_outliers(file_header, occurr_freqs, dataset_type, non_save, threshold = 0.5):\n", - " '''Inputs: file_header = file path for neufit outpus: The path+dataGroup name + date stamp to be used for all parts of Neufit run: ex. /home/cguccion/NeutralEvolutionModeling/neufit_output/hutchKrakenAlex_combined_2021-08-26_13:24:22 \n", - " occurr_freqs = pandas df that Neufit created in the orginal program but didn't specifcally output orginally\n", - " - Headers of csv: otu_id, mean_abundance, occurrence, Kingdom, Phylum, Class, Order, Family, Genus, Species, predicted_occurrence, lower_conf_int, upper_conf_int\n", - " - I used this occur_freqs df in order to figure out which species is the most non-neutral\n", - " - I believe this is what Neufit uses to physical plot the dots on the graph as well\n", - " threshold = 0.5; autoset to 0.5, but determines what bacteria are considered non-neutral\n", - " Outputs: ! [name]_NonNeutralOutliers.csv\n", - " *Creates the most Non-neutral csv file '''\n", - " \n", - " #Create dataframe\n", - " standoutMicrobes = pd.DataFrame(columns = ('Difference off Neutral Model', 'Kingdom', 'Phylum', 'Class', 'Order', 'Family', 'Genus', 'Species'))\n", - " \n", - " #Loop and find most non-neutral microbes based upon threshold\n", - " row_count = 0\n", - " for i,j in occurr_freqs.iterrows():\n", - " diff = abs(j['occurrence'] - j['predicted_occurrence'])\n", - " if diff > threshold:\n", - " if dataset_type == 'TCGA_WGS':\n", - " standoutMicrobes.loc[row_count] = [diff, j['Domain'], j['Phylum'], j['Class'], j['Order'], j['Family'], j['Genus'], j['Species']]\n", - " else:\n", - " standoutMicrobes.loc[row_count] = [diff, j['Kingdom'], j['Phylum'], j['Class'], j['Order'], j['Family'], j['Genus'], j['Species']]\n", - " row_count +=1\n", - " standoutMicrobes = standoutMicrobes.sort_values(by =['Difference off Neutral Model'], ascending=False)\n", - " \n", - " #Display and export non-neutral microbes as csv\n", - " print(\"\\nTop NonNeutral Microbes\")\n", - " display(standoutMicrobes)\n", - " print('=========================================================\\n')\n", - " \n", - " fn = file_header + '_NonNeutral_Outliers.csv'\n", - " \n", - " if non_save == False:\n", - " standoutMicrobes.to_csv(fn, sep = '\\t')" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "heading_collapsed": true - }, - "source": [ - "##### custom_color_plot" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "hidden": true - }, - "source": [ - "If you want to add another custom color:\n", - "1. Create a new section with bacteria and Mean Abundance and Occurance lists\n", - "2. Add to the for loop at bottom of section as well\n", - "3. Add custum bacteria to plot" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": { - "hidden": true - }, - "outputs": [], - "source": [ - "def custom_color_plot(occurr_freqs, n_reads, n_samples, r_square, beta_fit, file_header):\n", - " '''Inputs: file_header = file path for neufit outpus: The path+dataGroup name + date stamp to be used for all parts of Neufit run: ex. /home/cguccion/NeutralEvolutionModeling/neufit_output/hutchKrakenAlex_combined_2021-08-26_13:24:22 \n", - " occurr_freqs = pandas df that Neufit created in the orginal program but didn't specifcally output orginally\n", - " - Headers of csv: otu_id, mean_abundance, occurrence, Kingdom, Phylum, Class, Order, Family, Genus, Species, predicted_occurrence, lower_conf_int, upper_conf_int\n", - " - I used this occur_freqs df in order to figure out which species is the most non-neutral\n", - " - I believe this is what Neufit uses to physical plot the dots on the graph as well\n", - " n_reads = Number of reads (from orginal program)\n", - " n_samples = Number of smaples (from orginal program)\n", - " r_square = R^2 value (from orginal program)\n", - " beta_fit = stats on the preformance of the model (from orginal program)\n", - " Outputs: ! The plot that Neufit outputs + the colored dots - will print to the screen but not save when run\n", - " fn = default filename for neufit plot : _NeutralFitPlot_withNonNeutralColors.png\n", - " *Creates the plot for Neufit + with colored dots '''\n", - " \n", - " #Create orginal black plot\n", - " neufit_plot(occurr_freqs, n_reads, n_samples, r_square, beta_fit, file_header)\n", - " \n", - " #If you want to add a new custom color, follow the steps below on what to add where (more info above as well)\n", - " \n", - " # --------- STEP 1 ---------\n", - " # Create mean abundance and occurance list for bacteria \n", - "\n", - " #Phylori\n", - " MA_S_pylori = [] #Mean_Abundance, Species , phylori\n", - " O_S_pylori = [] #Occurrance\n", - "\n", - " #Proteobacteria\n", - " MA_P_Proteobacteria = []\n", - " O_P_Proteobacteria = []\n", - "\n", - " #Streptococcus\n", - " MA_G_Streptococcus = []\n", - " O_G_Streptococcus = []\n", - "\n", - " #Bacteroidetes\n", - " MA_P_Bacteroidetes = []\n", - " O_P_Bacteroidetes = []\n", + "norm_graph = True\n", + "colored_graph = True\n", + "non_neutral = True\n", + "output_filename = 'combined'\n", + "dataset_type = 'hutchKraken'\n", + "custom_filename = 'combined_biome'\n", "\n", - " # --------- STEP 2 ---------\n", - " # Add to customized lists when hitting bacteria in neutral list \n", + "#Convert data from biom to csv files for Neufit\n", + "if dataset_type == 'hutchKraken':\n", + " fnData, fnTaxonomy = biom2data_tax(hutchKrakenAlex_biom, custom_filename, output_filename)\n", + "elif dataset_type == 'TCGA_WGS':\n", + " #Determine which biom file for the run\n", + " if output_filename == 'normal':\n", + " biomFilename ='116640_feature-table-TCGA-WGS-STN-ESCA-HNSC.biom'\n", + " elif output_filename == 'cancer':\n", + " biomFilename ='116639_feature-table-TCGA-WGS-PT-ESCA-HNSC.biom'\n", "\n", - " #for i, j in neutral.iterrows():\n", - " for i,j in occurr_freqs.iterrows():\n", - " if j['Species'] == 's__pylori':\n", - " MA_S_pylori.append(j['mean_abundance'])\n", - " O_S_pylori.append(j['occurrence'])\n", - " if j['Phylum'] == 'p__Proteobacteria':\n", - " MA_P_Proteobacteria.append(j['mean_abundance'])\n", - " O_P_Proteobacteria.append(j['occurrence'])\n", - " if j['Genus'] == 'g__Streptococcus':\n", - " MA_G_Streptococcus.append(j['mean_abundance'])\n", - " O_G_Streptococcus.append(j['occurrence'])\n", - " if j['Phylum'] == 'p__Bacteroidetes':\n", - " MA_P_Bacteroidetes.append(j['mean_abundance'])\n", - " O_P_Bacteroidetes.append(j['occurrence'])\n", + " #Turn biome file into _data and _tax files\n", + " fnD_hn, fnD_e, fnT_hn, fnT_e = biom_addMetaTax_customTCGAehn(tcgaEhnWGSgreg_, biomFilename, output_filename)\n", "\n", - " # --------- STEP 3 ---------\n", - " # Override coloring of dots in current plot with new colors\n", + " #Choose the correct _data file, esoph or head and neck\n", + " if custom_filename == 'e':\n", + " fnData = fnD_e\n", + " fnTaxonomy = fnT_e\n", + " output_filename = 'esophagus_' + output_filename\n", + " elif custom_filename == 'hn':\n", + " fnData = fnD_hn\n", + " fnTaxonomy = fnT_hn\n", + " output_filename = 'headNeck_' + output_filename\n", "\n", - " pyplot.plot(MA_P_Proteobacteria, O_P_Proteobacteria, 'o', markersize=6, fillstyle='full', color='green', label=\"Proteobacteria\")\n", - " pyplot.plot(MA_G_Streptococcus, O_G_Streptococcus, 'o', markersize=6, fillstyle='full', color='orange', label=\"Streptococcus\")\n", - " pyplot.plot(MA_P_Bacteroidetes, O_P_Bacteroidetes, 'o', markersize=6, fillstyle='full', color='purple', label=\"Bacteroidetes\")\n", - " pyplot.plot(MA_S_pylori, O_S_pylori, 'o', markersize=6, fillstyle='full', color='blue', label='H.pylori')\n", + "#Run with all possible output files\n", + "neufit(fnData, fnTaxonomy, output_filename, dataset_type, custom_filename,\n", + " norm_graph, colored_graph, non_neutral, full_non_neutral = True)\n", "\n", - " \n", - " plot_fn = file_header + '_NeutralFitPlot_withNonNeutralColors.png'\n", - " \n", - " return(plot_fn)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "heading_collapsed": true - }, - "source": [ - "##### save_plot" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": { - "hidden": true - }, - "outputs": [], - "source": [ - "def save_plot(fn):\n", - " ##Just saves the plot given a filename\n", - " pyplot.legend(loc=\"center left\")\n", - " pyplot.savefig(fn)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "##### Neufit_Pipeline_Main" + "\n" ] }, { "cell_type": "code", - "execution_count": 22, + "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "def Neufit_Pipeline_Main(output_filename, dataset_type, custom_filename, norm_graph, colored_graph, non_neutral, non_save = False, full_non_neutral = False):\n", - " '''\n", - " Inputs: output_filename : the name which will be at the front of all the files; ex. 'combined'\n", - " dataset_type : the dataset type we have from here: ('hutchKraken', 'gregTCGA')\n", - " custom_filename : depedant on the dataset:\n", - " - hutchKraken : the name of the biom file ex.'combined_biome'\n", - " \n", - " norm_graph : True/False : Prints and saves the neutral evolution graph without any coloring\n", - " colored_graph : True/False : Prints and saves the neutral evolution graph without any coloring\n", - " non_neutral : True/False : Prints and saves the most non-neutral microbes in csv file \n", - " \n", - " Default inputs to be changed mainly for testing purposes:\n", - " non_save : True/False, False = default, the following will not be SAVED just printed to the screen\n", - " - * This is intened for testing only! *\n", - " - norm_graph, colored_graph, non_neutral_csv \n", - " - **Took away this feature ** [It will still create [name]_data.csv and [name]_taxonomy.csv but will delete them after running]\n", - " full_non_neutral: True/False : Creates a csv file from orginal Neufit program with information about what is neutral and how far off the curve each point is\n", - " - csv will have: otu_id, mean_abundance, occurrence, Kingdom, Phylum, Class, Order, Family, Genus, Species, predicted_occurrence, lower_conf_int, upper_conf_int\n", - "\n", - " *The main function for the pipeline which calls all other functions*\n", - " \n", - " '''\n", - " \n", - " #Convert data from biom to csv files for Neufit\n", - " if dataset_type == 'hutchKraken':\n", - " fnData, fnTaxonomy = biom2data_tax(hutchKrakenAlex_biom, custom_filename, output_filename)\n", - " elif dataset_type == 'TCGA_WGS':\n", - " #Determine which biom file for the run\n", - " if output_filename == 'normal':\n", - " biomFilename ='116640_feature-table-TCGA-WGS-STN-ESCA-HNSC.biom'\n", - " elif output_filename == 'cancer':\n", - " biomFilename ='116639_feature-table-TCGA-WGS-PT-ESCA-HNSC.biom'\n", - " \n", - " #Turn biome file into _data and _tax files\n", - " fnD_hn, fnD_e, fnT_hn, fnT_e = biom_addMetaTax_customTCGAehn(tcgaEhnWGSgreg_, biomFilename, output_filename)\n", - " \n", - " #Choose the correct _data file, esoph or head and neck\n", - " if custom_filename == 'e':\n", - " fnData = fnD_e\n", - " fnTaxonomy = fnT_e\n", - " output_filename = 'esophagus_' + output_filename\n", - " elif custom_filename == 'hn':\n", - " fnData = fnD_hn\n", - " fnTaxonomy = fnT_hn\n", - " output_filename = 'headNeck_' + output_filename\n", - " \n", - " #Run Neufit\n", - " occurr_freqs, n_reads, n_samples, r_square, beta_fit, file_header = main_neufit(output_filename, dataset_type, fnData, fnTaxonomy, full_non_neutral)\n", - " \n", - " #Neufit Plotting and Non-neutral Outline\n", - " if norm_graph == True: #Neutral evolution graph, no color\n", - " nc_fn = neufit_plot(occurr_freqs, n_reads, n_samples, r_square, beta_fit, file_header)\n", - " if non_save == False:\n", - " save_plot(nc_fn)\n", - " if colored_graph == True:#Neutral evolution graph with colors\n", - " cc_fn = custom_color_plot(occurr_freqs, n_reads, n_samples, r_square, beta_fit, file_header)\n", - " if non_save == False:\n", - " save_plot(cc_fn)\n", - " if non_neutral == True:\n", - " non_neutral_outliers(file_header, occurr_freqs, dataset_type, non_save)\n", - " \n", - " #Easier to delete the Neufit text file then to not create it\n", - " if non_save == True:\n", - " neufit_fn= str(file_header) + \".txt\"\n", - " os.remove(neufit_fn)\n", - " \n", - " #Optional cleanup step with non_save to remove taxonomy and data files - these just overwrite themseleves so not usally a big issue\n", - " '''\n", - " if non_save == True: #Delete [name]_data.csv and [name]_taxonomy.csv to reduce cluter\n", - " os.remove(fnData)\n", - " os.remove(fnTaxonomy)\n", - " '''\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Choose Dataset + Run Pipeline" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Hutch Kraken Dataset: Ludmil Alexandrov" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "heading_collapsed": true - }, - "source": [ - "##### Dataset Info" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "hidden": true - }, - "source": [ - "**Background on Data**\n", - "- This data was orginally in the form of Kraken data that we recived from a rotation student in Ludmill Alexandrov's lab \n", - " - We didn't have access to the fastq files, only the Kraken data due to a limited number of keys\n", - "- This data is originally from Fread Hutch and consistets of progressor and non-progressor patients in reference to EAC\n", - "\n", - "**Information on How to Run Data**\n", - "- This dataset comes with biome files with the taxonomic data already imported into the files\n", - " - This allows us to simply run **biom2data_tax** which is all set in the Neufit_Pipeline_Main Function\n", - "- This pipeline assumes everything from Kraken (including this dataset) is already in the **biome** format \n", - " - In the future we will not be using Kraken but will have the full fastq files \n", - " - The pipeline used previously to create the kraken-biome files uses: 'pip install krakenplot' which \n", - " didnt' work on the cluster (1 hour attempt) so wasn't worth implementation\n", - " - More details on how to convert files from Kraken to biom on the command line found on this github: https://github.com/cguccione/BEProgression2EAC/tree/main/PipelineTesting/KrakenBiome_Pipeline" - ] + "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "#### Combined Data (Everything in the dataset)" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/cguccion/.conda/envs/neufit/lib/python3.5/site-packages/ipykernel_launcher.py:36: FutureWarning: set_axis currently defaults to operating inplace.\n", - "This will change in a future version of pandas, use inplace=True to avoid this warning.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Running dataset: hutchKrakenCategory:combined\n", - "\n", - "[[Fit Statistics]]\n", - " # fitting method = leastsq\n", - " # function evals = 18\n", - " # data points = 5736\n", - " # variables = 1\n", - " chi-square = 15.3753558\n", - " reduced chi-square = 0.00268097\n", - " Akaike info crit = -33965.1662\n", - " Bayesian info crit = -33958.5117\n", - "[[Variables]]\n", - " N: 2108 (fixed)\n", - " m: 0.31794492 +/- 0.00825899 (2.60%) (init = 0.5)\n", - "\n", - " R^2 = 0.81\n", - "=========================================================\n", - "\n", - "Top NonNeutral Microbes\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Difference off Neutral ModelKingdomPhylumClassOrderFamilyGenusSpecies
20.955536k__Virusesp__Peploviricotac__Herviviriceteso__Herpesviralesf__Herpesviridaeg__Roseoloviruss__Human betaherpesvirus 6A
10.938053k__Virusesp__Peploviricotac__Herviviriceteso__Herpesviralesf__Herpesviridaeg__Roseoloviruss__Human betaherpesvirus 6B
30.741159k__Bacteriap__Proteobacteriac__Epsilonproteobacteriao__Campylobacteralesf__Helicobacteraceaeg__Helicobacters__pylori
40.638222k__Bacteriap__Proteobacteriac__Gammaproteobacteriao__Pasteurellalesf__Pasteurellaceaeg__Haemophiluss__parahaemolyticus
00.599833k__Bacteriap__Proteobacteriac__Gammaproteobacteriao__Pseudomonadalesf__Pseudomonadaceaeg__Pseudomonass__sp. J380
\n", - "
" - ], - "text/plain": [ - " Difference off Neutral Model Kingdom Phylum \\\n", - "2 0.955536 k__Viruses p__Peploviricota \n", - "1 0.938053 k__Viruses p__Peploviricota \n", - "3 0.741159 k__Bacteria p__Proteobacteria \n", - "4 0.638222 k__Bacteria p__Proteobacteria \n", - "0 0.599833 k__Bacteria p__Proteobacteria \n", - "\n", - " Class Order Family \\\n", - "2 c__Herviviricetes o__Herpesvirales f__Herpesviridae \n", - "1 c__Herviviricetes o__Herpesvirales f__Herpesviridae \n", - "3 c__Epsilonproteobacteria o__Campylobacterales f__Helicobacteraceae \n", - "4 c__Gammaproteobacteria o__Pasteurellales f__Pasteurellaceae \n", - "0 c__Gammaproteobacteria o__Pseudomonadales f__Pseudomonadaceae \n", - "\n", - " Genus Species \n", - "2 g__Roseolovirus s__Human betaherpesvirus 6A \n", - "1 g__Roseolovirus s__Human betaherpesvirus 6B \n", - "3 g__Helicobacter s__pylori \n", - "4 g__Haemophilus s__parahaemolyticus \n", - "0 g__Pseudomonas s__sp. J380 " - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "=========================================================\n", - "\n" - ] - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAagAAAEcCAYAAACS6SCjAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAIABJREFUeJzsnXlclNX+x9+HYVFAwS1LDdGyRQH31DTRppvZDbPSW4aVlVFwW2zxatJiC2VXb1ndwPBm6k+yxa5bWdfELZcWF1wrLQVcKldGARWYOb8/nplxlmfgAQZFOO/Xa17DnOd5zpwBZr5zzvl+Px8hpUShUCgUitpGwPkegEKhUCgUeqgApVAoFIpaiQpQCoVCoaiVqAClUCgUilqJClAKhUKhqJWoAKVQKBSKWokKUAqFQqGolagApVAoFIpaiaEAJYS4Tghxq8vj5kKIj4QQOUKIfwkhgmpuiAqFQqGojxidQf0TiHF5/DZgBr4DRgEv+XdYCoVCoajvGA1QVwIbAYQQocBtwBNSykeAfwB31szwFAqFQlFfMRqggoHT9p/7AoHAl/bHu4BL/DwuhUKhUNRzjAaon4Gb7D8nAuullCftj1sBx/w9MIVCoVDUbwINnvcy8JkQ4kEgArjV5dhNwGZ/D0yhUCgU9RtDAUpKuUgIcTXQFdgmpdzlcng9sLUmBqdQKBSK+otQflAKhUKhqI0YLtQVQsQJIT4RQvwmhDgjhOhmb08TQgyuuSEqFAqFoj5itFB3MFqa+cXAbMC1MPcM8Jj/h6ZQKBSK+ozRGdTrwEwpZTyQ5nEsB+ji11EpFAqFot5jNEBdBXxi/9lz0+oE0NRvI1IoFAqFAuMB6hDQ3sexTkC+f4ajUCgUCoWG0QD1MfCyEKKfS5sUQlwBjAOy/D4yhUKhUNRrDKWZCyFCgM+BwcAfaNJG+9GSJpYCt0kpS2twnAqFQqGoZ1SqDkoIYUZTMW+OJm+ULaX8pobGplAoFIp6jCrUVSgUCkWtxKfUkd1WwzBSyuLqD0ehUCgUCg2fMyghhA3vlHKfSClN/hqUQqFQKBTlicU+QCUClEKhUCgU/kTtQSkUCoWiVmLUDwoAIUQkEIOWZv47sF1KWVATA1MoFApF/cZoHVQgmgbf3wHX5IliIB1IVXVQCoVCofAnRmdQbwJJaM66/0WTProIuAN4HmgAPF4TA1QoFApF/cToDOo48IqU8k2dY08Dz0kpm9TA+BQKhUJRTzGqxWcDdvg4th2V7adQKBQKP2N0ie//gNHA/3SOPQTM8duIapDmzZvL6Ojo8z0MhUKhqHNs3LjxiJSyhT/7NBqg8oA7hBA7gEWc3YO6FWgE/EsIkWI/V0opM/w5SH8RHR3Nhg0bzvcwFAqFos4hhMjzd59GA9S/7Petgat1jrvuTUmgVgYohUKhUFw4GApQUkqje1UKhUKhUPiFcx54hBBthBDvCiHWCyGKhRBSCBFt8NoAIcSzQohcIcRpIcQWIcQdNTtihUKhUJwPKqskcSXaMl8Dz2NSyiUGu7kc+BuwEfgWuLESQ3gFeAZItV9/F/CZEOKWSjz/Oef06dPcdddd/PLLL4SEhNCyZUsyMjJo3779+R6aQqFQ1FoMBSghRCwwF23/SeicIgGjauarpZQt7f2OxmCAEkJchBacJkkpp9ibVwghLgcmAbU2QAEkJyczaNAgAP79738zevRoli9ffp5HpVCcf6TNRsnJk1hLS7G53qzWanTqv8qXZz+fyIfHPqM03EpQoYn7mw7n9TsmXjD9X8gYnUHNAEqBW4BfgZKqPqGU0lbFSwcBwXintM8BZggh2kkp91Z1XFVh+vTpJCUlOR83bNiQDh06MH78eEaMGOFsb9CggTM4AfTu3ZspU6ZQk+zbt48nn3ySb775BiklN9xwA1OnTiUqKqrCa9euXctLL71ETk4Op0+f5vLLL+fRRx/lgQcecJ6zf/9+3njjDTZs2MCWLVs4deoUe/fuRaXxK3xRcvIk+StWcHDdOo7u3Mnx3bs5dfgwp44d82tA8SebYmFeApQ20h6XNrLyYdHHnBz+MQBfmaEgAiItMDgbum3zX/+V7asuYjRAXQ3cIaXUq4M6V3QCzqAFSFccBcQdgXMaoHJycggJCWHlypUAHD58mBdeeIHExERat25N//79da979913ufXWW2tsXMXFxVx//fWEhIQwa9YshBA899xzDBw4kK1btxIWFubz2q1bt3LDDTfQu3dvpk+fTmhoKPPmzePBBx/kzJkzJCcnA/Drr7/y6aef0r17d6677jqWLl1aY69HceEipeTA2rX8MHkye7/8ElmdWZEOm2KrHyTK4yszlAa7t5UGw4KboCzo7LGCSC3QQOWe31f/X5n9+zouVIwGqB+Air961yxNgQLprc10zOW4F0KIJDQdQUOzh8qQk5PDVVddRe/evZ1tl1xyCT179mTJkiW6Aer1119n165dZGdn+3UsrkyfPp09e/bwyy+/cPnllwMQFxdHhw4deP/993nqqad8Xvvxxx9jtVpZvHgx4eHhAPzlL39hy5YtzJ492xmg+vfvz59//gnAf/7zHxWgFF78sWkTX44YwfFdu2qkf+fso5pBojwKIvTbT4XitdlRlcDiq39f7bWZli1bXuzvPo1m8SUBSUKIRCFEKyFEqOfN3wPTQaAvqaS3J+ZESpkppewhpezRooX/ipyllGzdupVOnTq5tbds2RKAwEDv2D9lyhQ+//xzvvrqK0JDa+5XtmjRInr37u0MTgDt2rWjb9++LFy4sNxrS0pKCAoKomHDhm7tkZGR2GxnV2cDAlTlgcI322fNYm6fPjUWnKD82Ye/iLRU7vzKBhZf/Vf2eWsDkZGRF/m7T6OfMkeAXGA2sA84qXOraY4BTYQQngGpicvxc8bu3bspLCykY8eObu0rV65ECMHQoUPd2t98803mzp3LN998Q2RkpM9+pZSUlZVVeLOWs1SyY8cOYmJivNo7derEzp07y31do0aNAuDxxx/n4MGDFBQUMH36dLKzs3nyySfLvVahsNlsrB4/nq9HjcJaUrWt6uAmTZBNm3LcZKK4nPMqM/s4iSZ/cwio6LM/NCqKsOhowqKjuWmFIMjjZQSVQKiPgfkKLA0uvtjZZ2Djxs72wdl49R9YAjd9H05YdDR4fdydJbhJE2efIc2bG35NASEhPs8zhYU5z2vYqlW5fbq+puDmzakJ81ujS3xzgD7AFKqZJFENdgAhwGW470M5IkT5n7x+JicnB4CrrrqKsrIyioqK+Oabb5gwYQLvvvsuPXr0cJ67f/9+nn76adq3b8/AgQMBbYalJ7u0atUq5znlER8f79z78uTYsWM0aeItLt+0aVOOHz9ebr8xMTGsXLmS2267jfT0dACCgoKYNm0ad911V4XjUtRfig4dYnaXLhT9/rvPc4KbNaP5tdfy+5IlEBBAo8suI6JDB8JatSK0RQsCGzZkZ0AA419+mVNWK63RNNWOxcJBM5RGQJAFLskGUzFY9bZTJbwbC023acsr8QMG8Pn69ew7cwbQvtF2CAri3vvuo3evXl6Xtxs6lAD7Ckj/lSsJWf42Hwet5kwjGyEnBK2ywSol+xJAuszgAkrgVusABk5P9OqzVXw8De0rOEe2bMGyezcAA4HgrVln+z8ZwMio+5myREui2jN/vs99u2ZxcURecQUAJ/PzOfTDDz5/766v6cDKlZw+ckT3vPBLL6Wl/XdScuIE+8pZund9TSfz8nj22mt/83lyFTEaoAYCD0kpP/L3ACrB12iBMRF4yaV9JJqz7zlNkNi8eTMAw4YNc2ufPHkyf//7393a2rRpY/jbRffu3fnxxx8rPK9Ro0blHveeaGJoDLt37+aOO+6gU6dOTJs2jYYNG7Jw4UIeeeQRGjRoQGKi95tPoThz4gRzunf3GZxEYCCdnn+e1kOG0DA8HJ55hkt79iTEYykZ4N7oaE6dOgXAAeBALJCAlsMLlEZCfgKax4IeAdrxfCC5XzJvpafTIyuL1NRU8vPzaRwVxeNpaYb+lyOHDmXm0KHMBFIyUsgsymTvbVZtGrYZuBKIAFORiaT2SaQnp1fcZ3w8xMc7H89kNDN9nNvt/vsr7A+0JfhL4+KMneuxulNOp1w0erTh5y8uLi4y1rFxjAaoXCh3tl0phBCOT/Xu9vvBQojDwGEp5Sr7OWXALCnlgwBSykNCiLeAZ4UQJ4FNwJ3A9WiiteeUnJwcmjVrxtdff42UktzcXJ555hlSU1O5++67aVXB9NgX4eHhdOnSpcLz9AKQgyZNmnDsmPeK5/Hjx3VnVq5MmDCBoKAgvvjiC4KCggAwm80cPXqUJ554ghEjRqj9J4UbUkr+e8stnNy/3/c5ZWUENWxIu8sv15JvXPZHPcnPz3dvMOMMTk6CKd/kJxgYChkBGWSOzSSpfRK5ubnlv5BySMlIIeNABoTbGyKBbpDcOtlQUFJUDaOfNGOBVKOSRAb4zH57xP443f7YdWZkwrv4NxV4FXgCzfqjL/A3KeViP43LMDk5OfTo0YMePXrQs2dPhg8fTnp6OiUlJcydO7fK/a5atYqgoKAKb2az753gTp06sWOHt33Xzp07vfbMPNm2bRudO3d2BicH11xzDUePHuXQoUNVe2GKOssPb7zBgW+/9Xk8IDiY3u+8w8Cnn3ZmhpaHV7ZtVTPaTIAAa7iVjAMZpGSkVHiJJ1lZWURHR5OxKwOCPA4GQeaezCoOTmEEozOol9DSzHcJIXKBAs8TpJTXGH1SKWW5mXe+zpFSWtEC1KtGn6sm+PPPP/njjz/cClcBBg8ezEUXXcT8+fN5+umnq9S3P5b4hgwZwjPPPMOePXucckq5ubmsXbuWSZMmldvvxRdfTE5ODiUlJQQHn/3a+v3339OgQQOaNtXN5lfUU47/+itrnnvO5/ESYGFEBC2bNqWfwZl3WloaSUlJFBfbF20saDMWT06hBQ3PwKGHPZikY3y2k5WVdXYcPoKkNcy/dV0Kd4wGqO32m4Kz+0+uiRCgpV4nJCTw4YcfcvjwYaqS1t6oUSOvfivLQw89xL///W9uvfVWXn31VYQQPP/881x66aU8/PDDzvNWrVqF2WxmxowZ3HvvvQA8+uijDB8+nISEBFJSUmjYsCGLFi1i7ty5PPnkk25Ba968eQBs3LgRgK+++ooWLVrQokUL4l3W2BV1Eykli++80+cmvhWYBew6fNipuGJk38dxjmPPKPS7UIquL3Jf5iuF5GitJi8jNwMaUkHBSeWDSWpqaoVB0lRkVOFNURWM2m0Y26mrJzgy+PQCydChQ/nggw/48ssvnSnb55qwsDCWL1/Ok08+yT333IOUErPZzNSpU92WWKSUWK1Wt/qmYcOGsWTJEt544w1Gjx7N6dOnueyyy3jvvffcghvA8OHD3R6npGhLKOVlGCrqDtJq5bTOXqcVbXXtS8BRBVVcXExqaqrhJJvExES3c1MyUsjck4k1zOqVkJBOOjc8dQPZMlub6djQVQatbDBx2wvLxi1RA4BSSGqfhKLmEDWRu15b6dGjh1SOugpF9cnKymLWE09w49GjXsc+Ryuc9NQkIxZMg0y6Qaa6Y3FbEvTI+gO0GVclExqio6PJy3MxiY1FS9ioZNZefUEIsVFKWb3lH88+jQYoe4LESOAK9O02/ubPgdUEKkApFNUnKyuL5NGjefT0aS99sYPAVHQS7PwUNPTwCiSO56tmMPEKfEBoaCiZmZmq3EKH8xaghBDdgVVoKhJXAFvRJtPRwH7gVynl9f4cWE2gApRCUX3at23LyPx8Gusc+xTQTfEZg/4eTqGJssll1RpPQECAbo2fEMJt+boqZLnUT0VFRZFmsH6qPlITAcpomvlktJl7DNpW5INSyvZAP7QvS//056AUCkXtpamP4AQQr1Of16xZsxrJgluTlcL+9wIp+z/J3qkw4lr34/4Qh05MTCQ3NxebzUZubq4KTucYowGqC/ARZ2u3GwBIKdehpaCXn7usUCjqBFJKbjZpyQabYiFtDIx9UbvfFAtRDz9M27ZtEULQtm1b5syZw5EjR3wmKFQ1C25NVgpdSzJo08RKgIDoFjB99NkgFRoaSlpaWpX6VtQejAYoCZTYrS4OAW1dju0DOvh7YAqFovaRt2wZja1Wp9VFQSQgtPvPEmBnLLozjqT2SZrlqSvVyIKLLsgkzEPzNCwEXvsbtG3bVu0T1RGMBqidaCKtAOuBJ4UQHYQQbYF/AH4XCVQoFLWP7+yzEj2ri7JgmL53uu516cnpJLdOxlRoAqntPVUnQaJVpP7SYFQzamwpLiUjhcCxgYiJgsCxgVVSplBUDqOFupmcnTVNAJYCP9sfFwHD9C5SKBR1h5P797N/9WrAt9VFeXtK6cnplVJyKI+DBSbaNPF+roMWE2388gzueGrxOeSTyEClmtcghmZQUsr/k1K+av/5JzQL+MHAbcDlUkplp6pQ1CEcGnQBAQFER0eTlZXFL599BvZsOV++R657SjU548iNTKLojHtb0RmtvSbI3JOptPjOA1WSpZZSFkopl0opF0kplXqoQlGHcNT/5OXlIaUkLy+Pe+65h08XLXKeo2e057qn5JhxWMOt1RZs1aNfYjqbg5PZf9yEzQb7j5vYHJxMv8Samc34mhkqLb6axVCAEkLcIYR40OVxOyHEOiFEgRDicyGEb4tYhUJxQeGmQWdHSkmBi3xVt20wbDGEWYTunlJFMw69GVpl6ZeYTpu/lxEwUtLm72X0S0z3OWtzpKTbsgT73wtkTVblAqW/sxAVxjA6g3oO3Eof3gWao6WXdwNUPqdCUUdwaNDFEssYxvAiL/IUY4gm1u28btvgf93/jxNPn6BscpnbXkx5Mw69GVpSUpLPIGU0mPmatb32aie3lPQ2Tax0LcmoVJDydxaiwhhGA1R7YBuAECICuBF4Uko5Cc2jKaFmhqdQKM41UVFRxBJLAglEEolA0JhISkmg1CVIicBASo4f1/V4Km/G4TVDi4XipGJG7h7ptVdVmWCmN2uL/SmW4Kk3MuWBF5n6xBi2rdXGHxaipaobxd9ZiApjVGYPyqElEo8mWLzM/ng/UHlfCYVCUStJS0vDjJlgHRvbUs4aZcqyMhoGBuq6O5c343BTCXdo9NnrqTz3qvSWGx3K6J54ztpit8aSsDiBoqNa55YjkSz+T4IzSLWKqNz+UXpyOmWTy5ATpdeMUVEzGA1QW4BEIUQYMBpYIaV05NBEoRXvKhSKOkBiYiIRPrSJpEt7QIMGxN51l/Ox6/5P5p5MOpZ21J1xuEkQ6dm5u+xVedm/29Fr95y1mbPNBHsUa5WWBJP9qRZkD1rU/lFtx2iAmoCWUn4CbQblas0+FPjez+NSKBTnkZMBJ3XbBWfzy1sNGkRYpJYfpbf/szNoJ0ntk7xmHGlpaYSGhmqdVFBP5UtPT6/dc9YWYdHv3HIkokZT0hX+w2gd1Bq0mdI1QFsppWtAmoGWRKFQKC5APJMQUlJSyCabEjzzyEsIItv56Oq773b+XJk6ocTERDIzM2nbti1UUE/lFszs+NLZ89wnsjTS7zysiaVGU9IV/sPwHpSU8qSUcqOUssCjfYmUcpev6xQKRe1FLwlh2rRpbLFtYTGLsVIASAQFBLOYIC1XiqCICK5KOJsbVdk6IYdKePIVyeVmx7kGM4cAbXk6e677RFeMvIJSj85LKSXsritUcLpAUI66CkU9Rtfsz04I2lq+3k5Nu8RE7pgzx/k4cGygtrzngRG/p/Ls3KtLRkoGezL3EGYNo8hURPuk9iSnJ/ulb4U7NeEHZVSLT6FQ1EF8JSEAnEFzyL3Uo71VQgJd//53t7ak9kmaNp3rMp/BOiF/avR5kpyeTA11rTgHVEnqqDoIIS4VQswTQliEECeEEP8VQhhyFhNCRAkhZgkh8oUQxUKIXUKIV+3ZhQqFopL4SkIQQhCJd3ACuDw5maiePd3aVJ2QoiY4pzMoIUQosBzty9l9aLVVrwIrhBBxUsqicq4NQ6u9CgKeB/KBnmirEB2AO2t29ApF3SMtLY2kpCS3WqPQ0FDuu+ce9n3yCRS4bTkT3qEDza64gsBA74+OmpwJKeon53oG9RCaKsVQKeUCKeVCYAialcfDFVzbFy0QPSylnCWlXCGl/CfwNnCHPfgpFIpy8NSqW3tirW4SwpM33cTAIv3viyFnzui2KxT+xvAMSggxDLgdaIPd8t0VKeU1BroZAnwnpfzV5bq9Qoi1wK3Am+Vc66i4O+HRXoAWaL3L2RUKhRNdT6P9Gew6tYvc3Fy3c79ITMRW6pleB8X5+TRr4z/HpZpMkFBc+BhVM58IfIrmA7UP2KFzM0InYLtO+w6gYwXXLgN2A28IIToKIcKFENcDTwDTylseVCgUPmqVgiFbZrtp29nKytizeLFuHxf170/Dxo11j1WWqlpyKGfb+oPRJb4HgUlSyq5SykQp5f2eN4P9NAWO67QfA5qUd6GU8jTQzz7mHcBJIBv4AnjU4PMrFPUWn95FEbhp2x1Yu5aSk/pKEpcNGeK38VTFBLCmfaYUtQujAaoRuJSQVw+9wqsKl+eEEA2AT4CLgHvQJJfGoiVHvFfOdUlCiA1CiA2HDx+u2ogVijqAT+8ii3u6+a8LF+qfFxDAVbfd5rfxVMUEUDnb1i+M7kF9DNxE9YPUcbRZlCdN0J9ZufIgMADNYv43e9tqIYQFyBRCTJNSbvG8SEqZCWSCVqhb1YErFBc6A6wDyC7JdhdnLQGyz6abp2Sk8JHMwPKiZus+OFvzfQKIjIkhslUrv43HVGTSL+4txwRQOdvWL4zOoLLRMuU+FELcLYS42fNmsJ8daPtQnnQEdlZwbSxw3CU4OfjBfn+1wTEoFPWOrKws1r+/HhaDXb1Iu18Mob9p2nYpGSlMO5CBxW59URAJ8xJgk90CqrXZrGutUdlxOHT/GqxpgJfcX0XFvacq2a64oDE6g/rEfh+NVr/kiURfEcWTRcAUIUR7KeUeACFENFoK+fgKrv0DaCKEuNw1CxDoZb8/YOD5FYp6idNXaRt261ENk8lE5ixN2+6+sfchPbwHS4PhKzP8JbQXVw0bVq0xOHT/HDVXRd8VEXAqANsgGzTUzhFl5QdAIQRSZ5eguoFTUTsxpMUnhGhb0TlSSn1BL/d+wtC8pU6hKaBL4BW0Pa44KWWhy/P9BrwspXzZ3hYNbEULVGlohbo90Ip2dwHXSClt5T2/0uJT1FcCAgLQe68LIbDZtLeNmCj0d4MlrL5sNn0TEwkIqHrppK7un8Ow0HXZsRSfKhTljVFOVCv455PzpsVnJPgY7KfInhr+FvB/aP9q2cAYR3CyI9BmZAEu1+YKIXoDE9HUJ5qjpbxnAmkVBSeFoj4TFRWlKwrrKnXka08owgIX9+xZreAEPnT/yjEs1FOlMBWZ6LinI+ZsMxGWCCwRFrLN2exsX9EOgeJCxGeAEkKESimLHT9X1JHjXAPn5QN3VHBOLjrfk6SUO4G/GXkehUKhkZKRwr479mnrFBa0r4TbvH2V7m/wV2aWLKLMJWAElcDgHxoR/IB30W5l0Q2SFRgWejJ8+3DaL2tPsH2QkZZIEhYl0OkGva1txYVOeV+JTgohHOoQhWh1R+XdFApFLcNRN2RrbNO+8kUCCRDWO8zLV+nu4ssZvhgi7UkUkQUwbDF0X3eSYGv1s+T0zAcrMiz0pM03bZzByUFwWTBtvvGfuoWi9lDeEt8DaPtAjp/VAq9CcYGRuSfTKW3kJBhO9zvtZfq3d8kSuv18Nq3ceXrTprSMi6v2WBzPl5qaSn5+PlFRUVwuLie7NNuwTUeYVd+4wFe74sLGZ4CSUs5y+XnmORmNQqHwK0brhk7k53Ps5591z71kwIBy95+M6umtyUohviCTPWlWDhaYyI28mX6J6ZXS4ysyFRFu9Yy4WntlxqK4MDjnflAKheLc4WupzLM993//89nH9JUrnVp9rnVM0dHR3PDUDYakh9ZkpdC1JIM2TawECGjTxErXkgzWZKW42bSXTS4rN6C0T2qva+PePqm9kkGqg6gApVDUYZLaJ4FnfoPOEtrer7/22cf3x46RlJRESkoKSUlJ5OXlIaUkLy+PbJltSHoouiCTsBD308JCtPbKiL8mpyfTOrk1haZCJJJCUyGtk1uTnJ6sZJDqIIbqoOoKqg5KUR8ZMXgEF6+9mIiTEVgaWfij7x/M/Wqu87i1tJR/N21KaWGh17WHgMn2n00mE1bPZIkXMVSXZMsSBOicZ5Ng+gmvPaiquPGqGqnzS03UQakZlEJRh8lIyeCyry8j8mQkAkHkyUgu+/oyMlIyAG3PpsGEhox5upC0MWdljUBL3c1x6csrOIHPLDwsuFl4HCzQX2rML8Fvsx6jy5mKCwejflDqL6xQXEBkZWUR3iecXXN2EeQRAYIIYtdHuxD/EGQczKDMvmdTEAmf3AovjoWxL8IbY+Abl4BlMul8DGTjradnF6B1tfDIjUyiyMOIt+gMTDimP/6qiL8aXc5UXDgY1eI7IISYDXwopfypJgd0riktLWX//v2cPn36fA9FYYAGDRrQpk0bgoI8v3YrHGRlZXHv5Hux/dVGxHf6lbARlgjQycy2BUKx/VPhtL1mCjRB2fvuu49Zs2Y5tfQc7cWLizVFiAjcCoHzxVnliH6J6azJgujjmbSKsHLQYiI3MolP/8iESiqa+yI9OR0yUFl8dQijAep9NA+mp4UQG4APgI+llJ726xcc+/fvp1GjRkRHRyvByVqOlJKjR4+yf/9+2rVrd76HU2tJTU3FdpsNgsESYSHSEul1jiXC19qcB8GAGTLHaUW9ffv2datjSktLIzU1lbypmkJELLGYMRNBBBZpISMlg+T0ZEALUtjli9oAq7KyaLBmNkXXF3lp8VV11pOenK4rkaS4MDG0xCelfFFK2R74C/AL8CbwuxAiSwhxQ00OsKY5ffo0zZo1U8HpAkAIQbNmzdRstwLy8/OdEkLZ5mxKgtzX4EqCSsg2V8LaLeJskW1iYiK5ubnYbDZyc3NJTEx0KkTEEksCCURi3+8ikgMZB5z7Xa44lM2LvityswAJOBFQpQQJRd3E6AwJ1UM8AAAgAElEQVQKACnlcmC5ECIFTRMvBfifEGIfMBPIlFIe9PsoaxgVnC4c1N+qYqKiosiz5EEkbIvTZCE8xVUd7UaoaLnNEbw2jNxAsIfyaxBB7Mncg+ekxmn/AW4WIJe2vZT0XBWcFBqVClAu9AD6A1ehOeF+C4wG/iGESJJSzvHT+BQKRSVJS0tz7kERrAWpbXHboMx+gsu7PqAMGpyB4lBoWAwlIWB1/VQwuNyWmJjI7pG7dY/pyRBd2zqPlU9CVHPIPwITPoW563wonivqLYbTzIUQbYUQLwohfkPbBr0ETaOvlZTyHqAt2l7V5HK6ueDxrKR3TaWtzZSVlZX7WFF3SExMZPbY2YQtD3NfOmuVTHKrZEyFJpBgKoA7F8JLk2HyS/DyZPjbwrNisaZCU7nLba4FtmKcwNJYf1/LIUPkYE1WCtNHQ3QLCBDa/fTRMOJad/sPhcJomvlyNOHY+9F8nNpLKQdJKT+VUpYASCmtwEdAy5oa7PnGsW7uWkmflJRU7SD15ptvEhMTQ0xMDFOnTgVg9uzZxMXF0blzZ+655x4A/vzzT2677TY6d+5M586dWbduHbm5ucTExDj7mjJlChMnTgRgwIABTJgwgfj4eN5++21GjRrFU089xcCBAxk3bhxFRUU88MAD9OzZk65du7Jw4UIAZs6cye23385NN91Ehw4d+Mc//uHs/+uvv6Zbt2507twZs9kM4LMfxfkjMTGRwvWFyLckcqLE+i+rlkDgIiu0OnKilzBst22QOhVWh7xWruyQp6wQoZB9g/d+l0OGyBVfqhKv/Q03+w+FwugS3xHgZuAbWb70RA5QZ9Or3NbN7RQXF5OamuqlDG2UjRs38uGHH/L9998jpaRXr1707NmTtLQ01q5dS/PmzTl2TCsWefzxx4mPj2f+/PlYrVYKCws5fvx4uf0XFBSwatUqAEaNGsWuXbtYtmwZJpOJCRMmcP311zNjxgwKCgq45ppruOEGLeclJyeHzZs3ExISwpVXXsljjz1GgwYNeOihh1i9ejXt2rVzjistLU23n7AwpTBdW9ATUe379Waf5z/+zjtsSU11Zup5/n/rqaR77Xc1snDFyCucWXwOWkXq1zhFNYPoKr6PFHUTo466hkwCpZSlgF/cd2sjvtbHq7NuvmbNGm677Tbnh/ntt9/Ohg0bGDZsGM2bNwegadOmACxfvpzZs2cDWtFkREREhQHqzjvvdHs8fPhwZ8Hl0qVLWbRoEVOmTAG0jEbHazGbzUREaKlgHTt2JC8vj+PHj9O/f39nirdjXL76ufrqq6v8e1H4D8dsxxFQHCKqxVFXE/Od+7kSzdwt548/AJyrBHA2GSIjJYPHPnpMN+nCud9l70xPYuhggYk2TbyD1EGLCeXqpHDF6BLf40KIST6OvS6EeNS/w6qd+Fofr866ud6EVAhhOFstMDAQm+2s271nCrbnLMb1sZSSzz//nJycHHJyctyCSkjI2TUYk8lEWVkZUkrdcZXXj+L840tE9dM23vYaB4KD8XyjO1YJQAtOBzIOEGmxp5JbIklYnEDs1livvnxl//lSlciNVIoPCneMJkmkAL/6OLbLfrzOo+cI6mmbXVn69+/PggULKC4upqioiPnz59O9e3c+/fRTjh49CuBcSjObzWRkaDUlVquVEydO0LJlSw4dOsTRo0c5c+YMX3zxheHnHjRoEO+++64zSG7e7HvJB6BPnz6sWrWKvXv3uo2rsv0ozi2+ZINONfL+crS9pMRLLQjOrhLsydzjJZ0UXBqMOdvsfkE52X/9EtPZHJzM/uMmbDbYf9zE5uBkeyGvQnEWo3tQbfEdoPYC0X4ZTS1HzxFUb32+MnTr1o1Ro0ZxzTXXADB69GhntX58fDwmk4muXbsyc+ZM3n77bZKSkvjggw8wmUxkZGTQp08fXnjhBXr16kW7du246qqrDD/3888/z5gxY4iLi0NKSXR0dLkBrkWLFmRmZnL77bdjs9m46KKL+Oabbyrdj+LcYioyackMHkTqJN391D0Yrivxki1yrBL4cq6NsEQgTglkA2lIYshTVUIt7Sn0MGS3IYQ4CLwkpXxf59jD9mMX18D4/Iqe3cZPP/2klqMuMNTfrHI496BcJj5BJTBssbu9+6ZY+CwBylxrbUvAtMTErGdmkZiYyNjAsbqOtoWmQiaX1ekKE0UFnE+7jcXARCGE20KzECIGzRFG5RUrFLWU9OR0klufrX9qeEJ4BSeAr8wewQkgGOT1krUn1hI4NpD/3fo/SgIrTiVXKPyB0SW+Z4Frgc1CiM3A72iFul2B7cD4mhmeQqHwhWvqOKe05BpfS2yuIqofx8ezf9tqr/4K9IXPsTWyObMAt8Vto08IdFhqpuhoBGFNLITE/0Fy+qs18hoV9RujaebHhBA9gfuAgUAztMLdTGC2lPJMede7IoS4FHgLTXhWAMuAMVJKQ7naQoirgZft4wgD8oF0KeXbRsegUFzoeKaOEwoSbbnekUZOBl77QGcsFg6sXavbZ6RF84TywoZzeXBEOLx58zbCbjk7/So6A2uymqgkB4XfMSx1JKU8LaV8X0p5l5TyL/b76ZUMTqHAcjQNv/vQLDw6ACuEEBVWdQohegDfAyFo2n83A/8ClKGiol6hmzruShC8/+v7XrJcecuWIfWccYHBW5vrGv65fkq81hzCPD41wkI0dQgHa7JS2P9eILYswf73AlmTVS+SfBU1QKXFYoUQgYDnSjVSymKd0z15CGgPXCml/NXe31ZgN/Awmo2Hr+cNAGYB2VLK21wOrTA+eoWibmDEcdbWyEbebXmQDXnbtILbf9qzRV3ZFKvtPxVEHNGWCsvclwoz92Q6swCjfHxitIrQjq/JSqFrSQZhTbT2Nk2sNDmTwZos1AxLUWkMBSghRGPgNeB24CK0pTlPjMxihgDfOYITgJRyrxBiLXAr5QQoYADQEXjEyJgVirqMr9RxNwTg4opbvK2YuQXfsmOMtt/UsBjKTFAawtl3dCjIUklyy2T6NtbKHayNrVofwZBfBtE6MzeHCkR0QaYzODkIC9GcdL08NxSKCjC6xPc+2pLcJ0Aymoq5580IndCSKjzZgRZ8yqOf/b6BEOI7IUSpEOKQEOIdIURDg89fKzGZTHTp0oWYmBiGDx/upfdXEa+99lq1nj883DttuLIUFBSQnl61D6Cbb76ZgoKCao+hPpHUPsl7Oc4XwcBNwFhYe6tV22cScCoMShvg/XUzCNYsWsOGkRsYlTeKMdvGEDs7FgpgwhEo8oiLrioQvnT2HDMshaIyGA1Qg4AnpZRP2vedZnneDPbTFM0/ypNjQBOddlda2e8/AZaiJVn8E20v6iODz19tsrZlET01moCXAoieGk3WturbbTRs2JCcnBy2b99OcHAw06ZNczsupXSTM/KkugHKH1QlQDle15IlS4iM1NudV/jCM3WcYhCnBPgqawxFSykyoKAVuzWWhGx3Z9yE/QnETo1l3btt2RzgWwXiYIH+QspBi9omVlQeowGqCNjvp+fUewsZEZ5zjHWOlPIFKeVKKeUU4CVgqBBCdwYmhEgSQmwQQmw4fPhwFYeskbUti6TFSeRZ8pBI8ix5JC1O8kuQcnDdddfx66+/kpuby9VXX01KSgrdunVj3759zJ07l9jYWGJiYhg3bhwA48eP59SpU3Tp0sWpaDFnzhyuueYaunTpwsMPP4zVvimud72Dp59+mm7dumE2m3H8nqZPn07Pnj3p3Lkzd9xxh3Nmp2f7MX78eH777Te6dOnC2LFjAZg8eTI9e/YkLi6OF198EUD3dUVHR3PkyBEAhg4dSvfu3enUqROZmZkofONqnSHfkNgm2Xy731bCiHjIcjPBpe7bzMEEY8ZMfn4+/RLTafP3MgJGStr8vcxtb2n28Ssp8vguVWTT2hWKymI0QP0LSLEnKlSH42izKE+aoD+zcuWo/f4bj/al9vsuehdJKTOllD2klD1atGhheKB6pGanUlzqYbdRWkxqdmq1+nVQVlbGV199RWysVg/9yy+/cO+997J582aCgoIYN24cy5cvJycnhx9//JEFCxYwadIk5wwsKyuLn376iU8++YS1a9eSk5ODyWQiKyuLgwcP6l4Pmp9Tt27d2LRpE/Hx8bz00kuApqz+448/smXLFq6++mo++OAD4Kztx5YtW9i0aROdOnVi0qRJXHbZZeTk5DB58mSWLl3K7t27+eGHH8jJyWHjxo2sXr3a63W1bdvW7XcwY8YMNm7cyIYNG3jnnXeceoT1HVdzwMCxgaRk6GfG6S79VSwW42REOAT5KIiKIKJCYeQXLL/w0J+QWwo2qd0/9KfWrlBUFqNZfK2BzsAvQogVaD6drkgp5Tjvy7zYgbYP5UlHYKeBa8H77eb4buh7DcxP5Ft82G34aDeKYwYE2gzqwQcf5ODBg7Rt25bevXsD8OOPPzJgwAAcQTYxMZHVq1czdOhQt76ys7PZuHEjPXv2dPZ90UUXlXt9QECA05Zj5MiR3H777QBs376d5557joKCAgoLCxk0aBBgzPZj6dKlLF26lK5duwJQWFjI7t27iYqKcntdnrzzzjvMnz8fgH379rF7926aNWtW1V9tncCXXYZenVN6cjpk4Ob9ZDPZkA3Lj1KxW2MxZ5uJtEQgAmxIm/dMzIKlQmFka5iVuYUwt9DjgIGsQ4XCE6MBahhaAAhE2/vxRAJGAtQiYIoQor2Ucg+AECIa6EvFahRfAWfQtntdlUgH2e83eF3hZ6IiosizeNtdRUVUz6baMQPyxNMawwhSSu677z5ef/11t3bHbMkIDkuNUaNGsWDBAjp37szMmTNZuXKl4T6klDz77LM8/PDDbu25ubk+jQxXrlzJsmXLWL9+PaGhoQwYMMDLPqQ+omcOSJDWnq6TGeeqGgF2xYn9GVi9ikM0YrfGkrAogWC7zpEWnCSu64JBwSWI60SFwsi+sgt9Lj0qFOVgaMlOStmugptRIa7pQC6wUAhxqxBiCJqO3z60TEEAhBBthRBlQogXXMZwFHgdeEQI8ZoQ4gYhxHjgBWCWa+p6TZFmTiM0yMNuIyiUNHPN21T36tWLVatWceTIEaxWK3PnziU+Ph6AoKAgSku1dR2z2cy8efM4dOgQoFli5OXllXu9zWZj3rx5AHz00Uf066clTJ48eZJLLrmE0tJSN1t7PduPRo0acfLkSec5gwYNYsaMGRQWal+lDxw44ByTLywWC02aNCE0NJSff/6Z7777rtzz6wu+ap6M1EIBvPfIe9y1Ihzh43RzttkZnM4iEAFWQBLRvIDeiYt5c1l5VSAaukuM5VhvKBTlUelC3eogpSwSQlyPJnX0f2hf0bLRpI5cFwUEWl2VZwB9Gc3wMwV4Bk0TcDLwSg0PHYDEWLvdRnYq+ZZ8oiKiSDOnOdtrkksuuYTXX3+dgQMHIqXk5ptv5tZbbwUgKSmJuLg4unXrRlZWFq+++io33ngjNpuNoKAg3nvvPXr37u3z+rCwMHbs2EH37t2JiIjgk08+AeCVV16hV69etG3bltjYWGcA8mX70bdvX2JiYhg8eDCTJ0/mp59+ok+fPoCWyj5nzhynm68eN910E9OmTSMuLo4rr7zS5zJgfaO6s5Jjv/xCl/WF2AphXgK45T9IzSpDD2kL4MWsl7DZJ+/73wtk9vErecHyi5t1vKfmn+cSY0XWGwqFLwzZbQAIIeKAVKAHmn1LHynlJiFEGrBGSvlVzQ3TPyi7jbpBffub6dllUArJrZMNffBveOstVj71FOCqGgHCBtIEY94aQ6TFO80/olkBj789lQCXDMAim5b04NxjqsQ4FHWb82a3IYQYDGwELgZm4/5WOQM85s9BKRSKs3jWPJkKTbpBQS/TLyUjhYH7nmbsi5A2RjsvdSpMfgmk/d2fbc6mJMjdQqMkqIS4O7LdghNoOnyvNXdpsO+FVTQOhaIqGDUszAF+lFI+ZNfiKwF62GdQQ4BpUspW5fdy/lEzqLqB+pt5ozvLKrPfuy7kSwgthlu/hoU3CIojtPe/I4svwhKBJcJCtjmbnNu3eQUo0NLHTb+69yknSt/jULOsekFNzKCM7kFdhbbnA95p3ifQr21SKBTnCN1MP713t4DiMG0vKupkO34r3QNBms/Ttrhtzne3qcjE/pOCqMbeX2Dzy9wfu+6FVTbjUKEoD6OFt4fQVMj16ITmyaRQKM4DKRkphjP6HJQGw97gXK+lw7TAjuxrYaKki5WwAMkZj4y8Ipumx3e2I/cMvepmHCoUrhgNUB8DLwsh+rm0SSHEFWj1T/7T+lEoFIZxLqlVQsrIgS3c5iaXtLJLEk+03kmbJlYCBDQL1yZURwuFU3fv7dyOfPqH770wX5mFqg5KURWMLvE9j6b2sAr4w962EC1pYimaFYdCoTjH6C6pGSB2ayzmb8xMfGkiRaYi2ie1J6GTt1VGgyA4UhhAs5FltAEm2G++SGqfpLsHpeqgFFXBaKHuGSnlLcCNaKaB/0FTEP+rlPIWKaVR4X+FDmlpaXTq1Im4uDi6dOnC999/z9SpUyttu1EeK1euZN26dX7rT1E7qOzS2YhwyD4Yy11fJBB5UlMrD7eGs3/afo7u0He8qYxVhtGMQ4XCCJUSf5VSZkspJ0gpk6SU46WUnsKtdZ+9WbAgGj4K0O73Vm91c/369XzxxRds2rSJrVu3smzZMi699NJyA5TVh2V3eagAVTepzNLZiHCY3hK2/tdMaYmHWrkMZtmnZt3rjrv8G2ZlZXnZyHviumxYNrlMBSdFlTFaB9WxoltND7RWsDcLfkiC4jxAavc/JFUrSP3+++80b96ckJAQAJo3b868efM4ePAgAwcOZODAgYCmxPDCCy/Qq1cv1q9fz8aNG4mPj6d79+4MGjSI33//HYABAwYwZswYrr32WmJiYvjhhx/Izc1l2rRpvPXWW3Tp0oVvv/2WvLw8zGYzcXFxmM2ajQLoW2kAvPnmm8TExBATE8PUqVOd4589ezZxcXF07tyZe+65x2cfubm5xMTEOK+bMmUKEydOBDSB2I4dOxIXF8ddd91V5d9lXSMlIwUxTiAmareA8QFeNUWVMS58rblWx2Q5oq8cUXhUv92xwZWVlUVSUhJ5eXlIKcnL02zk9YKUQuEPjO5Bbadi0f66vwu6JRWsHrMaa7HW3q5qckc33ngjL7/8MldccQU33HADd955J48//jhvvvkmK1asoHlzrSqyqKiImJgYXn75ZUpLS4mPj2fhwoW0aNGCTz75hNTUVGbMmOE8d926daxevZoHHniA7du388gjjxAeHs4zz2jVAgkJCdx7773cd999zJgxg8cff5wFCxY4rTTmz5+P1WqlsLCQjRs38uGHH/L9998jpaRXr17Ex8cTHBxMWloaa9eupXnz5hw7dgxAtw9PtXNXJk2axN69ewkJCVHOunZSMlLIOJihGQ3akQ0lGQczmN1nNu8/+j6JiYle0kKcAhoAAd61TZa7s6HfNiKaW7Ac0VOOsOiOpUmo9tZPTU31mtUXFxeTmppaoYisQlEVjAaogTptTdH2pG4EnvDbiGozxT6y6X21GyA8PJyNGzfy7bffsmLFCu68804mTZrkdZ7JZOKOO+4AND+l7du385e/aMLyVquVSy65xHnuiBEjAOjfvz8nTpzQ/dBfv349//3vfwG45557+Mc//gHoW2msWbOG2267zalCfvvtt/Ptt98ihGDYsGHOINq0aVOffZQXoOLi4khMTGTo0KFe9iH1FZ/JD4FQ1LuIpCQt6cARpDzVy7/7cg2DlyY4jQcjLZF88UECAQLMf8tm8X8S3Jb5HMoRehy0mGgDzlm2J77aFYrqYihASSlX+Tg0XwjxKvA33C0w6iahUfblPZ32amAymRgwYAADBgwgNjaWWbNmeZ3ToEEDp9CqlJJOnTqxfv163f4cdhm+Hhu5xhVfaiNSSkN9AwQGBrrZ1rvaaHz55ZesXr2aRYsW8corr7Bjxw4CA8+pjnGto9zkhwj9mcuarBSiCzL5d6SVN9eNocjDFbesJJhln5h58h1tiTb7UzOWIxGUhlpYfEM2e6/YRi+btgzooOgM5EYm0QaIiooiL0/HbqYCE0OFoqpU1yEXYAVwqx/6qf10TgOTu90GplCtvYr88ssv7N692/k4JyeHtm3betlXuHLllVdy+PBhZ4AqLS1lx44dzuMONfI1a9YQERFBRESEV3/XXnstH3/8MaDtLTgsNvSsNPr378+CBQsoLi6mqKiI+fPnc91112E2m/n000+drreOJT69Plq2bMmhQ4c4evQoZ86c4YsvtO8zNpuNffv2MXDgQP75z386zRHrO+UmP9hX2fLz8526d/9+T3AtGc4apqLj+vtJlqMR5B6GTn22MfS5qRy4cQqvFr1Fv5v68ekfJs0N94wmZ7T/uInNwclOS/e0tDRCQz3sZkJDKzQxVCiqij++pv4Vb4fduoljn2lLqrasFxqlBacq7j+B5jT72GOPUVBQQGBgIJdffjmZmZnMnTuXwYMHc8kll7BixQq3a4KDg5k3bx6PP/44FouFsrIyxowZQ6dOmllxkyZNuPbaazlx4oRzXyohIYFhw4axcOFC3n33Xd555x0eeOABJk+eTIsWLfjwww8B31Yao0aN4pprrgFg9OjRTqfc1NRU4uPjMZlMdO3alZkzZ/rsw5Hk0a5dO6666ipAC2AjR47EYrEgpeTJJ58kMtJ7f6S+kdQ+iYw/yy/ADe0VSsaBDEZcDCmRuOnm+dpnsmChnV00NjQ0lMxMzYbNc5kQNMuCNi6PHbO11NRU8vPziYqKIi0tTe0/KWoMo2Kxn+o0B6Np9HUAJkgp3/Dz2PxOfRCLHTBgAFOmTKFHD79qNtYq6trfzBdiotAPUBJCJ4dy+pHT2Brb2BsN0UHup2xbG+u1zyQCSlgW/A1rzmxQwUXhd86nWGwLnbbTwLfAU1LKJf4bkkKhAN9GhRTDfffdR0YjbRk1yv4u3rY21rmvFNHcQuf+m9mdc6Xzcfxt2fQIDuTmf9u8+1QoaiFGkyT0svgUtZCVK1ee7yEo/ERS+yQt1dzzXRoC0/43Tdv5NWnq4id/cJ8xWY5EsmV1VxJGLya27zbnpS2PadvOKRkpyvVWUevxR5KEQqHwM44AoltdGAjyVul8935RCNmfeKtDlJYEk+2hDnFxpM1ZY2UNt4IAa7iVjIMZbkXARhQjFIqaxtAMSggxoxJ9Sinlg1Ucj0JR73EqlJcnAmsPXCPC4f4ImOJDBcJTNeKgxcS0fdOgoceJgTAtbxrppDsVIxxFuQ7FCEDtWSnOKUb3oGKBS4GL0LyhDtl/vgg4jLsfVMVZFwqFwieVUSh3yBf5VIdoflYd4pS9pkkeztDtSzZQihGK2oXRJb6XgSKgn5TyYillnJTyYuA64CTwipSyp/12TU0NVqGoD1RGodyRIGH+WzZBwSVuxwKDS+hzUzbSBpbD8L3tfmdNU3koxQhFbcFogJoEPCeldJPDllKuBV4Aan2KeW3GZDLRpUsXOnfuTLdu3aqsOr5gwQJ27tzpt3EtWrRIV3YJNImm8igoKCA9XW26V4lKuKw47Ndj+24jYfRiIpoXAJKI5gUkPLCYb+dsI/3ZNixcOJD4Bz7QTj7lozN7uy9lCKUYoTjXGA1Q7fH9tikGov0ymguAbVnbmBo9lZcCXmJq9FS2ZW2r+KIKaNiwITk5OWzZsoXXX3+dZ599tkr9VCVAlZWV+Tw2ZMgQxo8fX6WxqABVNVIyUiDE+PkTjmiqD3qcsgtyFI4YwfWzZjllqZKjk8Hzz16mtWdlZekqeSjFCMX5wGiA2gRMFEJc4toohGgFTAQ2Gn1CIcSlQoh5QgiLEOKEEOK/QohKfzUTQjwrhJBCiDWVvbaqbMvaxuKkxVjyLCDBkmdhcdJivwQpBydOnKBJE83WtLCwELPZTLdu3YiNjWXhwoXO8zxtLtatW8eiRYsYO3YsXbp04bfffuO3337jpptuonv37lx33XX8/PPPAIwaNYqnnnqKgQMHMm7cOI4dO8bQoUOJi4ujd+/ebN26FYCZM2fy6KOPArB371769OlDz549ef75593GPHnyZHr27ElcXBwvvvgiAOPHj+e3336jS5cujB071ud5RUVF/PWvf6Vz587ExMQ4ZZrqK5l7Miul7zK3ENILYMsaLc1c24cSWI5E8r+PEjhELG/PmUOEizpHenI6ya08TAVbJdO3cV+SkpKc0lUOmjVrRmZmptp/UpxzjL4VktCs3XOFEBs5myTRHTgKjDTSiRAiFFgOnAHuQ0uoeBVYIYSIk1IWGeynPZBqH8c5Izs1m9Jid/Od0uJSslOziU2MrXK/p06dokuXLpw+fZrff/+d5cuXA5pA7Pz582ncuDFHjhyhd+/eDBkyhJ07d3rZXDRt2pQhQ4Zwyy23MGzYMEDTxJs2bRodOnTg+++/JyUlxdn3rl27WLZsGSaTiccee4yuXbuyYMECli9fzr333ktOTo7bGJ944gmSk5O59957ee+995ztS5cuZffu3fzwww9IKRkyZAirV69m0qRJbN++3dmPr/MOHz5Mq1at+PLLLwGwWPQtH+oLlXXIBXjsCKTONRPkkWYubcEEYebP36c6legd6EkbRUdH65pkhoeHq+CkOC8YLdTdIYS4DHgA6AlcDPwCzAE+lFL6WtX25CG05cIrpZS/AgghtgK7gYeBNw32kwFkAVcafQ3+wJKv/+Hpq90ojiU+0Gww7r33XrZv346UkgkTJrB69WoCAgI4cOAAf/75J8uXL9e1uXClsLCQdevWMXz4cGfbmTNnnD8PHz7cqY6+Zs0aPv/8cwCuv/56jh496hUo1q5d6zznnnvuYdy4cYAWeJYuXerU5issLGT37t1e+xW+zrvuuut45plnGDduHLfccgvXXXddFX+LdZTrUy0AACAASURBVAOf6hEujAjXsveiAiG/FF48CIEF+mnmQURgexGCxwVXWIyrkiMUtQ3DH+5SytNAdTcVhgDfOYKTvd+9Qoi1aHXxFQYoIcTdQDdgBPDfao6nUkRERWjLezrt/qJPnz4cOXKEw4cPs2TJEg4fPszGjRsJCgoiOjqa06dPG7K5sNlsREZGes2EHLh+o9bTY9TrX69NSsmzzz7Lww8/7Naem5tr6DyAjRs3smTJEp599lluvPFGXnjhBd0x1zX01ByS2idpNVBB+tc4bNsdlhjRwfCfS2ByiIXSMzrisBGWs8W4BzIgA59ByoidRkZKBnsy9xBmDaPIVET7pPYkpydX/sUrFAaolJKEEGKwEOJ5IUSmY99ICNHfvhdlhE5o7rye7AAqtI0XQjQB3gL+IaU8ZnTc/sKcZiYo1P2TIyg0CHOa2ccVlefnn3/GarXSrFkzLBYLF110EUFBQaxYscL54eHL5sLVUqNx48a0a9eOzz77DNACxJYtW3Sfs3///k6lgJUrV9K8eXMaN27sdk7fvn3d7DkcDBo0iBkzZjg31g8cOMChQ4e87D18nXfw4EFCQ0MZOXIkzzzzDJs2barGb+/CwVGM66bmcCCDVbmrNAt3H4kPjronV4JC4Prh2YB7mnlJUAnZZhcTwiD7HpcPKrLTyEjJ4EDGAcKt4QgE4dZwDmQcICNFv65KoaguRpUkWgKL0PaccoF2wDS0At370YRjjXyNagroWaseA5oYuH4ysAuYaeBcv+PYZ8pOzcaSbyEiKgJzmrla+09wdg8KtEAya9YsTCYTiYmJJCQk0KNHD7p06eK0qOjUqZOuzcVdd93FQw89xDvvvMO8efPIysoiOTmZV199ldLSUu666y46d+7s9fwTJ07k/vvvJy4ujtDQUF3DxLfffpu7776bt99+2+nsC5pl/U8//USfPn0Abb9izpw5XHbZZfTt25eYmBgGDx7M5MmTdc/79ddfGTt2LAEBAQQFBTl9pOoirjMmbHjPkoJgZ+DOci02ony8Y3vdtA1rIHw934zJbvGebc5mW5x7Ak95e1wV2WnsydxDuEcFcRBB7MncU/21FYVCh8rYbXRCW4bLRfuq1kNKuUkIkQi8KKW8wkA/JcC/pJTPerSnAeOklD4DphDiOiAb6Cal3G5vWwkESin7lXNdElqSB1FRUd09lzDqi3VDXeJC/Js55Yt8LN0ZRc9aw5UiGzz0p5bdp4ep0ETZZN+lBeUxUUxE6ERPiWSinFilPhV1h5qw2zC6xHcTWqHur3gvPuwHWhvs5zjaLMqTJujPrFx5H/gA2C+EiBRCRKLNAE32x7rVI1LKTCllDylljxYt9FxDFIqaJ3NPZtWDkzx7m3AEisqJL2EB2jKgLqWaQnpVKTLpJ9n6alcoqktl9qB8rQ00x3dtuic70GZinnQEKqowvRp4BC2QOW59gd72n9VOraLWUpX0cSc24CXtNvcZeOh9yD0MvhY/vJYBHbVOrZO9EiQclvFioiBwbKCborkn7ZPaU4pHmQWltE9qX/nXpFAYwGiA+hZ4TAjhKv7veHs8gFbbZIRFQG97HRMAQohotECzqIJrB+rctqAlXQwE5hkcg0JxzjEV6flmGMTjXTp3HbQbA3lH9E/P15lhlU0u0w1OeokavoJUcnoyrZNbU2gqRCIpNBXSOrl1jWXxVSZ4KuomRtPMxwFr0ILBfLTg9JAQIgaIQZvFGGE68CiwUAjxnL2fV4B9aEt4AAgh2gK/AS9LKV8GkFKu9OxMCFGAtgfldUyhqE38f3tnHl9FdTb+73OzAGFJWNSqLAFR3EJF0arYvmrcUFIrKpbivkSJbUXbuvEqYOW1ra3Qagnghkt+xX2huBTj0krdcCNuqCxBxIUtAZJAtuf3x5mbzJ3MvXeS3CQ34XzzuZ/ce+bMmefMnJlnzjnPeZ78YfkUflcY0wAiKj7L7ELA1Mfg7sshw6X7KurNMKCbaMrR12u6Y+nnXcQbZvKcye1iEOENORLETN7S9QjUg3KMEg4DlgEXYob7xmMUy49U9fOA5VQAx2Ms8R7CLLZdDRyvqu5pXcFEvLEBFS1dgjmT5yA7WqCdqjGmQS56Yzw01y+FS7+HNTXGH9+aGh8DiRjzTtGGHVs1HJkgfOfs4pjJW7oecXtQIhIC9gS+U9XzWntAVV0LnBknzxoCvGuq6rGtlcdiaS+uGHJF8yz5FFgElEAOOeSSSyaZ7KScdIoZmvIJhevq/C32lLih3KN5rWjVcGSCSGblaWk/gvRQQhjT8qim3JbW4Q1d4XbS2hrWrFnDwQcf3Kx91q9f3+DLz5JY5kyew+S9G520xg3tWUmDcsojjyyyEITuZFFNHhnkMLYY0iLX55JWDfl9L/Gdd3KTPywfj81Dqy39EkU0JZkMytPSfsRVUKpaC5QCGfHy7goUFUF2NoRC5r/LqUKnp7a2lr322ovHH7f2JonCPdEv1wlzS+eaXoASeD4ql1zSSfekplNfdyyHlsBZiyDLhIEiqwzOKslm3lX3xC3XqzCjWfp1BMmsPC3tR1AjiT8CU0XkP6q6oS0FSmaKiiA/H8IOn0tLzW+A9nD2vGDBAp566il27tzJ6tWr+cUvfsG0adO46aabGDBgAFdddRVgPAHsscce/PSnP23Yd8eOHUyePJlly5aRmprKHXfcwXHHHceCBQtYvHgxO3bsoKKigvvuu49x48bx0Ud+HqkszcE70U+GWdQKBFNOzithJv6+HtVJP7TEfFJ69mRHxTDKQycyXaYH8pXn59U8GZgzeQ4U0sRXYTIoT0v7EVRBnYSZhwqH2/iOyAEKVdVzEi1csjF1aqNyClNZadJbo6Dcro7A+NZzKxc3b7/9Nh999BEZGRkcfvjhnHbaaVxyySWMHz+eq666ivr6ehYuXMjbb78d4QsvHCKjpKSEzz77jJNOOonPPze2LW+88QbLly+nX79+TZy8WlqOr5Vcc3DaWjnlZNHUEax4zPt2VAyjmjx61JveVoOvPAo7pUPXZFWelvYjqJXcbpjwGm9jLPgGOGnhz+5tIl2SES3qQGujEYTDbYQ/t9xyS9S8J554Iv3796dHjx6MHz+e119/nezsbPr378/777/fENKif//+Efu9/vrrnHeesXHZf//9GTJkSIOCOvHEE31DdlhaTkFhQesn9LsBOVBMMdUeR7BQTZrLvC+lRw9qyAXPUGCDrzyLpRMSy/fdYOAbVa2x1nKGwYPNsJ5fenvhDXkR/n3ppZeyYMECvv32Wy6++OIm+8XyuegNZmdpHYnyu0cqcAqsXLmSqqOqqH61mp51PVHK6U4xaTQ6gu2+555sXeU/FNizzl5fS+ckVg9qNTAKQEReFpH920ek5GXmTPBEIyAjw6S3F0uWLGHz5s1UVVXx9NNPM2bMGADOOOMMXnjhBd555x1OPvnkJvu5Q2p8/vnnrF27lhEjRrSf4LsIRUVFFH6eAOUUJgOOuvwo7njpDm6tuJUTHhpOb2ZHKCeAUbNmURHa5luE9ZVn6azEUlBVNFruHQv0iZ5112DSJJg/H4YMARHzf/78tjWQePbZZyMC+B1zzDGcd955HHLIIZx55pmMHm2cB6enp3PccccxYcKEhki5bgoKCqirqyMnJ4dzzjmHBQsW0K2br39dSwspKioiPz+fKDYNgZjYy3gsrxtu/k/sDcW9irn89xPYsGEDpf/4R5N9sg45hF7DhjFg4h7WV56lSxE13IaI/AejlJYA12C8PnwTpRxV1evaRMIEMnr0aF22bFlEWmcK3bBgwQKWLVvGXXfd1WRbfX09hx56KI899hj77rtvB0jXfiTrNcvOzjZBJaeAj01DXLzRcsEVPmMbdKuCM14QDlveeM++lwNPnyBU9VFStqdw9sdnM3DJQBvx1tLutHe4jcswAQlPx1js5QJnx/hYOohPPvmE4cOHk5ub2+WVUzKzNmwt0zS4bSD8ouU2hM8Q2JkBT4xT3nPiY76XA4/nQVWmGmevvetYeNhCKu6sYLpOp+LOCn7V81fW2aql0xI0YGE9cKSqvt32IrUdnb0HZTEk6zVr6EEB5GBe6TIJvCC3bjiEfPLWK6R82fhb6kBDIPWgPo4VUrabNUNNDDVqSJqFuJauR1v0oIKugxoKrE/kgS2WrsbMmTM5//bzqT+u3iimyri7RLC2NjJabsnSHIofzaV8YyZTXCHcw0rJTzmB8VfXEk/lFkuyEdSbeamqeh2PdBmC9CItyUEyX6ulW5dSf1q9mX8SoCfRe08+vvjc0XJLluaw6J48yjeawrLKs8hblEfO8py4cqRUpFhnq5YuwS4fzqJ79+5s2rQpqR98FoOqsmnTJrp3797Rovgyf9V87zpZXyb2gtVDoW5fx1LP6en8YztcVmqi5RY/kktNdWRh6TXp5Bbnxi7c8Vdnna1augJBh/i6LAMHDmTdunVs2LDLuhjsVHTv3p2BAwd2tBhNCOo5wmupl51mfoNRUB99kMNfnzRhNfw6X5nlPjbsdUDIE16jEN85KOts1dKZ2OUVVFpaGkOHDu1oMSydlILCAgq/KjQ9pyjDeRN7GUu8walQD6R68oUt9T76bw55z+b5eC5vpLxPpP89qYYrBjY1fPBztnpL5gjOr59PfVEh68tSWJOVzzGTknM+6oSL7qP4yVzYOgj6fEXu+GJeur+phxRL1yaQFV9Xwc+Kz2JpKQWFBRR+U2hiP0chf0UO+/8rl62bMskcUE7uhGJyxpQ0yVev8Jsrp5BVHmsBVTXrBy7ikbNKKM+ElO0hLhlyCfN+FT/K7OtFBYyqLqSna212xU54P31y0impEy66j+KHz4Fal4um1Apyz33EKqkkpr3XQXkPPlJEHhGRlSKyU0QOddJnisjYRAplsXQG5pbOjamccpbnMOiJPLZuMoYO5RuzWHRPHiVLmxo6rK2NMnyHCdFRk1ZGOovYd10JD3e7jpKzSvjylysDKSeA7LL5EcoJoGc3k55sFD+ZG6mcAGp7mnTLLkUgBeUooHeBHwAPEjmyvRP4VeJFs1iSG+0ee/QhtziXOo+hQ011OsWPRj5oK+qNBV9578jhuzDlmeX8/Urjfy+1Z0/qq6upq6pit912CyzrXln+82N7ZSahVd/WQc1Lt3RZgvagbgMWqOr/AF7XqB8AhzTdxWLZtYnWIyrfmEmdmmG9NTWOK6PNUDy0mGpP/PbqtGqKc4spc4rqMWgQn8+axarCQjK8notjsL7Mv6u3vjwJrfr6fNW8dEuXJaiC2h94xPnufW3cCthgQpZdiiBug2qz/HtEmQPKqVU491sYusb42aMGVpxSwvs/fJ86qUNR6qSO93/4PiUjzZzVrVPg1bTPIBRi9FVXNQm9Eos1WflU7IxMq9hp0pON3PHFkOrxwJ5aYdItuxRBFdT3QDSXyAdhfPZZLLsEJ1xzgjGOiKMfjp9QTFp6ZI8oLb2a3AnFdAv72IOGRb0jvsxh1IejSNEUBCFFUxj14SizOFdg8NocflA6he31N/G3wx6msKAwsMzHTJrD++mTWbclhfp6WLclJSkNJABeuv9ics99BPqUAvXQp9QaSOyiBPXF9yfgfOAs4A2gBjgMqABeAu5V1RltKGdCsFZ8ltay9yl7s/7w9YEWaNQNh4//2+iuyGvF5/WxN2WWvxVfWWYZxbnF5C3KI72mcU6rhhr2nry39VZuSQo60orvJmAZ8BqNvaVngI+A5cD/BT2giAwSkcdFpFxEtorIk0703nj7jRaR+SLymYhUishaESkSEbuIydIuFBQUsH5kMOUE8Oq/oysnMJZ7bqLNWWWWZ/Kzp34WoZwgMpx7QWEBqb9LtZ7LLV2KZq2DEpFcjI/mAcBmoFhVlzRj/wzgQ4zl3/9i5rNuxQRGHKmqUUN/isifgaMwcak+BvbGKM7dgUNUNe4Mqu1BWVpKQWGBiZQb0Dt5zvIczliUR8ilVNLSq8m7dBE5Y0qoqIdrnssh47lcMsszKc8sJ606jZ5VTcOzK4pEOaiifD/newrXFUa6WaqGyT4LeC2WtqIjvZkDoKrFmGg3LeUyzFzWCFX9EkBElgNfAJcDd8TY94+qGuGPSESWYkLTXwbc7LuXxdJKCgoLjNugZgQhzC3OjVBO4JiYP5JL7yNKuO3FHAY83jhkl1WeRW2oltqUWlLrGm/LWMoJTDj3eV/OaxrvOh3mfTnPei63dGqCroP6uYj8Lsq234rIhIDH+ynwZlg5AajqamApJjBiVLzKyUkrBTZgelMWS5swt3Ru5Mq/AEQbrivblMnQNZDxXG6TIbvU+lR2pu+kLLMMRanqXhbzGOFw7vW96323R0u3WDoLQXtQ1wP3RtlWCdwAPBqgnIMwc1dePqYFUXlF5ADMEN+nzd3XYglCQWFB1AW5OctzyC1uHKILx2sCs7jWz+ChPNOYnkdTYBlVGfz18hkcsAKWjYIr/+5vOFFHXYOBRMHVBf69O38rd4ul0xDUSGJfjEGEH58624PQD9jik74Z6BuwDABEJBWYi+lBRVOeiEi+iCwTkWXWY7mlubz+3OtMmT2FadOnMWXWlIZ4TDnLc8hblEdWeRbiE6+pONdn0W2qWXQLjYrKS4hyps6GT0dATbp/OTXUsOfkPRus93q+2bNpiPlqJ91i6cQEVVCVQLQYB4MwRg9B8XsdDb7isJG7gKOBc1XVT+mZg6nOV9XRqjq6Oa5hLJaJYyeS96K/EsotbjpE547XVDKyhEV5iyjrY4bryihj0YGLKMkxPSw/xQPVpDlTvGHPEQ3lOMN+ZZllTUzL5/1yHqHFISjD3F1lEFocYt4v57XFabFY2o2gQ3wvATeJyIuq+n04UUR2A6YC/wpYzhb8vU70xb9n5YuI3AbkAxeoatBjWyyBKSoq4gdLf+CrhE55/hQyqvzdDLmH7koOLqFknxJjo1qOmcdyXsXCQ4HhIcIQ5aRRTBomvedWoSJTG/KG86dsT6H29kj79EmTJgEwdepU1q5dy+DBg5k5c2ZDusXSWQmqoK4D3gRWisgLwDfAnsDJmPe2awOW8zFmHsrLgcAnQQoQkamYObFfq+pDAY9rsQSmoKCAwtcLmbZtmu/2jKqMqJZ1EUN34bDvYOaIPGMHDYpH4XbXMvc+Bx1E3tJVPHFCFRH6MUbAwUmTJlmFZOlyBBriU9W1wA8xw2qDgLHO/zuBQ4OsQXJ4FjhSRBrcJolINjDG2RYTEfk1Zt3UVFW9M+AxLZbAFBUVUbimEMZHnyeKppzCjl1dGb07+uJ22RdKT6fnsGEc9nYVZ76eRcq2FFDTc5q8t13XZNm1CBwPSlU3qOoNqnqkqu7r/J+qqhubcby7gTXAMyJyuoj8FGPV9xXQMGAuIkNEpFZEbnal/RyYDbwAvCwiR7o+BzZDBoslKhf8+QI4AhD/eSL1nUI16YvyFjUMxUXFs3taNYx16bRBEybwzeLFEAox/ebHqf1zLTpdqb29tkE5FRUVkZ2dTSgUIjs7m6KiouZW02LpFLRryHdVrRCR44FZwEOYd8piYIqqbndlFUwoOLcCPcVJP8X5uHkNOLaNxLbsQtSdVhcxTzRw7UCOWHZEQ68pWu+pskclucW5jH9yfBOT88iM0L0WdvQxPaexxXCok63v6NEMvfBCdnz3Hb0GDmS/449vsntRURH5+flUVlYCUFpaSn6+GfZrzhBfQWFBRDj4/GH5tndmSTqCOotNA64CxmOs+bp786jq7gmXLsFYV0eWaBQUFFCohbAHEUNx0Ry4uqkN1YIQ4QGiOq26aY+qGgYsgt+VNB26SO3Vi6Mff5yMvfdGRBianU1Gz0YzcbdCoRzzWucqesiQIaxZsyZYXcOeMdyLj2uwQ4iWVtGRro5mYVwR/RN4haarLiyWTstBEw/ik+xPoBtM7G3CYAxONc5cF8QIww5E9aEXNjkPm5WnlUN6MVzpo5wA9psyhe67746IMGDAgCbKqfDrQujlJGQBec53R0mtXRs84s38VfMbywqTZtKtayRLMhFUQZ0NXK+qf2lLYSyW9qKgsIB5X84z7oBGAAITe8Hde0DPEJQsNZ7Ioy3QK88sZ/bVswGYNt3f2i+zPJOschgx27j8L6CpXgAYPHEiq+69l3VPPcVR8+ez+4GRU6q+CiUd47bZUVCDB8cNCNBAXU//MO/R0i2WjiKoghLMPWaxdHoKCgsoXF/YxMHq/w2AVW/k8PyDp1C1PYNoZndea72obo36lLNHMbwLXIrxyeUla9QoqtavZ8c339Ctf3+G//CHTSLlRlUcTucuIyODmTNn+ufxIaUihbpeTctMqUjC8O+WXZqgVnx3AxPbUhCLpb2YWzrX99Ws/K0cFt2TR9X2nvgpp7AnB+/ckq87otRq1u79FqWfpnIesI+PHN332ou+hxzChtdeI7VPH05+8EF69Gzqniiq4ig3c0/z589vloFE/rB8E3I0QuDoa6wslo4iqIL6DjheRF4RkRtFpMDzsSE9LUlPQUEBqampvs5fJ/aClx/NpaY63WfPRmZPmU3JfiUR5uJN3BH1KSPjkG+Z9v/mMOOAA/BbA5HaqxdDL7qI1QsWgAhj/v53Bo8a5XvMaApl8n6TWbNmTbMX6M6ZPIfJe08mZbtdY2VJboJa8cXz26+qmvTjA9aKb9eloKCAwsJCs8TcWecUJn9FDvv/K5etm2JHIyzLLGuYd4pKDZzT4xxuHHstH15/Pd8taRrPU1JSOOD661kxaxZ1lZUccPXVnPqXvzQZ2ouQ35qFW5KcDrPiU9XAC3otlmSk8PVC4yCrGxE6KGd5DoP+mcfWOD2ntPRq8s4p5rte8I/tUTKpUU7U1jDm4cPYfjRkHRS51gkR6s4+mwdnz2Z0ZSWf9ejB7j7zTl7mTJ5jLewsuxztulDXYmlvioqKOLfoXLOCz0cH5BbnUhdTOSk9elUy9vwXyBlTwo+csYSoSmrHDp6of4Zax4ChLAsez4M1A00IjbJMhfKFMAxWlcB7VVX8o6CAlNTUFvvSs70rS1cl0BAfgIjsDvwGGI3xw3eGqn4sIlcBb6vqG20nZmKwQ3y7FgUFBRRuKzQWCh7ldOcAuCILZp47relGAJTM/uXknlNMzphIjxBramDomqZ7SCX0qQbfdb3qOUw1sIgGM/HmLLR1YxfdWpKFthjiCzoHdQSwBBMc8DXgQuBwVX1PRP4ADFfVsxIpWFtgFdSuw8gfjyS3xISyqM0q5/gJxRz7kxLqgY+X5vDKo7mUb8pEQvVofdPp08wBZVw1ezZ+I2/1CilfEKlwauHni4SFP9Pg0c3KMN4lARGhvr75IdpTf5fqbzLuE5bDYmlLOtqTxCuYgZIQcJFr29vALxIplMXSUvr+pC+D+g8i7628hlhOaWVZvLkgj92c1r743rwGaz2jnCK7N2np1eROKKYO/xtkba2T3Xm361YF45+HQ0uUF44zw3qBcDmpaM5CWzeJWHRrhwgtyUpQ44dDgTmqWk/TiLib8F+DaLG0GxlXZCDThbLjy8h9pWm025rqdIofzaXY15RckFAdoGQOKCPv0kUMO6qEuWVQ4enUVNTDjRsbdgOBepcWG1tsPJRHEG2Qwgmz0dyFtm6irZEKuug2PERY18s4ya3rVUfh14UUFBa0SB6LJZEEVVDlQLR46cMw66Qslg5BrhWqflDVoDAyo/jPK9+YSfnGKL716kPc/PAMfj17Nr2PKOGy7+BX38Pkr82cU72a/5d919RAoiYdnjeR3jm0BM5aBN0qaFRMfkN+1UBxyxbaumntotv5q+ZHzl9Bg18+i6WjCaqgngFmuAMNAioiA4DfAk8mXDKLJQ5ygyDThYl7wOqhUDccVmcDUZftKZkD/IMQZvYv5+9lkPIlDF0F/9gGWVuhtgiW3H4QKV8Yw4ho1ntlLr132sCxaF0omu2FWRg7cDK6XFu00NZNaxfdWr98lmQm6BzU9RgH/59gXIsBzAWGA6uBm6PsZ7EkHDlV4HCgG+R/nsOwZ0/hgYoMAHr0qkSiLtsT9j1kBR/+e1TEMF9qejW144q5Ojx05+xelgUL8+DtbVVNrfA8ZJVDSo8eHHDjjex9+ulUP/F81LzNNV4oKipi6tSprF27lsGDBzNz5swIpdaaNVLWL58lmQm6UHeLiBwJnIfxoVwBbAbuAR5U1Z1tJ6LFYhh5wkhO+e8pTKuaBs/DzrSddK9NY4fLiYnxoxdt0kd49+XRaH0ICdWh9SHKwsEFh/pHwtV0WNlvVUzllFILp7zXl73yTqT3vvsiIoS2h4yndC/1INMlsDFCogIURiN/WL6vmbr1y2dJBuKamYtIN+AszFqnL9pFqjbCmpl3TuQm4dyic9ln9T5RI9o2JU6XhyhBBVtAWgXMWjiIqq++ovd++3HsM89wx0uzmP/NfBMWI5pIAdYrZWdnU1pa2iS9peum/LBWfJZE0JHroKqAU1T1tUQevL2xCqrzMPKnI/nZC6eRUtPNlRpUOQUnkH+9eCjcPgN6DhvGMbNnc/C4cYgIMlLMeEMmUA/4jJrFW68UCoXwu0dbum7KYmkr2kJBBTWSKAH2S+SBLRYvV/e+mukynekynTMXjSelpjsNpnltoJwgusVf88qAH958MxcuX05OXl6DX70hW4eYhbgziHqnhY0RCgoLSP1dKjJdSP1daoOZd7T1US1dN9UeFBUVkZ2dTSgUIjs7m6Kioo4WydJJCaqgrgauFZFxImL991kSwpSjT+WWlJuZIdOZIdPJ3J6JOH9tpZC8lGf6W/UFRWphWzfhJLmF3tMzI9YPzZw5k4yMDOdA/vunVKTEXIsUUYZDa9ZNtTXhObPS0lJUldLSUi666CIGDBhgFZal2QQd4tsAZADdMSPpW/DMRKtq0i/WtUN8Hcu13a8lY2eGR/W0jyIyRE4CtXoOSqGJuwnPvFLYAq+0TynkETknVQ09X+5JPYuLogAAHN9JREFUxZEV4ON9Ijz8F8+KL5mINmfmJiMjo1VrvyzJSUfOQU0numkUAKo6I0EytRlWQbUP0+RmQq7OeeR61fZUSJH06FVBevcayjdmNlrvtcZAoo5mzSsVFBYw78t5xrqvHLNwowSI7q8WnR7MmXOyEG3OzEsijTwsyUGH+OITkRAm5Hu5qkYLMhAYERmE8e13Iua2fAmYoqprA+zbHfg9cC7mnfMD4DpV/Xdr5bK0jhlyM+ERY6OcGp+4HaeS3GhDyIx6NQtym7l7U2/kXg8MDtEWuY7pM4YH5j7QYDLeQDn+PahOuBZp8ODBcXtQAGvXxr3dLZZA66BCwBrMAMULrTmYiGQALwM7gQswt/2twCsiMlJVK+IUcS9wGvA7YBVwJfCiiBylqh+0RjZLdKbJNEKup3M96qgi9xM7RLKoIj969KpsCJuxNtY6Wa8iUqAS+AgYAWQ6i1tfrDMWes1QLFOnTm2qnMD0pLzDf510LdLMmTMj1m1FI5mNPCzJQ1wFpaq1IlKKmYNqLZdhfPeNUNUvAURkOfAFcDlwR7QdReSHGK/pF6vq/U7aa8DHwC3ATxMgX5ujqqAa+R9ISWt8Ha+prKS+thatq6O+tjbie7fMTLr37QvAjrIyylau9M1XX1vL4OOPJ7WbMdOeId5xJOXE6SHqdu6k74gRHHzBBb556sFRTo3pjYN3yauQ3KSlVzP2fPNutbMG7n8L4944ivgp24x3hdC2ED3+24PKtyrN3M+kxrmf7MeyKS32n1eKplii9hrCo4yOSXpnXosUPj/hObN+/fqxbds2qqsbPegms5GHJbkIOgd1GXAFZi3UhhYfTKQY6K6qYzzprwGo6v/E2Pcm4CYgS1UrXekzMK6Y+sTzaDE4LU2vy8pqUA7OMUGVo6ZNo1ufPqDK8rvvZuPHH0fmMV/of9BB5Fx8MarKzrIy/jtjRkM+t8JBlYMvvpj+Bx4Iqqx+4QVKlyzxlSu9d2+OuOGGhnLeuu02arY3HU2t4Ga8CqQnt0St7/CzzyaUns77ReGIfd6ugdm/W79+bN78yyh5oGMVUVMZFKU6vZr06nQqe1TSrbobqXWN71q1Ukt19530qMqIiAW1aSu89TB8+kkWN04uo7YbTQgaR6nBw8M+lY1rncohV3J56Y6XfPeJZ0DQVY0HOpORh6XldKSRxGPAGMxt+C7Ge7l7R1XVcwKU8y3wjKpe7kmfA5ytqtE8piMiC4FRqjrCkz4BeAQ4WFU/jnX8QSJ6VTwhk5RG5eSvZGLvG30Wvicz4uZpewVl6iEhjQgeGLayG7h2IKPfHU1IQ9RLPcsOW8bz4xp93eUszyG32AQnLHeMHwZ9UcK/cjGh18theDHwSQo3/O1vXDR5MlfOvbLVkWib++D1ui0Cs+BWVRkyZIh9cFs6NR0ZsHAAsMLzuyX0w5ioe9kM9G3FvuHtXRi/tUGdY4itKY3vNpn9TQ+nz49KuO3FHDKei1Q0JTklrNynhNeOe57Knv6llYwsibDGkzL46ElQl4FezZAhzHygUQHMmTwHCmmVi59JkyY1S6F4h79sb8JiiU1QZ7HHJfCYfl22IE9aVwzT4PuKSD6QDzAwwEEsbUxaNU/4rT1aDYeWlfDFRSVsy4Re5TCqGP665TgOO+EELnjqFlaetDNyvic8EuduxdWgxY0/Yw2btcYLeEtprlKzWHZlgnqSSBRb8O/p9MW/d+Rmc4x9w9uboKrzVXV0oruenYtow7jq+e7NZ9LUk65R8tZLPYpSTz0703eiKBU9KqjoUYGilGWW8cTYKAtjy+HDf6WQ/lB/5Bah/1NDuPy6h/n1yy8z5sYbmTHlXkKLQ1DmHLoMQotC5FbkRsRCyq3KZcjWIYhIq4MBWiyWjiVQD0pE/hQvj6peG6Coj4GDfNIPxMSairfvGSKS4TaScPatBuKubOnerx8HjB1rfKU5/tIQafCd1vDd+e333Z031rZoZQbN5/1ePNPPYMEoih/fdhuSkkIoJQVxPqHU1Ibf1xU+zo+WHdFk37dGv8NdtzxHKCWFMfeOJ//R33jOmHL76TP5ydcnNZkDWjdwHbmvuIbjjvcsejXmfxELUodkDWH4PsNNBFj33E819HyzJ/MemBdVmTRreCyqLajFYulMBDWSWO2T3Bfog3kEbVHVYT55vOVMAf4M7Keqq5y0bIyZ+fWq+pcY+x4CvA9cqKoPOGmpGCPdL1U1L97xO7snCT8z8GkBHXicevipTZTMc+88F5Gn76S+lO1TZhRLPWStzGJL0RYKCgqYP38+dXV1pKSkmHhEOdHnb+IZD9jwDhZL16PDrPii7izyI2A+cIWqvhEgf0/gQ6AK+F9MF+D3QG9gZNhThYgMAVYCt6jqLa79FwInYxbqrgYmA+OAo1X1vXjH7+wKymKxWJKVjgy34YuqvgXcDtwVMH8FcDzwOfAQUIRRNMd73CgJxsuZV76LgPsx3icWA4Mwa7PiKieLxWKxdC4SETpjE8YJTCAcn3tnxsmzBh/rPFWtAq5xPhaLxWLpwgQ1kvBzc5QOHIBxMxRzgazFYrFYLM0laA9qO9HXIH0N/CxhElksFovFQnArvgtpqqB2AOuAt1W1JvGiJR4n8GKsWACOY5wWbffb5k1z//b77k4bAGyMIUss4tUjVp5E1MP93dajfevh/e1tW62pRyw5g2y390jnblvxvo9Q1d7BxA6IOg5O7UcB5rd0u982b5r7t993T9qytqpHrDyJqIenTrYe7ViPeG2rNfUIUhd7j3TdttWW9Yj2CWTFJyK5Ti/Kb9uFIpJIV0gdyaJWbPfb5k1bFOd7vOMHJUg50fIkoh5BZYiHrUfz6+H93d5ty94jTX93lbbVlvXwJegQ35vAU6r6R59tvwXOVNWj2kC+XRYRWaZdwD2TrUdy0VXqAV2nLrYe0Qm6DuogINoK1/cx7oYsiWV+RwuQIGw9kouuUg/oOnWx9YhC0B7UFiBfVR/z2TYBuFtVMxMtnMVisVh2XYIqqEXAHsAxqlrtSk8H/gNsUNVxbSalxWKxWHY5giqokcDrmGAHjwDfAHsCEzAmhseo6kdtKKfFYrFYdjECzUGp6nLgCGApcB7wR+f/68ARVjl1PCKSLiKzReQLEflYRJ6Lv1fyISJrRGSFiHzgfC7taJlag4hcJCIqIp1yMbuIFIvIh861+I8TVaDTISLdReRpEfnUqcuLIhI3AkMyIiI3OvdIfWdqVyKyj4i8LiKfi8j7IhLXoCKwLz5V/QyY2CoJLW3J/2HcT41Q1XoR2bOjBWoF56jqBx0tRGtxvPJfBrzZ0bK0gvGqWg4gImcAC4BOqaSAQlV9EUBEfgncg3Fe3dkoxoxk3dvRgjSTucACVb1HRE4EikRkf40xjBd0HdQgETk0yrZDRWRQy+TtmojIQBG5U0TeEJFK5w06O0reQSLyuIiUi8hWEXlSRAY383gZmLD216tqPYCqftPZ6tFWdEQ9RCSEeYD8CtjZqgo0ltnu9QgrJ4c+LRTdT752rYuq7ggrJ4c3gVb3oDromrylqitbK3s8Elk3EdkNOBJ4wKnDEmfTYbFkCGpmXgicG2XbLwAbbS6S4Zj5uS0YIxJfHMXyMrA/cAFm2HRf4BUxsbOac7wtwPUi8o6I/FdETm+p8J5y27MeYR4UkRIReVBE9m7B/l46oh7XAEtV9d0WSexPh1wPESkSkXWY2G3RngPNpaPaVphfAc+0Yv8wHV2PtiSRdRsMrNdIt3ilTnp0ArrT2AiMi7LtNIwVX0JdXHTmDxByfb8U48cw2yffVUAdMNyVNhSoBa5xpb3nXAO/zyDMW4gClzj59wc2APt0pno4eYY4/1OBm4E3OuH1OAjzhp7m5H8V+Flnq4dPuZcCizvjPeIp8wbgDSCjk9cjIe2qPeqGeUat8Oy3BDOEHF2GgIJWAqdF2XYaUNlWJ6mzf+Jc2GLMW7Y3/TXgtWYcYwBQD6R7Lv5ZnakePvv3duqV1pnqgYn0/A2wxvnsAL4HJnemevjsL0A10D9R9WjvugC/xTgdyEpkHTrimrS1gkpk3YDdgG3uexkTuHZ0rOMGHeIrIbqBxERsPKiWchDgZwH5Mc3wzqGqG4EXgVMAHAOJgzHXrT1ISD1EpKeIZLmSJgEfaft5y0/U9ShU1T1VNVtVszG9qXxVLUyMmHFJ1PXo6zG2OROjaDe3TrxmkZC6AIjINZjn1YmqWpYA2ZpDwuqRhMStm6puAN4GLgRwjCQEiDkEHtSK7w/AEyLSDWPFE14HdQGm0caMkGuJSj/M+K6XzUDfZpY1GbhXRGZieh2/VdUVrZQvKImqxx6YdpaCabxfAWe3XrzAJPJ6dCSJqkdf4BER6Y5pU99jhvrjL55MHAmpi4gMBP4CrMLMjQDUavv5wEtY2xKR/wWuwPRKDhaRuzA9kW9bLWXLCFq3K4AHROR3mFG5SfHaUiAFpapPicgFwG0YZaQ0Bis8V1WfDlKOxRe/C9Qk3H3cQlTXALmtlqbltLoeqroKGJUYcVpMQq5HRIGqx7Zm/5Ye1ietJdfj8MSI0yoSUZd1zd2nDUjUvX4rcGvrxUkoceumql8ARzen0Oasg3pIRB7GTMD3AzZhJr3a822qq7EFcy699MX/jSRZsfVILrpKPaDr1KWr1MOPNqtbYAUF4CijT1tzQEsEH2PGb70cCHzSzrK0BluP5KKr1AO6Tl26Sj38aLO6xTWSEJFDROQeMe4pKpzP5yJyt3RStydJxLPAkeJyueIshBvjbOss2HokF12lHtB16tJV6uFHm9UtprNYZzLrNox54CuYhVUAQ4BjMWbAN6rq7a0RoisiImc5X3Mxk4MFmLVJG1T1NSdPT+BDoAr4X8w47u8x53Wkqm5vb7m92HrYerQVXaUuXaUefnR43WLYvedhLHf+APTx2d4b4/+tjihrpHblj3OR/D6vevINBp4AtmJeBJ7GZ62BrYetR1eqR1eqS1epRzLWLWoPSkReBVar6kW+GRrz3e8IclysfBaLxWKxNIdYc1CjgIUBylgI+DqStVgsFoulpcRSUCkYX0rxqI1TjsVisVgszSaWYvkICBLGfRz+bi4sFovFYmkxsRTUXOBKEblMHL8gXsREOy3AhOOwWCwWiyVhxDMzLwQuB74AFhFpZn4asB8wT1UL2lhOi8VisexixFRQAGJi3l+FiYbYzUneiYmn8ldVTUTQL4vFYrFYIoiroBoyGg/TA5yfG1W1rs2kslgsFssuT2DrO1WtU9XvnM8uqZxEZLqIqIh8EWX7l8726e0sWpshIsc6dTq4mfvlO71vb/oaEflz4iRsHc413ZgEcoxzznN2R8tiSX5EJNtpL0EM2Tot1jy8+ewAhopIRBwZETkcMze3o0OkSj7ygSYKCjgD+Fs7y2KxWDohVkE1nwrgZeDnnvSfO+kV7S5RMxGRNGfItt1R1fdVdW1HHNvSORBD946Ww9LxWAXVMhYCE8Lm987/CUTxvCEix4jIayJSKSKbHE/wvV3b9xSR+0RklYhUOd7ibxWRdFeecJd+gojME5FyEVknIjNEJOZ1FJFXReRxZ9htJaaXt5ez7WARWSwi25zPYyLygzjl/UZE3nFk+E5EFonIcPfxgMOACxyZVUQudLY1DPGJyEUislMiw7wjIgc5++S60k4XkWUiskNEvhWRP4lIWhw5TxORJSLyvYhsFZE3ReSkKHnHiMh7TvkfiMgxnu0qIr/0pEUMD4rIhU6+HOe4FSLymYiM9+wnzr7fO+f8QaCPj0x/EJESEdnuXOsi77UJn08RudrJs0VEFvqc0/5Ou/nGqeMKEZni2h4SkevFDFPvdNrgBbHOr7NfzLbgyneGiLzttO9NIvKciAxxn0fnPnkH0z7PdrYNFZGnneu3za98EblERD52yt4o5l47yLX9BqdeOxwZX4jVxkVkoIg86lyfKhFZKSK/d20/SkSeFZH1zjX+QEQmecoIt4VDxdx/lU6+Q0Wkp4jc75yzVSIy0bOv+35d48iwWET2DnA9LnXOxU4RKRWRaz3bD3Lqv9mR/VMRuTJeuR2FVVAt40lMePLwQ+zHmPDLT3kzisgYoBj4FjgLmAKcCtzvyjYAEx75GuAU4HbgIuBOn2P/CdjulPUwcLPzPR5jMGHhr8M4Ai53bvSlQHfgPOBCTFyXRSL+a98cBgJ3AacDl2G8jiwVkUxnewHwGfAccJTzWexTzpPO/zM86edgwou/CiAiE5y8bwM/BWZghhBvi1PnoZjlEedhIkH/F3jeuSZuMjDnci7mwVjm5IupqGPw/zBhBs7ALNFYKCbkeJhfY67bfMy1q8JcVy+7Yxwyn4ZpN8OAl6Vp73cCxtt0Pub6jnP2A0BEemDO5c8wXqZPxYQ/38tVxp0YT9TzneM9Bdwn8ec44rUFROQ8zPVb6ch6EfA55p4JkwE8ANyDuQfeFpFumHvnAKfsCzHX9DUR6eeU/RPMdXsYGAtcjLnOmc7284EbgTuAkzH3wJdAzxh1ehAYhDmfY4GZNFowgxnKXwpcirmXngDu9yoahweAf2DanwCPA/cC6zHX/i3gQU/7AHPP/ArzTLgEGIlxwBoVMdEnCp1845zvv5fIF6tnMQ6+z8XcS3diHH8nJx3tLbczfYDpGAtGgGeAvzvf5wBPO983AtNd+/wHeMVTzvEYj8AHRzlOKvALzJtkupOW7ezzoCfvB8DCOHK/inkI/sCT/hCwInwMJ21fXB7qMWFVYsmaAvTAeDA+35W+DFjgk38N8GfX72eAFzx5VgB3Od8Fs/7ufk+ei5069Q947ULOeX0RuM9zTRX4hSutF+aF4Q+uNAV+Ga09OL8vdPJd7Errj3EHdoXrfK0HCj1lLXH2zY5xnvd28vzEcz5XAqmutNnAt67fl2MiExwSpezhzvYLPOkPAu804/5o0hac8/418GSc+0qB0z3pVzjnbpgrbSBQDdzg/P4t8G6Msu8CnghaB2ef7UBewLzitKt5wMs+beECV9qpTpq7/WUCNcBkV9qrTtoQV9oYZ99TnN/Zzu9xzu8+jtzTPPLdgnk5DlthK5DTnPPRkR/bg2o5C4GznLe8s/AZ3hORDMyb0KMikhr+AK9jGuBhTj4RkSki8omIVDnbijBvbYM9xf7L8/sTzE0bj3dV9VtP2gmYN+V6l2yrMQ+90URBRI4UM4S1CfMAqcQ81PcLIIeXR4BcERnglH2IU84jzvb9MOfAew5fxvT8oloXOkM1D4jI146cNcBJUeRs6P2qiV+zBDiiBfUB1zVS1U2Y3mD4Gg0C9sQoZjdPen4jImNF5L8iUu7Iv87Z5JX/FVV1+838BNhdGoeIjwfeV9UPosibi1FQT3nOcTFwiE+PzS1jvLYwAtNTuz9KEWEUeN6TdgTwnqquasikug7TewmPXnwAjBKRWSLyE1edcW0/VcxQ+BGx6uLZ5zZnmM57/yEifUXkbyJSimlTNZjell+7KnZ9/9L5/7KrPuWY+Ere4bv3VLXUlW8pph1Fa5NHYXqFj/ncJ3tg2t9m4CtgroicIyK7RykrabAKquU8i7kRZ2IaxiKfPH0xby5zaGzINZiFzmmYhxWY4Zu/YB6Sp2MaYXhc2DtZXOb5Xe2Tx4/vfNIGYIaEajyfYS7ZInBu2H9h3hwvx7zZHY65eVoysf2sc8zwPM05mDfu110yghkudMu42kmPJmfIKftozHDacY6cz/vIuV1Vqzxp32MUSUuIdY3Cw4bf+xyvATFWoc9ilNJ5mAfQkc7mIG1CgPDDuj/wTQx5B2DaaTmR53gBpnfgex4CtoX+zv9YxwfYoqrVnrQ98W+33wH9AFT1JcyQ4U8wPY+NIjJHTBA9gPswQ3wTMMNp34nI7+MoqnMwIwCzgFJn7ijXtX2Bk+d2zAvP4c5x/Nq/+9pU+6SF0737ettHOC1amwzfJx8TeQ1fcdIHqWq9I++3jrzfish/RGRUlDI7nNSOFqCzoqoVIvJP4GrgMVX1s94rw7wZTsc8YL2sd/6f7ZQxNbxBRA5MrMT4rcjejFGK9/hsi7Y26BTMfMHp4To7b2r9WiSU6nYRWYy54edjHiSPqjM+4cgI5g31fZ8iVvukgRm2GgWMVdUXwonOfIyXXiLSw6OkdifyobqTxgd+mJbUOdyL9b69en+fgXmzPid8LsQxKmgBmzDnIxqbMb2fMZielBe/hyUEawubnP/xlL1f+/wGMyfqZQ8a2wWq+gDwgIjshnnRmYUJnHe981CeBcwSkUHAJMxL5deYuaumgqh+DVzovOQcgbl/n3UUcgVmju6Xqtqwv8QxVGoBfr0bb5t0Ez4f4/BX6isAVPUz4EwxBkY/Bv4ILBaRgc65SiqsgmodhZhhuGgNvUJE3gRGqOotMcrpgXkAupnklzHBFGOGyN51KYR49MA8xNxDShNo2paC9uzADI8+IiJ5mN6be7h0BeZhkq2qdwcsLywnuM6r84AfAyz3yX8GxrgBEekFnIhRmGHWYSbrw2WFMENnzeUrjJI6HXjBlT7ek68HUOO5Li1tE8XA2SIyUlX96v4ypgeVqapLmlFukLYQvn4X4D/KEIu3gPNFZKiqrgZwLNmOxiiNCFR1AzBPjNVkkxc8Vf0K+IOIXOS33Sd/PfCmiMzAGF4Mwcz3pRDZrnpjDA6C3kNBOFREBquzJMMx7NkdYyjkxxuYOdm9VNXPICkCVa3BGNzcgWn3WbiUfrJgFVQrUNVXcSzNYnAtUCwi9RgLnm2YOZXTgKmq+jlmvuPXIvIW5gaYROw33kQxHdPgF4vIfZhe096Yh/MCp35ewg+z+0XkXswb7m9pOmzxGXCyiJyMeYte7czH+LEYM3cxz8nXcBOqar2I/AZ4SET6YIboqjGK7GfAWapa6VPmZxil8hcRuQljqTQD87D0UgXMdBTTeqc+6cBfXXmewnj3fx9YhbHgamIaHg9VrRORPwF/FmOi/h+MhdcBnqxLgCkiMhvzYD8aY3nVEh7EDBn/S4yXkxUYa7j9VPV6VV0hInMx1oZ/wgxvdcdc2/1U9dIo5cZtC871uxYoEpEijEWbYpT7P1R1WQy5F2CGoJ8XkZsxxjvTMe10HoCjPPrhDO9hes3/A1zvbJ+HefC+iRnCPA5jCHSd3wHFWB++6JyzzzEvoL/BvFR8qqpVYkzhbxaRrRgFfb1TdrPbQwy+B/7pXK/umJ7Oe+7RADeqWubk/avzIvZvzBTOfsBxqnqGiIwE/oyZ312FmYK4DvhQVZNOOQHWiq85HzxWW1HyRFjxOWk/wrwtb8UMEXyCMXvNdLb3wkwib3Y+92C66g3Wc3isdlxlLwCWxZHpVeDxKNv2xyjOzZgH9ZeYm3+gs/1YtxxO2vkYRVqFufF/RFPrvGHAS5gbV4ELnfSIfK78Dzv5bosi51jMw7zCOY8fALfisl7z2edwjAKuwph7X+g9XzQ+8H7slLkT+BCXpZzrGj3gnKdvMSbZEe2BRsutXp59vedGMObeGzAvLEUYq80IKz7My81XTp1fwjxYI6wJ/c6nnxyYuaC7MQ++HRgF/muPTFMwcxg7Hdlew2WZGeUcx20LTr7xwLvOsTdhXkqGxLuvnHb0tHOetgP/BPZ1bR+H6SFucMpegVEY4joXS53rVonpPV8Soz7dnPO0wsm/0TlmjivPcBoX5a91rlPctkD0e9jbPl7F3JNXOOVXYV7MBgUo61znPFcBWzC90GucbbtjLHdXOefqW8wLw+CWPhPb+hPYWazFYrFY2h4xC903qmqQ9Y1dGmvFZ7FYLJakxCooi8VisSQldojPYrFYLEmJ7UFZLBaLJSmxCspisVgsSYlVUBaLxWJJSqyCslgsFktSYhWUxWKxWJISq6AsFovFkpT8f0H1OJHTJZyoAAAAAElFTkSuQmCC\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "'''Choose True or False for the following parameters:\n", - " norm_graph : True/False : Prints and saves the neutral evolution graph without any coloring\n", - " colored_graph : True/False : Prints and saves the neutral evolution graph without any coloring\n", - " non_neutral : True/False : Prints and saves the most non-neutral microbes in csv file\n", - "'''\n", - "norm_graph = True\n", - "colored_graph = True\n", - "non_neutral = True\n", - "\n", - "#Run with no output files\n", - "#Neufit_Pipeline_Main('combined', 'hutchKraken', 'combined_biome', norm_graph, colored_graph, non_neutral, non_save = True)\n", - "\n", - "#Run with all possible output files\n", - "Neufit_Pipeline_Main('combined', 'hutchKraken', 'combined_biome', norm_graph, colored_graph, non_neutral, full_non_neutral = True)" + "# TODO" ] }, { @@ -1316,7 +462,7 @@ "non_neutral = True\n", "\n", "#Run with all possible output files\n", - "Neufit_Pipeline_Main('progressors', 'hutchKraken', 'P_biome', norm_graph, colored_graph, non_neutral, full_non_neutral = True)" + "neufit('progressors', 'hutchKraken', 'P_biome', norm_graph, colored_graph, non_neutral, full_non_neutral = True)" ] }, { @@ -1545,7 +691,7 @@ "non_neutral = True\n", "\n", "#Run with all possible output files\n", - "Neufit_Pipeline_Main('progressorsT1', 'hutchKraken', 'P_T1_biome', norm_graph, colored_graph, non_neutral, full_non_neutral = True)" + "neufit('progressorsT1', 'hutchKraken', 'P_T1_biome', norm_graph, colored_graph, non_neutral, full_non_neutral = True)" ] }, { @@ -1816,7 +962,7 @@ "non_neutral = True\n", "\n", "#Run with all possible output files\n", - "Neufit_Pipeline_Main('progressorsT2', 'hutchKraken', 'P_T2_biome', norm_graph, colored_graph, non_neutral, full_non_neutral = True)" + "neufit('progressorsT2', 'hutchKraken', 'P_T2_biome', norm_graph, colored_graph, non_neutral, full_non_neutral = True)" ] }, { @@ -2003,7 +1149,7 @@ "non_neutral = True\n", "\n", "#Run with all possible output files\n", - "Neufit_Pipeline_Main('nonProgressors', 'hutchKraken', 'NP_biome', norm_graph, colored_graph, non_neutral, full_non_neutral = True)" + "neufit('nonProgressors', 'hutchKraken', 'NP_biome', norm_graph, colored_graph, non_neutral, full_non_neutral = True)" ] }, { @@ -2176,7 +1322,7 @@ "non_neutral = True\n", "\n", "#Run with all possible output files\n", - "Neufit_Pipeline_Main('nonProgressorsT1', 'hutchKraken', 'NP_T1_biome', norm_graph, colored_graph, non_neutral, full_non_neutral = True)" + "neufit('nonProgressorsT1', 'hutchKraken', 'NP_T1_biome', norm_graph, colored_graph, non_neutral, full_non_neutral = True)" ] }, { @@ -2364,7 +1510,7 @@ "non_neutral = True\n", "\n", "#Run with all possible output files\n", - "Neufit_Pipeline_Main('nonProgressorsT2', 'hutchKraken', 'NP_T2_biome', norm_graph, colored_graph, non_neutral, full_non_neutral = True)" + "neufit('nonProgressorsT2', 'hutchKraken', 'NP_T2_biome', norm_graph, colored_graph, non_neutral, full_non_neutral = True)" ] }, { @@ -2639,10 +1785,10 @@ "#Options: S1: normal/cancer; S2:e (esophgous) / hn (head and neck)\n", "\n", "#Run with no output files\n", - "#Neufit_Pipeline_Main('normal', 'TCGA_WGS', 'e', norm_graph, colored_graph, non_neutral, non_save = True)\n", + "#neufit('normal', 'TCGA_WGS', 'e', norm_graph, colored_graph, non_neutral, non_save = True)\n", "\n", "#Run with all possible output files\n", - "Neufit_Pipeline_Main('normal', 'TCGA_WGS', 'e', norm_graph, colored_graph, non_neutral, full_non_neutral = True)\n" + "neufit('normal', 'TCGA_WGS', 'e', norm_graph, colored_graph, non_neutral, full_non_neutral = True)\n" ] }, { @@ -2836,7 +1982,7 @@ "#Options: S1: normal/cancer; S2:e (esophgous) / hn (head and neck)\n", "\n", "#Run with all possible output files\n", - "Neufit_Pipeline_Main('cancer', 'TCGA_WGS', 'e', norm_graph, colored_graph, non_neutral, full_non_neutral = True)" + "neufit('cancer', 'TCGA_WGS', 'e', norm_graph, colored_graph, non_neutral, full_non_neutral = True)" ] }, { @@ -3072,7 +2218,7 @@ "#Options: S1: normal/cancer; S2:e (esophgous) / hn (head and neck)\n", "\n", "#Run with all possible output files\n", - "Neufit_Pipeline_Main('normal', 'TCGA_WGS', 'hn', norm_graph, colored_graph, non_neutral, full_non_neutral = True)" + "neufit('normal', 'TCGA_WGS', 'hn', norm_graph, colored_graph, non_neutral, full_non_neutral = True)" ] }, { @@ -3279,15 +2425,15 @@ "non_neutral = True\n", "#Options: S1: normal/cancer; S2:e (esophgous) / hn (head and neck)\n", "\n", - "Neufit_Pipeline_Main('cancer', 'TCGA_WGS', 'hn', norm_graph, colored_graph, non_neutral, full_non_neutral = True)" + "neufit('cancer', 'TCGA_WGS', 'hn', norm_graph, colored_graph, non_neutral, full_non_neutral = True)" ] } ], "metadata": { "kernelspec": { - "display_name": "neufit", + "display_name": "Python 3", "language": "python", - "name": "neufit" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -3299,7 +2445,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.5.5" + "version": "3.6.11" } }, "nbformat": 4, diff --git a/neufit_output/TCGA_WGS/esophagus_cancer/esophagus_cancer_2021-09-12_21:01:35.txt b/ipynb/neufit_output/TCGA_WGS/esophagus_cancer/esophagus_cancer_2021-09-12_21:01:35.txt similarity index 100% rename from neufit_output/TCGA_WGS/esophagus_cancer/esophagus_cancer_2021-09-12_21:01:35.txt rename to ipynb/neufit_output/TCGA_WGS/esophagus_cancer/esophagus_cancer_2021-09-12_21:01:35.txt diff --git a/neufit_output/TCGA_WGS/esophagus_cancer/esophagus_cancer_2021-09-12_21:01:35_FullNonNeutral.csv b/ipynb/neufit_output/TCGA_WGS/esophagus_cancer/esophagus_cancer_2021-09-12_21:01:35_FullNonNeutral.csv similarity index 100% rename from neufit_output/TCGA_WGS/esophagus_cancer/esophagus_cancer_2021-09-12_21:01:35_FullNonNeutral.csv rename to ipynb/neufit_output/TCGA_WGS/esophagus_cancer/esophagus_cancer_2021-09-12_21:01:35_FullNonNeutral.csv diff --git a/neufit_output/TCGA_WGS/esophagus_cancer/esophagus_cancer_2021-09-12_21:01:35_NeutralFitPlot.png b/ipynb/neufit_output/TCGA_WGS/esophagus_cancer/esophagus_cancer_2021-09-12_21:01:35_NeutralFitPlot.png similarity index 100% rename from neufit_output/TCGA_WGS/esophagus_cancer/esophagus_cancer_2021-09-12_21:01:35_NeutralFitPlot.png rename to ipynb/neufit_output/TCGA_WGS/esophagus_cancer/esophagus_cancer_2021-09-12_21:01:35_NeutralFitPlot.png diff --git a/neufit_output/TCGA_WGS/esophagus_cancer/esophagus_cancer_2021-09-12_21:01:35_NeutralFitPlot_withNonNeutralColors.png b/ipynb/neufit_output/TCGA_WGS/esophagus_cancer/esophagus_cancer_2021-09-12_21:01:35_NeutralFitPlot_withNonNeutralColors.png similarity index 100% rename from neufit_output/TCGA_WGS/esophagus_cancer/esophagus_cancer_2021-09-12_21:01:35_NeutralFitPlot_withNonNeutralColors.png rename to ipynb/neufit_output/TCGA_WGS/esophagus_cancer/esophagus_cancer_2021-09-12_21:01:35_NeutralFitPlot_withNonNeutralColors.png diff --git a/neufit_output/TCGA_WGS/esophagus_cancer/esophagus_cancer_2021-09-12_21:01:35_NonNeutral_Outliers.csv b/ipynb/neufit_output/TCGA_WGS/esophagus_cancer/esophagus_cancer_2021-09-12_21:01:35_NonNeutral_Outliers.csv similarity index 100% rename from neufit_output/TCGA_WGS/esophagus_cancer/esophagus_cancer_2021-09-12_21:01:35_NonNeutral_Outliers.csv rename to ipynb/neufit_output/TCGA_WGS/esophagus_cancer/esophagus_cancer_2021-09-12_21:01:35_NonNeutral_Outliers.csv diff --git a/neufit_output/TCGA_WGS/esophagus_normal/esophagus_normal_2021-09-12_20:54:39.txt b/ipynb/neufit_output/TCGA_WGS/esophagus_normal/esophagus_normal_2021-09-12_20:54:39.txt similarity index 100% rename from neufit_output/TCGA_WGS/esophagus_normal/esophagus_normal_2021-09-12_20:54:39.txt rename to ipynb/neufit_output/TCGA_WGS/esophagus_normal/esophagus_normal_2021-09-12_20:54:39.txt diff --git a/neufit_output/TCGA_WGS/esophagus_normal/esophagus_normal_2021-09-12_20:54:39_FullNonNeutral.csv b/ipynb/neufit_output/TCGA_WGS/esophagus_normal/esophagus_normal_2021-09-12_20:54:39_FullNonNeutral.csv similarity index 100% rename from neufit_output/TCGA_WGS/esophagus_normal/esophagus_normal_2021-09-12_20:54:39_FullNonNeutral.csv rename to ipynb/neufit_output/TCGA_WGS/esophagus_normal/esophagus_normal_2021-09-12_20:54:39_FullNonNeutral.csv diff --git a/neufit_output/TCGA_WGS/esophagus_normal/esophagus_normal_2021-09-12_20:54:39_NeutralFitPlot.png b/ipynb/neufit_output/TCGA_WGS/esophagus_normal/esophagus_normal_2021-09-12_20:54:39_NeutralFitPlot.png similarity index 100% rename from neufit_output/TCGA_WGS/esophagus_normal/esophagus_normal_2021-09-12_20:54:39_NeutralFitPlot.png rename to ipynb/neufit_output/TCGA_WGS/esophagus_normal/esophagus_normal_2021-09-12_20:54:39_NeutralFitPlot.png diff --git a/neufit_output/TCGA_WGS/esophagus_normal/esophagus_normal_2021-09-12_20:54:39_NeutralFitPlot_withNonNeutralColors.png b/ipynb/neufit_output/TCGA_WGS/esophagus_normal/esophagus_normal_2021-09-12_20:54:39_NeutralFitPlot_withNonNeutralColors.png similarity index 100% rename from neufit_output/TCGA_WGS/esophagus_normal/esophagus_normal_2021-09-12_20:54:39_NeutralFitPlot_withNonNeutralColors.png rename to ipynb/neufit_output/TCGA_WGS/esophagus_normal/esophagus_normal_2021-09-12_20:54:39_NeutralFitPlot_withNonNeutralColors.png diff --git a/neufit_output/TCGA_WGS/esophagus_normal/esophagus_normal_2021-09-12_20:54:39_NonNeutral_Outliers.csv b/ipynb/neufit_output/TCGA_WGS/esophagus_normal/esophagus_normal_2021-09-12_20:54:39_NonNeutral_Outliers.csv similarity index 100% rename from neufit_output/TCGA_WGS/esophagus_normal/esophagus_normal_2021-09-12_20:54:39_NonNeutral_Outliers.csv rename to ipynb/neufit_output/TCGA_WGS/esophagus_normal/esophagus_normal_2021-09-12_20:54:39_NonNeutral_Outliers.csv diff --git a/neufit_output/TCGA_WGS/headNeck_cancer/headNeck_cancer_2021-09-12_20:57:10.txt b/ipynb/neufit_output/TCGA_WGS/headNeck_cancer/headNeck_cancer_2021-09-12_20:57:10.txt similarity index 100% rename from neufit_output/TCGA_WGS/headNeck_cancer/headNeck_cancer_2021-09-12_20:57:10.txt rename to ipynb/neufit_output/TCGA_WGS/headNeck_cancer/headNeck_cancer_2021-09-12_20:57:10.txt diff --git a/neufit_output/TCGA_WGS/headNeck_cancer/headNeck_cancer_2021-09-12_20:57:10_FullNonNeutral.csv b/ipynb/neufit_output/TCGA_WGS/headNeck_cancer/headNeck_cancer_2021-09-12_20:57:10_FullNonNeutral.csv similarity index 100% rename from neufit_output/TCGA_WGS/headNeck_cancer/headNeck_cancer_2021-09-12_20:57:10_FullNonNeutral.csv rename to ipynb/neufit_output/TCGA_WGS/headNeck_cancer/headNeck_cancer_2021-09-12_20:57:10_FullNonNeutral.csv diff --git a/neufit_output/TCGA_WGS/headNeck_cancer/headNeck_cancer_2021-09-12_20:57:10_NeutralFitPlot.png b/ipynb/neufit_output/TCGA_WGS/headNeck_cancer/headNeck_cancer_2021-09-12_20:57:10_NeutralFitPlot.png similarity index 100% rename from neufit_output/TCGA_WGS/headNeck_cancer/headNeck_cancer_2021-09-12_20:57:10_NeutralFitPlot.png rename to ipynb/neufit_output/TCGA_WGS/headNeck_cancer/headNeck_cancer_2021-09-12_20:57:10_NeutralFitPlot.png diff --git a/neufit_output/TCGA_WGS/headNeck_cancer/headNeck_cancer_2021-09-12_20:57:10_NeutralFitPlot_withNonNeutralColors.png b/ipynb/neufit_output/TCGA_WGS/headNeck_cancer/headNeck_cancer_2021-09-12_20:57:10_NeutralFitPlot_withNonNeutralColors.png similarity index 100% rename from neufit_output/TCGA_WGS/headNeck_cancer/headNeck_cancer_2021-09-12_20:57:10_NeutralFitPlot_withNonNeutralColors.png rename to ipynb/neufit_output/TCGA_WGS/headNeck_cancer/headNeck_cancer_2021-09-12_20:57:10_NeutralFitPlot_withNonNeutralColors.png diff --git a/neufit_output/TCGA_WGS/headNeck_cancer/headNeck_cancer_2021-09-12_20:57:10_NonNeutral_Outliers.csv b/ipynb/neufit_output/TCGA_WGS/headNeck_cancer/headNeck_cancer_2021-09-12_20:57:10_NonNeutral_Outliers.csv similarity index 100% rename from neufit_output/TCGA_WGS/headNeck_cancer/headNeck_cancer_2021-09-12_20:57:10_NonNeutral_Outliers.csv rename to ipynb/neufit_output/TCGA_WGS/headNeck_cancer/headNeck_cancer_2021-09-12_20:57:10_NonNeutral_Outliers.csv diff --git a/neufit_output/TCGA_WGS/headNeck_normal/headNeck_normal_2021-09-12_21:02:45.txt b/ipynb/neufit_output/TCGA_WGS/headNeck_normal/headNeck_normal_2021-09-12_21:02:45.txt similarity index 100% rename from neufit_output/TCGA_WGS/headNeck_normal/headNeck_normal_2021-09-12_21:02:45.txt rename to ipynb/neufit_output/TCGA_WGS/headNeck_normal/headNeck_normal_2021-09-12_21:02:45.txt diff --git a/neufit_output/TCGA_WGS/headNeck_normal/headNeck_normal_2021-09-12_21:02:45_FullNonNeutral.csv b/ipynb/neufit_output/TCGA_WGS/headNeck_normal/headNeck_normal_2021-09-12_21:02:45_FullNonNeutral.csv similarity index 100% rename from neufit_output/TCGA_WGS/headNeck_normal/headNeck_normal_2021-09-12_21:02:45_FullNonNeutral.csv rename to ipynb/neufit_output/TCGA_WGS/headNeck_normal/headNeck_normal_2021-09-12_21:02:45_FullNonNeutral.csv diff --git a/neufit_output/TCGA_WGS/headNeck_normal/headNeck_normal_2021-09-12_21:02:45_NeutralFitPlot.png b/ipynb/neufit_output/TCGA_WGS/headNeck_normal/headNeck_normal_2021-09-12_21:02:45_NeutralFitPlot.png similarity index 100% rename from neufit_output/TCGA_WGS/headNeck_normal/headNeck_normal_2021-09-12_21:02:45_NeutralFitPlot.png rename to ipynb/neufit_output/TCGA_WGS/headNeck_normal/headNeck_normal_2021-09-12_21:02:45_NeutralFitPlot.png diff --git a/neufit_output/TCGA_WGS/headNeck_normal/headNeck_normal_2021-09-12_21:02:45_NeutralFitPlot_withNonNeutralColors.png b/ipynb/neufit_output/TCGA_WGS/headNeck_normal/headNeck_normal_2021-09-12_21:02:45_NeutralFitPlot_withNonNeutralColors.png similarity index 100% rename from neufit_output/TCGA_WGS/headNeck_normal/headNeck_normal_2021-09-12_21:02:45_NeutralFitPlot_withNonNeutralColors.png rename to ipynb/neufit_output/TCGA_WGS/headNeck_normal/headNeck_normal_2021-09-12_21:02:45_NeutralFitPlot_withNonNeutralColors.png diff --git a/neufit_output/TCGA_WGS/headNeck_normal/headNeck_normal_2021-09-12_21:02:45_NonNeutral_Outliers.csv b/ipynb/neufit_output/TCGA_WGS/headNeck_normal/headNeck_normal_2021-09-12_21:02:45_NonNeutral_Outliers.csv similarity index 100% rename from neufit_output/TCGA_WGS/headNeck_normal/headNeck_normal_2021-09-12_21:02:45_NonNeutral_Outliers.csv rename to ipynb/neufit_output/TCGA_WGS/headNeck_normal/headNeck_normal_2021-09-12_21:02:45_NonNeutral_Outliers.csv diff --git a/neufit_output/hutchKraken/combined/combined_2021-09-11_01:05:08.txt b/ipynb/neufit_output/hutchKraken/combined/combined_2021-09-11_01:05:08.txt similarity index 100% rename from neufit_output/hutchKraken/combined/combined_2021-09-11_01:05:08.txt rename to ipynb/neufit_output/hutchKraken/combined/combined_2021-09-11_01:05:08.txt diff --git a/neufit_output/hutchKraken/combined/combined_2021-09-11_01:05:08_FullNonNeutral.csv b/ipynb/neufit_output/hutchKraken/combined/combined_2021-09-11_01:05:08_FullNonNeutral.csv similarity index 100% rename from neufit_output/hutchKraken/combined/combined_2021-09-11_01:05:08_FullNonNeutral.csv rename to ipynb/neufit_output/hutchKraken/combined/combined_2021-09-11_01:05:08_FullNonNeutral.csv diff --git a/neufit_output/hutchKraken/combined/combined_2021-09-11_01:05:08_NeutralFitPlot.png b/ipynb/neufit_output/hutchKraken/combined/combined_2021-09-11_01:05:08_NeutralFitPlot.png similarity index 100% rename from neufit_output/hutchKraken/combined/combined_2021-09-11_01:05:08_NeutralFitPlot.png rename to ipynb/neufit_output/hutchKraken/combined/combined_2021-09-11_01:05:08_NeutralFitPlot.png diff --git a/neufit_output/hutchKraken/combined/combined_2021-09-11_01:05:08_NeutralFitPlot_withNonNeutralColors.png b/ipynb/neufit_output/hutchKraken/combined/combined_2021-09-11_01:05:08_NeutralFitPlot_withNonNeutralColors.png similarity index 100% rename from neufit_output/hutchKraken/combined/combined_2021-09-11_01:05:08_NeutralFitPlot_withNonNeutralColors.png rename to ipynb/neufit_output/hutchKraken/combined/combined_2021-09-11_01:05:08_NeutralFitPlot_withNonNeutralColors.png diff --git a/neufit_output/hutchKraken/combined/combined_2021-09-11_01:05:08_NonNeutral_Outliers.csv b/ipynb/neufit_output/hutchKraken/combined/combined_2021-09-11_01:05:08_NonNeutral_Outliers.csv similarity index 100% rename from neufit_output/hutchKraken/combined/combined_2021-09-11_01:05:08_NonNeutral_Outliers.csv rename to ipynb/neufit_output/hutchKraken/combined/combined_2021-09-11_01:05:08_NonNeutral_Outliers.csv diff --git a/neufit_output/hutchKraken/nonProgressors/nonProgressors_2021-09-11_16:59:37.txt b/ipynb/neufit_output/hutchKraken/nonProgressors/nonProgressors_2021-09-11_16:59:37.txt similarity index 100% rename from neufit_output/hutchKraken/nonProgressors/nonProgressors_2021-09-11_16:59:37.txt rename to ipynb/neufit_output/hutchKraken/nonProgressors/nonProgressors_2021-09-11_16:59:37.txt diff --git a/neufit_output/hutchKraken/nonProgressors/nonProgressors_2021-09-11_16:59:37_FullNonNeutral.csv b/ipynb/neufit_output/hutchKraken/nonProgressors/nonProgressors_2021-09-11_16:59:37_FullNonNeutral.csv similarity index 100% rename from neufit_output/hutchKraken/nonProgressors/nonProgressors_2021-09-11_16:59:37_FullNonNeutral.csv rename to ipynb/neufit_output/hutchKraken/nonProgressors/nonProgressors_2021-09-11_16:59:37_FullNonNeutral.csv diff --git a/neufit_output/hutchKraken/nonProgressors/nonProgressors_2021-09-11_16:59:37_NeutralFitPlot.png b/ipynb/neufit_output/hutchKraken/nonProgressors/nonProgressors_2021-09-11_16:59:37_NeutralFitPlot.png similarity index 100% rename from neufit_output/hutchKraken/nonProgressors/nonProgressors_2021-09-11_16:59:37_NeutralFitPlot.png rename to ipynb/neufit_output/hutchKraken/nonProgressors/nonProgressors_2021-09-11_16:59:37_NeutralFitPlot.png diff --git a/neufit_output/hutchKraken/nonProgressors/nonProgressors_2021-09-11_16:59:37_NeutralFitPlot_withNonNeutralColors.png b/ipynb/neufit_output/hutchKraken/nonProgressors/nonProgressors_2021-09-11_16:59:37_NeutralFitPlot_withNonNeutralColors.png similarity index 100% rename from neufit_output/hutchKraken/nonProgressors/nonProgressors_2021-09-11_16:59:37_NeutralFitPlot_withNonNeutralColors.png rename to ipynb/neufit_output/hutchKraken/nonProgressors/nonProgressors_2021-09-11_16:59:37_NeutralFitPlot_withNonNeutralColors.png diff --git a/neufit_output/hutchKraken/nonProgressors/nonProgressors_2021-09-11_16:59:37_NonNeutral_Outliers.csv b/ipynb/neufit_output/hutchKraken/nonProgressors/nonProgressors_2021-09-11_16:59:37_NonNeutral_Outliers.csv similarity index 100% rename from neufit_output/hutchKraken/nonProgressors/nonProgressors_2021-09-11_16:59:37_NonNeutral_Outliers.csv rename to ipynb/neufit_output/hutchKraken/nonProgressors/nonProgressors_2021-09-11_16:59:37_NonNeutral_Outliers.csv diff --git a/neufit_output/hutchKraken/nonProgressorsT1/nonProgressorsT1_2021-09-11_17:01:16.txt b/ipynb/neufit_output/hutchKraken/nonProgressorsT1/nonProgressorsT1_2021-09-11_17:01:16.txt similarity index 100% rename from neufit_output/hutchKraken/nonProgressorsT1/nonProgressorsT1_2021-09-11_17:01:16.txt rename to ipynb/neufit_output/hutchKraken/nonProgressorsT1/nonProgressorsT1_2021-09-11_17:01:16.txt diff --git a/neufit_output/hutchKraken/nonProgressorsT1/nonProgressorsT1_2021-09-11_17:01:16_FullNonNeutral.csv b/ipynb/neufit_output/hutchKraken/nonProgressorsT1/nonProgressorsT1_2021-09-11_17:01:16_FullNonNeutral.csv similarity index 100% rename from neufit_output/hutchKraken/nonProgressorsT1/nonProgressorsT1_2021-09-11_17:01:16_FullNonNeutral.csv rename to ipynb/neufit_output/hutchKraken/nonProgressorsT1/nonProgressorsT1_2021-09-11_17:01:16_FullNonNeutral.csv diff --git a/neufit_output/hutchKraken/nonProgressorsT1/nonProgressorsT1_2021-09-11_17:01:16_NeutralFitPlot.png b/ipynb/neufit_output/hutchKraken/nonProgressorsT1/nonProgressorsT1_2021-09-11_17:01:16_NeutralFitPlot.png similarity index 100% rename from neufit_output/hutchKraken/nonProgressorsT1/nonProgressorsT1_2021-09-11_17:01:16_NeutralFitPlot.png rename to ipynb/neufit_output/hutchKraken/nonProgressorsT1/nonProgressorsT1_2021-09-11_17:01:16_NeutralFitPlot.png diff --git a/neufit_output/hutchKraken/nonProgressorsT1/nonProgressorsT1_2021-09-11_17:01:16_NeutralFitPlot_withNonNeutralColors.png b/ipynb/neufit_output/hutchKraken/nonProgressorsT1/nonProgressorsT1_2021-09-11_17:01:16_NeutralFitPlot_withNonNeutralColors.png similarity index 100% rename from neufit_output/hutchKraken/nonProgressorsT1/nonProgressorsT1_2021-09-11_17:01:16_NeutralFitPlot_withNonNeutralColors.png rename to ipynb/neufit_output/hutchKraken/nonProgressorsT1/nonProgressorsT1_2021-09-11_17:01:16_NeutralFitPlot_withNonNeutralColors.png diff --git a/neufit_output/hutchKraken/nonProgressorsT1/nonProgressorsT1_2021-09-11_17:01:16_NonNeutral_Outliers.csv b/ipynb/neufit_output/hutchKraken/nonProgressorsT1/nonProgressorsT1_2021-09-11_17:01:16_NonNeutral_Outliers.csv similarity index 100% rename from neufit_output/hutchKraken/nonProgressorsT1/nonProgressorsT1_2021-09-11_17:01:16_NonNeutral_Outliers.csv rename to ipynb/neufit_output/hutchKraken/nonProgressorsT1/nonProgressorsT1_2021-09-11_17:01:16_NonNeutral_Outliers.csv diff --git a/neufit_output/hutchKraken/nonProgressorsT2/nonProgressorsT2_2021-09-11_17:02:10.txt b/ipynb/neufit_output/hutchKraken/nonProgressorsT2/nonProgressorsT2_2021-09-11_17:02:10.txt similarity index 100% rename from neufit_output/hutchKraken/nonProgressorsT2/nonProgressorsT2_2021-09-11_17:02:10.txt rename to ipynb/neufit_output/hutchKraken/nonProgressorsT2/nonProgressorsT2_2021-09-11_17:02:10.txt diff --git a/neufit_output/hutchKraken/nonProgressorsT2/nonProgressorsT2_2021-09-11_17:02:10_FullNonNeutral.csv b/ipynb/neufit_output/hutchKraken/nonProgressorsT2/nonProgressorsT2_2021-09-11_17:02:10_FullNonNeutral.csv similarity index 100% rename from neufit_output/hutchKraken/nonProgressorsT2/nonProgressorsT2_2021-09-11_17:02:10_FullNonNeutral.csv rename to ipynb/neufit_output/hutchKraken/nonProgressorsT2/nonProgressorsT2_2021-09-11_17:02:10_FullNonNeutral.csv diff --git a/neufit_output/hutchKraken/nonProgressorsT2/nonProgressorsT2_2021-09-11_17:02:10_NeutralFitPlot.png b/ipynb/neufit_output/hutchKraken/nonProgressorsT2/nonProgressorsT2_2021-09-11_17:02:10_NeutralFitPlot.png similarity index 100% rename from neufit_output/hutchKraken/nonProgressorsT2/nonProgressorsT2_2021-09-11_17:02:10_NeutralFitPlot.png rename to ipynb/neufit_output/hutchKraken/nonProgressorsT2/nonProgressorsT2_2021-09-11_17:02:10_NeutralFitPlot.png diff --git a/neufit_output/hutchKraken/nonProgressorsT2/nonProgressorsT2_2021-09-11_17:02:10_NeutralFitPlot_withNonNeutralColors.png b/ipynb/neufit_output/hutchKraken/nonProgressorsT2/nonProgressorsT2_2021-09-11_17:02:10_NeutralFitPlot_withNonNeutralColors.png similarity index 100% rename from neufit_output/hutchKraken/nonProgressorsT2/nonProgressorsT2_2021-09-11_17:02:10_NeutralFitPlot_withNonNeutralColors.png rename to ipynb/neufit_output/hutchKraken/nonProgressorsT2/nonProgressorsT2_2021-09-11_17:02:10_NeutralFitPlot_withNonNeutralColors.png diff --git a/neufit_output/hutchKraken/nonProgressorsT2/nonProgressorsT2_2021-09-11_17:02:10_NonNeutral_Outliers.csv b/ipynb/neufit_output/hutchKraken/nonProgressorsT2/nonProgressorsT2_2021-09-11_17:02:10_NonNeutral_Outliers.csv similarity index 100% rename from neufit_output/hutchKraken/nonProgressorsT2/nonProgressorsT2_2021-09-11_17:02:10_NonNeutral_Outliers.csv rename to ipynb/neufit_output/hutchKraken/nonProgressorsT2/nonProgressorsT2_2021-09-11_17:02:10_NonNeutral_Outliers.csv diff --git a/neufit_output/hutchKraken/progressors/progressors_2021-09-11_16:51:08.txt b/ipynb/neufit_output/hutchKraken/progressors/progressors_2021-09-11_16:51:08.txt similarity index 100% rename from neufit_output/hutchKraken/progressors/progressors_2021-09-11_16:51:08.txt rename to ipynb/neufit_output/hutchKraken/progressors/progressors_2021-09-11_16:51:08.txt diff --git a/neufit_output/hutchKraken/progressors/progressors_2021-09-11_16:51:08_FullNonNeutral.csv b/ipynb/neufit_output/hutchKraken/progressors/progressors_2021-09-11_16:51:08_FullNonNeutral.csv similarity index 100% rename from neufit_output/hutchKraken/progressors/progressors_2021-09-11_16:51:08_FullNonNeutral.csv rename to ipynb/neufit_output/hutchKraken/progressors/progressors_2021-09-11_16:51:08_FullNonNeutral.csv diff --git a/neufit_output/hutchKraken/progressors/progressors_2021-09-11_16:51:08_NeutralFitPlot.png b/ipynb/neufit_output/hutchKraken/progressors/progressors_2021-09-11_16:51:08_NeutralFitPlot.png similarity index 100% rename from neufit_output/hutchKraken/progressors/progressors_2021-09-11_16:51:08_NeutralFitPlot.png rename to ipynb/neufit_output/hutchKraken/progressors/progressors_2021-09-11_16:51:08_NeutralFitPlot.png diff --git a/neufit_output/hutchKraken/progressors/progressors_2021-09-11_16:51:08_NeutralFitPlot_withNonNeutralColors.png b/ipynb/neufit_output/hutchKraken/progressors/progressors_2021-09-11_16:51:08_NeutralFitPlot_withNonNeutralColors.png similarity index 100% rename from neufit_output/hutchKraken/progressors/progressors_2021-09-11_16:51:08_NeutralFitPlot_withNonNeutralColors.png rename to ipynb/neufit_output/hutchKraken/progressors/progressors_2021-09-11_16:51:08_NeutralFitPlot_withNonNeutralColors.png diff --git a/neufit_output/hutchKraken/progressors/progressors_2021-09-11_16:51:08_NonNeutral_Outliers.csv b/ipynb/neufit_output/hutchKraken/progressors/progressors_2021-09-11_16:51:08_NonNeutral_Outliers.csv similarity index 100% rename from neufit_output/hutchKraken/progressors/progressors_2021-09-11_16:51:08_NonNeutral_Outliers.csv rename to ipynb/neufit_output/hutchKraken/progressors/progressors_2021-09-11_16:51:08_NonNeutral_Outliers.csv diff --git a/neufit_output/hutchKraken/progressorsT1/progressorsT1_2021-09-11_16:56:12.txt b/ipynb/neufit_output/hutchKraken/progressorsT1/progressorsT1_2021-09-11_16:56:12.txt similarity index 100% rename from neufit_output/hutchKraken/progressorsT1/progressorsT1_2021-09-11_16:56:12.txt rename to ipynb/neufit_output/hutchKraken/progressorsT1/progressorsT1_2021-09-11_16:56:12.txt diff --git a/neufit_output/hutchKraken/progressorsT1/progressorsT1_2021-09-11_16:56:12_FullNonNeutral.csv b/ipynb/neufit_output/hutchKraken/progressorsT1/progressorsT1_2021-09-11_16:56:12_FullNonNeutral.csv similarity index 100% rename from neufit_output/hutchKraken/progressorsT1/progressorsT1_2021-09-11_16:56:12_FullNonNeutral.csv rename to ipynb/neufit_output/hutchKraken/progressorsT1/progressorsT1_2021-09-11_16:56:12_FullNonNeutral.csv diff --git a/neufit_output/hutchKraken/progressorsT1/progressorsT1_2021-09-11_16:56:12_NeutralFitPlot.png b/ipynb/neufit_output/hutchKraken/progressorsT1/progressorsT1_2021-09-11_16:56:12_NeutralFitPlot.png similarity index 100% rename from neufit_output/hutchKraken/progressorsT1/progressorsT1_2021-09-11_16:56:12_NeutralFitPlot.png rename to ipynb/neufit_output/hutchKraken/progressorsT1/progressorsT1_2021-09-11_16:56:12_NeutralFitPlot.png diff --git a/neufit_output/hutchKraken/progressorsT1/progressorsT1_2021-09-11_16:56:12_NeutralFitPlot_withNonNeutralColors.png b/ipynb/neufit_output/hutchKraken/progressorsT1/progressorsT1_2021-09-11_16:56:12_NeutralFitPlot_withNonNeutralColors.png similarity index 100% rename from neufit_output/hutchKraken/progressorsT1/progressorsT1_2021-09-11_16:56:12_NeutralFitPlot_withNonNeutralColors.png rename to ipynb/neufit_output/hutchKraken/progressorsT1/progressorsT1_2021-09-11_16:56:12_NeutralFitPlot_withNonNeutralColors.png diff --git a/neufit_output/hutchKraken/progressorsT1/progressorsT1_2021-09-11_16:56:12_NonNeutral_Outliers.csv b/ipynb/neufit_output/hutchKraken/progressorsT1/progressorsT1_2021-09-11_16:56:12_NonNeutral_Outliers.csv similarity index 100% rename from neufit_output/hutchKraken/progressorsT1/progressorsT1_2021-09-11_16:56:12_NonNeutral_Outliers.csv rename to ipynb/neufit_output/hutchKraken/progressorsT1/progressorsT1_2021-09-11_16:56:12_NonNeutral_Outliers.csv diff --git a/neufit_output/hutchKraken/progressorsT2/progressorsT2_2021-09-11_16:57:39.txt b/ipynb/neufit_output/hutchKraken/progressorsT2/progressorsT2_2021-09-11_16:57:39.txt similarity index 100% rename from neufit_output/hutchKraken/progressorsT2/progressorsT2_2021-09-11_16:57:39.txt rename to ipynb/neufit_output/hutchKraken/progressorsT2/progressorsT2_2021-09-11_16:57:39.txt diff --git a/neufit_output/hutchKraken/progressorsT2/progressorsT2_2021-09-11_16:57:39_FullNonNeutral.csv b/ipynb/neufit_output/hutchKraken/progressorsT2/progressorsT2_2021-09-11_16:57:39_FullNonNeutral.csv similarity index 100% rename from neufit_output/hutchKraken/progressorsT2/progressorsT2_2021-09-11_16:57:39_FullNonNeutral.csv rename to ipynb/neufit_output/hutchKraken/progressorsT2/progressorsT2_2021-09-11_16:57:39_FullNonNeutral.csv diff --git a/neufit_output/hutchKraken/progressorsT2/progressorsT2_2021-09-11_16:57:39_NeutralFitPlot.png b/ipynb/neufit_output/hutchKraken/progressorsT2/progressorsT2_2021-09-11_16:57:39_NeutralFitPlot.png similarity index 100% rename from neufit_output/hutchKraken/progressorsT2/progressorsT2_2021-09-11_16:57:39_NeutralFitPlot.png rename to ipynb/neufit_output/hutchKraken/progressorsT2/progressorsT2_2021-09-11_16:57:39_NeutralFitPlot.png diff --git a/neufit_output/hutchKraken/progressorsT2/progressorsT2_2021-09-11_16:57:39_NeutralFitPlot_withNonNeutralColors.png b/ipynb/neufit_output/hutchKraken/progressorsT2/progressorsT2_2021-09-11_16:57:39_NeutralFitPlot_withNonNeutralColors.png similarity index 100% rename from neufit_output/hutchKraken/progressorsT2/progressorsT2_2021-09-11_16:57:39_NeutralFitPlot_withNonNeutralColors.png rename to ipynb/neufit_output/hutchKraken/progressorsT2/progressorsT2_2021-09-11_16:57:39_NeutralFitPlot_withNonNeutralColors.png diff --git a/neufit_output/hutchKraken/progressorsT2/progressorsT2_2021-09-11_16:57:39_NonNeutral_Outliers.csv b/ipynb/neufit_output/hutchKraken/progressorsT2/progressorsT2_2021-09-11_16:57:39_NonNeutral_Outliers.csv similarity index 100% rename from neufit_output/hutchKraken/progressorsT2/progressorsT2_2021-09-11_16:57:39_NonNeutral_Outliers.csv rename to ipynb/neufit_output/hutchKraken/progressorsT2/progressorsT2_2021-09-11_16:57:39_NonNeutral_Outliers.csv diff --git a/neuevo/__init__.py b/neuevo/__init__.py new file mode 100644 index 0000000..3aa0d7b --- /dev/null +++ b/neuevo/__init__.py @@ -0,0 +1 @@ +__version__ = "0.0.0" \ No newline at end of file diff --git a/neuevo/neufit.py b/neuevo/neufit.py new file mode 100644 index 0000000..db86a44 --- /dev/null +++ b/neuevo/neufit.py @@ -0,0 +1,235 @@ +# neufit: Fit a neutral community model to species abundances, e.g. from an OTU table +# +# For the theory behind this see Sloan et al, Environ Microbiol 2006 8:732-740. +# To run on the example simulation data: python neufit.py sim_data.csv +# To link with the mock taxonomy use the -t sim_taxonomy.csv option +# +# Copyright (C) 2018 Michael Sieber (sieber.ecoevo.de) +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR +# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# Github: https://github.com/misieber/neufit + +import os +import scipy +import numpy as np +import pandas as pd +from datetime import datetime +from lmfit import Parameters, Model, fit_report +from scipy.stats import beta +from statsmodels.stats.proportion import proportion_confint +from neuevo.neufit_utils import beta_cdf, subsample +from neuevo.neuplot import neufit_plot + +''' +All the code in the function below was written by the orginal Neufit authors, not me and is from the neufit.py file. +neufit.py: https://github.com/misieber/neufit/blob/master/neufit.py + +Here are the following changes I made: +- Turned it into a function instead of using argv as input + -There are specific notes on what I changed in the 'Notes: Neufit Modifyed Args Input/Output Details' section. +-Changed was all syntax from Python2 to Python3. +-Changed all print statments to write to file statements + - Made any necissary edits to allow data to be printed to file instead of terminal +''' + + +def neufit(fnData, fnTaxonomy, output_filename, + dataset_type, custom_filename, + norm_graph, colored_graph, non_neutral, + non_save = False, full_non_neutral = False): + ''' + Inputs: output_filename : the name which will be at the front of all the files; ex. 'combined' + dataset_type : the dataset type we have from here: ('hutchKraken', 'gregTCGA') + custom_filename : depedant on the dataset: + - hutchKraken : the name of the biom file ex.'combined_biome' + + norm_graph : True/False : Prints and saves the neutral evolution graph without any coloring + colored_graph : True/False : Prints and saves the neutral evolution graph without any coloring + non_neutral : True/False : Prints and saves the most non-neutral microbes in csv file + + Default inputs to be changed mainly for testing purposes: + non_save : True/False, False = default, the following will not be SAVED just printed to the screen + - * This is intened for testing only! * + - norm_graph, colored_graph, non_neutral_csv + - **Took away this feature ** [It will still create [name]_data.csv and [name]_taxonomy.csv but will delete them after running] + full_non_neutral: True/False : Creates a csv file from orginal Neufit program with information about what is neutral and how far off the curve each point is + - csv will have: otu_id, mean_abundance, occurrence, Kingdom, Phylum, Class, Order, Family, Genus, Species, predicted_occurrence, lower_conf_int, upper_conf_int + + *The main function for the pipeline which calls all other functions* + + ''' + + #Run Neufit + occurr_freqs, n_reads, n_samples, r_square, beta_fit, file_header = main_neufit(output_filename, + dataset_type, + fnData, fnTaxonomy, + full_non_neutral) + + #Neufit Plotting and Non-neutral Outline + if norm_graph == True: #Neutral evolution graph, no color + nc_fn = neufit_plot(occurr_freqs, n_reads, n_samples, r_square, beta_fit, file_header) + if non_save == False: + save_plot(nc_fn) + if colored_graph == True:#Neutral evolution graph with colors + cc_fn = custom_color_plot(occurr_freqs, n_reads, n_samples, r_square, beta_fit, file_header) + if non_save == False: + save_plot(cc_fn) + if non_neutral == True: + non_neutral_outliers(file_header, occurr_freqs, dataset_type, non_save) + + #Easier to delete the Neufit text file then to not create it + if non_save == True: + neufit_fn= str(file_header) + ".txt" + os.remove(neufit_fn) + + #Optional cleanup step with non_save to remove taxonomy and data files - these just overwrite themseleves so not usally a big issue + ''' + if non_save == True: #Delete [name]_data.csv and [name]_taxonomy.csv to reduce cluter + os.remove(fnData) + os.remove(fnTaxonomy) + ''' + + +def main_neufit(output_filename, dataset_type, _data_filename, _taxonomy_filename, full_non_neutral = False, arg_ignore_level = 0, arg_rarefaction_level = 0): + '''Inputs: output_filename : the name which will be at the front of all the files; ex. 'combined' + dataset_type : the dataset type we have from here: ('hutchKraken', 'gregTCGA') + _data_filename = path of []_data.csv file needed for Neufit to run + _taxonomy_filename = path of []_taxonomy.csv file needed for Neufit to run + arg_ignore_level = 0 ; default set from orginal Neufit program + arg_rarefaction_level = 0; default set from orginal Neufit program + Outputs: file_header = file path for neufit outpus: The path+dataGroup name + date stamp to be used for all parts of Neufit run: ex. /home/cguccion/NeutralEvolutionModeling/neufit_output/hutchKrakenAlex_combined_2021-08-26_13:24:22 + occurr_freqs = pandas df that Neufit created in the orginal program but didn't specifcally output orginally + - Headers of csv: otu_id, mean_abundance, occurrence, Kingdom, Phylum, Class, Order, Family, Genus, Species, predicted_occurrence, lower_conf_int, upper_conf_int + - I used this occur_freqs df in order to figure out which species is the most non-neutral + - I believe this is what Neufit uses to physical plot the dots on the graph as well + n_reads = Number of reads (from orginal program) + n_samples = Number of smaples (from orginal program) + r_square = R^2 value (from orginal program) + beta_fit = stats on the preformance of the model (from orginal program) + *Runs the main section of neufit''' + + ##Added by Caitlin ~ Push output to file instead of printing to screen + + #Grab and format data/time + time = datetime.time(datetime.now()) + date = datetime.date(datetime.now()) + h,s = str(time).split(".") #Split the string into hours/min and seconds + + #Create file_header which holds the path / location for all future Neufit outpus + file_header = str(neufit_output_path) + "/" + str(dataset_type) + '/' + str(output_filename) + '/' + str(output_filename) + '_' + str(date) + "_" + str(h) + + #Creates directory for all Neufit Outputs if it doesn't already exist + dir_name = str(neufit_output_path) + "/" + str(dataset_type) + '/' + str(output_filename) + os.makedirs(dir_name, exist_ok=True) + + #Create and open file for Neufit Output txt file + fn= str(file_header) + ".txt" + file = open(fn, 'w') + + #Print statments with important info + print("Running dataset: " + str(dataset_type) + "Category:" + str(output_filename) + '\n') + ## + + # Writes dataset info to Neufit output file + calculates and writes the number of samples/ reads in the file + file.write('Corresponding csv file: ' + _data_filename + '\n') + abundances = pd.read_table(_data_filename, header=0, index_col=0, sep='\t').astype(int) + abundances = abundances[abundances.sum(1) > arg_ignore_level] + file.write ('Dataset contains ' + str(abundances.shape[1]) + ' samples (sample_id, reads): \n') + ##Caitlin + #The following loop is used instead of 'print abundances.sum(0)' so that it can be written to a file + for index, col in abundances.iteritems(): + col_sum = 0 + for i in col: + col_sum += i + file.write (index + '\t' + str(col_sum) + '\n') + file.write ('\n') + ## + + # Determine uniform read depth + if arg_rarefaction_level == 0 or arg_rarefaction_level > max(abundances.sum(0)): + arg_rarefaction_level = min(abundances.sum(0)) + file.write ('rarefying to highest possible uniform read depth'), + else: + file.write ('rarefying to custom rarefaction level'), + file.write ('(' + str(arg_rarefaction_level) + ' reads per sample) \n') + + # Optionally subsample the abundance table, unless all samples already have the required uniform read depth + if not all(n_reads == arg_rarefaction_level for n_reads in abundances.sum(0)): + abundances = subsample(abundances, arg_rarefaction_level) + abundances = abundances[abundances.sum(1) > 0] + + # Dataset shape + n_otus, n_samples = abundances.shape + n_reads = arg_rarefaction_level + + file.write ('fitting neutral expectation to dataset with ' + str(n_samples) + ' samples and ' + str(n_otus) + ' otus \n \n') + # Calculate mean relative abundances and occurrence frequencies + mean_relative_abundance = (1.0*abundances.sum(1))/n_reads/n_samples + occurrence_frequency = (1.0*np.count_nonzero(abundances, axis=1))/n_samples + + occurr_freqs = pd.DataFrame(mean_relative_abundance, columns=['mean_abundance']) + if dataset_type == 'TCGA_WGS': + occurr_freqs.index.name = 'gOTU' #This changes the name of the first column + else: + occurr_freqs.index.name = 'otu_id' + occurr_freqs['occurrence'] = occurrence_frequency + occurr_freqs = occurr_freqs.sort_values(by=['mean_abundance']) + + # Join with taxonomic information (optional) + if _taxonomy_filename != None: #Changed <> to != + if dataset_type == 'TCGA_WGS': + taxonomy = pd.read_table(_taxonomy_filename, header=0, index_col=1, sep='\t') + else: + taxonomy = pd.read_table(_taxonomy_filename, header=0, index_col=0, sep='\t') + occurr_freqs = occurr_freqs.join(taxonomy) + + # Fit the neutral model + params = Parameters() + params.add('N', value=n_reads, vary=False) + params.add('m', value=0.5, min=0.0, max=1.0) + beta_model = Model(beta_cdf) + beta_fit = beta_model.fit(occurr_freqs['occurrence'], params, p=occurr_freqs['mean_abundance']) + + # Report fit statistics + r_square = 1.0 - np.sum(np.square(occurr_freqs['occurrence'] - beta_fit.best_fit))/np.sum(np.square(occurr_freqs['occurrence'] - np.mean(occurr_freqs['occurrence']))) + file.write (fit_report(beta_fit)) + file.write ('\n R^2 = ' + '{:1.2f}'.format(r_square)) + print(fit_report(beta_fit)) + print('\n R^2 = ' + '{:1.2f}'.format(r_square)) + print('=========================================================') + + # Adding the neutral prediction to results + occurr_freqs['predicted_occurrence'] = beta_fit.best_fit + occurr_freqs['lower_conf_int'], occurr_freqs['upper_conf_int'] = proportion_confint(occurr_freqs['predicted_occurrence']*n_samples, n_samples, alpha=0.05, method='wilson') + + # Save non-neutral otus (here simply determined by lying outside the confidence intervals) + above = occurr_freqs[occurr_freqs['occurrence'] > occurr_freqs['upper_conf_int']] + below = occurr_freqs[occurr_freqs['occurrence'] < occurr_freqs['lower_conf_int']] + + #Create orginal non neutral output file from Neufit + if full_non_neutral == True: + pd.concat((above, below)).to_csv(str(file_header) + '_FullNonNeutral.csv') + + file.close() + + return(occurr_freqs, n_reads, n_samples, r_square, beta_fit, file_header) \ No newline at end of file diff --git a/neuevo/neufit_utils.py b/neuevo/neufit_utils.py new file mode 100644 index 0000000..687171d --- /dev/null +++ b/neuevo/neufit_utils.py @@ -0,0 +1,59 @@ +# neufit: Fit a neutral community model to species abundances, e.g. from an OTU table +# +# For the theory behind this see Sloan et al, Environ Microbiol 2006 8:732-740. +# To run on the example simulation data: python neufit.py sim_data.csv +# To link with the mock taxonomy use the -t sim_taxonomy.csv option +# +# Copyright (C) 2018 Michael Sieber (sieber.ecoevo.de) +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR +# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# Github: https://github.com/misieber/neufit + +import os +import numpy as np +from scipy.stats import beta +from datetime import datetime + +def beta_cdf(p, N, m): + # Expected long term distribution under the neutral model (truncated cumulative beta-distribution) + return beta.cdf(1.0, N*m*p, N*m*(1.0-p)) - beta.cdf(1.0/N, N*m*p, N*m*(1.0-p)) + +def subsample(counts, depth): + # Subsamples counts to uniform depth, dropping all samples without enough depth + for sample in counts: + if counts[sample].sum() >= depth: + flattened = np.repeat(np.arange(counts[sample].size), counts[sample]) + subsample = np.random.choice(flattened, depth, replace=False) + counts[sample] = np.bincount(subsample, minlength=counts[sample].size) + else: + #CG: changed the following print statment from Python2 to Python3 + print('dropping sample ' + sample + ' with ' + str(counts[sample].sum()) + ' reads < ' + str(depth)) + counts = counts.drop(sample, axis=1) + return counts + +def non_negative_int(arg): + # Argparser type: non-negative int + nnint = int(arg) + if nnint < 0: + raise ArgumentTypeError(arg + ' < 0, must be non-negative') + return nnint \ No newline at end of file diff --git a/neuevo/neuplot.py b/neuevo/neuplot.py new file mode 100644 index 0000000..daaba31 --- /dev/null +++ b/neuevo/neuplot.py @@ -0,0 +1,90 @@ +# neufit: Fit a neutral community model to species abundances, e.g. from an OTU table +# +# For the theory behind this see Sloan et al, Environ Microbiol 2006 8:732-740. +# To run on the example simulation data: python neufit.py sim_data.csv +# To link with the mock taxonomy use the -t sim_taxonomy.csv option +# +# Copyright (C) 2018 Michael Sieber (sieber.ecoevo.de) +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR +# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# Github: https://github.com/misieber/neufit + +''' +All the code in the function below was written by the orginal Neufit authors, not me and is from the neufit.py file. +neufit.py: https://github.com/misieber/neufit/blob/master/neufit.py + +Here are the following changes I made: +- Turned it into a function +- Added a plot clearing option to avoid double keys +''' +import os +import numpy as np +from scipy.stats import beta +from datetime import datetime +from statsmodels.stats.proportion import proportion_confint +from matplotlib import pyplot +from math import log10 + +def neufit_plot(occurr_freqs, n_reads, n_samples, r_square, beta_fit, file_header): + '''Inputs: occurr_freqs = pandas df that Neufit created in the orginal program but didn't specifcally output orginally + - Headers of csv: otu_id, mean_abundance, occurrence, Kingdom, Phylum, Class, Order, Family, Genus, Species, predicted_occurrence, lower_conf_int, upper_conf_int + - I used this occur_freqs df in order to figure out which species is the most non-neutral + - I believe this is what Neufit uses to physical plot the dots on the graph as well + n_reads = Number of reads (from orginal program) + n_samples = Number of smaples (from orginal program) + r_square = R^2 value (from orginal program) + beta_fit = stats on the preformance of the model (from orginal program) + file_header = file path for all neufit outpus: The path+dataGroup name + date stamp to be used for all parts of Neufit run: ex. /home/cguccion/NeutralEvolutionModeling/neufit_output/hutchKrakenAlex_combined_2021-08-26_13:24:22 + Outputs: ! The orginal plot that Neufit outputs (just black dots) - will print to the screen but not save when run + fn = default filename for neufit plot : customName_NeutralFitPlot.png -- Will often not need this + *Creates the plot for Neufit ''' + + + pyplot.cla() #Clears previous plot - to avoid double keys + + # Prepare results plot + pyplot.xlabel('Mean relative abundance across samples', fontsize=15) + pyplot.xscale('log') + x_range = np.logspace(log10(min(occurr_freqs['mean_abundance'])/10), 0, 1000) + pyplot.xlim(min(x_range), max(x_range)) + pyplot.xticks(fontsize=16) + pyplot.ylabel('Occurrence frequency in samples', fontsize=15) + pyplot.ylim(-0.05, 1.05) + pyplot.yticks(fontsize=16) + + # Plot data points + pyplot.plot(occurr_freqs['mean_abundance'], occurr_freqs['occurrence'], 'o', markersize=6, fillstyle='full', color='black') + + # Plot best fit + pyplot.plot(x_range, beta_cdf(x_range, n_reads, beta_fit.best_values['m']), '-', lw=5, color='darkred') + lower, upper = proportion_confint(beta_cdf(x_range, n_reads, beta_fit.best_values['m'])*n_samples, n_samples, alpha=0.05, method='wilson') + pyplot.plot(x_range, lower, '--', lw=2, color='darkred') + pyplot.plot(x_range, upper, '--', lw=2, color='darkred') + pyplot.fill_between(x_range, lower, upper, color='lightgrey') + + pyplot.text(0.05, 0.9, '$R^2 = ' + '{:1.2f}'.format(r_square) + '$', fontsize=16, transform=pyplot.gca().transAxes) + pyplot.tight_layout() + + fn = file_header + '_NeutralFitPlot.png' + + return(fn) \ No newline at end of file diff --git a/neuevo/plotting_helper.py b/neuevo/plotting_helper.py new file mode 100644 index 0000000..81c72f6 --- /dev/null +++ b/neuevo/plotting_helper.py @@ -0,0 +1,78 @@ +# Caitlin Guccione, 08-25-2021 + +from matplotlib import pyplot +from neuevo.neuplot import neufit_plot + +def custom_color_plot(occurr_freqs, n_reads, n_samples, r_square, beta_fit, file_header): + '''Inputs: file_header = file path for neufit outpus: The path+dataGroup name + date stamp to be used for all parts of Neufit run: ex. /home/cguccion/NeutralEvolutionModeling/neufit_output/hutchKrakenAlex_combined_2021-08-26_13:24:22 + occurr_freqs = pandas df that Neufit created in the orginal program but didn't specifcally output orginally + - Headers of csv: otu_id, mean_abundance, occurrence, Kingdom, Phylum, Class, Order, Family, Genus, Species, predicted_occurrence, lower_conf_int, upper_conf_int + - I used this occur_freqs df in order to figure out which species is the most non-neutral + - I believe this is what Neufit uses to physical plot the dots on the graph as well + n_reads = Number of reads (from orginal program) + n_samples = Number of smaples (from orginal program) + r_square = R^2 value (from orginal program) + beta_fit = stats on the preformance of the model (from orginal program) + Outputs: ! The plot that Neufit outputs + the colored dots - will print to the screen but not save when run + fn = default filename for neufit plot : _NeutralFitPlot_withNonNeutralColors.png + *Creates the plot for Neufit + with colored dots ''' + + #Create orginal black plot + neufit_plot(occurr_freqs, n_reads, n_samples, r_square, beta_fit, file_header) + + #If you want to add a new custom color, follow the steps below on what to add where (more info above as well) + + # --------- STEP 1 --------- + # Create mean abundance and occurance list for bacteria + + #Phylori + MA_S_pylori = [] #Mean_Abundance, Species , phylori + O_S_pylori = [] #Occurrance + + #Proteobacteria + MA_P_Proteobacteria = [] + O_P_Proteobacteria = [] + + #Streptococcus + MA_G_Streptococcus = [] + O_G_Streptococcus = [] + + #Bacteroidetes + MA_P_Bacteroidetes = [] + O_P_Bacteroidetes = [] + + # --------- STEP 2 --------- + # Add to customized lists when hitting bacteria in neutral list + + #for i, j in neutral.iterrows(): + for i,j in occurr_freqs.iterrows(): + if j['Species'] == 's__pylori': + MA_S_pylori.append(j['mean_abundance']) + O_S_pylori.append(j['occurrence']) + if j['Phylum'] == 'p__Proteobacteria': + MA_P_Proteobacteria.append(j['mean_abundance']) + O_P_Proteobacteria.append(j['occurrence']) + if j['Genus'] == 'g__Streptococcus': + MA_G_Streptococcus.append(j['mean_abundance']) + O_G_Streptococcus.append(j['occurrence']) + if j['Phylum'] == 'p__Bacteroidetes': + MA_P_Bacteroidetes.append(j['mean_abundance']) + O_P_Bacteroidetes.append(j['occurrence']) + + # --------- STEP 3 --------- + # Override coloring of dots in current plot with new colors + + pyplot.plot(MA_P_Proteobacteria, O_P_Proteobacteria, 'o', markersize=6, fillstyle='full', color='green', label="Proteobacteria") + pyplot.plot(MA_G_Streptococcus, O_G_Streptococcus, 'o', markersize=6, fillstyle='full', color='orange', label="Streptococcus") + pyplot.plot(MA_P_Bacteroidetes, O_P_Bacteroidetes, 'o', markersize=6, fillstyle='full', color='purple', label="Bacteroidetes") + pyplot.plot(MA_S_pylori, O_S_pylori, 'o', markersize=6, fillstyle='full', color='blue', label='H.pylori') + + + plot_fn = file_header + '_NeutralFitPlot_withNonNeutralColors.png' + + return(plot_fn) + +def save_plot(fn): + ##Just saves the plot given a filename + pyplot.legend(loc="center left") + pyplot.savefig(fn) \ No newline at end of file diff --git a/neuevo/scripts/__init__.py b/neuevo/scripts/__init__.py new file mode 100644 index 0000000..b938f5b --- /dev/null +++ b/neuevo/scripts/__init__.py @@ -0,0 +1,20 @@ +import click +from neuevo import __version__ +from importlib import import_module + + +def _terribly_handle_brokenpipeerror(): + # based off http://stackoverflow.com/a/34299346 + import os + import sys + sys.stdout = os.fdopen(1, 'w') + + +@click.group() +@click.version_option(version=__version__) +@click.pass_context +def cli(ctx): + ctx.call_on_close(_terribly_handle_brokenpipeerror) + + +import_module('neuevo.scripts._neufit') \ No newline at end of file diff --git a/neuevo/scripts/_neufit.py b/neuevo/scripts/_neufit.py new file mode 100644 index 0000000..163245f --- /dev/null +++ b/neuevo/scripts/_neufit.py @@ -0,0 +1,88 @@ +import os +import click +from .__init__ import cli +from neuevo.utils import (biom_addmetatax_custom_tcga_ehn, + non_neutral_outliers) +from neuevo.neufit import neufit + + +@cli.command(name='neufit') +@click.option( + '--fnData', + required=True, + help='TODO') +@click.option( + '--fnTaxonomy', + required=True, + help='TODO') +@click.option( + '--output-filename', + required=True, + help='TODO') +@click.option( + '--dataset-type', + required=True, + help='TODO') +@click.option( + '--custom-filename', + required=True, + help='TODO') +@click.option( + '--norm-graph', + required=True, + help='TODO') +@click.option( + '--colored-graph', + required=True, + help='TODO') +@click.option( + '--non-neutral', + required=True, + help='TODO') +@click.option( + '--non-save', + required=False, + default=False, + help='TODO') +@click.option( + '--full-non-neutral', + required=False, + default=False, + help='TODO') +def standalone_neufit(fnData : str, + fnTaxonomy : str, + output_filename : str, + dataset_type: str, + custom_filename : str, + norm_graph : bool, + colored_graph : bool, + non_neutral : bool, + non_save : bool = False, + full_non_neutral : bool = False): + ''' + Inputs: output_filename : the name which will be at the front of all the files; ex. 'combined' + dataset_type : the dataset type we have from here: ('hutchKraken', 'gregTCGA') + custom_filename : depedant on the dataset: + - hutchKraken : the name of the biom file ex.'combined_biome' + + norm_graph : True/False : Prints and saves the neutral evolution graph without any coloring + colored_graph : True/False : Prints and saves the neutral evolution graph without any coloring + non_neutral : True/False : Prints and saves the most non-neutral microbes in csv file + + Default inputs to be changed mainly for testing purposes: + non_save : True/False, False = default, the following will not be SAVED just printed to the screen + - * This is intened for testing only! * + - norm_graph, colored_graph, non_neutral_csv + - **Took away this feature ** [It will still create [name]_data.csv and [name]_taxonomy.csv but will delete them after running] + full_non_neutral: True/False : Creates a csv file from orginal Neufit program with information about what is neutral and how far off the curve each point is + - csv will have: otu_id, mean_abundance, occurrence, Kingdom, Phylum, Class, Order, Family, Genus, Species, predicted_occurrence, lower_conf_int, upper_conf_int + + *The main function for the pipeline which calls all other functions* + + ''' + # run within wrapper + neufit(fnData, fnTaxonomy, output_filename, + dataset_type, custom_filename, + norm_graph, colored_graph, non_neutral, + non_save = non_save, + full_non_neutral = full_non_neutral) \ No newline at end of file diff --git a/neuevo/utils.py b/neuevo/utils.py new file mode 100644 index 0000000..e9b3829 --- /dev/null +++ b/neuevo/utils.py @@ -0,0 +1,149 @@ +# Caitlin Guccione, 08-25-2021 + +import pandas as pd +from biom import load_table +import os +import numpy as np + +def non_neutral_outliers(file_header, occurr_freqs, dataset_type, non_save, threshold = 0.5): + '''Inputs: file_header = file path for neufit outpus: The path+dataGroup name + date stamp to be used for all parts of Neufit run: ex. /home/cguccion/NeutralEvolutionModeling/neufit_output/hutchKrakenAlex_combined_2021-08-26_13:24:22 + occurr_freqs = pandas df that Neufit created in the orginal program but didn't specifcally output orginally + - Headers of csv: otu_id, mean_abundance, occurrence, Kingdom, Phylum, Class, Order, Family, Genus, Species, predicted_occurrence, lower_conf_int, upper_conf_int + - I used this occur_freqs df in order to figure out which species is the most non-neutral + - I believe this is what Neufit uses to physical plot the dots on the graph as well + threshold = 0.5; autoset to 0.5, but determines what bacteria are considered non-neutral + Outputs: ! [name]_NonNeutralOutliers.csv + *Creates the most Non-neutral csv file ''' + + #Create dataframe + standoutMicrobes = pd.DataFrame(columns = ('Difference off Neutral Model', 'Kingdom', 'Phylum', 'Class', 'Order', 'Family', 'Genus', 'Species')) + + #Loop and find most non-neutral microbes based upon threshold + row_count = 0 + for i,j in occurr_freqs.iterrows(): + diff = abs(j['occurrence'] - j['predicted_occurrence']) + if diff > threshold: + if dataset_type == 'TCGA_WGS': + standoutMicrobes.loc[row_count] = [diff, j['Domain'], j['Phylum'], j['Class'], j['Order'], j['Family'], j['Genus'], j['Species']] + else: + standoutMicrobes.loc[row_count] = [diff, j['Kingdom'], j['Phylum'], j['Class'], j['Order'], j['Family'], j['Genus'], j['Species']] + row_count +=1 + standoutMicrobes = standoutMicrobes.sort_values(by =['Difference off Neutral Model'], ascending=False) + + #Display and export non-neutral microbes as csv + print("\nTop NonNeutral Microbes") + display(standoutMicrobes) + print('=========================================================\n') + + fn = file_header + '_NonNeutral_Outliers.csv' + + if non_save == False: + standoutMicrobes.to_csv(fn, sep = '\t') + +def biom2data_tax(datasetName, biomFilename, finalFilename): + '''Inputs: datasetName = filename of dataset ex. hutchKrakenAlex_biom -- found in set paths above + biomFilename = filename of biom file found in datasetName path above + finalFilename = filename to be used in _data.csv file for neuFit + Outputs: fnD = File location of [name]_data.csv + ![name]_data.csv = Creates a data.csv file to be input into Neufit located at 'neufit_input' + fnT = File location of [name]_taxonomy.csv + ![name]_taxonomy.csv = Creates a taxonomy.csv file to be input into Neufit located at 'neufit_input' + * Imports biom file -> pandas dataframe -> data.csv, taxonomy.csv as desired by neufit''' + + #Make filename and import data + fullFilename = datasetName + '/' + biomFilename + featureTable = load_table(fullFilename) #https://biom-format.org/documentation/generated/biom.load_table.html + + + ''' + #Create file_header which holds the path / location for all _data and _csv outputs for this data + ###file_header = str(neufit_output_path) + "/" + str(dataset_type) + '/' + str(output_filename) + '/' + str(output_filename) + '_' + str(date) + "_" + str(h) + file_header = str(neufit_output_path) + "/" + str(datasetName) + '/' + str(finalFilename) + '/' + str(finalFilename) + '_' + str(date) + "_" + str(h) + + #Creates directory for all Neufit Outputs if it doesn't already exist + ###dir_name = str(neufit_output_path) + "/" + str(dataset_type) + '/' + str(output_filename) + dir_name = str(neufit_output_path) + "/" + str(datasetName) + '/' + str(finalFilename) + os.makedirs(dir_name, exist_ok=True) + ''' + + #Create _data.csv + pandas_featureTable = pd.DataFrame(featureTable.matrix_data.toarray(), featureTable.ids('observation'), featureTable.ids()) + fnD = neufit_input_path + '/' + finalFilename + '_data.csv' + pandas_featureTable.to_csv(fnD, sep='\t') + + #Create _taxonomy.csv + pandas_TaxTable = pd.DataFrame(featureTable.metadata_to_dataframe('observation')) + pandas_TaxTable.set_axis(['Kingdom', 'Phylum', 'Class', 'Order', 'Family', 'Genus', 'Species'], axis=1) + fnT = neufit_input_path + '/' + finalFilename + '_taxonomy.csv' + pandas_TaxTable.to_csv(fnT, sep='\t') + + ''' + #Create Neufit string to run progam in terminal + #Can now run Neufit entirely from this notebook so don't need this command + py = 'python2 /Users/cguccione/Documents/Classes/Kit_Lab/Tools/neufit/neufit.py ' + fnD = finalFilename + '_data.csv' + fnT = finalFilename + '_taxonomy.csv' + neufit_command = py + fnD + ' -t ' + fnT + + return(neufit_command) + Outputs: ![name]_data.csv = Creates a data.csv file to be input into Neufit located at 'neufit_input' + ![name]_taxonomy.csv = Creates a taxonomy.csv file to be input into Neufit located at 'neufit_input' + ''' + + return(fnD, fnT) + + +def biom_addmetatax_custom_tcga_ehn(datasetName, biomFilename, finalFilename): + '''Inputs: datasetName = filename of dataset ex. hutchKrakenAlex_biom -- found in set paths above + biomFilename = filename of biom file found in datasetName path above + finalFilename = filename to be used in _data.csv file for neuFit -- in this case will be normal or not + Outputs: fnD_hn = File location of head_neck_[name]_data.csv + fnD_e = File location of esophgous_[name]_data.csv + !head_neck_[name]_data.csv = Creates a data.csv file to be input into Neufit located at 'neufit_input' + !esophgous_[name]_data.csv = Creates a data.csv file to be input into Neufit located at 'neufit_input' + fnT = File location of [name]_taxonomy.csv + ![name]_taxonomy.csv = Creates a taxonomy.csv file to be input into Neufit located at 'neufit_input' + * Imports biom file -> pandas dataframe -> splits apart hn from esoph -> 2 data.csv, 1 taxonomy.csv as desired by neufit + * This custom because we need to seperate esophgous from head and neck patients from specific dataset''' + + #Make filename and import all data into pandas df + fullFilename = datasetName + '/' + biomFilename + featureTable = load_table(fullFilename) #Loads biom file: https://biom-format.org/documentation/generated/biom.load_table.html + pandas_featureTable = pd.DataFrame(featureTable.matrix_data.toarray(), featureTable.ids('observation'), featureTable.ids()) #Turn biom file into pandas df + meta = pd.read_csv(tcgaEhnWGSgreg_meta, sep = '\t') #Import the metadata into a pandas df + pandas_TaxTable = pd.read_csv(tcgaEhnWGSgreg_taxa) #Import the taxonomy into a pandas df + + #Custom : Seperate the Head and Neck Samples from Esophgous Samples in the biom file + ##In case Kit asks for it: biomDF has both hn and esoph samples combined + hn_df = pd.DataFrame() #Biom file with just the head and neck samples + e_df = pd.DataFrame() #Biom file with just the esophagus samples + + for sample in pandas_featureTable.columns.values: #Loop through all the samples in the biom file + + #Use the meta data to determine what cancer type the sample correlates to + cancerType = meta[meta['sample_name'] == sample].reset_index().loc[0, 'primary_site'] #Finds the sample name which matches our i, then finds the corresponding primary site in that row + + #Sort each cancer type into approriate dataframes + if cancerType == 'Head and Neck': + hn_df[sample] = pandas_featureTable.loc[:, sample] + elif cancerType == 'Esophagus': + e_df[sample] = pandas_featureTable.loc[:, sample] + else:#This should never print - just a saftey check + print("something is off in the dataset -- not just Head and Neck and Esophagus") + + #Create _data.csv and _taxonomy.csv files for head & neck and esophgous + + #Head and Neck + fnD_hn = neufit_input_path + '/' + 'head_neck_' + finalFilename + '_data.csv' + fnT_hn = neufit_input_path + '/' + 'head_neck_' + finalFilename + '_taxonomy.csv' + pandas_TaxTable.to_csv(fnT_hn, sep='\t') + pandas_featureTable.to_csv(fnD_hn, sep='\t') + + #Esophgous + fnD_e = neufit_input_path + '/' + 'esophagus_' + finalFilename + '_data.csv' + fnT_e = neufit_input_path + '/' + 'esophagus_' + finalFilename + '_taxonomy.csv' + pandas_TaxTable.to_csv(fnT_e, sep='\t') + pandas_featureTable.to_csv(fnD_e, sep='\t') + + + return(fnD_hn, fnD_e, fnT_hn, fnT_e) \ No newline at end of file diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..dc58a07 --- /dev/null +++ b/setup.py @@ -0,0 +1,107 @@ +from setuptools.command.egg_info import egg_info +from setuptools.command.develop import develop +from setuptools.command.install import install +import re +import ast +import os +from setuptools import find_packages, setup + +# Dealing with Cython +USE_CYTHON = os.environ.get('USE_CYTHON', False) +ext = '.pyx' if USE_CYTHON else '.c' + +# bootstrap numpy intall +# https://stackoverflow.com/questions/51546255/ +# python-package-setup-setup-py-with-customisation +# -to-handle-wrapped-fortran + + +def custom_command(): + import sys + if sys.platform in ['darwin', 'linux']: + os.system('pip install numpy') + + +class CustomInstallCommand(install): + def run(self): + install.run(self) + custom_command() + + +class CustomDevelopCommand(develop): + def run(self): + develop.run(self) + custom_command() + + +class CustomEggInfoCommand(egg_info): + def run(self): + egg_info.run(self) + custom_command() + + +extensions = [ +] + +if USE_CYTHON: + from Cython.Build import cythonize + extensions = cythonize(extensions) + +classes = """ + Development Status :: 3 - Alpha + License :: OSI Approved :: BSD License + Topic :: Software Development :: Libraries + Topic :: Scientific/Engineering + Topic :: Scientific/Engineering :: Bio-Informatics + Programming Language :: Python :: 3 + Programming Language :: Python :: 3 :: Only + Operating System :: Unix + Operating System :: POSIX + Operating System :: MacOS :: MacOS X +""" +classifiers = [s.strip() for s in classes.split('\n') if s] + +description = ('TODO') + +with open('README.md') as f: + long_description = f.read() + +# version parsing from __init__ pulled from Flask's setup.py +# https://github.com/mitsuhiko/flask/blob/master/setup.py +_version_re = re.compile(r'__version__\s+=\s+(.*)') + +with open('neuevo/__init__.py', 'rb') as f: + hit = _version_re.search(f.read().decode('utf-8')).group(1) + version = str(ast.literal_eval(hit)) + +standalone = ['neuevo=neuevo.scripts.__init__:cli'] + +setup(name='neuevo', + version=version, + license='TODO', + description=description, + long_description=long_description, + long_description_content_type='text/markdown', + author="neuevo development team", + author_email="cguccion@ucsd.edu", + maintainer="neuevo development team", + maintainer_email="cguccion@ucsd.edu", + packages=find_packages(), + ext_modules=extensions, + install_requires=[ + 'numpy >= 1.12.1', + 'click', + 'lmfit', + 'pandas >= 0.10.0', + 'scipy >= 0.19.1', + 'nose >= 1.3.7', + 'scikit-learn >= 0.18.1', + 'scikit-bio > 0.5.3', + 'biom-format', + 'h5py', ], + classifiers=classifiers, + entry_points={'console_scripts': standalone}, + cmdclass={'install': CustomInstallCommand, + 'develop': CustomDevelopCommand, + 'egg_info': CustomEggInfoCommand, }, + zip_safe=False) \ No newline at end of file