Merge branch 'hotfix/1.1.1'

cancerit · May 8, 2018 · 28d8eaf · 28d8eaf
2 parents d51b9c1 + a73c5cc
commit 28d8eaf
Show file tree

Hide file tree

Showing 12 changed files with 60 additions and 26 deletions.
diff --git a/CHANGES.md b/CHANGES.md
@@ -1,4 +1,9 @@
 # CHANGES
+## 1.1.1
+ * Added segmentation module in setup.py
+ * added requirements.txt file
+ * Corrected broken links in README
+
 ## 1.1.0
  * Additional output formats for normalised and corrected counts
  * segmentation is now optional

diff --git a/README.md b/README.md
@@ -3,16 +3,16 @@
 | --------------------------------------------------- | ----------------------------------------------------- |
 | [![Master Badge][travis-master-badge]][travis-repo] | [![Develop Badge][travis-develop-badge]][travis-repo] |
 
-This is python implementation of Francesco's [CRISPRcleanR] R package for unsupervised identification and
+This is a Python implementation of the [CRISPRcleanR] package for unsupervised identification and
 correction of gene independent cell responses to CRISPR-cas9 targeting 
 
 <!-- TOC depthFrom:2 depthTo:6 withLinks:1 updateOnSave:1 orderedList:0 -->
 
 - [Design](#design)
 - [Tools](#tools)
 	- [pyCRISPRCleanR](#pycrisprcleanr)
-	- [inputFormat] (#inputformat)
-	- [outputFormat] (#outputformat)
+	- [inputFormat](#inputformat)
+	- [outputFormat](#outputformat)
 - [INSTALL](#install)
 	- [Package Dependencies](#package-dependencies)
   - [R packages](#r-packages)
@@ -40,33 +40,41 @@ inverse transformed corrected treatment counts
 Various exceptions can occur for malformed input files.
 
 ### inputFormat
+
  * ```gRNA Counts``` file: tab separated file containing following fields
  * sgRNA gene <control_count 1...N> <sample_count 1..N>
  * ```sgRNA library``` file format
  * sgRNA gene chr start end
+
 ### outputFormat
 
   The following tab-separated output files are produced:
 
- 1. crispr_cleanr_normalised_counts.tsv
+ 1. normalised_counts.tsv
  * sgRNA: guideRNA
  * gene: gene name as defined in the library file
  * <control sample count:normalised 1..N> : Normalised count
  * <treatment sample count: normalised 1..N> : Normalised count
 
- 2. crispr_cleanr_fold_changes.tsv
+ 2. normalised_fold_changes.tsv
  * sgRNA: guideRNA
  * gene: gene name as defined in the library file
 * <treatment sample fold changes: fold changes 1..N>
  * avgFC: average fold change values
 
- 3. crispr_cleanr_corrected_counts.tsv [ generated only when ```--segmentation``` option is selected ]
+ 3. crispr_cleanr_corrected_counts.tsv [ generated only when ```--crispr_cleanr``` flag is set ]
  * sgRNA: guideRNA
  * gene: gene name as defined in the library file
  * <control sample count:corrected 1..N> : corrected count
  * <treatment sample count:corrected 1..N >: corrected count
 
- 4. crispr_cleanr_alldata.tsv [ generated only when ```--segmentation``` option is selected ]
+ 4. crispr_cleanr_fold_changes.tsv [ generated only when ```--crispr_cleanr```  flag is set ]
+ * sgRNA: guideRNA
+ * gene: gene name as defined in the library file
+ * <treatment sample fold changes: fold changes 1..N>
+ * avgFC: average fold change values
+
+ 5. alldata.tsv [ generated only when ```--crispr_cleanr``` option is selected ]
  * sgRNA: guideRNA
  * <control sample count: raw 1..N> : raw count
  * <treatment sample count: raw 1..N> : raw count
@@ -82,6 +90,8 @@ Various exceptions can occur for malformed input files.
  * correctedFC: corrected foldchange values
  * <control sample count:corrected 1..N> : corrected count (postfixed _cc)
  * <treatment sample count:corrected 1..N >: corrected count (postfixed _cc)
+ * <treatment sample fold changes: fold changes 1..N> (postfixed _cf)
+ * avgFC_cf: average fold change values based on corrected counts
 
 ## INSTALL
 Installing via `pip install`. Simply execute with the path to the compiled 'whl' found on the [release page][pyCRISPRCleanR-releases]:

diff --git a/pyCRISPRcleanR.egg-info/PKG-INFO b/pyCRISPRcleanR.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 1.2
 Name: pyCRISPRcleanR
-Version: 1.1.0
+Version: 1.1.1
 Summary: This is python implementation of CRISPRcleanR package for unsupervised identification and correction of gene independent cell responses to CRISPR-cas9 targeting
 Home-page: https://github.com/CancerIT/pyCRISPRcleanR
 Author: Shriram Bhosle

diff --git a/pyCRISPRcleanR.egg-info/SOURCES.txt b/pyCRISPRcleanR.egg-info/SOURCES.txt
@@ -12,4 +12,6 @@ pyCRISPRcleanR.egg-info/dependency_links.txt
 pyCRISPRcleanR.egg-info/entry_points.txt
 pyCRISPRcleanR.egg-info/requires.txt
 pyCRISPRcleanR.egg-info/top_level.txt
-pyCRISPRcleanR/config/logging.conf
+pyCRISPRcleanR/config/logging.conf
+pyCRISPRcleanR/segmentation/__init__.py
+pyCRISPRcleanR/segmentation/cbs.py
diff --git a/pyCRISPRcleanR.egg-info/requires.txt b/pyCRISPRcleanR.egg-info/requires.txt
@@ -2,3 +2,4 @@ rpy2
 pandas
 numpy
 plotly
+tzlocal
diff --git a/pyCRISPRcleanR/abstractCrispr.py b/pyCRISPRcleanR/abstractCrispr.py
@@ -17,7 +17,7 @@ def __init__(self, **kwargs):
         self.ncontrols = kwargs.get('ncontrols', 1)
         self.sample = kwargs.get('sample', 'mySample')
         self.ignored_genes = kwargs.get('ignored_genes', [])
-        self.runcrispr = kwargs.get('correct_counts', None)
+        self.runcrispr = kwargs.get('crispr_cleanr', None)
         self.num_processors = kwargs.get('num_processors', 1)
         self.plot_data = kwargs.get('plot_data', None)
         super().__init__()

diff --git a/pyCRISPRcleanR/crisprCleanR_command.py b/pyCRISPRcleanR/crisprCleanR_command.py
@@ -52,11 +52,11 @@ def main():  # pragma: no cover
     optional.add_argument("-np", "--num_processors", type=int, dest="num_processors", required=False,
                           default=1, help="Number of processors to use for parallel jobs")
 
-    optional.add_argument("-cc", "--correct_counts", type=int, dest="correct_counts", required=False,
-                          default=None, help="Correct counts using CBS [Y/y]")
+    optional.add_argument("-cc", "--crispr_cleanr", action='store_true', dest="crispr_cleanr",
+                          help="flag to run CRISPRcleanR")
 
-    optional.add_argument("-pl", "--plot_data", type=str, dest="plot_data", required=False,
-                          default=None, help="Generate pdf and interactive plotly images [y or 1]")
+    optional.add_argument("-pl", "--plot_data", action='store_true', dest="plot_data",
+                          help="Generate pdf and interactive plotly images")
 
     optional.add_argument("-o", "--outdir", type=str, dest="outdir",
                           default='./', help="path to output folder ")

diff --git a/pyCRISPRcleanR/formatInput.py b/pyCRISPRcleanR/formatInput.py
@@ -78,7 +78,7 @@ def run_analysis(self):
                 all_data = SM.process_segments(cbs_dict, ignored_genes, min_target_genes, controls, num_rep,
                                                outdir=outdir)
                 log.info("Processed CBS segments  .....")
-                SM._print_df(all_data, outdir + "/crispr_cleanr_alldata.tsv")
+                SM._print_df(all_data, outdir + "/alldata.tsv")
                 if self.plot_data:
                     cbs_dict_norm = SM.run_cbs(all_data, cpus, sample, fc_col="correctedFC")
                     log.info("CBS analysis on normalised fold changes completed.....")

diff --git a/pyCRISPRcleanR/staticMethods.py b/pyCRISPRcleanR/staticMethods.py
@@ -115,12 +115,12 @@ def get_norm_count_n_fold_changes(cldf, controls, plot_flag=None, outdir='./'):
 
         cldf = cldf.join(normed, rsuffix='_nc')
         normed.insert(0, 'gene', cldf['gene'])
-        StaticMthods._print_df(normed, outdir + "/crispr_cleanr_normalised_counts.tsv")
+        StaticMthods._print_df(normed, outdir + "/normalised_counts.tsv")
 
         cldf['avgFC'] = fc.mean(axis=1)
         fc['avgFC'] = cldf['avgFC']
         fc.insert(0, 'gene', cldf['gene'])
-        StaticMthods._print_df(fc, outdir + "/crispr_cleanr_fold_changes.tsv")
+        StaticMthods._print_df(fc, outdir + "/normalised_fold_changes.tsv")
 
         cldf['BP'] = round(cldf['start'] + (cldf['end'] - cldf['start']) / 2).astype(int)
         cldf.sort_values(by=['chr', 'start'], ascending=True, inplace=True)
@@ -150,8 +150,8 @@ def process_segments(cbs_dict, ignored_genes, min_genes, controls, no_rep, outdi
             cnarr['correction'] = 0
             cnarr['correctedFC'] = cnarr.avgFC
             n_genes_in_seg = 0
-            reverted_counts = cnarr.iloc[:, cnarr.columns.get_loc('end') +
-                                        controls + 1:cnarr.columns.get_loc('avgFC')]
+            reverted_counts = cnarr.iloc[:, cnarr.columns.get_loc('end') + controls + 1:
+                                        cnarr.columns.get_loc('avgFC')]
 
             for segment in segrows.itertuples():
                 idxs = list(range(segment.startRow - 1, segment.endRow))
@@ -171,10 +171,21 @@ def process_segments(cbs_dict, ignored_genes, min_genes, controls, no_rep, outdi
         alldata = pd.concat(chrdata_list)
         # get control counts and join them with corrected_count for printing
         nc_control_count = alldata.iloc[:, alldata.columns.get_loc('end') + 1:
-                                        alldata.columns.get_loc('end') + controls + 1]
+                                    alldata.columns.get_loc('end') + controls + 1]
         corrected_count = nc_control_count.join(corrected_count)
         corrected_count = corrected_count.rename(columns=lambda x: str(x)[:-3])
+        # calculate corrected fold changes
+        corrected_fc = corrected_count.apply(
+            lambda x: np.log2((x + 0.5) / (corrected_count.iloc[:, 0:controls].mean(axis=1) + 0.5)))
+        corrected_fc.drop(corrected_fc.columns[0:controls], axis=1, inplace=True)
+        corrected_fc['avgFC'] = corrected_fc.mean(axis=1)
         alldata = alldata.join(corrected_count, rsuffix='_cc')
+        alldata = alldata.join(corrected_fc, rsuffix='_cf')
+
+        # add gene names before writing to a file
+        corrected_fc.insert(0, 'gene', alldata['gene'])
+        StaticMthods._print_df(corrected_fc, outdir + "/crispr_cleanr_fold_changes.tsv")
+        # add gene names before writing to a file
         corrected_count.insert(0, 'gene', alldata['gene'])
         StaticMthods._print_df(corrected_count, outdir + "/crispr_cleanr_corrected_counts.tsv")
         return alldata
@@ -191,8 +202,8 @@ def _correct_counts(segdata, controls, no_rep):
         c = nc.mean(axis=1)
         n = segdata.correctedFC
         reverted['revc'] = c * (pow(2, n))
-        normed_num = segdata.iloc[:, segdata.columns.get_loc('end') +
-                                controls + 1:segdata.columns.get_loc('avgFC')]
+        normed_num = segdata.iloc[:, segdata.columns.get_loc('end') + controls + 1:
+                                segdata.columns.get_loc('avgFC')]
         normed_num += 1
         proportions = normed_num.div(normed_num.agg('sum', axis=1), axis=0)
         reverted = reverted * no_rep

diff --git a/requirements.txt b/requirements.txt
@@ -0,0 +1,5 @@
+tzlocal==1.5.1
+numpy==1.14.3
+pandas==0.22.0
+rpy2==2.9.3
+plotly==2.5.1
diff --git a/setup.py b/setup.py
@@ -3,17 +3,17 @@
 from setuptools import setup
 
 config = {
-    'version': '1.1.0',
+    'version': '1.1.1',
     'name': 'pyCRISPRcleanR',
     'description': 'This is python implementation of CRISPRcleanR package for unsupervised identification and correction of gene independent cell responses to CRISPR-cas9 targeting',
     'author': 'Shriram Bhosle',
     'url': 'https://github.com/CancerIT/pyCRISPRcleanR',
     'author_email': '[email protected]',
     'python_requires': '>= 3.3',
     'setup_requires': ['pytest','pytest-cover'],
-    'install_requires': ['rpy2', 'pandas', 'numpy', 'plotly'],
+    'install_requires': ['rpy2', 'pandas', 'numpy', 'plotly', 'tzlocal'],
     'packages': ['pyCRISPRcleanR'],
-    'package_data': {'pyCRISPRcleanR':['config/*.conf']},
+    'package_data': {'pyCRISPRcleanR':['config/*.conf','segmentation/*.py']},
     'entry_points': {
         'console_scripts': ['pyCRISPRCleanR=pyCRISPRcleanR.crisprCleanR_command:main'],
     }

diff --git a/tests/test_check_combined_data.py b/tests/test_check_combined_data.py
@@ -41,7 +41,7 @@ def test_static_methods(self):
         #alldata.to_pickle('pickled_df_HT-29.pkl', compression='gzip', protocol=-1)
         expected_df=pd.read_pickle(picke_file, compression='gzip')
         result=expected_df.equals(alldata)
-        assert (2038, 20) == alldata.shape, 'process_segments'
+        assert (2038, 24) == alldata.shape, 'process_segments'
         #assert True == result, 'process_segments: check results'
 
 if __name__ == '__main__':
-Original file line number
+Diff line change
@@ Expand Up / @@ -2,3 +2,4 @@ rpy2 @@
     pandas
     numpy
     plotly
+    tzlocal