From 28a6521cde69fae236bdbd322ff0841111def5b9 Mon Sep 17 00:00:00 2001 From: Song Weichen Date: Mon, 5 Aug 2019 08:55:45 +0800 Subject: [PATCH] Delete Samusik_event_classification.ipynb --- python/Samusik_event_classification.ipynb | 475 ---------------------- 1 file changed, 475 deletions(-) delete mode 100644 python/Samusik_event_classification.ipynb diff --git a/python/Samusik_event_classification.ipynb b/python/Samusik_event_classification.ipynb deleted file mode 100644 index ab24ef6..0000000 --- a/python/Samusik_event_classification.ipynb +++ /dev/null @@ -1,475 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "from ACDC.random_walk_classifier import * \n", - "from ACDC.cell_type_annotation import * " - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import numpy as np\n", - "from collections import Counter\n", - "\n", - "path = 'data/Samusik01/40000events/'\n", - "df = pd.read_csv(path + 'Samusik01_5.csv.gz', sep=',', header = 0, compression = 'gzip')\n", - "\n", - "df = df.drop(['Time', 'Cell_length', 'BC1()', 'BC2()', 'BC3()', 'BC4()', 'BC5()', 'BC6()', 'DNA1()',\n", - " 'DNA2()', 'Cisplatin()', 'beadDist()', 'sample', 'event'], axis = 1)\n", - "\n", - "channels = [item[:item.find('(')] for item in df.columns[:-1]]\n", - "df.columns = channels + ['cell_type']\n", - "\n", - "\n", - "#df = df.loc[df['cell_type'] != 'NotDebrisSinglets']\n", - "\n", - "table = pd.read_csv(path + 'Samusik01_table.csv', sep=',', header=0, index_col=0)\n", - "table = table.fillna(0)\n", - "\n", - "cts, channels = get_label(table)\n", - "\n", - "#X0= np.arcsinh((df[channels].values - 1.0)/5.0)\n", - "X0= df[channels].values" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "idx2ct = [key for idx, key in enumerate(table.index)]\n", - "#idx2ct.append('unknown')\n", - "\n", - "ct2idx = {key:idx for idx, key in enumerate(table.index)}\n", - "#ct2idx['unknown'] = len(table.index)\n", - " \n", - "ct_score = np.abs(table.as_matrix()).sum(axis = 1)\n", - "\n", - "## compute manual gated label\n", - "y0 = np.zeros(df.cell_type.shape)\n", - "\n", - "for i, ct in enumerate(df.cell_type):\n", - " if ct in ct2idx:\n", - " y0[i] = ct2idx[ct]\n", - " #else:\n", - " #y0[i] = ct2idx['unknown']" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "C:\\Users\\Administrator\\Anaconda3\\lib\\site-packages\\sklearn\\cross_validation.py:44: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.\n", - " \"This module will be removed in 0.20.\", DeprecationWarning)\n" - ] - } - ], - "source": [ - "from sklearn.metrics import accuracy_score, confusion_matrix\n", - "import phenograph\n", - "from sklearn.cross_validation import StratifiedKFold\n", - "import pickle\n", - "n_neighbor = 10\n", - "thres = 0.5\n" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "C:\\Users\\Administrator\\Anaconda3\\lib\\site-packages\\sklearn\\cross_validation.py:552: Warning: The least populated class in y has only 2 members, which is too few. The minimum number of labels for any class cannot be less than n_folds=5.\n", - " % (min_labels, self.n_folds)), Warning)\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "00 th batch\n", - "Finding 30 nearest neighbors using minkowski metric and 'auto' algorithm\n", - "Neighbors computed in 0.2677004337310791 seconds\n", - "Jaccard graph constructed in 1.3313302993774414 seconds\n", - "Wrote graph to binary file in 0.011000633239746094 seconds\n", - "Running Louvain modularity optimization\n", - "After 1 runs, maximum modularity is Q = 0.712212\n", - "After 3 runs, maximum modularity is Q = 0.713748\n", - "Louvain completed 23 runs in 16.70855474472046 seconds\n", - "PhenoGraph complete in 18.32158613204956 seconds\n", - "Finding 30 nearest neighbors using minkowski metric and 'auto' algorithm\n", - "Neighbors computed in 0.10300588607788086 seconds\n", - "Jaccard graph constructed in 1.199068546295166 seconds\n", - "Wrote graph to binary file in 0.0070002079010009766 seconds\n", - "Running Louvain modularity optimization\n", - "After 1 runs, maximum modularity is Q = 0.53431\n", - "After 2 runs, maximum modularity is Q = 0.542497\n", - "Louvain completed 22 runs in 2.1281216144561768 seconds\n", - "PhenoGraph complete in 3.441196918487549 seconds\n", - "Finding 30 nearest neighbors using minkowski metric and 'auto' algorithm\n", - "Neighbors computed in 0.10400581359863281 seconds\n", - "Jaccard graph constructed in 1.2270703315734863 seconds\n", - "Wrote graph to binary file in 0.00500035285949707 seconds\n", - "Running Louvain modularity optimization\n", - "After 1 runs, maximum modularity is Q = 0.511915\n", - "After 2 runs, maximum modularity is Q = 0.513102\n", - "Louvain completed 22 runs in 2.06506609916687 seconds\n", - "PhenoGraph complete in 3.4051427841186523 seconds\n", - "Finding 30 nearest neighbors using minkowski metric and 'auto' algorithm\n", - "Neighbors computed in 0.11170005798339844 seconds\n", - "Jaccard graph constructed in 1.2349021434783936 seconds\n", - "Wrote graph to binary file in 0.015600204467773438 seconds\n", - "Running Louvain modularity optimization\n", - "After 1 runs, maximum modularity is Q = 0.535715\n", - "Louvain completed 21 runs in 3.1987061500549316 seconds\n", - "PhenoGraph complete in 4.560908555984497 seconds\n", - "Finding 30 nearest neighbors using minkowski metric and 'auto' algorithm\n", - "Neighbors computed in 0.10930109024047852 seconds\n", - "Jaccard graph constructed in 1.2842023372650146 seconds\n", - "Wrote graph to binary file in 0.06490015983581543 seconds\n", - "Running Louvain modularity optimization\n", - "After 1 runs, maximum modularity is Q = 0.644217\n", - "After 5 runs, maximum modularity is Q = 0.64652\n", - "Louvain completed 25 runs in 3.5643064975738525 seconds\n", - "PhenoGraph complete in 5.022710084915161 seconds\n", - "Finding 30 nearest neighbors using minkowski metric and 'auto' algorithm\n", - "Neighbors computed in 0.11420011520385742 seconds\n", - "Jaccard graph constructed in 1.2349023818969727 seconds\n", - "Wrote graph to binary file in 0.015599966049194336 seconds\n", - "Running Louvain modularity optimization\n", - "After 1 runs, maximum modularity is Q = 0.4833\n", - "Louvain completed 21 runs in 2.8249053955078125 seconds\n", - "PhenoGraph complete in 4.189607858657837 seconds\n", - "Finding 30 nearest neighbors using minkowski metric and 'auto' algorithm\n", - "Neighbors computed in 0.10360050201416016 seconds\n", - "Jaccard graph constructed in 1.344102382659912 seconds\n", - "Wrote graph to binary file in 0.0 seconds\n", - "Running Louvain modularity optimization\n", - "After 1 runs, maximum modularity is Q = 0.388099\n", - "After 2 runs, maximum modularity is Q = 0.400311\n", - "Louvain completed 22 runs in 2.01180362701416 seconds\n", - "PhenoGraph complete in 3.4595065116882324 seconds\n", - "Finding 30 nearest neighbors using minkowski metric and 'auto' algorithm\n", - "Neighbors computed in 0.1167001724243164 seconds\n", - "Jaccard graph constructed in 1.1881022453308105 seconds\n", - "Wrote graph to binary file in 0.0 seconds\n", - "Running Louvain modularity optimization\n", - "After 1 runs, maximum modularity is Q = 0.428371\n", - "After 5 runs, maximum modularity is Q = 0.430064\n", - "Louvain completed 25 runs in 4.079107284545898 seconds\n", - "PhenoGraph complete in 5.383909702301025 seconds\n", - "Finding 30 nearest neighbors using minkowski metric and 'auto' algorithm\n", - "Neighbors computed in 0.12730026245117188 seconds\n", - "Jaccard graph constructed in 1.1101019382476807 seconds\n", - "Wrote graph to binary file in 0.0 seconds\n", - "Running Louvain modularity optimization\n", - "After 1 runs, maximum modularity is Q = 0.411917\n", - "Louvain completed 21 runs in 3.9986071586608887 seconds\n", - "PhenoGraph complete in 5.236009359359741 seconds\n", - "Finding 30 nearest neighbors using minkowski metric and 'auto' algorithm\n", - "Neighbors computed in 0.11170005798339844 seconds\n", - "Jaccard graph constructed in 1.2193021774291992 seconds\n", - "Wrote graph to binary file in 0.015600204467773438 seconds\n", - "Running Louvain modularity optimization\n", - "After 1 runs, maximum modularity is Q = 0.576159\n", - "Louvain completed 21 runs in 2.1965038776397705 seconds\n", - "PhenoGraph complete in 3.5431063175201416 seconds\n", - "Finding 30 nearest neighbors using minkowski metric and 'auto' algorithm\n", - "Neighbors computed in 0.10360026359558105 seconds\n", - "Jaccard graph constructed in 1.3129024505615234 seconds\n", - "Wrote graph to binary file in 0.015599966049194336 seconds\n", - "Running Louvain modularity optimization\n", - "After 1 runs, maximum modularity is Q = 0.679502\n", - "Louvain completed 21 runs in 3.5743064880371094 seconds\n", - "PhenoGraph complete in 5.006409168243408 seconds\n", - "Finding 30 nearest neighbors using minkowski metric and 'auto' algorithm\n", - "Neighbors computed in 0.12730026245117188 seconds\n", - "Jaccard graph constructed in 1.3597023487091064 seconds\n", - "Wrote graph to binary file in 0.0961003303527832 seconds\n", - "Running Louvain modularity optimization\n", - "After 1 runs, maximum modularity is Q = 0.705056\n", - "After 7 runs, maximum modularity is Q = 0.706111\n", - "Louvain completed 27 runs in 3.2311058044433594 seconds\n", - "PhenoGraph complete in 4.814208745956421 seconds\n", - "Finding 30 nearest neighbors using minkowski metric and 'auto' algorithm\n", - "Neighbors computed in 0.11420035362243652 seconds\n", - "Jaccard graph constructed in 1.9369032382965088 seconds\n", - "Wrote graph to binary file in 0.049300193786621094 seconds\n", - "Running Louvain modularity optimization\n", - "After 1 runs, maximum modularity is Q = 0.770449\n", - "After 2 runs, maximum modularity is Q = 0.772522\n", - "Louvain completed 22 runs in 2.503504514694214 seconds\n", - "PhenoGraph complete in 4.60390830039978 seconds\n", - "Finding 30 nearest neighbors using minkowski metric and 'auto' algorithm\n", - "Neighbors computed in 0.12920045852661133 seconds\n", - "Jaccard graph constructed in 1.344102382659912 seconds\n", - "Wrote graph to binary file in 0.11170005798339844 seconds\n", - "Running Louvain modularity optimization\n", - "After 1 runs, maximum modularity is Q = 0.66993\n", - "After 3 runs, maximum modularity is Q = 0.672398\n", - "Louvain completed 23 runs in 3.0570056438446045 seconds\n", - "PhenoGraph complete in 4.6576087474823 seconds\n", - "Finding 30 nearest neighbors using minkowski metric and 'auto' algorithm\n", - "Neighbors computed in 0.47800111770629883 seconds\n", - "Jaccard graph constructed in 1.7341029644012451 seconds\n", - "Wrote graph to binary file in 0.2209005355834961 seconds\n", - "Running Louvain modularity optimization\n", - "After 1 runs, maximum modularity is Q = 0.523315\n", - "After 2 runs, maximum modularity is Q = 0.525439\n", - "After 3 runs, maximum modularity is Q = 0.531106\n", - "After 4 runs, maximum modularity is Q = 0.53519\n", - "Louvain completed 24 runs in 5.898710489273071 seconds\n", - "PhenoGraph complete in 8.347315073013306 seconds\n", - "Finding 30 nearest neighbors using minkowski metric and 'auto' algorithm\n", - "Neighbors computed in 0.12730026245117188 seconds\n", - "Jaccard graph constructed in 1.3285024166107178 seconds\n", - "Wrote graph to binary file in 0.038700103759765625 seconds\n", - "Running Louvain modularity optimization\n", - "After 1 runs, maximum modularity is Q = 0.752509\n", - "Louvain completed 21 runs in 5.511809825897217 seconds\n", - "PhenoGraph complete in 7.099912643432617 seconds\n", - "Finding 30 nearest neighbors using minkowski metric and 'auto' algorithm\n", - "Neighbors computed in 0.5797009468078613 seconds\n", - "Jaccard graph constructed in 2.3581042289733887 seconds\n", - "Wrote graph to binary file in 0.3145005702972412 seconds\n", - "Running Louvain modularity optimization\n", - "After 1 runs, maximum modularity is Q = 0.628424\n", - "After 3 runs, maximum modularity is Q = 0.631251\n", - "Louvain completed 23 runs in 6.013510704040527 seconds\n", - "PhenoGraph complete in 9.297016620635986 seconds\n", - "Finding 30 nearest neighbors using minkowski metric and 'auto' algorithm\n", - "Neighbors computed in 0.7201011180877686 seconds\n", - "Jaccard graph constructed in 1.765303134918213 seconds\n", - "Wrote graph to binary file in 0.3195009231567383 seconds\n", - "Running Louvain modularity optimization\n", - "After 1 runs, maximum modularity is Q = 0.674117\n", - "Louvain completed 21 runs in 6.374811172485352 seconds\n", - "PhenoGraph complete in 9.195316314697266 seconds\n", - "Finding 30 nearest neighbors using minkowski metric and 'auto' algorithm\n", - "Neighbors computed in 0.11670041084289551 seconds\n", - "Jaccard graph constructed in 1.2661023139953613 seconds\n", - "Wrote graph to binary file in 0.015600204467773438 seconds\n", - "Running Louvain modularity optimization\n", - "After 1 runs, maximum modularity is Q = 0.602959\n", - "After 2 runs, maximum modularity is Q = 0.604136\n", - "After 3 runs, maximum modularity is Q = 0.611855\n", - "Louvain completed 23 runs in 2.5641050338745117 seconds\n", - "PhenoGraph complete in 3.9781079292297363 seconds\n", - "Finding 30 nearest neighbors using minkowski metric and 'auto' algorithm\n", - "Neighbors computed in 8.941315650939941 seconds\n", - "Jaccard graph constructed in 4.152107238769531 seconds\n", - "Wrote graph to binary file in 2.074803590774536 seconds\n", - "Running Louvain modularity optimization\n", - "After 1 runs, maximum modularity is Q = 0.88564\n", - "Louvain completed 21 runs in 33.42525887489319 seconds\n", - "PhenoGraph complete in 48.68958568572998 seconds\n", - "01 th batch\n" - ] - } - ], - "source": [ - "import time\n", - "import scipy.io as sio \n", - "\n", - "skf = StratifiedKFold(y0, n_folds=5, shuffle=True, random_state=0)\n", - "result = []\n", - "score_final = []\n", - "\n", - "\n", - "process_time = []\n", - "c = 0\n", - "for tr, te in skf:\n", - " print('%02d th batch' % c)\n", - " if c == 1:\n", - " break\n", - " c += 1\n", - " \n", - " X = X0.copy()\n", - " y_true = y0.copy()\n", - "\n", - " X = X[tr, :]\n", - " y_true = y_true[tr]\n", - "\n", - " mk_model = compute_marker_model(pd.DataFrame(X, columns = channels), table, 0.0)\n", - "\n", - " ## compute posterior probs\n", - " tic = time.clock()\n", - " score = get_score_mat(X, [], table, [], mk_model)\n", - " score = np.concatenate([score, 1.0 - score.max(axis = 1)[:, np.newaxis]], axis = 1) \n", - "\n", - " ## get indices \n", - " ct_index = get_unique_index(X, score, table, thres)\n", - " \n", - " ## baseline - classify events \n", - " y_pred_index = np.argmax(score, axis = 1)\n", - " \n", - " toc = time.clock()\n", - " time0 = toc - tic\n", - " \n", - " \n", - " \n", - " ## running ACDC\n", - " tic = time.clock()\n", - " res_c = get_landmarks(X, score, ct_index, idx2ct, phenograph, thres)\n", - "\n", - " landmark_mat, landmark_label = output_feature_matrix(res_c, [idx2ct[i] for i in range(len(idx2ct))]) \n", - "\n", - " landmark_label = np.array(landmark_label)\n", - "\n", - " lp, y_pred = rm_classify(X, landmark_mat, landmark_label, n_neighbor)\n", - "\n", - " process_time.append(toc-tic)\n", - " \n", - " res = phenograph.cluster(X, k=30, directed=False, prune=False, min_cluster_size=10, jaccard=True,\n", - " primary_metric='euclidean', n_jobs=-1, q_tol=1e-3)\n", - " \n", - " toc = time.clock()\n", - " time1 = toc - tic\n", - " \n", - " \n", - " ## running phenograph classification\n", - " tic = time.clock()\n", - " y_pred_oracle = np.zeros_like(y_true)\n", - " for i in range(max(res[0])+1):\n", - " ic, nc = Counter(y_true[res[0] == i]).most_common(1)[0]\n", - " y_pred_oracle[res[0] == i] = ic\n", - " \n", - " score_final.append([accuracy_score(y_true, [ct2idx[c] for c in y_pred]), \n", - " accuracy_score(y_true, y_pred_index), \n", - " accuracy_score(y_true, y_pred_oracle)])\n", - " \n", - " toc = time.clock()\n", - " time2 = toc - tic \n", - " \n", - " \n", - " result.append((y_true, y_pred, y_pred_index, y_pred_oracle))\n", - " process_time.append((time0, time1, time2))\n", - " \n", - " #pickle.dump(result, open('processed_file/AML/event_classidication_AML.p', 'wb'))\n", - " sio.savemat('processed_file/Samusik01/40000events/event_classidication_Samusik01_5.mat',{'y_true':y_true,'y_pred_index':y_pred_index,'y_pred_oracle':y_pred_oracle,'X':X})" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "data": { - "text/plain": [ - "array([ 0.95048917, 0.67939862, 0.96874316])" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "np.mean(score_final, axis = 0) # score of ACDC, score-based classification, phenograph classification" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "data": { - "text/plain": [ - "[-1.9956682595356767e-06,\n", - " (0.5571498093994265, 242.22021229918943, 0.0307672175566438)]" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "process_time" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [] - } - ], - "metadata": { - "anaconda-cloud": {}, - "kernelspec": { - "display_name": "Python [conda env:Anaconda3]", - "language": "python", - "name": "conda-env-Anaconda3-py" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.6" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -}