From 15b5f4a7ef1d8b3a287e63019ed3007d5d5081d4 Mon Sep 17 00:00:00 2001 From: johannesostner Date: Tue, 19 Oct 2021 10:38:20 +0200 Subject: [PATCH] Update model_comparison_analysis.ipynb --- benchmarking/model_comparison_analysis.ipynb | 181 +++++++++++++++++++ 1 file changed, 181 insertions(+) diff --git a/benchmarking/model_comparison_analysis.ipynb b/benchmarking/model_comparison_analysis.ipynb index 4a53023..9f32b8b 100644 --- a/benchmarking/model_comparison_analysis.ipynb +++ b/benchmarking/model_comparison_analysis.ipynb @@ -54,6 +54,187 @@ } } }, + { + "cell_type": "markdown", + "source": [ + "We want to show one example benchmarking dataset that captures the essence of scCODA:\n", + "Compositional analysis on low-replicate data.\n", + "Thus, we choose one where we have an effect that is large (log-fold = 2)\n", + "and the replicate number is low (2 samples per group)." + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 6, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "relevant indices: Int64Index([4740, 4741, 4742, 4743, 4744, 4745, 4746, 4747, 4748, 4749, 4750,\n", + " 4751, 4752, 4753, 4754, 4755, 4756, 4757, 4758, 4759],\n", + " dtype='int64')\n" + ] + } + ], + "source": [ + "save_path = \"../../sccoda_benchmark_data/model_comparison/data_model_comparison/\"\n", + "\n", + "# read generation parameters and find one where we have an increase that is large\n", + "# and replicate number is low\n", + "gen_params = pd.read_csv(save_path + \"generation_parameters\", index_col=0)\n", + "print(f'relevant indices: {gen_params.loc[(gen_params[\"n_controls\"]==2) & (gen_params[\"log-fold increase\"]==2) & (gen_params[\"Base\"]==1000)].index}')\n", + "\n", + "# choose one dataset as example (e.g. number 2304 one with these properties)\n", + "example_index = 4744\n", + "example_params = gen_params.iloc[example_index, :]" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "We select one dataset that matches these criteria (here the fourth).\n", + "The raw counts look like this:" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 7, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Observation names are not unique. To make them unique, call `.obs_names_make_unique`.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[ 906. 1370. 1056. 861. 807.]\n", + " [1288. 804. 1061. 895. 952.]\n", + " [3734. 275. 264. 270. 457.]\n", + " [4038. 244. 156. 282. 280.]]\n" + ] + } + ], + "source": [ + "# read all generated data and pick the one we selected\n", + "datasets = ad.read_h5ad(save_path + \"generated_data\")\n", + "example_data = datasets[datasets.obs[\"dataset_no\"] == example_index]\n", + "\n", + "print(example_data.X)\n" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "First, a barplot of the dataset to gain some intuition:\n", + "\n", + "\n", + "The first cell type increases from the control to the case category,\n", + "while the others behave the same (slightly decrease).\n", + "This decrease is due to compositional effects and should not be picked up as significant by a statistical method\n" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 30, + "outputs": [ + { + "data": { + "text/plain": "
", + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAc0AAAEICAYAAAA9YK8aAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Il7ecAAAACXBIWXMAAAsTAAALEwEAmpwYAAAi30lEQVR4nO3de3xU5b3v8c8vCYQoCOFq5CKoAUUR2US0thyrVQvbij3UrUVtbLctL2Wr1WqtPdRL8dANle4qXqoUb1FP1VZUUKzVbm+tF8AdUQOEICoCAWLASCAGwvzOHzPBScxlBTKzJpnv+/WalzNrnlnzW3GY7zzPWutZ5u6IiIhI6zLCLkBERKSjUGiKiIgEpNAUEREJSKEpIiISkEJTREQkIIWmiIhIQAkLTTO7z8y2mNn7zTxvZjbXzNaY2btm9i+JqkVERKQ9JLKn+QAwoYXnJwL5sdtU4A9BVjphwgQHdNNNN910a9tN2kHCQtPdXwW2ttDkbKDIo94EeplZXmvr/fTTT9urRBERkTbJCvG9BwKfxD1eH1tW3t5vVHHOv7X3Kttdv7/8OewSRESkFR3iQCAzm2pmy8xsWUVFRdjliIhImgqzp7kBGBz3eFBs2Ve4+zxgHkBBQUGbx+bVixMRkfYQZmguBC4zs0eBE4Aqd2/3oVkREWna22+/3T8rK2s+cAwdZOQxwSLA+3V1dT8eO3bslqYaJCw0zexPwDeBvma2HrgR6ALg7ncDi4F/BdYAO4EfJaqW5vZpNtcDDaO9esMikmxZWVnzDz744KP69eu3LSMjI+2PsI1EIlZRUTFy06ZN84FJTbVJWGi6+5RWnnfgPxL1/iIi0qpjFJhfysjI8H79+lVt2rTpmObahDk8mzRt7cWlWnsRkQTJUGA2FPt7NDtUrTFsERGRgBSaIiLSbj755JOss846a9igQYNGHX300Ucdd9xxRxYVFfUKu672otAUEZF2EYlEOOuss44YP3589fr1698rKSlZ+fjjj6/95JNPusa32717d1gl7jeFpoiItItFixb16NKli1977bV7Z6EZPnz4runTp2+ZO3dun1NPPfWIE088cfhJJ500YvPmzZmnnXba4cOHDx85evToI996660cgJ/97GeH3HDDDQPqX5+fn390aWlp19LS0q7Dhg07etKkScMOO+ywoydMmHDY9u3bk55hCk0REWkX7733Xs6xxx67s7nnS0pKDnj66ac/WLp0aem11157yOjRo3euXr16xc0337zhoosuGtba+j/66KNul1122Za1a9eW9OjRI3LLLbf0a98taF1aHD176f1Lmlz+hx+NS5n2zbUVEemofvCDHwxZsmRJ9y5duvjUqVO3jB8//vMBAwbsAViyZEmPJ554Yg3ApEmTtk+dOjVr69atLXbkDj744F1nnHHGjti6K+fOndsf2JzwDYmjnqaIiLSLUaNG1bz77rsH1D9+6KGH1r388surt23blgVwwAEHRFpbR1ZWlkciXzarra21+vtm1qBt48fJkBY9zbb24lKtvYhIR3DWWWdtv/7662327Nn9fvGLX1QAVFdXN9k5O+GEE7bff//9fW655ZbyZ555pkdubm5d7969I0OHDq1dvHhxL4B//OMfB2zYsCG7/jXl5eVdX3zxxQNPO+20HY888kjvk046qTopGxZHPU0REWkXGRkZLFq06IPXXnutx8CBA0eNGjXqqAsvvHDoTTfdtL5x29mzZ28sLi4+YPjw4SOnT58+8IEHHvgQoLCwcNu2bdsyjzjiiKNvu+22/oceeugX9a8ZOnToF7fffnv/ww477OjPPvss65prrkn6Za/SoqcpIiLJceihh+5+5pln1jbzdGX9nQEDBux58cUXP2jcoHv37v7Pf/6zrPHy0tLSrllZWTz99NMftmO5baaepoiISEAKTRERSXkjRozYVVZWVhJ2HQpNERGRgBSaIiIiAaXFgUD/57Xrwi6hVb8ZPyvsEkREpBXqaYqIiASUFj1N9eJERFr3TPGHY9tzfd8ZM+zt9lxfU0pLS7u+9NJL3S+55JKtbXnd3Llz+yxbtuzAoqKidW15nXqaIiLSYZWVlWU/9thjvZt6LhGXIEuLnqZIU7SvWyR8d9xxR5+5c+cOMDOOOuqomtmzZ2+46KKLhm7dujWrT58+dUVFRR/l5+fv+t73vje0R48ee5YvX35gRUVFl5tvvnn9j370o23Tp08fuHbt2m5HHnnkyClTpnyam5u756mnnsrduXNnxp49e+yZZ55Zc8EFFwxdt25ddk5OTmTevHkfn3DCCTX7Wq96miIiEoply5Z1mzNnTt4rr7yyurS0dMU999yz7tJLLx1ywQUXVK5evXrFeeedV3nppZcOrm+/efPmLsuWLVv19NNPl914440DAWbOnLmhoKCgetWqVStuvPHGLbD/lyBriXqakrbUixMJ1/PPP3/QWWedtS0vL68OolPrFRcXH/jcc899AHDppZdu/fWvfz2ovv2kSZM+y8zMZOzYsV9UVlZ2aW69+3sJspaopykiIh1Ct27dvP6+uzfbLsglyPaVQlNERELx7W9/+/NFixblbtq0KRNg8+bNmWPGjNkxf/78XIB77rmnd0FBQYuX/+rZs+ee6urqzOaer78EGUD8Jcj2tea0GJ4NesDHt4Z8i28dejoAf//4Bf6+7u8tLm9N/PBffA1NLddQoYiELRmniMQrKCj44uqrry4fP378kRkZGX7MMcfsvPvuu9cVFhYOve222w6uPxCopXWMGzeuJjMz00eMGDHy/PPP/zQ3N3dP/POzZ8/eeMEFFwwdPnz4yJycnEj9Jcj2VVqEpoiIpKbLL7+88vLLL6+MX/bmm2+ubtzuiSee+Cj+8c6dO4sBsrOzvYn2rV6C7IorrqiMbxeUtTQunIoKCgp82bJlYZchItLRWOMFy5cv/2j06NGfhlFMKlu+fHnf0aNHD23qOe3TFBERCSgthmcXTv9b2CW0atLMM5LyPvpbfEl/iy/pb/GlVP9bJOvvIE1TT1NERCQg7dMUEUkP2qcZkPZpioiItIO03qfZ3L6BMNqHvb9Gf4vW378z/y1EACbOfKpdLw323PTvJvW8z2RQTzNE2Qd15YBRXagdtp3S0lIikYTN/CQiIu0gofs0zWwCcBuQCcx391mNnh8CPAj0irW5zt0Xt7TOzrJPMxKJsGDBAgoLC6mpqSEnJ4eioiImT55MRoZ+y4hIu2t1n2YYPc3GlwY799xzt86aNStv9+7dGbm5uXWPPfbY2sGDB9c9++yz3a+++uohAGbG66+/vio3Nzdy/fXXD3jyySd779q1y84888zPfv/732/c37pD2adpZpnAncBEYCQwxcxGNmr2K+Bxdx8DfB+4K1H1pJqysrK9gQlQU1NDYWEhZWVlIVcmIpIcTV0a7PTTT69+5513Vq1cuXLFOeecs3XGjBkHA/zud787eO7cuR+vWrVqxZtvvrmqe/fukQULFhy0Zs2abu++++7KlStXrnjnnXcOeO6557onsuZE7tMcB6xx97UAZvYocDawIq6NAwfF7vcE9vsXQkdRXl6+NzDr1dTUUF5ezogRI0KqSkQkeZq6NNiSJUtyvvvd7w6qqKjosmvXrozBgwfXApx44onV11xzzeBzzz1365QpU7Ydfvjhkb/+9a8HvfrqqweNHDlyJMDOnTszVq1a1W3ixIktTvK+PxI5DjgQ+CTu8frYsng3ARea2XpgMXB5AutJKXl5eeTk5DRYlpOTQ15eXkgViYiE77LLLhsybdq0LatXr15xxx13fFxbW5sB8Jvf/GbT/PnzP66pqckYP378kcXFxd3cnSuvvLJ81apVK1atWrVi3bp171911VUJPYUm7J1nU4AH3H0Q8K/AQ2b2lZrMbKqZLTOzZRUVFUkvMhHy8/MpKiraG5z1+zTz8/NDrkxEJDmaujTY9u3bM4cMGbIb4IEHHuhT37akpCR73LhxNTNnztx07LHH7nj//fe7TZw48fOHHnqob1VVVQbAhx9+2GXDhg0JPSskkSvfAAyOezwotizexcAEAHd/w8y6AX2BLfGN3H0eMA+iBwIlquBkysjIYPLkyVRVVVFVVUXPnj11EJCIhCrZp4g0dWmw6dOnb5wyZcrhPXv2rPvGN76xfd26ddkAv/3tb/u//vrrB5mZjxgxouacc86pysnJ8ZKSkm7HH3/8kRC9+PQjjzzy4cCBA+sSVXPCjp41syxgNfAtomG5FDjf3Uvi2jwHPObuD5jZUcDfgYHeQlGd5ejZeoWFhWzYsIGBAwdSVFQUdjki0nlpRqCAQjl61t3rgMuA54GVRI+SLTGzGWY2KdbsauAnZrYc+BPww5YCU0REJEwJHfuNnXO5uNGyG+LurwC+nsgawjBx5lOB2+7augOADVt3tOl1z03/btuKEhGR/aYdaCIi6SsSiUS+MmybzmJ/j2anZ1Noioikr/crKip6KjijIpGIVVRU9ATeb65NWkzYLiIiX1VXV/fjTZs2zd+0adMxqBMF0R7m+3V1dT9uroFCM0R9umdzwplnsGtnNdkH9uDN2mwqq2vDLktE0sTYsWO3AJNabSh7KTRD0qd7NuNyNvOLK6/aO2H7rFvvYgkDFJwiIilK3fGQnHRIF667clqDCduvu3IaJx3SJeTKRESkOQrNkNRWb2tywvba6m0hVSQiIq1RaIakW4/cJidsz+6eG1JFIiLSGoVmSP65YTezbr2rwYTts269i9c37g65MhERaY4OBApJZXUtSxjAnAcXUlu9jezuuby+cbcOAhIRSWEKzRBVVteyaHUt0AVI2DVTRUSknWh4VkREJCCFpoiISEAKTRERkYAUmiIiIgEpNEVERAJSaIqIiASk0BQREQlIoSkiIhKQQlNERCQghaaIiEhACk0REZGAFJoiIiIBKTRFREQCUmiKiIgEpNAUEREJSKEpIiISkEJTREQkIIWmiIhIQApNERGRgBSaIiIiASk0RUREAlJoioiIBKTQFBERCSihoWlmE8ys1MzWmNl1zbQ518xWmFmJmf2/RNYjIiKyP7KCNjSzk4Ch8a9x96IW2mcCdwKnA+uBpWa20N1XxLXJB34JfN3dt5lZ/zZvgYiISJIECk0zewg4HHgH2BNb7ECzoQmMA9a4+9rYOh4FzgZWxLX5CXCnu28DcPctbSleREQkmYL2NAuAke7ubVj3QOCTuMfrgRMatRkOYGb/BDKBm9z9r214DxERkaQJGprvAwcD5Ql4/3zgm8Ag4FUzG+Xun8U3MrOpwFSAIUOGtHMJIiIiwQQNzb7ACjNbAtTWL3T3SS28ZgMwOO7xoNiyeOuBt9x9N/Chma0mGqJL4xu5+zxgHkBBQUFbersiIiLtJmho3rQP614K5JvZMKJh+X3g/EZtngKmAPebWV+iw7Vr9+G9REREEi7QKSfu/gqwCugRu62MLWvpNXXAZcDzwErgcXcvMbMZZlbfQ30eqDSzFcBLwM/dvXLfNkVERCSxgh49ey5wC/AyYMDtZvZzd/9LS69z98XA4kbLboi778DPYjcREZGUFnR4djpwfP0pIWbWD3gRaDE0RUREOpOgMwJlNDqHsrINrxUREekUgvY0/2pmzwN/ij0+j0bDriIiIp1doNB095+b2feAr8cWzXP3JxNXloiISOoJPPesuz8BPJHAWkRERFJai6FpZv9w92+Y2Xaic83ufYrowa8HJbQ6ERGRFNJiaLr7N2L/7ZGcckRERFJXoCNgY1c5aXWZiIhIZxb0tJGj4x+YWRYwtv3LERERSV0thqaZ/TK2P/NYM/s8dtsObAaeTkqFIiIiKaLF0HT3/wR6AkXuflDs1sPd+7j7L5NTooiISGpodXjW3SPA8UmoRUREJKUF3af5P2am4BQRkbQWdHKDE4ALzOxjYAdfnqd5bMIqExERSTFBQ/PbCa1CRESkAwh6EeqPgV7AWbFbr9gyERGRtBF0coOfAo8A/WO3h83s8kQWJiIikmqCDs9eDJzg7jsAzGw28AZwe6IKExERSTVBj541YE/c4z2xZSIiImkjaE/zfuAtM3uSaFieDdybsKpERERSUNCLUP+Xmb0MfIPoJcJ+5O7FiSxMREQk1QQdnq1njf4rIiKSNoIePXsD8CCQC/QF7jezXyWyMBERkVQTdJ/mBcBod/8CwMxmAe8A/zdBdYmIiKScoMOzG4FucY+zgQ3tX46IiEjqCtrTrAJKzOwFogcCnQ4sMbO5AO5+RYLqExERSRlBQ/PJ2K3ey+1fioiISGoLesrJg2bWFRgeW1Tq7rsTV5aIiEjqCRSaZvZNokfPfkT0dJPBZnaRu7+asMpERERSTNDh2d8BZ7h7KYCZDQf+BIxNVGEiIiKpJujRs13qAxPA3VcDXRJTkoiISGoK2tN828zmAw/HHl8ALEtMSSIiIqkpaGheAvwHUH9qyWvAXQmpSEREJEW1Gppmlgksd/cjgf9KfEkiIiKpqdV9mu6+Byg1syFtXbmZTTCzUjNbY2bXtdDue2bmZlbQ1vcQERFJlqDDs7lEZwRaAuyoX+juk5p7QayHeifR2YPWA0vNbKG7r2jUrgfwU+CtNtYuIiKSVEFD8/p9WPc4YI27rwUws0eJXrx6RaN2NwOzgZ/vw3uIiIgkTYuhaWbdiB4EdATwHnCvu9cFXPdA4JO4x+uBExqt/1+Awe7+rJkpNEVEJKW1tk/zQaCAaGBOJDrJQbswswyiBxZdHaDtVDNbZmbLKioq2qsEERGRNmlteHaku48CMLN7gSVtWPcGYHDc40E0vJxYD+AY4GUzAzgYWGhmk9y9wTmg7j4PmAdQUFDgbahBRESk3bTW09w7KXsbhmXrLQXyzWxYbLL37wML49ZX5e593X2ouw8F3gS+EpgiIiKporWe5mgz+zx234Cc2GMD3N0Pau6F7l5nZpcBzwOZwH3uXmJmM4Bl7r6wudeKiIikohZD090z92fl7r4YWNxo2Q3NtP3m/ryXiIhIogWdsF1ERCTtKTRFREQCUmiKiIgEpNAUEREJSKEpIiISkEJTREQkIIWmiIhIQApNERGRgBSaIiIiASk0RUREAlJoioiIBKTQFBERCUihKSIiEpBCU0REJCCFpoiISEAKTRERkYAUmiIiIgEpNEVERAJSaIqIiASUFXYBknoikQhlZWWUl5eTl5dHfn4+GRn6fSUiom9CaSASibBgwQLGjBnDKaecwpgxY1iwYAGRSCTs0kREQqfQlAbKysooLCykpqYGgJqaGgoLCykrKwu5MhGR8Ck0mxGJRCgtLeXll1+mtLQ0bXpa5eXlewOzXk1NDeXl5SFVJCKSOhSaTUjnIcq8vDxycnIaLMvJySEvLy+kikREUodCswnpPESZn59PUVHR3uDMycmhqKiI/Pz8kCsTEQmfjp5tQktDlCNGjAipquTIyMhg8uTJVFVVUVVVRc+ePZk8ebKOnhURQT3NJqX7EGVGRgavvPIKzz77LK+88ooCU0QkRt+GTdAQpYiINEXDs03QEKWIiDRFKdAMDVGKiEhjSgIREZGAFJoiIiIBaZ+mSCN1dXUUFxezfv16Bg0axJgxY8jK0j8VEVFPU6SBuro6Hn74YU4++WQmT57MySefzMMPP0xdXV3YpYlICkjoz2czmwDcBmQC8919VqPnfwb8GKgDKoB/d/ePE1XPM8Uftqn9jtq6vf9t62tTUVu2YV+3/TtjhrW5rlRSXFzMtGnTGswGNW3aNI4++miOP/74kKsTkbAlLDTNLBO4EzgdWA8sNbOF7r4irlkxUODuO83sUuC3wHmJqknSV9Dg3/nRx03OBvXhx+vYnNW3xdd29B8MItK6RA7PjgPWuPtad98FPAqcHd/A3V9y952xh28CgxJYj0irBuQd0uRsUP0PTo/ZoESkZYkMzYHAJ3GP18eWNedi4LkE1iPSKs8dyB133tlgNqg77rwTclv66Epnka6XBJTgUuKQQDO7ECgATm7m+anAVIAhQ4YksTJJN9W7Ihw27lQW/+1Ftmwqj/YwcwdSvUtfnp1d/SUB669wVD99pmYDk3iJDM0NwOC4x4Niyxows9OA6cDJ7l7b1IrcfR4wD6CgoMDbv9Sv6pppnHHaqVR/XkWPnr3ommns2pOUt5aQVe+KwIF5HHB4HtUAaRSYkUiEsrIyysvLycvLIz8/P20Co7lLAo4aNarTX91IgktkaC4F8s1sGNGw/D5wfnwDMxsD3ANMcPctCaylTbpmGuXvvcFVP71i7y/OP/zxXvJGfU3BKZ1Wuve00vmSgBJcwv4luHsdcBnwPLASeNzdS8xshplNijW7BegO/NnM3jGzhYmqpy0yd1Ry6U8ubvCL89KfXEzmjsqQKxNJnHS++DrokoASTEJ/Prr7Yncf7u6Hu/vM2LIb3H1h7P5p7j7A3Y+L3Sa1vMbk2PbpliZ/cW77NGU6wyLtbuPGjc32tNKBLgkoQaTEgUCppnff/uTk5DT4AsnJySG3b39qWnhdZ6H9uZ1PkPNUe3TJbvJzT1bXQK9P1fNU2zI5R9+jjuexxx9na2Ulvfv0ocewUSxe3vp8K6m67dL+Ov+Oin1Qd2Af/vDHexv84vzDH+9lz4F9Qq4s8eL3515zzTVcecXllL/3Bl0zLezSJME+31HD9ddf3+Bzf/3117N9Rzr8VIx+9j9+53XOO/dcfvjDH3Leuefy8Tuv67MvDain2YRde5y8UV9j0d9fY9unW8jt2589B/ZJi95Wc/tzF/39NejWO+TqJJEOPKgX9913H1dddRVmhrtz3333cfc3z0iLERZ99iUIhWYzdu1x6NabboN6R78w0iAwoeX9ud0G6YujM6s7sA+/umnG3uBoMMKSBp9/ffYlCIWmNJDu+3PTWTqPsIA++xKM9mlKA+m8P1eiwVnTrTfdBh1JTbfeaROYoM++BKOepjSQ7r0NSV/67EsQCk35inTdnyuiz760RsOzIiIiASk0RUREAlJoioiIBKTQFBERCUihKSIiEpBCU0REJCCFpoiISEA6T1NEJIkikQhlZWWUl5eTl5dHfn4+GRnqv3QU+j8lIpIkkUiEBQsWMGbMGE455RTGjBnDggULiEQiYZcmASk0RUSSpKysjMLCwgaXHyssLKSsrCzkyiQohaaISJKUl5c3efmx8vLykCqStlJoiogkSV5e3t6rqNTLyckhLy8vpIqkrRSaIiJJkp+fT1FRUYPLjxUVFZGfnx9yZRKUjp4VEUmSjIwMJk+eTFVVFVVVVfTs2ZPJkyfr6NkORKEpIrKfJs58qk3td72xGGo+g5xe/GVT8ItcPzf9u216H2l/Ck0JTTqfr5bO2w5QV1dHcXEx69evZ9CgQYwZM4asrPT4OurTPZsTzjyDXTuryT6wB2/WZlNZXRt2WRJQenxKJeVEIhGeffZZli5dSiQSITMzk4KCAs4888xOHx715+rVn3pQv18rXYbp6urqePjhh5k2bdre7b/rrru48MILO31w9umezbiczfziyqv2bvusW+9iCQMUnB1E5/8XKinpgw8+oKSkhDlz5jBz5kxuueUWSkpK+OCDD8IuLeHS/Vy94uLivYEJ0e2fNm0axcXFIVeWeCcd0oXrrmy47dddOY2TDukScmUSVOf+WScpa+PGjcyYMaPBl8eMGTP42te+1umPJGzpXL0RI0aEVNX+C7pfb8qw2ia3f2XZh9zwtw0tvraj79Orrd7W5LbXVm8DFJwdgUJT2lXQL84L8iNNfnl8srmy1XV09C/O+nP14rc/nc7Vy+03oMnt79WnP3z4WXiFJUG3HrlNbnt291ygOrzCJDCFpoSiNrvpL48vuvYCPg+trmTIz8/nscce+8r+3M7ew673t4/2MPf2O7ji8sv27tebe/sdvLBuT9ilJdw/N+xm1q137R2ird+n+frG3WGXJgEpNCUUL330Bb+dezfXXnHJ3i+P3869m5c/7rgHQwTtZffpns2JB25jzpw5Dba98PbnAx0M0tF72mu2bIf+h/Lok4v5rHILvfr054V1e6LLO7nK6lqWMIA5Dy6ktnob2d1zeX3jbh0E1IEoNCUUldW1vEm/tPzyOOmQLlxz0SUN9udee8UlzHlwIYtWd/7th2hwrtkC0LXTD8k2VlldG/v/3AUNyXY8Ck0JTbp+eehgEJGOS6eciCRZ/cEg8b48GEREUplCUyTJ6g8GiZ+0WweDiHQMCR2eNbMJwG1AJjDf3Wc1ej4bKALGApXAee7+USJrEgmbDgYR6bgSFppmlgncCZwOrAeWmtlCd18R1+xiYJu7H2Fm3wdmA+clqiaRVJGu+3NFOrpEDs+OA9a4+1p33wU8CpzdqM3ZwIOx+38BvmVmlsCaRERE9lkiQ3Mg8Enc4/WxZU22cfc6oAoIfp0cERGRJOoQp5yY2VRgauxhtZmVhllPAH2BTxP5BvarRK59v2jbEyydtz+dtx32e/v/6u4T2qmUtJXI0NwADI57PCi2rKk2680sC+hJ9ICgBtx9HjAvQXW2OzNb5u4FYdcRBm17em47pPf2p/O2p5tEDs8uBfLNbJiZdQW+Dyxs1GYhcFHs/jnAf7u7J7AmERGRfZawnqa715nZZcDzRE85uc/dS8xsBrDM3RcC9wIPmdkaYCvRYBUREUlJCd2n6e6LgcWNlt0Qd/8L4N8SWUNIOsxQcgJo29NXOm9/Om97WjGNhoqIiASjafREREQCUmi2IzObYGalZrbGzK4Lu55kMrP7zGyLmb0fdi3JZmaDzewlM1thZiVm9tOwa0oWM+tmZkvMbHls238ddk1hMLNMMys2s2fCrkUSS6HZTuKmDZwIjASmmNnIcKtKqgeAdD0HrA642t1HAicC/5FG/+9rgVPdfTRwHDDBzE4Mt6RQ/BRYGXYRkngKzfYTZNrATsvdXyV6BHTacfdyd/+f2P3tRL88G89+1Sl5VP3kuV1it7Q6UMLMBgFnAvPDrkUST6HZfoJMGyidnJkNBcYAb4VcStLEhibfAbYAL7h72mx7zK3AtUAk5DokCRSaIu3EzLoDTwBXuvvnYdeTLO6+x92PIzrr1zgzOybkkpLGzL4DbHH3t8OuRZJDodl+gkwbKJ2UmXUhGpiPuPuCsOsJg7t/BrxEeu3b/jowycw+IrpL5lQzezjckiSRFJrtJ8i0gdIJxS5ndy+w0t3/K+x6ksnM+plZr9j9HKLXz10ValFJ5O6/dPdB7j6U6L/5/3b3C0MuSxJIodlOYpc2q582cCXwuLuXhFtV8pjZn4A3gBFmtt7MLg67piT6OvADor2Md2K3fw27qCTJA14ys3eJ/nB8wd112oV0WpoRSEREJCD1NEVERAJSaIqIiASk0BQREQlIoSkiIhKQQlNERCQghaakJTM72MweNbMPzOxtM1tsZsNbeU117L9Dm7qaS2z5+YmqWUTCp9CUtBObjOBJ4GV3P9zdxwK/BAbs56qHAgpNkU5MoSnp6BRgt7vfXb/A3Ze7+2sAZvZzM1tqZu+28fqQs4DxsckNrjKzV83suPonzewfZjbazG4ys4fM7A0zKzOzn8S12df3FpEkUGhKOjoGaHKCbTM7A8gneqm344CxZva/Aq73OuA1dz/O3X9PdGq9H8bWOxzo5u7LY22PBU4FvgbcYGaH7Od7i0gSKDRFGjojdisG/gc4kmiQ7Ys/A9+JTeb+70Qv1F3vaXevcfdPiU5yPq6d31tEEiAr7AJEQlACnNPMcwb8p7vfs79v4u47zewFohcjPxcYG/904+bt+d4ikhjqaUo6+m8g28ym1i8ws2PNbDzRCff/PXZtTMxsoJn1D7je7UCPRsvmA3OBpe6+LW752WbWzcz6AN8kOtn5/ry3iCSBepqSdtzdzex/A7ea2S+AL4CPiF48uszMjgLeiB5kSzVwIbAlwKrfBfaY2XLgAXf/vbu/bWafA/c30fYloC9ws7tvBDbux3uLSBLoKiciCWRmhwAvA0e6eyS27Cag2t3nhFiaiOwDDc+KJIiZFQJvAdPrA1NEOjb1NEVERAJST1NERCQghaaIiEhACk0REZGAFJoiIiIBKTRFREQCUmiKiIgE9P8BVW2ngjGhpJIAAAAASUVORK5CYII=\n" + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "X = example_data.X\n", + "cell_types = example_data.var.index\n", + "obs = example_data.obs\n", + "\n", + "# Aggregate data by category\n", + "\n", + "count_df = pd.DataFrame(np.divide(X, np.sum(X, axis=1, keepdims=True)), columns=cell_types, index=obs.index).\\\n", + " merge(obs[\"x_0\"], left_index=True, right_index=True)\n", + "plot_df = pd.melt(count_df, id_vars=\"x_0\", var_name=\"Cell type\", value_name=\"count\")\n", + "\n", + "# barplot\n", + "d = sns.barplot(x=\"Cell type\", y=\"count\", hue=\"x_0\", data=plot_df,\n", + " palette='Blues')\n", + "\n", + "sns.scatterplot(x=plot_df.loc[plot_df[\"x_0\"]==0, \"Cell type\"].astype(\"float\")-0.2, y=\"count\", data=plot_df[plot_df[\"x_0\"]==0], color=\"black\", zorder=10, marker=\"o\")\n", + "sns.scatterplot(x=plot_df.loc[plot_df[\"x_0\"]==1, \"Cell type\"].astype(\"float\")+0.2, y=\"count\", data=plot_df[plot_df[\"x_0\"]==1], color=\"black\", zorder=10, marker=\"o\")\n", + "\n", + "loc, labels = plt.xticks()\n", + "\n", + "handles, labels = d.get_legend_handles_labels()\n", + "plt.legend(handles=handles, labels=[\"control\", \"case\"], loc='upper left', bbox_to_anchor=(1, 1), ncol=1, title=\"Group\")\n", + "\n", + "d.set(xlabel=\"Cell type\", ylabel=\"Proportion\", ylim=(0,1.01))\n", + "sns.despine()\n", + "\n", + "# manually add credible effects for each method (see below)\n", + "dashes=[(1,0), (4, 4), (7, 2, 2, 2)]\n", + "colors = ['#e41a1c','#377eb8','#4daf4a','#984ea3']\n", + "\n", + "plt.axhline(y = 1, xmin=0.04, xmax = 0.16, color=colors[0], dashes=dashes[0])\n", + "plt.axhline(y = 0.98, xmin=0.04, xmax = 0.16, color=colors[0], dashes=dashes[1])\n", + "plt.axhline(y = 0.94, xmin=0.04, xmax = 0.16, color=colors[1], dashes=dashes[1])\n", + "plt.axhline(y = 0.9, xmin=0.04, xmax = 0.16, color=colors[2], dashes=dashes[0])\n", + "plt.axhline(y = 0.86, xmin=0.04, xmax = 0.16, color=colors[2], dashes=dashes[2])\n", + "plt.axhline(y = 0.84, xmin=0.04, xmax = 0.16, color=colors[3], dashes=dashes[0])\n", + "plt.axhline(y = 0.82, xmin=0.04, xmax = 0.16, color=colors[3], dashes=dashes[1])\n", + "\n", + "plt.axhline(y = 0.88, xmin=0.44, xmax = 0.56, color=colors[2], dashes=dashes[0])\n", + "plt.axhline(y = 0.84, xmin=0.44, xmax = 0.56, color=colors[3], dashes=dashes[0])\n", + "plt.axhline(y = 0.82, xmin=0.44, xmax = 0.56, color=colors[3], dashes=dashes[1])\n", + "\n", + "plt.axhline(y = 0.84, xmin=0.24, xmax = 0.36, color=colors[3], dashes=dashes[0])\n", + "plt.axhline(y = 0.82, xmin=0.24, xmax = 0.36, color=colors[3], dashes=dashes[1])\n", + "\n", + "plt.axhline(y = 0.84, xmin=0.64, xmax = 0.76, color=colors[3], dashes=dashes[0])\n", + "plt.axhline(y = 0.84, xmin=0.84, xmax = 0.96, color=colors[3], dashes=dashes[0])\n", + "\n", + "plot_path = \"../../sccoda_benchmark_data/model_comparison/model_comparison_plots/\"\n", + "plt.savefig(plot_path + \"/model_comparison_example_data_grouped_v2.svg\", format=\"svg\", bbox_inches=\"tight\")\n", + "plt.savefig(plot_path + \"/model_comparison_example_data_grouped_v2.png\", format=\"png\", bbox_inches=\"tight\")\n", + "\n", + "plt.show()" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, { "cell_type": "code", "execution_count": 2,