diff --git a/Numpy (26.02)/Numpy_Task.ipynb b/Numpy (26.02)/Numpy_Task.ipynb index 593ba20..9f181ad 100644 --- a/Numpy (26.02)/Numpy_Task.ipynb +++ b/Numpy (26.02)/Numpy_Task.ipynb @@ -2,7 +2,8 @@ "cells": [ { "cell_type": "code", - "execution_count": null, + "execution_count": 3, + "id": "f125f27f-05bb-43e2-be34-21b7e87c0b4d", "metadata": { "id": "medieval-detail" }, @@ -13,6 +14,7 @@ }, { "cell_type": "markdown", + "id": "bad022a2-22ad-464d-ba71-7ca71e64ba1c", "metadata": { "id": "abstract-istanbul" }, @@ -25,20 +27,31 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, + "id": "45d3c48b-ac25-46f2-9419-61e1d2499185", "metadata": { "id": "entertaining-automation" }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n" + ] + } + ], "source": [ "python_list = [1, 12, 13, 45, 76, 45, 98, 0]\n", - "print()\n", - "python_list = \n", - "print()" + "print(type(python_list))\n", + "python_list = np.array(python_list)\n", + "print(type(python_list))" ] }, { "cell_type": "markdown", + "id": "e57137f8-29fb-4fb6-be79-3a933601c8fb", "metadata": { "id": "loose-tobago" }, @@ -49,18 +62,28 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, + "id": "a0f511c6-d9cf-4a29-abd1-5dface554bbf", "metadata": { "id": "included-polymer" }, - "outputs": [], - "source": [ - "z = \n", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[1.5 1.5 1.5 1.5 1.5 1.5 1.5 1.5 1.5 1.5]\n" + ] + } + ], + "source": [ + "z = np.full(10, 1.5)\n", "print(z)" ] }, { "cell_type": "markdown", + "id": "77e72d02-590f-4636-a45c-367845abceb5", "metadata": { "id": "threatened-theme" }, @@ -71,18 +94,31 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, + "id": "834a57ac-c7ca-46d5-98c8-ed3f0aaee6cc", "metadata": { "id": "alert-endorsement" }, - "outputs": [], - "source": [ - "z = \n", + "outputs": [ + { + "name": "stdout", + "output_type": 
"stream", + "text": [ + "[[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", + " 0.]\n", + " [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", + " 0.]]\n" + ] + } + ], + "source": [ + "z = np.zeros((2, 25))\n", "print(z)" ] }, { "cell_type": "markdown", + "id": "c4a9873d-2128-4268-b899-0dfdda9fbffc", "metadata": { "id": "federal-blackberry" }, @@ -93,18 +129,28 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, + "id": "14e4b538-ea07-4f55-9d32-cd40fc96378f", "metadata": { "id": "static-filing" }, - "outputs": [], - "source": [ - "ones = \n", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n" + ] + } + ], + "source": [ + "ones = np.ones(12)\n", "print(ones)" ] }, { "cell_type": "markdown", + "id": "0c97e1af-56e4-41cf-9b11-f406f287b55a", "metadata": { "id": "whole-chassis" }, @@ -116,18 +162,31 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, + "id": "d455e5bc-d250-434b-96fb-17c7eedb16c9", "metadata": { "id": "outstanding-deviation" }, - "outputs": [], - "source": [ - "ones = \n", + "outputs": [ + { + "data": { + "text/plain": [ + "(3, 4)" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ones = ones.reshape((3, 4))\n", "ones.shape" ] }, { "cell_type": "markdown", + "id": "75e2f0fc-c75e-4850-bc0d-ed8c364d4ddf", "metadata": { "id": "cubic-noise" }, @@ -139,20 +198,37 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, + "id": "82e562ab-9593-4591-b9d6-8e6e0a3cdda4", "metadata": { "id": "foster-memory" }, - "outputs": [], - "source": [ - "Z = \n", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[ 1 2 3 4 5]\n", + " [ 6 7 8 9 10]\n", + " [11 12 13 14 15]\n", + " [16 17 18 19 20]]\n", + "[[ 1 2 3 4 5]\n", + " [ 6 7 8 9 10]\n", + " [ 11 12 13 -99 15]\n", + " [ 16 17 
18 19 20]]\n" + ] + } + ], + "source": [ + "Z = np.arange(1, 21).reshape(4, 5)\n", "print(Z)\n", - "\n", + "Z[2, 3] = -99\n", "print(Z)" ] }, { "cell_type": "markdown", + "id": "672cb33f-b455-4508-8686-89d98aaba97d", "metadata": { "id": "helpful-table" }, @@ -164,20 +240,31 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, + "id": "2c930315-9e2a-4ebe-aa5f-08a6ee40e9f9", "metadata": { "id": "magnetic-leone" }, - "outputs": [], - "source": [ - "first = \n", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[-6 9 -6 -3 -5 -7 -1 1 3 -4 2 5 7 6 -4]\n", + "[-4 6 7 5 2 -4 3 1 -1 -7 -5 -3 -6 9 -6]\n" + ] + } + ], + "source": [ + "first = np.random.randint(-10, 10, 15)\n", "print(first)\n", - "second = \n", + "second = first[::-1]\n", "print(second)" ] }, { "cell_type": "markdown", + "id": "4705f481-0030-4971-bbb6-7a63c13f447b", "metadata": { "id": "executed-september" }, @@ -189,20 +276,39 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, + "id": "7c6979ee-ea49-4b7a-9d25-8dcefe66ddf9", "metadata": { "id": "pharmaceutical-sigma" }, - "outputs": [], - "source": [ - "first = \n", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[12 -2 -9 -9 4]\n", + " [-5 -3 0 11 12]\n", + " [ 9 13 8 12 -7]\n", + " [-8 -9 14 -3 2]\n", + " [-8 -1 -4 -3 -1]]\n", + "[[12 4 81 81 4]\n", + " [25 9 0 11 12]\n", + " [ 9 13 8 12 49]\n", + " [64 81 14 9 2]\n", + " [64 1 16 9 1]]\n" + ] + } + ], + "source": [ + "first = np.random.randint(-15, 15, 25).reshape(5, 5)\n", "print(first)\n", - "\n", + "first = np.where(first >= 0, first, first**2)\n", "print(first)" ] }, { "cell_type": "markdown", + "id": "a2ef202b-0128-4504-9d5f-c6a188c934a8", "metadata": { "id": "floral-difference" }, @@ -216,18 +322,36 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 20, + "id": "0874190b-9763-4073-a573-3c22fb79885e", "metadata": { "id": "saving-conference" }, - 
"outputs": [], - "source": [ - "first = \n", - "print(first)\n" + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[ 3 -14 13 -9 12]\n", + " [ -5 1 3 -6 3]\n", + " [ 3 13 -5 10 -1]]\n", + "max = 13; min - -14;\n", + "[ 0.33333333 0. 3.66666667 -1.66666667 4.66666667]\n", + "[ 1. -0.8 4. ]\n" + ] + } + ], + "source": [ + "first = np.random.randint(-15, 15, 15).reshape(3, 5)\n", + "print(first)\n", + "print(f\"max = {first.max()}; min - {first.min()};\")\n", + "print(np.mean(first, axis=0))\n", + "print(np.mean(first, axis=1))\n" ] }, { "cell_type": "markdown", + "id": "15d47024-2ccd-415a-ab51-a420933510f8", "metadata": { "id": "diagnostic-departure" }, @@ -240,23 +364,45 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, + "id": "0aaf572f-0e40-4186-a33c-6e183e623e22", "metadata": { "id": "olympic-qatar" }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "5\n", + "a = [[-1 0 0 0 -4]\n", + " [-8 3 -7 4 2]]\n", + "b = [[-6 9 3]\n", + " [ 9 7 -4]\n", + " [ 3 -1 -9]\n", + " [ 8 -8 5]\n", + " [ 7 -1 -2]]\n", + "[[-22 -5 5]\n", + " [100 -78 43]]\n" + ] + } + ], "source": [ "a = np.random.randint(-10, 10, (2, 5))\n", "first_axis = np.random.randint(4, 6)\n", + "print(first_axis)\n", "b = np.random.randint(-10, 10, (first_axis, 3))\n", - "if :\n", + "print('a = ',a)\n", + "print('b = ',b)\n", + "if first_axis==5 : \n", " print(a @ b)\n", "else:\n", - " " + " print('ERORR')" ] }, { "cell_type": "markdown", + "id": "22c928a9-4f11-4c95-ab1a-f3632595cf67", "metadata": { "id": "governmental-austin" }, @@ -268,20 +414,34 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, + "id": "08b8c72f-e504-4b95-a30f-1f0747f9526e", "metadata": { "id": "suffering-mauritius" }, - "outputs": [], - "source": [ - "mask = \n", - "matrix = \n", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[ 7.46661456 2.74228464 6.46365292 
11.57467534 3.72174496]\n", + " [ 0. 7.98833153 10.32534704 3.10539402 3.50187194]\n", + " [ 0. 0. 2.26596132 4.2688381 6.67929945]\n", + " [ 0. 0. 0. 11.49534556 5.74590548]\n", + " [ 0. 0. 0. 0. 7.01714748]]\n" + ] + } + ], + "source": [ + "mask = np.random.uniform(2, 12, (5,5))\n", + "mask[np.tril_indices(5, -1)] = 0\n", "\n", - "print(matrix)" + "print(mask)" ] }, { "cell_type": "markdown", + "id": "0a095a47-9357-484a-8d1b-50c171c030d2", "metadata": { "id": "altered-baghdad" }, @@ -293,20 +453,34 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, + "id": "c7811198-5c9c-4d69-aec6-dd8c9b57718a", "metadata": { "id": "refined-stuff" }, - "outputs": [], - "source": [ - "mask = \n", - "matrix = \n", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[ 0. 13.03693356 8.78801168 10.01754459 9.08361603]\n", + " [ 8.05666906 0. 13.72699202 10.79345868 9.89890479]\n", + " [14.29876326 9.80398181 0. 12.13033643 6.58345871]\n", + " [12.66589377 11.96527085 10.19919009 0. 8.37674351]\n", + " [12.58833494 12.3561968 9.42686704 8.54595175 0. 
]]\n" + ] + } + ], + "source": [ + "mask = np.random.normal(10, 2, (5,5))\n", + "np.fill_diagonal(mask, 0)\n", "\n", - "print(matrix)" + "print(mask)" ] }, { "cell_type": "markdown", + "id": "c3dd99f5-0580-48cf-ba05-1dc03d5120d8", "metadata": { "id": "quiet-complement" }, @@ -317,22 +491,43 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, + "id": "799e8257-9952-4bde-86f1-7ad9600c2086", "metadata": { "id": "french-fighter" }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[0 1 0 1 0]\n", + "[0 0 1 0 1]\n" + ] + }, + { + "data": { + "text/plain": [ + "False" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "a = np.random.randint(0,2,5)\n", "print(a)\n", "b = np.random.randint(0,2,5)\n", "print(b)\n", - "equal = \n", + "equal = np.array_equal(a, b)\n", "equal" ] }, { "cell_type": "markdown", + "id": "996bdf28-1959-43c7-9414-152d475fe315", "metadata": { "id": "color-amplifier" }, @@ -347,23 +542,39 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, + "id": "1946daa7-fadf-411f-a1eb-ad7e9538a498", "metadata": { "id": "close-daisy" }, - "outputs": [], - "source": [ - "r, c = \n", - "a = \n", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[-12 8 -14 12 -16 3]\n", + " [ 7 -4 -11 5 2 0]\n", + " [ 10 12 -16 -9 2 -18]\n", + " [ -5 10 -17 15 8 8]\n", + " [-11 -14 19 17 6 6]]\n", + "15\n", + "[ -5 3 8 8 8 -17 2 6 8 -14 8 6 8 -4 -12]\n" + ] + } + ], + "source": [ + "r, c = np.random.randint(3, 7), np.random.randint(2,12)\n", + "a = np.random.randint(-20,20,(r,c)) \n", "print(a)\n", - "N = \n", + "N = (r * c)//2\n", "print(N)\n", - "sample = \n", + "sample = np.random.choice(a.ravel(),size=N )\n", "print(sample)" ] }, { "cell_type": "markdown", + "id": "1241a6f0-9899-487d-8e3a-00364ac7ab83", "metadata": { "id": "patent-african" }, @@ -376,20 +587,44 @@ }, { "cell_type": "code", 
- "execution_count": null, + "execution_count": 14, + "id": "ac0d577d-c4bf-4114-9997-9e0f74868380", "metadata": { "id": "taken-fabric" }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[ 1. nan inf]\n", + "[False True False]\n", + "[False False True]\n" + ] + }, + { + "data": { + "text/plain": [ + "array([1., 0., 0.])" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "a = np.array([1, np.NaN, np.Inf], float)\n", - "\n", - "\n", + "print(a)\n", + "print(np.isnan(a))\n", + "print(np.isinf(a))\n", + "a = np.where(np.isnan(a) | np.isinf(a), 0, a)\n", "a" ] }, { "cell_type": "markdown", + "id": "cf768632-93ad-48d3-9538-24f6db3918b4", "metadata": { "id": "analyzed-ireland" }, @@ -401,20 +636,28 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 26, + "id": "40b3c08f-03b5-4cf0-9114-d7b53ead94c7", "metadata": { "id": "imposed-digest" }, - "outputs": [], + "outputs": [ + { + "ename": "", + "evalue": "invalid syntax (, line 1)", + "output_type": "error", + "traceback": [ + "\u001b[0;36m Input \u001b[0;32mIn [26]\u001b[0;36m\u001b[0m\n\u001b[0;31m matrix =\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid syntax\n" + ] + } + ], "source": [ - "axis = \n", - "print(axis)\n", - "matrix = \n", - "print(...)" + "matrix =" ] }, { "cell_type": "markdown", + "id": "7f845051-34a5-4917-a6c7-62d6af418e5b", "metadata": { "id": "regulation-colleague" }, @@ -427,18 +670,47 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 22, + "id": "8f439683-6df1-41b2-9238-34657eae1a40", "metadata": { "id": "concerned-anthropology" }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[58.25906569 48.31170059 52.53408476]\n", + " [51.05296905 52.37315558 47.89095483]\n", + " [67.12803073 42.41539999 40.76421169]\n", + " [51.34492875 55.05489071 
53.60475124]\n", + " [63.13586206 51.42074662 55.46247798]\n", + " [45.57750299 54.23007597 45.3707665 ]\n", + " [52.26580704 49.94203424 55.04086665]\n", + " [46.97376983 67.18626141 49.48661174]\n", + " [60.70755932 30.54283876 47.45861248]\n", + " [48.07055666 69.5386048 33.5820917 ]]\n", + "[0 1 0 1 0 1 2 1 0 1]\n", + "[58.25906569 52.37315558 67.12803073 55.05489071 63.13586206 54.23007597\n", + " 55.04086665 67.18626141 60.70755932 69.5386048 ]\n" + ] + } + ], "source": [ "matrix = np.random.normal(50, 10, (10,3))\n", "print(matrix)\n", - "indexes = \n", + "indexes = np.argmax(matrix, axis=1)\n", "print(indexes)\n", - "print(...)" + "print(np.amax(matrix, axis=1))" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d0e950f1-a858-4803-81e9-dd3aa1a6cf5d", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -448,7 +720,7 @@ "provenance": [] }, "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -462,7 +734,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.5" + "version": "3.10.0" } }, "nbformat": 4, diff --git a/Pandas (06.03)/Pandas. Lecture. Part 1.ipynb b/Pandas (06.03)/Pandas. Lecture. Part 1.ipynb index a0c1d04..0df2491 100644 --- a/Pandas (06.03)/Pandas. Lecture. Part 1.ipynb +++ b/Pandas (06.03)/Pandas. Lecture. 
Part 1.ipynb @@ -1 +1,5764 @@ -{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"01_Pandas.ipynb","provenance":[],"collapsed_sections":[],"authorship_tag":"ABX9TyPGZA72+5Brg/wHtKFk27jK"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"qCUpgW4Chxlt"},"source":["# Игрушечные наборы данных\n","https://scikit-learn.org/stable/datasets/index.html"]},{"cell_type":"code","metadata":{"id":"6-e8Ub9ghvMA","executionInfo":{"status":"ok","timestamp":1632403984813,"user_tz":-300,"elapsed":867,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}}},"source":["import sklearn.datasets as sets\n","datasets = {0:'boston', 1:'iris', 2:'diabets', 3:'digits', 4:'linnerud', 5:'wine', 6:'cancer', 7:'olivetti_faces', 8:'20_newsgroups',\n"," 9:'20_newsgroups_vec', 10:'people_labeled_faces', 11:'pairs_labeled_faces', 12:'covertype', 13:'RCV1_multilabel',\n"," 14:'kddcup99', 15:'california_housing', }\n","choise = 1\n","if choise == 0:\n"," ds = sets.load_boston() #regression\n","elif choise == 1:\n"," ds = sets.load_iris() # classification\n","elif choise == 2:\n"," ds = sets.load_diabetes() # regression\n","elif choise == 3:\n"," ds = sets.load_digits() # classification\n","elif choise == 4:\n"," ds = sets.load_linnerud() # multivariate regression\n","elif choise == 5:\n"," ds = sets.load_wine() # classification\n","elif choise == 6:\n"," ds = sets.load_breast_cancer() # classification\n","elif choise == 7:\n"," ds = sets.fetch_olivetti_faces() # classification\n","elif choise == 8:\n"," ds = sets.fetch_20newsgroups() # classification\n","elif choise == 9:\n"," ds = sets.fetch_20newsgroups_vectorized() # classification\n","elif choise == 10:\n"," ds = sets.fetch_lfw_people() # classification\n","elif choise == 11:\n"," ds = sets.fetch_lfw_pairs() # classification\n","elif choise == 12:\n"," ds = 
sets.fetch_covtype() # classification\n","elif choise == 13:\n"," ds = sets.fetch_rcv1() # classification\n","elif choise == 14:\n"," ds = sets.fetch_kddcup99() # classification\n","elif choise == 15:\n"," ds = sets.fetch_california_housing() # regression"],"execution_count":1,"outputs":[]},{"cell_type":"code","metadata":{"id":"rHDZmzjAiy7N","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1615295304765,"user_tz":-300,"elapsed":1064,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"160c86a8-b336-429a-b12b-52cf5bb6a14b"},"source":["print(ds.DESCR)"],"execution_count":null,"outputs":[{"output_type":"stream","text":[".. _iris_dataset:\n","\n","Iris plants dataset\n","--------------------\n","\n","**Data Set Characteristics:**\n","\n"," :Number of Instances: 150 (50 in each of three classes)\n"," :Number of Attributes: 4 numeric, predictive attributes and the class\n"," :Attribute Information:\n"," - sepal length in cm\n"," - sepal width in cm\n"," - petal length in cm\n"," - petal width in cm\n"," - class:\n"," - Iris-Setosa\n"," - Iris-Versicolour\n"," - Iris-Virginica\n"," \n"," :Summary Statistics:\n","\n"," ============== ==== ==== ======= ===== ====================\n"," Min Max Mean SD Class Correlation\n"," ============== ==== ==== ======= ===== ====================\n"," sepal length: 4.3 7.9 5.84 0.83 0.7826\n"," sepal width: 2.0 4.4 3.05 0.43 -0.4194\n"," petal length: 1.0 6.9 3.76 1.76 0.9490 (high!)\n"," petal width: 0.1 2.5 1.20 0.76 0.9565 (high!)\n"," ============== ==== ==== ======= ===== ====================\n","\n"," :Missing Attribute Values: None\n"," :Class Distribution: 33.3% for each of 3 classes.\n"," :Creator: R.A. 
Fisher\n"," :Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)\n"," :Date: July, 1988\n","\n","The famous Iris database, first used by Sir R.A. Fisher. The dataset is taken\n","from Fisher's paper. Note that it's the same as in R, but not as in the UCI\n","Machine Learning Repository, which has two wrong data points.\n","\n","This is perhaps the best known database to be found in the\n","pattern recognition literature. Fisher's paper is a classic in the field and\n","is referenced frequently to this day. (See Duda & Hart, for example.) The\n","data set contains 3 classes of 50 instances each, where each class refers to a\n","type of iris plant. One class is linearly separable from the other 2; the\n","latter are NOT linearly separable from each other.\n","\n",".. topic:: References\n","\n"," - Fisher, R.A. \"The use of multiple measurements in taxonomic problems\"\n"," Annual Eugenics, 7, Part II, 179-188 (1936); also in \"Contributions to\n"," Mathematical Statistics\" (John Wiley, NY, 1950).\n"," - Duda, R.O., & Hart, P.E. (1973) Pattern Classification and Scene Analysis.\n"," (Q327.D83) John Wiley & Sons. ISBN 0-471-22361-1. See page 218.\n"," - Dasarathy, B.V. (1980) \"Nosing Around the Neighborhood: A New System\n"," Structure and Classification Rule for Recognition in Partially Exposed\n"," Environments\". IEEE Transactions on Pattern Analysis and Machine\n"," Intelligence, Vol. PAMI-2, No. 1, 67-71.\n"," - Gates, G.W. (1972) \"The Reduced Nearest Neighbor Rule\". IEEE Transactions\n"," on Information Theory, May 1972, 431-433.\n"," - See also: 1988 MLC Proceedings, 54-64. 
Cheeseman et al\"s AUTOCLASS II\n"," conceptual clustering system finds 3 classes in the data.\n"," - Many, many more ...\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"59mLor4WoeZg","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1632404056458,"user_tz":-300,"elapsed":683,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"3548322c-6765-4349-8dea-66ab12f3f7d9"},"source":["print(ds.feature_names)\n","print(ds.target_names)"],"execution_count":2,"outputs":[{"output_type":"stream","name":"stdout","text":["['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']\n","['setosa' 'versicolor' 'virginica']\n"]}]},{"cell_type":"code","metadata":{"id":"9Yt4tJ2_otjm","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1632404071563,"user_tz":-300,"elapsed":420,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"b471a124-b71b-456d-de41-fe29676b6604"},"source":["data = ds.data\n","type(data)"],"execution_count":3,"outputs":[{"output_type":"execute_result","data":{"text/plain":["numpy.ndarray"]},"metadata":{},"execution_count":3}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"ZgxY_56q3YVG","executionInfo":{"status":"ok","timestamp":1632404086557,"user_tz":-300,"elapsed":402,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"b4e3ee4f-16b7-4b1e-f5be-34d0e5f4dd31"},"source":["data"],"execution_count":4,"outputs":[{"output_type":"execute_result","data":{"text/plain":["array([[5.1, 3.5, 1.4, 0.2],\n"," [4.9, 3. 
, 1.4, 0.2],\n"," [4.7, 3.2, 1.3, 0.2],\n"," [4.6, 3.1, 1.5, 0.2],\n"," [5. , 3.6, 1.4, 0.2],\n"," [5.4, 3.9, 1.7, 0.4],\n"," [4.6, 3.4, 1.4, 0.3],\n"," [5. , 3.4, 1.5, 0.2],\n"," [4.4, 2.9, 1.4, 0.2],\n"," [4.9, 3.1, 1.5, 0.1],\n"," [5.4, 3.7, 1.5, 0.2],\n"," [4.8, 3.4, 1.6, 0.2],\n"," [4.8, 3. , 1.4, 0.1],\n"," [4.3, 3. , 1.1, 0.1],\n"," [5.8, 4. , 1.2, 0.2],\n"," [5.7, 4.4, 1.5, 0.4],\n"," [5.4, 3.9, 1.3, 0.4],\n"," [5.1, 3.5, 1.4, 0.3],\n"," [5.7, 3.8, 1.7, 0.3],\n"," [5.1, 3.8, 1.5, 0.3],\n"," [5.4, 3.4, 1.7, 0.2],\n"," [5.1, 3.7, 1.5, 0.4],\n"," [4.6, 3.6, 1. , 0.2],\n"," [5.1, 3.3, 1.7, 0.5],\n"," [4.8, 3.4, 1.9, 0.2],\n"," [5. , 3. , 1.6, 0.2],\n"," [5. , 3.4, 1.6, 0.4],\n"," [5.2, 3.5, 1.5, 0.2],\n"," [5.2, 3.4, 1.4, 0.2],\n"," [4.7, 3.2, 1.6, 0.2],\n"," [4.8, 3.1, 1.6, 0.2],\n"," [5.4, 3.4, 1.5, 0.4],\n"," [5.2, 4.1, 1.5, 0.1],\n"," [5.5, 4.2, 1.4, 0.2],\n"," [4.9, 3.1, 1.5, 0.2],\n"," [5. , 3.2, 1.2, 0.2],\n"," [5.5, 3.5, 1.3, 0.2],\n"," [4.9, 3.6, 1.4, 0.1],\n"," [4.4, 3. , 1.3, 0.2],\n"," [5.1, 3.4, 1.5, 0.2],\n"," [5. , 3.5, 1.3, 0.3],\n"," [4.5, 2.3, 1.3, 0.3],\n"," [4.4, 3.2, 1.3, 0.2],\n"," [5. , 3.5, 1.6, 0.6],\n"," [5.1, 3.8, 1.9, 0.4],\n"," [4.8, 3. , 1.4, 0.3],\n"," [5.1, 3.8, 1.6, 0.2],\n"," [4.6, 3.2, 1.4, 0.2],\n"," [5.3, 3.7, 1.5, 0.2],\n"," [5. , 3.3, 1.4, 0.2],\n"," [7. , 3.2, 4.7, 1.4],\n"," [6.4, 3.2, 4.5, 1.5],\n"," [6.9, 3.1, 4.9, 1.5],\n"," [5.5, 2.3, 4. , 1.3],\n"," [6.5, 2.8, 4.6, 1.5],\n"," [5.7, 2.8, 4.5, 1.3],\n"," [6.3, 3.3, 4.7, 1.6],\n"," [4.9, 2.4, 3.3, 1. ],\n"," [6.6, 2.9, 4.6, 1.3],\n"," [5.2, 2.7, 3.9, 1.4],\n"," [5. , 2. , 3.5, 1. ],\n"," [5.9, 3. , 4.2, 1.5],\n"," [6. , 2.2, 4. , 1. ],\n"," [6.1, 2.9, 4.7, 1.4],\n"," [5.6, 2.9, 3.6, 1.3],\n"," [6.7, 3.1, 4.4, 1.4],\n"," [5.6, 3. , 4.5, 1.5],\n"," [5.8, 2.7, 4.1, 1. ],\n"," [6.2, 2.2, 4.5, 1.5],\n"," [5.6, 2.5, 3.9, 1.1],\n"," [5.9, 3.2, 4.8, 1.8],\n"," [6.1, 2.8, 4. , 1.3],\n"," [6.3, 2.5, 4.9, 1.5],\n"," [6.1, 2.8, 4.7, 1.2],\n"," [6.4, 2.9, 4.3, 1.3],\n"," [6.6, 3. 
, 4.4, 1.4],\n"," [6.8, 2.8, 4.8, 1.4],\n"," [6.7, 3. , 5. , 1.7],\n"," [6. , 2.9, 4.5, 1.5],\n"," [5.7, 2.6, 3.5, 1. ],\n"," [5.5, 2.4, 3.8, 1.1],\n"," [5.5, 2.4, 3.7, 1. ],\n"," [5.8, 2.7, 3.9, 1.2],\n"," [6. , 2.7, 5.1, 1.6],\n"," [5.4, 3. , 4.5, 1.5],\n"," [6. , 3.4, 4.5, 1.6],\n"," [6.7, 3.1, 4.7, 1.5],\n"," [6.3, 2.3, 4.4, 1.3],\n"," [5.6, 3. , 4.1, 1.3],\n"," [5.5, 2.5, 4. , 1.3],\n"," [5.5, 2.6, 4.4, 1.2],\n"," [6.1, 3. , 4.6, 1.4],\n"," [5.8, 2.6, 4. , 1.2],\n"," [5. , 2.3, 3.3, 1. ],\n"," [5.6, 2.7, 4.2, 1.3],\n"," [5.7, 3. , 4.2, 1.2],\n"," [5.7, 2.9, 4.2, 1.3],\n"," [6.2, 2.9, 4.3, 1.3],\n"," [5.1, 2.5, 3. , 1.1],\n"," [5.7, 2.8, 4.1, 1.3],\n"," [6.3, 3.3, 6. , 2.5],\n"," [5.8, 2.7, 5.1, 1.9],\n"," [7.1, 3. , 5.9, 2.1],\n"," [6.3, 2.9, 5.6, 1.8],\n"," [6.5, 3. , 5.8, 2.2],\n"," [7.6, 3. , 6.6, 2.1],\n"," [4.9, 2.5, 4.5, 1.7],\n"," [7.3, 2.9, 6.3, 1.8],\n"," [6.7, 2.5, 5.8, 1.8],\n"," [7.2, 3.6, 6.1, 2.5],\n"," [6.5, 3.2, 5.1, 2. ],\n"," [6.4, 2.7, 5.3, 1.9],\n"," [6.8, 3. , 5.5, 2.1],\n"," [5.7, 2.5, 5. , 2. ],\n"," [5.8, 2.8, 5.1, 2.4],\n"," [6.4, 3.2, 5.3, 2.3],\n"," [6.5, 3. , 5.5, 1.8],\n"," [7.7, 3.8, 6.7, 2.2],\n"," [7.7, 2.6, 6.9, 2.3],\n"," [6. , 2.2, 5. , 1.5],\n"," [6.9, 3.2, 5.7, 2.3],\n"," [5.6, 2.8, 4.9, 2. ],\n"," [7.7, 2.8, 6.7, 2. ],\n"," [6.3, 2.7, 4.9, 1.8],\n"," [6.7, 3.3, 5.7, 2.1],\n"," [7.2, 3.2, 6. , 1.8],\n"," [6.2, 2.8, 4.8, 1.8],\n"," [6.1, 3. , 4.9, 1.8],\n"," [6.4, 2.8, 5.6, 2.1],\n"," [7.2, 3. , 5.8, 1.6],\n"," [7.4, 2.8, 6.1, 1.9],\n"," [7.9, 3.8, 6.4, 2. ],\n"," [6.4, 2.8, 5.6, 2.2],\n"," [6.3, 2.8, 5.1, 1.5],\n"," [6.1, 2.6, 5.6, 1.4],\n"," [7.7, 3. , 6.1, 2.3],\n"," [6.3, 3.4, 5.6, 2.4],\n"," [6.4, 3.1, 5.5, 1.8],\n"," [6. , 3. , 4.8, 1.8],\n"," [6.9, 3.1, 5.4, 2.1],\n"," [6.7, 3.1, 5.6, 2.4],\n"," [6.9, 3.1, 5.1, 2.3],\n"," [5.8, 2.7, 5.1, 1.9],\n"," [6.8, 3.2, 5.9, 2.3],\n"," [6.7, 3.3, 5.7, 2.5],\n"," [6.7, 3. , 5.2, 2.3],\n"," [6.3, 2.5, 5. , 1.9],\n"," [6.5, 3. , 5.2, 2. ],\n"," [6.2, 3.4, 5.4, 2.3],\n"," [5.9, 3. 
, 5.1, 1.8]])"]},"metadata":{},"execution_count":4}]},{"cell_type":"code","metadata":{"id":"-7ejnqmmwr_J","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1615295357693,"user_tz":-300,"elapsed":855,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"e22abd6b-c840-4e43-aa62-d9c1a5cdd231"},"source":["data.shape"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["(150, 4)"]},"metadata":{"tags":[]},"execution_count":5}]},{"cell_type":"code","metadata":{"id":"RmRL0mZ3o5ri","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1632404107395,"user_tz":-300,"elapsed":420,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"bcace884-7ac8-49ce-d14e-05c8f625bb38"},"source":["target = ds.target\n","target[:5], target.shape"],"execution_count":5,"outputs":[{"output_type":"execute_result","data":{"text/plain":["(array([0, 0, 0, 0, 0]), (150,))"]},"metadata":{},"execution_count":5}]},{"cell_type":"markdown","metadata":{"id":"fpcR7aEBJoGq"},"source":["# Pandas"]},{"cell_type":"code","metadata":{"id":"FVTPYh-hhvah","executionInfo":{"status":"ok","timestamp":1632404228644,"user_tz":-300,"elapsed":546,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}}},"source":["import pandas as pd\n","import numpy as np"],"execution_count":7,"outputs":[]},{"cell_type":"code","metadata":{"id":"CZzMZXcyDnCx","colab":{"base_uri":"https://localhost:8080/","height":423},"executionInfo":{"status":"ok","timestamp":1632404365934,"user_tz":-300,"elapsed":20,"user":{"displayName":"Александр 
Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"55d262ac-6243-4338-a45e-57217f23a610"},"source":["df = pd.DataFrame(data, columns=ds.feature_names) # data - может быть как лист, так и numpy array\n","df['target'] = ds.target\n","df"],"execution_count":11,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sepal length (cm)sepal width (cm)petal length (cm)petal width (cm)target
05.13.51.40.20
14.93.01.40.20
24.73.21.30.20
34.63.11.50.20
45.03.61.40.20
..................
1456.73.05.22.32
1466.32.55.01.92
1476.53.05.22.02
1486.23.45.42.32
1495.93.05.11.82
\n","

150 rows × 5 columns

\n","
"],"text/plain":[" sepal length (cm) sepal width (cm) ... petal width (cm) target\n","0 5.1 3.5 ... 0.2 0\n","1 4.9 3.0 ... 0.2 0\n","2 4.7 3.2 ... 0.2 0\n","3 4.6 3.1 ... 0.2 0\n","4 5.0 3.6 ... 0.2 0\n",".. ... ... ... ... ...\n","145 6.7 3.0 ... 2.3 2\n","146 6.3 2.5 ... 1.9 2\n","147 6.5 3.0 ... 2.0 2\n","148 6.2 3.4 ... 2.3 2\n","149 5.9 3.0 ... 1.8 2\n","\n","[150 rows x 5 columns]"]},"metadata":{},"execution_count":11}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":112},"id":"WMx25DeePe80","executionInfo":{"status":"ok","timestamp":1632404401169,"user_tz":-300,"elapsed":1482,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"ca1eb41f-18e0-47de-cc77-b8648b89cec5"},"source":["df.head(2) #tail()"],"execution_count":12,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sepal length (cm)sepal width (cm)petal length (cm)petal width (cm)target
05.13.51.40.20
14.93.01.40.20
\n","
"],"text/plain":[" sepal length (cm) sepal width (cm) ... petal width (cm) target\n","0 5.1 3.5 ... 0.2 0\n","1 4.9 3.0 ... 0.2 0\n","\n","[2 rows x 5 columns]"]},"metadata":{},"execution_count":12}]},{"cell_type":"code","metadata":{"id":"yY02uqmWhvlj","colab":{"base_uri":"https://localhost:8080/","height":206},"executionInfo":{"status":"ok","timestamp":1632404414446,"user_tz":-300,"elapsed":580,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"f4adccbb-22f7-4192-a8f7-67d00c8ff7c3"},"source":["df.sample(5)"],"execution_count":13,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sepal length (cm)sepal width (cm)petal length (cm)petal width (cm)target
1206.93.25.72.32
75.03.41.50.20
656.73.14.41.41
776.73.05.01.71
985.12.53.01.11
\n","
"],"text/plain":[" sepal length (cm) sepal width (cm) ... petal width (cm) target\n","120 6.9 3.2 ... 2.3 2\n","7 5.0 3.4 ... 0.2 0\n","65 6.7 3.1 ... 1.4 1\n","77 6.7 3.0 ... 1.7 1\n","98 5.1 2.5 ... 1.1 1\n","\n","[5 rows x 5 columns]"]},"metadata":{},"execution_count":13}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"L0oDISZyHqUh","executionInfo":{"status":"ok","timestamp":1632404445651,"user_tz":-300,"elapsed":486,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"f2586af7-7f30-4106-861b-539f5ed618d6"},"source":["type(df)"],"execution_count":14,"outputs":[{"output_type":"execute_result","data":{"text/plain":["pandas.core.frame.DataFrame"]},"metadata":{},"execution_count":14}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"arTjJfy442ss","executionInfo":{"status":"ok","timestamp":1632404485030,"user_tz":-300,"elapsed":433,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"6d630c99-cbed-42e1-d69f-c71e595be995"},"source":["type(df[\"target\"])"],"execution_count":15,"outputs":[{"output_type":"execute_result","data":{"text/plain":["pandas.core.series.Series"]},"metadata":{},"execution_count":15}]},{"cell_type":"markdown","metadata":{"id":"xX_Qut-QR_ia"},"source":["### Индексация и срезы данных"]},{"cell_type":"code","metadata":{"id":"jXimDZePWyIp","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1614783881358,"user_tz":-300,"elapsed":3256,"user":{"displayName":"Александр 
Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"00860947-6e2c-484e-90ae-8149d6c2bb45"},"source":["df['sepal length (cm)'] # выбор столбца по названию"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["0 5.1\n","1 4.9\n","2 4.7\n","3 4.6\n","4 5.0\n"," ... \n","145 6.7\n","146 6.3\n","147 6.5\n","148 6.2\n","149 5.9\n","Name: sepal length (cm), Length: 150, dtype: float64"]},"metadata":{"tags":[]},"execution_count":96}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"rOBV0RUtHxLh","executionInfo":{"status":"ok","timestamp":1615295621844,"user_tz":-300,"elapsed":619,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"2e25e363-6fd5-477f-9e38-afe8f91522ac"},"source":["type(df['sepal length (cm)'])"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["pandas.core.series.Series"]},"metadata":{"tags":[]},"execution_count":14}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"sq2YmKFr5m-1","executionInfo":{"status":"ok","timestamp":1632404667952,"user_tz":-300,"elapsed":523,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"e9f125e0-3f1f-4a4b-d39c-5e6091047c86"},"source":["df.columns"],"execution_count":18,"outputs":[{"output_type":"execute_result","data":{"text/plain":["Index(['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)',\n"," 'petal width (cm)', 'target'],\n"," 
dtype='object')"]},"metadata":{},"execution_count":18}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"o5CI-Ha6P4AX","executionInfo":{"status":"ok","timestamp":1614783884339,"user_tz":-300,"elapsed":1699,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"ee350cf3-212a-4bdd-daf8-f0decfe313c0"},"source":["{name : '_'.join(name.split(' ')) for name in df.columns}"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["{'petal length (cm)': 'petal_length_(cm)',\n"," 'petal width (cm)': 'petal_width_(cm)',\n"," 'sepal length (cm)': 'sepal_length_(cm)',\n"," 'sepal width (cm)': 'sepal_width_(cm)',\n"," 'target': 'target'}"]},"metadata":{"tags":[]},"execution_count":97}]},{"cell_type":"code","metadata":{"id":"ztRKBaVlxM8d","executionInfo":{"status":"ok","timestamp":1632404857471,"user_tz":-300,"elapsed":585,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}}},"source":["# df = df.rename(columns={name : '_'.join(name.split(' ')) for name in df.columns}) # смена имен столбцов\n","df.rename(columns={name : '_'.join(name.split(' ')) for name in df.columns}, inplace=True)"],"execution_count":21,"outputs":[]},{"cell_type":"code","metadata":{"id":"Bryqf6bCxNC5","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1632404863328,"user_tz":-300,"elapsed":29,"user":{"displayName":"Александр 
Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"2fb81e40-0667-4c5b-9b50-4ba23010385b"},"source":["df.columns"],"execution_count":22,"outputs":[{"output_type":"execute_result","data":{"text/plain":["Index(['sepal_length_(cm)', 'sepal_width_(cm)', 'petal_length_(cm)',\n"," 'petal_width_(cm)', 'target'],\n"," dtype='object')"]},"metadata":{},"execution_count":22}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"uesXOV19QcNX","executionInfo":{"status":"ok","timestamp":1615295826923,"user_tz":-300,"elapsed":438,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"6476924c-249d-4876-89be-920b127e125b"},"source":["df.target"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["0 0\n","1 0\n","2 0\n","3 0\n","4 0\n"," ..\n","145 2\n","146 2\n","147 2\n","148 2\n","149 2\n","Name: target, Length: 150, dtype: int64"]},"metadata":{"tags":[]},"execution_count":20}]},{"cell_type":"code","metadata":{"id":"J2il4fodbWLb","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1614777840378,"user_tz":-300,"elapsed":566,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"b6d5c2a4-dc69-497d-997c-8127f174765a"},"source":["df.target[-10:] # возможен такой стиль обращения к столбцам, если его имя не содержит пробелов"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["140 2\n","141 2\n","142 2\n","143 2\n","144 2\n","145 2\n","146 2\n","147 2\n","148 2\n","149 2\n","Name: target, dtype: 
int64"]},"metadata":{"tags":[]},"execution_count":22}]},{"cell_type":"code","metadata":{"id":"2IaGUtDoYIAO","colab":{"base_uri":"https://localhost:8080/","height":357},"executionInfo":{"status":"ok","timestamp":1614777891289,"user_tz":-300,"elapsed":607,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"c64f553c-27a2-4f0d-a1e3-aa82ee895acf"},"source":["df.loc[140: , 'sepal_width_(cm)':'petal_width_(cm)'] # возможность среза данных по ИМЕНАМ строк и столбцов"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sepal_width_(cm)petal_length_(cm)petal_width_(cm)
1403.15.62.4
1413.15.12.3
1422.75.11.9
1433.25.92.3
1443.35.72.5
1453.05.22.3
1462.55.01.9
1473.05.22.0
1483.45.42.3
1493.05.11.8
\n","
"],"text/plain":[" sepal_width_(cm) petal_length_(cm) petal_width_(cm)\n","140 3.1 5.6 2.4\n","141 3.1 5.1 2.3\n","142 2.7 5.1 1.9\n","143 3.2 5.9 2.3\n","144 3.3 5.7 2.5\n","145 3.0 5.2 2.3\n","146 2.5 5.0 1.9\n","147 3.0 5.2 2.0\n","148 3.4 5.4 2.3\n","149 3.0 5.1 1.8"]},"metadata":{"tags":[]},"execution_count":23}]},{"cell_type":"code","metadata":{"id":"RwTuNV_BxNtH","colab":{"base_uri":"https://localhost:8080/","height":357},"executionInfo":{"status":"ok","timestamp":1614777918498,"user_tz":-300,"elapsed":735,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"89004bbc-fd5d-4bb9-fbdc-6756fa31cb1b"},"source":["df.iloc[:10,:4] # возможность среза данных по ПОРЯДКОВЫМ НОМЕРАМ строк и столбцов "],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sepal_length_(cm)sepal_width_(cm)petal_length_(cm)petal_width_(cm)
05.13.51.40.2
14.93.01.40.2
24.73.21.30.2
34.63.11.50.2
45.03.61.40.2
55.43.91.70.4
64.63.41.40.3
75.03.41.50.2
84.42.91.40.2
94.93.11.50.1
\n","
"],"text/plain":[" sepal_length_(cm) sepal_width_(cm) petal_length_(cm) petal_width_(cm)\n","0 5.1 3.5 1.4 0.2\n","1 4.9 3.0 1.4 0.2\n","2 4.7 3.2 1.3 0.2\n","3 4.6 3.1 1.5 0.2\n","4 5.0 3.6 1.4 0.2\n","5 5.4 3.9 1.7 0.4\n","6 4.6 3.4 1.4 0.3\n","7 5.0 3.4 1.5 0.2\n","8 4.4 2.9 1.4 0.2\n","9 4.9 3.1 1.5 0.1"]},"metadata":{"tags":[]},"execution_count":24}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"QSAbGcDbJP9B","executionInfo":{"status":"ok","timestamp":1632405184550,"user_tz":-300,"elapsed":413,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"20274561-ff6c-4031-e1a7-a26a2399cea5"},"source":["[column for column in df.columns if column.startswith('sepal')]"],"execution_count":23,"outputs":[{"output_type":"execute_result","data":{"text/plain":["['sepal_length_(cm)', 'sepal_width_(cm)']"]},"metadata":{},"execution_count":23}]},{"cell_type":"code","metadata":{"id":"pytaw0cAxNp8","colab":{"base_uri":"https://localhost:8080/","height":424},"executionInfo":{"status":"ok","timestamp":1614784351268,"user_tz":-300,"elapsed":1370,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"81983e96-8834-40e4-b828-6706a4f3bbb6"},"source":["df[[column for column in df.columns if column.startswith('sepal')]] # выбор столбцов по условию"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sepal_length_(cm)sepal_width_(cm)
05.13.5
14.93.0
24.73.2
34.63.1
45.03.6
.........
1456.73.0
1466.32.5
1476.53.0
1486.23.4
1495.93.0
\n","

150 rows × 2 columns

\n","
"],"text/plain":[" sepal_length_(cm) sepal_width_(cm)\n","0 5.1 3.5\n","1 4.9 3.0\n","2 4.7 3.2\n","3 4.6 3.1\n","4 5.0 3.6\n",".. ... ...\n","145 6.7 3.0\n","146 6.3 2.5\n","147 6.5 3.0\n","148 6.2 3.4\n","149 5.9 3.0\n","\n","[150 rows x 2 columns]"]},"metadata":{"tags":[]},"execution_count":102}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"bHiE8tk872bY","executionInfo":{"status":"ok","timestamp":1632405255702,"user_tz":-300,"elapsed":666,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"5cab46f0-7d00-4c5a-a435-ecd145b8c82c"},"source":["df.target==1.0"],"execution_count":24,"outputs":[{"output_type":"execute_result","data":{"text/plain":["0 False\n","1 False\n","2 False\n","3 False\n","4 False\n"," ... \n","145 False\n","146 False\n","147 False\n","148 False\n","149 False\n","Name: target, Length: 150, dtype: bool"]},"metadata":{},"execution_count":24}]},{"cell_type":"code","metadata":{"id":"YNxRaJqqavOz","colab":{"base_uri":"https://localhost:8080/","height":347},"executionInfo":{"status":"ok","timestamp":1615296046504,"user_tz":-300,"elapsed":815,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"2baa1152-611c-43a3-eea6-c9ae07cfea4e"},"source":["df[df.target==1.0][:10] # выбор данных по условию. В данном случае хотим увидеть данные, у которых целевой класс = 1\n","# также видно, что к столбцу можно обращаться через точку (df.target), а не только через квадратные скобки (df['target'])"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sepal_length_(cm)sepal_width_(cm)petal_length_(cm)petal_width_(cm)target
507.03.24.71.41
516.43.24.51.51
526.93.14.91.51
535.52.34.01.31
546.52.84.61.51
555.72.84.51.31
566.33.34.71.61
574.92.43.31.01
586.62.94.61.31
595.22.73.91.41
\n","
"],"text/plain":[" sepal_length_(cm) sepal_width_(cm) ... petal_width_(cm) target\n","50 7.0 3.2 ... 1.4 1\n","51 6.4 3.2 ... 1.5 1\n","52 6.9 3.1 ... 1.5 1\n","53 5.5 2.3 ... 1.3 1\n","54 6.5 2.8 ... 1.5 1\n","55 5.7 2.8 ... 1.3 1\n","56 6.3 3.3 ... 1.6 1\n","57 4.9 2.4 ... 1.0 1\n","58 6.6 2.9 ... 1.3 1\n","59 5.2 2.7 ... 1.4 1\n","\n","[10 rows x 5 columns]"]},"metadata":{"tags":[]},"execution_count":22}]},{"cell_type":"markdown","metadata":{"id":"i4V1_5AOgmB9"},"source":["### Описательная статистика"]},{"cell_type":"code","metadata":{"id":"EuwQ-U54xNnA","colab":{"base_uri":"https://localhost:8080/","height":300},"executionInfo":{"status":"ok","timestamp":1614766986724,"user_tz":-300,"elapsed":1283,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"5ed73970-f852-49b2-82a7-bfe43b1ad3c3"},"source":["df.describe() # статистическое описание набора данных"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sepal_length_(cm)sepal_width_(cm)petal_length_(cm)petal_width_(cm)target
count150.000000150.000000150.000000150.000000150.000000
mean5.8433333.0573333.7580001.1993331.000000
std0.8280660.4358661.7652980.7622380.819232
min4.3000002.0000001.0000000.1000000.000000
25%5.1000002.8000001.6000000.3000000.000000
50%5.8000003.0000004.3500001.3000001.000000
75%6.4000003.3000005.1000001.8000002.000000
max7.9000004.4000006.9000002.5000002.000000
\n","
"],"text/plain":[" sepal_length_(cm) sepal_width_(cm) ... petal_width_(cm) target\n","count 150.000000 150.000000 ... 150.000000 150.000000\n","mean 5.843333 3.057333 ... 1.199333 1.000000\n","std 0.828066 0.435866 ... 0.762238 0.819232\n","min 4.300000 2.000000 ... 0.100000 0.000000\n","25% 5.100000 2.800000 ... 0.300000 0.000000\n","50% 5.800000 3.000000 ... 1.300000 1.000000\n","75% 6.400000 3.300000 ... 1.800000 2.000000\n","max 7.900000 4.400000 ... 2.500000 2.000000\n","\n","[8 rows x 5 columns]"]},"metadata":{"tags":[]},"execution_count":19}]},{"cell_type":"code","metadata":{"id":"X4ykTpKtxNiG","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1614778091397,"user_tz":-300,"elapsed":627,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"e62b683d-f476-4422-d691-774ead34e63f"},"source":["df.info() # информация об индексах, пропусках в данных, типах данных и объеме оперативной памяти занимаемой данными"],"execution_count":null,"outputs":[{"output_type":"stream","text":["\n","RangeIndex: 150 entries, 0 to 149\n","Data columns (total 5 columns):\n"," # Column Non-Null Count Dtype \n","--- ------ -------------- ----- \n"," 0 sepal_length_(cm) 150 non-null float64\n"," 1 sepal_width_(cm) 150 non-null float64\n"," 2 petal_length_(cm) 150 non-null float64\n"," 3 petal_width_(cm) 150 non-null float64\n"," 4 target 150 non-null int64 \n","dtypes: float64(4), int64(1)\n","memory usage: 6.0 KB\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"b7khmMfj8mDB","executionInfo":{"status":"ok","timestamp":1632405484185,"user_tz":-300,"elapsed":51,"user":{"displayName":"Александр 
Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"8e7ccfa9-cffa-4872-c0a5-d00635211e12"},"source":["df.target.unique(), df.target.nunique()"],"execution_count":26,"outputs":[{"output_type":"execute_result","data":{"text/plain":["(array([0, 1, 2]), 3)"]},"metadata":{},"execution_count":26}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":167},"id":"n1XzQbdFRx7Z","executionInfo":{"status":"ok","timestamp":1615296303195,"user_tz":-300,"elapsed":783,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"a4acff70-40cf-4462-f2b6-03546318b29b"},"source":["df.groupby('target').mean() #df.groupby('target')['petal_length_(cm)'].mean()"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sepal_length_(cm)sepal_width_(cm)petal_length_(cm)petal_width_(cm)
target
05.0063.4281.4620.246
15.9362.7704.2601.326
26.5882.9745.5522.026
\n","
"],"text/plain":[" sepal_length_(cm) ... petal_width_(cm)\n","target ... \n","0 5.006 ... 0.246\n","1 5.936 ... 1.326\n","2 6.588 ... 2.026\n","\n","[3 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":24}]},{"cell_type":"code","metadata":{"id":"MRiTYhiixNfC","colab":{"base_uri":"https://localhost:8080/","height":217},"executionInfo":{"status":"ok","timestamp":1615296321113,"user_tz":-300,"elapsed":724,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"3bd6da21-1bde-404e-b9fb-d4e36e94634c"},"source":["df.groupby('target').agg([min, max, np.mean, np.std, np.size]) # применение общих функций группировки для всех столбцов"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sepal_length_(cm)sepal_width_(cm)petal_length_(cm)petal_width_(cm)
minmaxmeanstdsizeminmaxmeanstdsizeminmaxmeanstdsizeminmaxmeanstdsize
target
04.35.85.0060.35249050.02.34.43.4280.37906450.01.01.91.4620.17366450.00.10.60.2460.10538650.0
14.97.05.9360.51617150.02.03.42.7700.31379850.03.05.14.2600.46991150.01.01.81.3260.19775350.0
24.97.96.5880.63588050.02.23.82.9740.32249750.04.56.95.5520.55189550.01.42.52.0260.27465050.0
\n","
"],"text/plain":[" sepal_length_(cm) ... petal_width_(cm) \n"," min max mean std ... max mean std size\n","target ... \n","0 4.3 5.8 5.006 0.352490 ... 0.6 0.246 0.105386 50.0\n","1 4.9 7.0 5.936 0.516171 ... 1.8 1.326 0.197753 50.0\n","2 4.9 7.9 6.588 0.635880 ... 2.5 2.026 0.274650 50.0\n","\n","[3 rows x 20 columns]"]},"metadata":{"tags":[]},"execution_count":25}]},{"cell_type":"code","metadata":{"id":"w_oHay4KxNdC","colab":{"base_uri":"https://localhost:8080/","height":197},"executionInfo":{"status":"ok","timestamp":1615296592781,"user_tz":-300,"elapsed":511,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"2b52fff3-b9c7-4c74-ea6f-e52b965f4e6b"},"source":["df.groupby('target').agg({'sepal_length_(cm)':[np.mean, np.std], 'petal_width_(cm)':[min, max]}) # индивидуальное применение функций группировки"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sepal_length_(cm)petal_width_(cm)
meanstdminmax
target
05.0060.3524900.10.6
15.9360.5161711.01.8
26.5880.6358801.42.5
\n","
"],"text/plain":[" sepal_length_(cm) petal_width_(cm) \n"," mean std min max\n","target \n","0 5.006 0.352490 0.1 0.6\n","1 5.936 0.516171 1.0 1.8\n","2 6.588 0.635880 1.4 2.5"]},"metadata":{"tags":[]},"execution_count":30}]},{"cell_type":"markdown","metadata":{"id":"NCfoXnc41fmW"},"source":["### Полезные функции, которые конкретно сейчас не нужны, но часто применимы"]},{"cell_type":"code","metadata":{"id":"KV8EM_b41m0m","colab":{"base_uri":"https://localhost:8080/","height":197},"executionInfo":{"status":"ok","timestamp":1615296494311,"user_tz":-300,"elapsed":747,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"b898ccdb-16f0-415b-a629-25b794f42859"},"source":["d = df.copy()\n","d.head()"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sepal_length_(cm)sepal_width_(cm)petal_length_(cm)petal_width_(cm)target
05.13.51.40.20
14.93.01.40.20
24.73.21.30.20
34.63.11.50.20
45.03.61.40.20
\n","
"],"text/plain":[" sepal_length_(cm) sepal_width_(cm) ... petal_width_(cm) target\n","0 5.1 3.5 ... 0.2 0\n","1 4.9 3.0 ... 0.2 0\n","2 4.7 3.2 ... 0.2 0\n","3 4.6 3.1 ... 0.2 0\n","4 5.0 3.6 ... 0.2 0\n","\n","[5 rows x 5 columns]"]},"metadata":{"tags":[]},"execution_count":26}]},{"cell_type":"code","metadata":{"id":"pGOooxXo1xqA","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1615296536700,"user_tz":-300,"elapsed":737,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"a3a5fe7b-d857-49c8-8d59-ed08472c37e9"},"source":["targets = {float(i):target for i, target in enumerate(ds.target_names)}\n","targets"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["{0.0: 'setosa', 1.0: 'versicolor', 2.0: 'virginica'}"]},"metadata":{"tags":[]},"execution_count":27}]},{"cell_type":"code","metadata":{"id":"1qI4cEd81xxK","colab":{"base_uri":"https://localhost:8080/","height":197},"executionInfo":{"status":"ok","timestamp":1615296574079,"user_tz":-300,"elapsed":474,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"7e62a1d9-dc06-4fc5-8270-6da0236d7341"},"source":["d.target = d.target.map(targets) # заменим цифровые обозначения классов на буквенные подписи\n","d.head()"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sepal_length_(cm)sepal_width_(cm)petal_length_(cm)petal_width_(cm)target
05.13.51.40.2setosa
14.93.01.40.2setosa
24.73.21.30.2setosa
34.63.11.50.2setosa
45.03.61.40.2setosa
\n","
"],"text/plain":[" sepal_length_(cm) sepal_width_(cm) ... petal_width_(cm) target\n","0 5.1 3.5 ... 0.2 setosa\n","1 4.9 3.0 ... 0.2 setosa\n","2 4.7 3.2 ... 0.2 setosa\n","3 4.6 3.1 ... 0.2 setosa\n","4 5.0 3.6 ... 0.2 setosa\n","\n","[5 rows x 5 columns]"]},"metadata":{"tags":[]},"execution_count":28}]},{"cell_type":"code","metadata":{"id":"q1W6kwXe1xuc","colab":{"base_uri":"https://localhost:8080/","height":197},"executionInfo":{"status":"ok","timestamp":1615296637939,"user_tz":-300,"elapsed":647,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"cbd628aa-1e1b-4a98-e5f5-b9ef80aa9544"},"source":["d['sepal_length_on_width'] = d['sepal_length_(cm)'] / d['sepal_width_(cm)'] # операции непосредственно со столбцами много быстрее поэлементных операций \n","d.head()"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sepal_length_(cm)sepal_width_(cm)petal_length_(cm)petal_width_(cm)targetsepal_length_on_width
05.13.51.40.2setosa1.457143
14.93.01.40.2setosa1.633333
24.73.21.30.2setosa1.468750
34.63.11.50.2setosa1.483871
45.03.61.40.2setosa1.388889
\n","
"],"text/plain":[" sepal_length_(cm) sepal_width_(cm) ... target sepal_length_on_width\n","0 5.1 3.5 ... setosa 1.457143\n","1 4.9 3.0 ... setosa 1.633333\n","2 4.7 3.2 ... setosa 1.468750\n","3 4.6 3.1 ... setosa 1.483871\n","4 5.0 3.6 ... setosa 1.388889\n","\n","[5 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":31}]},{"cell_type":"code","metadata":{"id":"dRp4-vhV1xmt"},"source":["d.sepal_length_on_width = d.sepal_length_on_width.apply(np.sin)"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"357_A4ny1xjb","colab":{"base_uri":"https://localhost:8080/","height":197},"executionInfo":{"status":"ok","timestamp":1615296813029,"user_tz":-300,"elapsed":767,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"196d0d5d-1883-4552-ec7c-890f592130de"},"source":["def bias(x):\n"," if x < 1.0:\n"," return 0\n"," return 1\n","d['petal_width_(cm)'] = d['petal_width_(cm)'].apply(bias)\n","d.head()"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sepal_length_(cm)sepal_width_(cm)petal_length_(cm)petal_width_(cm)targetsepal_length_on_width
05.13.51.40setosa0.993548
14.93.01.40setosa0.998045
24.73.21.30setosa0.994798
34.63.11.50setosa0.996224
45.03.61.40setosa0.983500
\n","
"],"text/plain":[" sepal_length_(cm) sepal_width_(cm) ... target sepal_length_on_width\n","0 5.1 3.5 ... setosa 0.993548\n","1 4.9 3.0 ... setosa 0.998045\n","2 4.7 3.2 ... setosa 0.994798\n","3 4.6 3.1 ... setosa 0.996224\n","4 5.0 3.6 ... setosa 0.983500\n","\n","[5 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":33}]},{"cell_type":"code","metadata":{"id":"aeUhqZEX1xey"},"source":["d.drop([column for column in d.columns if column.endswith('length_(cm)')], axis=1, inplace=True)"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"gQJ6De486fsw","colab":{"base_uri":"https://localhost:8080/","height":197},"executionInfo":{"status":"ok","timestamp":1615296912439,"user_tz":-300,"elapsed":684,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"f39caff3-2866-4a3b-b6ac-8510ddad127f"},"source":["d.head()"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sepal_width_(cm)petal_width_(cm)targetsepal_length_on_width
03.50setosa0.993548
13.00setosa0.998045
23.20setosa0.994798
33.10setosa0.996224
43.60setosa0.983500
\n","
"],"text/plain":[" sepal_width_(cm) petal_width_(cm) target sepal_length_on_width\n","0 3.5 0 setosa 0.993548\n","1 3.0 0 setosa 0.998045\n","2 3.2 0 setosa 0.994798\n","3 3.1 0 setosa 0.996224\n","4 3.6 0 setosa 0.983500"]},"metadata":{"tags":[]},"execution_count":36}]},{"cell_type":"code","metadata":{"id":"H6wlNTB76hoP","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1615296981297,"user_tz":-300,"elapsed":589,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"ef4c98c9-53fc-403d-e5ad-91b96ab8f864"},"source":["f = pd.concat([d,d], axis=0)\n","d.shape, f.shape"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["((150, 4), (300, 4))"]},"metadata":{"tags":[]},"execution_count":37}]},{"cell_type":"code","metadata":{"id":"8wvhQgCh6stP","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1615297019618,"user_tz":-300,"elapsed":572,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"ed87bb36-d869-49c6-cc1a-516cd9daa65b"},"source":["f = pd.concat([d,d], axis=1)\n","d.shape, f.shape"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["((150, 4), (150, 8))"]},"metadata":{"tags":[]},"execution_count":38}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":143},"id":"hYXfdNRds8wc","executionInfo":{"status":"ok","timestamp":1632405950884,"user_tz":-300,"elapsed":476,"user":{"displayName":"Александр 
Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"2b21ce4b-5d17-4800-ea16-396dc95557c3"},"source":["df_1 = pd.DataFrame(index=[1,2,3], data=[[1,2],[1,2],[1,2]], columns=[4,5])\n","df_2 = pd.DataFrame(index=[5,6,3], data=[[1,2],[1,2],[1,2]], columns=[5,7])\n","df_1"],"execution_count":27,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
45
112
212
312
\n","
"],"text/plain":[" 4 5\n","1 1 2\n","2 1 2\n","3 1 2"]},"metadata":{},"execution_count":27}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":143},"id":"-bALOOiOs_xk","executionInfo":{"status":"ok","timestamp":1632405952831,"user_tz":-300,"elapsed":12,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"56478aab-30e8-477d-8628-2352f3ed3ac4"},"source":["df_2"],"execution_count":28,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
57
512
612
312
\n","
"],"text/plain":[" 5 7\n","5 1 2\n","6 1 2\n","3 1 2"]},"metadata":{},"execution_count":28}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":237},"id":"nspfyfjMUepW","executionInfo":{"status":"ok","timestamp":1632405958777,"user_tz":-300,"elapsed":400,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"946cd0f5-3470-4620-a1ea-52221f5a06b1"},"source":["df_1 = pd.DataFrame(index=[1,2,3], data=[[1,2],[1,2],[1,2]], columns=[4,5])\n","df_2 = pd.DataFrame(index=[5,6,3], data=[[1,2],[1,2],[1,2]], columns=[5,7])\n","\n","pd.concat([df_1,df_2], axis=0)"],"execution_count":29,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
457
11.02NaN
21.02NaN
31.02NaN
5NaN12.0
6NaN12.0
3NaN12.0
\n","
"],"text/plain":[" 4 5 7\n","1 1.0 2 NaN\n","2 1.0 2 NaN\n","3 1.0 2 NaN\n","5 NaN 1 2.0\n","6 NaN 1 2.0\n","3 NaN 1 2.0"]},"metadata":{},"execution_count":29}]},{"cell_type":"code","metadata":{"id":"hfsafxqc6wl0","colab":{"base_uri":"https://localhost:8080/","height":217},"executionInfo":{"status":"ok","timestamp":1615297123302,"user_tz":-300,"elapsed":594,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"80ab6214-48dd-4847-9637-c8eda376ce2b"},"source":["f.head()"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sepal_width_(cm)petal_width_(cm)targetsepal_length_on_widthsepal_width_(cm)petal_width_(cm)targetsepal_length_on_width
03.50setosa0.9935483.50setosa0.993548
13.00setosa0.9980453.00setosa0.998045
23.20setosa0.9947983.20setosa0.994798
33.10setosa0.9962243.10setosa0.996224
43.60setosa0.9835003.60setosa0.983500
\n","
"],"text/plain":[" sepal_width_(cm) petal_width_(cm) ... target sepal_length_on_width\n","0 3.5 0 ... setosa 0.993548\n","1 3.0 0 ... setosa 0.998045\n","2 3.2 0 ... setosa 0.994798\n","3 3.1 0 ... setosa 0.996224\n","4 3.6 0 ... setosa 0.983500\n","\n","[5 rows x 8 columns]"]},"metadata":{"tags":[]},"execution_count":44}]},{"cell_type":"code","metadata":{"id":"HRY-rDbb8gGk"},"source":["g = d.drop(['sepal_width_(cm)', 'petal_width_(cm)'], axis=1)\n","h = d.drop(['sepal_length_on_width'], axis=1)"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"tsgVE2Si8oFG","colab":{"base_uri":"https://localhost:8080/","height":197},"executionInfo":{"status":"ok","timestamp":1615297139175,"user_tz":-300,"elapsed":429,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"dea93f74-7d0d-4030-c81c-84ea655c5f6d"},"source":["g.head()"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
targetsepal_length_on_width
0setosa0.993548
1setosa0.998045
2setosa0.994798
3setosa0.996224
4setosa0.983500
\n","
"],"text/plain":[" target sepal_length_on_width\n","0 setosa 0.993548\n","1 setosa 0.998045\n","2 setosa 0.994798\n","3 setosa 0.996224\n","4 setosa 0.983500"]},"metadata":{"tags":[]},"execution_count":46}]},{"cell_type":"code","metadata":{"id":"kny_HFf489cy","colab":{"base_uri":"https://localhost:8080/","height":197},"executionInfo":{"status":"ok","timestamp":1615297148886,"user_tz":-300,"elapsed":628,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"59ae8694-c22e-4118-e25f-f31a2e148c4e"},"source":["h.head()"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sepal_width_(cm)petal_width_(cm)target
03.50setosa
13.00setosa
23.20setosa
33.10setosa
43.60setosa
\n","
"],"text/plain":[" sepal_width_(cm) petal_width_(cm) target\n","0 3.5 0 setosa\n","1 3.0 0 setosa\n","2 3.2 0 setosa\n","3 3.1 0 setosa\n","4 3.6 0 setosa"]},"metadata":{"tags":[]},"execution_count":47}]},{"cell_type":"code","metadata":{"id":"ZAKyHnni8_wx","colab":{"base_uri":"https://localhost:8080/","height":197},"executionInfo":{"status":"ok","timestamp":1615297241757,"user_tz":-300,"elapsed":588,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"cc83133f-1f83-4c7a-f041-b23d01f14cf4"},"source":["d = g.merge(h, on='target')\n","d.head()"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
targetsepal_length_on_widthsepal_width_(cm)petal_width_(cm)
0setosa0.9935483.50
1setosa0.9935483.00
2setosa0.9935483.20
3setosa0.9935483.10
4setosa0.9935483.60
\n","
"],"text/plain":[" target sepal_length_on_width sepal_width_(cm) petal_width_(cm)\n","0 setosa 0.993548 3.5 0\n","1 setosa 0.993548 3.0 0\n","2 setosa 0.993548 3.2 0\n","3 setosa 0.993548 3.1 0\n","4 setosa 0.993548 3.6 0"]},"metadata":{"tags":[]},"execution_count":49}]},{"cell_type":"code","metadata":{"id":"m6ec0Exh9K8V","colab":{"base_uri":"https://localhost:8080/","height":424},"executionInfo":{"status":"ok","timestamp":1614767389654,"user_tz":-300,"elapsed":712,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"1c97b950-0ba5-4b63-f8b7-2560f8decceb"},"source":["pd.get_dummies(d.target)"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
setosaversicolorvirginica
0100
1100
2100
3100
4100
............
7495001
7496001
7497001
7498001
7499001
\n","

7500 rows × 3 columns

\n","
"],"text/plain":[" setosa versicolor virginica\n","0 1 0 0\n","1 1 0 0\n","2 1 0 0\n","3 1 0 0\n","4 1 0 0\n","... ... ... ...\n","7495 0 0 1\n","7496 0 0 1\n","7497 0 0 1\n","7498 0 0 1\n","7499 0 0 1\n","\n","[7500 rows x 3 columns]"]},"metadata":{"tags":[]},"execution_count":46}]},{"cell_type":"code","metadata":{"id":"Hrp_HGEb9t4d","colab":{"base_uri":"https://localhost:8080/","height":197},"executionInfo":{"status":"ok","timestamp":1615297478580,"user_tz":-300,"elapsed":440,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"b3b9983b-e598-4288-90ee-7b0d1abe5ff8"},"source":["d = pd.get_dummies(data=d, columns=['target'])\n","d.head()"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sepal_length_on_widthsepal_width_(cm)petal_width_(cm)target_setosatarget_versicolortarget_virginica
00.9935483.50100
10.9935483.00100
20.9935483.20100
30.9935483.10100
40.9935483.60100
\n","
"],"text/plain":[" sepal_length_on_width sepal_width_(cm) ... target_versicolor target_virginica\n","0 0.993548 3.5 ... 0 0\n","1 0.993548 3.0 ... 0 0\n","2 0.993548 3.2 ... 0 0\n","3 0.993548 3.1 ... 0 0\n","4 0.993548 3.6 ... 0 0\n","\n","[5 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":50}]},{"cell_type":"markdown","metadata":{"id":"Ym2h89BMguk6"},"source":["### Графическое представление"]},{"cell_type":"code","metadata":{"id":"EB8GRu9XxNaZ"},"source":["%matplotlib inline\n","import seaborn as sns\n","from matplotlib import pyplot as plt"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"hbipgoEZxNOg"},"source":["sns.set_style(\"whitegrid\")"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"VlMb-EWdxNMn","colab":{"base_uri":"https://localhost:8080/","height":122},"executionInfo":{"status":"ok","timestamp":1614779517504,"user_tz":-300,"elapsed":587,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"9907624b-bf04-4f40-f152-94951d92a782"},"source":["print(sns.color_palette())\n","sns.palplot(sns.color_palette())"],"execution_count":null,"outputs":[{"output_type":"stream","text":["[(0.12156862745098039, 0.4666666666666667, 0.7058823529411765), (1.0, 0.4980392156862745, 0.054901960784313725), (0.17254901960784313, 0.6274509803921569, 0.17254901960784313), (0.8392156862745098, 0.15294117647058825, 0.1568627450980392), (0.5803921568627451, 0.403921568627451, 0.7411764705882353), (0.5490196078431373, 0.33725490196078434, 0.29411764705882354), (0.8901960784313725, 0.4666666666666667, 0.7607843137254902), (0.4980392156862745, 0.4980392156862745, 0.4980392156862745), (0.7372549019607844, 0.7411764705882353, 0.13333333333333333), (0.09019607843137255, 0.7450980392156863, 
0.8117647058823529)]\n"],"name":"stdout"},{"output_type":"display_data","data":{"image/png":"iVBORw0KGgoAAAANSUhEUgAAAjwAAABECAYAAACF4e8fAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAChklEQVR4nO3bMWtTYRiG4a+pqJBQEO1gF4UiIo6ZdWpH/4WLm5s4WKgOrs6CILp1chGnThkEMZuODnWR2hRsaYjRmuMfaDsIX488XNf6Lg8cONxwOHNN0xQAgGSdtgcAANQmeACAeIIHAIgneACAeIIHAIh35qTj+w8fm697h6e15dRd747Lucl22zOq2e4tldHvUdszqlj+1pTZpcXSGe20PaWK6eWrZXowa3tGNWe70zLd/9H2jGoWLiyV+XHu8xt3D8tkMml7RhW93m7pdBbLbJb5bhl3lsv34J+zr82XUb/fXzzqdmLwzJqmPNjcrbPqP/Buda/cGNxre0Y1n1delPUv623PqGLj6WH5+eRxOf9ore0pVew/e1k+vRm3PaOam3cOyvDV87ZnVLNyd61c3Jy2PaOa0eqvMhgM2p5Rxa3br0uv+7QcjB+2PaWKnd5GuT+eb3tGNW8X/mwdd/NJCwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIN9c0zbHH4XC4U0rZOr05AAD/7Eq/31886nBi8AAAJPBJCwCIJ3gAgHiCBwCIJ3gAgHiCBwCI9xdiZWLdKNW9eAAAAABJRU5ErkJggg==\n","text/plain":["
"]},"metadata":{"tags":[],"needs_background":"light"}}]},{"cell_type":"code","metadata":{"id":"4umRGJuKqHuO","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1615297767532,"user_tz":-300,"elapsed":622,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"49a1d76f-c4ba-4088-817f-e1bdce211bdc"},"source":["targets = {float(i):target for i, target in enumerate(ds.target_names)}\n","targets"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["{0.0: 'setosa', 1.0: 'versicolor', 2.0: 'virginica'}"]},"metadata":{"tags":[]},"execution_count":54}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":1000},"id":"SDeuDnTEXKQk","executionInfo":{"status":"ok","timestamp":1615297774179,"user_tz":-300,"elapsed":456,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"53cf3a73-56d9-42cc-f715-9dee1f23fd15"},"source":["df[df.target==1]"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," 
\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sepal_length_(cm)sepal_width_(cm)petal_length_(cm)petal_width_(cm)target
507.03.24.71.41
516.43.24.51.51
526.93.14.91.51
535.52.34.01.31
546.52.84.61.51
555.72.84.51.31
566.33.34.71.61
574.92.43.31.01
586.62.94.61.31
595.22.73.91.41
605.02.03.51.01
615.93.04.21.51
626.02.24.01.01
636.12.94.71.41
645.62.93.61.31
656.73.14.41.41
665.63.04.51.51
675.82.74.11.01
686.22.24.51.51
695.62.53.91.11
705.93.24.81.81
716.12.84.01.31
726.32.54.91.51
736.12.84.71.21
746.42.94.31.31
756.63.04.41.41
766.82.84.81.41
776.73.05.01.71
786.02.94.51.51
795.72.63.51.01
805.52.43.81.11
815.52.43.71.01
825.82.73.91.21
836.02.75.11.61
845.43.04.51.51
856.03.44.51.61
866.73.14.71.51
876.32.34.41.31
885.63.04.11.31
895.52.54.01.31
905.52.64.41.21
916.13.04.61.41
925.82.64.01.21
935.02.33.31.01
945.62.74.21.31
955.73.04.21.21
965.72.94.21.31
976.22.94.31.31
985.12.53.01.11
995.72.84.11.31
\n","
"],"text/plain":[" sepal_length_(cm) sepal_width_(cm) ... petal_width_(cm) target\n","50 7.0 3.2 ... 1.4 1\n","51 6.4 3.2 ... 1.5 1\n","52 6.9 3.1 ... 1.5 1\n","53 5.5 2.3 ... 1.3 1\n","54 6.5 2.8 ... 1.5 1\n","55 5.7 2.8 ... 1.3 1\n","56 6.3 3.3 ... 1.6 1\n","57 4.9 2.4 ... 1.0 1\n","58 6.6 2.9 ... 1.3 1\n","59 5.2 2.7 ... 1.4 1\n","60 5.0 2.0 ... 1.0 1\n","61 5.9 3.0 ... 1.5 1\n","62 6.0 2.2 ... 1.0 1\n","63 6.1 2.9 ... 1.4 1\n","64 5.6 2.9 ... 1.3 1\n","65 6.7 3.1 ... 1.4 1\n","66 5.6 3.0 ... 1.5 1\n","67 5.8 2.7 ... 1.0 1\n","68 6.2 2.2 ... 1.5 1\n","69 5.6 2.5 ... 1.1 1\n","70 5.9 3.2 ... 1.8 1\n","71 6.1 2.8 ... 1.3 1\n","72 6.3 2.5 ... 1.5 1\n","73 6.1 2.8 ... 1.2 1\n","74 6.4 2.9 ... 1.3 1\n","75 6.6 3.0 ... 1.4 1\n","76 6.8 2.8 ... 1.4 1\n","77 6.7 3.0 ... 1.7 1\n","78 6.0 2.9 ... 1.5 1\n","79 5.7 2.6 ... 1.0 1\n","80 5.5 2.4 ... 1.1 1\n","81 5.5 2.4 ... 1.0 1\n","82 5.8 2.7 ... 1.2 1\n","83 6.0 2.7 ... 1.6 1\n","84 5.4 3.0 ... 1.5 1\n","85 6.0 3.4 ... 1.6 1\n","86 6.7 3.1 ... 1.5 1\n","87 6.3 2.3 ... 1.3 1\n","88 5.6 3.0 ... 1.3 1\n","89 5.5 2.5 ... 1.3 1\n","90 5.5 2.6 ... 1.2 1\n","91 6.1 3.0 ... 1.4 1\n","92 5.8 2.6 ... 1.2 1\n","93 5.0 2.3 ... 1.0 1\n","94 5.6 2.7 ... 1.3 1\n","95 5.7 3.0 ... 1.2 1\n","96 5.7 2.9 ... 1.3 1\n","97 6.2 2.9 ... 1.3 1\n","98 5.1 2.5 ... 1.1 1\n","99 5.7 2.8 ... 
1.3 1\n","\n","[50 rows x 5 columns]"]},"metadata":{"tags":[]},"execution_count":55}]},{"cell_type":"markdown","metadata":{"id":"Rg_HMRSVzGz-"},"source":["Строим гистограммы"]},{"cell_type":"code","metadata":{"id":"mx_PNSF8xNKe","colab":{"base_uri":"https://localhost:8080/","height":406},"executionInfo":{"status":"ok","timestamp":1615297826988,"user_tz":-300,"elapsed":1244,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"5d46e25e-fb29-467c-d88f-b3b689306815"},"source":["for target in targets:\n"," sns.distplot(df[df.target==target]['sepal_length_(cm)'],kde=True,kde_kws={\"label\":targets[target]})"],"execution_count":null,"outputs":[{"output_type":"stream","text":["/usr/local/lib/python3.7/dist-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n"," warnings.warn(msg, FutureWarning)\n","/usr/local/lib/python3.7/dist-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n"," warnings.warn(msg, FutureWarning)\n","/usr/local/lib/python3.7/dist-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n"," warnings.warn(msg, FutureWarning)\n"],"name":"stderr"},{"output_type":"display_data","data":{"image/png":"\n","text/plain":["
"]},"metadata":{"tags":[]}}]},{"cell_type":"code","metadata":{"id":"bNUuVXgzhvz1","colab":{"base_uri":"https://localhost:8080/","height":406},"executionInfo":{"status":"ok","timestamp":1615297848522,"user_tz":-300,"elapsed":1136,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"7ef13877-988b-4be0-a9b2-b4983762d161"},"source":["for target in targets:\n"," sns.distplot(df[df.target==target]['sepal_width_(cm)'],kde=True,kde_kws={\"label\":targets[target]})"],"execution_count":null,"outputs":[{"output_type":"stream","text":["/usr/local/lib/python3.7/dist-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n"," warnings.warn(msg, FutureWarning)\n","/usr/local/lib/python3.7/dist-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n"," warnings.warn(msg, FutureWarning)\n","/usr/local/lib/python3.7/dist-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n"," warnings.warn(msg, FutureWarning)\n"],"name":"stderr"},{"output_type":"display_data","data":{"image/png":"\n","text/plain":["
"]},"metadata":{"tags":[]}}]},{"cell_type":"code","metadata":{"id":"6Li1iREOhvts","colab":{"base_uri":"https://localhost:8080/","height":408},"executionInfo":{"status":"ok","timestamp":1615297853838,"user_tz":-300,"elapsed":923,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"56d8c257-b464-465f-c365-a0dbc90b03b6"},"source":["for target in targets:\n"," sns.distplot(df[df.target==target]['petal_length_(cm)'],kde=True,kde_kws={\"label\":targets[target]})"],"execution_count":null,"outputs":[{"output_type":"stream","text":["/usr/local/lib/python3.7/dist-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n"," warnings.warn(msg, FutureWarning)\n","/usr/local/lib/python3.7/dist-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n"," warnings.warn(msg, FutureWarning)\n","/usr/local/lib/python3.7/dist-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n"," warnings.warn(msg, FutureWarning)\n"],"name":"stderr"},{"output_type":"display_data","data":{"image/png":"\n","text/plain":["
"]},"metadata":{"tags":[]}}]},{"cell_type":"code","metadata":{"id":"oCQEu59thvri","colab":{"base_uri":"https://localhost:8080/","height":404},"executionInfo":{"status":"ok","timestamp":1614779712345,"user_tz":-300,"elapsed":1286,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"1e523154-41f1-4e2c-ca8e-0aebf1cee232"},"source":["for target in targets:\n"," sns.distplot(df[df.target==target]['petal_width_(cm)'],kde=True,kde_kws={\"label\":targets[target]})"],"execution_count":null,"outputs":[{"output_type":"stream","text":["/usr/local/lib/python3.7/dist-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n"," warnings.warn(msg, FutureWarning)\n","/usr/local/lib/python3.7/dist-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n"," warnings.warn(msg, FutureWarning)\n","/usr/local/lib/python3.7/dist-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n"," warnings.warn(msg, FutureWarning)\n"],"name":"stderr"},{"output_type":"display_data","data":{"image/png":"\n","text/plain":["
"]},"metadata":{"tags":[],"needs_background":"light"}}]},{"cell_type":"markdown","metadata":{"id":"48op7eXwzKb4"},"source":["Строим точечные графики взаимного влияния параметров"]},{"cell_type":"code","metadata":{"id":"hKh-KV27whqi","colab":{"base_uri":"https://localhost:8080/","height":225},"executionInfo":{"status":"ok","timestamp":1614767506092,"user_tz":-300,"elapsed":1461,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"b6d7f703-3029-4c58-e075-ba67e8307bcf"},"source":["g = sns.FacetGrid(df, hue='target')\n","g.map(plt.scatter, 'sepal_length_(cm)', 'sepal_width_(cm)');\n","g.add_legend();"],"execution_count":null,"outputs":[{"output_type":"display_data","data":{"image/png":"\n","text/plain":["
"]},"metadata":{"tags":[]}}]},{"cell_type":"code","metadata":{"id":"ArJjRTF6ySuO","colab":{"base_uri":"https://localhost:8080/","height":225},"executionInfo":{"status":"ok","timestamp":1614767511831,"user_tz":-300,"elapsed":1523,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"a857d46d-33d9-417c-8a78-e3b3cf5dcbd9"},"source":["g = sns.FacetGrid(df, hue='target')\n","g.map(plt.scatter, 'petal_length_(cm)', 'petal_width_(cm)');\n","g.add_legend();"],"execution_count":null,"outputs":[{"output_type":"display_data","data":{"image/png":"\n","text/plain":["
"]},"metadata":{"tags":[]}}]},{"cell_type":"code","metadata":{"id":"5KszRYQ0yaaV","colab":{"base_uri":"https://localhost:8080/","height":225},"executionInfo":{"status":"ok","timestamp":1614767516166,"user_tz":-300,"elapsed":1157,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"ffcba165-ccb3-4f61-e7ae-c413017c2e8c"},"source":["g = sns.FacetGrid(df, hue='target')\n","g.map(plt.scatter, 'petal_length_(cm)', 'sepal_width_(cm)');\n","g.add_legend();"],"execution_count":null,"outputs":[{"output_type":"display_data","data":{"image/png":"\n","text/plain":["
"]},"metadata":{"tags":[]}}]},{"cell_type":"code","metadata":{"id":"SE3c3sS0yfgl","colab":{"base_uri":"https://localhost:8080/","height":225},"executionInfo":{"status":"ok","timestamp":1614767522475,"user_tz":-300,"elapsed":1668,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"ee44eed9-08f4-495a-e122-a0910757f984"},"source":["g = sns.FacetGrid(df, hue='target')\n","g.map(plt.scatter, 'sepal_length_(cm)', 'petal_width_(cm)');\n","g.add_legend();"],"execution_count":null,"outputs":[{"output_type":"display_data","data":{"image/png":"iVBORw0KGgoAAAANSUhEUgAAAPcAAADQCAYAAADbJffdAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nO2de1hU5fbHvzPDNUAUjoB3RcUMRcVLmKaCgD6OgqKmHrU0rTRNyn4qaCni/VIntJ5O5ck0jbyhmHiOJOAly0uKkIqGEcZFEEUBEZCZeX9/TIwzw57Zey6b2TO8n+fx0b33e1kzsNx7f9/1riUihBBQKBSbQ2xpAygUCj9Q56ZQbBTq3BSKjUKdm0KxUahzUyg2ilU6d25urqVNUJGfn29pE3RCbTMOIdtmCFbp3DKZzNImqKipqbG0CTqhthmHkG0zBKt0bgqFwg51bgrFRqHOTaHYKLw69927dzFz5kyMGTMGUqkUu3btatTmwoUL6N+/PyIjIxEZGYlPP/2UT5MoDKTkpSD8YDgCdgUg/GA4UvJSmqRvU4zXnLHjc3CJRIKYmBj4+/vj8ePHmDhxIoYMGYJu3bpptBswYAC++OILPk2h6CAlLwVxP8ehVl4LALhbfRdxP8cBAKS+Ut76NsV4zR1e79xeXl7w9/cHALi6usLX1xelpaV8TkkxkIQrCSpnaqBWXouEKwm89m2K8Zo7vN651SksLEROTg769OnT6NrVq1cREREBLy8vLFu2DN27d9c7Vl1dHXJycvgy1SBqa2sFY4s2XGwrqS7ReZ7Pvky2mTKeOVG3rWfPnk02r7lpEueurq7GokWLsHz5cri6umpc8/f3R3p6OlxcXHD69GksWLAAqampesdzdHQUzJeek5MjGFu04WKbz3Uf3K2+2/i8iw+vfZlsM2U8cyLkn6kh8K6W19fXY9GiRRg3bhzCw8MbXXd1dYWLiwsAYPjw4ZDJZCgvL+fbLMrfRAdGw0nipHHOSeKE6MBoXvs2xXjNHV6dmxCCFStWwNfXF7Nnz2ZsU1ZWhoZ8EdnZ2VAoFGjVqhWfZlk95lSUpb5SRHaLhFik/FUQi8SI7BbJScAypa+u8eJeikMblzYQQYQ2Lm0Q91IcFdOMhNfH8suXLyM5ORl+fn6IjIwEACxevBjFxcUAgGnTpuHEiRNITEyERCKBk5MTPv74Y4hEIj7Nsmr4UKiTbydDQRQAAAVRIPl2Mvp59eOklhvbVxdSXyl1ZjMhssY0S0J6J2pqW8IPhjO+l7ZxaYPUSZpaBRfbDBnPnH2F9DP
URsi2GQKNULMy9CnKTT2euW2hmBfq3FaGj4uPQef5HM/ctlDMC3VuK0NICjVVt4VNkwWxUMxDg9iUcCUBJdUl8HHxQXRgtEEiVEpeikb/yG6ROFN4BiXVJXB3dAchBLFnY5FwJUHv2FJfKTLvZeLA7wegIAqIRWL0bd0XCVcSEHs2Fj4uPujk1gkXSy+qrk/2m4wPgj4wy3fBxtrzazVsa8q5hQB1bivEFEWZSW1Pvp2MuJfiAMAgJZ5JLT9fcl51/W71XQ3BTUEU2HdrHwBgovtEo+znytrza1Vzac/dXBycPpY3M/TFbxsa283UngsHfj9gcB9zzdEUcwsFeuduZhijcJtbFW+40/OJrjmaYm6hQO/czQx9Creh6rexqnhDRBuf6JqjKeYWCs3nk1IA6Fe4DVW/mdpzYbLfZIP7mGuOpphbKFDnbmboi99mihVvUL+Z4tiZxprSY4rGcZBPkMZ4U3pMaRJB64OgDxDkE6RxLsgnqNmIaQB9526W6FLbuajf2uq5UGPBU/JScLXsqsa5q2VXkZKXIkh7+YDeuSkquKjf1pIZhWZ1oc5NUYOr+m0NseM07p06N0UNruq3NcSO07h36twUNbio39YSO07j3qmgRlGDKVZ8kPcg3Km6ozOOXTtOfVj7Yao4daZjtv6Gxsmroy9m3tSxrRHq3BQVTGr51bKrOlMdMcWpq8dzMx03qO2+8DVrVhl9MfPNyaHVoY/lFBVNEVuuPp45FW2qjjeGOjdFhaEKs6nZX8w5LlXHG0Odm6KiqWLLG/qZc1yqjjeGOjdFRVPElquPZ05Fm6rjjbF4lU9CCNauXYuwsDCMGzcO169f59Mkih4MzRvOJbZc+1h9PHPmKac5zxkgPFJaWkquXbtGCCGkqqqKhIeHk9zcXI02p06dInPmzCEKhYJkZmaSSZMmsY5748YNXuw1Bj5sOfbHMRJ2IIz0/qY3CTsQRtb8skbj+Ngfx5rMtjW/rCEBuwJIr296kYBdAWTNL2tMHpOQpvkZan+P2t+brutC+v0yBV6Xwry8vODl5QVAs8qnegnftLQ0jB8/HiKRCH379kVlZSXu3bun6tfc4LK81FRlba05VRHbMpu+677wtYTJZqfJ3rl1VfksLS2Fj88z0cPHx6dZl/kV0uYNa05VxLY01hyWzixe5dMYbLmEryGbN4wpk2sI+lIVmfqZ+S59zFYOWN91WsKXI2xVPr29vVFS8uyLLikpgbe3t94xbbmEr64yto3aGVkm1xDEl8SMDi4WiU3+zHyX7GErB6zvupOTk2B+v0zB4lU+Q0JCcOTIERBCcPXqVbi5uTXb921AWJs3rDlVEdvSWHNYOuN0587MzMTRo0fx66+/oqysDE5OTujevTtGjBiBiIgIuLm5MfbjUuVz+PDhOH36NMLCwuDs7Iz169eb6aNZJ0xFB7Q3X3Ry64TlPy1HzNkYsyTbV99w0VCUoPJpJXxcfBDkE2SxogJsturbDMJWvEHfdaG88pkKa5XPuXPnwsvLCyNHjkSvXr3g6emJuro65Ofn48KFC8jIyMCsWbMwcuTIprJZUFUYm9oWbQW7AabcZFxs01aNtXGSOPGyXmzM98ZkKx/2Cen3yxRYnbu8vBweHh56B+HSxpwI6ctvalv67O6j8z0469UsjXOmlPBVh0tJXkMx5nszpWSwIQjp98sUWB/LtZ328ePHkMlkquOWLVs2qWM3d8ydbN+UUr1NDd0cYhic1fLvv/8e27dvh6Ojo+qcSCRCWloaL4ZRmBGLdCvYxuDjwq7OC2XzhS5bhWKf0OD8G/H111/jhx9+QHp6uuoPdeymx9wKNps6LyQFuTko3OaE8527Q4cOcHZ25tMWCgcaRDONVEjPdcSZG98j4Ob38FEAw7wG4ExtsVIFvs6e+qidSzv8UflHo7mYSvLymSaJaTxtW5t76iRDYBXUGrhx4wZiY2PRp08fODg4qM5/8EHTL4sISfCwtC0ppz5E3J+HUSs
WPTtJCCAS6e5kAurqtCnqNdP3xqbcGzK+KVj6Z2ouOD+Wr1y5EkFBQejTpw/8/f1VfyiWJSFPy7EB3hwb4Dc+W0hx9bYA58dymUyG2NhYPm2hGEGJBdJt8JEmyZB+VB3nBudfjWHDhmHfvn24d+8eHj16pPpDsSw+Fig3zUeaJEP6UXWcG5yd+9ixY/jiiy8wdepUREVFISoqChMnTuTTNgoHon0nwEmhJZtwk1GMgs/4bCHF1dsCnB/L09PT+bRD8JhbFTYX0hFrACjfvUvEaKyWcygU0Mmtkyp+HACes3sONbIa1qICUl8p8Nd5jbmjO402+nvhElcvlO/dGuCslu/duxfjxo1DixYtAAAVFRU4duwYpk+fzquBTDS1mqlPFfat8xWsssr795S9H/hhEVBf8+ycvTMwbhsQ8IplbTMBIdtmCJwfy/fv369ybABwd3fHgQPCz8hhDppD1g6jSIvXdGxAeZwWbxl7KBpwdm6FQgH1m7xcLkd9fT0vRgkNGtOsg4pCw85TmhTO79xDhw7Fu+++i6lTpwJQxpq//PLLvBkmJGhMsw7c2wMVBcznKRaH8517yZIlCAoKQmJiIhITEzF48GAsWbKET9sEA41p1sHIlcp3bHXsnZXnKRaH851bLBZj2rRpmDZtGp/2CBKry9pxbDFw+Rs8T+SASAL0nwWM/Vh3++z9yvfkikLlXXfkSlZBDMCzNsb01YFQVyWsEVbnnjdvHl555RW8/PLLsLe317hWUFCApKQktGvXDpMmTeLNSCEg9ZVaxy/ZscXAr/8BAIgAgMhVx4wOrq14VxQojwHuDm6CM6tjzpK+FA5LYWVlZdi5cydSU1Ph7u4ODw8P1NXVobCwEJ06dcL06dMRGhraVPYCENZShZBsAQCs9lA6tDYiCbCqvPH5f/XS8d7cAXjvmvnt+xum762pMq2wIbifqZGw3rlbt26NpUuXYunSpSgsLFQlSOzcuTPdAipEmBxb33kBKd50VcK8GLTtoH379ujXrx969uzZyLGnTJliVsMoRiKSGHZel7JtAcWbluE1L2bbU1RXV2euoSim0H+WYecFpHjTVQnzYraKIyKGPcSxsbE4deoUPD09cezYsUbXL1y4gLfffhvt2yvvEmFhYVi4cKG5TBIubOq0oeq1VvsUn65IsKtBiZ0EPjI5ou19IFUX07Tbt++JBFkJSiQS+MjliK6ogTTpTWWb7uFAbqrxthrwWdhyjVMMg9dyQlFRUZgxYwaWLVums82AAQPwxRdf8GmGsGBTpw1Vr7Xap8geIM7RA7Vi5Y/2rr0d4hRlwKkPlZtMmNorCGrt/m5vZ4e4Vi6AvA7SioJnSrsxtuq7bt+b8euxmlUJK8Bsj+VMovvAgQPh7u5urilsA7Z4bEPjtbXaJ7RqiVqx5o+1VixCQt5hA9qLkdCqJfN8hthKY88titnu3Js3bzaq39WrVxEREQEvLy8sW7YM3bt3Z+1jzVU+n68oBFMSJFJRiJs5OazX2cYrsWMWzkrEyiUezu11nDfEVn3X+a7yaQrNrspnamoqtm7digcPHoAQAkIIRCIRrly5AgDw8/MzeHJ/f3+kp6fDxcUFp0+fxoIFC5Cayr6eadVVPnXEY4vc2yvHYbvOMp6PTI679o1/rD4KMI6vs71Mx9KZIbbquS7kSpq2ss7N+bF8y5Yt+Pzzz3H58mVcuXIFmZmZKsc2FldXV7i4uAAAhg8fDplMhvJyhkALW4JNnTZUvdZqH/3wEZwUmrmXnBQE0b4TDGivQPRDHSm0DLFVQEq8uamsrMTevXt5n+fkyZO4ffu2UX05O7enpye6du1q1CS6KCsrU72rZ2dnQ6FQoFWrVmadwyJk71dGfsW1VP6dvf/ZtYBXlMkM3DsAECn/Vk9uwHZdG632UjtPxLkFoI2cQEQI2sgJIsVuSPjjIAK+6YXwS6uQ0r6n7vYyBeIqn0JaXaNsM2COpi19/ql8Z45rqfy7/aBna+giifJ
Y/XqffzL2f37f4MbfjRVRWVmJxMREzu0JIVAoDE94Z4pzs4afNjwmX7x4Effv30doaKhG3vLw8HCdfRcvXoyLFy/i4cOH8PT0xDvvvKOqMzZt2jTs2bMHiYmJkEgkcHJyQkxMDAIDA1mNFtJjUyNbTMhOwodtedfjEVeVrSGaOSkUiHMLgHQS919OAMyfjQ31zy6g70YfXH6/3nvvPaSlpaFLly548cUXcevWLVRWVkImkyE6OhqhoaEoLCzEnDlz0KdPH1y/fh1ffvkljhw5gqNHj8LDwwNt2rSBv78/5syZg7/++gurV6/Gw4cP4eTkhDVr1qCiogLz5s2Dq6sr3NzcsH37dnTs2JHz52B9587IyFD929nZGefOndO4rs+5P/5Yz04kADNmzMCMGTPYTLAu9CnEFvgFTqi4qlrmaqBWLEZCxVUYvODE9NnYUP/sAvtuTOH9999Hbm4ukpOTIZPJUFtbC1dXV5SXl2PKlCmqktZ37tzBpk2b0LdvX2RnZyM1NRVHjx5FfX09oqKiVLn/P/zwQ6xevRqdO3dGVlYWVq9ejd27dyMkJAQjRozA6NGjDbaR1bk3bNgAALh8+TL69++vce3y5csGT2jzCChWGwBKJDrUcB3n9WLsZ2joJ7DvxlwQQvDxxx/j0qVLEIvFKC0txf379wEAbdu2Rd++fQEAV65cwciRI+Ho6AhHR0cEBwcDAKqrq5GZmYno6GeReE+fPjXZLs5q+dq1a3H48GHWc80egWUn8ZHLcdeOQQ2X61bDdaLrs3Hpp6+/lWdu+eGHH1BeXo6kpCTY29sjJCREFY793HPPsfYnhKBFixZITk42q12sglpmZia+/vprlJeXY+fOnao/27dvh9yYXxBbR2AKcbR7X2Y13L2v4YMxfTY2bFQ9d3FxQXV1NQCgqqoKnp6esLe3x/nz51FUVMTYJzAwEBkZGairq0N1dTVOnToFQLlq1L59e/z3v/8FoHT2mzdvNprHUFidu76+Hk+ePIFcLkd1dbXqj6urK7Zt22bUpDaNoWo3z0gnJSrVcJnsbzVcZpyYBjB/Nm01XftYx0oAEcB3YwqtWrVCYGAgxo4di5s3b+LatWsYN24ckpOT4evry9gnICAAISEhiIiIwBtvvAE/Pz+4ubkBUC41Hzx4EBEREZBKpTh58iQAYMyYMfjPf/6D8ePH46+//jLIRs55y4uKitCuXTuDBucLQavlxvB3WiQ0pEVy8QIeqyUtcHQH6iqeHXcZDrx2lLtt2ps3tDeDqB87/70UWfPQPJta2GwTIHzaVl1dDRcXF9TU1GD69OlYs2YNbwU1OaVZ0se///1vsxnTLFFLiwRA6eCPtbKRqDs2APx5GtgVwcnBGTdvaG8GUT+uKde8ZsqmFkojVq5cidu3b6Ourg4TJkzgtVIuq3O//vrrAJTr3ffv30dERAQAICUlBZ6enrwZ1my4/I1x/f48za2dMctX6tjoUpal+Oijj5psLlbnHjRoEABg48aNSEpKUp0PCQlBVFQUf5Y1F3SlPzIX5lhmsvGlLFuFc/hpTU0NCgqeLWMUFBSgpsaEOwJFia70R+bCHMtM6ktZfM1BMTuc17ljY2Mxc+ZMdOjQAYQQFBcXY/Xq1Xza1jzoP0vznZcrXYZzazdypeEho+poL2UxhY9a4VJWc4Czcw8bNgypqanIy8sDAPj6+mrEmFOMZOzHwIPbmu/Q2uq4xBmQ12hez/8JiHNnLzoQ8Arw13lNNb7zUKA8z3C1nIciBBT+YHXuX375BYMHD260z7phzU1fbDmFA9n7gcKLmucUT4GorzQVavVXc3XH51J0IOu7Z+/2RK6cz9j1ZTMWIaBw58yZM1i3bh0UCgUmT56MN998k7UPq3NfunQJgwcP1thAog51bhNhU6C5qt2Xv2F2bqpwNylHMouw5cQtFD+qQduWzlgyqgfG9zMtPkQulyM+Ph47d+6Et7c3Jk2ahJCQEHTr1k1vP1bnXrRIuY7
ZsIGEYmbYFGiuSrQVFB2wdY5kFiE26TfU1Ct/FkWPahCb9BsAmOTg2dnZ6NSpEzp06AAAkEqlSEtLY3Vuzmp5aGgo3n//fSQmJiI3N9doQylasCnQXJVoKyg6YOtsOXFL5dgN1NTLseXELZPGLS0thY/Ps8IM3t7eKC0tZe3H2bmPHz+OqVOn4tGjR9i8eTNCQ0OxYMEC46ylPMOYVEVMWEHRAVun+BHz65Ou83zD2bnFYjHs7OwgkUggFovh6elJI9TU0ZdaSR8BryhTDxmSqqjLcM32A+boV8sFtJHFlmnbkvk/YV3nueLt7Y2Skmf10kpLS+Ht7c3aj/NSWP/+/eHn54fZs2dj8uTJtpHrzFyYEnPNpGarL4tVFCivm+KQVOFuEpaM6qHxzg0AzvYSLBnVw6Rxe/fujfz8fBQUFMDb2xspKSmcwlg5O/dHH32Ey5cv47vvvsOBAwfQr18/DBw4EIMHDzbJcJvAFEWaixpO1W2roEE0M7dabmdnh5UrV2Lu3LmQy+WYOHEip/z+nJ07NDQUoaGh+OOPP3D27Fns2rULO3bsQHZ2tkmG2wSmKNJcVWuqblsF4/u1M9mZmRg+fDiGD+cYlfg3nJ37nXfewc2bN9GxY0cMGDAAmzZtQp8+fQw20iYxJX0Q19RFVN2mGAhn537zzTfxwgsvQKIjsd65c+cwZMgQsxlmVZgSc80l9puq2xQj4OzcvXszV2VsYOvWrY2cm62ELyEE69atw+nTp+Hk5ISNGzfyunndJLQzkHj4Avk/4Xld8dpcY665xn6nxQNJbzJnUtGRLeV5Gv/drDFbIUCmbE1sJXzPnDmD/Px8pKamIisrC3FxcThw4IC5TDIfTGr434/SIuCZwq1vSUrf2Ppiv7lkUtGRLUXEdJ3SbDBbCV+RqHE9R7YSvmlpaRg/fjxEIhH69u2LyspK3Lt3z1wmmQ9D4rvNMTZbGVxtaNlcCgNmu3Mbg3ZYnY+PD0pLS+Hl5aW3X1OX8NVVilYbQuSMZXaNGZutDK6h7XWVALYUtIQv/5jNuZsyM2qTl/DlqGiLRBLD7TKyDK6h7XWWALYQzTX7qTGwaVe6YHVutnrZDVs+P/30U86TNqAdVldSUsIprK7J4ZrNRFd8t6Fjs2U/0YZmSxEOZkr9rA6bdqULgwoBMmHKfu6QkBDs2bMHUqkUWVlZcHNzY30ktwhMGUj+VssJkUPElg3F0LHZsp/oU8vV2pOKQoioWt508JT6eeDAgSgsNDyIiXNRAmNgK+FLCEF8fDzOnj0LZ2dnrF+/nnXJDRDWY5OQbNGG2mYcRtv2r146gpk6AO9dM8mmwsJCzJs3z7yP5eqcOnUKubm5qiJnALBw4UKd7dlK+IpEIqxatcoQEygU4SKwxBicl8JWrlyJ48ePY8+ePQCAEydOoLi4mDfDKBSrQ2CJMTg7d2ZmJjZv3owWLVpg4cKF+P7775Gfn8+jaRSKlSGwxBicndvJyQkA4OzsjNLSUtjb26OsrIw3wygUq4OnxBiLFy/G1KlT8eeff2LYsGGcozg5v3OPGDEClZWVmDNnDqKioiASiTBp0iSjDaZQbBIeEmOwaVe64Ozcb7zxBhwcHDBq1CgEBwejrq4Ojo6ORk1KoVD4h/Nj+ZQpU1T/dnBwgJubm8Y5CoUiLFjv3GVlZSgtLUVtbS1u3Lih2v31+PFjWgiQQhEwrM79008/ISkpCSUlJRqFCVxdXbF48WJejaNQKMbD6twTJkzAhAkTcOLECYwaNaopbKJQKGaA8zt3YGAgli9fjrlz5wIAbt++LczEChQKBYABzh0bG4uhQ4eqkil07twZu3fv5s0wCoWi5O7du5g5cybGjBkDqVSKXbt2cerH2bkfPnyIMWPGQCxWdrGzs1P9m0KhKEnJS0H4wXAE7ApA+MFwpOSlmDymRCJBTEwMjh8/jn379uG7777D7du3Wftx9s7nnnsODx8+VKVTunr1Ktzc3Iy3mEKxMVLyUhD
3cxzuVt8FAcHd6ruI+znOZAf38vJSJQ51dXWFr68vp0KAnINYYmJiMH/+fBQUFGDq1Kl4+PAhEhISjLeYQrExEq4koFZeq3GuVl6LhCsJkPpKzTJHYWEhcnJyONUM4Ozc3bp1Q1hYGJydneHi4oLQ0FB06dLFJEMpFFuipLrEoPOGUl1djUWLFmH58uVwdXVlbc/5sXzp0qXIy8vDW2+9hRkzZiA/Px9LliwxyVgKxZbwcfEx6Lwh1NfXY9GiRRg3bhzn7Eec79y5ubk4fvy46jgoKAhjxowx3Eob4UhmkVrBt7tmKfhGsW6iA6MR93OcxqO5k8QJ0YHRJo1LCMGKFSvg6+uL2bNnc+7H+c79wgsv4OrVq6rjrKws9OrVyzArbYQjmUWITfoNRY9qQAAUPapBbNJvOJJZZGnTKBZE6itF3EtxaOPSBiKI0MalDeJeijP5ffvy5ctITk7G+fPnERkZicjISJw+fZq1H+c79/Xr1zF16lS0bdsWAFBcXIwuXbpg3LhxAIAffvjBSNOtjy0nbmnUYAaAmno5tpy4Re/ezRypr9Rs4lkDAwYMwK1btwzux9m5d+zYYfDgtkrxI+YNM7rOUyiWgLNzN2XRAaHTtqUzihgcuW1LZ4bWFIploCFmRrBkVA8422uWMna2l2DJqB4WsohCaQzvzn3mzBmMGjUKYWFh+PLLLxtdT0pKQlBQkEoosIbNKOP7tcOGqN5o19IZIgDtWjpjYv922HLiFrrEpGDIxnQqrlEsDq+FAOVyOeLj47Fz5054e3tj0qRJCAkJQbdu3TTajRkzBitXWle5m/H92mF8v3bIycnBrdoWiE36TSWyNajnDe0oFEvA6507OzsbnTp1QocOHeDg4ACpVIq0tDQ+p7QI+tRzCsVS8Hrn1i7R6+3tjezs7EbtUlNTcenSJXTp0gWxsbFo06aN3nGbuoSvPmpra/Wq55a001rK5AoNWsLXTAQHB2Ps2LFwcHDA999/j2XLlrHuE2/yEr56yMnJ0aueW9JOm6zH1QQI2TZD4PWxXLtEb2lpaaMSva1atYKDgwMAYPLkybh+/TqfJvECVc8pQoTXO3fv3r2Rn5+PgoICeHt7IyUlBR999JFGm3v37qnK9qanp6Nr1658mmQ2NGPLndG+lRNy71Wrrgd2dNcppk3/6hec+6NcdTykqwf2vjGY81w0jp3CBV6d287ODitXrsTcuXMhl8sxceJEdO/eHQkJCejVqxdGjhyJb7/9Funp6ZBIJHB3d9fIsCpUGmLL1dVxbc79UY4PjvyGteM1SxJrO3ZD2+lf/cLo4ExzUSWewgVe63PzhaXfiYZsTGd0aG0kIhH+2KC5c65zjO6sHPkbG8ck65qrXUtnnIsJ0Tu/pb8nfVDb+IdGqBkB1xhyuRn+36Rx7BRjoc5tBFxjyCV/55vjYy4ax05hgzq3ETCp40xMe7FDo3NDunowttV1nirxFGOx+Dp3U6GtOAc/3xoZN8t0KtD6FOrx/drh1zvlSLxQADkhkIhEcHEQo7LuWZRady+XRmIaAOx9Y7BBajnTXBP7t6NiGoWVZuHcTIrznvN/qa5rK9BsCvWRzCIculykeqeWE6Lh2ACQe6+aUS0HoHfZi8l27bkOXS7CgE4e1MEpemkWj+VMsd/aqMeCs8WKcxkPABIvFBhp8TNo3DrFWJqFc3NVlhvasSnUVC2nWAPNwrm5KssN7dgUaqqWU6yBZuHcXNRtdQWaTaE2RS03FKqWU4zFZiPUtBXp7l4uePJUoVK/ZXI5Squeqq63cJSg+qlCp/qtfSrTW38AAAr1SURBVF0EAhmHb04iEiHItxXyH9So5u7s6YzzeQ9VY/m2fg55ZU9Ux9Ne7KAhxBmq9BvyPVkKahv/2KRazhS/nXuvGkO6euBcTAjjdXVHZlK/ta9zRU6IxlxFj2o0wknlhGhsOJETolLyGxy8IesLQGPNKdyxycdybcfVPq/rupDQpbRT9ZzCFZt0blt
A19MBVc8pXKHOLVB0Ke1UPadwxSadmy1+W9d1IaFLaafqOYUrNunce98Y3MiB1eO3ma57uzmo7pYSkQjebg56rztJNO+sdjqWtCUiEYZ09dDIcT6kq4fGWN29XDSOZwR1ZAxbBZhzpm+I6k3FNEojbEYtZ9rooS+Gmy2+W3u8zp7OuP/4oer6pIHK5SqmZRP1vj7uTpg8oKNZnU9dPadQdGETzm3u5SGm8bSXrxqWq6b3sGPtS5eqKJbAJh7Lzb08ZMrGELpURREKNuHc5l4eMmVjCF2qoggFm3Bucy8PmbIxhC5VUYSCxat8Pn36FO+++y7CwsIwefJkFBYWGjyHuZeHTNkYQpeqKEKBV+duqPK5Y8cOpKSk4NixY7h9+7ZGmwMHDqBFixb48ccfMWvWLGzdutXgecy9PMQ0nvbyla7lKrpURREKvKrl6lU+AaiqfKqX8E1PT8fChQsBAKNGjUJ8fDwIIRAZuBfa3MtDpoxHl6ooQoDXOzdTlc/S0tJGbRqqetrZ2cHNzQ0PHz4EhUIxDatc5xZaCV+h2KINtc04bKWEr8WrfHp7e+Pu3bsAAJlMhqqqKrRq1YpPs8yKk5OTpU3QCbXNONRty83NtaAlpmHxKp8hISE4fPgw+vXrhxMnTiAoKIj1fbtv3758mk2h2AS8p1k6ffo01q9fr6ryOX/+fI0qn3V1dViyZAlycnLg7u6Of/3rXyoBjkKhGI9V5lCjUCjs2ESEGoVCaQx1bgrFRqHOTaHYKNS5KRQbxSqDWIRASEgIXFxcIBaLIZFIkJSUZGmTVFRWVuKDDz7A77//DpFIhPXr16Nfv36WNgsAkJeXh/fee091XFBQgEWLFmHWrFmWM0qNb775BgcOHIBIJIKfnx82bNgAR0dHS5tlHIRiFMHBweTBgweWNoORpUuXkv379xNCCKmrqyMVFRUWtogZmUxGXnrpJVJYWGhpUwghhJSUlJDg4GBSU1NDCCFk0aJF5NChQxa2ynjoY7mNUVVVhUuXLmHSpEkAAAcHB7Ro0cLCVjHzyy+/oEOHDmjXTjibbORyOWprayGTyVBbWwsvLy9Lm2Q01LlNYM6cOYiKisK+ffssbYqKwsJCeHh4IDY2FuPHj8eKFSvw5MkTS5vFSEpKCsaOHWtpM1R4e3vj9ddfR3BwMIYOHQpXV1cMHTrU0mYZDXVuI0lMTMThw4fx1VdfYe/evbh06ZKlTQKgjM+/ceMGpk2bhiNHjsDZ2ZkxSYalefr0KdLT0zF69GhLm6KioqICaWlpSEtLw9mzZ1FTU4Pk5GRLm2U01LmNpGEDjKenJ8LCwpCdnW1hi5T4+PjAx8cHffr0AQCMHj0aN27csLBVjTlz5gz8/f3xj3/8w9KmqPj555/Rvn17eHh4wN7eHuHh4cjMzLS0WUZDndsInjx5gsePH6v+fe7cOXTv3t3CVilp3bo1fHx8kJeXB0D5Xtu1a1cLW9WYlJQUSKVSS5uhQdu2bZGVlYWamhoQQgT73XGFxpYbQUFBARYsWABAKcCMHTsW8+fPt7BVz8jJycGKFStQX1+PDh06YMOGDXB3d7e0WSqePHmC4OBgnDx5Em5ubpY2R4Nt27bh+PHjsLOzQ8+ePbFu3To4ODiwdxQg1LkpFBuFPpZTKDYKdW4KxUahzk2h2CjUuSkUG4U6N4Vio1DnplBsFOrcAuTChQt46623dF5PSkpCfHy82edNSkrSKBoREhKC8vJyzv1PnjyJTz/91GQ7MjIykJCQYPI4zR3q3BQVhw8fxr1794zuv2PHDvzzn/802Y4RI0YgIyMDNTW07LEp0GQNRvLkyRO8++67KCkpgUKhwNtvv42OHTti48aNePLkCVq1aoUNGzbAy8sLM2fORI8ePXDp0iXI5XKsX78eAQEByM7Oxrp161BXVwcnJyesX78evr6+BtlRXl6OVatWobi4GACwfPly9O/fH9u3b0dxcTEKCwtRXFyM1157Da+++ioA4LPPPsP
Ro0fh4eGBNm3awN/fH+3atcO1a9fwf//3f3ByclLtdNuzZw8yMjIgk8nwySef6AzH/PPPP2Fvbw8PDw8AwP3797Fq1SoUFBQAAOLi4uDl5YW5c+eib9++yMzMRK9evTBx4kRs27YN5eXl2Lp1KwICAiASiTBo0CBkZGRgzJgxRv18KKDJGozlf//7H1mxYoXquLKykkyZMkWVwCElJYXExMQQQgiZMWOGqu3FixeJVColhBBSVVVF6uvrCSGEnDt3jixcuJAQQsj58+fJm2++qXPuQ4cOkdWrVxNCCFm8eDG5dOkSIYSQoqIiMnr0aEIIIdu2bSNTpkwhdXV15MGDB2TQoEHk6dOnJCsri0RERJDa2lpSVVVFwsLCyI4dO1R2Zmdnq+YJDg4mu3fvJoQQsmfPHrJ8+XKdNh08eJBs2LBBdRwdHU127txJCFEmZaisrCQFBQWkZ8+e5ObNm0Qul5MJEyaQmJgYolAoyI8//kjmz5+v6p+cnEzi4+N1zkdhh965jcTPzw+bNm3Cli1bEBwcjBYtWuD333/H7NmzAQAKhQKtW7dWtW/YJDFw4EA8fvwYlZWVqK6uxrJly3Dnzh2IRCLU19cbbMfPP/+sURb58ePHqK6uBgAMHz4cDg4O8PDwgIeHBx48eIArV65g5MiRcHR0hKOjI4KDg/WOHx4eDgDo1asXfvzxR53tysrKVHdtADh//jw2b94MAJBIJHBzc0NFRQXat2+PHj2Utcq7deuGwYMHQyQSoUePHigqKlL19/T0NOkVgUIfy42mS5cuSEpKwunTp/HJJ58gKCgI3bt315m4QbtEkkgkQkJCAl588UV89tlnKCwsVD02G4JCocD+/fsZ83ypb3iQSCSQyWQGj29vbw8AEIvFkMvlOts5OTmhqqqKdTx1m8RisepYJBJpjF9XV2e9ucsEAhXUjKS0tBTOzs6IjIzEnDlzkJWVhfLyctX+3/r6eo0icsePHwcA/Prrr3Bzc4ObmxuqqqpU+8IPHz5slB1Dhw7Ft99+qzpmq5wZGBiIjIwM1NXVobq6GqdOnVJdc3FxUd31DcXX1xd37txRHQ8ePBjfffcdAOXOOS6Or05+fj78/PyMsoWihN65jeT333/H5s2bIRaLYWdnh7i4ONjZ2WHt2rWoqqqCXC7Ha6+9ptrn7ejoiPHjx0Mmk2H9+vUAgLlz5yImJgaff/45hg8fbpQdK1asQHx8PMaNGwe5XI4BAwboXSYLCAhASEgIIiIi4OnpCT8/P9W2ywkTJmDVqlUaghpXBg4ciE2bNoEQApFIhBUrVuDDDz/EoUOHIBaLERcXp/GawsaFCxewePFig2ygaGHpl/7mgLZQZWkeP35MCCHkyZMnZMKECeTatWtmGXfNmjXk3LlzJo9TVlZGXn31VTNY1Lyhd+5myMqVK3H79m3U1dVhwoQJ8Pf3N8u48+bNQ1ZWlsnjFBcXIyYmxgwWNW9osgYBc+jQIezevVvjXGBgIFatWmUhi4RpE4UZ6twUio1C1XIKxUahzk2h2CjUuSkUG4U6N4Vio/w/c7frjhEncosAAAAASUVORK5CYII=\n","text/plain":["
"]},"metadata":{"tags":[]}}]},{"cell_type":"markdown","metadata":{"id":"dnoGFA4MzW9o"},"source":["Можно все предыдущие графики вывести одной строчкой кода"]},{"cell_type":"code","metadata":{"id":"izSb9tJThvhk","colab":{"base_uri":"https://localhost:8080/","height":743},"executionInfo":{"status":"ok","timestamp":1614779829439,"user_tz":-300,"elapsed":12239,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"0a4d8076-27df-4520-bac8-004f756b4670"},"source":["sns.pairplot(df,hue='target',diag_kind=\"kde\",kind=\"scatter\",palette=\"husl\")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":[""]},"metadata":{"tags":[]},"execution_count":69},{"output_type":"display_data","data":{"image/png":"\n","text/plain":["
"]},"metadata":{"tags":[],"needs_background":"light"}}]},{"cell_type":"code","metadata":{"id":"MOtqb-wJhvfD","colab":{"base_uri":"https://localhost:8080/","height":296},"executionInfo":{"status":"ok","timestamp":1614767558267,"user_tz":-300,"elapsed":714,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"bc730c65-b8f4-4417-d1f5-68fcc327dbcb"},"source":["sns.boxplot(x=\"target\", y=\"sepal_length_(cm)\", data=df)"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":[""]},"metadata":{"tags":[]},"execution_count":62},{"output_type":"display_data","data":{"image/png":"\n","text/plain":["
"]},"metadata":{"tags":[]}}]},{"cell_type":"code","metadata":{"id":"pop0xJy808kv","colab":{"base_uri":"https://localhost:8080/","height":299},"executionInfo":{"status":"ok","timestamp":1614767566285,"user_tz":-300,"elapsed":648,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"0c951aff-d432-4cab-ba5c-fd913d8256c9"},"source":["sns.boxplot(x=\"target\", y=\"sepal_width_(cm)\", data=df)"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":[""]},"metadata":{"tags":[]},"execution_count":63},{"output_type":"display_data","data":{"image/png":"\n","text/plain":["
"]},"metadata":{"tags":[]}}]},{"cell_type":"code","metadata":{"id":"zL6zrC0108t4","colab":{"base_uri":"https://localhost:8080/","height":296},"executionInfo":{"status":"ok","timestamp":1614767569562,"user_tz":-300,"elapsed":1021,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"41a4f4ed-c685-4b02-adec-77ede4cf6761"},"source":["sns.boxplot(x=\"target\", y=\"petal_length_(cm)\", data=df)"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":[""]},"metadata":{"tags":[]},"execution_count":64},{"output_type":"display_data","data":{"image/png":"\n","text/plain":["
"]},"metadata":{"tags":[]}}]},{"cell_type":"code","metadata":{"id":"gt-CS-v80841","colab":{"base_uri":"https://localhost:8080/","height":296},"executionInfo":{"status":"ok","timestamp":1614767574695,"user_tz":-300,"elapsed":672,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"17e5b682-a469-43a9-fe36-3ad50c2b4449"},"source":["sns.boxplot(x=\"target\", y=\"petal_width_(cm)\", data=df)"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":[""]},"metadata":{"tags":[]},"execution_count":65},{"output_type":"display_data","data":{"image/png":"\n","text/plain":["
"]},"metadata":{"tags":[]}}]}]} \ No newline at end of file +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "qCUpgW4Chxlt" + }, + "source": [ + "# Игрушечные наборы данных\n", + "https://scikit-learn.org/stable/datasets/index.html" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "executionInfo": { + "elapsed": 867, + "status": "ok", + "timestamp": 1632403984813, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "6-e8Ub9ghvMA" + }, + "outputs": [], + "source": [ + "import sklearn.datasets as sets\n", + "datasets = {0:'boston', 1:'iris', 2:'diabets', 3:'digits', 4:'linnerud', 5:'wine', 6:'cancer', 7:'olivetti_faces', 8:'20_newsgroups',\n", + " 9:'20_newsgroups_vec', 10:'people_labeled_faces', 11:'pairs_labeled_faces', 12:'covertype', 13:'RCV1_multilabel',\n", + " 14:'kddcup99', 15:'california_housing', }\n", + "choise = 1\n", + "if choise == 0:\n", + " ds = sets.load_boston() #regression\n", + "elif choise == 1:\n", + " ds = sets.load_iris() # classification\n", + "elif choise == 2:\n", + " ds = sets.load_diabetes() # regression\n", + "elif choise == 3:\n", + " ds = sets.load_digits() # classification\n", + "elif choise == 4:\n", + " ds = sets.load_linnerud() # multivariate regression\n", + "elif choise == 5:\n", + " ds = sets.load_wine() # classification\n", + "elif choise == 6:\n", + " ds = sets.load_breast_cancer() # classification\n", + "elif choise == 7:\n", + " ds = sets.fetch_olivetti_faces() # classification\n", + "elif choise == 8:\n", + " ds = sets.fetch_20newsgroups() # classification\n", + "elif choise == 9:\n", + " ds = sets.fetch_20newsgroups_vectorized() # classification\n", + "elif choise == 10:\n", + " ds = sets.fetch_lfw_people() # classification\n", + "elif choise == 11:\n", + " ds = sets.fetch_lfw_pairs() # 
classification\n", + "elif choise == 12:\n", + " ds = sets.fetch_covtype() # classification\n", + "elif choise == 13:\n", + " ds = sets.fetch_rcv1() # classification\n", + "elif choise == 14:\n", + " ds = sets.fetch_kddcup99() # classification\n", + "elif choise == 15:\n", + " ds = sets.fetch_california_housing() # regression" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 1064, + "status": "ok", + "timestamp": 1615295304765, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "rHDZmzjAiy7N", + "outputId": "160c86a8-b336-429a-b12b-52cf5bb6a14b" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + ".. _iris_dataset:\n", + "\n", + "Iris plants dataset\n", + "--------------------\n", + "\n", + "**Data Set Characteristics:**\n", + "\n", + " :Number of Instances: 150 (50 in each of three classes)\n", + " :Number of Attributes: 4 numeric, predictive attributes and the class\n", + " :Attribute Information:\n", + " - sepal length in cm\n", + " - sepal width in cm\n", + " - petal length in cm\n", + " - petal width in cm\n", + " - class:\n", + " - Iris-Setosa\n", + " - Iris-Versicolour\n", + " - Iris-Virginica\n", + " \n", + " :Summary Statistics:\n", + "\n", + " ============== ==== ==== ======= ===== ====================\n", + " Min Max Mean SD Class Correlation\n", + " ============== ==== ==== ======= ===== ====================\n", + " sepal length: 4.3 7.9 5.84 0.83 0.7826\n", + " sepal width: 2.0 4.4 3.05 0.43 -0.4194\n", + " petal length: 1.0 6.9 3.76 1.76 0.9490 (high!)\n", + " petal width: 0.1 2.5 1.20 0.76 0.9565 (high!)\n", + " ============== ==== ==== ======= ===== ====================\n", + "\n", + " :Missing Attribute Values: 
None\n", + " :Class Distribution: 33.3% for each of 3 classes.\n", + " :Creator: R.A. Fisher\n", + " :Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)\n", + " :Date: July, 1988\n", + "\n", + "The famous Iris database, first used by Sir R.A. Fisher. The dataset is taken\n", + "from Fisher's paper. Note that it's the same as in R, but not as in the UCI\n", + "Machine Learning Repository, which has two wrong data points.\n", + "\n", + "This is perhaps the best known database to be found in the\n", + "pattern recognition literature. Fisher's paper is a classic in the field and\n", + "is referenced frequently to this day. (See Duda & Hart, for example.) The\n", + "data set contains 3 classes of 50 instances each, where each class refers to a\n", + "type of iris plant. One class is linearly separable from the other 2; the\n", + "latter are NOT linearly separable from each other.\n", + "\n", + ".. topic:: References\n", + "\n", + " - Fisher, R.A. \"The use of multiple measurements in taxonomic problems\"\n", + " Annual Eugenics, 7, Part II, 179-188 (1936); also in \"Contributions to\n", + " Mathematical Statistics\" (John Wiley, NY, 1950).\n", + " - Duda, R.O., & Hart, P.E. (1973) Pattern Classification and Scene Analysis.\n", + " (Q327.D83) John Wiley & Sons. ISBN 0-471-22361-1. See page 218.\n", + " - Dasarathy, B.V. (1980) \"Nosing Around the Neighborhood: A New System\n", + " Structure and Classification Rule for Recognition in Partially Exposed\n", + " Environments\". IEEE Transactions on Pattern Analysis and Machine\n", + " Intelligence, Vol. PAMI-2, No. 1, 67-71.\n", + " - Gates, G.W. (1972) \"The Reduced Nearest Neighbor Rule\". IEEE Transactions\n", + " on Information Theory, May 1972, 431-433.\n", + " - See also: 1988 MLC Proceedings, 54-64. 
Cheeseman et al\"s AUTOCLASS II\n", + " conceptual clustering system finds 3 classes in the data.\n", + " - Many, many more ...\n" + ] + } + ], + "source": [ + "print(ds.DESCR)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 683, + "status": "ok", + "timestamp": 1632404056458, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "59mLor4WoeZg", + "outputId": "3548322c-6765-4349-8dea-66ab12f3f7d9" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']\n", + "['setosa' 'versicolor' 'virginica']\n" + ] + } + ], + "source": [ + "print(ds.feature_names)\n", + "print(ds.target_names)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 420, + "status": "ok", + "timestamp": 1632404071563, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "9Yt4tJ2_otjm", + "outputId": "b471a124-b71b-456d-de41-fe29676b6604" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "numpy.ndarray" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = ds.data\n", + "type(data)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 402, + "status": "ok", + "timestamp": 1632404086557, + "user": { + "displayName": "Александр Аксёнов", + 
"photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "ZgxY_56q3YVG", + "outputId": "b4e3ee4f-16b7-4b1e-f5be-34d0e5f4dd31" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[5.1, 3.5, 1.4, 0.2],\n", + " [4.9, 3. , 1.4, 0.2],\n", + " [4.7, 3.2, 1.3, 0.2],\n", + " [4.6, 3.1, 1.5, 0.2],\n", + " [5. , 3.6, 1.4, 0.2],\n", + " [5.4, 3.9, 1.7, 0.4],\n", + " [4.6, 3.4, 1.4, 0.3],\n", + " [5. , 3.4, 1.5, 0.2],\n", + " [4.4, 2.9, 1.4, 0.2],\n", + " [4.9, 3.1, 1.5, 0.1],\n", + " [5.4, 3.7, 1.5, 0.2],\n", + " [4.8, 3.4, 1.6, 0.2],\n", + " [4.8, 3. , 1.4, 0.1],\n", + " [4.3, 3. , 1.1, 0.1],\n", + " [5.8, 4. , 1.2, 0.2],\n", + " [5.7, 4.4, 1.5, 0.4],\n", + " [5.4, 3.9, 1.3, 0.4],\n", + " [5.1, 3.5, 1.4, 0.3],\n", + " [5.7, 3.8, 1.7, 0.3],\n", + " [5.1, 3.8, 1.5, 0.3],\n", + " [5.4, 3.4, 1.7, 0.2],\n", + " [5.1, 3.7, 1.5, 0.4],\n", + " [4.6, 3.6, 1. , 0.2],\n", + " [5.1, 3.3, 1.7, 0.5],\n", + " [4.8, 3.4, 1.9, 0.2],\n", + " [5. , 3. , 1.6, 0.2],\n", + " [5. , 3.4, 1.6, 0.4],\n", + " [5.2, 3.5, 1.5, 0.2],\n", + " [5.2, 3.4, 1.4, 0.2],\n", + " [4.7, 3.2, 1.6, 0.2],\n", + " [4.8, 3.1, 1.6, 0.2],\n", + " [5.4, 3.4, 1.5, 0.4],\n", + " [5.2, 4.1, 1.5, 0.1],\n", + " [5.5, 4.2, 1.4, 0.2],\n", + " [4.9, 3.1, 1.5, 0.2],\n", + " [5. , 3.2, 1.2, 0.2],\n", + " [5.5, 3.5, 1.3, 0.2],\n", + " [4.9, 3.6, 1.4, 0.1],\n", + " [4.4, 3. , 1.3, 0.2],\n", + " [5.1, 3.4, 1.5, 0.2],\n", + " [5. , 3.5, 1.3, 0.3],\n", + " [4.5, 2.3, 1.3, 0.3],\n", + " [4.4, 3.2, 1.3, 0.2],\n", + " [5. , 3.5, 1.6, 0.6],\n", + " [5.1, 3.8, 1.9, 0.4],\n", + " [4.8, 3. , 1.4, 0.3],\n", + " [5.1, 3.8, 1.6, 0.2],\n", + " [4.6, 3.2, 1.4, 0.2],\n", + " [5.3, 3.7, 1.5, 0.2],\n", + " [5. , 3.3, 1.4, 0.2],\n", + " [7. , 3.2, 4.7, 1.4],\n", + " [6.4, 3.2, 4.5, 1.5],\n", + " [6.9, 3.1, 4.9, 1.5],\n", + " [5.5, 2.3, 4. 
, 1.3],\n", + " [6.5, 2.8, 4.6, 1.5],\n", + " [5.7, 2.8, 4.5, 1.3],\n", + " [6.3, 3.3, 4.7, 1.6],\n", + " [4.9, 2.4, 3.3, 1. ],\n", + " [6.6, 2.9, 4.6, 1.3],\n", + " [5.2, 2.7, 3.9, 1.4],\n", + " [5. , 2. , 3.5, 1. ],\n", + " [5.9, 3. , 4.2, 1.5],\n", + " [6. , 2.2, 4. , 1. ],\n", + " [6.1, 2.9, 4.7, 1.4],\n", + " [5.6, 2.9, 3.6, 1.3],\n", + " [6.7, 3.1, 4.4, 1.4],\n", + " [5.6, 3. , 4.5, 1.5],\n", + " [5.8, 2.7, 4.1, 1. ],\n", + " [6.2, 2.2, 4.5, 1.5],\n", + " [5.6, 2.5, 3.9, 1.1],\n", + " [5.9, 3.2, 4.8, 1.8],\n", + " [6.1, 2.8, 4. , 1.3],\n", + " [6.3, 2.5, 4.9, 1.5],\n", + " [6.1, 2.8, 4.7, 1.2],\n", + " [6.4, 2.9, 4.3, 1.3],\n", + " [6.6, 3. , 4.4, 1.4],\n", + " [6.8, 2.8, 4.8, 1.4],\n", + " [6.7, 3. , 5. , 1.7],\n", + " [6. , 2.9, 4.5, 1.5],\n", + " [5.7, 2.6, 3.5, 1. ],\n", + " [5.5, 2.4, 3.8, 1.1],\n", + " [5.5, 2.4, 3.7, 1. ],\n", + " [5.8, 2.7, 3.9, 1.2],\n", + " [6. , 2.7, 5.1, 1.6],\n", + " [5.4, 3. , 4.5, 1.5],\n", + " [6. , 3.4, 4.5, 1.6],\n", + " [6.7, 3.1, 4.7, 1.5],\n", + " [6.3, 2.3, 4.4, 1.3],\n", + " [5.6, 3. , 4.1, 1.3],\n", + " [5.5, 2.5, 4. , 1.3],\n", + " [5.5, 2.6, 4.4, 1.2],\n", + " [6.1, 3. , 4.6, 1.4],\n", + " [5.8, 2.6, 4. , 1.2],\n", + " [5. , 2.3, 3.3, 1. ],\n", + " [5.6, 2.7, 4.2, 1.3],\n", + " [5.7, 3. , 4.2, 1.2],\n", + " [5.7, 2.9, 4.2, 1.3],\n", + " [6.2, 2.9, 4.3, 1.3],\n", + " [5.1, 2.5, 3. , 1.1],\n", + " [5.7, 2.8, 4.1, 1.3],\n", + " [6.3, 3.3, 6. , 2.5],\n", + " [5.8, 2.7, 5.1, 1.9],\n", + " [7.1, 3. , 5.9, 2.1],\n", + " [6.3, 2.9, 5.6, 1.8],\n", + " [6.5, 3. , 5.8, 2.2],\n", + " [7.6, 3. , 6.6, 2.1],\n", + " [4.9, 2.5, 4.5, 1.7],\n", + " [7.3, 2.9, 6.3, 1.8],\n", + " [6.7, 2.5, 5.8, 1.8],\n", + " [7.2, 3.6, 6.1, 2.5],\n", + " [6.5, 3.2, 5.1, 2. ],\n", + " [6.4, 2.7, 5.3, 1.9],\n", + " [6.8, 3. , 5.5, 2.1],\n", + " [5.7, 2.5, 5. , 2. ],\n", + " [5.8, 2.8, 5.1, 2.4],\n", + " [6.4, 3.2, 5.3, 2.3],\n", + " [6.5, 3. , 5.5, 1.8],\n", + " [7.7, 3.8, 6.7, 2.2],\n", + " [7.7, 2.6, 6.9, 2.3],\n", + " [6. , 2.2, 5. 
, 1.5],\n", + " [6.9, 3.2, 5.7, 2.3],\n", + " [5.6, 2.8, 4.9, 2. ],\n", + " [7.7, 2.8, 6.7, 2. ],\n", + " [6.3, 2.7, 4.9, 1.8],\n", + " [6.7, 3.3, 5.7, 2.1],\n", + " [7.2, 3.2, 6. , 1.8],\n", + " [6.2, 2.8, 4.8, 1.8],\n", + " [6.1, 3. , 4.9, 1.8],\n", + " [6.4, 2.8, 5.6, 2.1],\n", + " [7.2, 3. , 5.8, 1.6],\n", + " [7.4, 2.8, 6.1, 1.9],\n", + " [7.9, 3.8, 6.4, 2. ],\n", + " [6.4, 2.8, 5.6, 2.2],\n", + " [6.3, 2.8, 5.1, 1.5],\n", + " [6.1, 2.6, 5.6, 1.4],\n", + " [7.7, 3. , 6.1, 2.3],\n", + " [6.3, 3.4, 5.6, 2.4],\n", + " [6.4, 3.1, 5.5, 1.8],\n", + " [6. , 3. , 4.8, 1.8],\n", + " [6.9, 3.1, 5.4, 2.1],\n", + " [6.7, 3.1, 5.6, 2.4],\n", + " [6.9, 3.1, 5.1, 2.3],\n", + " [5.8, 2.7, 5.1, 1.9],\n", + " [6.8, 3.2, 5.9, 2.3],\n", + " [6.7, 3.3, 5.7, 2.5],\n", + " [6.7, 3. , 5.2, 2.3],\n", + " [6.3, 2.5, 5. , 1.9],\n", + " [6.5, 3. , 5.2, 2. ],\n", + " [6.2, 3.4, 5.4, 2.3],\n", + " [5.9, 3. , 5.1, 1.8]])" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 855, + "status": "ok", + "timestamp": 1615295357693, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "-7ejnqmmwr_J", + "outputId": "e22abd6b-c840-4e43-aa62-d9c1a5cdd231" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(150, 4)" + ] + }, + "execution_count": 5, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "data.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 420, + "status": "ok", + "timestamp": 1632404107395, + "user": { + 
"displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "RmRL0mZ3o5ri", + "outputId": "bcace884-7ac8-49ce-d14e-05c8f625bb38" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(array([0, 0, 0, 0, 0]), (150,))" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "target = ds.target\n", + "target[:5], target.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fpcR7aEBJoGq" + }, + "source": [ + "# Pandas" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "executionInfo": { + "elapsed": 546, + "status": "ok", + "timestamp": 1632404228644, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "FVTPYh-hhvah" + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 423 + }, + "executionInfo": { + "elapsed": 20, + "status": "ok", + "timestamp": 1632404365934, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "CZzMZXcyDnCx", + "outputId": "55d262ac-6243-4338-a45e-57217f23a610" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal length (cm)sepal width (cm)petal length (cm)petal width (cm)target
05.13.51.40.20
14.93.01.40.20
24.73.21.30.20
34.63.11.50.20
45.03.61.40.20
..................
1456.73.05.22.32
1466.32.55.01.92
1476.53.05.22.02
1486.23.45.42.32
1495.93.05.11.82
\n", + "

150 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " sepal length (cm) sepal width (cm) ... petal width (cm) target\n", + "0 5.1 3.5 ... 0.2 0\n", + "1 4.9 3.0 ... 0.2 0\n", + "2 4.7 3.2 ... 0.2 0\n", + "3 4.6 3.1 ... 0.2 0\n", + "4 5.0 3.6 ... 0.2 0\n", + ".. ... ... ... ... ...\n", + "145 6.7 3.0 ... 2.3 2\n", + "146 6.3 2.5 ... 1.9 2\n", + "147 6.5 3.0 ... 2.0 2\n", + "148 6.2 3.4 ... 2.3 2\n", + "149 5.9 3.0 ... 1.8 2\n", + "\n", + "[150 rows x 5 columns]" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.DataFrame(data, columns=ds.feature_names) # data - может быть как лист, так и numpy array\n", + "df['target'] = ds.target\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 112 + }, + "executionInfo": { + "elapsed": 1482, + "status": "ok", + "timestamp": 1632404401169, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "WMx25DeePe80", + "outputId": "ca1eb41f-18e0-47de-cc77-b8648b89cec5" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal length (cm)sepal width (cm)petal length (cm)petal width (cm)target
05.13.51.40.20
14.93.01.40.20
\n", + "
" + ], + "text/plain": [ + " sepal length (cm) sepal width (cm) ... petal width (cm) target\n", + "0 5.1 3.5 ... 0.2 0\n", + "1 4.9 3.0 ... 0.2 0\n", + "\n", + "[2 rows x 5 columns]" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head(2) #tail()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "executionInfo": { + "elapsed": 580, + "status": "ok", + "timestamp": 1632404414446, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "yY02uqmWhvlj", + "outputId": "f4adccbb-22f7-4192-a8f7-67d00c8ff7c3" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal length (cm)sepal width (cm)petal length (cm)petal width (cm)target
1206.93.25.72.32
75.03.41.50.20
656.73.14.41.41
776.73.05.01.71
985.12.53.01.11
\n", + "
" + ], + "text/plain": [ + " sepal length (cm) sepal width (cm) ... petal width (cm) target\n", + "120 6.9 3.2 ... 2.3 2\n", + "7 5.0 3.4 ... 0.2 0\n", + "65 6.7 3.1 ... 1.4 1\n", + "77 6.7 3.0 ... 1.7 1\n", + "98 5.1 2.5 ... 1.1 1\n", + "\n", + "[5 rows x 5 columns]" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.sample(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 486, + "status": "ok", + "timestamp": 1632404445651, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "L0oDISZyHqUh", + "outputId": "f2586af7-7f30-4106-861b-539f5ed618d6" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "pandas.core.frame.DataFrame" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type(df)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 433, + "status": "ok", + "timestamp": 1632404485030, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "arTjJfy442ss", + "outputId": "6d630c99-cbed-42e1-d69f-c71e595be995" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "pandas.core.series.Series" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type(df[\"target\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xX_Qut-QR_ia" + }, + "source": [ + "### Индексация и срезы данных" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 3256, + "status": "ok", + "timestamp": 1614783881358, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "jXimDZePWyIp", + "outputId": "00860947-6e2c-484e-90ae-8149d6c2bb45" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0 5.1\n", + "1 4.9\n", + "2 4.7\n", + "3 4.6\n", + "4 5.0\n", + " ... \n", + "145 6.7\n", + "146 6.3\n", + "147 6.5\n", + "148 6.2\n", + "149 5.9\n", + "Name: sepal length (cm), Length: 150, dtype: float64" + ] + }, + "execution_count": 96, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "df['sepal length (cm)'] # выбор столбца по названию" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 619, + "status": "ok", + "timestamp": 1615295621844, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "rOBV0RUtHxLh", + "outputId": "2e25e363-6fd5-477f-9e38-afe8f91522ac" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "pandas.core.series.Series" + ] + }, + "execution_count": 14, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "type(df['sepal length (cm)'])" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 523, + "status": "ok", + "timestamp": 1632404667952, + "user": { + "displayName": "Александр 
Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "sq2YmKFr5m-1", + "outputId": "e9f125e0-3f1f-4a4b-d39c-5e6091047c86" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)',\n", + " 'petal width (cm)', 'target'],\n", + " dtype='object')" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.columns" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 1699, + "status": "ok", + "timestamp": 1614783884339, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "o5CI-Ha6P4AX", + "outputId": "ee350cf3-212a-4bdd-daf8-f0decfe313c0" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'petal length (cm)': 'petal_length_(cm)',\n", + " 'petal width (cm)': 'petal_width_(cm)',\n", + " 'sepal length (cm)': 'sepal_length_(cm)',\n", + " 'sepal width (cm)': 'sepal_width_(cm)',\n", + " 'target': 'target'}" + ] + }, + "execution_count": 97, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "{name : '_'.join(name.split(' ')) for name in df.columns}" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "executionInfo": { + "elapsed": 585, + "status": "ok", + "timestamp": 1632404857471, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "ztRKBaVlxM8d" + }, + "outputs": [], + "source": [ + 
"# df = df.rename(columns={name : '_'.join(name.split(' ')) for name in df.columns}) # смена имен столбцов\n", + "df.rename(columns={name : '_'.join(name.split(' ')) for name in df.columns}, inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 29, + "status": "ok", + "timestamp": 1632404863328, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "Bryqf6bCxNC5", + "outputId": "2fb81e40-0667-4c5b-9b50-4ba23010385b" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['sepal_length_(cm)', 'sepal_width_(cm)', 'petal_length_(cm)',\n", + " 'petal_width_(cm)', 'target'],\n", + " dtype='object')" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.columns" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 438, + "status": "ok", + "timestamp": 1615295826923, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "uesXOV19QcNX", + "outputId": "6476924c-249d-4876-89be-920b127e125b" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0 0\n", + "1 0\n", + "2 0\n", + "3 0\n", + "4 0\n", + " ..\n", + "145 2\n", + "146 2\n", + "147 2\n", + "148 2\n", + "149 2\n", + "Name: target, Length: 150, dtype: int64" + ] + }, + "execution_count": 20, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "df.target" + ] + }, + { + "cell_type": "code", + "execution_count": null, + 
"metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 566, + "status": "ok", + "timestamp": 1614777840378, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "J2il4fodbWLb", + "outputId": "b6d5c2a4-dc69-497d-997c-8127f174765a" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "140 2\n", + "141 2\n", + "142 2\n", + "143 2\n", + "144 2\n", + "145 2\n", + "146 2\n", + "147 2\n", + "148 2\n", + "149 2\n", + "Name: target, dtype: int64" + ] + }, + "execution_count": 22, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "df.target[-10:] # возможен такой стиль обращения к столбцам, если его имя не содержит пробелов" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 357 + }, + "executionInfo": { + "elapsed": 607, + "status": "ok", + "timestamp": 1614777891289, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "2IaGUtDoYIAO", + "outputId": "c64f553c-27a2-4f0d-a1e3-aa82ee895acf" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_width_(cm)petal_length_(cm)petal_width_(cm)
1403.15.62.4
1413.15.12.3
1422.75.11.9
1433.25.92.3
1443.35.72.5
1453.05.22.3
1462.55.01.9
1473.05.22.0
1483.45.42.3
1493.05.11.8
\n", + "
" + ], + "text/plain": [ + " sepal_width_(cm) petal_length_(cm) petal_width_(cm)\n", + "140 3.1 5.6 2.4\n", + "141 3.1 5.1 2.3\n", + "142 2.7 5.1 1.9\n", + "143 3.2 5.9 2.3\n", + "144 3.3 5.7 2.5\n", + "145 3.0 5.2 2.3\n", + "146 2.5 5.0 1.9\n", + "147 3.0 5.2 2.0\n", + "148 3.4 5.4 2.3\n", + "149 3.0 5.1 1.8" + ] + }, + "execution_count": 23, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "df.loc[140: , 'sepal_width_(cm)':'petal_width_(cm)'] # возможность среза данных по ИМЕНАМ строк и столбцов" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 357 + }, + "executionInfo": { + "elapsed": 735, + "status": "ok", + "timestamp": 1614777918498, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "RwTuNV_BxNtH", + "outputId": "89004bbc-fd5d-4bb9-fbdc-6756fa31cb1b" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_length_(cm)sepal_width_(cm)petal_length_(cm)petal_width_(cm)
05.13.51.40.2
14.93.01.40.2
24.73.21.30.2
34.63.11.50.2
45.03.61.40.2
55.43.91.70.4
64.63.41.40.3
75.03.41.50.2
84.42.91.40.2
94.93.11.50.1
\n", + "
" + ], + "text/plain": [ + " sepal_length_(cm) sepal_width_(cm) petal_length_(cm) petal_width_(cm)\n", + "0 5.1 3.5 1.4 0.2\n", + "1 4.9 3.0 1.4 0.2\n", + "2 4.7 3.2 1.3 0.2\n", + "3 4.6 3.1 1.5 0.2\n", + "4 5.0 3.6 1.4 0.2\n", + "5 5.4 3.9 1.7 0.4\n", + "6 4.6 3.4 1.4 0.3\n", + "7 5.0 3.4 1.5 0.2\n", + "8 4.4 2.9 1.4 0.2\n", + "9 4.9 3.1 1.5 0.1" + ] + }, + "execution_count": 24, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "df.iloc[:10,:4] # возможность среза данных по ПОРЯДКОВЫМ НОМЕРАМ строк и столбцов " + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 413, + "status": "ok", + "timestamp": 1632405184550, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "QSAbGcDbJP9B", + "outputId": "20274561-ff6c-4031-e1a7-a26a2399cea5" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['sepal_length_(cm)', 'sepal_width_(cm)']" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "[column for column in df.columns if column.startswith('sepal')]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 424 + }, + "executionInfo": { + "elapsed": 1370, + "status": "ok", + "timestamp": 1614784351268, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "pytaw0cAxNp8", + "outputId": "81983e96-8834-40e4-b828-6706a4f3bbb6" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_length_(cm)sepal_width_(cm)
05.13.5
14.93.0
24.73.2
34.63.1
45.03.6
.........
1456.73.0
1466.32.5
1476.53.0
1486.23.4
1495.93.0
\n", + "

150 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " sepal_length_(cm) sepal_width_(cm)\n", + "0 5.1 3.5\n", + "1 4.9 3.0\n", + "2 4.7 3.2\n", + "3 4.6 3.1\n", + "4 5.0 3.6\n", + ".. ... ...\n", + "145 6.7 3.0\n", + "146 6.3 2.5\n", + "147 6.5 3.0\n", + "148 6.2 3.4\n", + "149 5.9 3.0\n", + "\n", + "[150 rows x 2 columns]" + ] + }, + "execution_count": 102, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "df[[column for column in df.columns if column.startswith('sepal')]] # выбор столбцов по условию" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 666, + "status": "ok", + "timestamp": 1632405255702, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "bHiE8tk872bY", + "outputId": "5cab46f0-7d00-4c5a-a435-ecd145b8c82c" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0 False\n", + "1 False\n", + "2 False\n", + "3 False\n", + "4 False\n", + " ... 
\n", + "145 False\n", + "146 False\n", + "147 False\n", + "148 False\n", + "149 False\n", + "Name: target, Length: 150, dtype: bool" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.target==1.0" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 347 + }, + "executionInfo": { + "elapsed": 815, + "status": "ok", + "timestamp": 1615296046504, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "YNxRaJqqavOz", + "outputId": "2baa1152-611c-43a3-eea6-c9ae07cfea4e" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_length_(cm)sepal_width_(cm)petal_length_(cm)petal_width_(cm)target
507.03.24.71.41
516.43.24.51.51
526.93.14.91.51
535.52.34.01.31
546.52.84.61.51
555.72.84.51.31
566.33.34.71.61
574.92.43.31.01
586.62.94.61.31
595.22.73.91.41
\n", + "
" + ], + "text/plain": [ + " sepal_length_(cm) sepal_width_(cm) ... petal_width_(cm) target\n", + "50 7.0 3.2 ... 1.4 1\n", + "51 6.4 3.2 ... 1.5 1\n", + "52 6.9 3.1 ... 1.5 1\n", + "53 5.5 2.3 ... 1.3 1\n", + "54 6.5 2.8 ... 1.5 1\n", + "55 5.7 2.8 ... 1.3 1\n", + "56 6.3 3.3 ... 1.6 1\n", + "57 4.9 2.4 ... 1.0 1\n", + "58 6.6 2.9 ... 1.3 1\n", + "59 5.2 2.7 ... 1.4 1\n", + "\n", + "[10 rows x 5 columns]" + ] + }, + "execution_count": 22, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "df[df.target==1.0][:10] # выбор данных по условию. В данном случае хотим увидеть данные у которых целевой класс = 1\n", + "# так же можно увидеть что обращаться к столбцу можно" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "i4V1_5AOgmB9" + }, + "source": [ + "### Описательная статистика" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 300 + }, + "executionInfo": { + "elapsed": 1283, + "status": "ok", + "timestamp": 1614766986724, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "EuwQ-U54xNnA", + "outputId": "5ed73970-f852-49b2-82a7-bfe43b1ad3c3" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_length_(cm)sepal_width_(cm)petal_length_(cm)petal_width_(cm)target
count150.000000150.000000150.000000150.000000150.000000
mean5.8433333.0573333.7580001.1993331.000000
std0.8280660.4358661.7652980.7622380.819232
min4.3000002.0000001.0000000.1000000.000000
25%5.1000002.8000001.6000000.3000000.000000
50%5.8000003.0000004.3500001.3000001.000000
75%6.4000003.3000005.1000001.8000002.000000
max7.9000004.4000006.9000002.5000002.000000
\n", + "
" + ], + "text/plain": [ + " sepal_length_(cm) sepal_width_(cm) ... petal_width_(cm) target\n", + "count 150.000000 150.000000 ... 150.000000 150.000000\n", + "mean 5.843333 3.057333 ... 1.199333 1.000000\n", + "std 0.828066 0.435866 ... 0.762238 0.819232\n", + "min 4.300000 2.000000 ... 0.100000 0.000000\n", + "25% 5.100000 2.800000 ... 0.300000 0.000000\n", + "50% 5.800000 3.000000 ... 1.300000 1.000000\n", + "75% 6.400000 3.300000 ... 1.800000 2.000000\n", + "max 7.900000 4.400000 ... 2.500000 2.000000\n", + "\n", + "[8 rows x 5 columns]" + ] + }, + "execution_count": 19, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "df.describe() # статистическое описание набора данных" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 627, + "status": "ok", + "timestamp": 1614778091397, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "X4ykTpKtxNiG", + "outputId": "e62b683d-f476-4422-d691-774ead34e63f" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 150 entries, 0 to 149\n", + "Data columns (total 5 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 sepal_length_(cm) 150 non-null float64\n", + " 1 sepal_width_(cm) 150 non-null float64\n", + " 2 petal_length_(cm) 150 non-null float64\n", + " 3 petal_width_(cm) 150 non-null float64\n", + " 4 target 150 non-null int64 \n", + "dtypes: float64(4), int64(1)\n", + "memory usage: 6.0 KB\n" + ] + } + ], + "source": [ + "df.info() # информация об индексах, пропусках в данных, типах данных и объеме оперативной памяти занимаемой данными" + ] + }, + { + "cell_type": "code", + 
"execution_count": 26, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 51, + "status": "ok", + "timestamp": 1632405484185, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "b7khmMfj8mDB", + "outputId": "8e7ccfa9-cffa-4872-c0a5-d00635211e12" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(array([0, 1, 2]), 3)" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.target.unique(), df.target.nunique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 167 + }, + "executionInfo": { + "elapsed": 783, + "status": "ok", + "timestamp": 1615296303195, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "n1XzQbdFRx7Z", + "outputId": "a4acff70-40cf-4462-f2b6-03546318b29b" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_length_(cm)sepal_width_(cm)petal_length_(cm)petal_width_(cm)
target
05.0063.4281.4620.246
15.9362.7704.2601.326
26.5882.9745.5522.026
\n", + "
" + ], + "text/plain": [ + " sepal_length_(cm) ... petal_width_(cm)\n", + "target ... \n", + "0 5.006 ... 0.246\n", + "1 5.936 ... 1.326\n", + "2 6.588 ... 2.026\n", + "\n", + "[3 rows x 4 columns]" + ] + }, + "execution_count": 24, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "df.groupby('target').mean() #df.groupby('target')['petal_length_(cm)'].mean()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 217 + }, + "executionInfo": { + "elapsed": 724, + "status": "ok", + "timestamp": 1615296321113, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "MRiTYhiixNfC", + "outputId": "3bd6da21-1bde-404e-b9fb-d4e36e94634c" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_length_(cm)sepal_width_(cm)petal_length_(cm)petal_width_(cm)
minmaxmeanstdsizeminmaxmeanstdsizeminmaxmeanstdsizeminmaxmeanstdsize
target
04.35.85.0060.35249050.02.34.43.4280.37906450.01.01.91.4620.17366450.00.10.60.2460.10538650.0
14.97.05.9360.51617150.02.03.42.7700.31379850.03.05.14.2600.46991150.01.01.81.3260.19775350.0
24.97.96.5880.63588050.02.23.82.9740.32249750.04.56.95.5520.55189550.01.42.52.0260.27465050.0
\n", + "
" + ], + "text/plain": [ + " sepal_length_(cm) ... petal_width_(cm) \n", + " min max mean std ... max mean std size\n", + "target ... \n", + "0 4.3 5.8 5.006 0.352490 ... 0.6 0.246 0.105386 50.0\n", + "1 4.9 7.0 5.936 0.516171 ... 1.8 1.326 0.197753 50.0\n", + "2 4.9 7.9 6.588 0.635880 ... 2.5 2.026 0.274650 50.0\n", + "\n", + "[3 rows x 20 columns]" + ] + }, + "execution_count": 25, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "df.groupby('target').agg([min, max, np.mean, np.std, np.size]) # применение общих функций группировки для всех столбцов" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 197 + }, + "executionInfo": { + "elapsed": 511, + "status": "ok", + "timestamp": 1615296592781, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "w_oHay4KxNdC", + "outputId": "2b52fff3-b9c7-4c74-ea6f-e52b965f4e6b" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_length_(cm)petal_width_(cm)
meanstdminmax
target
05.0060.3524900.10.6
15.9360.5161711.01.8
26.5880.6358801.42.5
\n", + "
" + ], + "text/plain": [ + " sepal_length_(cm) petal_width_(cm) \n", + " mean std min max\n", + "target \n", + "0 5.006 0.352490 0.1 0.6\n", + "1 5.936 0.516171 1.0 1.8\n", + "2 6.588 0.635880 1.4 2.5" + ] + }, + "execution_count": 30, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "df.groupby('target').agg({'sepal_length_(cm)':[np.mean, np.std], 'petal_width_(cm)':[min, max]}) # индивидуальное применение функций группировки" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NCfoXnc41fmW" + }, + "source": [ + "### Полезные функции, которые конкретно сейчас не нужны, но часто применимы" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 197 + }, + "executionInfo": { + "elapsed": 747, + "status": "ok", + "timestamp": 1615296494311, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "KV8EM_b41m0m", + "outputId": "b898ccdb-16f0-415b-a629-25b794f42859" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_length_(cm)sepal_width_(cm)petal_length_(cm)petal_width_(cm)target
05.13.51.40.20
14.93.01.40.20
24.73.21.30.20
34.63.11.50.20
45.03.61.40.20
\n", + "
" + ], + "text/plain": [ + " sepal_length_(cm) sepal_width_(cm) ... petal_width_(cm) target\n", + "0 5.1 3.5 ... 0.2 0\n", + "1 4.9 3.0 ... 0.2 0\n", + "2 4.7 3.2 ... 0.2 0\n", + "3 4.6 3.1 ... 0.2 0\n", + "4 5.0 3.6 ... 0.2 0\n", + "\n", + "[5 rows x 5 columns]" + ] + }, + "execution_count": 26, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "d = df.copy()\n", + "d.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 737, + "status": "ok", + "timestamp": 1615296536700, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "pGOooxXo1xqA", + "outputId": "a3a5fe7b-d857-49c8-8d59-ed08472c37e9" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{0.0: 'setosa', 1.0: 'versicolor', 2.0: 'virginica'}" + ] + }, + "execution_count": 27, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "targets = {float(i):target for i, target in enumerate(ds.target_names)}\n", + "targets" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 197 + }, + "executionInfo": { + "elapsed": 474, + "status": "ok", + "timestamp": 1615296574079, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "1qI4cEd81xxK", + "outputId": "7e62a1d9-dc06-4fc5-8270-6da0236d7341" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_length_(cm)sepal_width_(cm)petal_length_(cm)petal_width_(cm)target
05.13.51.40.2setosa
14.93.01.40.2setosa
24.73.21.30.2setosa
34.63.11.50.2setosa
45.03.61.40.2setosa
\n", + "
" + ], + "text/plain": [ + " sepal_length_(cm) sepal_width_(cm) ... petal_width_(cm) target\n", + "0 5.1 3.5 ... 0.2 setosa\n", + "1 4.9 3.0 ... 0.2 setosa\n", + "2 4.7 3.2 ... 0.2 setosa\n", + "3 4.6 3.1 ... 0.2 setosa\n", + "4 5.0 3.6 ... 0.2 setosa\n", + "\n", + "[5 rows x 5 columns]" + ] + }, + "execution_count": 28, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "d.target = d.target.map(targets) # заменим цифровые обозначения классов на буквенные подписи\n", + "d.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 197 + }, + "executionInfo": { + "elapsed": 647, + "status": "ok", + "timestamp": 1615296637939, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "q1W6kwXe1xuc", + "outputId": "cbd628aa-1e1b-4a98-e5f5-b9ef80aa9544" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_length_(cm)sepal_width_(cm)petal_length_(cm)petal_width_(cm)targetsepal_length_on_width
05.13.51.40.2setosa1.457143
14.93.01.40.2setosa1.633333
24.73.21.30.2setosa1.468750
34.63.11.50.2setosa1.483871
45.03.61.40.2setosa1.388889
\n", + "
" + ], + "text/plain": [ + " sepal_length_(cm) sepal_width_(cm) ... target sepal_length_on_width\n", + "0 5.1 3.5 ... setosa 1.457143\n", + "1 4.9 3.0 ... setosa 1.633333\n", + "2 4.7 3.2 ... setosa 1.468750\n", + "3 4.6 3.1 ... setosa 1.483871\n", + "4 5.0 3.6 ... setosa 1.388889\n", + "\n", + "[5 rows x 6 columns]" + ] + }, + "execution_count": 31, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "d['sepal_length_on_width'] = d['sepal_length_(cm)'] / d['sepal_width_(cm)'] # операции непосредственно со столбцами много быстрее поэлементных операций \n", + "d.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dRp4-vhV1xmt" + }, + "outputs": [], + "source": [ + "d.sepal_length_on_width = d.sepal_length_on_width.apply(np.sin)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 197 + }, + "executionInfo": { + "elapsed": 767, + "status": "ok", + "timestamp": 1615296813029, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "357_A4ny1xjb", + "outputId": "196d0d5d-1883-4552-ec7c-890f592130de" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_length_(cm)sepal_width_(cm)petal_length_(cm)petal_width_(cm)targetsepal_length_on_width
05.13.51.40setosa0.993548
14.93.01.40setosa0.998045
24.73.21.30setosa0.994798
34.63.11.50setosa0.996224
45.03.61.40setosa0.983500
\n", + "
" + ], + "text/plain": [ + " sepal_length_(cm) sepal_width_(cm) ... target sepal_length_on_width\n", + "0 5.1 3.5 ... setosa 0.993548\n", + "1 4.9 3.0 ... setosa 0.998045\n", + "2 4.7 3.2 ... setosa 0.994798\n", + "3 4.6 3.1 ... setosa 0.996224\n", + "4 5.0 3.6 ... setosa 0.983500\n", + "\n", + "[5 rows x 6 columns]" + ] + }, + "execution_count": 33, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "def bias(x):\n", + " if x < 1.0:\n", + " return 0\n", + " return 1\n", + "d['petal_width_(cm)'] = d['petal_width_(cm)'].apply(bias)\n", + "d.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "aeUhqZEX1xey" + }, + "outputs": [], + "source": [ + "d.drop([column for column in d.columns if column.endswith('length_(cm)')], axis=1, inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 197 + }, + "executionInfo": { + "elapsed": 684, + "status": "ok", + "timestamp": 1615296912439, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "gQJ6De486fsw", + "outputId": "f39caff3-2866-4a3b-b6ac-8510ddad127f" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_width_(cm)petal_width_(cm)targetsepal_length_on_width
03.50setosa0.993548
13.00setosa0.998045
23.20setosa0.994798
33.10setosa0.996224
43.60setosa0.983500
\n", + "
" + ], + "text/plain": [ + " sepal_width_(cm) petal_width_(cm) target sepal_length_on_width\n", + "0 3.5 0 setosa 0.993548\n", + "1 3.0 0 setosa 0.998045\n", + "2 3.2 0 setosa 0.994798\n", + "3 3.1 0 setosa 0.996224\n", + "4 3.6 0 setosa 0.983500" + ] + }, + "execution_count": 36, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "d.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 589, + "status": "ok", + "timestamp": 1615296981297, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "H6wlNTB76hoP", + "outputId": "ef4c98c9-53fc-403d-e5ad-91b96ab8f864" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "((150, 4), (300, 4))" + ] + }, + "execution_count": 37, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "f = pd.concat([d,d], axis=0)\n", + "d.shape, f.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 572, + "status": "ok", + "timestamp": 1615297019618, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "8wvhQgCh6stP", + "outputId": "ed87bb36-d869-49c6-cc1a-516cd9daa65b" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "((150, 4), (150, 8))" + ] + }, + "execution_count": 38, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "f = pd.concat([d,d], axis=1)\n", + "d.shape, f.shape" + ] + }, + { + "cell_type": "code", 
+ "execution_count": 27, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 143 + }, + "executionInfo": { + "elapsed": 476, + "status": "ok", + "timestamp": 1632405950884, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "hYXfdNRds8wc", + "outputId": "2b21ce4b-5d17-4800-ea16-396dc95557c3" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
45
112
212
312
\n", + "
" + ], + "text/plain": [ + " 4 5\n", + "1 1 2\n", + "2 1 2\n", + "3 1 2" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_1 = pd.DataFrame(index=[1,2,3], data=[[1,2],[1,2],[1,2]], columns=[4,5])\n", + "df_2 = pd.DataFrame(index=[5,6,3], data=[[1,2],[1,2],[1,2]], columns=[5,7])\n", + "df_1" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 143 + }, + "executionInfo": { + "elapsed": 12, + "status": "ok", + "timestamp": 1632405952831, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "-bALOOiOs_xk", + "outputId": "56478aab-30e8-477d-8628-2352f3ed3ac4" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
57
512
612
312
\n", + "
" + ], + "text/plain": [ + " 5 7\n", + "5 1 2\n", + "6 1 2\n", + "3 1 2" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_2" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 237 + }, + "executionInfo": { + "elapsed": 400, + "status": "ok", + "timestamp": 1632405958777, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "nspfyfjMUepW", + "outputId": "946cd0f5-3470-4620-a1ea-52221f5a06b1" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
457
11.02NaN
21.02NaN
31.02NaN
5NaN12.0
6NaN12.0
3NaN12.0
\n", + "
" + ], + "text/plain": [ + " 4 5 7\n", + "1 1.0 2 NaN\n", + "2 1.0 2 NaN\n", + "3 1.0 2 NaN\n", + "5 NaN 1 2.0\n", + "6 NaN 1 2.0\n", + "3 NaN 1 2.0" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_1 = pd.DataFrame(index=[1,2,3], data=[[1,2],[1,2],[1,2]], columns=[4,5])\n", + "df_2 = pd.DataFrame(index=[5,6,3], data=[[1,2],[1,2],[1,2]], columns=[5,7])\n", + "\n", + "pd.concat([df_1,df_2], axis=0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 217 + }, + "executionInfo": { + "elapsed": 594, + "status": "ok", + "timestamp": 1615297123302, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "hfsafxqc6wl0", + "outputId": "80ab6214-48dd-4847-9637-c8eda376ce2b" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_width_(cm)petal_width_(cm)targetsepal_length_on_widthsepal_width_(cm)petal_width_(cm)targetsepal_length_on_width
03.50setosa0.9935483.50setosa0.993548
13.00setosa0.9980453.00setosa0.998045
23.20setosa0.9947983.20setosa0.994798
33.10setosa0.9962243.10setosa0.996224
43.60setosa0.9835003.60setosa0.983500
\n", + "
" + ], + "text/plain": [ + " sepal_width_(cm) petal_width_(cm) ... target sepal_length_on_width\n", + "0 3.5 0 ... setosa 0.993548\n", + "1 3.0 0 ... setosa 0.998045\n", + "2 3.2 0 ... setosa 0.994798\n", + "3 3.1 0 ... setosa 0.996224\n", + "4 3.6 0 ... setosa 0.983500\n", + "\n", + "[5 rows x 8 columns]" + ] + }, + "execution_count": 44, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "f.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "HRY-rDbb8gGk" + }, + "outputs": [], + "source": [ + "g = d.drop(['sepal_width_(cm)', 'petal_width_(cm)'], axis=1)\n", + "h = d.drop(['sepal_length_on_width'], axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 197 + }, + "executionInfo": { + "elapsed": 429, + "status": "ok", + "timestamp": 1615297139175, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "tsgVE2Si8oFG", + "outputId": "dea93f74-7d0d-4030-c81c-84ea655c5f6d" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
targetsepal_length_on_width
0setosa0.993548
1setosa0.998045
2setosa0.994798
3setosa0.996224
4setosa0.983500
\n", + "
" + ], + "text/plain": [ + " target sepal_length_on_width\n", + "0 setosa 0.993548\n", + "1 setosa 0.998045\n", + "2 setosa 0.994798\n", + "3 setosa 0.996224\n", + "4 setosa 0.983500" + ] + }, + "execution_count": 46, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "g.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 197 + }, + "executionInfo": { + "elapsed": 628, + "status": "ok", + "timestamp": 1615297148886, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "kny_HFf489cy", + "outputId": "59ae8694-c22e-4118-e25f-f31a2e148c4e" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_width_(cm)petal_width_(cm)target
03.50setosa
13.00setosa
23.20setosa
33.10setosa
43.60setosa
\n", + "
" + ], + "text/plain": [ + " sepal_width_(cm) petal_width_(cm) target\n", + "0 3.5 0 setosa\n", + "1 3.0 0 setosa\n", + "2 3.2 0 setosa\n", + "3 3.1 0 setosa\n", + "4 3.6 0 setosa" + ] + }, + "execution_count": 47, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "h.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 197 + }, + "executionInfo": { + "elapsed": 588, + "status": "ok", + "timestamp": 1615297241757, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "ZAKyHnni8_wx", + "outputId": "cc83133f-1f83-4c7a-f041-b23d01f14cf4" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
targetsepal_length_on_widthsepal_width_(cm)petal_width_(cm)
0setosa0.9935483.50
1setosa0.9935483.00
2setosa0.9935483.20
3setosa0.9935483.10
4setosa0.9935483.60
\n", + "
" + ], + "text/plain": [ + " target sepal_length_on_width sepal_width_(cm) petal_width_(cm)\n", + "0 setosa 0.993548 3.5 0\n", + "1 setosa 0.993548 3.0 0\n", + "2 setosa 0.993548 3.2 0\n", + "3 setosa 0.993548 3.1 0\n", + "4 setosa 0.993548 3.6 0" + ] + }, + "execution_count": 49, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "d = g.merge(h, on='target')\n", + "d.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 424 + }, + "executionInfo": { + "elapsed": 712, + "status": "ok", + "timestamp": 1614767389654, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "m6ec0Exh9K8V", + "outputId": "1c97b950-0ba5-4b63-f8b7-2560f8decceb" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
setosaversicolorvirginica
0100
1100
2100
3100
4100
............
7495001
7496001
7497001
7498001
7499001
\n", + "

7500 rows × 3 columns

\n", + "
" + ], + "text/plain": [ + " setosa versicolor virginica\n", + "0 1 0 0\n", + "1 1 0 0\n", + "2 1 0 0\n", + "3 1 0 0\n", + "4 1 0 0\n", + "... ... ... ...\n", + "7495 0 0 1\n", + "7496 0 0 1\n", + "7497 0 0 1\n", + "7498 0 0 1\n", + "7499 0 0 1\n", + "\n", + "[7500 rows x 3 columns]" + ] + }, + "execution_count": 46, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "pd.get_dummies(d.target)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 197 + }, + "executionInfo": { + "elapsed": 440, + "status": "ok", + "timestamp": 1615297478580, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "Hrp_HGEb9t4d", + "outputId": "b3b9983b-e598-4288-90ee-7b0d1abe5ff8" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_length_on_widthsepal_width_(cm)petal_width_(cm)target_setosatarget_versicolortarget_virginica
00.9935483.50100
10.9935483.00100
20.9935483.20100
30.9935483.10100
40.9935483.60100
\n", + "
" + ], + "text/plain": [ + " sepal_length_on_width sepal_width_(cm) ... target_versicolor target_virginica\n", + "0 0.993548 3.5 ... 0 0\n", + "1 0.993548 3.0 ... 0 0\n", + "2 0.993548 3.2 ... 0 0\n", + "3 0.993548 3.1 ... 0 0\n", + "4 0.993548 3.6 ... 0 0\n", + "\n", + "[5 rows x 6 columns]" + ] + }, + "execution_count": 50, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "d = pd.get_dummies(data=d, columns=['target'])\n", + "d.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Ym2h89BMguk6" + }, + "source": [ + "### Графическое представление" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "EB8GRu9XxNaZ" + }, + "outputs": [], + "source": [ + "%matplotlib inline\n", + "import seaborn as sns\n", + "from matplotlib import pyplot as plt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "hbipgoEZxNOg" + }, + "outputs": [], + "source": [ + "sns.set_style(\"whitegrid\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 122 + }, + "executionInfo": { + "elapsed": 587, + "status": "ok", + "timestamp": 1614779517504, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "VlMb-EWdxNMn", + "outputId": "9907624b-bf04-4f40-f152-94951d92a782" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[(0.12156862745098039, 0.4666666666666667, 0.7058823529411765), (1.0, 0.4980392156862745, 0.054901960784313725), (0.17254901960784313, 0.6274509803921569, 0.17254901960784313), (0.8392156862745098, 0.15294117647058825, 0.1568627450980392), (0.5803921568627451, 0.403921568627451, 0.7411764705882353), (0.5490196078431373, 0.33725490196078434, 
0.29411764705882354), (0.8901960784313725, 0.4666666666666667, 0.7607843137254902), (0.4980392156862745, 0.4980392156862745, 0.4980392156862745), (0.7372549019607844, 0.7411764705882353, 0.13333333333333333), (0.09019607843137255, 0.7450980392156863, 0.8117647058823529)]\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjwAAABECAYAAACF4e8fAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAChklEQVR4nO3bMWtTYRiG4a+pqJBQEO1gF4UiIo6ZdWpH/4WLm5s4WKgOrs6CILp1chGnThkEMZuODnWR2hRsaYjRmuMfaDsIX488XNf6Lg8cONxwOHNN0xQAgGSdtgcAANQmeACAeIIHAIgneACAeIIHAIh35qTj+w8fm697h6e15dRd747Lucl22zOq2e4tldHvUdszqlj+1pTZpcXSGe20PaWK6eWrZXowa3tGNWe70zLd/9H2jGoWLiyV+XHu8xt3D8tkMml7RhW93m7pdBbLbJb5bhl3lsv34J+zr82XUb/fXzzqdmLwzJqmPNjcrbPqP/Buda/cGNxre0Y1n1delPUv623PqGLj6WH5+eRxOf9ore0pVew/e1k+vRm3PaOam3cOyvDV87ZnVLNyd61c3Jy2PaOa0eqvMhgM2p5Rxa3br0uv+7QcjB+2PaWKnd5GuT+eb3tGNW8X/mwdd/NJCwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIN9c0zbHH4XC4U0rZOr05AAD/7Eq/31886nBi8AAAJPBJCwCIJ3gAgHiCBwCIJ3gAgHiCBwCI9xdiZWLdKNW9eAAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light", + "tags": [] + }, + "output_type": "display_data" + } + ], + "source": [ + "print(sns.color_palette())\n", + "sns.palplot(sns.color_palette())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 622, + "status": "ok", + "timestamp": 1615297767532, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "4umRGJuKqHuO", + "outputId": "49a1d76f-c4ba-4088-817f-e1bdce211bdc" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{0.0: 'setosa', 1.0: 'versicolor', 2.0: 'virginica'}" + ] + }, + "execution_count": 54, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "targets = {float(i):target for i, target in enumerate(ds.target_names)}\n", + "targets" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "executionInfo": { + "elapsed": 456, + "status": "ok", + "timestamp": 1615297774179, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "SDeuDnTEXKQk", + "outputId": "53cf3a73-56d9-42cc-f715-9dee1f23fd15" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_length_(cm)sepal_width_(cm)petal_length_(cm)petal_width_(cm)target
507.03.24.71.41
516.43.24.51.51
526.93.14.91.51
535.52.34.01.31
546.52.84.61.51
555.72.84.51.31
566.33.34.71.61
574.92.43.31.01
586.62.94.61.31
595.22.73.91.41
605.02.03.51.01
615.93.04.21.51
626.02.24.01.01
636.12.94.71.41
645.62.93.61.31
656.73.14.41.41
665.63.04.51.51
675.82.74.11.01
686.22.24.51.51
695.62.53.91.11
705.93.24.81.81
716.12.84.01.31
726.32.54.91.51
736.12.84.71.21
746.42.94.31.31
756.63.04.41.41
766.82.84.81.41
776.73.05.01.71
786.02.94.51.51
795.72.63.51.01
805.52.43.81.11
815.52.43.71.01
825.82.73.91.21
836.02.75.11.61
845.43.04.51.51
856.03.44.51.61
866.73.14.71.51
876.32.34.41.31
885.63.04.11.31
895.52.54.01.31
905.52.64.41.21
916.13.04.61.41
925.82.64.01.21
935.02.33.31.01
945.62.74.21.31
955.73.04.21.21
965.72.94.21.31
976.22.94.31.31
985.12.53.01.11
995.72.84.11.31
\n", + "
" + ], + "text/plain": [ + " sepal_length_(cm) sepal_width_(cm) ... petal_width_(cm) target\n", + "50 7.0 3.2 ... 1.4 1\n", + "51 6.4 3.2 ... 1.5 1\n", + "52 6.9 3.1 ... 1.5 1\n", + "53 5.5 2.3 ... 1.3 1\n", + "54 6.5 2.8 ... 1.5 1\n", + "55 5.7 2.8 ... 1.3 1\n", + "56 6.3 3.3 ... 1.6 1\n", + "57 4.9 2.4 ... 1.0 1\n", + "58 6.6 2.9 ... 1.3 1\n", + "59 5.2 2.7 ... 1.4 1\n", + "60 5.0 2.0 ... 1.0 1\n", + "61 5.9 3.0 ... 1.5 1\n", + "62 6.0 2.2 ... 1.0 1\n", + "63 6.1 2.9 ... 1.4 1\n", + "64 5.6 2.9 ... 1.3 1\n", + "65 6.7 3.1 ... 1.4 1\n", + "66 5.6 3.0 ... 1.5 1\n", + "67 5.8 2.7 ... 1.0 1\n", + "68 6.2 2.2 ... 1.5 1\n", + "69 5.6 2.5 ... 1.1 1\n", + "70 5.9 3.2 ... 1.8 1\n", + "71 6.1 2.8 ... 1.3 1\n", + "72 6.3 2.5 ... 1.5 1\n", + "73 6.1 2.8 ... 1.2 1\n", + "74 6.4 2.9 ... 1.3 1\n", + "75 6.6 3.0 ... 1.4 1\n", + "76 6.8 2.8 ... 1.4 1\n", + "77 6.7 3.0 ... 1.7 1\n", + "78 6.0 2.9 ... 1.5 1\n", + "79 5.7 2.6 ... 1.0 1\n", + "80 5.5 2.4 ... 1.1 1\n", + "81 5.5 2.4 ... 1.0 1\n", + "82 5.8 2.7 ... 1.2 1\n", + "83 6.0 2.7 ... 1.6 1\n", + "84 5.4 3.0 ... 1.5 1\n", + "85 6.0 3.4 ... 1.6 1\n", + "86 6.7 3.1 ... 1.5 1\n", + "87 6.3 2.3 ... 1.3 1\n", + "88 5.6 3.0 ... 1.3 1\n", + "89 5.5 2.5 ... 1.3 1\n", + "90 5.5 2.6 ... 1.2 1\n", + "91 6.1 3.0 ... 1.4 1\n", + "92 5.8 2.6 ... 1.2 1\n", + "93 5.0 2.3 ... 1.0 1\n", + "94 5.6 2.7 ... 1.3 1\n", + "95 5.7 3.0 ... 1.2 1\n", + "96 5.7 2.9 ... 1.3 1\n", + "97 6.2 2.9 ... 1.3 1\n", + "98 5.1 2.5 ... 1.1 1\n", + "99 5.7 2.8 ... 
1.3 1\n", + "\n", + "[50 rows x 5 columns]" + ] + }, + "execution_count": 55, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "df[df.target==1]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Rg_HMRSVzGz-" + }, + "source": [ + "Строим гистограммы" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 406 + }, + "executionInfo": { + "elapsed": 1244, + "status": "ok", + "timestamp": 1615297826988, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "mx_PNSF8xNKe", + "outputId": "5d46e25e-fb29-467c-d88f-b3b689306815" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.7/dist-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n", + " warnings.warn(msg, FutureWarning)\n", + "/usr/local/lib/python3.7/dist-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n", + " warnings.warn(msg, FutureWarning)\n", + "/usr/local/lib/python3.7/dist-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. 
Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n", + " warnings.warn(msg, FutureWarning)\n" + ] + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "tags": [] + }, + "output_type": "display_data" + } + ], + "source": [ + "for target in targets:\n", + " sns.distplot(df[df.target==target]['sepal_length_(cm)'],kde=True,kde_kws={\"label\":targets[target]})" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 406 + }, + "executionInfo": { + "elapsed": 1136, + "status": "ok", + "timestamp": 1615297848522, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "bNUuVXgzhvz1", + "outputId": "7ef13877-988b-4be0-a9b2-b4983762d161" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.7/dist-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n", + " warnings.warn(msg, FutureWarning)\n", + "/usr/local/lib/python3.7/dist-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n", + " warnings.warn(msg, FutureWarning)\n", + "/usr/local/lib/python3.7/dist-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. 
Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n", + " warnings.warn(msg, FutureWarning)\n" + ] + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "tags": [] + }, + "output_type": "display_data" + } + ], + "source": [ + "for target in targets:\n", + " sns.distplot(df[df.target==target]['sepal_width_(cm)'],kde=True,kde_kws={\"label\":targets[target]})" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 408 + }, + "executionInfo": { + "elapsed": 923, + "status": "ok", + "timestamp": 1615297853838, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "6Li1iREOhvts", + "outputId": "56d8c257-b464-465f-c365-a0dbc90b03b6" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.7/dist-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n", + " warnings.warn(msg, FutureWarning)\n", + "/usr/local/lib/python3.7/dist-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n", + " warnings.warn(msg, FutureWarning)\n", + "/usr/local/lib/python3.7/dist-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. 
Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n", + " warnings.warn(msg, FutureWarning)\n" + ] + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "tags": [] + }, + "output_type": "display_data" + } + ], + "source": [ + "for target in targets:\n", + " sns.distplot(df[df.target==target]['petal_length_(cm)'],kde=True,kde_kws={\"label\":targets[target]})" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 404 + }, + "executionInfo": { + "elapsed": 1286, + "status": "ok", + "timestamp": 1614779712345, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "oCQEu59thvri", + "outputId": "1e523154-41f1-4e2c-ca8e-0aebf1cee232" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.7/dist-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n", + " warnings.warn(msg, FutureWarning)\n", + "/usr/local/lib/python3.7/dist-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n", + " warnings.warn(msg, FutureWarning)\n", + "/usr/local/lib/python3.7/dist-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. 
Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n", + " warnings.warn(msg, FutureWarning)\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYQAAAEICAYAAABfz4NwAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nO3deXyc1X3v8c+MpBntu2TJkrGMl2MbvICNWQxmM4S0BNJCk9AAgTRNmmYhS7P1Jpc2N+1N0/uizXJzkxAISSCBQNgChLAFDAZvMsZgy8d4txZbsvZ1tMzcP56RkWVZmpFmlb/v10svSc88z3l+x2M9vznnOc85rkAggIiIiDveAYiISGJQQhAREUAJQUREgpQQREQEUEIQEZEgJQQREQEgNVoFG2PuBa4FGq21Zwe3/SfwAaAf2Avcbq1tm6isbdu2Bbxeb8Ri8/l8RLK8eFN9Epvqk9imc316enqOrVixoiTUY6OWEID7gB8Bvxqx7XngG9baQWPMfwDfAL42UUFer5dFixZFLLCampqIlhdvqk9iU30S23SuT3V19cFwjo1al5G1dh3QMmrbc9baweCvG4DKaJ1fRETCE897CB8H/hjH84uIyAjR7DI6JWPM/wAGgQdC2d/n81FTUxOx8/f19UW0vHhTfRKb6pPYVJ/3xDwhGGNuw7nZfKW1NqSJlHQPYXyqT2JTfRLbdK5PdXV1WMfGNCEYY64Bvgpcaq3tieW5RURkfNEcdvpb4DKg2BhTC9yJM6rICzxvjAHYYK39h2jFICIioYtaQrDW3jTG5nuidT4REZkaPaksIiKAEoKIiATFZdjpdNTe00+nb/CEbTneVPIyPXGKSEQkPEoIEdLpG2Td7mMnbFuzoFgJQUSShrqMREQEUEIQEZEgJQQREQGUEEREJEgJQUREACUEEREJUkIQERFACUFERIKUEEREBFBCEBGRICUEEREBlBBERCRICUFERAAlBBERCVJCEBERQAlBRESClBBERARQQhARkSAlBBERAZQQREQkSAlBREQAJQQREQlSQhAREQBSo1WwMeZe4Fqg0Vp7dnBbIfAQUAUcAD5krW2NVgwiIhK6aLYQ7gOuGbXt68CL1tr5wIvB30VEJAFELSFYa9cBLaM2Xw/8MvjzL4EPRuv8IiISnqh1GZ3CDGttQ/DnI8CMUA7y+XzU1NRELIi+vr6IlgfQn5pNw5GGE7Y1F7roPHIwoucZSzTqE0+qT2JTfRLbVOoT64RwnLU2YIwJhLKv1+tl0aJFETt3TU1NRMsDqG3tobzsxOoUFRdRWTAroucZSzTqE0+qT2JTfRLbyPpUV1eHdWysRxkdNcaUAwS/N8b4/CIicgqxTghPAh8L/vwx4IkYn19ERE4hmsNOfwtcBhQbY2qBO4HvAr8zxvwdcBD4ULTOLyIi4YlaQrDW3nSKl66M1jlFRGTy9KSyiIgASggiIhKkhCAiIoASgoiIBCkhiIgIoIQgIiJBSggiIgIoIYiISJASgoiIAEoIIiISpIQgIiKAEoKIiAQpIYiICKCEICIiQUoIIiICKCGIiEiQEoKIiABKCCIiEqSEICIigBKCiIgEKSGIiAighCAiIkFKCCIiAighiIhIkBKCiIgASggiIhKkhCAiIgCkxuOkxpgvAp8AAsDbwO3W2r54xCIiIo6YtxCMMRXA54GV1tqzgRTgI7GOQ0REThSvLqNUIMMYkwpkAvVxikNERIJcgUAg5ic1x
twB/BvQCzxnrf3oePtv27Yt4PV6I3b+vr4+0tPTI1YeQH9qNi/sPDGvrV08E89gV0TPM5Zo1CeeVJ/EpvoktpH16enpqV6xYsXKUI+N+T0EY0wBcD0wB2gDHjbG3Gytvf9Ux3i9XhYtWhSxGGpqaiJaHkBtaw/lZScm16LiIioLZkX0PGOJRn3iSfVJbKpPYhtZn+rq6rCOjUeX0Vpgv7W2yVo7ADwKXBSHOEREZIR4jDI6BFxgjMnE6TK6EtgShzhERGSEmLcQrLUbgUeArThDTt3Az2Idh4iInCguzyFYa+8E7ozHuUVEZGx6UllERAAlBBERCVJCEBERQAlBRESClBBERARQQhARkSAlBBERAZQQREQkSAlBREQAJQQREQlSQhAREUAJQUREgkKa3M4Y8yhwD/BHa60/uiGJiEg8hNpC+DHwt8C7xpjvGmNMFGMSEZE4CKmFYK19AXjBGJMH3BT8+TBwN3B/cOUzERFJYiHfQzDGFAG3AZ8A3gS+D5wLPB+VyEREJKZCvYfwGGCAXwMfsNY2BF96yBij5S9FRKaBUFdMu9ta+8zIDcYYr7XWZ61dGYW4REQkxkLtMvrOGNveiGQgIiISX+O2EIwxZUAFkGGMOQdwBV/KBTKjHJuIiMTQRF1G78O5kVwJ3DVieyfwz1GKSURE4mDchGCt/SXwS2PMDdba38coJhERiYOJuoxuttbeD1QZY740+nVr7V1jHCYiIklooi6jrOD37GgHIiIi8TVRl9FPg9//NTbhiIhIvIT6YNr3cIae9gLPAkuBLwa7k0REZBoI9TmEq621HcC1wAFgHvCVyZ7UGJNvjHnEGLPLGFNjjLlwsmWJiEhkhJoQhlsSfwk8bK1tn+J5vw88a61dCCwDaqZYnoiITFGoU1c8ZYzZhdNl9GljTAnQN5kTBmdMXYPzfAPW2n6gfzJliYhI5ITUQrDWfh24CFgZnOq6G7h+kuecAzQBvzDGvGmM+bkxJmuig0REJLpcgUAgpB2NMRcBVYxoVVhrfxXuCY0xK4ENwGpr7UZjzPeBDmvtt051zLZt2wJerzfcU51SX18f6enpESsPoD81mxd21p+wbe3imXgGuyJ6nrFEoz7xpPokNtUnsY2sT09PT/WKFStCnoA01FFGvwbmAtuAoeDmABB2QgBqgVpr7cbg748AXx/vAK/Xy6JFiyZxqrHV1NREtDyA2tYeystOTK5FxUVUFsyK6HnGEo36xJPqk9hUn8Q2sj7V1dVhHRvqPYSVwGJrbWjNiXFYa48YYw4bY4y11gJXAjunWq6IiExNqAnhHaAMaJhoxxB9DnjAGOMB9gG3R6hcERGZpFATQjGw0xizCfANb7TWXjeZk1prt+G0OkREJEGEmhD+JZpBiIhI/IU67PQVnCeU04I/bwa2RjEuERGJsZASgjHm73FGA/00uKkCeDxaQYmISOyFOnXFZ4DVQAeAtfZdoDRaQYmISOyFmhB8wSkmADDGpOI8hyAiItNEqAnhFWPMPwMZxpirgIeBP0QvLBERibVQE8LXceYfehv4FPAM8M1oBSUiIrEX0rBTa63fGPM48Li1tinKMYmISByMmxCMMS7gTuCzBFsTxpgh4IfW2m9HPzwREYmVibqMvogzuug8a22htbYQOB9YbYz5YtSjExGRmJkoIdwC3GSt3T+8wVq7D7gZuDWagYmISGxNlBDSrLXHRm8M3kdIi05IyaWlu59/eXIHL9YcjXcoIiJTMtFN5fGWttSyl8D/emonj71Zhwu4Y+18SnOmz0IbInJ6mSghLDPGdIyx3QWc9le+9p4Bntpez/XLZ/L09gaqD7Ty/iXl8Q5LRGRSxk0I1tqUWAWSjN7Y18zAUIBbLpjNkfY+dh3pVEIQkaQV6oNpMoath1rxpLpZUpnHsll5NHX56OkfjHdYIiKTooQwBdUHW1lakYc3NYWzK/IAONzSE+eoREQmRwlhkvoH/bxd2865swsAWFSWi9sFB5UQR
CRJKSFM0oHmbvqH/CwuzwUgw5NCSY6XI+19cY5MRGRylBAmaU9jFwDzSrOPbyvNSaex03eqQ0REEpoSwiQNJ4QzS7KObyvN9dLa3U//oD9eYYmITJoSwiTtaeyiIj+DTM97I3dLc9IJAMe61EoQkeSjhDBJe5u6mDuiuwigNMcLwNEO3UcQkeSjhDBJh5p7qCrKPGFbUbYHF9DcrVk9RCT5KCFMQkffAJ2+QSryM07Ynup2k5eRRosSgogkISWESahr7QWgoiDjpNcKszw06x6CiCQhJYRJOJ4Q8sdOCGohiEgyCmlN5WgwxqQAW4A6a+218YpjMuraTt1CKMry0N0/hG9gKNZhiYhMSTxbCHcANXE8/6TVtfXiSXVTnOU96bXCbGdbS49aCSKSXOKSEIwxlcBfAj+Px/mnqq61l5l56bjdrpNeK8zyANDcpYQgIsklXl1G/w18FcgJZWefz0dNTeQaE319fVMqb09DC/ke9wll9Kdm03CkgYHgU8oHjxyj+Vg+nUcOTjneiUy1PolG9Ulsqk9im0p9Yp4QjDHXAo3W2mpjzGWhHOP1elm0aFHEYqipqZlSeS2P1nF5VdEJZdS29lBeFgAgY1s7A+50ioqLqCyYNeV4JzLV+iQa1SexqT6JbWR9qqurwzo2Hl1Gq4HrjDEHgAeBK4wx98chjknpGxiiqdNHRX7mKfcpyvboHoKIJJ2YtxCstd8AvgEQbCH8k7X25ljHMVkNwemtxxphNKwg03N8JJKISLLQcwhhGu8ZhGGFWR7aevoZ8gdiFZaIyJTF7TkEAGvty8DL8YwhXHVtzopoleO0EAozPfgD0NTpY3ZR1in3ExFJJGohhKmutRe3C8ry0k+5T0Fw6Gm9uo1EJIkoIYSprq2PGbnppKWc+p+uUAlBRJKQEkKY6tp6mDnO/QOAvIw03C6o0/rKIpJElBDCVNfWO+4NZYAUt4v8TI9aCCKSVJQQwjDkD9DQ1jfukNNhhZkeGtrUQhCR5KGEEIbGzj4G/YEJWwgABVlp1LerhSAiyUMJIQzjLYwzWmGmh7aeAbp8g9EOS0QkIpQQwjD89HFlSC0EZ6TR4ZaeqMYkIhIpSghhGG9hnNGGh54eUkIQkSShhBCGutZeCjLTyPRM/IB3oVoIIpJklBDCUNfWG1LrACAjLYVsb6oSgogkDSWEMDgrpYWWEFwuF+V56eoyEpGkoYQQokAgEFYLAWBmfoYSgogkDSWEELX1DNDTPxTSMwjDZuanc7i1F7+mwRaRJKCEEKLjQ07DbCH0D/o52qknlkUk8SkhhGj45nBlwamXzhxtVjB57G/qjkpMIiKRFNcFcpLJ4VYnIZxRFHpCGG5NbD7Qcvy4HG8qeZmeCY9t7+mnc4ynnEM9XkQkXEoIITrU0kNeRhq56WkhH5PpSSU9zc3re5spyXEW1FmzoDikC3qnb5B1u4+dtD3U40VEwqUuoxAdbullVmHo9w/AGXpaku2lsdMXpahERCJHCSFEh1t7mBXG/YNhpbnpNCkhiEgSUEIIgd8foLa1l1mFk0gIOV66fIP09GvWUxFJbEoIIWjq8tE/6D8+aigcJTlepwy1EkQkwSkhhOD4kNNJtBBmBG8mH+nQswgiktiUEEIwPOR0MvcQ8jPTyEhLOb64johIolJCCMH+pm7cLsIeZQTOSKPKggxqlRBEJMEpIYRg77FuZhVm4k1NmdTxFQUZNHb20T/oj3BkIiKRowfTQrC3sYu5JdmTPr4yPxN/ABra1UqImZ5W6O+c/PGeHMgsiFw8Ikkg5gnBGDML+BUwAwgAP7PWfj/WcYTK7w+w/1g3F88rnnQZw1NYqNsohvo7Yc+Lkz9+3pVKCHLaiUeX0SDwZWvtYuAC4DPGmMVxiCMkdW29+Ab9zC2dfAshNyON3PRUrY0gIgkt5gnBWttgrd0a/LkTqAEqYh1HqPY2dQFwZnHWlMo5sySbfce6CQS0NoKIJKa43kMwxlQB5wAbx9vP5
/NRU1MTsfP29fWFVJ7bm8VrO5udY7o6eOvdHtJTAvh9J09n3Z+aTcORhhO2LSz2HN9W7Blkm2+Qre/W0ZU7NOG5xyoPoLnQReeRg5OqT7KIRH1K0voYOHJk0sen5TfT1BCZacv1/iQ21ec9cUsIxphs4PfAF6y1HePt6/V6WbRoUcTOXVNTE1J5ta09bG+sIz3NTW2Pm7reAGsWFFNZcMaY+5aXnfjpPyMzk/Kycufn3H5e2GvZ3ebn+vNDO/fo8gCKiouoLJg1qfoki4jUp+0QlJVN/vjcdIrdwZltc8rAO/kuQ70/iW0616e6ujqsY+OSEIwxaTjJ4AFr7aPxiCFUR9r7mJGTjsvlmlI5+ZkeyvPSWbf7GF95X4SCk8jqaoSD6+HoDugZNfV4TjlUrIC5l8OC90NewvZyikxaPEYZuYB7gBpr7V2xPn84hvwBjrT3sWJ2ZEabnDUzlxdrGmns6KM0Nz0iZUoE9HfBziehdjO43VBs4JyPQulZzusdtdC020kWu56Cp/8Jqi6GpR+Cs/4KvDnxjV8kQuLRQlgN3AK8bYzZFtz2z9baZ+IQy7jqWnvpH/IzMz8yF+8lFfm8UNPIY2/W8alL50akTJmiJgtv/hoGeuDMy50WgDfHGXaaP6prMBCA5j2w4zF460F48nPwp/8B59wMqz4JhXPiUweRCIl5QrDWvgZMrf8lRnYfdR5smpkf/pQVYynJ8bKsMo/fbjrE319yJm53UvwzTF8H18Pbj0D2DLjgHyF35vj7u1xQPB8u/Sqs+YrTotj4U9j0M9jw/2DhX8Jl34Cys2MTv0iEaeqKcew+2kmK20VpTuS6dz54TgUHmnt4bufkR8BIBBxcD28/DKWL4OIvTpwMRnO5YNYquPEe+MLbcMmX4cCr8JOL4fefgI766MQtEkWaumIcu492MSPXS0oEP8lfvrCEX2/I4q7nd3PlohmkpSgnx9zBN4LJYDGs+DikTPHPIHcmXPktuOizsP4HsOHHYJ+FNf8EZ32QkrQ+Z9TTZGgKDYkhJYRT8PsD2KOdmBmRvWGY6nbz9WsW8slfV/PjP+/ljrXzI1q+TODQBnj7ocglg5EyCmDtnXDuLfD0l+GFO+GtBxmsuBoqJ3l/QVNoSAzp4+kp7G7spLNvkKopPqE8lqvPKuODy2fy3y/u5uEthyNevpzCoQ2w/SEoWQgrbo9sMhip8Ey4+VG4/JvQvJuSHfdCe210ziUSQWohnMKm/S0AzCmKfEIA+O4NS2ns9PGVR7bzh+0NrJ5bxBmFmZTmellcnheVc57WDm8MJoMFsPLvICUtuudzuWDZh2GgFzbdDa//EFZ+HEpMdM8rMgVKCKewcX8LpTle8jOjc+FIT0vhlx9fxd2v7uOBDYdYt7vp+GtZnhQ+dlEVM/MzcE/xgTgBajc5w0SLY5QMRsqfxbGFtzBj/6POaKRzb4XyZbE7v0gYlBDGEAgE2LS/hXNm5U/5CeXxpKW4+cfL5vGPl82jraef+rY+6tt6eXDzYX788l4uOLOI65aFOfpFTnR4E7z1W2e46Hl/BymemIfg9+TARZ9zEsLWXzkthRlnxTyO0QaGBuge6MY35KPf309GagY5nhy8Kd54hyZxooQwhr1N3TR1+lg2Kz9m58zP9JCf6WHxzFyuXFTK1x99m4c2H2ZWQQbnnKGbipOyfx3seNRpGZz3ibgkg+PSMmHVp5wRSNW/cB5kK14Qs9MP+AfYcmQLW45uYXvTdva376exp5EAJ8+X5U3xUpZVRmV2JRXZFVTmVDI7dzZz8uZQmVMZs5hHave10z0QmckGR3Nn6lbqMCWEMfx5VyMA588pZPfRrpif3+Vy8Y+XzWXD3mae2t7AwrJcMjyTW74zGRWkM/lhmgD+IWf4545HoWwpnHNr9G4ghyMtA87/B3jjR7D5587DcAVVUT3lzuad/M7+jucPPk9HfwcprhQWFCzg/PLzqciuINeTizfVi8ftoW+wj86BTtp97
dR31VPXVceO5h20+dqOl5fqSqXUW8qChgXMyZvDnNw5VOVVMSd3Dvnp0fsA1T3Qzfr69VEp23h0X2dYAvyVJJ6XdjViZuRQlpcel4QAkOJ2cd3ymfzopT28sruRa84uj0sc8ZA61Ad7Nkzu4P5uZyqKpl0w6wJY8jfgTqBk6smC8z8Nr//A6UJafYfzpHSEbT26lR+8+QOqj1aTkZrB2jPWsnb2Wi4ov4DMtMywyuro7+Bg+0H2d+znQPsB3qp9i9rOWtbXrWfAP3B8v3xvPnPy5lCVW3X8e1VeFZU5laS5Y3jfRiZNCWGUjr4BNh9o4ROXnBnvUCjPy+CcM/J5fW8zF80rJjddf1SnFAhAw5uw43EY6IY1X3OmrU7Em/LpuU5LYf1/w8afwEV3QMaJn67b82bSjR8Gu6Er9KeeD7Qf4J537mHTkU0UphfyqaWf4urZV5PtcabvbvO1MeAfIM8b+ki2XE8uS0qWsKRkCQA1Gc70ykP+Ieq76tnfsZ/97c7XgY4DvFL7Co/teez48amuVEozSylML6Qoo4jC9EIK0wvJ9+aT580jz5tHvjeffG8+ud5c8rx5SiBxooQwyqu7jzHoD3DFwtJ4hwLA5aaUNw+18cbeZt531hTm95+uhvqhYTvsf9kZ659XCav+HhZf58w1lKiyiuH8T8HrP4JNP3VuOo/45N6Nn/X7n4PuhZBZOGFxg/5B1tWu4/X61/G4PVxxxhWsKluFJ8XDW8feOmHf1TNXh5UQTiXFncKs3FnMyp3Fmso1J7zW7mvnQMcBDrQfOH6/oqWvhcaeRmqaa2jpa2EwMHjKsrPSso4niYrsCgrSC+gd7GVm9kyKM4pxu9TvHw1KCKM8/XY9xdleVswuoKG9N97hUJTt5ayZuWzc38xlpiTe4cRXf5czR1BH3XvfO49CYAgyi2HZTVCxMrG6iMaTN8sZcbTpZ7D5HqfVMIkhsQc7DvLU3qdo7mtmaclSrp599bjdQoP+QerDaHWMNpAxMObxWWlZxxNNnjePZSXLWFYy9hDbQCBA90A3bb422vvbae9rp72/3fnd1378q6WvhV0tu6jtqsUf8AOQ5k6jIruCM/PPZF7+PGZkzojqaMDTiRLCCF2+QV6saeQj582K6PxFU3XJ/BLeqe9gy4FWrloc+f7mhBTwQ9tBaN4LLfudT/++9vde9+Y6cwiVLHKePC6aC8n4qbHEwPKPwpu/cu59rLgt5Hr0D/XzwsEX2HJ0C/nefD666KPMzZ94WvW+wb6TWg3hONJwhLL+k1ur4bQ8XC4X2Z5ssj3ZVDLxyKVDHYd4ev/T1HfVU99Vz8GOg7x06CVeOvQSuZ5czi4+m6UlSynNTIyWfbJSQhjhhZ1H8Q36uTbBxv7PKsxkdlEm6/ceY9Dvj3c40dXdTOZbv4A9f4Be52lxskud5whyK5wkkFsxpSUtQzI0OLWRTgADfaHtV3Eu9Hc66yy88wic/TcTHnKo4xBP7HmCVl8r55efz+WzLscTz2G1UZbqTqU4o5jijGKWliwFoLO/k71te6lpruGN+jd4vf51yrPKWVW2irOKzyLVrctbuPQvNsLvt9YyMy+dFQk47v+SeSXcv/Egf97VxO2ro3wxjIeBXmd6h/U/IKO/yxmjb/4CSheCJw71HeiB/VO8B1F5Xuj7zrkU+jpg74tO6+e828bcbdA/yMuHX+aN+jfI8+Zx6+JbqcqrmlqcETLVrqjx9A2enFxzPDksL13O8tLldA90s+PYDqqPVvPE3id44dALrJixgpUzVh6/oS4TU0IIOtzSw2t7jvH5K+Yn5MI1C8tzKMnxcv+Gg9x2UdX06jOtq4bH/gGO7YaF19I27wYKaJ/4uOlm4bXg64Tdz5KZX3nSX+eR7iM8/u7jNPY2cm7puVxVdVVCPVU81a6o8SwrHn+6j6y0LFaVr+K8svPY176PTQ2bnJvsda+zYsYKLqq4iByPljqdiBJC0EObD+MCPnTerHiHMia3y
8Wa+SX8fmstL+9u4nIzDfpKAwHnIa3n73SGiN7yGMy9gqE9b0LbtomPn25cLlj6YejvIm/zPVQsuJy60oUMDA3wWt1rrK9fT2ZqJjctvIn5BZo2fSwul4u5+XOZmz+X5t5mXqt7jU1HNrHl6BYnMcy8iFxvbrzDTFhKCMDgkJ+Hqw9z6YISKiK0XGY0LJuVx6vvNvGjl/Zw2YKS5G4lDPrgqS/Btvth0XVw3Q9PGot/WnKnwLkfY2DLz7nA/plfpqXwi8FGWn2tLClewvuq3hf2g2Wnq6KMIq6fdz2XVF7Ca7WvseXoFqqPVnPujHO5uOJitRjGoIQAPL/zKEc7fHz7+jMm3jmOUt1ubr1oNv/nT7v5046jXHN2kj6X0H0MHroZDr0Bl34dLv0auJNwhFC0pHrZdMHHuXfbj9ncbZnp9nLtopuZkx//hyWTUWF6IdfNu85JDHWvUX20mq1Ht7JixgpWV6yG6XsvPmynfUIIBAL85JW9zC7KZO2ixB/See3Sch5/s57v/rGGy0wJ6WlJMuZ+2NGd8NsPQ1cj3HgvnH1DvCNKKDt6j3LfsS0822FJ96Rx26CHz+3fRZPvGTZd8HEGPWodTFZBegEfmPsBLq64mNfqXmPzkc1sPbqVtTPX8tXSr1KSeZo/54NWTGPDvhbeqm3n7y85M6GePTiVVLebOz+wmAPNPXzvWRvvcMJjn4V7roLBfrj9GSWDoF7/AH9s38Ut+x7kI/se4JWufdxUeQWfL13NGefczs5zP0x53dtc9cdvU3pkZ7zDTXrDieGz53yWs4vP5rm653j/o+/ne5u/x7HeY/EOL65O6xZCIBDg/zxnKcnxcuOK+EzrOxmXzC/hYxfO5t71+1kxu4AzE33al0AAXvsvePHbzuIwN/3WeZ7gNNY22MvG7sO80PEur3Tto9c/wCxPHl8tu4zr8xfTVVjlTF3hcvHuwqtoKZrDeRt+waUv3cWBORfyztIP0ptVFO9qJLWC9AKum3cdt8y5hXXN6/hNzW942D7Mh8yHuHXxrczISvweg0g7rRPCn3YcofpgK//7r5ckXdfLN/5iETvqO/jCQ2/ylcsqySnrOeH1VDcMjnqGLcebSl5mjDtMfV3whzuCD1zdCNf/yJkGeho4PgHdKQ1BXhnNrnR2uTo50HOEfT0NvNOxj73dDQQIkJeaxeUl57KmaCnL8ubidrnpAvq8J97wbC6Zx3Pvv5PF7zzFgl3PMevgZvbPvYR3zVq6ck+/C1cklWWU8Z2Lv8Mnl36Sn27/KffX3M9van7D1VVXc8viWzi7+Ox4hxgzp21C6Ogb4H89VcP80mz+JolaB8PS01K49/bzuO3eTfz7i3+pUYoAAA5LSURBVId59t1Orlg4g8Is54J/zhn5vHmo7YRj1iwojm1CqKuG33/CmXriyv8JF38pMWcfnaTjE9AF+QMBWod6ODrQReNgF40uvzOp29B7c2Kl4qbCk8el2XOo8hZSmZaL2+Wmp3U/b7TuP77fsoV/fdL5/Kke3ln+1+ydfymL33maM/esY967f+ZI2WIOz15FfeVy+qP9BPc0dkbuGfzbxf/Gp5d9mgdqHuCxPY/xzP5nWF6ynBsW3DDhHFHTwWmZEAKBAP/yxA4a2nt55NMXkZqSnLdSctPTePCTF/L1323hiXeOsfVQG5UFGVTkZ7C3qYtjXT48KW7S01LISEthXmkWFfkZ0R+u6uuEV77nrA6WPQNuewqqLo7uOWMoEAjQPNhDddtu3ug66Fz8B7toGuhmMNhicAGFnjxmpOUw313IGVlFlKZlU5AS5jrZfj/0tJywqdflonrJteyYv4Y5BzYx5+Bmztt4H/5Nbo4VzeFoyTyaiufSUlBJwJMNuhEdlsqcSr626mt8ZvlneGzPYzy460G+tf5b/PvGf+eq2VfxgbkfYOWMldNyaoy41MgYcw3wfSAF+Lm19ruxPP9vtrfx6LZW7rhyPucm4DQV4fCkuvnYeeXMn1nE1
kOt7D7SyVu1bWzc33LSvj97dR856alceGYRVy2ewdpFMyjIimCLwdcFb97v3C/oOgLLb4b3fQcykvPfePjCv9fXzB5fM/tGfG8bem8qhSy3hxmp2azMqqQ0NZvStGxKUrNIK5wLrQdo7+ggL2OSD0MN9cOxA2O+1AfUFFZSU1BBfnczlc0HKG89zJKaPwEw6E6ltWAWrcXzaCmqorVwNl050+CBxhjJ9mRzy+JbuHnRzWxr2sYTe57g2QPP8uTeJ8nx5HBxxcVcWnkpq2eujupqcbEU84RgjEkB/i9wFVALbDbGPGmtjfrwid7+Ib7z9E4e2NbKDedWcseV0+dpz7yMNC43pVxuSgkEAiwsz2HLgVZ8g376BoboHRiiONtLbWsPL9smntt5lFS3i0vmF3Pt0plcfdYMciazAI9/yHmeYMfj8PbD0NcGZ1wIH/kNVK6IfEWjoH2oj9r+dur626kdaOdwfxv76p5mb+dB2kdc+HPcXualF7E2dz5zvUUUFi2gsWkXWfGeVM7loi27mLbsYt6ZvRLPQB/FHUcp6Wig0NfN3D0vs8A6K5sNpGUwULoYd2YuHXnldOTNpCO3HF967rTqzoskl8vFOaXncE7pOXxt1dd4re411tWuY13tOv64/48AzMmbw9LipSwtWcpZxWdRlVtFVlpWnCMPXzxaCKuAPdbafQDGmAeB64GoJITmLh/vNnbx2rvHeHDzIY519XPjWXl894YlCTlnUSS4XC4yPankpKcx8tbkmgXFVBZkEggEeKeug6ferueptxr48sNv4XnMzZr5xaysKmRJRR4V+RmU5aU7N9sHeqG3zbnYdzVC635o2ecsTFO7xZmpMzUdFlwDF34GZq2KeZ39gQD9gUF6/YP0+Pvp7TpMT08Dvf4BegMDtA320TLUQ8tg8Guoh+bBHur6O+j0+04oKz8lnTNzZnN17gLmeguZ6y1mrreQ4tSsE7rb6vPKWN+yL9ZVnVB/Wjr1RbOpL5oNpQtxpeeR215PQctBCloOUtF5jNkHNpA28N69jX5PJl3ZpfRkFdKTWUhvZgG9GQX0ezIZ8GQe/z6Qmo4rMM1n3B1HRmoGV82+iqtmX4U/4GfHsR1saNjA9qbtrKtdxxN7nzi+b2lGKVV5VVRkVxyfqbU4o5h8bz5ZaVlkpmWSlZZFVloWnhQPqa7UuM8+EI+EUAEcHvF7LXB+NE70u82H+ervtwPgdsHF80v4/BXzyOo9mrT3DSLB5XKxpDKPJZV5fP2ahWw91MYf3qrnpV2NvFDTeMK+T3i/xTLX3pMLSfE4M5Iu/ZBzf2D+1RGdkvpwfxufP/QEnUM+/AQYCvgZIoA/EGAIf/B7AH9w+0neHbvcdFcqRamZFKZmUpaWw/LMmVSm5VPpyaXSk09FWi7ZKV5nptJEXnEtDAF3Cu0Fs2gvmMWBuRfjL17GW03bSO9tJ7ejntz2enLbG8jqaiK3vZ6yhh2kDvrGL9Plwu9ODX6l4HenkJripcydwvo1n6ErN0mfog+D2+U+YWnRQCDA4c7D2FbLwY6Dx5cUXV+3nua+ZoYCQxOWmepKJdX93ldBegF3X3V3zIbAugKBMf6YosgYcyNwjbX2E8HfbwHOt9Z+9lTHVFdXNwEHYxSiiMh0MXvFihUhP4IdjxZCHTByStHK4LZTCqdCIiIyOfFICJuB+caYOTiJ4CPA38YhDhERGSHmHenW2kHgs8CfgBrgd9baHbGOQ0REThTzewgiIpKYTt+hNiIicgIlBBERAab5XEYTTZFhjPECvwJWAM3Ah621B2IdZ6hCqM9twH/y3qitH1lrfx7TIENkjLkXuBZotNaeNJ2kMcaFU9e/AHqA26y1W2MbZehCqM9lwBPA8Ax2j1prvx27CMNjjJmF87cxAwgAP7PWfn/UPknzHoVYn8tIkvfIGJMOrAO8ONfxR6y1d47aJ+zr27RtIYyYIuP9wGLgJmPM4lG7/R3Qaq2dB/wX8B+xjTJ0IdYH4
CFr7fLgV0Img6D7gGvGef39wPzg1yeB/xeDmKbiPsavD8CrI96bhLzQjDAIfNlauxi4APjMGP/fkuk9CqU+kDzvkQ+4wlq7DFgOXGOMuWDUPmFf36ZtQmDEFBnW2n5geIqMka4Hfhn8+RHgyuCnnkQUSn2ShrV2HXDyDHzvuR74lbU2YK3dAOQbY8pjE134QqhPUrHWNgx/2rfWduKMCKwYtVvSvEch1idpBP/Nu4K/pgW/Ro8QCvv6Np0TwlhTZIz+D3B8n+Bw2HYgUZehCqU+ADcYY7YbYx4JNpOTVaj1TSYXGmPeMsb80RhzVryDCZUxpgo4B9g46qWkfI/GqQ8k0XtkjEkxxmwDGoHnrbWnfH9Cvb5N54RwOvoDUGWtXQo8z3ufDiT+tgKzg038HwKPxzmekBhjsoHfA1+w1nbEO56pmqA+SfUeWWuHrLXLcWZ7WGWMmfLSbtM5IYQyRcbxfYwxqUAezs2XRDRhfay1zdba4VnJfo5zMylZhT3FSSKz1nYMN/Gttc8AacaY4jiHNS5jTBrOxfMBa+2jY+ySVO/RRPVJxvcIwFrbBvyZk+9hhX19m84J4fgUGcYYD84UGU+O2udJ4GPBn28EXrLWJuqTehPWZ1T/7XU4/aTJ6kngVmOMK3izrN1a2xDvoCbLGFM23H9rjFmF87eXqB8+hkcQ3QPUWGvvOsVuSfMehVKfZHqPjDElxpj84M8ZOOvL7Bq1W9jXt2k77NRaO2iMGZ4iIwW411q7wxjzbWCLtfZJnP8gvzbG7MG5IfiR+EU8vhDr83ljzHU4IypagNviFvAEjDG/BS4Dio0xtcCdODfGsNb+BHgGZzjjHpwhjbfHJ9LQhFCfG4FPG2MGgV7gIwn84QNgNXAL8Hawnxrgn4EzICnfo1Dqk0zvUTnwy+DoQzfOFEBPTfX6pqkrREQEmN5dRiIiEgYlBBERAZQQREQkSAlBREQAJQQREQlSQhAREUAJQaYJY8xtxpiZIex3nzHmximc59vGmLVjbL/MGPPUiJ8vmuw5jTHlw2VNRfDhpWenWo6cPpQQZLq4DZgwIUyVtfZ/WmtfmGC3y4CLJthnPF8C7p7C8QBYa5uABmPM6qmWJaeHafuksiS34IyUzwLVwLnADuBWYBFwF5ANHMNJBKuBlcADxphe4ELgK8AHgAzgdeBTEz11aow5D/iGtfavjTHX40wxnofzwWmntfZMY8x9wFPW2keCCxb9N85Tuq+NiPsfgCFjzM3A54LFrzHGfAkoA75qrX1knFBuAL4ZLC8FZx77awA/cLe19ofGmAPAb3HWJBjEWY/gfwPzgP8MPnkLzgRtHwXWj1d3EVALQRKbAX5srV0EdACfwZmF8kZr7QrgXuDfghfXLcBHgwub9OKsFndecPWyDJzVzCbyJs5iIwCXAO8A5wHnM2qq5OCKVXfjJJ0VOBd6gitS/QT4r2AsrwYPKQcuDsZxwkp3o8qdg7OoyfAkhZ8EqoDlwVlsHxix+6HgbJev4izQcyPO4i//OmKfLcG6iExICUES2WFr7fAn2/uB9wFnA88H56P5Js4Mm2O53Biz0RjzNnAFMOHc9sE54/caYxbhLEh0F7AG54L66qjdFwL7rbXvBlse909Q/OPWWr+1difOMo6nUg40jfh9LfDTYGxYa0cuwjM8ueHbwEZrbWewm8g3PPEZzlz5Ue9Kk+lBXUaSyEZ38XQCO6y1F453UPDT+4+Bldbaw8aYfwHSQzznOpxumAHgBZxP3ik4XVBT4Rvx83irVvUSeqzDZfpHle/nvb/t9GCZIhNSC0ES2RnGmOGL/98CG4CS4W3GmLQRq1p1AjnBn4cvqMeCC6KEM6roVeALwBvBT9tFOF1X74zabxdQZYyZG/z9phGvjYwlXLtxuoiGPQ98KjifPcaYwjDLW8DJsYuMSQlBEpnFWQy9BiggeP8A+A9jzFvANt4bzXMf8JNgV5IPp3//HZzpwjeHcc6NOF0664K/bwfeH
n1D2lrbh9O//7QxZitO18ywPwB/ZYzZZowJq//eWtuN0201L7jp58AhYHuwzn8bTnnA5cDTYR4jpylNfy0JKTha56ngTeHTijHmr4AV1tpvRqCsdcD11trWqUcm051aCCIJxlr7GHBgquUYY0qAu5QMJFRqIchpyRjzGDBn1OavWWv/FKPzLwF+PWqzz1p7fizOLzIWJQQREQHUZSQiIkFKCCIiAighiIhIkBKCiIgASggiIhL0/wETzoODnpeWFwAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light", + "tags": [] + }, + "output_type": "display_data" + } + ], + "source": [ + "for target in targets:\n", + " sns.distplot(df[df.target==target]['petal_width_(cm)'],kde=True,kde_kws={\"label\":targets[target]})" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "48op7eXwzKb4" + }, + "source": [ + "Строим точечные графики взаимного влияния параметров" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 225 + }, + "executionInfo": { + "elapsed": 1461, + "status": "ok", + "timestamp": 1614767506092, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "hKh-KV27whqi", + "outputId": "b6d7f703-3029-4c58-e075-ba67e8307bcf" + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "tags": [] + }, + "output_type": "display_data" + } + ], + "source": [ + "g = sns.FacetGrid(df, hue='target')\n", + "g.map(plt.scatter, 'sepal_length_(cm)', 'sepal_width_(cm)');\n", + "g.add_legend();" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 225 + }, + "executionInfo": { + "elapsed": 1523, + "status": "ok", + "timestamp": 1614767511831, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "ArJjRTF6ySuO", + "outputId": "a857d46d-33d9-417c-8a78-e3b3cf5dcbd9" + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "tags": [] + }, + "output_type": "display_data" + } + ], + "source": [ + "g = sns.FacetGrid(df, hue='target')\n", + "g.map(plt.scatter, 'petal_length_(cm)', 'petal_width_(cm)');\n", + "g.add_legend();" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 225 + }, + "executionInfo": { + "elapsed": 1157, + "status": "ok", + "timestamp": 1614767516166, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "5KszRYQ0yaaV", + "outputId": "ffcba165-ccb3-4f61-e7ae-c413017c2e8c" + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "tags": [] + }, + "output_type": "display_data" + } + ], + "source": [ + "g = sns.FacetGrid(df, hue='target')\n", + "g.map(plt.scatter, 'petal_length_(cm)', 'sepal_width_(cm)');\n", + "g.add_legend();" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 225 + }, + "executionInfo": { + "elapsed": 1668, + "status": "ok", + "timestamp": 1614767522475, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "SE3c3sS0yfgl", + "outputId": "ee44eed9-08f4-495a-e122-a0910757f984" + }, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAPcAAADQCAYAAADbJffdAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nO2de1hU5fbHvzPDNUAUjoB3RcUMRcVLmKaCgD6OgqKmHrU0rTRNyn4qaCni/VIntJ5O5ck0jbyhmHiOJOAly0uKkIqGEcZFEEUBEZCZeX9/TIwzw57Zey6b2TO8n+fx0b33e1kzsNx7f9/1riUihBBQKBSbQ2xpAygUCj9Q56ZQbBTq3BSKjUKdm0KxUahzUyg2ilU6d25urqVNUJGfn29pE3RCbTMOIdtmCFbp3DKZzNImqKipqbG0CTqhthmHkG0zBKt0bgqFwg51bgrFRqHOTaHYKLw69927dzFz5kyMGTMGUqkUu3btatTmwoUL6N+/PyIjIxEZGYlPP/2UT5MoDKTkpSD8YDgCdgUg/GA4UvJSmqRvU4zXnLHjc3CJRIKYmBj4+/vj8ePHmDhxIoYMGYJu3bpptBswYAC++OILPk2h6CAlLwVxP8ehVl4LALhbfRdxP8cBAKS+Ut76NsV4zR1e79xeXl7w9/cHALi6usLX1xelpaV8TkkxkIQrCSpnaqBWXouEKwm89m2K8Zo7vN651SksLEROTg769OnT6NrVq1cREREBLy8vLFu2DN27d9c7Vl1dHXJycvgy1SBqa2sFY4s2XGwrqS7ReZ7Pvky2mTKeOVG3rWfPnk02r7lpEueurq7GokWLsHz5cri6umpc8/f3R3p6OlxcXHD69GksWLAAqampesdzdHQUzJeek5MjGFu04WKbz3Uf3K2+2/i8iw+vfZlsM2U8cyLkn6kh8K6W19fXY9GiRRg3bhzCw8MbXXd1dYWLiwsAYPjw4ZDJZCgvL+fbLMrfRAdGw0nipHHOSeKE6MBoXvs2xXjNHV6dmxCCFStWwNfXF7Nnz2ZsU1ZWhoZ8EdnZ2VAoFGjVqhWfZlk95lSUpb5SRHaLhFik/FUQi8SI7BbJScAypa+u8eJeikMblzYQQYQ2Lm0Q91IcFdOMhNfH8suXLyM5ORl+fn6IjIw
EACxevBjFxcUAgGnTpuHEiRNITEyERCKBk5MTPv74Y4hEIj7Nsmr4UKiTbydDQRQAAAVRIPl2Mvp59eOklhvbVxdSXyl1ZjMhssY0S0J6J2pqW8IPhjO+l7ZxaYPUSZpaBRfbDBnPnH2F9DPURsi2GQKNULMy9CnKTT2euW2hmBfq3FaGj4uPQef5HM/ctlDMC3VuK0NICjVVt4VNkwWxUMxDg9iUcCUBJdUl8HHxQXRgtEEiVEpeikb/yG6ROFN4BiXVJXB3dAchBLFnY5FwJUHv2FJfKTLvZeLA7wegIAqIRWL0bd0XCVcSEHs2Fj4uPujk1gkXSy+qrk/2m4wPgj4wy3fBxtrzazVsa8q5hQB1bivEFEWZSW1Pvp2MuJfiAMAgJZ5JLT9fcl51/W71XQ3BTUEU2HdrHwBgovtEo+znytrza1Vzac/dXBycPpY3M/TFbxsa283UngsHfj9gcB9zzdEUcwsFeuduZhijcJtbFW+40/OJrjmaYm6hQO/czQx9Creh6rexqnhDRBuf6JqjKeYWCs3nk1IA6Fe4DVW/mdpzYbLfZIP7mGuOpphbKFDnbmboi99mihVvUL+Z4tiZxprSY4rGcZBPkMZ4U3pMaRJB64OgDxDkE6RxLsgnqNmIaQB9526W6FLbuajf2uq5UGPBU/JScLXsqsa5q2VXkZKXIkh7+YDeuSkquKjf1pIZhWZ1oc5NUYOr+m0NseM07p06N0UNruq3NcSO07h36twUNbio39YSO07j3qmgRlGDKVZ8kPcg3Km6ozOOXTtOfVj7Yao4daZjtv6Gxsmroy9m3tSxrRHq3BQVTGr51bKrOlMdMcWpq8dzMx03qO2+8DVrVhl9MfPNyaHVoY/lFBVNEVuuPp45FW2qjjeGOjdFhaEKs6nZX8w5LlXHG0Odm6KiqWLLG/qZc1yqjjeGOjdFRVPElquPZ05Fm6rjjbF4lU9CCNauXYuwsDCMGzcO169f59Mkih4MzRvOJbZc+1h9PHPmKac5zxkgPFJaWkquXbtGCCGkqqqKhIeHk9zcXI02p06dInPmzCEKhYJkZmaSSZMmsY5748YNXuw1Bj5sOfbHMRJ2IIz0/qY3CTsQRtb8skbj+Ngfx5rMtjW/rCEBuwJIr296kYBdAWTNL2tMHpOQpvkZan+P2t+brutC+v0yBV6Xwry8vODl5QVAs8qnegnftLQ0jB8/HiKRCH379kVlZSXu3bun6tfc4LK81FRlba05VRHbMpu+677wtYTJZqfJ3rl1VfksLS2Fj88z0cPHx6dZl/kV0uYNa05VxLY01hyWzixe5dMYbLmEryGbN4wpk2sI+lIVmfqZ+S59zFYOWN91WsKXI2xVPr29vVFS8uyLLikpgbe3t94xbbmEr64yto3aGVkm1xDEl8SMDi4WiU3+zHyX7GErB6zvupOTk2B+v0zB4lU+Q0JCcOTIERBCcPXqVbi5uTXb921AWJs3rDlVEdvSWHNYOuN0587MzMTRo0fx66+/oqysDE5OTujevTtGjBiBiIgIuLm5MfbjUuVz+PDhOH36NMLCwuDs7Iz169eb6aNZJ0xFB7Q3X3Ry64TlPy1HzNkYsyTbV99w0VCUoPJpJXxcfBDkE2SxogJsturbDMJWvEHfdaG88pkKa5XPuXPnwsvLCyNHjkSvXr3g6emJuro65Ofn48KFC8jIyMCsWbMwcuTIprJZUFUYm9oWbQW7AabcZFxs01aNtXGSOPGyXmzM98ZkKx/2Cen3yxRYnbu8vBweHh56B+HSxpwI6ctvalv67O6j8z0469UsjXOmlPBVh0tJXkMx5nszpWSwIQjp98sUWB/LtZ328ePHkMlkquOWLVs2qWM3d8ydbN+UUr1NDd0cYhic1fLvv/8e27dvh6Ojo+qcSCRCWloaL4ZRmBGLdCvYxuDjwq7OC2XzhS5bhWKf0OD8G/H111/jhx9+QHp6uuoPdeymx9wKNps6LyQFuTko3OaE8527Q4c
OcHZ25tMWCgcaRDONVEjPdcSZG98j4Ob38FEAw7wG4ExtsVIFvs6e+qidSzv8UflHo7mYSvLymSaJaTxtW5t76iRDYBXUGrhx4wZiY2PRp08fODg4qM5/8EHTL4sISfCwtC0ppz5E3J+HUSsWPTtJCCAS6e5kAurqtCnqNdP3xqbcGzK+KVj6Z2ouOD+Wr1y5EkFBQejTpw/8/f1VfyiWJSFPy7EB3hwb4Dc+W0hx9bYA58dymUyG2NhYPm2hGEGJBdJt8JEmyZB+VB3nBudfjWHDhmHfvn24d+8eHj16pPpDsSw+Fig3zUeaJEP6UXWcG5yd+9ixY/jiiy8wdepUREVFISoqChMnTuTTNgoHon0nwEmhJZtwk1GMgs/4bCHF1dsCnB/L09PT+bRD8JhbFTYX0hFrACjfvUvEaKyWcygU0Mmtkyp+HACes3sONbIa1qICUl8p8Nd5jbmjO402+nvhElcvlO/dGuCslu/duxfjxo1DixYtAAAVFRU4duwYpk+fzquBTDS1mqlPFfat8xWsssr795S9H/hhEVBf8+ycvTMwbhsQ8IplbTMBIdtmCJwfy/fv369ybABwd3fHgQPCz8hhDppD1g6jSIvXdGxAeZwWbxl7KBpwdm6FQgH1m7xcLkd9fT0vRgkNGtOsg4pCw85TmhTO79xDhw7Fu+++i6lTpwJQxpq//PLLvBkmJGhMsw7c2wMVBcznKRaH8517yZIlCAoKQmJiIhITEzF48GAsWbKET9sEA41p1sHIlcp3bHXsnZXnKRaH851bLBZj2rRpmDZtGp/2CBKry9pxbDFw+Rs8T+SASAL0nwWM/Vh3++z9yvfkikLlXXfkSlZBDMCzNsb01YFQVyWsEVbnnjdvHl555RW8/PLLsLe317hWUFCApKQktGvXDpMmTeLNSCEg9ZVaxy/ZscXAr/8BAIgAgMhVx4wOrq14VxQojwHuDm6CM6tjzpK+FA5LYWVlZdi5cydSU1Ph7u4ODw8P1NXVobCwEJ06dcL06dMRGhraVPYCENZShZBsAQCs9lA6tDYiCbCqvPH5f/XS8d7cAXjvmvnt+xum762pMq2wIbifqZGw3rlbt26NpUuXYunSpSgsLFQlSOzcuTPdAipEmBxb33kBKd50VcK8GLTtoH379ujXrx969uzZyLGnTJliVsMoRiKSGHZel7JtAcWbluE1L2bbU1RXV2euoSim0H+WYecFpHjTVQnzYraKIyKGPcSxsbE4deoUPD09cezYsUbXL1y4gLfffhvt2yvvEmFhYVi4cKG5TBIubOq0oeq1VvsUn65IsKtBiZ0EPjI5ou19IFUX07Tbt++JBFkJSiQS+MjliK6ogTTpTWWb7uFAbqrxthrwWdhyjVMMg9dyQlFRUZgxYwaWLVums82AAQPwxRdf8GmGsGBTpw1Vr7Xap8geIM7RA7Vi5Y/2rr0d4hRlwKkPlZtMmNorCGrt/m5vZ4e4Vi6AvA7SioJnSrsxtuq7bt+b8euxmlUJK8Bsj+VMovvAgQPh7u5urilsA7Z4bEPjtbXaJ7RqiVqx5o+1VixCQt5hA9qLkdCqJfN8hthKY88titnu3Js3bzaq39WrVxEREQEvLy8sW7YM3bt3Z+1jzVU+n68oBFMSJFJRiJs5OazX2cYrsWMWzkrEyiUezu11nDfEVn3X+a7yaQrNrspnamoqtm7digcPHoAQAkIIRCIRrly5AgDw8/MzeHJ/f3+kp6fDxcUFp0+fxoIFC5Cayr6eadVVPnXEY4vc2yvHYbvOMp6PTI679o1/rD4KMI6vs71Mx9KZIbbquS7kSpq2ss7N+bF8y5Yt+Pzzz3H58mVcuXIFmZmZKsc2FldXV7i4uAAAhg8fDplMhvJyhkALW4JNnTZUvdZqH/3wEZwUmrmXnBQE0b4TDGivQPRDHSm0DLFVQEq8uamsrMTevXt5n+fkyZO4ffu2UX05O7enpye6du1q1CS6KCsrU72rZ2dnQ6FQoFW
rVmadwyJk71dGfsW1VP6dvf/ZtYBXlMkM3DsAECn/Vk9uwHZdG632UjtPxLkFoI2cQEQI2sgJIsVuSPjjIAK+6YXwS6uQ0r6n7vYyBeIqn0JaXaNsM2COpi19/ql8Z45rqfy7/aBna+giifJY/XqffzL2f37f4MbfjRVRWVmJxMREzu0JIVAoDE94Z4pzs4afNjwmX7x4Effv30doaKhG3vLw8HCdfRcvXoyLFy/i4cOH8PT0xDvvvKOqMzZt2jTs2bMHiYmJkEgkcHJyQkxMDAIDA1mNFtJjUyNbTMhOwodtedfjEVeVrSGaOSkUiHMLgHQS919OAMyfjQ31zy6g70YfXH6/3nvvPaSlpaFLly548cUXcevWLVRWVkImkyE6OhqhoaEoLCzEnDlz0KdPH1y/fh1ffvkljhw5gqNHj8LDwwNt2rSBv78/5syZg7/++gurV6/Gw4cP4eTkhDVr1qCiogLz5s2Dq6sr3NzcsH37dnTs2JHz52B9587IyFD929nZGefOndO4rs+5P/5Yz04kADNmzMCMGTPYTLAu9CnEFvgFTqi4qlrmaqBWLEZCxVUYvODE9NnYUP/sAvtuTOH9999Hbm4ukpOTIZPJUFtbC1dXV5SXl2PKlCmqktZ37tzBpk2b0LdvX2RnZyM1NRVHjx5FfX09oqKiVLn/P/zwQ6xevRqdO3dGVlYWVq9ejd27dyMkJAQjRozA6NGjDbaR1bk3bNgAALh8+TL69++vce3y5csGT2jzCChWGwBKJDrUcB3n9WLsZ2joJ7DvxlwQQvDxxx/j0qVLEIvFKC0txf379wEAbdu2Rd++fQEAV65cwciRI+Ho6AhHR0cEBwcDAKqrq5GZmYno6GeReE+fPjXZLs5q+dq1a3H48GHWc80egWUn8ZHLcdeOQQ2X61bDdaLrs3Hpp6+/lWdu+eGHH1BeXo6kpCTY29sjJCREFY793HPPsfYnhKBFixZITk42q12sglpmZia+/vprlJeXY+fOnao/27dvh9yYXxBbR2AKcbR7X2Y13L2v4YMxfTY2bFQ9d3FxQXV1NQCgqqoKnp6esLe3x/nz51FUVMTYJzAwEBkZGairq0N1dTVOnToFQLlq1L59e/z3v/8FoHT2mzdvNprHUFidu76+Hk+ePIFcLkd1dbXqj6urK7Zt22bUpDaNoWo3z0gnJSrVcJnsbzVcZpyYBjB/Nm01XftYx0oAEcB3YwqtWrVCYGAgxo4di5s3b+LatWsYN24ckpOT4evry9gnICAAISEhiIiIwBtvvAE/Pz+4ubkBUC41Hzx4EBEREZBKpTh58iQAYMyYMfjPf/6D8ePH46+//jLIRs55y4uKitCuXTuDBucLQavlxvB3WiQ0pEVy8QIeqyUtcHQH6iqeHXcZDrx2lLtt2ps3tDeDqB87/70UWfPQPJta2GwTIHzaVl1dDRcXF9TU1GD69OlYs2YNbwU1OaVZ0se///1vsxnTLFFLiwRA6eCPtbKRqDs2APx5GtgVwcnBGTdvaG8GUT+uKde8ZsqmFkojVq5cidu3b6Ourg4TJkzgtVIuq3O//vrrAJTr3ffv30dERAQAICUlBZ6enrwZ1my4/I1x/f48za2dMctX6tjoUpal+Oijj5psLlbnHjRoEABg48aNSEpKUp0PCQlBVFQUf5Y1F3SlPzIX5lhmsvGlLFuFc/hpTU0NCgqeLWMUFBSgpsaEOwJFia70R+bCHMtM6ktZfM1BMTuc17ljY2Mxc+ZMdOjQAYQQFBcXY/Xq1Xza1jzoP0vznZcrXYZzazdypeEho+poL2UxhY9a4VJWc4Czcw8bNgypqanIy8sDAPj6+mrEmFOMZOzHwIPbmu/Q2uq4xBmQ12hez/8JiHNnLzoQ8Arw13lNNb7zUKA8z3C1nIciBBT+YHXuX375BYMHD260z7phzU1fbDmFA9n7gcKLmucUT4GorzQVavVXc3XH51J0IOu7Z+/2RK6cz9j1ZTMWIaBw58y
ZM1i3bh0UCgUmT56MN998k7UPq3NfunQJgwcP1thAog51bhNhU6C5qt2Xv2F2bqpwNylHMouw5cQtFD+qQduWzlgyqgfG9zMtPkQulyM+Ph47d+6Et7c3Jk2ahJCQEHTr1k1vP1bnXrRIuY7ZsIGEYmbYFGiuSrQVFB2wdY5kFiE26TfU1Ct/FkWPahCb9BsAmOTg2dnZ6NSpEzp06AAAkEqlSEtLY3Vuzmp5aGgo3n//fSQmJiI3N9doQylasCnQXJVoKyg6YOtsOXFL5dgN1NTLseXELZPGLS0thY/Ps8IM3t7eKC0tZe3H2bmPHz+OqVOn4tGjR9i8eTNCQ0OxYMEC46ylPMOYVEVMWEHRAVun+BHz65Ou83zD2bnFYjHs7OwgkUggFovh6elJI9TU0ZdaSR8BryhTDxmSqqjLcM32A+boV8sFtJHFlmnbkvk/YV3nueLt7Y2Skmf10kpLS+Ht7c3aj/NSWP/+/eHn54fZs2dj8uTJtpHrzFyYEnPNpGarL4tVFCivm+KQVOFuEpaM6qHxzg0AzvYSLBnVw6Rxe/fujfz8fBQUFMDb2xspKSmcwlg5O/dHH32Ey5cv47vvvsOBAwfQr18/DBw4EIMHDzbJcJvAFEWaixpO1W2roEE0M7dabmdnh5UrV2Lu3LmQy+WYOHEip/z+nJ07NDQUoaGh+OOPP3D27Fns2rULO3bsQHZ2tkmG2wSmKNJcVWuqblsF4/u1M9mZmRg+fDiGD+cYlfg3nJ37nXfewc2bN9GxY0cMGDAAmzZtQp8+fQw20iYxJX0Q19RFVN2mGAhn537zzTfxwgsvQKIjsd65c+cwZMgQsxlmVZgSc80l9puq2xQj4OzcvXszV2VsYOvWrY2cm62ELyEE69atw+nTp+Hk5ISNGzfyunndJLQzkHj4Avk/4Xld8dpcY665xn6nxQNJbzJnUtGRLeV5Gv/drDFbIUCmbE1sJXzPnDmD/Px8pKamIisrC3FxcThw4IC5TDIfTGr434/SIuCZwq1vSUrf2Ppiv7lkUtGRLUXEdJ3SbDBbCV+RqHE9R7YSvmlpaRg/fjxEIhH69u2LyspK3Lt3z1wmmQ9D4rvNMTZbGVxtaNlcCgNmu3Mbg3ZYnY+PD0pLS+Hl5aW3X1OX8NVVilYbQuSMZXaNGZutDK6h7XWVALYUtIQv/5jNuZsyM2qTl/DlqGiLRBLD7TKyDK6h7XWWALYQzTX7qTGwaVe6YHVutnrZDVs+P/30U86TNqAdVldSUsIprK7J4ZrNRFd8t6Fjs2U/0YZmSxEOZkr9rA6bdqULgwoBMmHKfu6QkBDs2bMHUqkUWVlZcHNzY30ktwhMGUj+VssJkUPElg3F0LHZsp/oU8vV2pOKQoioWt508JT6eeDAgSgsNDyIiXNRAmNgK+FLCEF8fDzOnj0LZ2dnrF+/nnXJDRDWY5OQbNGG2mYcRtv2r146gpk6AO9dM8mmwsJCzJs3z7yP5eqcOnUKubm5qiJnALBw4UKd7dlK+IpEIqxatcoQEygU4SKwxBicl8JWrlyJ48ePY8+ePQCAEydOoLi4mDfDKBSrQ2CJMTg7d2ZmJjZv3owWLVpg4cKF+P7775Gfn8+jaRSKlSGwxBicndvJyQkA4OzsjNLSUtjb26OsrIw3wygUq4OnxBiLFy/G1KlT8eeff2LYsGGcozg5v3OPGDEClZWVmDNnDqKioiASiTBp0iSjDaZQbBIeEmOwaVe64Ozcb7zxBhwcHDBq1CgEBwejrq4Ojo6ORk1KoVD4h/Nj+ZQpU1T/dnBwgJubm8Y5CoUiLFjv3GVlZSgtLUVtbS1u3Lih2v31+PFjWgiQQhEwrM79008/ISkpCSUlJRqFCVxdXbF48WJejaNQKMbD6twTJkzAhAkTcOLECYwaNaopbKJQKGaA8zt3YGAgli9fjrlz5wIAbt++LczEChQKBYABzh0bG4uhQ4eqkil07twZu3fv5s0wCoWi5O7du5g5cyb
GjBkDqVSKXbt2cerH2bkfPnyIMWPGQCxWdrGzs1P9m0KhKEnJS0H4wXAE7ApA+MFwpOSlmDymRCJBTEwMjh8/jn379uG7777D7du3Wftx9s7nnnsODx8+VKVTunr1Ktzc3Iy3mEKxMVLyUhD3cxzuVt8FAcHd6ruI+znOZAf38vJSJQ51dXWFr68vp0KAnINYYmJiMH/+fBQUFGDq1Kl4+PAhEhISjLeYQrExEq4koFZeq3GuVl6LhCsJkPpKzTJHYWEhcnJyONUM4Ozc3bp1Q1hYGJydneHi4oLQ0FB06dLFJEMpFFuipLrEoPOGUl1djUWLFmH58uVwdXVlbc/5sXzp0qXIy8vDW2+9hRkzZiA/Px9LliwxyVgKxZbwcfEx6Lwh1NfXY9GiRRg3bhzn7Eec79y5ubk4fvy46jgoKAhjxowx3Eob4UhmkVrBt7tmKfhGsW6iA6MR93OcxqO5k8QJ0YHRJo1LCMGKFSvg6+uL2bNnc+7H+c79wgsv4OrVq6rjrKws9OrVyzArbYQjmUWITfoNRY9qQAAUPapBbNJvOJJZZGnTKBZE6itF3EtxaOPSBiKI0MalDeJeijP5ffvy5ctITk7G+fPnERkZicjISJw+fZq1H+c79/Xr1zF16lS0bdsWAFBcXIwuXbpg3LhxAIAffvjBSNOtjy0nbmnUYAaAmno5tpy4Re/ezRypr9Rs4lkDAwYMwK1btwzux9m5d+zYYfDgtkrxI+YNM7rOUyiWgLNzN2XRAaHTtqUzihgcuW1LZ4bWFIploCFmRrBkVA8422uWMna2l2DJqB4WsohCaQzvzn3mzBmMGjUKYWFh+PLLLxtdT0pKQlBQkEoosIbNKOP7tcOGqN5o19IZIgDtWjpjYv922HLiFrrEpGDIxnQqrlEsDq+FAOVyOeLj47Fz5054e3tj0qRJCAkJQbdu3TTajRkzBitXWle5m/H92mF8v3bIycnBrdoWiE36TSWyNajnDe0oFEvA6507OzsbnTp1QocOHeDg4ACpVIq0tDQ+p7QI+tRzCsVS8Hrn1i7R6+3tjezs7EbtUlNTcenSJXTp0gWxsbFo06aN3nGbuoSvPmpra/Wq55a001rK5AoNWsLXTAQHB2Ps2LFwcHDA999/j2XLlrHuE2/yEr56yMnJ0aueW9JOm6zH1QQI2TZD4PWxXLtEb2lpaaMSva1atYKDgwMAYPLkybh+/TqfJvECVc8pQoTXO3fv3r2Rn5+PgoICeHt7IyUlBR999JFGm3v37qnK9qanp6Nr1658mmQ2NGPLndG+lRNy71Wrrgd2dNcppk3/6hec+6NcdTykqwf2vjGY81w0jp3CBV6d287ODitXrsTcuXMhl8sxceJEdO/eHQkJCejVqxdGjhyJb7/9Funp6ZBIJHB3d9fIsCpUGmLL1dVxbc79UY4PjvyGteM1SxJrO3ZD2+lf/cLo4ExzUSWewgVe63PzhaXfiYZsTGd0aG0kIhH+2KC5c65zjO6sHPkbG8ck65qrXUtnnIsJ0Tu/pb8nfVDb+IdGqBkB1xhyuRn+36Rx7BRjoc5tBFxjyCV/55vjYy4ax05hgzq3ETCp40xMe7FDo3NDunowttV1nirxFGOx+Dp3U6GtOAc/3xoZN8t0KtD6FOrx/drh1zvlSLxQADkhkIhEcHEQo7LuWZRady+XRmIaAOx9Y7BBajnTXBP7t6NiGoWVZuHcTIrznvN/qa5rK9BsCvWRzCIculykeqeWE6Lh2ACQe6+aUS0HoHfZi8l27bkOXS7CgE4e1MEpemkWj+VMsd/aqMeCs8WKcxkPABIvFBhp8TNo3DrFWJqFc3NVlhvasSnUVC2nWAPNwrm5KssN7dgUaqqWU6yBZuHcXNRtdQWaTaE2RS03FKqWU4zFZiPUtBXp7l4uePJUoVK/ZXI5Squeqq63cJSg+qlCp/qtfSrTW38AAAr1SURBVF0EAhmHb04iEiHItxXyH9So5u7s6YzzeQ9VY/m
2fg55ZU9Ux9Ne7KAhxBmq9BvyPVkKahv/2KRazhS/nXuvGkO6euBcTAjjdXVHZlK/ta9zRU6IxlxFj2o0wknlhGhsOJETolLyGxy8IesLQGPNKdyxycdybcfVPq/rupDQpbRT9ZzCFZt0bltA19MBVc8pXKHOLVB0Ke1UPadwxSadmy1+W9d1IaFLaafqOYUrNunce98Y3MiB1eO3ma57uzmo7pYSkQjebg56rztJNO+sdjqWtCUiEYZ09dDIcT6kq4fGWN29XDSOZwR1ZAxbBZhzpm+I6k3FNEojbEYtZ9rooS+Gmy2+W3u8zp7OuP/4oer6pIHK5SqmZRP1vj7uTpg8oKNZnU9dPadQdGETzm3u5SGm8bSXrxqWq6b3sGPtS5eqKJbAJh7Lzb08ZMrGELpURREKNuHc5l4eMmVjCF2qoggFm3Bucy8PmbIxhC5VUYSCxat8Pn36FO+++y7CwsIwefJkFBYWGjyHuZeHTNkYQpeqKEKBV+duqPK5Y8cOpKSk4NixY7h9+7ZGmwMHDqBFixb48ccfMWvWLGzdutXgecy9PMQ0nvbyla7lKrpURREKvKrl6lU+AaiqfKqX8E1PT8fChQsBAKNGjUJ8fDwIIRAZuBfa3MtDpoxHl6ooQoDXOzdTlc/S0tJGbRqqetrZ2cHNzQ0PHz4EhUIxDatc5xZaCV+h2KINtc04bKWEr8WrfHp7e+Pu3bsAAJlMhqqqKrRq1YpPs8yKk5OTpU3QCbXNONRty83NtaAlpmHxKp8hISE4fPgw+vXrhxMnTiAoKIj1fbtv3758mk2h2AS8p1k6ffo01q9fr6ryOX/+fI0qn3V1dViyZAlycnLg7u6Of/3rXyoBjkKhGI9V5lCjUCjs2ESEGoVCaQx1bgrFRqHOTaHYKNS5KRQbxSqDWIRASEgIXFxcIBaLIZFIkJSUZGmTVFRWVuKDDz7A77//DpFIhPXr16Nfv36WNgsAkJeXh/fee091XFBQgEWLFmHWrFmWM0qNb775BgcOHIBIJIKfnx82bNgAR0dHS5tlHIRiFMHBweTBgweWNoORpUuXkv379xNCCKmrqyMVFRUWtogZmUxGXnrpJVJYWGhpUwghhJSUlJDg4GBSU1NDCCFk0aJF5NChQxa2ynjoY7mNUVVVhUuXLmHSpEkAAAcHB7Ro0cLCVjHzyy+/oEOHDmjXTjibbORyOWprayGTyVBbWwsvLy9Lm2Q01LlNYM6cOYiKisK+ffssbYqKwsJCeHh4IDY2FuPHj8eKFSvw5MkTS5vFSEpKCsaOHWtpM1R4e3vj9ddfR3BwMIYOHQpXV1cMHTrU0mYZDXVuI0lMTMThw4fx1VdfYe/evbh06ZKlTQKgjM+/ceMGpk2bhiNHjsDZ2ZkxSYalefr0KdLT0zF69GhLm6KioqICaWlpSEtLw9mzZ1FTU4Pk5GRLm2U01LmNpGEDjKenJ8LCwpCdnW1hi5T4+PjAx8cHffr0AQCMHj0aN27csLBVjTlz5gz8/f3xj3/8w9KmqPj555/Rvn17eHh4wN7eHuHh4cjMzLS0WUZDndsInjx5gsePH6v+fe7cOXTv3t3CVilp3bo1fHx8kJeXB0D5Xtu1a1cLW9WYlJQUSKVSS5uhQdu2bZGVlYWamhoQQgT73XGFxpYbQUFBARYsWABAKcCMHTsW8+fPt7BVz8jJycGKFStQX1+PDh06YMOGDXB3d7e0WSqePHmC4OBgnDx5Em5ubpY2R4Nt27bh+PHjsLOzQ8+ePbFu3To4ODiwdxQg1LkpFBuFPpZTKDYKdW4KxUahzk2h2CjUuSkUG4U6N4Vio1DnplBsFOrcAuTChQt46623dF5PSkpCfHy82edNSkrSKBoREhKC8vJyzv1PnjyJTz/91GQ7MjIykJCQYPI4zR3q3BQVhw8fxr1794zuv2PHDvzzn/802Y4RI0YgIyMDNTW07LEp0GQNRvLkyRO8++67KCkpgUKhwNtvv42OHTti48a
NePLkCVq1aoUNGzbAy8sLM2fORI8ePXDp0iXI5XKsX78eAQEByM7Oxrp161BXVwcnJyesX78evr6+BtlRXl6OVatWobi4GACwfPly9O/fH9u3b0dxcTEKCwtRXFyM1157Da+++ioA4LPPPsPRo0fh4eGBNm3awN/fH+3atcO1a9fwf//3f3ByclLtdNuzZw8yMjIgk8nwySef6AzH/PPPP2Fvbw8PDw8AwP3797Fq1SoUFBQAAOLi4uDl5YW5c+eib9++yMzMRK9evTBx4kRs27YN5eXl2Lp1KwICAiASiTBo0CBkZGRgzJgxRv18KKDJGozlf//7H1mxYoXquLKykkyZMkWVwCElJYXExMQQQgiZMWOGqu3FixeJVColhBBSVVVF6uvrCSGEnDt3jixcuJAQQsj58+fJm2++qXPuQ4cOkdWrVxNCCFm8eDG5dOkSIYSQoqIiMnr0aEIIIdu2bSNTpkwhdXV15MGDB2TQoEHk6dOnJCsri0RERJDa2lpSVVVFwsLCyI4dO1R2Zmdnq+YJDg4mu3fvJoQQsmfPHrJ8+XKdNh08eJBs2LBBdRwdHU127txJCFEmZaisrCQFBQWkZ8+e5ObNm0Qul5MJEyaQmJgYolAoyI8//kjmz5+v6p+cnEzi4+N1zkdhh965jcTPzw+bNm3Cli1bEBwcjBYtWuD333/H7NmzAQAKhQKtW7dWtW/YJDFw4EA8fvwYlZWVqK6uxrJly3Dnzh2IRCLU19cbbMfPP/+sURb58ePHqK6uBgAMHz4cDg4O8PDwgIeHBx48eIArV65g5MiRcHR0hKOjI4KDg/WOHx4eDgDo1asXfvzxR53tysrKVHdtADh//jw2b94MAJBIJHBzc0NFRQXat2+PHj2Utcq7deuGwYMHQyQSoUePHigqKlL19/T0NOkVgUIfy42mS5cuSEpKwunTp/HJJ58gKCgI3bt315m4QbtEkkgkQkJCAl588UV89tlnKCwsVD02G4JCocD+/fsZ83ypb3iQSCSQyWQGj29vbw8AEIvFkMvlOts5OTmhqqqKdTx1m8RisepYJBJpjF9XV2e9ucsEAhXUjKS0tBTOzs6IjIzEnDlzkJWVhfLyctX+3/r6eo0icsePHwcA/Prrr3Bzc4ObmxuqqqpU+8IPHz5slB1Dhw7Ft99+qzpmq5wZGBiIjIwM1NXVobq6GqdOnVJdc3FxUd31DcXX1xd37txRHQ8ePBjfffcdAOXOOS6Or05+fj78/PyMsoWihN65jeT333/H5s2bIRaLYWdnh7i4ONjZ2WHt2rWoqqqCXC7Ha6+9ptrn7ejoiPHjx0Mmk2H9+vUAgLlz5yImJgaff/45hg8fbpQdK1asQHx8PMaNGwe5XI4BAwboXSYLCAhASEgIIiIi4OnpCT8/P9W2ywkTJmDVqlUaghpXBg4ciE2bNoEQApFIhBUrVuDDDz/EoUOHIBaLERcXp/GawsaFCxewePFig2ygaGHpl/7mgLZQZWkeP35MCCHkyZMnZMKECeTatWtmGXfNmjXk3LlzJo9TVlZGXn31VTNY1Lyhd+5myMqVK3H79m3U1dVhwoQJ8Pf3N8u48+bNQ1ZWlsnjFBcXIyYmxgwWNW9osgYBc+jQIezevVvjXGBgIFatWmUhi4RpE4UZ6twUio1C1XIKxUahzk2h2CjUuSkUG4U6N4Vio/w/c7frjhEncosAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "tags": [] + }, + "output_type": "display_data" + } + ], + "source": [ + "g = sns.FacetGrid(df, hue='target')\n", + "g.map(plt.scatter, 'sepal_length_(cm)', 'petal_width_(cm)');\n", + "g.add_legend();" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dnoGFA4MzW9o" + }, + "source": [ + "Можно все предыдущие графики вывести одной строчкой кода" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 743 + }, + "executionInfo": { + "elapsed": 12239, + "status": "ok", + "timestamp": 1614779829439, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "izSb9tJThvhk", + "outputId": "0a4d8076-27df-4520-bac8-004f756b4670" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 69, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light", + "tags": [] + }, + "output_type": "display_data" + } + ], + "source": [ + "sns.pairplot(df,hue='target',diag_kind=\"kde\",kind=\"scatter\",palette=\"husl\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 296 + }, + "executionInfo": { + "elapsed": 714, + "status": "ok", + "timestamp": 1614767558267, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "MOtqb-wJhvfD", + "outputId": "bc730c65-b8f4-4417-d1f5-68fcc327dbcb" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 62, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "tags": [] + }, + "output_type": "display_data" + } + ], + "source": [ + "sns.boxplot(x=\"target\", y=\"sepal_length_(cm)\", data=df)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 299 + }, + "executionInfo": { + "elapsed": 648, + "status": "ok", + "timestamp": 1614767566285, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "pop0xJy808kv", + "outputId": "0c951aff-d432-4cab-ba5c-fd913d8256c9" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 63, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "tags": [] + }, + "output_type": "display_data" + } + ], + "source": [ + "sns.boxplot(x=\"target\", y=\"sepal_width_(cm)\", data=df)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 296 + }, + "executionInfo": { + "elapsed": 1021, + "status": "ok", + "timestamp": 1614767569562, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "zL6zrC0108t4", + "outputId": "41a4f4ed-c685-4b02-adec-77ede4cf6761" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 64, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "tags": [] + }, + "output_type": "display_data" + } + ], + "source": [ + "sns.boxplot(x=\"target\", y=\"petal_length_(cm)\", data=df)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 296 + }, + "executionInfo": { + "elapsed": 672, + "status": "ok", + "timestamp": 1614767574695, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "gt-CS-v80841", + "outputId": "17e5b682-a469-43a9-fe36-3ad50c2b4449" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 65, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "tags": [] + }, + "output_type": "display_data" + } + ], + "source": [ + "sns.boxplot(x=\"target\", y=\"petal_width_(cm)\", data=df)" + ] + } + ], + "metadata": { + "colab": { + "authorship_tag": "ABX9TyPGZA72+5Brg/wHtKFk27jK", + "collapsed_sections": [], + "name": "01_Pandas.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.0" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/Pandas (06.03)/Pandas. Task. Part 1.ipynb b/Pandas (06.03)/Pandas. Task. Part 1.ipynb index 5172e85..b8a4f64 100644 --- a/Pandas (06.03)/Pandas. Task. Part 1.ipynb +++ b/Pandas (06.03)/Pandas. Task. Part 1.ipynb @@ -1 +1,1157 @@ -{"nbformat":4,"nbformat_minor":0,"metadata":{"anaconda-cloud":{},"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.7.6"},"colab":{"name":"01_task_pandas.ipynb","provenance":[],"collapsed_sections":[]}},"cells":[{"cell_type":"markdown","metadata":{"id":"UTKVH3sMutTM"},"source":["**В задании предлагается с помощью Pandas ответить на несколько вопросов по данным репозитория UCI [Adult](https://archive.ics.uci.edu/ml/datasets/Adult)**"]},{"cell_type":"markdown","metadata":{"id":"3lUT-CqYutTO"},"source":["Уникальные значения признаков (больше информации по ссылке выше):\n","- age: continuous.\n","- workclass: Private, Self-emp-not-inc, Self-emp-inc, Federal-gov, Local-gov, State-gov, Without-pay, Never-worked.\n","- fnlwgt: continuous.\n","- education: Bachelors, 
Some-college, 11th, HS-grad, Prof-school, Assoc-acdm, Assoc-voc, 9th, 7th-8th, 12th, Masters, 1st-4th, 10th, Doctorate, 5th-6th, Preschool.\n","- education-num: continuous.\n","- marital-status: Married-civ-spouse, Divorced, Never-married, Separated, Widowed, Married-spouse-absent, Married-AF-spouse.\n","- occupation: Tech-support, Craft-repair, Other-service, Sales, Exec-managerial, Prof-specialty, Handlers-cleaners, Machine-op-inspct, Adm-clerical, Farming-fishing, Transport-moving, Priv-house-serv, Protective-serv, Armed-Forces.\n","- relationship: Wife, Own-child, Husband, Not-in-family, Other-relative, Unmarried.\n","- race: White, Asian-Pac-Islander, Amer-Indian-Eskimo, Other, Black.\n","- sex: Female, Male.\n","- capital-gain: continuous.\n","- capital-loss: continuous.\n","- hours-per-week: continuous.\n","- native-country: United-States, Cambodia, England, Puerto-Rico, Canada, Germany, Outlying-US(Guam-USVI-etc), India, Japan, Greece, South, China, Cuba, Iran, Honduras, Philippines, Italy, Poland, Jamaica, Vietnam, Mexico, Portugal, Ireland, France, Dominican-Republic, Laos, Ecuador, Taiwan, Haiti, Columbia, Hungary, Guatemala, Nicaragua, Scotland, Thailand, Yugoslavia, El-Salvador, Trinadad&Tobago, Peru, Hong, Holand-Netherlands. 
\n","- salary: >50K,<=50K"]},{"cell_type":"code","metadata":{"id":"6GzulHvOutTR"},"source":["import pandas as pd"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"SJ3LbaoiutTT","colab":{"base_uri":"https://localhost:8080/","height":380},"executionInfo":{"status":"ok","timestamp":1626441443051,"user_tz":-300,"elapsed":499,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"eab110b9-0f5f-4bcd-db91-328a0b391379"},"source":["data = pd.read_csv(\"https://raw.githubusercontent.com/aksenov7/Kaggle_competition_group/master/adult.data.csv\")\n","data.head()"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
ageworkclassfnlwgteducationeducation-nummarital-statusoccupationrelationshipracesexcapital-gaincapital-losshours-per-weeknative-countrysalary
039State-gov77516Bachelors13Never-marriedAdm-clericalNot-in-familyWhiteMale2174040United-States<=50K
150Self-emp-not-inc83311Bachelors13Married-civ-spouseExec-managerialHusbandWhiteMale0013United-States<=50K
238Private215646HS-grad9DivorcedHandlers-cleanersNot-in-familyWhiteMale0040United-States<=50K
353Private23472111th7Married-civ-spouseHandlers-cleanersHusbandBlackMale0040United-States<=50K
428Private338409Bachelors13Married-civ-spouseProf-specialtyWifeBlackFemale0040Cuba<=50K
\n","
"],"text/plain":[" age workclass fnlwgt ... hours-per-week native-country salary\n","0 39 State-gov 77516 ... 40 United-States <=50K\n","1 50 Self-emp-not-inc 83311 ... 13 United-States <=50K\n","2 38 Private 215646 ... 40 United-States <=50K\n","3 53 Private 234721 ... 40 United-States <=50K\n","4 28 Private 338409 ... 40 Cuba <=50K\n","\n","[5 rows x 15 columns]"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"code","metadata":{"id":"EpQFv8t1ds05"},"source":["# def married(row):\n","# return \"Married\" in row\n","data[\"married\"] = data[\"marital-status\"].apply(lambda row: \"Married\" in row)"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":756},"id":"3Bb2mRTEeoJK","executionInfo":{"status":"ok","timestamp":1626441731759,"user_tz":-300,"elapsed":481,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"9dd7d83b-f51a-4e11-f6dc-035a844f81c9"},"source":["data"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
ageworkclassfnlwgteducationeducation-nummarital-statusoccupationrelationshipracesexcapital-gaincapital-losshours-per-weeknative-countrysalarymarried
039State-gov77516Bachelors13Never-marriedAdm-clericalNot-in-familyWhiteMale2174040United-States<=50KFalse
150Self-emp-not-inc83311Bachelors13Married-civ-spouseExec-managerialHusbandWhiteMale0013United-States<=50KTrue
238Private215646HS-grad9DivorcedHandlers-cleanersNot-in-familyWhiteMale0040United-States<=50KFalse
353Private23472111th7Married-civ-spouseHandlers-cleanersHusbandBlackMale0040United-States<=50KTrue
428Private338409Bachelors13Married-civ-spouseProf-specialtyWifeBlackFemale0040Cuba<=50KTrue
...................................................
3255627Private257302Assoc-acdm12Married-civ-spouseTech-supportWifeWhiteFemale0038United-States<=50KTrue
3255740Private154374HS-grad9Married-civ-spouseMachine-op-inspctHusbandWhiteMale0040United-States>50KTrue
3255858Private151910HS-grad9WidowedAdm-clericalUnmarriedWhiteFemale0040United-States<=50KFalse
3255922Private201490HS-grad9Never-marriedAdm-clericalOwn-childWhiteMale0020United-States<=50KFalse
3256052Self-emp-inc287927HS-grad9Married-civ-spouseExec-managerialWifeWhiteFemale15024040United-States>50KTrue
\n","

32561 rows × 16 columns

\n","
"],"text/plain":[" age workclass fnlwgt ... native-country salary married\n","0 39 State-gov 77516 ... United-States <=50K False\n","1 50 Self-emp-not-inc 83311 ... United-States <=50K True\n","2 38 Private 215646 ... United-States <=50K False\n","3 53 Private 234721 ... United-States <=50K True\n","4 28 Private 338409 ... Cuba <=50K True\n","... ... ... ... ... ... ... ...\n","32556 27 Private 257302 ... United-States <=50K True\n","32557 40 Private 154374 ... United-States >50K True\n","32558 58 Private 151910 ... United-States <=50K False\n","32559 22 Private 201490 ... United-States <=50K False\n","32560 52 Self-emp-inc 287927 ... United-States >50K True\n","\n","[32561 rows x 16 columns]"]},"metadata":{"tags":[]},"execution_count":10}]},{"cell_type":"markdown","metadata":{"id":"MoK8B5fIutTW"},"source":["**1. Сколько мужчин и женщин (признак *sex*) представлено в этом наборе данных?**"]},{"cell_type":"code","metadata":{"collapsed":true,"id":"hdzky90TutTY"},"source":["# Ваш код здесь"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"adF8lgVbutTZ"},"source":["**2. Каков средний возраст (признак *age*) женщин?**"]},{"cell_type":"code","metadata":{"collapsed":true,"id":"K6C2qZ_zutTb"},"source":["# Ваш код здесь"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"-Cz1S7-HutTd"},"source":["**3. Какова доля граждан Германии (признак *native-country*)?**"]},{"cell_type":"code","metadata":{"collapsed":true,"id":"Y4mmqN6outTf"},"source":["# Ваш код здесь"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"Do-rEgaautTg"},"source":["**4-5. Каковы средние значения и среднеквадратичные отклонения возраста тех, кто получает более 50K в год (признак *salary*) и тех, кто получает менее 50K в год? 
**"]},{"cell_type":"code","metadata":{"collapsed":true,"id":"eSuk0CAnutTh"},"source":["# Ваш код здесь"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"rK9SwvI_utTj"},"source":["**6. Правда ли, что люди, которые получают больше 50k, имеют как минимум высшее образование? (признак *education – Bachelors, Prof-school, Assoc-acdm, Assoc-voc, Masters* или *Doctorate*)**"]},{"cell_type":"code","metadata":{"collapsed":true,"id":"eygYabkdutTj"},"source":["# Ваш код здесь"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"4DqPASEsutTk"},"source":["**7. Выведите статистику возраста для каждой расы (признак *race*) и каждого пола. Используйте *groupby* и *describe*. Найдите таким образом максимальный возраст мужчин расы *Amer-Indian-Eskimo*.**"]},{"cell_type":"code","metadata":{"collapsed":true,"id":"fYkBDZMdutTl"},"source":["# Ваш код здесь"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"cn-jYXhzutTl"},"source":["**8. Среди кого больше доля зарабатывающих много (>50K): среди женатых или холостых мужчин (признак *marital-status*)? Женатыми считаем тех, у кого *marital-status* начинается с *Married* (Married-civ-spouse, Married-spouse-absent или Married-AF-spouse), остальных считаем холостыми.**"]},{"cell_type":"code","metadata":{"collapsed":true,"id":"4hIQXgGAutTm"},"source":["# Ваш код здесь"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"Rsh8YvoXutTm"},"source":["**9. Какое максимальное число часов человек работает в неделю (признак *hours-per-week*)? Сколько людей работают такое количество часов и каков среди них процент зарабатывающих много?**"]},{"cell_type":"code","metadata":{"collapsed":true,"id":"RK1JQSIZutTn"},"source":["# Ваш код здесь"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"kUXV84AjutTn"},"source":["**10. 
Посчитайте среднее время работы (*hours-per-week*) зарабатывающих мало и много (*salary*) для каждой страны (*native-country*).**"]},{"cell_type":"code","metadata":{"collapsed":true,"id":"3gzYG3CDutTn"},"source":["# Ваш код здесь"],"execution_count":null,"outputs":[]}]} \ No newline at end of file +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "UTKVH3sMutTM" + }, + "source": [ + "**В задании предлагается с помощью Pandas ответить на несколько вопросов по данным репозитория UCI [Adult](https://archive.ics.uci.edu/ml/datasets/Adult)**" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3lUT-CqYutTO" + }, + "source": [ + "Уникальные значения признаков (больше информации по ссылке выше):\n", + "- age: continuous.\n", + "- workclass: Private, Self-emp-not-inc, Self-emp-inc, Federal-gov, Local-gov, State-gov, Without-pay, Never-worked.\n", + "- fnlwgt: continuous.\n", + "- education: Bachelors, Some-college, 11th, HS-grad, Prof-school, Assoc-acdm, Assoc-voc, 9th, 7th-8th, 12th, Masters, 1st-4th, 10th, Doctorate, 5th-6th, Preschool.\n", + "- education-num: continuous.\n", + "- marital-status: Married-civ-spouse, Divorced, Never-married, Separated, Widowed, Married-spouse-absent, Married-AF-spouse.\n", + "- occupation: Tech-support, Craft-repair, Other-service, Sales, Exec-managerial, Prof-specialty, Handlers-cleaners, Machine-op-inspct, Adm-clerical, Farming-fishing, Transport-moving, Priv-house-serv, Protective-serv, Armed-Forces.\n", + "- relationship: Wife, Own-child, Husband, Not-in-family, Other-relative, Unmarried.\n", + "- race: White, Asian-Pac-Islander, Amer-Indian-Eskimo, Other, Black.\n", + "- sex: Female, Male.\n", + "- capital-gain: continuous.\n", + "- capital-loss: continuous.\n", + "- hours-per-week: continuous.\n", + "- native-country: United-States, Cambodia, England, Puerto-Rico, Canada, Germany, Outlying-US(Guam-USVI-etc), India, Japan, Greece, South, China, Cuba, Iran, Honduras, Philippines, Italy, Poland, 
Jamaica, Vietnam, Mexico, Portugal, Ireland, France, Dominican-Republic, Laos, Ecuador, Taiwan, Haiti, Columbia, Hungary, Guatemala, Nicaragua, Scotland, Thailand, Yugoslavia, El-Salvador, Trinadad&Tobago, Peru, Hong, Holand-Netherlands. \n", + "- salary: >50K,<=50K" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "6GzulHvOutTR" + }, + "outputs": [], + "source": [ + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 380 + }, + "executionInfo": { + "elapsed": 499, + "status": "ok", + "timestamp": 1626441443051, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "SJ3LbaoiutTT", + "outputId": "eab110b9-0f5f-4bcd-db91-328a0b391379" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ageworkclassfnlwgteducationeducation-nummarital-statusoccupationrelationshipracesexcapital-gaincapital-losshours-per-weeknative-countrysalary
039State-gov77516Bachelors13Never-marriedAdm-clericalNot-in-familyWhiteMale2174040United-States<=50K
150Self-emp-not-inc83311Bachelors13Married-civ-spouseExec-managerialHusbandWhiteMale0013United-States<=50K
238Private215646HS-grad9DivorcedHandlers-cleanersNot-in-familyWhiteMale0040United-States<=50K
353Private23472111th7Married-civ-spouseHandlers-cleanersHusbandBlackMale0040United-States<=50K
428Private338409Bachelors13Married-civ-spouseProf-specialtyWifeBlackFemale0040Cuba<=50K
\n", + "
" + ], + "text/plain": [ + " age workclass fnlwgt education education-num \\\n", + "0 39 State-gov 77516 Bachelors 13 \n", + "1 50 Self-emp-not-inc 83311 Bachelors 13 \n", + "2 38 Private 215646 HS-grad 9 \n", + "3 53 Private 234721 11th 7 \n", + "4 28 Private 338409 Bachelors 13 \n", + "\n", + " marital-status occupation relationship race sex \\\n", + "0 Never-married Adm-clerical Not-in-family White Male \n", + "1 Married-civ-spouse Exec-managerial Husband White Male \n", + "2 Divorced Handlers-cleaners Not-in-family White Male \n", + "3 Married-civ-spouse Handlers-cleaners Husband Black Male \n", + "4 Married-civ-spouse Prof-specialty Wife Black Female \n", + "\n", + " capital-gain capital-loss hours-per-week native-country salary \n", + "0 2174 0 40 United-States <=50K \n", + "1 0 0 13 United-States <=50K \n", + "2 0 0 40 United-States <=50K \n", + "3 0 0 40 United-States <=50K \n", + "4 0 0 40 Cuba <=50K " + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = pd.read_csv(\"https://raw.githubusercontent.com/aksenov7/Kaggle_competition_group/master/adult.data.csv\")\n", + "data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "id": "EpQFv8t1ds05" + }, + "outputs": [], + "source": [ + "# def married(row):\n", + "# return \"Married\" in row\n", + "data[\"married\"] = data[\"marital-status\"].apply(lambda row: \"Married\" in row)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 756 + }, + "executionInfo": { + "elapsed": 481, + "status": "ok", + "timestamp": 1626441731759, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "3Bb2mRTEeoJK", + "outputId": "9dd7d83b-f51a-4e11-f6dc-035a844f81c9" + }, + 
"outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ageworkclassfnlwgteducationeducation-nummarital-statusoccupationrelationshipracesexcapital-gaincapital-losshours-per-weeknative-countrysalarymarried
039State-gov77516Bachelors13Never-marriedAdm-clericalNot-in-familyWhiteMale2174040United-States<=50KFalse
150Self-emp-not-inc83311Bachelors13Married-civ-spouseExec-managerialHusbandWhiteMale0013United-States<=50KTrue
238Private215646HS-grad9DivorcedHandlers-cleanersNot-in-familyWhiteMale0040United-States<=50KFalse
353Private23472111th7Married-civ-spouseHandlers-cleanersHusbandBlackMale0040United-States<=50KTrue
428Private338409Bachelors13Married-civ-spouseProf-specialtyWifeBlackFemale0040Cuba<=50KTrue
...................................................
3255627Private257302Assoc-acdm12Married-civ-spouseTech-supportWifeWhiteFemale0038United-States<=50KTrue
3255740Private154374HS-grad9Married-civ-spouseMachine-op-inspctHusbandWhiteMale0040United-States>50KTrue
3255858Private151910HS-grad9WidowedAdm-clericalUnmarriedWhiteFemale0040United-States<=50KFalse
3255922Private201490HS-grad9Never-marriedAdm-clericalOwn-childWhiteMale0020United-States<=50KFalse
3256052Self-emp-inc287927HS-grad9Married-civ-spouseExec-managerialWifeWhiteFemale15024040United-States>50KTrue
\n", + "

32561 rows × 16 columns

\n", + "
" + ], + "text/plain": [ + " age workclass fnlwgt education education-num \\\n", + "0 39 State-gov 77516 Bachelors 13 \n", + "1 50 Self-emp-not-inc 83311 Bachelors 13 \n", + "2 38 Private 215646 HS-grad 9 \n", + "3 53 Private 234721 11th 7 \n", + "4 28 Private 338409 Bachelors 13 \n", + "... ... ... ... ... ... \n", + "32556 27 Private 257302 Assoc-acdm 12 \n", + "32557 40 Private 154374 HS-grad 9 \n", + "32558 58 Private 151910 HS-grad 9 \n", + "32559 22 Private 201490 HS-grad 9 \n", + "32560 52 Self-emp-inc 287927 HS-grad 9 \n", + "\n", + " marital-status occupation relationship race sex \\\n", + "0 Never-married Adm-clerical Not-in-family White Male \n", + "1 Married-civ-spouse Exec-managerial Husband White Male \n", + "2 Divorced Handlers-cleaners Not-in-family White Male \n", + "3 Married-civ-spouse Handlers-cleaners Husband Black Male \n", + "4 Married-civ-spouse Prof-specialty Wife Black Female \n", + "... ... ... ... ... ... \n", + "32556 Married-civ-spouse Tech-support Wife White Female \n", + "32557 Married-civ-spouse Machine-op-inspct Husband White Male \n", + "32558 Widowed Adm-clerical Unmarried White Female \n", + "32559 Never-married Adm-clerical Own-child White Male \n", + "32560 Married-civ-spouse Exec-managerial Wife White Female \n", + "\n", + " capital-gain capital-loss hours-per-week native-country salary \\\n", + "0 2174 0 40 United-States <=50K \n", + "1 0 0 13 United-States <=50K \n", + "2 0 0 40 United-States <=50K \n", + "3 0 0 40 United-States <=50K \n", + "4 0 0 40 Cuba <=50K \n", + "... ... ... ... ... ... \n", + "32556 0 0 38 United-States <=50K \n", + "32557 0 0 40 United-States >50K \n", + "32558 0 0 40 United-States <=50K \n", + "32559 0 0 20 United-States <=50K \n", + "32560 15024 0 40 United-States >50K \n", + "\n", + " married \n", + "0 False \n", + "1 True \n", + "2 False \n", + "3 True \n", + "4 True \n", + "... ... 
\n", + "32556 True \n", + "32557 True \n", + "32558 False \n", + "32559 False \n", + "32560 True \n", + "\n", + "[32561 rows x 16 columns]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MoK8B5fIutTW" + }, + "source": [ + "**1. Сколько мужчин и женщин (признак *sex*) представлено в этом наборе данных?**" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": { + "id": "hdzky90TutTY" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Male 21790\n", + "Female 10771\n", + "Name: sex, dtype: int64" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data['sex'].value_counts()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "adF8lgVbutTZ" + }, + "source": [ + "**2. Каков средний возраст (признак *age*) женщин?**" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "id": "K6C2qZ_zutTb" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "36.85823043357163" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data[data.sex =='Female']['age'].mean()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-Cz1S7-HutTd" + }, + "source": [ + "**3. Какова доля граждан Германии (признак *native-country*)?**" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "id": "Y4mmqN6outTf" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0.004207487485028101" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "list(data['native-country'] == 'Germany').count(True) / data.shape[0]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Do-rEgaautTg" + }, + "source": [ + "**4-5. 
Каковы средние значения и среднеквадратичные отклонения возраста тех, кто получает более 50K в год (признак *salary*) и тех, кто получает менее 50K в год?**" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "id": "eSuk0CAnutTh" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
meanstd
salary
<=50K36.78373814.020088
>50K44.24984110.519028
\n", + "
" + ], + "text/plain": [ + " mean std\n", + "salary \n", + "<=50K 36.783738 14.020088\n", + ">50K 44.249841 10.519028" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import numpy as np\n", + "#Не понял как тут добавить подпись, чтобы было понятно что это средний возраст и среднеквадратичное отклонение\n", + "data.groupby('salary')['age'].agg([np.mean, np.std])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rK9SwvI_utTj" + }, + "source": [ + "**6. Правда ли, что люди, которые получают больше 50k, имеют как минимум высшее образование? (признак *education – Bachelors, Prof-school, Assoc-acdm, Assoc-voc, Masters* или *Doctorate*)**" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "id": "eygYabkdutTj" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "False" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "high_education = ['Bachelors', 'Prof-school', 'Assoc-acdm', 'Assoc-voc', 'Masters', 'Doctorate']\n", + "all(list(data[data.salary == '>50K']['education'].apply(lambda value: value in high_education)))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4DqPASEsutTk" + }, + "source": [ + "**7. Выведите статистику возраста для каждой расы (признак *race*) и каждого пола. Используйте *groupby* и *describe*. Найдите таким образом максимальный возраст мужчин расы *Amer-Indian-Eskimo*.**" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "id": "fYkBDZMdutTl" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
countmeanstdmin25%50%75%max
racesex
Amer-Indian-EskimoFemale119.037.11764713.11499117.027.036.046.0080.0
Male192.037.20833312.04956317.028.035.045.0082.0
Asian-Pac-IslanderFemale346.035.08959512.30084517.025.033.043.7575.0
Male693.039.07359312.88394418.029.037.046.0090.0
BlackFemale1555.037.85401912.63719717.028.037.046.0090.0
Male1569.037.68260012.88261217.027.036.046.0090.0
OtherFemale109.031.67889911.63159917.023.029.039.0074.0
Male162.034.65432111.35553117.026.032.042.0077.0
WhiteFemale8642.036.81161814.32909317.025.035.046.0090.0
Male19174.039.65249813.43602917.029.038.049.0090.0
\n", + "
" + ], + "text/plain": [ + " count mean std min 25% 50% \\\n", + "race sex \n", + "Amer-Indian-Eskimo Female 119.0 37.117647 13.114991 17.0 27.0 36.0 \n", + " Male 192.0 37.208333 12.049563 17.0 28.0 35.0 \n", + "Asian-Pac-Islander Female 346.0 35.089595 12.300845 17.0 25.0 33.0 \n", + " Male 693.0 39.073593 12.883944 18.0 29.0 37.0 \n", + "Black Female 1555.0 37.854019 12.637197 17.0 28.0 37.0 \n", + " Male 1569.0 37.682600 12.882612 17.0 27.0 36.0 \n", + "Other Female 109.0 31.678899 11.631599 17.0 23.0 29.0 \n", + " Male 162.0 34.654321 11.355531 17.0 26.0 32.0 \n", + "White Female 8642.0 36.811618 14.329093 17.0 25.0 35.0 \n", + " Male 19174.0 39.652498 13.436029 17.0 29.0 38.0 \n", + "\n", + " 75% max \n", + "race sex \n", + "Amer-Indian-Eskimo Female 46.00 80.0 \n", + " Male 45.00 82.0 \n", + "Asian-Pac-Islander Female 43.75 75.0 \n", + " Male 46.00 90.0 \n", + "Black Female 46.00 90.0 \n", + " Male 46.00 90.0 \n", + "Other Female 39.00 74.0 \n", + " Male 42.00 77.0 \n", + "White Female 46.00 90.0 \n", + " Male 49.00 90.0 " + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.groupby(['race', 'sex']).describe()['age']" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "cn-jYXhzutTl" + }, + "source": [ + "**8. Среди кого больше доля зарабатывающих много (>50K): среди женатых или холостых мужчин (признак *marital-status*)? 
Женатыми считаем тех, у кого *marital-status* начинается с *Married* (Married-civ-spouse, Married-spouse-absent или Married-AF-spouse), остальных считаем холостыми.**" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "id": "4hIQXgGAutTm" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "married salary\n", + "False <=50K 0.915505\n", + " >50K 0.084495\n", + "True <=50K 0.559486\n", + " >50K 0.440514\n", + "Name: salary, dtype: float64" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data[data.sex == 'Male'].groupby('married')['salary'].value_counts(normalize=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Rsh8YvoXutTm" + }, + "source": [ + "**9. Какое максимальное число часов человек работает в неделю (признак *hours-per-week*)? Сколько людей работают такое количество часов и каков среди них процент зарабатывающих много?**" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "id": "RK1JQSIZutTn" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Макс. часов в неделю 99\n", + "Людей работающих макс. часов 85\n", + "Процент зарабатывающих много: 29%\n" + ] + } + ], + "source": [ + "max_hours = data['hours-per-week'].max()\n", + "max_hours_people_count = data[data['hours-per-week'] == max_hours].shape[0]\n", + "percentage = int(data[(data['hours-per-week'] == max_hours) & (data['salary'] == '>50K')].shape[0] / max_hours_people_count * 100)\n", + "\n", + "print(f'Макс. часов в неделю {max_hours}')\n", + "print(f'Людей работающих макс. часов {max_hours_people_count}')\n", + "print(f'Процент зарабатывающих много: {percentage}%')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "kUXV84AjutTn" + }, + "source": [ + "**10. 
Посчитайте среднее время работы (*hours-per-week*) зарабатывающих мало и много (*salary*) для каждой страны (*native-country*).**" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": { + "id": "3gzYG3CDutTn" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "native-country salary\n", + "? <=50K 40.164760\n", + " >50K 45.547945\n", + "Cambodia <=50K 41.416667\n", + " >50K 40.000000\n", + "Canada <=50K 37.914634\n", + " ... \n", + "United-States >50K 45.505369\n", + "Vietnam <=50K 37.193548\n", + " >50K 39.200000\n", + "Yugoslavia <=50K 41.600000\n", + " >50K 49.500000\n", + "Name: hours-per-week, Length: 82, dtype: float64" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.groupby(['native-country', 'salary'])['hours-per-week'].mean()" + ] + } + ], + "metadata": { + "anaconda-cloud": {}, + "colab": { + "collapsed_sections": [], + "name": "01_task_pandas.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.0" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/Pandas and EDA (12.03)/Pandas and EDA. Lecture.ipynb b/Pandas and EDA (12.03)/Pandas and EDA. Lecture.ipynb index fb80887..d313571 100644 --- a/Pandas and EDA (12.03)/Pandas and EDA. Lecture.ipynb +++ b/Pandas and EDA (12.03)/Pandas and EDA. 
Lecture.ipynb @@ -1 +1,7353 @@ -{"nbformat":4,"nbformat_minor":0,"metadata":{"anaconda-cloud":{},"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.6.1"},"name":"seminar02_part2_pandas.ipynb","colab":{"name":"02_Pandas.ipynb","provenance":[],"collapsed_sections":[]}},"cells":[{"cell_type":"markdown","metadata":{"id":"FzQ_ch0ktU7n"},"source":["#
Первичный анализ данных с Pandas
"]},{"cell_type":"code","metadata":{"collapsed":true,"scrolled":true,"id":"Parpx34utU7s","executionInfo":{"status":"ok","timestamp":1633609636856,"user_tz":-300,"elapsed":631,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}}},"source":["import numpy as np\n","import pandas as pd"],"execution_count":5,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"QxIKAzfCtU7u"},"source":["Данные, с которыми работают дата саентисты и аналитики, обычно хранятся в виде табличек — например, в форматах `.csv`, `.tsv` или `.xlsx`. Для того, чтобы считать нужные данные из такого файла, отлично подходит библиотека Pandas.\n","\n","Основными структурами данных в Pandas являются классы `Series` и `DataFrame`. Первый из них представляет собой одномерный индексированный массив данных некоторого фиксированного типа. Второй - это двухмерная структура данных, представляющая собой таблицу, каждый столбец которой содержит данные одного типа. Можно представлять её как словарь объектов типа `Series`. 
Структура `DataFrame` отлично подходит для представления реальных данных: строки соответствуют признаковым описаниям отдельных объектов, а столбцы соответствуют признакам."]},{"cell_type":"markdown","metadata":{"id":"l_Ell72CtU7w"},"source":["---------\n","\n","## Демонстрация основных методов Pandas \n"]},{"cell_type":"markdown","metadata":{"id":"YMu_ER8WtU7y"},"source":["### Чтение из файла и первичный анализ"]},{"cell_type":"markdown","metadata":{"id":"efGYx1kqtU7z"},"source":["Прочитаем данные и посмотрим на первые 5 строк с помощью метода `head`:"]},{"cell_type":"code","metadata":{"collapsed":true,"scrolled":true,"id":"ByXZK9MFtU71","executionInfo":{"status":"ok","timestamp":1633609637892,"user_tz":-300,"elapsed":597,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}}},"source":["df = pd.read_csv(\"https://raw.githubusercontent.com/Yorko/mlcourse.ai/master/data/telecom_churn.csv\")"],"execution_count":6,"outputs":[]},{"cell_type":"code","metadata":{"scrolled":true,"id":"hFaFpz2utU73","colab":{"base_uri":"https://localhost:8080/","height":241},"executionInfo":{"status":"ok","timestamp":1633609637895,"user_tz":-300,"elapsed":77,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"cbd457e9-c2bd-4beb-a1fa-c7ba8a4c5b97"},"source":["df.head()"],"execution_count":7,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
StateAccount lengthArea codeInternational planVoice mail planNumber vmail messagesTotal day minutesTotal day callsTotal day chargeTotal eve minutesTotal eve callsTotal eve chargeTotal night minutesTotal night callsTotal night chargeTotal intl minutesTotal intl callsTotal intl chargeCustomer service callsChurn
0KS128415NoYes25265.111045.07197.49916.78244.79111.0110.032.701False
1OH107415NoYes26161.612327.47195.510316.62254.410311.4513.733.701False
2NJ137415NoNo0243.411441.38121.211010.30162.61047.3212.253.290False
3OH84408YesNo0299.47150.9061.9885.26196.9898.866.671.782False
4OK75415YesNo0166.711328.34148.312212.61186.91218.4110.132.733False
\n","
"],"text/plain":[" State Account length ... Customer service calls Churn\n","0 KS 128 ... 1 False\n","1 OH 107 ... 1 False\n","2 NJ 137 ... 0 False\n","3 OH 84 ... 2 False\n","4 OK 75 ... 3 False\n","\n","[5 rows x 20 columns]"]},"metadata":{},"execution_count":7}]},{"cell_type":"markdown","metadata":{"id":"CpV496POtU75"},"source":["В Jupyter-ноутбуках датафреймы `Pandas` выводятся в виде вот таких красивых табличек, и `print(df.head())` выглядит хуже.\n","\n","Кстати, по умолчанию `Pandas` выводит всего 20 столбцов и 60 строк, поэтому если ваш датафрейм больше, воспользуйтесь функцией `set_option`:"]},{"cell_type":"code","metadata":{"collapsed":true,"id":"CYFyCCGGtU77","executionInfo":{"status":"ok","timestamp":1633609637897,"user_tz":-300,"elapsed":68,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}}},"source":["# задание проанализировать все опции и выбрать 3-5 самых полезных по личному мнению \n","# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.set_option.html\n","pd.set_option(\"display.max_columns\", 100)\n","pd.set_option(\"display.max_rows\", 100)"],"execution_count":8,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"CbfNn4a9tU78"},"source":["А также укажем значение параметра `presicion` равным 2, чтобы отображать два знака после запятой (а не 6, как установлено по умолчанию."]},{"cell_type":"code","metadata":{"collapsed":true,"id":"-0MCBxGItU78","executionInfo":{"status":"ok","timestamp":1633609637899,"user_tz":-300,"elapsed":67,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}}},"source":["pd.set_option(\"precision\", 2)"],"execution_count":9,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"Cu652IOYtU79"},"source":["**Посмотрим на размер данных, названия признаков и их 
типы**"]},{"cell_type":"code","metadata":{"id":"LQw6THQytU79","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1633609637901,"user_tz":-300,"elapsed":66,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"b2d6d2f1-a6d1-47c6-e4bb-5c5f33834c4a"},"source":["print(df.shape)"],"execution_count":10,"outputs":[{"output_type":"stream","name":"stdout","text":["(3333, 20)\n"]}]},{"cell_type":"markdown","metadata":{"id":"LJEPKleBtU7-"},"source":["Видим, что в таблице 3333 строки и 20 столбцов. Выведем названия столбцов:"]},{"cell_type":"code","metadata":{"id":"CQArdzC8tU7_","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1633609637903,"user_tz":-300,"elapsed":57,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"08e4c81f-5a94-4589-c4d3-c6792128de13"},"source":["print(df.columns)"],"execution_count":11,"outputs":[{"output_type":"stream","name":"stdout","text":["Index(['State', 'Account length', 'Area code', 'International plan',\n"," 'Voice mail plan', 'Number vmail messages', 'Total day minutes',\n"," 'Total day calls', 'Total day charge', 'Total eve minutes',\n"," 'Total eve calls', 'Total eve charge', 'Total night minutes',\n"," 'Total night calls', 'Total night charge', 'Total intl minutes',\n"," 'Total intl calls', 'Total intl charge', 'Customer service calls',\n"," 'Churn'],\n"," dtype='object')\n"]}]},{"cell_type":"markdown","metadata":{"id":"RoZn1MpBtU8A"},"source":["Чтобы посмотреть общую информацию по датафрейму и всем признакам, воспользуемся методом 
**`info`**:"]},{"cell_type":"code","metadata":{"scrolled":false,"id":"W_ZF3eM8tU8B","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1633609637906,"user_tz":-300,"elapsed":54,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"b4d58f04-d867-458f-bb5e-7b91fbdc9cd9"},"source":["print(df.info())"],"execution_count":12,"outputs":[{"output_type":"stream","name":"stdout","text":["\n","RangeIndex: 3333 entries, 0 to 3332\n","Data columns (total 20 columns):\n"," # Column Non-Null Count Dtype \n","--- ------ -------------- ----- \n"," 0 State 3333 non-null object \n"," 1 Account length 3333 non-null int64 \n"," 2 Area code 3333 non-null int64 \n"," 3 International plan 3333 non-null object \n"," 4 Voice mail plan 3333 non-null object \n"," 5 Number vmail messages 3333 non-null int64 \n"," 6 Total day minutes 3333 non-null float64\n"," 7 Total day calls 3333 non-null int64 \n"," 8 Total day charge 3333 non-null float64\n"," 9 Total eve minutes 3333 non-null float64\n"," 10 Total eve calls 3333 non-null int64 \n"," 11 Total eve charge 3333 non-null float64\n"," 12 Total night minutes 3333 non-null float64\n"," 13 Total night calls 3333 non-null int64 \n"," 14 Total night charge 3333 non-null float64\n"," 15 Total intl minutes 3333 non-null float64\n"," 16 Total intl calls 3333 non-null int64 \n"," 17 Total intl charge 3333 non-null float64\n"," 18 Customer service calls 3333 non-null int64 \n"," 19 Churn 3333 non-null bool \n","dtypes: bool(1), float64(8), int64(8), object(3)\n","memory usage: 498.1+ KB\n","None\n"]}]},{"cell_type":"markdown","metadata":{"id":"FYDNyB6CtU8C"},"source":["`bool`, `int64`, `float64` и `object` — это типы признаков. 
Видим, что 1 признак — логический (`bool`), 3 признака имеют тип `object` и 16 признаков — числовые.\n","\n","**Изменить тип колонки** можно с помощью метода `astype`. Применим этот метод к признаку `Churn` и переведём его в `int64`:"]},{"cell_type":"code","metadata":{"collapsed":true,"id":"byRJQVM5tU8D","executionInfo":{"status":"ok","timestamp":1633609637909,"user_tz":-300,"elapsed":48,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}}},"source":["df[\"Churn\"] = df[\"Churn\"].astype(\"int64\")"],"execution_count":13,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"sBTm0lLYtU8D"},"source":["Метод **`describe`** показывает основные статистические характеристики данных по каждому числовому признаку (типы `int64` и `float64`): число непропущенных значений, среднее, стандартное отклонение, диапазон, медиану, 0.25 и 0.75 квартили."]},{"cell_type":"code","metadata":{"id":"bAsmrRI6tU8D","colab":{"base_uri":"https://localhost:8080/","height":335},"executionInfo":{"status":"ok","timestamp":1633609637911,"user_tz":-300,"elapsed":48,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"32a7192a-b49b-4be7-9b6e-9b7f08f57731"},"source":["df.describe()"],"execution_count":14,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
Account lengthArea codeNumber vmail messagesTotal day minutesTotal day callsTotal day chargeTotal eve minutesTotal eve callsTotal eve chargeTotal night minutesTotal night callsTotal night chargeTotal intl minutesTotal intl callsTotal intl chargeCustomer service callsChurn
count3333.003333.003333.003333.003333.003333.003333.003333.003333.003333.003333.003333.003333.003333.003333.003333.003333.00
mean101.06437.188.10179.78100.4430.56200.98100.1117.08200.87100.119.0410.244.482.761.560.14
std39.8242.3713.6954.4720.079.2650.7119.924.3150.5719.572.282.792.460.751.320.35
min1.00408.000.000.000.000.000.000.000.0023.2033.001.040.000.000.000.000.00
25%74.00408.000.00143.7087.0024.43166.6087.0014.16167.0087.007.528.503.002.301.000.00
50%101.00415.000.00179.40101.0030.50201.40100.0017.12201.20100.009.0510.304.002.781.000.00
75%127.00510.0020.00216.40114.0036.79235.30114.0020.00235.30113.0010.5912.106.003.272.000.00
max243.00510.0051.00350.80165.0059.64363.70170.0030.91395.00175.0017.7720.0020.005.409.001.00
\n","
"],"text/plain":[" Account length Area code Number vmail messages Total day minutes \\\n","count 3333.00 3333.00 3333.00 3333.00 \n","mean 101.06 437.18 8.10 179.78 \n","std 39.82 42.37 13.69 54.47 \n","min 1.00 408.00 0.00 0.00 \n","25% 74.00 408.00 0.00 143.70 \n","50% 101.00 415.00 0.00 179.40 \n","75% 127.00 510.00 20.00 216.40 \n","max 243.00 510.00 51.00 350.80 \n","\n"," Total day calls Total day charge Total eve minutes Total eve calls \\\n","count 3333.00 3333.00 3333.00 3333.00 \n","mean 100.44 30.56 200.98 100.11 \n","std 20.07 9.26 50.71 19.92 \n","min 0.00 0.00 0.00 0.00 \n","25% 87.00 24.43 166.60 87.00 \n","50% 101.00 30.50 201.40 100.00 \n","75% 114.00 36.79 235.30 114.00 \n","max 165.00 59.64 363.70 170.00 \n","\n"," Total eve charge Total night minutes Total night calls \\\n","count 3333.00 3333.00 3333.00 \n","mean 17.08 200.87 100.11 \n","std 4.31 50.57 19.57 \n","min 0.00 23.20 33.00 \n","25% 14.16 167.00 87.00 \n","50% 17.12 201.20 100.00 \n","75% 20.00 235.30 113.00 \n","max 30.91 395.00 175.00 \n","\n"," Total night charge Total intl minutes Total intl calls \\\n","count 3333.00 3333.00 3333.00 \n","mean 9.04 10.24 4.48 \n","std 2.28 2.79 2.46 \n","min 1.04 0.00 0.00 \n","25% 7.52 8.50 3.00 \n","50% 9.05 10.30 4.00 \n","75% 10.59 12.10 6.00 \n","max 17.77 20.00 20.00 \n","\n"," Total intl charge Customer service calls Churn \n","count 3333.00 3333.00 3333.00 \n","mean 2.76 1.56 0.14 \n","std 0.75 1.32 0.35 \n","min 0.00 0.00 0.00 \n","25% 2.30 1.00 0.00 \n","50% 2.78 1.00 0.00 \n","75% 3.27 2.00 0.00 \n","max 5.40 9.00 1.00 "]},"metadata":{},"execution_count":14}]},{"cell_type":"markdown","metadata":{"id":"l6MzhnkotU8D"},"source":["Чтобы посмотреть статистику по нечисловым признакам, нужно явно указать интересующие нас типы в параметре `include`. 
Можно также задать `include='all'`, чтобы вывести статистику по всем имеющимся признакам."]},{"cell_type":"code","metadata":{"scrolled":true,"id":"ewJscFGZtU8F","colab":{"base_uri":"https://localhost:8080/","height":175},"executionInfo":{"status":"ok","timestamp":1633609638506,"user_tz":-300,"elapsed":639,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"187fb398-e4bf-4c36-f3ff-e395013e994f"},"source":["df.describe(include=[\"object\", \"bool\"])"],"execution_count":15,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
StateInternational planVoice mail plan
count333333333333
unique5122
topWVNoNo
freq10630102411
\n","
"],"text/plain":[" State International plan Voice mail plan\n","count 3333 3333 3333\n","unique 51 2 2\n","top WV No No\n","freq 106 3010 2411"]},"metadata":{},"execution_count":15}]},{"cell_type":"markdown","metadata":{"id":"1qbs0vug9TCh"},"source":["Тот же принцип работает при выборе столбцов указанного типа."]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":424},"id":"tbL3f9OD9Tg7","executionInfo":{"status":"ok","timestamp":1633609638538,"user_tz":-300,"elapsed":120,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"1489c427-200c-45fa-f127-369a97e46ea8"},"source":["df.select_dtypes(include=['object', 'bool']) # exclude"],"execution_count":16,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
StateInternational planVoice mail plan
0KSNoYes
1OHNoYes
2NJNoNo
3OHYesNo
4OKYesNo
............
3328AZNoYes
3329WVNoNo
3330RINoNo
3331CTYesNo
3332TNNoYes
\n","

3333 rows × 3 columns

\n","
"],"text/plain":[" State International plan Voice mail plan\n","0 KS No Yes\n","1 OH No Yes\n","2 NJ No No\n","3 OH Yes No\n","4 OK Yes No\n","... ... ... ...\n","3328 AZ No Yes\n","3329 WV No No\n","3330 RI No No\n","3331 CT Yes No\n","3332 TN No Yes\n","\n","[3333 rows x 3 columns]"]},"metadata":{},"execution_count":16}]},{"cell_type":"markdown","metadata":{"id":"Ge-uZsFvtU8G"},"source":["Для категориальных (тип `object`) и булевых (тип `bool`) признаков можно воспользоваться методом **`value_counts`**. Посмотрим на распределение нашей целевой переменной — `Churn`:"]},{"cell_type":"code","metadata":{"id":"eeDu-JiYtU8G","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1633609638540,"user_tz":-300,"elapsed":115,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"19761b7d-d89b-49eb-e4bd-371bd68907d7"},"source":["df[\"Churn\"].value_counts()"],"execution_count":17,"outputs":[{"output_type":"execute_result","data":{"text/plain":["0 2850\n","1 483\n","Name: Churn, dtype: int64"]},"metadata":{},"execution_count":17}]},{"cell_type":"markdown","metadata":{"id":"KANMt5q2tU8I"},"source":["2850 пользователей из 3333 — лояльные, значение переменной `Churn` у них — `0`.\n","\n","Посмотрим на распределение пользователей по переменной `Area code`. 
Укажем значение параметра `normalize=True`, чтобы посмотреть не абсолютные частоты, а относительные."]},{"cell_type":"code","metadata":{"id":"pMenDSyHtU8I","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1633609638542,"user_tz":-300,"elapsed":109,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"a99c176c-d2b0-45b9-e54f-653c1f060dd0"},"source":["df[\"Area code\"].value_counts(normalize=True)"],"execution_count":18,"outputs":[{"output_type":"execute_result","data":{"text/plain":["415 0.50\n","510 0.25\n","408 0.25\n","Name: Area code, dtype: float64"]},"metadata":{},"execution_count":18}]},{"cell_type":"markdown","metadata":{"id":"l4ikQZaptU8I"},"source":["### Сортировка\n","\n","`DataFrame` можно отсортировать по значению какого-нибудь из признаков. В нашем случае, например, по `Total day charge` (`ascending=False` для сортировки по убыванию):"]},{"cell_type":"code","metadata":{"id":"GrbzIXBQtU8J","colab":{"base_uri":"https://localhost:8080/","height":241},"executionInfo":{"status":"ok","timestamp":1633609638544,"user_tz":-300,"elapsed":102,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"7cf76892-8c0d-42fa-fa98-aa49f8c2ab6e"},"source":["df.sort_values(by=\"Total day charge\", ascending=False).head()"],"execution_count":19,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
StateAccount lengthArea codeInternational planVoice mail planNumber vmail messagesTotal day minutesTotal day callsTotal day chargeTotal eve minutesTotal eve callsTotal eve chargeTotal night minutesTotal night callsTotal night chargeTotal intl minutesTotal intl callsTotal intl chargeCustomer service callsChurn
365CO154415NoNo0350.87559.64216.59418.40253.910011.4310.192.7311
985NY64415YesNo0346.85558.96249.57921.21275.410212.3913.393.5911
2594OH115510YesNo0345.38158.70203.410617.29217.51079.7911.883.1911
156OH83415NoNo0337.412057.36227.411619.33153.91146.9315.874.2701
605MO112415NoNo0335.57757.04212.510918.06265.013211.9312.783.4321
\n","
"],"text/plain":[" State Account length Area code International plan Voice mail plan \\\n","365 CO 154 415 No No \n","985 NY 64 415 Yes No \n","2594 OH 115 510 Yes No \n","156 OH 83 415 No No \n","605 MO 112 415 No No \n","\n"," Number vmail messages Total day minutes Total day calls \\\n","365 0 350.8 75 \n","985 0 346.8 55 \n","2594 0 345.3 81 \n","156 0 337.4 120 \n","605 0 335.5 77 \n","\n"," Total day charge Total eve minutes Total eve calls Total eve charge \\\n","365 59.64 216.5 94 18.40 \n","985 58.96 249.5 79 21.21 \n","2594 58.70 203.4 106 17.29 \n","156 57.36 227.4 116 19.33 \n","605 57.04 212.5 109 18.06 \n","\n"," Total night minutes Total night calls Total night charge \\\n","365 253.9 100 11.43 \n","985 275.4 102 12.39 \n","2594 217.5 107 9.79 \n","156 153.9 114 6.93 \n","605 265.0 132 11.93 \n","\n"," Total intl minutes Total intl calls Total intl charge \\\n","365 10.1 9 2.73 \n","985 13.3 9 3.59 \n","2594 11.8 8 3.19 \n","156 15.8 7 4.27 \n","605 12.7 8 3.43 \n","\n"," Customer service calls Churn \n","365 1 1 \n","985 1 1 \n","2594 1 1 \n","156 0 1 \n","605 2 1 "]},"metadata":{},"execution_count":19}]},{"cell_type":"markdown","metadata":{"id":"apUOhvc_tU8J"},"source":["Сортировать можно и по группе столбцов:"]},{"cell_type":"code","metadata":{"id":"KUU1Xp63tU8K","colab":{"base_uri":"https://localhost:8080/","height":241},"executionInfo":{"status":"ok","timestamp":1633609638545,"user_tz":-300,"elapsed":100,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"0bbacb6a-bbf7-4697-b720-20033f341ff3"},"source":["df.sort_values(by=[\"Churn\", \"Total day charge\"], ascending=[True, False]).head()"],"execution_count":20,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
StateAccount lengthArea codeInternational planVoice mail planNumber vmail messagesTotal day minutesTotal day callsTotal day chargeTotal eve minutesTotal eve callsTotal eve chargeTotal night minutesTotal night callsTotal night chargeTotal intl minutesTotal intl callsTotal intl chargeCustomer service callsChurn
688MN13510NoYes21315.610553.65208.97117.76260.112311.7012.133.2730
2259NC210415NoYes31313.88753.35147.710312.55192.7978.6710.172.7330
534LA67510NoNo0310.49752.7766.51235.65246.59911.099.2102.4840
575SD114415NoYes36309.99052.68200.38917.03183.51058.2614.223.8310
2858AL141510NoYes28308.012352.36247.812821.06152.91036.887.432.0010
\n","
"],"text/plain":[" State Account length Area code International plan Voice mail plan \\\n","688 MN 13 510 No Yes \n","2259 NC 210 415 No Yes \n","534 LA 67 510 No No \n","575 SD 114 415 No Yes \n","2858 AL 141 510 No Yes \n","\n"," Number vmail messages Total day minutes Total day calls \\\n","688 21 315.6 105 \n","2259 31 313.8 87 \n","534 0 310.4 97 \n","575 36 309.9 90 \n","2858 28 308.0 123 \n","\n"," Total day charge Total eve minutes Total eve calls Total eve charge \\\n","688 53.65 208.9 71 17.76 \n","2259 53.35 147.7 103 12.55 \n","534 52.77 66.5 123 5.65 \n","575 52.68 200.3 89 17.03 \n","2858 52.36 247.8 128 21.06 \n","\n"," Total night minutes Total night calls Total night charge \\\n","688 260.1 123 11.70 \n","2259 192.7 97 8.67 \n","534 246.5 99 11.09 \n","575 183.5 105 8.26 \n","2858 152.9 103 6.88 \n","\n"," Total intl minutes Total intl calls Total intl charge \\\n","688 12.1 3 3.27 \n","2259 10.1 7 2.73 \n","534 9.2 10 2.48 \n","575 14.2 2 3.83 \n","2858 7.4 3 2.00 \n","\n"," Customer service calls Churn \n","688 3 0 \n","2259 3 0 \n","534 4 0 \n","575 1 0 \n","2858 1 0 "]},"metadata":{},"execution_count":20}]},{"cell_type":"markdown","metadata":{"id":"VCTKeJUYtU8L"},"source":["### Индексация и извлечение данных"]},{"cell_type":"markdown","metadata":{"id":"lveNXBbztU8L"},"source":["`DataFrame` можно индексировать по-разному. В связи с этим рассмотрим различные способы индексации и извлечения нужных нам данных из датафрейма на примере простых вопросов.\n","\n","Для извлечения отдельного столбца можно использовать конструкцию вида `DataFrame['Name']`. 
Воспользуемся этим для ответа на вопрос: **какова доля нелояльных пользователей в нашем датафрейме?**"]},{"cell_type":"code","metadata":{"id":"FLaA5u1ztU8L","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1633609638547,"user_tz":-300,"elapsed":98,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"d1b61bde-7b0c-45d0-c2e9-32d9bb9539c0"},"source":["df[\"Churn\"].mean()"],"execution_count":21,"outputs":[{"output_type":"execute_result","data":{"text/plain":["0.14491449144914492"]},"metadata":{},"execution_count":21}]},{"cell_type":"markdown","metadata":{"id":"QiJUnpEJtU8M"},"source":["14,5% — довольно плохой показатель для компании, с таким процентом оттока можно и разориться."]},{"cell_type":"markdown","metadata":{"id":"2v6CRyJ3tU8M"},"source":["Очень удобной является логическая индексация `DataFrame` по одному столбцу. Выглядит она следующим образом: `df[P(df['Name'])]`, где `P` - это некоторое логическое условие, проверяемое для каждого элемента столбца `Name`. Итогом такой индексации является `DataFrame`, состоящий только из строк, удовлетворяющих условию `P` по столбцу `Name`. 
\n","\n","Воспользуемся этим для ответа на вопрос: **каковы средние значения числовых признаков среди нелояльных пользователей?**"]},{"cell_type":"code","metadata":{"scrolled":true,"id":"0G0_4zPytU8O","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1633609638548,"user_tz":-300,"elapsed":90,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"79d763ca-3a4e-4408-f218-e5996dbd68bb"},"source":["df[df[\"Churn\"] == 1].mean()"],"execution_count":22,"outputs":[{"output_type":"execute_result","data":{"text/plain":["Account length 102.66\n","Area code 437.82\n","Number vmail messages 5.12\n","Total day minutes 206.91\n","Total day calls 101.34\n","Total day charge 35.18\n","Total eve minutes 212.41\n","Total eve calls 100.56\n","Total eve charge 18.05\n","Total night minutes 205.23\n","Total night calls 100.40\n","Total night charge 9.24\n","Total intl minutes 10.70\n","Total intl calls 4.16\n","Total intl charge 2.89\n","Customer service calls 2.23\n","Churn 1.00\n","dtype: float64"]},"metadata":{},"execution_count":22}]},{"cell_type":"markdown","metadata":{"id":"vX7Kv82ztU8O"},"source":["Скомбинировав предыдущие два вида индексации, ответим на вопрос: **сколько в среднем в течение дня разговаривают по телефону нелояльные пользователи**?"]},{"cell_type":"code","metadata":{"id":"ZmpzMz9LtU8O","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1633609638551,"user_tz":-300,"elapsed":87,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"f4ef2f49-5d18-4228-b513-96402e23b1b4"},"source":["df[df[\"Churn\"] == 1][\"Total day 
minutes\"].mean()"],"execution_count":23,"outputs":[{"output_type":"execute_result","data":{"text/plain":["206.91407867494814"]},"metadata":{},"execution_count":23}]},{"cell_type":"markdown","metadata":{"id":"rME2EKe8tU8P"},"source":["**Какова максимальная длина международных звонков среди лояльных пользователей (`Churn == 0`), не пользующихся услугой международного роуминга (`'International plan' == 'No'`)?**"]},{"cell_type":"code","metadata":{"id":"DQ0H-bJttU8Q","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1633609638552,"user_tz":-300,"elapsed":82,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"3c8a6304-7ede-495b-f2cc-dcf70beb252f"},"source":["df[(df[\"Churn\"] == 0) & (df[\"International plan\"] == \"No\")][\"Total intl minutes\"].max()"],"execution_count":24,"outputs":[{"output_type":"execute_result","data":{"text/plain":["18.9"]},"metadata":{},"execution_count":24}]},{"cell_type":"markdown","metadata":{"id":"f6IelrO4tU8Q"},"source":["Датафреймы можно индексировать как по названию столбца или строки, так и по порядковому номеру. Для индексации **по названию** используется метод **`loc`**, **по номеру** — **`iloc`**.\n","\n","В первом случае (`df.loc[0:5, 'State':'Area code']`) мы говорим _«передай нам значения для id строк от 0 до 5 и для столбцов от State до Area code»_, а во втором (`df.iloc[0:5, 0:3]`) — _«передай нам значения первых пяти строк в первых трёх столбцах»_. \n","\n","В случае `iloc` срез работает как обычно (правая граница не включается), однако в случае `loc` учитываются и начало, и конец среза. 
Да, неудобно, да, вызывает путаницу."]},{"cell_type":"code","metadata":{"scrolled":true,"id":"Pp82lj7ktU8R","colab":{"base_uri":"https://localhost:8080/","height":1000},"executionInfo":{"status":"ok","timestamp":1633609638554,"user_tz":-300,"elapsed":78,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"8e2a9392-b3f0-44ee-e383-b19a46f8d708"},"source":["d = df.copy()\n","d = d.drop_duplicates('State')\n","d = d.set_index('State')\n","# d = d.reset_index() # сбрасываем столбец-индекс не удаляя его\n","d = d.reset_index(drop=True) # сбрасываем столбец-индекс удаляя его\n","d\n","# d.loc['KS':'OK','Area code':'Total day minutes']"],"execution_count":25,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," 
\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," 
\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," 
\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
Account lengthArea codeInternational planVoice mail planNumber vmail messagesTotal day minutesTotal day callsTotal day chargeTotal eve minutesTotal eve callsTotal eve chargeTotal night minutesTotal night callsTotal night chargeTotal intl minutesTotal intl callsTotal intl chargeCustomer service callsChurn
0128415NoYes25265.111045.07197.49916.78244.79111.0110.032.7010
1107415NoYes26161.612327.47195.510316.62254.410311.4513.733.7010
2137415NoNo0243.411441.38121.211010.30162.61047.3212.253.2900
375415YesNo0166.711328.34148.312212.61186.91218.4110.132.7330
4118510YesNo0223.49837.98220.610118.75203.91189.186.361.7000
5121510NoYes24218.28837.09348.510829.62212.61189.577.572.0330
6147415YesNo0157.07926.69103.1948.76211.8969.537.161.9200
7117408NoNo0184.59731.37351.68029.89215.8909.718.742.3510
8141415YesYes37258.68443.96222.011118.87326.49714.6911.253.0200
965415NoNo0129.113721.95228.58319.42208.81119.4012.763.4341
1074415NoNo0187.712731.91163.414813.89196.0948.829.152.4600
11168408NoNo0128.89621.90104.9718.92141.11286.3511.223.0210
1295510NoNo0156.68826.62247.67521.05192.31158.6512.353.3230
13161415NoNo0332.96756.59317.89727.01160.61287.235.491.4641
1485408NoYes27196.413933.39280.99023.8889.3754.0213.843.7310
1593510NoNo0190.711432.42218.211118.55129.61215.838.132.1930
1676510NoYes33189.76632.25212.86518.09165.71087.4610.052.7010
1773415NoNo0224.49038.15159.58813.56192.8748.6813.023.5110
18147415NoNo0155.111726.37239.79320.37208.81339.4010.642.8600
1977408NoNo062.48910.61169.912114.44209.6649.435.761.5451
20130415NoNo0183.011231.1172.9996.20181.8788.189.5192.5700
21111415NoNo0110.410318.77137.310211.67189.61058.537.762.0820
22174415NoNo0124.37621.13277.111223.55250.711511.2815.554.1930
2357408NoYes39213.011536.21191.111216.24182.71158.229.532.5700
2449510NoNo0119.311720.28215.110918.28178.7908.0411.113.0010
25142415NoNo084.89514.42136.76311.62250.514811.2714.263.8320
2675510NoNo0226.110538.44201.510717.13246.29811.0810.352.7810
2772415NoYes37220.08037.40217.310218.47152.8716.8814.763.9730
2836408NoYes30146.312824.87162.58013.81129.31095.8214.563.9200
29135408YesYes41173.18529.43203.910717.33122.2785.5014.6153.9401
3034510NoNo0124.88221.22282.29823.99311.57814.0210.042.7020
3164510NoNo0154.06726.18225.811819.19265.38611.943.530.9510
3259408NoYes28120.99720.55213.09218.11163.11167.348.552.3020
3365415NoNo0211.312035.92162.612213.82134.71186.0613.253.5630
34142408NoNo0187.013331.79134.67411.44242.212710.907.452.0020
3596415NoNo0160.211727.23267.56722.74228.56810.289.352.5120
36116415NoYes34268.68345.66178.214215.15166.31067.4811.633.1320
3774510NoYes33193.79132.93246.19620.92138.0926.2114.633.9420
38149408NoYes28180.79230.72187.86415.96265.55311.9512.633.4030
3938408NoNo0131.29822.30162.99713.85159.01067.158.262.2120
4040415NoYes41148.17425.18169.58814.41214.11029.636.251.6720
41147510NoNo0248.68342.26148.98512.66172.51097.768.042.1630
4290415NoNo0203.414634.58226.711719.27152.41056.867.341.9710
4382415NoNo0300.310951.05181.010015.39270.17312.1511.743.1601
4474415NoYes35154.110426.20123.48410.49202.1579.0910.992.9420
4578415NoNo0252.99342.99178.411215.16263.910511.889.572.5730
46120408NoNo0212.113136.06209.410417.80167.2967.525.351.4311
4778415NoNo0149.711925.45182.211515.49261.512611.779.782.6200
4882415NoYes24155.213126.38244.510620.78122.4685.5110.732.8910
49199415NoYes34230.612139.20219.49918.65299.39413.478.022.1600
5079408NoNo0205.712334.97214.510818.23226.110610.176.7181.8110
\n","
"],"text/plain":[" Account length Area code International plan Voice mail plan \\\n","0 128 415 No Yes \n","1 107 415 No Yes \n","2 137 415 No No \n","3 75 415 Yes No \n","4 118 510 Yes No \n","5 121 510 No Yes \n","6 147 415 Yes No \n","7 117 408 No No \n","8 141 415 Yes Yes \n","9 65 415 No No \n","10 74 415 No No \n","11 168 408 No No \n","12 95 510 No No \n","13 161 415 No No \n","14 85 408 No Yes \n","15 93 510 No No \n","16 76 510 No Yes \n","17 73 415 No No \n","18 147 415 No No \n","19 77 408 No No \n","20 130 415 No No \n","21 111 415 No No \n","22 174 415 No No \n","23 57 408 No Yes \n","24 49 510 No No \n","25 142 415 No No \n","26 75 510 No No \n","27 72 415 No Yes \n","28 36 408 No Yes \n","29 135 408 Yes Yes \n","30 34 510 No No \n","31 64 510 No No \n","32 59 408 No Yes \n","33 65 415 No No \n","34 142 408 No No \n","35 96 415 No No \n","36 116 415 No Yes \n","37 74 510 No Yes \n","38 149 408 No Yes \n","39 38 408 No No \n","40 40 415 No Yes \n","41 147 510 No No \n","42 90 415 No No \n","43 82 415 No No \n","44 74 415 No Yes \n","45 78 415 No No \n","46 120 408 No No \n","47 78 415 No No \n","48 82 415 No Yes \n","49 199 415 No Yes \n","50 79 408 No No \n","\n"," Number vmail messages Total day minutes Total day calls \\\n","0 25 265.1 110 \n","1 26 161.6 123 \n","2 0 243.4 114 \n","3 0 166.7 113 \n","4 0 223.4 98 \n","5 24 218.2 88 \n","6 0 157.0 79 \n","7 0 184.5 97 \n","8 37 258.6 84 \n","9 0 129.1 137 \n","10 0 187.7 127 \n","11 0 128.8 96 \n","12 0 156.6 88 \n","13 0 332.9 67 \n","14 27 196.4 139 \n","15 0 190.7 114 \n","16 33 189.7 66 \n","17 0 224.4 90 \n","18 0 155.1 117 \n","19 0 62.4 89 \n","20 0 183.0 112 \n","21 0 110.4 103 \n","22 0 124.3 76 \n","23 39 213.0 115 \n","24 0 119.3 117 \n","25 0 84.8 95 \n","26 0 226.1 105 \n","27 37 220.0 80 \n","28 30 146.3 128 \n","29 41 173.1 85 \n","30 0 124.8 82 \n","31 0 154.0 67 \n","32 28 120.9 97 \n","33 0 211.3 120 \n","34 0 187.0 133 \n","35 0 160.2 117 \n","36 34 268.6 83 \n","37 33 193.7 91 
\n","38 28 180.7 92 \n","39 0 131.2 98 \n","40 41 148.1 74 \n","41 0 248.6 83 \n","42 0 203.4 146 \n","43 0 300.3 109 \n","44 35 154.1 104 \n","45 0 252.9 93 \n","46 0 212.1 131 \n","47 0 149.7 119 \n","48 24 155.2 131 \n","49 34 230.6 121 \n","50 0 205.7 123 \n","\n"," Total day charge Total eve minutes Total eve calls Total eve charge \\\n","0 45.07 197.4 99 16.78 \n","1 27.47 195.5 103 16.62 \n","2 41.38 121.2 110 10.30 \n","3 28.34 148.3 122 12.61 \n","4 37.98 220.6 101 18.75 \n","5 37.09 348.5 108 29.62 \n","6 26.69 103.1 94 8.76 \n","7 31.37 351.6 80 29.89 \n","8 43.96 222.0 111 18.87 \n","9 21.95 228.5 83 19.42 \n","10 31.91 163.4 148 13.89 \n","11 21.90 104.9 71 8.92 \n","12 26.62 247.6 75 21.05 \n","13 56.59 317.8 97 27.01 \n","14 33.39 280.9 90 23.88 \n","15 32.42 218.2 111 18.55 \n","16 32.25 212.8 65 18.09 \n","17 38.15 159.5 88 13.56 \n","18 26.37 239.7 93 20.37 \n","19 10.61 169.9 121 14.44 \n","20 31.11 72.9 99 6.20 \n","21 18.77 137.3 102 11.67 \n","22 21.13 277.1 112 23.55 \n","23 36.21 191.1 112 16.24 \n","24 20.28 215.1 109 18.28 \n","25 14.42 136.7 63 11.62 \n","26 38.44 201.5 107 17.13 \n","27 37.40 217.3 102 18.47 \n","28 24.87 162.5 80 13.81 \n","29 29.43 203.9 107 17.33 \n","30 21.22 282.2 98 23.99 \n","31 26.18 225.8 118 19.19 \n","32 20.55 213.0 92 18.11 \n","33 35.92 162.6 122 13.82 \n","34 31.79 134.6 74 11.44 \n","35 27.23 267.5 67 22.74 \n","36 45.66 178.2 142 15.15 \n","37 32.93 246.1 96 20.92 \n","38 30.72 187.8 64 15.96 \n","39 22.30 162.9 97 13.85 \n","40 25.18 169.5 88 14.41 \n","41 42.26 148.9 85 12.66 \n","42 34.58 226.7 117 19.27 \n","43 51.05 181.0 100 15.39 \n","44 26.20 123.4 84 10.49 \n","45 42.99 178.4 112 15.16 \n","46 36.06 209.4 104 17.80 \n","47 25.45 182.2 115 15.49 \n","48 26.38 244.5 106 20.78 \n","49 39.20 219.4 99 18.65 \n","50 34.97 214.5 108 18.23 \n","\n"," Total night minutes Total night calls Total night charge \\\n","0 244.7 91 11.01 \n","1 254.4 103 11.45 \n","2 162.6 104 7.32 \n","3 186.9 121 8.41 \n","4 
203.9 118 9.18 \n","5 212.6 118 9.57 \n","6 211.8 96 9.53 \n","7 215.8 90 9.71 \n","8 326.4 97 14.69 \n","9 208.8 111 9.40 \n","10 196.0 94 8.82 \n","11 141.1 128 6.35 \n","12 192.3 115 8.65 \n","13 160.6 128 7.23 \n","14 89.3 75 4.02 \n","15 129.6 121 5.83 \n","16 165.7 108 7.46 \n","17 192.8 74 8.68 \n","18 208.8 133 9.40 \n","19 209.6 64 9.43 \n","20 181.8 78 8.18 \n","21 189.6 105 8.53 \n","22 250.7 115 11.28 \n","23 182.7 115 8.22 \n","24 178.7 90 8.04 \n","25 250.5 148 11.27 \n","26 246.2 98 11.08 \n","27 152.8 71 6.88 \n","28 129.3 109 5.82 \n","29 122.2 78 5.50 \n","30 311.5 78 14.02 \n","31 265.3 86 11.94 \n","32 163.1 116 7.34 \n","33 134.7 118 6.06 \n","34 242.2 127 10.90 \n","35 228.5 68 10.28 \n","36 166.3 106 7.48 \n","37 138.0 92 6.21 \n","38 265.5 53 11.95 \n","39 159.0 106 7.15 \n","40 214.1 102 9.63 \n","41 172.5 109 7.76 \n","42 152.4 105 6.86 \n","43 270.1 73 12.15 \n","44 202.1 57 9.09 \n","45 263.9 105 11.88 \n","46 167.2 96 7.52 \n","47 261.5 126 11.77 \n","48 122.4 68 5.51 \n","49 299.3 94 13.47 \n","50 226.1 106 10.17 \n","\n"," Total intl minutes Total intl calls Total intl charge \\\n","0 10.0 3 2.70 \n","1 13.7 3 3.70 \n","2 12.2 5 3.29 \n","3 10.1 3 2.73 \n","4 6.3 6 1.70 \n","5 7.5 7 2.03 \n","6 7.1 6 1.92 \n","7 8.7 4 2.35 \n","8 11.2 5 3.02 \n","9 12.7 6 3.43 \n","10 9.1 5 2.46 \n","11 11.2 2 3.02 \n","12 12.3 5 3.32 \n","13 5.4 9 1.46 \n","14 13.8 4 3.73 \n","15 8.1 3 2.19 \n","16 10.0 5 2.70 \n","17 13.0 2 3.51 \n","18 10.6 4 2.86 \n","19 5.7 6 1.54 \n","20 9.5 19 2.57 \n","21 7.7 6 2.08 \n","22 15.5 5 4.19 \n","23 9.5 3 2.57 \n","24 11.1 1 3.00 \n","25 14.2 6 3.83 \n","26 10.3 5 2.78 \n","27 14.7 6 3.97 \n","28 14.5 6 3.92 \n","29 14.6 15 3.94 \n","30 10.0 4 2.70 \n","31 3.5 3 0.95 \n","32 8.5 5 2.30 \n","33 13.2 5 3.56 \n","34 7.4 5 2.00 \n","35 9.3 5 2.51 \n","36 11.6 3 3.13 \n","37 14.6 3 3.94 \n","38 12.6 3 3.40 \n","39 8.2 6 2.21 \n","40 6.2 5 1.67 \n","41 8.0 4 2.16 \n","42 7.3 4 1.97 \n","43 11.7 4 3.16 \n","44 10.9 9 2.94 
\n","45 9.5 7 2.57 \n","46 5.3 5 1.43 \n","47 9.7 8 2.62 \n","48 10.7 3 2.89 \n","49 8.0 2 2.16 \n","50 6.7 18 1.81 \n","\n"," Customer service calls Churn \n","0 1 0 \n","1 1 0 \n","2 0 0 \n","3 3 0 \n","4 0 0 \n","5 3 0 \n","6 0 0 \n","7 1 0 \n","8 0 0 \n","9 4 1 \n","10 0 0 \n","11 1 0 \n","12 3 0 \n","13 4 1 \n","14 1 0 \n","15 3 0 \n","16 1 0 \n","17 1 0 \n","18 0 0 \n","19 5 1 \n","20 0 0 \n","21 2 0 \n","22 3 0 \n","23 0 0 \n","24 1 0 \n","25 2 0 \n","26 1 0 \n","27 3 0 \n","28 0 0 \n","29 0 1 \n","30 2 0 \n","31 1 0 \n","32 2 0 \n","33 3 0 \n","34 2 0 \n","35 2 0 \n","36 2 0 \n","37 2 0 \n","38 3 0 \n","39 2 0 \n","40 2 0 \n","41 3 0 \n","42 1 0 \n","43 0 1 \n","44 2 0 \n","45 3 0 \n","46 1 1 \n","47 0 0 \n","48 1 0 \n","49 0 0 \n","50 1 0 "]},"metadata":{},"execution_count":25}]},{"cell_type":"code","metadata":{"scrolled":true,"id":"qGN5gaALtU8R","colab":{"base_uri":"https://localhost:8080/","height":206},"executionInfo":{"status":"ok","timestamp":1633609638556,"user_tz":-300,"elapsed":75,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"1f803cde-6306-4ebe-cd3a-1cb7ad85010e"},"source":["df.iloc[0:5, 0:3]"],"execution_count":26,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
StateAccount lengthArea code
0KS128415
1OH107415
2NJ137415
3OH84408
4OK75415
\n","
"],"text/plain":[" State Account length Area code\n","0 KS 128 415\n","1 OH 107 415\n","2 NJ 137 415\n","3 OH 84 408\n","4 OK 75 415"]},"metadata":{},"execution_count":26}]},{"cell_type":"markdown","metadata":{"id":"UCMKdcx9tU8S"},"source":["Метод `ix` индексирует и по названию, и по номеру, но он вызывает путаницу, и поэтому был объявлен устаревшим (deprecated)."]},{"cell_type":"markdown","metadata":{"id":"HnMAXWTAtU8S"},"source":["Если нам нужна первая или последняя строчка датафрейма, пользуемся конструкцией `df[:1]` или `df[-1:]`:"]},{"cell_type":"code","metadata":{"scrolled":true,"id":"OrwoqAGPtU8U","colab":{"base_uri":"https://localhost:8080/","height":115},"executionInfo":{"status":"ok","timestamp":1633609638558,"user_tz":-300,"elapsed":74,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"53bc6332-8c03-4b98-9335-295812d859cd"},"source":["df[-1:]"],"execution_count":27,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
StateAccount lengthArea codeInternational planVoice mail planNumber vmail messagesTotal day minutesTotal day callsTotal day chargeTotal eve minutesTotal eve callsTotal eve chargeTotal night minutesTotal night callsTotal night chargeTotal intl minutesTotal intl callsTotal intl chargeCustomer service callsChurn
3332TN74415NoYes25234.411339.85265.98222.6241.47710.8613.743.700
\n","
"],"text/plain":[" State Account length Area code International plan Voice mail plan \\\n","3332 TN 74 415 No Yes \n","\n"," Number vmail messages Total day minutes Total day calls \\\n","3332 25 234.4 113 \n","\n"," Total day charge Total eve minutes Total eve calls Total eve charge \\\n","3332 39.85 265.9 82 22.6 \n","\n"," Total night minutes Total night calls Total night charge \\\n","3332 241.4 77 10.86 \n","\n"," Total intl minutes Total intl calls Total intl charge \\\n","3332 13.7 4 3.7 \n","\n"," Customer service calls Churn \n","3332 0 0 "]},"metadata":{},"execution_count":27}]},{"cell_type":"markdown","metadata":{"id":"Ur_--vTVtU8W"},"source":["### Применение функций: `apply`, `map` и др."]},{"cell_type":"markdown","metadata":{"id":"da6UVfVjtU8W"},"source":["**Применение функции к каждому столбцу:**"]},{"cell_type":"code","metadata":{"id":"LIlX4ORVtU8W","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1633609638559,"user_tz":-300,"elapsed":71,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"315e42ff-9efa-4fa9-e41f-cba08a9534d2"},"source":["df.apply(np.max)"],"execution_count":28,"outputs":[{"output_type":"execute_result","data":{"text/plain":["State WY\n","Account length 243\n","Area code 510\n","International plan Yes\n","Voice mail plan Yes\n","Number vmail messages 51\n","Total day minutes 3.5e+02\n","Total day calls 165\n","Total day charge 60\n","Total eve minutes 3.6e+02\n","Total eve calls 170\n","Total eve charge 31\n","Total night minutes 4e+02\n","Total night calls 175\n","Total night charge 18\n","Total intl minutes 20\n","Total intl calls 20\n","Total intl charge 5.4\n","Customer service calls 9\n","Churn 1\n","dtype: 
object"]},"metadata":{},"execution_count":28}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":478},"id":"p-mUIP9HQakx","executionInfo":{"status":"ok","timestamp":1633609638561,"user_tz":-300,"elapsed":66,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"bbe218f1-eddb-4ff4-eb09-6a148e04a29c"},"source":["def make_feature(row):\n"," if row['Voice mail plan'] == 'Yes':\n"," return row['Number vmail messages'] * 4\n"," return row['Number vmail messages'] + 4\n","df['new_Number_vmail_messages'] = df.apply(make_feature, axis=1)\n","df"],"execution_count":29,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
StateAccount lengthArea codeInternational planVoice mail planNumber vmail messagesTotal day minutesTotal day callsTotal day chargeTotal eve minutesTotal eve callsTotal eve chargeTotal night minutesTotal night callsTotal night chargeTotal intl minutesTotal intl callsTotal intl chargeCustomer service callsChurnnew_Number_vmail_messages
0KS128415NoYes25265.111045.07197.49916.78244.79111.0110.032.7010100
1OH107415NoYes26161.612327.47195.510316.62254.410311.4513.733.7010104
2NJ137415NoNo0243.411441.38121.211010.30162.61047.3212.253.29004
3OH84408YesNo0299.47150.9061.9885.26196.9898.866.671.78204
4OK75415YesNo0166.711328.34148.312212.61186.91218.4110.132.73304
..................................................................
3328AZ192415NoYes36156.27726.55215.512618.32279.18312.569.962.6720144
3329WV68415NoNo0231.15739.29153.45513.04191.31238.619.642.59304
3330RI28510NoNo0180.810930.74288.85824.55191.9918.6414.163.81204
3331CT184510YesNo0213.810536.35159.68413.57139.21376.265.0101.35204
3332TN74415NoYes25234.411339.85265.98222.60241.47710.8613.743.7000100
\n","

3333 rows × 21 columns

\n","
"],"text/plain":[" State Account length Area code International plan Voice mail plan \\\n","0 KS 128 415 No Yes \n","1 OH 107 415 No Yes \n","2 NJ 137 415 No No \n","3 OH 84 408 Yes No \n","4 OK 75 415 Yes No \n","... ... ... ... ... ... \n","3328 AZ 192 415 No Yes \n","3329 WV 68 415 No No \n","3330 RI 28 510 No No \n","3331 CT 184 510 Yes No \n","3332 TN 74 415 No Yes \n","\n"," Number vmail messages Total day minutes Total day calls \\\n","0 25 265.1 110 \n","1 26 161.6 123 \n","2 0 243.4 114 \n","3 0 299.4 71 \n","4 0 166.7 113 \n","... ... ... ... \n","3328 36 156.2 77 \n","3329 0 231.1 57 \n","3330 0 180.8 109 \n","3331 0 213.8 105 \n","3332 25 234.4 113 \n","\n"," Total day charge Total eve minutes Total eve calls Total eve charge \\\n","0 45.07 197.4 99 16.78 \n","1 27.47 195.5 103 16.62 \n","2 41.38 121.2 110 10.30 \n","3 50.90 61.9 88 5.26 \n","4 28.34 148.3 122 12.61 \n","... ... ... ... ... \n","3328 26.55 215.5 126 18.32 \n","3329 39.29 153.4 55 13.04 \n","3330 30.74 288.8 58 24.55 \n","3331 36.35 159.6 84 13.57 \n","3332 39.85 265.9 82 22.60 \n","\n"," Total night minutes Total night calls Total night charge \\\n","0 244.7 91 11.01 \n","1 254.4 103 11.45 \n","2 162.6 104 7.32 \n","3 196.9 89 8.86 \n","4 186.9 121 8.41 \n","... ... ... ... \n","3328 279.1 83 12.56 \n","3329 191.3 123 8.61 \n","3330 191.9 91 8.64 \n","3331 139.2 137 6.26 \n","3332 241.4 77 10.86 \n","\n"," Total intl minutes Total intl calls Total intl charge \\\n","0 10.0 3 2.70 \n","1 13.7 3 3.70 \n","2 12.2 5 3.29 \n","3 6.6 7 1.78 \n","4 10.1 3 2.73 \n","... ... ... ... \n","3328 9.9 6 2.67 \n","3329 9.6 4 2.59 \n","3330 14.1 6 3.81 \n","3331 5.0 10 1.35 \n","3332 13.7 4 3.70 \n","\n"," Customer service calls Churn new_Number_vmail_messages \n","0 1 0 100 \n","1 1 0 104 \n","2 0 0 4 \n","3 2 0 4 \n","4 3 0 4 \n","... ... ... ... 
\n","3328 2 0 144 \n","3329 3 0 4 \n","3330 2 0 4 \n","3331 2 0 4 \n","3332 0 0 100 \n","\n","[3333 rows x 21 columns]"]},"metadata":{},"execution_count":29}]},{"cell_type":"markdown","metadata":{"id":"j2fEIU5ptU8Y"},"source":["Метод `apply` можно использовать и для того, чтобы применить функцию к каждой строке. Для этого нужно указать `axis=1`."]},{"cell_type":"markdown","metadata":{"id":"e-TxyhUttU8Y"},"source":["**Применение функции к каждой ячейке столбца**\n","\n","Допустим, по какой-то причине нас интересуют все люди из штатов, названия которых начинаются на 'W'. В данном случае это можно сделать по-разному, но наибольшую свободу дает связка `apply`-`lambda` – применение функции ко всем значениям в столбце."]},{"cell_type":"code","metadata":{"scrolled":false,"id":"-jnLxPnWtU8Z","colab":{"base_uri":"https://localhost:8080/","height":261},"executionInfo":{"status":"ok","timestamp":1633609638563,"user_tz":-300,"elapsed":65,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"bcab7df7-ed63-45c1-816b-ec70b6e4fa7b"},"source":["df[df[\"State\"].apply(lambda state: state[0] == \"W\")].head()"],"execution_count":30,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
StateAccount lengthArea codeInternational planVoice mail planNumber vmail messagesTotal day minutesTotal day callsTotal day chargeTotal eve minutesTotal eve callsTotal eve chargeTotal night minutesTotal night callsTotal night chargeTotal intl minutesTotal intl callsTotal intl chargeCustomer service callsChurnnew_Number_vmail_messages
9WV141415YesYes37258.68443.96222.011118.87326.49714.6911.253.0200148
26WY57408NoYes39213.011536.21191.111216.24182.71158.229.532.5700156
44WI64510NoNo0154.06726.18225.811819.19265.38611.943.530.95104
49WY97415NoYes24133.213522.64217.25818.4670.6793.1811.032.971096
54WY87415NoNo0151.08325.67219.711618.67203.91279.189.732.62514
\n","
"],"text/plain":[" State Account length Area code International plan Voice mail plan \\\n","9 WV 141 415 Yes Yes \n","26 WY 57 408 No Yes \n","44 WI 64 510 No No \n","49 WY 97 415 No Yes \n","54 WY 87 415 No No \n","\n"," Number vmail messages Total day minutes Total day calls \\\n","9 37 258.6 84 \n","26 39 213.0 115 \n","44 0 154.0 67 \n","49 24 133.2 135 \n","54 0 151.0 83 \n","\n"," Total day charge Total eve minutes Total eve calls Total eve charge \\\n","9 43.96 222.0 111 18.87 \n","26 36.21 191.1 112 16.24 \n","44 26.18 225.8 118 19.19 \n","49 22.64 217.2 58 18.46 \n","54 25.67 219.7 116 18.67 \n","\n"," Total night minutes Total night calls Total night charge \\\n","9 326.4 97 14.69 \n","26 182.7 115 8.22 \n","44 265.3 86 11.94 \n","49 70.6 79 3.18 \n","54 203.9 127 9.18 \n","\n"," Total intl minutes Total intl calls Total intl charge \\\n","9 11.2 5 3.02 \n","26 9.5 3 2.57 \n","44 3.5 3 0.95 \n","49 11.0 3 2.97 \n","54 9.7 3 2.62 \n","\n"," Customer service calls Churn new_Number_vmail_messages \n","9 0 0 148 \n","26 0 0 156 \n","44 1 0 4 \n","49 1 0 96 \n","54 5 1 4 "]},"metadata":{},"execution_count":30}]},{"cell_type":"markdown","metadata":{"id":"q6SkeDiJtU8Z"},"source":["Метод `map` можно использовать и для **замены значений в колонке**, передав ему в качестве аргумента словарь вида `{old_value: new_value}`:"]},{"cell_type":"code","metadata":{"id":"q3lbm6XXtU8a","colab":{"base_uri":"https://localhost:8080/","height":261},"executionInfo":{"status":"ok","timestamp":1633609638564,"user_tz":-300,"elapsed":63,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"10f505a4-3227-4ff5-b868-1efaadf4a181"},"source":["d = {\"No\": False, \"Yes\": True}\n","df[\"International plan\"] = df[\"International plan\"].map(d)\n","df.head()"],"execution_count":31,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
StateAccount lengthArea codeInternational planVoice mail planNumber vmail messagesTotal day minutesTotal day callsTotal day chargeTotal eve minutesTotal eve callsTotal eve chargeTotal night minutesTotal night callsTotal night chargeTotal intl minutesTotal intl callsTotal intl chargeCustomer service callsChurnnew_Number_vmail_messages
0KS128415FalseYes25265.111045.07197.49916.78244.79111.0110.032.7010100
1OH107415FalseYes26161.612327.47195.510316.62254.410311.4513.733.7010104
2NJ137415FalseNo0243.411441.38121.211010.30162.61047.3212.253.29004
3OH84408TrueNo0299.47150.9061.9885.26196.9898.866.671.78204
4OK75415TrueNo0166.711328.34148.312212.61186.91218.4110.132.73304
\n","
"],"text/plain":[" State Account length Area code International plan Voice mail plan \\\n","0 KS 128 415 False Yes \n","1 OH 107 415 False Yes \n","2 NJ 137 415 False No \n","3 OH 84 408 True No \n","4 OK 75 415 True No \n","\n"," Number vmail messages Total day minutes Total day calls \\\n","0 25 265.1 110 \n","1 26 161.6 123 \n","2 0 243.4 114 \n","3 0 299.4 71 \n","4 0 166.7 113 \n","\n"," Total day charge Total eve minutes Total eve calls Total eve charge \\\n","0 45.07 197.4 99 16.78 \n","1 27.47 195.5 103 16.62 \n","2 41.38 121.2 110 10.30 \n","3 50.90 61.9 88 5.26 \n","4 28.34 148.3 122 12.61 \n","\n"," Total night minutes Total night calls Total night charge \\\n","0 244.7 91 11.01 \n","1 254.4 103 11.45 \n","2 162.6 104 7.32 \n","3 196.9 89 8.86 \n","4 186.9 121 8.41 \n","\n"," Total intl minutes Total intl calls Total intl charge \\\n","0 10.0 3 2.70 \n","1 13.7 3 3.70 \n","2 12.2 5 3.29 \n","3 6.6 7 1.78 \n","4 10.1 3 2.73 \n","\n"," Customer service calls Churn new_Number_vmail_messages \n","0 1 0 100 \n","1 1 0 104 \n","2 0 0 4 \n","3 2 0 4 \n","4 3 0 4 "]},"metadata":{},"execution_count":31}]},{"cell_type":"markdown","metadata":{"id":"YkK8_gEBtU8b"},"source":["Аналогичную операцию можно провернуть с помощью метода `replace`:"]},{"cell_type":"code","metadata":{"id":"xop7OSmZtU8b","colab":{"base_uri":"https://localhost:8080/","height":261},"executionInfo":{"status":"ok","timestamp":1633609639156,"user_tz":-300,"elapsed":653,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"16d543f3-5373-45d1-cb55-b69355e6a5cb"},"source":["df = df.replace({\"Voice mail plan\": d})\n","df.head()"],"execution_count":32,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
StateAccount lengthArea codeInternational planVoice mail planNumber vmail messagesTotal day minutesTotal day callsTotal day chargeTotal eve minutesTotal eve callsTotal eve chargeTotal night minutesTotal night callsTotal night chargeTotal intl minutesTotal intl callsTotal intl chargeCustomer service callsChurnnew_Number_vmail_messages
0KS128415FalseTrue25265.111045.07197.49916.78244.79111.0110.032.7010100
1OH107415FalseTrue26161.612327.47195.510316.62254.410311.4513.733.7010104
2NJ137415FalseFalse0243.411441.38121.211010.30162.61047.3212.253.29004
3OH84408TrueFalse0299.47150.9061.9885.26196.9898.866.671.78204
4OK75415TrueFalse0166.711328.34148.312212.61186.91218.4110.132.73304
\n","
"],"text/plain":[" State Account length Area code International plan Voice mail plan \\\n","0 KS 128 415 False True \n","1 OH 107 415 False True \n","2 NJ 137 415 False False \n","3 OH 84 408 True False \n","4 OK 75 415 True False \n","\n"," Number vmail messages Total day minutes Total day calls \\\n","0 25 265.1 110 \n","1 26 161.6 123 \n","2 0 243.4 114 \n","3 0 299.4 71 \n","4 0 166.7 113 \n","\n"," Total day charge Total eve minutes Total eve calls Total eve charge \\\n","0 45.07 197.4 99 16.78 \n","1 27.47 195.5 103 16.62 \n","2 41.38 121.2 110 10.30 \n","3 50.90 61.9 88 5.26 \n","4 28.34 148.3 122 12.61 \n","\n"," Total night minutes Total night calls Total night charge \\\n","0 244.7 91 11.01 \n","1 254.4 103 11.45 \n","2 162.6 104 7.32 \n","3 196.9 89 8.86 \n","4 186.9 121 8.41 \n","\n"," Total intl minutes Total intl calls Total intl charge \\\n","0 10.0 3 2.70 \n","1 13.7 3 3.70 \n","2 12.2 5 3.29 \n","3 6.6 7 1.78 \n","4 10.1 3 2.73 \n","\n"," Customer service calls Churn new_Number_vmail_messages \n","0 1 0 100 \n","1 1 0 104 \n","2 0 0 4 \n","3 2 0 4 \n","4 3 0 4 "]},"metadata":{},"execution_count":32}]},{"cell_type":"markdown","metadata":{"id":"sJ9KC2CrtU8d"},"source":["### Группировка данных\n","\n","В общем случае группировка данных в Pandas выглядит следующим образом:\n","\n","```\n","df.groupby(by=grouping_columns)[columns_to_show].function()\n","```\n","\n","1. К датафрейму применяется метод **`groupby`**, который разделяет данные по `grouping_columns` – признаку или набору признаков.\n","2. Индексируем по нужным нам столбцам (`columns_to_show`). \n","3. 
К полученным группам применяется функция или несколько функций."]},{"cell_type":"markdown","metadata":{"id":"wiHvK8LFtU8d"},"source":["**Группирование данных в зависимости от значения признака `Churn` и вывод статистик по трём столбцам в каждой группе.**"]},{"cell_type":"code","metadata":{"id":"pXrstrQgtU8d","colab":{"base_uri":"https://localhost:8080/","height":175},"executionInfo":{"status":"ok","timestamp":1633609639159,"user_tz":-300,"elapsed":99,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"42ed5c1c-65c1-457f-cd03-6a26bb60da9f"},"source":["columns_to_show = [\"Total day minutes\", \"Total eve minutes\", \"Total night minutes\"]\n","\n","df.groupby([\"Churn\"])[columns_to_show].describe(percentiles=[])"],"execution_count":33,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
Total day minutesTotal eve minutesTotal night minutes
countmeanstdmin50%maxcountmeanstdmin50%maxcountmeanstdmin50%max
Churn
02850.0175.1850.180.0177.2315.62850.0199.0450.290.0199.6361.82850.0200.1351.1123.2200.25395.0
1483.0206.9169.000.0217.6350.8483.0212.4151.7370.9211.3363.7483.0205.2347.1347.4204.80354.9
\n","
"],"text/plain":[" Total day minutes Total eve minutes \\\n"," count mean std min 50% max count \n","Churn \n","0 2850.0 175.18 50.18 0.0 177.2 315.6 2850.0 \n","1 483.0 206.91 69.00 0.0 217.6 350.8 483.0 \n","\n"," Total night minutes \\\n"," mean std min 50% max count mean std \n","Churn \n","0 199.04 50.29 0.0 199.6 361.8 2850.0 200.13 51.11 \n","1 212.41 51.73 70.9 211.3 363.7 483.0 205.23 47.13 \n","\n"," \n"," min 50% max \n","Churn \n","0 23.2 200.25 395.0 \n","1 47.4 204.80 354.9 "]},"metadata":{},"execution_count":33}]},{"cell_type":"markdown","metadata":{"id":"_EZVRvNptU8d"},"source":["Сделаем то же самое, но немного по-другому, передав в `agg` список функций:"]},{"cell_type":"code","metadata":{"id":"9x5emqSwtU8e","colab":{"base_uri":"https://localhost:8080/","height":175},"executionInfo":{"status":"ok","timestamp":1633609639161,"user_tz":-300,"elapsed":97,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"9e81a4db-b89a-4e06-d928-70be8f26fdc5"},"source":["columns_to_show = [\"Total day minutes\", \"Total eve minutes\", \"Total night minutes\"]\n","\n","df.groupby([\"Churn\"])[columns_to_show].agg([np.mean, np.std, np.min, np.max])"],"execution_count":34,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
Total day minutesTotal eve minutesTotal night minutes
meanstdaminamaxmeanstdaminamaxmeanstdaminamax
Churn
0175.1850.180.0315.6199.0450.290.0361.8200.1351.1123.2395.0
1206.9169.000.0350.8212.4151.7370.9363.7205.2347.1347.4354.9
\n","
"],"text/plain":[" Total day minutes Total eve minutes \\\n"," mean std amin amax mean std amin \n","Churn \n","0 175.18 50.18 0.0 315.6 199.04 50.29 0.0 \n","1 206.91 69.00 0.0 350.8 212.41 51.73 70.9 \n","\n"," Total night minutes \n"," amax mean std amin amax \n","Churn \n","0 361.8 200.13 51.11 23.2 395.0 \n","1 363.7 205.23 47.13 47.4 354.9 "]},"metadata":{},"execution_count":34}]},{"cell_type":"markdown","metadata":{"id":"bMsnErVv_o77"},"source":["Сбрасываем индекс с группирующего поля"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":112},"id":"8HFw5er5_DhM","executionInfo":{"status":"ok","timestamp":1633609639162,"user_tz":-300,"elapsed":93,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"bbdaaf9f-323e-42aa-e086-768f78599e65"},"source":["df.groupby('Churn', as_index=False)['State'].count()"],"execution_count":35,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
ChurnState
002850
11483
\n","
"],"text/plain":[" Churn State\n","0 0 2850\n","1 1 483"]},"metadata":{},"execution_count":35}]},{"cell_type":"markdown","metadata":{"id":"mwqgfLAVtU8e"},"source":["### Сводные таблицы"]},{"cell_type":"markdown","metadata":{"id":"gYnSr64ptU8e"},"source":["Допустим, мы хотим посмотреть, как наблюдения в нашей выборке распределены в контексте двух признаков — `Churn` и `International plan`. Для этого мы можем построить **таблицу сопряженности**, воспользовавшись методом **`crosstab`**:"]},{"cell_type":"code","metadata":{"id":"yhgrYerutU8f","colab":{"base_uri":"https://localhost:8080/","height":143},"executionInfo":{"status":"ok","timestamp":1633609639163,"user_tz":-300,"elapsed":91,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"25d6f12a-34f1-4f8b-8f49-d8914548046d"},"source":["pd.crosstab(df[\"Churn\"], df[\"International plan\"])"],"execution_count":36,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
International planFalseTrue
Churn
02664186
1346137
\n","
"],"text/plain":["International plan False True \n","Churn \n","0 2664 186\n","1 346 137"]},"metadata":{},"execution_count":36}]},{"cell_type":"code","metadata":{"scrolled":true,"id":"cR0WankTtU8f","colab":{"base_uri":"https://localhost:8080/","height":143},"executionInfo":{"status":"ok","timestamp":1633609639165,"user_tz":-300,"elapsed":90,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"a949d3c8-88f3-4f4a-8a67-6208d2c44445"},"source":["pd.crosstab(df[\"Churn\"], df[\"Voice mail plan\"], normalize=True)"],"execution_count":37,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
Voice mail planFalseTrue
Churn
00.600.25
10.120.02
\n","
"],"text/plain":["Voice mail plan False True \n","Churn \n","0 0.60 0.25\n","1 0.12 0.02"]},"metadata":{},"execution_count":37}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"P_eONQ24W0aU","executionInfo":{"status":"ok","timestamp":1633609639166,"user_tz":-300,"elapsed":87,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"525dd141-f2d7-47a6-c4ea-6bd3be3386a9"},"source":["df[\"Customer service calls\"].unique()"],"execution_count":38,"outputs":[{"output_type":"execute_result","data":{"text/plain":["array([1, 0, 2, 3, 4, 5, 7, 9, 6, 8])"]},"metadata":{},"execution_count":38}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"abLbV3cKARwi","executionInfo":{"status":"ok","timestamp":1633609639167,"user_tz":-300,"elapsed":75,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"c5f227e8-5570-4c48-e7bf-48bb31227734"},"source":["df[\"Customer service calls\"].nunique()"],"execution_count":39,"outputs":[{"output_type":"execute_result","data":{"text/plain":["10"]},"metadata":{},"execution_count":39}]},{"cell_type":"markdown","metadata":{"id":"1F8uRUIwtU8h"},"source":["Мы видим, что большинство пользователей — лояльные и при этом не пользуются дополнительными услугами (международного роуминга / голосовой почты)."]},{"cell_type":"markdown","metadata":{"id":"reNYiSlJtU8h"},"source":["Продвинутые пользователи `Excel` наверняка вспомнят о такой фиче, как **сводные таблицы** (`pivot tables`). 
В `Pandas` за сводные таблицы отвечает метод **`pivot_table`**, который принимает в качестве параметров:\n","\n","* `values` – список переменных, по которым требуется рассчитать нужные статистики,\n","* `index` – список переменных, по которым нужно сгруппировать данные,\n","* `aggfunc` — то, что нам, собственно, нужно посчитать по группам — сумму, среднее, максимум, минимум или что-то ещё.\n","\n","Давайте посмотрим среднее число дневных, вечерних и ночных звонков для разных `Area code`:"]},{"cell_type":"code","metadata":{"scrolled":false,"id":"xabiD5fktU8h","colab":{"base_uri":"https://localhost:8080/","height":175},"executionInfo":{"status":"ok","timestamp":1633609639168,"user_tz":-300,"elapsed":68,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"a51980ac-bbb9-44b4-c453-1085d9014a7c"},"source":["df.pivot_table(\n"," [\"Total day calls\", \"Total eve calls\", \"Total night calls\"],\n"," [\"Area code\"],\n"," aggfunc=\"mean\",\n",").head(10)"],"execution_count":40,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
Total day callsTotal eve callsTotal night calls
Area code
408100.5099.7999.04
415100.58100.50100.40
510100.1099.67100.60
\n","
"],"text/plain":[" Total day calls Total eve calls Total night calls\n","Area code \n","408 100.50 99.79 99.04\n","415 100.58 100.50 100.40\n","510 100.10 99.67 100.60"]},"metadata":{},"execution_count":40}]},{"cell_type":"markdown","metadata":{"id":"0_haYJdjtU8h"},"source":["### Преобразование датафреймов\n","\n","Как и многие другие вещи, добавлять столбцы в `DataFrame` можно несколькими способами."]},{"cell_type":"markdown","metadata":{"id":"35zMtFv8tU8i"},"source":["Например, мы хотим посчитать общее количество звонков для всех пользователей. Создадим объект `total_calls` типа `Series` и вставим его в датафрейм:"]},{"cell_type":"code","metadata":{"id":"z1ktVfD0tU8i","colab":{"base_uri":"https://localhost:8080/","height":261},"executionInfo":{"status":"ok","timestamp":1633609639171,"user_tz":-300,"elapsed":67,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"a0c006bf-4504-4c46-af1d-8b8ab8167d79"},"source":["total_calls = (\n"," df[\"Total day calls\"]\n"," + df[\"Total eve calls\"]\n"," + df[\"Total night calls\"]\n"," + df[\"Total intl calls\"]\n",")\n","df.insert(loc=len(df.columns), column=\"Total calls\", value=total_calls)\n","# loc - номер столбца, после которого нужно вставить данный Series\n","# мы указали len(df.columns), чтобы вставить его в самом конце\n","df.head()"],"execution_count":41,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
StateAccount lengthArea codeInternational planVoice mail planNumber vmail messagesTotal day minutesTotal day callsTotal day chargeTotal eve minutesTotal eve callsTotal eve chargeTotal night minutesTotal night callsTotal night chargeTotal intl minutesTotal intl callsTotal intl chargeCustomer service callsChurnnew_Number_vmail_messagesTotal calls
0KS128415FalseTrue25265.111045.07197.49916.78244.79111.0110.032.7010100303
1OH107415FalseTrue26161.612327.47195.510316.62254.410311.4513.733.7010104332
2NJ137415FalseFalse0243.411441.38121.211010.30162.61047.3212.253.29004333
3OH84408TrueFalse0299.47150.9061.9885.26196.9898.866.671.78204255
4OK75415TrueFalse0166.711328.34148.312212.61186.91218.4110.132.73304359
\n","
"],"text/plain":[" State Account length Area code International plan Voice mail plan \\\n","0 KS 128 415 False True \n","1 OH 107 415 False True \n","2 NJ 137 415 False False \n","3 OH 84 408 True False \n","4 OK 75 415 True False \n","\n"," Number vmail messages Total day minutes Total day calls \\\n","0 25 265.1 110 \n","1 26 161.6 123 \n","2 0 243.4 114 \n","3 0 299.4 71 \n","4 0 166.7 113 \n","\n"," Total day charge Total eve minutes Total eve calls Total eve charge \\\n","0 45.07 197.4 99 16.78 \n","1 27.47 195.5 103 16.62 \n","2 41.38 121.2 110 10.30 \n","3 50.90 61.9 88 5.26 \n","4 28.34 148.3 122 12.61 \n","\n"," Total night minutes Total night calls Total night charge \\\n","0 244.7 91 11.01 \n","1 254.4 103 11.45 \n","2 162.6 104 7.32 \n","3 196.9 89 8.86 \n","4 186.9 121 8.41 \n","\n"," Total intl minutes Total intl calls Total intl charge \\\n","0 10.0 3 2.70 \n","1 13.7 3 3.70 \n","2 12.2 5 3.29 \n","3 6.6 7 1.78 \n","4 10.1 3 2.73 \n","\n"," Customer service calls Churn new_Number_vmail_messages Total calls \n","0 1 0 100 303 \n","1 1 0 104 332 \n","2 0 0 4 333 \n","3 2 0 4 255 \n","4 3 0 4 359 "]},"metadata":{},"execution_count":41}]},{"cell_type":"markdown","metadata":{"id":"nB0mpCA1tU8j"},"source":["Добавить столбец из имеющихся можно и проще, не создавая промежуточных `Series`:"]},{"cell_type":"code","metadata":{"id":"ZVpdhf1etU8k","colab":{"base_uri":"https://localhost:8080/","height":261},"executionInfo":{"status":"ok","timestamp":1633609639173,"user_tz":-300,"elapsed":64,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"93b3fe31-2757-4cb7-afcc-7c10f765bb46"},"source":["df[\"Total charge\"] = (\n"," df[\"Total day charge\"]\n"," + df[\"Total eve charge\"]\n"," + df[\"Total night charge\"]\n"," + df[\"Total intl 
charge\"]\n",")\n","\n","df.head()"],"execution_count":42,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
StateAccount lengthArea codeInternational planVoice mail planNumber vmail messagesTotal day minutesTotal day callsTotal day chargeTotal eve minutesTotal eve callsTotal eve chargeTotal night minutesTotal night callsTotal night chargeTotal intl minutesTotal intl callsTotal intl chargeCustomer service callsChurnnew_Number_vmail_messagesTotal callsTotal charge
0KS128415FalseTrue25265.111045.07197.49916.78244.79111.0110.032.701010030375.56
1OH107415FalseTrue26161.612327.47195.510316.62254.410311.4513.733.701010433259.24
2NJ137415FalseFalse0243.411441.38121.211010.30162.61047.3212.253.2900433362.29
3OH84408TrueFalse0299.47150.9061.9885.26196.9898.866.671.7820425566.80
4OK75415TrueFalse0166.711328.34148.312212.61186.91218.4110.132.7330435952.09
\n","
"],"text/plain":[" State Account length Area code International plan Voice mail plan \\\n","0 KS 128 415 False True \n","1 OH 107 415 False True \n","2 NJ 137 415 False False \n","3 OH 84 408 True False \n","4 OK 75 415 True False \n","\n"," Number vmail messages Total day minutes Total day calls \\\n","0 25 265.1 110 \n","1 26 161.6 123 \n","2 0 243.4 114 \n","3 0 299.4 71 \n","4 0 166.7 113 \n","\n"," Total day charge Total eve minutes Total eve calls Total eve charge \\\n","0 45.07 197.4 99 16.78 \n","1 27.47 195.5 103 16.62 \n","2 41.38 121.2 110 10.30 \n","3 50.90 61.9 88 5.26 \n","4 28.34 148.3 122 12.61 \n","\n"," Total night minutes Total night calls Total night charge \\\n","0 244.7 91 11.01 \n","1 254.4 103 11.45 \n","2 162.6 104 7.32 \n","3 196.9 89 8.86 \n","4 186.9 121 8.41 \n","\n"," Total intl minutes Total intl calls Total intl charge \\\n","0 10.0 3 2.70 \n","1 13.7 3 3.70 \n","2 12.2 5 3.29 \n","3 6.6 7 1.78 \n","4 10.1 3 2.73 \n","\n"," Customer service calls Churn new_Number_vmail_messages Total calls \\\n","0 1 0 100 303 \n","1 1 0 104 332 \n","2 0 0 4 333 \n","3 2 0 4 255 \n","4 3 0 4 359 \n","\n"," Total charge \n","0 75.56 \n","1 59.24 \n","2 62.29 \n","3 66.80 \n","4 52.09 "]},"metadata":{},"execution_count":42}]},{"cell_type":"markdown","metadata":{"id":"xrn0pZo1tU8l"},"source":["Чтобы удалить столбцы или строки, воспользуйтесь методом `drop`, передавая в качестве аргумента нужные индексы и требуемое значение параметра `axis` (`1`, если удаляете столбцы, и ничего или `0`, если удаляете строки):"]},{"cell_type":"code","metadata":{"scrolled":false,"id":"oSvOmNv-tU8l","colab":{"base_uri":"https://localhost:8080/","height":261},"executionInfo":{"status":"ok","timestamp":1633609639175,"user_tz":-300,"elapsed":62,"user":{"displayName":"Александр 
Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"d0304dcc-8e8a-42c9-8765-1822ae6f5c44"},"source":["# избавляемся от созданных только что столбцов\n","df = df.drop([\"Total charge\", \"Total calls\"], axis=1)\n","\n","df.drop([1, 2]).head() # а вот так можно удалить строчки"],"execution_count":43,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
StateAccount lengthArea codeInternational planVoice mail planNumber vmail messagesTotal day minutesTotal day callsTotal day chargeTotal eve minutesTotal eve callsTotal eve chargeTotal night minutesTotal night callsTotal night chargeTotal intl minutesTotal intl callsTotal intl chargeCustomer service callsChurnnew_Number_vmail_messages
0KS128415FalseTrue25265.111045.07197.49916.78244.79111.0110.032.7010100
3OH84408TrueFalse0299.47150.9061.9885.26196.9898.866.671.78204
4OK75415TrueFalse0166.711328.34148.312212.61186.91218.4110.132.73304
5AL118510TrueFalse0223.49837.98220.610118.75203.91189.186.361.70004
6MA121510FalseTrue24218.28837.09348.510829.62212.61189.577.572.033096
\n","
"],"text/plain":[" State Account length Area code International plan Voice mail plan \\\n","0 KS 128 415 False True \n","3 OH 84 408 True False \n","4 OK 75 415 True False \n","5 AL 118 510 True False \n","6 MA 121 510 False True \n","\n"," Number vmail messages Total day minutes Total day calls \\\n","0 25 265.1 110 \n","3 0 299.4 71 \n","4 0 166.7 113 \n","5 0 223.4 98 \n","6 24 218.2 88 \n","\n"," Total day charge Total eve minutes Total eve calls Total eve charge \\\n","0 45.07 197.4 99 16.78 \n","3 50.90 61.9 88 5.26 \n","4 28.34 148.3 122 12.61 \n","5 37.98 220.6 101 18.75 \n","6 37.09 348.5 108 29.62 \n","\n"," Total night minutes Total night calls Total night charge \\\n","0 244.7 91 11.01 \n","3 196.9 89 8.86 \n","4 186.9 121 8.41 \n","5 203.9 118 9.18 \n","6 212.6 118 9.57 \n","\n"," Total intl minutes Total intl calls Total intl charge \\\n","0 10.0 3 2.70 \n","3 6.6 7 1.78 \n","4 10.1 3 2.73 \n","5 6.3 6 1.70 \n","6 7.5 7 2.03 \n","\n"," Customer service calls Churn new_Number_vmail_messages \n","0 1 0 100 \n","3 2 0 4 \n","4 3 0 4 \n","5 0 0 4 \n","6 3 0 96 "]},"metadata":{},"execution_count":43}]},{"cell_type":"markdown","metadata":{"id":"JLDUG5hNtU8l"},"source":["--------\n","\n","\n","\n","## Первые попытки прогнозирования оттока\n"]},{"cell_type":"markdown","metadata":{"id":"1sv6q4lNtU8m"},"source":["Посмотрим, как отток связан с признаком *\"Подключение международного роуминга\"* (`International plan`). 
Сделаем это с помощью сводной таблички `crosstab`, а также путем иллюстрации с `Seaborn` (как именно строить такие картинки и анализировать с их помощью графики – материал следующей статьи.)"]},{"cell_type":"code","metadata":{"collapsed":true,"id":"M7cBvVn-tU8m","executionInfo":{"status":"ok","timestamp":1633609639176,"user_tz":-300,"elapsed":57,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}}},"source":["# надо дополнительно установить (команда в терминале)\n","# чтоб картинки рисовались в тетрадке\n","# !conda install seaborn\n","%matplotlib inline\n","import matplotlib.pyplot as plt\n","import seaborn as sns\n","\n","plt.rcParams[\"figure.figsize\"] = (8, 6)"],"execution_count":44,"outputs":[]},{"cell_type":"code","metadata":{"id":"8ZJBwL8NtU8m","colab":{"base_uri":"https://localhost:8080/","height":175},"executionInfo":{"status":"ok","timestamp":1633609639177,"user_tz":-300,"elapsed":56,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"334f814d-2c27-4f67-cabd-17159188ca2b"},"source":["pd.crosstab(df[\"Churn\"], df[\"International plan\"], margins=True)"],"execution_count":45,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
International planFalseTrueAll
Churn
026641862850
1346137483
All30103233333
\n","
"],"text/plain":["International plan False True All\n","Churn \n","0 2664 186 2850\n","1 346 137 483\n","All 3010 323 3333"]},"metadata":{},"execution_count":45}]},{"cell_type":"code","metadata":{"id":"BGwuNSretU8n","colab":{"base_uri":"https://localhost:8080/","height":388},"executionInfo":{"status":"ok","timestamp":1633609640633,"user_tz":-300,"elapsed":1509,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"0b7deac2-30bb-4ec9-e84b-ec60b64ceeff"},"source":["sns.countplot(x=\"International plan\", hue=\"Churn\", data=df)\n","plt.savefig(\"int_plan_and_churn.png\", dpi=300);"],"execution_count":46,"outputs":[{"output_type":"display_data","data":{"image/png":"\n","text/plain":["
"]},"metadata":{"needs_background":"light"}}]},{"cell_type":"markdown","metadata":{"id":"j8CpA17FtU8o"},"source":["Видим, что когда роуминг подключен, доля оттока намного выше – интересное наблюдение! Возможно, большие и плохо контролируемые траты в роуминге очень конфликтогенны и приводят к недовольству клиентов телеком-оператора и, соответственно, к их оттоку. "]},{"cell_type":"markdown","metadata":{"id":"JDm9ePM4tU8o"},"source":["Далее посмотрим на еще один важный признак – *\"Число обращений в сервисный центр\"* (`Customer service calls`). Также построим сводную таблицу и картинку."]},{"cell_type":"code","metadata":{"id":"UKGrw7fbtU8p","colab":{"base_uri":"https://localhost:8080/","height":175},"executionInfo":{"status":"ok","timestamp":1633609640635,"user_tz":-300,"elapsed":25,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"7801c2d5-2a6d-4872-cde2-f847b851b3c5"},"source":["pd.crosstab(df[\"Churn\"], df[\"Customer service calls\"], margins=True)"],"execution_count":47,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
Customer service calls0123456789All
Churn
06051059672385902684102850
1921228744764014512483
All697118175942916666229223333
\n","
"],"text/plain":["Customer service calls 0 1 2 3 4 5 6 7 8 9 All\n","Churn \n","0 605 1059 672 385 90 26 8 4 1 0 2850\n","1 92 122 87 44 76 40 14 5 1 2 483\n","All 697 1181 759 429 166 66 22 9 2 2 3333"]},"metadata":{},"execution_count":47}]},{"cell_type":"code","metadata":{"id":"sMJh9m1VtU8p","colab":{"base_uri":"https://localhost:8080/","height":388},"executionInfo":{"status":"ok","timestamp":1633609642719,"user_tz":-300,"elapsed":2104,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"43183fcc-f324-4492-acd6-30a6cf0615b5"},"source":["sns.countplot(x=\"Customer service calls\", hue=\"Churn\", data=df)\n","plt.savefig(\"serv_calls__and_churn.png\", dpi=300);"],"execution_count":48,"outputs":[{"output_type":"display_data","data":{"image/png":"\n","text/plain":["
"]},"metadata":{"needs_background":"light"}}]},{"cell_type":"markdown","metadata":{"id":"x2ZJPe-DtU8q"},"source":["Может быть, по сводной табличке это не так хорошо видно (или скучно ползать взглядом по строчкам с цифрами), а вот картинка красноречиво свидетельствует о том, что доля оттока сильно возрастает начиная с 4 звонков в сервисный центр. "]},{"cell_type":"markdown","metadata":{"id":"Dqj4LVe3tU8q"},"source":["Добавим теперь в наш DataFrame бинарный признак — результат сравнения `Customer service calls > 3`. И еще раз посмотрим, как он связан с оттоком. "]},{"cell_type":"code","metadata":{"scrolled":true,"id":"o9R6NM8ltU8q","colab":{"base_uri":"https://localhost:8080/","height":175},"executionInfo":{"status":"ok","timestamp":1633609642722,"user_tz":-300,"elapsed":14,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"ab3e7ee2-24f1-455b-c973-e994cf85a4c7"},"source":["df[\"Many_service_calls\"] = (df[\"Customer service calls\"] > 3).astype(\"int\")\n","\n","pd.crosstab(df[\"Many_service_calls\"], df[\"Churn\"], margins=True)"],"execution_count":49,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
Churn01All
Many_service_calls
027213453066
1129138267
All28504833333
\n","
"],"text/plain":["Churn 0 1 All\n","Many_service_calls \n","0 2721 345 3066\n","1 129 138 267\n","All 2850 483 3333"]},"metadata":{},"execution_count":49}]},{"cell_type":"code","metadata":{"id":"nUQk7G96tU8r","colab":{"base_uri":"https://localhost:8080/","height":389},"executionInfo":{"status":"ok","timestamp":1633609643487,"user_tz":-300,"elapsed":777,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"ed0370de-9813-45fa-f666-ee36ce5206a1"},"source":["sns.countplot(x=\"Many_service_calls\", hue=\"Churn\", data=df)\n","plt.savefig(\"many_serv_calls__and_churn.png\", dpi=300);"],"execution_count":50,"outputs":[{"output_type":"display_data","data":{"image/png":"iVBORw0KGgoAAAANSUhEUgAAAfsAAAF0CAYAAAAkbZDfAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAZPUlEQVR4nO3de5BedZ3n8feHJBBHGeUSENPBBIgoQUEJqOs6k/XCJTogU6yCDhexJjoFW7o7M7XI7gqijqxXBnWtyiwIzAVkCllYJwUbGGZ0XBUSRS5BhggoneUSg+UdufjdP54TfIzdncb0k6f7x/tV9VSf8z3n/M63U9X5POfynCdVhSRJatcOw25AkiQNlmEvSVLjDHtJkhpn2EuS1DjDXpKkxhn2kiQ1bvawGxiE3XffvRYuXDjsNiRJ2m7Wrl37/aqaN9ayJsN+4cKFrFmzZthtSJK03ST57njLPI0vSVLjDHtJkhpn2EuS1Lgmr9lLkvRUPfbYY4yOjvLII48Mu5UJzZ07l5GREebMmTPpbQx7SZKA0dFRdt55ZxYuXEiSYbczpqpi06ZNjI6OsmjRoklv52l8SZKARx55hN12223aBj1AEnbbbbenfPbBsJckqTOdg36z36ZHw16SpEl44IEHOP7449l333055JBDWL58OStXruSNb3zjsFvbKsNekqStqCqOPfZYli1bxne+8x3Wrl3Lhz/8YR588MFtGvfxxx+fog4nZthLkrQVN9xwA3PmzOFd73rXk7WDDjqIV7/61fzkJz/huOOO44UvfCFve9vbqCqg9zTX73//+wCsWbOGZcuWAXD22Wdz4okn8qpXvYoTTzyRs88+m1NPPZVly5axzz77cP755095/4a9JElbcdttt3HIIYeMueyb3/wm5513HuvWrePuu+/mK1/5ylbHW7duHddddx2XXnopAN/+9re59tprufHGG3n/+9/PY489NqX9G/aSJG2Dww47jJGREXbYYQcOPvhg7r333q1uc/TRR/OMZzzjyfk3vOEN7LTTTuy+++7sscce23x5YEuGvSRJW7FkyRLWrl075rKddtrpyelZs2Y9eR1+9uzZ/PKXvwT4jY/KPfOZz5zUGFPFh+o8RYf8+SXDbqF5az960rBbkKRf85rXvIYzzzyTlStXsmLFCgBuueUWvvzlL
4+7zcKFC1m7di1HHXUUV1xxxfZqdUwe2UuStBVJuPLKK7nuuuvYd999WbJkCe9973t57nOfO+42Z511Fu9+97tZunQps2bN2o7d/qZsvmuwJUuXLq1BfZ+9R/aD55G9pGG44447eNGLXjTsNiZlrF6TrK2qpWOt75G9JEmNM+wlSWqcYS9JUuMMe0mSGmfYS5LUOMNekqTGGfaSJE0j11xzDfvvvz/77bcf55577pSM6RP0JEkaw1Q/V2UyzxB54oknOO2001i9ejUjIyMceuihHH300RxwwAHbtG+P7CVJmiZuvPFG9ttvP/bZZx923HFHjj/+eK666qptHtewlyRpmtiwYQMLFix4cn5kZIQNGzZs87iGvSRJjTPsJUmaJubPn89999335Pzo6Cjz58/f5nENe0mSpolDDz2Uu+66i3vuuYdHH32Uyy67jKOPPnqbx/VufEmSponZs2fz6U9/miOOOIInnniCU089lSVLlmz7uFPQmyRJzRnW120vX76c5cuXT+mYnsaXJKlxhr0kSY0z7CVJapxhL0lS4wYW9kkWJLkhyboktyd5d1c/O8mGJDd3r+V927w3yfokdyY5oq9+ZFdbn+SMQfUsSVKLBnk3/uPAn1bVN5LsDKxNsrpb9smq+lj/ykkOAI4HlgDPA65L8oJu8WeA1wOjwE1Jrq6qdQPsXZKkZgzsyL6q7q+qb3TTPwbuACZ6DNAxwGVV9YuqugdYDxzWvdZX1d1V9ShwWbeuJEnNOfXUU9ljjz048MADp2zM7fI5+yQLgZcCXwdeBZye5CRgDb2j/x/QeyPwtb7NRvnVm4P7tqi/fIx9rABWAOy9995T+wtIkp52vnfOi6d0vL3fd+uk1jvllFM4/fTTOemkqfuc/8Bv0EvyLOAK4D1V9SPgs8C+wMHA/cDHp2I/VbWyqpZW1dJ58+ZNxZCSJG13v/d7v8euu+46pWMO9Mg+yRx6Qf+3VfUFgKp6sG/5XwFf7GY3AAv6Nh/pakxQlyRJWzHIu/EDXADcUVWf6Kvv1bfascBt3fTVwPFJdkqyCFgM3AjcBCxOsijJjvRu4rt6UH1LktSaQR7Zvwo4Ebg1yc1d7UzghCQHAwXcC7wToKpuT3I5sI7enfynVdUTAElOB64FZgEXVtXtA+xbkqSmDCzsq+pfgIyxaNUE23wI+NAY9VUTbSdJksbnE/QkSZpGTjjhBF75yldy5513MjIywgUXXLDNY/oVt5IkjWGyH5WbapdeeumUj+mRvSRJjTPsJUlqnGEvSVLjDHtJkjpVNewWtuq36dGwlyQJmDt3Lps2bZrWgV9VbNq0iblz5z6l7bwbX5IkYGRkhNHRUTZu3DjsViY0d+5cRkZGntI2hr0kScCcOXNYtGjRsNsYCE/jS5LUOMNekqTGGfaSJDXOsJckqXGGvSRJjTPsJUlqnGEvSVLjDHtJkhpn2EuS1DjDXpKkxhn2kiQ1zrCXJKlxhr0kSY0z7CVJapxhL0lS4wx7SZIaZ9hLktQ4w16SpMYZ9pIkNc6wlySpcYa9JEmNM+wlSWqcYS9JUuMMe0mSGmfYS5LUOMNekqTGGfaSJDXOsJckqXGGvSRJjTPsJUlqnGEvSVLjDHtJkhpn2EuS1DjDXpKkxhn2kiQ1zrCXJKlxhr0kSY0z7CVJapxhL0lS4wYW9kkWJLkhyboktyd5d1ffNcnqJHd1P3fp6klyfpL1SW5J8rK+sU7u1r8rycmD6lmSpBYN8sj+ceBPq+oA4BXAaUkOAM4Arq+qxcD13TzAUcDi7rUC+Cz03hwAZwEvBw4Dztr8BkGSJG3dwMK+qu6vqm900z8G7gDmA8cAF3erXQy8qZs+Briker4GPCfJXsARwOqqeriqfgCsBo4cVN+SJLVmu1yzT7IQeCnwdWDPqrq/W/QAsGc3PR+4r2+z0a42Xl2SJE3CwMM+ybOAK4D3VNWP+pdVVQE1RftZkWRNkjUbN26ciiElSWrCQMM+yRx6Q
f+3VfWFrvxgd3qe7udDXX0DsKBv85GuNl7911TVyqpaWlVL582bN7W/iCRJM9gg78YPcAFwR1V9om/R1cDmO+pPBq7qq5/U3ZX/CuCH3en+a4HDk+zS3Zh3eFeTJEmTMHuAY78KOBG4NcnNXe1M4Fzg8iTvAL4LvLlbtgpYDqwHfga8HaCqHk7yAeCmbr1zqurhAfYtSVJTBhb2VfUvQMZZ/Nox1i/gtHHGuhC4cOq6kyTp6cMn6EmS1DjDXpKkxhn2kiQ1zrCXJKlxhr0kSY0z7CVJapxhL0lS4wx7SZIaZ9hLktQ4w16SpMYZ9pIkNc6wlySpcYa9JEmNM+wlSWqcYS9JUuMMe0mSGmfYS5LUOMNekqTGGfaSJDXOsJckqXGGvSRJjTPsJUlqnGEvSVLjDHtJkhpn2EuS1DjDXpKkxhn2kiQ1zrCXJKlxhr0kSY0z7CVJapxhL0lS4wx7SZIaZ9hLktQ4w16SpMYZ9pIkNc6wlySpcYa9JEmNM+wlSWqcYS9JUuMMe0mSGmfYS5LUOMNekqTGGfaSJDXOsJckqXGGvSRJjTPsJUlqnGEvSVLjDHtJkho3qbBPcv1kalssvzDJQ0lu66udnWRDkpu71/K+Ze9Nsj7JnUmO6Ksf2dXWJzljcr+WJEnabPZEC5PMBX4H2D3JLkC6Rb8LzN/K2BcBnwYu2aL+yar62Bb7OQA4HlgCPA+4LskLusWfAV4PjAI3Jbm6qtZtZd+SJKkzYdgD7wTeQy+A1/KrsP8RvSAfV1V9KcnCSfZxDHBZVf0CuCfJeuCwbtn6qrobIMll3bqGvSRJkzThafyq+suqWgT8WVXtU1WLutdBVTVh2E/g9CS3dKf5d+lq84H7+tYZ7Wrj1SVJ0iRN6pp9VX0qyb9J8tYkJ21+/Rb7+yywL3AwcD/w8d9ijDElWZFkTZI1GzdunKphJUma8bZ2Gh+AJH9NL6RvBp7oysVvXo+fUFU92DfmXwFf7GY3AAv6Vh3pakxQ33LslcBKgKVLl9ZT6UuSpJZNKuyBpcABVbVNIZpkr6q6v5s9Fth8p/7VwN8l+QS9+wMWAzfSu0dgcZJF9EL+eOCt29KDJElPN5MN+9uA59I79T4pSS4FltG7k38UOAtYluRgemcF7qV3AyBVdXuSy+ndePc4cFpVPdGNczpwLTALuLCqbp9sD5IkafJhvzuwLsmNwC82F6vq6PE2qKoTxihfMMH6HwI+NEZ9FbBqkn1KkqQtTDbszx5kE5IkaXAmFfZV9c+DbkSSJA3GZO/G/zG96+wAOwJzgJ9W1e8OqjFJkjQ1Jntkv/Pm6SSh9xS7VwyqKUmSNHWe8rfeVc//Ao7Y6sqSJGnoJnsa/w/7Zneg97n7RwbSkSRJmlKTvRv/D/qmH6f3GfljprwbSZI05SZ7zf7tg25EkiQNxqSu2ScZSXJlkoe61xVJRgbdnCRJ2naTvUHvc/SeX/+87vW/u5okSZrmJhv286rqc1X1ePe6CJg3wL4kSdIUmWzYb0ryR0lmda8/AjYNsjFJkjQ1Jhv2pwJvBh6g9813xwGnDKgnSZI0hSb70btzgJOr6gcASXYFPkbvTYAkSZrGJntk/5LNQQ9QVQ8DLx1MS5IkaSpNNux3SLLL5pnuyH6yZwUkSdIQTTawPw58Ncnfd/P/HvjQYFqSJElTabJP0LskyRrgNV3pD6tq3eDakiRJU2XSp+K7cDfgJUmaYZ7yV9xKkqSZxbCXJKlxhr0kSY0z7CVJapxhL0lS4wx7SZIaZ9hLktQ4w16SpMYZ9pIkNc6wlySpcYa9JEmNM+wlSWqcYS9JUuMMe0mSGmfYS5LUOMNekqTGGfaSJDXOsJckqXGGvSRJjTPsJUlqnGEvSVLjDHtJkhpn2EuS1DjDXpKkxhn2kiQ1zrCXJKlxhr0kSY0z7CVJapxhL0lS4wYW9kkuTPJQktv6arsmWZ3kru7nLl09Sc5Psj7JLUle1rfNyd36dyU5eVD9SpLUq
kEe2V8EHLlF7Qzg+qpaDFzfzQMcBSzuXiuAz0LvzQFwFvBy4DDgrM1vECRJ0uQMLOyr6kvAw1uUjwEu7qYvBt7UV7+ker4GPCfJXsARwOqqeriqfgCs5jffQEiSpAls72v2e1bV/d30A8Ce3fR84L6+9Ua72nh1SZI0SUO7Qa+qCqipGi/JiiRrkqzZuHHjVA0rSdKMt73D/sHu9Dzdz4e6+gZgQd96I11tvPpvqKqVVbW0qpbOmzdvyhuXJGmm2t5hfzWw+Y76k4Gr+uondXflvwL4YXe6/1rg8CS7dDfmHd7VJEnSJM0e1MBJLgWWAbsnGaV3V/25wOVJ3gF8F3hzt/oqYDmwHvgZ8HaAqno4yQeAm7r1zqmqLW/6kyRJExhY2FfVCeMseu0Y6xZw2jjjXAhcOIWtSZL0tOIT9CRJapxhL0lS4wx7SZIaZ9hLktQ4w16SpMYZ9pIkNc6wlySpcYa9JEmNM+wlSWqcYS9JUuMMe0mSGmfYS5LUOMNekqTGGfaSJDXOsJckqXGGvSRJjTPsJUlqnGEvSVLjDHtJkhpn2EuS1DjDXpKkxhn2kiQ1zrCXJKlxhr0kSY0z7CVJapxhL0lS4wx7SZIaZ9hLktQ4w16SpMYZ9pIkNc6wlySpcYa9JEmNM+wlSWqcYS9JUuMMe0mSGmfYS5LUOMNekqTGGfaSJDXOsJckqXGGvSRJjTPsJUlqnGEvSVLjDHtJkhpn2EuS1DjDXpKkxhn2kiQ1zrCXJKlxhr0kSY0bStgnuTfJrUluTrKmq+2aZHWSu7qfu3T1JDk/yfoktyR52TB6liRpphrmkf2/q6qDq2ppN38GcH1VLQau7+YBjgIWd68VwGe3e6eSJM1g0+k0/jHAxd30xcCb+uqXVM/XgOck2WsYDUqSNBMNK+wL+D9J1iZZ0dX2rKr7u+kHgD276fnAfX3bjnY1SZI0CbOHtN9/W1UbkuwBrE7y7f6FVVVJ6qkM2L1pWAGw9957T12nkiTNcEM5sq+qDd3Ph4ArgcOABzefnu9+PtStvgFY0Lf5SFfbcsyVVbW0qpbOmzdvkO1LkjSjbPewT/LMJDtvngYOB24DrgZO7lY7Gbiqm74aOKm7K/8VwA/7TvdLkqStGMZp/D2BK5Ns3v/fVdU1SW4CLk/yDuC7wJu79VcBy4H1wM+At2//liVJmrm2e9hX1d3AQWPUNwGvHaNewGnboTVJkpo0nT56J0mSBsCwlySpcYa9JEmNM+wlSWqcYS9JUuMMe0mSGmfYS5LUOMNekqTGGfaSJDXOsJckqXGGvSRJjTPsJUlqnGEvSVLjhvEVt9KEvnfOi4fdQvP2ft+tw25B0nbkkb0kSY0z7CVJapxhL0lS4wx7SZIaZ9hLktQ4w16SpMYZ9pIkNc6wlySpcYa9JEmNM+wlSWqcYS9JUuMMe0mSGmfYS5LUOMNekqTGGfaSJDXOsJckqXGGvSRJjTPsJUlqnGEvSVLjDHtJkhpn2EuS1DjDXpKkxs0edgOSpOnhe+e8eNgtPC3s/b5bt/s+DXtJM8Ihf37JsFto3pU7D7sDDYqn8SVJapxhL0lS4wx7SZIaZ9hLktQ4w16SpMYZ9pIkNc6wlySpcYa9JEmNM+wlSWqcYS9JUuNmTNgnOTLJnUnWJzlj2P1IkjRTzIiwTzIL+AxwFHAAcEKSA4bblSRJM8OMCHvgMGB9Vd1dVY8ClwHHDLknSZJmhJkS9vOB+/rmR7uaJEnaima+4jbJCmBFN/uTJHcOsx/99p4PuwPfH3YfTTsrw+5A05B/e9vJ4P7+nj/egpkS9huABX3zI13tSVW1Eli5PZvSYCRZU1VLh92H9HTj3167Zspp/JuAxUkWJdkROB64esg9SZI0I8yII/uqejzJ6cC1wCzgwqq6fchtSZI0I8yIsAeoqlXAqmH3oe3CyzHScPi316hU1bB7kCRJAzRTrtlLkqTfkmGvacXHIkvbX5ILkzyU5LZh96LBMOw1bfhYZ
GloLgKOHHYTGhzDXtOJj0WWhqCqvgQ8POw+NDiGvaYTH4ssSQNg2EuS1DjDXtPJVh+LLEl66gx7TSc+FlmSBsCw17RRVY8Dmx+LfAdwuY9FlgYvyaXAV4H9k4wmecewe9LU8gl6kiQ1ziN7SZIaZ9hLktQ4w16SpMYZ9pIkNc6wlySpcYa9JEmNM+ylaShJJfmbvvnZSTYm+eIw+9oWSc5J8roh7fvsJH/WTV+U5Lhh9CENy+xhNyBpTD8FDkzyjKr6OfB6ZsCjg5PM7h6O9Buq6n3bux9JPR7ZS9PXKuAN3fQJwKWbFyQ5LMlXk3wzyf9Nsn9XPyXJF5Jck+SuJB/p6qcmOa9v+z9O8smxdprkmUn+Icm3ktyW5C1d/ZAk/5xkbZJrk+zV1f8pyXlJ1gD/Jcl3k+zQN9Z9Seb0H1EnObTr+1tJbkyyc5JZST6a5KYktyR550T/OEn+c5JbuzHO7fu9bupqVyT5na2McW6Sdd3+PjbRutJMZthL09dlwPFJ5gIvAb7et+zbwKur6qXA+4C/6Ft2MPAW4MXAW5IsAC4H/iDJnG6dtwMXjrPfI4H/V1UHVdWBwDXddp8CjquqQ7ptP9S3zY5VtbSq3g/cDPx+V38jcG1VPbZ5xe57Dz4PvLuqDgJeB/wceAfww6o6FDgU+OMki8ZqMMlRwDHAy7sxPtIt+kJVHdrV7ujGHFOS3YBjgSVV9RLgg+OtK810nsaXpqmquiXJQnpH9au2WPxs4OIki4EC5vQtu76qfgiQZB3w/Kq6L8k/Am9Mcgcwp6puHWfXtwIfT/LfgS9W1ZeTHAgcCKxOAjALuL9vm89vMf0W4AZ6X2b0P7YYf3/g/qq6qfs9f9T1ejjwkr7r6c8GFgP3jNHj64DPVdXPujEe7uoHJvkg8BzgWfS+Z2E8PwQeAS7o7oWYsfdDSFtj2EvT29XAx4BlwG599Q8AN1TVsd0bgn/qW/aLvukn+NXf+f8EzqR3VuBz4+2wqv41ycuA5cAHk1wPXAncXlWvHGezn27R818k2RU4BPjH8X+9XxPgP1TVRAG9NRcBb6qqbyU5hd6/25iq6vEkhwGvBY6j9yVMr9mGfUvTlqfxpentQuD9YxyFP5tf3bB3ymQGqqqvAwuAt9J3/X9LSZ4H/Kyq/gb4KPAy4E5gXpJXduvMSbJknP38hN7XFf8lvTMDT2yxyp3AXkkO7cbaOclsekfhf7L5UkOSFyR55jhtrgbevvmafPfGAmBn4P5ujLeN/68BSZ4FPLuqVgH/EThoovWlmcwje2kaq6pR4PwxFn2E3mn8/wr8w1MY8nLg4Kr6wQTrvBj4aJJfAo8Bf1JVj3an189P8mx6/3ecB4z3FcSfB/6eMY6su7HeAnwqyTPoXa9/Hb0zDwuBb6R3rWAj8KaxBq+qa5IcDKxJ8ii9yxxnAv+N3r0NG7ufO0/we+4MXNXdExHgP02wrjSj+RW30tNId236k1V1/bB7kbT9eBpfehpI8pwk/wr83KCXnn48speeprqPno0V/K+tqk3bu5+xJHkx8NdblH9RVS8fRj/STGXYS5LUOE/jS5LUOMNekqTGGfaSJDXOsJckqXGGvSRJjfv/LYr4l4GfdOEAAAAASUVORK5CYII=\n","text/plain":["
"]},"metadata":{"needs_background":"light"}}]},{"cell_type":"markdown","metadata":{"id":"XfYD5KqGtU8s"},"source":["Объединим рассмотренные выше условия и построим сводную табличку для этого объединения и оттока."]},{"cell_type":"code","metadata":{"id":"ZuaXCibrtU8s","colab":{"base_uri":"https://localhost:8080/","height":143},"executionInfo":{"status":"ok","timestamp":1633609643489,"user_tz":-300,"elapsed":36,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"dbd40bfd-2dd2-41f1-ba50-cf159b12ddd7"},"source":["pd.crosstab(df[\"Many_service_calls\"] & df[\"International plan\"], df[\"Churn\"])"],"execution_count":51,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
Churn01
row_0
False2841464
True919
\n","
"],"text/plain":["Churn 0 1\n","row_0 \n","False 2841 464\n","True 9 19"]},"metadata":{},"execution_count":51}]},{"cell_type":"markdown","metadata":{"id":"VyaMr43HtU8t"},"source":["Значит, прогнозируя отток клиента в случае, когда число звонков в сервисный центр больше 3 и подключен роуминг (и прогнозируя лояльность – в противном случае), можно ожидать около 85.8% правильных попаданий (ошибаемся всего 464 + 9 раз). Эти 85.8%, которые мы получили с помощью очень простых рассуждений – это неплохая отправная точка (*baseline*) для дальнейших моделей машинного обучения, которые мы будем строить. "]},{"cell_type":"markdown","metadata":{"id":"d6_n0ESntU8u"},"source":["В целом до появления машинного обучения процесс анализа данных выглядел примерно так. Прорезюмируем:\n"," \n","- Доля лояльных клиентов в выборке – 85.5%. Самая наивная модель, ответ которой \"Клиент всегда лоялен\" на подобных данных будет угадывать примерно в 85.5% случаев. То есть доли правильных ответов (*accuracy*) последующих моделей должны быть как минимум не меньше, а лучше, значительно выше этой цифры;\n","- С помощью простого прогноза , который условно можно выразить такой формулой: \"International plan = True & Customer Service calls > 3 => Churn = 1, else Churn = 0\", можно ожидать долю угадываний 85.8%, что еще чуть выше 85.5%\n","- Эти два бейзлайна мы получили без всякого машинного обучения, и они служат отправной точной для наших последующих моделей. Если окажется, что мы громадными усилиями увеличиваем долю правильных ответов всего, скажем, на 0.5%, то возможно, мы что-то делаем не так, и достаточно ограничиться простой моделью из двух условий. \n","- Перед обучением сложных моделей рекомендуется немного покрутить данные и проверить простые предположения. Более того, в бизнес-приложениях машинного обучения чаще всего начинают именно с простых решений, а потом экспериментируют с их усложнением. 
"]}]} \ No newline at end of file +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "FzQ_ch0ktU7n" + }, + "source": [ + "#
Первичный анализ данных с Pandas
" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "collapsed": true, + "executionInfo": { + "elapsed": 631, + "status": "ok", + "timestamp": 1633609636856, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "Parpx34utU7s", + "scrolled": true + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QxIKAzfCtU7u" + }, + "source": [ + "Данные, с которыми работают дата саентисты и аналитики, обычно хранятся в виде табличек — например, в форматах `.csv`, `.tsv` или `.xlsx`. Для того, чтобы считать нужные данные из такого файла, отлично подходит библиотека Pandas.\n", + "\n", + "Основными структурами данных в Pandas являются классы `Series` и `DataFrame`. Первый из них представляет собой одномерный индексированный массив данных некоторого фиксированного типа. Второй - это двухмерная структура данных, представляющая собой таблицу, каждый столбец которой содержит данные одного типа. Можно представлять её как словарь объектов типа `Series`. Структура `DataFrame` отлично подходит для представления реальных данных: строки соответствуют признаковым описаниям отдельных объектов, а столбцы соответствуют признакам." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "l_Ell72CtU7w" + }, + "source": [ + "---------\n", + "\n", + "## Демонстрация основных методов Pandas \n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "YMu_ER8WtU7y" + }, + "source": [ + "### Чтение из файла и первичный анализ" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "efGYx1kqtU7z" + }, + "source": [ + "Прочитаем данные и посмотрим на первые 5 строк с помощью метода `head`:" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "collapsed": true, + "executionInfo": { + "elapsed": 597, + "status": "ok", + "timestamp": 1633609637892, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "ByXZK9MFtU71", + "scrolled": true + }, + "outputs": [], + "source": [ + "df = pd.read_csv(\"https://raw.githubusercontent.com/Yorko/mlcourse.ai/master/data/telecom_churn.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 241 + }, + "executionInfo": { + "elapsed": 77, + "status": "ok", + "timestamp": 1633609637895, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "hFaFpz2utU73", + "outputId": "cbd457e9-c2bd-4beb-a1fa-c7ba8a4c5b97", + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
StateAccount lengthArea codeInternational planVoice mail planNumber vmail messagesTotal day minutesTotal day callsTotal day chargeTotal eve minutesTotal eve callsTotal eve chargeTotal night minutesTotal night callsTotal night chargeTotal intl minutesTotal intl callsTotal intl chargeCustomer service callsChurn
0KS128415NoYes25265.111045.07197.49916.78244.79111.0110.032.701False
1OH107415NoYes26161.612327.47195.510316.62254.410311.4513.733.701False
2NJ137415NoNo0243.411441.38121.211010.30162.61047.3212.253.290False
3OH84408YesNo0299.47150.9061.9885.26196.9898.866.671.782False
4OK75415YesNo0166.711328.34148.312212.61186.91218.4110.132.733False
\n", + "
" + ], + "text/plain": [ + " State Account length ... Customer service calls Churn\n", + "0 KS 128 ... 1 False\n", + "1 OH 107 ... 1 False\n", + "2 NJ 137 ... 0 False\n", + "3 OH 84 ... 2 False\n", + "4 OK 75 ... 3 False\n", + "\n", + "[5 rows x 20 columns]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "CpV496POtU75" + }, + "source": [ + "В Jupyter-ноутбуках датафреймы `Pandas` выводятся в виде вот таких красивых табличек, и `print(df.head())` выглядит хуже.\n", + "\n", + "Кстати, по умолчанию `Pandas` выводит всего 20 столбцов и 60 строк, поэтому если ваш датафрейм больше, воспользуйтесь функцией `set_option`:" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "collapsed": true, + "executionInfo": { + "elapsed": 68, + "status": "ok", + "timestamp": 1633609637897, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "CYFyCCGGtU77" + }, + "outputs": [], + "source": [ + "# задание проанализировать все опции и выбрать 3-5 самых полезных по личному мнению \n", + "# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.set_option.html\n", + "pd.set_option(\"display.max_columns\", 100)\n", + "pd.set_option(\"display.max_rows\", 100)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "CbfNn4a9tU78" + }, + "source": [ + "А также укажем значение параметра `precision` равным 2, чтобы отображать два знака после запятой (а не 6, как установлено по умолчанию)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "collapsed": true, + "executionInfo": { + "elapsed": 67, + "status": "ok", + "timestamp": 1633609637899, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "-0MCBxGItU78" + }, + "outputs": [], + "source": [ + "pd.set_option(\"precision\", 2)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Cu652IOYtU79" + }, + "source": [ + "**Посмотрим на размер данных, названия признаков и их типы**" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 66, + "status": "ok", + "timestamp": 1633609637901, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "LQw6THQytU79", + "outputId": "b2d6d2f1-a6d1-47c6-e4bb-5c5f33834c4a" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(3333, 20)\n" + ] + } + ], + "source": [ + "print(df.shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "LJEPKleBtU7-" + }, + "source": [ + "Видим, что в таблице 3333 строки и 20 столбцов. 
Выведем названия столбцов:" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 57, + "status": "ok", + "timestamp": 1633609637903, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "CQArdzC8tU7_", + "outputId": "08e4c81f-5a94-4589-c4d3-c6792128de13" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Index(['State', 'Account length', 'Area code', 'International plan',\n", + " 'Voice mail plan', 'Number vmail messages', 'Total day minutes',\n", + " 'Total day calls', 'Total day charge', 'Total eve minutes',\n", + " 'Total eve calls', 'Total eve charge', 'Total night minutes',\n", + " 'Total night calls', 'Total night charge', 'Total intl minutes',\n", + " 'Total intl calls', 'Total intl charge', 'Customer service calls',\n", + " 'Churn'],\n", + " dtype='object')\n" + ] + } + ], + "source": [ + "print(df.columns)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RoZn1MpBtU8A" + }, + "source": [ + "Чтобы посмотреть общую информацию по датафрейму и всем признакам, воспользуемся методом **`info`**:" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 54, + "status": "ok", + "timestamp": 1633609637906, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "W_ZF3eM8tU8B", + "outputId": "b4d58f04-d867-458f-bb5e-7b91fbdc9cd9", + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 3333 
entries, 0 to 3332\n", + "Data columns (total 20 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 State 3333 non-null object \n", + " 1 Account length 3333 non-null int64 \n", + " 2 Area code 3333 non-null int64 \n", + " 3 International plan 3333 non-null object \n", + " 4 Voice mail plan 3333 non-null object \n", + " 5 Number vmail messages 3333 non-null int64 \n", + " 6 Total day minutes 3333 non-null float64\n", + " 7 Total day calls 3333 non-null int64 \n", + " 8 Total day charge 3333 non-null float64\n", + " 9 Total eve minutes 3333 non-null float64\n", + " 10 Total eve calls 3333 non-null int64 \n", + " 11 Total eve charge 3333 non-null float64\n", + " 12 Total night minutes 3333 non-null float64\n", + " 13 Total night calls 3333 non-null int64 \n", + " 14 Total night charge 3333 non-null float64\n", + " 15 Total intl minutes 3333 non-null float64\n", + " 16 Total intl calls 3333 non-null int64 \n", + " 17 Total intl charge 3333 non-null float64\n", + " 18 Customer service calls 3333 non-null int64 \n", + " 19 Churn 3333 non-null bool \n", + "dtypes: bool(1), float64(8), int64(8), object(3)\n", + "memory usage: 498.1+ KB\n", + "None\n" + ] + } + ], + "source": [ + "print(df.info())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "FYDNyB6CtU8C" + }, + "source": [ + "`bool`, `int64`, `float64` и `object` — это типы признаков. Видим, что 1 признак — логический (`bool`), 3 признака имеют тип `object` и 16 признаков — числовые.\n", + "\n", + "**Изменить тип колонки** можно с помощью метода `astype`. 
Применим этот метод к признаку `Churn` и переведём его в `int64`:" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "collapsed": true, + "executionInfo": { + "elapsed": 48, + "status": "ok", + "timestamp": 1633609637909, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "byRJQVM5tU8D" + }, + "outputs": [], + "source": [ + "df[\"Churn\"] = df[\"Churn\"].astype(\"int64\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "sBTm0lLYtU8D" + }, + "source": [ + "Метод **`describe`** показывает основные статистические характеристики данных по каждому числовому признаку (типы `int64` и `float64`): число непропущенных значений, среднее, стандартное отклонение, диапазон, медиану, 0.25 и 0.75 квартили." + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 335 + }, + "executionInfo": { + "elapsed": 48, + "status": "ok", + "timestamp": 1633609637911, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "bAsmrRI6tU8D", + "outputId": "32a7192a-b49b-4be7-9b6e-9b7f08f57731" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Account lengthArea codeNumber vmail messagesTotal day minutesTotal day callsTotal day chargeTotal eve minutesTotal eve callsTotal eve chargeTotal night minutesTotal night callsTotal night chargeTotal intl minutesTotal intl callsTotal intl chargeCustomer service callsChurn
count3333.003333.003333.003333.003333.003333.003333.003333.003333.003333.003333.003333.003333.003333.003333.003333.003333.00
mean101.06437.188.10179.78100.4430.56200.98100.1117.08200.87100.119.0410.244.482.761.560.14
std39.8242.3713.6954.4720.079.2650.7119.924.3150.5719.572.282.792.460.751.320.35
min1.00408.000.000.000.000.000.000.000.0023.2033.001.040.000.000.000.000.00
25%74.00408.000.00143.7087.0024.43166.6087.0014.16167.0087.007.528.503.002.301.000.00
50%101.00415.000.00179.40101.0030.50201.40100.0017.12201.20100.009.0510.304.002.781.000.00
75%127.00510.0020.00216.40114.0036.79235.30114.0020.00235.30113.0010.5912.106.003.272.000.00
max243.00510.0051.00350.80165.0059.64363.70170.0030.91395.00175.0017.7720.0020.005.409.001.00
\n", + "
" + ], + "text/plain": [ + " Account length Area code Number vmail messages Total day minutes \\\n", + "count 3333.00 3333.00 3333.00 3333.00 \n", + "mean 101.06 437.18 8.10 179.78 \n", + "std 39.82 42.37 13.69 54.47 \n", + "min 1.00 408.00 0.00 0.00 \n", + "25% 74.00 408.00 0.00 143.70 \n", + "50% 101.00 415.00 0.00 179.40 \n", + "75% 127.00 510.00 20.00 216.40 \n", + "max 243.00 510.00 51.00 350.80 \n", + "\n", + " Total day calls Total day charge Total eve minutes Total eve calls \\\n", + "count 3333.00 3333.00 3333.00 3333.00 \n", + "mean 100.44 30.56 200.98 100.11 \n", + "std 20.07 9.26 50.71 19.92 \n", + "min 0.00 0.00 0.00 0.00 \n", + "25% 87.00 24.43 166.60 87.00 \n", + "50% 101.00 30.50 201.40 100.00 \n", + "75% 114.00 36.79 235.30 114.00 \n", + "max 165.00 59.64 363.70 170.00 \n", + "\n", + " Total eve charge Total night minutes Total night calls \\\n", + "count 3333.00 3333.00 3333.00 \n", + "mean 17.08 200.87 100.11 \n", + "std 4.31 50.57 19.57 \n", + "min 0.00 23.20 33.00 \n", + "25% 14.16 167.00 87.00 \n", + "50% 17.12 201.20 100.00 \n", + "75% 20.00 235.30 113.00 \n", + "max 30.91 395.00 175.00 \n", + "\n", + " Total night charge Total intl minutes Total intl calls \\\n", + "count 3333.00 3333.00 3333.00 \n", + "mean 9.04 10.24 4.48 \n", + "std 2.28 2.79 2.46 \n", + "min 1.04 0.00 0.00 \n", + "25% 7.52 8.50 3.00 \n", + "50% 9.05 10.30 4.00 \n", + "75% 10.59 12.10 6.00 \n", + "max 17.77 20.00 20.00 \n", + "\n", + " Total intl charge Customer service calls Churn \n", + "count 3333.00 3333.00 3333.00 \n", + "mean 2.76 1.56 0.14 \n", + "std 0.75 1.32 0.35 \n", + "min 0.00 0.00 0.00 \n", + "25% 2.30 1.00 0.00 \n", + "50% 2.78 1.00 0.00 \n", + "75% 3.27 2.00 0.00 \n", + "max 5.40 9.00 1.00 " + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.describe()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "l6MzhnkotU8D" + }, + "source": [ + "Чтобы посмотреть статистику по нечисловым 
признакам, нужно явно указать интересующие нас типы в параметре `include`. Можно также задать `include='all'`, чтобы вывести статистику по всем имеющимся признакам." + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 175 + }, + "executionInfo": { + "elapsed": 639, + "status": "ok", + "timestamp": 1633609638506, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "ewJscFGZtU8F", + "outputId": "187fb398-e4bf-4c36-f3ff-e395013e994f", + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
StateInternational planVoice mail plan
count333333333333
unique5122
topWVNoNo
freq10630102411
\n", + "
" + ], + "text/plain": [ + " State International plan Voice mail plan\n", + "count 3333 3333 3333\n", + "unique 51 2 2\n", + "top WV No No\n", + "freq 106 3010 2411" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.describe(include=[\"object\", \"bool\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1qbs0vug9TCh" + }, + "source": [ + "Тот же принцип работает при выборе столбцов указанного типа." + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 424 + }, + "executionInfo": { + "elapsed": 120, + "status": "ok", + "timestamp": 1633609638538, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "tbL3f9OD9Tg7", + "outputId": "1489c427-200c-45fa-f127-369a97e46ea8" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
StateInternational planVoice mail plan
0KSNoYes
1OHNoYes
2NJNoNo
3OHYesNo
4OKYesNo
............
3328AZNoYes
3329WVNoNo
3330RINoNo
3331CTYesNo
3332TNNoYes
\n", + "

3333 rows × 3 columns

\n", + "
" + ], + "text/plain": [ + " State International plan Voice mail plan\n", + "0 KS No Yes\n", + "1 OH No Yes\n", + "2 NJ No No\n", + "3 OH Yes No\n", + "4 OK Yes No\n", + "... ... ... ...\n", + "3328 AZ No Yes\n", + "3329 WV No No\n", + "3330 RI No No\n", + "3331 CT Yes No\n", + "3332 TN No Yes\n", + "\n", + "[3333 rows x 3 columns]" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.select_dtypes(include=['object', 'bool']) # exclude" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Ge-uZsFvtU8G" + }, + "source": [ + "Для категориальных (тип `object`) и булевых (тип `bool`) признаков можно воспользоваться методом **`value_counts`**. Посмотрим на распределение нашей целевой переменной — `Churn`:" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 115, + "status": "ok", + "timestamp": 1633609638540, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "eeDu-JiYtU8G", + "outputId": "19761b7d-d89b-49eb-e4bd-371bd68907d7" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0 2850\n", + "1 483\n", + "Name: Churn, dtype: int64" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[\"Churn\"].value_counts()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "KANMt5q2tU8I" + }, + "source": [ + "2850 пользователей из 3333 — лояльные, значение переменной `Churn` у них — `0`.\n", + "\n", + "Посмотрим на распределение пользователей по переменной `Area code`. Укажем значение параметра `normalize=True`, чтобы посмотреть не абсолютные частоты, а относительные." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 109, + "status": "ok", + "timestamp": 1633609638542, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "pMenDSyHtU8I", + "outputId": "a99c176c-d2b0-45b9-e54f-653c1f060dd0" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "415 0.50\n", + "510 0.25\n", + "408 0.25\n", + "Name: Area code, dtype: float64" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[\"Area code\"].value_counts(normalize=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "l4ikQZaptU8I" + }, + "source": [ + "### Сортировка\n", + "\n", + "`DataFrame` можно отсортировать по значению какого-нибудь из признаков. В нашем случае, например, по `Total day charge` (`ascending=False` для сортировки по убыванию):" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 241 + }, + "executionInfo": { + "elapsed": 102, + "status": "ok", + "timestamp": 1633609638544, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "GrbzIXBQtU8J", + "outputId": "7cf76892-8c0d-42fa-fa98-aa49f8c2ab6e" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
StateAccount lengthArea codeInternational planVoice mail planNumber vmail messagesTotal day minutesTotal day callsTotal day chargeTotal eve minutesTotal eve callsTotal eve chargeTotal night minutesTotal night callsTotal night chargeTotal intl minutesTotal intl callsTotal intl chargeCustomer service callsChurn
365CO154415NoNo0350.87559.64216.59418.40253.910011.4310.192.7311
985NY64415YesNo0346.85558.96249.57921.21275.410212.3913.393.5911
2594OH115510YesNo0345.38158.70203.410617.29217.51079.7911.883.1911
156OH83415NoNo0337.412057.36227.411619.33153.91146.9315.874.2701
605MO112415NoNo0335.57757.04212.510918.06265.013211.9312.783.4321
\n", + "
" + ], + "text/plain": [ + " State Account length Area code International plan Voice mail plan \\\n", + "365 CO 154 415 No No \n", + "985 NY 64 415 Yes No \n", + "2594 OH 115 510 Yes No \n", + "156 OH 83 415 No No \n", + "605 MO 112 415 No No \n", + "\n", + " Number vmail messages Total day minutes Total day calls \\\n", + "365 0 350.8 75 \n", + "985 0 346.8 55 \n", + "2594 0 345.3 81 \n", + "156 0 337.4 120 \n", + "605 0 335.5 77 \n", + "\n", + " Total day charge Total eve minutes Total eve calls Total eve charge \\\n", + "365 59.64 216.5 94 18.40 \n", + "985 58.96 249.5 79 21.21 \n", + "2594 58.70 203.4 106 17.29 \n", + "156 57.36 227.4 116 19.33 \n", + "605 57.04 212.5 109 18.06 \n", + "\n", + " Total night minutes Total night calls Total night charge \\\n", + "365 253.9 100 11.43 \n", + "985 275.4 102 12.39 \n", + "2594 217.5 107 9.79 \n", + "156 153.9 114 6.93 \n", + "605 265.0 132 11.93 \n", + "\n", + " Total intl minutes Total intl calls Total intl charge \\\n", + "365 10.1 9 2.73 \n", + "985 13.3 9 3.59 \n", + "2594 11.8 8 3.19 \n", + "156 15.8 7 4.27 \n", + "605 12.7 8 3.43 \n", + "\n", + " Customer service calls Churn \n", + "365 1 1 \n", + "985 1 1 \n", + "2594 1 1 \n", + "156 0 1 \n", + "605 2 1 " + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.sort_values(by=\"Total day charge\", ascending=False).head()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "apUOhvc_tU8J" + }, + "source": [ + "Сортировать можно и по группе столбцов:" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 241 + }, + "executionInfo": { + "elapsed": 100, + "status": "ok", + "timestamp": 1633609638545, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + 
}, + "id": "KUU1Xp63tU8K", + "outputId": "0bbacb6a-bbf7-4697-b720-20033f341ff3" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
StateAccount lengthArea codeInternational planVoice mail planNumber vmail messagesTotal day minutesTotal day callsTotal day chargeTotal eve minutesTotal eve callsTotal eve chargeTotal night minutesTotal night callsTotal night chargeTotal intl minutesTotal intl callsTotal intl chargeCustomer service callsChurn
688MN13510NoYes21315.610553.65208.97117.76260.112311.7012.133.2730
2259NC210415NoYes31313.88753.35147.710312.55192.7978.6710.172.7330
534LA67510NoNo0310.49752.7766.51235.65246.59911.099.2102.4840
575SD114415NoYes36309.99052.68200.38917.03183.51058.2614.223.8310
2858AL141510NoYes28308.012352.36247.812821.06152.91036.887.432.0010
\n", + "
" + ], + "text/plain": [ + " State Account length Area code International plan Voice mail plan \\\n", + "688 MN 13 510 No Yes \n", + "2259 NC 210 415 No Yes \n", + "534 LA 67 510 No No \n", + "575 SD 114 415 No Yes \n", + "2858 AL 141 510 No Yes \n", + "\n", + " Number vmail messages Total day minutes Total day calls \\\n", + "688 21 315.6 105 \n", + "2259 31 313.8 87 \n", + "534 0 310.4 97 \n", + "575 36 309.9 90 \n", + "2858 28 308.0 123 \n", + "\n", + " Total day charge Total eve minutes Total eve calls Total eve charge \\\n", + "688 53.65 208.9 71 17.76 \n", + "2259 53.35 147.7 103 12.55 \n", + "534 52.77 66.5 123 5.65 \n", + "575 52.68 200.3 89 17.03 \n", + "2858 52.36 247.8 128 21.06 \n", + "\n", + " Total night minutes Total night calls Total night charge \\\n", + "688 260.1 123 11.70 \n", + "2259 192.7 97 8.67 \n", + "534 246.5 99 11.09 \n", + "575 183.5 105 8.26 \n", + "2858 152.9 103 6.88 \n", + "\n", + " Total intl minutes Total intl calls Total intl charge \\\n", + "688 12.1 3 3.27 \n", + "2259 10.1 7 2.73 \n", + "534 9.2 10 2.48 \n", + "575 14.2 2 3.83 \n", + "2858 7.4 3 2.00 \n", + "\n", + " Customer service calls Churn \n", + "688 3 0 \n", + "2259 3 0 \n", + "534 4 0 \n", + "575 1 0 \n", + "2858 1 0 " + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.sort_values(by=[\"Churn\", \"Total day charge\"], ascending=[True, False]).head()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VCTKeJUYtU8L" + }, + "source": [ + "### Индексация и извлечение данных" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "lveNXBbztU8L" + }, + "source": [ + "`DataFrame` можно индексировать по-разному. В связи с этим рассмотрим различные способы индексации и извлечения нужных нам данных из датафрейма на примере простых вопросов.\n", + "\n", + "Для извлечения отдельного столбца можно использовать конструкцию вида `DataFrame['Name']`. 
Воспользуемся этим для ответа на вопрос: **какова доля нелояльных пользователей в нашем датафрейме?**" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 98, + "status": "ok", + "timestamp": 1633609638547, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "FLaA5u1ztU8L", + "outputId": "d1b61bde-7b0c-45d0-c2e9-32d9bb9539c0" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0.14491449144914492" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[\"Churn\"].mean()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QiJUnpEJtU8M" + }, + "source": [ + "14,5% — довольно плохой показатель для компании, с таким процентом оттока можно и разориться." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2v6CRyJ3tU8M" + }, + "source": [ + "Очень удобной является логическая индексация `DataFrame` по одному столбцу. Выглядит она следующим образом: `df[P(df['Name'])]`, где `P` — это некоторое логическое условие, проверяемое для каждого элемента столбца `Name`. Итогом такой индексации является `DataFrame`, состоящий только из строк, удовлетворяющих условию `P` по столбцу `Name`. 
\n", + "\n", + "Воспользуемся этим для ответа на вопрос: **каковы средние значения числовых признаков среди нелояльных пользователей?**" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 90, + "status": "ok", + "timestamp": 1633609638548, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "0G0_4zPytU8O", + "outputId": "79d763ca-3a4e-4408-f218-e5996dbd68bb", + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Account length 102.66\n", + "Area code 437.82\n", + "Number vmail messages 5.12\n", + "Total day minutes 206.91\n", + "Total day calls 101.34\n", + "Total day charge 35.18\n", + "Total eve minutes 212.41\n", + "Total eve calls 100.56\n", + "Total eve charge 18.05\n", + "Total night minutes 205.23\n", + "Total night calls 100.40\n", + "Total night charge 9.24\n", + "Total intl minutes 10.70\n", + "Total intl calls 4.16\n", + "Total intl charge 2.89\n", + "Customer service calls 2.23\n", + "Churn 1.00\n", + "dtype: float64" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[df[\"Churn\"] == 1].mean(numeric_only=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vX7Kv82ztU8O" + }, + "source": [ + "Скомбинировав предыдущие два вида индексации, ответим на вопрос: **сколько в среднем в течение дня разговаривают по телефону нелояльные пользователи**?" 
+ ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 87, + "status": "ok", + "timestamp": 1633609638551, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "ZmpzMz9LtU8O", + "outputId": "f4ef2f49-5d18-4228-b513-96402e23b1b4" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "206.91407867494814" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[df[\"Churn\"] == 1][\"Total day minutes\"].mean()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rME2EKe8tU8P" + }, + "source": [ + "**Какова максимальная длина международных звонков среди лояльных пользователей (`Churn == 0`), не пользующихся услугой международного роуминга (`'International plan' == 'No'`)?**" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 82, + "status": "ok", + "timestamp": 1633609638552, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "DQ0H-bJttU8Q", + "outputId": "3c8a6304-7ede-495b-f2cc-dcf70beb252f" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "18.9" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[(df[\"Churn\"] == 0) & (df[\"International plan\"] == \"No\")][\"Total intl minutes\"].max()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "f6IelrO4tU8Q" + }, + "source": [ + "Датафреймы можно индексировать как по названию столбца или строки, так и 
по порядковому номеру. Для индексации **по названию** используется метод **`loc`**, **по номеру** — **`iloc`**.\n", + "\n", + "В первом случае мы говорим _«передай нам значения для id строк от 0 до 5 и для столбцов от State до Area code»_, а во втором — _«передай нам значения первых пяти строк в первых трёх столбцах»_. \n", + "\n", + "В случае `iloc` срез работает как обычно, однако в случае `loc` учитываются и начало, и конец среза. Да, неудобно, да, вызывает путаницу." + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "executionInfo": { + "elapsed": 78, + "status": "ok", + "timestamp": 1633609638554, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "Pp82lj7ktU8R", + "outputId": "8e2a9392-b3f0-44ee-e383-b19a46f8d708", + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Account lengthArea codeInternational planVoice mail planNumber vmail messagesTotal day minutesTotal day callsTotal day chargeTotal eve minutesTotal eve callsTotal eve chargeTotal night minutesTotal night callsTotal night chargeTotal intl minutesTotal intl callsTotal intl chargeCustomer service callsChurn
0128415NoYes25265.111045.07197.49916.78244.79111.0110.032.7010
1107415NoYes26161.612327.47195.510316.62254.410311.4513.733.7010
2137415NoNo0243.411441.38121.211010.30162.61047.3212.253.2900
375415YesNo0166.711328.34148.312212.61186.91218.4110.132.7330
4118510YesNo0223.49837.98220.610118.75203.91189.186.361.7000
5121510NoYes24218.28837.09348.510829.62212.61189.577.572.0330
6147415YesNo0157.07926.69103.1948.76211.8969.537.161.9200
7117408NoNo0184.59731.37351.68029.89215.8909.718.742.3510
8141415YesYes37258.68443.96222.011118.87326.49714.6911.253.0200
965415NoNo0129.113721.95228.58319.42208.81119.4012.763.4341
1074415NoNo0187.712731.91163.414813.89196.0948.829.152.4600
11168408NoNo0128.89621.90104.9718.92141.11286.3511.223.0210
1295510NoNo0156.68826.62247.67521.05192.31158.6512.353.3230
13161415NoNo0332.96756.59317.89727.01160.61287.235.491.4641
1485408NoYes27196.413933.39280.99023.8889.3754.0213.843.7310
1593510NoNo0190.711432.42218.211118.55129.61215.838.132.1930
1676510NoYes33189.76632.25212.86518.09165.71087.4610.052.7010
1773415NoNo0224.49038.15159.58813.56192.8748.6813.023.5110
18147415NoNo0155.111726.37239.79320.37208.81339.4010.642.8600
1977408NoNo062.48910.61169.912114.44209.6649.435.761.5451
20130415NoNo0183.011231.1172.9996.20181.8788.189.5192.5700
21111415NoNo0110.410318.77137.310211.67189.61058.537.762.0820
22174415NoNo0124.37621.13277.111223.55250.711511.2815.554.1930
2357408NoYes39213.011536.21191.111216.24182.71158.229.532.5700
2449510NoNo0119.311720.28215.110918.28178.7908.0411.113.0010
25142415NoNo084.89514.42136.76311.62250.514811.2714.263.8320
2675510NoNo0226.110538.44201.510717.13246.29811.0810.352.7810
2772415NoYes37220.08037.40217.310218.47152.8716.8814.763.9730
2836408NoYes30146.312824.87162.58013.81129.31095.8214.563.9200
29135408YesYes41173.18529.43203.910717.33122.2785.5014.6153.9401
3034510NoNo0124.88221.22282.29823.99311.57814.0210.042.7020
3164510NoNo0154.06726.18225.811819.19265.38611.943.530.9510
3259408NoYes28120.99720.55213.09218.11163.11167.348.552.3020
3365415NoNo0211.312035.92162.612213.82134.71186.0613.253.5630
34142408NoNo0187.013331.79134.67411.44242.212710.907.452.0020
3596415NoNo0160.211727.23267.56722.74228.56810.289.352.5120
36116415NoYes34268.68345.66178.214215.15166.31067.4811.633.1320
3774510NoYes33193.79132.93246.19620.92138.0926.2114.633.9420
38149408NoYes28180.79230.72187.86415.96265.55311.9512.633.4030
3938408NoNo0131.29822.30162.99713.85159.01067.158.262.2120
4040415NoYes41148.17425.18169.58814.41214.11029.636.251.6720
41147510NoNo0248.68342.26148.98512.66172.51097.768.042.1630
4290415NoNo0203.414634.58226.711719.27152.41056.867.341.9710
4382415NoNo0300.310951.05181.010015.39270.17312.1511.743.1601
4474415NoYes35154.110426.20123.48410.49202.1579.0910.992.9420
4578415NoNo0252.99342.99178.411215.16263.910511.889.572.5730
46120408NoNo0212.113136.06209.410417.80167.2967.525.351.4311
4778415NoNo0149.711925.45182.211515.49261.512611.779.782.6200
4882415NoYes24155.213126.38244.510620.78122.4685.5110.732.8910
49199415NoYes34230.612139.20219.49918.65299.39413.478.022.1600
5079408NoNo0205.712334.97214.510818.23226.110610.176.7181.8110
\n", + "
" + ], + "text/plain": [ + " Account length Area code International plan Voice mail plan \\\n", + "0 128 415 No Yes \n", + "1 107 415 No Yes \n", + "2 137 415 No No \n", + "3 75 415 Yes No \n", + "4 118 510 Yes No \n", + "5 121 510 No Yes \n", + "6 147 415 Yes No \n", + "7 117 408 No No \n", + "8 141 415 Yes Yes \n", + "9 65 415 No No \n", + "10 74 415 No No \n", + "11 168 408 No No \n", + "12 95 510 No No \n", + "13 161 415 No No \n", + "14 85 408 No Yes \n", + "15 93 510 No No \n", + "16 76 510 No Yes \n", + "17 73 415 No No \n", + "18 147 415 No No \n", + "19 77 408 No No \n", + "20 130 415 No No \n", + "21 111 415 No No \n", + "22 174 415 No No \n", + "23 57 408 No Yes \n", + "24 49 510 No No \n", + "25 142 415 No No \n", + "26 75 510 No No \n", + "27 72 415 No Yes \n", + "28 36 408 No Yes \n", + "29 135 408 Yes Yes \n", + "30 34 510 No No \n", + "31 64 510 No No \n", + "32 59 408 No Yes \n", + "33 65 415 No No \n", + "34 142 408 No No \n", + "35 96 415 No No \n", + "36 116 415 No Yes \n", + "37 74 510 No Yes \n", + "38 149 408 No Yes \n", + "39 38 408 No No \n", + "40 40 415 No Yes \n", + "41 147 510 No No \n", + "42 90 415 No No \n", + "43 82 415 No No \n", + "44 74 415 No Yes \n", + "45 78 415 No No \n", + "46 120 408 No No \n", + "47 78 415 No No \n", + "48 82 415 No Yes \n", + "49 199 415 No Yes \n", + "50 79 408 No No \n", + "\n", + " Number vmail messages Total day minutes Total day calls \\\n", + "0 25 265.1 110 \n", + "1 26 161.6 123 \n", + "2 0 243.4 114 \n", + "3 0 166.7 113 \n", + "4 0 223.4 98 \n", + "5 24 218.2 88 \n", + "6 0 157.0 79 \n", + "7 0 184.5 97 \n", + "8 37 258.6 84 \n", + "9 0 129.1 137 \n", + "10 0 187.7 127 \n", + "11 0 128.8 96 \n", + "12 0 156.6 88 \n", + "13 0 332.9 67 \n", + "14 27 196.4 139 \n", + "15 0 190.7 114 \n", + "16 33 189.7 66 \n", + "17 0 224.4 90 \n", + "18 0 155.1 117 \n", + "19 0 62.4 89 \n", + "20 0 183.0 112 \n", + "21 0 110.4 103 \n", + "22 0 124.3 76 \n", + "23 39 213.0 115 \n", + "24 0 119.3 117 \n", + "25 0 
84.8 95 \n", + "26 0 226.1 105 \n", + "27 37 220.0 80 \n", + "28 30 146.3 128 \n", + "29 41 173.1 85 \n", + "30 0 124.8 82 \n", + "31 0 154.0 67 \n", + "32 28 120.9 97 \n", + "33 0 211.3 120 \n", + "34 0 187.0 133 \n", + "35 0 160.2 117 \n", + "36 34 268.6 83 \n", + "37 33 193.7 91 \n", + "38 28 180.7 92 \n", + "39 0 131.2 98 \n", + "40 41 148.1 74 \n", + "41 0 248.6 83 \n", + "42 0 203.4 146 \n", + "43 0 300.3 109 \n", + "44 35 154.1 104 \n", + "45 0 252.9 93 \n", + "46 0 212.1 131 \n", + "47 0 149.7 119 \n", + "48 24 155.2 131 \n", + "49 34 230.6 121 \n", + "50 0 205.7 123 \n", + "\n", + " Total day charge Total eve minutes Total eve calls Total eve charge \\\n", + "0 45.07 197.4 99 16.78 \n", + "1 27.47 195.5 103 16.62 \n", + "2 41.38 121.2 110 10.30 \n", + "3 28.34 148.3 122 12.61 \n", + "4 37.98 220.6 101 18.75 \n", + "5 37.09 348.5 108 29.62 \n", + "6 26.69 103.1 94 8.76 \n", + "7 31.37 351.6 80 29.89 \n", + "8 43.96 222.0 111 18.87 \n", + "9 21.95 228.5 83 19.42 \n", + "10 31.91 163.4 148 13.89 \n", + "11 21.90 104.9 71 8.92 \n", + "12 26.62 247.6 75 21.05 \n", + "13 56.59 317.8 97 27.01 \n", + "14 33.39 280.9 90 23.88 \n", + "15 32.42 218.2 111 18.55 \n", + "16 32.25 212.8 65 18.09 \n", + "17 38.15 159.5 88 13.56 \n", + "18 26.37 239.7 93 20.37 \n", + "19 10.61 169.9 121 14.44 \n", + "20 31.11 72.9 99 6.20 \n", + "21 18.77 137.3 102 11.67 \n", + "22 21.13 277.1 112 23.55 \n", + "23 36.21 191.1 112 16.24 \n", + "24 20.28 215.1 109 18.28 \n", + "25 14.42 136.7 63 11.62 \n", + "26 38.44 201.5 107 17.13 \n", + "27 37.40 217.3 102 18.47 \n", + "28 24.87 162.5 80 13.81 \n", + "29 29.43 203.9 107 17.33 \n", + "30 21.22 282.2 98 23.99 \n", + "31 26.18 225.8 118 19.19 \n", + "32 20.55 213.0 92 18.11 \n", + "33 35.92 162.6 122 13.82 \n", + "34 31.79 134.6 74 11.44 \n", + "35 27.23 267.5 67 22.74 \n", + "36 45.66 178.2 142 15.15 \n", + "37 32.93 246.1 96 20.92 \n", + "38 30.72 187.8 64 15.96 \n", + "39 22.30 162.9 97 13.85 \n", + "40 25.18 169.5 88 14.41 \n", + "41 
42.26 148.9 85 12.66 \n", + "42 34.58 226.7 117 19.27 \n", + "43 51.05 181.0 100 15.39 \n", + "44 26.20 123.4 84 10.49 \n", + "45 42.99 178.4 112 15.16 \n", + "46 36.06 209.4 104 17.80 \n", + "47 25.45 182.2 115 15.49 \n", + "48 26.38 244.5 106 20.78 \n", + "49 39.20 219.4 99 18.65 \n", + "50 34.97 214.5 108 18.23 \n", + "\n", + " Total night minutes Total night calls Total night charge \\\n", + "0 244.7 91 11.01 \n", + "1 254.4 103 11.45 \n", + "2 162.6 104 7.32 \n", + "3 186.9 121 8.41 \n", + "4 203.9 118 9.18 \n", + "5 212.6 118 9.57 \n", + "6 211.8 96 9.53 \n", + "7 215.8 90 9.71 \n", + "8 326.4 97 14.69 \n", + "9 208.8 111 9.40 \n", + "10 196.0 94 8.82 \n", + "11 141.1 128 6.35 \n", + "12 192.3 115 8.65 \n", + "13 160.6 128 7.23 \n", + "14 89.3 75 4.02 \n", + "15 129.6 121 5.83 \n", + "16 165.7 108 7.46 \n", + "17 192.8 74 8.68 \n", + "18 208.8 133 9.40 \n", + "19 209.6 64 9.43 \n", + "20 181.8 78 8.18 \n", + "21 189.6 105 8.53 \n", + "22 250.7 115 11.28 \n", + "23 182.7 115 8.22 \n", + "24 178.7 90 8.04 \n", + "25 250.5 148 11.27 \n", + "26 246.2 98 11.08 \n", + "27 152.8 71 6.88 \n", + "28 129.3 109 5.82 \n", + "29 122.2 78 5.50 \n", + "30 311.5 78 14.02 \n", + "31 265.3 86 11.94 \n", + "32 163.1 116 7.34 \n", + "33 134.7 118 6.06 \n", + "34 242.2 127 10.90 \n", + "35 228.5 68 10.28 \n", + "36 166.3 106 7.48 \n", + "37 138.0 92 6.21 \n", + "38 265.5 53 11.95 \n", + "39 159.0 106 7.15 \n", + "40 214.1 102 9.63 \n", + "41 172.5 109 7.76 \n", + "42 152.4 105 6.86 \n", + "43 270.1 73 12.15 \n", + "44 202.1 57 9.09 \n", + "45 263.9 105 11.88 \n", + "46 167.2 96 7.52 \n", + "47 261.5 126 11.77 \n", + "48 122.4 68 5.51 \n", + "49 299.3 94 13.47 \n", + "50 226.1 106 10.17 \n", + "\n", + " Total intl minutes Total intl calls Total intl charge \\\n", + "0 10.0 3 2.70 \n", + "1 13.7 3 3.70 \n", + "2 12.2 5 3.29 \n", + "3 10.1 3 2.73 \n", + "4 6.3 6 1.70 \n", + "5 7.5 7 2.03 \n", + "6 7.1 6 1.92 \n", + "7 8.7 4 2.35 \n", + "8 11.2 5 3.02 \n", + "9 12.7 6 3.43 \n", + "10 
9.1 5 2.46 \n", + "11 11.2 2 3.02 \n", + "12 12.3 5 3.32 \n", + "13 5.4 9 1.46 \n", + "14 13.8 4 3.73 \n", + "15 8.1 3 2.19 \n", + "16 10.0 5 2.70 \n", + "17 13.0 2 3.51 \n", + "18 10.6 4 2.86 \n", + "19 5.7 6 1.54 \n", + "20 9.5 19 2.57 \n", + "21 7.7 6 2.08 \n", + "22 15.5 5 4.19 \n", + "23 9.5 3 2.57 \n", + "24 11.1 1 3.00 \n", + "25 14.2 6 3.83 \n", + "26 10.3 5 2.78 \n", + "27 14.7 6 3.97 \n", + "28 14.5 6 3.92 \n", + "29 14.6 15 3.94 \n", + "30 10.0 4 2.70 \n", + "31 3.5 3 0.95 \n", + "32 8.5 5 2.30 \n", + "33 13.2 5 3.56 \n", + "34 7.4 5 2.00 \n", + "35 9.3 5 2.51 \n", + "36 11.6 3 3.13 \n", + "37 14.6 3 3.94 \n", + "38 12.6 3 3.40 \n", + "39 8.2 6 2.21 \n", + "40 6.2 5 1.67 \n", + "41 8.0 4 2.16 \n", + "42 7.3 4 1.97 \n", + "43 11.7 4 3.16 \n", + "44 10.9 9 2.94 \n", + "45 9.5 7 2.57 \n", + "46 5.3 5 1.43 \n", + "47 9.7 8 2.62 \n", + "48 10.7 3 2.89 \n", + "49 8.0 2 2.16 \n", + "50 6.7 18 1.81 \n", + "\n", + " Customer service calls Churn \n", + "0 1 0 \n", + "1 1 0 \n", + "2 0 0 \n", + "3 3 0 \n", + "4 0 0 \n", + "5 3 0 \n", + "6 0 0 \n", + "7 1 0 \n", + "8 0 0 \n", + "9 4 1 \n", + "10 0 0 \n", + "11 1 0 \n", + "12 3 0 \n", + "13 4 1 \n", + "14 1 0 \n", + "15 3 0 \n", + "16 1 0 \n", + "17 1 0 \n", + "18 0 0 \n", + "19 5 1 \n", + "20 0 0 \n", + "21 2 0 \n", + "22 3 0 \n", + "23 0 0 \n", + "24 1 0 \n", + "25 2 0 \n", + "26 1 0 \n", + "27 3 0 \n", + "28 0 0 \n", + "29 0 1 \n", + "30 2 0 \n", + "31 1 0 \n", + "32 2 0 \n", + "33 3 0 \n", + "34 2 0 \n", + "35 2 0 \n", + "36 2 0 \n", + "37 2 0 \n", + "38 3 0 \n", + "39 2 0 \n", + "40 2 0 \n", + "41 3 0 \n", + "42 1 0 \n", + "43 0 1 \n", + "44 2 0 \n", + "45 3 0 \n", + "46 1 1 \n", + "47 0 0 \n", + "48 1 0 \n", + "49 0 0 \n", + "50 1 0 " + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d = df.copy()\n", + "d = d.drop_duplicates('State')\n", + "d = d.set_index('State')\n", + "# d = d.reset_index() # сбрасываем столбец-индекс не удаляя его\n", + "d = 
d.reset_index(drop=True) # сбрасываем столбец-индекс удаляя его\n", + "d\n", + "# d.loc['KS':'OK','Area code':'Total day minutes']" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "executionInfo": { + "elapsed": 75, + "status": "ok", + "timestamp": 1633609638556, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "qGN5gaALtU8R", + "outputId": "1f803cde-6306-4ebe-cd3a-1cb7ad85010e", + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
StateAccount lengthArea code
0KS128415
1OH107415
2NJ137415
3OH84408
4OK75415
\n", + "
" + ], + "text/plain": [ + " State Account length Area code\n", + "0 KS 128 415\n", + "1 OH 107 415\n", + "2 NJ 137 415\n", + "3 OH 84 408\n", + "4 OK 75 415" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.iloc[0:5, 0:3]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "UCMKdcx9tU8S" + }, + "source": [ + "Метод `ix` индексирует и по названию, и по номеру, но он вызывает путаницу, и поэтому был объявлен устаревшим (deprecated)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HnMAXWTAtU8S" + }, + "source": [ + "Если нам нужна первая или последняя строчка датафрейма, пользуемся конструкцией `df[:1]` или `df[-1:]`:" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 115 + }, + "executionInfo": { + "elapsed": 74, + "status": "ok", + "timestamp": 1633609638558, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "OrwoqAGPtU8U", + "outputId": "53bc6332-8c03-4b98-9335-295812d859cd", + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
StateAccount lengthArea codeInternational planVoice mail planNumber vmail messagesTotal day minutesTotal day callsTotal day chargeTotal eve minutesTotal eve callsTotal eve chargeTotal night minutesTotal night callsTotal night chargeTotal intl minutesTotal intl callsTotal intl chargeCustomer service callsChurn
3332TN74415NoYes25234.411339.85265.98222.6241.47710.8613.743.700
\n", + "
" + ], + "text/plain": [ + " State Account length Area code International plan Voice mail plan \\\n", + "3332 TN 74 415 No Yes \n", + "\n", + " Number vmail messages Total day minutes Total day calls \\\n", + "3332 25 234.4 113 \n", + "\n", + " Total day charge Total eve minutes Total eve calls Total eve charge \\\n", + "3332 39.85 265.9 82 22.6 \n", + "\n", + " Total night minutes Total night calls Total night charge \\\n", + "3332 241.4 77 10.86 \n", + "\n", + " Total intl minutes Total intl calls Total intl charge \\\n", + "3332 13.7 4 3.7 \n", + "\n", + " Customer service calls Churn \n", + "3332 0 0 " + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[-1:]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Ur_--vTVtU8W" + }, + "source": [ + "### Применение функций: `apply`, `map` и др." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "da6UVfVjtU8W" + }, + "source": [ + "**Применение функции к каждому столбцу:**" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 71, + "status": "ok", + "timestamp": 1633609638559, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "LIlX4ORVtU8W", + "outputId": "315e42ff-9efa-4fa9-e41f-cba08a9534d2" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "State WY\n", + "Account length 243\n", + "Area code 510\n", + "International plan Yes\n", + "Voice mail plan Yes\n", + "Number vmail messages 51\n", + "Total day minutes 3.5e+02\n", + "Total day calls 165\n", + "Total day charge 60\n", + "Total eve minutes 3.6e+02\n", + "Total eve calls 170\n", + "Total eve charge 31\n", + "Total night minutes 4e+02\n", + "Total night calls 175\n", + "Total night 
charge 18\n", + "Total intl minutes 20\n", + "Total intl calls 20\n", + "Total intl charge 5.4\n", + "Customer service calls 9\n", + "Churn 1\n", + "dtype: object" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.apply(np.max)" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 478 + }, + "executionInfo": { + "elapsed": 66, + "status": "ok", + "timestamp": 1633609638561, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "p-mUIP9HQakx", + "outputId": "bbe218f1-eddb-4ff4-eb09-6a148e04a29c" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
StateAccount lengthArea codeInternational planVoice mail planNumber vmail messagesTotal day minutesTotal day callsTotal day chargeTotal eve minutesTotal eve callsTotal eve chargeTotal night minutesTotal night callsTotal night chargeTotal intl minutesTotal intl callsTotal intl chargeCustomer service callsChurnnew_Number_vmail_messages
0KS128415NoYes25265.111045.07197.49916.78244.79111.0110.032.7010100
1OH107415NoYes26161.612327.47195.510316.62254.410311.4513.733.7010104
2NJ137415NoNo0243.411441.38121.211010.30162.61047.3212.253.29004
3OH84408YesNo0299.47150.9061.9885.26196.9898.866.671.78204
4OK75415YesNo0166.711328.34148.312212.61186.91218.4110.132.73304
..................................................................
3328AZ192415NoYes36156.27726.55215.512618.32279.18312.569.962.6720144
3329WV68415NoNo0231.15739.29153.45513.04191.31238.619.642.59304
3330RI28510NoNo0180.810930.74288.85824.55191.9918.6414.163.81204
3331CT184510YesNo0213.810536.35159.68413.57139.21376.265.0101.35204
3332TN74415NoYes25234.411339.85265.98222.60241.47710.8613.743.7000100
\n", + "

3333 rows × 21 columns

\n", + "
" + ], + "text/plain": [ + " State Account length Area code International plan Voice mail plan \\\n", + "0 KS 128 415 No Yes \n", + "1 OH 107 415 No Yes \n", + "2 NJ 137 415 No No \n", + "3 OH 84 408 Yes No \n", + "4 OK 75 415 Yes No \n", + "... ... ... ... ... ... \n", + "3328 AZ 192 415 No Yes \n", + "3329 WV 68 415 No No \n", + "3330 RI 28 510 No No \n", + "3331 CT 184 510 Yes No \n", + "3332 TN 74 415 No Yes \n", + "\n", + " Number vmail messages Total day minutes Total day calls \\\n", + "0 25 265.1 110 \n", + "1 26 161.6 123 \n", + "2 0 243.4 114 \n", + "3 0 299.4 71 \n", + "4 0 166.7 113 \n", + "... ... ... ... \n", + "3328 36 156.2 77 \n", + "3329 0 231.1 57 \n", + "3330 0 180.8 109 \n", + "3331 0 213.8 105 \n", + "3332 25 234.4 113 \n", + "\n", + " Total day charge Total eve minutes Total eve calls Total eve charge \\\n", + "0 45.07 197.4 99 16.78 \n", + "1 27.47 195.5 103 16.62 \n", + "2 41.38 121.2 110 10.30 \n", + "3 50.90 61.9 88 5.26 \n", + "4 28.34 148.3 122 12.61 \n", + "... ... ... ... ... \n", + "3328 26.55 215.5 126 18.32 \n", + "3329 39.29 153.4 55 13.04 \n", + "3330 30.74 288.8 58 24.55 \n", + "3331 36.35 159.6 84 13.57 \n", + "3332 39.85 265.9 82 22.60 \n", + "\n", + " Total night minutes Total night calls Total night charge \\\n", + "0 244.7 91 11.01 \n", + "1 254.4 103 11.45 \n", + "2 162.6 104 7.32 \n", + "3 196.9 89 8.86 \n", + "4 186.9 121 8.41 \n", + "... ... ... ... \n", + "3328 279.1 83 12.56 \n", + "3329 191.3 123 8.61 \n", + "3330 191.9 91 8.64 \n", + "3331 139.2 137 6.26 \n", + "3332 241.4 77 10.86 \n", + "\n", + " Total intl minutes Total intl calls Total intl charge \\\n", + "0 10.0 3 2.70 \n", + "1 13.7 3 3.70 \n", + "2 12.2 5 3.29 \n", + "3 6.6 7 1.78 \n", + "4 10.1 3 2.73 \n", + "... ... ... ... 
\n", + "3328 9.9 6 2.67 \n", + "3329 9.6 4 2.59 \n", + "3330 14.1 6 3.81 \n", + "3331 5.0 10 1.35 \n", + "3332 13.7 4 3.70 \n", + "\n", + " Customer service calls Churn new_Number_vmail_messages \n", + "0 1 0 100 \n", + "1 1 0 104 \n", + "2 0 0 4 \n", + "3 2 0 4 \n", + "4 3 0 4 \n", + "... ... ... ... \n", + "3328 2 0 144 \n", + "3329 3 0 4 \n", + "3330 2 0 4 \n", + "3331 2 0 4 \n", + "3332 0 0 100 \n", + "\n", + "[3333 rows x 21 columns]" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def make_feature(row):\n", + " if row['Voice mail plan'] == 'Yes':\n", + " return row['Number vmail messages'] * 4\n", + " return row['Number vmail messages'] + 4\n", + "df['new_Number_vmail_messages'] = df.apply(make_feature, axis=1)\n", + "df" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "j2fEIU5ptU8Y" + }, + "source": [ + "Метод `apply` можно использовать и для того, чтобы применить функцию к каждой строке. Для этого нужно указать `axis=1`." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "e-TxyhUttU8Y" + }, + "source": [ + "**Применение функции к каждой ячейке столбца**\n", + "\n", + "Допустим, по какой-то причине нас интересуют все люди из штатов, названия которых начинаются на 'W'. В данном случае это можно сделать по-разному, но наибольшую свободу дает связка `apply`-`lambda` – применение функции ко всем значениям в столбце." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 261 + }, + "executionInfo": { + "elapsed": 65, + "status": "ok", + "timestamp": 1633609638563, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "-jnLxPnWtU8Z", + "outputId": "bcab7df7-ed63-45c1-816b-ec70b6e4fa7b", + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
StateAccount lengthArea codeInternational planVoice mail planNumber vmail messagesTotal day minutesTotal day callsTotal day chargeTotal eve minutesTotal eve callsTotal eve chargeTotal night minutesTotal night callsTotal night chargeTotal intl minutesTotal intl callsTotal intl chargeCustomer service callsChurnnew_Number_vmail_messages
9WV141415YesYes37258.68443.96222.011118.87326.49714.6911.253.0200148
26WY57408NoYes39213.011536.21191.111216.24182.71158.229.532.5700156
44WI64510NoNo0154.06726.18225.811819.19265.38611.943.530.95104
49WY97415NoYes24133.213522.64217.25818.4670.6793.1811.032.971096
54WY87415NoNo0151.08325.67219.711618.67203.91279.189.732.62514
\n", + "
" + ], + "text/plain": [ + " State Account length Area code International plan Voice mail plan \\\n", + "9 WV 141 415 Yes Yes \n", + "26 WY 57 408 No Yes \n", + "44 WI 64 510 No No \n", + "49 WY 97 415 No Yes \n", + "54 WY 87 415 No No \n", + "\n", + " Number vmail messages Total day minutes Total day calls \\\n", + "9 37 258.6 84 \n", + "26 39 213.0 115 \n", + "44 0 154.0 67 \n", + "49 24 133.2 135 \n", + "54 0 151.0 83 \n", + "\n", + " Total day charge Total eve minutes Total eve calls Total eve charge \\\n", + "9 43.96 222.0 111 18.87 \n", + "26 36.21 191.1 112 16.24 \n", + "44 26.18 225.8 118 19.19 \n", + "49 22.64 217.2 58 18.46 \n", + "54 25.67 219.7 116 18.67 \n", + "\n", + " Total night minutes Total night calls Total night charge \\\n", + "9 326.4 97 14.69 \n", + "26 182.7 115 8.22 \n", + "44 265.3 86 11.94 \n", + "49 70.6 79 3.18 \n", + "54 203.9 127 9.18 \n", + "\n", + " Total intl minutes Total intl calls Total intl charge \\\n", + "9 11.2 5 3.02 \n", + "26 9.5 3 2.57 \n", + "44 3.5 3 0.95 \n", + "49 11.0 3 2.97 \n", + "54 9.7 3 2.62 \n", + "\n", + " Customer service calls Churn new_Number_vmail_messages \n", + "9 0 0 148 \n", + "26 0 0 156 \n", + "44 1 0 4 \n", + "49 1 0 96 \n", + "54 5 1 4 " + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[df[\"State\"].apply(lambda state: state[0] == \"W\")].head()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "q6SkeDiJtU8Z" + }, + "source": [ + "Метод `map` можно использовать и для **замены значений в колонке**, передав ему в качестве аргумента словарь вида `{old_value: new_value}`:" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 261 + }, + "executionInfo": { + "elapsed": 63, + "status": "ok", + "timestamp": 1633609638564, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": 
"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "q3lbm6XXtU8a", + "outputId": "10f505a4-3227-4ff5-b868-1efaadf4a181" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
StateAccount lengthArea codeInternational planVoice mail planNumber vmail messagesTotal day minutesTotal day callsTotal day chargeTotal eve minutesTotal eve callsTotal eve chargeTotal night minutesTotal night callsTotal night chargeTotal intl minutesTotal intl callsTotal intl chargeCustomer service callsChurnnew_Number_vmail_messages
0KS128415FalseYes25265.111045.07197.49916.78244.79111.0110.032.7010100
1OH107415FalseYes26161.612327.47195.510316.62254.410311.4513.733.7010104
2NJ137415FalseNo0243.411441.38121.211010.30162.61047.3212.253.29004
3OH84408TrueNo0299.47150.9061.9885.26196.9898.866.671.78204
4OK75415TrueNo0166.711328.34148.312212.61186.91218.4110.132.73304
\n", + "
" + ], + "text/plain": [ + " State Account length Area code International plan Voice mail plan \\\n", + "0 KS 128 415 False Yes \n", + "1 OH 107 415 False Yes \n", + "2 NJ 137 415 False No \n", + "3 OH 84 408 True No \n", + "4 OK 75 415 True No \n", + "\n", + " Number vmail messages Total day minutes Total day calls \\\n", + "0 25 265.1 110 \n", + "1 26 161.6 123 \n", + "2 0 243.4 114 \n", + "3 0 299.4 71 \n", + "4 0 166.7 113 \n", + "\n", + " Total day charge Total eve minutes Total eve calls Total eve charge \\\n", + "0 45.07 197.4 99 16.78 \n", + "1 27.47 195.5 103 16.62 \n", + "2 41.38 121.2 110 10.30 \n", + "3 50.90 61.9 88 5.26 \n", + "4 28.34 148.3 122 12.61 \n", + "\n", + " Total night minutes Total night calls Total night charge \\\n", + "0 244.7 91 11.01 \n", + "1 254.4 103 11.45 \n", + "2 162.6 104 7.32 \n", + "3 196.9 89 8.86 \n", + "4 186.9 121 8.41 \n", + "\n", + " Total intl minutes Total intl calls Total intl charge \\\n", + "0 10.0 3 2.70 \n", + "1 13.7 3 3.70 \n", + "2 12.2 5 3.29 \n", + "3 6.6 7 1.78 \n", + "4 10.1 3 2.73 \n", + "\n", + " Customer service calls Churn new_Number_vmail_messages \n", + "0 1 0 100 \n", + "1 1 0 104 \n", + "2 0 0 4 \n", + "3 2 0 4 \n", + "4 3 0 4 " + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d = {\"No\": False, \"Yes\": True}\n", + "df[\"International plan\"] = df[\"International plan\"].map(d)\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "YkK8_gEBtU8b" + }, + "source": [ + "Аналогичную операцию можно провернуть с помощью метода `replace`:" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 261 + }, + "executionInfo": { + "elapsed": 653, + "status": "ok", + "timestamp": 1633609639156, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": 
"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "xop7OSmZtU8b", + "outputId": "16d543f3-5373-45d1-cb55-b69355e6a5cb" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
StateAccount lengthArea codeInternational planVoice mail planNumber vmail messagesTotal day minutesTotal day callsTotal day chargeTotal eve minutesTotal eve callsTotal eve chargeTotal night minutesTotal night callsTotal night chargeTotal intl minutesTotal intl callsTotal intl chargeCustomer service callsChurnnew_Number_vmail_messages
0KS128415FalseTrue25265.111045.07197.49916.78244.79111.0110.032.7010100
1OH107415FalseTrue26161.612327.47195.510316.62254.410311.4513.733.7010104
2NJ137415FalseFalse0243.411441.38121.211010.30162.61047.3212.253.29004
3OH84408TrueFalse0299.47150.9061.9885.26196.9898.866.671.78204
4OK75415TrueFalse0166.711328.34148.312212.61186.91218.4110.132.73304
\n", + "
" + ], + "text/plain": [ + " State Account length Area code International plan Voice mail plan \\\n", + "0 KS 128 415 False True \n", + "1 OH 107 415 False True \n", + "2 NJ 137 415 False False \n", + "3 OH 84 408 True False \n", + "4 OK 75 415 True False \n", + "\n", + " Number vmail messages Total day minutes Total day calls \\\n", + "0 25 265.1 110 \n", + "1 26 161.6 123 \n", + "2 0 243.4 114 \n", + "3 0 299.4 71 \n", + "4 0 166.7 113 \n", + "\n", + " Total day charge Total eve minutes Total eve calls Total eve charge \\\n", + "0 45.07 197.4 99 16.78 \n", + "1 27.47 195.5 103 16.62 \n", + "2 41.38 121.2 110 10.30 \n", + "3 50.90 61.9 88 5.26 \n", + "4 28.34 148.3 122 12.61 \n", + "\n", + " Total night minutes Total night calls Total night charge \\\n", + "0 244.7 91 11.01 \n", + "1 254.4 103 11.45 \n", + "2 162.6 104 7.32 \n", + "3 196.9 89 8.86 \n", + "4 186.9 121 8.41 \n", + "\n", + " Total intl minutes Total intl calls Total intl charge \\\n", + "0 10.0 3 2.70 \n", + "1 13.7 3 3.70 \n", + "2 12.2 5 3.29 \n", + "3 6.6 7 1.78 \n", + "4 10.1 3 2.73 \n", + "\n", + " Customer service calls Churn new_Number_vmail_messages \n", + "0 1 0 100 \n", + "1 1 0 104 \n", + "2 0 0 4 \n", + "3 2 0 4 \n", + "4 3 0 4 " + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = df.replace({\"Voice mail plan\": d})\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "sJ9KC2CrtU8d" + }, + "source": [ + "### Группировка данных\n", + "\n", + "В общем случае группировка данных в Pandas выглядит следующим образом:\n", + "\n", + "```\n", + "df.groupby(by=grouping_columns)[columns_to_show].function()\n", + "```\n", + "\n", + "1. К датафрейму применяется метод **`groupby`**, который разделяет данные по `grouping_columns` – признаку или набору признаков.\n", + "3. Индексируем по нужным нам столбцам (`columns_to_show`). \n", + "2. К полученным группам применяется функция или несколько функций." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wiHvK8LFtU8d" + }, + "source": [ + "**Группирование данных в зависимости от значения признака `Churn` и вывод статистик по трём столбцам в каждой группе.**" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 175 + }, + "executionInfo": { + "elapsed": 99, + "status": "ok", + "timestamp": 1633609639159, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "pXrstrQgtU8d", + "outputId": "42ed5c1c-65c1-457f-cd03-6a26bb60da9f" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Total day minutesTotal eve minutesTotal night minutes
countmeanstdmin50%maxcountmeanstdmin50%maxcountmeanstdmin50%max
Churn
02850.0175.1850.180.0177.2315.62850.0199.0450.290.0199.6361.82850.0200.1351.1123.2200.25395.0
1483.0206.9169.000.0217.6350.8483.0212.4151.7370.9211.3363.7483.0205.2347.1347.4204.80354.9
\n", + "
" + ], + "text/plain": [ + " Total day minutes Total eve minutes \\\n", + " count mean std min 50% max count \n", + "Churn \n", + "0 2850.0 175.18 50.18 0.0 177.2 315.6 2850.0 \n", + "1 483.0 206.91 69.00 0.0 217.6 350.8 483.0 \n", + "\n", + " Total night minutes \\\n", + " mean std min 50% max count mean std \n", + "Churn \n", + "0 199.04 50.29 0.0 199.6 361.8 2850.0 200.13 51.11 \n", + "1 212.41 51.73 70.9 211.3 363.7 483.0 205.23 47.13 \n", + "\n", + " \n", + " min 50% max \n", + "Churn \n", + "0 23.2 200.25 395.0 \n", + "1 47.4 204.80 354.9 " + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "columns_to_show = [\"Total day minutes\", \"Total eve minutes\", \"Total night minutes\"]\n", + "\n", + "df.groupby([\"Churn\"])[columns_to_show].describe(percentiles=[])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_EZVRvNptU8d" + }, + "source": [ + "Сделаем то же самое, но немного по-другому, передав в `agg` список функций:" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 175 + }, + "executionInfo": { + "elapsed": 97, + "status": "ok", + "timestamp": 1633609639161, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "9x5emqSwtU8e", + "outputId": "9e81a4db-b89a-4e06-d928-70be8f26fdc5" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Total day minutesTotal eve minutesTotal night minutes
meanstdaminamaxmeanstdaminamaxmeanstdaminamax
Churn
0175.1850.180.0315.6199.0450.290.0361.8200.1351.1123.2395.0
1206.9169.000.0350.8212.4151.7370.9363.7205.2347.1347.4354.9
\n", + "
" + ], + "text/plain": [ + " Total day minutes Total eve minutes \\\n", + " mean std amin amax mean std amin \n", + "Churn \n", + "0 175.18 50.18 0.0 315.6 199.04 50.29 0.0 \n", + "1 206.91 69.00 0.0 350.8 212.41 51.73 70.9 \n", + "\n", + " Total night minutes \n", + " amax mean std amin amax \n", + "Churn \n", + "0 361.8 200.13 51.11 23.2 395.0 \n", + "1 363.7 205.23 47.13 47.4 354.9 " + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "columns_to_show = [\"Total day minutes\", \"Total eve minutes\", \"Total night minutes\"]\n", + "\n", + "df.groupby([\"Churn\"])[columns_to_show].agg([np.mean, np.std, np.min, np.max])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bMsnErVv_o77" + }, + "source": [ + "Сбрасываем индекс с группирующего поля" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 112 + }, + "executionInfo": { + "elapsed": 93, + "status": "ok", + "timestamp": 1633609639162, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "8HFw5er5_DhM", + "outputId": "bbdaaf9f-323e-42aa-e086-768f78599e65" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ChurnState
002850
11483
\n", + "
" + ], + "text/plain": [ + " Churn State\n", + "0 0 2850\n", + "1 1 483" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.groupby('Churn', as_index=False)['State'].count()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mwqgfLAVtU8e" + }, + "source": [ + "### Сводные таблицы" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gYnSr64ptU8e" + }, + "source": [ + "Допустим, мы хотим посмотреть, как наблюдения в нашей выборке распределены в контексте двух признаков — `Churn` и `International plan`. Для этого мы можем построить **таблицу сопряженности**, воспользовавшись методом **`crosstab`**:" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 143 + }, + "executionInfo": { + "elapsed": 91, + "status": "ok", + "timestamp": 1633609639163, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "yhgrYerutU8f", + "outputId": "25d6f12a-34f1-4f8b-8f49-d8914548046d" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
International planFalseTrue
Churn
02664186
1346137
\n", + "
" + ], + "text/plain": [ + "International plan False True \n", + "Churn \n", + "0 2664 186\n", + "1 346 137" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.crosstab(df[\"Churn\"], df[\"International plan\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 143 + }, + "executionInfo": { + "elapsed": 90, + "status": "ok", + "timestamp": 1633609639165, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "cR0WankTtU8f", + "outputId": "a949d3c8-88f3-4f4a-8a67-6208d2c44445", + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Voice mail planFalseTrue
Churn
00.600.25
10.120.02
\n", + "
" + ], + "text/plain": [ + "Voice mail plan False True \n", + "Churn \n", + "0 0.60 0.25\n", + "1 0.12 0.02" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.crosstab(df[\"Churn\"], df[\"Voice mail plan\"], normalize=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 87, + "status": "ok", + "timestamp": 1633609639166, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "P_eONQ24W0aU", + "outputId": "525dd141-f2d7-47a6-c4ea-6bd3be3386a9" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([1, 0, 2, 3, 4, 5, 7, 9, 6, 8])" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[\"Customer service calls\"].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 75, + "status": "ok", + "timestamp": 1633609639167, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "abLbV3cKARwi", + "outputId": "c5f227e8-5570-4c48-e7bf-48bb31227734" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "10" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[\"Customer service calls\"].nunique()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1F8uRUIwtU8h" + }, + "source": [ + "Мы видим, что большинство пользователей — лояльные и не пользуются дополнительными услугами (международного роуминга / 
голосовой почты)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "reNYiSlJtU8h" + }, + "source": [ + "Продвинутые пользователи `Excel` наверняка вспомнят о такой фиче, как **сводные таблицы** (`pivot tables`). В `Pandas` за сводные таблицы отвечает метод **`pivot_table`**, который принимает в качестве параметров:\n", + "\n", + "* `values` – список переменных, по которым требуется рассчитать нужные статистики,\n", + "* `index` – список переменных, по которым нужно сгруппировать данные,\n", + "* `aggfunc` — то, что нам, собственно, нужно посчитать по группам — сумму, среднее, максимум, минимум или что-то ещё.\n", + "\n", + "Давайте посмотрим среднее число дневных, вечерних и ночных звонков для разных `Area code`:" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 175 + }, + "executionInfo": { + "elapsed": 68, + "status": "ok", + "timestamp": 1633609639168, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "xabiD5fktU8h", + "outputId": "a51980ac-bbb9-44b4-c453-1085d9014a7c", + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Total day callsTotal eve callsTotal night calls
Area code
408100.5099.7999.04
415100.58100.50100.40
510100.1099.67100.60
\n", + "
" + ], + "text/plain": [ + " Total day calls Total eve calls Total night calls\n", + "Area code \n", + "408 100.50 99.79 99.04\n", + "415 100.58 100.50 100.40\n", + "510 100.10 99.67 100.60" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.pivot_table(\n", + " [\"Total day calls\", \"Total eve calls\", \"Total night calls\"],\n", + " [\"Area code\"],\n", + " aggfunc=\"mean\",\n", + ").head(10)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0_haYJdjtU8h" + }, + "source": [ + "### Преобразование датафреймов\n", + "\n", + "Как и многие другие вещи, добавлять столбцы в `DataFrame` можно несколькими способами." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "35zMtFv8tU8i" + }, + "source": [ + "Например, мы хотим посчитать общее количество звонков для всех пользователей. Создадим объект `total_calls` типа `Series` и вставим его в датафрейм:" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 261 + }, + "executionInfo": { + "elapsed": 67, + "status": "ok", + "timestamp": 1633609639171, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "z1ktVfD0tU8i", + "outputId": "a0c006bf-4504-4c46-af1d-8b8ab8167d79" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
StateAccount lengthArea codeInternational planVoice mail planNumber vmail messagesTotal day minutesTotal day callsTotal day chargeTotal eve minutesTotal eve callsTotal eve chargeTotal night minutesTotal night callsTotal night chargeTotal intl minutesTotal intl callsTotal intl chargeCustomer service callsChurnnew_Number_vmail_messagesTotal calls
0KS128415FalseTrue25265.111045.07197.49916.78244.79111.0110.032.7010100303
1OH107415FalseTrue26161.612327.47195.510316.62254.410311.4513.733.7010104332
2NJ137415FalseFalse0243.411441.38121.211010.30162.61047.3212.253.29004333
3OH84408TrueFalse0299.47150.9061.9885.26196.9898.866.671.78204255
4OK75415TrueFalse0166.711328.34148.312212.61186.91218.4110.132.73304359
\n", + "
" + ], + "text/plain": [ + " State Account length Area code International plan Voice mail plan \\\n", + "0 KS 128 415 False True \n", + "1 OH 107 415 False True \n", + "2 NJ 137 415 False False \n", + "3 OH 84 408 True False \n", + "4 OK 75 415 True False \n", + "\n", + " Number vmail messages Total day minutes Total day calls \\\n", + "0 25 265.1 110 \n", + "1 26 161.6 123 \n", + "2 0 243.4 114 \n", + "3 0 299.4 71 \n", + "4 0 166.7 113 \n", + "\n", + " Total day charge Total eve minutes Total eve calls Total eve charge \\\n", + "0 45.07 197.4 99 16.78 \n", + "1 27.47 195.5 103 16.62 \n", + "2 41.38 121.2 110 10.30 \n", + "3 50.90 61.9 88 5.26 \n", + "4 28.34 148.3 122 12.61 \n", + "\n", + " Total night minutes Total night calls Total night charge \\\n", + "0 244.7 91 11.01 \n", + "1 254.4 103 11.45 \n", + "2 162.6 104 7.32 \n", + "3 196.9 89 8.86 \n", + "4 186.9 121 8.41 \n", + "\n", + " Total intl minutes Total intl calls Total intl charge \\\n", + "0 10.0 3 2.70 \n", + "1 13.7 3 3.70 \n", + "2 12.2 5 3.29 \n", + "3 6.6 7 1.78 \n", + "4 10.1 3 2.73 \n", + "\n", + " Customer service calls Churn new_Number_vmail_messages Total calls \n", + "0 1 0 100 303 \n", + "1 1 0 104 332 \n", + "2 0 0 4 333 \n", + "3 2 0 4 255 \n", + "4 3 0 4 359 " + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "total_calls = (\n", + " df[\"Total day calls\"]\n", + " + df[\"Total eve calls\"]\n", + " + df[\"Total night calls\"]\n", + " + df[\"Total intl calls\"]\n", + ")\n", + "df.insert(loc=len(df.columns), column=\"Total calls\", value=total_calls)\n", + "# loc - номер столбца, после которого нужно вставить данный Series\n", + "# мы указали len(df.columns), чтобы вставить его в самом конце\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nB0mpCA1tU8j" + }, + "source": [ + "Добавить столбец из имеющихся можно и проще, не создавая промежуточных `Series`:" + ] + }, + { + "cell_type": "code", + 
"execution_count": 42, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 261 + }, + "executionInfo": { + "elapsed": 64, + "status": "ok", + "timestamp": 1633609639173, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "ZVpdhf1etU8k", + "outputId": "93b3fe31-2757-4cb7-afcc-7c10f765bb46" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
StateAccount lengthArea codeInternational planVoice mail planNumber vmail messagesTotal day minutesTotal day callsTotal day chargeTotal eve minutesTotal eve callsTotal eve chargeTotal night minutesTotal night callsTotal night chargeTotal intl minutesTotal intl callsTotal intl chargeCustomer service callsChurnnew_Number_vmail_messagesTotal callsTotal charge
0KS128415FalseTrue25265.111045.07197.49916.78244.79111.0110.032.701010030375.56
1OH107415FalseTrue26161.612327.47195.510316.62254.410311.4513.733.701010433259.24
2NJ137415FalseFalse0243.411441.38121.211010.30162.61047.3212.253.2900433362.29
3OH84408TrueFalse0299.47150.9061.9885.26196.9898.866.671.7820425566.80
4OK75415TrueFalse0166.711328.34148.312212.61186.91218.4110.132.7330435952.09
\n", + "
" + ], + "text/plain": [ + " State Account length Area code International plan Voice mail plan \\\n", + "0 KS 128 415 False True \n", + "1 OH 107 415 False True \n", + "2 NJ 137 415 False False \n", + "3 OH 84 408 True False \n", + "4 OK 75 415 True False \n", + "\n", + " Number vmail messages Total day minutes Total day calls \\\n", + "0 25 265.1 110 \n", + "1 26 161.6 123 \n", + "2 0 243.4 114 \n", + "3 0 299.4 71 \n", + "4 0 166.7 113 \n", + "\n", + " Total day charge Total eve minutes Total eve calls Total eve charge \\\n", + "0 45.07 197.4 99 16.78 \n", + "1 27.47 195.5 103 16.62 \n", + "2 41.38 121.2 110 10.30 \n", + "3 50.90 61.9 88 5.26 \n", + "4 28.34 148.3 122 12.61 \n", + "\n", + " Total night minutes Total night calls Total night charge \\\n", + "0 244.7 91 11.01 \n", + "1 254.4 103 11.45 \n", + "2 162.6 104 7.32 \n", + "3 196.9 89 8.86 \n", + "4 186.9 121 8.41 \n", + "\n", + " Total intl minutes Total intl calls Total intl charge \\\n", + "0 10.0 3 2.70 \n", + "1 13.7 3 3.70 \n", + "2 12.2 5 3.29 \n", + "3 6.6 7 1.78 \n", + "4 10.1 3 2.73 \n", + "\n", + " Customer service calls Churn new_Number_vmail_messages Total calls \\\n", + "0 1 0 100 303 \n", + "1 1 0 104 332 \n", + "2 0 0 4 333 \n", + "3 2 0 4 255 \n", + "4 3 0 4 359 \n", + "\n", + " Total charge \n", + "0 75.56 \n", + "1 59.24 \n", + "2 62.29 \n", + "3 66.80 \n", + "4 52.09 " + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[\"Total charge\"] = (\n", + " df[\"Total day charge\"]\n", + " + df[\"Total eve charge\"]\n", + " + df[\"Total night charge\"]\n", + " + df[\"Total intl charge\"]\n", + ")\n", + "\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xrn0pZo1tU8l" + }, + "source": [ + "Чтобы удалить столбцы или строки, воспользуйтесь методом `drop`, передавая в качестве аргумента нужные индексы и требуемое значение параметра `axis` (`1`, если удаляете столбцы, и ничего или `0`, если удаляете 
строки):" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 261 + }, + "executionInfo": { + "elapsed": 62, + "status": "ok", + "timestamp": 1633609639175, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "oSvOmNv-tU8l", + "outputId": "d0304dcc-8e8a-42c9-8765-1822ae6f5c44", + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
StateAccount lengthArea codeInternational planVoice mail planNumber vmail messagesTotal day minutesTotal day callsTotal day chargeTotal eve minutesTotal eve callsTotal eve chargeTotal night minutesTotal night callsTotal night chargeTotal intl minutesTotal intl callsTotal intl chargeCustomer service callsChurnnew_Number_vmail_messages
0KS128415FalseTrue25265.111045.07197.49916.78244.79111.0110.032.7010100
3OH84408TrueFalse0299.47150.9061.9885.26196.9898.866.671.78204
4OK75415TrueFalse0166.711328.34148.312212.61186.91218.4110.132.73304
5AL118510TrueFalse0223.49837.98220.610118.75203.91189.186.361.70004
6MA121510FalseTrue24218.28837.09348.510829.62212.61189.577.572.033096
\n", + "
" + ], + "text/plain": [ + " State Account length Area code International plan Voice mail plan \\\n", + "0 KS 128 415 False True \n", + "3 OH 84 408 True False \n", + "4 OK 75 415 True False \n", + "5 AL 118 510 True False \n", + "6 MA 121 510 False True \n", + "\n", + " Number vmail messages Total day minutes Total day calls \\\n", + "0 25 265.1 110 \n", + "3 0 299.4 71 \n", + "4 0 166.7 113 \n", + "5 0 223.4 98 \n", + "6 24 218.2 88 \n", + "\n", + " Total day charge Total eve minutes Total eve calls Total eve charge \\\n", + "0 45.07 197.4 99 16.78 \n", + "3 50.90 61.9 88 5.26 \n", + "4 28.34 148.3 122 12.61 \n", + "5 37.98 220.6 101 18.75 \n", + "6 37.09 348.5 108 29.62 \n", + "\n", + " Total night minutes Total night calls Total night charge \\\n", + "0 244.7 91 11.01 \n", + "3 196.9 89 8.86 \n", + "4 186.9 121 8.41 \n", + "5 203.9 118 9.18 \n", + "6 212.6 118 9.57 \n", + "\n", + " Total intl minutes Total intl calls Total intl charge \\\n", + "0 10.0 3 2.70 \n", + "3 6.6 7 1.78 \n", + "4 10.1 3 2.73 \n", + "5 6.3 6 1.70 \n", + "6 7.5 7 2.03 \n", + "\n", + " Customer service calls Churn new_Number_vmail_messages \n", + "0 1 0 100 \n", + "3 2 0 4 \n", + "4 3 0 4 \n", + "5 0 0 4 \n", + "6 3 0 96 " + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# избавляемся от созданных только что столбцов\n", + "df = df.drop([\"Total charge\", \"Total calls\"], axis=1)\n", + "\n", + "df.drop([1, 2]).head() # а вот так можно удалить строчки" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "JLDUG5hNtU8l" + }, + "source": [ + "--------\n", + "\n", + "\n", + "\n", + "## Первые попытки прогнозирования оттока\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1sv6q4lNtU8m" + }, + "source": [ + "Посмотрим, как отток связан с признаком *\"Подключение международного роуминга\"* (`International plan`). 
Сделаем это с помощью сводной таблички `crosstab`, а также путем иллюстрации с `Seaborn` (как именно строить такие картинки и анализировать с их помощью графики – материал следующей статьи.)" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": { + "collapsed": true, + "executionInfo": { + "elapsed": 57, + "status": "ok", + "timestamp": 1633609639176, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "M7cBvVn-tU8m" + }, + "outputs": [], + "source": [ + "# надо дополнительно установить (команда в терминале)\n", + "# чтоб картинки рисовались в тетрадке\n", + "# !conda install seaborn\n", + "%matplotlib inline\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "\n", + "plt.rcParams[\"figure.figsize\"] = (8, 6)" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 175 + }, + "executionInfo": { + "elapsed": 56, + "status": "ok", + "timestamp": 1633609639177, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "8ZJBwL8NtU8m", + "outputId": "334f814d-2c27-4f67-cabd-17159188ca2b" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
International planFalseTrueAll
Churn
026641862850
1346137483
All30103233333
\n", + "
" + ], + "text/plain": [ + "International plan False True All\n", + "Churn \n", + "0 2664 186 2850\n", + "1 346 137 483\n", + "All 3010 323 3333" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.crosstab(df[\"Churn\"], df[\"International plan\"], margins=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 388 + }, + "executionInfo": { + "elapsed": 1509, + "status": "ok", + "timestamp": 1633609640633, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "BGwuNSretU8n", + "outputId": "0b7deac2-30bb-4ec9-e84b-ec60b64ceeff" + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "sns.countplot(x=\"International plan\", hue=\"Churn\", data=df)\n", + "plt.savefig(\"int_plan_and_churn.png\", dpi=300);" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "j8CpA17FtU8o" + }, + "source": [ + "Видим, что когда роуминг подключен, доля оттока намного выше – интересное наблюдение! Возможно, большие и плохо контролируемые траты в роуминге очень конфликтогенны и приводят к недовольству клиентов телеком-оператора и, соответственно, к их оттоку. " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "JDm9ePM4tU8o" + }, + "source": [ + "Далее посмотрим на еще один важный признак – *\"Число обращений в сервисный центр\"* (`Customer service calls`). Также построим сводную таблицу и картинку." + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 175 + }, + "executionInfo": { + "elapsed": 25, + "status": "ok", + "timestamp": 1633609640635, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "UKGrw7fbtU8p", + "outputId": "7801c2d5-2a6d-4872-cde2-f847b851b3c5" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Customer service calls0123456789All
Churn
06051059672385902684102850
1921228744764014512483
All697118175942916666229223333
\n", + "
" + ], + "text/plain": [ + "Customer service calls 0 1 2 3 4 5 6 7 8 9 All\n", + "Churn \n", + "0 605 1059 672 385 90 26 8 4 1 0 2850\n", + "1 92 122 87 44 76 40 14 5 1 2 483\n", + "All 697 1181 759 429 166 66 22 9 2 2 3333" + ] + }, + "execution_count": 47, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.crosstab(df[\"Churn\"], df[\"Customer service calls\"], margins=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 388 + }, + "executionInfo": { + "elapsed": 2104, + "status": "ok", + "timestamp": 1633609642719, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "sMJh9m1VtU8p", + "outputId": "43183fcc-f324-4492-acd6-30a6cf0615b5" + }, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAfoAAAFzCAYAAADWqstZAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAdcklEQVR4nO3de5RdZZnn8e9DEgggcgnhYip0wmW4qkAKRGmZCLZAoAPYyMBCLsKs6CxQlB5asGcaxJ4RF3YjtDPOZBGUTCuIXIRxGJSrtxEwAQyQwBCQS2W4xIAo2hECz/xx3mARk9QJnDq7zlvfz1q1au937/Pup4rLr/btfSMzkSRJdVqv6QIkSdLwMeglSaqYQS9JUsUMekmSKmbQS5JUMYNekqSKjW26gOGw5ZZb5pQpU5ouQ5Kkrpk/f/6vMnPiqu1VBv2UKVOYN29e02VIktQ1EfHE6tq9dC9JUsUMekmSKmbQS5JUsSrv0a/OK6+8wsDAAMuXL2+6lLUaP348fX19jBs3rulSJEkVGDVBPzAwwCabbMKUKVOIiKbLWa3MZNmyZQwMDDB16tSmy5EkVWDUXLpfvnw5EyZMGLEhDxARTJgwYcRfdZAk9Y5RE/TAiA75lXqhRklS7xhVQb82zzzzDMceeyw77LAD06ZNY8aMGcyePZvDDz+86dIkSXrTDHpa98aPOuoopk+fzqOPPsr8+fP54he/yLPPPvuW+l2xYkWHKpQk6c0x6IHbb7+dcePG8YlPfOL1tne/+928//3v56WXXuLoo49ml1124fjjjyczgdboe7/61a8AmDdvHtOnTwfgvPPO44QTTmD//ffnhBNO4LzzzuOUU05h+vTpbL/99lxyySVd//kkSaOXQQ888MADTJs2bbXb7r33Xr7yla+wcOFCHnvsMX76058O2d/ChQu55ZZbuOKKKwB46KGH+P73v8/dd9/N5z//eV555ZWO1i9J0poY9EPYd999
6evrY7311mPPPffk8ccfH/IzM2fOZMMNN3x9/bDDDmODDTZgyy23ZKuttnrLtwQkSWqXQQ/svvvuzJ8/f7XbNthgg9eXx4wZ8/p997Fjx/Laa68B/MnrcBtvvHFbfUiSNNxGzYA5a3PggQfyuc99jtmzZzNr1iwAFixYwI9//OM1fmbKlCnMnz+fQw89lGuuuabjNU07a25H+pl/4Ykd6UeS1Js8o6f17vp1113HLbfcwg477MDuu+/OOeecwzbbbLPGz5x77rmcccYZ9Pf3M2bMmC5WK0lS+2LlU+Q16e/vz1Xno1+0aBG77rprQxWtm0WLFvHRy37ekb48o5ek0SEi5mdm/6rtntFLklQxg16SpIoZ9JIkVcyglySpYga9JEkVM+glSaqYQd9lN910EzvvvDM77rgjF1xwQdPlSJIqN2pHxuvUyHMrtfO++quvvsppp53GzTffTF9fH/vssw8zZ85kt91262gtkiSt5Bl9F919993suOOObL/99qy//voce+yxXH/99U2XJUmqmEHfRUuWLGHy5Mmvr/f19bFkyZIGK5Ik1c6glySpYsMW9BFxWUQ8FxEPDGrbIiJujohHyvfNS3tExCURsTgiFkTE3oM+c1LZ/5GIOGm46u2GSZMm8dRTT72+PjAwwKRJkxqsSJJUu+E8o/8GcMgqbWcDt2bmTsCtZR3gUGCn8jUL+Bq0/jAAzgXeA+wLnLvyj4NetM8++/DII4/wy1/+kpdffpkrr7ySmTNnNl2WJKliwxb0mfkj4PlVmo8ALi/LlwNHDmqfmy13AptFxLbAwcDNmfl8Zr4A3Myf/vHQM8aOHctXv/pVDj74YHbddVeOOeYYdt9996bLkiRVrNuv122dmU+X5WeArcvyJOCpQfsNlLY1tf+JiJhF62oA22233ZCFNDV964wZM5gxY0Yjx5YkjT6NPYyXmQlkB/ubnZn9mdk/ceLETnUrSVJP63bQP1suyVO+P1falwCTB+3XV9rW1C5JktrQ7aC/AVj55PxJwPWD2k8sT9/vB7xYLvF/H/hQRGxeHsL7UGmTJEltGLZ79BFxBTAd2DIiBmg9PX8BcFVEnAo8ARxTdr8RmAEsBn4PfAwgM5+PiC8APy/7nZ+Zqz7gJ0mS1mDYgj4zj1vDpoNWs28Cp62hn8uAyzpYmiRJo4Yj40mSVDGDvotOOeUUttpqK/bYY4+mS5EkjRKjdpraJ89/Z0f72+7v7h9yn5NPPpnTTz+dE09s5h1+SdLo4xl9Fx1wwAFsscUWTZchSRpFDHpJkipm0EuSVDGDXpKkihn0kiRVzKDvouOOO473vve9PPzww/T19TFnzpymS5IkVW7Uvl7XzutwnXbFFVd0/ZiSpNHNM3pJkipm0EuSVDGDXpKkio2qoG9Nkjey9UKNkqTeMWqCfvz48SxbtmxEB2lmsmzZMsaPH990KZKkSoyap+77+voYGBhg6dKlTZeyVuPHj6evr6/pMiRJlRg1QT9u3DimTp3adBmSJHXVqLl0L0nSaGTQS5JUMYNekqSKGfSSJFXMoJckqWIGvSRJFTPoJUmqmEEvSVLFDHpJkipm0EuSVDGDXpKkihn0kiRVzKCXJKliBr0kSRUz6CVJqphBL0lSxQx6SZIqZtBLklQxg16SpIoZ9JIkVcyglySpYga9JEkVM+glSaqYQS9JUsUMekmSKmbQS5JUMYNekqSKGfSSJFXMoJckqWIGvSRJFTPoJUmqmEEvSVLFDHpJkirWSNBHxGci4sGIeCAiroiI8RExNSLuiojFEfHtiFi/7LtBWV9ctk9pomZJknpR14M+IiYBnwL6M3MPYAxwLPAl4KLM3BF4ATi1fORU4IXSflHZT5IktaGpS/djgQ0jYiywEfA0cCBwddl+OXBkWT6irFO2HxQR0cVaJUnqWV0P+sxcAnwZeJJWwL8IzAd+nZkrym4DwKSyPAl4qnx2Rdl/wqr9RsSsiJgXEfOWLl06vD+EJEk9oolL
95vTOkufCrwD2Bg45K32m5mzM7M/M/snTpz4VruTJKkKTVy6/yDwy8xcmpmvANcC+wOblUv5AH3AkrK8BJgMULZvCizrbsmSJPWmJoL+SWC/iNio3Gs/CFgI3A4cXfY5Cbi+LN9Q1inbb8vM7GK9kiT1rCbu0d9F66G6e4D7Sw2zgc8CZ0bEYlr34OeUj8wBJpT2M4Gzu12zJEm9auzQu3ReZp4LnLtK82PAvqvZdznwkW7UJUlSbRwZT5Kkihn0kiRVzKCXJKliBr0kSRUz6CVJqphBL0lSxQx6SZIqZtBLklQxg16SpIoZ9JIkVcyglySpYga9JEkVM+glSaqYQS9JUsUMekmSKmbQS5JUMYNekqSKGfSSJFXMoJckqWIGvSRJFTPoJUmqmEEvSVLFDHpJkipm0EuSVDGDXpKkihn0kiRVzKCXJKliBr0kSRUz6CVJqphBL0lSxQx6SZIqZtBLklSxsU0XoHpMO2tuR/qZf+GJHelHkuQZvSRJVTPoJUmqmEEvSVLFDHpJkipm0EuSVDGDXpKkihn0kiRVzKCXJKliBr0kSRUz6CVJqphBL0lSxQx6SZIqZtBLklQxg16SpIoZ9JIkVcyglySpYo0EfURsFhFXR8RDEbEoIt4bEVtExM0R8Uj5vnnZNyLikohYHBELImLvJmqWJKkXNXVGfzFwU2buArwbWAScDdyamTsBt5Z1gEOBncrXLOBr3S9XkqTeNLbbB4yITYEDgJMBMvNl4OWIOAKYXna7HLgD+CxwBDA3MxO4s1wN2DYzn16X4047a25H6p9/4Ykd6UeSpG5o4ox+KrAU+HpE3BsRl0bExsDWg8L7GWDrsjwJeGrQ5wdKmyRJGkITQT8W2Bv4WmbuBfyOP16mB6Ccvee6dBoRsyJiXkTMW7p0aceKlSSpl7UV9BFxazttbRoABjLzrrJ+Na3gfzYiti19bws8V7YvASYP+nxfaXuDzJydmf2Z2T9x4sQ3WZokSXVZa9BHxPiI2ALYMiI2L0/GbxERU3iTl88z8xngqYjYuTQdBCwEbgBOKm0nAdeX5RuAE8vT9/sBL67r/XlJkkaroR7G+zjwaeAdwHwgSvtvgK++heN+EvhmRKwPPAZ8jNYfHVdFxKnAE8AxZd8bgRnAYuD3ZV9JktSGtQZ9Zl4MXBwRn8zMf+rUQTPzPqB/NZsOWs2+CZzWqWNLkjSatPV6XWb+U0S8D5gy+DOZ2Zl31iRJ0rBoK+gj4n8AOwD3Aa+W5gQMekmSRrB2B8zpB3Yrl9ElSVKPaPc9+geAbYazEEmS1HntntFvCSyMiLuBP6xszMyZw1KVJEnqiHaD/rzhLEKSJA2Pdp+6/+FwFyJJkjqv3afuf8sfx55fHxgH/C4z3z5chUmSpLeu3TP6TVYuR0TQmjp2v+EqSpIkdcY6z16XLd8FDh6GeiRJUge1e+n+w4NW16P1Xv3yYalIkiR1TLtP3f/loOUVwOO0Lt9LkqQRrN179M4YJ0lSD2rrHn1E9EXEdRHxXPm6JiL6hrs4SZL01rT7MN7XgRtozUv/DuB/ljZJkjSCtRv0EzPz65m5onx9A5g4jHVJkqQOaDfol0XERyNiTPn6KLBsOAuTJElvXbtBfwpwDPAM8DRwNHDyMNUkSZI6pN3X684HTsrMFwAiYgvgy7T+AJAkSSNUu2f071oZ8gCZ+Tyw1/CUJEmSOqXdoF8vIjZfuVLO6Nu9GiBJkhrSblj/A/CziPhOWf8I8J+GpyRJktQp7Y6MNzci5gEHlqYPZ+bC4StLkiR1QtuX30uwG+6SJPWQdZ6mVpIk9Q6DXpKkihn0kiRVzKCXJKliBr0kSRUz6CVJqphBL0lSxQx6SZIqZtBLklQxg16SpIoZ9JIkVcyglySpYga9JEkVM+glSaqYQS9JUsUMekmSKmbQS5JUMYNekqSKGfSSJFXMoJckqWIGvSRJFTPoJUmqmEEvSVLFDHpJkipm0EuSVDGDXpKkijUW9BEx
JiLujYjvlfWpEXFXRCyOiG9HxPqlfYOyvrhsn9JUzZIk9Zomz+jPABYNWv8ScFFm7gi8AJxa2k8FXijtF5X9JElSGxoJ+ojoAw4DLi3rARwIXF12uRw4siwfUdYp2w8q+0uSpCE0dUb/FeBvgNfK+gTg15m5oqwPAJPK8iTgKYCy/cWy/xtExKyImBcR85YuXTqctUuS1DO6HvQRcTjwXGbO72S/mTk7M/szs3/ixImd7FqSpJ41toFj7g/MjIgZwHjg7cDFwGYRMbactfcBS8r+S4DJwEBEjAU2BZZ1v2xJknpP18/oM/OczOzLzCnAscBtmXk8cDtwdNntJOD6snxDWadsvy0zs4slS5LUs0bSe/SfBc6MiMW07sHPKe1zgAml/Uzg7IbqkySp5zRx6f51mXkHcEdZfgzYdzX7LAc+0tXCNGpMO2tuR/qZf+GJHelHkjptJJ3RS5KkDjPoJUmqmEEvSVLFDHpJkipm0EuSVDGDXpKkihn0kiRVzKCXJKliBr0kSRUz6CVJqphBL0lSxQx6SZIqZtBLklQxg16SpIoZ9JIkVcyglySpYga9JEkVM+glSaqYQS9JUsUMekmSKmbQS5JUMYNekqSKGfSSJFXMoJckqWIGvSRJFTPoJUmqmEEvSVLFDHpJkipm0EuSVDGDXpKkihn0kiRVzKCXJKliBr0kSRUz6CVJqphBL0lSxQx6SZIqZtBLklQxg16SpIoZ9JIkVcyglySpYga9JEkVM+glSaqYQS9JUsUMekmSKmbQS5JUMYNekqSKGfSSJFXMoJckqWIGvSRJFTPoJUmqWNeDPiImR8TtEbEwIh6MiDNK+xYRcXNEPFK+b17aIyIuiYjFEbEgIvbuds2SJPWqJs7oVwB/nZm7AfsBp0XEbsDZwK2ZuRNwa1kHOBTYqXzNAr7W/ZIlSepNXQ/6zHw6M+8py78FFgGTgCOAy8tulwNHluUjgLnZciewWURs2+WyJUnqSY3eo4+IKcBewF3A1pn5dNn0DLB1WZ4EPDXoYwOlbdW+ZkXEvIiYt3Tp0mGrWZKkXtJY0EfE24BrgE9n5m8Gb8vMBHJd+svM2ZnZn5n9EydO7GClkiT1rkaCPiLG0Qr5b2bmtaX52ZWX5Mv350r7EmDyoI/3lTZJkjSEJp66D2AOsCgz/3HQphuAk8ryScD1g9pPLE/f7we8OOgSvyRJWouxDRxzf+AE4P6IuK+0fQ64ALgqIk4FngCOKdtuBGYAi4HfAx/rbrmSJPWurgd9Zv4EiDVsPmg1+ydw2rAWJUlSpRwZT5Kkihn0kiRVzKCXJKliBr0kSRUz6CVJqphBL0lSxQx6SZIqZtBLklQxg16SpIoZ9JIkVcyglySpYga9JEkVM+glSaqYQS9JUsUMekmSKmbQS5JUMYNekqSKjW26AA2vJ89/Z0f62e7v7u9IP5Kk7vKMXpKkihn0kiRVzKCXJKliBr0kSRUz6CVJqphBL0lSxQx6SZIqZtBLklQxB8xZRw5AoyZNO2tuR/qZf+GJHelH0shn0GvE8Y8pSeocL91LklQxg16SpIoZ9JIkVcx79NIo5HMQ0ujhGb0kSRUz6CVJqphBL0lSxQx6SZIqZtBLklQxg16SpIoZ9JIkVcyglySpYg6YI3VAJwagcfAZScPBoJfUExzNT3pzvHQvSVLFDHpJkipm0EuSVDHv0UsaVtPOmtuRfq7bpCPdSKOOZ/SSJFXMoJckqWIGvSRJFeuZe/QRcQhwMTAGuDQzL2i4JElaI9/710jRE0EfEWOA/wL8BTAA/DwibsjMhc1WJqlGnXiA0IcHNVL0RNAD+wKLM/MxgIi4EjgCMOglqUO8ClGnXgn6ScBTg9YHgPc0VIskjSi+wtg7mvhjKjKzIwcdThFxNHBIZv7bsn4C8J7MPH3QPrOAWWV1Z+DhYSpnS+BXw9T3cOi1esGau6HX6oXeq7nX6gVr7obhrPfPMnPiqo29cka/BJg8aL2vtL0uM2cD
s4e7kIiYl5n9w32cTum1esGau6HX6oXeq7nX6gVr7oYm6u2V1+t+DuwUEVMjYn3gWOCGhmuSJGnE64kz+sxcERGnA9+n9XrdZZn5YMNlSZI04vVE0ANk5o3AjU3XQRduD3RYr9UL1twNvVYv9F7NvVYvWHM3dL3enngYT5IkvTm9co9ekiS9CQZ9myLikIh4OCIWR8TZTdczlIi4LCKei4gHmq6lHRExOSJuj4iFEfFgRJzRdE1DiYjxEXF3RPyi1Pz5pmtqV0SMiYh7I+J7TdcylIh4PCLuj4j7ImJe0/W0IyI2i4irI+KhiFgUEe9tuqa1iYidy+935ddvIuLTTde1NhHxmfLf3QMRcUVEjG+6pqFExBml3ge7+fv10n0byhC8/5dBQ/ACx43kIXgj4gDgJWBuZu7RdD1DiYhtgW0z856I2ASYDxw5wn/HAWycmS9FxDjgJ8AZmXlnw6UNKSLOBPqBt2fm4U3XszYR8TjQn5k98650RFwO/DgzLy1vCm2Umb9uuq52lP/fLaE1VskTTdezOhExidZ/b7tl5r9ExFXAjZn5jWYrW7OI2AO4ktZIry8DNwGfyMzFw31sz+jb8/oQvJn5Mq1/WEc0XNNaZeaPgOebrqNdmfl0Zt5Tln8LLKI1IuKIlS0vldVx5WvE/+UcEX3AYcClTddSo4jYFDgAmAOQmS/3SsgXBwGPjtSQH2QssGFEjAU2Av5fw/UMZVfgrsz8fWauAH4IfLgbBzbo27O6IXhHdAj1soiYAuwF3NVsJUMrl8DvA54Dbs7MEV8z8BXgb4DXmi6kTQn8ICLmlxEwR7qpwFLg6+X2yKURsXHTRa2DY4Ermi5ibTJzCfBl4EngaeDFzPxBs1UN6QHg/RExISI2AmbwxoHgho1BrxElIt4GXAN8OjN/03Q9Q8nMVzNzT1qjNe5bLs+NWBFxOPBcZs5vupZ18OeZuTdwKHBauS01ko0F9ga+lpl7Ab8DRvxzPQDlNsNM4DtN17I2EbE5rauqU4F3ABtHxEebrWrtMnMR8CXgB7Qu298HvNqNYxv07RlyCF69deU+9zXANzPz2qbrWRfl0uztwCFN1zKE/YGZ5b73lcCBEfHPzZa0duXsjcx8DriO1q20kWwAGBh0dedqWsHfCw4F7snMZ5suZAgfBH6ZmUsz8xXgWuB9Ddc0pMyck5nTMvMA4AVaz34NO4O+PQ7BO8zKg21zgEWZ+Y9N19OOiJgYEZuV5Q1pPaz5ULNVrV1mnpOZfZk5hda/x7dl5og9E4qIjcvDmZTL3x+idQl0xMrMZ4CnImLn0nQQvTOl9nGM8Mv2xZPAfhGxUfl/x0G0nusZ0SJiq/J9O1r357/VjeP2zMh4TerFIXgj4gpgOrBlRAwA52bmnGarWqv9gROA+8s9b4DPlRERR6ptgcvLU8rrAVdl5oh/Xa3HbA1c1/p/OWOBb2XmTc2W1JZPAt8sJwaPAR9ruJ4hlT+k/gL4eNO1DCUz74qIq4F7gBXAvfTGCHnXRMQE4BXgtG49pOnrdZIkVcxL95IkVcyglySpYga9JEkVM+glSaqYQS9JUsUMeqnLImKbiLgyIh4tw7reGBH/6k30c2RE7DYcNTYpIvoj4pIGjvtS+T6lV2Z9lNph0EtdVAb3uA64IzN3yMxpwDm03hdfV0cCXQ36MmZAJ/pZ4xgemTkvMz/VieNIMuilbvsA8Epm/reVDZn5i8z8cURMHzw/fER8NSJOLssXRMTCiFgQEV+OiPfRGpP8wjJ/+A4RsWdE3Fn2ua6MB05E3BERF0XEvDI3+j4RcW1EPBIRfz/oeB+NiLtLf/99ZahHxEsR8Q8R8QvgDfOqR8SnBtV1ZWnbOCIuK33dGxFHlPaTI+KGiLgNuLVc1ThsUF/fiIijB/8eIuJtEfH1aM1HvyAi/qq0fygifhYR90TEd8ocCaxS244RcUtE/KLst0Pp79ayfv/K2tYkInYf9DtZEBE7tfMPWRpJHBlP6q49
gHWaUKaMpHUUsEtmZkRslpm/jogbgO9l5tVlvwXAJzPzhxFxPnAu8OnSzcuZ2R8RZwDXA9NoTWP8aERcBGwF/Btg/8x8JSL+K3A8MBfYmNb0mn+9mvLOBqZm5h9WDgcM/C2toXVPKW13R8QtZdvewLsy8/mIOAo4BvhfZQS5g4B/B7xnUP//kdbMZO8sP+PmEbEl8B+AD2bm7yLis8CZwPmr1PZN4ILMvC4ixtM6sXkZOCozf1P6uTMibsg1jxz2CeDizFw5yl1HrmhI3WTQSyPfi8ByYE450/2TYXajNQf6Zpn5w9J0OW+cgWzl3Az3Aw9m5tPlc4/RmrDpz2mF/8/LcLMb0pp6F1ozbF2zhtoW0Brq9bvAd0vbh2hNnPPvy/p4YLuyfHNmPl+W/zdwcURsQGsyoB9l5r+U46/0QVpj8gOQmS9Eawa+3YCfln3XB362yu9jE2BSZl5XPre8tI8D/nO0ZsB7jdZ001sDz6zh5/sZ8LcR0Qdcm5mPrGE/acQy6KXuehA4eg3bVvDG22nj4fW5FvaldcZ7NHA6cOA6HvcP5ftrg5ZXro8FArg8M89ZzWeXZ+aaptM8DDgA+EtagfjO0tdfZebDg3eMiPfQmrKV8nMtj4g7gINpXU24ss2fJWj9wXBcm/sPdjwwEZhWrlw8Tvk9r05mfisi7qL1c94YER/PzNvexHGlxniPXuqu24ANImLWyoaIeFdEvB94AtgtIjYol7wPKtvfBmxaJvj5DPDu8tHfApsAZOaLwAulH2hNELTy7L4dtwJHxx9n19oiIv5sbR+IiPWAyZl5O/BZYFPgbbQmf/pklNPtiNhrLd18m9aEL++nNUf3qm4GTht0zM2BO4H9I2LH0rZxrPLWQmb+FhiIiCPLPhtExEalxudKyH8AGOpn3B54LDMvoXXL411r218aiQx6qYvKveCjgA9G6/W6B4EvAs9k5lPAVbSmYb2K1oxc0Arz75V78D+hdT8aWmfAZ5UH3nYATqL1cN4CYE/+9J712upaSOu+9w/K52+mNTvf2owB/jki7i+1XlJm4/oCMA5YUH6+L6yljx8A/xq4JTNfXs32vwc2j4gHysOAH8jMpcDJwBWl1p8Bu6zmsycAnyr7/B9gG1r37ftLzScy9LTCxwAPRGtGxT1oPbMg9RRnr5MkqWKe0UuSVDGDXpKkihn0kiRVzKCXJKliBr0kSRUz6CVJqphBL0lSxQx6SZIq9v8B6Op63JUagfwAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "sns.countplot(x=\"Customer service calls\", hue=\"Churn\", data=df)\n", + "plt.savefig(\"serv_calls__and_churn.png\", dpi=300);" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "x2ZJPe-DtU8q" + }, + "source": [ + "Может быть, по сводной табличке это не так хорошо видно (или скучно ползать взглядом по строчкам с цифрами), а вот картинка красноречиво свидетельствует о том, что доля оттока сильно возрастает начиная с 4 звонков в сервисный центр. " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Dqj4LVe3tU8q" + }, + "source": [ + "Добавим теперь в наш DataFrame бинарный признак — результат сравнения `Customer service calls > 3`. И еще раз посмотрим, как он связан с оттоком. " + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 175 + }, + "executionInfo": { + "elapsed": 14, + "status": "ok", + "timestamp": 1633609642722, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "o9R6NM8ltU8q", + "outputId": "ab3e7ee2-24f1-455b-c973-e994cf85a4c7", + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Churn01All
Many_service_calls
027213453066
1129138267
All28504833333
\n", + "
" + ], + "text/plain": [ + "Churn 0 1 All\n", + "Many_service_calls \n", + "0 2721 345 3066\n", + "1 129 138 267\n", + "All 2850 483 3333" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[\"Many_service_calls\"] = (df[\"Customer service calls\"] > 3).astype(\"int\")\n", + "\n", + "pd.crosstab(df[\"Many_service_calls\"], df[\"Churn\"], margins=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 389 + }, + "executionInfo": { + "elapsed": 777, + "status": "ok", + "timestamp": 1633609643487, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "nUQk7G96tU8r", + "outputId": "ed0370de-9813-45fa-f666-ee36ce5206a1" + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "sns.countplot(x=\"Many_service_calls\", hue=\"Churn\", data=df)\n", + "plt.savefig(\"many_serv_calls__and_churn.png\", dpi=300);" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XfYD5KqGtU8s" + }, + "source": [ + "Объединим рассмотренные выше условия и построим сводную табличку для этого объединения и оттока." + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 143 + }, + "executionInfo": { + "elapsed": 36, + "status": "ok", + "timestamp": 1633609643489, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "ZuaXCibrtU8s", + "outputId": "dbd40bfd-2dd2-41f1-ba50-cf159b12ddd7" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Churn01
row_0
False2841464
True919
\n", + "
" + ], + "text/plain": [ + "Churn 0 1\n", + "row_0 \n", + "False 2841 464\n", + "True 9 19" + ] + }, + "execution_count": 51, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.crosstab(df[\"Many_service_calls\"] & df[\"International plan\"], df[\"Churn\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VyaMr43HtU8t" + }, + "source": [ + "Значит, прогнозируя отток клиента в случае, когда число звонков в сервисный центр больше 3 и подключен роуминг (и прогнозируя лояльность – в противном случае), можно ожидать около 85.8% правильных попаданий (ошибаемся всего 464 + 9 раз). Эти 85.8%, которые мы получили с помощью очень простых рассуждений – это неплохая отправная точка (*baseline*) для дальнейших моделей машинного обучения, которые мы будем строить. " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "d6_n0ESntU8u" + }, + "source": [ + "В целом до появления машинного обучения процесс анализа данных выглядел примерно так. Прорезюмируем:\n", + " \n", + "- Доля лояльных клиентов в выборке – 85.5%. Самая наивная модель, ответ которой \"Клиент всегда лоялен\" на подобных данных будет угадывать примерно в 85.5% случаев. То есть доли правильных ответов (*accuracy*) последующих моделей должны быть как минимум не меньше, а лучше, значительно выше этой цифры;\n", + "- С помощью простого прогноза , который условно можно выразить такой формулой: \"International plan = True & Customer Service calls > 3 => Churn = 1, else Churn = 0\", можно ожидать долю угадываний 85.8%, что еще чуть выше 85.5%\n", + "- Эти два бейзлайна мы получили без всякого машинного обучения, и они служат отправной точной для наших последующих моделей. Если окажется, что мы громадными усилиями увеличиваем долю правильных ответов всего, скажем, на 0.5%, то возможно, мы что-то делаем не так, и достаточно ограничиться простой моделью из двух условий. 
\n", + "- Перед обучением сложных моделей рекомендуется немного покрутить данные и проверить простые предположения. Более того, в бизнес-приложениях машинного обучения чаще всего начинают именно с простых решений, а потом экспериментируют с их усложнением. " + ] + } + ], + "metadata": { + "anaconda-cloud": {}, + "colab": { + "collapsed_sections": [], + "name": "02_Pandas.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.0" + }, + "name": "seminar02_part2_pandas.ipynb" + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/Pandas and EDA (12.03)/Pandas and EDA. Task.ipynb b/Pandas and EDA (12.03)/Pandas and EDA. Task.ipynb index bb60a1c..ecb20a7 100644 --- a/Pandas and EDA (12.03)/Pandas and EDA. Task.ipynb +++ b/Pandas and EDA (12.03)/Pandas and EDA. 
Task.ipynb @@ -1 +1,646 @@ -{"nbformat":4,"nbformat_minor":0,"metadata":{"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.6.8"},"colab":{"name":"02_pandas_task.ipynb","provenance":[],"collapsed_sections":[]}},"cells":[{"cell_type":"markdown","metadata":{"id":"EmV0s8YY05p7"},"source":["- __ID__ - Unique number for each athlete\n","- __Name__ - Athlete's name\n","- __Sex__ - M or F\n","- __Age__ - Integer\n","- __Height__ - In centimeters\n","- __Weight__ - In kilograms\n","- __Team__ - Team name\n","- __NOC__ - National Olympic Committee 3-letter code\n","- __Games__ - Year and season\n","- __Year__ - Integer\n","- __Season__ - Summer or Winter\n","- __City__ - Host city\n","- __Sport__ - Sport\n","- __Event__ - Event\n","- __Medal__ - Gold, Silver, Bronze, or NA"]},{"cell_type":"code","metadata":{"id":"rVCrMDMh05p_"},"source":["import pandas as pd"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"D5Q4Z-JW05qC"},"source":["# не меняем путь!\n","PATH = 'https://github.com/aksenov7/Kaggle_competition_group/blob/master/athlete_events.csv.zip?raw=true'"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"mI0LtqkY4Kp-"},"source":["__0. 
Откройте файл используя необходимые параметры и не меняя переменную PATH__"]},{"cell_type":"code","metadata":{"id":"h5SQwBLr05qG","colab":{"base_uri":"https://localhost:8080/","height":206},"executionInfo":{"status":"ok","timestamp":1615627554682,"user_tz":-300,"elapsed":2477,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"882f9e83-5fd7-4c3b-b005-56917b15a0fd"},"source":["data = \n","data.head()"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
IDNameSexAgeHeightWeightTeamNOCGamesYearSeasonCitySportEventMedal
01A DijiangM24.0180.080.0ChinaCHN1992 Summer1992SummerBarcelonaBasketballBasketball Men's BasketballNaN
12A LamusiM23.0170.060.0ChinaCHN2012 Summer2012SummerLondonJudoJudo Men's Extra-LightweightNaN
23Gunnar Nielsen AabyM24.0NaNNaNDenmarkDEN1920 Summer1920SummerAntwerpenFootballFootball Men's FootballNaN
34Edgar Lindenau AabyeM34.0NaNNaNDenmark/SwedenDEN1900 Summer1900SummerParisTug-Of-WarTug-Of-War Men's Tug-Of-WarGold
45Christine Jacoba AaftinkF21.0185.082.0NetherlandsNED1988 Winter1988WinterCalgarySpeed SkatingSpeed Skating Women's 500 metresNaN
\n","
"],"text/plain":[" ID Name ... Event Medal\n","0 1 A Dijiang ... Basketball Men's Basketball NaN\n","1 2 A Lamusi ... Judo Men's Extra-Lightweight NaN\n","2 3 Gunnar Nielsen Aaby ... Football Men's Football NaN\n","3 4 Edgar Lindenau Aabye ... Tug-Of-War Men's Tug-Of-War Gold\n","4 5 Christine Jacoba Aaftink ... Speed Skating Women's 500 metres NaN\n","\n","[5 rows x 15 columns]"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"markdown","metadata":{"id":"stYR4EbV05qP"},"source":["__1. Сколько лет было самым молодым мужчинам и женщинам-участникам Олимпийских игр 1992 года ?__\n","- 16 и 15\n","- 14 и 13 \n","- 13 и 11\n","- 11 и 12"]},{"cell_type":"code","metadata":{"id":"HgiqBXtb05qR"},"source":[""],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"GQ290dsi05qc"},"source":["__2. Каков был процент баскетболистов-мужчин среди всех мужчин-участников Олимпийских игр 2012 года? Округлите ответ до первого десятичного знака.__\n","\n","Здесь и далее при необходимости отбрасывайте дублированных спортсменов, чтобы считать только уникальных . \n","- 0.2\n","- 1.5 \n","- 2.5\n","- 7.7"]},{"cell_type":"code","metadata":{"id":"-fI5MqWP05qi"},"source":[""],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"u5WrTgIC05qv"},"source":["__3. Каковы среднее и стандартное отклонение роста теннисисток, участвовавших в Олимпийских играх 2000 года? Округлите ответ до первого десятичного знака.__\n","\n","- 171.8 и 6.5\n","- 179.4 и 10\n","- 180.7 и 6.7\n","- 182.4 и 9.1 "]},{"cell_type":"code","metadata":{"id":"vsKTqn6405qw"},"source":[""],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"xOOEzhNQ05qy"},"source":["__4. Найдите спортсмена, который участвовал в Олимпийских играх 2006 года, с наибольшим весом среди других участников той же Олимпиады. 
Каким спортом он или она занимался?__\n","\n","- Judo\n","- Bobsleigh \n","- Skeleton\n","- Boxing"]},{"cell_type":"code","metadata":{"id":"EkWD1Tnb05qz"},"source":[""],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"UQzxZ3HT05q0"},"source":["__5. Сколько раз John Aalberg участвовал в Олимпийских играх в разные годы?__\n","\n","Один год - это один раз. Неважно сколько участий внутри одного года\n","- 0\n","- 1 \n","- 2\n","- 3 "]},{"cell_type":"code","metadata":{"id":"ZSfkdjPO05q0"},"source":[""],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"8EnLcNrk05q3"},"source":["__6. Сколько золотых медалей по теннису выиграли спортсмены сборной Switzerland на Олимпиаде-2008? Считайте каждую медаль от каждого спортсмена.__\n","\n","- 0\n","- 1 \n","- 2\n","- 3 "]},{"cell_type":"code","metadata":{"id":"Y754OGI-05q3"},"source":[""],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"v3h5sQF805q5"},"source":["__7. Правда ли, что на Олимпийских играх 2016 Spain выиграла меньше медалей, чем Италия?__ \n","\n","- Да\n","- Нет"]},{"cell_type":"code","metadata":{"id":"gqJqDi2605q7"},"source":[""],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"kkSYL5mK05q-"},"source":["__8. К какой возрастной категории принадлежало наименьшее и наибольшее количество участников Олимпиады-2008?__\n","\n","- [45-55] и [25-35) соответственно\n","- [45-55] и [15-25) соответственно\n","- [35-45) и [25-35) соответственно\n","- [45-55] и [35-45) соответственно"]},{"cell_type":"code","metadata":{"id":"pMAQtW7i05q_"},"source":[""],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"JQmJPiXv05rB"},"source":["__9. Правда ли, что в Atlanta проводились летние Олимпийские игры? Правда ли, что в Squaw Valley проводились зимние Олимпийские игры? 
?__\n","\n","- Да, Да\n","- Да, Нет\n","- Нет, Да \n","- Нет, Нет "]},{"cell_type":"code","metadata":{"id":"UU66wRHC05rB"},"source":[""],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"4hxR5D-t05rF"},"source":["__10. Какова абсолютная разница между количеством уникальных видов спорта на Олимпиаде 1986 года и Олимпиаде 2002 года?__\n","\n","- 3 \n","- 10\n","- 15\n","- 27 "]},{"cell_type":"code","metadata":{"id":"WKIr-TR105rF"},"source":[""],"execution_count":null,"outputs":[]}]} \ No newline at end of file +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "EmV0s8YY05p7" + }, + "source": [ + "- __ID__ - Unique number for each athlete\n", + "- __Name__ - Athlete's name\n", + "- __Sex__ - M or F\n", + "- __Age__ - Integer\n", + "- __Height__ - In centimeters\n", + "- __Weight__ - In kilograms\n", + "- __Team__ - Team name\n", + "- __NOC__ - National Olympic Committee 3-letter code\n", + "- __Games__ - Year and season\n", + "- __Year__ - Integer\n", + "- __Season__ - Summer or Winter\n", + "- __City__ - Host city\n", + "- __Sport__ - Sport\n", + "- __Event__ - Event\n", + "- __Medal__ - Gold, Silver, Bronze, or NA" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "id": "rVCrMDMh05p_" + }, + "outputs": [], + "source": [ + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "id": "D5Q4Z-JW05qC" + }, + "outputs": [], + "source": [ + "# не меняем путь!\n", + "PATH = 'https://github.com/aksenov7/Kaggle_competition_group/blob/master/athlete_events.csv.zip?raw=true'" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mI0LtqkY4Kp-" + }, + "source": [ + "__0. 
Откройте файл используя необходимые параметры и не меняя переменную PATH__" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "executionInfo": { + "elapsed": 2477, + "status": "ok", + "timestamp": 1615627554682, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "h5SQwBLr05qG", + "outputId": "882f9e83-5fd7-4c3b-b005-56917b15a0fd" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
IDNameSexAgeHeightWeightTeamNOCGamesYearSeasonCitySportEventMedal
01A DijiangM24.0180.080.0ChinaCHN1992 Summer1992SummerBarcelonaBasketballBasketball Men's BasketballNaN
12A LamusiM23.0170.060.0ChinaCHN2012 Summer2012SummerLondonJudoJudo Men's Extra-LightweightNaN
23Gunnar Nielsen AabyM24.0NaNNaNDenmarkDEN1920 Summer1920SummerAntwerpenFootballFootball Men's FootballNaN
34Edgar Lindenau AabyeM34.0NaNNaNDenmark/SwedenDEN1900 Summer1900SummerParisTug-Of-WarTug-Of-War Men's Tug-Of-WarGold
45Christine Jacoba AaftinkF21.0185.082.0NetherlandsNED1988 Winter1988WinterCalgarySpeed SkatingSpeed Skating Women's 500 metresNaN
\n", + "
" + ], + "text/plain": [ + " ID Name Sex Age Height Weight Team \\\n", + "0 1 A Dijiang M 24.0 180.0 80.0 China \n", + "1 2 A Lamusi M 23.0 170.0 60.0 China \n", + "2 3 Gunnar Nielsen Aaby M 24.0 NaN NaN Denmark \n", + "3 4 Edgar Lindenau Aabye M 34.0 NaN NaN Denmark/Sweden \n", + "4 5 Christine Jacoba Aaftink F 21.0 185.0 82.0 Netherlands \n", + "\n", + " NOC Games Year Season City Sport \\\n", + "0 CHN 1992 Summer 1992 Summer Barcelona Basketball \n", + "1 CHN 2012 Summer 2012 Summer London Judo \n", + "2 DEN 1920 Summer 1920 Summer Antwerpen Football \n", + "3 DEN 1900 Summer 1900 Summer Paris Tug-Of-War \n", + "4 NED 1988 Winter 1988 Winter Calgary Speed Skating \n", + "\n", + " Event Medal \n", + "0 Basketball Men's Basketball NaN \n", + "1 Judo Men's Extra-Lightweight NaN \n", + "2 Football Men's Football NaN \n", + "3 Tug-Of-War Men's Tug-Of-War Gold \n", + "4 Speed Skating Women's 500 metres NaN " + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = pd.read_csv(PATH, compression='zip', sep=',')\n", + "data.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "stYR4EbV05qP" + }, + "source": [ + "__1. Сколько лет было самым молодым мужчинам и женщинам-участникам Олимпийских игр 1992 года ?__\n", + "- 16 и 15\n", + "- 14 и 13 \n", + "- 13 и 11\n", + "- 11 и 12" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": { + "id": "HgiqBXtb05qR" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sex\n", + "F 12.0\n", + "M 11.0\n", + "Name: Age, dtype: float64\n" + ] + } + ], + "source": [ + "olympic1992 = data[data.Year == 1992]\n", + "print(olympic1992.groupby('Sex')['Age'].min())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GQ290dsi05qc" + }, + "source": [ + "__2. Каков был процент баскетболистов-мужчин среди всех мужчин-участников Олимпийских игр 2012 года? 
Округлите ответ до первого десятичного знака.__\n", + "\n", + "Здесь и далее при необходимости отбрасывайте дублированных спортсменов, чтобы считать только уникальных . \n", + "- 0.2\n", + "- 1.5 \n", + "- 2.5\n", + "- 7.7" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": { + "id": "-fI5MqWP05qi" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " 2.0% (Не понятно)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Deeksmayker\\AppData\\Local\\Temp\\ipykernel_12004\\2525794570.py:2: UserWarning: Boolean Series key will be reindexed to match DataFrame index.\n", + " basketball_mans = mans[data.Sport == \"Basketball\"]\n" + ] + } + ], + "source": [ + "mans = data[(data.Year == 2012) & (data.Sex == 'M')]\n", + "basketball_mans = mans[data.Sport == \"Basketball\"]\n", + "print(f\"{basketball_mans.shape[0] / mans.shape[0] * 100 : .1f}% (Не понятно)\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "u5WrTgIC05qv" + }, + "source": [ + "__3. Каковы среднее и стандартное отклонение роста теннисисток, участвовавших в Олимпийских играх 2000 года? Округлите ответ до первого десятичного знака.__\n", + "\n", + "- 171.8 и 6.5\n", + "- 179.4 и 10\n", + "- 180.7 и 6.7\n", + "- 182.4 и 9.1 " + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": { + "id": "vsKTqn6405qw" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Среднее: 171.8\n", + "Стандартное: 6.5\n" + ] + } + ], + "source": [ + "tenn = data[(data.Year == 2000) & (data.Sex == 'F') & (data.Sport == \"Tennis\")]\n", + "print(f\"Среднее: {tenn['Height'].mean() : .1f}\")\n", + "print(f\"Стандартное: {tenn['Height'].std() : .1f}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xOOEzhNQ05qy" + }, + "source": [ + "__4. 
Найдите спортсмена, который участвовал в Олимпийских играх 2006 года, с наибольшим весом среди других участников той же Олимпиады. Каким спортом он или она занимался?__\n", + "\n", + "- Judo\n", + "- Bobsleigh \n", + "- Skeleton\n", + "- Boxing" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": { + "id": "EkWD1Tnb05qz" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "'Skeleton'" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data[data.Year == 2006].sort_values('Weight', ascending=False).iloc[0]['Sport']" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "UQzxZ3HT05q0" + }, + "source": [ + "__5. Сколько раз John Aalberg участвовал в Олимпийских играх в разные годы?__\n", + "\n", + "Один год - это один раз. Неважно сколько участий внутри одного года\n", + "- 0\n", + "- 1 \n", + "- 2\n", + "- 3 " + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "2" + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data[data.Name == \"John Aalberg\"].drop_duplicates([\"Year\"]).shape[0]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8EnLcNrk05q3" + }, + "source": [ + "__6. Сколько золотых медалей по теннису выиграли спортсмены сборной Switzerland на Олимпиаде-2008? 
Считайте каждую медаль от каждого спортсмена.__\n", + "\n", + "- 0\n", + "- 1 \n", + "- 2\n", + "- 3 " + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": { + "id": "Y754OGI-05q3" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "2" + ] + }, + "execution_count": 47, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data[(data.Year == 2008) & (data.Team == \"Switzerland\") & (data.Medal == \"Gold\") & (data.Sport == \"Tennis\")].shape[0]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "v3h5sQF805q5" + }, + "source": [ + "__7. Правда ли, что на Олимпийских играх 2016 Spain выиграла меньше медалей, чем Италия?__ \n", + "\n", + "- Да\n", + "- Нет" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": { + "id": "gqJqDi2605q7" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Да\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "print(\"Да\" if data[(data['Year'] == 2016) & (data['Team'] == 'Spain') & (data['Medal'] is not np.nan)].shape[0]\n", + " < data[(data['Year'] == 2016) & (data['Team'] == 'Italy') & (data['Medal'] is not np.nan)].shape[0] else \"Нет\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "kkSYL5mK05q-" + }, + "source": [ + "__8. 
К какой возрастной категории принадлежало наименьшее и наибольшее количество участников Олимпиады-2008?__\n", + "\n", + "- [45-55] и [25-35) соответственно\n", + "- [45-55] и [15-25) соответственно\n", + "- [35-45) и [25-35) соответственно\n", + "- [45-55] и [35-45) соответственно" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "pMAQtW7i05q_" + }, + "outputs": [], + "source": [ + "# Count the 2008 rows per age category instead of reading the answer off a histogram.\n", + "# Rows with a missing Age fail every comparison and are not counted.\n", + "ages_2008 = data.loc[data['Year'] == 2008, 'Age']\n", + "\n", + "# Categories exactly as listed in the question: half-open, except the last one.\n", + "HALF_OPEN_GROUPS = [('[15-25)', 15, 25), ('[25-35)', 25, 35), ('[35-45)', 35, 45)]\n", + "group_sizes = {\n", + "    label: int(((ages_2008 >= low) & (ages_2008 < high)).sum())\n", + "    for label, low, high in HALF_OPEN_GROUPS\n", + "}\n", + "group_sizes['[45-55]'] = int(((ages_2008 >= 45) & (ages_2008 <= 55)).sum())\n", + "\n", + "smallest = min(group_sizes, key=group_sizes.get)\n", + "largest = max(group_sizes, key=group_sizes.get)\n", + "print(group_sizes)\n", + "print(f\"{smallest} и {largest} соответственно\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "JQmJPiXv05rB" + }, + "source": [ + "__9. Правда ли, что в Atlanta проводились летние Олимпийские игры? Правда ли, что в Squaw Valley проводились зимние Олимпийские игры?__\n", + "\n", + "- Да, Да\n", + "- Да, Нет\n", + "- Нет, Да \n", + "- Нет, Нет " + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": { + "id": "UU66wRHC05rB" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Да, Да\n" + ] + } + ], + "source": [ + "print(\"Да\" if not data[(data.City == \"Atlanta\") & (data.Season == \"Summer\")].empty else \"Нет\", end=', ')\n", + "print(\"Да\" if not data[(data.City == \"Squaw Valley\") & (data.Season == \"Winter\")].empty else \"Нет\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4hxR5D-t05rF" + }, + "source": [ + "__10. 
Какова абсолютная разница между количеством уникальных видов спорта на Олимпиаде 1986 года и Олимпиаде 2002 года?__\n", + "\n", + "- 3 \n", + "- 10\n", + "- 15\n", + "- 27 " + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "metadata": { + "id": "WKIr-TR105rF" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "15" + ] + }, + "execution_count": 69, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def count_sports(year):\n", + "    # Number of distinct `Sport` values among the rows of the given year (0 if there are none).\n", + "    return len(data.loc[data.Year == year, 'Sport'].unique())\n", + "\n", + "abs(count_sports(1986) - count_sports(2002))" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "02_pandas_task.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.0" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +}