From bf5d1e25eeb504815aad8de0cfeee08e3c789471 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=AE=D1=80=D0=B8=D0=B9?= Date: Wed, 27 Mar 2024 00:31:06 +0500 Subject: [PATCH 1/3] lab2 --- .../inspectionProfiles/profiles_settings.xml | 6 +++ .idea/lab2.iml | 8 ++++ .idea/misc.xml | 4 ++ .idea/modules.xml | 8 ++++ .idea/vcs.xml | 6 +++ .idea/workspace.xml | 42 +++++++++++++++++++ 6 files changed, 74 insertions(+) create mode 100644 .idea/inspectionProfiles/profiles_settings.xml create mode 100644 .idea/lab2.iml create mode 100644 .idea/misc.xml create mode 100644 .idea/modules.xml create mode 100644 .idea/vcs.xml create mode 100644 .idea/workspace.xml diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 0000000..105ce2d --- /dev/null +++ b/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/.idea/lab2.iml b/.idea/lab2.iml new file mode 100644 index 0000000..d0876a7 --- /dev/null +++ b/.idea/lab2.iml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..a971a2c --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..530e0f6 --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..35eb1dd --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/.idea/workspace.xml b/.idea/workspace.xml new file mode 100644 index 0000000..5a9c72e --- /dev/null +++ b/.idea/workspace.xml @@ -0,0 +1,42 @@ + + + + + + + + + + + + + + + + + + + 1711477545007 + + + + \ No newline at end of file From 9cab36cf656e53a29a4c4c2eb088f388c63a3a43 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?=D0=AE=D1=80=D0=B8=D0=B9?= Date: Wed, 27 Mar 2024 00:33:53 +0500 Subject: [PATCH 2/3] lab2.2 --- correlation.ipynb | 53 +++++++++--------- pandas.ipynb | 133 +++++++++++++++++++++++++++++++--------------- 2 files changed, 115 insertions(+), 71 deletions(-) diff --git a/correlation.ipynb b/correlation.ipynb index 51c1fea..24d9653 100644 --- a/correlation.ipynb +++ b/correlation.ipynb @@ -79,12 +79,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 27, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", - "brainFrame = pd.read_csv(?, delimiter='\\t')" + "brainFrame = pd.read_csv('titanic.csv', delimiter=',')  # NOTE(review): later cells use PIQ/FSIQ/VIQ/MRI_Count, which titanic.csv does not appear to contain - confirm the intended dataset" ] }, { @@ -102,7 +102,7 @@ "metadata": {}, "outputs": [], "source": [ - "brainFrame.?()" + "brainFrame.head()" ] }, { @@ -118,7 +118,7 @@ "metadata": {}, "outputs": [], "source": [ - "brainFrame.head(?)" + "brainFrame.head(10)" ] }, { @@ -134,7 +134,7 @@ "metadata": {}, "outputs": [], "source": [ - "?" + "brainFrame.tail(8)" ] }, { @@ -159,7 +159,7 @@ "metadata": {}, "outputs": [], "source": [ - "brainFrame.?()" + "brainFrame.describe()" ] }, { @@ -176,7 +176,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -195,12 +195,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 28, "metadata": {}, "outputs": [], "source": [ - "menDf = brainFrame[?]\n", - "womenDf = brainFrame[?]" + "menDf = brainFrame[brainFrame['Sex'] == 'male']\n", + "womenDf = brainFrame[brainFrame['Sex'] == 'female']" ] }, { @@ -244,11 +244,10 @@ "source": [ "# Ячейка для кода № 7\n", "# Постройка графика диаграммы рассеяния для кадра данных с женскими записями\n", - "womenMeanSmarts = ???\n", - "plt.scatter(???)\n", - "\n", - "#\n", - "#" + "%matplotlib inline\n", + "womenMeanSmarts = womenDf[[\"PIQ\", \"FSIQ\", \"VIQ\"]].mean(axis=1)\n", + "plt.scatter(womenMeanSmarts, womenDf[\"MRI_Count\"])\n", + "plt.show()\n" ] }, { @@ -272,7 +271,7
@@ "metadata": {}, "outputs": [], "source": [ - "brainFrame.?(method='pearson')" + "brainFrame.corr(method='pearson', numeric_only=True)" ] }, { @@ -286,7 +285,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "(ответ)" + " Корреляция переменной с самой собой всегда равна 1, потому что это полная линейная зависимость" ] }, { @@ -300,7 +299,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "(ответ)" + "Корреляция между переменными A и B также равна корреляции между переменными B и A" ] }, { @@ -316,7 +315,7 @@ "metadata": {}, "outputs": [], "source": [ - "womenDf.?(method='pearson')" + "womenDf.corr(method='pearson', numeric_only=True)" ] }, { @@ -332,7 +331,7 @@ "metadata": {}, "outputs": [], "source": [ - "# Используйте corr() для расчёта критерия корреляции Пирсона для кадра данных с мужчинами\n" + "menDf.corr(method='pearson', numeric_only=True)\n" ] }, { @@ -404,10 +403,10 @@ "metadata": {}, "outputs": [], "source": [ - "# Ячейка для кода № 14\n", - "mcorr = ???\n", - "#\n", - "#" + "import seaborn as sns\n", + "\n", + "mcorr = menDf.corr(numeric_only=True)\n", + "sns.heatmap(mcorr)" ] }, { @@ -421,7 +420,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "(ответ)" + "Это означает, что между этими переменными нет линейной зависимости" ] }, { @@ -435,7 +434,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "(ответ)" + "Для удобного анализа" ] }, { @@ -470,7 +469,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.5" + "version": "3.12.2" } }, "nbformat": 4, diff --git a/pandas.ipynb b/pandas.ipynb index bd16b4f..640f1e3 100644 --- a/pandas.ipynb +++ b/pandas.ipynb @@ -35,14 +35,14 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "id": "bfc3346f-3843-4aff-aec0-54321b9774f0", "metadata": { "tags": [] }, "outputs": [], "source": [ - "# write your code here" + "import pandas as pd" ] }, { @@ -57,14 +57,14 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id":
"fa5deec6-c85e-4d88-89df-bea7d75fcbba", "metadata": {}, "outputs": [], "source": [ - "# Чтение данных из файла 'titanic.csv'\n", - "# Используйте метод pd.read_csv()\n", - "# write your code here" + "import pandas as pd\n", + "\n", + "data = pd.read_csv('titanic.csv')" ] }, { @@ -83,9 +83,10 @@ "metadata": {}, "outputs": [], "source": [ - "# Вывод первых 5 строк данных\n", - "# Используйте метод .head()\n", - "# write your code here" + "import pandas as pd\n", + "\n", + "data = pd.read_csv('titanic.csv')\n", + "print(data.head(5))" ] }, { @@ -103,7 +104,12 @@ "id": "43650b4f-f3e7-4480-b874-b5552f564383", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "import pandas as pd\n", + "\n", + "data = pd.read_csv('titanic.csv')\n", + "print(data.info())" + ] }, { "cell_type": "markdown", @@ -118,16 +124,19 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "f7910fde-24f7-4cf8-991d-01f08bc45b63", "metadata": { "tags": [] }, "outputs": [], "source": [ - "# Проверка на наличие NaN в DataFrame\n", - "# Используйте метод .isna()\n", - "# write your code here" + "import pandas as pd\n", + "\n", + "data = pd.read_csv('titanic.csv')\n", + "\n", + "nan_check = data.isna()\n", + "print(nan_check)" ] }, { @@ -145,9 +154,12 @@ "metadata": {}, "outputs": [], "source": [ - "# Заполнение NaN определенным значением (например, нулем)\n", - "# Используйте метод .fillna()\n", - "# write your code here" + "import pandas as pd\n", + "\n", + "data = pd.read_csv('titanic.csv')\n", + "\n", + "data_filled = data.fillna(0)\n", + "print(data_filled)" ] }, { @@ -160,16 +172,19 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "4d188deb-0818-4b01-b3a5-9d20d2166d10", "metadata": { "tags": [] }, "outputs": [], "source": [ - "# Удаление строк, содержащих NaN\n", - "# Используйте метод .dropna()\n", - "# write your code here" + "import pandas as pd\n", + "\n", + "data = pd.read_csv('titanic.csv')\n", + "\n", + 
"data_without_na = data.dropna()\n", + "print(data_without_na)" ] }, { @@ -184,28 +199,32 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "d11f6114-ce4b-4e71-afec-adf1d8c1ec6e", "metadata": { "tags": [] }, "outputs": [], "source": [ - "# Выбор столбца по метке\n", - "# Используйте синтаксис DataFrame['название_столбца']\n", - "# write your code here\n", + "import pandas as pd\n", + "\n", + "data = pd.read_csv('titanic.csv')\n", + "\n", + "#Выбор по метке\n", + "selected_column = data['Age']\n", + "print(selected_column)\n", "\n", - "# Выбор нескольких столбцов\n", - "# Используйте синтаксис DataFrame[['столбец_1', 'столбец_2']]\n", - "# write your code here\n", + "#Выбор нескольких столбцов\n", + "selected_columns = data[['Name', 'Sex']]\n", + "print(selected_columns)\n", "\n", - "# Выбор строк по индексу\n", - "# Используйте метод .loc[]\n", - "# write your code here\n", + "#Выбор по индексу\n", + "selected_row = data.loc[0] \n", + "print(selected_row)\n", "\n", - "# Выбор строк и столбцов по условию\n", - "# Используя логические операции, выберите мужчин старше 30\n", - "# write your code here" + "#Выбор строк и столбцов \n", + "selected_data = data[(data['Sex'] == 'male') & (data['Age'] > 30)]\n", + "print(selected_data)" ] }, { @@ -219,16 +238,18 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "de5e850c-e920-4ae4-aadb-3f1953438b09", "metadata": { "tags": [] }, "outputs": [], "source": [ - "# Сортировка данных по столбцу 'столбец_1' по возрастанию\n", - "# Используйте метод .sort_values()\n", - "# write your code here" + "import pandas as pd\n", + "data = pd.read_csv('titanic.csv')\n", + "\n", + "sorted_data = data.sort_values(by='Age')\n", + "print(sorted_data)" ] }, { @@ -242,16 +263,19 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "25ced901-0482-49a8-8c12-d192e84e3fb3", "metadata": { "tags": [] }, "outputs": [], "source": [ - "# Найдите долю 
выживших среди всех PClass\n", - "# Используйте метод .groupby()\n", - "# write your code here" + "import pandas as pd\n", + "\n", + "data = pd.read_csv('titanic.csv')\n", + "\n", + "survival_rate_by_pclass = data.groupby('Pclass')['Survived'].mean()\n", + "print(survival_rate_by_pclass)" ] }, { @@ -274,7 +298,28 @@ "id": "a1b4deaa-cd06-41b3-8084-5c2d3a867811", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "import pandas as pd\n", + "\n", + "data = pd.read_csv('titanic.csv')\n", + "\n", + "data_filled = data.fillna(0)\n", + "\n", + "print(\"Первые 10 строк данных:\")\n", + "print(data_filled.head(10))\n", + "\n", + "data_age_gt_30 = data_filled[data_filled['Age'] > 30]\n", + "\n", + "sorted_data_by_fare = data_age_gt_30.sort_values(by='Fare', ascending=False)\n", + "\n", + "print(\"\\nСтроки, где возраст больше 30, отсортированные по Fare:\")\n", + "print(sorted_data_by_fare)\n", + "\n", + "average_age_by_pclass = data_filled.groupby('Pclass')['Age'].mean()\n", + "\n", + "print(\"\\nСредний возраст для каждого класса:\")\n", + "print(average_age_by_pclass)" + ] } ], "metadata": { @@ -293,7 +338,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.5" + "version": "3.12.2" } }, "nbformat": 4, From 8086b78eaacace2c20f528d1fb06125da901a2e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=AE=D1=80=D0=B8=D0=B9?= Date: Sun, 7 Apr 2024 21:41:20 +0500 Subject: [PATCH 3/3] lab2 --- pandas.ipynb | 460 +++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 428 insertions(+), 32 deletions(-) diff --git a/pandas.ipynb b/pandas.ipynb index 640f1e3..c883d6d 100644 --- a/pandas.ipynb +++ b/pandas.ipynb @@ -16,10 +16,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "7484df51-b002-414c-ae42-75a2df57c78d", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: pandas in 
c:\\users\\хозяин\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (2.2.1)\n", + "Requirement already satisfied: numpy<2,>=1.26.0 in c:\\users\\хозяин\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from pandas) (1.26.4)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in c:\\users\\хозяин\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from pandas) (2.9.0.post0)\n", + "Requirement already satisfied: pytz>=2020.1 in c:\\users\\хозяин\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from pandas) (2024.1)\n", + "Requirement already satisfied: tzdata>=2022.7 in c:\\users\\хозяин\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from pandas) (2024.1)\n", + "Requirement already satisfied: six>=1.5 in c:\\users\\хозяин\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from python-dateutil>=2.8.2->pandas) (1.16.0)\n" + ] + } + ], "source": [ "!pip install pandas" ] @@ -57,12 +70,11 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "id": "fa5deec6-c85e-4d88-89df-bea7d75fcbba", "metadata": {}, "outputs": [], "source": [ - "import pandas as pd\n", "\n", "data = pd.read_csv('titanic.csv')" ] @@ -78,12 +90,38 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "216e104c-259f-4ecd-9cd4-40362f61ca4e", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " PassengerId Survived Pclass \\\n", + "0 1 0 3 \n", + "1 2 1 1 \n", + "2 3 1 3 \n", + "3 4 1 1 \n", + "4 5 0 3 \n", + "\n", + " Name Sex Age SibSp \\\n", + "0 Braund, Mr. Owen Harris male 22.0 1 \n", + "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n", + "2 Heikkinen, Miss. Laina female 26.0 0 \n", + "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n", + "4 Allen, Mr. 
William Henry male 35.0 0 \n", + "\n", + " Parch Ticket Fare Cabin Embarked \n", + "0 0 A/5 21171 7.2500 NaN S \n", + "1 0 PC 17599 71.2833 C85 C \n", + "2 0 STON/O2. 3101282 7.9250 NaN S \n", + "3 0 113803 53.1000 C123 S \n", + "4 0 373450 8.0500 NaN S \n" + ] + } + ], "source": [ - "import pandas as pd\n", "\n", "data = pd.read_csv('titanic.csv')\n", "print(data.head(5))" @@ -100,12 +138,38 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "43650b4f-f3e7-4480-b874-b5552f564383", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 891 entries, 0 to 890\n", + "Data columns (total 12 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 PassengerId 891 non-null int64 \n", + " 1 Survived 891 non-null int64 \n", + " 2 Pclass 891 non-null int64 \n", + " 3 Name 891 non-null object \n", + " 4 Sex 891 non-null object \n", + " 5 Age 714 non-null float64\n", + " 6 SibSp 891 non-null int64 \n", + " 7 Parch 891 non-null int64 \n", + " 8 Ticket 891 non-null object \n", + " 9 Fare 891 non-null float64\n", + " 10 Cabin 204 non-null object \n", + " 11 Embarked 889 non-null object \n", + "dtypes: float64(2), int64(5), object(5)\n", + "memory usage: 83.7+ KB\n", + "None\n" + ] + } + ], "source": [ - "import pandas as pd\n", "\n", "data = pd.read_csv('titanic.csv')\n", "print(data.info())" @@ -124,14 +188,47 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "f7910fde-24f7-4cf8-991d-01f08bc45b63", "metadata": { "tags": [] }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket \\\n", + "0 False False False False False False False False False \n", + "1 False False False False False False False False False \n", + "2 False False False False False False False False False \n", + "3 False 
False False False False False False False False \n", + "4 False False False False False False False False False \n", + ".. ... ... ... ... ... ... ... ... ... \n", + "886 False False False False False False False False False \n", + "887 False False False False False False False False False \n", + "888 False False False False False True False False False \n", + "889 False False False False False False False False False \n", + "890 False False False False False False False False False \n", + "\n", + " Fare Cabin Embarked \n", + "0 False True False \n", + "1 False False False \n", + "2 False True False \n", + "3 False False False \n", + "4 False True False \n", + ".. ... ... ... \n", + "886 False True False \n", + "887 False False False \n", + "888 False True False \n", + "889 False False False \n", + "890 False True False \n", + "\n", + "[891 rows x 12 columns]\n" + ] + } + ], "source": [ - "import pandas as pd\n", "\n", "data = pd.read_csv('titanic.csv')\n", "\n", @@ -149,12 +246,58 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "id": "7d901187-75a9-497e-8774-6e0dde584197", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " PassengerId Survived Pclass \\\n", + "0 1 0 3 \n", + "1 2 1 1 \n", + "2 3 1 3 \n", + "3 4 1 1 \n", + "4 5 0 3 \n", + ".. ... ... ... \n", + "886 887 0 2 \n", + "887 888 1 1 \n", + "888 889 0 3 \n", + "889 890 1 1 \n", + "890 891 0 3 \n", + "\n", + " Name Sex Age SibSp \\\n", + "0 Braund, Mr. Owen Harris male 22.0 1 \n", + "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n", + "2 Heikkinen, Miss. Laina female 26.0 0 \n", + "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n", + "4 Allen, Mr. William Henry male 35.0 0 \n", + ".. ... ... ... ... \n", + "886 Montvila, Rev. Juozas male 27.0 0 \n", + "887 Graham, Miss. Margaret Edith female 19.0 0 \n", + "888 Johnston, Miss. 
Catherine Helen \"Carrie\" female 0.0 1 \n", + "889 Behr, Mr. Karl Howell male 26.0 0 \n", + "890 Dooley, Mr. Patrick male 32.0 0 \n", + "\n", + " Parch Ticket Fare Cabin Embarked \n", + "0 0 A/5 21171 7.2500 0 S \n", + "1 0 PC 17599 71.2833 C85 C \n", + "2 0 STON/O2. 3101282 7.9250 0 S \n", + "3 0 113803 53.1000 C123 S \n", + "4 0 373450 8.0500 0 S \n", + ".. ... ... ... ... ... \n", + "886 0 211536 13.0000 0 S \n", + "887 0 112053 30.0000 B42 S \n", + "888 2 W./C. 6607 23.4500 0 S \n", + "889 0 111369 30.0000 C148 C \n", + "890 0 370376 7.7500 0 Q \n", + "\n", + "[891 rows x 12 columns]\n" + ] + } + ], "source": [ - "import pandas as pd\n", "\n", "data = pd.read_csv('titanic.csv')\n", "\n", @@ -172,14 +315,60 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "id": "4d188deb-0818-4b01-b3a5-9d20d2166d10", "metadata": { "tags": [] }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " PassengerId Survived Pclass \\\n", + "1 2 1 1 \n", + "3 4 1 1 \n", + "6 7 0 1 \n", + "10 11 1 3 \n", + "11 12 1 1 \n", + ".. ... ... ... \n", + "871 872 1 1 \n", + "872 873 0 1 \n", + "879 880 1 1 \n", + "887 888 1 1 \n", + "889 890 1 1 \n", + "\n", + " Name Sex Age SibSp \\\n", + "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n", + "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n", + "6 McCarthy, Mr. Timothy J male 54.0 0 \n", + "10 Sandstrom, Miss. Marguerite Rut female 4.0 1 \n", + "11 Bonnell, Miss. Elizabeth female 58.0 0 \n", + ".. ... ... ... ... \n", + "871 Beckwith, Mrs. Richard Leonard (Sallie Monypeny) female 47.0 1 \n", + "872 Carlsson, Mr. Frans Olof male 33.0 0 \n", + "879 Potter, Mrs. Thomas Jr (Lily Alexenia Wilson) female 56.0 0 \n", + "887 Graham, Miss. Margaret Edith female 19.0 0 \n", + "889 Behr, Mr. 
Karl Howell male 26.0 0 \n", + "\n", + " Parch Ticket Fare Cabin Embarked \n", + "1 0 PC 17599 71.2833 C85 C \n", + "3 0 113803 53.1000 C123 S \n", + "6 0 17463 51.8625 E46 S \n", + "10 1 PP 9549 16.7000 G6 S \n", + "11 0 113783 26.5500 C103 S \n", + ".. ... ... ... ... ... \n", + "871 1 11751 52.5542 D35 S \n", + "872 0 695 5.0000 B51 B53 B55 S \n", + "879 1 11767 83.1583 C50 C \n", + "887 0 112053 30.0000 B42 S \n", + "889 0 111369 30.0000 C148 C \n", + "\n", + "[183 rows x 12 columns]\n" + ] + } + ], "source": [ - "import pandas as pd\n", "\n", "data = pd.read_csv('titanic.csv')\n", "\n", @@ -199,14 +388,86 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "id": "d11f6114-ce4b-4e71-afec-adf1d8c1ec6e", "metadata": { "tags": [] }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0 22.0\n", + "1 38.0\n", + "2 26.0\n", + "3 35.0\n", + "4 35.0\n", + " ... \n", + "886 27.0\n", + "887 19.0\n", + "888 NaN\n", + "889 26.0\n", + "890 32.0\n", + "Name: Age, Length: 891, dtype: float64\n", + " Name Sex\n", + "0 Braund, Mr. Owen Harris male\n", + "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female\n", + "2 Heikkinen, Miss. Laina female\n", + "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female\n", + "4 Allen, Mr. William Henry male\n", + ".. ... ...\n", + "886 Montvila, Rev. Juozas male\n", + "887 Graham, Miss. Margaret Edith female\n", + "888 Johnston, Miss. Catherine Helen \"Carrie\" female\n", + "889 Behr, Mr. Karl Howell male\n", + "890 Dooley, Mr. Patrick male\n", + "\n", + "[891 rows x 2 columns]\n", + "PassengerId 1\n", + "Survived 0\n", + "Pclass 3\n", + "Name Braund, Mr. Owen Harris\n", + "Sex male\n", + "Age 22.0\n", + "SibSp 1\n", + "Parch 0\n", + "Ticket A/5 21171\n", + "Fare 7.25\n", + "Cabin NaN\n", + "Embarked S\n", + "Name: 0, dtype: object\n", + " PassengerId Survived Pclass Name \\\n", + "4 5 0 3 Allen, Mr. William Henry \n", + "6 7 0 1 McCarthy, Mr. 
Timothy J \n", + "13 14 0 3 Andersson, Mr. Anders Johan \n", + "20 21 0 2 Fynney, Mr. Joseph J \n", + "21 22 1 2 Beesley, Mr. Lawrence \n", + ".. ... ... ... ... \n", + "867 868 0 1 Roebling, Mr. Washington Augustus II \n", + "872 873 0 1 Carlsson, Mr. Frans Olof \n", + "873 874 0 3 Vander Cruyssen, Mr. Victor \n", + "881 882 0 3 Markun, Mr. Johann \n", + "890 891 0 3 Dooley, Mr. Patrick \n", + "\n", + " Sex Age SibSp Parch Ticket Fare Cabin Embarked \n", + "4 male 35.0 0 0 373450 8.0500 NaN S \n", + "6 male 54.0 0 0 17463 51.8625 E46 S \n", + "13 male 39.0 1 5 347082 31.2750 NaN S \n", + "20 male 35.0 0 0 239865 26.0000 NaN S \n", + "21 male 34.0 0 0 248698 13.0000 D56 S \n", + ".. ... ... ... ... ... ... ... ... \n", + "867 male 31.0 0 0 PC 17590 50.4958 A24 S \n", + "872 male 33.0 0 0 695 5.0000 B51 B53 B55 S \n", + "873 male 47.0 0 0 345765 9.0000 NaN S \n", + "881 male 33.0 0 0 349257 7.8958 NaN S \n", + "890 male 32.0 0 0 370376 7.7500 NaN Q \n", + "\n", + "[202 rows x 12 columns]\n" + ] + } + ], "source": [ - "import pandas as pd\n", "\n", "data = pd.read_csv('titanic.csv')\n", "\n", @@ -238,14 +499,48 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "id": "de5e850c-e920-4ae4-aadb-3f1953438b09", "metadata": { "tags": [] }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " PassengerId Survived Pclass Name \\\n", + "803 804 1 3 Thomas, Master. Assad Alexander \n", + "755 756 1 2 Hamalainen, Master. Viljo \n", + "644 645 1 3 Baclini, Miss. Eugenie \n", + "469 470 1 3 Baclini, Miss. Helene Barbara \n", + "78 79 1 2 Caldwell, Master. Alden Gates \n", + ".. ... ... ... ... \n", + "859 860 0 3 Razi, Mr. Raihed \n", + "863 864 0 3 Sage, Miss. Dorothy Edith \"Dolly\" \n", + "868 869 0 3 van Melkebeke, Mr. Philemon \n", + "878 879 0 3 Laleff, Mr. Kristo \n", + "888 889 0 3 Johnston, Miss. 
Catherine Helen \"Carrie\" \n", + "\n", + " Sex Age SibSp Parch Ticket Fare Cabin Embarked \n", + "803 male 0.42 0 1 2625 8.5167 NaN C \n", + "755 male 0.67 1 1 250649 14.5000 NaN S \n", + "644 female 0.75 2 1 2666 19.2583 NaN C \n", + "469 female 0.75 2 1 2666 19.2583 NaN C \n", + "78 male 0.83 0 2 248738 29.0000 NaN S \n", + ".. ... ... ... ... ... ... ... ... \n", + "859 male NaN 0 0 2629 7.2292 NaN C \n", + "863 female NaN 8 2 CA. 2343 69.5500 NaN S \n", + "868 male NaN 0 0 345777 9.5000 NaN S \n", + "878 male NaN 0 0 349217 7.8958 NaN S \n", + "888 female NaN 1 2 W./C. 6607 23.4500 NaN S \n", + "\n", + "[891 rows x 12 columns]\n" + ] + } + ], "source": [ - "import pandas as pd\n", + "\n", "data = pd.read_csv('titanic.csv')\n", "\n", "sorted_data = data.sort_values(by='Age')\n", @@ -263,14 +558,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "id": "25ced901-0482-49a8-8c12-d192e84e3fb3", "metadata": { "tags": [] }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Pclass\n", + "1 0.629630\n", + "2 0.472826\n", + "3 0.242363\n", + "Name: Survived, dtype: float64\n" + ] + } + ], "source": [ - "import pandas as pd\n", "\n", "data = pd.read_csv('titanic.csv')\n", "\n", @@ -294,13 +600,103 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "id": "a1b4deaa-cd06-41b3-8084-5c2d3a867811", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Первые 10 строк данных:\n", + " PassengerId Survived Pclass \\\n", + "0 1 0 3 \n", + "1 2 1 1 \n", + "2 3 1 3 \n", + "3 4 1 1 \n", + "4 5 0 3 \n", + "5 6 0 3 \n", + "6 7 0 1 \n", + "7 8 0 3 \n", + "8 9 1 3 \n", + "9 10 1 2 \n", + "\n", + " Name Sex Age SibSp \\\n", + "0 Braund, Mr. Owen Harris male 22.0 1 \n", + "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n", + "2 Heikkinen, Miss. Laina female 26.0 0 \n", + "3 Futrelle, Mrs. 
Jacques Heath (Lily May Peel) female 35.0 1 \n", + "4 Allen, Mr. William Henry male 35.0 0 \n", + "5 Moran, Mr. James male 0.0 0 \n", + "6 McCarthy, Mr. Timothy J male 54.0 0 \n", + "7 Palsson, Master. Gosta Leonard male 2.0 3 \n", + "8 Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg) female 27.0 0 \n", + "9 Nasser, Mrs. Nicholas (Adele Achem) female 14.0 1 \n", + "\n", + " Parch Ticket Fare Cabin Embarked \n", + "0 0 A/5 21171 7.2500 0 S \n", + "1 0 PC 17599 71.2833 C85 C \n", + "2 0 STON/O2. 3101282 7.9250 0 S \n", + "3 0 113803 53.1000 C123 S \n", + "4 0 373450 8.0500 0 S \n", + "5 0 330877 8.4583 0 Q \n", + "6 0 17463 51.8625 E46 S \n", + "7 1 349909 21.0750 0 S \n", + "8 2 347742 11.1333 0 S \n", + "9 0 237736 30.0708 0 C \n", + "\n", + "Строки, где возраст больше 30, отсортированные по Fare:\n", + " PassengerId Survived Pclass \\\n", + "258 259 1 1 \n", + "679 680 1 1 \n", + "737 738 1 1 \n", + "438 439 0 1 \n", + "299 300 1 1 \n", + ".. ... ... ... \n", + "263 264 0 1 \n", + "179 180 0 3 \n", + "597 598 0 3 \n", + "822 823 0 1 \n", + "806 807 0 1 \n", + "\n", + " Name Sex Age SibSp \\\n", + "258 Ward, Miss. Anna female 35.0 0 \n", + "679 Cardeza, Mr. Thomas Drake Martinez male 36.0 0 \n", + "737 Lesurer, Mr. Gustave J male 35.0 0 \n", + "438 Fortune, Mr. Mark male 64.0 1 \n", + "299 Baxter, Mrs. James (Helene DeLaudeniere Chaput) female 50.0 0 \n", + ".. ... ... ... ... \n", + "263 Harrison, Mr. William male 40.0 0 \n", + "179 Leonard, Mr. Lionel male 36.0 0 \n", + "597 Johnson, Mr. Alfred male 49.0 0 \n", + "822 Reuchlin, Jonkheer. John George male 38.0 0 \n", + "806 Andrews, Mr. Thomas Jr male 39.0 0 \n", + "\n", + " Parch Ticket Fare Cabin Embarked \n", + "258 0 PC 17755 512.3292 0 C \n", + "679 1 PC 17755 512.3292 B51 B53 B55 C \n", + "737 0 PC 17755 512.3292 B101 C \n", + "438 4 19950 263.0000 C23 C25 C27 S \n", + "299 1 PC 17558 247.5208 B58 B60 C \n", + ".. ... ... ... ... ... 
\n", + "263 0 112059 0.0000 B94 S \n", + "179 0 LINE 0.0000 0 S \n", + "597 0 LINE 0.0000 0 S \n", + "822 0 19972 0.0000 0 S \n", + "806 0 112050 0.0000 A36 S \n", + "\n", + "[305 rows x 12 columns]\n", + "\n", + "Средний возраст для каждого класса:\n", + "Pclass\n", + "1 32.923241\n", + "2 28.091467\n", + "3 18.177026\n", + "Name: Age, dtype: float64\n" + ] + } + ], "source": [ - "import pandas as pd\n", - "\n", "data = pd.read_csv('titanic.csv')\n", "\n", "data_filled = data.fillna(0)\n",