From 7ee477a9b172a70bcce69d044ba1db17a5ee8694 Mon Sep 17 00:00:00 2001
From: cristhian <caamanocristhian6@gmail.com>
Date: Sun, 12 Jan 2025 21:33:04 +0100
Subject: [PATCH] week 5 lab 4

---
 lab-hypothesis-testing.ipynb | 303 ++++++++++++++++++++++++++++-------
 1 file changed, 249 insertions(+), 54 deletions(-)
diff --git a/lab-hypothesis-testing.ipynb b/lab-hypothesis-testing.ipynb
index 18ad6d5..d13f229 100644
--- a/lab-hypothesis-testing.ipynb
+++ b/lab-hypothesis-testing.ipynb
@@ -45,13 +45,12 @@
     "#libraries\n",
     "import pandas as pd\n",
     "import scipy.stats as st\n",
-    "import numpy as np\n",
-    "\n"
+    "import numpy as np"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 21,
    "metadata": {},
    "outputs": [
     {
@@ -101,7 +100,7 @@
        "      <td>65</td>\n",
        "      <td>45</td>\n",
        "      <td>1</td>\n",
-       "      <td>False</td>\n",
+       "      <td>Non-Legendary</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
@@ -115,7 +114,7 @@
        "      <td>80</td>\n",
        "      <td>60</td>\n",
        "      <td>1</td>\n",
-       "      <td>False</td>\n",
+       "      <td>Non-Legendary</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
@@ -129,7 +128,7 @@
        "      <td>100</td>\n",
        "      <td>80</td>\n",
        "      <td>1</td>\n",
-       "      <td>False</td>\n",
+       "      <td>Non-Legendary</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
@@ -143,7 +142,7 @@
        "      <td>120</td>\n",
        "      <td>80</td>\n",
        "      <td>1</td>\n",
-       "      <td>False</td>\n",
+       "      <td>Non-Legendary</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
@@ -157,7 +156,7 @@
        "      <td>50</td>\n",
        "      <td>65</td>\n",
        "      <td>1</td>\n",
-       "      <td>False</td>\n",
+       "      <td>Non-Legendary</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>...</th>\n",
@@ -185,7 +184,7 @@
        "      <td>150</td>\n",
        "      <td>50</td>\n",
        "      <td>6</td>\n",
-       "      <td>True</td>\n",
+       "      <td>Legendary</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>796</th>\n",
@@ -199,7 +198,7 @@
        "      <td>110</td>\n",
        "      <td>110</td>\n",
        "      <td>6</td>\n",
-       "      <td>True</td>\n",
+       "      <td>Legendary</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>797</th>\n",
@@ -213,7 +212,7 @@
        "      <td>130</td>\n",
        "      <td>70</td>\n",
        "      <td>6</td>\n",
-       "      <td>True</td>\n",
+       "      <td>Legendary</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>798</th>\n",
@@ -227,7 +226,7 @@
        "      <td>130</td>\n",
        "      <td>80</td>\n",
        "      <td>6</td>\n",
-       "      <td>True</td>\n",
+       "      <td>Legendary</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>799</th>\n",
@@ -241,7 +240,7 @@
        "      <td>90</td>\n",
        "      <td>70</td>\n",
        "      <td>6</td>\n",
-       "      <td>True</td>\n",
+       "      <td>Legendary</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
@@ -262,29 +261,30 @@
        "798   Hoopa Unbound  Psychic    Dark  80     160       60      170      130   \n",
        "799       Volcanion     Fire   Water  80     110      120      130       90   \n",
        "\n",
-       "     Speed  Generation  Legendary  \n",
-       "0       45           1      False  \n",
-       "1       60           1      False  \n",
-       "2       80           1      False  \n",
-       "3       80           1      False  \n",
-       "4       65           1      False  \n",
-       "..     ...         ...        ...  \n",
-       "795     50           6       True  \n",
-       "796    110           6       True  \n",
-       "797     70           6       True  \n",
-       "798     80           6       True  \n",
-       "799     70           6       True  \n",
+       "     Speed  Generation      Legendary  \n",
+       "0       45           1  Non-Legendary  \n",
+       "1       60           1  Non-Legendary  \n",
+       "2       80           1  Non-Legendary  \n",
+       "3       80           1  Non-Legendary  \n",
+       "4       65           1  Non-Legendary  \n",
+       "..     ...         ...            ...  \n",
+       "795     50           6      Legendary  \n",
+       "796    110           6      Legendary  \n",
+       "797     70           6      Legendary  \n",
+       "798     80           6      Legendary  \n",
+       "799     70           6      Legendary  \n",
        "\n",
        "[800 rows x 11 columns]"
       ]
      },
-     "execution_count": 3,
+     "execution_count": 21,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "df = pd.read_csv(\"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/pokemon.csv\")\n",
+    "url = \"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/pokemon.csv\"\n",
+    "pokemon_data = pd.read_csv(url)\n",
-    "df"
+    "pokemon_data"
    ]
   },
@@ -297,11 +297,34 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 23,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "T-Statistic: 3.3350\n",
+      "P-Value: 0.0008\n",
+      "Reject the null hypothesis: Dragon-type Pokémon have significantly higher HP than Grass-type Pokémon.\n"
+     ]
+    }
+   ],
    "source": [
-    "#code here"
+    "from scipy import stats\n",
+    "\n",
+    "dragon_hp = pokemon_data[pokemon_data['Type 1'] == 'Dragon']['HP']\n",
+    "grass_hp = pokemon_data[pokemon_data['Type 1'] == 'Grass']['HP']\n",
+    "\n",
+    "t_statistic, p_value = stats.ttest_ind(dragon_hp, grass_hp, equal_var=False, alternative='greater')\n",
+    "\n",
+    "print(f\"T-Statistic: {t_statistic:.4f}\")\n",
+    "print(f\"P-Value: {p_value:.4f}\")\n",
+    "\n",
+    "if p_value < 0.05:\n",
+    "    print(\"Reject the null hypothesis: Dragon-type Pokémon have significantly higher HP than Grass-type Pokémon.\")\n",
+    "else:\n",
+    "    print(\"Fail to reject the null hypothesis: No significant evidence that Dragon-type Pokémon have higher HP than Grass-type Pokémon.\")"
    ]
   },
   {
@@ -313,11 +336,54 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 39,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Index(['Name', 'Type 1', 'Type 2', 'HP', 'Attack', 'Defense', 'Sp_Atk',\n",
+      "       'Sp_Def', 'Speed', 'Generation', 'Legendary'],\n",
+      "      dtype='object')\n",
+      "HP         0\n",
+      "Attack     0\n",
+      "Defense    0\n",
+      "Sp_Atk     0\n",
+      "Sp_Def     0\n",
+      "Speed      0\n",
+      "dtype: int64\n",
+      "Legendary count: 800\n",
+      "Non-Legendary count: 0\n",
+      "Insufficient data for one or both groups.\n"
+     ]
+    }
+   ],
    "source": [
-    "#code here"
+    "from statsmodels.multivariate.manova import MANOVA\n",
+    "\n",
+    "pokemon_data = pokemon_data.rename(columns={'Sp. Atk': 'Sp_Atk', 'Sp. Def': 'Sp_Def'})\n",
+    "\n",
+    "print(pokemon_data.columns)\n",
+    "\n",
+    "print(pokemon_data[['HP', 'Attack', 'Defense', 'Sp_Atk', 'Sp_Def', 'Speed']].isnull().sum())\n",
+    "\n",
+    "pokemon_data['Legendary'] = pokemon_data['Legendary'].astype(bool)\n",
+    "\n",
+    "legendary_count = pokemon_data['Legendary'].sum()\n",
+    "non_legendary_count = len(pokemon_data) - legendary_count\n",
+    "\n",
+    "print(\"Legendary count:\", legendary_count)\n",
+    "print(\"Non-Legendary count:\", non_legendary_count)\n",
+    "\n",
+    "if legendary_count > 0 and non_legendary_count > 0:\n",
+    "    # Perform MANOVA\n",
+    "    formula = 'HP + Attack + Defense + Sp_Atk + Sp_Def + Speed ~ Legendary'\n",
+    "    manova = MANOVA.from_formula(formula, data=pokemon_data)\n",
+    "    fit = manova.mv_test()\n",
+    "    print(fit)\n",
+    "else:\n",
+    "    print(\"Insufficient data for one or both groups.\")"
    ]
   },
   {
@@ -337,7 +403,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 41,
    "metadata": {},
    "outputs": [
     {
@@ -433,34 +499,122 @@
        "      <td>1.9250</td>\n",
        "      <td>65500.0</td>\n",
        "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>16995</th>\n",
+       "      <td>-124.26</td>\n",
+       "      <td>40.58</td>\n",
+       "      <td>52.0</td>\n",
+       "      <td>2217.0</td>\n",
+       "      <td>394.0</td>\n",
+       "      <td>907.0</td>\n",
+       "      <td>369.0</td>\n",
+       "      <td>2.3571</td>\n",
+       "      <td>111400.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>16996</th>\n",
+       "      <td>-124.27</td>\n",
+       "      <td>40.69</td>\n",
+       "      <td>36.0</td>\n",
+       "      <td>2349.0</td>\n",
+       "      <td>528.0</td>\n",
+       "      <td>1194.0</td>\n",
+       "      <td>465.0</td>\n",
+       "      <td>2.5179</td>\n",
+       "      <td>79000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>16997</th>\n",
+       "      <td>-124.30</td>\n",
+       "      <td>41.84</td>\n",
+       "      <td>17.0</td>\n",
+       "      <td>2677.0</td>\n",
+       "      <td>531.0</td>\n",
+       "      <td>1244.0</td>\n",
+       "      <td>456.0</td>\n",
+       "      <td>3.0313</td>\n",
+       "      <td>103600.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>16998</th>\n",
+       "      <td>-124.30</td>\n",
+       "      <td>41.80</td>\n",
+       "      <td>19.0</td>\n",
+       "      <td>2672.0</td>\n",
+       "      <td>552.0</td>\n",
+       "      <td>1298.0</td>\n",
+       "      <td>478.0</td>\n",
+       "      <td>1.9797</td>\n",
+       "      <td>85800.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>16999</th>\n",
+       "      <td>-124.35</td>\n",
+       "      <td>40.54</td>\n",
+       "      <td>52.0</td>\n",
+       "      <td>1820.0</td>\n",
+       "      <td>300.0</td>\n",
+       "      <td>806.0</td>\n",
+       "      <td>270.0</td>\n",
+       "      <td>3.0147</td>\n",
+       "      <td>94600.0</td>\n",
+       "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
+       "<p>17000 rows × 9 columns</p>\n",
        "</div>"
       ],
       "text/plain": [
-       "   longitude  latitude  housing_median_age  total_rooms  total_bedrooms  \\\n",
-       "0    -114.31     34.19                15.0       5612.0          1283.0   \n",
-       "1    -114.47     34.40                19.0       7650.0          1901.0   \n",
-       "2    -114.56     33.69                17.0        720.0           174.0   \n",
-       "3    -114.57     33.64                14.0       1501.0           337.0   \n",
-       "4    -114.57     33.57                20.0       1454.0           326.0   \n",
+       "       longitude  latitude  housing_median_age  total_rooms  total_bedrooms  \\\n",
+       "0        -114.31     34.19                15.0       5612.0          1283.0   \n",
+       "1        -114.47     34.40                19.0       7650.0          1901.0   \n",
+       "2        -114.56     33.69                17.0        720.0           174.0   \n",
+       "3        -114.57     33.64                14.0       1501.0           337.0   \n",
+       "4        -114.57     33.57                20.0       1454.0           326.0   \n",
+       "...          ...       ...                 ...          ...             ...   \n",
+       "16995    -124.26     40.58                52.0       2217.0           394.0   \n",
+       "16996    -124.27     40.69                36.0       2349.0           528.0   \n",
+       "16997    -124.30     41.84                17.0       2677.0           531.0   \n",
+       "16998    -124.30     41.80                19.0       2672.0           552.0   \n",
+       "16999    -124.35     40.54                52.0       1820.0           300.0   \n",
+       "\n",
+       "       population  households  median_income  median_house_value  \n",
+       "0          1015.0       472.0         1.4936             66900.0  \n",
+       "1          1129.0       463.0         1.8200             80100.0  \n",
+       "2           333.0       117.0         1.6509             85700.0  \n",
+       "3           515.0       226.0         3.1917             73400.0  \n",
+       "4           624.0       262.0         1.9250             65500.0  \n",
+       "...           ...         ...            ...                 ...  \n",
+       "16995       907.0       369.0         2.3571            111400.0  \n",
+       "16996      1194.0       465.0         2.5179             79000.0  \n",
+       "16997      1244.0       456.0         3.0313            103600.0  \n",
+       "16998      1298.0       478.0         1.9797             85800.0  \n",
+       "16999       806.0       270.0         3.0147             94600.0  \n",
        "\n",
-       "   population  households  median_income  median_house_value  \n",
-       "0      1015.0       472.0         1.4936             66900.0  \n",
-       "1      1129.0       463.0         1.8200             80100.0  \n",
-       "2       333.0       117.0         1.6509             85700.0  \n",
-       "3       515.0       226.0         3.1917             73400.0  \n",
-       "4       624.0       262.0         1.9250             65500.0  "
+       "[17000 rows x 9 columns]"
       ]
      },
-     "execution_count": 5,
+     "execution_count": 41,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "df = pd.read_csv(\"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/california_housing.csv\")\n",
-    "df.head()"
+    "url = \"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/california_housing.csv\"\n",
+    "data = pd.read_csv(url)\n",
+    "data"
    ]
   },
   {
@@ -483,10 +637,51 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 49,
    "metadata": {},
    "outputs": [],
-   "source": []
+   "source": [
+    "school_coords = (-118, 37)\n",
+    "hospital_coords = (-122, 34)\n",
+    "\n",
+    "def calculate_distance(coords1, coords2):\n",
+    "    return np.sqrt((coords1[0] - coords2[0])**2 + (coords1[1] - coords2[1])**2)\n",
+    "\n",
+    "data['distance_to_school'] = data.apply(lambda row: calculate_distance((row['longitude'], row['latitude']), school_coords), axis=1)\n",
+    "data['distance_to_hospital'] = data.apply(lambda row: calculate_distance((row['longitude'], row['latitude']), hospital_coords), axis=1)\n",
+    "\n",
+    "data['close_to_amenity'] = (data['distance_to_school'] < 0.50) | (data['distance_to_hospital'] < 0.50)\n",
+    "\n",
+    "close_houses = data[data['close_to_amenity'] == True]\n",
+    "far_houses = data[data['close_to_amenity'] == False]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 51,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "T-Statistic: -17.1742\n",
+      "P-Value: 0.0001\n",
+      "Reject the null hypothesis: House prices differ significantly based on proximity to a school or hospital.\n"
+     ]
+    }
+   ],
+   "source": [
+    "t_statistic, p_value = stats.ttest_ind(close_houses['median_house_value'], far_houses['median_house_value'], equal_var=False)\n",
+    "\n",
+    "print(f\"T-Statistic: {t_statistic:.4f}\")\n",
+    "print(f\"P-Value: {p_value:.4f}\")\n",
+    "\n",
+    "if p_value < 0.05:\n",
+    "    print(\"Reject the null hypothesis: House prices differ significantly based on proximity to a school or hospital.\")\n",
+    "else:\n",
+    "    print(\"Fail to reject the null hypothesis: No significant difference in house prices based on proximity to a school or hospital.\")"
+   ]
   },
   {
    "cell_type": "code",
@@ -498,7 +693,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
@@ -512,9 +707,9 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.9"
+   "version": "3.12.7"
   }
  },
  "nbformat": 4,
- "nbformat_minor": 2
+ "nbformat_minor": 4
 }