Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Lab 12.1 solved #198

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
257 changes: 241 additions & 16 deletions lab-hypothesis-testing.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -38,20 +38,19 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#libraries\n",
"import pandas as pd\n",
"import scipy.stats as st\n",
"import numpy as np\n",
"\n"
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 2,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -278,7 +277,7 @@
"[800 rows x 11 columns]"
]
},
"execution_count": 3,
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -297,27 +296,175 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Filtro los HP de los Pokémon tipo Dragon y tipo Grass\n",
"dragon_hp = df[df['Type 1'] == 'Dragon']['HP']\n",
"grass_hp = df[df['Type 1'] == 'Grass']['HP']"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#code here"
"# Prueba t para dos muestras independientes\n",
"# Uso\"equal_var=False\" porque entiendo que las varianzas pueden ser diferentes\n",
"t_stat, p_value = st.ttest_ind(dragon_hp, grass_hp, equal_var=False)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"T-Statistic: 3.3349632905124063, P-Value: 0.0015987219490841197\n"
]
}
],
"source": [
"print(f\"T-Statistic: {t_stat}, P-Value: {p_value}\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"- We posit that Legendary Pokemons have different stats (HP, Attack, Defense, Sp.Atk, Sp.Def, Speed) when comparing with Non-Legendary. Choose the propper test and, with 5% significance, comment your findings.\n"
"- We posit that Legendary Pokemons have different stats (HP, Attack, Defense, Sp.Atk, Sp.Def, Speed) when comparing with Non-Legy. Choose the propper test and, with 5% significance, comment your findings.endar\n"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# Creamos dos grupos: Legendarios y No Legendarios\n",
"legendary_stats = df[df['Legendary'] == True][['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed']]\n",
"non_legendary_stats = df[df['Legendary'] == False][['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed']]"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"# Inicializamos un diccionario para almacenar los resultados\n",
"results = {}"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"# Realizamos la prueba t para cada estadística\n",
"for stat in ['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed']:\n",
" t_stat, p_value = st.ttest_ind(legendary_stats[stat], non_legendary_stats[stat], equal_var=False)\n",
" results[stat] = {'T-Statistic': t_stat, 'P-Value': p_value}"
]
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"#code here"
"# Convertimos los resultados a un DataFrame para analizarlos mejor\n",
"results_df = pd.DataFrame(results).T"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>T-Statistic</th>\n",
" <th>P-Value</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>HP</th>\n",
" <td>8.981370</td>\n",
" <td>1.002691e-13</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Attack</th>\n",
" <td>10.438134</td>\n",
" <td>2.520372e-16</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Defense</th>\n",
" <td>7.637078</td>\n",
" <td>4.826998e-11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Sp. Atk</th>\n",
" <td>13.417450</td>\n",
" <td>1.551461e-21</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Sp. Def</th>\n",
" <td>10.015697</td>\n",
" <td>2.294933e-15</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Speed</th>\n",
" <td>11.475044</td>\n",
" <td>1.049016e-18</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" T-Statistic P-Value\n",
"HP 8.981370 1.002691e-13\n",
"Attack 10.438134 2.520372e-16\n",
"Defense 7.637078 4.826998e-11\n",
"Sp. Atk 13.417450 1.551461e-21\n",
"Sp. Def 10.015697 2.294933e-15\n",
"Speed 11.475044 1.049016e-18"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"results_df"
]
},
{
Expand All @@ -337,7 +484,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 12,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -453,7 +600,7 @@
"4 624.0 262.0 1.9250 65500.0 "
]
},
"execution_count": 5,
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
Expand Down Expand Up @@ -483,17 +630,95 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"# Coordenadas de la escuela y el hospital\n",
"school_coords = (-118, 37)\n",
"hospital_coords = (-122, 34)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"# Función para calcular distancia euclidiana\n",
"def calc_dist(lat1, lon1, lat2, lon2):\n",
" return np.sqrt((lat1 - lat2)**2 + (lon1 - lon2)**2)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": []
"source": [
"# Calcular las distancias\n",
"df['distance_to_school'] = calc_dist(df['longitude'], df['latitude'], school_coords[0], school_coords[1])\n",
"df['distance_to_hospital'] = calc_dist(df['longitude'], df['latitude'], hospital_coords[0], hospital_coords[1])"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"# Clasifico las casas como cercanas o lejanas\n",
"df['close_to_school_or_hospital'] = (df['distance_to_school'] < 0.50) | (df['distance_to_hospital'] < 0.50)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
"source": [
"df"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"# Divido el dataset en dos grupos\n",
"close_group = df[df['close_to_school_or_hospital'] == True]['median_house_value']\n",
"far_group = df[df['close_to_school_or_hospital'] == False]['median_house_value']"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
"# Realizo prueba t para dos muestras independientes\n",
"t_stat, p_value = st.ttest_ind(close_group, far_group, equal_var=False)"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Estadístico t: -17.174167998688404\n",
"Valor p: 5.220018561223529e-05\n"
]
}
],
"source": [
"print(f\"Estadístico t: {t_stat}\")\n",
"print(f\"Valor p: {p_value}\")"
]
}
],
"metadata": {
Expand All @@ -512,7 +737,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
"version": "3.8.2"
}
},
"nbformat": 4,
Expand Down