diff --git a/lab-hypothesis-testing.ipynb b/lab-hypothesis-testing.ipynb index 18ad6d5..c2130eb 100644 --- a/lab-hypothesis-testing.ipynb +++ b/lab-hypothesis-testing.ipynb @@ -38,20 +38,19 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#libraries\n", "import pandas as pd\n", "import scipy.stats as st\n", - "import numpy as np\n", - "\n" + "import numpy as np" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "metadata": {}, "outputs": [ { @@ -278,7 +277,7 @@ "[800 rows x 11 columns]" ] }, - "execution_count": 3, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } @@ -297,27 +296,175 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Filtro los HP de los Pokémon tipo Dragon y tipo Grass\n", + "dragon_hp = df[df['Type 1'] == 'Dragon']['HP']\n", + "grass_hp = df[df['Type 1'] == 'Grass']['HP']" + ] + }, + { + "cell_type": "code", + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "#code here" + "# Prueba t para dos muestras independientes\n", + "# Uso\"equal_var=False\" porque entiendo que las varianzas pueden ser diferentes\n", + "t_stat, p_value = st.ttest_ind(dragon_hp, grass_hp, equal_var=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "T-Statistic: 3.3349632905124063, P-Value: 0.0015987219490841197\n" + ] + } + ], + "source": [ + "print(f\"T-Statistic: {t_stat}, P-Value: {p_value}\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "- We posit that Legendary Pokemons have different stats (HP, Attack, Defense, Sp.Atk, Sp.Def, Speed) when comparing with Non-Legendary. 
Choose the propper test and, with 5% significance, comment your findings.\n" + "- We posit that Legendary Pokemons have different stats (HP, Attack, Defense, Sp.Atk, Sp.Def, Speed) when comparing with Non-Legendary. Choose the proper test and, with 5% significance, comment your findings.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# Creamos dos grupos: Legendarios y No Legendarios\n", + "legendary_stats = df[df['Legendary'] == True][['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed']]\n", + "non_legendary_stats = df[df['Legendary'] == False][['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed']]" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "# Inicializamos un diccionario para almacenar los resultados\n", + "results = {}" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "# Realizamos la prueba t para cada estadística\n", + "for stat in ['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed']:\n", + " t_stat, p_value = st.ttest_ind(legendary_stats[stat], non_legendary_stats[stat], equal_var=False)\n", + " results[stat] = {'T-Statistic': t_stat, 'P-Value': p_value}" ] }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ - "#code here" + "# Convertimos los resultados a un DataFrame para analizarlos mejor\n", + "results_df = pd.DataFrame(results).T" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " | T-Statistic | \n", + "P-Value | \n", + "
---|---|---|
HP | \n", + "8.981370 | \n", + "1.002691e-13 | \n", + "
Attack | \n", + "10.438134 | \n", + "2.520372e-16 | \n", + "
Defense | \n", + "7.637078 | \n", + "4.826998e-11 | \n", + "
Sp. Atk | \n", + "13.417450 | \n", + "1.551461e-21 | \n", + "
Sp. Def | \n", + "10.015697 | \n", + "2.294933e-15 | \n", + "
Speed | \n", + "11.475044 | \n", + "1.049016e-18 | \n", + "