diff --git a/Lessons/Lesson22_Basic_Stats_II_Percents.ipynb b/Lessons/Lesson22_Basic_Stats_II_Percents.ipynb index 2f323b5..ab6c1fc 100644 --- a/Lessons/Lesson22_Basic_Stats_II_Percents.ipynb +++ b/Lessons/Lesson22_Basic_Stats_II_Percents.ipynb @@ -128,7 +128,8 @@ "source": [ "# Load the dataset of house prices in Ames, and convert to\n", "# a data frame format so it's easier to view and process\n", - "ames_df = pd.DataFrame(housing['data'])\n", + "ames_df = pd.DataFrame(housing['data'], columns = housing['feature_names'])\n", + "ames_df['SalePrice'] = housing.target\n", "ames_df" ] }, @@ -175,7 +176,7 @@ }, "outputs": [], "source": [ - "# Determine number of tracts that bound the Charles River two ways:\n", + "# Determine number of homes sold normally two ways:\n", "# (1) with the query function\n" ] }, @@ -218,19 +219,6 @@ "# Now calculate the percentage of houses sold normally.\n" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "AJZKng3Bs7Vd" - }, - "outputs": [], - "source": [ - "import numpy as np" - ] - }, { "cell_type": "markdown", "metadata": { @@ -264,7 +252,7 @@ "id": "RLZ-k3L7s7Vq" }, "source": [ - "What percentage of tracts have a median price **between** $200,000 and $500,000?" + "What percentage of houses have a sale price **between** $200,000 and $500,000?" ] }, { diff --git a/Lessons/_Keys/KEY_Lesson22_Basic_Stats_II_Percents.ipynb b/Lessons/_Keys/KEY_Lesson22_Basic_Stats_II_Percents.ipynb index 7380dd1..c28eabf 100644 --- a/Lessons/_Keys/KEY_Lesson22_Basic_Stats_II_Percents.ipynb +++ b/Lessons/_Keys/KEY_Lesson22_Basic_Stats_II_Percents.ipynb @@ -1,31 +1,10 @@ { - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.1" - } - }, "cells": [ { "cell_type": "markdown", "metadata": { - "id": "pcMCEdrks7Ut", - "colab_type": "text" + "colab_type": "text", + "id": "pcMCEdrks7Ut" }, "source": [ "# Basic Statistics I: Percents" @@ -34,8 +13,8 @@ { "cell_type": "markdown", "metadata": { - "id": "wAk5jXgBs7U0", - "colab_type": "text" + "colab_type": "text", + "id": "wAk5jXgBs7U0" }, "source": [ "A **percentage** is a number or ratio expressed as a fraction of 100. We'll do some examples together to learn how to calculate percentages." @@ -44,8 +23,8 @@ { "cell_type": "markdown", "metadata": { - "id": "vvDdKp98s7U3", - "colab_type": "text" + "colab_type": "text", + "id": "vvDdKp98s7U3" }, "source": [ "**Example 1:** For a basket of 18 fruits, there are 5 apples, 3 bananas, 6 peaches, and 4 oranges." @@ -54,8 +33,8 @@ { "cell_type": "markdown", "metadata": { - "id": "DFsUN3HAs7U4", - "colab_type": "text" + "colab_type": "text", + "id": "DFsUN3HAs7U4" }, "source": [ "What percentage of fruits are apples? " @@ -63,23 +42,34 @@ }, { "cell_type": "code", + "execution_count": 1, "metadata": { - "id": "agUStgpUs7U5", + "colab": {}, "colab_type": "code", - "colab": {} + "id": "agUStgpUs7U5" }, + "outputs": [ + { + "data": { + "text/plain": [ + "27.77777777777778" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Calculate percentage for apples\n", "5/18*100" - ], - "execution_count": 0, - "outputs": [] + ] }, { "cell_type": "markdown", "metadata": { - "id": "zGoaA_fhs7U9", - "colab_type": "text" + "colab_type": "text", + "id": "zGoaA_fhs7U9" }, "source": [ "What percentage of fruits are oranges **and** peaches? " @@ -87,131 +77,539 @@ }, { "cell_type": "code", + "execution_count": 2, "metadata": { - "id": "DIr9ZO4us7U-", + "colab": {}, "colab_type": "code", - "colab": {} + "id": "DIr9ZO4us7U-" }, + "outputs": [ + { + "data": { + "text/plain": [ + "55.55555555555556" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Calculate percentage for oranges and peaches\n", "(4+6)/18*100" - ], - "execution_count": 0, - "outputs": [] + ] }, { "cell_type": "markdown", "metadata": { - "id": "5ADm2TV-s7VG", - "colab_type": "text" + "colab_type": "text", + "id": "5ADm2TV-s7VG" }, "source": [ - "**Example 2:** Let's learn to calculate percentages by using real world data. We will work with a dataset of Boston housing prices." + "**Example 2:** Let's learn to calculate percentages by using real world data. We will work with a dataset of Ames, Iowa housing prices." ] }, { "cell_type": "code", + "execution_count": 6, "metadata": { - "id": "CSoS_MUus7VH", + "colab": {}, "colab_type": "code", - "colab": {} + "id": "CSoS_MUus7VH" }, + "outputs": [], "source": [ - "# Import the load_boston method \n", - "from sklearn.datasets import load_boston" - ], - "execution_count": 0, - "outputs": [] + "# Import the fetch_openml method \n", + "from sklearn.datasets import fetch_openml\n", + "housing = fetch_openml(name=\"house_prices\", as_frame=True, parser=\"auto\")" + ] }, { "cell_type": "code", + "execution_count": 7, "metadata": { - "id": "9Q6sI8C0s7VL", + "colab": {}, "colab_type": "code", - "colab": {} + "id": "9Q6sI8C0s7VL" }, + "outputs": [], "source": [ - "# Import pandas, so that we can work with the data frame version of the Boston housing data\n", + "# Import pandas, so that we can work with the data frame version of the Ames housing data\n", "import pandas as pd" - ], - "execution_count": 0, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": 10, "metadata": { - "scrolled": true, - "id": "hepVTCgss7VR", + "colab": {}, "colab_type": "code", - "colab": {} + "id": "hepVTCgss7VR", + "scrolled": true }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " | Id | \n", + "MSSubClass | \n", + "MSZoning | \n", + "LotFrontage | \n", + "LotArea | \n", + "Street | \n", + "Alley | \n", + "LotShape | \n", + "LandContour | \n", + "Utilities | \n", + "... | \n", + "PoolArea | \n", + "PoolQC | \n", + "Fence | \n", + "MiscFeature | \n", + "MiscVal | \n", + "MoSold | \n", + "YrSold | \n", + "SaleType | \n", + "SaleCondition | \n", + "SalePrice | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "1 | \n", + "60 | \n", + "RL | \n", + "65.0 | \n", + "8450 | \n", + "Pave | \n", + "NaN | \n", + "Reg | \n", + "Lvl | \n", + "AllPub | \n", + "... | \n", + "0 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "0 | \n", + "2 | \n", + "2008 | \n", + "WD | \n", + "Normal | \n", + "208500 | \n", + "
1 | \n", + "2 | \n", + "20 | \n", + "RL | \n", + "80.0 | \n", + "9600 | \n", + "Pave | \n", + "NaN | \n", + "Reg | \n", + "Lvl | \n", + "AllPub | \n", + "... | \n", + "0 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "0 | \n", + "5 | \n", + "2007 | \n", + "WD | \n", + "Normal | \n", + "181500 | \n", + "
2 | \n", + "3 | \n", + "60 | \n", + "RL | \n", + "68.0 | \n", + "11250 | \n", + "Pave | \n", + "NaN | \n", + "IR1 | \n", + "Lvl | \n", + "AllPub | \n", + "... | \n", + "0 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "0 | \n", + "9 | \n", + "2008 | \n", + "WD | \n", + "Normal | \n", + "223500 | \n", + "
3 | \n", + "4 | \n", + "70 | \n", + "RL | \n", + "60.0 | \n", + "9550 | \n", + "Pave | \n", + "NaN | \n", + "IR1 | \n", + "Lvl | \n", + "AllPub | \n", + "... | \n", + "0 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "0 | \n", + "2 | \n", + "2006 | \n", + "WD | \n", + "Abnorml | \n", + "140000 | \n", + "
4 | \n", + "5 | \n", + "60 | \n", + "RL | \n", + "84.0 | \n", + "14260 | \n", + "Pave | \n", + "NaN | \n", + "IR1 | \n", + "Lvl | \n", + "AllPub | \n", + "... | \n", + "0 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "0 | \n", + "12 | \n", + "2008 | \n", + "WD | \n", + "Normal | \n", + "250000 | \n", + "
... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
1455 | \n", + "1456 | \n", + "60 | \n", + "RL | \n", + "62.0 | \n", + "7917 | \n", + "Pave | \n", + "NaN | \n", + "Reg | \n", + "Lvl | \n", + "AllPub | \n", + "... | \n", + "0 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "0 | \n", + "8 | \n", + "2007 | \n", + "WD | \n", + "Normal | \n", + "175000 | \n", + "
1456 | \n", + "1457 | \n", + "20 | \n", + "RL | \n", + "85.0 | \n", + "13175 | \n", + "Pave | \n", + "NaN | \n", + "Reg | \n", + "Lvl | \n", + "AllPub | \n", + "... | \n", + "0 | \n", + "NaN | \n", + "MnPrv | \n", + "NaN | \n", + "0 | \n", + "2 | \n", + "2010 | \n", + "WD | \n", + "Normal | \n", + "210000 | \n", + "
1457 | \n", + "1458 | \n", + "70 | \n", + "RL | \n", + "66.0 | \n", + "9042 | \n", + "Pave | \n", + "NaN | \n", + "Reg | \n", + "Lvl | \n", + "AllPub | \n", + "... | \n", + "0 | \n", + "NaN | \n", + "GdPrv | \n", + "Shed | \n", + "2500 | \n", + "5 | \n", + "2010 | \n", + "WD | \n", + "Normal | \n", + "266500 | \n", + "
1458 | \n", + "1459 | \n", + "20 | \n", + "RL | \n", + "68.0 | \n", + "9717 | \n", + "Pave | \n", + "NaN | \n", + "Reg | \n", + "Lvl | \n", + "AllPub | \n", + "... | \n", + "0 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "0 | \n", + "4 | \n", + "2010 | \n", + "WD | \n", + "Normal | \n", + "142125 | \n", + "
1459 | \n", + "1460 | \n", + "20 | \n", + "RL | \n", + "75.0 | \n", + "9937 | \n", + "Pave | \n", + "NaN | \n", + "Reg | \n", + "Lvl | \n", + "AllPub | \n", + "... | \n", + "0 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "0 | \n", + "6 | \n", + "2008 | \n", + "WD | \n", + "Normal | \n", + "147500 | \n", + "
1460 rows × 81 columns
\n", + "