diff --git a/lab-dw-pandas.ipynb b/lab-dw-pandas.ipynb
index fbd46831..8b075574 100644
--- a/lab-dw-pandas.ipynb
+++ b/lab-dw-pandas.ipynb
@@ -82,12 +82,507 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 62,
"id": "dd4e8cd8-a6f6-486c-a5c4-1745b0c035f4",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Customer | \n",
+ " ST | \n",
+ " GENDER | \n",
+ " Education | \n",
+ " Customer Lifetime Value | \n",
+ " Income | \n",
+ " Monthly Premium Auto | \n",
+ " Number of Open Complaints | \n",
+ " Policy Type | \n",
+ " Vehicle Class | \n",
+ " Total Claim Amount | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " RB50392 | \n",
+ " Washington | \n",
+ " NaN | \n",
+ " Master | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " 1000.0 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 2.704934 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " QZ44356 | \n",
+ " Arizona | \n",
+ " F | \n",
+ " Bachelor | \n",
+ " 697953.59% | \n",
+ " 0.0 | \n",
+ " 94.0 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 1131.464935 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " AI49188 | \n",
+ " Nevada | \n",
+ " F | \n",
+ " Bachelor | \n",
+ " 1288743.17% | \n",
+ " 48767.0 | \n",
+ " 108.0 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Two-Door Car | \n",
+ " 566.472247 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " WW63253 | \n",
+ " California | \n",
+ " M | \n",
+ " Bachelor | \n",
+ " 764586.18% | \n",
+ " 0.0 | \n",
+ " 106.0 | \n",
+ " 1/0/00 | \n",
+ " Corporate Auto | \n",
+ " SUV | \n",
+ " 529.881344 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " GA49547 | \n",
+ " Washington | \n",
+ " M | \n",
+ " High School or Below | \n",
+ " 536307.65% | \n",
+ " 36357.0 | \n",
+ " 68.0 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 17.269323 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Customer ST GENDER Education Customer Lifetime Value \\\n",
+ "0 RB50392 Washington NaN Master NaN \n",
+ "1 QZ44356 Arizona F Bachelor 697953.59% \n",
+ "2 AI49188 Nevada F Bachelor 1288743.17% \n",
+ "3 WW63253 California M Bachelor 764586.18% \n",
+ "4 GA49547 Washington M High School or Below 536307.65% \n",
+ "\n",
+ " Income Monthly Premium Auto Number of Open Complaints Policy Type \\\n",
+ "0 0.0 1000.0 1/0/00 Personal Auto \n",
+ "1 0.0 94.0 1/0/00 Personal Auto \n",
+ "2 48767.0 108.0 1/0/00 Personal Auto \n",
+ "3 0.0 106.0 1/0/00 Corporate Auto \n",
+ "4 36357.0 68.0 1/0/00 Personal Auto \n",
+ "\n",
+ " Vehicle Class Total Claim Amount \n",
+ "0 Four-Door Car 2.704934 \n",
+ "1 Four-Door Car 1131.464935 \n",
+ "2 Two-Door Car 566.472247 \n",
+ "3 SUV 529.881344 \n",
+ "4 Four-Door Car 17.269323 "
+ ]
+ },
+ "execution_count": 62,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Read the customer CSV from the course data repository and preview the first rows\n",
+ "import pandas as pd\n",
+ "df = pd.read_csv(filepath_or_buffer=\"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file1.csv\")\n",
+ "df.head(n=5)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 63,
+ "id": "3160d342-7f99-4421-ba8b-77253c9442f1",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(4008, 11)"
+ ]
+ },
+ "execution_count": 63,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Dimensions of the raw frame as (rows, columns); the row count includes rows with missing values\n",
+ "df.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 64,
+ "id": "b7721069-b889-488c-a6ea-e9b969ae60ad",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Customer object\n",
+ "ST object\n",
+ "GENDER object\n",
+ "Education object\n",
+ "Customer Lifetime Value object\n",
+ "Income float64\n",
+ "Monthly Premium Auto float64\n",
+ "Number of Open Complaints object\n",
+ "Policy Type object\n",
+ "Vehicle Class object\n",
+ "Total Claim Amount float64\n",
+ "dtype: object"
+ ]
+ },
+ "execution_count": 64,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.dtypes\n",
+ "\n",
+ "# Problems:\n",
+ "# - Customer Lifetime Value is read as object because its values carry a trailing % sign (e.g. 697953.59%)\n",
+ "# - Number of Open Complaints is read as object because its values look like 1/0/00 instead of a plain number"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 65,
+ "id": "7b75e069-c1bb-4c44-aa5b-1b1be0794c23",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Customer 1071\n",
+ "ST 8\n",
+ "GENDER 5\n",
+ "Education 6\n",
+ "Customer Lifetime Value 1027\n",
+ "Income 774\n",
+ "Monthly Premium Auto 132\n",
+ "Number of Open Complaints 6\n",
+ "Policy Type 3\n",
+ "Vehicle Class 6\n",
+ "Total Claim Amount 761\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 65,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.nunique()\n",
+ "# Categorical columns (only a handful of distinct values each): ST, GENDER, Education, Policy Type, Vehicle Class"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 66,
+ "id": "bdd728ff-e780-4b52-84f7-28882dd4f30c",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "ST: ['Washington' 'Arizona' 'Nevada' 'California' 'Oregon' 'Cali' 'AZ' 'WA'\n",
+ " nan]\n",
+ "GENDER: [nan 'F' 'M' 'Femal' 'Male' 'female']\n",
+ "Education: ['Master' 'Bachelor' 'High School or Below' 'College' 'Bachelors' 'Doctor'\n",
+ " nan]\n",
+ "Policy Type: ['Personal Auto' 'Corporate Auto' 'Special Auto' nan]\n",
+ "Vehicle Class: ['Four-Door Car' 'Two-Door Car' 'SUV' 'Luxury SUV' 'Sports Car'\n",
+ " 'Luxury Car' nan]\n"
+ ]
+ }
+ ],
+ "source": [
+ "# List the distinct labels of every categorical column to spot inconsistent spellings\n",
+ "categorical_columns = [\"ST\", \"GENDER\", \"Education\", \"Policy Type\", \"Vehicle Class\"]\n",
+ "for column in categorical_columns:\n",
+ "    # each line is prefixed with the column name followed by ': '\n",
+ "    print(f\"{column}: \", df[column].unique())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 81,
+ "id": "fc50b0aa-23a5-49c8-a9ac-6fb7b0c3297c",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "The maximum value of Income is 99960.0 and the minimum value is 0.0\n",
+ "The maximum value of Monthly Premium Auto is 35354.0 and the minimum value is 61.0\n"
+ ]
+ }
+ ],
"source": [
- "# Your code here"
+ "for column in [\"Income\", \"Monthly Premium Auto\"]:  # same max/min sentence for each numeric column\n",
+ "    print(f\"The maximum value of {column} is \", df[column].max(), \"and the minimum value is \", df[column].min())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 85,
+ "id": "c16899ad-11c9-4791-bd40-d2ee2460916b",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Income | \n",
+ " Monthly Premium Auto | \n",
+ " Total Claim Amount | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " count | \n",
+ " 1071.000000 | \n",
+ " 1071.000000 | \n",
+ " 1071.000000 | \n",
+ "
\n",
+ " \n",
+ " mean | \n",
+ " 39295.701214 | \n",
+ " 193.234360 | \n",
+ " 404.986909 | \n",
+ "
\n",
+ " \n",
+ " std | \n",
+ " 30469.427060 | \n",
+ " 1601.190369 | \n",
+ " 293.027260 | \n",
+ "
\n",
+ " \n",
+ " min | \n",
+ " 0.000000 | \n",
+ " 61.000000 | \n",
+ " 0.382107 | \n",
+ "
\n",
+ " \n",
+ " 25% | \n",
+ " 14072.000000 | \n",
+ " 68.000000 | \n",
+ " 202.157702 | \n",
+ "
\n",
+ " \n",
+ " 50% | \n",
+ " 36234.000000 | \n",
+ " 83.000000 | \n",
+ " 354.729129 | \n",
+ "
\n",
+ " \n",
+ " 75% | \n",
+ " 64631.000000 | \n",
+ " 109.500000 | \n",
+ " 532.800000 | \n",
+ "
\n",
+ " \n",
+ " max | \n",
+ " 99960.000000 | \n",
+ " 35354.000000 | \n",
+ " 2893.239678 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Income Monthly Premium Auto Total Claim Amount\n",
+ "count 1071.000000 1071.000000 1071.000000\n",
+ "mean 39295.701214 193.234360 404.986909\n",
+ "std 30469.427060 1601.190369 293.027260\n",
+ "min 0.000000 61.000000 0.382107\n",
+ "25% 14072.000000 68.000000 202.157702\n",
+ "50% 36234.000000 83.000000 354.729129\n",
+ "75% 64631.000000 109.500000 532.800000\n",
+ "max 99960.000000 35354.000000 2893.239678"
+ ]
+ },
+ "execution_count": 85,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.describe()  # summary of the numeric columns: count, mean, std, min, quartiles and max"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 91,
+ "id": "6def634e-3aca-4082-856f-1b37e9c94cb3",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Customer | \n",
+ " ST | \n",
+ " GENDER | \n",
+ " Education | \n",
+ " Customer Lifetime Value | \n",
+ " Number of Open Complaints | \n",
+ " Policy Type | \n",
+ " Vehicle Class | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " count | \n",
+ " 1071 | \n",
+ " 1071 | \n",
+ " 954 | \n",
+ " 1071 | \n",
+ " 1068 | \n",
+ " 1071 | \n",
+ " 1071 | \n",
+ " 1071 | \n",
+ "
\n",
+ " \n",
+ " unique | \n",
+ " 1071 | \n",
+ " 8 | \n",
+ " 5 | \n",
+ " 6 | \n",
+ " 1027 | \n",
+ " 6 | \n",
+ " 3 | \n",
+ " 6 | \n",
+ "
\n",
+ " \n",
+ " top | \n",
+ " RB50392 | \n",
+ " Oregon | \n",
+ " F | \n",
+ " Bachelor | \n",
+ " 445811.34% | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ "
\n",
+ " \n",
+ " freq | \n",
+ " 1 | \n",
+ " 320 | \n",
+ " 457 | \n",
+ " 324 | \n",
+ " 4 | \n",
+ " 830 | \n",
+ " 780 | \n",
+ " 576 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Customer ST GENDER Education Customer Lifetime Value \\\n",
+ "count 1071 1071 954 1071 1068 \n",
+ "unique 1071 8 5 6 1027 \n",
+ "top RB50392 Oregon F Bachelor 445811.34% \n",
+ "freq 1 320 457 324 4 \n",
+ "\n",
+ " Number of Open Complaints Policy Type Vehicle Class \n",
+ "count 1071 1071 1071 \n",
+ "unique 6 3 6 \n",
+ "top 1/0/00 Personal Auto Four-Door Car \n",
+ "freq 830 780 576 "
+ ]
+ },
+ "execution_count": 91,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.describe(include=object)  # summary of the object (text) columns: count, unique, top (most frequent value) and freq"
]
},
{
@@ -116,12 +611,33 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 125,
"id": "2dca5073-4520-4f42-9390-4b92733284ed",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "ST\n",
+ "AZ 25\n",
+ "WA 30\n",
+ "Washington 81\n",
+ "Nevada 98\n",
+ "Cali 120\n",
+ "Name: count, dtype: int64"
+ ]
+ },
+ "execution_count": 125,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here"
+ "location_series = df[\"ST\"]  # reuse the frame loaded at the top instead of downloading the CSV again\n",
+ "location_series_count = location_series.value_counts()  # NOTE: raw labels, so e.g. 'WA' and 'Washington' are counted separately\n",
+ "sorted_locations = location_series_count.sort_values()  # ascending order: rarest locations first\n",
+ "less_common = sorted_locations.head(5)  # the five least frequent location labels\n",
+ "less_common"
]
},
{
@@ -146,12 +662,30 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "id": "bcfad6c1-9af2-4b0b-9aa9-0dc5c17473c0",
+ "execution_count": 145,
+ "id": "e0fc76bd-dedf-495b-a25b-c1e2f784510b",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Policy Type\n",
+ "Personal Auto 780\n",
+ "Corporate Auto 234\n",
+ "Special Auto 57\n",
+ "Name: count, dtype: int64\n",
+ "----\n",
+ "780\n"
+ ]
+ }
+ ],
"source": [
- "# Your code here"
+ "policy_series = df[\"Policy Type\"]  # reuse the frame loaded at the top instead of downloading the CSV again\n",
+ "policy_series_count = policy_series.value_counts()  # number of policies sold per policy type\n",
+ "print(policy_series_count)\n",
+ "print(\"----\")\n",
+ "print(policy_series_count.idxmax(), policy_series_count.max())  # best-selling policy type and how many were sold"
]
},
{
@@ -164,6 +698,17 @@
"The sales team wants to know if customers with Personal Auto have a lower income than those with Corporate Auto. How does the average income compare between the two policy types?"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 155,
+ "id": "bcfad6c1-9af2-4b0b-9aa9-0dc5c17473c0",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "personal_auto = df[df[\"Policy Type\"].eq(\"Personal Auto\")]  # rows whose policy type is Personal Auto\n",
+ "corporate_auto = df[df[\"Policy Type\"].eq(\"Corporate Auto\")]  # rows whose policy type is Corporate Auto"
+ ]
+ },
{
"cell_type": "markdown",
"id": "b1386d75-2810-4aa1-93e0-9485aa12d552",
@@ -176,12 +721,24 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 165,
"id": "0c0563cf-6f8b-463d-a321-651a972f82e5",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "The average income of the customer with a personal policy is 38180.69871794872\n",
+ "The average income of the customer with a corporate policy is 41390.31196581197\n"
+ ]
+ }
+ ],
"source": [
- "# Your code here"
+ "avg_personal = personal_auto[\"Income\"].mean()  # Series.mean() skips missing values; builtin sum()/len() would return NaN if any were present\n",
+ "avg_corporate = corporate_auto[\"Income\"].mean()  # same calculation for the Corporate Auto customers\n",
+ "print(\"The average income of the customer with a personal policy is \", avg_personal)\n",
+ "print(\"The average income of the customer with a corporate policy is \", avg_corporate)"
]
},
{
@@ -226,12 +783,138 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 169,
"id": "b731bca6-a760-4860-a27b-a33efa712ce0",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Income | \n",
+ " Monthly Premium Auto | \n",
+ " Total Claim Amount | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " count | \n",
+ " 1071.000000 | \n",
+ " 1071.000000 | \n",
+ " 1071.000000 | \n",
+ "
\n",
+ " \n",
+ " mean | \n",
+ " 39295.701214 | \n",
+ " 193.234360 | \n",
+ " 404.986909 | \n",
+ "
\n",
+ " \n",
+ " std | \n",
+ " 30469.427060 | \n",
+ " 1601.190369 | \n",
+ " 293.027260 | \n",
+ "
\n",
+ " \n",
+ " min | \n",
+ " 0.000000 | \n",
+ " 61.000000 | \n",
+ " 0.382107 | \n",
+ "
\n",
+ " \n",
+ " 25% | \n",
+ " 14072.000000 | \n",
+ " 68.000000 | \n",
+ " 202.157702 | \n",
+ "
\n",
+ " \n",
+ " 50% | \n",
+ " 36234.000000 | \n",
+ " 83.000000 | \n",
+ " 354.729129 | \n",
+ "
\n",
+ " \n",
+ " 75% | \n",
+ " 64631.000000 | \n",
+ " 109.500000 | \n",
+ " 532.800000 | \n",
+ "
\n",
+ " \n",
+ " max | \n",
+ " 99960.000000 | \n",
+ " 35354.000000 | \n",
+ " 2893.239678 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Income Monthly Premium Auto Total Claim Amount\n",
+ "count 1071.000000 1071.000000 1071.000000\n",
+ "mean 39295.701214 193.234360 404.986909\n",
+ "std 30469.427060 1601.190369 293.027260\n",
+ "min 0.000000 61.000000 0.382107\n",
+ "25% 14072.000000 68.000000 202.157702\n",
+ "50% 36234.000000 83.000000 354.729129\n",
+ "75% 64631.000000 109.500000 532.800000\n",
+ "max 99960.000000 35354.000000 2893.239678"
+ ]
+ },
+ "execution_count": 169,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.describe()  # the 75% row of Total Claim Amount is the cut-off for the high-claim customers selected in the next cell"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 183,
+ "id": "20fd3459-ed16-4492-b376-47cd2ab4a804",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "count 264.000000\n",
+ "mean 782.228263\n",
+ "std 292.751640\n",
+ "min 537.600000\n",
+ "25% 606.521741\n",
+ "50% 679.597985\n",
+ "75% 851.400000\n",
+ "max 2893.239678\n",
+ "Name: Total Claim Amount, dtype: float64"
+ ]
+ },
+ "execution_count": 183,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here"
+ "customers_high_claim = df.loc[df[\"Total Claim Amount\"] > df[\"Total Claim Amount\"].quantile(0.75)]  # 75th percentile computed from the data, not hard-coded\n",
+ "customers_high_claim[\"Total Claim Amount\"].describe()  # summary statistics of the high-claim group only"
]
}
],
@@ -251,7 +934,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.9.13"
+ "version": "3.12.4"
}
},
"nbformat": 4,