From a7baad06428624a7a92050ec55044d150ff6b3b5 Mon Sep 17 00:00:00 2001
From: claragal <claragalfer@gmail.com>
Date: Sat, 23 Nov 2024 15:45:02 +0100
Subject: [PATCH 1/2] Lab solved

---
 lab-dw-pandas.ipynb | 392 +++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 385 insertions(+), 7 deletions(-)
diff --git a/lab-dw-pandas.ipynb b/lab-dw-pandas.ipynb
index fbd468314..39df433f2 100644
--- a/lab-dw-pandas.ipynb
+++ b/lab-dw-pandas.ipynb
@@ -82,12 +82,369 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 13,
    "id": "dd4e8cd8-a6f6-486c-a5c4-1745b0c035f4",
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your code here"
+    "# Your code here\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "url = 'https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file1.csv'\n",
+    "insurance_data = pd.read_csv(url)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "id": "215cc012",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "The dataset contains 4008 rows and 11 columns.\n"
+     ]
+    }
+   ],
+   "source": [
+    "rows, columns = insurance_data.shape\n",
+    "print(f\"The dataset contains {rows} rows and {columns} columns.\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "id": "c1a82b79",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Customer                      object\n",
+      "ST                            object\n",
+      "GENDER                        object\n",
+      "Education                     object\n",
+      "Customer Lifetime Value       object\n",
+      "Income                       float64\n",
+      "Monthly Premium Auto         float64\n",
+      "Number of Open Complaints     object\n",
+      "Policy Type                   object\n",
+      "Vehicle Class                 object\n",
+      "Total Claim Amount           float64\n",
+      "dtype: object\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(insurance_data.dtypes)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "id": "88ba8bc2",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Customer</th>\n",
+       "      <th>ST</th>\n",
+       "      <th>GENDER</th>\n",
+       "      <th>Education</th>\n",
+       "      <th>Customer Lifetime Value</th>\n",
+       "      <th>Income</th>\n",
+       "      <th>Monthly Premium Auto</th>\n",
+       "      <th>Number of Open Complaints</th>\n",
+       "      <th>Policy Type</th>\n",
+       "      <th>Vehicle Class</th>\n",
+       "      <th>Total Claim Amount</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>RB50392</td>\n",
+       "      <td>Washington</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>Master</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1000.0</td>\n",
+       "      <td>1/0/00</td>\n",
+       "      <td>Personal Auto</td>\n",
+       "      <td>Four-Door Car</td>\n",
+       "      <td>2.704934</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>QZ44356</td>\n",
+       "      <td>Arizona</td>\n",
+       "      <td>F</td>\n",
+       "      <td>Bachelor</td>\n",
+       "      <td>697953.59%</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>94.0</td>\n",
+       "      <td>1/0/00</td>\n",
+       "      <td>Personal Auto</td>\n",
+       "      <td>Four-Door Car</td>\n",
+       "      <td>1131.464935</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>AI49188</td>\n",
+       "      <td>Nevada</td>\n",
+       "      <td>F</td>\n",
+       "      <td>Bachelor</td>\n",
+       "      <td>1288743.17%</td>\n",
+       "      <td>48767.0</td>\n",
+       "      <td>108.0</td>\n",
+       "      <td>1/0/00</td>\n",
+       "      <td>Personal Auto</td>\n",
+       "      <td>Two-Door Car</td>\n",
+       "      <td>566.472247</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>WW63253</td>\n",
+       "      <td>California</td>\n",
+       "      <td>M</td>\n",
+       "      <td>Bachelor</td>\n",
+       "      <td>764586.18%</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>106.0</td>\n",
+       "      <td>1/0/00</td>\n",
+       "      <td>Corporate Auto</td>\n",
+       "      <td>SUV</td>\n",
+       "      <td>529.881344</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>GA49547</td>\n",
+       "      <td>Washington</td>\n",
+       "      <td>M</td>\n",
+       "      <td>High School or Below</td>\n",
+       "      <td>536307.65%</td>\n",
+       "      <td>36357.0</td>\n",
+       "      <td>68.0</td>\n",
+       "      <td>1/0/00</td>\n",
+       "      <td>Personal Auto</td>\n",
+       "      <td>Four-Door Car</td>\n",
+       "      <td>17.269323</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "  Customer          ST GENDER             Education Customer Lifetime Value  \\\n",
+       "0  RB50392  Washington    NaN                Master                     NaN   \n",
+       "1  QZ44356     Arizona      F              Bachelor              697953.59%   \n",
+       "2  AI49188      Nevada      F              Bachelor             1288743.17%   \n",
+       "3  WW63253  California      M              Bachelor              764586.18%   \n",
+       "4  GA49547  Washington      M  High School or Below              536307.65%   \n",
+       "\n",
+       "    Income  Monthly Premium Auto Number of Open Complaints     Policy Type  \\\n",
+       "0      0.0                1000.0                    1/0/00   Personal Auto   \n",
+       "1      0.0                  94.0                    1/0/00   Personal Auto   \n",
+       "2  48767.0                 108.0                    1/0/00   Personal Auto   \n",
+       "3      0.0                 106.0                    1/0/00  Corporate Auto   \n",
+       "4  36357.0                  68.0                    1/0/00   Personal Auto   \n",
+       "\n",
+       "   Vehicle Class  Total Claim Amount  \n",
+       "0  Four-Door Car            2.704934  \n",
+       "1  Four-Door Car         1131.464935  \n",
+       "2   Two-Door Car          566.472247  \n",
+       "3            SUV          529.881344  \n",
+       "4  Four-Door Car           17.269323  "
+      ]
+     },
+     "execution_count": 17,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "insurance_data.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "id": "aa69e012",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Customer</th>\n",
+       "      <th>ST</th>\n",
+       "      <th>GENDER</th>\n",
+       "      <th>Education</th>\n",
+       "      <th>Customer Lifetime Value</th>\n",
+       "      <th>Income</th>\n",
+       "      <th>Monthly Premium Auto</th>\n",
+       "      <th>Number of Open Complaints</th>\n",
+       "      <th>Policy Type</th>\n",
+       "      <th>Vehicle Class</th>\n",
+       "      <th>Total Claim Amount</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>4003</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4004</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4005</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4006</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4007</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "     Customer   ST GENDER Education Customer Lifetime Value  Income  \\\n",
+       "4003      NaN  NaN    NaN       NaN                     NaN     NaN   \n",
+       "4004      NaN  NaN    NaN       NaN                     NaN     NaN   \n",
+       "4005      NaN  NaN    NaN       NaN                     NaN     NaN   \n",
+       "4006      NaN  NaN    NaN       NaN                     NaN     NaN   \n",
+       "4007      NaN  NaN    NaN       NaN                     NaN     NaN   \n",
+       "\n",
+       "      Monthly Premium Auto Number of Open Complaints Policy Type  \\\n",
+       "4003                   NaN                       NaN         NaN   \n",
+       "4004                   NaN                       NaN         NaN   \n",
+       "4005                   NaN                       NaN         NaN   \n",
+       "4006                   NaN                       NaN         NaN   \n",
+       "4007                   NaN                       NaN         NaN   \n",
+       "\n",
+       "     Vehicle Class  Total Claim Amount  \n",
+       "4003           NaN                 NaN  \n",
+       "4004           NaN                 NaN  \n",
+       "4005           NaN                 NaN  \n",
+       "4006           NaN                 NaN  \n",
+       "4007           NaN                 NaN  "
+      ]
+     },
+     "execution_count": 18,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "insurance_data.tail()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "0bc13307",
+   "metadata": {},
+   "source": [
+    "The data types of the following variables could be fixed/changed:\n",
+    "\n",
+    "\n",
+    "Customer Lifetime Value - could be changed as a float. However, the % symbol should be removed and we have to convert the values to a decimal form"
    ]
   },
   {
@@ -116,12 +473,33 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 8,
    "id": "2dca5073-4520-4f42-9390-4b92733284ed",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "ST\n",
+      "AZ             25\n",
+      "WA             30\n",
+      "Washington     81\n",
+      "Nevada         98\n",
+      "Cali          120\n",
+      "Name: count, dtype: int64\n"
+     ]
+    }
+   ],
    "source": [
-    "# Your code here"
+    "# Your code here\n",
+    "# Count the frequencies of each customer location (State)\n",
+    "location_counts = insurance_data['ST'].value_counts()\n",
+    "\n",
+    "# Get the top 5 less common locations in ascending order\n",
+    "top_5_less_common_locations = location_counts.nsmallest(5)\n",
+    "\n",
+    "print(top_5_less_common_locations)"
    ]
   },
   {
@@ -237,7 +615,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
+   "display_name": "Python 3",
    "language": "python",
    "name": "python3"
   },
@@ -251,7 +629,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.13"
+   "version": "3.12.6"
   }
  },
  "nbformat": 4,

From bea1cb40e3f5189e22382e38415864cace3a57b9 Mon Sep 17 00:00:00 2001
From: claragal <claragalfer@gmail.com>
Date: Sat, 23 Nov 2024 16:37:17 +0100
Subject: [PATCH 2/2] lab okey

---
 lab-dw-pandas.ipynb | 195 +++++++++++++++++++++++++++++++++++++-------
 1 file changed, 167 insertions(+), 28 deletions(-)

diff --git a/lab-dw-pandas.ipynb b/lab-dw-pandas.ipynb
index 39df433f2..7f641ce36 100644
--- a/lab-dw-pandas.ipynb
+++ b/lab-dw-pandas.ipynb
@@ -82,7 +82,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 10,
    "id": "dd4e8cd8-a6f6-486c-a5c4-1745b0c035f4",
    "metadata": {},
    "outputs": [],
@@ -91,12 +91,12 @@
     "import numpy as np\n",
     "import pandas as pd\n",
     "url = 'https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file1.csv'\n",
-    "insurance_data = pd.read_csv(url)"
+    "data = pd.read_csv(url)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 11,
    "id": "215cc012",
    "metadata": {},
    "outputs": [
@@ -109,13 +109,13 @@
     }
    ],
    "source": [
-    "rows, columns = insurance_data.shape\n",
+    "rows, columns = data.shape\n",
     "print(f\"The dataset contains {rows} rows and {columns} columns.\")"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 12,
    "id": "c1a82b79",
    "metadata": {},
    "outputs": [
@@ -139,12 +139,12 @@
     }
    ],
    "source": [
-    "print(insurance_data.dtypes)"
+    "print(data.dtypes)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": 13,
    "id": "88ba8bc2",
    "metadata": {},
    "outputs": [
@@ -280,18 +280,18 @@
        "4  Four-Door Car           17.269323  "
       ]
      },
-     "execution_count": 17,
+     "execution_count": 13,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "insurance_data.head()"
+    "data.head()"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 14,
    "id": "aa69e012",
    "metadata": {},
    "outputs": [
@@ -427,13 +427,13 @@
        "4007           NaN                 NaN  "
       ]
      },
-     "execution_count": 18,
+     "execution_count": 14,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "insurance_data.tail()"
+    "data.tail()"
    ]
   },
   {
@@ -447,6 +447,69 @@
     "Customer Lifetime Value - could be changed as a float. However, the % symbol should be removed and we have to convert the values to a decimal form"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "id": "89a2a9bb",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Customer                     1071\n",
+      "ST                              8\n",
+      "GENDER                          5\n",
+      "Education                       6\n",
+      "Customer Lifetime Value      1027\n",
+      "Income                        774\n",
+      "Monthly Premium Auto          132\n",
+      "Number of Open Complaints       6\n",
+      "Policy Type                     3\n",
+      "Vehicle Class                   6\n",
+      "Total Claim Amount            761\n",
+      "dtype: int64\n",
+      "\n",
+      "Categorical Columns Unique Values:\n",
+      "Customer: ['RB50392' 'QZ44356' 'AI49188' ... 'CW49887' 'MY31220' nan]\n",
+      "ST: ['Washington' 'Arizona' 'Nevada' 'California' 'Oregon' 'Cali' 'AZ' 'WA'\n",
+      " nan]\n",
+      "GENDER: [nan 'F' 'M' 'Femal' 'Male' 'female']\n",
+      "Education: ['Master' 'Bachelor' 'High School or Below' 'College' 'Bachelors' 'Doctor'\n",
+      " nan]\n",
+      "Customer Lifetime Value: [nan '697953.59%' '1288743.17%' ... '2031499.76%' '323912.47%'\n",
+      " '899704.02%']\n",
+      "Number of Open Complaints: ['1/0/00' '1/2/00' '1/1/00' '1/3/00' '1/5/00' '1/4/00' nan]\n",
+      "Policy Type: ['Personal Auto' 'Corporate Auto' 'Special Auto' nan]\n",
+      "Vehicle Class: ['Four-Door Car' 'Two-Door Car' 'SUV' 'Luxury SUV' 'Sports Car'\n",
+      " 'Luxury Car' nan]\n",
+      "\n",
+      "Numerical Columns Range:\n",
+      "Income: Min = 0.0, Max = 99960.0\n",
+      "Monthly Premium Auto: Min = 61.0, Max = 35354.0\n",
+      "Total Claim Amount: Min = 0.382107, Max = 2893.239678\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Distinct-value count per column (nunique leaves NaN out by default)\n",
+    "print(data.nunique())\n",
+    "\n",
+    "# Partition the column labels by dtype: object (text) vs. numeric\n",
+    "categorical_columns = data.select_dtypes(include='object').columns\n",
+    "numerical_columns = data.select_dtypes(include='number').columns\n",
+    "\n",
+    "# List every distinct value observed in each categorical column\n",
+    "print(\"\\nCategorical Columns Unique Values:\")\n",
+    "for col in categorical_columns:\n",
+    "    print(f\"{col}: {data[col].unique()}\")\n",
+    "\n",
+    "# Report the observed minimum and maximum of each numerical column\n",
+    "print(\"\\nNumerical Columns Range:\")\n",
+    "for col in numerical_columns:\n",
+    "    print(f\"{col}: Min = {data[col].min()}, Max = {data[col].max()}\")"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "4a703890-63db-4944-b7ab-95a4f8185120",
@@ -473,7 +536,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 21,
    "id": "2dca5073-4520-4f42-9390-4b92733284ed",
    "metadata": {},
    "outputs": [
@@ -492,13 +555,13 @@
     }
    ],
    "source": [
-    "# Your code here\n",
-    "# Count the frequencies of each customer location (State)\n",
-    "location_counts = insurance_data['ST'].value_counts()\n",
+    "# Step 1: Count the occurrences of each location (State) and sort them in ascending order\n",
+    "location_counts = data['ST'].value_counts().sort_values(ascending=True)\n",
     "\n",
-    "# Get the top 5 less common locations in ascending order\n",
-    "top_5_less_common_locations = location_counts.nsmallest(5)\n",
+    "# Step 2: Retrieve the 5 least common locations\n",
+    "top_5_less_common_locations = location_counts.head(5)\n",
     "\n",
+    "# Step 3: Display the result\n",
     "print(top_5_less_common_locations)"
    ]
   },
@@ -524,12 +587,38 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 22,
    "id": "bcfad6c1-9af2-4b0b-9aa9-0dc5c17473c0",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Policy Type with Highest Number of Policies Sold:\n",
+      "Personal Auto\n",
+      "\n",
+      "Total Number of Policies Sold for Each Type:\n",
+      "Policy Type\n",
+      "Personal Auto     780\n",
+      "Corporate Auto    234\n",
+      "Special Auto       57\n",
+      "Name: count, dtype: int64\n"
+     ]
+    }
+   ],
    "source": [
-    "# Your code here"
+    "# Step 1: Tally the rows recorded under each policy type\n",
+    "policy_counts = data['Policy Type'].value_counts()\n",
+    "\n",
+    "# Step 2: The label carrying the largest tally is the best-selling type\n",
+    "policy_with_highest_sales = policy_counts.idxmax()\n",
+    "\n",
+    "# Step 3: Print the winning type under its heading...\n",
+    "print(\"Policy Type with Highest Number of Policies Sold:\", policy_with_highest_sales, sep=\"\\n\")\n",
+    "\n",
+    "# ...then the complete per-type tally under its own heading\n",
+    "print(\"\\nTotal Number of Policies Sold for Each Type:\", policy_counts, sep=\"\\n\")"
    ]
   },
   {
@@ -554,12 +643,31 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 23,
    "id": "0c0563cf-6f8b-463d-a321-651a972f82e5",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Average Income for Personal Auto Policy: 38180.69871794872\n",
+      "Average Income for Corporate Auto Policy: 41390.31196581197\n"
+     ]
+    }
+   ],
    "source": [
-    "# Your code here"
+    "# Step 1: Build one boolean-filtered dataframe per policy type of interest\n",
+    "personal_auto_df = data[data['Policy Type'].eq('Personal Auto')]\n",
+    "corporate_auto_df = data[data['Policy Type'].eq('Corporate Auto')]\n",
+    "\n",
+    "# Step 2: Take the mean of the Income column within each subset\n",
+    "average_income_personal_auto = personal_auto_df.loc[:, 'Income'].mean()\n",
+    "average_income_corporate_auto = corporate_auto_df.loc[:, 'Income'].mean()\n",
+    "\n",
+    "# Step 3: Show both averages, one line per policy type\n",
+    "print(f\"Average Income for Personal Auto Policy: {average_income_personal_auto}\")\n",
+    "print(f\"Average Income for Corporate Auto Policy: {average_income_corporate_auto}\")"
    ]
   },
   {
@@ -604,12 +712,43 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 24,
    "id": "b731bca6-a760-4860-a27b-a33efa712ce0",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Summary Statistics for High Policy Claim Amount Customers:\n",
+      "count     264.000000\n",
+      "mean      782.228263\n",
+      "std       292.751640\n",
+      "min       537.600000\n",
+      "25%       606.521741\n",
+      "50%       679.597985\n",
+      "75%       851.400000\n",
+      "max      2893.239678\n",
+      "Name: Total Claim Amount, dtype: float64\n"
+     ]
+    }
+   ],
    "source": [
-    "# Your code here"
+    "# Step 1: Threshold = 75th percentile of Total Claim Amount (where the top 25% begins)\n",
+    "top_25_percentile = data['Total Claim Amount'].quantile(q=0.75)\n",
+    "\n",
+    "# Step 2: Boolean mask, True where the claim amount is strictly above the threshold\n",
+    "high_claims_mask = data['Total Claim Amount'].gt(top_25_percentile)\n",
+    "\n",
+    "# Step 3: Keep only the rows selected by the mask\n",
+    "high_claims_df = data.loc[high_claims_mask]\n",
+    "\n",
+    "# Step 4: Compute summary statistics of the claim amounts among those rows\n",
+    "high_claims_summary = high_claims_df.loc[:, 'Total Claim Amount'].describe()\n",
+    "\n",
+    "# Step 5: Print a heading followed by the statistics\n",
+    "print(\"Summary Statistics for High Policy Claim Amount Customers:\")\n",
+    "print(high_claims_summary)"
    ]
   }
  ],
@@ -629,7 +768,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.12.6"
+   "version": "3.12.7"
   }
  },
  "nbformat": 4,

	Customer	ST	GENDER	Education	Customer Lifetime Value	Income	Monthly Premium Auto	Number of Open Complaints	Policy Type	Vehicle Class	Total Claim Amount
0	RB50392	Washington	NaN	Master	NaN	0.0	1000.0	1/0/00	Personal Auto	Four-Door Car	2.704934
1	QZ44356	Arizona	F	Bachelor	697953.59%	0.0	94.0	1/0/00	Personal Auto	Four-Door Car	1131.464935
2	AI49188	Nevada	F	Bachelor	1288743.17%	48767.0	108.0	1/0/00	Personal Auto	Two-Door Car	566.472247
3	WW63253	California	M	Bachelor	764586.18%	0.0	106.0	1/0/00	Corporate Auto	SUV	529.881344
4	GA49547	Washington	M	High School or Below	536307.65%	36357.0	68.0	1/0/00	Personal Auto	Four-Door Car	17.269323
	Customer	ST	GENDER	Education	Customer Lifetime Value	Income	Monthly Premium Auto	Number of Open Complaints	Policy Type	Vehicle Class	Total Claim Amount
4003	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
4004	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
4005	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
4006	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
4007	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN