From 3c210d4f0ef209e20ddd7f50c2c2547ab38f92e1 Mon Sep 17 00:00:00 2001 From: Charlotte Guedj Date: Wed, 6 Nov 2024 21:01:21 +0100 Subject: [PATCH] lab-dw-pandas --- .../lab-dw-pandas-checkpoint.ipynb | 4028 +++++++++++++++++ lab-dw-pandas.ipynb | 3799 +++++++++++++++- 2 files changed, 7812 insertions(+), 15 deletions(-) create mode 100644 .ipynb_checkpoints/lab-dw-pandas-checkpoint.ipynb diff --git a/.ipynb_checkpoints/lab-dw-pandas-checkpoint.ipynb b/.ipynb_checkpoints/lab-dw-pandas-checkpoint.ipynb new file mode 100644 index 000000000..049ec7fa8 --- /dev/null +++ b/.ipynb_checkpoints/lab-dw-pandas-checkpoint.ipynb @@ -0,0 +1,4028 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "25d7736c-ba17-4aff-b6bb-66eba20fbf4e", + "metadata": {}, + "source": [ + "# Lab | Pandas" + ] + }, + { + "cell_type": "markdown", + "id": "d1973e9e-8be6-4039-b70e-d73ee0d94c99", + "metadata": {}, + "source": [ + "In this lab, we will be working with the customer data from an insurance company, which can be found in the CSV file located at the following link: https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file1.csv\n", + "\n", + "The data includes information such as customer ID, state, gender, education, income, and other variables that can be used to perform various analyses.\n", + "\n", + "Throughout the lab, we will be using the pandas library in Python to manipulate and analyze the data. Pandas is a powerful library that provides various data manipulation and analysis tools, including the ability to load and manipulate data from a variety of sources, including CSV files." 
+ ] + }, + { + "cell_type": "markdown", + "id": "8045146f-f4f7-44d9-8cd9-130d6400c73a", + "metadata": {}, + "source": [ + "### Data Description\n", + "\n", + "- Customer - Customer ID\n", + "\n", + "- ST - State where customers live\n", + "\n", + "- Gender - Gender of the customer\n", + "\n", + "- Education - Educational background of the customer\n", + "\n", + "- Customer Lifetime Value - Customer lifetime value (CLV) is the total revenue the company will derive from its entire relationship with a customer. In other words, it is the predicted or calculated value of a customer over their entire duration as a policyholder with the insurance company. It is an estimation of the net profit that the insurance company expects to generate from a customer throughout their relationship with the company. Customer Lifetime Value takes into account factors such as the duration of the customer's policy, premium payments, claim history, renewal likelihood, and potential additional services or products the customer may purchase. It helps insurers assess the long-term profitability and value associated with retaining a particular customer.\n", + "\n", + "- Income - Customer's income\n", + "\n", + "- Monthly Premium Auto - Amount of money the customer pays on a monthly basis as a premium for their auto insurance coverage. It represents the recurring cost that the insured person must pay to maintain their insurance policy and receive coverage for potential damages, accidents, or other covered events related to their vehicle.\n", + "\n", + "- Number of Open Complaints - Number of complaints the customer opened\n", + "\n", + "- Policy Type - There are three types of policies in car insurance (Corporate Auto, Personal Auto, and Special Auto)\n", + "\n", + "- Vehicle Class - Class of vehicle the customer has: Two-Door Car, Four-Door Car, SUV, Luxury SUV, Sports Car, or Luxury Car\n", + "\n", + "- Total Claim Amount - The sum of all claims made by the customer.
It represents the total monetary value of all approved claims for incidents such as accidents, theft, vandalism, or other covered events.\n" + ] + }, + { + "cell_type": "markdown", + "id": "3a72419b-20fc-4905-817a-8c83abc59de6", + "metadata": {}, + "source": [ + "External Resources: https://towardsdatascience.com/filtering-data-frames-in-pandas-b570b1f834b9" + ] + }, + { + "cell_type": "markdown", + "id": "8f8ece17-e919-4e23-96c0-c7c59778436a", + "metadata": {}, + "source": [ + "## Challenge 1: Understanding the data\n", + "\n", + "In this challenge, you will use pandas to explore a given dataset. Your task is to gain a deep understanding of the data by analyzing its characteristics, dimensions, and statistical properties." + ] + }, + { + "cell_type": "markdown", + "id": "91437bd5-59a6-49c0-8150-ef0e6e6eb253", + "metadata": {}, + "source": [ + "- Identify the dimensions of the dataset by determining the number of rows and columns it contains.\n", + "- Determine the data types of each column and evaluate whether they are appropriate for the nature of the variable. You should also provide suggestions for fixing any incorrect data types.\n", + "- Identify the number of unique values for each column and determine which columns appear to be categorical. You should also describe the unique values of each categorical column and the range of values for numerical columns, and give your insights.\n", + "- Compute summary statistics such as mean, median, mode, standard deviation, and quartiles to understand the central tendency and distribution of the data for numerical columns. You should also provide your conclusions based on these summary statistics.\n", + "- Compute summary statistics for categorical columns and provide your conclusions based on these statistics." + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "id": "dd4e8cd8-a6f6-486c-a5c4-1745b0c035f4", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CustomerSTGENDEREducationCustomer Lifetime ValueIncomeMonthly Premium AutoNumber of Open ComplaintsPolicy TypeVehicle ClassTotal Claim Amount
0RB50392WashingtonNaNMasterNaN0.01000.01/0/00Personal AutoFour-Door Car2.704934
1QZ44356ArizonaFBachelor697953.59%0.094.01/0/00Personal AutoFour-Door Car1131.464935
2AI49188NevadaFBachelor1288743.17%48767.0108.01/0/00Personal AutoTwo-Door Car566.472247
3WW63253CaliforniaMBachelor764586.18%0.0106.01/0/00Corporate AutoSUV529.881344
4GA49547WashingtonMHigh School or Below536307.65%36357.068.01/0/00Personal AutoFour-Door Car17.269323
....................................
4003NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
4004NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
4005NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
4006NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
4007NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n", + "

4008 rows × 11 columns

\n", + "
" + ], + "text/plain": [ + " Customer ST GENDER Education \\\n", + "0 RB50392 Washington NaN Master \n", + "1 QZ44356 Arizona F Bachelor \n", + "2 AI49188 Nevada F Bachelor \n", + "3 WW63253 California M Bachelor \n", + "4 GA49547 Washington M High School or Below \n", + "... ... ... ... ... \n", + "4003 NaN NaN NaN NaN \n", + "4004 NaN NaN NaN NaN \n", + "4005 NaN NaN NaN NaN \n", + "4006 NaN NaN NaN NaN \n", + "4007 NaN NaN NaN NaN \n", + "\n", + " Customer Lifetime Value Income Monthly Premium Auto \\\n", + "0 NaN 0.0 1000.0 \n", + "1 697953.59% 0.0 94.0 \n", + "2 1288743.17% 48767.0 108.0 \n", + "3 764586.18% 0.0 106.0 \n", + "4 536307.65% 36357.0 68.0 \n", + "... ... ... ... \n", + "4003 NaN NaN NaN \n", + "4004 NaN NaN NaN \n", + "4005 NaN NaN NaN \n", + "4006 NaN NaN NaN \n", + "4007 NaN NaN NaN \n", + "\n", + " Number of Open Complaints Policy Type Vehicle Class \\\n", + "0 1/0/00 Personal Auto Four-Door Car \n", + "1 1/0/00 Personal Auto Four-Door Car \n", + "2 1/0/00 Personal Auto Two-Door Car \n", + "3 1/0/00 Corporate Auto SUV \n", + "4 1/0/00 Personal Auto Four-Door Car \n", + "... ... ... ... \n", + "4003 NaN NaN NaN \n", + "4004 NaN NaN NaN \n", + "4005 NaN NaN NaN \n", + "4006 NaN NaN NaN \n", + "4007 NaN NaN NaN \n", + "\n", + " Total Claim Amount \n", + "0 2.704934 \n", + "1 1131.464935 \n", + "2 566.472247 \n", + "3 529.881344 \n", + "4 17.269323 \n", + "... ... 
\n", + "4003 NaN \n", + "4004 NaN \n", + "4005 NaN \n", + "4006 NaN \n", + "4007 NaN \n", + "\n", + "[4008 rows x 11 columns]" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd \n", + "\n", + "URL = \"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file1.csv\"\n", + "\n", + "df = pd.read_csv(URL) \n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "id": "47349815-10da-4b5e-b816-d49a76ea7971", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Customer 2937\n", + "ST 2937\n", + "GENDER 3054\n", + "Education 2937\n", + "Customer Lifetime Value 2940\n", + "Income 2937\n", + "Monthly Premium Auto 2937\n", + "Number of Open Complaints 2937\n", + "Policy Type 2937\n", + "Vehicle Class 2937\n", + "Total Claim Amount 2937\n", + "dtype: int64" + ] + }, + "execution_count": 50, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Nan_values = df.isnull().sum()\n", + "Nan_values " + ] + }, + { + "cell_type": "markdown", + "id": "59ccf4f5-19c9-441a-91b4-12020fa64445", + "metadata": {}, + "source": [ + "- Identify the dimensions of the dataset by determining the number of rows and columns it contains.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "id": "2b32f0bb-48d8-400b-8066-06c596c9d870", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The dataset contains 4008 rows and 11 columns.\n" + ] + } + ], + "source": [ + "rows, columns = df.shape\n", + "print(f\"The dataset contains {rows} rows and {columns} columns.\")\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "id": "017c805f-ee46-4970-8309-b4b927603f6d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CustomerSTGENDEREducationCustomer Lifetime ValueIncomeMonthly Premium AutoNumber of Open ComplaintsPolicy TypeVehicle ClassTotal Claim Amount
1QZ44356ArizonaFBachelor697953.59%0.094.01/0/00Personal AutoFour-Door Car1131.464935
2AI49188NevadaFBachelor1288743.17%48767.0108.01/0/00Personal AutoTwo-Door Car566.472247
3WW63253CaliforniaMBachelor764586.18%0.0106.01/0/00Corporate AutoSUV529.881344
4GA49547WashingtonMHigh School or Below536307.65%36357.068.01/0/00Personal AutoFour-Door Car17.269323
5OC83172OregonFBachelor825629.78%62902.069.01/0/00Personal AutoTwo-Door Car159.383042
....................................
1066TM65736OregonMMaster305955.03%38644.078.01/1/00Personal AutoFour-Door Car361.455219
1067VJ51327CaliFHigh School or Below2031499.76%63209.0102.01/2/00Personal AutoSUV207.320041
1068GS98873ArizonaFBachelor323912.47%16061.088.01/0/00Personal AutoFour-Door Car633.600000
1069CW49887CaliforniaFMaster462680.11%79487.0114.01/0/00Special AutoSUV547.200000
1070MY31220CaliforniaFCollege899704.02%54230.0112.01/0/00Personal AutoTwo-Door Car537.600000
\n", + "

952 rows × 11 columns

\n", + "
" + ], + "text/plain": [ + " Customer ST GENDER Education \\\n", + "1 QZ44356 Arizona F Bachelor \n", + "2 AI49188 Nevada F Bachelor \n", + "3 WW63253 California M Bachelor \n", + "4 GA49547 Washington M High School or Below \n", + "5 OC83172 Oregon F Bachelor \n", + "... ... ... ... ... \n", + "1066 TM65736 Oregon M Master \n", + "1067 VJ51327 Cali F High School or Below \n", + "1068 GS98873 Arizona F Bachelor \n", + "1069 CW49887 California F Master \n", + "1070 MY31220 California F College \n", + "\n", + " Customer Lifetime Value Income Monthly Premium Auto \\\n", + "1 697953.59% 0.0 94.0 \n", + "2 1288743.17% 48767.0 108.0 \n", + "3 764586.18% 0.0 106.0 \n", + "4 536307.65% 36357.0 68.0 \n", + "5 825629.78% 62902.0 69.0 \n", + "... ... ... ... \n", + "1066 305955.03% 38644.0 78.0 \n", + "1067 2031499.76% 63209.0 102.0 \n", + "1068 323912.47% 16061.0 88.0 \n", + "1069 462680.11% 79487.0 114.0 \n", + "1070 899704.02% 54230.0 112.0 \n", + "\n", + " Number of Open Complaints Policy Type Vehicle Class \\\n", + "1 1/0/00 Personal Auto Four-Door Car \n", + "2 1/0/00 Personal Auto Two-Door Car \n", + "3 1/0/00 Corporate Auto SUV \n", + "4 1/0/00 Personal Auto Four-Door Car \n", + "5 1/0/00 Personal Auto Two-Door Car \n", + "... ... ... ... \n", + "1066 1/1/00 Personal Auto Four-Door Car \n", + "1067 1/2/00 Personal Auto SUV \n", + "1068 1/0/00 Personal Auto Four-Door Car \n", + "1069 1/0/00 Special Auto SUV \n", + "1070 1/0/00 Personal Auto Two-Door Car \n", + "\n", + " Total Claim Amount \n", + "1 1131.464935 \n", + "2 566.472247 \n", + "3 529.881344 \n", + "4 17.269323 \n", + "5 159.383042 \n", + "... ... 
\n", + "1066 361.455219 \n", + "1067 207.320041 \n", + "1068 633.600000 \n", + "1069 547.200000 \n", + "1070 537.600000 \n", + "\n", + "[952 rows x 11 columns]" + ] + }, + "execution_count": 52, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df= df.dropna()\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "id": "a4fd9658-bc8e-47b7-8700-d2fff532e239", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Customer 0\n", + "ST 0\n", + "GENDER 0\n", + "Education 0\n", + "Customer Lifetime Value 0\n", + "Income 0\n", + "Monthly Premium Auto 0\n", + "Number of Open Complaints 0\n", + "Policy Type 0\n", + "Vehicle Class 0\n", + "Total Claim Amount 0\n", + "dtype: int64" + ] + }, + "execution_count": 53, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_null = df.isnull().sum()\n", + "df_null" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "id": "4644ba16-2a7e-4de3-964f-606defd1c342", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(952, 11)\n" + ] + } + ], + "source": [ + "print(df.shape)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "id": "68f57ba7-9be6-40e7-9ee9-7793938ff673", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CustomerSTGENDEREducationCustomer Lifetime ValueIncomeMonthly Premium AutoNumber of Open ComplaintsPolicy TypeVehicle ClassTotal Claim Amount
1QZ44356ArizonaFBachelor697953.59%0.094.01/0/00Personal AutoFour-Door Car1131.464935
2AI49188NevadaFBachelor1288743.17%48767.0108.01/0/00Personal AutoTwo-Door Car566.472247
3WW63253CaliforniaMBachelor764586.18%0.0106.01/0/00Corporate AutoSUV529.881344
4GA49547WashingtonMHigh School or Below536307.65%36357.068.01/0/00Personal AutoFour-Door Car17.269323
5OC83172OregonFBachelor825629.78%62902.069.01/0/00Personal AutoTwo-Door Car159.383042
\n", + "
" + ], + "text/plain": [ + " Customer ST GENDER Education Customer Lifetime Value \\\n", + "1 QZ44356 Arizona F Bachelor 697953.59% \n", + "2 AI49188 Nevada F Bachelor 1288743.17% \n", + "3 WW63253 California M Bachelor 764586.18% \n", + "4 GA49547 Washington M High School or Below 536307.65% \n", + "5 OC83172 Oregon F Bachelor 825629.78% \n", + "\n", + " Income Monthly Premium Auto Number of Open Complaints Policy Type \\\n", + "1 0.0 94.0 1/0/00 Personal Auto \n", + "2 48767.0 108.0 1/0/00 Personal Auto \n", + "3 0.0 106.0 1/0/00 Corporate Auto \n", + "4 36357.0 68.0 1/0/00 Personal Auto \n", + "5 62902.0 69.0 1/0/00 Personal Auto \n", + "\n", + " Vehicle Class Total Claim Amount \n", + "1 Four-Door Car 1131.464935 \n", + "2 Two-Door Car 566.472247 \n", + "3 SUV 529.881344 \n", + "4 Four-Door Car 17.269323 \n", + "5 Two-Door Car 159.383042 " + ] + }, + "execution_count": 55, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "markdown", + "id": "77ab57bc-9681-4631-beb7-102344802cf5", + "metadata": {}, + "source": [ + "- Determine the data types of each column and evaluate whether they are appropriate for the nature of the variable. 
You should also provide suggestions for fixing any incorrect data types.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "id": "a2faff4a-9956-40f1-af46-bca592411911", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Customer object\n", + "ST object\n", + "GENDER object\n", + "Education object\n", + "Customer Lifetime Value object\n", + "Income float64\n", + "Monthly Premium Auto float64\n", + "Number of Open Complaints object\n", + "Policy Type object\n", + "Vehicle Class object\n", + "Total Claim Amount float64\n", + "dtype: object\n" + ] + } + ], + "source": [ + "data_types = df.dtypes\n", + "print(data_types)\n", + "\n", + "# Customer Lifetime Value should be float (strip the trailing '%' before converting)\n", + "# Number of Open Complaints should be int (stored as strings such as '1/0/00')" + ] + }, + { + "cell_type": "markdown", + "id": "4c8c9eac-7fe9-4ccb-9b82-20a6c74620d7", + "metadata": {}, + "source": [ + "- Identify the number of unique values for each column and determine which columns appear to be categorical.
You should also describe the unique values of each categorical column and the range of values for numerical columns, and give your insights.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "id": "bb84b80c-7cf9-46d7-b53b-1ec7015233f6", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Customer:\n", + "['QZ44356' 'AI49188' 'WW63253' 'GA49547' 'OC83172' 'XZ87318' 'CF85061'\n", + " 'DY87989' 'BQ94931' 'SX51350' 'QK46697' 'HS14476' 'HD95276' 'YD87931'\n", + " 'NW21079' 'YB66933' 'OW15518' 'GP39118' 'SG20925' 'FM14335' 'HS28694'\n", + " 'LH92841' 'AZ95587' 'DS81757' 'OJ94107' 'LP84436' 'FF22360' 'LM19287'\n", + " 'ZU18643' 'AZ82578' 'XC67861' 'YC43143' 'EK59571' 'PA38372' 'RO18530'\n", + " 'PD27940' 'BS77946' 'YM50253' 'NR15332' 'RC62865' 'CC15295' 'KA61892'\n", + " 'OS94884' 'ND87334' 'OY51402' 'YL74911' 'GK92563' 'HL53154' 'RI78966'\n", + " 'IC13702' 'BE10809' 'HT87217' 'TH95618' 'TS19868' 'LP45550' 'QR87004'\n", + " 'OE75747' 'DX91392' 'AB72731' 'GX84338' 'IS12901' 'BN90616' 'HH90090'\n", + " 'IU25463' 'KC11055' 'PD33979' 'NK71023' 'AB13432' 'OZ97704' 'UF46533'\n", + " 'XP47431' 'GK73582' 'RV98763' 'II62831' 'XK33449' 'TR85083' 'EO95328'\n", + " 'EN21086' 'YL83902' 'AZ62651' 'ZW25874' 'EH41854' 'MW70227' 'SL22297'\n", + " 'RV14138' 'UO62808' 'ZX64745' 'FL34139' 'TS11219' 'XX12304' 'SD64087'\n", + " 'OY38576' 'BG76355' 'IP66913' 'LE95702' 'KX54357' 'EZ78112' 'XN16891'\n", + " 'XK31350' 'CC30924' 'IT78748' 'KY33386' 'CO44221' 'LK60013' 'DE21533'\n", + " 'YS94121' 'UK68427' 'TE49565' 'RA88421' 'KQ51983' 'CD88896' 'YV22553'\n", + " 'WU14435' 'XV84099' 'RI24911' 'KO26461' 'HI14283' 'PT50227' 'BH36570'\n", + " 'TX17484' 'CT41158' 'AO87348' 'DE55857' 'LF66923' 'CN24514' 'UW32074'\n", + " 'HP36979' 'PP40919' 'RO73268' 'HO61691' 'BS13062' 'FO35655' 'HR10526'\n", + " 'IA63417' 'BH35016' 'PK52952' 'OD76309' 'IL28481' 'GY55092' 'UF33451'\n", + " 'CF15558' 'JM62924' 'EM66435' 'QX45933' 
'JI71369' 'JU93290' 'GU66096'\n", + " 'UC33108' 'LW93867' 'OU78470' 'XW90265' 'HS67749' 'VZ51506' 'UI64281'\n", + " 'AE98193' 'AZ74055' 'XS76911' 'AY40674' 'NA12740' 'UA84837' 'DJ51510'\n", + " 'VM58985' 'OH60605' 'UO98052' 'NC53424' 'LQ13873' 'LA97014' 'NB79936'\n", + " 'NT89061' 'AF10970' 'ZG48513' 'JQ59145' 'FE84989' 'JT52858' 'MC62068'\n", + " 'EU27538' 'RH42306' 'US23612' 'WV76014' 'RK96223' 'MF82000' 'FM46980'\n", + " 'SY56792' 'RF61565' 'IM94808' 'VI14730' 'YR34119' 'RR77985' 'QD28391'\n", + " 'WV17090' 'TM23514' 'MQ68407' 'GJ59592' 'FY56083' 'UA94723' 'FW91032'\n", + " 'DE34457' 'HD32044' 'HH30454' 'AH84063' 'QA17596' 'XI41052' 'DI30528'\n", + " 'SC66359' 'EN61670' 'DQ10761' 'BQ51587' 'JE21522' 'WS47147' 'ZA64638'\n", + " 'EW38459' 'QW87316' 'IC43478' 'TE34064' 'WU60905' 'YM18992' 'PD55753'\n", + " 'KU56006' 'MJ69973' 'TW43626' 'XX84133' 'ZW84453' 'HO29524' 'VE89726'\n", + " 'GE87503' 'PX90263' 'NI17718' 'FY32213' 'RZ13254' 'GN45013' 'NM39588'\n", + " 'KU84464' 'YH43527' 'RO30676' 'QL59704' 'QH19450' 'SA54664' 'CI38330'\n", + " 'WB38524' 'CE56187' 'JL19416' 'JZ61422' 'LA13377' 'NC99948' 'QD34785'\n", + " 'RO26085' 'ES57969' 'JK55587' 'RN97635' 'BI76326' 'JA34909' 'OJ90342'\n", + " 'CM88932' 'JJ97525' 'XV21647' 'MC83487' 'BL90769' 'CR57148' 'CP85232'\n", + " 'YL74732' 'FG16766' 'NV55438' 'RM10880' 'GL56175' 'UK52289' 'OT85112'\n", + " 'BC62782' 'TI19722' 'JP30654' 'UM45563' 'EN60878' 'JF36291' 'BK59444'\n", + " 'MK70700' 'IW71076' 'AP98768' 'OM24164' 'HR85211' 'VC87846' 'ZM92052'\n", + " 'ON73702' 'QQ90441' 'HU35721' 'YP47665' 'FU99476' 'AG85615' 'OY74069'\n", + " 'DJ91267' 'KB72438' 'TR67616' 'GF65731' 'HB67642' 'DP84567' 'VV77534'\n", + " 'GL67540' 'SV50502' 'UK59698' 'OA57352' 'ZF84449' 'AX86150' 'HG39060'\n", + " 'EM29359' 'SF57173' 'OT47603' 'SW31412' 'JS36322' 'RE81445' 'RM24280'\n", + " 'LC25393' 'UX38930' 'HD95496' 'RX24650' 'DW19309' 'MT41386' 'WZ40465'\n", + " 'DB42794' 'JB50798' 'IP69763' 'TE35785' 'HX74855' 'QN65180' 'GE47180'\n", + " 'VQ38776' 
'BH86846' 'IN17648' 'DF95759' 'QG45324' 'MN61620' 'YH86390'\n", + " 'FY13480' 'YH61661' 'NL93182' 'WE68644' 'EZ30498' 'QY74517' 'NM88660'\n", + " 'MZ82036' 'ID20929' 'EY50028' 'TT82373' 'OH64088' 'SK97780' 'IO33050'\n", + " 'XA55917' 'JK32620' 'RQ19236' 'QC47433' 'RA93608' 'XH97711' 'AU96286'\n", + " 'KC17170' 'ZN47335' 'EI46264' 'EK87864' 'GV45403' 'QK31192' 'LU89008'\n", + " 'NS10490' 'KL98495' 'IU96845' 'QL93655' 'PF40592' 'LZ34046' 'JC80093'\n", + " 'YE88490' 'YC80498' 'AI85843' 'XD66024' 'FY51713' 'PH26378' 'WQ18638'\n", + " 'KY14688' 'TC97762' 'QC87108' 'CX12134' 'SM73248' 'CK19789' 'UV12583'\n", + " 'JC11405' 'KA89683' 'BG85305' 'UQ87917' 'XN11823' 'OS46571' 'PX17116'\n", + " 'RP19541' 'ZR25747' 'NQ86532' 'JY27336' 'PB54378' 'SV38190' 'CV24005'\n", + " 'EX28656' 'CF57022' 'GM16780' 'BX94438' 'RM41745' 'XR70252' 'YH92099'\n", + " 'SG81493' 'ZX23819' 'FJ54907' 'CU26127' 'YH60476' 'ZZ97035' 'GE82737'\n", + " 'KY21873' 'UA51318' 'BV55014' 'HX21307' 'LQ68252' 'CR92802' 'SL35268'\n", + " 'RD62882' 'JS42382' 'BT30554' 'VP57424' 'VU19243' 'TA82973' 'GK71720'\n", + " 'OQ61223' 'LL62746' 'JQ56711' 'AW77988' 'QP84605' 'MY97912' 'IB87349'\n", + " 'AW73065' 'BW80872' 'PX70175' 'KF75098' 'IS50283' 'MY64920' 'KN34250'\n", + " 'GN46207' 'KL57176' 'MN94234' 'JY90595' 'HK26543' 'PN86062' 'VW27730'\n", + " 'SH55671' 'MO56878' 'VO38365' 'SV35618' 'RX12347' 'FR55658' 'XS12556'\n", + " 'ZU73588' 'WT43034' 'VM13430' 'TC78849' 'VC34764' 'WO90953' 'IU47468'\n", + " 'KO46064' 'RB34917' 'BI38192' 'PU18983' 'SW79912' 'ES39217' 'KP72427'\n", + " 'UA19178' 'PR53785' 'XF57481' 'CN90378' 'KI56154' 'UI55951' 'FF28650'\n", + " 'FS55302' 'TN79487' 'HG32616' 'UK41984' 'LZ52266' 'PM27367' 'ZK21724'\n", + " 'BH35482' 'QE22757' 'ON77649' 'RN82884' 'CQ75652' 'FF58467' 'BS83666'\n", + " 'WO29605' 'TL77607' 'EZ50606' 'OS39723' 'FN69743' 'XW96958' 'TU92578'\n", + " 'TL43709' 'YE68736' 'OB96537' 'EU68825' 'CC31456' 'DJ77787' 'LN26837'\n", + " 'YI92916' 'NW54906' 'ME77513' 'UK76891' 'SI26888' 'YD74948' 
'HB64268'\n", + " 'BW52697' 'NL41409' 'OD69005' 'ZZ91716' 'UK70255' 'QT25383' 'AW18068'\n", + " 'NS45347' 'FV19421' 'XW89091' 'YC11951' 'UY18770' 'RA49085' 'BG84194'\n", + " 'PT64580' 'MR67738' 'DM95829' 'DB75522' 'LM34525' 'WW30771' 'QP65569'\n", + " 'TN50051' 'UO86707' 'JA41698' 'NX18774' 'DA69469' 'CN23147' 'RA68844'\n", + " 'GH42026' 'BD16530' 'JH91579' 'WK23685' 'GR62267' 'PI78084' 'GF97874'\n", + " 'ZH19885' 'UK25655' 'QR45101' 'EL93539' 'EE99484' 'DP46882' 'WP41146'\n", + " 'TK60799' 'DN29808' 'SS59521' 'NG66579' 'TC14209' 'ED50963' 'GP40701'\n", + " 'CP98451' 'NX52648' 'ZC32510' 'NG27780' 'HN95240' 'EB59129' 'RA70851'\n", + " 'PM19162' 'MS59005' 'SU71163' 'BD35676' 'NI44621' 'EW33419' 'HX44948'\n", + " 'DL36983' 'XR87264' 'NN99001' 'XV95530' 'OL97871' 'HQ23708' 'WR63188'\n", + " 'NG82219' 'KU29408' 'RE46783' 'RU94434' 'GI82355' 'VO26340' 'NV61299'\n", + " 'DX31066' 'CY50337' 'TJ20375' 'EP72155' 'JJ76159' 'BG15419' 'AO74776'\n", + " 'HQ82233' 'OL72737' 'ZQ59828' 'NZ15548' 'XK61304' 'EJ44139' 'CM94425'\n", + " 'OV54878' 'JF57282' 'MY37953' 'XP64922' 'WL65572' 'LN50325' 'HJ15383'\n", + " 'KH59823' 'YM79169' 'DR38127' 'PU42145' 'KM33477' 'RI53167' 'OF77789'\n", + " 'YB33445' 'BA17836' 'JS43228' 'BB11622' 'HQ70429' 'WK88044' 'LA80525'\n", + " 'EH16250' 'PU41872' 'HB85743' 'MM71959' 'MB83663' 'KR43119' 'KH24214'\n", + " 'AC40767' 'HP55391' 'EG62398' 'VS19949' 'AM92343' 'GI68556' 'JT11876'\n", + " 'XR64251' 'MK34957' 'GP18756' 'AP23850' 'KQ65521' 'EJ19449' 'QB70027'\n", + " 'QW47320' 'KH64733' 'ON59472' 'HP94242' 'RV15398' 'EA25683' 'PW73754'\n", + " 'MC71942' 'OX72195' 'YQ99152' 'KI19439' 'PM76175' 'US45383' 'GT38956'\n", + " 'SN41301' 'BE62503' 'PA16884' 'NC58480' 'NS39326' 'PN18507' 'EK91340'\n", + " 'JY16280' 'ZW71731' 'ZC24631' 'YR34689' 'RT65829' 'BZ12077' 'WM65373'\n", + " 'NH35059' 'QD38160' 'BM15160' 'VY79030' 'EV19512' 'TE13577' 'WY97929'\n", + " 'YG20683' 'FK75497' 'NE60110' 'TN36521' 'HG33568' 'TW17878' 'ZO83562'\n", + " 'CH97539' 'CV29889' 'MO33320' 
'QZ81258' 'NY56352' 'EA27048' 'UT38865'\n", + " 'QC89139' 'LA14484' 'HN57556' 'CV31235' 'WR45726' 'LB25094' 'KW56110'\n", + " 'XO36233' 'ZX86243' 'DW29763' 'CT83377' 'OQ90898' 'GO77248' 'QW33258'\n", + " 'OU79745' 'VZ79886' 'FI92440' 'YG85980' 'QM74621' 'EI71732' 'VN79010'\n", + " 'FI61723' 'OH55411' 'TF10720' 'NW30838' 'CB58476' 'WI69346' 'FS76657'\n", + " 'YX89016' 'PK28821' 'MB51200' 'XG44587' 'FG91922' 'OM99303' 'RV67546'\n", + " 'UJ79253' 'PN98247' 'IB67546' 'OE19087' 'CM95716' 'MW62634' 'QW67581'\n", + " 'SN16059' 'OE51254' 'RM42344' 'GB35238' 'ML82674' 'EI85244' 'DE28132'\n", + " 'TV25678' 'TY26512' 'OB69153' 'QZ77637' 'XN41715' 'QR15857' 'FL69363'\n", + " 'IS30295' 'WA25797' 'NL59519' 'ZU93025' 'DK94262' 'UQ30615' 'OR40060'\n", + " 'DK32872' 'FA46418' 'ER19995' 'KI75855' 'ND41876' 'PN21042' 'GJ43254'\n", + " 'AL46984' 'JP58047' 'ZE85014' 'KU88219' 'UU98729' 'WS82822' 'YB49933'\n", + " 'XC16387' 'XJ96748' 'TM98684' 'AY18433' 'DM74502' 'FT56968' 'OX36896'\n", + " 'BZ65376' 'LN34660' 'JC29295' 'KJ87930' 'XT36360' 'IX35050' 'UN97379'\n", + " 'MR57294' 'UG79499' 'UA50747' 'GL20444' 'SP58110' 'XM91635' 'TV82603'\n", + " 'BB82067' 'JP94676' 'VU53417' 'IW54795' 'RN78170' 'IX55883' 'XM72420'\n", + " 'GC15104' 'RX13282' 'QA85890' 'IR62668' 'AL96740' 'SS48498' 'PE39479'\n", + " 'JH62891' 'FI20423' 'PM13394' 'YV67971' 'QD31377' 'YG10247' 'FE73696'\n", + " 'SW19699' 'QJ40732' 'HM76207' 'NT59303' 'PU41393' 'QO86948' 'QN10888'\n", + " 'VY19543' 'XC15133' 'ST43550' 'FX36546' 'JX68983' 'HX78576' 'ZQ11381'\n", + " 'ON39271' 'SB18278' 'ZT30559' 'XI41106' 'ZS88847' 'RU49126' 'KR62797'\n", + " 'ZJ73220' 'FY62633' 'CU36986' 'WZ53904' 'AA71604' 'TD10493' 'LY97989'\n", + " 'VX39856' 'TP51897' 'QQ89253' 'EI91403' 'QG15435' 'FZ55002' 'HX77930'\n", + " 'UN37063' 'VB87946' 'AB60627' 'TA34903' 'AQ51368' 'NZ26102' 'GB45753'\n", + " 'BV79904' 'OB49075' 'DS97676' 'JO63462' 'NJ10602' 'RS24501' 'VT78274'\n", + " 'SU56153' 'MN20737' 'KL43114' 'YQ15567' 'TR88637' 'TC88986' 'XX88577'\n", + " 
'NE49052' 'KX17826' 'CC91503' 'WH32183' 'ES90681' 'DW96592' 'MT23134'\n", + " 'BM69081' 'MB90871' 'QL77686' 'ON77827' 'KP18988' 'TI92884' 'JH73503'\n", + " 'YE97964' 'VA30351' 'PV55726' 'UC88305' 'TS53809' 'ZV32120' 'FB80807'\n", + " 'AS55677' 'WA15684' 'SA50567' 'KJ31611' 'VL37375' 'KN21017' 'PX44289'\n", + " 'AM97901' 'RE42925' 'TR81766' 'CH85057' 'UP71482' 'EG40670' 'HV83672'\n", + " 'MG10140' 'TC44716' 'QO65264' 'EB66698' 'OT52034' 'CH85444' 'PU85769'\n", + " 'UI73201' 'SL50592' 'XP11075' 'SI31236' 'JN26745' 'VK48036' 'JX76668'\n", + " 'DS45802' 'OA96690' 'EM27919' 'QO41043' 'OV50124' 'PR31642' 'BU41599'\n", + " 'TK30357' 'NF31087' 'NH16984' 'OS75493' 'VT63298' 'QS75550' 'SZ16483'\n", + " 'VM92311' 'NJ46849' 'WZ31900' 'RG30482' 'ZM86949' 'QQ39596' 'FH51383'\n", + " 'BJ53923' 'CZ96653' 'FB23788' 'NT43594' 'RJ85627' 'KJ86296' 'PI47776'\n", + " 'MD73554' 'UX92071' 'YG44474' 'UH45301' 'RY92647' 'IK12620' 'GQ66762'\n", + " 'YT69858' 'XD85577' 'TM65736' 'VJ51327' 'GS98873' 'CW49887' 'MY31220']\n", + "\n", + "\n", + "ST:\n", + "['Arizona' 'Nevada' 'California' 'Washington' 'Oregon' 'AZ' 'WA' 'Cali']\n", + "\n", + "\n", + "GENDER:\n", + "['F' 'M' 'Femal' 'Male' 'female']\n", + "\n", + "\n", + "Education:\n", + "['Bachelor' 'High School or Below' 'College' 'Master' 'Bachelors' 'Doctor']\n", + "\n", + "\n", + "Customer Lifetime Value:\n", + "['697953.59%' '1288743.17%' '764586.18%' '536307.65%' '825629.78%'\n", + " '538089.86%' '721610.03%' '2412750.40%' '738817.81%' '473899.20%'\n", + " '617710.93%' '916206.32%' '473787.17%' '495165.61%' '487938.48%'\n", + " '538275.20%' '1595001.95%' '499655.27%' '518579.76%' '1048491.54%'\n", + " '282986.39%' '725595.38%' '1038855.32%' '247012.12%' '561906.85%'\n", + " '904711.92%' '268731.41%' '373150.46%' '366077.03%' '792882.93%'\n", + " '501175.16%' '798825.83%' '388545.64%' '935773.78%' '254068.98%'\n", + " '488516.25%' '975330.71%' '294615.37%' '258111.09%' '351738.58%'\n", + " '974335.01%' '387364.70%' '764928.20%' '228759.69%' 
'825576.39%'\n", + " '871492.21%' '681923.12%' '741619.73%' '777115.90%' '696834.19%'\n", + " '425028.26%' '1977656.65%' '2134346.60%' '241313.97%' '1536384.72%'\n", + " '845696.19%' '218964.25%' '578018.22%' '463998.16%' '382443.13%'\n", + " '596811.89%' '859033.50%' '407663.47%' '1225260.18%' '1693627.15%'\n", + " '489243.55%' '994230.48%' '373583.81%' '1311752.22%' '457452.41%'\n", + " '547006.06%' '297884.60%' '641096.75%' '447902.31%' '238373.19%'\n", + " '276449.37%' '792010.54%' '688909.80%' '327419.46%' '995170.77%'\n", + " '252155.57%' '2370611.34%' '604702.52%' '1114030.25%' '433406.41%'\n", + " '279974.79%' '792313.66%' '368811.09%' '1206745.60%' '292497.67%'\n", + " '1501409.27%' '927723.38%' '627412.39%' '388664.74%' '438627.76%'\n", + " '1136526.77%' '561096.43%' '291289.20%' '691572.99%' '626266.33%'\n", + " '650339.70%' '800739.94%' '292991.65%' '596955.30%' '547315.99%'\n", + " '564539.67%' '636926.24%' '1183376.73%' '612110.79%' '515936.97%'\n", + " '251459.20%' '866861.13%' '496096.54%' '550413.90%' '750745.54%'\n", + " '3226985.14%' '565703.16%' '506175.79%' '591278.38%' '1518227.98%'\n", + " '1074703.09%' '205062.35%' '246544.49%' '534312.13%' '811982.91%'\n", + " '460526.52%' '640878.56%' '237653.35%' '321107.00%' '509452.23%'\n", + " '2575527.82%' '867222.97%' '804473.07%' '400151.91%' '1670611.70%'\n", + " '854441.11%' '780531.29%' '611275.69%' '477294.38%' '1097909.56%'\n", + " '500426.38%' '1322304.38%' '262331.54%' '1784019.56%' '510611.18%'\n", + " '1793060.45%' '545734.26%' '656364.41%' '481252.52%' '2932804.19%'\n", + " '577352.07%' '684711.89%' '359531.29%' '2285561.21%' '785941.46%'\n", + " '411557.74%' '502963.88%' '482141.85%' '500431.05%' '863005.39%'\n", + " '932208.51%' '1672756.06%' '365253.24%' '615860.12%' '437608.40%'\n", + " '556945.62%' '257651.30%' '834698.32%' '632392.39%' '597314.34%'\n", + " '470667.70%' '809341.03%' '503574.46%' '902786.72%' '728888.48%'\n", + " '1804247.94%' '499206.30%' '910226.78%' '254040.77%' 
'1294189.19%'\n", + " '236534.86%' '572076.51%' '703553.41%' '263697.77%' '539583.20%'\n", + " '838263.01%' '3116174.52%' '288774.23%' '742584.53%' '2558572.78%'\n", + " '1027260.82%' '437636.36%' '531889.66%' '471976.22%' '442803.16%'\n", + " '587917.61%' '941690.85%' '828815.56%' '3265483.83%' '471945.01%'\n", + " '390347.48%' '545725.97%' '272535.64%' '443397.37%' '533246.27%'\n", + " '231509.50%' '541195.37%' '958733.23%' '2210350.72%' '976494.53%'\n", + " '256715.15%' '265062.28%' '1126436.33%' '216852.35%' '861066.75%'\n", + " '283464.62%' '893013.97%' '553638.70%' '284085.43%' '808288.10%'\n", + " '525473.43%' '511623.76%' '303464.70%' '802489.99%' '1821114.32%'\n", + " '512156.33%' '215017.86%' '559538.99%' '756282.40%' '538585.32%'\n", + " '267805.83%' '942256.79%' '360586.03%' '776259.06%' '2344490.05%'\n", + " '255817.82%' '265438.10%' '254978.61%' '296959.33%' '436312.46%'\n", + " '588430.86%' '527219.16%' '550989.57%' '1631368.35%' '567805.02%'\n", + " '1210120.88%' '245357.08%' '507566.27%' '321497.94%' '1227534.31%'\n", + " '272221.07%' '245744.09%' '355484.53%' '492954.97%' '803645.03%'\n", + " '427691.53%' '3347334.95%' '596058.14%' '4479546.94%' '383211.81%'\n", + " '683793.26%' '528526.82%' '309651.12%' '358971.07%' '258240.85%'\n", + " '340391.94%' '1357567.60%' '343613.43%' '2868582.79%' '450267.97%'\n", + " '618311.15%' '1892933.06%' '555329.58%' '501125.92%' '1044244.63%'\n", + " '219961.78%' '512317.09%' '748431.05%' '261302.31%' '908063.97%'\n", + " '1377097.62%' '287682.29%' '540891.15%' '677030.68%' '2414387.56%'\n", + " '353805.95%' '2909123.94%' '1983420.12%' '473136.70%' '3553784.60%'\n", + " '3461137.90%' '2021630.88%' '1397651.93%' '590408.82%' '559583.50%'\n", + " '229430.36%' '627391.19%' '372672.80%' '265671.31%' '511068.08%'\n", + " '712659.65%' '460163.41%' '915523.97%' '1480805.62%' '890167.84%'\n", + " '573459.82%' '417769.70%' '2777628.91%' '1036434.75%' '785190.14%'\n", + " '1422650.49%' '287543.24%' '504129.96%' 
'436293.12%' '962452.44%'\n", + " '2191440.55%' '694842.22%' '247152.84%' '2190391.36%' '902882.14%'\n", + " '530375.95%' '2070825.88%' '512376.81%' '949234.30%' '820486.32%'\n", + " '987729.57%' '481500.97%' '627701.17%' '826063.98%' '254945.00%'\n", + " '380392.18%' '863540.35%' '551055.90%' '358588.41%' '488925.28%'\n", + " '275694.17%' '328954.74%' '1093717.85%' '737556.79%' '1011077.82%'\n", + " '511941.43%' '853383.20%' '222476.80%' '804280.38%' '255443.71%'\n", + " '1807394.00%' '243050.66%' '316765.84%' '546560.40%' '1035751.42%'\n", + " '253781.39%' '282194.72%' '775712.81%' '407913.27%' '595554.46%'\n", + " '1415861.36%' '848723.80%' '628547.69%' '1147348.15%' '494263.06%'\n", + " '2359468.02%' '257250.66%' '477055.09%' '675665.14%' '593601.18%'\n", + " '357076.05%' '601996.05%' '542686.40%' '273031.38%' '498268.14%'\n", + " '876926.68%' '422061.35%' '1153750.51%' '588718.20%' '470058.38%'\n", + " '1096395.72%' '252317.12%' '375780.47%' '1294173.35%' '376446.51%'\n", + " '688955.70%' '362345.42%' '758211.38%' '827774.56%' '257645.56%'\n", + " '820538.79%' '1958246.89%' '648152.66%' '259931.09%' '983033.76%'\n", + " '1044265.14%' '3605753.70%' '847003.68%' '827878.65%' '478893.26%'\n", + " '308799.99%' '444373.62%' '798408.65%' '718097.10%' '1565603.43%'\n", + " '2071494.04%' '533735.24%' '505082.62%' '511662.40%' '726873.70%'\n", + " '261661.39%' '373843.62%' '545386.12%' '684615.03%' '617291.42%'\n", + " '1034632.45%' '699700.86%' '419625.77%' '785810.98%' '606434.40%'\n", + " '1749752.20%' '897064.73%' '592311.72%' '3585059.94%' '870984.53%'\n", + " '1330933.52%' '452850.49%' '279190.65%' '443441.12%' '799600.75%'\n", + " '512973.90%' '569717.52%' '921713.06%' '1020892.76%' '417068.73%'\n", + " '450540.58%' '310756.86%' '552866.50%' '504586.67%' '296272.25%'\n", + " '866595.64%' '1141344.12%' '1548843.20%' '886114.95%' '593474.15%'\n", + " '354323.21%' '349002.83%' '368309.99%' '598977.39%' '1250084.30%'\n", + " '860915.82%' '249745.51%' '701917.72%' 
'538792.63%' '616555.75%'\n", + " '273020.29%' '516211.69%' '793706.48%' '860815.72%' '263254.58%'\n", + " '778500.42%' '2163983.86%' '498082.50%' '745723.78%' '1064093.93%'\n", + " '686250.83%' '904898.34%' '554803.19%' '831268.16%' '238998.10%'\n", + " '445811.34%' '529574.17%' '383960.61%' '277890.37%' '401654.20%'\n", + " '493094.93%' '249131.70%' '290887.59%' '428294.80%' '834162.37%'\n", + " '509078.13%' '736618.83%' '243687.51%' '885268.87%' '2387547.68%'\n", + " '560049.65%' '463654.65%' '757334.51%' '1469663.55%' '897214.03%'\n", + " '772484.01%' '594667.07%' '800054.51%' '645756.10%' '728144.01%'\n", + " '259243.78%' '467842.34%' '1386992.71%' '871777.78%' '523398.68%'\n", + " '476418.97%' '247246.92%' '281369.26%' '550505.70%' '260620.85%'\n", + " '1048194.38%' '325676.64%' '3047578.05%' '636490.22%' '946850.93%'\n", + " '563145.19%' '778099.93%' '981652.83%' '751913.36%' '1017971.70%'\n", + " '277283.92%' '403750.18%' '419196.61%' '267686.79%' '252395.96%'\n", + " '698840.16%' '874205.78%' '267331.96%' '1215732.99%' '295776.40%'\n", + " '717390.94%' '309953.80%' '841568.46%' '2684312.45%' '1305717.07%'\n", + " '959995.02%' '853510.89%' '829348.19%' '663685.98%' '560908.25%'\n", + " '507732.09%' '527562.70%' '343525.01%' '662461.18%' '575744.23%'\n", + " '288645.16%' '534143.88%' '416001.81%' '284624.54%' '477025.66%'\n", + " '505961.62%' '909574.46%' '268886.40%' '827763.76%' '905793.53%'\n", + " '380175.04%' '933934.16%' '252012.32%' '498409.53%' '259574.80%'\n", + " '743769.33%' '1453678.76%' '591330.59%' '277166.30%' '2919436.64%'\n", + " '988038.58%' '1511440.24%' '575991.08%' '849516.42%' '438118.42%'\n", + " '699782.74%' '1143058.85%' '748248.61%' '859691.66%' '785496.08%'\n", + " '907576.82%' '411858.86%' '515281.96%' '651297.65%' '2778969.24%'\n", + " '266727.00%' '1092840.71%' '761538.13%' '200435.07%' '243468.12%'\n", + " '1419536.03%' '942768.49%' '1198242.09%' '310278.95%' '422263.12%'\n", + " '402381.44%' '529715.18%' '2142363.72%' 
'441620.62%' '463903.52%'\n", + " '486354.46%' '2583090.98%' '297431.49%' '206445.88%' '1006460.83%'\n", + " '803240.19%' '548921.41%' '261275.67%' '857346.39%' '855038.66%'\n", + " '230864.80%' '425462.07%' '898285.04%' '786816.60%' '770424.87%'\n", + " '1055217.00%' '1604510.95%' '873783.75%' '545489.07%' '770528.33%'\n", + " '703926.24%' '883808.56%' '873352.73%' '959747.48%' '450666.02%'\n", + " '1785797.23%' '249780.82%' '542613.62%' '799814.38%' '289762.07%'\n", + " '1159950.22%' '1514793.06%' '543576.78%' '1329771.23%' '992704.97%'\n", + " '1777154.90%' '1826927.02%' '708321.24%' '588950.91%' '1577139.34%'\n", + " '528817.33%' '2758055.40%' '777853.23%' '734186.13%' '791919.70%'\n", + " '216387.02%' '978780.88%' '520764.08%' '2114727.72%' '1228076.66%'\n", + " '244139.42%' '653556.06%' '920659.83%' '483820.90%' '462554.81%'\n", + " '1404210.30%' '754661.35%' '517035.84%' '1391737.72%' '1131813.08%'\n", + " '427636.36%' '432224.03%' '800230.83%' '447177.82%' '849635.28%'\n", + " '2250088.35%' '1630196.76%' '231973.59%' '871704.98%' '286011.17%'\n", + " '245340.83%' '2498022.55%' '748263.95%' '245757.60%' '237974.12%'\n", + " '828696.44%' '239391.54%' '465715.95%' '1319792.89%' '280391.67%'\n", + " '864650.41%' '742587.06%' '452873.74%' '222707.28%' '729294.88%'\n", + " '318435.52%' '1131520.37%' '253070.51%' '1836155.53%' '864153.00%'\n", + " '327853.19%' '527198.21%' '376363.77%' '1395556.96%' '500152.75%'\n", + " '708283.04%' '761948.28%' '1255088.20%' '3219660.04%' '679377.41%'\n", + " '527231.97%' '626534.33%' '854758.61%' '278742.37%' '462680.11%'\n", + " '866336.40%' '387222.22%' '517081.15%' '896028.02%' '547183.43%'\n", + " '3493100.17%' '262039.23%' '1906949.95%' '4022401.36%' '270148.83%'\n", + " '371243.05%' '2185084.00%' '784016.58%' '823703.79%' '224347.39%'\n", + " '843446.41%' '517002.60%' '264144.61%' '279068.30%' '274512.98%'\n", + " '757953.27%' '1097878.03%' '825506.01%' '474773.46%' '354090.43%'\n", + " '258218.53%' '1166509.78%' 
'739628.37%' '433079.98%' '1463545.16%'\n", + " '856476.82%' '1156568.75%' '277104.50%' '850712.88%' '332309.25%'\n", + " '523433.17%' '2470959.96%' '1778627.78%' '255122.67%' '724771.37%'\n", + " '453884.78%' '3164210.46%' '873042.20%' '833273.06%' '235774.70%'\n", + " '463716.40%' '535719.27%' '539197.10%' '369414.05%' '504041.24%'\n", + " '2749542.19%' '484228.50%' '2738281.89%' '522710.19%' '259009.60%'\n", + " '1053607.80%' '858127.87%' '946311.33%' '1344100.64%' '388650.48%'\n", + " '678489.37%' '253862.63%' '563994.20%' '1168137.43%' '2599775.00%'\n", + " '1377836.93%' '492318.17%' '227233.54%' '1489539.80%' '975604.50%'\n", + " '942297.41%' '383735.76%' '1095213.19%' '815913.66%' '1948049.98%'\n", + " '391936.67%' '798514.21%' '1216874.49%' '584932.15%' '508583.66%'\n", + " '290393.98%' '627317.34%' '1832141.90%' '517870.42%' '1402435.84%'\n", + " '530943.59%' '494980.38%' '859566.53%' '2295189.20%' '379213.03%'\n", + " '275574.80%' '488033.96%' '905190.53%' '1011544.62%' '826907.54%'\n", + " '807165.30%' '772699.36%' '831113.59%' '257402.04%' '572732.71%'\n", + " '367914.21%' '274451.96%' '563674.03%' '1687038.82%' '273800.20%'\n", + " '464470.05%' '474668.65%' '1687432.82%' '238760.61%' '446533.57%'\n", + " '459162.59%' '1309258.58%' '911226.66%' '416516.66%' '265998.06%'\n", + " '1198659.21%' '289873.27%' '289424.39%' '350045.44%' '262180.86%'\n", + " '1022180.50%' '257827.10%' '624259.57%' '522028.10%' '313643.21%'\n", + " '452536.58%' '497035.73%' '1546778.90%' '583889.92%' '1065688.20%'\n", + " '228961.87%' '543980.42%' '795615.01%' '255505.15%' '330799.90%'\n", + " '845905.32%' '1335012.09%' '493122.13%' '777683.52%' '255367.22%'\n", + " '487646.97%' '903430.58%' '810591.08%' '561968.91%' '1572713.06%'\n", + " '661801.64%' '467004.80%' '1016936.98%' '832307.40%' '241776.00%'\n", + " '804487.24%' '532572.45%' '694752.40%' '584741.52%' '472478.61%'\n", + " '279022.80%' '2153133.28%' '1262283.27%' '2017196.15%' '1646436.59%'\n", + " '266544.71%' 
'709891.41%' '397134.51%' '552821.28%' '833899.58%'\n", + " '3844585.59%' '544855.52%' '1080806.60%' '618509.65%' '320822.59%'\n", + " '548010.41%' '2298615.39%' '1310792.59%' '746292.63%' '1146399.10%'\n", + " '723613.25%' '623268.79%' '2839332.99%' '374675.16%' '2156933.73%'\n", + " '501208.37%' '337185.84%' '386477.68%' '414571.19%' '515607.27%'\n", + " '366737.50%' '783568.35%' '1456726.84%' '1017133.90%' '413577.52%'\n", + " '551149.11%' '1131424.39%' '541461.73%' '742159.35%' '1447612.49%'\n", + " '493688.84%' '452527.65%' '558176.13%' '1413434.74%' '2472318.31%'\n", + " '283806.78%' '384848.36%' '1950447.39%' '248004.59%' '436137.29%'\n", + " '252907.75%' '250444.48%' '864970.06%' '1366835.53%' '2063508.46%'\n", + " '251753.36%' '532667.77%' '260027.21%' '853479.28%' '882883.50%'\n", + " '224844.96%' '1230276.24%' '455659.30%' '525198.40%' '674311.93%'\n", + " '1401472.13%' '943891.56%' '1050677.17%' '421391.86%' '477368.64%'\n", + " '544142.01%' '284226.69%' '305955.03%' '2031499.76%' '323912.47%'\n", + " '899704.02%']\n", + "\n", + "\n", + "Income:\n", + "[ 0. 48767. 36357. 62902. 55350. 14072. 28812. 61040. 29723. 46896.\n", + " 67163. 77552. 71600. 99428. 61108. 25317. 88891. 61222. 50335. 82210.\n", + " 64495. 28859. 77330. 33060. 42557. 26372. 17514. 89270. 29757. 51814.\n", + " 24028. 28142. 52705. 54040. 22492. 21876. 70699. 27501. 15897. 25141.\n", + " 51066. 28358. 62530. 90972. 63110. 29549. 39411. 21709. 67890. 84311.\n", + " 99316. 54507. 64586. 61709. 94656. 61085. 89284. 31686. 56855. 53703.\n", + " 20396. 27679. 23904. 65351. 64459. 32961. 71416. 68964. 78108. 10621.\n", + " 84910. 77493. 81097. 96610. 30110. 22081. 98473. 97431. 93870. 50366.\n", + " 34498. 16552. 26787. 43860. 21474. 18174. 60920. 41520. 72208. 53863.\n", + " 66446. 64997. 64460. 46618. 49988. 16269. 72006. 44320. 19782. 63933.\n", + " 28224. 21073. 63243. 20993. 94827. 39161. 37534. 80210. 21708. 94731.\n", + " 32375. 16531. 32006. 81676. 71038. 20832. 52405. 26583. 25486. 
24065.\n", + " 70435. 39679. 53565. 37574. 48259. 78532. 96163. 72672. 99002. 79494.\n", + " 35704. 26049. 70125. 52369. 41770. 31911. 70051. 19683. 30916. 84302.\n", + " 60145. 63774. 25134. 37057. 58577. 85857. 70602. 33816. 89642. 60068.\n", + " 50044. 36650. 50653. 68931. 39266. 40864. 39035. 34923. 24506. 52220.\n", + " 53554. 34476. 68205. 53690. 74454. 29664. 72450. 47272. 21585. 23827.\n", + " 69906. 73196. 72217. 46131. 54514. 96668. 78879. 29735. 23082. 53984.\n", + " 52135. 17576. 29486. 58557. 25632. 18768. 33190. 47945. 58778. 15192.\n", + " 51859. 23422. 21604. 79298. 76731. 38460. 48534. 30817. 48412. 68798.\n", + " 23712. 72196. 68197. 75248. 89879. 46998. 57099. 33897. 59207. 40171.\n", + " 80192. 74422. 34226. 65989. 30686. 20090. 77094. 97413. 79189. 18577.\n", + " 62777. 17483. 84394. 41440. 98132. 88220. 58327. 25950. 65726. 84768.\n", + " 36234. 58842. 25629. 77311. 95697. 22254. 65974. 92079. 67752. 25398.\n", + " 33321. 20325. 13129. 49080. 42536. 29926. 21450. 33345. 15752. 40169.\n", + " 55411. 12459. 64620. 83140. 54422. 68309. 56621. 38977. 83235. 32390.\n", + " 66538. 23285. 76358. 21104. 12964. 33288. 56168. 23105. 36218. 52275.\n", + " 49665. 32471. 62773. 76694. 55687. 36633. 77060. 92600. 64801. 45257.\n", + " 26854. 85840. 26463. 30689. 29590. 25965. 17269. 90330. 18558. 46384.\n", + " 42303. 71731. 26173. 24445. 72302. 27208. 30855. 99960. 55873. 18052.\n", + " 28937. 12829. 92163. 17588. 41546. 70340. 34549. 93459. 86148. 27048.\n", + " 73259. 35482. 29462. 67801. 16042. 28056. 16495. 41163. 33799. 90125.\n", + " 87747. 35695. 90985. 66839. 79090. 24825. 26806. 56835. 46135. 22862.\n", + " 95854. 44897. 64455. 53265. 50450. 54780. 67798. 21442. 27615. 17622.\n", + " 50200. 73570. 70412. 36631. 35895. 93018. 70014. 48875. 67969. 68665.\n", + " 26802. 45345. 89689. 32051. 81139. 63834. 37548. 72421. 83102. 28432.\n", + " 83707. 63259. 63860. 43836. 86132. 28519. 39102. 97298. 41986. 17291.\n", + " 37256. 96306. 14290. 37038. 90760. 
77048. 54480. 16244. 22436. 71592.\n", + " 28728. 57449. 83318. 75217. 55308. 36068. 48804. 55790. 70258. 47274.\n", + " 44705. 70446. 64348. 88997. 10312. 96263. 28919. 41869. 32808. 79780.\n", + " 91025. 33043. 69442. 47234. 86863. 25805. 43676. 59855. 35296. 36576.\n", + " 28513. 85448. 23791. 20597. 56940. 93210. 48992. 53736. 25378. 37722.\n", + " 69379. 33806. 94041. 74965. 34095. 96045. 86355. 27824. 42995. 21235.\n", + " 74585. 41833. 23908. 61953. 73760. 23333. 20440. 27658. 50943. 19003.\n", + " 46703. 21733. 20811. 11904. 43490. 57340. 49088. 47761. 61281. 25290.\n", + " 24239. 82664. 83210. 38736. 55437. 68041. 29066. 54337. 67616. 41082.\n", + " 50631. 19592. 55761. 41449. 62007. 21921. 42621. 63786. 82877. 10475.\n", + " 21952. 49721. 88340. 24589. 73769. 66670. 88854. 31266. 67267. 79270.\n", + " 36692. 44624. 19614. 83846. 51159. 83772. 88440. 25666. 52926. 40001.\n", + " 18024. 23220. 64125. 58042. 90034. 27972. 50989. 11885. 89451. 78904.\n", + " 70247. 32653. 93595. 90279. 53310. 22234. 91375. 22250. 51179. 38667.\n", + " 79487. 67763. 71943. 53526. 35005. 24721. 48587. 76310. 73205. 51056.\n", + " 58414. 23940. 44216. 29305. 53882. 91757. 33906. 68158. 42165. 68074.\n", + " 84978. 71135. 64642. 50071. 46754. 70410. 66957. 24213. 99790. 79751.\n", + " 86122. 82297. 89057. 43259. 25064. 25816. 41662. 96170. 46072. 37931.\n", + " 35127. 45473. 93087. 22398. 92983. 27689. 69654. 80744. 18608. 73168.\n", + " 70930. 62262. 91474. 61469. 16618. 48081. 67632. 34115. 23051. 23748.\n", + " 40589. 50809. 66676. 52339. 14973. 31546. 20836. 88592. 66943. 81872.\n", + " 22404. 21342. 34621. 62396. 97212. 49648. 97984. 26308. 63528. 20225.\n", + " 87620. 34990. 99934. 60804. 94648. 24516. 61063. 15169. 55390. 27592.\n", + " 61846. 83297. 55897. 21297. 87560. 89398. 36843. 34946. 75680. 49532.\n", + " 10269. 49714. 77517. 81082. 72540. 61546. 44818. 79797. 92717. 63568.\n", + " 73935. 18846. 38893. 84824. 20068. 97245. 51808. 71391. 23496. 23986.\n", + " 22974. 
61844. 24804. 27760. 52266. 23599. 36088. 70534. 27398. 85296.\n", + " 31063. 52367. 58651. 84831. 70263. 45354. 28334. 38772. 41479. 23909.\n", + " 48328. 86689. 24204. 25943. 62375. 70200. 79027. 62935. 26893. 47406.\n", + " 27572. 32802. 62739. 90844. 44685. 42589. 93383. 79583. 89129. 94389.\n", + " 84106. 76717. 51978. 47325. 86721. 24910. 43817. 59537. 54193. 86946.\n", + " 12160. 33701. 85702. 69417. 38644. 63209. 16061. 54230.]\n", + "\n", + "\n", + "Monthly Premium Auto:\n", + "[ 94. 108. 106. 68. 69. 67. 101. 71. 93. 79.\n", + " 80. 130. 35354. 61. 87. 63. 6464. 89. 74. 140.\n", + " 127. 96. 92. 72. 99. 105. 117. 65. 126. 82.\n", + " 64. 103. 118. 85. 77. 73. 95. 104. 115. 217.\n", + " 62. 110. 111. 114. 138. 76. 70. 81. 255. 150.\n", + " 123. 113. 116. 190. 78. 98. 159. 84. 112. 83.\n", + " 66. 90. 152. 136. 91. 86. 245. 109. 133. 135.\n", + " 1005. 225. 132. 153. 121. 139. 102. 119. 154. 161.\n", + " 156. 88. 100. 171. 244. 183. 176. 230. 129. 177.\n", + " 124. 181. 196. 131. 276. 253. 137. 180. 203. 192.\n", + " 107. 147. 165. 198. 125. 193. 128. 271. 97. 142.\n", + " 10202. 206. 182. 189. 160. 295. 210. 134. 35353. 239.\n", + " 188. 256. 173. 285. 148. 141. 197. 
199.]\n", + "\n", + "\n", + "Number of Open Complaints:\n", + "['1/0/00' '1/1/00' '1/2/00' '1/4/00' '1/3/00' '1/5/00']\n", + "\n", + "\n", + "Policy Type:\n", + "['Personal Auto' 'Corporate Auto' 'Special Auto']\n", + "\n", + "\n", + "Vehicle Class:\n", + "['Four-Door Car' 'Two-Door Car' 'SUV' 'Sports Car' 'Luxury Car'\n", + " 'Luxury SUV']\n", + "\n", + "\n", + "Total Claim Amount:\n", + "[1.13146493e+03 5.66472247e+02 5.29881344e+02 1.72693230e+01\n", + " 1.59383042e+02 3.21600000e+02 3.63029680e+02 5.11200000e+02\n", + " 4.25527834e+02 4.82400000e+02 2.03828760e+01 2.09851050e+01\n", + " 2.38201580e+01 3.17073170e+01 3.31928030e+01 4.52150590e+01\n", + " 4.60414520e+01 4.61581170e+01 4.80468690e+01 4.94511170e+01\n", + " 5.04221810e+01 5.05283550e+01 3.92604371e+02 7.21242206e+02\n", + " 4.56523850e+02 1.08799543e+03 5.19619150e+01 4.60800000e+02\n", + " 2.51992083e+02 3.45600000e+02 9.92576080e+01 5.04000000e+02\n", + " 5.67315780e+01 5.75623240e+01 6.04800000e+02 8.31272900e+00\n", + " 3.16599228e+02 3.12000000e+02 5.63007240e+01 3.93600000e+02\n", + " 7.01708239e+02 1.28705563e+02 6.79368378e+02 5.99871260e+01\n", + " 5.66400000e+02 6.16542620e+01 5.54400000e+02 4.65414770e+02\n", + " 4.96800000e+02 2.92800000e+02 2.56813837e+02 3.55200000e+02\n", + " 5.42319401e+02 3.03148399e+02 2.54380630e+01 7.87993313e+02\n", + " 8.40244130e+01 6.16937710e+01 2.32926145e+02 6.81797210e+01\n", + " 7.10433775e+02 5.52000000e+02 1.12265890e+03 4.08374746e+02\n", + " 4.08000000e+02 7.92000000e+02 5.32800000e+02 7.54358929e+02\n", + " 7.02990032e+02 2.06837111e+02 2.75395894e+02 4.59738128e+02\n", + " 3.36000000e+02 3.02400000e+02 4.30994107e+02 1.83600000e+03\n", + " 6.76324760e+01 8.44481918e+02 3.64800000e+02 7.22486994e+02\n", + " 5.90400000e+02 4.81027516e+02 1.12442773e+03 6.69682001e+02\n", + " 1.28409317e+03 2.40259479e+02 9.12000000e+02 5.56800000e+02\n", + " 1.15086827e+02 4.70400000e+02 3.83167471e+02 3.07963291e+02\n", + " 5.20364752e+02 4.66436375e+02 
4.51670309e+02 5.37600000e+02\n", + " 3.31200000e+02 4.28734656e+02 3.98400000e+02 4.94400000e+02\n", + " 3.69600000e+02 8.31752839e+02 1.56124914e+02 3.73428187e+02\n", + " 3.95934815e+02 3.50400000e+02 2.31201886e+02 2.89904105e+02\n", + " 7.29600000e+02 3.26400000e+02 1.05765111e+02 6.39464548e+02\n", + " 3.83442328e+02 3.16800000e+02 9.90859430e+01 3.07200000e+02\n", + " 5.66935022e+02 4.36800000e+02 4.12800000e+02 3.88800000e+02\n", + " 2.34541344e+03 7.73470977e+02 4.24883448e+02 5.23200000e+02\n", + " 5.08800000e+02 3.64240307e+02 6.38400000e+02 3.54729129e+02\n", + " 2.83995953e+02 4.03200000e+02 2.05431760e+01 3.85115437e+02\n", + " 1.40165035e+02 1.02879769e+02 5.68682890e+01 4.63158502e+02\n", + " 2.05444066e+02 7.41600000e+02 5.63719670e+01 8.13600000e+02\n", + " 7.95864079e+02 6.14400000e+02 7.24386810e+01 6.78815460e+01\n", + " 4.02636829e+02 6.41388616e+02 3.42481173e+02 3.40800000e+02\n", + " 4.12101933e+02 7.37005730e+01 3.74400000e+02 7.22024742e+02\n", + " 2.59361117e+02 3.96295614e+02 3.58281562e+02 6.53388564e+02\n", + " 1.21032372e+02 9.28133960e+01 5.28000000e+02 1.59636956e+02\n", + " 4.76156957e+02 7.27200000e+02 4.20783450e+01 5.61600000e+02\n", + " 2.00116060e+02 6.76944023e+02 7.66092950e+01 8.30623064e+02\n", + " 5.80473259e+02 6.00366830e+01 4.75310100e+01 3.03872752e+02\n", + " 1.00620067e+02 4.81339891e+02 1.10160000e+03 8.63200220e+01\n", + " 6.67200000e+02 5.66033300e+01 2.62865172e+02 3.09577946e+02\n", + " 3.65364581e+02 3.84000000e+02 4.89600000e+02 7.05600000e+02\n", + " 9.51528000e+00 2.44564334e+02 6.39105556e+02 4.53600000e+02\n", + " 5.54522969e+02 4.02449823e+02 3.27020539e+02 5.39530000e+00\n", + " 1.31401291e+02 9.93829430e+01 8.56800000e+02 7.39200000e+02\n", + " 5.18400000e+02 8.42183630e+01 6.45468770e+01 4.41600000e+02\n", + " 3.79106230e+01 2.02860399e+02 8.58098170e+01 9.11466610e+01\n", + " 2.86234931e+02 1.15920000e+03 4.66570791e+02 1.25240623e+03\n", + " 4.21484456e+02 4.30505942e+02 3.98502948e+02 
7.64676300e+00\n", + " 5.55105260e+01 3.51270869e+02 8.06400000e+02 6.47454583e+02\n", + " 1.28438230e+02 3.02033971e+02 4.80000000e+02 1.00316063e+03\n", + " 3.79200000e+02 2.44362072e+02 2.01455005e+02 9.15509800e+01\n", + " 9.18346680e+01 7.07430832e+02 9.29152510e+01 3.76126419e+02\n", + " 1.52184244e+02 1.76819414e+02 1.04454624e+02 9.41718054e+02\n", + " 4.68000000e+02 9.40303080e+01 6.78598810e+01 5.39843003e+02\n", + " 9.51931570e+01 9.53385050e+01 6.26400000e+02 4.94395024e+02\n", + " 1.15545086e+02 1.99389810e+01 7.99926741e+02 8.78400000e+02\n", + " 4.44470676e+02 2.68819985e+02 2.97600000e+02 1.01288069e+02\n", + " 1.31760000e+03 6.91412378e+02 8.04811859e+02 1.09086434e+03\n", + " 5.05870350e+01 1.51528482e+02 3.47075948e+02 2.71697529e+02\n", + " 8.49600000e+02 1.10484661e+02 1.13534474e+02 5.95200000e+02\n", + " 1.13609508e+02 1.33706349e+03 1.14273025e+02 1.25194389e+02\n", + " 2.35220971e+02 3.95341110e+02 1.14798771e+02 7.72798511e+02\n", + " 6.33600000e+02 1.68517149e+02 8.45654042e+02 6.68293970e+02\n", + " 7.00901632e+02 5.25600000e+02 2.52990000e+01 3.75330097e+02\n", + " 4.18233667e+02 3.98240791e+02 9.38513425e+02 3.38619869e+02\n", + " 3.39344531e+02 3.02818833e+02 6.19165344e+02 2.59561195e+02\n", + " 1.79161843e+02 6.62400000e+02 7.55018520e+01 3.06983596e+02\n", + " 8.40268480e+01 1.39489926e+02 6.07445900e+02 6.31124372e+02\n", + " 6.28023494e+02 4.26655599e+02 9.14179230e+01 3.17844812e+02\n", + " 1.18446235e+02 8.64615820e+01 1.18454974e+02 3.62774545e+02\n", + " 2.21856184e+02 9.80169081e+02 6.14675906e+02 1.45252168e+02\n", + " 3.74240783e+02 6.19200000e+02 8.06692570e+01 1.32480000e+03\n", + " 4.68566133e+02 8.42437850e+02 4.75200000e+02 1.17193117e+03\n", + " 2.68471802e+02 1.38722385e+02 4.67248020e+02 4.67503236e+02\n", + " 1.21440000e+03 1.92085299e+02 6.19973889e+02 1.00278255e+03\n", + " 2.51774574e+02 8.66208321e+02 1.21092095e+03 5.99648466e+02\n", + " 2.02772444e+03 7.28520470e+01 4.49819671e+02 2.59060862e+02\n", + " 
4.56000000e+02 2.99356083e+02 6.99167900e+02 1.17959654e+02\n", + " 5.45240341e+02 3.28231432e+02 6.65686420e+01 1.38240000e+03\n", + " 1.21306839e+02 6.09600000e+02 5.42400000e+02 1.25933005e+02\n", + " 6.55413330e+02 6.73342650e+02 5.13600000e+02 2.38104910e+01\n", + " 4.51200000e+02 2.31922173e+02 7.72800000e+02 6.64980242e+02\n", + " 1.28645946e+02 4.64920390e+01 4.32000000e+02 7.80851490e+01\n", + " 5.64466556e+02 2.89040734e+02 1.55938593e+02 3.26549425e+02\n", + " 6.53656680e+02 1.03935601e+02 8.09532341e+02 5.80725310e+02\n", + " 1.33735395e+02 2.38005074e+02 1.57767442e+03 1.35892444e+02\n", + " 1.35261250e+02 5.71200000e+02 6.37063458e+02 7.59360960e+01\n", + " 7.99200000e+02 1.36291083e+02 6.75309040e+01 3.12921256e+02\n", + " 1.66937747e+02 1.35382194e+02 1.77429540e+01 1.83836700e+00\n", + " 1.97776009e+02 6.12102262e+02 1.19303615e+03 6.00000000e+02\n", + " 1.36829537e+02 9.26400000e+02 3.92636400e+02 2.15226476e+02\n", + " 1.60598662e+02 3.71803029e+02 4.43670399e+02 6.12300581e+02\n", + " 2.90391526e+02 3.16795337e+02 4.05527937e+02 2.08598246e+02\n", + " 1.38130879e+02 3.01437365e+02 4.56473115e+02 1.52338562e+02\n", + " 4.91755368e+02 4.85174390e+01 4.30375049e+02 2.46489123e+02\n", + " 1.28969729e+02 1.85355353e+02 1.30080000e+03 9.07130500e+00\n", + " 1.33475315e+02 1.41199465e+02 6.23223617e+02 4.27200000e+02\n", + " 1.41725051e+02 2.11336937e+02 1.47080303e+02 8.62762957e+02\n", + " 1.13367765e+02 9.43200000e+02 1.17672722e+02 2.25145949e+02\n", + " 1.82432565e+02 1.48173152e+02 1.55570802e+02 2.12391975e+02\n", + " 1.74041566e+02 4.86278557e+02 1.92470900e+00 2.62122050e+02\n", + " 9.21600000e+02 5.41695658e+02 3.27682669e+02 1.26557030e+03\n", + " 6.30431970e+01 1.56313630e+01 3.75866091e+02 4.52616872e+02\n", + " 1.13450122e+02 1.60345100e+01 6.15272280e+02 6.15256301e+02\n", + " 5.12662450e+02 7.45239350e+01 4.26072946e+02 1.69287785e+02\n", + " 4.19464143e+02 1.58077504e+02 6.69600000e+02 3.95729716e+02\n", + " 1.59756733e+02 
5.85575520e+01 8.58297100e+00 2.85418473e+02\n", + " 4.10508316e+02 2.44231350e+02 2.18598065e+02 6.43826716e+02\n", + " 4.97047297e+02 1.02240000e+03 6.57600000e+02 2.30245772e+02\n", + " 5.62275100e+00 1.60075260e+02 3.46513050e+01 6.78100487e+02\n", + " 1.74588413e+02 1.59391681e+02 6.98400000e+02 2.32711071e+02\n", + " 1.63046956e+02 7.98656050e+01 3.82085897e+02 4.83483190e+01\n", + " 1.33425609e+02 5.61414794e+02 4.45287788e+02 1.65570243e+02\n", + " 4.07996840e+02 8.44229478e+02 3.40306584e+02 1.06647493e+02\n", + " 6.26116259e+02 7.84657810e+02 5.47200000e+02 8.62777200e+01\n", + " 6.35165720e+01 1.20015609e+02 1.25413790e+03 4.17600000e+02\n", + " 7.98002689e+02 8.20416840e+01 2.28190880e+01 4.22400000e+02\n", + " 6.39971388e+02 5.11497882e+02 2.39540223e+02 5.18180364e+02\n", + " 7.15252366e+02 4.02070719e+02 9.48140320e+01 9.72000000e+02\n", + " 7.38830440e+01 5.74024018e+02 6.32715382e+02 4.23862600e+00\n", + " 4.22480870e+01 2.71451510e+01 2.70002766e+02 4.94946438e+02\n", + " 1.42567008e+02 4.25266308e+02 2.51334247e+02 3.69818708e+02\n", + " 1.61419528e+02 2.55999709e+02 4.84800000e+02 6.17695640e+01\n", + " 1.31040000e+03 2.50001424e+02 1.41799422e+02 4.97970160e+01\n", + " 3.72175592e+02 5.42143850e+02 1.36080000e+03 7.68000000e+02\n", + " 7.26329340e+01 7.20601429e+02 5.76000000e+02 4.81025786e+02\n", + " 4.66176731e+02 1.59266473e+02 4.12831670e+01 5.03808329e+02\n", + " 8.59599411e+02 3.05653785e+02 2.78902846e+02 1.41600000e+03\n", + " 1.39963594e+02 7.34400000e+02 3.72998650e+01 9.58165160e+01\n", + " 1.00800000e+03 4.46400000e+02 7.22053620e+01 4.01592109e+02\n", + " 2.53183568e+02 6.43200000e+02 7.99673766e+02 2.79878670e+01\n", + " 1.66772960e+02 2.70563995e+02 1.78006524e+02 4.04265696e+02\n", + " 1.89189350e+01 5.13818403e+02 1.31828507e+02 6.28800000e+02\n", + " 1.78986788e+02 3.92235698e+02 8.73600000e+02 3.82107000e-01\n", + " 1.87363583e+02 3.50853987e+02 2.17973168e+02 2.59340640e+01\n", + " 1.88938397e+02 1.53205591e+02 
2.89323968e+03 6.29532731e+02\n", + " 3.61284757e+02 1.18598830e+03 4.25800112e+02 1.90434460e+02\n", + " 1.93505325e+02 2.40877740e+01 2.19288706e+02 8.81360959e+02\n", + " 2.69516270e+01 4.66122541e+02 6.07095655e+02 2.90381707e+02\n", + " 5.58099357e+02 8.28000000e+02 5.40514115e+02 1.00049832e+02\n", + " 5.98619630e+01 1.93570320e+02 1.99797270e+02 5.30847530e+01\n", + " 2.56438030e+02 2.11136067e+02 7.43508930e+01 6.21464468e+02\n", + " 3.60055890e+02 6.59548130e+01 4.07450118e+02 1.22880000e+03\n", + " 2.40636930e+01 3.11329282e+02 4.26891350e+01 4.60323855e+02\n", + " 2.13225001e+02 3.00607591e+02 2.62504882e+02 2.63365432e+02\n", + " 2.56268091e+02 2.15818200e+02 1.35360000e+03 4.99200000e+02\n", + " 4.55079520e+01 6.91200000e+02 8.24099220e+01 3.57642982e+02\n", + " 4.20356980e+02 1.36787725e+02 2.20186677e+02 1.70798204e+02\n", + " 2.66165535e+02 9.89217820e+01 2.81295903e+02 6.31743039e+02\n", + " 2.92095210e+01 1.36883999e+02 3.17556010e+01 2.24275820e+02\n", + " 8.30400000e+02 4.80159011e+02 3.90792553e+02 7.20711950e+01\n", + " 4.19652520e+01 1.02700003e+03 3.19820747e+02 4.11011162e+02\n", + " 6.48000000e+02 4.11058500e+00 3.51149904e+02 2.84000172e+02\n", + " 4.96474767e+02 9.25137143e+02 1.31640970e+01 4.63335061e+02\n", + " 1.80667969e+02 3.02764283e+02 1.41922839e+02 3.49783046e+02\n", + " 5.40655380e+01 3.42515136e+02 1.02717726e+03 2.75989978e+02\n", + " 4.72599683e+02 1.33693172e+03 7.34594600e+00 5.40141566e+02\n", + " 3.83363758e+02 5.37765151e+02 3.21873474e+02 1.95756830e+01\n", + " 1.08138715e+02 7.07303416e+02 1.06733313e+03 8.07947292e+02\n", + " 6.18630955e+02 4.04272806e+02 7.10400000e+02 7.07977614e+02\n", + " 2.32242326e+02 6.82260010e+01 2.87149807e+02 3.91636628e+02\n", + " 2.39328571e+02 2.32674417e+02 3.36509610e+02 2.44212286e+02\n", + " 3.28870868e+02 8.50637080e+01 1.41840000e+03 3.34408717e+02\n", + " 4.20964150e+01 3.00528579e+02 1.44782152e+02 6.45982160e+01\n", + " 3.47857619e+02 2.45447622e+02 9.55200000e+02 
7.20752945e+02\n", + " 3.40656963e+02 5.46524896e+02 4.89411833e+02 1.71325856e+02\n", + " 2.49085887e+02 3.08748690e+01 3.61455219e+02 2.07320041e+02]\n", + "\n", + "\n" + ] + } + ], + "source": [ + "for i in df.columns:\n", + " print(f\"{i}:\\n{df[i].unique()}\\n\\n\") # print each column's name followed by its unique values\n" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "id": "8c4ccb37-ed72-4585-964e-2532bfa49326", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "1 0\n", + "2 0\n", + "3 0\n", + "4 0\n", + "5 0\n", + " ..\n", + "1066 1\n", + "1067 2\n", + "1068 0\n", + "1069 0\n", + "1070 0\n", + "Name: Number of Open Complaints, Length: 952, dtype: object" + ] + }, + "execution_count": 58, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = df.copy()\n", + "# Extracting the middle field (index 1 after splitting on '/') as the count of complaints\n", + "df['Number of Open Complaints'] = df['Number of Open Complaints'].str.split('/').str[1]\n", + "# Verifying the cleaned column to ensure it only reflects the complaint count\n", + "df['Number of Open Complaints']" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "id": "ffb308fc-dc58-42ae-bbbb-4f7f37e0665a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0" + ] + }, + "execution_count": 59, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nan_in_complaints_column_alternative = df['Number of Open Complaints'].isnull().sum()\n", + "nan_in_complaints_column_alternative\n" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "id": "7864376b-9b1c-4b4a-a62a-41531424ffe1", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['0', '1', '2', '4', '3', '5'], dtype=object)" + ] + }, + "execution_count": 60, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Number_of_Open_Complaints_uniques = df['Number of Open Complaints'].unique()\n", +
"Number_of_Open_Complaints_uniques" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "id": "6a220812-1dfe-4fc1-85b5-48fd4a25bef1", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1 0\n", + "2 0\n", + "3 0\n", + "4 0\n", + "5 0\n", + " ..\n", + "1066 1\n", + "1067 2\n", + "1068 0\n", + "1069 0\n", + "1070 0\n", + "Name: Number of Open Complaints, Length: 952, dtype: int64" + ] + }, + "execution_count": 61, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['Number of Open Complaints'] = pd.to_numeric(df['Number of Open Complaints'], errors = \"coerce\")\n", + "df['Number of Open Complaints']" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "id": "ae04da64-4147-4639-910e-a53e91ac071f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0, 1, 2, 4, 3, 5])" + ] + }, + "execution_count": 62, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['Number of Open Complaints'].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "id": "0f0166b7-962a-4e6c-ab9c-9adb65d99e84", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "int64\n" + ] + } + ], + "source": [ + "print(df['Number of Open Complaints'].dtype)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "id": "5a0afcb6-ee92-440d-89b9-742281731e5f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1 697.95\n", + "2 1288.74\n", + "3 764.59\n", + "4 536.31\n", + "5 825.63\n", + " ... 
# %%
# Convert 'Customer Lifetime Value' from text to a rounded float column in a
# single chained pass: drop the '%' sign, cast to float, scale, round to 2 dp.
# NOTE(review): the divisor 1000 is undocumented - confirm that it (rather
# than 100 for a percentage) is the intended scaling.
clv_col = 'Customer Lifetime Value'
df[clv_col] = (
    df[clv_col]
    .str.replace('%', '')
    .astype(float)
    .div(1000)
    .round(2)
)

df[clv_col]

# %%
df[clv_col].dtype

# %%
# Peek at the first 20 distinct values to sanity-check the conversion.
unique_values = df[clv_col].unique()[:20]
print(unique_values)
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CustomerSTGENDEREducationCustomer Lifetime ValueIncomeMonthly Premium AutoNumber of Open ComplaintsPolicy TypeVehicle ClassTotal Claim Amount
1QZ44356ArizonaFBachelor697.950.094.00Personal AutoFour-Door Car1131.464935
2AI49188NevadaFBachelor1288.7448767.0108.00Personal AutoTwo-Door Car566.472247
3WW63253CaliforniaMBachelor764.590.0106.00Corporate AutoSUV529.881344
4GA49547WashingtonMHigh School or Below536.3136357.068.00Personal AutoFour-Door Car17.269323
5OC83172OregonFBachelor825.6362902.069.00Personal AutoTwo-Door Car159.383042
\n", + "
" + ], + "text/plain": [ + " Customer ST GENDER Education Customer Lifetime Value \\\n", + "1 QZ44356 Arizona F Bachelor 697.95 \n", + "2 AI49188 Nevada F Bachelor 1288.74 \n", + "3 WW63253 California M Bachelor 764.59 \n", + "4 GA49547 Washington M High School or Below 536.31 \n", + "5 OC83172 Oregon F Bachelor 825.63 \n", + "\n", + " Income Monthly Premium Auto Number of Open Complaints Policy Type \\\n", + "1 0.0 94.0 0 Personal Auto \n", + "2 48767.0 108.0 0 Personal Auto \n", + "3 0.0 106.0 0 Corporate Auto \n", + "4 36357.0 68.0 0 Personal Auto \n", + "5 62902.0 69.0 0 Personal Auto \n", + "\n", + " Vehicle Class Total Claim Amount \n", + "1 Four-Door Car 1131.464935 \n", + "2 Two-Door Car 566.472247 \n", + "3 SUV 529.881344 \n", + "4 Four-Door Car 17.269323 \n", + "5 Two-Door Car 159.383042 " + ] + }, + "execution_count": 68, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "id": "c3999f49-ce34-409b-8954-6515f1530e1e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['Customer', 'ST', 'GENDER', 'Education', 'Policy Type', 'Vehicle Class']" + ] + }, + "execution_count": 69, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "categorical_columns = [col for col in df.columns if df[col].dtype == 'object']\n", + "categorical_columns " + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "id": "94d4781f-0ca2-4ec6-a7fd-26bfb5da4f6b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Customer:\n", + "['QZ44356' 'AI49188' 'WW63253' 'GA49547' 'OC83172' 'XZ87318' 'CF85061'\n", + " 'DY87989' 'BQ94931' 'SX51350' 'QK46697' 'HS14476' 'HD95276' 'YD87931'\n", + " 'NW21079' 'YB66933' 'OW15518' 'GP39118' 'SG20925' 'FM14335' 'HS28694'\n", + " 'LH92841' 'AZ95587' 'DS81757' 'OJ94107' 'LP84436' 'FF22360' 'LM19287'\n", + " 'ZU18643' 'AZ82578' 'XC67861' 'YC43143' 'EK59571' 
'PA38372' 'RO18530'\n", + " 'PD27940' 'BS77946' 'YM50253' 'NR15332' 'RC62865' 'CC15295' 'KA61892'\n", + " 'OS94884' 'ND87334' 'OY51402' 'YL74911' 'GK92563' 'HL53154' 'RI78966'\n", + " 'IC13702' 'BE10809' 'HT87217' 'TH95618' 'TS19868' 'LP45550' 'QR87004'\n", + " 'OE75747' 'DX91392' 'AB72731' 'GX84338' 'IS12901' 'BN90616' 'HH90090'\n", + " 'IU25463' 'KC11055' 'PD33979' 'NK71023' 'AB13432' 'OZ97704' 'UF46533'\n", + " 'XP47431' 'GK73582' 'RV98763' 'II62831' 'XK33449' 'TR85083' 'EO95328'\n", + " 'EN21086' 'YL83902' 'AZ62651' 'ZW25874' 'EH41854' 'MW70227' 'SL22297'\n", + " 'RV14138' 'UO62808' 'ZX64745' 'FL34139' 'TS11219' 'XX12304' 'SD64087'\n", + " 'OY38576' 'BG76355' 'IP66913' 'LE95702' 'KX54357' 'EZ78112' 'XN16891'\n", + " 'XK31350' 'CC30924' 'IT78748' 'KY33386' 'CO44221' 'LK60013' 'DE21533'\n", + " 'YS94121' 'UK68427' 'TE49565' 'RA88421' 'KQ51983' 'CD88896' 'YV22553'\n", + " 'WU14435' 'XV84099' 'RI24911' 'KO26461' 'HI14283' 'PT50227' 'BH36570'\n", + " 'TX17484' 'CT41158' 'AO87348' 'DE55857' 'LF66923' 'CN24514' 'UW32074'\n", + " 'HP36979' 'PP40919' 'RO73268' 'HO61691' 'BS13062' 'FO35655' 'HR10526'\n", + " 'IA63417' 'BH35016' 'PK52952' 'OD76309' 'IL28481' 'GY55092' 'UF33451'\n", + " 'CF15558' 'JM62924' 'EM66435' 'QX45933' 'JI71369' 'JU93290' 'GU66096'\n", + " 'UC33108' 'LW93867' 'OU78470' 'XW90265' 'HS67749' 'VZ51506' 'UI64281'\n", + " 'AE98193' 'AZ74055' 'XS76911' 'AY40674' 'NA12740' 'UA84837' 'DJ51510'\n", + " 'VM58985' 'OH60605' 'UO98052' 'NC53424' 'LQ13873' 'LA97014' 'NB79936'\n", + " 'NT89061' 'AF10970' 'ZG48513' 'JQ59145' 'FE84989' 'JT52858' 'MC62068'\n", + " 'EU27538' 'RH42306' 'US23612' 'WV76014' 'RK96223' 'MF82000' 'FM46980'\n", + " 'SY56792' 'RF61565' 'IM94808' 'VI14730' 'YR34119' 'RR77985' 'QD28391'\n", + " 'WV17090' 'TM23514' 'MQ68407' 'GJ59592' 'FY56083' 'UA94723' 'FW91032'\n", + " 'DE34457' 'HD32044' 'HH30454' 'AH84063' 'QA17596' 'XI41052' 'DI30528'\n", + " 'SC66359' 'EN61670' 'DQ10761' 'BQ51587' 'JE21522' 'WS47147' 'ZA64638'\n", + " 'EW38459' 'QW87316' 
'IC43478' 'TE34064' 'WU60905' 'YM18992' 'PD55753'\n", + " 'KU56006' 'MJ69973' 'TW43626' 'XX84133' 'ZW84453' 'HO29524' 'VE89726'\n", + " 'GE87503' 'PX90263' 'NI17718' 'FY32213' 'RZ13254' 'GN45013' 'NM39588'\n", + " 'KU84464' 'YH43527' 'RO30676' 'QL59704' 'QH19450' 'SA54664' 'CI38330'\n", + " 'WB38524' 'CE56187' 'JL19416' 'JZ61422' 'LA13377' 'NC99948' 'QD34785'\n", + " 'RO26085' 'ES57969' 'JK55587' 'RN97635' 'BI76326' 'JA34909' 'OJ90342'\n", + " 'CM88932' 'JJ97525' 'XV21647' 'MC83487' 'BL90769' 'CR57148' 'CP85232'\n", + " 'YL74732' 'FG16766' 'NV55438' 'RM10880' 'GL56175' 'UK52289' 'OT85112'\n", + " 'BC62782' 'TI19722' 'JP30654' 'UM45563' 'EN60878' 'JF36291' 'BK59444'\n", + " 'MK70700' 'IW71076' 'AP98768' 'OM24164' 'HR85211' 'VC87846' 'ZM92052'\n", + " 'ON73702' 'QQ90441' 'HU35721' 'YP47665' 'FU99476' 'AG85615' 'OY74069'\n", + " 'DJ91267' 'KB72438' 'TR67616' 'GF65731' 'HB67642' 'DP84567' 'VV77534'\n", + " 'GL67540' 'SV50502' 'UK59698' 'OA57352' 'ZF84449' 'AX86150' 'HG39060'\n", + " 'EM29359' 'SF57173' 'OT47603' 'SW31412' 'JS36322' 'RE81445' 'RM24280'\n", + " 'LC25393' 'UX38930' 'HD95496' 'RX24650' 'DW19309' 'MT41386' 'WZ40465'\n", + " 'DB42794' 'JB50798' 'IP69763' 'TE35785' 'HX74855' 'QN65180' 'GE47180'\n", + " 'VQ38776' 'BH86846' 'IN17648' 'DF95759' 'QG45324' 'MN61620' 'YH86390'\n", + " 'FY13480' 'YH61661' 'NL93182' 'WE68644' 'EZ30498' 'QY74517' 'NM88660'\n", + " 'MZ82036' 'ID20929' 'EY50028' 'TT82373' 'OH64088' 'SK97780' 'IO33050'\n", + " 'XA55917' 'JK32620' 'RQ19236' 'QC47433' 'RA93608' 'XH97711' 'AU96286'\n", + " 'KC17170' 'ZN47335' 'EI46264' 'EK87864' 'GV45403' 'QK31192' 'LU89008'\n", + " 'NS10490' 'KL98495' 'IU96845' 'QL93655' 'PF40592' 'LZ34046' 'JC80093'\n", + " 'YE88490' 'YC80498' 'AI85843' 'XD66024' 'FY51713' 'PH26378' 'WQ18638'\n", + " 'KY14688' 'TC97762' 'QC87108' 'CX12134' 'SM73248' 'CK19789' 'UV12583'\n", + " 'JC11405' 'KA89683' 'BG85305' 'UQ87917' 'XN11823' 'OS46571' 'PX17116'\n", + " 'RP19541' 'ZR25747' 'NQ86532' 'JY27336' 'PB54378' 'SV38190' 
'CV24005'\n", + " 'EX28656' 'CF57022' 'GM16780' 'BX94438' 'RM41745' 'XR70252' 'YH92099'\n", + " 'SG81493' 'ZX23819' 'FJ54907' 'CU26127' 'YH60476' 'ZZ97035' 'GE82737'\n", + " 'KY21873' 'UA51318' 'BV55014' 'HX21307' 'LQ68252' 'CR92802' 'SL35268'\n", + " 'RD62882' 'JS42382' 'BT30554' 'VP57424' 'VU19243' 'TA82973' 'GK71720'\n", + " 'OQ61223' 'LL62746' 'JQ56711' 'AW77988' 'QP84605' 'MY97912' 'IB87349'\n", + " 'AW73065' 'BW80872' 'PX70175' 'KF75098' 'IS50283' 'MY64920' 'KN34250'\n", + " 'GN46207' 'KL57176' 'MN94234' 'JY90595' 'HK26543' 'PN86062' 'VW27730'\n", + " 'SH55671' 'MO56878' 'VO38365' 'SV35618' 'RX12347' 'FR55658' 'XS12556'\n", + " 'ZU73588' 'WT43034' 'VM13430' 'TC78849' 'VC34764' 'WO90953' 'IU47468'\n", + " 'KO46064' 'RB34917' 'BI38192' 'PU18983' 'SW79912' 'ES39217' 'KP72427'\n", + " 'UA19178' 'PR53785' 'XF57481' 'CN90378' 'KI56154' 'UI55951' 'FF28650'\n", + " 'FS55302' 'TN79487' 'HG32616' 'UK41984' 'LZ52266' 'PM27367' 'ZK21724'\n", + " 'BH35482' 'QE22757' 'ON77649' 'RN82884' 'CQ75652' 'FF58467' 'BS83666'\n", + " 'WO29605' 'TL77607' 'EZ50606' 'OS39723' 'FN69743' 'XW96958' 'TU92578'\n", + " 'TL43709' 'YE68736' 'OB96537' 'EU68825' 'CC31456' 'DJ77787' 'LN26837'\n", + " 'YI92916' 'NW54906' 'ME77513' 'UK76891' 'SI26888' 'YD74948' 'HB64268'\n", + " 'BW52697' 'NL41409' 'OD69005' 'ZZ91716' 'UK70255' 'QT25383' 'AW18068'\n", + " 'NS45347' 'FV19421' 'XW89091' 'YC11951' 'UY18770' 'RA49085' 'BG84194'\n", + " 'PT64580' 'MR67738' 'DM95829' 'DB75522' 'LM34525' 'WW30771' 'QP65569'\n", + " 'TN50051' 'UO86707' 'JA41698' 'NX18774' 'DA69469' 'CN23147' 'RA68844'\n", + " 'GH42026' 'BD16530' 'JH91579' 'WK23685' 'GR62267' 'PI78084' 'GF97874'\n", + " 'ZH19885' 'UK25655' 'QR45101' 'EL93539' 'EE99484' 'DP46882' 'WP41146'\n", + " 'TK60799' 'DN29808' 'SS59521' 'NG66579' 'TC14209' 'ED50963' 'GP40701'\n", + " 'CP98451' 'NX52648' 'ZC32510' 'NG27780' 'HN95240' 'EB59129' 'RA70851'\n", + " 'PM19162' 'MS59005' 'SU71163' 'BD35676' 'NI44621' 'EW33419' 'HX44948'\n", + " 'DL36983' 'XR87264' 'NN99001' 
'XV95530' 'OL97871' 'HQ23708' 'WR63188'\n", + " 'NG82219' 'KU29408' 'RE46783' 'RU94434' 'GI82355' 'VO26340' 'NV61299'\n", + " 'DX31066' 'CY50337' 'TJ20375' 'EP72155' 'JJ76159' 'BG15419' 'AO74776'\n", + " 'HQ82233' 'OL72737' 'ZQ59828' 'NZ15548' 'XK61304' 'EJ44139' 'CM94425'\n", + " 'OV54878' 'JF57282' 'MY37953' 'XP64922' 'WL65572' 'LN50325' 'HJ15383'\n", + " 'KH59823' 'YM79169' 'DR38127' 'PU42145' 'KM33477' 'RI53167' 'OF77789'\n", + " 'YB33445' 'BA17836' 'JS43228' 'BB11622' 'HQ70429' 'WK88044' 'LA80525'\n", + " 'EH16250' 'PU41872' 'HB85743' 'MM71959' 'MB83663' 'KR43119' 'KH24214'\n", + " 'AC40767' 'HP55391' 'EG62398' 'VS19949' 'AM92343' 'GI68556' 'JT11876'\n", + " 'XR64251' 'MK34957' 'GP18756' 'AP23850' 'KQ65521' 'EJ19449' 'QB70027'\n", + " 'QW47320' 'KH64733' 'ON59472' 'HP94242' 'RV15398' 'EA25683' 'PW73754'\n", + " 'MC71942' 'OX72195' 'YQ99152' 'KI19439' 'PM76175' 'US45383' 'GT38956'\n", + " 'SN41301' 'BE62503' 'PA16884' 'NC58480' 'NS39326' 'PN18507' 'EK91340'\n", + " 'JY16280' 'ZW71731' 'ZC24631' 'YR34689' 'RT65829' 'BZ12077' 'WM65373'\n", + " 'NH35059' 'QD38160' 'BM15160' 'VY79030' 'EV19512' 'TE13577' 'WY97929'\n", + " 'YG20683' 'FK75497' 'NE60110' 'TN36521' 'HG33568' 'TW17878' 'ZO83562'\n", + " 'CH97539' 'CV29889' 'MO33320' 'QZ81258' 'NY56352' 'EA27048' 'UT38865'\n", + " 'QC89139' 'LA14484' 'HN57556' 'CV31235' 'WR45726' 'LB25094' 'KW56110'\n", + " 'XO36233' 'ZX86243' 'DW29763' 'CT83377' 'OQ90898' 'GO77248' 'QW33258'\n", + " 'OU79745' 'VZ79886' 'FI92440' 'YG85980' 'QM74621' 'EI71732' 'VN79010'\n", + " 'FI61723' 'OH55411' 'TF10720' 'NW30838' 'CB58476' 'WI69346' 'FS76657'\n", + " 'YX89016' 'PK28821' 'MB51200' 'XG44587' 'FG91922' 'OM99303' 'RV67546'\n", + " 'UJ79253' 'PN98247' 'IB67546' 'OE19087' 'CM95716' 'MW62634' 'QW67581'\n", + " 'SN16059' 'OE51254' 'RM42344' 'GB35238' 'ML82674' 'EI85244' 'DE28132'\n", + " 'TV25678' 'TY26512' 'OB69153' 'QZ77637' 'XN41715' 'QR15857' 'FL69363'\n", + " 'IS30295' 'WA25797' 'NL59519' 'ZU93025' 'DK94262' 'UQ30615' 'OR40060'\n", + " 
'DK32872' 'FA46418' 'ER19995' 'KI75855' 'ND41876' 'PN21042' 'GJ43254'\n", + " 'AL46984' 'JP58047' 'ZE85014' 'KU88219' 'UU98729' 'WS82822' 'YB49933'\n", + " 'XC16387' 'XJ96748' 'TM98684' 'AY18433' 'DM74502' 'FT56968' 'OX36896'\n", + " 'BZ65376' 'LN34660' 'JC29295' 'KJ87930' 'XT36360' 'IX35050' 'UN97379'\n", + " 'MR57294' 'UG79499' 'UA50747' 'GL20444' 'SP58110' 'XM91635' 'TV82603'\n", + " 'BB82067' 'JP94676' 'VU53417' 'IW54795' 'RN78170' 'IX55883' 'XM72420'\n", + " 'GC15104' 'RX13282' 'QA85890' 'IR62668' 'AL96740' 'SS48498' 'PE39479'\n", + " 'JH62891' 'FI20423' 'PM13394' 'YV67971' 'QD31377' 'YG10247' 'FE73696'\n", + " 'SW19699' 'QJ40732' 'HM76207' 'NT59303' 'PU41393' 'QO86948' 'QN10888'\n", + " 'VY19543' 'XC15133' 'ST43550' 'FX36546' 'JX68983' 'HX78576' 'ZQ11381'\n", + " 'ON39271' 'SB18278' 'ZT30559' 'XI41106' 'ZS88847' 'RU49126' 'KR62797'\n", + " 'ZJ73220' 'FY62633' 'CU36986' 'WZ53904' 'AA71604' 'TD10493' 'LY97989'\n", + " 'VX39856' 'TP51897' 'QQ89253' 'EI91403' 'QG15435' 'FZ55002' 'HX77930'\n", + " 'UN37063' 'VB87946' 'AB60627' 'TA34903' 'AQ51368' 'NZ26102' 'GB45753'\n", + " 'BV79904' 'OB49075' 'DS97676' 'JO63462' 'NJ10602' 'RS24501' 'VT78274'\n", + " 'SU56153' 'MN20737' 'KL43114' 'YQ15567' 'TR88637' 'TC88986' 'XX88577'\n", + " 'NE49052' 'KX17826' 'CC91503' 'WH32183' 'ES90681' 'DW96592' 'MT23134'\n", + " 'BM69081' 'MB90871' 'QL77686' 'ON77827' 'KP18988' 'TI92884' 'JH73503'\n", + " 'YE97964' 'VA30351' 'PV55726' 'UC88305' 'TS53809' 'ZV32120' 'FB80807'\n", + " 'AS55677' 'WA15684' 'SA50567' 'KJ31611' 'VL37375' 'KN21017' 'PX44289'\n", + " 'AM97901' 'RE42925' 'TR81766' 'CH85057' 'UP71482' 'EG40670' 'HV83672'\n", + " 'MG10140' 'TC44716' 'QO65264' 'EB66698' 'OT52034' 'CH85444' 'PU85769'\n", + " 'UI73201' 'SL50592' 'XP11075' 'SI31236' 'JN26745' 'VK48036' 'JX76668'\n", + " 'DS45802' 'OA96690' 'EM27919' 'QO41043' 'OV50124' 'PR31642' 'BU41599'\n", + " 'TK30357' 'NF31087' 'NH16984' 'OS75493' 'VT63298' 'QS75550' 'SZ16483'\n", + " 'VM92311' 'NJ46849' 'WZ31900' 'RG30482' 'ZM86949' 
# %%
# Distinct values of each categorical column.
for column_name in categorical_columns:
    distinct_values = df[column_name].unique()
    print(f"{column_name}:\n{distinct_values}\n\n")

# %% [markdown]
# the range of values for numerical columns

# %%
# Map every float64/int64 column to its (min, max) pair. Iterating over the
# column Series keeps each column's own dtype for the reported bounds.
numerical_columns = df.select_dtypes(include=['float64', 'int64']).columns
numerical_ranges = {
    name: (series.min(), series.max())
    for name, series in df[numerical_columns].items()
}
numerical_ranges
"stream", + "text": [ + "Customer object\n", + "ST object\n", + "GENDER object\n", + "Education object\n", + "Customer Lifetime Value float64\n", + "Income float64\n", + "Monthly Premium Auto float64\n", + "Number of Open Complaints int64\n", + "Policy Type object\n", + "Vehicle Class object\n", + "Total Claim Amount float64\n", + "dtype: object\n" + ] + } + ], + "source": [ + "\n", + "\n", + "print(df.dtypes)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "id": "5cb155c6-af3c-4082-8fbc-5a1e441abc27", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Customer 952\n", + "ST 8\n", + "GENDER 5\n", + "Education 6\n", + "Customer Lifetime Value 918\n", + "Income 688\n", + "Monthly Premium Auto 128\n", + "Number of Open Complaints 6\n", + "Policy Type 3\n", + "Vehicle Class 6\n", + "Total Claim Amount 688\n", + "dtype: int64" + ] + }, + "execution_count": 73, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.nunique()\n", + "\n", + "#ST, GENDER, Education " + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "id": "4f0b2465-5a67-44a5-b331-2e261d976d14", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0, 1, 2, 4, 3, 5])" + ] + }, + "execution_count": 74, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['Number of Open Complaints'].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "id": "5dee1d53-08fa-4f63-bd59-0c05d1ef2e4f", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "\n", + "\n", + "df['ST'] = df['ST'].replace({\n", + " 'Arizona': 'AZ', \n", + " 'Nevada': 'NV', \n", + " 'California': 'CA', \n", + " 'Washington': 'WA', \n", + " 'Oregon': 'OR', \n", + " 'Cali': 'CA' # Standardiser les variantes\n", + "})\n", + "\n", + "\n", + "df['GENDER'] = df['GENDER'].replace({\n", + " 'Femal': 'F', \n", + " 'female': 'F', \n", + " 'Female': 'F', \n", + " 'Male': 'M', \n", + " 'male': 'M'\n", + "})\n", + 
"\n", + "\n", + "df['Education'] = df['Education'].replace({\n", + " 'Bachelor': 'Bachelors', \n", + " 'Bachelors': 'Bachelors', \n", + " 'High School or Below': 'High School', \n", + " 'Master': 'Masters'\n", + "})\n", + "\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "id": "80080e8a-341f-4a66-bdf2-7932983850d8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Unique values in 'Customer': ['QZ44356' 'AI49188' 'WW63253' 'GA49547' 'OC83172' 'XZ87318' 'CF85061'\n", + " 'DY87989' 'BQ94931' 'SX51350' 'QK46697' 'HS14476' 'HD95276' 'YD87931'\n", + " 'NW21079' 'YB66933' 'OW15518' 'GP39118' 'SG20925' 'FM14335' 'HS28694'\n", + " 'LH92841' 'AZ95587' 'DS81757' 'OJ94107' 'LP84436' 'FF22360' 'LM19287'\n", + " 'ZU18643' 'AZ82578' 'XC67861' 'YC43143' 'EK59571' 'PA38372' 'RO18530'\n", + " 'PD27940' 'BS77946' 'YM50253' 'NR15332' 'RC62865' 'CC15295' 'KA61892'\n", + " 'OS94884' 'ND87334' 'OY51402' 'YL74911' 'GK92563' 'HL53154' 'RI78966'\n", + " 'IC13702' 'BE10809' 'HT87217' 'TH95618' 'TS19868' 'LP45550' 'QR87004'\n", + " 'OE75747' 'DX91392' 'AB72731' 'GX84338' 'IS12901' 'BN90616' 'HH90090'\n", + " 'IU25463' 'KC11055' 'PD33979' 'NK71023' 'AB13432' 'OZ97704' 'UF46533'\n", + " 'XP47431' 'GK73582' 'RV98763' 'II62831' 'XK33449' 'TR85083' 'EO95328'\n", + " 'EN21086' 'YL83902' 'AZ62651' 'ZW25874' 'EH41854' 'MW70227' 'SL22297'\n", + " 'RV14138' 'UO62808' 'ZX64745' 'FL34139' 'TS11219' 'XX12304' 'SD64087'\n", + " 'OY38576' 'BG76355' 'IP66913' 'LE95702' 'KX54357' 'EZ78112' 'XN16891'\n", + " 'XK31350' 'CC30924' 'IT78748' 'KY33386' 'CO44221' 'LK60013' 'DE21533'\n", + " 'YS94121' 'UK68427' 'TE49565' 'RA88421' 'KQ51983' 'CD88896' 'YV22553'\n", + " 'WU14435' 'XV84099' 'RI24911' 'KO26461' 'HI14283' 'PT50227' 'BH36570'\n", + " 'TX17484' 'CT41158' 'AO87348' 'DE55857' 'LF66923' 'CN24514' 'UW32074'\n", + " 'HP36979' 'PP40919' 'RO73268' 'HO61691' 'BS13062' 'FO35655' 'HR10526'\n", + " 'IA63417' 'BH35016' 'PK52952' 
'OD76309' 'IL28481' 'GY55092' 'UF33451'\n", + " 'CF15558' 'JM62924' 'EM66435' 'QX45933' 'JI71369' 'JU93290' 'GU66096'\n", + " 'UC33108' 'LW93867' 'OU78470' 'XW90265' 'HS67749' 'VZ51506' 'UI64281'\n", + " 'AE98193' 'AZ74055' 'XS76911' 'AY40674' 'NA12740' 'UA84837' 'DJ51510'\n", + " 'VM58985' 'OH60605' 'UO98052' 'NC53424' 'LQ13873' 'LA97014' 'NB79936'\n", + " 'NT89061' 'AF10970' 'ZG48513' 'JQ59145' 'FE84989' 'JT52858' 'MC62068'\n", + " 'EU27538' 'RH42306' 'US23612' 'WV76014' 'RK96223' 'MF82000' 'FM46980'\n", + " 'SY56792' 'RF61565' 'IM94808' 'VI14730' 'YR34119' 'RR77985' 'QD28391'\n", + " 'WV17090' 'TM23514' 'MQ68407' 'GJ59592' 'FY56083' 'UA94723' 'FW91032'\n", + " 'DE34457' 'HD32044' 'HH30454' 'AH84063' 'QA17596' 'XI41052' 'DI30528'\n", + " 'SC66359' 'EN61670' 'DQ10761' 'BQ51587' 'JE21522' 'WS47147' 'ZA64638'\n", + " 'EW38459' 'QW87316' 'IC43478' 'TE34064' 'WU60905' 'YM18992' 'PD55753'\n", + " 'KU56006' 'MJ69973' 'TW43626' 'XX84133' 'ZW84453' 'HO29524' 'VE89726'\n", + " 'GE87503' 'PX90263' 'NI17718' 'FY32213' 'RZ13254' 'GN45013' 'NM39588'\n", + " 'KU84464' 'YH43527' 'RO30676' 'QL59704' 'QH19450' 'SA54664' 'CI38330'\n", + " 'WB38524' 'CE56187' 'JL19416' 'JZ61422' 'LA13377' 'NC99948' 'QD34785'\n", + " 'RO26085' 'ES57969' 'JK55587' 'RN97635' 'BI76326' 'JA34909' 'OJ90342'\n", + " 'CM88932' 'JJ97525' 'XV21647' 'MC83487' 'BL90769' 'CR57148' 'CP85232'\n", + " 'YL74732' 'FG16766' 'NV55438' 'RM10880' 'GL56175' 'UK52289' 'OT85112'\n", + " 'BC62782' 'TI19722' 'JP30654' 'UM45563' 'EN60878' 'JF36291' 'BK59444'\n", + " 'MK70700' 'IW71076' 'AP98768' 'OM24164' 'HR85211' 'VC87846' 'ZM92052'\n", + " 'ON73702' 'QQ90441' 'HU35721' 'YP47665' 'FU99476' 'AG85615' 'OY74069'\n", + " 'DJ91267' 'KB72438' 'TR67616' 'GF65731' 'HB67642' 'DP84567' 'VV77534'\n", + " 'GL67540' 'SV50502' 'UK59698' 'OA57352' 'ZF84449' 'AX86150' 'HG39060'\n", + " 'EM29359' 'SF57173' 'OT47603' 'SW31412' 'JS36322' 'RE81445' 'RM24280'\n", + " 'LC25393' 'UX38930' 'HD95496' 'RX24650' 'DW19309' 'MT41386' 'WZ40465'\n", + " 
'DB42794' 'JB50798' 'IP69763' 'TE35785' 'HX74855' 'QN65180' 'GE47180'\n", + " 'VQ38776' 'BH86846' 'IN17648' 'DF95759' 'QG45324' 'MN61620' 'YH86390'\n", + " 'FY13480' 'YH61661' 'NL93182' 'WE68644' 'EZ30498' 'QY74517' 'NM88660'\n", + " 'MZ82036' 'ID20929' 'EY50028' 'TT82373' 'OH64088' 'SK97780' 'IO33050'\n", + " 'XA55917' 'JK32620' 'RQ19236' 'QC47433' 'RA93608' 'XH97711' 'AU96286'\n", + " 'KC17170' 'ZN47335' 'EI46264' 'EK87864' 'GV45403' 'QK31192' 'LU89008'\n", + " 'NS10490' 'KL98495' 'IU96845' 'QL93655' 'PF40592' 'LZ34046' 'JC80093'\n", + " 'YE88490' 'YC80498' 'AI85843' 'XD66024' 'FY51713' 'PH26378' 'WQ18638'\n", + " 'KY14688' 'TC97762' 'QC87108' 'CX12134' 'SM73248' 'CK19789' 'UV12583'\n", + " 'JC11405' 'KA89683' 'BG85305' 'UQ87917' 'XN11823' 'OS46571' 'PX17116'\n", + " 'RP19541' 'ZR25747' 'NQ86532' 'JY27336' 'PB54378' 'SV38190' 'CV24005'\n", + " 'EX28656' 'CF57022' 'GM16780' 'BX94438' 'RM41745' 'XR70252' 'YH92099'\n", + " 'SG81493' 'ZX23819' 'FJ54907' 'CU26127' 'YH60476' 'ZZ97035' 'GE82737'\n", + " 'KY21873' 'UA51318' 'BV55014' 'HX21307' 'LQ68252' 'CR92802' 'SL35268'\n", + " 'RD62882' 'JS42382' 'BT30554' 'VP57424' 'VU19243' 'TA82973' 'GK71720'\n", + " 'OQ61223' 'LL62746' 'JQ56711' 'AW77988' 'QP84605' 'MY97912' 'IB87349'\n", + " 'AW73065' 'BW80872' 'PX70175' 'KF75098' 'IS50283' 'MY64920' 'KN34250'\n", + " 'GN46207' 'KL57176' 'MN94234' 'JY90595' 'HK26543' 'PN86062' 'VW27730'\n", + " 'SH55671' 'MO56878' 'VO38365' 'SV35618' 'RX12347' 'FR55658' 'XS12556'\n", + " 'ZU73588' 'WT43034' 'VM13430' 'TC78849' 'VC34764' 'WO90953' 'IU47468'\n", + " 'KO46064' 'RB34917' 'BI38192' 'PU18983' 'SW79912' 'ES39217' 'KP72427'\n", + " 'UA19178' 'PR53785' 'XF57481' 'CN90378' 'KI56154' 'UI55951' 'FF28650'\n", + " 'FS55302' 'TN79487' 'HG32616' 'UK41984' 'LZ52266' 'PM27367' 'ZK21724'\n", + " 'BH35482' 'QE22757' 'ON77649' 'RN82884' 'CQ75652' 'FF58467' 'BS83666'\n", + " 'WO29605' 'TL77607' 'EZ50606' 'OS39723' 'FN69743' 'XW96958' 'TU92578'\n", + " 'TL43709' 'YE68736' 'OB96537' 'EU68825' 'CC31456' 
'DJ77787' 'LN26837'\n", + " 'YI92916' 'NW54906' 'ME77513' 'UK76891' 'SI26888' 'YD74948' 'HB64268'\n", + " 'BW52697' 'NL41409' 'OD69005' 'ZZ91716' 'UK70255' 'QT25383' 'AW18068'\n", + " 'NS45347' 'FV19421' 'XW89091' 'YC11951' 'UY18770' 'RA49085' 'BG84194'\n", + " 'PT64580' 'MR67738' 'DM95829' 'DB75522' 'LM34525' 'WW30771' 'QP65569'\n", + " 'TN50051' 'UO86707' 'JA41698' 'NX18774' 'DA69469' 'CN23147' 'RA68844'\n", + " 'GH42026' 'BD16530' 'JH91579' 'WK23685' 'GR62267' 'PI78084' 'GF97874'\n", + " 'ZH19885' 'UK25655' 'QR45101' 'EL93539' 'EE99484' 'DP46882' 'WP41146'\n", + " 'TK60799' 'DN29808' 'SS59521' 'NG66579' 'TC14209' 'ED50963' 'GP40701'\n", + " 'CP98451' 'NX52648' 'ZC32510' 'NG27780' 'HN95240' 'EB59129' 'RA70851'\n", + " 'PM19162' 'MS59005' 'SU71163' 'BD35676' 'NI44621' 'EW33419' 'HX44948'\n", + " 'DL36983' 'XR87264' 'NN99001' 'XV95530' 'OL97871' 'HQ23708' 'WR63188'\n", + " 'NG82219' 'KU29408' 'RE46783' 'RU94434' 'GI82355' 'VO26340' 'NV61299'\n", + " 'DX31066' 'CY50337' 'TJ20375' 'EP72155' 'JJ76159' 'BG15419' 'AO74776'\n", + " 'HQ82233' 'OL72737' 'ZQ59828' 'NZ15548' 'XK61304' 'EJ44139' 'CM94425'\n", + " 'OV54878' 'JF57282' 'MY37953' 'XP64922' 'WL65572' 'LN50325' 'HJ15383'\n", + " 'KH59823' 'YM79169' 'DR38127' 'PU42145' 'KM33477' 'RI53167' 'OF77789'\n", + " 'YB33445' 'BA17836' 'JS43228' 'BB11622' 'HQ70429' 'WK88044' 'LA80525'\n", + " 'EH16250' 'PU41872' 'HB85743' 'MM71959' 'MB83663' 'KR43119' 'KH24214'\n", + " 'AC40767' 'HP55391' 'EG62398' 'VS19949' 'AM92343' 'GI68556' 'JT11876'\n", + " 'XR64251' 'MK34957' 'GP18756' 'AP23850' 'KQ65521' 'EJ19449' 'QB70027'\n", + " 'QW47320' 'KH64733' 'ON59472' 'HP94242' 'RV15398' 'EA25683' 'PW73754'\n", + " 'MC71942' 'OX72195' 'YQ99152' 'KI19439' 'PM76175' 'US45383' 'GT38956'\n", + " 'SN41301' 'BE62503' 'PA16884' 'NC58480' 'NS39326' 'PN18507' 'EK91340'\n", + " 'JY16280' 'ZW71731' 'ZC24631' 'YR34689' 'RT65829' 'BZ12077' 'WM65373'\n", + " 'NH35059' 'QD38160' 'BM15160' 'VY79030' 'EV19512' 'TE13577' 'WY97929'\n", + " 'YG20683' 'FK75497' 
'NE60110' 'TN36521' 'HG33568' 'TW17878' 'ZO83562'\n", + " 'CH97539' 'CV29889' 'MO33320' 'QZ81258' 'NY56352' 'EA27048' 'UT38865'\n", + " 'QC89139' 'LA14484' 'HN57556' 'CV31235' 'WR45726' 'LB25094' 'KW56110'\n", + " 'XO36233' 'ZX86243' 'DW29763' 'CT83377' 'OQ90898' 'GO77248' 'QW33258'\n", + " 'OU79745' 'VZ79886' 'FI92440' 'YG85980' 'QM74621' 'EI71732' 'VN79010'\n", + " 'FI61723' 'OH55411' 'TF10720' 'NW30838' 'CB58476' 'WI69346' 'FS76657'\n", + " 'YX89016' 'PK28821' 'MB51200' 'XG44587' 'FG91922' 'OM99303' 'RV67546'\n", + " 'UJ79253' 'PN98247' 'IB67546' 'OE19087' 'CM95716' 'MW62634' 'QW67581'\n", + " 'SN16059' 'OE51254' 'RM42344' 'GB35238' 'ML82674' 'EI85244' 'DE28132'\n", + " 'TV25678' 'TY26512' 'OB69153' 'QZ77637' 'XN41715' 'QR15857' 'FL69363'\n", + " 'IS30295' 'WA25797' 'NL59519' 'ZU93025' 'DK94262' 'UQ30615' 'OR40060'\n", + " 'DK32872' 'FA46418' 'ER19995' 'KI75855' 'ND41876' 'PN21042' 'GJ43254'\n", + " 'AL46984' 'JP58047' 'ZE85014' 'KU88219' 'UU98729' 'WS82822' 'YB49933'\n", + " 'XC16387' 'XJ96748' 'TM98684' 'AY18433' 'DM74502' 'FT56968' 'OX36896'\n", + " 'BZ65376' 'LN34660' 'JC29295' 'KJ87930' 'XT36360' 'IX35050' 'UN97379'\n", + " 'MR57294' 'UG79499' 'UA50747' 'GL20444' 'SP58110' 'XM91635' 'TV82603'\n", + " 'BB82067' 'JP94676' 'VU53417' 'IW54795' 'RN78170' 'IX55883' 'XM72420'\n", + " 'GC15104' 'RX13282' 'QA85890' 'IR62668' 'AL96740' 'SS48498' 'PE39479'\n", + " 'JH62891' 'FI20423' 'PM13394' 'YV67971' 'QD31377' 'YG10247' 'FE73696'\n", + " 'SW19699' 'QJ40732' 'HM76207' 'NT59303' 'PU41393' 'QO86948' 'QN10888'\n", + " 'VY19543' 'XC15133' 'ST43550' 'FX36546' 'JX68983' 'HX78576' 'ZQ11381'\n", + " 'ON39271' 'SB18278' 'ZT30559' 'XI41106' 'ZS88847' 'RU49126' 'KR62797'\n", + " 'ZJ73220' 'FY62633' 'CU36986' 'WZ53904' 'AA71604' 'TD10493' 'LY97989'\n", + " 'VX39856' 'TP51897' 'QQ89253' 'EI91403' 'QG15435' 'FZ55002' 'HX77930'\n", + " 'UN37063' 'VB87946' 'AB60627' 'TA34903' 'AQ51368' 'NZ26102' 'GB45753'\n", + " 'BV79904' 'OB49075' 'DS97676' 'JO63462' 'NJ10602' 'RS24501' 
# %%
# Re-check the categorical columns after standardisation. `categorical_columns`
# is the list computed earlier in the notebook from the frame's dtypes, so it
# is reused here instead of repeating the same six names by hand.
for column in categorical_columns:
    unique_values = df[column].unique()
    print(f"Unique values in '{column}': {unique_values}")

# %%
# count / mean / std / min / quartiles / max for every numerical column.
# Leaving the frame as the last expression gives the rich table rendering
# instead of the plain-text dump produced by print().
summary_statistics = df[numerical_columns].describe()
print("Summary Statistics for Numerical Columns:")
summary_statistics

# %% [markdown]
# ### Conclusions
#
# Figures below are taken from the run recorded in this notebook.
#
# **Numerical columns**
#
# - **Customer Lifetime Value** ranges from 200.44 to 4479.55. The mean
#   (794.20) is well above the median (588.58), so the distribution is
#   right-skewed.
# - **Income** ranges from 0 to 99,960 with a median of 36,632; the minimum
#   of 0 shows that some customers report no income.
# - **Monthly Premium Auto** has a median of 83 and a 75th percentile of 110,
#   but a maximum of 35,354. That maximum is an obvious outlier: it inflates
#   the mean (205.23) and the standard deviation (1697.75) and should be
#   investigated before this column is used.
# - **Number of Open Complaints** ranges from 0 to 5; the 75th percentile is
#   0, so most customers have no open complaint.
# - **Total Claim Amount** ranges from 0.38 to 2893.24, with mean 405.00 and
#   median 350.40.
#
# **Categorical columns**
#
# - **Customer** has one distinct value per row (952), i.e. it is an
#   identifier rather than a category.
# - **ST** has 5 values after standardisation: AZ, NV, CA, WA, OR.
# - **GENDER** has 2 values after standardisation: F, M.
# - **Education** has 5 values: Bachelors, High School, College, Masters,
#   Doctor.
# - **Policy Type** has 3 values: Personal Auto, Corporate Auto, Special Auto.
# - **Vehicle Class** has 6 values: Four-Door Car, Two-Door Car, SUV,
#   Sports Car, Luxury Car, Luxury SUV.

# %% [markdown]
# - Compute summary statistics such as mean, median, mode, standard deviation, and quartiles to understand the central tendency and distribution of the data for numerical columns.
You should also provide your conclusions based on these summary statistics.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "a67e48db-aefd-4311-b793-6124ac61154f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Customer Lifetime ValueIncomeMonthly Premium AutoNumber of Open ComplaintsTotal Claim Amount
count952.000000952.000000952.000000952.000000952.000000
mean794.20118739557.350840205.2310920.378151404.999156
std625.87914830744.5618511697.7525040.872446299.238726
min200.4400000.00000061.0000000.0000000.382107
25%411.78500013087.75000068.0000000.000000196.724587
50%588.57500036632.00000083.0000000.000000350.400000
75%903.75000065085.500000110.0000000.000000534.000000
max4479.55000099960.00000035354.0000005.0000002893.239678
\n", + "
" + ], + "text/plain": [ + " Customer Lifetime Value Income Monthly Premium Auto \\\n", + "count 952.000000 952.000000 952.000000 \n", + "mean 794.201187 39557.350840 205.231092 \n", + "std 625.879148 30744.561851 1697.752504 \n", + "min 200.440000 0.000000 61.000000 \n", + "25% 411.785000 13087.750000 68.000000 \n", + "50% 588.575000 36632.000000 83.000000 \n", + "75% 903.750000 65085.500000 110.000000 \n", + "max 4479.550000 99960.000000 35354.000000 \n", + "\n", + " Number of Open Complaints Total Claim Amount \n", + "count 952.000000 952.000000 \n", + "mean 0.378151 404.999156 \n", + "std 0.872446 299.238726 \n", + "min 0.000000 0.382107 \n", + "25% 0.000000 196.724587 \n", + "50% 0.000000 350.400000 \n", + "75% 0.000000 534.000000 \n", + "max 5.000000 2893.239678 " + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "ef6b10ff-3dcc-416d-bd86-7eb920bffc08", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Customer:\n", + "count 952\n", + "unique 952\n", + "top QZ44356\n", + "freq 1\n", + "Name: Customer, dtype: object\n", + "\n", + "ST:\n", + "count 952\n", + "unique 5\n", + "top CA\n", + "freq 293\n", + "Name: ST, dtype: object\n", + "\n", + "GENDER:\n", + "count 952\n", + "unique 2\n", + "top F\n", + "freq 501\n", + "Name: GENDER, dtype: object\n", + "\n", + "Education:\n", + "count 952\n", + "unique 5\n", + "top Bachelors\n", + "freq 291\n", + "Name: Education, dtype: object\n", + "\n", + "Policy Type:\n", + "count 952\n", + "unique 3\n", + "top Personal Auto\n", + "freq 689\n", + "Name: Policy Type, dtype: object\n", + "\n", + "Vehicle Class:\n", + "count 952\n", + "unique 6\n", + "top Four-Door Car\n", + "freq 506\n", + "Name: Vehicle Class, dtype: object\n", + "\n" + ] + } + ], + "source": [ + "\n", + "for i in df.select_dtypes(include=['object']).columns:\n", + " 
print(f\"{i}:\\n{df[i].describe()}\\n\")\n", + "\n", + "#no NaN values remain: every column shows count 952" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "ff0d32b9-f897-4f97-b472-d5da35281bcd", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Customer:\n", + "count 952\n", + "unique 952\n", + "top QZ44356\n", + "freq 1\n", + "Name: Customer, dtype: object\n", + "\n", + "ST:\n", + "count 952\n", + "unique 5\n", + "top CA\n", + "freq 293\n", + "Name: ST, dtype: object\n", + "\n", + "GENDER:\n", + "count 952\n", + "unique 2\n", + "top F\n", + "freq 501\n", + "Name: GENDER, dtype: object\n", + "\n", + "Education:\n", + "count 952\n", + "unique 5\n", + "top Bachelors\n", + "freq 291\n", + "Name: Education, dtype: object\n", + "\n", + "Policy Type:\n", + "count 952\n", + "unique 3\n", + "top Personal Auto\n", + "freq 689\n", + "Name: Policy Type, dtype: object\n", + "\n", + "Vehicle Class:\n", + "count 952\n", + "unique 6\n", + "top Four-Door Car\n", + "freq 506\n", + "Name: Vehicle Class, dtype: object\n", + "\n" + ] + } + ], + "source": [ + "\n", + "for i in df.select_dtypes(include=['object']).columns:\n", + " print(f\"{i}:\\n{df[i].describe()}\\n\")\n", + "\n", + "#no NaN values remain: every column shows count 952" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "ba638bbc-61da-49fc-81c0-710630e3d194", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CustomerSTGENDEREducationCustomer Lifetime ValueIncomeMonthly Premium AutoNumber of Open ComplaintsPolicy TypeVehicle ClassTotal Claim Amount
1QZ44356AZFBachelors697.950.094.00Personal AutoFour-Door Car1131.464935
2AI49188NVFBachelors1288.7448767.0108.00Personal AutoTwo-Door Car566.472247
3WW63253CAMBachelors764.590.0106.00Corporate AutoSUV529.881344
4GA49547WAMHigh School536.3136357.068.00Personal AutoFour-Door Car17.269323
5OC83172ORFBachelors825.6362902.069.00Personal AutoTwo-Door Car159.383042
....................................
1066TM65736ORMMasters305.9638644.078.01Personal AutoFour-Door Car361.455219
1067VJ51327CAFHigh School2031.5063209.0102.02Personal AutoSUV207.320041
1068GS98873AZFBachelors323.9116061.088.00Personal AutoFour-Door Car633.600000
1069CW49887CAFMasters462.6879487.0114.00Special AutoSUV547.200000
1070MY31220CAFCollege899.7054230.0112.00Personal AutoTwo-Door Car537.600000
\n", + "

952 rows × 11 columns

\n", + "
" + ], + "text/plain": [ + " Customer ST GENDER Education Customer Lifetime Value Income \\\n", + "1 QZ44356 AZ F Bachelors 697.95 0.0 \n", + "2 AI49188 NV F Bachelors 1288.74 48767.0 \n", + "3 WW63253 CA M Bachelors 764.59 0.0 \n", + "4 GA49547 WA M High School 536.31 36357.0 \n", + "5 OC83172 OR F Bachelors 825.63 62902.0 \n", + "... ... .. ... ... ... ... \n", + "1066 TM65736 OR M Masters 305.96 38644.0 \n", + "1067 VJ51327 CA F High School 2031.50 63209.0 \n", + "1068 GS98873 AZ F Bachelors 323.91 16061.0 \n", + "1069 CW49887 CA F Masters 462.68 79487.0 \n", + "1070 MY31220 CA F College 899.70 54230.0 \n", + "\n", + " Monthly Premium Auto Number of Open Complaints Policy Type \\\n", + "1 94.0 0 Personal Auto \n", + "2 108.0 0 Personal Auto \n", + "3 106.0 0 Corporate Auto \n", + "4 68.0 0 Personal Auto \n", + "5 69.0 0 Personal Auto \n", + "... ... ... ... \n", + "1066 78.0 1 Personal Auto \n", + "1067 102.0 2 Personal Auto \n", + "1068 88.0 0 Personal Auto \n", + "1069 114.0 0 Special Auto \n", + "1070 112.0 0 Personal Auto \n", + "\n", + " Vehicle Class Total Claim Amount \n", + "1 Four-Door Car 1131.464935 \n", + "2 Two-Door Car 566.472247 \n", + "3 SUV 529.881344 \n", + "4 Four-Door Car 17.269323 \n", + "5 Two-Door Car 159.383042 \n", + "... ... ... 
\n", + "1066 Four-Door Car 361.455219 \n", + "1067 SUV 207.320041 \n", + "1068 Four-Door Car 633.600000 \n", + "1069 SUV 547.200000 \n", + "1070 Two-Door Car 537.600000 \n", + "\n", + "[952 rows x 11 columns]" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_cleaned = df.dropna()\n", + "df_cleaned" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "f6ed27cb-bf40-4ce0-b29b-718e248abb79", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Customer 0\n", + "ST 0\n", + "GENDER 0\n", + "Education 0\n", + "Customer Lifetime Value 0\n", + "Income 0\n", + "Monthly Premium Auto 0\n", + "Number of Open Complaints 0\n", + "Policy Type 0\n", + "Vehicle Class 0\n", + "Total Claim Amount 0\n", + "dtype: int64" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.isnull().sum()\n" + ] + }, + { + "cell_type": "markdown", + "id": "957c7f80-9179-45bf-b444-0a5c1d20e39c", + "metadata": {}, + "source": [ + "- Compute summary statistics for categorical columns and providing your conclusions based on these statistics." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "848c15aa-314e-4fee-960e-ee58d5d97f2c", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Customer ST GENDER Education Policy Type Vehicle Class\n", + "count 952 952 952 952 952 952\n", + "unique 952 5 2 5 3 6\n", + "top QZ44356 CA F Bachelors Personal Auto Four-Door Car\n", + "freq 1 293 501 291 689 506\n" + ] + } + ], + "source": [ + "categorical_summary = df.describe(include=['object'])\n", + "print(categorical_summary)" + ] + }, + { + "cell_type": "markdown", + "id": "4a703890-63db-4944-b7ab-95a4f8185120", + "metadata": {}, + "source": [ + "## Challenge 2: analyzing the data" + ] + }, + { + "cell_type": "markdown", + "id": "0776a403-c56a-452f-ac33-5fd4fdb06fc7", + "metadata": {}, + "source": [ + "### Exercise 1" + ] + }, + { + "cell_type": "markdown", + "id": "eedbc484-da4d-4f9c-9343-e1d44311a87e", + "metadata": {}, + "source": [ + "The marketing team wants to know the top 5 less common customer locations. Create a pandas Series object that contains the customer locations and their frequencies, and then retrieve the top 5 less common locations in ascending order." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "2dca5073-4520-4f42-9390-4b92733284ed", + "metadata": {}, + "outputs": [], + "source": [ + "df['ST'] = df['ST'].replace({\n", + " 'Washington': 'WA', # Replace full name with abbreviation\n", + " 'Cali': 'CA', \n", + " 'California':'CA', \n", + " 'Arizona':'AZ',\n", + " 'Oregon':'OR',\n", + " 'Nevada':'NV'\n", + " \n", + "})\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "89bb2442-4a84-4915-be49-943f931ccb27", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['AZ', 'NV', 'CA', 'WA', 'OR'], dtype=object)" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['ST'].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "79d0aadd-9279-472a-a0b9-17c38863992d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CustomerGENDEREducationCustomer Lifetime ValueIncomeMonthly Premium AutoNumber of Open ComplaintsPolicy TypeVehicle ClassTotal Claim Amount
ST
AZ188188188188188188188188188188
CA293293293293293293293293293293
NV89898989898989898989
OR276276276276276276276276276276
WA106106106106106106106106106106
\n", + "
" + ], + "text/plain": [ + " Customer GENDER Education Customer Lifetime Value Income \\\n", + "ST \n", + "AZ 188 188 188 188 188 \n", + "CA 293 293 293 293 293 \n", + "NV 89 89 89 89 89 \n", + "OR 276 276 276 276 276 \n", + "WA 106 106 106 106 106 \n", + "\n", + " Monthly Premium Auto Number of Open Complaints Policy Type \\\n", + "ST \n", + "AZ 188 188 188 \n", + "CA 293 293 293 \n", + "NV 89 89 89 \n", + "OR 276 276 276 \n", + "WA 106 106 106 \n", + "\n", + " Vehicle Class Total Claim Amount \n", + "ST \n", + "AZ 188 188 \n", + "CA 293 293 \n", + "NV 89 89 \n", + "OR 276 276 \n", + "WA 106 106 " + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.groupby('ST').count()\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "6c62d985-e67e-4190-9f45-26d6aa7ac887", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "ST\n", + "AZ 188\n", + "CA 293\n", + "NV 89\n", + "OR 276\n", + "WA 106\n", + "dtype: int64" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "location_counts = df.groupby('ST').size()\n", + "location_counts " + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "c955588d-0c5e-4ad2-b397-970413297d1f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "ST\n", + "NV 89\n", + "WA 106\n", + "AZ 188\n", + "OR 276\n", + "CA 293\n", + "dtype: int64" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sorted_locations = location_counts.sort_values(ascending=True)\n", + "sorted_locations" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "a1697b84-d1a4-4ac2-9ca3-2cf6d1f696fc", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "top_5_less_common_locations = sorted_locations.head(5)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "id": 
"e6a1575f-664e-4d09-9419-29b7a162eb91", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ST\n", + "NV 89\n", + "WA 106\n", + "AZ 188\n", + "OR 276\n", + "CA 293\n", + "dtype: int64\n" + ] + } + ], + "source": [ + "print(top_5_less_common_locations)" + ] + }, + { + "cell_type": "markdown", + "id": "0ce80f43-4afa-43c7-a78a-c917444da4e0", + "metadata": {}, + "source": [ + "### Exercise 2\n", + "\n", + "The sales team wants to know the total number of policies sold for each type of policy. Create a pandas Series object that contains the policy types and their total number of policies sold, and then retrieve the policy type with the highest number of policies sold." + ] + }, + { + "cell_type": "markdown", + "id": "a9f13997-1555-4f98-aca6-970fda1d2c3f", + "metadata": {}, + "source": [ + "*Hint:*\n", + "- *Using value_counts() method simplifies this analysis.*\n", + "- *Futhermore, there is a method that returns the index of the maximum value in a column or row.*\n" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "id": "a8fe7df7-2397-4e86-a238-401e63336886", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['Customer', 'ST', 'GENDER', 'Education', 'Customer Lifetime Value',\n", + " 'Income', 'Monthly Premium Auto', 'Number of Open Complaints',\n", + " 'Policy Type', 'Vehicle Class', 'Total Claim Amount'],\n", + " dtype='object')" + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.columns = df.columns.str.strip() # This will remove any leading or trailing spaces\n", + "df.columns " + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "id": "bcfad6c1-9af2-4b0b-9aa9-0dc5c17473c0", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Total number of policies sold for each type of policy:\n", + "Policy Type\n", + "Personal Auto 689\n", + "Corporate Auto 211\n", + 
"Special Auto 52\n", + "Name: count, dtype: int64\n", + "\n", + "The policy type with the highest number of policies sold is 'Personal Auto' with 689 policies sold.\n" + ] + } + ], + "source": [ + "\n", + "policy_counts = df['Policy Type'].value_counts()\n", + "\n", + "# Step 2: Create a Series object from the counts (already a Series from value_counts)\n", + "# (This step is just illustrative as policy_counts is already a Series)\n", + "\n", + "# Step 3: Retrieve the policy type with the highest number of policies sold\n", + "highest_policy_type = policy_counts.idxmax() # Get the policy type name with max policies\n", + "highest_count = policy_counts.max() # Get the count of that policy type\n", + "\n", + "# Print the results\n", + "print(\"Total number of policies sold for each type of policy:\")\n", + "print(policy_counts)\n", + "print(f\"\\nThe policy type with the highest number of policies sold is '{highest_policy_type}' with {highest_count} policies sold.\")" + ] + }, + { + "cell_type": "markdown", + "id": "0b863fd3-bf91-4d5d-86eb-be29ed9f5b70", + "metadata": {}, + "source": [ + "### Exercise 3\n", + "\n", + "The sales team wants to know if customers with Personal Auto have a lower income than those with Corporate Auto. How does the average income compare between the two policy types?" + ] + }, + { + "cell_type": "markdown", + "id": "b1386d75-2810-4aa1-93e0-9485aa12d552", + "metadata": {}, + "source": [ + "- Use *loc* to create two dataframes: one containing only Personal Auto policies and one containing only Corporate Auto policies.\n", + "- Calculate the average income for each policy.\n", + "- Print the results." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 45, + "id": "0c0563cf-6f8b-463d-a321-651a972f82e5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Average income for Personal Auto policies: 38180.69871794872\n", + "Average income for Corporate Auto policies: 41390.31196581197\n" + ] + } + ], + "source": [ + "import pandas as pd\n", + "\n", + "df = pd.read_csv('https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file1.csv')\n", + "\n", + "personal_auto_df = df.loc[df['Policy Type'] == 'Personal Auto']\n", + "corporate_auto_df = df.loc[df['Policy Type'] == 'Corporate Auto']\n", + "\n", + "average_income_personal = personal_auto_df['Income'].mean()\n", + "average_income_corporate = corporate_auto_df['Income'].mean()\n", + "\n", + "print(f\"Average income for Personal Auto policies: {average_income_personal}\")\n", + "print(f\"Average income for Corporate Auto policies: {average_income_corporate}\")\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "37e400d6-9d7d-458a-8d4d-07b8a30e709b", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "80b16c27-f4a5-4727-a229-1f88671cf4e2", + "metadata": {}, + "source": [ + "### Bonus: Exercise 4\n" + ] + }, + { + "cell_type": "markdown", + "id": "ac584986-299b-475f-ac2e-928c16c3f512", + "metadata": {}, + "source": [ + "Your goal is to identify customers with a high policy claim amount.\n", + "\n", + "Instructions:\n", + "\n", + "- Review again the statistics for total claim amount to gain an understanding of the data.\n", + "- To identify potential areas for improving customer retention and profitability, we want to focus on customers with a high policy claim amount. Consider customers with a high policy claim amount to be those in the top 25% of the total claim amount. 
Create a pandas DataFrame object that contains information about customers with a policy claim amount greater than the 75th percentile.\n", + "- Use DataFrame methods to calculate summary statistics about the high policy claim amount data. " + ] + }, + { + "cell_type": "markdown", + "id": "4e3af5f1-6023-4b05-9c01-d05392daa650", + "metadata": {}, + "source": [ + "*Note: When analyzing data, we often want to focus on certain groups of values to gain insights. Percentiles are a useful tool to help us define these groups. A percentile is a measure that tells us what percentage of values in a dataset are below a certain value. For example, the 75th percentile represents the value below which 75% of the data falls. Similarly, the 25th percentile represents the value below which 25% of the data falls. When we talk about the top 25%, we are referring to the values that fall above the 75th percentile, which represent the top quarter of the data. On the other hand, when we talk about the bottom 25%, we are referring to the values that fall below the 25th percentile, which represent the bottom quarter of the data. 
By focusing on these groups, we can identify patterns and trends that may be useful for making decisions and taking action.*\n", + "\n", + "*Hint: look for a method that gives you the percentile or quantile 0.75 and 0.25 for a Pandas Series.*" + ] + }, + { + "cell_type": "markdown", + "id": "2d234634-50bd-41e0-88f7-d5ba684455d1", + "metadata": {}, + "source": [ + "*Hint 2: check `Boolean selection according to the values of a single column` in https://towardsdatascience.com/filtering-data-frames-in-pandas-b570b1f834b9*" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "id": "43cde443-7e8c-4a2a-bd10-5c5e2a9aad83", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "532.8" + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "percentile_75 = df['Total Claim Amount'].quantile(0.75)\n", + "percentile_75" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "id": "18c31ba6-d3cd-463d-8c24-3c785e9607ca", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CustomerSTGENDEREducationCustomer Lifetime ValueIncomeMonthly Premium AutoNumber of Open ComplaintsPolicy TypeVehicle ClassTotal Claim Amount
1QZ44356ArizonaFBachelor697953.59%0.094.01/0/00Personal AutoFour-Door Car1131.464935
2AI49188NevadaFBachelor1288743.17%48767.0108.01/0/00Personal AutoTwo-Door Car566.472247
17OE15005CaliNaNCollege394524.16%28855.0101.01/0/00Personal AutoSUV647.442031
23TZ98966NevadaNaNBachelor245019.10%0.073.01/3/00Corporate AutoFour-Door Car554.376763
26US89481CaliforniaNaNBachelor394637.21%0.0111.01/0/00Personal AutoFour-Door Car799.200000
....................................
1059YG44474OregonMCollege1401472.13%54193.0117.01/0/00Corporate AutoSUV720.752945
1061RY92647CaliFBachelor1050677.17%0.092.01/0/00Personal AutoFour-Door Car546.524896
1068GS98873ArizonaFBachelor323912.47%16061.088.01/0/00Personal AutoFour-Door Car633.600000
1069CW49887CaliforniaFMaster462680.11%79487.0114.01/0/00Special AutoSUV547.200000
1070MY31220CaliforniaFCollege899704.02%54230.0112.01/0/00Personal AutoTwo-Door Car537.600000
\n", + "

264 rows × 11 columns

\n", + "
" + ], + "text/plain": [ + " Customer ST GENDER Education Customer Lifetime Value Income \\\n", + "1 QZ44356 Arizona F Bachelor 697953.59% 0.0 \n", + "2 AI49188 Nevada F Bachelor 1288743.17% 48767.0 \n", + "17 OE15005 Cali NaN College 394524.16% 28855.0 \n", + "23 TZ98966 Nevada NaN Bachelor 245019.10% 0.0 \n", + "26 US89481 California NaN Bachelor 394637.21% 0.0 \n", + "... ... ... ... ... ... ... \n", + "1059 YG44474 Oregon M College 1401472.13% 54193.0 \n", + "1061 RY92647 Cali F Bachelor 1050677.17% 0.0 \n", + "1068 GS98873 Arizona F Bachelor 323912.47% 16061.0 \n", + "1069 CW49887 California F Master 462680.11% 79487.0 \n", + "1070 MY31220 California F College 899704.02% 54230.0 \n", + "\n", + " Monthly Premium Auto Number of Open Complaints Policy Type \\\n", + "1 94.0 1/0/00 Personal Auto \n", + "2 108.0 1/0/00 Personal Auto \n", + "17 101.0 1/0/00 Personal Auto \n", + "23 73.0 1/3/00 Corporate Auto \n", + "26 111.0 1/0/00 Personal Auto \n", + "... ... ... ... \n", + "1059 117.0 1/0/00 Corporate Auto \n", + "1061 92.0 1/0/00 Personal Auto \n", + "1068 88.0 1/0/00 Personal Auto \n", + "1069 114.0 1/0/00 Special Auto \n", + "1070 112.0 1/0/00 Personal Auto \n", + "\n", + " Vehicle Class Total Claim Amount \n", + "1 Four-Door Car 1131.464935 \n", + "2 Two-Door Car 566.472247 \n", + "17 SUV 647.442031 \n", + "23 Four-Door Car 554.376763 \n", + "26 Four-Door Car 799.200000 \n", + "... ... ... 
\n", + "1059 SUV 720.752945 \n", + "1061 Four-Door Car 546.524896 \n", + "1068 Four-Door Car 633.600000 \n", + "1069 SUV 547.200000 \n", + "1070 Two-Door Car 537.600000 \n", + "\n", + "[264 rows x 11 columns]" + ] + }, + "execution_count": 47, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "high_claim_customers = df[df['Total Claim Amount'] > percentile_75]\n", + "high_claim_customers\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "id": "fe341300-5f3c-4687-bfc6-245003d33178", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
IncomeMonthly Premium AutoTotal Claim Amount
count264.000000264.000000264.000000
mean23677.344697165.193182782.228263
std27013.483721623.930992292.751640
min0.00000063.000000537.600000
25%0.00000099.000000606.521741
50%18807.000000114.000000679.597985
75%42423.750000133.250000851.400000
max99316.00000010202.0000002893.239678
\n", + "
" + ], + "text/plain": [ + " Income Monthly Premium Auto Total Claim Amount\n", + "count 264.000000 264.000000 264.000000\n", + "mean 23677.344697 165.193182 782.228263\n", + "std 27013.483721 623.930992 292.751640\n", + "min 0.000000 63.000000 537.600000\n", + "25% 0.000000 99.000000 606.521741\n", + "50% 18807.000000 114.000000 679.597985\n", + "75% 42423.750000 133.250000 851.400000\n", + "max 99316.000000 10202.000000 2893.239678" + ] + }, + "execution_count": 48, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "high_claim_summary = high_claim_customers.describe()\n", + "high_claim_summary\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09d36799-d398-470b-b3ee-7dd99bc9fb3d", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/lab-dw-pandas.ipynb b/lab-dw-pandas.ipynb index fbd468314..049ec7fa8 100644 --- a/lab-dw-pandas.ipynb +++ b/lab-dw-pandas.ipynb @@ -82,12 +82,3033 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 49, "id": "dd4e8cd8-a6f6-486c-a5c4-1745b0c035f4", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CustomerSTGENDEREducationCustomer Lifetime ValueIncomeMonthly Premium AutoNumber of Open ComplaintsPolicy TypeVehicle ClassTotal Claim Amount
0RB50392WashingtonNaNMasterNaN0.01000.01/0/00Personal AutoFour-Door Car2.704934
1QZ44356ArizonaFBachelor697953.59%0.094.01/0/00Personal AutoFour-Door Car1131.464935
2AI49188NevadaFBachelor1288743.17%48767.0108.01/0/00Personal AutoTwo-Door Car566.472247
3WW63253CaliforniaMBachelor764586.18%0.0106.01/0/00Corporate AutoSUV529.881344
4GA49547WashingtonMHigh School or Below536307.65%36357.068.01/0/00Personal AutoFour-Door Car17.269323
....................................
4003NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
4004NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
4005NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
4006NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
4007NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n", + "

4008 rows × 11 columns

\n", + "
" + ], + "text/plain": [ + " Customer ST GENDER Education \\\n", + "0 RB50392 Washington NaN Master \n", + "1 QZ44356 Arizona F Bachelor \n", + "2 AI49188 Nevada F Bachelor \n", + "3 WW63253 California M Bachelor \n", + "4 GA49547 Washington M High School or Below \n", + "... ... ... ... ... \n", + "4003 NaN NaN NaN NaN \n", + "4004 NaN NaN NaN NaN \n", + "4005 NaN NaN NaN NaN \n", + "4006 NaN NaN NaN NaN \n", + "4007 NaN NaN NaN NaN \n", + "\n", + " Customer Lifetime Value Income Monthly Premium Auto \\\n", + "0 NaN 0.0 1000.0 \n", + "1 697953.59% 0.0 94.0 \n", + "2 1288743.17% 48767.0 108.0 \n", + "3 764586.18% 0.0 106.0 \n", + "4 536307.65% 36357.0 68.0 \n", + "... ... ... ... \n", + "4003 NaN NaN NaN \n", + "4004 NaN NaN NaN \n", + "4005 NaN NaN NaN \n", + "4006 NaN NaN NaN \n", + "4007 NaN NaN NaN \n", + "\n", + " Number of Open Complaints Policy Type Vehicle Class \\\n", + "0 1/0/00 Personal Auto Four-Door Car \n", + "1 1/0/00 Personal Auto Four-Door Car \n", + "2 1/0/00 Personal Auto Two-Door Car \n", + "3 1/0/00 Corporate Auto SUV \n", + "4 1/0/00 Personal Auto Four-Door Car \n", + "... ... ... ... \n", + "4003 NaN NaN NaN \n", + "4004 NaN NaN NaN \n", + "4005 NaN NaN NaN \n", + "4006 NaN NaN NaN \n", + "4007 NaN NaN NaN \n", + "\n", + " Total Claim Amount \n", + "0 2.704934 \n", + "1 1131.464935 \n", + "2 566.472247 \n", + "3 529.881344 \n", + "4 17.269323 \n", + "... ... 
\n", + "4003 NaN \n", + "4004 NaN \n", + "4005 NaN \n", + "4006 NaN \n", + "4007 NaN \n", + "\n", + "[4008 rows x 11 columns]" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd \n", + "\n", + "URL = \"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file1.csv\"\n", + "\n", + "df = pd.read_csv(URL) \n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "id": "47349815-10da-4b5e-b816-d49a76ea7971", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Customer 2937\n", + "ST 2937\n", + "GENDER 3054\n", + "Education 2937\n", + "Customer Lifetime Value 2940\n", + "Income 2937\n", + "Monthly Premium Auto 2937\n", + "Number of Open Complaints 2937\n", + "Policy Type 2937\n", + "Vehicle Class 2937\n", + "Total Claim Amount 2937\n", + "dtype: int64" + ] + }, + "execution_count": 50, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Nan_values = df.isnull().sum()\n", + "Nan_values " + ] + }, + { + "cell_type": "markdown", + "id": "59ccf4f5-19c9-441a-91b4-12020fa64445", + "metadata": {}, + "source": [ + "- Identify the dimensions of the dataset by determining the number of rows and columns it contains.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "id": "2b32f0bb-48d8-400b-8066-06c596c9d870", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The dataset contains 4008 rows and 11 columns.\n" + ] + } + ], + "source": [ + "rows, columns = df.shape\n", + "print(f\"The dataset contains {rows} rows and {columns} columns.\")\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "id": "017c805f-ee46-4970-8309-b4b927603f6d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CustomerSTGENDEREducationCustomer Lifetime ValueIncomeMonthly Premium AutoNumber of Open ComplaintsPolicy TypeVehicle ClassTotal Claim Amount
1QZ44356ArizonaFBachelor697953.59%0.094.01/0/00Personal AutoFour-Door Car1131.464935
2AI49188NevadaFBachelor1288743.17%48767.0108.01/0/00Personal AutoTwo-Door Car566.472247
3WW63253CaliforniaMBachelor764586.18%0.0106.01/0/00Corporate AutoSUV529.881344
4GA49547WashingtonMHigh School or Below536307.65%36357.068.01/0/00Personal AutoFour-Door Car17.269323
5OC83172OregonFBachelor825629.78%62902.069.01/0/00Personal AutoTwo-Door Car159.383042
....................................
1066TM65736OregonMMaster305955.03%38644.078.01/1/00Personal AutoFour-Door Car361.455219
1067VJ51327CaliFHigh School or Below2031499.76%63209.0102.01/2/00Personal AutoSUV207.320041
1068GS98873ArizonaFBachelor323912.47%16061.088.01/0/00Personal AutoFour-Door Car633.600000
1069CW49887CaliforniaFMaster462680.11%79487.0114.01/0/00Special AutoSUV547.200000
1070MY31220CaliforniaFCollege899704.02%54230.0112.01/0/00Personal AutoTwo-Door Car537.600000
\n", + "

952 rows × 11 columns

\n", + "
" + ], + "text/plain": [ + " Customer ST GENDER Education \\\n", + "1 QZ44356 Arizona F Bachelor \n", + "2 AI49188 Nevada F Bachelor \n", + "3 WW63253 California M Bachelor \n", + "4 GA49547 Washington M High School or Below \n", + "5 OC83172 Oregon F Bachelor \n", + "... ... ... ... ... \n", + "1066 TM65736 Oregon M Master \n", + "1067 VJ51327 Cali F High School or Below \n", + "1068 GS98873 Arizona F Bachelor \n", + "1069 CW49887 California F Master \n", + "1070 MY31220 California F College \n", + "\n", + " Customer Lifetime Value Income Monthly Premium Auto \\\n", + "1 697953.59% 0.0 94.0 \n", + "2 1288743.17% 48767.0 108.0 \n", + "3 764586.18% 0.0 106.0 \n", + "4 536307.65% 36357.0 68.0 \n", + "5 825629.78% 62902.0 69.0 \n", + "... ... ... ... \n", + "1066 305955.03% 38644.0 78.0 \n", + "1067 2031499.76% 63209.0 102.0 \n", + "1068 323912.47% 16061.0 88.0 \n", + "1069 462680.11% 79487.0 114.0 \n", + "1070 899704.02% 54230.0 112.0 \n", + "\n", + " Number of Open Complaints Policy Type Vehicle Class \\\n", + "1 1/0/00 Personal Auto Four-Door Car \n", + "2 1/0/00 Personal Auto Two-Door Car \n", + "3 1/0/00 Corporate Auto SUV \n", + "4 1/0/00 Personal Auto Four-Door Car \n", + "5 1/0/00 Personal Auto Two-Door Car \n", + "... ... ... ... \n", + "1066 1/1/00 Personal Auto Four-Door Car \n", + "1067 1/2/00 Personal Auto SUV \n", + "1068 1/0/00 Personal Auto Four-Door Car \n", + "1069 1/0/00 Special Auto SUV \n", + "1070 1/0/00 Personal Auto Two-Door Car \n", + "\n", + " Total Claim Amount \n", + "1 1131.464935 \n", + "2 566.472247 \n", + "3 529.881344 \n", + "4 17.269323 \n", + "5 159.383042 \n", + "... ... 
\n", + "1066 361.455219 \n", + "1067 207.320041 \n", + "1068 633.600000 \n", + "1069 547.200000 \n", + "1070 537.600000 \n", + "\n", + "[952 rows x 11 columns]" + ] + }, + "execution_count": 52, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df= df.dropna()\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "id": "a4fd9658-bc8e-47b7-8700-d2fff532e239", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Customer 0\n", + "ST 0\n", + "GENDER 0\n", + "Education 0\n", + "Customer Lifetime Value 0\n", + "Income 0\n", + "Monthly Premium Auto 0\n", + "Number of Open Complaints 0\n", + "Policy Type 0\n", + "Vehicle Class 0\n", + "Total Claim Amount 0\n", + "dtype: int64" + ] + }, + "execution_count": 53, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_null = df.isnull().sum()\n", + "df_null" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "id": "4644ba16-2a7e-4de3-964f-606defd1c342", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(952, 11)\n" + ] + } + ], + "source": [ + "print(df.shape)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "id": "68f57ba7-9be6-40e7-9ee9-7793938ff673", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CustomerSTGENDEREducationCustomer Lifetime ValueIncomeMonthly Premium AutoNumber of Open ComplaintsPolicy TypeVehicle ClassTotal Claim Amount
1QZ44356ArizonaFBachelor697953.59%0.094.01/0/00Personal AutoFour-Door Car1131.464935
2AI49188NevadaFBachelor1288743.17%48767.0108.01/0/00Personal AutoTwo-Door Car566.472247
3WW63253CaliforniaMBachelor764586.18%0.0106.01/0/00Corporate AutoSUV529.881344
4GA49547WashingtonMHigh School or Below536307.65%36357.068.01/0/00Personal AutoFour-Door Car17.269323
5OC83172OregonFBachelor825629.78%62902.069.01/0/00Personal AutoTwo-Door Car159.383042
\n", + "
" + ], + "text/plain": [ + " Customer ST GENDER Education Customer Lifetime Value \\\n", + "1 QZ44356 Arizona F Bachelor 697953.59% \n", + "2 AI49188 Nevada F Bachelor 1288743.17% \n", + "3 WW63253 California M Bachelor 764586.18% \n", + "4 GA49547 Washington M High School or Below 536307.65% \n", + "5 OC83172 Oregon F Bachelor 825629.78% \n", + "\n", + " Income Monthly Premium Auto Number of Open Complaints Policy Type \\\n", + "1 0.0 94.0 1/0/00 Personal Auto \n", + "2 48767.0 108.0 1/0/00 Personal Auto \n", + "3 0.0 106.0 1/0/00 Corporate Auto \n", + "4 36357.0 68.0 1/0/00 Personal Auto \n", + "5 62902.0 69.0 1/0/00 Personal Auto \n", + "\n", + " Vehicle Class Total Claim Amount \n", + "1 Four-Door Car 1131.464935 \n", + "2 Two-Door Car 566.472247 \n", + "3 SUV 529.881344 \n", + "4 Four-Door Car 17.269323 \n", + "5 Two-Door Car 159.383042 " + ] + }, + "execution_count": 55, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "markdown", + "id": "77ab57bc-9681-4631-beb7-102344802cf5", + "metadata": {}, + "source": [ + "- Determine the data types of each column and evaluate whether they are appropriate for the nature of the variable. 
You should also provide suggestions for fixing any incorrect data types.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "id": "a2faff4a-9956-40f1-af46-bca592411911", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Customer object\n", + "ST object\n", + "GENDER object\n", + "Education object\n", + "Customer Lifetime Value object\n", + "Income float64\n", + "Monthly Premium Auto float64\n", + "Number of Open Complaints object\n", + "Policy Type object\n", + "Vehicle Class object\n", + "Total Claim Amount float64\n", + "dtype: object\n" + ] + } + ], + "source": [ + "data_types = df.dtypes\n", + "print(data_types)\n", + "\n", + "#Customer Lifetime Value should be float \n", + "#Number of Open Complaints\tshould be int" + ] + }, + { + "cell_type": "markdown", + "id": "4c8c9eac-7fe9-4ccb-9b82-20a6c74620d7", + "metadata": {}, + "source": [ + "- Identify the number of unique values for each column and determine which columns appear to be categorical. 
You should also describe the unique values of each categorical column and the range of values for numerical columns, and give your insights.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "id": "bb84b80c-7cf9-46d7-b53b-1ec7015233f6", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Customer:\n", + "['QZ44356' 'AI49188' 'WW63253' 'GA49547' 'OC83172' 'XZ87318' 'CF85061'\n", + " 'DY87989' 'BQ94931' 'SX51350' 'QK46697' 'HS14476' 'HD95276' 'YD87931'\n", + " 'NW21079' 'YB66933' 'OW15518' 'GP39118' 'SG20925' 'FM14335' 'HS28694'\n", + " 'LH92841' 'AZ95587' 'DS81757' 'OJ94107' 'LP84436' 'FF22360' 'LM19287'\n", + " 'ZU18643' 'AZ82578' 'XC67861' 'YC43143' 'EK59571' 'PA38372' 'RO18530'\n", + " 'PD27940' 'BS77946' 'YM50253' 'NR15332' 'RC62865' 'CC15295' 'KA61892'\n", + " 'OS94884' 'ND87334' 'OY51402' 'YL74911' 'GK92563' 'HL53154' 'RI78966'\n", + " 'IC13702' 'BE10809' 'HT87217' 'TH95618' 'TS19868' 'LP45550' 'QR87004'\n", + " 'OE75747' 'DX91392' 'AB72731' 'GX84338' 'IS12901' 'BN90616' 'HH90090'\n", + " 'IU25463' 'KC11055' 'PD33979' 'NK71023' 'AB13432' 'OZ97704' 'UF46533'\n", + " 'XP47431' 'GK73582' 'RV98763' 'II62831' 'XK33449' 'TR85083' 'EO95328'\n", + " 'EN21086' 'YL83902' 'AZ62651' 'ZW25874' 'EH41854' 'MW70227' 'SL22297'\n", + " 'RV14138' 'UO62808' 'ZX64745' 'FL34139' 'TS11219' 'XX12304' 'SD64087'\n", + " 'OY38576' 'BG76355' 'IP66913' 'LE95702' 'KX54357' 'EZ78112' 'XN16891'\n", + " 'XK31350' 'CC30924' 'IT78748' 'KY33386' 'CO44221' 'LK60013' 'DE21533'\n", + " 'YS94121' 'UK68427' 'TE49565' 'RA88421' 'KQ51983' 'CD88896' 'YV22553'\n", + " 'WU14435' 'XV84099' 'RI24911' 'KO26461' 'HI14283' 'PT50227' 'BH36570'\n", + " 'TX17484' 'CT41158' 'AO87348' 'DE55857' 'LF66923' 'CN24514' 'UW32074'\n", + " 'HP36979' 'PP40919' 'RO73268' 'HO61691' 'BS13062' 'FO35655' 'HR10526'\n", + " 'IA63417' 'BH35016' 'PK52952' 'OD76309' 'IL28481' 'GY55092' 'UF33451'\n", + " 'CF15558' 'JM62924' 'EM66435' 'QX45933' 
'JI71369' 'JU93290' 'GU66096'\n", + " 'UC33108' 'LW93867' 'OU78470' 'XW90265' 'HS67749' 'VZ51506' 'UI64281'\n", + " 'AE98193' 'AZ74055' 'XS76911' 'AY40674' 'NA12740' 'UA84837' 'DJ51510'\n", + " 'VM58985' 'OH60605' 'UO98052' 'NC53424' 'LQ13873' 'LA97014' 'NB79936'\n", + " 'NT89061' 'AF10970' 'ZG48513' 'JQ59145' 'FE84989' 'JT52858' 'MC62068'\n", + " 'EU27538' 'RH42306' 'US23612' 'WV76014' 'RK96223' 'MF82000' 'FM46980'\n", + " 'SY56792' 'RF61565' 'IM94808' 'VI14730' 'YR34119' 'RR77985' 'QD28391'\n", + " 'WV17090' 'TM23514' 'MQ68407' 'GJ59592' 'FY56083' 'UA94723' 'FW91032'\n", + " 'DE34457' 'HD32044' 'HH30454' 'AH84063' 'QA17596' 'XI41052' 'DI30528'\n", + " 'SC66359' 'EN61670' 'DQ10761' 'BQ51587' 'JE21522' 'WS47147' 'ZA64638'\n", + " 'EW38459' 'QW87316' 'IC43478' 'TE34064' 'WU60905' 'YM18992' 'PD55753'\n", + " 'KU56006' 'MJ69973' 'TW43626' 'XX84133' 'ZW84453' 'HO29524' 'VE89726'\n", + " 'GE87503' 'PX90263' 'NI17718' 'FY32213' 'RZ13254' 'GN45013' 'NM39588'\n", + " 'KU84464' 'YH43527' 'RO30676' 'QL59704' 'QH19450' 'SA54664' 'CI38330'\n", + " 'WB38524' 'CE56187' 'JL19416' 'JZ61422' 'LA13377' 'NC99948' 'QD34785'\n", + " 'RO26085' 'ES57969' 'JK55587' 'RN97635' 'BI76326' 'JA34909' 'OJ90342'\n", + " 'CM88932' 'JJ97525' 'XV21647' 'MC83487' 'BL90769' 'CR57148' 'CP85232'\n", + " 'YL74732' 'FG16766' 'NV55438' 'RM10880' 'GL56175' 'UK52289' 'OT85112'\n", + " 'BC62782' 'TI19722' 'JP30654' 'UM45563' 'EN60878' 'JF36291' 'BK59444'\n", + " 'MK70700' 'IW71076' 'AP98768' 'OM24164' 'HR85211' 'VC87846' 'ZM92052'\n", + " 'ON73702' 'QQ90441' 'HU35721' 'YP47665' 'FU99476' 'AG85615' 'OY74069'\n", + " 'DJ91267' 'KB72438' 'TR67616' 'GF65731' 'HB67642' 'DP84567' 'VV77534'\n", + " 'GL67540' 'SV50502' 'UK59698' 'OA57352' 'ZF84449' 'AX86150' 'HG39060'\n", + " 'EM29359' 'SF57173' 'OT47603' 'SW31412' 'JS36322' 'RE81445' 'RM24280'\n", + " 'LC25393' 'UX38930' 'HD95496' 'RX24650' 'DW19309' 'MT41386' 'WZ40465'\n", + " 'DB42794' 'JB50798' 'IP69763' 'TE35785' 'HX74855' 'QN65180' 'GE47180'\n", + " 'VQ38776' 
'BH86846' 'IN17648' 'DF95759' 'QG45324' 'MN61620' 'YH86390'\n", + " 'FY13480' 'YH61661' 'NL93182' 'WE68644' 'EZ30498' 'QY74517' 'NM88660'\n", + " 'MZ82036' 'ID20929' 'EY50028' 'TT82373' 'OH64088' 'SK97780' 'IO33050'\n", + " 'XA55917' 'JK32620' 'RQ19236' 'QC47433' 'RA93608' 'XH97711' 'AU96286'\n", + " 'KC17170' 'ZN47335' 'EI46264' 'EK87864' 'GV45403' 'QK31192' 'LU89008'\n", + " 'NS10490' 'KL98495' 'IU96845' 'QL93655' 'PF40592' 'LZ34046' 'JC80093'\n", + " 'YE88490' 'YC80498' 'AI85843' 'XD66024' 'FY51713' 'PH26378' 'WQ18638'\n", + " 'KY14688' 'TC97762' 'QC87108' 'CX12134' 'SM73248' 'CK19789' 'UV12583'\n", + " 'JC11405' 'KA89683' 'BG85305' 'UQ87917' 'XN11823' 'OS46571' 'PX17116'\n", + " 'RP19541' 'ZR25747' 'NQ86532' 'JY27336' 'PB54378' 'SV38190' 'CV24005'\n", + " 'EX28656' 'CF57022' 'GM16780' 'BX94438' 'RM41745' 'XR70252' 'YH92099'\n", + " 'SG81493' 'ZX23819' 'FJ54907' 'CU26127' 'YH60476' 'ZZ97035' 'GE82737'\n", + " 'KY21873' 'UA51318' 'BV55014' 'HX21307' 'LQ68252' 'CR92802' 'SL35268'\n", + " 'RD62882' 'JS42382' 'BT30554' 'VP57424' 'VU19243' 'TA82973' 'GK71720'\n", + " 'OQ61223' 'LL62746' 'JQ56711' 'AW77988' 'QP84605' 'MY97912' 'IB87349'\n", + " 'AW73065' 'BW80872' 'PX70175' 'KF75098' 'IS50283' 'MY64920' 'KN34250'\n", + " 'GN46207' 'KL57176' 'MN94234' 'JY90595' 'HK26543' 'PN86062' 'VW27730'\n", + " 'SH55671' 'MO56878' 'VO38365' 'SV35618' 'RX12347' 'FR55658' 'XS12556'\n", + " 'ZU73588' 'WT43034' 'VM13430' 'TC78849' 'VC34764' 'WO90953' 'IU47468'\n", + " 'KO46064' 'RB34917' 'BI38192' 'PU18983' 'SW79912' 'ES39217' 'KP72427'\n", + " 'UA19178' 'PR53785' 'XF57481' 'CN90378' 'KI56154' 'UI55951' 'FF28650'\n", + " 'FS55302' 'TN79487' 'HG32616' 'UK41984' 'LZ52266' 'PM27367' 'ZK21724'\n", + " 'BH35482' 'QE22757' 'ON77649' 'RN82884' 'CQ75652' 'FF58467' 'BS83666'\n", + " 'WO29605' 'TL77607' 'EZ50606' 'OS39723' 'FN69743' 'XW96958' 'TU92578'\n", + " 'TL43709' 'YE68736' 'OB96537' 'EU68825' 'CC31456' 'DJ77787' 'LN26837'\n", + " 'YI92916' 'NW54906' 'ME77513' 'UK76891' 'SI26888' 'YD74948' 
'HB64268'\n", + " 'BW52697' 'NL41409' 'OD69005' 'ZZ91716' 'UK70255' 'QT25383' 'AW18068'\n", + " 'NS45347' 'FV19421' 'XW89091' 'YC11951' 'UY18770' 'RA49085' 'BG84194'\n", + " 'PT64580' 'MR67738' 'DM95829' 'DB75522' 'LM34525' 'WW30771' 'QP65569'\n", + " 'TN50051' 'UO86707' 'JA41698' 'NX18774' 'DA69469' 'CN23147' 'RA68844'\n", + " 'GH42026' 'BD16530' 'JH91579' 'WK23685' 'GR62267' 'PI78084' 'GF97874'\n", + " 'ZH19885' 'UK25655' 'QR45101' 'EL93539' 'EE99484' 'DP46882' 'WP41146'\n", + " 'TK60799' 'DN29808' 'SS59521' 'NG66579' 'TC14209' 'ED50963' 'GP40701'\n", + " 'CP98451' 'NX52648' 'ZC32510' 'NG27780' 'HN95240' 'EB59129' 'RA70851'\n", + " 'PM19162' 'MS59005' 'SU71163' 'BD35676' 'NI44621' 'EW33419' 'HX44948'\n", + " 'DL36983' 'XR87264' 'NN99001' 'XV95530' 'OL97871' 'HQ23708' 'WR63188'\n", + " 'NG82219' 'KU29408' 'RE46783' 'RU94434' 'GI82355' 'VO26340' 'NV61299'\n", + " 'DX31066' 'CY50337' 'TJ20375' 'EP72155' 'JJ76159' 'BG15419' 'AO74776'\n", + " 'HQ82233' 'OL72737' 'ZQ59828' 'NZ15548' 'XK61304' 'EJ44139' 'CM94425'\n", + " 'OV54878' 'JF57282' 'MY37953' 'XP64922' 'WL65572' 'LN50325' 'HJ15383'\n", + " 'KH59823' 'YM79169' 'DR38127' 'PU42145' 'KM33477' 'RI53167' 'OF77789'\n", + " 'YB33445' 'BA17836' 'JS43228' 'BB11622' 'HQ70429' 'WK88044' 'LA80525'\n", + " 'EH16250' 'PU41872' 'HB85743' 'MM71959' 'MB83663' 'KR43119' 'KH24214'\n", + " 'AC40767' 'HP55391' 'EG62398' 'VS19949' 'AM92343' 'GI68556' 'JT11876'\n", + " 'XR64251' 'MK34957' 'GP18756' 'AP23850' 'KQ65521' 'EJ19449' 'QB70027'\n", + " 'QW47320' 'KH64733' 'ON59472' 'HP94242' 'RV15398' 'EA25683' 'PW73754'\n", + " 'MC71942' 'OX72195' 'YQ99152' 'KI19439' 'PM76175' 'US45383' 'GT38956'\n", + " 'SN41301' 'BE62503' 'PA16884' 'NC58480' 'NS39326' 'PN18507' 'EK91340'\n", + " 'JY16280' 'ZW71731' 'ZC24631' 'YR34689' 'RT65829' 'BZ12077' 'WM65373'\n", + " 'NH35059' 'QD38160' 'BM15160' 'VY79030' 'EV19512' 'TE13577' 'WY97929'\n", + " 'YG20683' 'FK75497' 'NE60110' 'TN36521' 'HG33568' 'TW17878' 'ZO83562'\n", + " 'CH97539' 'CV29889' 'MO33320' 
'QZ81258' 'NY56352' 'EA27048' 'UT38865'\n", + " 'QC89139' 'LA14484' 'HN57556' 'CV31235' 'WR45726' 'LB25094' 'KW56110'\n", + " 'XO36233' 'ZX86243' 'DW29763' 'CT83377' 'OQ90898' 'GO77248' 'QW33258'\n", + " 'OU79745' 'VZ79886' 'FI92440' 'YG85980' 'QM74621' 'EI71732' 'VN79010'\n", + " 'FI61723' 'OH55411' 'TF10720' 'NW30838' 'CB58476' 'WI69346' 'FS76657'\n", + " 'YX89016' 'PK28821' 'MB51200' 'XG44587' 'FG91922' 'OM99303' 'RV67546'\n", + " 'UJ79253' 'PN98247' 'IB67546' 'OE19087' 'CM95716' 'MW62634' 'QW67581'\n", + " 'SN16059' 'OE51254' 'RM42344' 'GB35238' 'ML82674' 'EI85244' 'DE28132'\n", + " 'TV25678' 'TY26512' 'OB69153' 'QZ77637' 'XN41715' 'QR15857' 'FL69363'\n", + " 'IS30295' 'WA25797' 'NL59519' 'ZU93025' 'DK94262' 'UQ30615' 'OR40060'\n", + " 'DK32872' 'FA46418' 'ER19995' 'KI75855' 'ND41876' 'PN21042' 'GJ43254'\n", + " 'AL46984' 'JP58047' 'ZE85014' 'KU88219' 'UU98729' 'WS82822' 'YB49933'\n", + " 'XC16387' 'XJ96748' 'TM98684' 'AY18433' 'DM74502' 'FT56968' 'OX36896'\n", + " 'BZ65376' 'LN34660' 'JC29295' 'KJ87930' 'XT36360' 'IX35050' 'UN97379'\n", + " 'MR57294' 'UG79499' 'UA50747' 'GL20444' 'SP58110' 'XM91635' 'TV82603'\n", + " 'BB82067' 'JP94676' 'VU53417' 'IW54795' 'RN78170' 'IX55883' 'XM72420'\n", + " 'GC15104' 'RX13282' 'QA85890' 'IR62668' 'AL96740' 'SS48498' 'PE39479'\n", + " 'JH62891' 'FI20423' 'PM13394' 'YV67971' 'QD31377' 'YG10247' 'FE73696'\n", + " 'SW19699' 'QJ40732' 'HM76207' 'NT59303' 'PU41393' 'QO86948' 'QN10888'\n", + " 'VY19543' 'XC15133' 'ST43550' 'FX36546' 'JX68983' 'HX78576' 'ZQ11381'\n", + " 'ON39271' 'SB18278' 'ZT30559' 'XI41106' 'ZS88847' 'RU49126' 'KR62797'\n", + " 'ZJ73220' 'FY62633' 'CU36986' 'WZ53904' 'AA71604' 'TD10493' 'LY97989'\n", + " 'VX39856' 'TP51897' 'QQ89253' 'EI91403' 'QG15435' 'FZ55002' 'HX77930'\n", + " 'UN37063' 'VB87946' 'AB60627' 'TA34903' 'AQ51368' 'NZ26102' 'GB45753'\n", + " 'BV79904' 'OB49075' 'DS97676' 'JO63462' 'NJ10602' 'RS24501' 'VT78274'\n", + " 'SU56153' 'MN20737' 'KL43114' 'YQ15567' 'TR88637' 'TC88986' 'XX88577'\n", + " 
'NE49052' 'KX17826' 'CC91503' 'WH32183' 'ES90681' 'DW96592' 'MT23134'\n", + " 'BM69081' 'MB90871' 'QL77686' 'ON77827' 'KP18988' 'TI92884' 'JH73503'\n", + " 'YE97964' 'VA30351' 'PV55726' 'UC88305' 'TS53809' 'ZV32120' 'FB80807'\n", + " 'AS55677' 'WA15684' 'SA50567' 'KJ31611' 'VL37375' 'KN21017' 'PX44289'\n", + " 'AM97901' 'RE42925' 'TR81766' 'CH85057' 'UP71482' 'EG40670' 'HV83672'\n", + " 'MG10140' 'TC44716' 'QO65264' 'EB66698' 'OT52034' 'CH85444' 'PU85769'\n", + " 'UI73201' 'SL50592' 'XP11075' 'SI31236' 'JN26745' 'VK48036' 'JX76668'\n", + " 'DS45802' 'OA96690' 'EM27919' 'QO41043' 'OV50124' 'PR31642' 'BU41599'\n", + " 'TK30357' 'NF31087' 'NH16984' 'OS75493' 'VT63298' 'QS75550' 'SZ16483'\n", + " 'VM92311' 'NJ46849' 'WZ31900' 'RG30482' 'ZM86949' 'QQ39596' 'FH51383'\n", + " 'BJ53923' 'CZ96653' 'FB23788' 'NT43594' 'RJ85627' 'KJ86296' 'PI47776'\n", + " 'MD73554' 'UX92071' 'YG44474' 'UH45301' 'RY92647' 'IK12620' 'GQ66762'\n", + " 'YT69858' 'XD85577' 'TM65736' 'VJ51327' 'GS98873' 'CW49887' 'MY31220']\n", + "\n", + "\n", + "ST:\n", + "['Arizona' 'Nevada' 'California' 'Washington' 'Oregon' 'AZ' 'WA' 'Cali']\n", + "\n", + "\n", + "GENDER:\n", + "['F' 'M' 'Femal' 'Male' 'female']\n", + "\n", + "\n", + "Education:\n", + "['Bachelor' 'High School or Below' 'College' 'Master' 'Bachelors' 'Doctor']\n", + "\n", + "\n", + "Customer Lifetime Value:\n", + "['697953.59%' '1288743.17%' '764586.18%' '536307.65%' '825629.78%'\n", + " '538089.86%' '721610.03%' '2412750.40%' '738817.81%' '473899.20%'\n", + " '617710.93%' '916206.32%' '473787.17%' '495165.61%' '487938.48%'\n", + " '538275.20%' '1595001.95%' '499655.27%' '518579.76%' '1048491.54%'\n", + " '282986.39%' '725595.38%' '1038855.32%' '247012.12%' '561906.85%'\n", + " '904711.92%' '268731.41%' '373150.46%' '366077.03%' '792882.93%'\n", + " '501175.16%' '798825.83%' '388545.64%' '935773.78%' '254068.98%'\n", + " '488516.25%' '975330.71%' '294615.37%' '258111.09%' '351738.58%'\n", + " '974335.01%' '387364.70%' '764928.20%' '228759.69%' 
'825576.39%'\n", + " '871492.21%' '681923.12%' '741619.73%' '777115.90%' '696834.19%'\n", + " '425028.26%' '1977656.65%' '2134346.60%' '241313.97%' '1536384.72%'\n", + " '845696.19%' '218964.25%' '578018.22%' '463998.16%' '382443.13%'\n", + " '596811.89%' '859033.50%' '407663.47%' '1225260.18%' '1693627.15%'\n", + " '489243.55%' '994230.48%' '373583.81%' '1311752.22%' '457452.41%'\n", + " '547006.06%' '297884.60%' '641096.75%' '447902.31%' '238373.19%'\n", + " '276449.37%' '792010.54%' '688909.80%' '327419.46%' '995170.77%'\n", + " '252155.57%' '2370611.34%' '604702.52%' '1114030.25%' '433406.41%'\n", + " '279974.79%' '792313.66%' '368811.09%' '1206745.60%' '292497.67%'\n", + " '1501409.27%' '927723.38%' '627412.39%' '388664.74%' '438627.76%'\n", + " '1136526.77%' '561096.43%' '291289.20%' '691572.99%' '626266.33%'\n", + " '650339.70%' '800739.94%' '292991.65%' '596955.30%' '547315.99%'\n", + " '564539.67%' '636926.24%' '1183376.73%' '612110.79%' '515936.97%'\n", + " '251459.20%' '866861.13%' '496096.54%' '550413.90%' '750745.54%'\n", + " '3226985.14%' '565703.16%' '506175.79%' '591278.38%' '1518227.98%'\n", + " '1074703.09%' '205062.35%' '246544.49%' '534312.13%' '811982.91%'\n", + " '460526.52%' '640878.56%' '237653.35%' '321107.00%' '509452.23%'\n", + " '2575527.82%' '867222.97%' '804473.07%' '400151.91%' '1670611.70%'\n", + " '854441.11%' '780531.29%' '611275.69%' '477294.38%' '1097909.56%'\n", + " '500426.38%' '1322304.38%' '262331.54%' '1784019.56%' '510611.18%'\n", + " '1793060.45%' '545734.26%' '656364.41%' '481252.52%' '2932804.19%'\n", + " '577352.07%' '684711.89%' '359531.29%' '2285561.21%' '785941.46%'\n", + " '411557.74%' '502963.88%' '482141.85%' '500431.05%' '863005.39%'\n", + " '932208.51%' '1672756.06%' '365253.24%' '615860.12%' '437608.40%'\n", + " '556945.62%' '257651.30%' '834698.32%' '632392.39%' '597314.34%'\n", + " '470667.70%' '809341.03%' '503574.46%' '902786.72%' '728888.48%'\n", + " '1804247.94%' '499206.30%' '910226.78%' '254040.77%' 
'1294189.19%'\n", + " '236534.86%' '572076.51%' '703553.41%' '263697.77%' '539583.20%'\n", + " '838263.01%' '3116174.52%' '288774.23%' '742584.53%' '2558572.78%'\n", + " '1027260.82%' '437636.36%' '531889.66%' '471976.22%' '442803.16%'\n", + " '587917.61%' '941690.85%' '828815.56%' '3265483.83%' '471945.01%'\n", + " '390347.48%' '545725.97%' '272535.64%' '443397.37%' '533246.27%'\n", + " '231509.50%' '541195.37%' '958733.23%' '2210350.72%' '976494.53%'\n", + " '256715.15%' '265062.28%' '1126436.33%' '216852.35%' '861066.75%'\n", + " '283464.62%' '893013.97%' '553638.70%' '284085.43%' '808288.10%'\n", + " '525473.43%' '511623.76%' '303464.70%' '802489.99%' '1821114.32%'\n", + " '512156.33%' '215017.86%' '559538.99%' '756282.40%' '538585.32%'\n", + " '267805.83%' '942256.79%' '360586.03%' '776259.06%' '2344490.05%'\n", + " '255817.82%' '265438.10%' '254978.61%' '296959.33%' '436312.46%'\n", + " '588430.86%' '527219.16%' '550989.57%' '1631368.35%' '567805.02%'\n", + " '1210120.88%' '245357.08%' '507566.27%' '321497.94%' '1227534.31%'\n", + " '272221.07%' '245744.09%' '355484.53%' '492954.97%' '803645.03%'\n", + " '427691.53%' '3347334.95%' '596058.14%' '4479546.94%' '383211.81%'\n", + " '683793.26%' '528526.82%' '309651.12%' '358971.07%' '258240.85%'\n", + " '340391.94%' '1357567.60%' '343613.43%' '2868582.79%' '450267.97%'\n", + " '618311.15%' '1892933.06%' '555329.58%' '501125.92%' '1044244.63%'\n", + " '219961.78%' '512317.09%' '748431.05%' '261302.31%' '908063.97%'\n", + " '1377097.62%' '287682.29%' '540891.15%' '677030.68%' '2414387.56%'\n", + " '353805.95%' '2909123.94%' '1983420.12%' '473136.70%' '3553784.60%'\n", + " '3461137.90%' '2021630.88%' '1397651.93%' '590408.82%' '559583.50%'\n", + " '229430.36%' '627391.19%' '372672.80%' '265671.31%' '511068.08%'\n", + " '712659.65%' '460163.41%' '915523.97%' '1480805.62%' '890167.84%'\n", + " '573459.82%' '417769.70%' '2777628.91%' '1036434.75%' '785190.14%'\n", + " '1422650.49%' '287543.24%' '504129.96%' 
'436293.12%' '962452.44%'\n", + " '2191440.55%' '694842.22%' '247152.84%' '2190391.36%' '902882.14%'\n", + " '530375.95%' '2070825.88%' '512376.81%' '949234.30%' '820486.32%'\n", + " '987729.57%' '481500.97%' '627701.17%' '826063.98%' '254945.00%'\n", + " '380392.18%' '863540.35%' '551055.90%' '358588.41%' '488925.28%'\n", + " '275694.17%' '328954.74%' '1093717.85%' '737556.79%' '1011077.82%'\n", + " '511941.43%' '853383.20%' '222476.80%' '804280.38%' '255443.71%'\n", + " '1807394.00%' '243050.66%' '316765.84%' '546560.40%' '1035751.42%'\n", + " '253781.39%' '282194.72%' '775712.81%' '407913.27%' '595554.46%'\n", + " '1415861.36%' '848723.80%' '628547.69%' '1147348.15%' '494263.06%'\n", + " '2359468.02%' '257250.66%' '477055.09%' '675665.14%' '593601.18%'\n", + " '357076.05%' '601996.05%' '542686.40%' '273031.38%' '498268.14%'\n", + " '876926.68%' '422061.35%' '1153750.51%' '588718.20%' '470058.38%'\n", + " '1096395.72%' '252317.12%' '375780.47%' '1294173.35%' '376446.51%'\n", + " '688955.70%' '362345.42%' '758211.38%' '827774.56%' '257645.56%'\n", + " '820538.79%' '1958246.89%' '648152.66%' '259931.09%' '983033.76%'\n", + " '1044265.14%' '3605753.70%' '847003.68%' '827878.65%' '478893.26%'\n", + " '308799.99%' '444373.62%' '798408.65%' '718097.10%' '1565603.43%'\n", + " '2071494.04%' '533735.24%' '505082.62%' '511662.40%' '726873.70%'\n", + " '261661.39%' '373843.62%' '545386.12%' '684615.03%' '617291.42%'\n", + " '1034632.45%' '699700.86%' '419625.77%' '785810.98%' '606434.40%'\n", + " '1749752.20%' '897064.73%' '592311.72%' '3585059.94%' '870984.53%'\n", + " '1330933.52%' '452850.49%' '279190.65%' '443441.12%' '799600.75%'\n", + " '512973.90%' '569717.52%' '921713.06%' '1020892.76%' '417068.73%'\n", + " '450540.58%' '310756.86%' '552866.50%' '504586.67%' '296272.25%'\n", + " '866595.64%' '1141344.12%' '1548843.20%' '886114.95%' '593474.15%'\n", + " '354323.21%' '349002.83%' '368309.99%' '598977.39%' '1250084.30%'\n", + " '860915.82%' '249745.51%' '701917.72%' 
'538792.63%' '616555.75%'\n", + " '273020.29%' '516211.69%' '793706.48%' '860815.72%' '263254.58%'\n", + " '778500.42%' '2163983.86%' '498082.50%' '745723.78%' '1064093.93%'\n", + " '686250.83%' '904898.34%' '554803.19%' '831268.16%' '238998.10%'\n", + " '445811.34%' '529574.17%' '383960.61%' '277890.37%' '401654.20%'\n", + " '493094.93%' '249131.70%' '290887.59%' '428294.80%' '834162.37%'\n", + " '509078.13%' '736618.83%' '243687.51%' '885268.87%' '2387547.68%'\n", + " '560049.65%' '463654.65%' '757334.51%' '1469663.55%' '897214.03%'\n", + " '772484.01%' '594667.07%' '800054.51%' '645756.10%' '728144.01%'\n", + " '259243.78%' '467842.34%' '1386992.71%' '871777.78%' '523398.68%'\n", + " '476418.97%' '247246.92%' '281369.26%' '550505.70%' '260620.85%'\n", + " '1048194.38%' '325676.64%' '3047578.05%' '636490.22%' '946850.93%'\n", + " '563145.19%' '778099.93%' '981652.83%' '751913.36%' '1017971.70%'\n", + " '277283.92%' '403750.18%' '419196.61%' '267686.79%' '252395.96%'\n", + " '698840.16%' '874205.78%' '267331.96%' '1215732.99%' '295776.40%'\n", + " '717390.94%' '309953.80%' '841568.46%' '2684312.45%' '1305717.07%'\n", + " '959995.02%' '853510.89%' '829348.19%' '663685.98%' '560908.25%'\n", + " '507732.09%' '527562.70%' '343525.01%' '662461.18%' '575744.23%'\n", + " '288645.16%' '534143.88%' '416001.81%' '284624.54%' '477025.66%'\n", + " '505961.62%' '909574.46%' '268886.40%' '827763.76%' '905793.53%'\n", + " '380175.04%' '933934.16%' '252012.32%' '498409.53%' '259574.80%'\n", + " '743769.33%' '1453678.76%' '591330.59%' '277166.30%' '2919436.64%'\n", + " '988038.58%' '1511440.24%' '575991.08%' '849516.42%' '438118.42%'\n", + " '699782.74%' '1143058.85%' '748248.61%' '859691.66%' '785496.08%'\n", + " '907576.82%' '411858.86%' '515281.96%' '651297.65%' '2778969.24%'\n", + " '266727.00%' '1092840.71%' '761538.13%' '200435.07%' '243468.12%'\n", + " '1419536.03%' '942768.49%' '1198242.09%' '310278.95%' '422263.12%'\n", + " '402381.44%' '529715.18%' '2142363.72%' 
'441620.62%' '463903.52%'\n", + " '486354.46%' '2583090.98%' '297431.49%' '206445.88%' '1006460.83%'\n", + " '803240.19%' '548921.41%' '261275.67%' '857346.39%' '855038.66%'\n", + " '230864.80%' '425462.07%' '898285.04%' '786816.60%' '770424.87%'\n", + " '1055217.00%' '1604510.95%' '873783.75%' '545489.07%' '770528.33%'\n", + " '703926.24%' '883808.56%' '873352.73%' '959747.48%' '450666.02%'\n", + " '1785797.23%' '249780.82%' '542613.62%' '799814.38%' '289762.07%'\n", + " '1159950.22%' '1514793.06%' '543576.78%' '1329771.23%' '992704.97%'\n", + " '1777154.90%' '1826927.02%' '708321.24%' '588950.91%' '1577139.34%'\n", + " '528817.33%' '2758055.40%' '777853.23%' '734186.13%' '791919.70%'\n", + " '216387.02%' '978780.88%' '520764.08%' '2114727.72%' '1228076.66%'\n", + " '244139.42%' '653556.06%' '920659.83%' '483820.90%' '462554.81%'\n", + " '1404210.30%' '754661.35%' '517035.84%' '1391737.72%' '1131813.08%'\n", + " '427636.36%' '432224.03%' '800230.83%' '447177.82%' '849635.28%'\n", + " '2250088.35%' '1630196.76%' '231973.59%' '871704.98%' '286011.17%'\n", + " '245340.83%' '2498022.55%' '748263.95%' '245757.60%' '237974.12%'\n", + " '828696.44%' '239391.54%' '465715.95%' '1319792.89%' '280391.67%'\n", + " '864650.41%' '742587.06%' '452873.74%' '222707.28%' '729294.88%'\n", + " '318435.52%' '1131520.37%' '253070.51%' '1836155.53%' '864153.00%'\n", + " '327853.19%' '527198.21%' '376363.77%' '1395556.96%' '500152.75%'\n", + " '708283.04%' '761948.28%' '1255088.20%' '3219660.04%' '679377.41%'\n", + " '527231.97%' '626534.33%' '854758.61%' '278742.37%' '462680.11%'\n", + " '866336.40%' '387222.22%' '517081.15%' '896028.02%' '547183.43%'\n", + " '3493100.17%' '262039.23%' '1906949.95%' '4022401.36%' '270148.83%'\n", + " '371243.05%' '2185084.00%' '784016.58%' '823703.79%' '224347.39%'\n", + " '843446.41%' '517002.60%' '264144.61%' '279068.30%' '274512.98%'\n", + " '757953.27%' '1097878.03%' '825506.01%' '474773.46%' '354090.43%'\n", + " '258218.53%' '1166509.78%' 
'739628.37%' '433079.98%' '1463545.16%'\n", + " '856476.82%' '1156568.75%' '277104.50%' '850712.88%' '332309.25%'\n", + " '523433.17%' '2470959.96%' '1778627.78%' '255122.67%' '724771.37%'\n", + " '453884.78%' '3164210.46%' '873042.20%' '833273.06%' '235774.70%'\n", + " '463716.40%' '535719.27%' '539197.10%' '369414.05%' '504041.24%'\n", + " '2749542.19%' '484228.50%' '2738281.89%' '522710.19%' '259009.60%'\n", + " '1053607.80%' '858127.87%' '946311.33%' '1344100.64%' '388650.48%'\n", + " '678489.37%' '253862.63%' '563994.20%' '1168137.43%' '2599775.00%'\n", + " '1377836.93%' '492318.17%' '227233.54%' '1489539.80%' '975604.50%'\n", + " '942297.41%' '383735.76%' '1095213.19%' '815913.66%' '1948049.98%'\n", + " '391936.67%' '798514.21%' '1216874.49%' '584932.15%' '508583.66%'\n", + " '290393.98%' '627317.34%' '1832141.90%' '517870.42%' '1402435.84%'\n", + " '530943.59%' '494980.38%' '859566.53%' '2295189.20%' '379213.03%'\n", + " '275574.80%' '488033.96%' '905190.53%' '1011544.62%' '826907.54%'\n", + " '807165.30%' '772699.36%' '831113.59%' '257402.04%' '572732.71%'\n", + " '367914.21%' '274451.96%' '563674.03%' '1687038.82%' '273800.20%'\n", + " '464470.05%' '474668.65%' '1687432.82%' '238760.61%' '446533.57%'\n", + " '459162.59%' '1309258.58%' '911226.66%' '416516.66%' '265998.06%'\n", + " '1198659.21%' '289873.27%' '289424.39%' '350045.44%' '262180.86%'\n", + " '1022180.50%' '257827.10%' '624259.57%' '522028.10%' '313643.21%'\n", + " '452536.58%' '497035.73%' '1546778.90%' '583889.92%' '1065688.20%'\n", + " '228961.87%' '543980.42%' '795615.01%' '255505.15%' '330799.90%'\n", + " '845905.32%' '1335012.09%' '493122.13%' '777683.52%' '255367.22%'\n", + " '487646.97%' '903430.58%' '810591.08%' '561968.91%' '1572713.06%'\n", + " '661801.64%' '467004.80%' '1016936.98%' '832307.40%' '241776.00%'\n", + " '804487.24%' '532572.45%' '694752.40%' '584741.52%' '472478.61%'\n", + " '279022.80%' '2153133.28%' '1262283.27%' '2017196.15%' '1646436.59%'\n", + " '266544.71%' 
'709891.41%' '397134.51%' '552821.28%' '833899.58%'\n", + " '3844585.59%' '544855.52%' '1080806.60%' '618509.65%' '320822.59%'\n", + " '548010.41%' '2298615.39%' '1310792.59%' '746292.63%' '1146399.10%'\n", + " '723613.25%' '623268.79%' '2839332.99%' '374675.16%' '2156933.73%'\n", + " '501208.37%' '337185.84%' '386477.68%' '414571.19%' '515607.27%'\n", + " '366737.50%' '783568.35%' '1456726.84%' '1017133.90%' '413577.52%'\n", + " '551149.11%' '1131424.39%' '541461.73%' '742159.35%' '1447612.49%'\n", + " '493688.84%' '452527.65%' '558176.13%' '1413434.74%' '2472318.31%'\n", + " '283806.78%' '384848.36%' '1950447.39%' '248004.59%' '436137.29%'\n", + " '252907.75%' '250444.48%' '864970.06%' '1366835.53%' '2063508.46%'\n", + " '251753.36%' '532667.77%' '260027.21%' '853479.28%' '882883.50%'\n", + " '224844.96%' '1230276.24%' '455659.30%' '525198.40%' '674311.93%'\n", + " '1401472.13%' '943891.56%' '1050677.17%' '421391.86%' '477368.64%'\n", + " '544142.01%' '284226.69%' '305955.03%' '2031499.76%' '323912.47%'\n", + " '899704.02%']\n", + "\n", + "\n", + "Income:\n", + "[ 0. 48767. 36357. 62902. 55350. 14072. 28812. 61040. 29723. 46896.\n", + " 67163. 77552. 71600. 99428. 61108. 25317. 88891. 61222. 50335. 82210.\n", + " 64495. 28859. 77330. 33060. 42557. 26372. 17514. 89270. 29757. 51814.\n", + " 24028. 28142. 52705. 54040. 22492. 21876. 70699. 27501. 15897. 25141.\n", + " 51066. 28358. 62530. 90972. 63110. 29549. 39411. 21709. 67890. 84311.\n", + " 99316. 54507. 64586. 61709. 94656. 61085. 89284. 31686. 56855. 53703.\n", + " 20396. 27679. 23904. 65351. 64459. 32961. 71416. 68964. 78108. 10621.\n", + " 84910. 77493. 81097. 96610. 30110. 22081. 98473. 97431. 93870. 50366.\n", + " 34498. 16552. 26787. 43860. 21474. 18174. 60920. 41520. 72208. 53863.\n", + " 66446. 64997. 64460. 46618. 49988. 16269. 72006. 44320. 19782. 63933.\n", + " 28224. 21073. 63243. 20993. 94827. 39161. 37534. 80210. 21708. 94731.\n", + " 32375. 16531. 32006. 81676. 71038. 20832. 52405. 26583. 25486. 
24065.\n", + " 70435. 39679. 53565. 37574. 48259. 78532. 96163. 72672. 99002. 79494.\n", + " 35704. 26049. 70125. 52369. 41770. 31911. 70051. 19683. 30916. 84302.\n", + " 60145. 63774. 25134. 37057. 58577. 85857. 70602. 33816. 89642. 60068.\n", + " 50044. 36650. 50653. 68931. 39266. 40864. 39035. 34923. 24506. 52220.\n", + " 53554. 34476. 68205. 53690. 74454. 29664. 72450. 47272. 21585. 23827.\n", + " 69906. 73196. 72217. 46131. 54514. 96668. 78879. 29735. 23082. 53984.\n", + " 52135. 17576. 29486. 58557. 25632. 18768. 33190. 47945. 58778. 15192.\n", + " 51859. 23422. 21604. 79298. 76731. 38460. 48534. 30817. 48412. 68798.\n", + " 23712. 72196. 68197. 75248. 89879. 46998. 57099. 33897. 59207. 40171.\n", + " 80192. 74422. 34226. 65989. 30686. 20090. 77094. 97413. 79189. 18577.\n", + " 62777. 17483. 84394. 41440. 98132. 88220. 58327. 25950. 65726. 84768.\n", + " 36234. 58842. 25629. 77311. 95697. 22254. 65974. 92079. 67752. 25398.\n", + " 33321. 20325. 13129. 49080. 42536. 29926. 21450. 33345. 15752. 40169.\n", + " 55411. 12459. 64620. 83140. 54422. 68309. 56621. 38977. 83235. 32390.\n", + " 66538. 23285. 76358. 21104. 12964. 33288. 56168. 23105. 36218. 52275.\n", + " 49665. 32471. 62773. 76694. 55687. 36633. 77060. 92600. 64801. 45257.\n", + " 26854. 85840. 26463. 30689. 29590. 25965. 17269. 90330. 18558. 46384.\n", + " 42303. 71731. 26173. 24445. 72302. 27208. 30855. 99960. 55873. 18052.\n", + " 28937. 12829. 92163. 17588. 41546. 70340. 34549. 93459. 86148. 27048.\n", + " 73259. 35482. 29462. 67801. 16042. 28056. 16495. 41163. 33799. 90125.\n", + " 87747. 35695. 90985. 66839. 79090. 24825. 26806. 56835. 46135. 22862.\n", + " 95854. 44897. 64455. 53265. 50450. 54780. 67798. 21442. 27615. 17622.\n", + " 50200. 73570. 70412. 36631. 35895. 93018. 70014. 48875. 67969. 68665.\n", + " 26802. 45345. 89689. 32051. 81139. 63834. 37548. 72421. 83102. 28432.\n", + " 83707. 63259. 63860. 43836. 86132. 28519. 39102. 97298. 41986. 17291.\n", + " 37256. 96306. 14290. 37038. 90760. 
77048. 54480. 16244. 22436. 71592.\n", + " 28728. 57449. 83318. 75217. 55308. 36068. 48804. 55790. 70258. 47274.\n", + " 44705. 70446. 64348. 88997. 10312. 96263. 28919. 41869. 32808. 79780.\n", + " 91025. 33043. 69442. 47234. 86863. 25805. 43676. 59855. 35296. 36576.\n", + " 28513. 85448. 23791. 20597. 56940. 93210. 48992. 53736. 25378. 37722.\n", + " 69379. 33806. 94041. 74965. 34095. 96045. 86355. 27824. 42995. 21235.\n", + " 74585. 41833. 23908. 61953. 73760. 23333. 20440. 27658. 50943. 19003.\n", + " 46703. 21733. 20811. 11904. 43490. 57340. 49088. 47761. 61281. 25290.\n", + " 24239. 82664. 83210. 38736. 55437. 68041. 29066. 54337. 67616. 41082.\n", + " 50631. 19592. 55761. 41449. 62007. 21921. 42621. 63786. 82877. 10475.\n", + " 21952. 49721. 88340. 24589. 73769. 66670. 88854. 31266. 67267. 79270.\n", + " 36692. 44624. 19614. 83846. 51159. 83772. 88440. 25666. 52926. 40001.\n", + " 18024. 23220. 64125. 58042. 90034. 27972. 50989. 11885. 89451. 78904.\n", + " 70247. 32653. 93595. 90279. 53310. 22234. 91375. 22250. 51179. 38667.\n", + " 79487. 67763. 71943. 53526. 35005. 24721. 48587. 76310. 73205. 51056.\n", + " 58414. 23940. 44216. 29305. 53882. 91757. 33906. 68158. 42165. 68074.\n", + " 84978. 71135. 64642. 50071. 46754. 70410. 66957. 24213. 99790. 79751.\n", + " 86122. 82297. 89057. 43259. 25064. 25816. 41662. 96170. 46072. 37931.\n", + " 35127. 45473. 93087. 22398. 92983. 27689. 69654. 80744. 18608. 73168.\n", + " 70930. 62262. 91474. 61469. 16618. 48081. 67632. 34115. 23051. 23748.\n", + " 40589. 50809. 66676. 52339. 14973. 31546. 20836. 88592. 66943. 81872.\n", + " 22404. 21342. 34621. 62396. 97212. 49648. 97984. 26308. 63528. 20225.\n", + " 87620. 34990. 99934. 60804. 94648. 24516. 61063. 15169. 55390. 27592.\n", + " 61846. 83297. 55897. 21297. 87560. 89398. 36843. 34946. 75680. 49532.\n", + " 10269. 49714. 77517. 81082. 72540. 61546. 44818. 79797. 92717. 63568.\n", + " 73935. 18846. 38893. 84824. 20068. 97245. 51808. 71391. 23496. 23986.\n", + " 22974. 
61844. 24804. 27760. 52266. 23599. 36088. 70534. 27398. 85296.\n", + " 31063. 52367. 58651. 84831. 70263. 45354. 28334. 38772. 41479. 23909.\n", + " 48328. 86689. 24204. 25943. 62375. 70200. 79027. 62935. 26893. 47406.\n", + " 27572. 32802. 62739. 90844. 44685. 42589. 93383. 79583. 89129. 94389.\n", + " 84106. 76717. 51978. 47325. 86721. 24910. 43817. 59537. 54193. 86946.\n", + " 12160. 33701. 85702. 69417. 38644. 63209. 16061. 54230.]\n", + "\n", + "\n", + "Monthly Premium Auto:\n", + "[ 94. 108. 106. 68. 69. 67. 101. 71. 93. 79.\n", + " 80. 130. 35354. 61. 87. 63. 6464. 89. 74. 140.\n", + " 127. 96. 92. 72. 99. 105. 117. 65. 126. 82.\n", + " 64. 103. 118. 85. 77. 73. 95. 104. 115. 217.\n", + " 62. 110. 111. 114. 138. 76. 70. 81. 255. 150.\n", + " 123. 113. 116. 190. 78. 98. 159. 84. 112. 83.\n", + " 66. 90. 152. 136. 91. 86. 245. 109. 133. 135.\n", + " 1005. 225. 132. 153. 121. 139. 102. 119. 154. 161.\n", + " 156. 88. 100. 171. 244. 183. 176. 230. 129. 177.\n", + " 124. 181. 196. 131. 276. 253. 137. 180. 203. 192.\n", + " 107. 147. 165. 198. 125. 193. 128. 271. 97. 142.\n", + " 10202. 206. 182. 189. 160. 295. 210. 134. 35353. 239.\n", + " 188. 256. 173. 285. 148. 141. 197. 
199.]\n", + "\n", + "\n", + "Number of Open Complaints:\n", + "['1/0/00' '1/1/00' '1/2/00' '1/4/00' '1/3/00' '1/5/00']\n", + "\n", + "\n", + "Policy Type:\n", + "['Personal Auto' 'Corporate Auto' 'Special Auto']\n", + "\n", + "\n", + "Vehicle Class:\n", + "['Four-Door Car' 'Two-Door Car' 'SUV' 'Sports Car' 'Luxury Car'\n", + " 'Luxury SUV']\n", + "\n", + "\n", + "Total Claim Amount:\n", + "[1.13146493e+03 5.66472247e+02 5.29881344e+02 1.72693230e+01\n", + " 1.59383042e+02 3.21600000e+02 3.63029680e+02 5.11200000e+02\n", + " 4.25527834e+02 4.82400000e+02 2.03828760e+01 2.09851050e+01\n", + " 2.38201580e+01 3.17073170e+01 3.31928030e+01 4.52150590e+01\n", + " 4.60414520e+01 4.61581170e+01 4.80468690e+01 4.94511170e+01\n", + " 5.04221810e+01 5.05283550e+01 3.92604371e+02 7.21242206e+02\n", + " 4.56523850e+02 1.08799543e+03 5.19619150e+01 4.60800000e+02\n", + " 2.51992083e+02 3.45600000e+02 9.92576080e+01 5.04000000e+02\n", + " 5.67315780e+01 5.75623240e+01 6.04800000e+02 8.31272900e+00\n", + " 3.16599228e+02 3.12000000e+02 5.63007240e+01 3.93600000e+02\n", + " 7.01708239e+02 1.28705563e+02 6.79368378e+02 5.99871260e+01\n", + " 5.66400000e+02 6.16542620e+01 5.54400000e+02 4.65414770e+02\n", + " 4.96800000e+02 2.92800000e+02 2.56813837e+02 3.55200000e+02\n", + " 5.42319401e+02 3.03148399e+02 2.54380630e+01 7.87993313e+02\n", + " 8.40244130e+01 6.16937710e+01 2.32926145e+02 6.81797210e+01\n", + " 7.10433775e+02 5.52000000e+02 1.12265890e+03 4.08374746e+02\n", + " 4.08000000e+02 7.92000000e+02 5.32800000e+02 7.54358929e+02\n", + " 7.02990032e+02 2.06837111e+02 2.75395894e+02 4.59738128e+02\n", + " 3.36000000e+02 3.02400000e+02 4.30994107e+02 1.83600000e+03\n", + " 6.76324760e+01 8.44481918e+02 3.64800000e+02 7.22486994e+02\n", + " 5.90400000e+02 4.81027516e+02 1.12442773e+03 6.69682001e+02\n", + " 1.28409317e+03 2.40259479e+02 9.12000000e+02 5.56800000e+02\n", + " 1.15086827e+02 4.70400000e+02 3.83167471e+02 3.07963291e+02\n", + " 5.20364752e+02 4.66436375e+02 
4.51670309e+02 5.37600000e+02\n", + " 3.31200000e+02 4.28734656e+02 3.98400000e+02 4.94400000e+02\n", + " 3.69600000e+02 8.31752839e+02 1.56124914e+02 3.73428187e+02\n", + " 3.95934815e+02 3.50400000e+02 2.31201886e+02 2.89904105e+02\n", + " 7.29600000e+02 3.26400000e+02 1.05765111e+02 6.39464548e+02\n", + " 3.83442328e+02 3.16800000e+02 9.90859430e+01 3.07200000e+02\n", + " 5.66935022e+02 4.36800000e+02 4.12800000e+02 3.88800000e+02\n", + " 2.34541344e+03 7.73470977e+02 4.24883448e+02 5.23200000e+02\n", + " 5.08800000e+02 3.64240307e+02 6.38400000e+02 3.54729129e+02\n", + " 2.83995953e+02 4.03200000e+02 2.05431760e+01 3.85115437e+02\n", + " 1.40165035e+02 1.02879769e+02 5.68682890e+01 4.63158502e+02\n", + " 2.05444066e+02 7.41600000e+02 5.63719670e+01 8.13600000e+02\n", + " 7.95864079e+02 6.14400000e+02 7.24386810e+01 6.78815460e+01\n", + " 4.02636829e+02 6.41388616e+02 3.42481173e+02 3.40800000e+02\n", + " 4.12101933e+02 7.37005730e+01 3.74400000e+02 7.22024742e+02\n", + " 2.59361117e+02 3.96295614e+02 3.58281562e+02 6.53388564e+02\n", + " 1.21032372e+02 9.28133960e+01 5.28000000e+02 1.59636956e+02\n", + " 4.76156957e+02 7.27200000e+02 4.20783450e+01 5.61600000e+02\n", + " 2.00116060e+02 6.76944023e+02 7.66092950e+01 8.30623064e+02\n", + " 5.80473259e+02 6.00366830e+01 4.75310100e+01 3.03872752e+02\n", + " 1.00620067e+02 4.81339891e+02 1.10160000e+03 8.63200220e+01\n", + " 6.67200000e+02 5.66033300e+01 2.62865172e+02 3.09577946e+02\n", + " 3.65364581e+02 3.84000000e+02 4.89600000e+02 7.05600000e+02\n", + " 9.51528000e+00 2.44564334e+02 6.39105556e+02 4.53600000e+02\n", + " 5.54522969e+02 4.02449823e+02 3.27020539e+02 5.39530000e+00\n", + " 1.31401291e+02 9.93829430e+01 8.56800000e+02 7.39200000e+02\n", + " 5.18400000e+02 8.42183630e+01 6.45468770e+01 4.41600000e+02\n", + " 3.79106230e+01 2.02860399e+02 8.58098170e+01 9.11466610e+01\n", + " 2.86234931e+02 1.15920000e+03 4.66570791e+02 1.25240623e+03\n", + " 4.21484456e+02 4.30505942e+02 3.98502948e+02 
7.64676300e+00\n", + " 5.55105260e+01 3.51270869e+02 8.06400000e+02 6.47454583e+02\n", + " 1.28438230e+02 3.02033971e+02 4.80000000e+02 1.00316063e+03\n", + " 3.79200000e+02 2.44362072e+02 2.01455005e+02 9.15509800e+01\n", + " 9.18346680e+01 7.07430832e+02 9.29152510e+01 3.76126419e+02\n", + " 1.52184244e+02 1.76819414e+02 1.04454624e+02 9.41718054e+02\n", + " 4.68000000e+02 9.40303080e+01 6.78598810e+01 5.39843003e+02\n", + " 9.51931570e+01 9.53385050e+01 6.26400000e+02 4.94395024e+02\n", + " 1.15545086e+02 1.99389810e+01 7.99926741e+02 8.78400000e+02\n", + " 4.44470676e+02 2.68819985e+02 2.97600000e+02 1.01288069e+02\n", + " 1.31760000e+03 6.91412378e+02 8.04811859e+02 1.09086434e+03\n", + " 5.05870350e+01 1.51528482e+02 3.47075948e+02 2.71697529e+02\n", + " 8.49600000e+02 1.10484661e+02 1.13534474e+02 5.95200000e+02\n", + " 1.13609508e+02 1.33706349e+03 1.14273025e+02 1.25194389e+02\n", + " 2.35220971e+02 3.95341110e+02 1.14798771e+02 7.72798511e+02\n", + " 6.33600000e+02 1.68517149e+02 8.45654042e+02 6.68293970e+02\n", + " 7.00901632e+02 5.25600000e+02 2.52990000e+01 3.75330097e+02\n", + " 4.18233667e+02 3.98240791e+02 9.38513425e+02 3.38619869e+02\n", + " 3.39344531e+02 3.02818833e+02 6.19165344e+02 2.59561195e+02\n", + " 1.79161843e+02 6.62400000e+02 7.55018520e+01 3.06983596e+02\n", + " 8.40268480e+01 1.39489926e+02 6.07445900e+02 6.31124372e+02\n", + " 6.28023494e+02 4.26655599e+02 9.14179230e+01 3.17844812e+02\n", + " 1.18446235e+02 8.64615820e+01 1.18454974e+02 3.62774545e+02\n", + " 2.21856184e+02 9.80169081e+02 6.14675906e+02 1.45252168e+02\n", + " 3.74240783e+02 6.19200000e+02 8.06692570e+01 1.32480000e+03\n", + " 4.68566133e+02 8.42437850e+02 4.75200000e+02 1.17193117e+03\n", + " 2.68471802e+02 1.38722385e+02 4.67248020e+02 4.67503236e+02\n", + " 1.21440000e+03 1.92085299e+02 6.19973889e+02 1.00278255e+03\n", + " 2.51774574e+02 8.66208321e+02 1.21092095e+03 5.99648466e+02\n", + " 2.02772444e+03 7.28520470e+01 4.49819671e+02 2.59060862e+02\n", + " 
4.56000000e+02 2.99356083e+02 6.99167900e+02 1.17959654e+02\n", + " 5.45240341e+02 3.28231432e+02 6.65686420e+01 1.38240000e+03\n", + " 1.21306839e+02 6.09600000e+02 5.42400000e+02 1.25933005e+02\n", + " 6.55413330e+02 6.73342650e+02 5.13600000e+02 2.38104910e+01\n", + " 4.51200000e+02 2.31922173e+02 7.72800000e+02 6.64980242e+02\n", + " 1.28645946e+02 4.64920390e+01 4.32000000e+02 7.80851490e+01\n", + " 5.64466556e+02 2.89040734e+02 1.55938593e+02 3.26549425e+02\n", + " 6.53656680e+02 1.03935601e+02 8.09532341e+02 5.80725310e+02\n", + " 1.33735395e+02 2.38005074e+02 1.57767442e+03 1.35892444e+02\n", + " 1.35261250e+02 5.71200000e+02 6.37063458e+02 7.59360960e+01\n", + " 7.99200000e+02 1.36291083e+02 6.75309040e+01 3.12921256e+02\n", + " 1.66937747e+02 1.35382194e+02 1.77429540e+01 1.83836700e+00\n", + " 1.97776009e+02 6.12102262e+02 1.19303615e+03 6.00000000e+02\n", + " 1.36829537e+02 9.26400000e+02 3.92636400e+02 2.15226476e+02\n", + " 1.60598662e+02 3.71803029e+02 4.43670399e+02 6.12300581e+02\n", + " 2.90391526e+02 3.16795337e+02 4.05527937e+02 2.08598246e+02\n", + " 1.38130879e+02 3.01437365e+02 4.56473115e+02 1.52338562e+02\n", + " 4.91755368e+02 4.85174390e+01 4.30375049e+02 2.46489123e+02\n", + " 1.28969729e+02 1.85355353e+02 1.30080000e+03 9.07130500e+00\n", + " 1.33475315e+02 1.41199465e+02 6.23223617e+02 4.27200000e+02\n", + " 1.41725051e+02 2.11336937e+02 1.47080303e+02 8.62762957e+02\n", + " 1.13367765e+02 9.43200000e+02 1.17672722e+02 2.25145949e+02\n", + " 1.82432565e+02 1.48173152e+02 1.55570802e+02 2.12391975e+02\n", + " 1.74041566e+02 4.86278557e+02 1.92470900e+00 2.62122050e+02\n", + " 9.21600000e+02 5.41695658e+02 3.27682669e+02 1.26557030e+03\n", + " 6.30431970e+01 1.56313630e+01 3.75866091e+02 4.52616872e+02\n", + " 1.13450122e+02 1.60345100e+01 6.15272280e+02 6.15256301e+02\n", + " 5.12662450e+02 7.45239350e+01 4.26072946e+02 1.69287785e+02\n", + " 4.19464143e+02 1.58077504e+02 6.69600000e+02 3.95729716e+02\n", + " 1.59756733e+02 
5.85575520e+01 8.58297100e+00 2.85418473e+02\n", + " 4.10508316e+02 2.44231350e+02 2.18598065e+02 6.43826716e+02\n", + " 4.97047297e+02 1.02240000e+03 6.57600000e+02 2.30245772e+02\n", + " 5.62275100e+00 1.60075260e+02 3.46513050e+01 6.78100487e+02\n", + " 1.74588413e+02 1.59391681e+02 6.98400000e+02 2.32711071e+02\n", + " 1.63046956e+02 7.98656050e+01 3.82085897e+02 4.83483190e+01\n", + " 1.33425609e+02 5.61414794e+02 4.45287788e+02 1.65570243e+02\n", + " 4.07996840e+02 8.44229478e+02 3.40306584e+02 1.06647493e+02\n", + " 6.26116259e+02 7.84657810e+02 5.47200000e+02 8.62777200e+01\n", + " 6.35165720e+01 1.20015609e+02 1.25413790e+03 4.17600000e+02\n", + " 7.98002689e+02 8.20416840e+01 2.28190880e+01 4.22400000e+02\n", + " 6.39971388e+02 5.11497882e+02 2.39540223e+02 5.18180364e+02\n", + " 7.15252366e+02 4.02070719e+02 9.48140320e+01 9.72000000e+02\n", + " 7.38830440e+01 5.74024018e+02 6.32715382e+02 4.23862600e+00\n", + " 4.22480870e+01 2.71451510e+01 2.70002766e+02 4.94946438e+02\n", + " 1.42567008e+02 4.25266308e+02 2.51334247e+02 3.69818708e+02\n", + " 1.61419528e+02 2.55999709e+02 4.84800000e+02 6.17695640e+01\n", + " 1.31040000e+03 2.50001424e+02 1.41799422e+02 4.97970160e+01\n", + " 3.72175592e+02 5.42143850e+02 1.36080000e+03 7.68000000e+02\n", + " 7.26329340e+01 7.20601429e+02 5.76000000e+02 4.81025786e+02\n", + " 4.66176731e+02 1.59266473e+02 4.12831670e+01 5.03808329e+02\n", + " 8.59599411e+02 3.05653785e+02 2.78902846e+02 1.41600000e+03\n", + " 1.39963594e+02 7.34400000e+02 3.72998650e+01 9.58165160e+01\n", + " 1.00800000e+03 4.46400000e+02 7.22053620e+01 4.01592109e+02\n", + " 2.53183568e+02 6.43200000e+02 7.99673766e+02 2.79878670e+01\n", + " 1.66772960e+02 2.70563995e+02 1.78006524e+02 4.04265696e+02\n", + " 1.89189350e+01 5.13818403e+02 1.31828507e+02 6.28800000e+02\n", + " 1.78986788e+02 3.92235698e+02 8.73600000e+02 3.82107000e-01\n", + " 1.87363583e+02 3.50853987e+02 2.17973168e+02 2.59340640e+01\n", + " 1.88938397e+02 1.53205591e+02 
2.89323968e+03 6.29532731e+02\n", + " 3.61284757e+02 1.18598830e+03 4.25800112e+02 1.90434460e+02\n", + " 1.93505325e+02 2.40877740e+01 2.19288706e+02 8.81360959e+02\n", + " 2.69516270e+01 4.66122541e+02 6.07095655e+02 2.90381707e+02\n", + " 5.58099357e+02 8.28000000e+02 5.40514115e+02 1.00049832e+02\n", + " 5.98619630e+01 1.93570320e+02 1.99797270e+02 5.30847530e+01\n", + " 2.56438030e+02 2.11136067e+02 7.43508930e+01 6.21464468e+02\n", + " 3.60055890e+02 6.59548130e+01 4.07450118e+02 1.22880000e+03\n", + " 2.40636930e+01 3.11329282e+02 4.26891350e+01 4.60323855e+02\n", + " 2.13225001e+02 3.00607591e+02 2.62504882e+02 2.63365432e+02\n", + " 2.56268091e+02 2.15818200e+02 1.35360000e+03 4.99200000e+02\n", + " 4.55079520e+01 6.91200000e+02 8.24099220e+01 3.57642982e+02\n", + " 4.20356980e+02 1.36787725e+02 2.20186677e+02 1.70798204e+02\n", + " 2.66165535e+02 9.89217820e+01 2.81295903e+02 6.31743039e+02\n", + " 2.92095210e+01 1.36883999e+02 3.17556010e+01 2.24275820e+02\n", + " 8.30400000e+02 4.80159011e+02 3.90792553e+02 7.20711950e+01\n", + " 4.19652520e+01 1.02700003e+03 3.19820747e+02 4.11011162e+02\n", + " 6.48000000e+02 4.11058500e+00 3.51149904e+02 2.84000172e+02\n", + " 4.96474767e+02 9.25137143e+02 1.31640970e+01 4.63335061e+02\n", + " 1.80667969e+02 3.02764283e+02 1.41922839e+02 3.49783046e+02\n", + " 5.40655380e+01 3.42515136e+02 1.02717726e+03 2.75989978e+02\n", + " 4.72599683e+02 1.33693172e+03 7.34594600e+00 5.40141566e+02\n", + " 3.83363758e+02 5.37765151e+02 3.21873474e+02 1.95756830e+01\n", + " 1.08138715e+02 7.07303416e+02 1.06733313e+03 8.07947292e+02\n", + " 6.18630955e+02 4.04272806e+02 7.10400000e+02 7.07977614e+02\n", + " 2.32242326e+02 6.82260010e+01 2.87149807e+02 3.91636628e+02\n", + " 2.39328571e+02 2.32674417e+02 3.36509610e+02 2.44212286e+02\n", + " 3.28870868e+02 8.50637080e+01 1.41840000e+03 3.34408717e+02\n", + " 4.20964150e+01 3.00528579e+02 1.44782152e+02 6.45982160e+01\n", + " 3.47857619e+02 2.45447622e+02 9.55200000e+02 
# %% Preview the distinct values of every column.
# High-cardinality columns (customer IDs, monetary amounts) hold hundreds of
# distinct values; printing all of them buries the notebook in output. Each
# column therefore reports how many distinct values it has plus a short preview.
MAX_UNIQUE_SHOWN = 20
for column_name in df.columns:
    distinct = df[column_name].unique()
    print(f"{column_name} ({len(distinct)} unique):\n{distinct[:MAX_UNIQUE_SHOWN]}\n\n")


# %% Helper for cleaning "Number of Open Complaints".
def extract_complaint_count(raw):
    """Return the middle field of 'a/b/c'-style strings; safe to re-run.

    The raw column holds strings such as '1/0/00' ... '1/5/00'. Only the
    middle field varies, so that field is kept as the complaint count.

    Parameters
    ----------
    raw : pandas.Series
        Either the raw slash-separated strings or an already-cleaned column.

    Returns
    -------
    pandas.Series
        ``raw`` unchanged when it is already numeric; otherwise the second
        '/'-separated field of each value, falling back to the original value
        wherever there is no second field.
    """
    if pd.api.types.is_numeric_dtype(raw):
        # Already converted by a previous run: `.str` would raise here.
        return raw
    middle_field = raw.str.split('/').str[1]
    # Without this fallback a second run would turn '0' into NaN, because
    # '0'.split('/') has no element at index 1.
    return middle_field.fillna(raw)


# %% Keep only the complaint count (second field, index 1) of each value.
df = df.copy()
df['Number of Open Complaints'] = extract_complaint_count(df['Number of Open Complaints'])
# Display the cleaned column to confirm it only holds the complaint count.
df['Number of Open Complaints']

# %% Count missing values before the numeric conversion.
nan_in_complaints_column_alternative = df['Number of Open Complaints'].isnull().sum()
nan_in_complaints_column_alternative

# %% Distinct values while the column is still made of strings.
Number_of_Open_Complaints_uniques = df['Number of Open Complaints'].unique()
Number_of_Open_Complaints_uniques

# %% Convert to a numeric dtype.
# errors="coerce" silently maps unparseable values to NaN; compare with the
# missing-value count above to make sure none were introduced.
df['Number of Open Complaints'] = pd.to_numeric(df['Number of Open Complaints'], errors = "coerce")
df['Number of Open Complaints']

# %% Distinct values after the conversion.
df['Number of Open Complaints'].unique()

# %% Confirm the resulting dtype.
print(df['Number of Open Complaints'].dtype)
# %% Helper for cleaning "Customer Lifetime Value".
# NOTE(review): the raw values are percent strings such as '436293.12%'. The
# original notebook divides by 1000 (not 100) after stripping the '%'; that
# scale is kept unchanged here -- confirm the intended unit.
CLV_DIVISOR = 1000


def clean_customer_lifetime_value(raw, divisor=CLV_DIVISOR):
    """Turn '<number>%' strings into floats scaled by ``divisor``; safe to re-run.

    Parameters
    ----------
    raw : pandas.Series
        Either the raw percent strings or an already-cleaned numeric column.
    divisor : float, optional
        Value the parsed numbers are divided by (defaults to ``CLV_DIVISOR``).

    Returns
    -------
    pandas.Series
        ``raw`` unchanged when it is already numeric; otherwise the values
        with '%' removed, cast to float, divided by ``divisor`` and rounded
        to two decimals.
    """
    if pd.api.types.is_numeric_dtype(raw):
        # A second run must neither call `.str` on floats nor divide again.
        return raw
    without_percent = raw.str.replace('%', '', regex=False)
    return (without_percent.astype(float) / divisor).round(2)


# %% Apply the cleaning and display the result.
df['Customer Lifetime Value'] = clean_customer_lifetime_value(df['Customer Lifetime Value'])

df['Customer Lifetime Value']

# %% Confirm the resulting dtype.
df['Customer Lifetime Value'].dtype

# %% Preview the first 20 distinct values.
unique_values = df['Customer Lifetime Value'].unique()[:20]
print(unique_values)
# %% Preview the first rows of the cleaned frame.
df.head()

# %% Collect the names of the columns stored with the generic object dtype;
# the notebook treats these as its categorical columns.
categorical_columns = [
    name for name, column_dtype in df.dtypes.items() if column_dtype == 'object'
]
categorical_columns
'PA38372' 'RO18530'\n", + " 'PD27940' 'BS77946' 'YM50253' 'NR15332' 'RC62865' 'CC15295' 'KA61892'\n", + " 'OS94884' 'ND87334' 'OY51402' 'YL74911' 'GK92563' 'HL53154' 'RI78966'\n", + " 'IC13702' 'BE10809' 'HT87217' 'TH95618' 'TS19868' 'LP45550' 'QR87004'\n", + " 'OE75747' 'DX91392' 'AB72731' 'GX84338' 'IS12901' 'BN90616' 'HH90090'\n", + " 'IU25463' 'KC11055' 'PD33979' 'NK71023' 'AB13432' 'OZ97704' 'UF46533'\n", + " 'XP47431' 'GK73582' 'RV98763' 'II62831' 'XK33449' 'TR85083' 'EO95328'\n", + " 'EN21086' 'YL83902' 'AZ62651' 'ZW25874' 'EH41854' 'MW70227' 'SL22297'\n", + " 'RV14138' 'UO62808' 'ZX64745' 'FL34139' 'TS11219' 'XX12304' 'SD64087'\n", + " 'OY38576' 'BG76355' 'IP66913' 'LE95702' 'KX54357' 'EZ78112' 'XN16891'\n", + " 'XK31350' 'CC30924' 'IT78748' 'KY33386' 'CO44221' 'LK60013' 'DE21533'\n", + " 'YS94121' 'UK68427' 'TE49565' 'RA88421' 'KQ51983' 'CD88896' 'YV22553'\n", + " 'WU14435' 'XV84099' 'RI24911' 'KO26461' 'HI14283' 'PT50227' 'BH36570'\n", + " 'TX17484' 'CT41158' 'AO87348' 'DE55857' 'LF66923' 'CN24514' 'UW32074'\n", + " 'HP36979' 'PP40919' 'RO73268' 'HO61691' 'BS13062' 'FO35655' 'HR10526'\n", + " 'IA63417' 'BH35016' 'PK52952' 'OD76309' 'IL28481' 'GY55092' 'UF33451'\n", + " 'CF15558' 'JM62924' 'EM66435' 'QX45933' 'JI71369' 'JU93290' 'GU66096'\n", + " 'UC33108' 'LW93867' 'OU78470' 'XW90265' 'HS67749' 'VZ51506' 'UI64281'\n", + " 'AE98193' 'AZ74055' 'XS76911' 'AY40674' 'NA12740' 'UA84837' 'DJ51510'\n", + " 'VM58985' 'OH60605' 'UO98052' 'NC53424' 'LQ13873' 'LA97014' 'NB79936'\n", + " 'NT89061' 'AF10970' 'ZG48513' 'JQ59145' 'FE84989' 'JT52858' 'MC62068'\n", + " 'EU27538' 'RH42306' 'US23612' 'WV76014' 'RK96223' 'MF82000' 'FM46980'\n", + " 'SY56792' 'RF61565' 'IM94808' 'VI14730' 'YR34119' 'RR77985' 'QD28391'\n", + " 'WV17090' 'TM23514' 'MQ68407' 'GJ59592' 'FY56083' 'UA94723' 'FW91032'\n", + " 'DE34457' 'HD32044' 'HH30454' 'AH84063' 'QA17596' 'XI41052' 'DI30528'\n", + " 'SC66359' 'EN61670' 'DQ10761' 'BQ51587' 'JE21522' 'WS47147' 'ZA64638'\n", + " 'EW38459' 'QW87316' 
'IC43478' 'TE34064' 'WU60905' 'YM18992' 'PD55753'\n", + " 'KU56006' 'MJ69973' 'TW43626' 'XX84133' 'ZW84453' 'HO29524' 'VE89726'\n", + " 'GE87503' 'PX90263' 'NI17718' 'FY32213' 'RZ13254' 'GN45013' 'NM39588'\n", + " 'KU84464' 'YH43527' 'RO30676' 'QL59704' 'QH19450' 'SA54664' 'CI38330'\n", + " 'WB38524' 'CE56187' 'JL19416' 'JZ61422' 'LA13377' 'NC99948' 'QD34785'\n", + " 'RO26085' 'ES57969' 'JK55587' 'RN97635' 'BI76326' 'JA34909' 'OJ90342'\n", + " 'CM88932' 'JJ97525' 'XV21647' 'MC83487' 'BL90769' 'CR57148' 'CP85232'\n", + " 'YL74732' 'FG16766' 'NV55438' 'RM10880' 'GL56175' 'UK52289' 'OT85112'\n", + " 'BC62782' 'TI19722' 'JP30654' 'UM45563' 'EN60878' 'JF36291' 'BK59444'\n", + " 'MK70700' 'IW71076' 'AP98768' 'OM24164' 'HR85211' 'VC87846' 'ZM92052'\n", + " 'ON73702' 'QQ90441' 'HU35721' 'YP47665' 'FU99476' 'AG85615' 'OY74069'\n", + " 'DJ91267' 'KB72438' 'TR67616' 'GF65731' 'HB67642' 'DP84567' 'VV77534'\n", + " 'GL67540' 'SV50502' 'UK59698' 'OA57352' 'ZF84449' 'AX86150' 'HG39060'\n", + " 'EM29359' 'SF57173' 'OT47603' 'SW31412' 'JS36322' 'RE81445' 'RM24280'\n", + " 'LC25393' 'UX38930' 'HD95496' 'RX24650' 'DW19309' 'MT41386' 'WZ40465'\n", + " 'DB42794' 'JB50798' 'IP69763' 'TE35785' 'HX74855' 'QN65180' 'GE47180'\n", + " 'VQ38776' 'BH86846' 'IN17648' 'DF95759' 'QG45324' 'MN61620' 'YH86390'\n", + " 'FY13480' 'YH61661' 'NL93182' 'WE68644' 'EZ30498' 'QY74517' 'NM88660'\n", + " 'MZ82036' 'ID20929' 'EY50028' 'TT82373' 'OH64088' 'SK97780' 'IO33050'\n", + " 'XA55917' 'JK32620' 'RQ19236' 'QC47433' 'RA93608' 'XH97711' 'AU96286'\n", + " 'KC17170' 'ZN47335' 'EI46264' 'EK87864' 'GV45403' 'QK31192' 'LU89008'\n", + " 'NS10490' 'KL98495' 'IU96845' 'QL93655' 'PF40592' 'LZ34046' 'JC80093'\n", + " 'YE88490' 'YC80498' 'AI85843' 'XD66024' 'FY51713' 'PH26378' 'WQ18638'\n", + " 'KY14688' 'TC97762' 'QC87108' 'CX12134' 'SM73248' 'CK19789' 'UV12583'\n", + " 'JC11405' 'KA89683' 'BG85305' 'UQ87917' 'XN11823' 'OS46571' 'PX17116'\n", + " 'RP19541' 'ZR25747' 'NQ86532' 'JY27336' 'PB54378' 'SV38190' 
'CV24005'\n", + " 'EX28656' 'CF57022' 'GM16780' 'BX94438' 'RM41745' 'XR70252' 'YH92099'\n", + " 'SG81493' 'ZX23819' 'FJ54907' 'CU26127' 'YH60476' 'ZZ97035' 'GE82737'\n", + " 'KY21873' 'UA51318' 'BV55014' 'HX21307' 'LQ68252' 'CR92802' 'SL35268'\n", + " 'RD62882' 'JS42382' 'BT30554' 'VP57424' 'VU19243' 'TA82973' 'GK71720'\n", + " 'OQ61223' 'LL62746' 'JQ56711' 'AW77988' 'QP84605' 'MY97912' 'IB87349'\n", + " 'AW73065' 'BW80872' 'PX70175' 'KF75098' 'IS50283' 'MY64920' 'KN34250'\n", + " 'GN46207' 'KL57176' 'MN94234' 'JY90595' 'HK26543' 'PN86062' 'VW27730'\n", + " 'SH55671' 'MO56878' 'VO38365' 'SV35618' 'RX12347' 'FR55658' 'XS12556'\n", + " 'ZU73588' 'WT43034' 'VM13430' 'TC78849' 'VC34764' 'WO90953' 'IU47468'\n", + " 'KO46064' 'RB34917' 'BI38192' 'PU18983' 'SW79912' 'ES39217' 'KP72427'\n", + " 'UA19178' 'PR53785' 'XF57481' 'CN90378' 'KI56154' 'UI55951' 'FF28650'\n", + " 'FS55302' 'TN79487' 'HG32616' 'UK41984' 'LZ52266' 'PM27367' 'ZK21724'\n", + " 'BH35482' 'QE22757' 'ON77649' 'RN82884' 'CQ75652' 'FF58467' 'BS83666'\n", + " 'WO29605' 'TL77607' 'EZ50606' 'OS39723' 'FN69743' 'XW96958' 'TU92578'\n", + " 'TL43709' 'YE68736' 'OB96537' 'EU68825' 'CC31456' 'DJ77787' 'LN26837'\n", + " 'YI92916' 'NW54906' 'ME77513' 'UK76891' 'SI26888' 'YD74948' 'HB64268'\n", + " 'BW52697' 'NL41409' 'OD69005' 'ZZ91716' 'UK70255' 'QT25383' 'AW18068'\n", + " 'NS45347' 'FV19421' 'XW89091' 'YC11951' 'UY18770' 'RA49085' 'BG84194'\n", + " 'PT64580' 'MR67738' 'DM95829' 'DB75522' 'LM34525' 'WW30771' 'QP65569'\n", + " 'TN50051' 'UO86707' 'JA41698' 'NX18774' 'DA69469' 'CN23147' 'RA68844'\n", + " 'GH42026' 'BD16530' 'JH91579' 'WK23685' 'GR62267' 'PI78084' 'GF97874'\n", + " 'ZH19885' 'UK25655' 'QR45101' 'EL93539' 'EE99484' 'DP46882' 'WP41146'\n", + " 'TK60799' 'DN29808' 'SS59521' 'NG66579' 'TC14209' 'ED50963' 'GP40701'\n", + " 'CP98451' 'NX52648' 'ZC32510' 'NG27780' 'HN95240' 'EB59129' 'RA70851'\n", + " 'PM19162' 'MS59005' 'SU71163' 'BD35676' 'NI44621' 'EW33419' 'HX44948'\n", + " 'DL36983' 'XR87264' 'NN99001' 
'XV95530' 'OL97871' 'HQ23708' 'WR63188'\n", + " 'NG82219' 'KU29408' 'RE46783' 'RU94434' 'GI82355' 'VO26340' 'NV61299'\n", + " 'DX31066' 'CY50337' 'TJ20375' 'EP72155' 'JJ76159' 'BG15419' 'AO74776'\n", + " 'HQ82233' 'OL72737' 'ZQ59828' 'NZ15548' 'XK61304' 'EJ44139' 'CM94425'\n", + " 'OV54878' 'JF57282' 'MY37953' 'XP64922' 'WL65572' 'LN50325' 'HJ15383'\n", + " 'KH59823' 'YM79169' 'DR38127' 'PU42145' 'KM33477' 'RI53167' 'OF77789'\n", + " 'YB33445' 'BA17836' 'JS43228' 'BB11622' 'HQ70429' 'WK88044' 'LA80525'\n", + " 'EH16250' 'PU41872' 'HB85743' 'MM71959' 'MB83663' 'KR43119' 'KH24214'\n", + " 'AC40767' 'HP55391' 'EG62398' 'VS19949' 'AM92343' 'GI68556' 'JT11876'\n", + " 'XR64251' 'MK34957' 'GP18756' 'AP23850' 'KQ65521' 'EJ19449' 'QB70027'\n", + " 'QW47320' 'KH64733' 'ON59472' 'HP94242' 'RV15398' 'EA25683' 'PW73754'\n", + " 'MC71942' 'OX72195' 'YQ99152' 'KI19439' 'PM76175' 'US45383' 'GT38956'\n", + " 'SN41301' 'BE62503' 'PA16884' 'NC58480' 'NS39326' 'PN18507' 'EK91340'\n", + " 'JY16280' 'ZW71731' 'ZC24631' 'YR34689' 'RT65829' 'BZ12077' 'WM65373'\n", + " 'NH35059' 'QD38160' 'BM15160' 'VY79030' 'EV19512' 'TE13577' 'WY97929'\n", + " 'YG20683' 'FK75497' 'NE60110' 'TN36521' 'HG33568' 'TW17878' 'ZO83562'\n", + " 'CH97539' 'CV29889' 'MO33320' 'QZ81258' 'NY56352' 'EA27048' 'UT38865'\n", + " 'QC89139' 'LA14484' 'HN57556' 'CV31235' 'WR45726' 'LB25094' 'KW56110'\n", + " 'XO36233' 'ZX86243' 'DW29763' 'CT83377' 'OQ90898' 'GO77248' 'QW33258'\n", + " 'OU79745' 'VZ79886' 'FI92440' 'YG85980' 'QM74621' 'EI71732' 'VN79010'\n", + " 'FI61723' 'OH55411' 'TF10720' 'NW30838' 'CB58476' 'WI69346' 'FS76657'\n", + " 'YX89016' 'PK28821' 'MB51200' 'XG44587' 'FG91922' 'OM99303' 'RV67546'\n", + " 'UJ79253' 'PN98247' 'IB67546' 'OE19087' 'CM95716' 'MW62634' 'QW67581'\n", + " 'SN16059' 'OE51254' 'RM42344' 'GB35238' 'ML82674' 'EI85244' 'DE28132'\n", + " 'TV25678' 'TY26512' 'OB69153' 'QZ77637' 'XN41715' 'QR15857' 'FL69363'\n", + " 'IS30295' 'WA25797' 'NL59519' 'ZU93025' 'DK94262' 'UQ30615' 'OR40060'\n", + " 
'DK32872' 'FA46418' 'ER19995' 'KI75855' 'ND41876' 'PN21042' 'GJ43254'\n", + " 'AL46984' 'JP58047' 'ZE85014' 'KU88219' 'UU98729' 'WS82822' 'YB49933'\n", + " 'XC16387' 'XJ96748' 'TM98684' 'AY18433' 'DM74502' 'FT56968' 'OX36896'\n", + " 'BZ65376' 'LN34660' 'JC29295' 'KJ87930' 'XT36360' 'IX35050' 'UN97379'\n", + " 'MR57294' 'UG79499' 'UA50747' 'GL20444' 'SP58110' 'XM91635' 'TV82603'\n", + " 'BB82067' 'JP94676' 'VU53417' 'IW54795' 'RN78170' 'IX55883' 'XM72420'\n", + " 'GC15104' 'RX13282' 'QA85890' 'IR62668' 'AL96740' 'SS48498' 'PE39479'\n", + " 'JH62891' 'FI20423' 'PM13394' 'YV67971' 'QD31377' 'YG10247' 'FE73696'\n", + " 'SW19699' 'QJ40732' 'HM76207' 'NT59303' 'PU41393' 'QO86948' 'QN10888'\n", + " 'VY19543' 'XC15133' 'ST43550' 'FX36546' 'JX68983' 'HX78576' 'ZQ11381'\n", + " 'ON39271' 'SB18278' 'ZT30559' 'XI41106' 'ZS88847' 'RU49126' 'KR62797'\n", + " 'ZJ73220' 'FY62633' 'CU36986' 'WZ53904' 'AA71604' 'TD10493' 'LY97989'\n", + " 'VX39856' 'TP51897' 'QQ89253' 'EI91403' 'QG15435' 'FZ55002' 'HX77930'\n", + " 'UN37063' 'VB87946' 'AB60627' 'TA34903' 'AQ51368' 'NZ26102' 'GB45753'\n", + " 'BV79904' 'OB49075' 'DS97676' 'JO63462' 'NJ10602' 'RS24501' 'VT78274'\n", + " 'SU56153' 'MN20737' 'KL43114' 'YQ15567' 'TR88637' 'TC88986' 'XX88577'\n", + " 'NE49052' 'KX17826' 'CC91503' 'WH32183' 'ES90681' 'DW96592' 'MT23134'\n", + " 'BM69081' 'MB90871' 'QL77686' 'ON77827' 'KP18988' 'TI92884' 'JH73503'\n", + " 'YE97964' 'VA30351' 'PV55726' 'UC88305' 'TS53809' 'ZV32120' 'FB80807'\n", + " 'AS55677' 'WA15684' 'SA50567' 'KJ31611' 'VL37375' 'KN21017' 'PX44289'\n", + " 'AM97901' 'RE42925' 'TR81766' 'CH85057' 'UP71482' 'EG40670' 'HV83672'\n", + " 'MG10140' 'TC44716' 'QO65264' 'EB66698' 'OT52034' 'CH85444' 'PU85769'\n", + " 'UI73201' 'SL50592' 'XP11075' 'SI31236' 'JN26745' 'VK48036' 'JX76668'\n", + " 'DS45802' 'OA96690' 'EM27919' 'QO41043' 'OV50124' 'PR31642' 'BU41599'\n", + " 'TK30357' 'NF31087' 'NH16984' 'OS75493' 'VT63298' 'QS75550' 'SZ16483'\n", + " 'VM92311' 'NJ46849' 'WZ31900' 'RG30482' 'ZM86949' 
'QQ39596' 'FH51383'\n", + " 'BJ53923' 'CZ96653' 'FB23788' 'NT43594' 'RJ85627' 'KJ86296' 'PI47776'\n", + " 'MD73554' 'UX92071' 'YG44474' 'UH45301' 'RY92647' 'IK12620' 'GQ66762'\n", + " 'YT69858' 'XD85577' 'TM65736' 'VJ51327' 'GS98873' 'CW49887' 'MY31220']\n", + "\n", + "\n", + "ST:\n", + "['Arizona' 'Nevada' 'California' 'Washington' 'Oregon' 'AZ' 'WA' 'Cali']\n", + "\n", + "\n", + "GENDER:\n", + "['F' 'M' 'Femal' 'Male' 'female']\n", + "\n", + "\n", + "Education:\n", + "['Bachelor' 'High School or Below' 'College' 'Master' 'Bachelors' 'Doctor']\n", + "\n", + "\n", + "Policy Type:\n", + "['Personal Auto' 'Corporate Auto' 'Special Auto']\n", + "\n", + "\n", + "Vehicle Class:\n", + "['Four-Door Car' 'Two-Door Car' 'SUV' 'Sports Car' 'Luxury Car'\n", + " 'Luxury SUV']\n", + "\n", + "\n" + ] + } + ], + "source": [ + "for i in categorical_columns:\n", + " print(f\"{i}:\\n{df[i].unique()}\\n\\n\")" + ] + }, + { + "cell_type": "markdown", + "id": "9cfb9eb5-1196-4956-a72d-5bce3d31e6b2", + "metadata": {}, + "source": [ + "the range of values for numerical columns" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "id": "b9b55dcc-cea6-421b-8c10-370ba75177b0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'Customer Lifetime Value': (200.44, 4479.55),\n", + " 'Income': (0.0, 99960.0),\n", + " 'Monthly Premium Auto': (61.0, 35354.0),\n", + " 'Number of Open Complaints': (0, 5),\n", + " 'Total Claim Amount': (0.382107, 2893.239678)}" + ] + }, + "execution_count": 71, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "numerical_columns = df.select_dtypes(include=['float64', 'int64']).columns\n", + "numerical_ranges = {col: (df[col].min(), df[col].max()) for col in numerical_columns}\n", + "numerical_ranges" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "id": "aa55c98e-a9a7-42a9-9f14-889b97d4eecb", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": 
"stream", + "text": [ + "Customer object\n", + "ST object\n", + "GENDER object\n", + "Education object\n", + "Customer Lifetime Value float64\n", + "Income float64\n", + "Monthly Premium Auto float64\n", + "Number of Open Complaints int64\n", + "Policy Type object\n", + "Vehicle Class object\n", + "Total Claim Amount float64\n", + "dtype: object\n" + ] + } + ], + "source": [ + "\n", + "\n", + "print(df.dtypes)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "id": "5cb155c6-af3c-4082-8fbc-5a1e441abc27", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Customer 952\n", + "ST 8\n", + "GENDER 5\n", + "Education 6\n", + "Customer Lifetime Value 918\n", + "Income 688\n", + "Monthly Premium Auto 128\n", + "Number of Open Complaints 6\n", + "Policy Type 3\n", + "Vehicle Class 6\n", + "Total Claim Amount 688\n", + "dtype: int64" + ] + }, + "execution_count": 73, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.nunique()\n", + "\n", + "#ST, GENDER, Education " + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "id": "4f0b2465-5a67-44a5-b331-2e261d976d14", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0, 1, 2, 4, 3, 5])" + ] + }, + "execution_count": 74, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['Number of Open Complaints'].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "id": "5dee1d53-08fa-4f63-bd59-0c05d1ef2e4f", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "\n", + "\n", + "df['ST'] = df['ST'].replace({\n", + " 'Arizona': 'AZ', \n", + " 'Nevada': 'NV', \n", + " 'California': 'CA', \n", + " 'Washington': 'WA', \n", + " 'Oregon': 'OR', \n", + " 'Cali': 'CA' # Standardiser les variantes\n", + "})\n", + "\n", + "\n", + "df['GENDER'] = df['GENDER'].replace({\n", + " 'Femal': 'F', \n", + " 'female': 'F', \n", + " 'Female': 'F', \n", + " 'Male': 'M', \n", + " 'male': 'M'\n", + "})\n", + 
"\n", + "\n", + "df['Education'] = df['Education'].replace({\n", + " 'Bachelor': 'Bachelors', \n", + " 'Bachelors': 'Bachelors', \n", + " 'High School or Below': 'High School', \n", + " 'Master': 'Masters'\n", + "})\n", + "\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "id": "80080e8a-341f-4a66-bdf2-7932983850d8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Unique values in 'Customer': ['QZ44356' 'AI49188' 'WW63253' 'GA49547' 'OC83172' 'XZ87318' 'CF85061'\n", + " 'DY87989' 'BQ94931' 'SX51350' 'QK46697' 'HS14476' 'HD95276' 'YD87931'\n", + " 'NW21079' 'YB66933' 'OW15518' 'GP39118' 'SG20925' 'FM14335' 'HS28694'\n", + " 'LH92841' 'AZ95587' 'DS81757' 'OJ94107' 'LP84436' 'FF22360' 'LM19287'\n", + " 'ZU18643' 'AZ82578' 'XC67861' 'YC43143' 'EK59571' 'PA38372' 'RO18530'\n", + " 'PD27940' 'BS77946' 'YM50253' 'NR15332' 'RC62865' 'CC15295' 'KA61892'\n", + " 'OS94884' 'ND87334' 'OY51402' 'YL74911' 'GK92563' 'HL53154' 'RI78966'\n", + " 'IC13702' 'BE10809' 'HT87217' 'TH95618' 'TS19868' 'LP45550' 'QR87004'\n", + " 'OE75747' 'DX91392' 'AB72731' 'GX84338' 'IS12901' 'BN90616' 'HH90090'\n", + " 'IU25463' 'KC11055' 'PD33979' 'NK71023' 'AB13432' 'OZ97704' 'UF46533'\n", + " 'XP47431' 'GK73582' 'RV98763' 'II62831' 'XK33449' 'TR85083' 'EO95328'\n", + " 'EN21086' 'YL83902' 'AZ62651' 'ZW25874' 'EH41854' 'MW70227' 'SL22297'\n", + " 'RV14138' 'UO62808' 'ZX64745' 'FL34139' 'TS11219' 'XX12304' 'SD64087'\n", + " 'OY38576' 'BG76355' 'IP66913' 'LE95702' 'KX54357' 'EZ78112' 'XN16891'\n", + " 'XK31350' 'CC30924' 'IT78748' 'KY33386' 'CO44221' 'LK60013' 'DE21533'\n", + " 'YS94121' 'UK68427' 'TE49565' 'RA88421' 'KQ51983' 'CD88896' 'YV22553'\n", + " 'WU14435' 'XV84099' 'RI24911' 'KO26461' 'HI14283' 'PT50227' 'BH36570'\n", + " 'TX17484' 'CT41158' 'AO87348' 'DE55857' 'LF66923' 'CN24514' 'UW32074'\n", + " 'HP36979' 'PP40919' 'RO73268' 'HO61691' 'BS13062' 'FO35655' 'HR10526'\n", + " 'IA63417' 'BH35016' 'PK52952' 
'OD76309' 'IL28481' 'GY55092' 'UF33451'\n", + " 'CF15558' 'JM62924' 'EM66435' 'QX45933' 'JI71369' 'JU93290' 'GU66096'\n", + " 'UC33108' 'LW93867' 'OU78470' 'XW90265' 'HS67749' 'VZ51506' 'UI64281'\n", + " 'AE98193' 'AZ74055' 'XS76911' 'AY40674' 'NA12740' 'UA84837' 'DJ51510'\n", + " 'VM58985' 'OH60605' 'UO98052' 'NC53424' 'LQ13873' 'LA97014' 'NB79936'\n", + " 'NT89061' 'AF10970' 'ZG48513' 'JQ59145' 'FE84989' 'JT52858' 'MC62068'\n", + " 'EU27538' 'RH42306' 'US23612' 'WV76014' 'RK96223' 'MF82000' 'FM46980'\n", + " 'SY56792' 'RF61565' 'IM94808' 'VI14730' 'YR34119' 'RR77985' 'QD28391'\n", + " 'WV17090' 'TM23514' 'MQ68407' 'GJ59592' 'FY56083' 'UA94723' 'FW91032'\n", + " 'DE34457' 'HD32044' 'HH30454' 'AH84063' 'QA17596' 'XI41052' 'DI30528'\n", + " 'SC66359' 'EN61670' 'DQ10761' 'BQ51587' 'JE21522' 'WS47147' 'ZA64638'\n", + " 'EW38459' 'QW87316' 'IC43478' 'TE34064' 'WU60905' 'YM18992' 'PD55753'\n", + " 'KU56006' 'MJ69973' 'TW43626' 'XX84133' 'ZW84453' 'HO29524' 'VE89726'\n", + " 'GE87503' 'PX90263' 'NI17718' 'FY32213' 'RZ13254' 'GN45013' 'NM39588'\n", + " 'KU84464' 'YH43527' 'RO30676' 'QL59704' 'QH19450' 'SA54664' 'CI38330'\n", + " 'WB38524' 'CE56187' 'JL19416' 'JZ61422' 'LA13377' 'NC99948' 'QD34785'\n", + " 'RO26085' 'ES57969' 'JK55587' 'RN97635' 'BI76326' 'JA34909' 'OJ90342'\n", + " 'CM88932' 'JJ97525' 'XV21647' 'MC83487' 'BL90769' 'CR57148' 'CP85232'\n", + " 'YL74732' 'FG16766' 'NV55438' 'RM10880' 'GL56175' 'UK52289' 'OT85112'\n", + " 'BC62782' 'TI19722' 'JP30654' 'UM45563' 'EN60878' 'JF36291' 'BK59444'\n", + " 'MK70700' 'IW71076' 'AP98768' 'OM24164' 'HR85211' 'VC87846' 'ZM92052'\n", + " 'ON73702' 'QQ90441' 'HU35721' 'YP47665' 'FU99476' 'AG85615' 'OY74069'\n", + " 'DJ91267' 'KB72438' 'TR67616' 'GF65731' 'HB67642' 'DP84567' 'VV77534'\n", + " 'GL67540' 'SV50502' 'UK59698' 'OA57352' 'ZF84449' 'AX86150' 'HG39060'\n", + " 'EM29359' 'SF57173' 'OT47603' 'SW31412' 'JS36322' 'RE81445' 'RM24280'\n", + " 'LC25393' 'UX38930' 'HD95496' 'RX24650' 'DW19309' 'MT41386' 'WZ40465'\n", + " 
'DB42794' 'JB50798' 'IP69763' 'TE35785' 'HX74855' 'QN65180' 'GE47180'\n", + " 'VQ38776' 'BH86846' 'IN17648' 'DF95759' 'QG45324' 'MN61620' 'YH86390'\n", + " 'FY13480' 'YH61661' 'NL93182' 'WE68644' 'EZ30498' 'QY74517' 'NM88660'\n", + " 'MZ82036' 'ID20929' 'EY50028' 'TT82373' 'OH64088' 'SK97780' 'IO33050'\n", + " 'XA55917' 'JK32620' 'RQ19236' 'QC47433' 'RA93608' 'XH97711' 'AU96286'\n", + " 'KC17170' 'ZN47335' 'EI46264' 'EK87864' 'GV45403' 'QK31192' 'LU89008'\n", + " 'NS10490' 'KL98495' 'IU96845' 'QL93655' 'PF40592' 'LZ34046' 'JC80093'\n", + " 'YE88490' 'YC80498' 'AI85843' 'XD66024' 'FY51713' 'PH26378' 'WQ18638'\n", + " 'KY14688' 'TC97762' 'QC87108' 'CX12134' 'SM73248' 'CK19789' 'UV12583'\n", + " 'JC11405' 'KA89683' 'BG85305' 'UQ87917' 'XN11823' 'OS46571' 'PX17116'\n", + " 'RP19541' 'ZR25747' 'NQ86532' 'JY27336' 'PB54378' 'SV38190' 'CV24005'\n", + " 'EX28656' 'CF57022' 'GM16780' 'BX94438' 'RM41745' 'XR70252' 'YH92099'\n", + " 'SG81493' 'ZX23819' 'FJ54907' 'CU26127' 'YH60476' 'ZZ97035' 'GE82737'\n", + " 'KY21873' 'UA51318' 'BV55014' 'HX21307' 'LQ68252' 'CR92802' 'SL35268'\n", + " 'RD62882' 'JS42382' 'BT30554' 'VP57424' 'VU19243' 'TA82973' 'GK71720'\n", + " 'OQ61223' 'LL62746' 'JQ56711' 'AW77988' 'QP84605' 'MY97912' 'IB87349'\n", + " 'AW73065' 'BW80872' 'PX70175' 'KF75098' 'IS50283' 'MY64920' 'KN34250'\n", + " 'GN46207' 'KL57176' 'MN94234' 'JY90595' 'HK26543' 'PN86062' 'VW27730'\n", + " 'SH55671' 'MO56878' 'VO38365' 'SV35618' 'RX12347' 'FR55658' 'XS12556'\n", + " 'ZU73588' 'WT43034' 'VM13430' 'TC78849' 'VC34764' 'WO90953' 'IU47468'\n", + " 'KO46064' 'RB34917' 'BI38192' 'PU18983' 'SW79912' 'ES39217' 'KP72427'\n", + " 'UA19178' 'PR53785' 'XF57481' 'CN90378' 'KI56154' 'UI55951' 'FF28650'\n", + " 'FS55302' 'TN79487' 'HG32616' 'UK41984' 'LZ52266' 'PM27367' 'ZK21724'\n", + " 'BH35482' 'QE22757' 'ON77649' 'RN82884' 'CQ75652' 'FF58467' 'BS83666'\n", + " 'WO29605' 'TL77607' 'EZ50606' 'OS39723' 'FN69743' 'XW96958' 'TU92578'\n", + " 'TL43709' 'YE68736' 'OB96537' 'EU68825' 'CC31456' 
'DJ77787' 'LN26837'\n", + " 'YI92916' 'NW54906' 'ME77513' 'UK76891' 'SI26888' 'YD74948' 'HB64268'\n", + " 'BW52697' 'NL41409' 'OD69005' 'ZZ91716' 'UK70255' 'QT25383' 'AW18068'\n", + " 'NS45347' 'FV19421' 'XW89091' 'YC11951' 'UY18770' 'RA49085' 'BG84194'\n", + " 'PT64580' 'MR67738' 'DM95829' 'DB75522' 'LM34525' 'WW30771' 'QP65569'\n", + " 'TN50051' 'UO86707' 'JA41698' 'NX18774' 'DA69469' 'CN23147' 'RA68844'\n", + " 'GH42026' 'BD16530' 'JH91579' 'WK23685' 'GR62267' 'PI78084' 'GF97874'\n", + " 'ZH19885' 'UK25655' 'QR45101' 'EL93539' 'EE99484' 'DP46882' 'WP41146'\n", + " 'TK60799' 'DN29808' 'SS59521' 'NG66579' 'TC14209' 'ED50963' 'GP40701'\n", + " 'CP98451' 'NX52648' 'ZC32510' 'NG27780' 'HN95240' 'EB59129' 'RA70851'\n", + " 'PM19162' 'MS59005' 'SU71163' 'BD35676' 'NI44621' 'EW33419' 'HX44948'\n", + " 'DL36983' 'XR87264' 'NN99001' 'XV95530' 'OL97871' 'HQ23708' 'WR63188'\n", + " 'NG82219' 'KU29408' 'RE46783' 'RU94434' 'GI82355' 'VO26340' 'NV61299'\n", + " 'DX31066' 'CY50337' 'TJ20375' 'EP72155' 'JJ76159' 'BG15419' 'AO74776'\n", + " 'HQ82233' 'OL72737' 'ZQ59828' 'NZ15548' 'XK61304' 'EJ44139' 'CM94425'\n", + " 'OV54878' 'JF57282' 'MY37953' 'XP64922' 'WL65572' 'LN50325' 'HJ15383'\n", + " 'KH59823' 'YM79169' 'DR38127' 'PU42145' 'KM33477' 'RI53167' 'OF77789'\n", + " 'YB33445' 'BA17836' 'JS43228' 'BB11622' 'HQ70429' 'WK88044' 'LA80525'\n", + " 'EH16250' 'PU41872' 'HB85743' 'MM71959' 'MB83663' 'KR43119' 'KH24214'\n", + " 'AC40767' 'HP55391' 'EG62398' 'VS19949' 'AM92343' 'GI68556' 'JT11876'\n", + " 'XR64251' 'MK34957' 'GP18756' 'AP23850' 'KQ65521' 'EJ19449' 'QB70027'\n", + " 'QW47320' 'KH64733' 'ON59472' 'HP94242' 'RV15398' 'EA25683' 'PW73754'\n", + " 'MC71942' 'OX72195' 'YQ99152' 'KI19439' 'PM76175' 'US45383' 'GT38956'\n", + " 'SN41301' 'BE62503' 'PA16884' 'NC58480' 'NS39326' 'PN18507' 'EK91340'\n", + " 'JY16280' 'ZW71731' 'ZC24631' 'YR34689' 'RT65829' 'BZ12077' 'WM65373'\n", + " 'NH35059' 'QD38160' 'BM15160' 'VY79030' 'EV19512' 'TE13577' 'WY97929'\n", + " 'YG20683' 'FK75497' 
'NE60110' 'TN36521' 'HG33568' 'TW17878' 'ZO83562'\n", + " 'CH97539' 'CV29889' 'MO33320' 'QZ81258' 'NY56352' 'EA27048' 'UT38865'\n", + " 'QC89139' 'LA14484' 'HN57556' 'CV31235' 'WR45726' 'LB25094' 'KW56110'\n", + " 'XO36233' 'ZX86243' 'DW29763' 'CT83377' 'OQ90898' 'GO77248' 'QW33258'\n", + " 'OU79745' 'VZ79886' 'FI92440' 'YG85980' 'QM74621' 'EI71732' 'VN79010'\n", + " 'FI61723' 'OH55411' 'TF10720' 'NW30838' 'CB58476' 'WI69346' 'FS76657'\n", + " 'YX89016' 'PK28821' 'MB51200' 'XG44587' 'FG91922' 'OM99303' 'RV67546'\n", + " 'UJ79253' 'PN98247' 'IB67546' 'OE19087' 'CM95716' 'MW62634' 'QW67581'\n", + " 'SN16059' 'OE51254' 'RM42344' 'GB35238' 'ML82674' 'EI85244' 'DE28132'\n", + " 'TV25678' 'TY26512' 'OB69153' 'QZ77637' 'XN41715' 'QR15857' 'FL69363'\n", + " 'IS30295' 'WA25797' 'NL59519' 'ZU93025' 'DK94262' 'UQ30615' 'OR40060'\n", + " 'DK32872' 'FA46418' 'ER19995' 'KI75855' 'ND41876' 'PN21042' 'GJ43254'\n", + " 'AL46984' 'JP58047' 'ZE85014' 'KU88219' 'UU98729' 'WS82822' 'YB49933'\n", + " 'XC16387' 'XJ96748' 'TM98684' 'AY18433' 'DM74502' 'FT56968' 'OX36896'\n", + " 'BZ65376' 'LN34660' 'JC29295' 'KJ87930' 'XT36360' 'IX35050' 'UN97379'\n", + " 'MR57294' 'UG79499' 'UA50747' 'GL20444' 'SP58110' 'XM91635' 'TV82603'\n", + " 'BB82067' 'JP94676' 'VU53417' 'IW54795' 'RN78170' 'IX55883' 'XM72420'\n", + " 'GC15104' 'RX13282' 'QA85890' 'IR62668' 'AL96740' 'SS48498' 'PE39479'\n", + " 'JH62891' 'FI20423' 'PM13394' 'YV67971' 'QD31377' 'YG10247' 'FE73696'\n", + " 'SW19699' 'QJ40732' 'HM76207' 'NT59303' 'PU41393' 'QO86948' 'QN10888'\n", + " 'VY19543' 'XC15133' 'ST43550' 'FX36546' 'JX68983' 'HX78576' 'ZQ11381'\n", + " 'ON39271' 'SB18278' 'ZT30559' 'XI41106' 'ZS88847' 'RU49126' 'KR62797'\n", + " 'ZJ73220' 'FY62633' 'CU36986' 'WZ53904' 'AA71604' 'TD10493' 'LY97989'\n", + " 'VX39856' 'TP51897' 'QQ89253' 'EI91403' 'QG15435' 'FZ55002' 'HX77930'\n", + " 'UN37063' 'VB87946' 'AB60627' 'TA34903' 'AQ51368' 'NZ26102' 'GB45753'\n", + " 'BV79904' 'OB49075' 'DS97676' 'JO63462' 'NJ10602' 'RS24501' 
'VT78274'\n", + " 'SU56153' 'MN20737' 'KL43114' 'YQ15567' 'TR88637' 'TC88986' 'XX88577'\n", + " 'NE49052' 'KX17826' 'CC91503' 'WH32183' 'ES90681' 'DW96592' 'MT23134'\n", + " 'BM69081' 'MB90871' 'QL77686' 'ON77827' 'KP18988' 'TI92884' 'JH73503'\n", + " 'YE97964' 'VA30351' 'PV55726' 'UC88305' 'TS53809' 'ZV32120' 'FB80807'\n", + " 'AS55677' 'WA15684' 'SA50567' 'KJ31611' 'VL37375' 'KN21017' 'PX44289'\n", + " 'AM97901' 'RE42925' 'TR81766' 'CH85057' 'UP71482' 'EG40670' 'HV83672'\n", + " 'MG10140' 'TC44716' 'QO65264' 'EB66698' 'OT52034' 'CH85444' 'PU85769'\n", + " 'UI73201' 'SL50592' 'XP11075' 'SI31236' 'JN26745' 'VK48036' 'JX76668'\n", + " 'DS45802' 'OA96690' 'EM27919' 'QO41043' 'OV50124' 'PR31642' 'BU41599'\n", + " 'TK30357' 'NF31087' 'NH16984' 'OS75493' 'VT63298' 'QS75550' 'SZ16483'\n", + " 'VM92311' 'NJ46849' 'WZ31900' 'RG30482' 'ZM86949' 'QQ39596' 'FH51383'\n", + " 'BJ53923' 'CZ96653' 'FB23788' 'NT43594' 'RJ85627' 'KJ86296' 'PI47776'\n", + " 'MD73554' 'UX92071' 'YG44474' 'UH45301' 'RY92647' 'IK12620' 'GQ66762'\n", + " 'YT69858' 'XD85577' 'TM65736' 'VJ51327' 'GS98873' 'CW49887' 'MY31220']\n", + "Unique values in 'ST': ['AZ' 'NV' 'CA' 'WA' 'OR']\n", + "Unique values in 'GENDER': ['F' 'M']\n", + "Unique values in 'Education': ['Bachelors' 'High School' 'College' 'Masters' 'Doctor']\n", + "Unique values in 'Policy Type': ['Personal Auto' 'Corporate Auto' 'Special Auto']\n", + "Unique values in 'Vehicle Class': ['Four-Door Car' 'Two-Door Car' 'SUV' 'Sports Car' 'Luxury Car'\n", + " 'Luxury SUV']\n" + ] + } + ], + "source": [ + "categorical_columns = ['Customer', 'ST', 'GENDER', 'Education', 'Policy Type', 'Vehicle Class']\n", + "for column in categorical_columns:\n", + " unique_values = df[column].unique()\n", + " print(f\"Unique values in '{column}': {unique_values}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "id": "605b8a8b-b755-4c11-b01e-83b066ea341d", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": 
"stream", + "text": [ + "Summary Statistics for Numerical Columns:\n", + " Customer Lifetime Value Income Monthly Premium Auto \\\n", + "count 952.000000 952.000000 952.000000 \n", + "mean 794.201187 39557.350840 205.231092 \n", + "std 625.879148 30744.561851 1697.752504 \n", + "min 200.440000 0.000000 61.000000 \n", + "25% 411.785000 13087.750000 68.000000 \n", + "50% 588.575000 36632.000000 83.000000 \n", + "75% 903.750000 65085.500000 110.000000 \n", + "max 4479.550000 99960.000000 35354.000000 \n", + "\n", + " Number of Open Complaints Total Claim Amount \n", + "count 952.000000 952.000000 \n", + "mean 0.378151 404.999156 \n", + "std 0.872446 299.238726 \n", + "min 0.000000 0.382107 \n", + "25% 0.000000 196.724587 \n", + "50% 0.000000 350.400000 \n", + "75% 0.000000 534.000000 \n", + "max 5.000000 2893.239678 \n" + ] + } + ], + "source": [ + "summary_statistics = df[numerical_columns].describe()\n", + "print(\"Summary Statistics for Numerical Columns:\")\n", + "print(summary_statistics)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "dbc53b33-12a4-4fea-868d-9d567e56a533", "metadata": {}, "outputs": [], "source": [ - "# Your code here" + "#Conclusion:\n", + "#Numerical Columns and Unique Values: \n", + "#the range of Income, 99960.000000 is high \n", + "\n", + "#Categorical Columns and Unique Values: \n", + "#Policy Type: Categorical values are Auto, Home, and Life.\n", + "# 6 different Vehicle Classes \n" + ] + }, + { + "cell_type": "markdown", + "id": "71ea37ed-f264-4675-899d-5d08ced5f7e4", + "metadata": {}, + "source": [ + "- Compute summary statistics such as mean, median, mode, standard deviation, and quartiles to understand the central tendency and distribution of the data for numerical columns. 
You should also provide your conclusions based on these summary statistics.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "a67e48db-aefd-4311-b793-6124ac61154f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Customer Lifetime ValueIncomeMonthly Premium AutoNumber of Open ComplaintsTotal Claim Amount
count952.000000952.000000952.000000952.000000952.000000
mean794.20118739557.350840205.2310920.378151404.999156
std625.87914830744.5618511697.7525040.872446299.238726
min200.4400000.00000061.0000000.0000000.382107
25%411.78500013087.75000068.0000000.000000196.724587
50%588.57500036632.00000083.0000000.000000350.400000
75%903.75000065085.500000110.0000000.000000534.000000
max4479.55000099960.00000035354.0000005.0000002893.239678
\n", + "
" + ], + "text/plain": [ + " Customer Lifetime Value Income Monthly Premium Auto \\\n", + "count 952.000000 952.000000 952.000000 \n", + "mean 794.201187 39557.350840 205.231092 \n", + "std 625.879148 30744.561851 1697.752504 \n", + "min 200.440000 0.000000 61.000000 \n", + "25% 411.785000 13087.750000 68.000000 \n", + "50% 588.575000 36632.000000 83.000000 \n", + "75% 903.750000 65085.500000 110.000000 \n", + "max 4479.550000 99960.000000 35354.000000 \n", + "\n", + " Number of Open Complaints Total Claim Amount \n", + "count 952.000000 952.000000 \n", + "mean 0.378151 404.999156 \n", + "std 0.872446 299.238726 \n", + "min 0.000000 0.382107 \n", + "25% 0.000000 196.724587 \n", + "50% 0.000000 350.400000 \n", + "75% 0.000000 534.000000 \n", + "max 5.000000 2893.239678 " + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "ef6b10ff-3dcc-416d-bd86-7eb920bffc08", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Customer:\n", + "count 952\n", + "unique 952\n", + "top QZ44356\n", + "freq 1\n", + "Name: Customer, dtype: object\n", + "\n", + "ST:\n", + "count 952\n", + "unique 5\n", + "top CA\n", + "freq 293\n", + "Name: ST, dtype: object\n", + "\n", + "GENDER:\n", + "count 952\n", + "unique 2\n", + "top F\n", + "freq 501\n", + "Name: GENDER, dtype: object\n", + "\n", + "Education:\n", + "count 952\n", + "unique 5\n", + "top Bachelors\n", + "freq 291\n", + "Name: Education, dtype: object\n", + "\n", + "Policy Type:\n", + "count 952\n", + "unique 3\n", + "top Personal Auto\n", + "freq 689\n", + "Name: Policy Type, dtype: object\n", + "\n", + "Vehicle Class:\n", + "count 952\n", + "unique 6\n", + "top Four-Door Car\n", + "freq 506\n", + "Name: Vehicle Class, dtype: object\n", + "\n" + ] + } + ], + "source": [ + "\n", + "for i in df.select_dtypes(include=['object']).columns:\n", + " 
print(f\"{i}:\\n{df[i].describe()}\\n\")\n", + "\n", + "#there are no NaN values: every categorical column has a count of 952" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "ff0d32b9-f897-4f97-b472-d5da35281bcd", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Customer:\n", + "count 952\n", + "unique 952\n", + "top QZ44356\n", + "freq 1\n", + "Name: Customer, dtype: object\n", + "\n", + "ST:\n", + "count 952\n", + "unique 5\n", + "top CA\n", + "freq 293\n", + "Name: ST, dtype: object\n", + "\n", + "GENDER:\n", + "count 952\n", + "unique 2\n", + "top F\n", + "freq 501\n", + "Name: GENDER, dtype: object\n", + "\n", + "Education:\n", + "count 952\n", + "unique 5\n", + "top Bachelors\n", + "freq 291\n", + "Name: Education, dtype: object\n", + "\n", + "Policy Type:\n", + "count 952\n", + "unique 3\n", + "top Personal Auto\n", + "freq 689\n", + "Name: Policy Type, dtype: object\n", + "\n", + "Vehicle Class:\n", + "count 952\n", + "unique 6\n", + "top Four-Door Car\n", + "freq 506\n", + "Name: Vehicle Class, dtype: object\n", + "\n" + ] + } + ], + "source": [ + "\n", + "for i in df.select_dtypes(include=['object']).columns:\n", + " print(f\"{i}:\\n{df[i].describe()}\\n\")\n", + "\n", + "#there are no NaN values: every categorical column has a count of 952" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "ba638bbc-61da-49fc-81c0-710630e3d194", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CustomerSTGENDEREducationCustomer Lifetime ValueIncomeMonthly Premium AutoNumber of Open ComplaintsPolicy TypeVehicle ClassTotal Claim Amount
1QZ44356AZFBachelors697.950.094.00Personal AutoFour-Door Car1131.464935
2AI49188NVFBachelors1288.7448767.0108.00Personal AutoTwo-Door Car566.472247
3WW63253CAMBachelors764.590.0106.00Corporate AutoSUV529.881344
4GA49547WAMHigh School536.3136357.068.00Personal AutoFour-Door Car17.269323
5OC83172ORFBachelors825.6362902.069.00Personal AutoTwo-Door Car159.383042
....................................
1066TM65736ORMMasters305.9638644.078.01Personal AutoFour-Door Car361.455219
1067VJ51327CAFHigh School2031.5063209.0102.02Personal AutoSUV207.320041
1068GS98873AZFBachelors323.9116061.088.00Personal AutoFour-Door Car633.600000
1069CW49887CAFMasters462.6879487.0114.00Special AutoSUV547.200000
1070MY31220CAFCollege899.7054230.0112.00Personal AutoTwo-Door Car537.600000
\n", + "

952 rows × 11 columns

\n", + "
" + ], + "text/plain": [ + " Customer ST GENDER Education Customer Lifetime Value Income \\\n", + "1 QZ44356 AZ F Bachelors 697.95 0.0 \n", + "2 AI49188 NV F Bachelors 1288.74 48767.0 \n", + "3 WW63253 CA M Bachelors 764.59 0.0 \n", + "4 GA49547 WA M High School 536.31 36357.0 \n", + "5 OC83172 OR F Bachelors 825.63 62902.0 \n", + "... ... .. ... ... ... ... \n", + "1066 TM65736 OR M Masters 305.96 38644.0 \n", + "1067 VJ51327 CA F High School 2031.50 63209.0 \n", + "1068 GS98873 AZ F Bachelors 323.91 16061.0 \n", + "1069 CW49887 CA F Masters 462.68 79487.0 \n", + "1070 MY31220 CA F College 899.70 54230.0 \n", + "\n", + " Monthly Premium Auto Number of Open Complaints Policy Type \\\n", + "1 94.0 0 Personal Auto \n", + "2 108.0 0 Personal Auto \n", + "3 106.0 0 Corporate Auto \n", + "4 68.0 0 Personal Auto \n", + "5 69.0 0 Personal Auto \n", + "... ... ... ... \n", + "1066 78.0 1 Personal Auto \n", + "1067 102.0 2 Personal Auto \n", + "1068 88.0 0 Personal Auto \n", + "1069 114.0 0 Special Auto \n", + "1070 112.0 0 Personal Auto \n", + "\n", + " Vehicle Class Total Claim Amount \n", + "1 Four-Door Car 1131.464935 \n", + "2 Two-Door Car 566.472247 \n", + "3 SUV 529.881344 \n", + "4 Four-Door Car 17.269323 \n", + "5 Two-Door Car 159.383042 \n", + "... ... ... 
\n", + "1066 Four-Door Car 361.455219 \n", + "1067 SUV 207.320041 \n", + "1068 Four-Door Car 633.600000 \n", + "1069 SUV 547.200000 \n", + "1070 Two-Door Car 537.600000 \n", + "\n", + "[952 rows x 11 columns]" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_cleaned = df.dropna()\n", + "df_cleaned" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "f6ed27cb-bf40-4ce0-b29b-718e248abb79", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Customer 0\n", + "ST 0\n", + "GENDER 0\n", + "Education 0\n", + "Customer Lifetime Value 0\n", + "Income 0\n", + "Monthly Premium Auto 0\n", + "Number of Open Complaints 0\n", + "Policy Type 0\n", + "Vehicle Class 0\n", + "Total Claim Amount 0\n", + "dtype: int64" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.isnull().sum()\n" + ] + }, + { + "cell_type": "markdown", + "id": "957c7f80-9179-45bf-b444-0a5c1d20e39c", + "metadata": {}, + "source": [ + "- Compute summary statistics for categorical columns and providing your conclusions based on these statistics." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "848c15aa-314e-4fee-960e-ee58d5d97f2c", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Customer ST GENDER Education Policy Type Vehicle Class\n", + "count 952 952 952 952 952 952\n", + "unique 952 5 2 5 3 6\n", + "top QZ44356 CA F Bachelors Personal Auto Four-Door Car\n", + "freq 1 293 501 291 689 506\n" + ] + } + ], + "source": [ + "categorical_summary = df.describe(include=['object'])\n", + "print(categorical_summary)" ] }, { @@ -116,12 +3137,296 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 36, "id": "2dca5073-4520-4f42-9390-4b92733284ed", "metadata": {}, "outputs": [], "source": [ - "# Your code here" + "df['ST'] = df['ST'].replace({\n", + " 'Washington': 'WA', # Replace full name with abbreviation\n", + " 'Cali': 'CA', \n", + " 'California':'CA', \n", + " 'Arizona':'AZ',\n", + " 'Oregon':'OR',\n", + " 'Nevada':'NV'\n", + " \n", + "})\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "89bb2442-4a84-4915-be49-943f931ccb27", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['AZ', 'NV', 'CA', 'WA', 'OR'], dtype=object)" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['ST'].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "79d0aadd-9279-472a-a0b9-17c38863992d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CustomerGENDEREducationCustomer Lifetime ValueIncomeMonthly Premium AutoNumber of Open ComplaintsPolicy TypeVehicle ClassTotal Claim Amount
ST
AZ188188188188188188188188188188
CA293293293293293293293293293293
NV89898989898989898989
OR276276276276276276276276276276
WA106106106106106106106106106106
\n", + "
" + ], + "text/plain": [ + " Customer GENDER Education Customer Lifetime Value Income \\\n", + "ST \n", + "AZ 188 188 188 188 188 \n", + "CA 293 293 293 293 293 \n", + "NV 89 89 89 89 89 \n", + "OR 276 276 276 276 276 \n", + "WA 106 106 106 106 106 \n", + "\n", + " Monthly Premium Auto Number of Open Complaints Policy Type \\\n", + "ST \n", + "AZ 188 188 188 \n", + "CA 293 293 293 \n", + "NV 89 89 89 \n", + "OR 276 276 276 \n", + "WA 106 106 106 \n", + "\n", + " Vehicle Class Total Claim Amount \n", + "ST \n", + "AZ 188 188 \n", + "CA 293 293 \n", + "NV 89 89 \n", + "OR 276 276 \n", + "WA 106 106 " + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.groupby('ST').count()\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "6c62d985-e67e-4190-9f45-26d6aa7ac887", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "ST\n", + "AZ 188\n", + "CA 293\n", + "NV 89\n", + "OR 276\n", + "WA 106\n", + "dtype: int64" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "location_counts = df.groupby('ST').size()\n", + "location_counts " + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "c955588d-0c5e-4ad2-b397-970413297d1f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "ST\n", + "NV 89\n", + "WA 106\n", + "AZ 188\n", + "OR 276\n", + "CA 293\n", + "dtype: int64" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sorted_locations = location_counts.sort_values(ascending=True)\n", + "sorted_locations" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "a1697b84-d1a4-4ac2-9ca3-2cf6d1f696fc", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "top_5_less_common_locations = sorted_locations.head(5)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "id": 
"e6a1575f-664e-4d09-9419-29b7a162eb91", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ST\n", + "NV 89\n", + "WA 106\n", + "AZ 188\n", + "OR 276\n", + "CA 293\n", + "dtype: int64\n" + ] + } + ], + "source": [ + "print(top_5_less_common_locations)" ] }, { @@ -146,12 +3451,65 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 43, + "id": "a8fe7df7-2397-4e86-a238-401e63336886", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['Customer', 'ST', 'GENDER', 'Education', 'Customer Lifetime Value',\n", + " 'Income', 'Monthly Premium Auto', 'Number of Open Complaints',\n", + " 'Policy Type', 'Vehicle Class', 'Total Claim Amount'],\n", + " dtype='object')" + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.columns = df.columns.str.strip() # This will remove any leading or trailing spaces\n", + "df.columns " + ] + }, + { + "cell_type": "code", + "execution_count": 44, "id": "bcfad6c1-9af2-4b0b-9aa9-0dc5c17473c0", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Total number of policies sold for each type of policy:\n", + "Policy Type\n", + "Personal Auto 689\n", + "Corporate Auto 211\n", + "Special Auto 52\n", + "Name: count, dtype: int64\n", + "\n", + "The policy type with the highest number of policies sold is 'Personal Auto' with 689 policies sold.\n" + ] + } + ], "source": [ - "# Your code here" + "\n", + "policy_counts = df['Policy Type'].value_counts()\n", + "\n", + "# Step 2: Create a Series object from the counts (already a Series from value_counts)\n", + "# (This step is just illustrative as policy_counts is already a Series)\n", + "\n", + "# Step 3: Retrieve the policy type with the highest number of policies sold\n", + "highest_policy_type = policy_counts.idxmax() # Get the policy type name with max policies\n", + "highest_count = 
policy_counts.max() # Get the count of that policy type\n", + "\n", + "# Print the results\n", + "print(\"Total number of policies sold for each type of policy:\")\n", + "print(policy_counts)\n", + "print(f\"\\nThe policy type with the highest number of policies sold is '{highest_policy_type}' with {highest_count} policies sold.\")" ] }, { @@ -176,14 +3534,43 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 45, "id": "0c0563cf-6f8b-463d-a321-651a972f82e5", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Average income for Personal Auto policies: 38180.69871794872\n", + "Average income for Corporate Auto policies: 41390.31196581197\n" + ] + } + ], "source": [ - "# Your code here" + "import pandas as pd\n", + "\n", + "df = pd.read_csv('https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file1.csv')\n", + "\n", + "personal_auto_df = df.loc[df['Policy Type'] == 'Personal Auto']\n", + "corporate_auto_df = df.loc[df['Policy Type'] == 'Corporate Auto']\n", + "\n", + "average_income_personal = personal_auto_df['Income'].mean()\n", + "average_income_corporate = corporate_auto_df['Income'].mean()\n", + "\n", + "print(f\"Average income for Personal Auto policies: {average_income_personal}\")\n", + "print(f\"Average income for Corporate Auto policies: {average_income_corporate}\")\n", + "\n" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "37e400d6-9d7d-458a-8d4d-07b8a30e709b", + "metadata": {}, + "outputs": [], + "source": [] + }, { "cell_type": "markdown", "id": "80b16c27-f4a5-4727-a229-1f88671cf4e2", @@ -226,13 +3613,395 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "b731bca6-a760-4860-a27b-a33efa712ce0", + "execution_count": 46, + "id": "43cde443-7e8c-4a2a-bd10-5c5e2a9aad83", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "532.8" + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": 
"execute_result" + } + ], + "source": [ + "percentile_75 = df['Total Claim Amount'].quantile(0.75)\n", + "percentile_75" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "id": "18c31ba6-d3cd-463d-8c24-3c785e9607ca", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CustomerSTGENDEREducationCustomer Lifetime ValueIncomeMonthly Premium AutoNumber of Open ComplaintsPolicy TypeVehicle ClassTotal Claim Amount
1QZ44356ArizonaFBachelor697953.59%0.094.01/0/00Personal AutoFour-Door Car1131.464935
2AI49188NevadaFBachelor1288743.17%48767.0108.01/0/00Personal AutoTwo-Door Car566.472247
17OE15005CaliNaNCollege394524.16%28855.0101.01/0/00Personal AutoSUV647.442031
23TZ98966NevadaNaNBachelor245019.10%0.073.01/3/00Corporate AutoFour-Door Car554.376763
26US89481CaliforniaNaNBachelor394637.21%0.0111.01/0/00Personal AutoFour-Door Car799.200000
....................................
1059YG44474OregonMCollege1401472.13%54193.0117.01/0/00Corporate AutoSUV720.752945
1061RY92647CaliFBachelor1050677.17%0.092.01/0/00Personal AutoFour-Door Car546.524896
1068GS98873ArizonaFBachelor323912.47%16061.088.01/0/00Personal AutoFour-Door Car633.600000
1069CW49887CaliforniaFMaster462680.11%79487.0114.01/0/00Special AutoSUV547.200000
1070MY31220CaliforniaFCollege899704.02%54230.0112.01/0/00Personal AutoTwo-Door Car537.600000
\n", + "

264 rows × 11 columns

\n", + "
" + ], + "text/plain": [ + " Customer ST GENDER Education Customer Lifetime Value Income \\\n", + "1 QZ44356 Arizona F Bachelor 697953.59% 0.0 \n", + "2 AI49188 Nevada F Bachelor 1288743.17% 48767.0 \n", + "17 OE15005 Cali NaN College 394524.16% 28855.0 \n", + "23 TZ98966 Nevada NaN Bachelor 245019.10% 0.0 \n", + "26 US89481 California NaN Bachelor 394637.21% 0.0 \n", + "... ... ... ... ... ... ... \n", + "1059 YG44474 Oregon M College 1401472.13% 54193.0 \n", + "1061 RY92647 Cali F Bachelor 1050677.17% 0.0 \n", + "1068 GS98873 Arizona F Bachelor 323912.47% 16061.0 \n", + "1069 CW49887 California F Master 462680.11% 79487.0 \n", + "1070 MY31220 California F College 899704.02% 54230.0 \n", + "\n", + " Monthly Premium Auto Number of Open Complaints Policy Type \\\n", + "1 94.0 1/0/00 Personal Auto \n", + "2 108.0 1/0/00 Personal Auto \n", + "17 101.0 1/0/00 Personal Auto \n", + "23 73.0 1/3/00 Corporate Auto \n", + "26 111.0 1/0/00 Personal Auto \n", + "... ... ... ... \n", + "1059 117.0 1/0/00 Corporate Auto \n", + "1061 92.0 1/0/00 Personal Auto \n", + "1068 88.0 1/0/00 Personal Auto \n", + "1069 114.0 1/0/00 Special Auto \n", + "1070 112.0 1/0/00 Personal Auto \n", + "\n", + " Vehicle Class Total Claim Amount \n", + "1 Four-Door Car 1131.464935 \n", + "2 Two-Door Car 566.472247 \n", + "17 SUV 647.442031 \n", + "23 Four-Door Car 554.376763 \n", + "26 Four-Door Car 799.200000 \n", + "... ... ... 
\n", + "1059 SUV 720.752945 \n", + "1061 Four-Door Car 546.524896 \n", + "1068 Four-Door Car 633.600000 \n", + "1069 SUV 547.200000 \n", + "1070 Two-Door Car 537.600000 \n", + "\n", + "[264 rows x 11 columns]" + ] + }, + "execution_count": 47, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "high_claim_customers = df[df['Total Claim Amount'] > percentile_75]\n", + "high_claim_customers\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "id": "fe341300-5f3c-4687-bfc6-245003d33178", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
IncomeMonthly Premium AutoTotal Claim Amount
count264.000000264.000000264.000000
mean23677.344697165.193182782.228263
std27013.483721623.930992292.751640
min0.00000063.000000537.600000
25%0.00000099.000000606.521741
50%18807.000000114.000000679.597985
75%42423.750000133.250000851.400000
max99316.00000010202.0000002893.239678
\n", + "
" + ], + "text/plain": [ + " Income Monthly Premium Auto Total Claim Amount\n", + "count 264.000000 264.000000 264.000000\n", + "mean 23677.344697 165.193182 782.228263\n", + "std 27013.483721 623.930992 292.751640\n", + "min 0.000000 63.000000 537.600000\n", + "25% 0.000000 99.000000 606.521741\n", + "50% 18807.000000 114.000000 679.597985\n", + "75% 42423.750000 133.250000 851.400000\n", + "max 99316.000000 10202.000000 2893.239678" + ] + }, + "execution_count": 48, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here" + "high_claim_summary = high_claim_customers.describe()\n", + "high_claim_summary\n" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09d36799-d398-470b-b3ee-7dd99bc9fb3d", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -251,7 +4020,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.13" + "version": "3.12.2" } }, "nbformat": 4,