def merge_df(df1, df2):
    """Combine two DataFrames with an outer join.

    No join keys are passed, so pandas picks them itself (by default the
    column labels common to both frames). Rows that appear in only one of
    the inputs are kept.

    Args:
        df1: Left DataFrame.
        df2: Right DataFrame.

    Returns:
        The outer-joined DataFrame.
    """
    import pandas as pd

    combined = pd.merge(df1, df2, how="outer")
    return combined
def improve_title_columns(df):
    """Return a copy of ``df`` with normalized column labels.

    Every string label is stripped of surrounding whitespace, has its
    spaces replaced by underscores and is lower-cased. A label that ends
    up as ``"st"`` is then renamed to ``"state"``. Labels that are not
    strings are left unchanged instead of raising. The input frame itself
    is not modified.

    Args:
        df: DataFrame whose column labels should be cleaned.

    Returns:
        A new DataFrame carrying the cleaned column labels.
    """

    def _clean(label):
        # Non-string labels (e.g. integer positions) have no .strip();
        # pass them through untouched.
        if not isinstance(label, str):
            return label
        normalized = label.strip().replace(" ", "_").lower()
        # The abbreviated "st" column is spelled out as "state".
        return "state" if normalized == "st" else normalized

    return df.rename(columns=_clean)
def data_standarization(df):
    """Standardize state names and fill in missing values.

    Steps, in order:

    1. Rows in which every column is missing are dropped.
    2. Abbreviated state names are replaced by their full names. The
       column is looked up as ``"State"`` or ``"state"`` so the function
       works both on raw frames and on frames whose labels were already
       lower-cased; if neither column exists this step is skipped.
    3. Every remaining missing value, in any column, is replaced by ``0``.

    The row counts before and after cleaning are printed. The input frame
    is not modified; a new DataFrame is returned.

    Args:
        df: DataFrame to clean.

    Returns:
        The cleaned DataFrame.
    """
    # Only the spellings that need correcting; full state names pass
    # through ``replace`` unchanged.
    state_corrections = {
        "Cali": "California",
        "AZ": "Arizona",
        "WA": "Washington",
    }

    rows_before = df.shape[0]  # total number of rows before cleaning

    # dropna returns a new frame, so the caller's data is left untouched.
    cleaned = df.dropna(how="all")

    for column in ("State", "state"):
        if column in cleaned.columns:
            # assign() builds a new frame instead of writing into a
            # possibly shared one.
            cleaned = cleaned.assign(
                **{column: cleaned[column].replace(state_corrections)}
            )

    # NOTE: this fills text columns with the integer 0 as well.
    cleaned = cleaned.fillna(0)
    rows_after = cleaned.shape[0]

    print(f"Data before cleaning: {rows_before}\n Data after cleaning: {rows_after}")
    return cleaned
\ No newline at end of file
+     "output_type": "stream",
+     "text": [
+      "<class 'pandas.core.frame.DataFrame'>\n",
+      "RangeIndex: 10910 entries, 0 to 10909\n",
+      "Data columns (total 26 columns):\n",
+      " #   Column                         Non-Null Count  Dtype  \n",
+      "---  ------                         --------------  -----  \n",
+      " 0   Unnamed: 0                     10910 non-null  int64  \n",
+      " 1   Customer                       10910 non-null  object \n",
+      " 2   State                          10279 non-null  object \n",
+      " 3   Customer Lifetime Value        10910 non-null  float64\n",
+      " 4   Response                       10279 non-null  object \n",
+      " 5   Coverage                       10910 non-null  object \n",
+      " 6   Education                      10910 non-null  object \n",
+      " 7   Effective To Date              10910 non-null  object \n",
+      " 8   EmploymentStatus               10910 non-null  object \n",
+      " 9   Gender                         10910 non-null  object \n",
+      " 10  Income                         10910 non-null  int64  \n",
+      " 11  Location Code                  10910 non-null  object \n",
+      " 12  Marital Status                 10910 non-null  object \n",
+      " 13  Monthly Premium Auto           10910 non-null  int64  \n",
+      " 14  Months Since Last Claim        10277 non-null  float64\n",
+      " 15  Months Since Policy Inception  10910 non-null  int64  \n",
+      " 16  Number of Open Complaints      10277 non-null  float64\n",
+      " 17  Number of Policies             10910 non-null  int64  \n",
+      " 18  Policy Type                    10910 non-null  object \n",
+      " 19  Policy                         10910 non-null  object \n",
+      " 20  Renew Offer Type               10910 non-null  object \n",
+      " 21  Sales Channel                  10910 non-null  object \n",
+      " 22  Total Claim Amount             10910 non-null  float64\n",
+      " 23  Vehicle Class                  10288 non-null  object \n",
+      " 24  Vehicle Size                   10288 non-null  object \n",
+      " 25  Vehicle Type                   5428 non-null   object \n",
+      "dtypes: float64(4), int64(5), object(17)\n",
+      "memory usage: 2.2+ MB\n"
+     ]
+    }
+   ],
+   "source": [
+    "df.info()"
+   ]
+  },
-      "cell_type": "markdown",
-      "id": "b6aec097-c633-4017-a125-e77a97259cda",
-      "metadata": {
-        "id": "b6aec097-c633-4017-a125-e77a97259cda"
-      },
-      "source": [
-        "6.  Display a new DataFrame that contains the number of policies sold by month, by state, for the top 3 states with the highest number of policies sold.\n",
-        "\n",
-        "*Hint:*\n",
-        "- *To accomplish this, you will first need to group the data by state and month, then count the number of policies sold for each group. Afterwards, you will need to sort the data by the count of policies sold in descending order.*\n",
-        "- *Next, you will select the top 3 states with the highest number of policies sold.*\n",
-        "- *Finally, you will create a new DataFrame that contains the number of policies sold by month for each of the top 3 states.*"
-      ]
-    },
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Data before cleaning: 10910\n",
+      " Data after cleaning: 10910\n",
+      "<class 'pandas.core.frame.DataFrame'>\n",
+      "RangeIndex: 10910 entries, 0 to 10909\n",
+      "Data columns (total 26 columns):\n",
+      " #   Column                         Non-Null Count  Dtype  \n",
+      "---  ------                         --------------  -----  \n",
+      " 0   Unnamed: 0                     10910 non-null  int64  \n",
+      " 1   Customer                       10910 non-null  object \n",
+      " 2   State                          10910 non-null  object \n",
+      " 3   Customer Lifetime Value        10910 non-null  float64\n",
+      " 4   Response                       10910 non-null  object \n",
+      " 5   Coverage                       10910 non-null  object \n",
+      " 6   Education                      10910 non-null  object \n",
+      " 7   Effective To Date              10910 non-null  object \n",
+      " 8   EmploymentStatus               10910 non-null  object \n",
+      " 9   Gender                         10910 non-null  object \n",
+      " 10  Income                         10910 non-null  int64  \n",
+      " 11  Location Code                  10910 non-null  object \n",
+      " 12  Marital Status                 10910 non-null  object \n",
+      " 13  Monthly Premium Auto           10910 non-null  int64  \n",
+      " 14  Months Since Last Claim        10910 non-null  float64\n",
+      " 15  Months Since Policy Inception  10910 non-null  int64  \n",
+      " 16  Number of Open Complaints      10910 non-null  float64\n",
+      " 17  Number of Policies             10910 non-null  int64  \n",
+      " 18  Policy Type                    10910 non-null  object \n",
+      " 19  Policy                         10910 non-null  object \n",
+      " 20  Renew Offer Type               10910 non-null  object \n",
+      " 21  Sales Channel                  10910 non-null  object \n",
+      " 22  Total Claim Amount             10910 non-null  float64\n",
+      " 23  Vehicle Class                  10910 non-null  object \n",
+      " 24  Vehicle Size                   10910 non-null  object \n",
+      " 25  Vehicle Type                   10910 non-null  object \n",
+      "dtypes: float64(4), int64(5), object(17)\n",
+      "memory usage: 2.2+ MB\n"
+     ]
+    }
+   ],
+   "source": [
+    "df=f.data_standarization(df)\n",
+    "df.info()"
+   ]
+  },
