diff --git a/simba/SimBA.py b/simba/SimBA.py
index 624fd381f..072ed3001 100644
--- a/simba/SimBA.py
+++ b/simba/SimBA.py
@@ -853,7 +853,7 @@ def __init__(self):
         video_process_menu.add_command(label="Convert ROI definitions", compound="left", image=self.menu_icons["roi"]["img"], command=lambda: ConvertROIDefinitionsPopUp(), font=Formats.FONT_REGULAR.value)
         convert_data_menu = Menu(video_process_menu)
         convert_data_menu.add_command(label="Convert CSV to parquet", compound="left", image=self.menu_icons["parquet"]["img"], command=Csv2ParquetPopUp, font=Formats.FONT_REGULAR.value)
-        convert_data_menu.add_command(label="Convert parquet o CSV", compound="left", image=self.menu_icons["csv"]["img"], command=Parquet2CsvPopUp, font=Formats.FONT_REGULAR.value)
+        convert_data_menu.add_command(label="Convert parquet to CSV", compound="left", image=self.menu_icons["csv_grey"]["img"], command=Parquet2CsvPopUp, font=Formats.FONT_REGULAR.value)
 
         video_process_menu.add_cascade(label="Convert working file type...", compound="left", image=self.menu_icons["change"]["img"], menu=convert_data_menu, font=Formats.FONT_REGULAR.value)
 
diff --git a/simba/data_processors/timebins_movement_calculator.py b/simba/data_processors/timebins_movement_calculator.py
index fdf881970..6feaa83e1 100644
--- a/simba/data_processors/timebins_movement_calculator.py
+++ b/simba/data_processors/timebins_movement_calculator.py
@@ -209,7 +209,7 @@ def save(self):
             self.__create_plots()
 
 
-# test = TimeBinsMovementCalculator(config_path='/Users/simon/Desktop/envs/simba/troubleshooting/two_black_animals_14bp/project_folder/project_config.ini',
+# test = TimeBinsMovementCalculator(config_path=r"C:\troubleshooting\two_black_animals_14bp\project_folder\project_config.ini",
 #                                   bin_length=0.1,
 #                                   plots=True,
 #                                   body_parts=['Nose_1'])
diff --git a/simba/mixins/plotting_mixin.py b/simba/mixins/plotting_mixin.py
index 05dcd0661..1ad941a64 100644
--- a/simba/mixins/plotting_mixin.py
+++ b/simba/mixins/plotting_mixin.py
@@ -1052,58 +1052,70 @@ def joint_plot(
             return plot
 
     @staticmethod
-    def line_plot(
-        df: pd.DataFrame,
-        x: str,
-        y: str,
-        x_label: Optional[str] = None,
-        y_label: Optional[str] = None,
-        title: Optional[str] = None,
-        save_path: Optional[Union[str, os.PathLike]] = None,
-    ):
-
-        check_instance(
-            source=f"{PlottingMixin.line_plot.__name__} df",
-            instance=df,
-            accepted_types=(pd.DataFrame),
-        )
-        check_str(
-            name=f"{PlottingMixin.line_plot.__name__} x",
-            value=x,
-            options=tuple(df.columns),
-        )
-        check_str(
-            name=f"{PlottingMixin.line_plot.__name__} y",
-            value=y,
-            options=tuple(df.columns),
-        )
-
-        check_valid_lst(
-            data=list(df[y]),
-            source=f"{PlottingMixin.line_plot.__name__} y",
-            valid_dtypes=(np.float32, np.float64, np.int32, np.int64, int, float),
-        )
-        sns.set_style("whitegrid", {"grid.linestyle": "--"})
-        plot = sns.lineplot(data=df, x=x, y=y)
+    def line_plot(df: pd.DataFrame,
+                  x: str,
+                  y: Union[str, List[str]],
+                  error: Optional[Union[str, List[str]]] = None,
+                  x_label: Optional[str] = None,
+                  y_label: Optional[str] = None,
+                  title: Optional[str] = None,
+                  fig_size: Optional[Tuple[int]] = (10, 6),
+                  error_opacity: Optional[float] = 0.2,
+                  palette: Optional[str] = 'Set1',
+                  save_path: Optional[Union[str, os.PathLike]] = None, ):
+        """
+        Draw one line per ``y`` column against ``x``, optionally with a shaded +/- error band around each line.
+
+        :param pd.DataFrame df: Data holding the ``x``, ``y`` and ``error`` columns.
+        :param str x: Name of the column plotted on the x-axis.
+        :param Union[str, List[str]] y: Numeric column name, or list of numeric column names, to plot as lines.
+        :param Optional[Union[str, List[str]]] error: Numeric column(s) giving the band half-width of each ``y`` column, drawn with opacity ``error_opacity``. A str when ``y`` is a str, otherwise a list with one entry per ``y`` column.
+        :param Optional[str] x_label: Label of the x-axis. ``y_label`` and ``title`` set the y-axis label and the title.
+        :param Optional[Tuple[int]] fig_size: Figure size passed to ``plt.subplots``. Default (10, 6).
+        :param Optional[str] palette: Seaborn palette name the line and band colors are taken from. Default 'Set1'.
+        :param Optional[Union[str, os.PathLike]] save_path: If not None, the figure is saved at this path and nothing is returned. If None, the figure is returned.
+        :raises ValueError: If ``error`` does not hold exactly one column per ``y`` column.
+        """
+        fn = PlottingMixin.line_plot.__name__
+        check_instance(source=f"{fn} df", instance=df, accepted_types=(pd.DataFrame))
+        check_str(name=f"{fn} x", value=x, options=tuple(df.columns))
+        check_instance(source=f"{fn} y", instance=y, accepted_types=(str, list))
+        sns.set_style(style="whitegrid", rc={"grid.linestyle": "--"})
+        single = isinstance(y, str)
+        y = [y] if single else y
+        if error is not None:
+            check_instance(source=f"{fn} error", instance=error, accepted_types=(str,) if single else (list,))
+            error = [error] if single else error
+            if len(error) != len(y):
+                raise ValueError(f"{fn} error: got {len(error)} error column(s) for {len(y)} y column(s)")
+        for arg_name, columns in (("y", y), ("error", error or [])):
+            for col in columns:
+                check_str(name=f"{fn} {arg_name}", value=col, options=tuple(df.columns))
+                check_valid_lst(data=list(df[col]), source=f"{fn} {arg_name}", valid_dtypes=Formats.NUMERIC_DTYPES.value)
+        fig, ax = plt.subplots(figsize=fig_size)
+        # sns.lineplot ignores ``palette`` when no ``hue`` is given, so the colors are picked explicitly
+        colors = sns.color_palette(palette, n_colors=len(y))
+        for i, col in enumerate(y):
+            sns.lineplot(data=df, x=x, y=col, label=col, color=colors[i], ax=ax)
+            if error is not None:
+                ax.fill_between(df[x], df[col] - df[error[i]], df[col] + df[error[i]], alpha=error_opacity, color=colors[i])
 
         if x_label is not None:
             check_str(name=f"{PlottingMixin.line_plot.__name__} x_label", value=x_label)
-            plt.xlabel(x_label)
+            ax.set_xlabel(x_label)
         if y_label is not None:
             check_str(name=f"{PlottingMixin.line_plot.__name__} y_label", value=y_label)
-            plt.ylabel(y_label)
+            ax.set_ylabel(y_label)
         if title is not None:
             check_str(name=f"{PlottingMixin.line_plot.__name__} title", value=title)
-            plt.title(title, ha="center", fontsize=15)
+            ax.set_title(title, ha="center", fontsize=15)
         if save_path is not None:
-            check_str(
-                name=f"{PlottingMixin.line_plot.__name__} save_path", value=save_path
-            )
+            check_str(name=f"{PlottingMixin.line_plot.__name__} save_path", value=save_path)
             check_if_dir_exists(in_dir=os.path.dirname(save_path))
-            plot.figure.savefig(save_path)
+            plt.savefig(save_path)
             plt.close("all")
         else:
-            return plot
+            return fig
 
     @staticmethod
     def make_line_plot(
diff --git a/simba/mixins/train_model_mixin.py b/simba/mixins/train_model_mixin.py
index a9e547973..c31aed18a 100644
--- a/simba/mixins/train_model_mixin.py
+++ b/simba/mixins/train_model_mixin.py
@@ -17,6 +17,7 @@
 from datetime import datetime
 from itertools import repeat
 from subprocess import call
+from json import loads
 
 import numpy as np
 import pandas as pd
@@ -27,14 +28,13 @@
 from sklearn.ensemble import RandomForestClassifier
 from sklearn.feature_selection import VarianceThreshold
 from sklearn.inspection import partial_dependence, permutation_importance
-from sklearn.metrics import classification_report, precision_recall_curve
+from sklearn.metrics import precision_recall_curve, classification_report
 from sklearn.model_selection import ShuffleSplit, learning_curve
-from sklearn.preprocessing import (MinMaxScaler, QuantileTransformer,
-                                   StandardScaler)
+from sklearn.preprocessing import (MinMaxScaler, QuantileTransformer, StandardScaler)
 from sklearn.tree import export_graphviz
 from sklearn.utils import parallel_backend
 from tabulate import tabulate
-from yellowbrick.classifier import ClassificationReport
+import seaborn as sns
 
 try:
     from dtreeviz.trees import dtreeviz, tree
@@ -77,6 +77,7 @@
                                   MultiProcessingFailedWarning,
                                   NoModuleWarning, NotEnoughDataWarning,
                                   SamplingWarning, ShapWarning)
+from simba.mixins.plotting_mixin import PlottingMixin
 
 plt.switch_backend("agg")
 
@@ -88,11 +89,11 @@ def __init__(self):
         pass
 
     def read_all_files_in_folder(
-        self,
-        file_paths: List[str],
-        file_type: str,
-        classifier_names: Optional[List[str]] = None,
-        raise_bool_clf_error: bool = True,
+            self,
+            file_paths: List[str],
+            file_type: str,
+            classifier_names: Optional[List[str]] = None,
+            raise_bool_clf_error: bool = True,
     ) -> (pd.DataFrame, List[int]):
         """
         Read in all data files in a folder to a single pd.DataFrame for downstream ML algo.
@@ -134,8 +135,8 @@ def read_all_files_in_folder(
                             source=self.__class__.__name__,
                         )
                     elif (
-                        len(set(df[clf_name].unique()) - {0, 1}) > 0
-                        and raise_bool_clf_error
+                            len(set(df[clf_name].unique()) - {0, 1}) > 0
+                            and raise_bool_clf_error
                     ):
                         raise InvalidInputError(
                             msg=f"The annotation column for a classifier should contain only 0 or 1 values. However, in file {file} the {clf_name} field contains additional value(s): {list(set(df[clf_name].unique()) - {0, 1})}.",
@@ -155,8 +156,8 @@ def read_all_files_in_folder(
                 source=self.__class__.__name__,
             )
         df_concat = df_concat.loc[
-            :, ~df_concat.columns.str.contains("^Unnamed")
-        ].fillna(0)
+                    :, ~df_concat.columns.str.contains("^Unnamed")
+                    ].fillna(0)
         timer.stop_timer()
         memory_size = get_memory_usage_of_df(df=df_concat)
         print(
@@ -171,7 +172,7 @@ def read_all_files_in_folder(
         return df_concat.astype(np.float32), frm_number_lst
 
     def read_in_all_model_names_to_remove(
-        self, config: configparser.ConfigParser, model_cnt: int, clf_name: str
+            self, config: configparser.ConfigParser, model_cnt: int, clf_name: str
     ) -> List[str]:
         """
         Helper to find all field names that are annotations but are not the target.
@@ -195,7 +196,7 @@ def read_in_all_model_names_to_remove(
         return annotation_cols_to_remove
 
     def delete_other_annotation_columns(
-        self, df: pd.DataFrame, annotations_lst: List[str], raise_error: bool = True
+            self, df: pd.DataFrame, annotations_lst: List[str], raise_error: bool = True
     ) -> pd.DataFrame:
         """
         Helper to drop fields that contain annotations which are not the target.
@@ -221,7 +222,7 @@ def delete_other_annotation_columns(
         return df
 
     def split_df_to_x_y(
-        self, df: pd.DataFrame, clf_name: str
+            self, df: pd.DataFrame, clf_name: str
     ) -> (pd.DataFrame, pd.DataFrame):
         """
         Helper to split dataframe into features and target.
@@ -241,7 +242,7 @@ def split_df_to_x_y(
         return df, y
 
     def random_undersampler(
-        self, x_train: np.ndarray, y_train: np.ndarray, sample_ratio: float
+            self, x_train: np.ndarray, y_train: np.ndarray, sample_ratio: float
     ) -> (pd.DataFrame, pd.DataFrame):
         """
         Helper to perform random under-sampling of behavior-absent frames in a dataframe.
@@ -277,7 +278,7 @@ def random_undersampler(
         return self.split_df_to_x_y(data_df, y_train.name)
 
     def smoteen_oversampler(
-        self, x_train: pd.DataFrame, y_train: pd.DataFrame, sample_ratio: float
+            self, x_train: pd.DataFrame, y_train: pd.DataFrame, sample_ratio: float
     ) -> (np.ndarray, np.ndarray):
         """
         Helper to perform SMOTEEN oversampling of behavior-present annotations.
@@ -300,10 +301,10 @@ def smoteen_oversampler(
             return smt.fit_resample(x_train, y_train)
 
     def smote_oversampler(
-        self,
-        x_train: pd.DataFrame or np.array,
-        y_train: pd.DataFrame or np.array,
-        sample_ratio: float,
+            self,
+            x_train: pd.DataFrame or np.array,
+            y_train: pd.DataFrame or np.array,
+            sample_ratio: float,
     ) -> (np.ndarray, np.ndarray):
         """
         Helper to perform SMOTE oversampling of behavior-present annotations.
@@ -324,7 +325,6 @@ def smote_oversampler(
         else:
             return smt.fit_resample(x_train, y_train)
 
-
     def calc_permutation_importance(self,
                                     x_test: np.ndarray,
                                     y_test: np.ndarray,
@@ -332,7 +332,8 @@ def calc_permutation_importance(self,
                                     feature_names: List[str],
                                     clf_name: str,
                                     save_dir: Union[str, os.PathLike],
-                                    save_file_no: Optional[int] = None) -> None:
+                                    save_file_no: Optional[int] = None,
+                                    n_repeats: Optional[int] = 10) -> None:
         """
         Computes feature permutation importance scores.
 
@@ -347,8 +348,10 @@ def calc_permutation_importance(self,
 
         print("Calculating feature permutation importances...")
         timer = SimbaTimer(start=True)
-        p_importances = permutation_importance(clf, x_test, y_test, n_repeats=10, random_state=0)
-        df = pd.DataFrame(np.column_stack([feature_names, p_importances.importances_mean, p_importances.importances_std]), columns=["FEATURE_NAME","FEATURE_IMPORTANCE_MEAN","FEATURE_IMPORTANCE_STDEV"])
+        p_importances = permutation_importance(clf, x_test, y_test, n_repeats=n_repeats, random_state=0)
+        df = pd.DataFrame(
+            np.column_stack([feature_names, p_importances.importances_mean, p_importances.importances_std]),
+            columns=["FEATURE_NAME", "FEATURE_IMPORTANCE_MEAN", "FEATURE_IMPORTANCE_STDEV"])
         df = df.sort_values(by=["FEATURE_IMPORTANCE_MEAN"], ascending=False)
         if save_file_no != None:
             save_file_path = os.path.join(save_dir, f'{clf_name}_{save_file_no}_permutations_importances.csv')
@@ -367,7 +370,10 @@ def calc_learning_curve(self,
                             rf_clf: RandomForestClassifier,
                             save_dir: str,
                             save_file_no: Optional[int] = None,
-                            multiclass: bool = False) -> None:
+                            multiclass: Optional[bool] = False,
+                            scoring: Optional[str] = 'f1',
+                            plot: Optional[bool] = True) -> None:
+
         """
         Helper to compute random forest learning curves with cross-validation.
 
@@ -383,33 +389,52 @@ def calc_learning_curve(self,
         :parameter RandomForestClassifier rf_clf: sklearn RandomForestClassifier object
         :parameter str save_dir: Directory where to save output in csv file format.
         :parameter Optional[int] save_file_no: If integer, represents the count of the classifier within a grid search. If none, the classifier is not part of a grid search.
-        :parameter bool multiclass: If True, then target consist of several categories [0, 1, 2 ...] and scoring becomes ``None``. If False, then coring ``f1``.
+        :parameter bool multiclass: If True, then target consist of several categories [0, 1, 2 ...] and scoring becomes ``None``. If False, then scoring ``f1``.
+        :parameter Optional[str] scoring: The score of the models to present. Default: 'f1'.
         """
 
         print("Calculating learning curves...")
         timer = SimbaTimer(start=True)
         x_df, y_df = self.split_df_to_x_y(x_y_df, clf_name)
+        if save_file_no != None:
+            self.learning_curve_save_path = os.path.join(save_dir, f"{clf_name}_{save_file_no}_learning_curve.csv")
+            self.plot_path = os.path.join(save_dir, f"{clf_name}_{save_file_no}_learning_curve_plot.png")
+        else:
+            self.learning_curve_save_path = os.path.join(save_dir, f"{clf_name}_learning_curve.csv")
+            self.plot_path = os.path.join(save_dir, f"{clf_name}_learning_curve.png")
         check_int(name=f'calc_learning_curve shuffle_splits', value=shuffle_splits, min_value=2)
         check_int(name=f'calc_learning_curve dataset_splits', value=dataset_splits, min_value=2)
         cv = ShuffleSplit(n_splits=shuffle_splits, test_size=tt_size)
-        scoring = "f1"
         if multiclass:
             scoring = None
         if platform.system() == "Darwin":
             with parallel_backend("threading", n_jobs=-2):
-                train_sizes, train_scores, test_scores = learning_curve(estimator=rf_clf, X=x_df, y=y_df, cv=cv, scoring=scoring, shuffle=False, verbose=0, train_sizes=np.linspace(0.01, 1.0, dataset_splits), error_score="raise")
+                train_sizes, train_scores, test_scores = learning_curve(estimator=rf_clf, X=x_df, y=y_df, cv=cv,
+                                                                        scoring=scoring, shuffle=False, verbose=0,
+                                                                        train_sizes=np.linspace(0.01, 1.0,
+                                                                                                dataset_splits),
+                                                                        error_score="raise")
         else:
-            train_sizes, train_scores, test_scores = learning_curve(estimator=rf_clf, X=x_df, y=y_df, cv=cv, scoring=scoring, shuffle=False, n_jobs=-1, verbose=0, train_sizes=np.linspace(0.01, 1.0, dataset_splits), error_score="raise")
+            train_sizes, train_scores, test_scores = learning_curve(estimator=rf_clf, X=x_df, y=y_df, cv=cv,
+                                                                    scoring=scoring, shuffle=False, n_jobs=-1,
+                                                                    verbose=0,
+                                                                    train_sizes=np.linspace(0.01, 1.0, dataset_splits),
+                                                                    error_score="raise")
         results_df = pd.DataFrame()
         results_df["FRACTION TRAIN SIZE"] = np.linspace(0.01, 1.0, dataset_splits)
-        results_df["TRAIN_MEAN_F1"] = np.mean(train_scores, axis=1)
-        results_df["TEST_MEAN_F1"] = np.mean(test_scores, axis=1)
-        results_df["TRAIN_STDEV_F1"] = np.std(train_scores, axis=1)
-        results_df["TEST_STDEV_F1"] = np.std(test_scores, axis=1)
-        if save_file_no != None:
-            self.learning_curve_save_path = os.path.join(save_dir, f"{clf_name}_{save_file_no}_learning_curve.csv")
-        else:
-            self.learning_curve_save_path = os.path.join(save_dir, f"{clf_name}_learning_curve.csv")
+        score_name = scoring.upper() if scoring is not None else "SCORE"  # multiclass sets scoring=None above, which has no .upper()
+        results_df[f"TRAIN_MEAN_{score_name}"] = np.mean(train_scores, axis=1)
+        results_df[f"TEST_MEAN_{score_name}"] = np.mean(test_scores, axis=1)
+        results_df[f"TRAIN_STDEV_{score_name}"] = np.std(train_scores, axis=1)
+        results_df[f"TEST_STDEV_{score_name}"] = np.std(test_scores, axis=1)
+        if plot:
+            _ = PlottingMixin.line_plot(df=results_df,
+                                        x='FRACTION TRAIN SIZE',
+                                        y=[f"TRAIN_MEAN_{score_name}", f"TEST_MEAN_{score_name}"],
+                                        error=[f"TRAIN_STDEV_{score_name}", f"TEST_STDEV_{score_name}"],
+                                        save_path=self.plot_path, y_label=score_name,
+                                        title=f'SimBA learning curve {clf_name}')
+
         results_df.to_csv(self.learning_curve_save_path, index=False)
         timer.stop_timer()
         print(f"Learning curve calculation complete (elapsed time: {timer.elapsed_time_str}s) ...")
@@ -441,7 +466,8 @@ def calc_pr_curve(self,
         """
 
         if multiclass and classifier_map is None:
-            raise InvalidInputError(msg="Creating PR curve for multi-classifier but classifier_map not defined. Pass classifier_map argument")
+            raise InvalidInputError(
+                msg="Creating PR curve for multi-classifier but classifier_map not defined. Pass classifier_map argument")
         print("Calculating PR curves...")
         timer = SimbaTimer(start=True)
         if not multiclass:
@@ -462,7 +488,7 @@ def calc_pr_curve(self,
                 df = pd.DataFrame()
                 df["PRECISION"] = precision
                 df["RECALL"] = recall
-                df["F1"] = (2* (df["RECALL"] * df["PRECISION"]) / (df["RECALL"] + df["PRECISION"]))
+                df["F1"] = (2 * (df["RECALL"] * df["PRECISION"]) / (df["RECALL"] + df["PRECISION"]))
                 thresholds = list(thresholds)
                 thresholds.insert(0, 0.00)
                 df["DISCRIMINATION THRESHOLDS"] = thresholds
@@ -526,16 +552,47 @@ def create_example_dt(self,
         timer.stop_timer()
         print(f'Example tree saved at {file_name} (elapsed time: {timer.elapsed_time_str}s)')
 
+    def cuml_rf_x_importances(self, nodes: List[dict], n_features: int) -> np.ndarray:
+        """
+        Compute per-feature importances from the parsed JSON tree dump of a cuml random forest.
+
+        Every split node adds ``gain * instance_count`` to its ``split_feature``. The totals of each tree are
+        normalised to sum to 1 and the returned value is the mean over trees. Adapted from
+        `szchixy <https://github.com/szchixy/cuml-sklearn/blob/main/cuml-sklearn.ipynb>`__.
+
+        :param List[dict] nodes: Root node of each tree; child nodes are nested under the ``children`` key.
+        :param int n_features: Number of features; ``split_feature`` values index into this range.
+        :return: Array of length ``n_features`` holding the mean normalised importance of each feature.
+        """
+        importances = np.zeros((len(nodes), n_features))
+
+        def accumulate_gains(node: dict, gains: np.ndarray) -> None:
+            if "gain" not in node:  # nodes without a gain entry contribute nothing and are not descended into
+                return
+            gains[node["split_feature"]] += node["gain"] * node["instance_count"]
+            for child in node["children"]:
+                accumulate_gains(child, gains)
+
+        for i, root in enumerate(nodes):
+            tree_gains = np.zeros(n_features)  # fresh per tree, so earlier trees do not leak into later rows
+            accumulate_gains(root, tree_gains)
+            total = tree_gains.sum()
+            importances[i] = tree_gains / total if total > 0 else tree_gains  # all-zero row instead of 0 / 0 NaN
+        return np.mean(importances, axis=0)
+
     def create_clf_report(self,
-                          rf_clf: RandomForestClassifier,
+                          rf_clf: Union[RandomForestClassifier, cuRF],
                           x_df: pd.DataFrame,
                           y_df: pd.DataFrame,
                           class_names: List[str],
                           save_dir: str,
+                          digits: Optional[int] = 4,
                           clf_name: Optional[str] = None,
+                          img_size: Optional[tuple] = (13.7, 8.27),
+                          cmap: Optional[str] = "coolwarm",
+                          threshold: Optional[int] = 0.5,
                           save_file_no: Optional[int] = None) -> None:
 
-
         """
         Helper to create classifier truth table report.
 
@@ -546,43 +603,53 @@ def create_clf_report(self,
            :width: 500
            :align: center
 
-        :parameter RandomForestClassifier rf_clf: sklearn RandomForestClassifier object.
-        :parameter pd.DataFrame x_df: dataframe holding test features
-        :parameter pd.DataFrame y_df: dataframe holding test target
-        :parameter List[str] class_names: List of classes. E.g., ['Attack absent', 'Attack present']
-        :parameter Optional[str] clf_name: Name of the classifier. If not None, then used in the output file name.
-        :parameter str save_dir: Directory where to save output in csv file format.
-        :parameter Optional[int] save_file_no: If integer, represents the count of the classifier within a grid search. If none, the classifier is not
-            part of a grid search.
+        :param RandomForestClassifier rf_clf: sklearn RandomForestClassifier object.
+        :param pd.DataFrame x_df: dataframe holding test features
+        :param pd.DataFrame y_df: dataframe holding test target
+        :param int digits: Number of floats in the classification report
+        :param str cmap: The palette to plot the heatmap in. Default blue to red ("coolwarm").
+        :param Tuple img_size: The size of the image in inches.
+        :param float threshold: The presence classification threshold. Default: 0.5.
+        :param List[str] class_names: List of classes. E.g., ['Attack absent', 'Attack present']
+        :param Optional[str] clf_name: Name of the classifier. If not None, then used in the output file name.
+        :param str save_dir: Directory where to save output in csv file format.
+        :param Optional[int] save_file_no: If integer, represents the count of the classifier within a grid search. If none, the classifier is not part of a grid search.
         """
 
         print("Creating classification report visualization...")
         timer = SimbaTimer(start=True)
-        try:
-            visualizer = ClassificationReport(rf_clf, classes=class_names, support=True)
-            visualizer.score(x_df, y_df)
-            if save_file_no != None:
-                if not clf_name:
-                    save_path = os.path.join(save_dir, f'{class_names[1]}_{save_file_no}_classification_report.png')
-                else:
-                    save_path = os.path.join(save_dir, f"{clf_name}_{save_file_no}_classification_report.png")
+        if save_file_no != None:
+            if not clf_name:
+                save_path = os.path.join(save_dir, f'{class_names[1]}_{save_file_no}_classification_report.png')
             else:
-                if not clf_name:
-                    save_path = os.path.join(save_dir, f"{class_names[1]}_classification_report.png")
-                else:
-                    save_path = os.path.join(save_dir, f"{clf_name}_classification_report.png")
-            visualizer.poof(outpath=save_path, clear_figure=True)
-            timer.stop_timer()
-            print(f'Classification report saved at {save_path} (elapsed time: {timer.elapsed_time_str}s)')
-        except KeyError as e:
-            print(e.args)
+                save_path = os.path.join(save_dir, f"{clf_name}_{save_file_no}_classification_report.png")
+        else:
             if not clf_name:
-                NotEnoughDataWarning(msg=f"Not enough data to create classification report, consider changing sampling settings or create more annotations: {class_names[1]}",source=self.__class__.__name__)
+                save_path = os.path.join(save_dir, f"{class_names[1]}_classification_report.png")
             else:
-                NotEnoughDataWarning(msg=f"Not enough data to create classification report, consider changing sampling settings or create more annotations: {clf_name}", source=self.__class__.__name__)
+                save_path = os.path.join(save_dir, f"{clf_name}_classification_report.png")
+
+        y_pred = self.clf_predict_proba(clf=rf_clf, x_df=x_df)
+        y_pred = np.where(y_pred > threshold, 1, 0)
+
+        plt.figure()
+        clf_report = classification_report(y_true=y_df.values, y_pred=y_pred, target_names=class_names, digits=digits,
+                                           output_dict=True, zero_division=0)
+        # sklearn ignores ``digits`` when output_dict=True, so round here for the heatmap annotations to honour it
+        clf_report = pd.DataFrame.from_dict({key: clf_report[key] for key in class_names}).round(digits)
+        img = sns.heatmap(clf_report.T, annot=True, cmap=cmap, vmin=0.0, vmax=1.0, linewidth=2.0,
+                          linecolor='black', fmt='g', annot_kws={"size": 20})
+        img.set_xticklabels(img.get_xticklabels(), size=16)
+        img.set_yticklabels(img.get_yticklabels(), size=16)
+        img.figure.set_size_inches(img_size)
+        plt.savefig(save_path, dpi=300)
+        plt.close("all")
+
+        timer.stop_timer()
+        print(f'Classification report saved at {save_path} (elapsed time: {timer.elapsed_time_str}s)')
 
     def create_x_importance_log(self,
-                                rf_clf: RandomForestClassifier,
+                                rf_clf: Union[RandomForestClassifier, cuRF],
                                 x_names: List[str],
                                 clf_name: str,
                                 save_dir: str,
@@ -602,16 +669,23 @@ def create_x_importance_log(self,
 
         print("Creating feature importance log...")
         timer = SimbaTimer(start=True)
-        importances = list(rf_clf.feature_importances_)
-        feature_importances = [(feature, round(importance, 2)) for feature, importance in zip(x_names, importances)]
-        df = pd.DataFrame(feature_importances, columns=["FEATURE", "FEATURE_IMPORTANCE"]).sort_values(by=["FEATURE_IMPORTANCE"], ascending=False)
+        if isinstance(rf_clf, cuRF):
+            cuml_tree_nodes = loads(rf_clf.get_json())
+            importances = self.cuml_rf_x_importances(nodes=cuml_tree_nodes, n_features=len(x_names))
+        else:
+            importances = list(rf_clf.feature_importances_)
+        feature_importances = [(feature, round(importance, 25)) for feature, importance in zip(x_names, importances)]
+        df = pd.DataFrame(feature_importances, columns=["FEATURE", "FEATURE_IMPORTANCE"]).sort_values(
+            by=["FEATURE_IMPORTANCE"], ascending=False)
         if save_file_no != None:
-            self.f_importance_save_path = os.path.join(save_dir, f"{clf_name}_{save_file_no}_feature_importance_log.csv")
+            self.f_importance_save_path = os.path.join(save_dir,
+                                                       f"{clf_name}_{save_file_no}_feature_importance_log.csv")
         else:
             self.f_importance_save_path = os.path.join(save_dir, f"{clf_name}_feature_importance_log.csv")
         df.to_csv(self.f_importance_save_path, index=False)
         timer.stop_timer()
-        print(f'Feature importance log saved at {self.f_importance_save_path} (elapsed time: {timer.elapsed_time_str}s)')
+        print(
+            f'Feature importance log saved at {self.f_importance_save_path} (elapsed time: {timer.elapsed_time_str}s)')
 
     def create_x_importance_bar_chart(self,
                                       rf_clf: RandomForestClassifier,
@@ -626,7 +700,7 @@ def create_x_importance_bar_chart(self,
 
         .. seealso::
            `Documentation <https://github.com/sgoldenlab/simba/blob/master/docs/Scenario1.md#train-predictive-classifiers-settings>`_
-        
+
         .. image:: _static/img/gini_bar_chart.png
            :width: 600
            :align: center
@@ -647,7 +721,8 @@ def create_x_importance_bar_chart(self,
         importances_head = importances_df.head(n_bars)
         colors = create_color_palette(pallete_name=palette, increments=n_bars, as_rgb_ratio=True)
         colors = [x[::-1] for x in colors]
-        ax = importances_head.plot.bar(x="FEATURE", y="FEATURE_IMPORTANCE", legend=False, rot=90, fontsize=6, color=colors)
+        ax = importances_head.plot.bar(x="FEATURE", y="FEATURE_IMPORTANCE", legend=False, rot=90, fontsize=6,
+                                       color=colors)
         plt.ylabel("Feature importances' (mean decrease impurity)", fontsize=6)
         plt.tight_layout()
         if save_file_no != None:
@@ -657,15 +732,16 @@ def create_x_importance_bar_chart(self,
         plt.savefig(save_file_path, dpi=600)
         plt.close("all")
         timer.stop_timer()
-        print(f'Feature importance bar chart complete, saved at {save_file_path} (elapsed time: {timer.elapsed_time_str}s)')
+        print(
+            f'Feature importance bar chart complete, saved at {save_file_path} (elapsed time: {timer.elapsed_time_str}s)')
 
     def dviz_classification_visualization(
-        self,
-        x_train: np.ndarray,
-        y_train: np.ndarray,
-        clf_name: str,
-        class_names: List[str],
-        save_dir: str,
+            self,
+            x_train: np.ndarray,
+            y_train: np.ndarray,
+            clf_name: str,
+            class_names: List[str],
+            save_dir: str,
     ) -> None:
         """
         Helper to create visualization of example decision tree using dtreeviz.
@@ -706,7 +782,8 @@ def dviz_classification_visualization(
             )
 
     @staticmethod
-    def split_and_group_df(df: pd.DataFrame, splits: int, include_split_order: bool = True) -> (List[pd.DataFrame], int):
+    def split_and_group_df(df: pd.DataFrame, splits: int, include_split_order: bool = True) -> (
+    List[pd.DataFrame], int):
         """
         Helper to split a dataframe for multiprocessing. If include_split_order, then include the group number
         in split data as a column. Returns split data and approximations of number of observations per split.
@@ -834,16 +911,16 @@ def create_shap_log(self,
             out_df_raw.loc[len(out_df_raw)] = list(shap_df.iloc[frame])
             out_df_shap.loc[len(out_df_shap)] = frame_shap
             if (
-                (cnt % save_it == 0)
-                or (cnt == len(shap_df) - 1)
-                and (cnt != 0)
-                and (save_path is not None)
+                    (cnt % save_it == 0)
+                    or (cnt == len(shap_df) - 1)
+                    and (cnt != 0)
+                    and (save_path is not None)
             ):
                 print(f"Saving SHAP data after {cnt} iterations...")
                 out_df_shap.to_csv(self.out_df_shap_path)
                 out_df_raw.to_csv(self.out_df_raw_path)
             shap_frm_timer.stop_timer()
-            print( f"SHAP frame: {cnt + 1} / {len(shap_df)}, elapsed time: {shap_frm_timer.elapsed_time_str}...")
+            print(f"SHAP frame: {cnt + 1} / {len(shap_df)}, elapsed time: {shap_frm_timer.elapsed_time_str}...")
 
         shap_timer.stop_timer()
         stdout_success(
@@ -888,7 +965,8 @@ def print_machine_model_information(self, model_dict: dict) -> None:
         table = tabulate(table_view, ["Setting", "value"], tablefmt="grid")
         print(f"{table} {Defaults.STR_SPLIT_DELIMITER.value}TABLE")
 
-    def create_meta_data_csv_training_one_model(self, meta_data_lst: list, clf_name: str, save_dir: Union[str, os.PathLike]) -> None:
+    def create_meta_data_csv_training_one_model(self, meta_data_lst: list, clf_name: str,
+                                                save_dir: Union[str, os.PathLike]) -> None:
 
         """
         Helper to save single model meta data (hyperparameters, sampling settings etc.) from list format into SimBA
@@ -905,7 +983,8 @@ def create_meta_data_csv_training_one_model(self, meta_data_lst: list, clf_name:
         out_df.loc[len(out_df)] = meta_data_lst
         out_df.to_csv(save_path)
 
-    def create_meta_data_csv_training_multiple_models(self, meta_data, clf_name, save_dir, save_file_no: Optional[int] = None) -> None:
+    def create_meta_data_csv_training_multiple_models(self, meta_data, clf_name, save_dir,
+                                                      save_file_no: Optional[int] = None) -> None:
         print("Saving model meta data file...")
         save_path = os.path.join(save_dir, f"{clf_name}_{str(save_file_no)}_meta.csv")
         out_df = pd.DataFrame.from_dict(meta_data, orient="index").T
@@ -951,8 +1030,8 @@ def get_model_info(self, config: configparser.ConfigParser, model_cnt: int) -> D
                     )
                     continue
                 if (
-                    config.get("SML settings", "model_path_" + str(n + 1))
-                    == "No file selected"
+                        config.get("SML settings", "model_path_" + str(n + 1))
+                        == "No file selected"
                 ):
                     MissingUserInputWarning(
                         msg=f'Skipping {str(config.get("SML settings", "target_name_" + str(n + 1)))} classifier analysis: The classifier path is set to "No file selected',
@@ -980,17 +1059,17 @@ def get_model_info(self, config: configparser.ConfigParser, model_cnt: int) -> D
                 )
                 check_int("minimum_bout_length", model_dict[n]["minimum_bout_length"])
                 if config.has_option(
-                    ConfigKey.SML_SETTINGS.value, f"classifier_map_{n+1}"
+                        ConfigKey.SML_SETTINGS.value, f"classifier_map_{n + 1}"
                 ):
                     model_dict[n]["classifier_map"] = config.get(
-                        ConfigKey.SML_SETTINGS.value, f"classifier_map_{n+1}"
+                        ConfigKey.SML_SETTINGS.value, f"classifier_map_{n + 1}"
                     )
                     model_dict[n]["classifier_map"] = ast.literal_eval(
                         model_dict[n]["classifier_map"]
                     )
                     if type(model_dict[n]["classifier_map"]) != dict:
                         raise InvalidInputError(
-                            msg=f"SimBA found a classifier map for classifier {n+1} that could not be interpreted as a dictionary",
+                            msg=f"SimBA found a classifier map for classifier {n + 1} that could not be interpreted as a dictionary",
                             source=self.__class__.__name__,
                         )
 
@@ -1009,7 +1088,7 @@ def get_model_info(self, config: configparser.ConfigParser, model_cnt: int) -> D
             return model_dict
 
     def get_all_clf_names(
-        self, config: configparser.ConfigParser, target_cnt: int
+            self, config: configparser.ConfigParser, target_cnt: int
     ) -> List[str]:
         """
         Helper to get all classifier names in a SimBA project.
@@ -1037,10 +1116,10 @@ def get_all_clf_names(
         return model_names
 
     def insert_column_headers_for_outlier_correction(
-        self,
-        data_df: pd.DataFrame,
-        new_headers: List[str],
-        filepath: Union[str, os.PathLike],
+            self,
+            data_df: pd.DataFrame,
+            new_headers: List[str],
+            filepath: Union[str, os.PathLike],
     ) -> pd.DataFrame:
         """
         Helper to insert new column headers onto a dataframe following outlier correction.
@@ -1086,7 +1165,7 @@ def read_pickle(self, file_path: Union[str, os.PathLike]) -> object:
         return clf
 
     def bout_train_test_splitter(
-        self, x_df: pd.DataFrame, y_df: pd.Series, test_size: float
+            self, x_df: pd.DataFrame, y_df: pd.Series, test_size: float
     ) -> (pd.DataFrame, pd.DataFrame, pd.Series, pd.Series):
         """
         Helper to split train and test based on annotated `bouts`.
@@ -1156,9 +1235,9 @@ def find_bouts(s: pd.Series, type: str):
     @staticmethod
     @njit("(float32[:, :], float64, types.ListType(types.unicode_type))")
     def find_highly_correlated_fields(
-        data: np.ndarray,
-        threshold: float,
-        field_names: types.ListType(types.unicode_type),
+            data: np.ndarray,
+            threshold: float,
+            field_names: types.ListType(types.unicode_type),
     ) -> List[str]:
         """
         Find highly correlated fields in a dataset.
@@ -1198,7 +1277,7 @@ def find_highly_correlated_fields(
         return [field_names[x] for x in remove_col_idx]
 
     def check_sampled_dataset_integrity(
-        self, x_df: pd.DataFrame, y_df: pd.DataFrame
+            self, x_df: pd.DataFrame, y_df: pd.DataFrame
     ) -> None:
         """
         Helper to check for non-numerical entries post data sampling
@@ -1217,15 +1296,15 @@ def check_sampled_dataset_integrity(
             if len(x_nan_cnt) < 10:
                 raise FaultyTrainingSetError(
                     msg=f"{str(len(x_nan_cnt))} feature column(s) exist in some files within the project_folder/csv/targets_inserted directory, but missing in others. "
-                    f"SimBA expects all files within the project_folder/csv/targets_inserted directory to have the same number of features: the "
-                    f"column names with mismatches are: {list(x_nan_cnt.index)}",
+                        f"SimBA expects all files within the project_folder/csv/targets_inserted directory to have the same number of features: the "
+                        f"column names with mismatches are: {list(x_nan_cnt.index)}",
                     source=self.__class__.__name__,
                 )
             else:
                 raise FaultyTrainingSetError(
                     msg=f"{str(len(x_nan_cnt))} feature columns exist in some files, but missing in others. The feature files are found in the project_folder/csv/targets_inserted directory. "
-                    f"SimBA expects all files within the project_folder/csv/targets_inserted directory to have the same number of features: the first 10 "
-                    f"column names with mismatches are: {list(x_nan_cnt.index)[0:9]}",
+                        f"SimBA expects all files within the project_folder/csv/targets_inserted directory to have the same number of features: the first 10 "
+                        f"column names with mismatches are: {list(x_nan_cnt.index)[0:9]}",
                     source=self.__class__.__name__,
                 )
 
@@ -1242,12 +1321,12 @@ def check_sampled_dataset_integrity(
                 )
 
     def partial_dependence_calculator(
-        self,
-        clf: RandomForestClassifier,
-        x_df: pd.DataFrame,
-        clf_name: str,
-        save_dir: Union[str, os.PathLike],
-        clf_cnt: Optional[int] = None,
+            self,
+            clf: RandomForestClassifier,
+            x_df: pd.DataFrame,
+            clf_name: str,
+            save_dir: Union[str, os.PathLike],
+            clf_cnt: Optional[int] = None,
     ) -> None:
         """
         Compute feature partial dependencies for every feature in training set.
@@ -1282,11 +1361,11 @@ def partial_dependence_calculator(
             df = pd.DataFrame({"partial dependence": pdp[0], "feature value": axes[0]})
             df.to_csv(save_path)
             print(
-                f"Partial dependencies for {feature_name} complete ({feature_cnt+1}/{len(x_df.columns)})..."
+                f"Partial dependencies for {feature_name} complete ({feature_cnt + 1}/{len(x_df.columns)})..."
             )
 
-    def clf_predict_proba( self,
-                          clf: RandomForestClassifier,
+    def clf_predict_proba(self,
+                          clf: Union[RandomForestClassifier, cuRF],
                           x_df: pd.DataFrame,
                           multiclass: bool = False,
                           model_name: Optional[str] = None,
@@ -1306,26 +1385,29 @@ def clf_predict_proba( self,
         elif hasattr(clf, "n_features_in_"):
             clf_n_features = clf.n_features_in_
         else:
-            raise InvalidInputError(
-                msg=f"Could not determine the number of features in the classifier {model_name}",
-                source=self.__class__.__name__,
-            )
-        if not multiclass and clf.n_classes_ != 2:
+            raise InvalidInputError(msg=f"Could not determine the number of features in the classifier {model_name}",
+                                    source=self.__class__.__name__)
+
+        if hasattr(clf, "n_classes_"):
+            clf_n_classes = clf.n_classes_
+        elif hasattr(clf, "classes_"):
+            clf_n_classes = len(clf.classes_)
+        else:
+            raise InvalidInputError(msg=f"Could not determine the number of classes in the classifier {model_name}", source=self.__class__.__name__)
+
+        if not multiclass and clf_n_classes != 2:
             raise ClassifierInferenceError(
                 msg=f"The classifier {model_name} (data path {data_path}) has not been created properly. See The SimBA GitHub FAQ page or Gitter for more information and suggested fixes. The classifier is not a binary classifier and does not predict two targets (absence and presence of behavior). One or more files inside the project_folder/csv/targets_inserted directory has an annotation column with a value other than 0 or 1",
-                source=self.__class__.__name__,
-            )
+                source=self.__class__.__name__, )
         if len(x_df.columns) != clf_n_features:
             if model_name and data_path:
                 raise FeatureNumberMismatchError(
                     f"Mismatch in the number of features in input file {data_path}, and what is expected by the model {model_name}. The model expects {clf_n_features} features. The data contains {len(x_df.columns)} features.",
-                    source=self.__class__.__name__,
-                )
+                    source=self.__class__.__name__)
             else:
                 raise FeatureNumberMismatchError(
                     f"The model expects {clf_n_features} features. The data contains {len(x_df.columns)} features.",
-                    source=self.__class__.__name__,
-                )
+                    source=self.__class__.__name__)
         p_vals = clf.predict_proba(x_df)
         if multiclass and (clf.n_classes_ != p_vals.shape[1]):
             raise ClassifierInferenceError(
@@ -1333,12 +1415,13 @@ def clf_predict_proba( self,
                 source=self.__class__.__name__,
             )
         if not multiclass:
-            return p_vals[:, 1]
+            if isinstance(p_vals, pd.DataFrame):
+                return p_vals[1].values
+            else:
+                return p_vals[:, 1]
         else:
             return p_vals
 
-
-
     def clf_define(self,
                    n_estimators: Optional[int] = 2000,
                    max_depth: Optional[int] = None,
@@ -1351,30 +1434,32 @@ def clf_define(self,
                    class_weight: Optional[dict] = None,
                    cuda: Optional[bool] = False) -> RandomForestClassifier:
 
-
         if not cuda:
             return RandomForestClassifier(n_estimators=n_estimators,
-                                           max_depth=max_depth,
-                                           max_features=max_features,
-                                           n_jobs=n_jobs,
-                                           criterion=criterion,
-                                           min_samples_leaf=min_samples_leaf,
-                                           bootstrap=bootstrap,
-                                           verbose=verbose,
-                                           class_weight=class_weight)
+                                          max_depth=max_depth,
+                                          max_features=max_features,
+                                          n_jobs=n_jobs,
+                                          criterion=criterion,
+                                          min_samples_leaf=min_samples_leaf,
+                                          bootstrap=bootstrap,
+                                          verbose=verbose,
+                                          class_weight=class_weight)
 
         else:
             if cuRF is not None:
+                if max_depth is None:
+                    max_depth = 50
                 return cuRF(n_estimators=n_estimators,
                             split_criterion=criterion,
                             bootstrap=bootstrap,
                             max_depth=max_depth,
                             max_features=max_features,
                             min_samples_leaf=min_samples_leaf,
-                            verbose=verbose)
+                            verbose=6)
             else:
-                raise SimBAModuleNotFoundError(msg='SimBA could not find the cuml library for GPU machine learning algorithms.', source=self.__class__.__name__)
-
+                raise SimBAModuleNotFoundError(
+                    msg='SimBA could not find the cuml library for GPU machine learning algorithms.',
+                    source=self.__class__.__name__)
 
     def clf_fit(self,
                 clf: Union[RandomForestClassifier, cuRF],
@@ -1394,9 +1479,12 @@ def clf_fit(self,
         nan_target = y_df.loc[pd.to_numeric(y_df).isna()]
         if len(nan_features) > 0:
             raise FaultyTrainingSetError(
-                msg=f"{len(nan_features)} frame(s) in your project_folder/csv/targets_inserted directory contains FEATURES with non-numerical values", source=self.__class__.__name__)
+                msg=f"{len(nan_features)} frame(s) in your project_folder/csv/targets_inserted directory contains FEATURES with non-numerical values",
+                source=self.__class__.__name__)
         if len(nan_target) > 0:
-            raise FaultyTrainingSetError( msg=f"{len(nan_target)} frame(s) in your project_folder/csv/targets_inserted directory contains ANNOTATIONS with non-numerical values", source=self.__class__.__name__)
+            raise FaultyTrainingSetError(
+                msg=f"{len(nan_target)} frame(s) in your project_folder/csv/targets_inserted directory contains ANNOTATIONS with non-numerical values",
+                source=self.__class__.__name__)
 
         clf.fit(x_df, y_df)
 
@@ -1426,8 +1514,8 @@ def _read_data_file_helper(file_path: str,
                         source=TrainModelMixin._read_data_file_helper.__name__,
                     )
                 elif (
-                    len(set(df[clf_name].unique()) - {0, 1}) > 0
-                    and raise_bool_clf_error
+                        len(set(df[clf_name].unique()) - {0, 1}) > 0
+                        and raise_bool_clf_error
                 ):
                     raise InvalidInputError(
                         msg=f"The annotation column for a classifier should contain only 0 or 1 values. However, in file {file_path} the {clf_name} field contains additional value(s): {list(set(df[clf_name].unique()) - {0, 1})}.",
@@ -1440,10 +1528,10 @@ def _read_data_file_helper(file_path: str,
 
     @staticmethod
     def read_all_files_in_folder_mp(
-        file_paths: List[str],
-        file_type: Literal["csv", "parquet", "pickle"],
-        classifier_names: Optional[List[str]] = None,
-        raise_bool_clf_error: bool = True,
+            file_paths: List[str],
+            file_type: Literal["csv", "parquet", "pickle"],
+            classifier_names: Optional[List[str]] = None,
+            raise_bool_clf_error: bool = True,
     ) -> (pd.DataFrame, List[int]):
         """
 
@@ -1463,7 +1551,7 @@ def read_all_files_in_folder_mp(
 
         """
         if (platform.system() == "Darwin") and (
-            multiprocessing.get_start_method() != "spawn"
+                multiprocessing.get_start_method() != "spawn"
         ):
             multiprocessing.set_start_method("spawn", force=True)
         cpu_cnt, _ = find_core_cnt()
@@ -1471,11 +1559,11 @@ def read_all_files_in_folder_mp(
         try:
             with ProcessPoolExecutor(int(np.ceil(cpu_cnt / 2))) as pool:
                 for res in pool.map(
-                    TrainModelMixin._read_data_file_helper,
-                    file_paths,
-                    repeat(file_type),
-                    repeat(classifier_names),
-                    repeat(raise_bool_clf_error),
+                        TrainModelMixin._read_data_file_helper,
+                        file_paths,
+                        repeat(file_type),
+                        repeat(classifier_names),
+                        repeat(raise_bool_clf_error),
                 ):
                     df_lst.append(res[0])
                     frame_numbers_lst.extend((res[1]))
@@ -1488,8 +1576,8 @@ def read_all_files_in_folder_mp(
                     source=TrainModelMixin.read_all_files_in_folder_mp.__name__,
                 )
             df_concat = df_concat.loc[
-                :, ~df_concat.columns.str.contains("^Unnamed")
-            ].astype(np.float32)
+                        :, ~df_concat.columns.str.contains("^Unnamed")
+                        ].astype(np.float32)
             memory_size = get_memory_usage_of_df(df=df_concat)
             print(
                 f'Dataset size: {memory_size["megabytes"]}MB / {memory_size["gigabytes"]}GB'
@@ -1510,10 +1598,10 @@ def read_all_files_in_folder_mp(
 
     @staticmethod
     def _read_data_file_helper_futures(
-        file_path: str,
-        file_type: str,
-        clf_names: Optional[List[str]] = None,
-        raise_bool_clf_error: bool = True,
+            file_path: str,
+            file_type: str,
+            clf_names: Optional[List[str]] = None,
+            raise_bool_clf_error: bool = True,
     ):
         """
         Private function called by :meth:`simba.train_model_functions.read_all_files_in_folder_mp_futures`
@@ -1529,8 +1617,8 @@ def _read_data_file_helper_futures(
                 if not clf_name in df.columns:
                     raise ColumnNotFoundError(column_name=clf_name, file_name=file_path)
                 elif (
-                    len(set(df[clf_name].unique()) - {0, 1}) > 0
-                    and raise_bool_clf_error
+                        len(set(df[clf_name].unique()) - {0, 1}) > 0
+                        and raise_bool_clf_error
                 ):
                     raise InvalidInputError(
                         msg=f"The annotation column for a classifier should contain only 0 or 1 values. However, in file {file_path} the {clf_name} field contains additional value(s): {list(set(df[clf_name].unique()) - {0, 1})}."
@@ -1539,11 +1627,11 @@ def _read_data_file_helper_futures(
         return df, vid_name, timer.elapsed_time_str, frm_numbers
 
     def read_all_files_in_folder_mp_futures(
-        self,
-        annotations_file_paths: List[str],
-        file_type: Literal["csv", "parquet", "pickle"],
-        classifier_names: Optional[List[str]] = None,
-        raise_bool_clf_error: bool = True,
+            self,
+            annotations_file_paths: List[str],
+            file_type: Literal["csv", "parquet", "pickle"],
+            classifier_names: Optional[List[str]] = None,
+            raise_bool_clf_error: bool = True,
     ) -> (pd.DataFrame, List[int]):
         """
         Multiprocessing helper function to read in all data files in a folder to a single
@@ -1565,13 +1653,13 @@ def read_all_files_in_folder_mp_futures(
         """
         try:
             if (platform.system() == "Darwin") and (
-                multiprocessing.get_start_method() != "spawn"
+                    multiprocessing.get_start_method() != "spawn"
             ):
                 multiprocessing.set_start_method("spawn", force=True)
             cpu_cnt, _ = find_core_cnt()
             df_lst, frm_number_list = [], []
             with concurrent.futures.ProcessPoolExecutor(
-                max_workers=cpu_cnt
+                    max_workers=cpu_cnt
             ) as executor:
                 results = [
                     executor.submit(
@@ -1607,7 +1695,7 @@ def read_all_files_in_folder_mp_futures(
             )
 
     def check_raw_dataset_integrity(
-        self, df: pd.DataFrame, logs_path: Optional[Union[str, os.PathLike]]
+            self, df: pd.DataFrame, logs_path: Optional[Union[str, os.PathLike]]
     ) -> None:
         """
         Helper to check column-wise NaNs in raw input data for fitting model.
@@ -1644,18 +1732,18 @@ def check_raw_dataset_integrity(
             results.to_csv(save_log_path)
             raise FaultyTrainingSetError(
                 msg=f"{len(nan_cols)} feature columns exist in some files, but missing in others. The feature files are found in the project_folder/csv/targets_inserted directory. "
-                f"SimBA expects all files within the project_folder/csv/targets_inserted directory to have the same number of features: the first 10 "
-                f"column names with mismatches are: {nan_cols[0:9]}. For a log of the files that contain, and not contain, the mis-matched columns, see {save_log_path}",
+                    f"SimBA expects all files within the project_folder/csv/targets_inserted directory to have the same number of features: the first 10 "
+                    f"column names with mismatches are: {nan_cols[0:9]}. For a log of the files that contain, and not contain, the mis-matched columns, see {save_log_path}",
                 source=self.__class__.__name__,
             )
 
     @staticmethod
     def _create_shap_mp_helper(
-        data: pd.DataFrame,
-        explainer: shap.TreeExplainer,
-        clf_name: str,
-        rf_clf: RandomForestClassifier,
-        expected_value: float,
+            data: pd.DataFrame,
+            explainer: shap.TreeExplainer,
+            clf_name: str,
+            rf_clf: RandomForestClassifier,
+            expected_value: float,
     ):
 
         target = data.pop(clf_name).values.reshape(-1, 1)
@@ -1675,7 +1763,7 @@ def _create_shap_mp_helper(
 
     @staticmethod
     def _create_shap_mp_helper(
-        data: pd.DataFrame, explainer: shap.TreeExplainer, clf_name: str
+            data: pd.DataFrame, explainer: shap.TreeExplainer, clf_name: str
     ):
 
         target = data.pop(clf_name).values.reshape(-1, 1)
@@ -1689,18 +1777,18 @@ def _create_shap_mp_helper(
         return shap_vals, data.values, target
 
     def create_shap_log_mp(
-        self,
-        ini_file_path: str,
-        rf_clf: RandomForestClassifier,
-        x_df: pd.DataFrame,
-        y_df: pd.DataFrame,
-        x_names: List[str],
-        clf_name: str,
-        cnt_present: int,
-        cnt_absent: int,
-        batch_size: int = 10,
-        save_path: Optional[Union[str, os.PathLike]] = None,
-        save_file_no: Optional[int] = None,
+            self,
+            ini_file_path: str,
+            rf_clf: RandomForestClassifier,
+            x_df: pd.DataFrame,
+            y_df: pd.DataFrame,
+            x_names: List[str],
+            clf_name: str,
+            cnt_present: int,
+            cnt_absent: int,
+            batch_size: int = 10,
+            save_path: Optional[Union[str, os.PathLike]] = None,
+            save_file_no: Optional[int] = None,
     ) -> Union[None, Tuple[pd.DataFrame]]:
         """
         Helper to compute SHAP values using multiprocessing.
@@ -1773,7 +1861,8 @@ def create_shap_log_mp(
             batch_size = 1
         if len(shap_data_df) > 100:
             batch_size = 100
-        print(f"Computing {len(shap_data_df)} SHAP values (MULTI-CORE BATCH SIZE: {batch_size}, FOLLOW PROGRESS IN OS TERMINAL)...")
+        print(
+            f"Computing {len(shap_data_df)} SHAP values (MULTI-CORE BATCH SIZE: {batch_size}, FOLLOW PROGRESS IN OS TERMINAL)...")
         shap_data, _ = self.split_and_group_df(df=shap_data_df, splits=int(len(shap_data_df) / batch_size))
         shap_results, shap_raw = [], []
         try:
@@ -1782,10 +1871,10 @@ def create_shap_log_mp(
                     self._create_shap_mp_helper, explainer=explainer, clf_name=clf_name
                 )
                 for cnt, result in enumerate(
-                    pool.imap_unordered(constants, shap_data, chunksize=1)
+                        pool.imap_unordered(constants, shap_data, chunksize=1)
                 ):
                     print(
-                        f"Concatenating multi-processed SHAP data (batch {cnt+1}/{len(shap_data)})"
+                        f"Concatenating multi-processed SHAP data (batch {cnt + 1}/{len(shap_data)})"
                     )
                     proba = rf_clf.predict_proba(result[1])[:, 1].reshape(-1, 1)
                     shap_sum = np.sum(result[0], axis=1).reshape(-1, 1)
@@ -1807,7 +1896,7 @@ def create_shap_log_mp(
             shap_save_df = pd.DataFrame(
                 data=np.row_stack(shap_results),
                 columns=list(x_names)
-                + ["Expected_value", "Sum", "Prediction_probability", clf_name],
+                        + ["Expected_value", "Sum", "Prediction_probability", clf_name],
             )
             raw_save_df = pd.DataFrame(
                 data=np.row_stack(shap_raw), columns=list(x_names)
@@ -1852,7 +1941,9 @@ def create_shap_log_mp(
                 save_file_no=save_file_no,
             )
 
-    def check_df_dataset_integrity(self, df: pd.DataFrame, file_name: str, logs_path: Union[str, os.PathLike]) -> None:
+    def check_df_dataset_integrity(
+            self, df: pd.DataFrame, file_name: str, logs_path: Union[str, os.PathLike]
+    ) -> None:
         """
         Helper to check for non-numerical np.inf, -np.inf, NaN, None in a single dataframe.
         :parameter pd.DataFrame x_df: Features
@@ -1875,51 +1966,102 @@ def check_df_dataset_integrity(self, df: pd.DataFrame, file_name: str, logs_path
 
     def read_model_settings_from_config(self, config: configparser.ConfigParser):
 
-        self.model_dir_out = os.path.join(read_config_entry(config, ConfigKey.SML_SETTINGS.value, ConfigKey.MODEL_DIR.value, data_type=Dtypes.STR.value), "generated_models")
+        self.model_dir_out = os.path.join(
+            read_config_entry(config, ConfigKey.SML_SETTINGS.value, ConfigKey.MODEL_DIR.value,
+                              data_type=Dtypes.STR.value), "generated_models")
         if not os.path.exists(self.model_dir_out):
             os.makedirs(self.model_dir_out)
         self.eval_out_path = os.path.join(self.model_dir_out, "model_evaluations")
         if not os.path.exists(self.eval_out_path):
             os.makedirs(self.eval_out_path)
-        self.clf_name = read_config_entry(config, ConfigKey.CREATE_ENSEMBLE_SETTINGS.value, MLParamKeys.CLASSIFIER.value, data_type=Dtypes.STR.value)
-        self.tt_size = read_config_entry(config, ConfigKey.CREATE_ENSEMBLE_SETTINGS.value, MLParamKeys.TT_SIZE.value, data_type=Dtypes.FLOAT.value)
-        self.algo = read_config_entry(config, ConfigKey.CREATE_ENSEMBLE_SETTINGS.value, MLParamKeys.MODEL_TO_RUN.value, data_type=Dtypes.STR.value, default_value="rf")
-        self.split_type = read_config_entry(config, ConfigKey.CREATE_ENSEMBLE_SETTINGS.value, MLParamKeys.TRAIN_TEST_SPLIT_TYPE.value, data_type=Dtypes.STR.value, options=Options.TRAIN_TEST_SPLIT.value, default_value=Methods.SPLIT_TYPE_FRAMES.value)
-        self.under_sample_setting = (read_config_entry(config, ConfigKey.CREATE_ENSEMBLE_SETTINGS.value, MLParamKeys.UNDERSAMPLE_SETTING.value, data_type=Dtypes.STR.value).lower().strip())
-        self.over_sample_setting = (read_config_entry(config, ConfigKey.CREATE_ENSEMBLE_SETTINGS.value, MLParamKeys.OVERSAMPLE_SETTING.value, data_type=Dtypes.STR.value).lower().strip())
-        self.n_estimators = read_config_entry(config, ConfigKey.CREATE_ENSEMBLE_SETTINGS.value, MLParamKeys.RF_ESTIMATORS.value, data_type=Dtypes.INT.value)
-        self.rf_max_depth = read_config_entry(config, ConfigKey.CREATE_ENSEMBLE_SETTINGS.value, MLParamKeys.RF_MAX_DEPTH.value, data_type=Dtypes.INT.value, default_value=Dtypes.NONE.value)
+        self.clf_name = read_config_entry(config, ConfigKey.CREATE_ENSEMBLE_SETTINGS.value,
+                                          MLParamKeys.CLASSIFIER.value, data_type=Dtypes.STR.value)
+        self.tt_size = read_config_entry(config, ConfigKey.CREATE_ENSEMBLE_SETTINGS.value, MLParamKeys.TT_SIZE.value,
+                                         data_type=Dtypes.FLOAT.value)
+        self.algo = read_config_entry(config, ConfigKey.CREATE_ENSEMBLE_SETTINGS.value, MLParamKeys.MODEL_TO_RUN.value,
+                                      data_type=Dtypes.STR.value, default_value="rf")
+        self.split_type = read_config_entry(config, ConfigKey.CREATE_ENSEMBLE_SETTINGS.value,
+                                            MLParamKeys.TRAIN_TEST_SPLIT_TYPE.value, data_type=Dtypes.STR.value,
+                                            options=Options.TRAIN_TEST_SPLIT.value,
+                                            default_value=Methods.SPLIT_TYPE_FRAMES.value)
+        self.under_sample_setting = (
+            read_config_entry(config, ConfigKey.CREATE_ENSEMBLE_SETTINGS.value, MLParamKeys.UNDERSAMPLE_SETTING.value,
+                              data_type=Dtypes.STR.value).lower().strip())
+        self.over_sample_setting = (
+            read_config_entry(config, ConfigKey.CREATE_ENSEMBLE_SETTINGS.value, MLParamKeys.OVERSAMPLE_SETTING.value,
+                              data_type=Dtypes.STR.value).lower().strip())
+        self.n_estimators = read_config_entry(config, ConfigKey.CREATE_ENSEMBLE_SETTINGS.value,
+                                              MLParamKeys.RF_ESTIMATORS.value, data_type=Dtypes.INT.value)
+        self.rf_max_depth = read_config_entry(config, ConfigKey.CREATE_ENSEMBLE_SETTINGS.value,
+                                              MLParamKeys.RF_MAX_DEPTH.value, data_type=Dtypes.INT.value,
+                                              default_value=Dtypes.NONE.value)
         if self.rf_max_depth == "None":
             self.rf_max_depth = None
-        self.max_features = read_config_entry(config, ConfigKey.CREATE_ENSEMBLE_SETTINGS.value, MLParamKeys.RF_MAX_FEATURES.value, data_type=Dtypes.STR.value)
-        self.criterion = read_config_entry(config, ConfigKey.CREATE_ENSEMBLE_SETTINGS.value, MLParamKeys.RF_CRITERION.value, data_type=Dtypes.STR.value, options=Options.CLF_CRITERION.value)
-        self.min_sample_leaf = read_config_entry(config, ConfigKey.CREATE_ENSEMBLE_SETTINGS.value, MLParamKeys.MIN_LEAF.value, data_type=Dtypes.INT.value)
-        self.compute_permutation_importance = read_config_entry(config, ConfigKey.CREATE_ENSEMBLE_SETTINGS.value, MLParamKeys.PERMUTATION_IMPORTANCE.value, data_type=Dtypes.STR.value, default_value=False)
-        self.generate_learning_curve = read_config_entry(config, ConfigKey.CREATE_ENSEMBLE_SETTINGS.value, MLParamKeys.LEARNING_CURVE.value, data_type=Dtypes.STR.value, default_value=False)
-        self.generate_precision_recall_curve = read_config_entry(config, ConfigKey.CREATE_ENSEMBLE_SETTINGS.value, MLParamKeys.PRECISION_RECALL.value, data_type=Dtypes.STR.value, default_value=False)
-        self.generate_example_decision_tree = read_config_entry( config, ConfigKey.CREATE_ENSEMBLE_SETTINGS.value, MLParamKeys.EX_DECISION_TREE.value, data_type=Dtypes.STR.value, default_value=False)
-        self.generate_classification_report = read_config_entry(config, ConfigKey.CREATE_ENSEMBLE_SETTINGS.value, MLParamKeys.CLF_REPORT.value, data_type=Dtypes.STR.value, default_value=False)
-        self.generate_features_importance_log = read_config_entry(config, ConfigKey.CREATE_ENSEMBLE_SETTINGS.value, MLParamKeys.IMPORTANCE_LOG.value, data_type=Dtypes.STR.value, default_value=False)
-        self.generate_features_importance_bar_graph = read_config_entry(config, ConfigKey.CREATE_ENSEMBLE_SETTINGS.value, MLParamKeys.IMPORTANCE_LOG.value, data_type=Dtypes.STR.value, default_value=False)
-        self.generate_example_decision_tree_fancy = read_config_entry(config, ConfigKey.CREATE_ENSEMBLE_SETTINGS.value, MLParamKeys.EX_DECISION_TREE_FANCY.value, data_type=Dtypes.STR.value, default_value=False)
-        self.generate_shap_scores = read_config_entry(config, ConfigKey.CREATE_ENSEMBLE_SETTINGS.value, MLParamKeys.SHAP_SCORES.value, data_type=Dtypes.STR.value, default_value=False)
-        self.save_meta_data = read_config_entry(config, ConfigKey.CREATE_ENSEMBLE_SETTINGS.value, MLParamKeys.RF_METADATA.value, data_type=Dtypes.STR.value, default_value=False)
-        self.compute_partial_dependency = read_config_entry(config, ConfigKey.CREATE_ENSEMBLE_SETTINGS.value, MLParamKeys.PARTIAL_DEPENDENCY.value, data_type=Dtypes.STR.value, default_value=False)
-        self.save_train_test_frm_info = str_2_bool(read_config_entry(config, ConfigKey.CREATE_ENSEMBLE_SETTINGS.value, MLParamKeys.SAVE_TRAIN_TEST_FRM_IDX.value, data_type=Dtypes.STR.value, default_value="False"))
+        self.max_features = read_config_entry(config, ConfigKey.CREATE_ENSEMBLE_SETTINGS.value,
+                                              MLParamKeys.RF_MAX_FEATURES.value, data_type=Dtypes.STR.value)
+        self.criterion = read_config_entry(config, ConfigKey.CREATE_ENSEMBLE_SETTINGS.value,
+                                           MLParamKeys.RF_CRITERION.value, data_type=Dtypes.STR.value,
+                                           options=Options.CLF_CRITERION.value)
+        self.min_sample_leaf = read_config_entry(config, ConfigKey.CREATE_ENSEMBLE_SETTINGS.value,
+                                                 MLParamKeys.MIN_LEAF.value, data_type=Dtypes.INT.value)
+        self.compute_permutation_importance = read_config_entry(config, ConfigKey.CREATE_ENSEMBLE_SETTINGS.value,
+                                                                MLParamKeys.PERMUTATION_IMPORTANCE.value,
+                                                                data_type=Dtypes.STR.value, default_value=False)
+        self.generate_learning_curve = read_config_entry(config, ConfigKey.CREATE_ENSEMBLE_SETTINGS.value,
+                                                         MLParamKeys.LEARNING_CURVE.value, data_type=Dtypes.STR.value,
+                                                         default_value=False)
+        self.generate_precision_recall_curve = read_config_entry(config, ConfigKey.CREATE_ENSEMBLE_SETTINGS.value,
+                                                                 MLParamKeys.PRECISION_RECALL.value,
+                                                                 data_type=Dtypes.STR.value, default_value=False)
+        self.generate_example_decision_tree = read_config_entry(config, ConfigKey.CREATE_ENSEMBLE_SETTINGS.value,
+                                                                MLParamKeys.EX_DECISION_TREE.value,
+                                                                data_type=Dtypes.STR.value, default_value=False)
+        self.generate_classification_report = read_config_entry(config, ConfigKey.CREATE_ENSEMBLE_SETTINGS.value,
+                                                                MLParamKeys.CLF_REPORT.value,
+                                                                data_type=Dtypes.STR.value, default_value=False)
+        self.generate_features_importance_log = read_config_entry(config, ConfigKey.CREATE_ENSEMBLE_SETTINGS.value,
+                                                                  MLParamKeys.IMPORTANCE_LOG.value,
+                                                                  data_type=Dtypes.STR.value, default_value=False)
+        self.generate_features_importance_bar_graph = read_config_entry(config,
+                                                                        ConfigKey.CREATE_ENSEMBLE_SETTINGS.value,
+                                                                        MLParamKeys.IMPORTANCE_LOG.value,
+                                                                        data_type=Dtypes.STR.value, default_value=False)
+        self.generate_example_decision_tree_fancy = read_config_entry(config, ConfigKey.CREATE_ENSEMBLE_SETTINGS.value,
+                                                                      MLParamKeys.EX_DECISION_TREE_FANCY.value,
+                                                                      data_type=Dtypes.STR.value, default_value=False)
+        self.generate_shap_scores = read_config_entry(config, ConfigKey.CREATE_ENSEMBLE_SETTINGS.value,
+                                                      MLParamKeys.SHAP_SCORES.value, data_type=Dtypes.STR.value,
+                                                      default_value=False)
+        self.save_meta_data = read_config_entry(config, ConfigKey.CREATE_ENSEMBLE_SETTINGS.value,
+                                                MLParamKeys.RF_METADATA.value, data_type=Dtypes.STR.value,
+                                                default_value=False)
+        self.compute_partial_dependency = read_config_entry(config, ConfigKey.CREATE_ENSEMBLE_SETTINGS.value,
+                                                            MLParamKeys.PARTIAL_DEPENDENCY.value,
+                                                            data_type=Dtypes.STR.value, default_value=False)
+        self.save_train_test_frm_info = str_2_bool(read_config_entry(config, ConfigKey.CREATE_ENSEMBLE_SETTINGS.value,
+                                                                     MLParamKeys.SAVE_TRAIN_TEST_FRM_IDX.value,
+                                                                     data_type=Dtypes.STR.value, default_value="False"))
 
         if self.under_sample_setting == Methods.RANDOM_UNDERSAMPLE.value:
-            self.under_sample_ratio = read_config_entry(config, ConfigKey.CREATE_ENSEMBLE_SETTINGS.value, MLParamKeys.UNDERSAMPLE_RATIO.value, data_type=Dtypes.FLOAT.value, default_value=Dtypes.NAN.value)
+            self.under_sample_ratio = read_config_entry(config, ConfigKey.CREATE_ENSEMBLE_SETTINGS.value,
+                                                        MLParamKeys.UNDERSAMPLE_RATIO.value,
+                                                        data_type=Dtypes.FLOAT.value, default_value=Dtypes.NAN.value)
             check_float(name=MLParamKeys.UNDERSAMPLE_RATIO.value, value=self.under_sample_ratio)
         else:
             self.under_sample_ratio = Dtypes.NAN.value
-        if (self.over_sample_setting == Methods.SMOTEENN.value.lower()) or (self.over_sample_setting == Methods.SMOTE.value.lower()):
-            self.over_sample_ratio = read_config_entry(config, ConfigKey.CREATE_ENSEMBLE_SETTINGS.value, MLParamKeys.OVERSAMPLE_RATIO.value, data_type=Dtypes.FLOAT.value, default_value=Dtypes.NAN.value)
+        if (self.over_sample_setting == Methods.SMOTEENN.value.lower()) or (
+                self.over_sample_setting == Methods.SMOTE.value.lower()):
+            self.over_sample_ratio = read_config_entry(config, ConfigKey.CREATE_ENSEMBLE_SETTINGS.value,
+                                                       MLParamKeys.OVERSAMPLE_RATIO.value, data_type=Dtypes.FLOAT.value,
+                                                       default_value=Dtypes.NAN.value)
             check_float(name=MLParamKeys.OVERSAMPLE_RATIO.value, value=self.over_sample_ratio)
         else:
             self.over_sample_ratio = Dtypes.NAN.value
 
         if config.has_option(ConfigKey.CREATE_ENSEMBLE_SETTINGS.value, MLParamKeys.CLASS_WEIGHTS.value):
-            self.class_weights = read_config_entry(config, ConfigKey.CREATE_ENSEMBLE_SETTINGS.value, MLParamKeys.CLASS_WEIGHTS.value, data_type=Dtypes.STR.value, default_value=Dtypes.NONE.value)
+            self.class_weights = read_config_entry(config, ConfigKey.CREATE_ENSEMBLE_SETTINGS.value,
+                                                   MLParamKeys.CLASS_WEIGHTS.value, data_type=Dtypes.STR.value,
+                                                   default_value=Dtypes.NONE.value)
             if self.class_weights == "custom":
                 self.class_weights = ast.literal_eval(
                     read_config_entry(
@@ -2017,7 +2159,7 @@ def read_model_settings_from_config(self, config: configparser.ConfigParser):
                 self.shap_save_n = int(self.shap_save_n)
             except ValueError or TypeError:
                 self.shap_save_n = (
-                    self.shap_target_present_cnt + self.shap_target_absent_cnt
+                        self.shap_target_present_cnt + self.shap_target_absent_cnt
                 )
             check_int(
                 name=MLParamKeys.SHAP_PRESENT.value, value=self.shap_target_present_cnt
@@ -2029,7 +2171,7 @@ def read_model_settings_from_config(self, config: configparser.ConfigParser):
             check_int(name="RF MAX DEPTH", value=self.rf_max_depth, min_value=1)
 
     def check_validity_of_meta_files(
-        self, data_df: pd.DataFrame, meta_file_paths: List[Union[str, os.PathLike]]
+            self, data_df: pd.DataFrame, meta_file_paths: List[Union[str, os.PathLike]]
     ):
         meta_dicts, errors = {}, []
         for config_cnt, path in enumerate(meta_file_paths):
@@ -2101,8 +2243,8 @@ def check_validity_of_meta_files(
                 )[1]
             )
             if (
-                meta_dict[MLParamKeys.LEARNING_CURVE.value]
-                in Options.PERFORM_FLAGS.value
+                    meta_dict[MLParamKeys.LEARNING_CURVE.value]
+                    in Options.PERFORM_FLAGS.value
             ):
                 errors.append(
                     check_int(
@@ -2119,8 +2261,8 @@ def check_validity_of_meta_files(
                     )[1]
                 )
             if (
-                meta_dict[MLParamKeys.IMPORTANCE_BAR_CHART.value]
-                in Options.PERFORM_FLAGS.value
+                    meta_dict[MLParamKeys.IMPORTANCE_BAR_CHART.value]
+                    in Options.PERFORM_FLAGS.value
             ):
                 errors.append(
                     check_int(
@@ -2131,8 +2273,8 @@ def check_validity_of_meta_files(
                 )
             if MLParamKeys.SHAP_SCORES.value in meta_dict.keys():
                 if (
-                    meta_dict[MLParamKeys.SHAP_SCORES.value]
-                    in Options.PERFORM_FLAGS.value
+                        meta_dict[MLParamKeys.SHAP_SCORES.value]
+                        in Options.PERFORM_FLAGS.value
                 ):
                     errors.append(
                         check_int(
@@ -2171,8 +2313,8 @@ def check_validity_of_meta_files(
                 )[1]
             )
             if (
-                meta_dict[MLParamKeys.UNDERSAMPLE_SETTING.value].lower()
-                == Methods.RANDOM_UNDERSAMPLE.value
+                    meta_dict[MLParamKeys.UNDERSAMPLE_SETTING.value].lower()
+                    == Methods.RANDOM_UNDERSAMPLE.value
             ):
                 errors.append(
                     check_float(
@@ -2185,11 +2327,11 @@ def check_validity_of_meta_files(
                     present_len, absent_len = len(
                         data_df[
                             data_df[meta_dict[MLParamKeys.CLASSIFIER_NAME.value]] == 1
-                        ]
+                            ]
                     ), len(
                         data_df[
                             data_df[meta_dict[MLParamKeys.CLASSIFIER_NAME.value]] == 0
-                        ]
+                            ]
                     )
                     ratio_n = int(
                         present_len * meta_dict[MLParamKeys.UNDERSAMPLE_RATIO.value]
@@ -2202,11 +2344,11 @@ def check_validity_of_meta_files(
                     pass
 
             if (
-                meta_dict[MLParamKeys.OVERSAMPLE_SETTING.value].lower()
-                == Methods.SMOTEENN.value.lower()
+                    meta_dict[MLParamKeys.OVERSAMPLE_SETTING.value].lower()
+                    == Methods.SMOTEENN.value.lower()
             ) or (
-                meta_dict[MLParamKeys.OVERSAMPLE_SETTING.value].lower()
-                == Methods.SMOTE.value.lower()
+                    meta_dict[MLParamKeys.OVERSAMPLE_SETTING.value].lower()
+                    == Methods.SMOTE.value.lower()
             ):
                 errors.append(
                     check_float(
@@ -2307,8 +2449,8 @@ def check_validity_of_meta_files(
 
             if MLParamKeys.CLASS_WEIGHTS.value in meta_dict.keys():
                 if (
-                    meta_dict[MLParamKeys.CLASS_WEIGHTS.value]
-                    not in Options.CLASS_WEIGHT_OPTIONS.value
+                        meta_dict[MLParamKeys.CLASS_WEIGHTS.value]
+                        not in Options.CLASS_WEIGHT_OPTIONS.value
                 ):
                     meta_dict[MLParamKeys.CLASS_WEIGHTS.value] = None
                 if meta_dict[MLParamKeys.CLASS_WEIGHTS.value] == "custom":
@@ -2366,13 +2508,13 @@ def check_validity_of_meta_files(
         return meta_dicts
 
     def random_multiclass_frm_sampler(
-        self,
-        x_df: pd.DataFrame,
-        y_df: pd.DataFrame,
-        target_field: str,
-        target_var: int,
-        sampling_ratio: Union[float, Dict[int, float]],
-        raise_error: bool = False,
+            self,
+            x_df: pd.DataFrame,
+            y_df: pd.DataFrame,
+            target_field: str,
+            target_var: int,
+            sampling_ratio: Union[float, Dict[int, float]],
+            raise_error: bool = False,
     ):
         """
         Random multiclass undersampler.
@@ -2455,13 +2597,13 @@ def random_multiclass_frm_sampler(
         return results.drop([target_field], axis=1), results[target_field]
 
     def random_multiclass_bout_sampler(
-        self,
-        x_df: pd.DataFrame,
-        y_df: pd.DataFrame,
-        target_field: str,
-        target_var: int,
-        sampling_ratio: Union[float, Dict[int, float]],
-        raise_error: bool = False,
+            self,
+            x_df: pd.DataFrame,
+            y_df: pd.DataFrame,
+            target_field: str,
+            target_var: int,
+            sampling_ratio: Union[float, Dict[int, float]],
+            raise_error: bool = False,
     ) -> pd.DataFrame:
         """
         Randomly sample multiclass behavioral bouts.
@@ -2568,9 +2710,9 @@ def random_multiclass_bout_sampler(
 
     @staticmethod
     def scaler_inverse_transform(
-        data: pd.DataFrame,
-        scaler: Union[MinMaxScaler, StandardScaler, QuantileTransformer],
-        name: Optional[str] = "",
+            data: pd.DataFrame,
+            scaler: Union[MinMaxScaler, StandardScaler, QuantileTransformer],
+            name: Optional[str] = "",
     ) -> pd.DataFrame:
         check_instance(
             source=f"{TrainModelMixin.scaler_inverse_transform.__name__} data",
@@ -2600,7 +2742,7 @@ def scaler_inverse_transform(
 
     @staticmethod
     def define_scaler(
-        scaler_name: Literal["MIN-MAX", "STANDARD", "QUANTILE"]
+            scaler_name: Literal["MIN-MAX", "STANDARD", "QUANTILE"]
     ) -> Union[MinMaxScaler, StandardScaler, QuantileTransformer]:
         """
         Defines a sklearn scaler object. See ``UMLOptions.SCALER_OPTIONS.value`` for accepted scalers.
@@ -2623,9 +2765,9 @@ def define_scaler(
 
     @staticmethod
     def scaler_transform(
-        data: pd.DataFrame,
-        scaler: Union[MinMaxScaler, StandardScaler, QuantileTransformer],
-        name: Optional[str] = "",
+            data: pd.DataFrame,
+            scaler: Union[MinMaxScaler, StandardScaler, QuantileTransformer],
+            name: Optional[str] = "",
     ) -> pd.DataFrame:
         """
         Helper to run transform dataframe using previously fitted scaler.
@@ -2658,7 +2800,7 @@ def scaler_transform(
 
     @staticmethod
     def find_low_variance_fields(
-        data: pd.DataFrame, variance_threshold: float
+            data: pd.DataFrame, variance_threshold: float
     ) -> List[str]:
         """
         Finds fields with variance below provided threshold.
@@ -2683,9 +2825,6 @@ def find_low_variance_fields(
             )
         return low_variance_fields
 
-
-
-
 # test = TrainModelMixin()
 # test.read_all_files_in_folder(file_paths=['/Users/simon/Desktop/envs/troubleshooting/jake/project_folder/csv/targets_inserted/22-437C_c3_2022-11-01_13-16-23_color.csv', '/Users/simon/Desktop/envs/troubleshooting/jake/project_folder/csv/targets_inserted/22-437D_c4_2022-11-01_13-16-39_color.csv'],
 #                               file_type='csv', classifier_names=['attack', 'non-agresive parallel swimming'])
diff --git a/simba/utils/checks.py b/simba/utils/checks.py
index 465fc66d2..08cdfb109 100644
--- a/simba/utils/checks.py
+++ b/simba/utils/checks.py
@@ -834,7 +834,7 @@ def check_valid_array(data: np.ndarray,
             )
 
     if accepted_axis_0_shape is not None:
-        if data.ndim is not 2:
+        if data.ndim != 2:
             raise ArrayError(
                 msg=f"Array not of acceptable dimension. Found {data.ndim}, accepted: 2, {source}",
                 source=check_valid_array.__name__,
@@ -846,7 +846,7 @@ def check_valid_array(data: np.ndarray,
             )
 
     if accepted_axis_1_shape is not None:
-        if data.ndim is not 2:
+        if data.ndim != 2:
             raise ArrayError(
                 msg=f"Array not of acceptable dimension. Found {data.ndim}, accepted: 2, {source}",
                 source=check_valid_array.__name__,