sgoldenlab · tzukpolinsky · Nov 1, 2023 · Nov 8, 2023 · Nov 8, 2023 · Nov 12, 2023
diff --git a/requirements.txt b/requirements.txt
@@ -14,7 +14,7 @@ pandas==0.25.3;python_version=="3.6"
 pandas;python_version>="3.9"
 scikit-image
 scipy
-seaborn == 0.9.0
+seaborn
 scikit-learn
 tabulate == 0.8.3
 tqdm == 4.30.0
@@ -32,7 +32,7 @@ plotly == 4.9.0
 statsmodels
 cefpython3 == 66.0
 pyarrow == 6.0.1
-shap == 0.35.0
+shap
 tables>=3.6.1
 xlrd==1.2.0
 trafaret==2.1.1

diff --git a/simba/SimBA.py b/simba/SimBA.py
@@ -3,6 +3,8 @@
 import os.path
 import warnings
 
+from simba.ui.pop_ups.append_bodypart_directionality_features_pop_up import AppendBodyPartDirectionalityFeaturesPopUp
+
 warnings.filterwarnings("ignore", category=FutureWarning)
 warnings.filterwarnings("ignore", category=DeprecationWarning)
 from simba.ui.pop_ups.direction_animal_to_bodypart_settings_pop_up import DirectionAnimalToBodyPartSettingsPopUp
@@ -140,7 +142,7 @@
 
 # from simba.unsupervised.unsupervised_ui import UnsupervisedGUI
 
-sys.setrecursionlimit(10**6)
+sys.setrecursionlimit(10 ** 6)
 currentPlatform = platform.system()
 
 
@@ -597,6 +599,14 @@ def activate(box, *args):
                 config_path=self.config_path
             ),
         )
+        append_body_part_directionality_features = Button(
+            roi_feature_frm,
+            text="APPEND BODY PART DIRECTIONALITY DATA TO FEATURES (CAUTION)",
+            fg="green",
+            command=lambda: AppendBodyPartDirectionalityFeaturesPopUp(
+                config_path=self.config_path
+            ),
+        )
         # remove_roi_features_from_feature_set = Button(
         #     roi_feature_frm,
         #     text="REMOVE ROI FEATURES FROM FEATURE SET",
@@ -831,7 +841,26 @@ def activate(box, *args):
                 )
             ).start(),
         )
-
+        label_run_model_on_all = label_model_validation = CreateLabelFrameWithIcon(
+            parent=tab9,
+            header="Run model on all the data",
+            icon_name=Keys.DOCUMENTATION.value,
+            icon_link=Links.OUT_OF_SAMPLE_VALIDATION.value,
+        )
+        button_run_model_on_all = Button(
+            label_run_model_on_all,
+            text="RUN",
+            fg="red",
+            command=lambda: self.validate_model_first_step(),
+        )
+        button_create_video_for_all = Button(
+            label_run_model_on_all,
+            text="CREATE VALIDATION VIDEOS",
+            fg="blue",
+            command=lambda: ValidationVideoPopUp(
+                config_path=config_path, simba_main_frm=self, run_on_all=True
+            ),
+        )
         label_model_validation = CreateLabelFrameWithIcon(
             parent=tab9,
             header="VALIDATE MODEL ON SINGLE VIDEO",
@@ -875,7 +904,7 @@ def activate(box, *args):
             text="CREATE VALIDATION VIDEO",
             fg="blue",
             command=lambda: ValidationVideoPopUp(
-                config_path=config_path, simba_main_frm=self
+                config_path=config_path, simba_main_frm=self,run_on_all=False
             ),
         )
 
@@ -1137,14 +1166,15 @@ def activate(box, *args):
         button_skipOC.grid(row=2, sticky=W, pady=5)
 
         label_extractfeatures.grid(row=0, column=0, sticky=NW)
-        button_extractfeatures.grid(row=0, column=0, sticky=NW)
-        labelframe_usrdef.grid(row=1, column=0, sticky=NW, pady=5)
+        button_extractfeatures.grid(row=1, column=0, sticky=NW)
+        labelframe_usrdef.grid(row=0, column=0, sticky=NW, pady=15)
         userscript.grid(row=1, column=0, sticky=NW)
         self.scriptfile.grid(row=2, column=0, sticky=NW)
 
         roi_feature_frm.grid(row=1, column=0, sticky=NW)
         append_roi_features_by_animal.grid(row=0, column=0, sticky=NW)
         append_roi_features_by_body_part.grid(row=1, column=0, sticky=NW)
+        append_body_part_directionality_features.grid(row=2, column=0, sticky=NW)
         # remove_roi_features_from_feature_set.grid(row=2, column=0, sticky=NW)
 
         feature_tools_frm.grid(row=2, column=0, sticky=NW)
@@ -1185,15 +1215,17 @@ def activate(box, *args):
         button_trainmachinesettings.grid(row=0, column=0, sticky=NW, padx=5)
         button_trainmachinemodel.grid(row=1, column=0, sticky=NW, padx=5)
         button_train_multimodel.grid(row=2, column=0, sticky=NW, padx=5)
-
-        label_model_validation.grid(row=7, sticky=W, pady=5)
-        self.csvfile.grid(row=0, sticky=W)
-        self.modelfile.grid(row=1, sticky=W)
-        button_runvalidmodel.grid(row=2, sticky=W)
-        button_generateplot.grid(row=3, sticky=W)
-        self.dis_threshold.grid(row=4, sticky=W)
-        self.min_behaviorbout.grid(row=5, sticky=W)
-        button_validate_model.grid(row=6, sticky=W)
+        label_run_model_on_all.grid(row=0,sticky=W)
+        button_run_model_on_all.grid(row=0,column=0,sticky=W)
+        button_create_video_for_all.grid(row=0,column=1,sticky=W)
+        label_model_validation.grid(row=1, sticky=W, pady=5)
+        self.csvfile.grid(row=1, sticky=W)
+        self.modelfile.grid(row=2, sticky=W)
+        button_runvalidmodel.grid(row=3, sticky=W)
+        button_generateplot.grid(row=4, sticky=W)
+        self.dis_threshold.grid(row=5, sticky=W)
+        self.min_behaviorbout.grid(row=6, sticky=W)
+        button_validate_model.grid(row=7, sticky=W)
 
         label_runmachinemodel.grid(row=8, sticky=NW)
         button_run_rfmodelsettings.grid(row=0, sticky=NW)
@@ -1255,7 +1287,6 @@ def directing_other_animals_analysis(self):
     def directing_animal_to_bp_analysis(self):
         _ = DirectionAnimalToBodyPartSettingsPopUp(config_path=self.config_path)
 
-
     def directing_other_animals_visualizer(self):
         _ = DirectingOtherAnimalsVisualizerPopUp(config_path=self.config_path)
 

diff --git a/simba/data_processors/directing_animal_to_bodypart.py b/simba/data_processors/directing_animal_to_bodypart.py
@@ -146,7 +146,7 @@ def create_directionality_dfs(self):
             for animal_permutation, permutation_data in video_data.items():
                 for bp_name, bp_data in permutation_data.items():
                     directing_df = (
-                        bp_data#[bp_data["Directing_BOOL"] == 1]
+                        bp_data  # [bp_data["Directing_BOOL"] == 1]
                         .reset_index()
                         .rename(
                             columns={
@@ -167,14 +167,18 @@ def create_directionality_dfs(self):
 
     def read_directionality_dfs(self):
         results = {}
+        body_parts_directionality = []
         for file_cnt, file_path in enumerate(self.body_part_directionality_paths):
             video_timer = SimbaTimer(start=True)
-            _, file_name, _ = get_fn_ext(file_path)
-            results[file_name] = read_df(file_path, self.file_type)
+            dir_name, file_name, _ = get_fn_ext(file_path)
+            bp_name = os.path.basename(dir_name)
+            body_parts_directionality.append(bp_name)
+            key = file_name+"_"+bp_name
+            results[key] = read_df(file_path, self.file_type)
             video_timer.stop_timer()
             print(
                 "read body part directionality data completed for video {} ({}/{}, elapsed time: {}s)...".format(
-                    file_name,
+                    key,
                     str(file_cnt + 1),
                     str(len(self.outlier_corrected_paths)),
                     video_timer.elapsed_time_str,
@@ -183,7 +187,7 @@ def read_directionality_dfs(self):
         stdout_success(
             msg='reading body part directionality data completed'
         )
-        return results
+        return results,body_parts_directionality
 
     def save_directionality_dfs(self):
         """
@@ -194,14 +198,15 @@ def save_directionality_dfs(self):
         -------
         None
         """
-        if not os.path.exists(self.body_part_directionality_df_dir):
-            os.makedirs(self.body_part_directionality_df_dir)
+        output_dir = os.path.join(self.body_part_directionality_df_dir, self.bodypart_direction)
+        if not os.path.exists(output_dir):
+            os.makedirs(output_dir)
         for video_name, video_data in self.directionality_df_dict.items():
-            save_name = os.path.join(self.body_part_directionality_df_dir, video_name + ".csv")
+            save_name = os.path.join(output_dir, video_name + ".csv")
             video_data.to_csv(save_name)
             print(f"Detailed directional data saved for video {video_name}...")
         stdout_success(
-            msg=f"All detailed directional data saved in the {self.body_part_directionality_df_dir} directory"
+            msg=f"All detailed directional data saved in the {output_dir} directory"
 
         )
 
@@ -238,7 +243,7 @@ def summary_statistics(self):
             .set_index("Video")
         )
         self.save_path = os.path.join(
-            self.logs_path, "Body_part_directions_data_{}.csv".format(str(self.datetime))
+            self.logs_path, "Body_part_directions_data_{}_{}.csv".format(self.bodypart_direction,str( self.datetime))
         )
         self.summary_df.to_csv(self.save_path)
         self.timer.stop_timer()

diff --git a/simba/feature_extractors/feature_extractor_freezing.py b/simba/feature_extractors/feature_extractor_freezing.py
@@ -0,0 +1,155 @@
+__author__ = "Tzuk Polinsky"
+
+import os
+from itertools import product
+
+import numpy as np
+import pandas as pd
+
+from simba.mixins.config_reader import ConfigReader
+from simba.mixins.feature_extraction_mixin import FeatureExtractionMixin
+from simba.utils.checks import check_str
+from simba.utils.enums import Paths
+from simba.utils.printing import SimbaTimer, stdout_success
+from simba.utils.read_write import get_fn_ext, read_df, write_df
+
+
+class MiceFreezingFeatureExtractor(ConfigReader, FeatureExtractionMixin):
+    """
+    Featurizer computing rolling-window "freezing" features for the videos of a SimBA project.
+
+    For every file in ``self.files_found`` the columns ``activity``, ``distances_from_bug``,
+    ``distances_from_center``, ``nose_direction_sum_of_diffs``, ``amount_of_looking_at_bug`` and
+    ``looking_at_bug_onset`` are computed (see :meth:`extract_features`) and written to ``self.save_dir``.
+    Results are stored in the `project_folder/csv/features_extracted` directory of the SimBA project.
+
+    :parameter str config_path: path to SimBA project config file in Configparser format
+
+    .. note::
+       `Feature extraction tutorial <https://github.com/sgoldenlab/simba/blob/master/docs/tutorial.md#step-5-extract-features>`__.
+
+       :meth:`run` additionally reads the ROI polygon definitions of the project and one per-video
+       directionality CSV (with a ``Directing_BOOL`` column) from the ``DIRECTIONALITY_BODY_PART``
+       sub-directory of ``self.body_part_directionality_df_dir``; both have to exist beforehand.
+
+    Examples
+    ----------
+    >>> feature_extractor = MiceFreezingFeatureExtractor(config_path='MyProjectConfig')
+    >>> feature_extractor.run()
+    """
+
+    # Number of frames in each rolling window that ``run`` passes to ``extract_features``.
+    ROLLING_WINDOW_SIZE = 25
+    # Sub-directory of ``self.body_part_directionality_df_dir`` that ``run`` reads directionality CSVs from.
+    DIRECTIONALITY_BODY_PART = "bug"
+
+    def __init__(self, config_path: str):
+        FeatureExtractionMixin.__init__(self, config_path=config_path)
+        ConfigReader.__init__(self, config_path=config_path)
+        print(
+            "Extracting features from {} file(s)...".format(str(len(self.files_found)))
+        )
+
+    def angle_between_vectors(self, v1, v2):
+        """
+        Return the angle, in degrees, between two vectors.
+
+        :parameter v1: first vector (array-like).
+        :parameter v2: second vector (array-like).
+        :return: angle in degrees; NaN when either vector has zero length.
+        """
+        v1 = np.asarray(v1, dtype=float)
+        v2 = np.asarray(v2, dtype=float)
+        norm_1 = np.linalg.norm(v1)
+        norm_2 = np.linalg.norm(v2)
+        if norm_1 == 0 or norm_2 == 0:
+            # The angle to a zero-length vector is undefined: return NaN explicitly instead of dividing by zero.
+            return np.nan
+        cosine = (v2 / norm_2).dot((v1 / norm_1).T)
+        # Rounding can push the cosine marginally outside [-1, 1], where arccos would return NaN.
+        return np.degrees(np.arccos(np.clip(cosine, -1.0, 1.0)))
+
+    def calculate_direction_vector(self, from_point, to_point):
+        """Return the vector pointing from ``from_point`` to ``to_point`` as a numpy array."""
+        return np.array(to_point) - np.array(from_point)
+
+    @staticmethod
+    def _first_true_offsets(flags, window_size):
+        """
+        For every position ``j`` return the offset of the first True value inside the trailing window
+        ``flags[max(0, j - window_size + 1): j + 1]``, or -1 when that window holds no True value.
+        """
+        flags = np.asarray(flags, dtype=bool)
+        if len(flags) == 0:
+            return np.empty(0, dtype=np.int64)
+        positions = np.arange(len(flags))
+        # next_true[i] is the position of the first True at or after i (len(flags) when there is none).
+        next_true = np.minimum.accumulate(np.where(flags, positions, len(flags))[::-1])[::-1]
+        window_starts = np.maximum(positions - window_size + 1, 0)
+        first_hits = next_true[window_starts]
+        # A hit located after the current position lies outside the trailing window.
+        return np.where(first_hits <= positions, first_hits - window_starts, -1)
+
+    def extract_features(self, input_file_path: str, window_size: int, video_center: [int, int], pixel_mm: float,
+                         directionality_data: pd.Series):
+        """
+        Compute the freezing features of a single pose-estimation file.
+
+        Columns of the returned dataframe (all windows are trailing, ``window_size`` rows, ``min_periods=1``):
+
+        * ``activity``: absolute rolling sum of the per-row sum of row-to-row differences of all columns that
+          are neither ``bug`` columns nor probability columns (NaN replaced by 500).
+        * ``distances_from_bug``: rolling mean of the Euclidean distance between the ``bug`` and ``nose``
+          coordinates (NaN replaced by 100).
+        * ``distances_from_center``: rolling mean of the Euclidean distance between ``video_center`` and the
+          ``nose`` coordinates (NaN replaced by 100).
+        * ``nose_direction_sum_of_diffs``: absolute rolling sum of the row-to-row change of the angle (degrees)
+          between the center-to-nose vector and the video_center-to-center vector (NaN replaced by 0).
+        * ``amount_of_looking_at_bug``: rolling sum of ``directionality_data`` (NaN replaced by 0).
+        * ``looking_at_bug_onset``: offset, inside the window, of the first truthy ``directionality_data``
+          value; -1 when the window holds none.
+
+        :parameter str input_file_path: path to the pose-estimation CSV file.
+        :parameter int window_size: number of rows in each rolling window.
+        :parameter video_center: (x, y) reference point, in the same units as the pose coordinates.
+        :parameter float pixel_mm: currently unused; kept so existing callers keep working.
+        :parameter pd.Series directionality_data: per-row directing flags (``run`` passes the ``Directing_BOOL`` column).
+        :return: dataframe holding the columns listed above.
+        """
+        print("Calculating freezing features ...")
+
+        def is_probability_column(col_name):
+            # Assumes pose probability columns are named ``<body-part>_p`` -- TODO confirm against the
+            # project's pose files. A suffix test keeps coordinate columns such as ``left_paw_x``, which
+            # the previous substring test ("_p" in name) excluded by accident.
+            return col_name.endswith("_p")
+
+        # NOTE(review): the file is always parsed as CSV even though ``run`` writes with ``self.file_type``.
+        input_data = pd.read_csv(input_file_path)
+        output_data = pd.DataFrame(columns=["activity"])
+        columns_to_drop = [col for col in input_data.columns if ("bug" in col) or is_probability_column(col)]
+        columns_to_drop.append("Unnamed: 0")
+        # errors="ignore": files saved without an unnamed index column must not raise a KeyError.
+        without_bug = input_data.drop(columns=columns_to_drop, errors="ignore")
+
+        time_point_diff = without_bug.diff(axis=0).sum(axis=1)
+        rolling_windows = time_point_diff.rolling(window=window_size, min_periods=1).sum()
+        output_data["activity"] = rolling_windows.abs().fillna(500)
+
+        bug_cols = [col for col in input_data.columns if ("bug" in col) and not is_probability_column(col)]
+        center_cols = [col for col in without_bug.columns if ("center" in col) and not is_probability_column(col)]
+        nose_cols = [col for col in without_bug.columns if ("nose" in col) and not is_probability_column(col)]
+        centers = without_bug[center_cols].to_numpy()
+        noses = without_bug[nose_cols].to_numpy()
+        bug = input_data[bug_cols].to_numpy()
+
+        distances_from_bug = np.linalg.norm(bug - noses, axis=1)
+        # Broadcasting subtracts the single reference point from every row.
+        distances_from_center = np.linalg.norm(np.asarray(video_center) - noses, axis=1)
+        output_data["distances_from_bug"] = (
+            pd.Series(distances_from_bug).rolling(window=window_size, min_periods=1).mean().fillna(100).to_numpy()
+        )
+        output_data["distances_from_center"] = (
+            pd.Series(distances_from_center).rolling(window=window_size, min_periods=1).mean().fillna(100).to_numpy()
+        )
+
+        angles = [
+            self.angle_between_vectors(
+                self.calculate_direction_vector(center, nose),
+                self.calculate_direction_vector(video_center, center),
+            )
+            for center, nose in zip(centers, noses)
+        ]
+        angles_diff_sum = pd.Series(angles, dtype=float).diff().rolling(window=window_size, min_periods=1).sum()
+        output_data["nose_direction_sum_of_diffs"] = angles_diff_sum.abs().fillna(0)
+
+        directionality_rolling = directionality_data.rolling(window=window_size, min_periods=1)
+        output_data["amount_of_looking_at_bug"] = directionality_rolling.sum().fillna(0)
+
+        # Computed from the raw flags instead of iterating the Rolling object: iteration is not available
+        # on the older pandas releases pinned in requirements.txt and builds one Series per row.
+        # Plain Python truthiness is kept, so NaN counts as truthy exactly as ``if value:`` would.
+        offsets = self._first_true_offsets([bool(value) for value in directionality_data], window_size)
+        onsets = np.full(len(output_data), -1, dtype=np.int64)
+        # Rows without directionality data keep -1; surplus directionality rows are ignored.
+        n_shared = min(len(onsets), len(offsets))
+        onsets[:n_shared] = offsets[:n_shared]
+        output_data["looking_at_bug_onset"] = onsets
+        return output_data
+
+    def run(self):
+        """
+        Method to compute and save features to disk. Results are saved in the `project_folder/csv/features_extracted`
+        directory of the SimBA project.
+
+        For each file in ``self.files_found`` the center of the first ROI polygon registered for the video
+        is used as the reference point, and the ``Directing_BOOL`` column of the matching directionality
+        CSV is used as the directing flags.
+
+        Returns
+        -------
+        None
+
+        Raises
+        ------
+        IndexError
+            If no ROI polygon is defined for one of the videos.
+        """
+        self.roi_coordinates_path = os.path.join(
+            self.logs_path, Paths.ROI_DEFINITIONS.value
+        )
+        polygons = pd.read_hdf(self.roi_coordinates_path, key="polygons")
+        directionality_dir_path = os.path.join(
+            self.body_part_directionality_df_dir, self.DIRECTIONALITY_BODY_PART
+        )
+        for file_cnt, file_path in enumerate(self.files_found):
+            video_timer = SimbaTimer(start=True)
+            print(
+                "Extracting features for video {}/{}...".format(
+                    str(file_cnt + 1), str(len(self.files_found))
+                )
+            )
+            _, file_name, _ = get_fn_ext(file_path)
+            # Validate the name before it is used for any lookup below.
+            check_str("file name", file_name)
+            current_polygon = polygons[polygons["Video"] == file_name]
+            if current_polygon.empty:
+                raise IndexError(
+                    f"No ROI polygon is defined for video {file_name} in {self.roi_coordinates_path}"
+                )
+            directionality_data_path = os.path.join(directionality_dir_path, file_name + ".csv")
+            directionality_data = pd.read_csv(directionality_data_path)["Directing_BOOL"]
+            video_settings, self.px_per_mm, _ = self.read_video_info(
+                video_name=file_name
+            )
+            video_center = (
+                current_polygon["Center_X"].values[0],
+                current_polygon["Center_Y"].values[0],
+            )
+            self.data_df = self.extract_features(
+                file_path,
+                self.ROLLING_WINDOW_SIZE,
+                video_center,
+                video_settings["pixels/mm"].values[0],
+                directionality_data,
+            )
+            save_path = os.path.join(self.save_dir, file_name + "." + self.file_type)
+            self.data_df = self.data_df.reset_index(drop=True).fillna(0)
+            write_df(df=self.data_df, file_type=self.file_type, save_path=save_path)
+            video_timer.stop_timer()
+            print(
+                f"Feature extraction complete for video {file_name} (elapsed time: {video_timer.elapsed_time_str}s)"
+            )
+            print(
+                f"Feature extraction file for video {file_name} saved to {save_path})"
+            )
+
+        self.timer.stop_timer()
+        stdout_success(
+            f"Feature extraction complete for {str(len(self.files_found))} video(s). Results are saved inside the project_folder/csv/features_extracted directory",
+            elapsed_time=self.timer.elapsed_time_str,
+        )
+
+# test = MiceFreezingFeatureExtractor(config_path='/Users/simon/Desktop/envs/troubleshooting/two_black_animals_14bp/project_folder/project_config.ini')
+# test.run()
diff --git a/simba/feature_extractors/feature_subsets.py b/simba/feature_extractors/feature_subsets.py
@@ -456,7 +456,7 @@ def append_to_data(self):
                     self.features_extracted_temp_path + f"/*.{self.file_type}"
                 )
                 self.data_df = self.read_all_files_in_folder_mp_futures(
-                    file_paths=file_paths, file_type=self.file_type
+                    annotations_file_paths=file_paths, file_type=self.file_type
                 )
                 self.check_raw_dataset_integrity(
                     df=self.data_df, logs_path=self.logs_path
@@ -469,7 +469,7 @@ def append_to_data(self):
                     self.targets_inserted_temp_path + f"/*.{self.file_type}"
                 )
                 self.data_df = self.read_all_files_in_folder_mp_futures(
-                    file_paths=file_paths, file_type=self.file_type
+                    annotations_file_paths=file_paths, file_type=self.file_type
                 )
                 self.check_raw_dataset_integrity(
                     df=self.data_df, logs_path=self.logs_path