diff --git a/docs/_static/img/spatial_density.webp b/docs/_static/img/spatial_density.webp new file mode 100644 index 000000000..1b1d2c534 Binary files /dev/null and b/docs/_static/img/spatial_density.webp differ diff --git a/docs/nb/CLI Example 1.ipynb b/docs/nb/CLI Example 1.ipynb index 50f0085b5..dd2dc9b56 100644 --- a/docs/nb/CLI Example 1.ipynb +++ b/docs/nb/CLI Example 1.ipynb @@ -438,9 +438,9 @@ ], "metadata": { "kernelspec": { - "display_name": "Python_3.6", + "display_name": "simba", "language": "python", - "name": "python_3.6" + "name": "simba" }, "language_info": { "codemirror_mode": { diff --git a/docs/nb/outlier_correction.ipynb b/docs/nb/outlier_correction.ipynb index 41778cf28..d56064eca 100644 --- a/docs/nb/outlier_correction.ipynb +++ b/docs/nb/outlier_correction.ipynb @@ -19,20 +19,20 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 1, "id": "cb913766", "metadata": {}, "outputs": [], "source": [ "from simba.outlier_tools.outlier_corrector_movement import OutlierCorrecterMovement\n", "from simba.outlier_tools.outlier_corrector_location import OutlierCorrecterLocation\n", - "from simba.utils.cli import set_outlier_correction_criteria_cli\n", - "from simba.pose_importers.dlc_importer_csv import import_multiple_dlc_tracking_csv_file" + "from simba.utils.cli.cli_tools import set_outlier_correction_criteria_cli\n", + "from simba.pose_importers.dlc_importer_csv import import_dlc_csv_data" ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 2, "id": "f7cc63fa", "metadata": {}, "outputs": [], @@ -41,16 +41,15 @@ "# DATA, AND (III) THE ATTRIBUTES OF OUR NEW VIDEOS (FPS ETC.)\n", "\n", "## Define the path to our SimBA project config ini\n", - "CONFIG_PATH = '/Users/simon/Desktop/envs/troubleshooting/notebook_example/project_folder/project_config.ini'\n", + "CONFIG_PATH = r\"C:\\troubleshooting\\two_black_animals_14bp\\project_folder\\project_config.ini\"\n", "\n", "## Define the path to the directory holding 
our new DLC CSV pose-estimation data\n", - "DATA_DIR = '/Users/simon/Desktop/envs/troubleshooting/notebook_example/data'\n", + "DATA_DIR = r\"C:\\troubleshooting\\two_black_animals_14bp\\dlc_data\"\n", "\n", "## Define if / how you want to interpolate missing pose-estimation data,\n", "## and if/how you want to smooth the new pose estimation data: here we do neither.\n", - "INTERPOLATION_SETTING = 'None' # OPTIONS: 'None', Animal(s): Nearest', 'Animal(s): Linear', 'Animal(s): Quadratic','Body-parts: Nearest', 'Body-parts: Linear', 'Body-parts: Quadratic'\n", - "SMOOTHING_SETTING = None # OPTIONS: 'Gaussian', 'Savitzky Golay'\n", - "SMOOTHING_TIME = None # TIME IN MILLISECOND\n", + "INTERPOLATION_SETTING = None # OPTIONS: 'None', Animal(s): Nearest', 'Animal(s): Linear', 'Animal(s): Quadratic','Body-parts: Nearest', 'Body-parts: Linear', 'Body-parts: Quadratic'\n", + "SMOOTHING_SETTING = None # OPTIONS: {'time_window': 500, 'method': 'savitzky-golay'}, {'time_window': 500, 'method': 'gaussian'}\n", "\n", "## Define the fps and the pixels per millimeter of the incoming data: has to be the same for all new videos.\n", "## if you have varying fps / px per millimeter / resolutions, then use gui (2023/05)\n", @@ -71,7 +70,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 3, "id": "c3d8767e", "metadata": {}, "outputs": [ @@ -79,29 +78,41 @@ "name": "stdout", "output_type": "stream", "text": [ - "Importing Aqu_FFJ_Cre_721 to SimBA project...\n", - "Pose-estimation data for video Aqu_FFJ_Cre_721 imported to SimBA project (elapsed time: 0.1718s)...\n", - "Importing Aqu_FFJ_Cre_723 to SimBA project...\n", - "Pose-estimation data for video Aqu_FFJ_Cre_723 imported to SimBA project (elapsed time: 0.1681s)...\n", - "Importing Aqu_FFJ_Cre_722 to SimBA project...\n", - "Pose-estimation data for video Aqu_FFJ_Cre_722 imported to SimBA project (elapsed time: 0.1617s)...\n", - "SIMBA COMPLETE: Imported 3 pose estimation file(s) (elapsed time: 0.5078s) 
\tcomplete\n" + "Importing Test_1 to SimBA project...\n", + "Pose-estimation data for video Test_1 imported to SimBA project (elapsed time: 0.0546s)...\n", + "Importing Test_2 to SimBA project...\n", + "Pose-estimation data for video Test_2 imported to SimBA project (elapsed time: 0.052s)...\n", + "Importing Test_3 to SimBA project...\n", + "Pose-estimation data for video Test_3 imported to SimBA project (elapsed time: 0.0463s)...\n", + "Importing Test_4 to SimBA project...\n", + "Pose-estimation data for video Test_4 imported to SimBA project (elapsed time: 0.0431s)...\n", + "Importing Test_5 to SimBA project...\n", + "Pose-estimation data for video Test_5 imported to SimBA project (elapsed time: 0.0482s)...\n", + "Importing Test_6 to SimBA project...\n", + "Pose-estimation data for video Test_6 imported to SimBA project (elapsed time: 0.0583s)...\n", + "Importing Test_7 to SimBA project...\n", + "Pose-estimation data for video Test_7 imported to SimBA project (elapsed time: 0.0503s)...\n", + "Importing Test_8 to SimBA project...\n", + "Pose-estimation data for video Test_8 imported to SimBA project (elapsed time: 0.05s)...\n", + "Importing Test_9 to SimBA project...\n", + "Pose-estimation data for video Test_9 imported to SimBA project (elapsed time: 0.058s)...\n", + "SIMBA COMPLETE: Imported 9 pose estimation file(s) to directory (elapsed time: 0.4758s) \tcomplete\n" ] } ], "source": [ "# WE RUN THE DATA IMPORTER FOR OUR DIRECTORY OF FILES\n", "## This imports your DLC files in the ``DATA_DIR`` according to the smoothing / interpolation settings defined above\n", - "import_multiple_dlc_tracking_csv_file(config_path=CONFIG_PATH,\n", - " interpolation_setting=INTERPOLATION_SETTING,\n", - " smoothing_setting=SMOOTHING_SETTING,\n", - " smoothing_time=SMOOTHING_TIME,\n", - " data_dir=DATA_DIR)" + "\n", + "import_dlc_csv_data(config_path=CONFIG_PATH,\n", + " interpolation_settings=INTERPOLATION_SETTING,\n", + " smoothing_settings=SMOOTHING_SETTING,\n", + " 
data_path=DATA_DIR)" ] }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 5, "id": "38306f7e", "metadata": {}, "outputs": [ @@ -109,51 +120,136 @@ "name": "stdout", "output_type": "stream", "text": [ - "Processing video Aqu_FFJ_Cre_721. Video 1/3...\n", - "Corrected movement outliers for file Aqu_FFJ_Cre_721 (elapsed time: 0.2929s)...\n", - "Processing video Aqu_FFJ_Cre_723. Video 2/3...\n", - "Corrected movement outliers for file Aqu_FFJ_Cre_723 (elapsed time: 0.2713s)...\n", - "Processing video Aqu_FFJ_Cre_722. Video 3/3...\n", - "Corrected movement outliers for file Aqu_FFJ_Cre_722 (elapsed time: 0.2674s)...\n", - "SIMBA COMPLETE: Log for corrected \"movement outliers\" saved in project_folder/logs (elapsed time: 0.8572s) \tcomplete\n", - "Processing video Aqu_FFJ_Cre_721. Video 1/3..\n", - "Corrected location outliers for file Aqu_FFJ_Cre_721 (elapsed time: 49.6797s)...\n", - "Processing video Aqu_FFJ_Cre_723. Video 2/3..\n", - "Corrected location outliers for file Aqu_FFJ_Cre_723 (elapsed time: 24.645s)...\n", - "Processing video Aqu_FFJ_Cre_722. Video 3/3..\n", - "Corrected location outliers for file Aqu_FFJ_Cre_722 (elapsed time: 15.1142s)...\n", - "SIMBA COMPLETE: Log for corrected \"location outliers\" saved in project_folder/logs (elapsed time: 89.4743s) \tcomplete\n" + "SIMBA COMPLETE: Outlier parameters set (elapsed time: 0.003s) \tcomplete\n" ] } ], "source": [ - "#We set the outlier criteria in the project_config.ini and run the outlier correction. NOTE: You can also set this manually in the project_config.ini or thrugh\n", + "#We set the outlier criteria in the project_config.ini NOTE: You can also set this manually in the project_config.ini or thrugh\n", "# the SimBA GUI. 
If this has already been done, there is **no need** to call `set_outlier_correction_criteria_cli`.\n", "set_outlier_correction_criteria_cli(config_path=CONFIG_PATH,\n", " aggregation=AGGREGATION_METHOD,\n", " body_parts=BODY_PARTS,\n", " movement_criterion=MOVEMENT_CRITERION,\n", - " location_criterion=LOCATION_CRITERION)\n", - "\n", - "\n", + " location_criterion=LOCATION_CRITERION)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "ff58e186", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing video Test_1. Video 1/9...\n", + "Corrected movement outliers for file Test_1 (elapsed time: 0.435s)...\n", + "Processing video Test_2. Video 2/9...\n", + "Corrected movement outliers for file Test_2 (elapsed time: 0.1166s)...\n", + "Processing video Test_3. Video 3/9...\n", + "Corrected movement outliers for file Test_3 (elapsed time: 0.1158s)...\n", + "Processing video Test_4. Video 4/9...\n", + "Corrected movement outliers for file Test_4 (elapsed time: 0.114s)...\n", + "Processing video Test_5. Video 5/9...\n", + "Corrected movement outliers for file Test_5 (elapsed time: 0.1183s)...\n", + "Processing video Test_6. Video 6/9...\n", + "Corrected movement outliers for file Test_6 (elapsed time: 0.1124s)...\n", + "Processing video Test_7. Video 7/9...\n", + "Corrected movement outliers for file Test_7 (elapsed time: 0.1151s)...\n", + "Processing video Test_8. Video 8/9...\n", + "Corrected movement outliers for file Test_8 (elapsed time: 0.117s)...\n", + "Processing video Test_9. Video 9/9...\n", + "Corrected movement outliers for file Test_9 (elapsed time: 0.1152s)...\n", + "SIMBA COMPLETE: Log for corrected \"movement outliers\" saved in C:\\troubleshooting\\two_black_animals_14bp\\project_folder\\logs (elapsed time: 1.3693s) \tcomplete\n", + "Processing video Test_1. Video 1/9..\n", + "Corrected location outliers for file Test_1 (elapsed time: 0.8654s)...\n", + "Processing video Test_2. 
Video 2/9..\n", + "Corrected location outliers for file Test_2 (elapsed time: 0.877s)...\n", + "Processing video Test_3. Video 3/9..\n", + "Corrected location outliers for file Test_3 (elapsed time: 0.8534s)...\n", + "Processing video Test_4. Video 4/9..\n", + "Corrected location outliers for file Test_4 (elapsed time: 0.8611s)...\n", + "Processing video Test_5. Video 5/9..\n", + "Corrected location outliers for file Test_5 (elapsed time: 0.8581s)...\n", + "Processing video Test_6. Video 6/9..\n", + "Corrected location outliers for file Test_6 (elapsed time: 0.8512s)...\n", + "Processing video Test_7. Video 7/9..\n", + "Corrected location outliers for file Test_7 (elapsed time: 0.8616s)...\n", + "Processing video Test_8. Video 8/9..\n", + "Corrected location outliers for file Test_8 (elapsed time: 0.8641s)...\n", + "Processing video Test_9. Video 9/9..\n", + "Corrected location outliers for file Test_9 (elapsed time: 0.8626s)...\n", + "SIMBA COMPLETE: Log for corrected \"location outliers\" saved in project_folder/logs (elapsed time: 7.8084s) \tcomplete\n" + ] + } + ], + "source": [ + "# Finally, we run the outlier correction (NOTE: SEE CELL BELOW FOR ALTERNATIVE WAY OF RUNNING OUTLIER CORRECTION ACROSS MULTIPLE CORES)\n", "_ = OutlierCorrecterMovement(config_path=CONFIG_PATH).run()\n", "_ = OutlierCorrecterLocation(config_path=CONFIG_PATH).run()" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "id": "47975a2a", "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Video Test_1 complete...\n", + "Video Test_2 complete...\n", + "Video Test_3 complete...\n", + "Video Test_4 complete...\n", + "Video Test_5 complete...\n", + "Video Test_6 complete...\n", + "Video Test_7 complete...\n", + "Video Test_8 complete...\n", + "Video Test_9 complete...\n", + "SIMBA COMPLETE: Log for corrected \"movement outliers\" saved in C:\\troubleshooting\\two_black_animals_14bp\\project_folder\\logs (elapsed time: 
4.2945s) \tcomplete\n", + "Video Test_1 complete...\n", + "Video Test_2 complete...\n", + "Video Test_3 complete...\n", + "Video Test_4 complete...\n", + "Video Test_5 complete...\n", + "Video Test_6 complete...\n", + "Video Test_7 complete...\n", + "Video Test_8 complete...\n", + "Video Test_9 complete...\n", + "SIMBA COMPLETE: Log for corrected \"location outliers\" saved in project_folder/logs (elapsed time: 4.0219s) \tcomplete\n" + ] + } + ], + "source": [ + "# OPTIONAL: If you find that the outlier correction - as run in the immediate above cell - is slow, we could run outlier \n", + "# correction over multiple cores. If you choose this approach, make sure you are running the latest version of SimBA.\n", + "# You can update SimBA by running `pip install simba-uw-tf-dev --upgrade`\n", + "\n", + "from simba.outlier_tools.outlier_corrector_location_mp import OutlierCorrecterLocationMultiprocess\n", + "from simba.outlier_tools.outlier_corrector_movement_mp import OutlierCorrecterMovementMultiProcess\n", + "\n", + "_ = OutlierCorrecterMovementMultiProcess(config_path=CONFIG_PATH).run()\n", + "_ = OutlierCorrecterLocationMultiprocess(config_path=CONFIG_PATH).run()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "61fb78fa", + "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { - "display_name": "Python [conda env:simba_dev] *", + "display_name": "simba", "language": "python", - "name": "conda-env-simba_dev-py" + "name": "simba" }, "language_info": { "codemirror_mode": { diff --git a/setup.py b/setup.py index 56709a1af..131c0dad1 100644 --- a/setup.py +++ b/setup.py @@ -29,7 +29,7 @@ # Setup configuration setuptools.setup( name="Simba-UW-tf-dev", - version="2.2.8", + version="2.3.1", author="Simon Nilsson, Jia Jie Choong, Sophia Hwang", author_email="sronilsson@gmail.com", description="Toolkit for computer classification and analysis of behaviors in experimental animals", diff --git a/simba/mixins/config_reader.py 
b/simba/mixins/config_reader.py index 47ee64443..7e90618ff 100644 --- a/simba/mixins/config_reader.py +++ b/simba/mixins/config_reader.py @@ -152,15 +152,11 @@ def __init__( self.outlier_corrected_movement_dir + f"/*.{self.file_type}" ) self.cpu_cnt, self.cpu_to_use = find_core_cnt() - self.machine_results_paths = glob.glob( - self.machine_results_dir + f"/*.{self.file_type}" - ) + self.machine_results_paths = glob.glob(self.machine_results_dir + f"/*.{self.file_type}") self.logs_path = os.path.join(self.project_path, "logs") self.body_parts_path = os.path.join(self.project_path, Paths.BP_NAMES.value) check_file_exist_and_readable(file_path=self.body_parts_path) - self.body_parts_lst = ( - pd.read_csv(self.body_parts_path, header=None).iloc[:, 0].to_list() - ) + self.body_parts_lst = (pd.read_csv(self.body_parts_path, header=None).iloc[:, 0].to_list()) self.body_parts_lst = [x for x in self.body_parts_lst if str(x) != "nan"] self.get_body_part_names() self.get_bp_headers() diff --git a/simba/mixins/statistics_mixin.py b/simba/mixins/statistics_mixin.py index b5261a4a7..f6178bea9 100644 --- a/simba/mixins/statistics_mixin.py +++ b/simba/mixins/statistics_mixin.py @@ -4182,3 +4182,36 @@ def normalized_google_distance(x: np.ndarray, y: np.ndarray) -> float: return -1.0 else: return N / D + + def symmetry_index(x: np.ndarray, y: np.ndarray, agg_type: Literal['mean', 'median'] = 'mean') -> float: + + """ + Calculate the Symmetry Index (SI) between two arrays of measurements, `x` and `y`, over a given time series. + The Symmetry Index quantifies the relative difference between two measurements at each time point, expressed as a percentage. + The function returns either the mean or median Symmetry Index over the entire series, based on the specified aggregation type. + + Zero indicates perfect symmetry. Positive values represent increasing asymmetry between the two measurements. 
+ + :param np.ndarray x: A 1-dimensional array of measurements from one side (e.g., left side), representing a time series or sequence of measurements. + :param np.ndarray y: A 1-dimensional array of measurements from the other side (e.g., right side), of the same length as `x`. + :param Literal['mean', 'median'] agg_type: The aggregation method used to summarize the Symmetry Index across all time points. + :return: The aggregated Symmetry Index over the series, either as the mean or median SI. + :rtype: float + + :example: + >>> x = np.random.randint(0, 155, (100,)) + >>> y = np.random.randint(0, 155, (100,)) + >>> symmetry_index(x=x, y=y) + """ + + check_valid_array(data=x, source=f'{Statistics.symmetry_index.__name__} x', accepted_ndims=(1,), min_axis_0=1, + accepted_dtypes=Formats.NUMERIC_DTYPES.value) + check_valid_array(data=y, source=f'{Statistics.symmetry_index.__name__} y', accepted_ndims=(1,), min_axis_0=1, + accepted_axis_0_shape=[x.shape[0]], accepted_dtypes=Formats.NUMERIC_DTYPES.value) + check_str(name=f'{Statistics.symmetry_index.__name__} agg_type', value=agg_type, options=('mean', 'median')) + si_values = np.abs(x - y) / (0.5 * (x + y)) * 100 + if agg_type == 'mean': + return np.float32(np.nanmean(si_values)) + else: + return np.float32(np.nanmedian(si_values)) + diff --git a/simba/outlier_tools/outlier_corrector_location.py b/simba/outlier_tools/outlier_corrector_location.py index 5fc2a8b52..b09d30ab2 100644 --- a/simba/outlier_tools/outlier_corrector_location.py +++ b/simba/outlier_tools/outlier_corrector_location.py @@ -30,8 +30,7 @@ class OutlierCorrecterLocation(ConfigReader): :param Union[str, os.PathLike] config_path: path to SimBA project config file in Configparser format - Examples - ---------- + :example: >>> _ = OutlierCorrecterLocation(config_path='MyProjectConfig').run() """ @@ -54,18 +53,8 @@ def __init__(self, self.outlier_bp_dict = {} for animal_name in self.animal_bp_dict.keys(): self.outlier_bp_dict[animal_name] = {} - 
self.outlier_bp_dict[animal_name]["bp_1"] = read_config_entry( - self.config, - ConfigKey.OUTLIER_SETTINGS.value, - "location_bodypart1_{}".format(animal_name.lower()), - "str", - ) - self.outlier_bp_dict[animal_name]["bp_2"] = read_config_entry( - self.config, - ConfigKey.OUTLIER_SETTINGS.value, - "location_bodypart2_{}".format(animal_name.lower()), - "str", - ) + self.outlier_bp_dict[animal_name]["bp_1"] = read_config_entry(self.config, ConfigKey.OUTLIER_SETTINGS.value, "location_bodypart1_{}".format(animal_name.lower()),"str") + self.outlier_bp_dict[animal_name]["bp_2"] = read_config_entry(self.config, ConfigKey.OUTLIER_SETTINGS.value, "location_bodypart2_{}".format(animal_name.lower()), "str") def __find_location_outliers(self): for animal_name, animal_data in self.bp_dict.items(): diff --git a/simba/outlier_tools/outlier_corrector_location_mp.py b/simba/outlier_tools/outlier_corrector_location_mp.py new file mode 100644 index 000000000..55663cbe0 --- /dev/null +++ b/simba/outlier_tools/outlier_corrector_location_mp.py @@ -0,0 +1,187 @@ +__author__ = "Simon Nilsson" + + +import os +from typing import Union, Optional, Dict + +import numpy as np +import pandas as pd +import multiprocessing +import functools + +from simba.utils.checks import check_float, check_if_dir_exists, check_int +from simba.mixins.config_reader import ConfigReader +from simba.mixins.feature_extraction_mixin import FeatureExtractionMixin +from simba.utils.enums import ConfigKey, Dtypes, Defaults +from simba.utils.printing import SimbaTimer, stdout_success +from simba.utils.read_write import (get_fn_ext, read_config_entry, read_df, write_df, find_core_cnt, find_files_of_filetypes_in_directory) + +def _location_outlier_corrector(data_path: str, + config: FeatureExtractionMixin, + animal_bp_dict: dict, + outlier_dict: dict, + file_type: str, + save_dir: str, + criterion: float): + + def __find_location_outliers(bp_dict: dict, animal_criteria: dict): + above_criteria_dict, below_criteria_dict = 
{}, {} + for animal_name, animal_data in bp_dict.items(): + animal_criterion = animal_criteria[animal_name] + above_criteria_dict[animal_name]= {} + for first_bp_cnt, (first_body_part_name, first_bp_cords) in enumerate(animal_data.items()): + second_bp_names = [x for x in list(animal_data.keys()) if x != first_body_part_name] + above_criterion_frms = [] + for second_bp_cnt, second_bp in enumerate(second_bp_names): + second_bp_cords = animal_data[second_bp] + distances = config.framewise_euclidean_distance(location_1=first_bp_cords, location_2=second_bp_cords, px_per_mm=1.0, centimeter=False) + above_criterion_frms.extend(np.argwhere(distances > animal_criterion).flatten()) + unique, counts = np.unique(above_criterion_frms, return_counts=True) + above_criteria_dict[animal_name][first_body_part_name] = np.sort(unique[counts > 1]) + return above_criteria_dict + + def __correct_outliers(df: pd.DataFrame, above_criteria_dict: dict): + for animal_name, animal_data in above_criteria_dict.items(): + for body_part_name, frm_idx in animal_data.items(): + col_names = [f'{body_part_name}_x', f'{body_part_name}_y'] + if len(frm_idx) > 0: + df.loc[frm_idx, col_names] = np.nan + return df.fillna(method='ffill', axis=0).fillna(0) + + video_timer = SimbaTimer(start=True) + _, video_name, _ = get_fn_ext(data_path) + print(f"Processing video {video_name}..") + save_path = os.path.join(save_dir, f"{video_name}.{file_type}") + above_criterion_dict, below_criterion_dict, animal_criteria, bp_dict = {}, {}, {}, {} + df = read_df(data_path, file_type) + for animal_name, animal_bps in outlier_dict.items(): + animal_bp_distances = np.sqrt((df[animal_bps["bp_1"] + "_x"] - df[animal_bps["bp_2"] + "_x"]) ** 2 + (df[animal_bps["bp_1"] + "_y"] - df[animal_bps["bp_2"] + "_y"]) ** 2) + animal_criteria[animal_name] = (animal_bp_distances.mean() * criterion) + for animal_name, animal_bps in animal_bp_dict.items(): + bp_col_names = np.array([[i, j] for i, j in zip(animal_bps["X_bps"], 
animal_bps["Y_bps"])]).ravel() + animal_arr = df[bp_col_names].to_numpy() + bp_dict[animal_name] = {} + for bp_cnt, bp_col_start in enumerate(range(0, animal_arr.shape[1], 2)): + bp_name = animal_bps["X_bps"][bp_cnt][:-2] + bp_dict[animal_name][bp_name] = animal_arr[:, bp_col_start: bp_col_start + 2] + + above_criteria_dict = __find_location_outliers(bp_dict=bp_dict, animal_criteria=animal_criteria) + df = __correct_outliers(df=df, above_criteria_dict=above_criteria_dict) + write_df(df=df, file_type=file_type, save_path=save_path) + video_timer.stop_timer() + print(f"Corrected location outliers for file {video_name} (elapsed time: {video_timer.elapsed_time_str}s)...") + return video_name, above_criteria_dict, len(df) + +class OutlierCorrecterLocationMultiprocess(ConfigReader, FeatureExtractionMixin): + """ + Detect and amend outliers in pose-estimation data based on the location of each body-part + in the current frame relative to the locations of the animal's other body-parts, using heuristic rules. + + The heuristic criteria are read from the SimBA project project_config.ini under the [Outlier settings] header. + + .. note:: + `Documentation `_. + + .. image:: _static/img/location_outlier.png + :width: 500 + :align: center + + :param Union[str, os.PathLike] config_path: path to SimBA project config file in Configparser format + :param Optional[Union[str, os.PathLike]] data_dir: The directory storing the input data. If None, then the ``outlier_corrected_movement`` directory of the SimBA project. + :param Optional[Union[str, os.PathLike]] save_dir: The directory to store the results. If None, then the ``outlier_corrected_movement_location`` directory of the SimBA project. + :param Optional[int] core_cnt: The number of cores to use. If -1, then all available cores. Default: -1. + :param Optional[Dict[str, Dict[str, str]]] animal_dict: Dictionary holding the animal names, and the two body-parts to use to measure the mean or median size of the animals. 
If None, grabs the info from the SimBA project config. + :param Optional[float] criterion: The criterion multiplier. If None, grabs the info from the SimBA project config. + + :example: + >>> _ = OutlierCorrecterLocationMultiprocess(config_path='MyProjectConfig').run() + """ + + def __init__(self, + config_path: Union[str, os.PathLike], + data_dir: Optional[Union[str, os.PathLike]] = None, + save_dir: Optional[Union[str, os.PathLike]] = None, + core_cnt: Optional[int] = -1, + animal_dict: Optional[Dict[str, Dict[str, str]]] = None, + criterion: Optional[float] = None): + + ConfigReader.__init__(self, config_path=config_path, create_logger=False, read_video_info=False) + FeatureExtractionMixin.__init__(self) + if not os.path.exists(self.outlier_corrected_dir): + os.makedirs(self.outlier_corrected_dir) + if criterion is None: + self.criterion = read_config_entry(self.config, ConfigKey.OUTLIER_SETTINGS.value, ConfigKey.LOCATION_CRITERION.value, Dtypes.FLOAT.value) + else: + check_float(name=f'{criterion} criterion', value=criterion, min_value=10e-10) + self.criterion = criterion + if data_dir is not None: + check_if_dir_exists(in_dir=data_dir, source=self.__class__.__name__) + self.data_dir = data_dir + else: + self.data_dir = self.outlier_corrected_movement_dir + if save_dir is not None: + check_if_dir_exists(in_dir=save_dir, source=self.__class__.__name__) + self.save_dir = save_dir + else: + self.save_dir = self.outlier_corrected_dir + check_int(name=f'{self.__class__.__name__} core_cnt', value=core_cnt, min_value=-1, unaccepted_vals=[0]) + self.core_cnt = core_cnt + if self.core_cnt == -1: + self.core_cnt = find_core_cnt()[0] + + self.above_criterion_dict_dict, self.below_criterion_dict_dict = {},{} + if animal_dict is None: + self.outlier_bp_dict = {} + if self.animal_cnt == 1: + self.animal_id = read_config_entry(self.config, ConfigKey.MULTI_ANIMAL_ID_SETTING.value, ConfigKey.MULTI_ANIMAL_IDS.value, Dtypes.STR.value) + if self.animal_id != "None": + 
self.animal_bp_dict[self.animal_id] = self.animal_bp_dict.pop("Animal_1") + + for animal_name in self.animal_bp_dict.keys(): + self.outlier_bp_dict[animal_name] = {} + self.outlier_bp_dict[animal_name]["bp_1"] = read_config_entry(self.config, ConfigKey.OUTLIER_SETTINGS.value, "location_bodypart1_{}".format(animal_name.lower()),"str") + self.outlier_bp_dict[animal_name]["bp_2"] = read_config_entry(self.config, ConfigKey.OUTLIER_SETTINGS.value, "location_bodypart2_{}".format(animal_name.lower()),"str") + else: + self.outlier_bp_dict = animal_dict + + def run(self): + self.logs, self.frm_cnts = {}, {} + data_paths = find_files_of_filetypes_in_directory(directory=self.data_dir, extensions=[f'.{self.file_type}'], raise_error=True) + data_path_tuples = [(x) for x in data_paths] + with multiprocessing.Pool(self.core_cnt, maxtasksperchild=Defaults.MAXIMUM_MAX_TASK_PER_CHILD.value) as pool: + constants = functools.partial(_location_outlier_corrector, + config=self, + animal_bp_dict=self.animal_bp_dict, + outlier_dict=self.outlier_bp_dict, + save_dir=self.save_dir, + file_type=self.file_type, + criterion=self.criterion) + for cnt, (video_name, above_critera_dict, frm_cnt) in enumerate(pool.imap(constants, data_path_tuples, chunksize=1)): + self.frm_cnts[video_name] = frm_cnt + self.logs[video_name] = above_critera_dict + print(f"Video {video_name} complete...") + self.__save_log_file() + + def __save_log_file(self): + out_df = pd.DataFrame(columns=['VIDEO', 'ANIMAL', 'BODY-PART', 'CORRECTION COUNT', 'CORRECTION RATIO']) + for video_name, video_data in self.logs.items(): + for animal_name, animal_data in video_data.items(): + for bp_name, bp_data in animal_data.items(): + correction_ratio = round(len(bp_data) / self.frm_cnts[video_name], 6) + out_df.loc[len(out_df)] = [video_name, animal_name, bp_name, len(bp_data), correction_ratio] + log_save_path = os.path.join(self.logs_path, f"Outliers_location_{self.datetime}.csv") + out_df.to_csv(log_save_path) + 
self.timer.stop_timer() + stdout_success(msg='Log for corrected "location outliers" saved in project_folder/logs', elapsed_time=self.timer.elapsed_time_str) + +# if __name__ == "__main__": +# test = OutlierCorrecterLocationMultiprocess(config_path=r"C:\troubleshooting\two_black_animals_14bp\project_folder\project_config.ini") +# #test = OutlierCorrecterLocationMultiprocess(config_path=r"C:\troubleshooting\mitra\project_folder\project_config.ini") +# test.run() + + +# test = OutlierCorrecterLocation(config_path='/Users/simon/Desktop/envs/troubleshooting/naresh/project_folder/project_config.ini') +# test.run() + +# test = OutlierCorrecterLocation(config_path='/Users/simon/Desktop/envs/troubleshooting/two_black_animals_14bp/project_folder/project_config.ini') +# test.correct_location_outliers() diff --git a/simba/outlier_tools/outlier_corrector_movement.py b/simba/outlier_tools/outlier_corrector_movement.py index b413706da..9ba3107f2 100644 --- a/simba/outlier_tools/outlier_corrector_movement.py +++ b/simba/outlier_tools/outlier_corrector_movement.py @@ -70,34 +70,24 @@ def __init__(self, @staticmethod @jit(nopython=True) - def __corrector(data=np.ndarray, criterion=float): + def __corrector(data: np.ndarray, criterion: float): results, current_value, cnt = np.full(data.shape, np.nan), data[0, :], 0 for i in range(data.shape[0]): dist = abs(np.linalg.norm(current_value - data[i, :])) if dist <= criterion: current_value = data[i, :] + else: cnt += 1 results[i, :] = current_value return results, cnt def __outlier_replacer(self): for animal_name, animal_body_parts in self.animal_bp_dict.items(): - for bp_x_name, bp_y_name in zip( - animal_body_parts["X_bps"], animal_body_parts["Y_bps"] - ): - vals, cnt = self.__corrector( - data=self.data_df[[bp_x_name, bp_y_name]].values, - criterion=self.animal_criteria[animal_name], - ) + for bp_x_name, bp_y_name in zip(animal_body_parts["X_bps"], animal_body_parts["Y_bps"]): + vals, cnt = 
self.__corrector(data=self.data_df[[bp_x_name, bp_y_name]].values,criterion=self.animal_criteria[animal_name]) df = pd.DataFrame(vals, columns=[bp_x_name, bp_y_name]) self.data_df.update(df) - self.log.loc[len(self.log)] = [ - self.video_name, - animal_name, - bp_x_name[:-2], - cnt, - round(cnt / len(df), 6), - ] + self.log.loc[len(self.log)] = [self.video_name, animal_name, bp_x_name[:-2], cnt, round(cnt / len(df), 6)] def run(self): """ diff --git a/simba/outlier_tools/outlier_corrector_movement_mp.py b/simba/outlier_tools/outlier_corrector_movement_mp.py new file mode 100644 index 000000000..acda61eaa --- /dev/null +++ b/simba/outlier_tools/outlier_corrector_movement_mp.py @@ -0,0 +1,178 @@ +__author__ = "Simon Nilsson" + +import os +from typing import Union, Optional, Dict + +import numpy as np +import pandas as pd +from numba import jit +import multiprocessing +import functools + +from simba.mixins.config_reader import ConfigReader +from simba.mixins.feature_extraction_mixin import FeatureExtractionMixin +from simba.utils.enums import ConfigKey, Dtypes +from simba.utils.printing import SimbaTimer, stdout_success +from simba.utils.read_write import (get_fn_ext, read_config_entry, read_df, write_df, find_core_cnt, find_files_of_filetypes_in_directory) +from simba.utils.checks import check_int, check_float, check_if_dir_exists + + +def _movement_outlier_corrector(data_path: str, + config: ConfigReader, + animal_bp_dict: dict, + outlier_dict: dict, + file_type: str, + save_dir: str, + criterion: float): + + + @jit(nopython=True) + def _corrector(data: np.ndarray, criterion: float): + results, current_value, cnt = np.full(data.shape, np.nan), data[0, :], 0 + for i in range(data.shape[0]): + dist = abs(np.linalg.norm(current_value - data[i, :])) + if dist <= criterion: + current_value = data[i, :] + else: + cnt += 1 + results[i, :] = current_value + return results, cnt + + + def _outlier_replacer(data_df: pd.DataFrame, + animal_criteria: dict, + video_name: str): + 
+ log = pd.DataFrame(columns=["VIDEO", "ANIMAL", "BODY-PART", "CORRECTION COUNT", "CORRECTION PCT"]) + for animal_name, animal_body_parts in animal_bp_dict.items(): + for bp_x_name, bp_y_name in zip(animal_body_parts["X_bps"], animal_body_parts["Y_bps"]): + vals, cnt = _corrector(data=data_df[[bp_x_name, bp_y_name]].values, criterion=animal_criteria[animal_name]) + df = pd.DataFrame(vals, columns=[bp_x_name, bp_y_name]) + data_df.update(df) + log.loc[len(log)] = [video_name, animal_name, bp_x_name[:-2], cnt, round(cnt / len(df), 6)] + + return data_df, log + + + video_timer = SimbaTimer(start=True) + _, video_name, _ = get_fn_ext(filepath=data_path) + save_path = os.path.join(save_dir, f"{video_name}.{file_type}") + df = read_df(data_path, file_type, check_multiindex=True) + df = config.insert_column_headers_for_outlier_correction(data_df=df, new_headers=config.bp_headers, filepath=data_path) + animal_criteria = {} + for animal_name, animal_bps in outlier_dict.items(): + animal_bp_distances = np.sqrt((df[animal_bps["bp_1"] + "_x"] - df[animal_bps["bp_2"] + "_x"]) ** 2 + (df[animal_bps["bp_1"] + "_y"] - df[animal_bps["bp_2"] + "_y"]) ** 2) + animal_criteria[animal_name] = (animal_bp_distances.mean() * criterion) + df, log = _outlier_replacer(animal_criteria=animal_criteria, data_df=df, video_name=video_name) + write_df(df=df, file_type=file_type, save_path=save_path) + video_timer.stop_timer() + print(f"Corrected movement outliers for file {video_name} (elapsed time: {video_timer.elapsed_time_str}s)...") + + return video_name, log + + + +class OutlierCorrecterMovementMultiProcess(ConfigReader, FeatureExtractionMixin): + """ + Detect and amend outliers in pose-estimation data based on movement length (Euclidean) of the body-parts + in the current frame from the preceding frame. If not passed, then uses criteria stored in the SimBA project project_config.ini + under the [Outlier settings] header. Uses multiprocessing. 
+ + :param Union[str, os.PathLike] config_path: path to SimBA project config file in Configparser format + :param Optional[Union[str, os.PathLike]] data_dir: The directory storing the input data. If None, then the ``input_csv`` directory of the SimBA project. + :param Optional[Union[str, os.PathLike]] save_dir: The directory to store the results. If None, then the ``outlier_corrected_movement`` directory of the SimBA project. + :param Optional[int] core_cnt: The number of cores to use. If -1, then all available cores. Default: -1. + :param Optional[Dict[str, Dict[str, str]]] animal_dict: Dictionary holding the animal names, and the two body-parts to use to measure the mean size of the animals. If None, grabs the info from the SimBA project config. + :param Optional[float] criterion: The criterion multiplier. If None, grabs the info from the SimBA project config. + + .. image:: _static/img/movement_outlier.png + :width: 500 + :align: center + + .. note:: + `Outlier correction documentation <https://github.com/sgoldenlab/simba/blob/master/misc/Outlier_settings.pdf>`__.
+ + :example: + >>> outlier_correcter_movement = OutlierCorrecterMovementMultiProcess(config_path='MyProjectConfig') + >>> outlier_correcter_movement.run() + """ + + def __init__(self, + config_path: Union[str, os.PathLike], + data_dir: Optional[Union[str, os.PathLike]] = None, + save_dir: Optional[Union[str, os.PathLike]] = None, + core_cnt: Optional[int] = -1, + animal_dict: Optional[Dict[str, Dict[str, str]]] = None, + criterion: Optional[float] = None): + + ConfigReader.__init__(self, config_path=config_path, create_logger=False) + FeatureExtractionMixin.__init__(self) + if not os.path.exists(self.outlier_corrected_movement_dir): + os.makedirs(self.outlier_corrected_movement_dir) + if criterion is None: + self.criterion = read_config_entry(self.config, ConfigKey.OUTLIER_SETTINGS.value, ConfigKey.MOVEMENT_CRITERION.value, Dtypes.FLOAT.value) + else: + check_float(name=f'{criterion} criterion', value=criterion, min_value=10e-10) + self.criterion = criterion + if data_dir is not None: + check_if_dir_exists(in_dir=data_dir, source=self.__class__.__name__) + self.data_dir = data_dir + else: + self.data_dir = self.input_csv_dir + + if save_dir is not None: + check_if_dir_exists(in_dir=save_dir, source=self.__class__.__name__) + self.save_dir = save_dir + else: + self.save_dir = self.outlier_corrected_movement_dir + + check_int(name=f'{self.__class__.__name__} core_cnt', value=core_cnt, min_value=-1, unaccepted_vals=[0]) + self.core_cnt = core_cnt + if self.core_cnt == -1: + self.core_cnt = find_core_cnt()[0] + + self.outlier_bp_dict, self.above_criterion_dict_dict = {}, {} + if animal_dict is None: + if self.animal_cnt == 1: + self.animal_id = read_config_entry(self.config, ConfigKey.MULTI_ANIMAL_ID_SETTING.value, ConfigKey.MULTI_ANIMAL_IDS.value, Dtypes.STR.value) + if self.animal_id != "None": + self.animal_bp_dict[self.animal_id] = self.animal_bp_dict.pop("Animal_1") + + for animal_name in self.animal_bp_dict.keys(): + self.outlier_bp_dict[animal_name] = {} + 
self.outlier_bp_dict[animal_name]["bp_1"] = read_config_entry(self.config,"Outlier settings", "movement_bodypart1_{}".format(animal_name.lower()),"str") + self.outlier_bp_dict[animal_name]["bp_2"] = read_config_entry(self.config,"Outlier settings", "movement_bodypart2_{}".format(animal_name.lower()),"str") + else: + self.outlier_bp_dict = animal_dict + + def run(self): + self.logs = [] + data_paths = find_files_of_filetypes_in_directory(directory=self.data_dir, extensions=[f'.{self.file_type}'], raise_error=True) + data_path_tuples = [(x) for x in data_paths] + with multiprocessing.Pool(self.core_cnt, maxtasksperchild=self.maxtasksperchild) as pool: + constants = functools.partial(_movement_outlier_corrector, + config=self, + animal_bp_dict=self.animal_bp_dict, + outlier_dict=self.outlier_bp_dict, + save_dir=self.save_dir, + file_type=self.file_type, + criterion=self.criterion) + for cnt, (video_name, log) in enumerate(pool.imap(constants, data_path_tuples, chunksize=1)): + print(f"Video {video_name} complete...") + self.logs.append(log) + + self.__save_log_file() + + def __save_log_file(self): + log_fn = os.path.join(self.logs_path, f"Outliers_movement_{self.datetime}.csv") + self.logs = pd.concat(self.logs, axis=0) + self.logs.to_csv(log_fn) + self.timer.stop_timer() + stdout_success(msg=f'Log for corrected "movement outliers" saved in {self.logs_path}', elapsed_time=self.timer.elapsed_time_str) + +# +# if __name__ == "__main__": +# #test = OutlierCorrecterMovementMultiProcess(config_path=r"C:\troubleshooting\two_black_animals_14bp\project_folder\project_config.ini") +# test = OutlierCorrecterMovementMultiProcess(config_path=r"C:\troubleshooting\mitra\project_folder\project_config.ini") +# test.run() +# diff --git a/simba/utils/checks.py b/simba/utils/checks.py index d2b5ed234..13eccb469 100644 --- a/simba/utils/checks.py +++ b/simba/utils/checks.py @@ -850,12 +850,12 @@ def check_valid_array(data: np.ndarray, ) if accepted_axis_0_shape is not None: - if 
data.ndim != 2: - raise ArrayError( - msg=f"Array not of acceptable dimension. Found {data.ndim}, accepted: 2, {source}", - source=check_valid_array.__name__, - ) - elif data.shape[0] not in accepted_axis_0_shape: + # if data.ndim != 2: + # raise ArrayError( + # msg=f"Array not of acceptable dimension. Found {data.ndim}, accepted: 2, {source}", + # source=check_valid_array.__name__, + # ) + if data.shape[0] not in accepted_axis_0_shape: raise ArrayError( msg=f"Array not of acceptable shape. Found {data.shape[0]} rows, accepted: {accepted_axis_0_shape}, {source}", source=check_valid_array.__name__,