Skip to content

Commit

Permalink
uml
Browse files Browse the repository at this point in the history
  • Loading branch information
sronilsson committed Dec 20, 2024
1 parent 659cced commit 8b301d2
Show file tree
Hide file tree
Showing 19 changed files with 1,247 additions and 69 deletions.
117 changes: 114 additions & 3 deletions simba/mixins/statistics_mixin.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,13 @@
types)
from scipy import stats
from scipy.stats.distributions import chi2
from statsmodels.stats.libqsturng import psturng
from statsmodels.stats.multicomp import pairwise_tukeyhsd
from sklearn.covariance import EllipticEnvelope
from sklearn.ensemble import IsolationForest

from simba.mixins.feature_extraction_mixin import FeatureExtractionMixin
from simba.utils.checks import (check_float, check_int, check_str,
check_valid_array, check_valid_dataframe)
from simba.utils.checks import (check_float, check_int, check_str, check_valid_array, check_valid_dataframe, check_valid_lst)
from simba.utils.data import bucket_data, fast_mean_rank
from simba.utils.enums import Formats, Options
from simba.utils.errors import CountError, InvalidInputError
Expand Down Expand Up @@ -409,7 +410,7 @@ def one_way_anova(
:rtype: Tuple[float, float]
:example:
>>> sample_1 = np.array([1, 2, 3, 1, 3, 2, 1, 10, 8, 4, 10])
>>> saxfmple_1 = np.array([1, 2, 3, 1, 3, 2, 1, 10, 8, 4, 10])
>>> sample_2 = np.array([8, 5, 5, 8, 8, 9, 10, 1, 7, 10, 10])
>>> Statistics().one_way_anova(sample_1=sample_2, sample_2=sample_1)
"""
Expand Down Expand Up @@ -4377,3 +4378,113 @@ def sliding_iqr(x: np.ndarray, window_size: float, sample_rate: float) -> np.nda
results[r - 1] = upper_val - lower_val
return results

@staticmethod
def one_way_anova_scipy(x: np.ndarray,
                        y: np.ndarray,
                        variable_names: List[str],
                        x_name: str = '',
                        y_name: str = '') -> pd.DataFrame:
    """
    Compute one-way ANOVAs comparing each column (axis 1) of two arrays.

    .. note::
       Use for computing and presenting aggregate statistics. Not suitable for featurization.

    .. seealso::
       For featurization instead use :func:`simba.mixins.statistics_mixin.Statistics.rolling_one_way_anova` or
       :func:`simba.mixins.statistics_mixin.Statistics.one_way_anova`

    :param np.ndarray x: First 2d array with observations rowwise and variables columnwise.
    :param np.ndarray y: Second 2d array with observations rowwise and variables columnwise. Must have the same number of columns as x.
    :param List[str] variable_names: Column-wise variable names. Same length as the number of data columns.
    :param str x_name: Name of the first group (x).
    :param str y_name: Name of the second group (y).
    :return: Dataframe with one row per column holding the ANOVA F-statistic and P-value comparing that variable between x and y.
    :rtype: pd.DataFrame
    """
    src = Statistics.one_way_anova_scipy.__name__
    check_valid_array(data=x, source=f'{src} x', accepted_ndims=(2,), accepted_dtypes=Formats.NUMERIC_DTYPES.value)
    check_valid_array(data=y, source=f'{src} y', accepted_ndims=(2,), accepted_dtypes=Formats.NUMERIC_DTYPES.value, accepted_axis_1_shape=(x.shape[1],))
    check_str(name=f'{src} x_name', value=x_name, allow_blank=True)
    check_str(name=f'{src} y_name', value=y_name, allow_blank=True)
    check_valid_lst(source=f'{src} variable_names', data=variable_names, valid_dtypes=(str,), exact_len=x.shape[1])
    # scipy operates column-wise on 2d inputs, yielding one statistic per variable.
    f_vals, p_vals = stats.f_oneway(x, y)
    out = pd.DataFrame(variable_names, columns=['FEATURE'])
    out[['GROUP_1', 'GROUP_2']] = x_name, y_name
    out['F-STATISTIC'] = f_vals
    out['P-VALUE'] = np.round(p_vals, 8)
    return out

@staticmethod
def kruskal_scipy(x: np.ndarray,
                  y: np.ndarray,
                  variable_names: List[str],
                  x_name: str = '',
                  y_name: str = '') -> pd.DataFrame:
    """
    Compute Kruskal-Wallis tests comparing each column (axis 1) of two arrays.

    .. note::
       Use for computing and presenting aggregate statistics. Not suitable for featurization.

    .. seealso::
       For featurization instead use :func:`simba.mixins.statistics_mixin.Statistics.kruskal_wallis`

    :param np.ndarray x: First 2d array with observations rowwise and variables columnwise.
    :param np.ndarray y: Second 2d array with observations rowwise and variables columnwise. Must have the same number of columns as x.
    :param List[str] variable_names: Column-wise variable names. Same length as the number of data columns.
    :param str x_name: Name of the first group (x).
    :param str y_name: Name of the second group (y).
    :return: Dataframe with one row per column holding the Kruskal-Wallis statistic and P-value comparing that variable between x and y.
    :rtype: pd.DataFrame
    """
    src = Statistics.kruskal_scipy.__name__
    check_valid_array(data=x, source=f'{src} x', accepted_ndims=(2,), accepted_dtypes=Formats.NUMERIC_DTYPES.value)
    check_valid_array(data=y, source=f'{src} y', accepted_ndims=(2,), accepted_dtypes=Formats.NUMERIC_DTYPES.value, accepted_axis_1_shape=(x.shape[1],))
    check_str(name=f'{src} x_name', value=x_name, allow_blank=True)
    check_str(name=f'{src} y_name', value=y_name, allow_blank=True)
    check_valid_lst(source=f'{src} variable_names', data=variable_names, valid_dtypes=(str,), exact_len=x.shape[1])
    stat_vals, p_vals = stats.kruskal(x, y)
    out = pd.DataFrame(variable_names, columns=['FEATURE'])
    out[['GROUP_1', 'GROUP_2']] = x_name, y_name
    out['STATISTIC'] = stat_vals
    out['P-VALUE'] = np.round(p_vals, 8)
    return out



@staticmethod
def pairwise_tukeyhsd_scipy(data: np.ndarray,
                            group: np.ndarray,
                            variable_names: List[str],
                            verbose: bool = False) -> pd.DataFrame:

    """
    Compute pairwise grouped Tukey-HSD tests, one per data column.

    .. note::
       Use for computing and presenting aggregate statistics. Not suitable for featurization.

    :param np.ndarray data: 2D array with observations rowwise (axis 0) and features columnwise (axis 1)
    :param np.ndarray group: 1D array with the same number of observations as rows in ``data`` containing the group for each sample.
    :param List[str, ...] variable_names: Names of columnwise variable names. Same length as number of data columns.
    :param bool verbose: If True, print progress for each variable. Default: False.
    :return: Dataframe comparing each group for each variable.
    :rtype: pd.DataFrame
    """

    check_valid_array(data=data, source=f'{Statistics.pairwise_tukeyhsd_scipy.__name__} data', accepted_ndims=(2,), accepted_dtypes=Formats.NUMERIC_DTYPES.value)
    check_valid_array(data=group, source=f'{Statistics.pairwise_tukeyhsd_scipy.__name__} group', accepted_ndims=(1,), accepted_dtypes=Formats.NUMERIC_DTYPES.value, accepted_axis_0_shape=(data.shape[0],))
    check_valid_lst(source=f'{Statistics.pairwise_tukeyhsd_scipy.__name__} variable_names', data=variable_names, valid_dtypes=(str,), exact_len=data.shape[1])
    results = []
    # One Tukey-HSD across all groups for each column (variable) in turn.
    for var in range(data.shape[1]):
        if verbose:
            print(f'Computing Tukey HSD for variable {var+1}/{data.shape[1]}...')
        tukey_data = pairwise_tukeyhsd(data[:, var], group)
        # NOTE(review): relies on statsmodels' private `_results_table` attribute of
        # TukeyHSDResults — may break across statsmodels versions; confirm on upgrade.
        df = pd.DataFrame(data=tukey_data._results_table.data[1:], columns=tukey_data._results_table.data[0])
        # Recompute p-values from the studentized range distribution (psturng) using
        # mean differences scaled by the pairwise standard errors — presumably to get
        # unrounded values beyond the results table's p-adj column; confirm intent.
        df['P-VALUE'] = psturng(np.abs(tukey_data.meandiffs / tukey_data.std_pairs), len(tukey_data.groupsunique), tukey_data.df_total)
        df['FEATURE'] = variable_names[var]
        results.append(df)

    # Stack the per-variable comparison tables into one long dataframe.
    return pd.concat(results, axis=0)
117 changes: 117 additions & 0 deletions simba/sandbox/bg_remover.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
import os
from copy import deepcopy
from typing import Optional, Tuple, Union

import cv2
import numpy as np
try:
from typing import Literal
except:
from typing_extensions import Literal

from simba.utils.checks import (check_file_exist_and_readable,check_if_dir_exists)
from simba.utils.enums import Formats
from simba.utils.printing import SimbaTimer, stdout_success
from simba.utils.read_write import (get_fn_ext, get_video_meta_data)
from simba.video_processors.video_processing import create_average_frm

def video_bg_subtraction(video_path: Union[str, os.PathLike],
                         bg_video_path: Optional[Union[str, os.PathLike]] = None,
                         bg_start_frm: Optional[int] = None,
                         bg_end_frm: Optional[int] = None,
                         bg_start_time: Optional[str] = None,
                         bg_end_time: Optional[str] = None,
                         bg_color: Optional[Tuple[int, int, int]] = (0, 0, 0),
                         fg_color: Optional[Tuple[int, int, int]] = None,
                         save_path: Optional[Union[str, os.PathLike]] = None,
                         threshold: Optional[int] = 50,
                         verbose: Optional[bool] = True) -> None:
    """
    Subtract the background from a video.

    .. video:: _static/img/video_bg_subtraction.webm
       :width: 800
       :autoplay:
       :loop:

    .. video:: _static/img/bg_remover_example_1.webm
       :width: 800
       :autoplay:
       :loop:

    .. video:: _static/img/bg_remover_example_2.webm
       :width: 800
       :autoplay:
       :loop:

    .. note::
       If ``bg_video_path`` is passed, that video will be used to parse the background. If None, ``video_path`` will be used to parse the background.

       Either pass ``bg_start_frm`` and ``bg_end_frm`` OR ``bg_start_time`` and ``bg_end_time`` OR pass all four arguments as None.
       Those two arguments will be used to slice the background video, and the sliced part is used to parse the background.

       For example, in the scenario where there is **no** animal in the ``video_path`` video for the first 20s, then the first 20s can be used to parse the background.
       In this scenario, ``bg_video_path`` can be passed as ``None`` and bg_start_time and bg_end_time can be ``00:00:00`` and ``00:00:20``, respectively.

       In the scenario where there **is** animal(s) in the entire ``video_path`` video, pass ``bg_video_path`` as a path to a video recording the arena without the animals.

    :param Union[str, os.PathLike] video_path: The path to the video to remove the background from.
    :param Optional[Union[str, os.PathLike]] bg_video_path: Path to the video which contains a segment with the background only. If None, then ``video_path`` will be used.
    :param Optional[int] bg_start_frm: The first frame in the background video to use when creating a representative background image. Default: None.
    :param Optional[int] bg_end_frm: The last frame in the background video to use when creating a representative background image. Default: None.
    :param Optional[str] bg_start_time: The start timestamp in `HH:MM:SS` format in the background video to use to create a representative background image. Default: None.
    :param Optional[str] bg_end_time: The end timestamp in `HH:MM:SS` format in the background video to use to create a representative background image. Default: None.
    :param Optional[Tuple[int, int, int]] bg_color: The RGB color of the background in the output video. Defaults to black (0, 0, 0).
    :param Optional[Tuple[int, int, int]] fg_color: The RGB color of the moving objects in the output video. Defaults to None, which keeps the original colors of the moving objects.
    :param Optional[Union[str, os.PathLike]] save_path: The path to where to save the output video where the background is removed. If None, saves the output video in the same directory as the input video with the ``_bg_subtracted`` suffix. Default: None.
    :param Optional[int] threshold: Grayscale difference (0-255) below which a pixel is treated as background. Default: 50.
    :param Optional[bool] verbose: If True, print per-frame progress and a success message. Default: True.
    :return: None.

    :example:
    >>> video_bg_subtraction(video_path='/Users/simon/Downloads/1_LH_cropped.mp4', bg_start_time='00:00:00', bg_end_time='00:00:10', bg_color=(0, 106, 167), fg_color=(254, 204, 2))
    """

    timer = SimbaTimer(start=True)
    check_file_exist_and_readable(file_path=video_path)
    if bg_video_path is None:
        bg_video_path = deepcopy(video_path)
    video_meta_data = get_video_meta_data(video_path=video_path)
    dir, video_name, ext = get_fn_ext(filepath=video_path)
    if save_path is None:
        save_path = os.path.join(dir, f'{video_name}_bg_subtracted{ext}')
    else:
        check_if_dir_exists(in_dir=os.path.dirname(save_path), source=video_bg_subtraction.__name__)
    fourcc = cv2.VideoWriter_fourcc(*Formats.MP4_CODEC.value)
    writer = cv2.VideoWriter(save_path, fourcc, video_meta_data['fps'], (video_meta_data['width'], video_meta_data['height']))
    bg_frm = create_average_frm(video_path=bg_video_path, start_frm=bg_start_frm, end_frm=bg_end_frm, start_time=bg_start_time, end_time=bg_end_time)
    bg_frm = cv2.resize(bg_frm, (video_meta_data['width'], video_meta_data['height']))
    cap = cv2.VideoCapture(video_path)
    frm_cnt = 0
    while True:
        ret, frm = cap.read()
        # BUGFIX: the original tested `ret` twice, with a dead `if not ret: break`
        # nested inside the success branch; a single guard clause suffices.
        if not ret:
            break
        # BUGFIX: `np.abs(frm - bg_frm)` wraps around on uint8 frames (e.g. 10 - 20
        # == 246), mis-measuring dark-on-light differences. Compute the true absolute
        # difference in a signed dtype; the result (<= 255) fits back into uint8.
        img_diff = np.abs(frm.astype(np.int16) - bg_frm.astype(np.int16)).astype(np.uint8)
        gray_diff = cv2.cvtColor(img_diff, cv2.COLOR_BGR2GRAY)
        mask = np.where(gray_diff < threshold, 0, 1)  # 0 = background, 1 = foreground
        out_img = np.full_like(frm, fill_value=bg_color)
        if fg_color is None:
            out_img[mask == 1] = frm[mask == 1]  # keep original foreground pixels
        else:
            out_img[mask == 1] = fg_color
        writer.write(out_img)
        frm_cnt += 1
        if verbose:
            print(f'Background subtraction frame {frm_cnt}/{video_meta_data["frame_count"]} (Video: {video_name})')

    writer.release()
    cap.release()
    timer.stop_timer()
    if verbose:
        stdout_success(msg=f'Background subtracted from {video_name} and saved at {save_path}', elapsed_time=timer.elapsed_time)



# Ad-hoc developer smoke test with a machine-local path. NOTE(review): with
# threshold=255 virtually every pixel is classified as background (gray diffs are
# always <= 255); consider a lower threshold, and guarding this call with
# `if __name__ == "__main__":` so importing the module has no side effects.
video_bg_subtraction(video_path='/Users/simon/Desktop/envs/simba/troubleshooting/mitra/project_folder/videos/501_MA142_Gi_CNO_0514_clipped.mp4',
                     fg_color=(255, 0, 0), threshold=255)
63 changes: 63 additions & 0 deletions simba/sandbox/bout_aggregator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
import os
from copy import deepcopy
from typing import Literal, Optional, List, Union
try:
from typing import Literal
except:
from typing_extensions import Literal
import pandas as pd
from simba.utils.checks import check_valid_lst, check_int, check_str, check_valid_dataframe, check_instance
from simba.utils.read_write import find_core_cnt, read_video_info
from simba.utils.printing import SimbaTimer
from simba.utils.enums import Formats
from simba.utils.data import detect_bouts, read_df
from simba.utils.errors import InvalidInputError

def video_bout_aggregator(data: Union[str, os.PathLike, pd.DataFrame],
                          clfs: List[str],
                          feature_names: List[str],
                          sample_rate: int,
                          min_bout_length: Optional[int] = None,
                          method: Optional[Literal["MEAN", "MEDIAN"]] = "MEAN") -> pd.DataFrame:
    """
    Aggregate feature values within classified behavior bouts (work in progress).

    .. note::
       NOTE(review): the aggregation itself is not yet implemented — the function
       currently validates and loads the input, then returns None.

    :param Union[str, os.PathLike, pd.DataFrame] data: Path to a CSV holding feature and classifier columns, or a pre-loaded DataFrame.
    :param List[str] clfs: Names of the classifier columns in ``data``.
    :param List[str] feature_names: Names of the feature columns in ``data``.
    :param int sample_rate: Sample rate (e.g., FPS) of the data.
    :param Optional[int] min_bout_length: Minimum bout length to include. If None, 0 is used.
    :param Optional[Literal["MEAN", "MEDIAN"]] method: Aggregation statistic. Default: "MEAN".
    :rtype: pd.DataFrame
    """
    check_valid_lst(data=clfs, source=f"{video_bout_aggregator.__name__} clfs", valid_dtypes=(str,), min_len=1)
    check_valid_lst(data=feature_names, source=f"{video_bout_aggregator.__name__} feature_names", valid_dtypes=(str,), min_len=1)
    check_instance(source=f'{video_bout_aggregator.__name__} data', accepted_types=(str, pd.DataFrame), instance=data)
    if isinstance(data, (str, os.PathLike)):
        # BUGFIX: read from the `data` argument (the original referenced the
        # module-level `data_path`, silently ignoring the caller's argument).
        df = read_df(file_path=data, file_type='csv', usecols=feature_names + clfs)
    elif isinstance(data, pd.DataFrame):
        df = deepcopy(data)
    else:
        # BUGFIX: `df` is unbound on this branch — report the type of `data` instead.
        raise InvalidInputError(msg=f'data is of invalid type: {type(data)}, accepted: {str, os.PathLike, pd.DataFrame}', source=video_bout_aggregator.__name__)
    # BUGFIX: validate the loaded frame (`df`); `data` may be a path string here.
    check_valid_dataframe(df=df, source=f"{video_bout_aggregator.__name__} data", valid_dtypes=Formats.NUMERIC_DTYPES.value, required_fields=feature_names + clfs)
    # BUGFIX: the check name said "data" while validating `sample_rate`.
    check_int(name=f"{video_bout_aggregator.__name__} sample_rate", value=sample_rate, min_value=10e-6)
    if min_bout_length is not None:
        check_int(name=f"{video_bout_aggregator.__name__} min_bout_length", value=min_bout_length, min_value=0)
    else:
        min_bout_length = 0
    check_str(name=f"{video_bout_aggregator.__name__} method", value=method, options=("MEAN", "MEDIAN"))
    # TODO(review): implement bout detection (detect_bouts) and per-bout MEAN/MEDIAN
    # aggregation; the original commented-out scaffold for this was removed.



# Ad-hoc developer smoke test with a machine-local path.
data_path = '/Users/simon/Desktop/envs/simba/troubleshooting/mitra/project_folder/csv/input_csv/501_MA142_Gi_CNO_0521.csv'

# NOTE(review): this call omits the required `clfs`, `feature_names` and
# `sample_rate` arguments and will raise a TypeError as written — supply them,
# or guard with `if __name__ == "__main__":` while the function is WIP.
video_bout_aggregator(data=data_path)
24 changes: 24 additions & 0 deletions simba/sandbox/direction_reversals.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import numpy as np



def direction_switches(x: np.ndarray, switch_degree: int = 180):
    """
    Work-in-progress: detect heading reversals in a sequence of directions.

    Currently only inspects the first sample and prints intermediate values;
    switch counting is not yet implemented and the function returns None.

    :param np.ndarray x: 1D array of headings in degrees.
    :param int switch_degree: Angular offset (degrees) that defines a direction switch. Default: 180.
    :return: None.
    """
    idx = 0
    cDeg = x[idx]  # heading of the current sample, in degrees
    # BUGFIX: the original unpacked two names from a single scalar expression
    # (`tDeg1, tDeg2 = (...) % 360`), which raises TypeError. Compute the two
    # candidate target headings — one in each rotational direction — separately,
    # normalized to [0, 360).
    tDeg1 = ((cDeg + switch_degree) % 360 + 360) % 360
    tDeg2 = ((cDeg - switch_degree) % 360 + 360) % 360

    print(cDeg)
    print(tDeg1)

    # TODO(review): implement the actual switch detection/counting over `x`.




# Ad-hoc smoke test: 100 random integer headings in [0, 360].
x = np.random.randint(0, 361, (100))
direction_switches(x=x)
12 changes: 12 additions & 0 deletions simba/sandbox/egocentric_align_nb.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# In this notebook, we will "egocentrically" align pose estimation and pose-estimated video data.

# This means that we will rotate the data, so that the animal, in every frame, is always "anchored" in the same location and directing to the same location.
# (i) One body-part (e.g., the center or the tail-base of the animal) is always located in the same pixel location of the video.
# (ii) A second body-part (e.g., the nose, head, or nape) is always directing N degrees from the anchor point.

# In short - we rotate the data so that the animal is always facing to the right, and the animal is always located at
# the center of the image.




Loading

0 comments on commit 8b301d2

Please sign in to comment.