gibbs

sgoldenlab · Nov 19, 2024 · 376d815 · 376d815
1 parent 26e7cca
commit 376d815
Show file tree

Hide file tree

Showing 9 changed files with 7,808 additions and 197 deletions.
diff --git a/docs/_static/img/YOLOVisualizer_2.webm b/docs/_static/img/YOLOVisualizer_2.webm
diff --git a/docs/nb/yolo_ex_2.ipynb b/docs/nb/yolo_ex_2.ipynb
diff --git a/docs/notebooks.rst b/docs/notebooks.rst
@@ -48,6 +48,7 @@ Some example operations pertaining to working with animals and environments as g
    nb/geometry_example_6
    nb/geometry_example_7
    nb/yolo_ex_1
+   nb/yolo_ex_2
 
 Miscellaneous
 -------------------

diff --git a/simba/data_processors/gibbs_sampler.py b/simba/data_processors/gibbs_sampler.py
diff --git a/simba/mixins/circular_statistics.py b/simba/mixins/circular_statistics.py
@@ -4,6 +4,10 @@
 
 import numpy as np
 from numba import float32, float64, int64, jit, njit, prange, typed, types
+from simba.utils.checks import check_valid_array, check_float
+from simba.utils.enums import Formats
+from simba.utils.errors import InvalidInputError
+from simba.utils.data import get_mode
 
 
 class CircularStatisticsMixin(object):
@@ -1202,9 +1206,10 @@ def rotational_direction(data: np.ndarray, stride: int = 1) -> np.ndarray:
            :align: center
 
         The result array contains values:
-        - `0` where there is no change between points.
-        - `1` where the angle has increased in the positive direction.
-        - `2` where the angle has decreased in the negative direction.
+        - `-1`: Indicates no rotation is possible for the first frame. This serves as a placeholder since there is no prior frame to compare to.
+        - `0`: Represents no change in the angular value between consecutive frames
+        - `1`: Indicates an increase in the angular value (rotation in the positive direction, counterclockwise)
+        - `2`: Indicates a decrease in the angular value (rotation in the negative direction, clockwise)
 
         :param np.ndarray data: 1D array of size len(frames) representing degrees.
         :return: An array of directional indicators.
@@ -1330,6 +1335,84 @@ def fit_circle(data: np.ndarray, max_iterations: Optional[int] = 400) -> np.ndar
 
         return results
 
+    @staticmethod
+    def preferred_turning_direction(x: np.ndarray) -> int:
+        """
+        Determine the preferred (most frequent) turning direction in a 1D array of circular directional data.
+
+        The frame-to-frame rotation codes are computed with
+        :func:`simba.mixins.circular_statistics.CircularStatisticsMixin.rotational_direction` and the mode of those
+        codes is returned. ``rotational_direction`` documents the first frame as ``-1`` (no prior frame to compare to);
+        that placeholder is excluded before the mode is taken, so it cannot be reported as a turning direction
+        when two or more frames are available.
+
+        .. note::
+           The input ``x`` can be created using any of the following methods:
+           - :func:`simba.mixins.circular_statistics.CircularStatisticsMixin.direction_two_bps`
+           - :func:`simba.data_processors.cuda.circular_statistics.direction_from_two_bps`
+           - :func:`simba.data_processors.cuda.circular_statistics.direction_from_three_bps`
+           - :func:`simba.mixins.circular_statistics.CircularStatisticsMixin.direction_three_bps`
+
+        .. seealso::
+           :func:`simba.mixins.circular_statistics.CircularStatisticsMixin.rotational_direction`, :func:`~simba.data_processors.cuda.circular_statistics.rotational_direction`,
+           :func:`simba.mixins.circular_statistics.CircularStatisticsMixin.sliding_preferred_turning_direction`
+
+        :param np.ndarray x: 1D array of circular directional data (values between 0 and 360, inclusive). The array represents angular directions measured in degrees.
+        :return:
+            The most frequent turning direction from the input data:
+            - `0`: No change in the angular value between consecutive frames.
+            - `1`: An increase in the angular value (rotation in the positive direction, counterclockwise).
+            - `2`: A decrease in the angular value (rotation in the negative direction, clockwise).
+            If ``x`` holds a single frame, no direction can be computed and the placeholder code is returned unchanged.
+        :rtype: int
+        :raises InvalidInputError: If any value in ``x`` is below 0 or above 360.
+
+        :example:
+        >>> x = np.random.randint(0, 361, (200,))
+        >>> CircularStatisticsMixin.preferred_turning_direction(x=x)
+        """
+
+        check_valid_array(data=x, source=CircularStatisticsMixin.preferred_turning_direction.__name__, accepted_ndims=(1,), accepted_dtypes=Formats.NUMERIC_DTYPES.value)
+        if np.max(x) > 360 or np.min(x) < 0:
+            raise InvalidInputError(msg='x has to be values between 0 and 360 inclusive', source=CircularStatisticsMixin.preferred_turning_direction.__name__)
+        rotational_direction = CircularStatisticsMixin.rotational_direction(data=x.astype(np.float32))
+        # Drop the first-frame placeholder so it cannot win (or tie) the mode on short inputs.
+        if rotational_direction.shape[0] > 1:
+            rotational_direction = rotational_direction[1:]
+        return get_mode(x=rotational_direction)
+
+    @staticmethod
+    def sliding_preferred_turning_direction(x: np.ndarray,
+                                            time_window: float,
+                                            sample_rate: float) -> np.ndarray:
+        """
+        Compute the preferred turning direction in sliding time windows over a 1D array of circular directional data.
+
+        The frame-to-frame rotation codes are computed once for the whole array. For every window of
+        ``int(time_window * sample_rate)`` consecutive frames (at least one frame), the most frequent code (mode)
+        is stored at the index of the last frame in the window.
+
+        .. seealso::
+           :func:`simba.mixins.circular_statistics.CircularStatisticsMixin.rotational_direction`, :func:`~simba.data_processors.cuda.circular_statistics.rotational_direction`,
+           :func:`simba.mixins.circular_statistics.CircularStatisticsMixin.preferred_turning_direction`
+
+        :param np.ndarray x: A 1D array of circular directional data (values between 0 and 360, inclusive). Each value represents an angular direction in degrees.
+        :param float time_window: The duration of the sliding window in seconds.
+        :param float sample_rate: The sampling rate of the data in Hz (samples per second) or FPS (frames per second).
+        :return:
+            A 1D int32 array with the same length as ``x`` holding the preferred turning direction of the window ending at each index:
+            - `0`: No change in angular values within the window.
+            - `1`: An increase in angular values (counterclockwise rotation).
+            - `2`: A decrease in angular values (clockwise rotation).
+            For indices before the first full window, the value is `-1`.
+        :rtype: np.ndarray
+        :raises InvalidInputError: If any value in ``x`` is below 0 or above 360.
+
+        :example:
+        >>> x = np.random.randint(0, 361, (213,))
+        >>> CircularStatisticsMixin.sliding_preferred_turning_direction(x=x, time_window=1, sample_rate=10)
+        """
+        source = CircularStatisticsMixin.sliding_preferred_turning_direction.__name__
+        check_valid_array(data=x, source=source, accepted_ndims=(1,), accepted_dtypes=Formats.NUMERIC_DTYPES.value)
+        if np.min(x) < 0 or np.max(x) > 360:
+            raise InvalidInputError(msg='x has to be values between 0 and 360 inclusive', source=source)
+        check_float(name=f'{source} time_window', value=time_window)
+        check_float(name=f'{source} sample_rate', value=sample_rate)
+        frame_directions = CircularStatisticsMixin.rotational_direction(data=x.astype(np.float32))
+        n_frames = x.shape[0]
+        # The window always spans at least one frame, also for very short time windows.
+        window_size = np.int64(np.max((1.0, (time_window * sample_rate))))
+        results = np.full(shape=(n_frames), fill_value=-1, dtype=np.int32)
+        for start in range(0, n_frames - window_size + 1):
+            end = start + window_size
+            # The window result is written at the index of the window's last frame.
+            results[end - 1] = get_mode(x=frame_directions[start:end])
+        return results.astype(np.int32)
+
 
 
 # data = np.array([260, 280, 300, 340, 360, 0, 10, 350, 0, 15]).astype(np.float32)

diff --git a/simba/mixins/timeseries_features_mixin.py b/simba/mixins/timeseries_features_mixin.py
@@ -2409,4 +2409,22 @@ def radial_dispersion_index(x: np.ndarray, reference_point: np.ndarray) -> float
         radial_distances = np.linalg.norm(x - reference_point, axis=1)
         return np.std(radial_distances) / np.mean(radial_distances)
 
+    @staticmethod
+    def avg_kinetic_energy(x: np.ndarray, mass: float, sample_rate: float) -> float:
+        """
+        Calculate the average kinetic energy of an object from its position time series.
+
+        The velocity along each axis is estimated with ``np.gradient`` using a time step of ``1 / sample_rate``
+        seconds. The kinetic energy at each sample is ``0.5 * mass * speed ** 2``, and the mean over all samples is returned.
+
+        :param np.ndarray x: A 2D NumPy array of shape (n, 2), where each row contains the x and y position coordinates of the object at each time step. ``np.gradient`` needs at least two rows.
+        :param float mass: The mass of the object.
+        :param float sample_rate: The sampling rate (Hz), i.e., the number of data points per second. Has to be greater than zero.
+        :return: The mean kinetic energy calculated from the velocity data, as a float32 value.
+        :rtype: float
+        """
+        # Use the exact sampling interval. Rounding it to two decimals skews the velocities for most
+        # frame rates (30 Hz gives 0.03 s rather than 0.0333 s) and rounds to zero at high sampling rates
+        # (e.g. 1000 Hz), which makes np.gradient divide by zero.
+        delta_t = 1.0 / sample_rate
+        vx, vy = np.gradient(x[:, 0], delta_t), np.gradient(x[:, 1], delta_t)
+        # speed ** 2 equals vx ** 2 + vy ** 2, so the square root is not needed.
+        kinetic_energy = 0.5 * mass * (vx ** 2 + vy ** 2)
+
+        return np.mean(kinetic_energy).astype(np.float32)
+
 
diff --git a/simba/model/regression/__init__.py b/simba/model/regression/__init__.py
diff --git a/simba/model/regression/metrics.py b/simba/model/regression/metrics.py
@@ -0,0 +1,139 @@
+from typing import Optional
+import numpy as np
+from simba.utils.checks import check_valid_array, check_float
+from simba.utils.enums import Formats
+
+def mean_absolute_percentage_error(y_true: np.ndarray,
+                                   y_pred: np.ndarray,
+                                   epsilon=1e-10,
+                                   weights: Optional[np.ndarray] = None) -> float:
+    """
+    Compute the Mean Absolute Percentage Error (MAPE) between the true and predicted values.
+
+    Zeros in ``y_true`` are replaced with ``epsilon`` before the relative error ``|y_true - y_pred| / |y_true|`` is computed.
+
+    :param np.ndarray y_true: The true values (dependent variable) of the dataset. Should be a 1D numeric array of shape (n,).
+    :param np.ndarray y_pred: The predicted values for the dataset. Should be a 1D numeric array of shape (n,) and of the same length as `y_true`.
+    :param float epsilon: A small pseudovalue that replaces zeros in `y_true` to avoid division by zero.
+    :param Optional[np.ndarray] weights: An optional 1D array of weights to apply to each error. If provided, the weighted mean absolute percentage error is computed.
+    :return: The MAPE as a float, in percentage format. A lower value indicates better prediction accuracy.
+    :rtype: float
+
+    :example:
+    >>> x, y = np.random.random(size=(100000,)), np.random.random(size=(100000,))
+    >>> mean_absolute_percentage_error(y_true=x, y_pred=y)
+    """
+
+    fn_name = mean_absolute_percentage_error.__name__
+    check_valid_array(data=y_true, source=fn_name, accepted_ndims=(1,), accepted_dtypes=Formats.NUMERIC_DTYPES.value)
+    check_valid_array(data=y_pred, source=fn_name, accepted_ndims=(1,), min_axis_0=y_true.shape[0], accepted_dtypes=Formats.NUMERIC_DTYPES.value)
+    check_float(name=fn_name, value=epsilon)
+    # Swap exact zeros for epsilon so the relative error stays defined.
+    safe_true = np.where(y_true == 0, epsilon, y_true)
+    relative_error = np.abs((safe_true - y_pred) / safe_true)
+    if weights is None:
+        return np.mean(relative_error * 100)
+    check_valid_array(data=weights, source=fn_name, accepted_ndims=(1,), min_axis_0=safe_true.shape[0], accepted_dtypes=Formats.NUMERIC_DTYPES.value)
+    weighted_error = relative_error * weights
+    return (np.sum(weighted_error) / np.sum(weights)) * 100
+
+
+def mean_squared_error(y_true: np.ndarray,
+                       y_pred: np.ndarray,
+                       weights: Optional[np.ndarray] = None) -> float:
+    """
+    Compute the Mean Squared Error (MSE) between the true and predicted values.
+
+    :param np.ndarray y_true: The array containing the true values (dependent variable) of the dataset. Should be a 1D numeric array of shape (n,).
+    :param np.ndarray y_pred: The array containing the predicted values for the dataset. Should be a 1D numeric array of shape (n,) and of the same length as `y_true`.
+    :param Optional[np.ndarray] weights: An optional 1D array of weights to apply to each squared error. If provided, the weighted mean squared error (sum of weighted squared errors divided by the sum of the weights) is computed.
+    :return: The Mean Squared Error (MSE) as a float. A lower value indicates better model accuracy.
+    :rtype: float
+    """
+
+    # Report validation failures under this function's own name rather than the MAPE function's name.
+    source = mean_squared_error.__name__
+    check_valid_array(data=y_true, source=source, accepted_ndims=(1,), accepted_dtypes=Formats.NUMERIC_DTYPES.value)
+    check_valid_array(data=y_pred, source=source, accepted_ndims=(1,), min_axis_0=y_true.shape[0], accepted_dtypes=Formats.NUMERIC_DTYPES.value)
+    se = (y_true - y_pred) ** 2
+    if weights is not None:
+        check_valid_array(data=weights, source=source, accepted_ndims=(1,), min_axis_0=y_true.shape[0], accepted_dtypes=Formats.NUMERIC_DTYPES.value)
+        se = se * weights
+        return np.sum(se) / np.sum(weights)
+    else:
+        return np.mean(se)
+
+def mean_absolute_error(y_true: np.ndarray,
+                        y_pred: np.ndarray,
+                        weights: Optional[np.ndarray] = None) -> float:
+    """
+    Compute the Mean Absolute Error (MAE) between the true and predicted values.
+
+    :param np.ndarray y_true: A 1D numeric array of true values (ground truth).
+    :param np.ndarray y_pred: A 1D numeric array of predicted values, of the same length as `y_true`.
+    :param Optional[np.ndarray] weights: An optional 1D array of weights for each observation. If provided, the weighted MAE (sum of weighted absolute errors divided by the sum of the weights) is computed.
+    :return: The Mean Absolute Error (MAE) as a float. A lower value indicates a better fit.
+    :rtype: float
+    """
+
+    # Report validation failures under this function's own name rather than the MAPE function's name.
+    source = mean_absolute_error.__name__
+    check_valid_array(data=y_true, source=source, accepted_ndims=(1,), accepted_dtypes=Formats.NUMERIC_DTYPES.value)
+    check_valid_array(data=y_pred, source=source, accepted_ndims=(1,), min_axis_0=y_true.shape[0], accepted_dtypes=Formats.NUMERIC_DTYPES.value)
+    absolute_error = np.abs(y_true - y_pred)
+    if weights is not None:
+        check_valid_array(data=weights, source=source, accepted_ndims=(1,), min_axis_0=y_true.shape[0], accepted_dtypes=Formats.NUMERIC_DTYPES.value)
+        absolute_error = absolute_error * weights
+        return np.sum(absolute_error) / np.sum(weights)
+    else:
+        return np.mean(absolute_error)
+
+
+def r2_score(y_true: np.ndarray, y_pred: np.ndarray, weights: Optional[np.ndarray] = None) -> float:
+    """
+    Compute the R^2 (coefficient of determination) score.
+
+    Calculated as ``1 - (SS_residual / SS_total)``. ``SS_total`` is taken around the mean of ``y_true``. When
+    ``weights`` is provided, the mean and both sums of squares are weighted.
+
+    .. note::
+       If ``y_true`` is constant, ``SS_total`` is zero and the division yields a non-finite value (nan or inf).
+
+    :param np.ndarray y_true: 1D array of true values (dependent variable).
+    :param np.ndarray y_pred: 1D array of predicted values, same length as `y_true`.
+    :param Optional[np.ndarray] weights: Optional 1D numeric array of weights for each observation.
+    :return: The R^2 score as a float. A value closer to 1 indicates better fit.
+    :rtype: float
+    """
+
+    check_valid_array(data=y_true, source=r2_score.__name__, accepted_ndims=(1,), accepted_dtypes=Formats.NUMERIC_DTYPES.value)
+    check_valid_array(data=y_pred, source=r2_score.__name__, accepted_ndims=(1,), min_axis_0=y_true.shape[0], accepted_dtypes=Formats.NUMERIC_DTYPES.value)
+
+    if weights is not None:
+        # Validate the dtype as well, as is done for y_true and y_pred.
+        check_valid_array(data=weights, source=r2_score.__name__, accepted_ndims=(1,), min_axis_0=y_true.shape[0], accepted_dtypes=Formats.NUMERIC_DTYPES.value)
+
+    # np.average falls back to the plain mean when weights is None.
+    y_mean = np.average(y_true, weights=weights)
+    residuals, total = (y_true - y_pred) ** 2, (y_true - y_mean) ** 2
+
+    if weights is not None:
+        residuals, total = residuals * weights, total * weights
+
+    return 1 - (np.sum(residuals) / np.sum(total))
+
+
+def root_mean_squared_error(y_true: np.ndarray,
+                            y_pred: np.ndarray,
+                            weights: Optional[np.ndarray] = None) -> float:
+    """
+    Compute the Root Mean Squared Error (RMSE) between the true and predicted values.
+
+    :param np.ndarray y_true: The array containing the true values (dependent variable) of the dataset. Should be a 1D numeric array of shape (n,).
+    :param np.ndarray y_pred: The array containing the predicted values for the dataset. Should be a 1D numeric array of shape (n,) and of the same length as `y_true`.
+    :param Optional[np.ndarray] weights: An optional 1D array of weights to apply to each squared error. If provided, the square root of the weighted mean squared error is computed.
+    :return: The Root Mean Squared Error (RMSE) as a float. A lower value indicates better model accuracy.
+    :rtype: float
+    """
+
+    # Report validation failures under this function's own name rather than the MAPE function's name.
+    source = root_mean_squared_error.__name__
+    check_valid_array(data=y_true, source=source, accepted_ndims=(1,), accepted_dtypes=Formats.NUMERIC_DTYPES.value)
+    check_valid_array(data=y_pred, source=source, accepted_ndims=(1,), min_axis_0=y_true.shape[0], accepted_dtypes=Formats.NUMERIC_DTYPES.value)
+    se = (y_true - y_pred) ** 2
+    if weights is not None:
+        check_valid_array(data=weights, source=source, accepted_ndims=(1,), min_axis_0=y_true.shape[0], accepted_dtypes=Formats.NUMERIC_DTYPES.value)
+        weighted_mse = np.sum(se * weights) / np.sum(weights)
+        return np.sqrt(weighted_mse)
+    else:
+        return np.sqrt(np.mean(se))