Skip to content

Commit

Permalink
uml
Browse files Browse the repository at this point in the history
  • Loading branch information
sronilsson committed Dec 20, 2024
1 parent 659cced commit 8b301d2
Show file tree
Hide file tree
Showing 19 changed files with 1,247 additions and 69 deletions.
117 changes: 114 additions & 3 deletions simba/mixins/statistics_mixin.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,13 @@
types)
from scipy import stats
from scipy.stats.distributions import chi2
from statsmodels.stats.libqsturng import psturng
from statsmodels.stats.multicomp import pairwise_tukeyhsd
from sklearn.covariance import EllipticEnvelope
from sklearn.ensemble import IsolationForest

from simba.mixins.feature_extraction_mixin import FeatureExtractionMixin
from simba.utils.checks import (check_float, check_int, check_str,
check_valid_array, check_valid_dataframe)
from simba.utils.checks import (check_float, check_int, check_str, check_valid_array, check_valid_dataframe, check_valid_lst)
from simba.utils.data import bucket_data, fast_mean_rank
from simba.utils.enums import Formats, Options
from simba.utils.errors import CountError, InvalidInputError
Expand Down Expand Up @@ -409,7 +410,7 @@ def one_way_anova(
:rtype: Tuple[float, float]
:example:
>>> sample_1 = np.array([1, 2, 3, 1, 3, 2, 1, 10, 8, 4, 10])
>>> saxfmple_1 = np.array([1, 2, 3, 1, 3, 2, 1, 10, 8, 4, 10])
>>> sample_2 = np.array([8, 5, 5, 8, 8, 9, 10, 1, 7, 10, 10])
>>> Statistics().one_way_anova(sample_1=sample_2, sample_2=sample_1)
"""
Expand Down Expand Up @@ -4377,3 +4378,113 @@ def sliding_iqr(x: np.ndarray, window_size: float, sample_rate: float) -> np.nda
results[r - 1] = upper_val - lower_val
return results

@staticmethod
def one_way_anova_scipy(x: np.ndarray,
                        y: np.ndarray,
                        variable_names: List[str],
                        x_name: str = '',
                        y_name: str = '') -> pd.DataFrame:
    """
    Compute one-way ANOVAs comparing each column (axis 1) of two arrays.

    .. note::
       Use for computing and presenting aggregate statistics. Not suitable for featurization.

    .. seealso::
       For featurization instead use :func:`simba.mixins.statistics_mixin.Statistics.rolling_one_way_anova` or
       :func:`simba.mixins.statistics_mixin.Statistics.one_way_anova`

    :param np.ndarray x: First 2d array with observations rowwise and variables columnwise.
    :param np.ndarray y: Second 2d array with observations rowwise and variables columnwise. Must have the same number of columns as x.
    :param List[str] variable_names: Column-wise variable names. Same length as the number of data columns.
    :param str x_name: Name of the first group (x).
    :param str y_name: Name of the second group (y).
    :return: Dataframe with one row per column holding the ANOVA F-statistic and P-value comparing that variable between x and y.
    :rtype: pd.DataFrame
    """
    src = Statistics.one_way_anova_scipy.__name__
    check_valid_array(data=x, source=f'{src} x', accepted_ndims=(2,), accepted_dtypes=Formats.NUMERIC_DTYPES.value)
    check_valid_array(data=y, source=f'{src} y', accepted_ndims=(2,), accepted_dtypes=Formats.NUMERIC_DTYPES.value, accepted_axis_1_shape=(x.shape[1],))
    check_str(name=f'{src} x_name', value=x_name, allow_blank=True)
    check_str(name=f'{src} y_name', value=y_name, allow_blank=True)
    check_valid_lst(source=f'{src} variable_names', data=variable_names, valid_dtypes=(str,), exact_len=x.shape[1])
    # scipy operates column-wise on 2d inputs, yielding one statistic per variable.
    f_vals, p_vals = stats.f_oneway(x, y)
    out = pd.DataFrame(variable_names, columns=['FEATURE'])
    out[['GROUP_1', 'GROUP_2']] = x_name, y_name
    out['F-STATISTIC'] = f_vals
    out['P-VALUE'] = np.round(p_vals, 8)
    return out

@staticmethod
def kruskal_scipy(x: np.ndarray,
                  y: np.ndarray,
                  variable_names: List[str],
                  x_name: str = '',
                  y_name: str = '') -> pd.DataFrame:
    """
    Compute Kruskal-Wallis tests comparing each column (axis 1) of two arrays.

    .. note::
       Use for computing and presenting aggregate statistics. Not suitable for featurization.

    .. seealso::
       For featurization instead use :func:`simba.mixins.statistics_mixin.Statistics.kruskal_wallis`

    :param np.ndarray x: First 2d array with observations rowwise and variables columnwise.
    :param np.ndarray y: Second 2d array with observations rowwise and variables columnwise. Must have the same number of columns as x.
    :param List[str] variable_names: Column-wise variable names. Same length as the number of data columns.
    :param str x_name: Name of the first group (x).
    :param str y_name: Name of the second group (y).
    :return: Dataframe with one row per column holding the Kruskal-Wallis statistic and P-value comparing that variable between x and y.
    :rtype: pd.DataFrame
    """
    src = Statistics.kruskal_scipy.__name__
    check_valid_array(data=x, source=f'{src} x', accepted_ndims=(2,), accepted_dtypes=Formats.NUMERIC_DTYPES.value)
    check_valid_array(data=y, source=f'{src} y', accepted_ndims=(2,), accepted_dtypes=Formats.NUMERIC_DTYPES.value, accepted_axis_1_shape=(x.shape[1],))
    check_str(name=f'{src} x_name', value=x_name, allow_blank=True)
    check_str(name=f'{src} y_name', value=y_name, allow_blank=True)
    check_valid_lst(source=f'{src} variable_names', data=variable_names, valid_dtypes=(str,), exact_len=x.shape[1])
    stat_vals, p_vals = stats.kruskal(x, y)
    out = pd.DataFrame(variable_names, columns=['FEATURE'])
    out[['GROUP_1', 'GROUP_2']] = x_name, y_name
    out['STATISTIC'] = stat_vals
    out['P-VALUE'] = np.round(p_vals, 8)
    return out



@staticmethod
def pairwise_tukeyhsd_scipy(data: np.ndarray,
                            group: np.ndarray,
                            variable_names: List[str],
                            verbose: bool = False) -> pd.DataFrame:

    """
    Compute pairwise grouped Tukey-HSD tests, one per data column.

    .. note::
       Use for computing and presenting aggregate statistics. Not suitable for featurization.

    :param np.ndarray data: 2D array with observations rowwise (axis 0) and features columnwise (axis 1)
    :param np.ndarray group: 1D array with the same number of observations as rows in ``data`` containing the group for each sample.
    :param List[str, ...] variable_names: Names of columnwise variable names. Same length as number of data columns.
    :param bool verbose: If True, print progress for each variable. Default: False.
    :return: Dataframe comparing each group for each variable.
    :rtype: pd.DataFrame
    """

    check_valid_array(data=data, source=f'{Statistics.pairwise_tukeyhsd_scipy.__name__} data', accepted_ndims=(2,), accepted_dtypes=Formats.NUMERIC_DTYPES.value)
    check_valid_array(data=group, source=f'{Statistics.pairwise_tukeyhsd_scipy.__name__} group', accepted_ndims=(1,), accepted_dtypes=Formats.NUMERIC_DTYPES.value, accepted_axis_0_shape=(data.shape[0],))
    check_valid_lst(source=f'{Statistics.pairwise_tukeyhsd_scipy.__name__} variable_names', data=variable_names, valid_dtypes=(str,), exact_len=data.shape[1])
    results = []
    # One Tukey-HSD across all groups for each column (variable) in turn.
    for var in range(data.shape[1]):
        if verbose:
            print(f'Computing Tukey HSD for variable {var+1}/{data.shape[1]}...')
        tukey_data = pairwise_tukeyhsd(data[:, var], group)
        # NOTE(review): relies on statsmodels' private `_results_table` attribute of
        # TukeyHSDResults — may break across statsmodels versions; confirm on upgrade.
        df = pd.DataFrame(data=tukey_data._results_table.data[1:], columns=tukey_data._results_table.data[0])
        # Recompute p-values from the studentized range distribution (psturng) using
        # mean differences scaled by the pairwise standard errors — presumably to get
        # unrounded values beyond the results table's p-adj column; confirm intent.
        df['P-VALUE'] = psturng(np.abs(tukey_data.meandiffs / tukey_data.std_pairs), len(tukey_data.groupsunique), tukey_data.df_total)
        df['FEATURE'] = variable_names[var]
        results.append(df)

    # Stack the per-variable comparison tables into one long dataframe.
    return pd.concat(results, axis=0)
117 changes: 117 additions & 0 deletions simba/sandbox/bg_remover.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
import os
from copy import deepcopy
from typing import Optional, Tuple, Union

import cv2
import numpy as np
try:
from typing import Literal
except:
from typing_extensions import Literal

from simba.utils.checks import (check_file_exist_and_readable,check_if_dir_exists)
from simba.utils.enums import Formats
from simba.utils.printing import SimbaTimer, stdout_success
from simba.utils.read_write import (get_fn_ext, get_video_meta_data)
from simba.video_processors.video_processing import create_average_frm

def video_bg_subtraction(video_path: Union[str, os.PathLike],
                         bg_video_path: Optional[Union[str, os.PathLike]] = None,
                         bg_start_frm: Optional[int] = None,
                         bg_end_frm: Optional[int] = None,
                         bg_start_time: Optional[str] = None,
                         bg_end_time: Optional[str] = None,
                         bg_color: Optional[Tuple[int, int, int]] = (0, 0, 0),
                         fg_color: Optional[Tuple[int, int, int]] = None,
                         save_path: Optional[Union[str, os.PathLike]] = None,
                         threshold: Optional[int] = 50,
                         verbose: Optional[bool] = True) -> None:
    """
    Subtract the background from a video.

    .. video:: _static/img/video_bg_subtraction.webm
       :width: 800
       :autoplay:
       :loop:

    .. video:: _static/img/bg_remover_example_1.webm
       :width: 800
       :autoplay:
       :loop:

    .. video:: _static/img/bg_remover_example_2.webm
       :width: 800
       :autoplay:
       :loop:

    .. note::
       If ``bg_video_path`` is passed, that video will be used to parse the background. If None, ``video_path`` will be used to parse the background.

       Either pass ``bg_start_frm`` and ``bg_end_frm`` OR ``bg_start_time`` and ``bg_end_time`` OR pass all four arguments as None.
       Those two arguments will be used to slice the background video, and the sliced part is used to parse the background.

       For example, in the scenario where there is **no** animal in the ``video_path`` video for the first 20s, then the first 20s can be used to parse the background.
       In this scenario, ``bg_video_path`` can be passed as ``None`` and bg_start_time and bg_end_time can be ``00:00:00`` and ``00:00:20``, respectively.

       In the scenario where there **is** animal(s) in the entire ``video_path`` video, pass ``bg_video_path`` as a path to a video recording the arena without the animals.

    :param Union[str, os.PathLike] video_path: The path to the video to remove the background from.
    :param Optional[Union[str, os.PathLike]] bg_video_path: Path to the video which contains a segment with the background only. If None, then ``video_path`` will be used.
    :param Optional[int] bg_start_frm: The first frame in the background video to use when creating a representative background image. Default: None.
    :param Optional[int] bg_end_frm: The last frame in the background video to use when creating a representative background image. Default: None.
    :param Optional[str] bg_start_time: The start timestamp in `HH:MM:SS` format in the background video to use to create a representative background image. Default: None.
    :param Optional[str] bg_end_time: The end timestamp in `HH:MM:SS` format in the background video to use to create a representative background image. Default: None.
    :param Optional[Tuple[int, int, int]] bg_color: The RGB color of the background in the output video. Defaults to black (0, 0, 0).
    :param Optional[Tuple[int, int, int]] fg_color: The RGB color of the moving objects in the output video. Defaults to None, which keeps the original colors of the moving objects.
    :param Optional[Union[str, os.PathLike]] save_path: The path to where to save the output video where the background is removed. If None, saves the output video in the same directory as the input video with the ``_bg_subtracted`` suffix. Default: None.
    :param Optional[int] threshold: Grayscale difference (0-255) below which a pixel is treated as background. Default: 50.
    :param Optional[bool] verbose: If True, print per-frame progress and a success message. Default: True.
    :return: None.

    :example:
    >>> video_bg_subtraction(video_path='/Users/simon/Downloads/1_LH_cropped.mp4', bg_start_time='00:00:00', bg_end_time='00:00:10', bg_color=(0, 106, 167), fg_color=(254, 204, 2))
    """

    timer = SimbaTimer(start=True)
    check_file_exist_and_readable(file_path=video_path)
    if bg_video_path is None:
        bg_video_path = deepcopy(video_path)
    video_meta_data = get_video_meta_data(video_path=video_path)
    dir, video_name, ext = get_fn_ext(filepath=video_path)
    if save_path is None:
        save_path = os.path.join(dir, f'{video_name}_bg_subtracted{ext}')
    else:
        check_if_dir_exists(in_dir=os.path.dirname(save_path), source=video_bg_subtraction.__name__)
    fourcc = cv2.VideoWriter_fourcc(*Formats.MP4_CODEC.value)
    writer = cv2.VideoWriter(save_path, fourcc, video_meta_data['fps'], (video_meta_data['width'], video_meta_data['height']))
    bg_frm = create_average_frm(video_path=bg_video_path, start_frm=bg_start_frm, end_frm=bg_end_frm, start_time=bg_start_time, end_time=bg_end_time)
    bg_frm = cv2.resize(bg_frm, (video_meta_data['width'], video_meta_data['height']))
    cap = cv2.VideoCapture(video_path)
    frm_cnt = 0
    while True:
        ret, frm = cap.read()
        # BUGFIX: the original tested `ret` twice, with a dead `if not ret: break`
        # nested inside the success branch; a single guard clause suffices.
        if not ret:
            break
        # BUGFIX: `np.abs(frm - bg_frm)` wraps around on uint8 frames (e.g. 10 - 20
        # == 246), mis-measuring dark-on-light differences. Compute the true absolute
        # difference in a signed dtype; the result (<= 255) fits back into uint8.
        img_diff = np.abs(frm.astype(np.int16) - bg_frm.astype(np.int16)).astype(np.uint8)
        gray_diff = cv2.cvtColor(img_diff, cv2.COLOR_BGR2GRAY)
        mask = np.where(gray_diff < threshold, 0, 1)  # 0 = background, 1 = foreground
        out_img = np.full_like(frm, fill_value=bg_color)
        if fg_color is None:
            out_img[mask == 1] = frm[mask == 1]  # keep original foreground pixels
        else:
            out_img[mask == 1] = fg_color
        writer.write(out_img)
        frm_cnt += 1
        if verbose:
            print(f'Background subtraction frame {frm_cnt}/{video_meta_data["frame_count"]} (Video: {video_name})')

    writer.release()
    cap.release()
    timer.stop_timer()
    if verbose:
        stdout_success(msg=f'Background subtracted from {video_name} and saved at {save_path}', elapsed_time=timer.elapsed_time)



# Ad-hoc developer smoke test with a machine-local path. NOTE(review): with
# threshold=255 virtually every pixel is classified as background (gray diffs are
# always <= 255); consider a lower threshold, and guarding this call with
# `if __name__ == "__main__":` so importing the module has no side effects.
video_bg_subtraction(video_path='/Users/simon/Desktop/envs/simba/troubleshooting/mitra/project_folder/videos/501_MA142_Gi_CNO_0514_clipped.mp4',
                     fg_color=(255, 0, 0), threshold=255)
63 changes: 63 additions & 0 deletions simba/sandbox/bout_aggregator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
import os
from copy import deepcopy
from typing import Literal, Optional, List, Union
try:
from typing import Literal
except:
from typing_extensions import Literal
import pandas as pd
from simba.utils.checks import check_valid_lst, check_int, check_str, check_valid_dataframe, check_instance
from simba.utils.read_write import find_core_cnt, read_video_info
from simba.utils.printing import SimbaTimer
from simba.utils.enums import Formats
from simba.utils.data import detect_bouts, read_df
from simba.utils.errors import InvalidInputError

def video_bout_aggregator(data: Union[str, os.PathLike, pd.DataFrame],
                          clfs: List[str],
                          feature_names: List[str],
                          sample_rate: int,
                          min_bout_length: Optional[int] = None,
                          method: Optional[Literal["MEAN", "MEDIAN"]] = "MEAN") -> pd.DataFrame:
    """
    Aggregate feature values within classified behavior bouts (work in progress).

    .. note::
       NOTE(review): the aggregation itself is not yet implemented — the function
       currently validates and loads the input, then returns None.

    :param Union[str, os.PathLike, pd.DataFrame] data: Path to a CSV holding feature and classifier columns, or a pre-loaded DataFrame.
    :param List[str] clfs: Names of the classifier columns in ``data``.
    :param List[str] feature_names: Names of the feature columns in ``data``.
    :param int sample_rate: Sample rate (e.g., FPS) of the data.
    :param Optional[int] min_bout_length: Minimum bout length to include. If None, 0 is used.
    :param Optional[Literal["MEAN", "MEDIAN"]] method: Aggregation statistic. Default: "MEAN".
    :rtype: pd.DataFrame
    """
    check_valid_lst(data=clfs, source=f"{video_bout_aggregator.__name__} clfs", valid_dtypes=(str,), min_len=1)
    check_valid_lst(data=feature_names, source=f"{video_bout_aggregator.__name__} feature_names", valid_dtypes=(str,), min_len=1)
    check_instance(source=f'{video_bout_aggregator.__name__} data', accepted_types=(str, pd.DataFrame), instance=data)
    if isinstance(data, (str, os.PathLike)):
        # BUGFIX: read from the `data` argument (the original referenced the
        # module-level `data_path`, silently ignoring the caller's argument).
        df = read_df(file_path=data, file_type='csv', usecols=feature_names + clfs)
    elif isinstance(data, pd.DataFrame):
        df = deepcopy(data)
    else:
        # BUGFIX: `df` is unbound on this branch — report the type of `data` instead.
        raise InvalidInputError(msg=f'data is of invalid type: {type(data)}, accepted: {str, os.PathLike, pd.DataFrame}', source=video_bout_aggregator.__name__)
    # BUGFIX: validate the loaded frame (`df`); `data` may be a path string here.
    check_valid_dataframe(df=df, source=f"{video_bout_aggregator.__name__} data", valid_dtypes=Formats.NUMERIC_DTYPES.value, required_fields=feature_names + clfs)
    # BUGFIX: the check name said "data" while validating `sample_rate`.
    check_int(name=f"{video_bout_aggregator.__name__} sample_rate", value=sample_rate, min_value=10e-6)
    if min_bout_length is not None:
        check_int(name=f"{video_bout_aggregator.__name__} min_bout_length", value=min_bout_length, min_value=0)
    else:
        min_bout_length = 0
    check_str(name=f"{video_bout_aggregator.__name__} method", value=method, options=("MEAN", "MEDIAN"))
    # TODO(review): implement bout detection (detect_bouts) and per-bout MEAN/MEDIAN
    # aggregation; the original commented-out scaffold for this was removed.



# Ad-hoc developer smoke test with a machine-local path.
data_path = '/Users/simon/Desktop/envs/simba/troubleshooting/mitra/project_folder/csv/input_csv/501_MA142_Gi_CNO_0521.csv'

# NOTE(review): this call omits the required `clfs`, `feature_names` and
# `sample_rate` arguments and will raise a TypeError as written — supply them,
# or guard with `if __name__ == "__main__":` while the function is WIP.
video_bout_aggregator(data=data_path)
24 changes: 24 additions & 0 deletions simba/sandbox/direction_reversals.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import numpy as np



def direction_switches(x: np.ndarray, switch_degree: int = 180):
    """
    Work-in-progress: detect heading reversals in a sequence of directions.

    Currently only inspects the first sample and prints intermediate values;
    switch counting is not yet implemented and the function returns None.

    :param np.ndarray x: 1D array of headings in degrees.
    :param int switch_degree: Angular offset (degrees) that defines a direction switch. Default: 180.
    :return: None.
    """
    idx = 0
    cDeg = x[idx]  # heading of the current sample, in degrees
    # BUGFIX: the original unpacked two names from a single scalar expression
    # (`tDeg1, tDeg2 = (...) % 360`), which raises TypeError. Compute the two
    # candidate target headings — one in each rotational direction — separately,
    # normalized to [0, 360).
    tDeg1 = ((cDeg + switch_degree) % 360 + 360) % 360
    tDeg2 = ((cDeg - switch_degree) % 360 + 360) % 360

    print(cDeg)
    print(tDeg1)

    # TODO(review): implement the actual switch detection/counting over `x`.




# Ad-hoc smoke test: 100 random integer headings in [0, 360].
x = np.random.randint(0, 361, (100))
direction_switches(x=x)
12 changes: 12 additions & 0 deletions simba/sandbox/egocentric_align_nb.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# In this notebook, we will "egocentrically" align pose estimation and pose-estimated video data.

# This means that we will rotate the data, so that the animal, in every frame, is always "anchored" in the same location and directing to the same location.
# (i) One body-part (e.g., the center or the tail-base of the animal) is always located in the same pixel location of the video.
# (ii) A second body-part (e.g., the nose, head, or nape) is always directing N degrees from the anchor point.

# In short - we rotate the data so that the animal is always facing to the right, and the animal is always located at
# the center of the image.




Loading

0 comments on commit 8b301d2

Please sign in to comment.