From 7cdfb02dfe2217d6d90bfcd5e8dfb43b03f49d8a Mon Sep 17 00:00:00 2001 From: sronilsson Date: Sat, 9 Nov 2024 13:53:00 -0500 Subject: [PATCH] cuda jit --- docs/tables/sliding_spatial_density_cuda.csv | 13 ++++ setup.py | 2 +- simba/data_processors/cuda/create_shap_log.py | 2 +- simba/data_processors/cuda/timeseries.py | 75 ++++++++++++++++++- simba/data_processors/cuda/utils.py | 4 + simba/mixins/timeseries_features_mixin.py | 2 +- 6 files changed, 94 insertions(+), 4 deletions(-) create mode 100644 docs/tables/sliding_spatial_density_cuda.csv diff --git a/docs/tables/sliding_spatial_density_cuda.csv b/docs/tables/sliding_spatial_density_cuda.csv new file mode 100644 index 000000000..0f3867b87 --- /dev/null +++ b/docs/tables/sliding_spatial_density_cuda.csv @@ -0,0 +1,13 @@ +FRAMES (MILLIONS),GPU TIME (S),GPU TIME (STEV) +2,0.5252,0.00063 +4,1.0396,0.00099 +8,2.0821,0.00231 +16,4.2678,0.070249 +32,8.4416,0.121225 +64,16.941,0.097708 +128,34.092,0.03986 +256,68.052,0.27862 +512,138.78118,3.755006 +NVIDIA GeForce RTX 4070,, +time window = 2.5s @ 30 FPS,, +3 ITERATIONS,, diff --git a/setup.py b/setup.py index 131c0dad1..ea090a4f6 100644 --- a/setup.py +++ b/setup.py @@ -29,7 +29,7 @@ # Setup configuration setuptools.setup( name="Simba-UW-tf-dev", - version="2.3.1", + version="2.3.2", author="Simon Nilsson, Jia Jie Choong, Sophia Hwang", author_email="sronilsson@gmail.com", description="Toolkit for computer classification and analysis of behaviors in experimental animals", diff --git a/simba/data_processors/cuda/create_shap_log.py b/simba/data_processors/cuda/create_shap_log.py index eed22b939..f8eab416a 100644 --- a/simba/data_processors/cuda/create_shap_log.py +++ b/simba/data_processors/cuda/create_shap_log.py @@ -17,7 +17,7 @@ from simba.utils.enums import Formats from simba.utils.errors import FFMPEGCodecGPUError from simba.utils.printing import SimbaTimer, stdout_success -from simba.utils.read_write import read_df, write_df +from simba.utils.read_write import write_df from simba.utils.warnings import NotEnoughDataWarning diff --git a/simba/data_processors/cuda/timeseries.py b/simba/data_processors/cuda/timeseries.py index f34c443df..5d03ce6cc 100644 --- a/simba/data_processors/cuda/timeseries.py +++ b/simba/data_processors/cuda/timeseries.py @@ -1,9 +1,9 @@ from time import perf_counter from typing import Optional - import numpy as np from numba import cuda +from simba.data_processors.cuda.utils import _euclid_dist from simba.data_processors.cuda.utils import _cuda_mean, _cuda_std from simba.utils.checks import check_float, check_valid_array from simba.utils.enums import Formats @@ -116,3 +116,76 @@ def sliding_percent_beyond_n_std(data: np.ndarray, time_window: float, sample_ra +@cuda.jit() +def _sliding_spatial_density_kernel(x, time_window, radius, results): + r = cuda.grid(1) + if r >= x.shape[0] or r < 0: + return + l = int(r - time_window[0]) + if l < 0 or l >= r: + return + total_neighbors = 0 + n_points = r - l + if n_points <= 0: + results[r] = 0 + return + for i in range(l, r): + for j in range(l, r): + if i != j: + dist = _euclid_dist(x[i], x[j]) + if dist <= radius[0]: + total_neighbors += 1 + + results[r] = total_neighbors / n_points if n_points > 0 else 0 + + +def sliding_spatial_density_cuda(x: np.ndarray, + radius: float, + pixels_per_mm: float, + window_size: float, + sample_rate: float) -> np.ndarray: + """ + Computes the spatial density of points within a moving window along a trajectory using CUDA for acceleration. + + This function calculates a spatial density measure for each point along a 2D trajectory path by counting the number + of neighboring points within a specified radius. The computation is performed within a sliding window that moves + along the trajectory, using GPU acceleration to handle large datasets efficiently. + + .. csv-table:: + :header: EXPECTED RUNTIMES + :file: ../../../docs/tables/sliding_spatial_density_cuda.csv + :widths: 10, 45, 45 + :align: center + :header-rows: 1 + + :param np.ndarray x: A 2D array of shape (N, 2), where N is the number of points and each point has two spatial coordinates (x, y). The array represents the trajectory path of points in a 2D space (e.g., x and y positions in space). + :param float radius: The radius (in millimeters) within which to count neighboring points around each trajectory point. Defines the area of interest around each point. + :param float pixels_per_mm: The scaling factor that converts the physical radius (in millimeters) to pixel units for spatial density calculations. + :param float window_size: The size of the sliding window (in seconds or points) to compute the density of points. A larger window size will consider more points in each density calculation. + :param float sample_rate: The rate at which to sample the trajectory points (e.g., frames per second or samples per unit time). It adjusts the granularity of the sliding window. + :return: A 1D numpy array where each element represents the computed spatial density for the trajectory at the corresponding point in time (or frame). Higher values indicate more densely packed points within the specified radius, while lower values suggest more sparsely distributed points. + :rtype: np.ndarray + + :example: + >>> df = pd.read_csv("/mnt/c/troubleshooting/two_black_animals_14bp/project_folder/csv/outlier_corrected_movement_location/Test_3.csv") + >>> x = df[['Nose_1_x', 'Nose_1_y']].values + >>> results_cuda = sliding_spatial_density_cuda(x=x, radius=10.0, pixels_per_mm=4.0, window_size=1, sample_rate=20) + + """ + + check_valid_array(data=x, source=f'{sliding_spatial_density_cuda.__name__} x', accepted_ndims=(2,), accepted_axis_1_shape=[2, ], accepted_dtypes=Formats.NUMERIC_DTYPES.value) + check_float(name=f'{sliding_spatial_density_cuda.__name__} radius', value=radius) + check_float(name=f'{sliding_spatial_density_cuda.__name__} window_size', value=window_size) + check_float(name=f'{sliding_spatial_density_cuda.__name__} sample_rate', value=sample_rate) + check_float(name=f'{sliding_spatial_density_cuda.__name__} pixels_per_mm', value=pixels_per_mm) + + x = np.ascontiguousarray(x) + pixel_radius = np.array([np.ceil(max(1.0, (radius * pixels_per_mm)))]).astype(np.float64) + time_window_frames = np.array([np.ceil(window_size * sample_rate)]) + x_dev = cuda.to_device(x) + time_window_frames_dev = cuda.to_device(time_window_frames) + radius_dev = cuda.to_device(pixel_radius) + bpg = (x.shape[0] + (THREADS_PER_BLOCK - 1)) // THREADS_PER_BLOCK + results = cuda.device_array(shape=x.shape[0], dtype=np.float16) + _sliding_spatial_density_kernel[bpg, THREADS_PER_BLOCK](x_dev, time_window_frames_dev, radius_dev, results) + return results.copy_to_host() diff --git a/simba/data_processors/cuda/utils.py b/simba/data_processors/cuda/utils.py index a5d80fed5..5a393dc39 100644 --- a/simba/data_processors/cuda/utils.py +++ b/simba/data_processors/cuda/utils.py @@ -80,6 +80,10 @@ def _cuda_mse(img_1, img_2): # arr[j], arr[j + 1] = arr[j + 1], arr[j] # out = arr +@cuda.jit(device=True) +def _euclid_dist(x, y): + return math.sqrt(((y[0] - x[0]) ** 2) + ((y[1] - x[1]) ** 2)) + def _cuda_available() -> Tuple[bool, Dict[int, Any]]: """ Check if GPU available. If True, returns the GPUs, the model, physical slots and compute capabilitie(s). diff --git a/simba/mixins/timeseries_features_mixin.py b/simba/mixins/timeseries_features_mixin.py index fbb53076b..9717bd3dd 100644 --- a/simba/mixins/timeseries_features_mixin.py +++ b/simba/mixins/timeseries_features_mixin.py @@ -2235,7 +2235,7 @@ def spatial_density(x: np.ndarray, check_valid_array(data=x, source=f'{TimeseriesFeatureMixin.spatial_density.__name__} x', accepted_ndims=(2,), accepted_axis_1_shape=[2, ], accepted_dtypes=Formats.NUMERIC_DTYPES.value) check_float(name=f'{TimeseriesFeatureMixin.spatial_density.__name__} radius', value=radius) - check_float(name=f'{TimeseriesFeatureMixin.spatial_density.__name__} radius', value=pixels_per_mm) + check_float(name=f'{TimeseriesFeatureMixin.spatial_density.__name__} pixels_per_mm', value=pixels_per_mm) pixel_radius = np.ceil(max(1.0, (radius * pixels_per_mm))) n_points = x.shape[0] total_neighbors = 0