diff --git a/eks/core.py b/eks/core.py index 46c2ea7..bb178be 100644 --- a/eks/core.py +++ b/eks/core.py @@ -16,6 +16,7 @@ # ------------------------------------------------------------------------------------------ +# TODO: don't return arrays AND dicts def ensemble( markers_list: list, keys: list, @@ -38,6 +39,8 @@ def ensemble( shape (samples, n_keypoints) ensemble_vars: np.ndarray shape (samples, n_keypoints) + ensemble_likelihoods: np.ndarray + shape (samples, n_keypoints) ensemble_stacks: np.ndarray shape (n_models, samples, n_keypoints) keypoints_avg_dict: dict @@ -48,9 +51,10 @@ def ensemble( keys: model_ids, keys: marker keypoints, values: shape (samples) """ - ensemble_stacks = [] - ensemble_vars = [] ensemble_preds = [] + ensemble_vars = [] + ensemble_likes = [] + ensemble_stacks = [] keypoints_avg_dict = {} keypoints_var_dict = {} keypoints_stack_dict = defaultdict(dict) @@ -75,17 +79,18 @@ def ensemble( for i, keypoints in enumerate(stack.T): keypoints_stack_dict[i][key] = stack.T[i] + # collect likelihoods + likelihood_stack = np.ones((markers_list[0].shape[0], len(markers_list))) + likelihood_key = key[:-1] + 'likelihood' + if likelihood_key in markers_list[0]: + for k in range(len(markers_list)): + likelihood_stack[:, k] = markers_list[k][likelihood_key] + mean_conf_per_keypoint = np.mean(likelihood_stack, axis=1) + ensemble_likes.append(mean_conf_per_keypoint) + # compute variance var = np.nanvar(stack, axis=1) if var_mode in ['conf_weighted_var', 'confidence_weighted_var']: - likelihood_key = key[:-1] + 'likelihood' - if likelihood_key not in markers_list[0]: - raise ValueError( - f"{likelihood_key} needs to be in your marker_df to use {var_mode}") - likelihood_stack = np.zeros((markers_list[0].shape[0], len(markers_list))) - for k in range(len(markers_list)): - likelihood_stack[:, k] = markers_list[k][likelihood_key] - mean_conf_per_keypoint = np.mean(likelihood_stack, axis=1) var = var / mean_conf_per_keypoint # low-confidence --> inflated obs variances elif var_mode != 'var': raise ValueError(f"var_mode={var_mode} not supported") @@ -95,9 +100,12 @@ def ensemble( ensemble_preds = np.asarray(ensemble_preds).T ensemble_vars = np.asarray(ensemble_vars).T + ensemble_likes = np.asarray(ensemble_likes).T ensemble_stacks = np.asarray(ensemble_stacks).T - return ensemble_preds, ensemble_vars, ensemble_stacks, \ - keypoints_avg_dict, keypoints_var_dict, keypoints_stack_dict + return ( + ensemble_preds, ensemble_vars, ensemble_likes, ensemble_stacks, + keypoints_avg_dict, keypoints_var_dict, keypoints_stack_dict, + ) def forward_pass(y, m0, S0, C, R, A, Q, ensemble_vars): diff --git a/eks/ibl_paw_multiview_smoother.py b/eks/ibl_paw_multiview_smoother.py index dc7bde2..36f7fe2 100644 --- a/eks/ibl_paw_multiview_smoother.py +++ b/eks/ibl_paw_multiview_smoother.py @@ -132,13 +132,13 @@ def ensemble_kalman_smoother_ibl_paw( markers_list_right_cam.append(markers_right_cam) # compute ensemble median left camera - left_cam_ensemble_preds, left_cam_ensemble_vars, left_cam_ensemble_stacks, \ + left_cam_ensemble_preds, left_cam_ensemble_vars, _, left_cam_ensemble_stacks, \ left_cam_keypoints_mean_dict, left_cam_keypoints_var_dict, \ left_cam_keypoints_stack_dict = \ ensemble(markers_list_left_cam, keys, avg_mode=ensembling_mode, var_mode='var') # compute ensemble median right camera - right_cam_ensemble_preds, right_cam_ensemble_vars, right_cam_ensemble_stacks, \ + right_cam_ensemble_preds, right_cam_ensemble_vars, _, right_cam_ensemble_stacks, \ right_cam_keypoints_mean_dict, right_cam_keypoints_var_dict, \ right_cam_keypoints_stack_dict = \ ensemble(markers_list_right_cam, keys, avg_mode=ensembling_mode, var_mode='var') diff --git a/eks/ibl_pupil_smoother.py b/eks/ibl_pupil_smoother.py index 0925d74..74f797b 100644 --- a/eks/ibl_pupil_smoother.py +++ b/eks/ibl_pupil_smoother.py @@ -100,7 +100,7 @@ def fit_eks_pupil( Returns: tuple: - df_dicts (dict): Dictionary containing smoothed DataFrames. + df_smotthed (pd.DataFrame): smooth_params (list): Final smoothing parameters used. input_dfs_list (list): List of input DataFrames. keypoint_names (list): List of keypoint names. @@ -114,7 +114,7 @@ def fit_eks_pupil( print(f"Input data loaded for keypoints: {keypoint_names}") # Run the ensemble Kalman smoother - df_dicts, smooth_params, nll_values = ensemble_kalman_smoother_ibl_pupil( + df_smoothed, smooth_params, nll_values = ensemble_kalman_smoother_ibl_pupil( markers_list=input_dfs_list, keypoint_names=keypoint_names, smooth_params=smooth_params, @@ -125,10 +125,10 @@ def fit_eks_pupil( # Save the output DataFrame to CSV os.makedirs(os.path.dirname(save_file), exist_ok=True) - df_dicts['markers_df'].to_csv(save_file) + df_smoothed.to_csv(save_file) print("DataFrames successfully converted to CSV") - return df_dicts, smooth_params, input_dfs_list, keypoint_names, nll_values + return df_smoothed, smooth_params, input_dfs_list, keypoint_names, nll_values def ensemble_kalman_smoother_ibl_pupil( @@ -156,9 +156,8 @@ def ensemble_kalman_smoother_ibl_pupil( zscore metric (default 2). Returns: - tuple - dict: markers_df contains smoothed markers; latents_df contains 3d latents: pupil - diameter and pupil center of mass + tuple: + smoothed markers dataframe final smooth params values final nll @@ -169,8 +168,10 @@ def ensemble_kalman_smoother_ibl_pupil( 'pupil_top_r_x', 'pupil_top_r_y', 'pupil_bottom_r_x', 'pupil_bottom_r_y', 'pupil_right_r_x', 'pupil_right_r_y', 'pupil_left_r_x', 'pupil_left_r_y', ] - ensemble_preds, ensemble_vars, ensemble_stacks, keypoints_mean_dict, keypoints_var_dict, \ - keypoints_stack_dict = ensemble(markers_list, keys, avg_mode=avg_mode, var_mode=var_mode) + ( + ensemble_preds, ensemble_vars, ensemble_likes, ensemble_stacks, + keypoints_mean_dict, keypoints_var_dict, keypoints_stack_dict, + ) = ensemble(markers_list, keys, avg_mode=avg_mode, var_mode=var_mode) # compute center of mass pupil_locations = get_pupil_location(keypoints_mean_dict) @@ -246,55 +247,67 @@ def ensemble_kalman_smoother_ibl_pupil( # cleanup # -------------------------------------- - # save out marker info - pdindex = make_dlc_pandas_index( - keypoint_names, - labels=["x", "y", "likelihood", "x_var", "y_var", "zscore"] - ) + # collect data processed_arr_dict = add_mean_to_array(y_m_smooth, keys, mean_x_obs, mean_y_obs) - key_pair_list = [['pupil_top_r_x', 'pupil_top_r_y'], - ['pupil_right_r_x', 'pupil_right_r_y'], - ['pupil_bottom_r_x', 'pupil_bottom_r_y'], - ['pupil_left_r_x', 'pupil_left_r_y']] + key_pair_list = [ + ['pupil_top_r_x', 'pupil_top_r_y'], + ['pupil_right_r_x', 'pupil_right_r_y'], + ['pupil_bottom_r_x', 'pupil_bottom_r_y'], + ['pupil_left_r_x', 'pupil_left_r_y'], + ] ensemble_indices = [(0, 1), (4, 5), (2, 3), (6, 7)] - pred_arr = [] + data_arr = [] for i, key_pair in enumerate(key_pair_list): - pred_arr.append(processed_arr_dict[key_pair[0]]) - pred_arr.append(processed_arr_dict[key_pair[1]]) - var = np.empty(processed_arr_dict[key_pair[0]].shape) - var[:] = 1.0 # TODO: median of observed likelihoods - pred_arr.append(var) - x_var = y_v_smooth[:, i, i] - y_var = y_v_smooth[:, i + 1, i + 1] - pred_arr.append(x_var) - pred_arr.append(y_var) - # compute zscore for EKS to see how it deviates from the ensemble - eks_predictions = \ - np.asarray([processed_arr_dict[key_pair[0]], processed_arr_dict[key_pair[1]]]).T - ensemble_preds_curr = ensemble_preds[:, ensemble_indices[i][0]: ensemble_indices[i][1] + 1] - ensemble_vars_curr = ensemble_vars[:, ensemble_indices[i][0]: ensemble_indices[i][1] + 1] - zscore, _ = eks_zscore( - eks_predictions, - ensemble_preds_curr, - ensemble_vars_curr, - min_ensemble_std=zscore_threshold, - ) - pred_arr.append(zscore) - pred_arr = np.asarray(pred_arr) - markers_df = pd.DataFrame(pred_arr.T, columns=pdindex) - - # save out latents info: pupil diam, center of mass - pred_arr2 = np.asarray([ - ms[:, 0], - ms[:, 1] + mean_x_obs, # add back x mean of pupil location - ms[:, 2] + mean_y_obs, # add back ys mean of pupil location - ]) - tracker_name = 'ensemble-kalman_tracker' - arrays = [[tracker_name, tracker_name, tracker_name], ['diameter', 'com_x', 'com_y']] - pd_index2 = pd.MultiIndex.from_arrays(arrays, names=('scorer', 'latent')) - latents_df = pd.DataFrame(pred_arr2.T, columns=pd_index2) + # keep track of labels for each data entry + labels = [] + # smoothed x vals + data_arr.append(processed_arr_dict[key_pair[0]]) + labels.append('x') + # smoothed y vals + data_arr.append(processed_arr_dict[key_pair[1]]) + labels.append('y') + # mean likelihood + data_arr.append(ensemble_likes[:, ensemble_indices[i][0]]) + labels.append('likelihood') + # x vals ensemble median + data_arr.append(ensemble_preds[:, ensemble_indices[i][0]]) + labels.append('x_ens_median') + # y vals ensemble median + data_arr.append(ensemble_preds[:, ensemble_indices[i][1]]) + labels.append('y_ens_median') + # x vals ensemble variance + data_arr.append(ensemble_vars[:, ensemble_indices[i][0]]) + labels.append('x_ens_var') + # y vals ensemble variance + data_arr.append(ensemble_vars[:, ensemble_indices[i][1]]) + labels.append('y_ens_var') + # x vals posterior variance + data_arr.append(y_v_smooth[:, i, i]) + labels.append('x_posterior_var') + # y vals posterior variance + data_arr.append(y_v_smooth[:, i + 1, i + 1]) + labels.append('y_posterior_var') - return {'markers_df': markers_df, 'latents_df': latents_df}, smooth_params, nll_values + # compute zscore for EKS to see how it deviates from the ensemble + # eks_predictions = \ + # np.asarray([processed_arr_dict[key_pair[0]], processed_arr_dict[key_pair[1]]]).T + # ensemble_preds_curr = ensemble_preds[:, ensemble_indices[i][0]: ensemble_indices[i][1] + 1] + # ensemble_vars_curr = ensemble_vars[:, ensemble_indices[i][0]: ensemble_indices[i][1] + 1] + # zscore, _ = eks_zscore( + # eks_predictions, + # ensemble_preds_curr, + # ensemble_vars_curr, + # min_ensemble_std=zscore_threshold, + # ) + # data_arr.append(zscore) + + data_arr = np.asarray(data_arr) + + # put data in dataframe + pdindex = make_dlc_pandas_index(keypoint_names, labels=labels) + markers_df = pd.DataFrame(data_arr.T, columns=pdindex) + + return markers_df, smooth_params, nll_values def pupil_optimize_smooth( diff --git a/eks/multicam_smoother.py b/eks/multicam_smoother.py index 51fa208..bc8b5de 100644 --- a/eks/multicam_smoother.py +++ b/eks/multicam_smoother.py @@ -86,7 +86,7 @@ def ensemble_kalman_smoother_multicam( cam_keypoints_var_dict = [] cam_keypoints_stack_dict = [] for camera in range(num_cameras): - cam_ensemble_preds_curr, cam_ensemble_vars_curr, cam_ensemble_stacks_curr, \ + cam_ensemble_preds_curr, cam_ensemble_vars_curr, _, cam_ensemble_stacks_curr, \ cam_keypoints_mean_dict_curr, cam_keypoints_var_dict_curr, \ cam_keypoints_stack_dict_curr = \ ensemble(markers_list_cams[camera], keys, avg_mode=ensembling_mode) diff --git a/eks/utils.py b/eks/utils.py index f5e6d76..ffb945b 100644 --- a/eks/utils.py +++ b/eks/utils.py @@ -203,14 +203,15 @@ def populate_output_dataframe(keypoint_df, keypoint_ensemble, output_df, key_suf return output_df -def plot_results(output_df, input_dfs_list, - key, s_final, nll_values, idxs, save_dir, smoother_type): +def plot_results( + output_df, input_dfs_list, key, s_final, nll_values, idxs, save_dir, smoother_type, +): if nll_values is None: - fig, axes = plt.subplots(4, 1, figsize=(9, 10)) + fig, axes = plt.subplots(3, 1, figsize=(9, 10)) else: - fig, axes = plt.subplots(5, 1) + fig, axes = plt.subplots(4, 1) - for ax, coord in zip(axes, ['x', 'y', 'likelihood', 'zscore']): + for ax, coord in zip(axes, ['x', 'y', 'likelihood']): # Rename axes label for likelihood and zscore coordinates if coord == 'likelihood': ylabel = 'model likelihoods' diff --git a/scripts/ibl_pupil_example.py b/scripts/ibl_pupil_example.py index f043dca..9639f84 100644 --- a/scripts/ibl_pupil_example.py +++ b/scripts/ibl_pupil_example.py @@ -29,7 +29,7 @@ s_frames = args.s_frames # Run the smoothing function -df_dicts, smooth_params, input_dfs_list, keypoint_names, nll_values = fit_eks_pupil( +df_smoothed, smooth_params, input_dfs_list, keypoint_names, nll_values = fit_eks_pupil( input_source=input_source, save_file=os.path.join(save_dir, save_filename or 'eks_pupil.csv'), smooth_params=[diameter_s, com_s], @@ -38,7 +38,7 @@ # Plot results plot_results( - output_df=df_dicts['markers_df'], + output_df=df_smoothed, input_dfs_list=input_dfs_list, key=f'{keypoint_names[-1]}', idxs=(0, 500), diff --git a/tests/test_core.py b/tests/test_core.py index 87086bb..350d4c2 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -12,9 +12,9 @@ def test_ensemble(): # Simulate marker data with three models, each with two keypoints and 5 samples np.random.seed(0) num_samples = 5 - num_keypoints = 2 markers_list = [] keys = ['keypoint_1_x', 'keypoint_2_x'] + num_keypoints = len(keys) # Create random data for three different marker DataFrames # Adjust column names to match the function's expected 'keypoint_likelihood' format @@ -28,18 +28,20 @@ def test_ensemble(): markers_list.append(pd.DataFrame(data)) # Run the ensemble function with 'median' mode - ensemble_preds, ensemble_vars, ensemble_stacks, keypoints_avg_dict, \ + ensemble_preds, ensemble_vars, ensemble_likes, ensemble_stacks, keypoints_avg_dict, \ keypoints_var_dict, keypoints_stack_dict = ensemble( markers_list, keys, avg_mode='median', var_mode='var', ) # Verify shapes of output arrays assert ensemble_preds.shape == (num_samples, num_keypoints), \ - f"Expected shape {(num_samples, num_keypoints)}, got {ensemble_preds.shape}" + f"Means expected shape {(num_samples, num_keypoints)}, got {ensemble_preds.shape}" assert ensemble_vars.shape == (num_samples, num_keypoints), \ - f"Expected shape {(num_samples, num_keypoints)}, got {ensemble_vars.shape}" + f"Vars expected shape {(num_samples, num_keypoints)}, got {ensemble_vars.shape}" + assert ensemble_likes.shape == (num_samples, num_keypoints), \ + f"Likes expected shape {(num_samples, num_keypoints)}, got {ensemble_likes.shape}" assert ensemble_stacks.shape == (3, num_samples, num_keypoints), \ - f"Expected shape {(3, num_samples, num_keypoints)}, got {ensemble_stacks.shape}" + f"Stacks expected shape {(3, num_samples, num_keypoints)}, got {ensemble_stacks.shape}" # Verify contents of dictionaries assert set(keypoints_avg_dict.keys()) == set(keys), \ @@ -50,7 +52,7 @@ def test_ensemble(): f"Expected 3 models, got {len(keypoints_stack_dict)}" # Run the ensemble function with avg_mode='mean' and var_mode='conf_weighted_var' - ensemble_preds, ensemble_vars, ensemble_stacks, keypoints_avg_dict, \ + ensemble_preds, ensemble_vars, ensemble_likes, ensemble_stacks, keypoints_avg_dict, \ keypoints_var_dict, keypoints_stack_dict = ensemble( markers_list, keys, avg_mode='mean', var_mode='conf_weighted_var', ) @@ -60,9 +62,9 @@ def test_ensemble(): expected_mean = np.nanmean(stack, axis=1) expected_variance = 2.0 * np.nanvar(stack, axis=1) # 2x since likelihoods all 0.5 assert np.allclose(keypoints_avg_dict[key], expected_mean), \ - f"Expected {expected_mean} for {key}, got {keypoints_avg_dict[key]}" + f"Means expected {expected_mean} for {key}, got {keypoints_avg_dict[key]}" assert np.allclose(keypoints_var_dict[key], expected_variance), \ - f"Expected {expected_variance} for {key}, got {keypoints_var_dict[key]}" + f"Vars expected {expected_variance} for {key}, got {keypoints_var_dict[key]}" def test_kalman_dot_basic(): diff --git a/tests/test_ibl_pupil_smoother.py b/tests/test_ibl_pupil_smoother.py index cb345e1..3549016 100644 --- a/tests/test_ibl_pupil_smoother.py +++ b/tests/test_ibl_pupil_smoother.py @@ -1,8 +1,15 @@ -import pytest -import pandas as pd +from unittest.mock import MagicMock, patch + import numpy as np -from unittest.mock import patch, MagicMock -from eks.ibl_pupil_smoother import get_pupil_location, get_pupil_diameter, add_mean_to_array, ensemble_kalman_smoother_ibl_pupil +import pandas as pd +import pytest + +from eks.ibl_pupil_smoother import ( + add_mean_to_array, + ensemble_kalman_smoother_ibl_pupil, + get_pupil_diameter, + get_pupil_location, +) @pytest.fixture @@ -21,7 +28,7 @@ def mock_dlc_data(): 'pupil_left_r_x': np.random.rand(n_samples), 'pupil_left_r_y': np.random.rand(n_samples), 'pupil_right_r_x': np.random.rand(n_samples), - 'pupil_right_r_y': np.random.rand(n_samples) + 'pupil_right_r_y': np.random.rand(n_samples), } # Introduce some NaN values randomly @@ -105,7 +112,7 @@ def test_add_mean_to_array(mock_data_1): # Assertions to verify the result assert isinstance(result, dict), "Expected output to be a dictionary" - assert len(result) == len(keys), f"Expected dictionary to have {len(keys)} keys, got {len(result)}" + assert len(result) == len(keys), f"Expected dict to have {len(keys)} keys, got {len(result)}" # Check that the dictionary keys match the input keys assert set(result.keys()) == set(keys), "Keys in the output dictionary do not match input keys" @@ -116,7 +123,9 @@ def test_add_mean_to_array(mock_data_1): expected = pred_arr[:, i] + mean_x else: expected = pred_arr[:, i] + mean_y - np.testing.assert_array_almost_equal(result[key], expected, err_msg=f"Mismatch for key '{key}'") + np.testing.assert_array_almost_equal( + result[key], expected, err_msg=f"Mismatch for key '{key}'" + ) def test_add_mean_to_array_empty(): @@ -156,7 +165,9 @@ def test_add_mean_to_array_single_row(): # Assertions to verify the result for key, expected_value in expected_dict.items(): - np.testing.assert_array_almost_equal(result[key], expected_value, err_msg=f"Mismatch for key '{key}'") + np.testing.assert_array_almost_equal( + result[key], expected_value, err_msg=f"Mismatch for key '{key}'" + ) print("All tests for add_mean_to_array passed successfully.") @@ -202,6 +213,7 @@ def test_ensemble_kalman_smoother_ibl_pupil( # Mock the ensemble function ensemble_preds = np.random.randn(100, 8) ensemble_vars = np.random.rand(100, 8) * 0.1 + ensemble_likes = np.ones((100, 8)) ensemble_stacks = np.random.randn(5, 100, 8) keypoints_mean_dict = { k: np.random.randn(100) for k in [ @@ -212,8 +224,10 @@ def test_ensemble_kalman_smoother_ibl_pupil( keypoints_var_dict = keypoints_mean_dict.copy() keypoints_stack_dict = {i: keypoints_mean_dict for i in range(5)} - mock_ensemble.return_value = (ensemble_preds, ensemble_vars, ensemble_stacks, - keypoints_mean_dict, keypoints_var_dict, keypoints_stack_dict) + mock_ensemble.return_value = ( + ensemble_preds, ensemble_vars, ensemble_likes, ensemble_stacks, + keypoints_mean_dict, keypoints_var_dict, keypoints_stack_dict, + ) # Mock the get_pupil_location and get_pupil_diameter functions mock_get_location.return_value = np.random.randn(100, 2) @@ -237,24 +251,15 @@ def test_ensemble_kalman_smoother_ibl_pupil( mock_zscore.return_value = np.random.randn(100), None # Run the function with mocked data - result, smooth_params_out, nll_values = ensemble_kalman_smoother_ibl_pupil( + smoothed_df, smooth_params_out, nll_values = ensemble_kalman_smoother_ibl_pupil( markers_list, keypoint_names, smooth_params, s_frames, avg_mode='mean', var_mode='var', ) # Assertions - assert isinstance(result, dict), "Expected result to be a dictionary" - assert 'markers_df' in result, "Expected 'markers_df' in result" - assert 'latents_df' in result, "Expected 'latents_df' in result" - - markers_df = result['markers_df'] - latents_df = result['latents_df'] - - assert isinstance(markers_df, pd.DataFrame), "markers_df should be a DataFrame" - assert isinstance(latents_df, pd.DataFrame), "latents_df should be a DataFrame" + assert isinstance(smoothed_df, pd.DataFrame), "first return arg should be a DataFrame" # Verify the shape of the output DataFrames - assert markers_df.shape[0] == 100, "markers_df should have 100 rows" - assert latents_df.shape[0] == 100, "latents_df should have 100 rows" + assert smoothed_df.shape[0] == 100, "markers_df should have 100 rows" # Check if the smooth parameters and NLL values are correctly returned assert len(smooth_params_out) == 2, "Expected 2 smooth parameters"