Skip to content

Commit

Permalink
save mean likelihoods, ens means, and conf weighted ens vars in eks o…
Browse files Browse the repository at this point in the history
…utput df
  • Loading branch information
themattinthehatt committed Dec 11, 2024
1 parent 37e48fd commit 01bd293
Show file tree
Hide file tree
Showing 8 changed files with 135 additions and 106 deletions.
32 changes: 20 additions & 12 deletions eks/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
# ------------------------------------------------------------------------------------------


# TODO: don't return arrays AND dicts
def ensemble(
markers_list: list,
keys: list,
Expand All @@ -38,6 +39,8 @@ def ensemble(
shape (samples, n_keypoints)
ensemble_vars: np.ndarray
shape (samples, n_keypoints)
ensemble_likelihoods: np.ndarray
shape (samples, n_keypoints)
ensemble_stacks: np.ndarray
shape (n_models, samples, n_keypoints)
keypoints_avg_dict: dict
Expand All @@ -48,9 +51,10 @@ def ensemble(
keys: model_ids, keys: marker keypoints, values: shape (samples)
"""
ensemble_stacks = []
ensemble_vars = []
ensemble_preds = []
ensemble_vars = []
ensemble_likes = []
ensemble_stacks = []
keypoints_avg_dict = {}
keypoints_var_dict = {}
keypoints_stack_dict = defaultdict(dict)
Expand All @@ -75,17 +79,18 @@ def ensemble(
for i, keypoints in enumerate(stack.T):
keypoints_stack_dict[i][key] = stack.T[i]

# collect likelihoods
likelihood_stack = np.ones((markers_list[0].shape[0], len(markers_list)))
likelihood_key = key[:-1] + 'likelihood'
if likelihood_key in markers_list[0]:
for k in range(len(markers_list)):
likelihood_stack[:, k] = markers_list[k][likelihood_key]
mean_conf_per_keypoint = np.mean(likelihood_stack, axis=1)
ensemble_likes.append(mean_conf_per_keypoint)

# compute variance
var = np.nanvar(stack, axis=1)
if var_mode in ['conf_weighted_var', 'confidence_weighted_var']:
likelihood_key = key[:-1] + 'likelihood'
if likelihood_key not in markers_list[0]:
raise ValueError(
f"{likelihood_key} needs to be in your marker_df to use {var_mode}")
likelihood_stack = np.zeros((markers_list[0].shape[0], len(markers_list)))
for k in range(len(markers_list)):
likelihood_stack[:, k] = markers_list[k][likelihood_key]
mean_conf_per_keypoint = np.mean(likelihood_stack, axis=1)
var = var / mean_conf_per_keypoint # low-confidence --> inflated obs variances
elif var_mode != 'var':
raise ValueError(f"var_mode={var_mode} not supported")
Expand All @@ -95,9 +100,12 @@ def ensemble(

ensemble_preds = np.asarray(ensemble_preds).T
ensemble_vars = np.asarray(ensemble_vars).T
ensemble_likes = np.asarray(ensemble_likes).T
ensemble_stacks = np.asarray(ensemble_stacks).T
return ensemble_preds, ensemble_vars, ensemble_stacks, \
keypoints_avg_dict, keypoints_var_dict, keypoints_stack_dict
return (
ensemble_preds, ensemble_vars, ensemble_likes, ensemble_stacks,
keypoints_avg_dict, keypoints_var_dict, keypoints_stack_dict,
)


def forward_pass(y, m0, S0, C, R, A, Q, ensemble_vars):
Expand Down
4 changes: 2 additions & 2 deletions eks/ibl_paw_multiview_smoother.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,13 +132,13 @@ def ensemble_kalman_smoother_ibl_paw(
markers_list_right_cam.append(markers_right_cam)

# compute ensemble median left camera
left_cam_ensemble_preds, left_cam_ensemble_vars, left_cam_ensemble_stacks, \
left_cam_ensemble_preds, left_cam_ensemble_vars, _, left_cam_ensemble_stacks, \
left_cam_keypoints_mean_dict, left_cam_keypoints_var_dict, \
left_cam_keypoints_stack_dict = \
ensemble(markers_list_left_cam, keys, avg_mode=ensembling_mode, var_mode='var')

# compute ensemble median right camera
right_cam_ensemble_preds, right_cam_ensemble_vars, right_cam_ensemble_stacks, \
right_cam_ensemble_preds, right_cam_ensemble_vars, _, right_cam_ensemble_stacks, \
right_cam_keypoints_mean_dict, right_cam_keypoints_var_dict, \
right_cam_keypoints_stack_dict = \
ensemble(markers_list_right_cam, keys, avg_mode=ensembling_mode, var_mode='var')
Expand Down
121 changes: 67 additions & 54 deletions eks/ibl_pupil_smoother.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ def fit_eks_pupil(
Returns:
tuple:
df_dicts (dict): Dictionary containing smoothed DataFrames.
df_smotthed (pd.DataFrame):
smooth_params (list): Final smoothing parameters used.
input_dfs_list (list): List of input DataFrames.
keypoint_names (list): List of keypoint names.
Expand All @@ -114,7 +114,7 @@ def fit_eks_pupil(
print(f"Input data loaded for keypoints: {keypoint_names}")

# Run the ensemble Kalman smoother
df_dicts, smooth_params, nll_values = ensemble_kalman_smoother_ibl_pupil(
df_smoothed, smooth_params, nll_values = ensemble_kalman_smoother_ibl_pupil(
markers_list=input_dfs_list,
keypoint_names=keypoint_names,
smooth_params=smooth_params,
Expand All @@ -125,10 +125,10 @@ def fit_eks_pupil(

# Save the output DataFrame to CSV
os.makedirs(os.path.dirname(save_file), exist_ok=True)
df_dicts['markers_df'].to_csv(save_file)
df_smoothed.to_csv(save_file)
print("DataFrames successfully converted to CSV")

return df_dicts, smooth_params, input_dfs_list, keypoint_names, nll_values
return df_smoothed, smooth_params, input_dfs_list, keypoint_names, nll_values


def ensemble_kalman_smoother_ibl_pupil(
Expand Down Expand Up @@ -156,9 +156,8 @@ def ensemble_kalman_smoother_ibl_pupil(
zscore metric (default 2).
Returns:
tuple
dict: markers_df contains smoothed markers; latents_df contains 3d latents: pupil
diameter and pupil center of mass
tuple:
smoothed markers dataframe
final smooth params values
final nll
Expand All @@ -169,8 +168,10 @@ def ensemble_kalman_smoother_ibl_pupil(
'pupil_top_r_x', 'pupil_top_r_y', 'pupil_bottom_r_x', 'pupil_bottom_r_y',
'pupil_right_r_x', 'pupil_right_r_y', 'pupil_left_r_x', 'pupil_left_r_y',
]
ensemble_preds, ensemble_vars, ensemble_stacks, keypoints_mean_dict, keypoints_var_dict, \
keypoints_stack_dict = ensemble(markers_list, keys, avg_mode=avg_mode, var_mode=var_mode)
(
ensemble_preds, ensemble_vars, ensemble_likes, ensemble_stacks,
keypoints_mean_dict, keypoints_var_dict, keypoints_stack_dict,
) = ensemble(markers_list, keys, avg_mode=avg_mode, var_mode=var_mode)

# compute center of mass
pupil_locations = get_pupil_location(keypoints_mean_dict)
Expand Down Expand Up @@ -246,55 +247,67 @@ def ensemble_kalman_smoother_ibl_pupil(
# cleanup
# --------------------------------------

# save out marker info
pdindex = make_dlc_pandas_index(
keypoint_names,
labels=["x", "y", "likelihood", "x_var", "y_var", "zscore"]
)
# collect data
processed_arr_dict = add_mean_to_array(y_m_smooth, keys, mean_x_obs, mean_y_obs)
key_pair_list = [['pupil_top_r_x', 'pupil_top_r_y'],
['pupil_right_r_x', 'pupil_right_r_y'],
['pupil_bottom_r_x', 'pupil_bottom_r_y'],
['pupil_left_r_x', 'pupil_left_r_y']]
key_pair_list = [
['pupil_top_r_x', 'pupil_top_r_y'],
['pupil_right_r_x', 'pupil_right_r_y'],
['pupil_bottom_r_x', 'pupil_bottom_r_y'],
['pupil_left_r_x', 'pupil_left_r_y'],
]
ensemble_indices = [(0, 1), (4, 5), (2, 3), (6, 7)]
pred_arr = []
data_arr = []
for i, key_pair in enumerate(key_pair_list):
pred_arr.append(processed_arr_dict[key_pair[0]])
pred_arr.append(processed_arr_dict[key_pair[1]])
var = np.empty(processed_arr_dict[key_pair[0]].shape)
var[:] = 1.0 # TODO: median of observed likelihoods
pred_arr.append(var)
x_var = y_v_smooth[:, i, i]
y_var = y_v_smooth[:, i + 1, i + 1]
pred_arr.append(x_var)
pred_arr.append(y_var)
# compute zscore for EKS to see how it deviates from the ensemble
eks_predictions = \
np.asarray([processed_arr_dict[key_pair[0]], processed_arr_dict[key_pair[1]]]).T
ensemble_preds_curr = ensemble_preds[:, ensemble_indices[i][0]: ensemble_indices[i][1] + 1]
ensemble_vars_curr = ensemble_vars[:, ensemble_indices[i][0]: ensemble_indices[i][1] + 1]
zscore, _ = eks_zscore(
eks_predictions,
ensemble_preds_curr,
ensemble_vars_curr,
min_ensemble_std=zscore_threshold,
)
pred_arr.append(zscore)
pred_arr = np.asarray(pred_arr)
markers_df = pd.DataFrame(pred_arr.T, columns=pdindex)

# save out latents info: pupil diam, center of mass
pred_arr2 = np.asarray([
ms[:, 0],
ms[:, 1] + mean_x_obs, # add back x mean of pupil location
ms[:, 2] + mean_y_obs, # add back ys mean of pupil location
])
tracker_name = 'ensemble-kalman_tracker'
arrays = [[tracker_name, tracker_name, tracker_name], ['diameter', 'com_x', 'com_y']]
pd_index2 = pd.MultiIndex.from_arrays(arrays, names=('scorer', 'latent'))
latents_df = pd.DataFrame(pred_arr2.T, columns=pd_index2)
# keep track of labels for each data entry
labels = []
# smoothed x vals
data_arr.append(processed_arr_dict[key_pair[0]])
labels.append('x')
# smoothed y vals
data_arr.append(processed_arr_dict[key_pair[1]])
labels.append('y')
# mean likelihood
data_arr.append(ensemble_likes[:, ensemble_indices[i][0]])
labels.append('likelihood')
# x vals ensemble median
data_arr.append(ensemble_preds[:, ensemble_indices[i][0]])
labels.append('x_ens_median')
# y vals ensemble median
data_arr.append(ensemble_preds[:, ensemble_indices[i][1]])
labels.append('y_ens_median')
# x vals ensemble variance
data_arr.append(ensemble_vars[:, ensemble_indices[i][0]])
labels.append('x_ens_var')
# y vals ensemble variance
data_arr.append(ensemble_vars[:, ensemble_indices[i][1]])
labels.append('y_ens_var')
# x vals posterior variance
data_arr.append(y_v_smooth[:, i, i])
labels.append('x_posterior_var')
# y vals posterior variance
data_arr.append(y_v_smooth[:, i + 1, i + 1])
labels.append('y_posterior_var')

return {'markers_df': markers_df, 'latents_df': latents_df}, smooth_params, nll_values
# compute zscore for EKS to see how it deviates from the ensemble
# eks_predictions = \
# np.asarray([processed_arr_dict[key_pair[0]], processed_arr_dict[key_pair[1]]]).T
# ensemble_preds_curr = ensemble_preds[:, ensemble_indices[i][0]: ensemble_indices[i][1] + 1]
# ensemble_vars_curr = ensemble_vars[:, ensemble_indices[i][0]: ensemble_indices[i][1] + 1]
# zscore, _ = eks_zscore(
# eks_predictions,
# ensemble_preds_curr,
# ensemble_vars_curr,
# min_ensemble_std=zscore_threshold,
# )
# data_arr.append(zscore)

data_arr = np.asarray(data_arr)

# put data in dataframe
pdindex = make_dlc_pandas_index(keypoint_names, labels=labels)
markers_df = pd.DataFrame(data_arr.T, columns=pdindex)

return markers_df, smooth_params, nll_values


def pupil_optimize_smooth(
Expand Down
2 changes: 1 addition & 1 deletion eks/multicam_smoother.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ def ensemble_kalman_smoother_multicam(
cam_keypoints_var_dict = []
cam_keypoints_stack_dict = []
for camera in range(num_cameras):
cam_ensemble_preds_curr, cam_ensemble_vars_curr, cam_ensemble_stacks_curr, \
cam_ensemble_preds_curr, cam_ensemble_vars_curr, _, cam_ensemble_stacks_curr, \
cam_keypoints_mean_dict_curr, cam_keypoints_var_dict_curr, \
cam_keypoints_stack_dict_curr = \
ensemble(markers_list_cams[camera], keys, avg_mode=ensembling_mode)
Expand Down
11 changes: 6 additions & 5 deletions eks/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,14 +203,15 @@ def populate_output_dataframe(keypoint_df, keypoint_ensemble, output_df, key_suf
return output_df


def plot_results(output_df, input_dfs_list,
key, s_final, nll_values, idxs, save_dir, smoother_type):
def plot_results(
output_df, input_dfs_list, key, s_final, nll_values, idxs, save_dir, smoother_type,
):
if nll_values is None:
fig, axes = plt.subplots(4, 1, figsize=(9, 10))
fig, axes = plt.subplots(3, 1, figsize=(9, 10))
else:
fig, axes = plt.subplots(5, 1)
fig, axes = plt.subplots(4, 1)

for ax, coord in zip(axes, ['x', 'y', 'likelihood', 'zscore']):
for ax, coord in zip(axes, ['x', 'y', 'likelihood']):
# Rename axes label for likelihood and zscore coordinates
if coord == 'likelihood':
ylabel = 'model likelihoods'
Expand Down
4 changes: 2 additions & 2 deletions scripts/ibl_pupil_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
s_frames = args.s_frames

# Run the smoothing function
df_dicts, smooth_params, input_dfs_list, keypoint_names, nll_values = fit_eks_pupil(
df_smoothed, smooth_params, input_dfs_list, keypoint_names, nll_values = fit_eks_pupil(
input_source=input_source,
save_file=os.path.join(save_dir, save_filename or 'eks_pupil.csv'),
smooth_params=[diameter_s, com_s],
Expand All @@ -38,7 +38,7 @@

# Plot results
plot_results(
output_df=df_dicts['markers_df'],
output_df=df_smoothed,
input_dfs_list=input_dfs_list,
key=f'{keypoint_names[-1]}',
idxs=(0, 500),
Expand Down
18 changes: 10 additions & 8 deletions tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@ def test_ensemble():
# Simulate marker data with three models, each with two keypoints and 5 samples
np.random.seed(0)
num_samples = 5
num_keypoints = 2
markers_list = []
keys = ['keypoint_1_x', 'keypoint_2_x']
num_keypoints = len(keys)

# Create random data for three different marker DataFrames
# Adjust column names to match the function's expected 'keypoint_likelihood' format
Expand All @@ -28,18 +28,20 @@ def test_ensemble():
markers_list.append(pd.DataFrame(data))

# Run the ensemble function with 'median' mode
ensemble_preds, ensemble_vars, ensemble_stacks, keypoints_avg_dict, \
ensemble_preds, ensemble_vars, ensemble_likes, ensemble_stacks, keypoints_avg_dict, \
keypoints_var_dict, keypoints_stack_dict = ensemble(
markers_list, keys, avg_mode='median', var_mode='var',
)

# Verify shapes of output arrays
assert ensemble_preds.shape == (num_samples, num_keypoints), \
f"Expected shape {(num_samples, num_keypoints)}, got {ensemble_preds.shape}"
f"Means expected shape {(num_samples, num_keypoints)}, got {ensemble_preds.shape}"
assert ensemble_vars.shape == (num_samples, num_keypoints), \
f"Expected shape {(num_samples, num_keypoints)}, got {ensemble_vars.shape}"
f"Vars expected shape {(num_samples, num_keypoints)}, got {ensemble_vars.shape}"
assert ensemble_likes.shape == (num_samples, num_keypoints), \
f"Likes expected shape {(num_samples, num_keypoints)}, got {ensemble_likes.shape}"
assert ensemble_stacks.shape == (3, num_samples, num_keypoints), \
f"Expected shape {(3, num_samples, num_keypoints)}, got {ensemble_stacks.shape}"
f"Stacks expected shape {(3, num_samples, num_keypoints)}, got {ensemble_stacks.shape}"

# Verify contents of dictionaries
assert set(keypoints_avg_dict.keys()) == set(keys), \
Expand All @@ -50,7 +52,7 @@ def test_ensemble():
f"Expected 3 models, got {len(keypoints_stack_dict)}"

# Run the ensemble function with avg_mode='mean' and var_mode='conf_weighted_var'
ensemble_preds, ensemble_vars, ensemble_stacks, keypoints_avg_dict, \
ensemble_preds, ensemble_vars, ensemble_likes, ensemble_stacks, keypoints_avg_dict, \
keypoints_var_dict, keypoints_stack_dict = ensemble(
markers_list, keys, avg_mode='mean', var_mode='conf_weighted_var',
)
Expand All @@ -60,9 +62,9 @@ def test_ensemble():
expected_mean = np.nanmean(stack, axis=1)
expected_variance = 2.0 * np.nanvar(stack, axis=1) # 2x since likelihoods all 0.5
assert np.allclose(keypoints_avg_dict[key], expected_mean), \
f"Expected {expected_mean} for {key}, got {keypoints_avg_dict[key]}"
f"Means expected {expected_mean} for {key}, got {keypoints_avg_dict[key]}"
assert np.allclose(keypoints_var_dict[key], expected_variance), \
f"Expected {expected_variance} for {key}, got {keypoints_var_dict[key]}"
f"Vars expected {expected_variance} for {key}, got {keypoints_var_dict[key]}"


def test_kalman_dot_basic():
Expand Down
Loading

0 comments on commit 01bd293

Please sign in to comment.