From 1b8b3f605c0d2837253bf8469a50fe3753a01e09 Mon Sep 17 00:00:00 2001 From: Malte Kuehl Date: Mon, 10 Jul 2023 14:52:55 +0200 Subject: [PATCH 1/3] Add Pearson correlation distance metric to XPySom. --- xpysom/distances.py | 8 ++++++++ xpysom/test_distances.py | 10 ++++++++++ xpysom/tests.py | 14 +++++++++++++- xpysom/xpysom.py | 2 +- 4 files changed, 32 insertions(+), 2 deletions(-) diff --git a/xpysom/distances.py b/xpysom/distances.py index 8f6bb56..a576719 100644 --- a/xpysom/distances.py +++ b/xpysom/distances.py @@ -41,6 +41,13 @@ def euclidean_distance(x, w, w_sq=None, xp=default_xp): ) ) +def correlation_distance(x, w, xp=default_xp): + """Calculate Pearson correlation distance + + NB: result shape is (N,X*Y) + """ + return 1 - xp.corrcoef(x, w)[:x.shape[0], x.shape[0]:] + def cosine_distance(x, w, w_sq=None, xp=default_xp): """Calculate cosine distance @@ -163,6 +170,7 @@ def __init__(self, name, kwargs, xp): 'euclidean_no_opt': euclidean_squared_distance, 'manhattan': manhattan_distance, 'manhattan_no_opt': manhattan_distance_no_opt, + 'correlation': correlation_distance, 'cosine': cosine_distance, 'norm_p': norm_p_power_distance, 'norm_p_no_opt': norm_p_power_distance_generic, diff --git a/xpysom/test_distances.py b/xpysom/test_distances.py index 542867f..55c78fb 100644 --- a/xpysom/test_distances.py +++ b/xpysom/test_distances.py @@ -12,6 +12,7 @@ euclidean_squared_distance_part, euclidean_squared_distance, euclidean_distance, + correlation_distance, cosine_distance, manhattan_distance, norm_p_power_distance, @@ -105,6 +106,15 @@ def get_inputs(): lambda vx, vy: np.linalg.norm(vx - vy), {}, ), + ( + correlation_distance, + lambda vx, vy: 1 - np.nan_to_num( + np.dot(vx - np.mean(vx), vy - np.mean(vy)) / ( + np.linalg.norm(vx - np.mean(vx)) * np.linalg.norm(vy - np.mean(vy)) + ) + ), + {}, + ), ( cosine_distance, lambda vx, vy: 1 - np.nan_to_num( diff --git a/xpysom/tests.py b/xpysom/tests.py index c1dc80b..875b122 100644 --- a/xpysom/tests.py +++ b/xpysom/tests.py @@ -11,7 +11,7 @@ from minisom import MiniSom from .xpysom import XPySom -from .distances import cosine_distance, manhattan_distance, euclidean_squared_distance +from .distances import correlation_distance, cosine_distance, manhattan_distance, euclidean_squared_distance from .neighborhoods import gaussian_generic, gaussian_rect, mexican_hat_generic, mexican_hat_rect, bubble, triangle, prepare_neig_func import pickle @@ -158,6 +158,18 @@ def test_euclidean_distance(self): ms_dist = self.minisom._euclidean_distance(sample, w)**2 np.testing.assert_array_almost_equal(ms_dist, cs_dist[i]) + def test_correlation_distance(self): + x = np.random.rand(100, 20) + w = np.random.rand(10,10,20) + w_flat = w.reshape(-1, w.shape[2]) + cs_dist = correlation_distance(self.xp.array(x), self.xp.array(w_flat), xp=self.xp) + np_dist = correlation_distance(np.array(x), np.array(w_flat), xp=np) + if self.xp.__name__ == 'cupy': + cs_dist = cp.asnumpy(cs_dist) + cs_dist = cs_dist.reshape((100,10,10)) + np_dist = np_dist.reshape((100,10,10)) + np.testing.assert_array_almost_equal(np_dist, cs_dist) + def test_cosine_distance(self): x = np.random.rand(100, 20) w = np.random.rand(10,10,20) diff --git a/xpysom/xpysom.py b/xpysom/xpysom.py index e4bbc99..aad1bca 100644 --- a/xpysom/xpysom.py +++ b/xpysom/xpysom.py @@ -110,7 +110,7 @@ def __init__(self, x, y, input_len, activation_distance : string, optional (default='euclidean') Distance used to activate the map. - Possible values: 'euclidean', 'cosine', 'manhattan', 'norm_p' + Possible values: 'euclidean', 'correlation', 'cosine', 'manhattan', 'norm_p' activation_distance_kwargs : dict, optional (default={}) Pass additional argumets to distance function. From 2a02c7c41d70791ed909421299ed7aa5f0b60941 Mon Sep 17 00:00:00 2001 From: Malte Benedikt Kuehl Date: Mon, 10 Jul 2023 15:18:16 +0200 Subject: [PATCH 2/3] Use scipy correlation distance as test metric to compare Pearson correlation against. All tests now passing. --- setup.py | 2 +- xpysom/test_distances.py | 7 ++----- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/setup.py b/setup.py index 5c6d0bc..ef525ab 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,7 @@ include_package_data=True, license="GNU General Public License v3.0", packages=['xpysom'], - install_requires=['numpy'], + install_requires=['numpy', 'scipy'], extras_require={ 'cuda90': ['cupy-cuda90'], 'cuda92': ['cupy-cuda92'], diff --git a/xpysom/test_distances.py b/xpysom/test_distances.py index 55c78fb..551b8f1 100644 --- a/xpysom/test_distances.py +++ b/xpysom/test_distances.py @@ -1,4 +1,5 @@ import numpy as np +import scipy.spatial.distance as distance XPS = [np] try: @@ -108,11 +109,7 @@ def get_inputs(): ), ( correlation_distance, - lambda vx, vy: 1 - np.nan_to_num( - np.dot(vx - np.mean(vx), vy - np.mean(vy)) / ( - np.linalg.norm(vx - np.mean(vx)) * np.linalg.norm(vy - np.mean(vy)) - ) - ), + lambda vx, vy: distance.correlation(vx, vy), {}, ), ( From f445628b4eb55ee925d21ab00b5c736738a38813 Mon Sep 17 00:00:00 2001 From: Malte Kuehl Date: Wed, 20 Sep 2023 16:29:07 +0200 Subject: [PATCH 3/3] Add tqdm to fit routine. --- setup.py | 2 +- xpysom/xpysom.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index ef525ab..9dcf706 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,7 @@ include_package_data=True, license="GNU General Public License v3.0", packages=['xpysom'], - install_requires=['numpy', 'scipy'], + install_requires=['numpy', 'scipy', 'tqdm'], extras_require={ 'cuda90': ['cupy-cuda90'], 'cuda92': ['cupy-cuda92'], diff --git a/xpysom/xpysom.py b/xpysom/xpysom.py index aad1bca..d599b9a 100644 --- a/xpysom/xpysom.py +++ b/xpysom/xpysom.py @@ -10,6 +10,7 @@ import os import numpy as np +from tqdm import tqdm try: import cupy as cp default_xp = cp @@ -447,7 +448,7 @@ def train(self, data, num_epochs, iter_beg=0, iter_end=None, verbose=False): if verbose: print_progress(-1, num_epochs*len(data)) - for iteration in range(iter_beg, iter_end): + for iteration in tqdm(range(iter_beg, iter_end)): try: # reuse already allocated memory self._numerator_gpu.fill(0) self._denominator_gpu.fill(0)