Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pearson correlation distance metric #2

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
include_package_data=True,
license="GNU General Public License v3.0",
packages=['xpysom'],
install_requires=['numpy'],
install_requires=['numpy', 'scipy', 'tqdm'],
extras_require={
'cuda90': ['cupy-cuda90'],
'cuda92': ['cupy-cuda92'],
Expand Down
8 changes: 8 additions & 0 deletions xpysom/distances.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,13 @@ def euclidean_distance(x, w, w_sq=None, xp=default_xp):
)
)

def correlation_distance(x, w, xp=default_xp):
    """Calculate Pearson correlation distance.

    For every pair (row of ``x``, row of ``w``) the result is ``1 - r``,
    where ``r`` is the Pearson correlation coefficient of the two rows.

    Only the cross block between ``x`` and ``w`` is computed. Calling
    ``xp.corrcoef(x, w)`` and slicing would also build the x-vs-x and
    w-vs-w blocks, i.e. an (N + X*Y) x (N + X*Y) matrix, most of which
    is thrown away.

    Parameters
    ----------
    x : array, one observation vector per row (N rows).
    w : array, one weight vector per row (X*Y rows), same row length as x.
    xp : array module to use (numpy or cupy).

    Returns
    -------
    array of correlation distances.

    NB: result shape is (N,X*Y)
    NB: a row with zero variance has an undefined correlation; its
    distances come out as NaN (0/0).
    """
    # corrcoef works in (at least) float64 regardless of the input dtype;
    # keep that so the result dtype does not change for callers.
    dtype = xp.result_type(x.dtype, w.dtype, xp.float64)

    # Centre every row on its own mean.
    x_c = x.astype(dtype, copy=False)
    x_c = x_c - x_c.mean(axis=1, keepdims=True)
    w_c = w.astype(dtype, copy=False)
    w_c = w_c - w_c.mean(axis=1, keepdims=True)

    # Row norms of the centred data: (N, 1) and (X*Y, 1).
    x_norm = xp.sqrt((x_c * x_c).sum(axis=1, keepdims=True))
    w_norm = xp.sqrt((w_c * w_c).sum(axis=1, keepdims=True))

    # Pearson r = <x_c, w_c> / (|x_c| * |w_c|), evaluated for all pairs.
    corr = xp.dot(x_c, w_c.T) / (x_norm * w_norm.T)

    # Rounding can push |r| slightly above 1; clip like corrcoef does.
    return 1 - xp.clip(corr, -1, 1)

def cosine_distance(x, w, w_sq=None, xp=default_xp):
"""Calculate cosine distance

Expand Down Expand Up @@ -163,6 +170,7 @@ def __init__(self, name, kwargs, xp):
'euclidean_no_opt': euclidean_squared_distance,
'manhattan': manhattan_distance,
'manhattan_no_opt': manhattan_distance_no_opt,
'correlation': correlation_distance,
'cosine': cosine_distance,
'norm_p': norm_p_power_distance,
'norm_p_no_opt': norm_p_power_distance_generic,
Expand Down
7 changes: 7 additions & 0 deletions xpysom/test_distances.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import numpy as np
import scipy.spatial.distance as distance

XPS = [np]
try:
Expand All @@ -12,6 +13,7 @@
euclidean_squared_distance_part,
euclidean_squared_distance,
euclidean_distance,
correlation_distance,
cosine_distance,
manhattan_distance,
norm_p_power_distance,
Expand Down Expand Up @@ -105,6 +107,11 @@ def get_inputs():
lambda vx, vy: np.linalg.norm(vx - vy),
{},
),
(
correlation_distance,
lambda vx, vy: distance.correlation(vx, vy),
{},
),
(
cosine_distance,
lambda vx, vy: 1 - np.nan_to_num(
Expand Down
14 changes: 13 additions & 1 deletion xpysom/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from minisom import MiniSom

from .xpysom import XPySom
from .distances import cosine_distance, manhattan_distance, euclidean_squared_distance
from .distances import correlation_distance, cosine_distance, manhattan_distance, euclidean_squared_distance
from .neighborhoods import gaussian_generic, gaussian_rect, mexican_hat_generic, mexican_hat_rect, bubble, triangle, prepare_neig_func

import pickle
Expand Down Expand Up @@ -158,6 +158,18 @@ def test_euclidean_distance(self):
ms_dist = self.minisom._euclidean_distance(sample, w)**2
np.testing.assert_array_almost_equal(ms_dist, cs_dist[i])

def test_correlation_distance(self):
    """Check that the backend under test agrees with plain numpy.

    Random samples and a random weight grid are pushed through
    ``correlation_distance`` once with ``self.xp`` and once with numpy;
    both results are compared per (sample, row, col) entry.
    """
    n_samples, n_rows, n_cols, n_features = 100, 10, 10, 20
    grid_shape = (n_samples, n_rows, n_cols)

    x = np.random.rand(n_samples, n_features)
    w = np.random.rand(n_rows, n_cols, n_features)
    # Flatten the map grid so each unit is one row of features.
    w_flat = w.reshape(-1, w.shape[2])

    reference = correlation_distance(np.array(x), np.array(w_flat), xp=np)
    candidate = correlation_distance(
        self.xp.array(x), self.xp.array(w_flat), xp=self.xp
    )
    if self.xp.__name__ == 'cupy':
        # Bring the device result back to host memory before comparing.
        candidate = cp.asnumpy(candidate)

    np.testing.assert_array_almost_equal(
        reference.reshape(grid_shape),
        candidate.reshape(grid_shape),
    )

def test_cosine_distance(self):
x = np.random.rand(100, 20)
w = np.random.rand(10,10,20)
Expand Down
5 changes: 3 additions & 2 deletions xpysom/xpysom.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import os

import numpy as np
from tqdm import tqdm
try:
import cupy as cp
default_xp = cp
Expand Down Expand Up @@ -110,7 +111,7 @@ def __init__(self, x, y, input_len,

activation_distance : string, optional (default='euclidean')
Distance used to activate the map.
Possible values: 'euclidean', 'cosine', 'manhattan', 'norm_p'
Possible values: 'euclidean', 'correlation', 'cosine', 'manhattan', 'norm_p'

activation_distance_kwargs : dict, optional (default={})
Pass additional arguments to distance function.
Expand Down Expand Up @@ -447,7 +448,7 @@ def train(self, data, num_epochs, iter_beg=0, iter_end=None, verbose=False):
if verbose:
print_progress(-1, num_epochs*len(data))

for iteration in range(iter_beg, iter_end):
for iteration in tqdm(range(iter_beg, iter_end)):
try: # reuse already allocated memory
self._numerator_gpu.fill(0)
self._denominator_gpu.fill(0)
Expand Down