From 1b8b3f605c0d2837253bf8469a50fe3753a01e09 Mon Sep 17 00:00:00 2001
From: Malte Kuehl <malte.benedikt.kuehl@studium.uni-hamburg.de>
Date: Mon, 10 Jul 2023 14:52:55 +0200
Subject: [PATCH 1/3] Add Pearson correlation distance metric to XPySom.

---
 xpysom/distances.py      |  8 ++++++++
 xpysom/test_distances.py | 10 ++++++++++
 xpysom/tests.py          | 14 +++++++++++++-
 xpysom/xpysom.py         |  2 +-
 4 files changed, 32 insertions(+), 2 deletions(-)

diff --git a/xpysom/distances.py b/xpysom/distances.py
index 8f6bb56..a576719 100644
--- a/xpysom/distances.py
+++ b/xpysom/distances.py
@@ -41,6 +41,13 @@ def euclidean_distance(x, w, w_sq=None, xp=default_xp):
         )
     )
 
+def correlation_distance(x, w, xp=default_xp):
+    """Calculate Pearson correlation distance (1 - r) of each row of x to each row of w
+
+    NB: result shape is (N,X*Y); corrcoef runs on x and w stacked as row variables
+    """
+    return 1 - xp.corrcoef(x, w)[:x.shape[0], x.shape[0]:]  # keep only the x-vs-w block
+
 def cosine_distance(x, w, w_sq=None, xp=default_xp):
     """Calculate cosine distance
 
@@ -163,6 +170,7 @@ def __init__(self, name, kwargs, xp):
             'euclidean_no_opt': euclidean_squared_distance,
             'manhattan': manhattan_distance,
             'manhattan_no_opt': manhattan_distance_no_opt,
+            'correlation': correlation_distance,
             'cosine': cosine_distance,
             'norm_p': norm_p_power_distance,
             'norm_p_no_opt': norm_p_power_distance_generic,
diff --git a/xpysom/test_distances.py b/xpysom/test_distances.py
index 542867f..55c78fb 100644
--- a/xpysom/test_distances.py
+++ b/xpysom/test_distances.py
@@ -12,6 +12,7 @@
     euclidean_squared_distance_part,
     euclidean_squared_distance,
     euclidean_distance,
+    correlation_distance,
     cosine_distance,
     manhattan_distance,
     norm_p_power_distance,
@@ -105,6 +106,15 @@ def get_inputs():
         lambda vx, vy: np.linalg.norm(vx - vy),
         {},
     ),
+    (
+        correlation_distance,
+        lambda vx, vy: 1 - np.nan_to_num(
+            np.dot(vx - np.mean(vx), vy - np.mean(vy)) / (
+                np.linalg.norm(vx - np.mean(vx)) * np.linalg.norm(vy - np.mean(vy))
+            )
+        ),
+        {},
+    ),
     (
         cosine_distance,
         lambda vx, vy: 1 - np.nan_to_num(
diff --git a/xpysom/tests.py b/xpysom/tests.py
index c1dc80b..875b122 100644
--- a/xpysom/tests.py
+++ b/xpysom/tests.py
@@ -11,7 +11,7 @@
 from minisom import MiniSom
 
 from .xpysom import XPySom
-from .distances import cosine_distance, manhattan_distance, euclidean_squared_distance
+from .distances import correlation_distance, cosine_distance, manhattan_distance, euclidean_squared_distance
 from .neighborhoods import gaussian_generic, gaussian_rect, mexican_hat_generic, mexican_hat_rect, bubble, triangle, prepare_neig_func
 
 import pickle
@@ -158,6 +158,18 @@ def test_euclidean_distance(self):
             ms_dist = self.minisom._euclidean_distance(sample, w)**2
             np.testing.assert_array_almost_equal(ms_dist, cs_dist[i])
 
+    def test_correlation_distance(self):  # result from the self.xp backend must match the NumPy result
+        x = np.random.rand(100, 20)  # 100 random input rows of length 20
+        w = np.random.rand(10,10,20)  # random (10, 10, 20) array; presumably the SOM weight grid
+        w_flat = w.reshape(-1, w.shape[2])  # collapse the first two axes -> (100, 20)
+        cs_dist = correlation_distance(self.xp.array(x), self.xp.array(w_flat), xp=self.xp)
+        np_dist = correlation_distance(np.array(x), np.array(w_flat), xp=np)  # NumPy reference value
+        if self.xp.__name__ == 'cupy':
+            cs_dist = cp.asnumpy(cs_dist)  # convert the CuPy result to a NumPy array before comparing
+        cs_dist = cs_dist.reshape((100,10,10))  # (rows of x, 10, 10)
+        np_dist = np_dist.reshape((100,10,10))
+        np.testing.assert_array_almost_equal(np_dist, cs_dist)  # NOTE(review): compares the function with itself when self.xp is numpy
+
     def test_cosine_distance(self):
         x = np.random.rand(100, 20)
         w = np.random.rand(10,10,20)
diff --git a/xpysom/xpysom.py b/xpysom/xpysom.py
index e4bbc99..aad1bca 100644
--- a/xpysom/xpysom.py
+++ b/xpysom/xpysom.py
@@ -110,7 +110,7 @@ def __init__(self, x, y, input_len,
 
         activation_distance : string, optional (default='euclidean')
             Distance used to activate the map.
-            Possible values: 'euclidean', 'cosine', 'manhattan', 'norm_p'
+            Possible values: 'euclidean', 'correlation', 'cosine', 'manhattan', 'norm_p'
 
         activation_distance_kwargs : dict, optional (default={})
             Pass additional argumets to distance function.

From 2a02c7c41d70791ed909421299ed7aa5f0b60941 Mon Sep 17 00:00:00 2001
From: Malte Benedikt Kuehl <malte.kuehl@clin.au.dk>
Date: Mon, 10 Jul 2023 15:18:16 +0200
Subject: [PATCH 2/3] Use scipy correlation distance as test metric to compare
 Pearson correlation against. All tests now passing.

---
 setup.py                 | 2 +-
 xpysom/test_distances.py | 7 ++-----
 2 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/setup.py b/setup.py
index 5c6d0bc..ef525ab 100644
--- a/setup.py
+++ b/setup.py
@@ -21,7 +21,7 @@
       include_package_data=True,
       license="GNU General Public License v3.0",
       packages=['xpysom'],
-      install_requires=['numpy'],
+      install_requires=['numpy', 'scipy'],
       extras_require={
             'cuda90': ['cupy-cuda90'],
             'cuda92': ['cupy-cuda92'],
diff --git a/xpysom/test_distances.py b/xpysom/test_distances.py
index 55c78fb..551b8f1 100644
--- a/xpysom/test_distances.py
+++ b/xpysom/test_distances.py
@@ -1,4 +1,5 @@
 import numpy as np
+import scipy.spatial.distance as distance
 
 XPS = [np]
 try:
@@ -108,11 +109,7 @@ def get_inputs():
     ),
     (
         correlation_distance,
-        lambda vx, vy: 1 - np.nan_to_num(
-            np.dot(vx - np.mean(vx), vy - np.mean(vy)) / (
-                np.linalg.norm(vx - np.mean(vx)) * np.linalg.norm(vy - np.mean(vy))
-            )
-        ),
+        lambda vx, vy: distance.correlation(vx, vy),
         {},
     ),
     (

From f445628b4eb55ee925d21ab00b5c736738a38813 Mon Sep 17 00:00:00 2001
From: Malte Kuehl <malte.benedikt.kuehl@studium.uni-hamburg.de>
Date: Wed, 20 Sep 2023 16:29:07 +0200
Subject: [PATCH 3/3] Add tqdm to fit routine.

---
 setup.py         | 2 +-
 xpysom/xpysom.py | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/setup.py b/setup.py
index ef525ab..9dcf706 100644
--- a/setup.py
+++ b/setup.py
@@ -21,7 +21,7 @@
       include_package_data=True,
       license="GNU General Public License v3.0",
       packages=['xpysom'],
-      install_requires=['numpy', 'scipy'],
+      install_requires=['numpy', 'scipy', 'tqdm'],
       extras_require={
             'cuda90': ['cupy-cuda90'],
             'cuda92': ['cupy-cuda92'],
diff --git a/xpysom/xpysom.py b/xpysom/xpysom.py
index aad1bca..d599b9a 100644
--- a/xpysom/xpysom.py
+++ b/xpysom/xpysom.py
@@ -10,6 +10,7 @@
 import os
 
 import numpy as np
+from tqdm import tqdm
 try:
     import cupy as cp
     default_xp = cp
@@ -447,7 +448,7 @@ def train(self, data, num_epochs, iter_beg=0, iter_end=None, verbose=False):
         if verbose:
             print_progress(-1, num_epochs*len(data))
 
-        for iteration in range(iter_beg, iter_end):
+        for iteration in tqdm(range(iter_beg, iter_end)):
             try: # reuse already allocated memory
                 self._numerator_gpu.fill(0)
                 self._denominator_gpu.fill(0)