cka.py
# centered kernel alignment
# CVU 2019
import hoggorm as ho
import numpy as np
from sklearn.metrics.pairwise import linear_kernel


def center_kernel(K, copy=True):
    '''
    Centered version of a kernel matrix, corresponding to centering the
    implicit feature map.
    '''
    # Double centering: K - 1 m^T - m 1^T + mean(K), assuming K is symmetric.
    means = K.mean(axis=0)
    if copy:
        K = K - means[None, :]
    else:
        K -= means[None, :]
    K -= means[:, None]
    K += means.mean()
    return K
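
# Usage sketch (illustrative; X below is a hypothetical data matrix, not a
# name used elsewhere in this module): for a symmetric kernel this is the
# usual double centering, so the row and column means of the result should be
# numerically zero, e.g.
#
#     Kc = center_kernel(linear_kernel(X, X))
#     np.allclose(Kc.mean(axis=0), 0)   # expected True
#     np.allclose(Kc.mean(axis=1), 0)   # expected True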


def align(X, Y):
    '''
    Applies a linear kernel to X and to Y to generate K1 and K2,
    then centers each of these kernels.
    Returns the kernel alignment
        <K1, K2>_F / (||K1||_F ||K2||_F)
    defined by
        Cristianini, Shawe-Taylor, Elisseeff, and Kandola (2001).
        On Kernel-Target Alignment. NIPS.
    Note that the centered kernel alignment of
        Cortes, Mohri, and Rostamizadeh (2012).
        Algorithms for Learning Kernels Based on Centered Alignment. JMLR 13.
    is just this applied to center_kernel()s.
    '''
    K1 = center_kernel(linear_kernel(X, X))
    K2 = center_kernel(linear_kernel(Y, Y))
    return np.sum(K1 * K2) / np.linalg.norm(K1) / np.linalg.norm(K2)
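
# Usage sketch (illustrative; X and Y are hypothetical representations of the
# same n samples, with shapes (n, d1) and (n, d2)):
#
#     score = align(X, Y)
#
# Since both centered linear kernels are positive semi-definite, `score` lies
# in [0, 1] and equals 1 exactly when the two centered kernels are
# proportional.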


def cka(dataList):
    """
    This function computes the RV matrix correlation coefficients between pairs
    of arrays. The number and order of objects (rows) must match across all
    arrays; the number of variables (columns) in each array may vary.
    Reference: `The STATIS method`_
    .. _The STATIS method: https://www.utdallas.edu/~herve/Abdi-Statis2007-pretty.pdf
    PARAMETERS
    ----------
    dataList : list
        A list holding numpy arrays for which the RV coefficient will be computed.
    RETURNS
    -------
    numpy array
        A numpy array holding RV coefficients for pairs of numpy arrays. The
        diagonal in the result array holds ones, since RV is computed on
        identical arrays, i.e. the first array in ``dataList`` against the
        first array in ``dataList``, etc.
    Examples
    --------
    >>> import hoggorm as ho
    >>> import numpy as np
    >>>
    >>> # Generate some random data. Note that number of rows must match across arrays
    >>> arr1 = np.random.rand(50, 100)
    >>> arr2 = np.random.rand(50, 20)
    >>> arr3 = np.random.rand(50, 500)
    >>>
    >>> # Center the data before computation of RV coefficients
    >>> arr1_cent = arr1 - np.mean(arr1, axis=0)
    >>> arr2_cent = arr2 - np.mean(arr2, axis=0)
    >>> arr3_cent = arr3 - np.mean(arr3, axis=0)
    >>>
    >>> # Compute RV matrix correlation coefficients on mean centered data
    >>> rv_results = ho.RVcoeff([arr1_cent, arr2_cent, arr3_cent])
    >>> rv_results
    array([[ 1.        ,  0.41751839,  0.77769025],
           [ 0.41751839,  1.        ,  0.51194496],
           [ 0.77769025,  0.51194496,  1.        ]])
    >>>
    >>> # Get RV for arr1_cent and arr2_cent
    >>> rv_results[0, 1]
    0.41751838661314689
    >>>
    >>> # or
    >>> rv_results[1, 0]
    0.41751838661314689
    >>>
    >>> # Get RV for arr2_cent and arr3_cent
    >>> rv_results[1, 2]
    0.51194496245209853
    >>>
    >>> # or
    >>> rv_results[2, 1]
    0.51194496245209853
    """
    # First compute the scalar product matrices for each data set X
    scalArrList = []
    for arr in dataList:
        # Center the data (without modifying the caller's arrays in place)
        arr = arr - np.mean(arr, axis=0)
        scalArr = np.dot(arr, np.transpose(arr))
        scalArrList.append(scalArr)
    # Now compute the 'between study cosine matrix' C
    C = np.zeros((len(dataList), len(dataList)), float)
    for index, element in np.ndenumerate(C):
        nom = np.trace(np.dot(np.transpose(scalArrList[index[0]]),
                              scalArrList[index[1]]))
        denom1 = np.trace(np.dot(np.transpose(scalArrList[index[0]]),
                                 scalArrList[index[0]]))
        denom2 = np.trace(np.dot(np.transpose(scalArrList[index[1]]),
                                 scalArrList[index[1]]))
        Rv = nom / np.sqrt(denom1 * denom2)
        C[index[0], index[1]] = Rv
    return C
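

if __name__ == '__main__':
    # Minimal usage sketch; the shapes and seed below are arbitrary
    # assumptions chosen for illustration. All arrays must share the same
    # number of rows (samples).
    rng = np.random.RandomState(0)
    X = rng.rand(50, 100)
    Y = rng.rand(50, 20)
    Z = rng.rand(50, 500)

    # Centered (linear) kernel alignment between pairs of representations.
    print('align(X, Y) =', align(X, Y))
    print('align(X, X) =', align(X, X))  # alignment of a kernel with itself is 1

    # RV coefficient matrix over the whole list; the diagonal is all ones.
    print('cka([X, Y, Z]) =')
    print(cka([X, Y, Z]))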