From 48587e359cf49781d20faa1ddd5f5eb801d3dc62 Mon Sep 17 00:00:00 2001 From: Rafael Hautekiet Date: Sun, 21 Oct 2018 14:54:00 +0200 Subject: [PATCH 1/7] initial implementation of sparse sim matrix using support vectors --- tslearn/svm.py | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/tslearn/svm.py b/tslearn/svm.py index 2b216704f..803871a34 100644 --- a/tslearn/svm.py +++ b/tslearn/svm.py @@ -32,6 +32,30 @@ def _kernel_func_gak(sz, d, gamma): gamma = 1. return lambda x, y: cdist_gak(x.reshape((-1, sz, d)), y.reshape((-1, sz, d)), sigma=numpy.sqrt(gamma / 2.)) +def _sparse_kernel_func_gak(sz, d, gamma, slice_support_vectors=None): + if gamma == "auto": + gamma = 1. + + def sparse_gak(X, X_fit): + + if X is X_fit: + X = X.reshape((-1, sz, d)) + return cdist_gak(X, X, sigma=numpy.sqrt(gamma / 2.)) + + if slice_support_vectors is not None: + # slice out support vectors and only compute cross sim with them + sliced_X_fit = X_fit[slice_support_vectors] + gak_sim_dense = cdist_gak(X.reshape((-1, sz, d)), sliced_X_fit.reshape((-1, sz, d)), sigma=numpy.sqrt(gamma / 2.)) + + # act like nothing has happend ... + gak_sim = numpy.empty((len(X), len(X_fit))) + gak_sim[:, slice_support_vectors] = gak_sim_dense + + return gak_sim + + return cdist_gak(X.reshape((-1, sz, d)), X_fit.reshape((-1, sz, d)), sigma=numpy.sqrt(gamma / 2.)) + + return sparse_gak class TimeSeriesSVC(BaseSVC): """Time-series specific Support Vector Classifier. 
@@ -148,7 +172,7 @@ def __init__(self, sz, d, C=1.0, kernel="gak", degree=3, gamma="auto", coef0=0.0 self.sz = sz self.d = d if kernel == "gak": - kernel = _kernel_func_gak(sz=sz, d=d, gamma=gamma) + kernel = _sparse_kernel_func_gak(sz=sz, d=d, gamma=gamma) super(TimeSeriesSVC, self).__init__(C=C, kernel=kernel, degree=degree, gamma=gamma, coef0=coef0, shrinking=shrinking, probability=probability, tol=tol, cache_size=cache_size, class_weight=class_weight, verbose=verbose, @@ -169,8 +193,10 @@ def fit(self, X, y, sample_weight=None): sklearn_X = _prepare_ts_datasets_sklearn(X) if self.kernel == "gak" and self.gamma == "auto": self.gamma = gamma_soft_dtw(to_time_series_dataset(X)) - self.kernel = _kernel_func_gak(sz=self.sz, d=self.d, gamma=self.gamma) - return super(TimeSeriesSVC, self).fit(sklearn_X, y, sample_weight=sample_weight) + self.kernel = _sparse_kernel_func_gak(sz=self.sz, d=self.d, gamma=self.gamma) + _self = super(TimeSeriesSVC, self).fit(sklearn_X, y, sample_weight=sample_weight) + self.kernel = _sparse_kernel_func_gak(sz=self.sz, d=self.d, gamma=self.gamma, slice_support_vectors=self.support_) + return _self def predict(self, X): sklearn_X = _prepare_ts_datasets_sklearn(X) From c86fef3e834d246f8d8928c3e366f432d07794f7 Mon Sep 17 00:00:00 2001 From: Rafael Hautekiet Date: Sun, 21 Oct 2018 15:15:25 +0200 Subject: [PATCH 2/7] dataset2 should be set to None to trigger self_similarity --- tslearn/svm.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tslearn/svm.py b/tslearn/svm.py index 803871a34..b489347a8 100644 --- a/tslearn/svm.py +++ b/tslearn/svm.py @@ -40,7 +40,7 @@ def sparse_gak(X, X_fit): if X is X_fit: X = X.reshape((-1, sz, d)) - return cdist_gak(X, X, sigma=numpy.sqrt(gamma / 2.)) + return cdist_gak(X, None, sigma=numpy.sqrt(gamma / 2.)) if slice_support_vectors is not None: # slice out support vectors and only compute cross sim with them @@ -173,6 +173,7 @@ def __init__(self, sz, d, C=1.0, kernel="gak", degree=3, 
gamma="auto", coef0=0.0 self.d = d if kernel == "gak": kernel = _sparse_kernel_func_gak(sz=sz, d=d, gamma=gamma) + # kernel = _kernel_func_gak(sz=sz, d=d, gamma=gamma) super(TimeSeriesSVC, self).__init__(C=C, kernel=kernel, degree=degree, gamma=gamma, coef0=coef0, shrinking=shrinking, probability=probability, tol=tol, cache_size=cache_size, class_weight=class_weight, verbose=verbose, @@ -193,8 +194,10 @@ def fit(self, X, y, sample_weight=None): sklearn_X = _prepare_ts_datasets_sklearn(X) if self.kernel == "gak" and self.gamma == "auto": self.gamma = gamma_soft_dtw(to_time_series_dataset(X)) + # self.kernel = _kernel_func_gak(sz=self.sz, d=self.d, gamma=self.gamma) self.kernel = _sparse_kernel_func_gak(sz=self.sz, d=self.d, gamma=self.gamma) _self = super(TimeSeriesSVC, self).fit(sklearn_X, y, sample_weight=sample_weight) + # self.kernel = _kernel_func_gak(sz=self.sz, d=self.d, gamma=self.gamma) self.kernel = _sparse_kernel_func_gak(sz=self.sz, d=self.d, gamma=self.gamma, slice_support_vectors=self.support_) return _self From 67b8bb3f0cac57decbc0f7a12631b0b072bb008e Mon Sep 17 00:00:00 2001 From: Rafael Hautekiet Date: Sun, 21 Oct 2018 15:17:08 +0200 Subject: [PATCH 3/7] remove comments --- tslearn/svm.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tslearn/svm.py b/tslearn/svm.py index b489347a8..0a7eaafa8 100644 --- a/tslearn/svm.py +++ b/tslearn/svm.py @@ -173,7 +173,6 @@ def __init__(self, sz, d, C=1.0, kernel="gak", degree=3, gamma="auto", coef0=0.0 self.d = d if kernel == "gak": kernel = _sparse_kernel_func_gak(sz=sz, d=d, gamma=gamma) - # kernel = _kernel_func_gak(sz=sz, d=d, gamma=gamma) super(TimeSeriesSVC, self).__init__(C=C, kernel=kernel, degree=degree, gamma=gamma, coef0=coef0, shrinking=shrinking, probability=probability, tol=tol, cache_size=cache_size, class_weight=class_weight, verbose=verbose, @@ -194,10 +193,8 @@ def fit(self, X, y, sample_weight=None): sklearn_X = _prepare_ts_datasets_sklearn(X) if self.kernel == "gak" and self.gamma == 
"auto": self.gamma = gamma_soft_dtw(to_time_series_dataset(X)) - # self.kernel = _kernel_func_gak(sz=self.sz, d=self.d, gamma=self.gamma) self.kernel = _sparse_kernel_func_gak(sz=self.sz, d=self.d, gamma=self.gamma) _self = super(TimeSeriesSVC, self).fit(sklearn_X, y, sample_weight=sample_weight) - # self.kernel = _kernel_func_gak(sz=self.sz, d=self.d, gamma=self.gamma) self.kernel = _sparse_kernel_func_gak(sz=self.sz, d=self.d, gamma=self.gamma, slice_support_vectors=self.support_) return _self From 6ea1847d68351e6136c4271a381c1dbd1d5c82e6 Mon Sep 17 00:00:00 2001 From: Rafael Hautekiet Date: Sun, 21 Oct 2018 15:33:16 +0200 Subject: [PATCH 4/7] modified comment --- tslearn/svm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tslearn/svm.py b/tslearn/svm.py index 0a7eaafa8..dbf6c25ef 100644 --- a/tslearn/svm.py +++ b/tslearn/svm.py @@ -43,7 +43,7 @@ def sparse_gak(X, X_fit): return cdist_gak(X, None, sigma=numpy.sqrt(gamma / 2.)) if slice_support_vectors is not None: - # slice out support vectors and only compute cross sim with them + # slice out support vectors sliced_X_fit = X_fit[slice_support_vectors] gak_sim_dense = cdist_gak(X.reshape((-1, sz, d)), sliced_X_fit.reshape((-1, sz, d)), sigma=numpy.sqrt(gamma / 2.)) From bb75442944701d8467cb9086023b25f0e19acb97 Mon Sep 17 00:00:00 2001 From: Rafael Hautekiet Date: Sun, 21 Oct 2018 17:39:43 +0200 Subject: [PATCH 5/7] Made statement more compact --- tslearn/svm.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tslearn/svm.py b/tslearn/svm.py index dbf6c25ef..3d43c3118 100644 --- a/tslearn/svm.py +++ b/tslearn/svm.py @@ -39,8 +39,7 @@ def _sparse_kernel_func_gak(sz, d, gamma, slice_support_vectors=None): def sparse_gak(X, X_fit): if X is X_fit: - X = X.reshape((-1, sz, d)) - return cdist_gak(X, None, sigma=numpy.sqrt(gamma / 2.)) + return cdist_gak(X.reshape((-1, sz, d)), None, sigma=numpy.sqrt(gamma / 2.)) if slice_support_vectors is not None: # slice out support vectors 
From e8ca1565ce3551e746dcaf6cd9d95418743f52a7 Mon Sep 17 00:00:00 2001 From: Rafael Hautekiet Date: Sun, 21 Oct 2018 17:40:54 +0200 Subject: [PATCH 6/7] Changed to more logical order --- tslearn/svm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tslearn/svm.py b/tslearn/svm.py index 3d43c3118..3c2163447 100644 --- a/tslearn/svm.py +++ b/tslearn/svm.py @@ -38,7 +38,7 @@ def _sparse_kernel_func_gak(sz, d, gamma, slice_support_vectors=None): def sparse_gak(X, X_fit): - if X is X_fit: + if X_fit is X: return cdist_gak(X.reshape((-1, sz, d)), None, sigma=numpy.sqrt(gamma / 2.)) if slice_support_vectors is not None: From 0f2a5b0f0e36440946944423a8045e15ffea3e3d Mon Sep 17 00:00:00 2001 From: Rafael Hautekiet Date: Mon, 22 Oct 2018 15:44:17 +0200 Subject: [PATCH 7/7] Omit introduction of new variable --- tslearn/svm.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tslearn/svm.py b/tslearn/svm.py index 3c2163447..ef2b29d92 100644 --- a/tslearn/svm.py +++ b/tslearn/svm.py @@ -193,9 +193,9 @@ def fit(self, X, y, sample_weight=None): if self.kernel == "gak" and self.gamma == "auto": self.gamma = gamma_soft_dtw(to_time_series_dataset(X)) self.kernel = _sparse_kernel_func_gak(sz=self.sz, d=self.d, gamma=self.gamma) - _self = super(TimeSeriesSVC, self).fit(sklearn_X, y, sample_weight=sample_weight) + super(TimeSeriesSVC, self).fit(sklearn_X, y, sample_weight=sample_weight) self.kernel = _sparse_kernel_func_gak(sz=self.sz, d=self.d, gamma=self.gamma, slice_support_vectors=self.support_) - return _self + return self def predict(self, X): sklearn_X = _prepare_ts_datasets_sklearn(X)