-
Notifications
You must be signed in to change notification settings - Fork 1
/
correlation_finder.py
51 lines (37 loc) · 1.35 KB
/
correlation_finder.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
from stat_helper import *
from hdf_helper import *
import pandas as pd
import numpy as np
from sklearn.neighbors import LocalOutlierFactor
from sklearn.svm import SVR
import h5py
from scipy.stats import pearsonr,spearmanr
# Cut-off for the Pearson coefficient: get_related_channels keeps a channel
# pair only when abs(pearson) is strictly greater than this value.
pearson_trshld = 0.95
# Cut-off for the Spearman coefficient: get_related_channels keeps a channel
# pair only when abs(spearman) is strictly greater than this value.
spearman_trshld = 0.95
def get_correlation(data1, data2):
    '''
    Compare two series of equal length using three statistics.

    Returns a tuple (pearson, spearman, covariance):
      * pearson    - coefficient reported by scipy.stats.pearsonr
                     (strength of the linear relationship)
      * spearman   - coefficient reported by scipy.stats.spearmanr
                     (rank-based, i.e. strength of the monotonic relationship)
      * covariance - the value returned by np.cov(data1, data2), unmodified;
                     for two 1-D inputs NumPy documents this as a 2x2 matrix

    The p-values produced by the two SciPy calls are discarded.
    '''
    # Element 0 of each SciPy result is the coefficient, element 1 the p-value.
    linear_strength = pearsonr(data1, data2)[0]
    rank_strength = spearmanr(data1, data2)[0]
    cov_matrix = np.cov(data1, data2)
    return linear_strength, rank_strength, cov_matrix
def get_related_channels(df, *, pearson_threshold=None, spearman_threshold=None):
    '''
    From a dataframe of time series, find all of the channel pairs whose
    Pearson AND Spearman coefficients are both stronger than their thresholds.

    Parameters:
      df                 - dataframe whose columns are the channels to compare
      pearson_threshold  - optional override for the Pearson cut-off; when
                           None the module-level pearson_trshld is used
      spearman_threshold - optional override for the Spearman cut-off; when
                           None the module-level spearman_trshld is used

    Returns a list of [col, subcol] pairs, in column iteration order. Every
    ordered combination of two different column labels is tested, so a
    related pair appears twice: once as [a, b] and once as [b, a].

    A pair is kept only when the absolute value of each coefficient is
    strictly greater than its threshold. A NaN coefficient never satisfies
    that comparison, so such pairs are left out.
    '''
    # Resolve the defaults at call time so that changes made to the module
    # level thresholds are still honoured by callers passing only df.
    if pearson_threshold is None:
        pearson_threshold = pearson_trshld
    if spearman_threshold is None:
        spearman_threshold = spearman_trshld

    related_channels = []
    for col in df.columns:
        for subcol in df.columns:
            # A channel is never compared against itself.
            if col == subcol:
                continue
            # The covariance returned by get_correlation plays no part in
            # the selection, so it is discarded here.
            pearson, spearman, _ = get_correlation(df[col], df[subcol])
            if (abs(pearson) > pearson_threshold
                    and abs(spearman) > spearman_threshold):
                related_channels.append([col, subcol])
    return related_channels