Skip to content

Commit

Permalink
add statistical test for correlation and regression coefficient
Browse files Browse the repository at this point in the history
  • Loading branch information
Yefee committed Oct 30, 2019
1 parent 58a5e2a commit ea32fe0
Show file tree
Hide file tree
Showing 3 changed files with 226 additions and 26 deletions.
141 changes: 120 additions & 21 deletions xMCA/core/xMCA.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,27 @@ def _svd(self, left, right):
self._V = V
self._s = s

def _double_t_test(self, t0, dof):

"""
Double tailed student t test.
**Arguments:**
*array*
t0 field `xarray.DataArray`;
dof `int`;
**Return:**
*array*
p: p values;
"""

from scipy.stats import t
from xarray.ufuncs import fabs
pvalue = 2 * t.sf(fabs(t0), dof) * xr.ones_like(t0)
return pvalue


def solver(self):
"""
Expand Down Expand Up @@ -400,7 +421,7 @@ def sigValues(self, n=10):
self._leftField.name + ' and ' + self._rightField.name +'.'})


def _correlationCalc(self, x, y, correlating=True):
def _correlationCalc(self, x, y, correlating=True, statistical_test=False):

"""
Calculate the correlations between two fields or time series.
Expand Down Expand Up @@ -433,10 +454,24 @@ def _correlationCalc(self, x, y, correlating=True):
else:
r = xy / xx ** 2

return r
if statistical_test:

from xarray.ufuncs import sqrt
if correlating:
r_coe = r
else:
r_coe = xy / xx / yy

dof = len(x[self._timeCoords.name]) - 2
t0 = r_coe * sqrt(dof / ((1-r_coe+1e-20)*(1+r_coe+1e-20)))
p = self._double_t_test(t0, dof)
else:
p = None

return r, p


def homogeneousPatterns(self, n=1, correlating=True):
def homogeneousPatterns(self, n=1, correlating=True, statistical_test=False):
"""
Sigular modes expressed as the
correlation between the expansion coefficient time series (PCs)
Expand All @@ -451,9 +486,15 @@ def homogeneousPatterns(self, n=1, correlating=True):
Expressed as correlation maps. Default is True. Otherwise, expressed as
regression coefficient maps.
*statistical_test*
Curry out the double tailed student-t test. Default is False. Otherwise, both left
and right HPs' p-values will be returned.
**Returns:**
*HPs*
Two `xarray.DataArray` containing the HPs.
*Ps*
Two `xarray.DataArray` containing the P-values.
**Examples:**
Initiate the instance::
Expand All @@ -472,33 +513,59 @@ def homogeneousPatterns(self, n=1, correlating=True):
le, re = self.expansionCoefs(n=n)

for f in ['l', 'r']:
r = []
r, pval = [], []
for i in range(n):
if f == 'l':
tmp = self._correlationCalc(self._leftField, le.sel(n=i))
tmp, p = self._correlationCalc(self._leftField, le.sel(n=i),
correlating=correlating,
statistical_test=statistical_test)
else:
tmp = self._correlationCalc(self._rightField, re.sel(n=i))
tmp, p = self._correlationCalc(self._rightField, re.sel(n=i),
correlating=correlating,
statistical_test=statistical_test)

tmp['n'] = i
r.append(tmp)
if p is not None:
p['n'] = i
pval.append(p)

if f == 'l':
lHP = xr.concat(r, dim='n')
# lHP.name = 'lHP'
if statistical_test:
pl = xr.concat(pval, dim='n')
else:
pl = None
else:
rHP = xr.concat(r, dim='n')
# rHP.name = 'rHP'
if statistical_test:
rl = xr.concat(pval, dim='n')
else:
rl = None


lHP.name = 'leftHomoPatterns'
lHP.attrs['long_name'] = 'Homogeneous Patterns for ' + self._leftField.name + '.'
rHP.name = 'rightHomoPatterns'
rHP.attrs['long_name'] = 'Homogeneous Patterns for ' + self._rightField.name + '.'

return lHP, rHP
if correlating:
lHP.attrs['long_name'] = 'Homogeneous Patterns (correlation coefficient) for ' + self._leftField.name + '.'
rHP.attrs['long_name'] = 'Homogeneous Patterns (correlation coefficient) for ' + self._rightField.name + '.'
else:
lHP.attrs['long_name'] = 'Homogeneous Patterns (regression coefficient) for ' + self._leftField.name + '.'
rHP.attrs['long_name'] = 'Homogeneous Patterns (regression coefficient) for ' + self._rightField.name + '.'

if pl is not None:
pl.name = 'leftPval'
rl.name = 'rightPval'
pl.attrs['long_name'] = 'p-value for ' + self._leftField.name + '.'
rl.attrs['long_name'] = 'p-value for ' + self._rightField.name + '.'
return lHP, rHP, pl, rl
else:
return lHP, rHP



def heterogeneousPatterns(self, n=1, correlating=True):
def heterogeneousPatterns(self, n=1, correlating=True, statistical_test=False):
"""
Sigular modes expressed as the
correlation between the expansion coefficient time series (PCs)
Expand All @@ -513,9 +580,16 @@ def heterogeneousPatterns(self, n=1, correlating=True):
Expressed as correlation maps. Default is True. Otherwise, expressed as
regression coefficient maps.
*statistical_test*
Curry out the double tailed student-t test. Default is False. Otherwise, both left
and right HPs' p-values will be returned.
**Returns:**
*HPs*
Two `xarray.DataArray` containing the HPs.
*Ps*
Two `xarray.DataArray` containing the P-values.
**Examples:**
Initiate the instance::
Expand All @@ -535,27 +609,52 @@ def heterogeneousPatterns(self, n=1, correlating=True):


for f in ['l', 'r']:
r = []
r, pval = [], []
for i in range(n):
if f == 'l':
tmp = self._correlationCalc(self._leftField, re.sel(n=i))
tmp, p = self._correlationCalc(self._leftField, re.sel(n=i),
correlating=correlating,
statistical_test=statistical_test)
else:
tmp = self._correlationCalc(self._rightField, le.sel(n=i))
tmp, p = self._correlationCalc(self._rightField, le.sel(n=i),
correlating=correlating,
statistical_test=statistical_test)

tmp['n'] = i
r.append(tmp)
if p is not None:
p['n'] = i
pval.append(p)


if f == 'l':
lHP = xr.concat(r, dim='n')
# lHP.name = 'lHP'
if statistical_test:
pl = xr.concat(pval, dim='n')
else:
pl = None
else:
rHP = xr.concat(r, dim='n')
# rHP.name = 'rHP'
if statistical_test:
rl = xr.concat(pval, dim='n')
else:
rl = None

lHP.name = 'leftHeteroPatterns'
lHP.attrs['long_name'] = 'Heterogeneous Patterns for ' + self._leftField.name + '.'
rHP.name = 'rightHeteroPatterns'
rHP.attrs['long_name'] = 'Heterogeneous Patterns for ' + self._rightField.name + '.'

return lHP, rHP

if correlating:
lHP.attrs['long_name'] = 'Heterogeneous Patterns (correlation coefficient) for ' + self._leftField.name + '.'
rHP.attrs['long_name'] = 'Heterogeneous Patterns (correlation coefficient) for ' + self._rightField.name + '.'
else:
lHP.attrs['long_name'] = 'Heterogeneous Patterns (regression coefficient) for ' + self._leftField.name + '.'
rHP.attrs['long_name'] = 'Heterogeneous Patterns (regression coefficient) for ' + self._rightField.name + '.'

if pl is not None:
pl.name = 'leftPval'
rl.name = 'rightPval'
pl.attrs['long_name'] = 'p-value for ' + self._leftField.name + '.'
rl.attrs['long_name'] = 'p-value for ' + self._rightField.name + '.'
return lHP, rHP, pl, rl
else:
return lHP, rHP
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
111 changes: 106 additions & 5 deletions xMCA/examples/example_svd.ipynb

Large diffs are not rendered by default.

0 comments on commit ea32fe0

Please sign in to comment.