Skip to content

Commit

Permalink
Merge branch 'release/0.3'
Browse files Browse the repository at this point in the history
Add distance correlation t test for independence in high dimension
  • Loading branch information
vnmabus committed Jun 6, 2019
2 parents b0ff127 + 552205a commit 2c765a6
Show file tree
Hide file tree
Showing 8 changed files with 152 additions and 8 deletions.
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.2
0.3
2 changes: 1 addition & 1 deletion dcor/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import errno as _errno
import os as _os

from . import distances # noqa
from . import homogeneity # noqa
from . import independence # noqa
from ._dcor import (distance_covariance_sqr, distance_covariance, # noqa
Expand All @@ -28,7 +29,6 @@
from ._partial_dcor import (partial_distance_covariance, # noqa
partial_distance_correlation)


try:
with open(_os.path.join(_os.path.dirname(__file__),
'..', 'VERSION'), 'r') as version_file:
Expand Down
117 changes: 117 additions & 0 deletions dcor/independence.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@
from . import _dcor_internals
from . import _hypothesis
from ._utils import _random_state_init, _transform_to_2d
from ._dcor import u_distance_correlation_sqr
import numpy as np
import scipy.stats


def _distance_covariance_test_imp(x, y,
Expand Down Expand Up @@ -241,3 +244,117 @@ def partial_distance_covariance_test(x, y, z, **kwargs):
"""
return _partial_distance_covariance_test_imp(x, y, z, **kwargs)


def distance_correlation_t_statistic(x, y):
    """
    Transformation of the bias corrected version of distance correlation used
    in :func:`distance_correlation_t_test`.

    The statistic is ``sqrt(v - 1) * R / sqrt(1 - R**2)``, where ``R`` is the
    value returned by ``u_distance_correlation_sqr(x, y)``,
    ``v = n * (n - 3) / 2`` and ``n`` is the number of rows of ``x``.
    ``v - 1`` is only positive for ``n >= 4``.

    Parameters
    ----------
    x: array_like
        First random vector. The columns correspond with the individual random
        variables while the rows are individual instances of the random vector.
    y: array_like
        Second random vector. The columns correspond with the individual random
        variables while the rows are individual instances of the random vector.

    Returns
    -------
    numpy scalar
        T statistic.

    See Also
    --------
    distance_correlation_t_test

    Examples
    --------
    >>> import numpy as np
    >>> import dcor
    >>> a = np.array([[1, 2, 3, 4],
    ...               [5, 6, 7, 8],
    ...               [9, 10, 11, 12],
    ...               [13, 14, 15, 16]])
    >>> b = np.array([[1, 0, 0, 1],
    ...               [0, 1, 1, 1],
    ...               [1, 1, 1, 1],
    ...               [1, 1, 0, 1]])
    >>> with np.errstate(divide='ignore'):
    ...     dcor.independence.distance_correlation_t_statistic(a, a)
    inf
    >>> dcor.independence.distance_correlation_t_statistic(a, b)
    ...                                      # doctest: +ELLIPSIS
    -0.4430164...
    >>> with np.errstate(divide='ignore'):
    ...     dcor.independence.distance_correlation_t_statistic(b, b)
    inf

    """
    bcdcor = u_distance_correlation_sqr(x, y)

    # ``x`` is documented as array_like: convert before reading the shape so
    # that plain sequences (which have no ``.shape``) do not raise here.
    n = np.asarray(x).shape[0]
    v = n * (n - 3) / 2

    # When ``bcdcor`` is exactly 1 the denominator is 0 and the result is
    # ``inf`` (the doctests above silence the resulting divide warning).
    return np.sqrt(v - 1) * bcdcor / np.sqrt(1 - bcdcor**2)


def distance_correlation_t_test(x, y):
    """
    Test of independence for high dimension based on convergence to a Student t
    distribution. The null hypothesis is that the two random vectors are
    independent.

    The p-value is the upper-tail probability of the statistic returned by
    :func:`distance_correlation_t_statistic` under a Student t distribution
    with ``n * (n - 3) / 2 - 1`` degrees of freedom, where ``n`` is the number
    of rows of ``x``.

    Parameters
    ----------
    x: array_like
        First random vector. The columns correspond with the individual random
        variables while the rows are individual instances of the random vector.
    y: array_like
        Second random vector. The columns correspond with the individual random
        variables while the rows are individual instances of the random vector.

    Returns
    -------
    HypothesisTest
        Results of the hypothesis test.

    See Also
    --------
    distance_correlation_t_statistic

    Examples
    --------
    >>> import numpy as np
    >>> import dcor
    >>> a = np.array([[1, 2, 3, 4],
    ...               [5, 6, 7, 8],
    ...               [9, 10, 11, 12],
    ...               [13, 14, 15, 16]])
    >>> b = np.array([[1, 0, 0, 1],
    ...               [0, 1, 1, 1],
    ...               [1, 1, 1, 1],
    ...               [1, 1, 0, 1]])
    >>> with np.errstate(divide='ignore'):
    ...     dcor.independence.distance_correlation_t_test(a, a)
    ...                                      # doctest: +ELLIPSIS
    HypothesisTest(p_value=0.0, statistic=inf)
    >>> dcor.independence.distance_correlation_t_test(a, b)
    ...                                      # doctest: +ELLIPSIS
    HypothesisTest(p_value=0.6327451..., statistic=-0.4430164...)
    >>> with np.errstate(divide='ignore'):
    ...     dcor.independence.distance_correlation_t_test(b, b)
    ...                                      # doctest: +ELLIPSIS
    HypothesisTest(p_value=0.0, statistic=inf)

    """
    t_test = distance_correlation_t_statistic(x, y)

    # Convert before reading the shape so the sample count can be taken from
    # any array_like, not only from objects exposing ``.shape``.
    n = np.asarray(x).shape[0]
    v = n * (n - 3) / 2
    df = v - 1

    # Upper-tail probability. The survival function is used instead of
    # ``1 - cdf`` because the subtraction cancels catastrophically for large
    # statistics and would round small p-values to exactly 0.
    p_value = scipy.stats.t.sf(t_test, df=df)

    return _hypothesis.HypothesisTest(p_value=p_value, statistic=t_test)
3 changes: 3 additions & 0 deletions docs/_static/css/wide.css
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
/* Let the main content area (.wy-nav-content) span the full available width,
   overriding any narrower max-width set by the theme. */
.wy-nav-content {
max-width: 100% !important;
}
2 changes: 2 additions & 0 deletions docs/apilist.rst
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,8 @@ The following functions are used to test if two random vectors are independent.
:toctree: functions

dcor.independence.distance_covariance_test
dcor.independence.distance_correlation_t_statistic
dcor.independence.distance_correlation_t_test

Internal computations
^^^^^^^^^^^^^^^^^^^^^
Expand Down
5 changes: 5 additions & 0 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,11 @@
#
html_theme = 'sphinx_rtd_theme'


def setup(app):
app.add_stylesheet('css/wide.css')


# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
Expand Down
21 changes: 19 additions & 2 deletions docs/energycomparison.rst
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ Table of energy-dcor equivalents
DX <- as.matrix(dx)
DY <- as.matrix(dy)

dcovU_stats(x, y)
dcovU_stats(DX, DY)

- .. code-block:: python

Expand Down Expand Up @@ -303,4 +303,21 @@ Table of energy-dcor equivalents
num_resamples=10)

-

* - .. code-block:: R

dcor.t(x, y)

- .. code-block:: python

dcor.independence.distance_correlation_t_statistic(x, y)

-
* - .. code-block:: R

dcor.ttest(x, y)

- .. code-block:: python

dcor.independence.distance_correlation_t_test(x, y)

-
8 changes: 4 additions & 4 deletions docs/theory.rst
Original file line number Diff line number Diff line change
Expand Up @@ -77,9 +77,9 @@ and :math:`(B_{i, j})_{i,j=1}^n`

.. math::
A_{i, j} &= a_{i,j} - \frac{1}{n} \sum_{l=1}^n a_{il} - \frac{1}{n}
\sum_{k=1}^n a_{kj} + \frac{1}{n^2}\sum_{k=1}^n a_{kj}, \\
\sum_{k=1}^n a_{kj} + \frac{1}{n^2}\sum_{k,l=1}^n a_{kl}, \\
B_{i, j} &= b_{i,j} - \frac{1}{n} \sum_{l=1}^n b_{il} - \frac{1}{n}
\sum_{k=1}^n b_{kj} + \frac{1}{n^2}\sum_{k=1}^n b_{kj}.
\sum_{k=1}^n b_{kj} + \frac{1}{n^2}\sum_{k,l=1}^n b_{kl}.
Then

Expand Down Expand Up @@ -111,11 +111,11 @@ matrices :math:`(\widetilde{A}_{i, j})_{i,j=1}^n` and :math:`(\widetilde{B}_{i,
:label: ucentering
\widetilde{A}_{i, j} &= \begin{cases} a_{i,j} - \frac{1}{n-2} \sum_{l=1}^n a_{il} -
\frac{1}{n-2} \sum_{k=1}^n a_{kj} + \frac{1}{(n-1)(n-2)}\sum_{k=1}^n a_{kj}, &\text{if } i \neq j, \\
\frac{1}{n-2} \sum_{k=1}^n a_{kj} + \frac{1}{(n-1)(n-2)}\sum_{k,l=1}^n a_{kl}, &\text{if } i \neq j, \\
0, &\text{if } i = j,
\end{cases} \\
\widetilde{B}_{i, j} &= \begin{cases} b_{i,j} - \frac{1}{n-2} \sum_{l=1}^n b_{il} -
\frac{1}{n-2} \sum_{k=1}^n b_{kj} + \frac{1}{(n-1)(n-2)}\sum_{k=1}^n b_{kj}, &\text{if } i \neq j, \\
\frac{1}{n-2} \sum_{k=1}^n b_{kj} + \frac{1}{(n-1)(n-2)}\sum_{k,l=1}^n b_{kl}, &\text{if } i \neq j, \\
0, &\text{if } i = j.
\end{cases}
Expand Down

0 comments on commit 2c765a6

Please sign in to comment.