Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TEnt of discrete variable timeseries. Added example. #47

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
123 changes: 123 additions & 0 deletions examples/transfer_entropy_analysis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
import mdentropy
from mdentropy.core.information import ncmutinf
import numpy as np
import multiprocessing as mp
from functools import partial

"""
In this example, we demonstrate an application of the MDEntropy API to
calculate the transfer entropy between every possible pair of features
between two lists of timeseries, each with the same numbers of frames
but possibly different numbers of features.
"""


"""
Called by compute_tentropy_between. Computes transfer entropy between
all pairs of features between timeseries_i[t_id] and timeseries_j[t_id] at
lag_time.
"""
def compute_tentropy_between_singletraj(timeseries_tuple, lag_time):
t_i, t_j = timeseries_tuple
tentropy_pairs_single = []

t_i = t_i[::lag_time]
t_j = t_j[::lag_time]

k = 0

for i in range(0, t_i.shape[1]):
print("Examining, within timeseries, variable %d out of %d" %(i, t_i.shape[1]))
for j in range(0, t_j.shape[1]):
x = t_i[1:,i].reshape((-1,1))
y = t_j[:-1,j].reshape((-1,1))
z = t_i[:-1,i].reshape((-1,1))
n_frames = x.shape[0]
n_bins = None
tent = np.nan_to_num(ncmutinf(n_bins, x, y, z, method='grassberger'))

tentropy_pairs_single.append(tent * n_frames)

k += 1

return np.array(tentropy_pairs_single)

"""
Compute Transfer Entropy between all pairs of features
between two separate lists of featurized timeseries data.
----------
Parameters:

timeseries_tuples: list of tuples of two numpy arrays.
Each tuple contains two timeseries (each of type np.array),
each with the same number of rows/frames but possibly different numbers of
columns/features.
lag_time: int
Lag time to be used in computation of Transfer Entropy
titles_i: list of str
If your features have names (e.g., "Arg325" or "Temp. in Kansas"),
you can optionally include them here for timeseries_i list.
titles_j: list of str
If your features have names (e.g., "Arg325" or "Temp. in Kansas"),
you can optionally include them here for timeseries_j list.
worker_pool: object from iPyParallel module
parallel: bool. If True, will use multiprocessing module
to map over all trajectories in parallel.

Returns:
tentropy_array: Numpy array of shape (len(timeseries_i) * len(timeseries_j)), containing
the transfer entropy between each possible pair of features.
tentropy_pairs_id_tuples: List of tuples of ints.
In same order as entries of tentropy_array.
Each tuple contains two ints describing the ids of the features for which
tentropy was calculated.
tentropy_pairs_names: List of tuples of strings.

Caveats:
Implemented weighted mean is not optimal and may not be numerically stable.
This is an area for improvement.
"""

def compute_tentropy_between(timeseries_tuples, lag_time,
titles_i=None, titles_j=None, worker_pool=None,
parallel=False):
total_frames = 0.
n_tent_pairs = timeseries_tuples[0][0].shape[1] * timeseries_tuples[0][1].shape[1]

tentropy_array = np.zeros(n_tent_pairs)
tentropy_pairs_names = []
tentropy_pairs_id_tuples = []

compute_tentropy_between_singletraj_partial = partial(compute_tentropy_between_singletraj,
lag_time=lag_time)

if worker_pool is not None:
tentropy_pairs = worker_pool.map_sync(compute_tentropy_between_singletraj_partial, timeseries_tuples)
elif parallel:
pool = mp.Pool(mp.cpu_count())
tentropy_pairs = pool.map(compute_tentropy_between_singletraj_partial, timeseries_tuples)
pool.terminate()
else:
tentropy_pairs = []
for timeseries_tuple in timeseries_tuples:
tentropy_pairs.append(compute_tentropy_between_singletraj_partial(timeseries_tuple))

for arr in tentropy_pairs:
tentropy_array += arr


for t_id, timeseries_tuple in enumerate(timeseries_tuples):
t_i, t_j = timeseries_tuple
t_i = t_i[::lag_time]
t_j = t_j[::lag_time]
total_frames += (t_i.shape[0] - 1)
if t_id == 0:
for i in range(0, t_i.shape[1]):
for j in range(0, t_j.shape[1]):
if titles_i is not None:
tentropy_pairs_names.append((titles_i[i], titles_j[j]))
tentropy_pairs_id_tuples.append((i, j))

tentropy_array /= float(total_frames)

return tentropy_array, tentropy_pairs_id_tuples, tentropy_pairs_names
12 changes: 8 additions & 4 deletions mdentropy/core/entropy.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@

from numpy import ndarray
from numpy import sum as npsum
from numpy import (atleast_2d, arange, bincount, diff, finfo, float32,
from numpy import (atleast_2d, arange, array, bincount, diff, finfo, float32,
hsplit, linspace, log, log2, meshgrid, nan_to_num, nansum,
product, random, ravel, vstack, exp)
product, random, ravel, vstack, exp, bincount, issubdtype, integer)

from scipy.spatial import cKDTree
from scipy.stats import entropy as naive
Expand Down Expand Up @@ -57,7 +57,11 @@ def entropy(n_bins, rng, method, *args):
if method == 'kde':
return kde_entropy(rng, *args, grid_size=n_bins or 20)

counts = symbolic(n_bins, rng, *args)
if n_bins is not None or issubdtype(args[0].dtype, float):
counts = symbolic(n_bins, rng, *args)
else:
minlength = max([len(bincount(arg)) for arg in args])
counts = array([bincount(arg, minlength=minlength) for arg in args])

if method == 'chaowangjost':
return chaowangjost(counts)
Expand All @@ -67,7 +71,7 @@ def entropy(n_bins, rng, method, *args):
return naive(counts)


def centropy(n_bins, x, y, rng=None, method='grassberger'):
def centropy(n_bins, x, y, rng=None, method='knn'):
"""Condtional entropy calculation

Parameters
Expand Down
21 changes: 16 additions & 5 deletions mdentropy/core/information.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
from .entropy import entropy, centropy
from ..utils import avgdigamma

from numpy import (atleast_2d, diff, finfo, float32, log, nan_to_num, random,
from numpy import (atleast_2d, diff, finfo, float32, integer, issubdtype,
log, nan_to_num, random,
sqrt, hstack)

from scipy.spatial import cKDTree
Expand All @@ -11,7 +12,7 @@
EPS = finfo(float32).eps


def mutinf(n_bins, x, y, rng=None, method='grassberger'):
def mutinf(n_bins, x, y, rng=None, method='knn'):
"""Mutual information calculation

Parameters
Expand Down Expand Up @@ -73,7 +74,7 @@ def knn_mutinf(x, y, k=None, boxsize=None):
return (-a - b + c + d) / log(2)


def nmutinf(n_bins, x, y, rng=None, method='grassberger'):
def nmutinf(n_bins, x, y, rng=None, method='knn'):
"""Normalized mutual information calculation

Parameters
Expand All @@ -97,7 +98,7 @@ def nmutinf(n_bins, x, y, rng=None, method='grassberger'):
entropy(n_bins, [rng], method, y)))


def cmutinf(n_bins, x, y, z, rng=None, method='grassberger'):
def cmutinf(n_bins, x, y, z, rng=None, method='knn'):
"""Conditional mutual information calculation

Parameters
Expand Down Expand Up @@ -164,13 +165,18 @@ def knn_cmutinf(x, y, z, k=None, boxsize=None):
return (-a - b + c + d) / log(2)


def ncmutinf(n_bins, x, y, z, rng=None, method='grassberger'):
def ncmutinf(n_bins, x, y, z, rng=None, method='knn'):
"""Normalized conditional mutual information calculation

Parameters
----------
n_bins : int
Number of bins.

If None, assumes data is pre-binned or a timeseries
of discrete variables. In this case, x, y, and z
must all be of some integer type: "int", "uint8", etc.

x : array_like, shape = (n_samples, n_dim)
Conditioned variable
y : array_like, shape = (n_samples, n_dim)
Expand All @@ -186,5 +192,10 @@ def ncmutinf(n_bins, x, y, z, rng=None, method='grassberger'):
ncmi : float
"""

if n_bins is None:
assert(issubdtype(x.dtype, integer))
method = 'grassberger'
rng = None

return (cmutinf(n_bins, x, y, z, rng=rng, method=method) /
centropy(n_bins, x, z, rng=rng, method=method))