From 0953746fbfa4dc7e8fcd5f8dd1a33f5c954f922d Mon Sep 17 00:00:00 2001 From: Kyle Beauchamp Date: Fri, 26 Apr 2013 12:17:02 -0700 Subject: [PATCH 1/5] First mcmc code added --- src/python/mcmc.py | 141 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 141 insertions(+) create mode 100644 src/python/mcmc.py diff --git a/src/python/mcmc.py b/src/python/mcmc.py new file mode 100644 index 00000000..18018879 --- /dev/null +++ b/src/python/mcmc.py @@ -0,0 +1,141 @@ +import pymc # pymc 2.2 +import numpy as np +import scipy.linalg + +class TransitionSampler(): + def __init__(self, C): + self.C = C + self.num_states = C.shape[0] + initial = (1 / float(self.num_states)) * np.ones((self.num_states, self.num_states - 1)) + self.T_partial = pymc.Uninformative("T_partial",size=(self.num_states, self.num_states - 1),value=initial) + + @pymc.dtrm + def T(T_partial=self.T_partial): + T = np.zeros((self.num_states, self.num_states)) + T[:,:-1] = T_partial + T[:,-1] = 1 - T_partial.sum(1) + return T + self.T = T + + @pymc.potential + def log_likelihood(T=self.T): + return np.sum(self.C * np.log(T)) + self.log_likelihood = log_likelihood + + @pymc.potential + def normalization_constraint(T=self.T): + if T.min() < 0 or T.sum(1).max() > 1: + return -1 * np.inf + else: + return 0.0 + self.normalization_constraint = normalization_constraint + + def sample(self, num_steps, thin=1, filename=None): + + if filename == None: + db = "ram" + else: + db = "hdf5" + + self.mcmc = pymc.MCMC(self, db=db, dbname=filename) + self.mcmc.sample(num_steps) + + def iterate_T(self): + """Iterate over the sampled transition matrices.""" + Ti = self.mcmc.trace("T") + for i, T in enumerate(Ti): + yield T + +class RateSampler(): + def __init__(self, C): + self.C = C + self.num_states = C.shape[0] + initial = (1 / float(self.num_states)) * np.ones((self.num_states, self.num_states)) + self.K_unnormalized = pymc.Uninformative("K_unnormalized",size=(self.num_states, self.num_states),value=initial) + + @pymc.dtrm + def K(K_unnormalized=self.K_unnormalized): + K = K_unnormalized.copy() + diags = K.diagonal() - K.sum(1) + np.fill_diagonal(K, diags) + return K + self.K = K + + @pymc.dtrm + def T(K=self.K): + T = scipy.linalg.expm(K) + return T + self.T = T + + @pymc.potential + def log_likelihood(T=self.T): + return np.sum(self.C * np.log(T)) + self.log_likelihood = log_likelihood + + @pymc.potential + def normalization_constraint(K=self.K): + K = K.copy() + np.fill_diagonal(K, np.zeros(self.num_states)) + if K.min() < 0.0: + return -1 * np.inf + else: + return 0.0 + self.normalization_constraint = normalization_constraint + + def sample(self, num_steps, thin=1, filename=None): + + if filename == None: + db = "ram" + else: + db = "hdf5" + + self.mcmc = pymc.MCMC(self, db=db, dbname=filename) + self.mcmc.sample(num_steps) + + def iterate_K(self): + """Iterate over the sampled transition matrices.""" + Ki = self.mcmc.trace("K") + for i, K in enumerate(Ki): + yield K + +class ReversibleTransitionSampler(): + def __init__(self, C): + self.C = C + self.num_states = C.shape[0] + initial = (1 / float(self.num_states)) * np.ones((self.num_states, self.num_states - 1)) + self.X_partial = pymc.Uninformative("X_partial",size=(self.num_states, self.num_states - 1),value= initial) + + @pymc.dtrm + def X(X_partial=self.X_partial): + X = np.zeros((self.num_states, self.num_states)) + X[:,:-1] = X_partial + X[:,-1] = 1 - X_partial.sum(1) + X = 0.5 * (X + X.T) + return X + self.X = X + + @pymc.dtrm + def T(X=self.X): + Ni = X.sum(1) + D = np.diag(Ni**-1.) + T = D.dot(X) + return T + self.T = T + + @pymc.potential + def log_likelihood(T=self.T): + return np.sum(self.C * np.log(T)) + self.log_likelihood = log_likelihood + + @pymc.potential + def normalization_constraint(T=self.T): + if T.min() < 0 or T.sum(1).max() > 1: + return -1 * np.inf + else: + return 0.0 + self.normalization_constraint = normalization_constraint + + def sample(self, num_steps, thin=1): + self.mcmc = pymc.MCMC(self) + self.mcmc.sample(num_steps) + return self.mcmc.trace("T")[:] From a7313d78456f2b3c186db88d6a8b6326c0e80973 Mon Sep 17 00:00:00 2001 From: Kyle Beauchamp Date: Fri, 26 Apr 2013 18:30:03 -0700 Subject: [PATCH 2/5] Now use Dirichlet RVs to sample TProb --- src/python/mcmc.py | 143 ++++++++++++++++++++++++++++++++++++--------- 1 file changed, 115 insertions(+), 28 deletions(-) diff --git a/src/python/mcmc.py b/src/python/mcmc.py index 18018879..77df4701 100644 --- a/src/python/mcmc.py +++ b/src/python/mcmc.py @@ -1,19 +1,21 @@ import pymc # pymc 2.2 import numpy as np import scipy.linalg +from msmbuilder import MSMLib class TransitionSampler(): - def __init__(self, C): + def __init__(self, C, alpha=1.0): self.C = C self.num_states = C.shape[0] - initial = (1 / float(self.num_states)) * np.ones((self.num_states, self.num_states - 1)) - self.T_partial = pymc.Uninformative("T_partial",size=(self.num_states, self.num_states - 1),value=initial) + + self.T_incomplete = np.array([pymc.Dirichlet("T%d_incomplete" % i, alpha * np.ones(self.num_states)) for i in xrange(self.num_states)]) + self.T0 = np.array([pymc.CompletedDirichlet("T%d" % i,self.T_incomplete[i])[0] for i in xrange(self.num_states)]) @pymc.dtrm - def T(T_partial=self.T_partial): + def T(T0=self.T0): T = np.zeros((self.num_states, self.num_states)) - T[:,:-1] = T_partial - T[:,-1] = 1 - T_partial.sum(1) + for i in xrange(self.num_states): + T[i] = T0[i] return T self.T = T @@ -21,14 +23,6 @@ def T(T_partial=self.T_partial): def log_likelihood(T=self.T): return np.sum(self.C * np.log(T)) self.log_likelihood = log_likelihood - - @pymc.potential - def normalization_constraint(T=self.T): - if T.min() < 0 or T.sum(1).max() > 1: - return -1 * np.inf - else: - return 0.0 - self.normalization_constraint = normalization_constraint def sample(self, num_steps, thin=1, filename=None): @@ -50,8 +44,8 @@ class RateSampler(): def __init__(self, C): self.C = C self.num_states = C.shape[0] - initial = (1 / float(self.num_states)) * np.ones((self.num_states, self.num_states)) - self.K_unnormalized = pymc.Uninformative("K_unnormalized",size=(self.num_states, self.num_states),value=initial) + initial_value = (1 / float(self.num_states)) * np.ones((self.num_states, self.num_states)) + self.K_unnormalized = pymc.Uninformative("K_unnormalized",size=(self.num_states, self.num_states),value=initial_value) @pymc.dtrm def K(K_unnormalized=self.K_unnormalized): @@ -101,24 +95,34 @@ def iterate_K(self): class ReversibleTransitionSampler(): def __init__(self, C): self.C = C + self.N = C.sum() + self.num_states = C.shape[0] - initial = (1 / float(self.num_states)) * np.ones((self.num_states, self.num_states - 1)) - self.X_partial = pymc.Uninformative("X_partial",size=(self.num_states, self.num_states - 1),value= initial) + self.num_tril = np.tril_indices_from(C)[0].shape[0] + + self.X_flat = pymc.Uninformative("S_tril",size=(self.num_tril),value=np.ones(self.num_tril)) + self.tril_indices = np.tril_indices_from(C) + + self.p_incomplete = pymc.Dirichlet("p_incomplete", np.ones(self.num_states)) + self.p = pymc.CompletedDirichlet("p",self.p_incomplete)[0] @pymc.dtrm - def X(X_partial=self.X_partial): + def X(X_flat=self.X_flat): + X = np.zeros((self.num_states, self.num_states)) - X[:,:-1] = X_partial - X[:,-1] = 1 - X_partial.sum(1) - X = 0.5 * (X + X.T) + + X[self.tril_indices] = X_flat + X.T[self.tril_indices] = X_flat + + X /= X.sum() + X *= self.N + return X self.X = X @pymc.dtrm def T(X=self.X): - Ni = X.sum(1) - D = np.diag(Ni**-1.) - T = D.dot(X) + T = MSMLib.estimate_transition_matrix(X) return T self.T = T @@ -135,7 +139,90 @@ def normalization_constraint(T=self.T): return 0.0 self.normalization_constraint = normalization_constraint - def sample(self, num_steps, thin=1): - self.mcmc = pymc.MCMC(self) + def sample(self, num_steps, thin=1, filename=None): + + if filename == None: + db = "ram" + else: + db = "hdf5" + + self.mcmc = pymc.MCMC(self, db=db, dbname=filename) + self.mcmc.sample(num_steps) + + def iterate_T(self): + """Iterate over the sampled transition matrices.""" + Ti = self.mcmc.trace("T") + for i, T in enumerate(Ti): + yield T + + +class ReversibleRateSampler(): + def __init__(self, C): + self.C = C + self.num_states = C.shape[0] + self.num_tril = np.tril_indices_from(C,-1)[0].shape[0] + + self.S_flat = pymc.Uninformative("S_tril",size=(self.num_tril),value=np.ones(self.num_tril)) + self.tril_indices = np.tril_indices_from(C,-1) + + self.p_incomplete = pymc.Dirichlet("p_incomplete", np.ones(self.num_states)) + self.p = pymc.CompletedDirichlet("p",self.p_incomplete)[0] + + @pymc.dtrm + def S(S_flat=self.S_flat): + + S = np.zeros((self.num_states, self.num_states)) + + S[self.tril_indices] = S_flat + S.T[self.tril_indices] = S_flat + + return S + self.S = S + + @pymc.dtrm + def K(S=self.S, p=self.p): + D_pre = np.diag(p ** 0.5) + D_post = np.diag(p ** -0.5) + K = (D_pre.dot(S)).dot(D_post) + + np.fill_diagonal(K, -1 * K.sum(1)) + + return K + self.K = K + + @pymc.dtrm + def T(K=self.K): + T = scipy.linalg.expm(K) + return T + self.T = T + + @pymc.potential + def log_likelihood(T=self.T): + return np.sum(self.C * np.log(T)) + self.log_likelihood = log_likelihood + + @pymc.potential + def normalization_constraint(K=self.K): + K = K.copy() + np.fill_diagonal(K, np.zeros(self.num_states)) + if K.min() < 0.0: + return -1 * np.inf + else: + return 0.0 + self.normalization_constraint = normalization_constraint + + def sample(self, num_steps, thin=1, filename=None): + + if filename == None: + db = "ram" + else: + db = "hdf5" + + self.mcmc = pymc.MCMC(self, db=db, dbname=filename) self.mcmc.sample(num_steps) - return self.mcmc.trace("T")[:] + + def iterate_K(self): + """Iterate over the sampled transition matrices.""" + Ki = self.mcmc.trace("K") + for i, K in enumerate(Ki): + yield K From 34ffd140e0e41dc820322f0fc26228ef7b83d11f Mon Sep 17 00:00:00 2001 From: Kyle Beauchamp Date: Mon, 29 Apr 2013 11:17:59 -0700 Subject: [PATCH 3/5] Updated MCMC code --- src/python/mcmc.py | 257 ++++++++++++++++++++++++--------------------- 1 file changed, 140 insertions(+), 117 deletions(-) diff --git a/src/python/mcmc.py b/src/python/mcmc.py index 77df4701..f6c85c87 100644 --- a/src/python/mcmc.py +++ b/src/python/mcmc.py @@ -1,14 +1,77 @@ -import pymc # pymc 2.2 +import abc +import pymc import numpy as np import scipy.linalg from msmbuilder import MSMLib -class TransitionSampler(): - def __init__(self, C, alpha=1.0): +# Default values of prior parameters for Dirichlet and Exponential RVs +ALPHA = 1.0 # 1 pseudocount +BETA = 1.0E-6 # A nearly uninformative exponential. + +def log_likelihood_T(C, T): + """Get the log likelihood of a transition matrix given counts. + + Parameters + ---------- + C : ndarray + Count matrix + T : ndarray + Transition matrix + + Returns + ------- + log_likelihood : float + log-likelihood + """ + return np.sum(C * np.log(T)) + +class Sampler(): + """Base class for all MCMC samplers, provides `sample()` function.""" + __metaclass__ = abc.ABCMeta + + def __init__(self, C): + """Create Sampler and store the counts and num_states.""" self.C = C self.num_states = C.shape[0] - self.T_incomplete = np.array([pymc.Dirichlet("T%d_incomplete" % i, alpha * np.ones(self.num_states)) for i in xrange(self.num_states)]) + def sample(self, num_steps, thin=1, filename=None): + """Sample the MCMC chain and store the results. + + Parameters + ---------- + num_steps : int + How many MCMC samples to generate + thin : int, optional + Subsample MCMC chain by this amount (default 1). + filename : str, optional + If not None, store results as HDF file rather than in RAM. + + """ + if filename == None: + db = "ram" + else: + db = "hdf5" + + self.mcmc = pymc.MCMC(self, db=db, dbname=filename) + self.mcmc.sample(num_steps, thin=thin) + + +class TransitionSampler(Sampler): + """Sample transition matrices with Dirichlet prior.""" + def __init__(self, C, alpha=ALPHA): + """Create an object to sample transition matrices. + + Parameters + ---------- + C : ndarray + Count matrix + alpha : float + Strength of Dirichlet prior. Default %f + """ % ALPHA + Sampler.__init__(self, C) + + initial = MSMLib.estimate_transition_matrix(C) + self.T_incomplete = np.array([pymc.Dirichlet("T%d_incomplete" % i, alpha * np.ones(self.num_states), value=initial[i,:-1]) for i in xrange(self.num_states)]) self.T0 = np.array([pymc.CompletedDirichlet("T%d" % i,self.T_incomplete[i])[0] for i in xrange(self.num_states)]) @pymc.dtrm @@ -21,32 +84,29 @@ def T(T0=self.T0): @pymc.potential def log_likelihood(T=self.T): - return np.sum(self.C * np.log(T)) + return log_likelihood_T(self.C, T) self.log_likelihood = log_likelihood - def sample(self, num_steps, thin=1, filename=None): - - if filename == None: - db = "ram" - else: - db = "hdf5" - - self.mcmc = pymc.MCMC(self, db=db, dbname=filename) - self.mcmc.sample(num_steps) - def iterate_T(self): - """Iterate over the sampled transition matrices.""" - Ti = self.mcmc.trace("T") - for i, T in enumerate(Ti): - yield T - -class RateSampler(): - def __init__(self, C): - self.C = C - self.num_states = C.shape[0] - initial_value = (1 / float(self.num_states)) * np.ones((self.num_states, self.num_states)) - self.K_unnormalized = pymc.Uninformative("K_unnormalized",size=(self.num_states, self.num_states),value=initial_value) - +class RateSampler(Sampler): + """Sample rate matrices with an exponential prior.""" + def __init__(self, C, beta=BETA): + """Create an object to sample rate matrices. + + Parameters + ---------- + C : ndarray + Count matrix + beta : float + Parameter of exponential prior. Default %f + """ % BETA + Sampler.__init__(self, C) + + T = MSMLib.estimate_transition_matrix(C) + K0 = scipy.linalg.logm(T) + np.fill_diagonal(K0, np.zeros(self.num_states)) + self.K_unnormalized = pymc.Exponential("K_unnormalized", beta, size=(self.num_states, self.num_states), value=K0) + @pymc.dtrm def K(K_unnormalized=self.K_unnormalized): K = K_unnormalized.copy() @@ -63,49 +123,37 @@ def T(K=self.K): @pymc.potential def log_likelihood(T=self.T): - return np.sum(self.C * np.log(T)) + return log_likelihood_T(self.C, T) self.log_likelihood = log_likelihood - @pymc.potential - def normalization_constraint(K=self.K): - K = K.copy() - np.fill_diagonal(K, np.zeros(self.num_states)) - if K.min() < 0.0: - return -1 * np.inf - else: - return 0.0 - self.normalization_constraint = normalization_constraint - def sample(self, num_steps, thin=1, filename=None): +class ReversibleTransitionSampler(Sampler): + """Sample reversible transition matrices.""" + def __init__(self, C): + """Create an object to sample reversible transition matrices. - if filename == None: - db = "ram" - else: - db = "hdf5" + Parameters + ---------- + C : ndarray + Count matrix - self.mcmc = pymc.MCMC(self, db=db, dbname=filename) - self.mcmc.sample(num_steps) + Notes + ----- + To parameterize reversible transition matrices, we work with + symmetric count matrices X. The parameters are the upper (inclusive) + triangle of X, which we (arbitrarily) throw a uniform(0,1) prior on. + """ + Sampler.__init__(self, C) - def iterate_K(self): - """Iterate over the sampled transition matrices.""" - Ki = self.mcmc.trace("K") - for i, K in enumerate(Ki): - yield K - -class ReversibleTransitionSampler(): - def __init__(self, C): - self.C = C - self.N = C.sum() + C_sym = C + C.T + C_sym /= C_sym.sum() self.num_states = C.shape[0] self.num_tril = np.tril_indices_from(C)[0].shape[0] - self.X_flat = pymc.Uninformative("S_tril",size=(self.num_tril),value=np.ones(self.num_tril)) self.tril_indices = np.tril_indices_from(C) + self.X_flat = pymc.Uniform("X_flat", 0, 1, size=(self.num_tril),value=C_sym[self.tril_indices]) - self.p_incomplete = pymc.Dirichlet("p_incomplete", np.ones(self.num_states)) - self.p = pymc.CompletedDirichlet("p",self.p_incomplete)[0] - @pymc.dtrm def X(X_flat=self.X_flat): @@ -113,9 +161,6 @@ def X(X_flat=self.X_flat): X[self.tril_indices] = X_flat X.T[self.tril_indices] = X_flat - - X /= X.sum() - X *= self.N return X self.X = X @@ -128,44 +173,47 @@ def T(X=self.X): @pymc.potential def log_likelihood(T=self.T): - return np.sum(self.C * np.log(T)) + return log_likelihood_T(self.C, T) self.log_likelihood = log_likelihood - - @pymc.potential - def normalization_constraint(T=self.T): - if T.min() < 0 or T.sum(1).max() > 1: - return -1 * np.inf - else: - return 0.0 - self.normalization_constraint = normalization_constraint - def sample(self, num_steps, thin=1, filename=None): - if filename == None: - db = "ram" - else: - db = "hdf5" - - self.mcmc = pymc.MCMC(self, db=db, dbname=filename) - self.mcmc.sample(num_steps) +class ReversibleRateSampler(Sampler): + def __init__(self, C, beta=BETA, alpha=ALPHA): + """Create an object to sample reversible rate matrices. + + Parameters + ---------- + C : ndarray + Count matrix + beta : float + Parameter of exponential prior, default = %f + alpha : float + Parameter of Dirichlet prior, default = %f + + Notes + ----- + To parameterize reversible rate matrices, we work with both the + equilibrium populations (p) and the symmetrized rate matrix, + S = (diag(p**-0.5)) K (diag(p**0.5)) - def iterate_T(self): - """Iterate over the sampled transition matrices.""" - Ti = self.mcmc.trace("T") - for i, T in enumerate(Ti): - yield T - + We model the upper triangle (exclusive) of S using independent + exponential variables. We model the equilibrium populations + as a Dirichlet variable. + """ % (BETA, ALPHA) + Sampler.__init__(self, C) -class ReversibleRateSampler(): - def __init__(self, C): - self.C = C - self.num_states = C.shape[0] self.num_tril = np.tril_indices_from(C,-1)[0].shape[0] + + T = MSMLib.estimate_transition_matrix(C + C.T) + K0 = scipy.linalg.logm(T) + l,v = np.linalg.eig(K0.T) + p0 = v[:,l.argmax()] + p0 /= p0.sum() - self.S_flat = pymc.Uninformative("S_tril",size=(self.num_tril),value=np.ones(self.num_tril)) - self.tril_indices = np.tril_indices_from(C,-1) + self.tril_indices = np.tril_indices_from(C, -1) + self.S_flat = pymc.Exponential("S_tril", beta, size=(self.num_tril), value=K0[self.tril_indices]) - self.p_incomplete = pymc.Dirichlet("p_incomplete", np.ones(self.num_states)) + self.p_incomplete = pymc.Dirichlet("p_incomplete", alpha * np.ones(self.num_states), value=p0[:-1]) self.p = pymc.CompletedDirichlet("p",self.p_incomplete)[0] @pymc.dtrm @@ -198,31 +246,6 @@ def T(K=self.K): @pymc.potential def log_likelihood(T=self.T): - return np.sum(self.C * np.log(T)) + return log_likelihood_T(self.C, T) self.log_likelihood = log_likelihood - @pymc.potential - def normalization_constraint(K=self.K): - K = K.copy() - np.fill_diagonal(K, np.zeros(self.num_states)) - if K.min() < 0.0: - return -1 * np.inf - else: - return 0.0 - self.normalization_constraint = normalization_constraint - - def sample(self, num_steps, thin=1, filename=None): - - if filename == None: - db = "ram" - else: - db = "hdf5" - - self.mcmc = pymc.MCMC(self, db=db, dbname=filename) - self.mcmc.sample(num_steps) - - def iterate_K(self): - """Iterate over the sampled transition matrices.""" - Ki = self.mcmc.trace("K") - for i, K in enumerate(Ki): - yield K From aa53d11779653e2c12721b082184e0ef6399c399 Mon Sep 17 00:00:00 2001 From: Kyle Beauchamp Date: Mon, 29 Apr 2013 11:38:10 -0700 Subject: [PATCH 4/5] fixed docstrings --- src/python/mcmc.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/python/mcmc.py b/src/python/mcmc.py index f6c85c87..c92b6df1 100644 --- a/src/python/mcmc.py +++ b/src/python/mcmc.py @@ -66,8 +66,8 @@ def __init__(self, C, alpha=ALPHA): C : ndarray Count matrix alpha : float - Strength of Dirichlet prior. Default %f - """ % ALPHA + Strength of Dirichlet prior. + """ Sampler.__init__(self, C) initial = MSMLib.estimate_transition_matrix(C) @@ -98,8 +98,8 @@ def __init__(self, C, beta=BETA): C : ndarray Count matrix beta : float - Parameter of exponential prior. Default %f - """ % BETA + Parameter of exponential prior. + """ Sampler.__init__(self, C) T = MSMLib.estimate_transition_matrix(C) @@ -186,9 +186,9 @@ def __init__(self, C, beta=BETA, alpha=ALPHA): C : ndarray Count matrix beta : float - Parameter of exponential prior, default = %f + Parameter of exponential prior alpha : float - Parameter of Dirichlet prior, default = %f + Parameter of Dirichlet prior Notes ----- @@ -199,7 +199,7 @@ def __init__(self, C, beta=BETA, alpha=ALPHA): We model the upper triangle (exclusive) of S using independent exponential variables. We model the equilibrium populations as a Dirichlet variable. - """ % (BETA, ALPHA) + """ Sampler.__init__(self, C) self.num_tril = np.tril_indices_from(C,-1)[0].shape[0] From 407d1354dcf5e9f548a5ac8ea1ed06f587650f7e Mon Sep 17 00:00:00 2001 From: Kyle Beauchamp Date: Mon, 6 May 2013 11:44:46 -0700 Subject: [PATCH 5/5] Added tests for MCMC --- src/python/mcmc.py | 10 +++++---- tests/test_mcmc.py | 53 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+), 4 deletions(-) create mode 100644 tests/test_mcmc.py diff --git a/src/python/mcmc.py b/src/python/mcmc.py index c92b6df1..184d8d78 100644 --- a/src/python/mcmc.py +++ b/src/python/mcmc.py @@ -34,15 +34,17 @@ def __init__(self, C): self.C = C self.num_states = C.shape[0] - def sample(self, num_steps, thin=1, filename=None): + def sample(self, num_steps, thin=1, burn=0, filename=None): """Sample the MCMC chain and store the results. Parameters ---------- num_steps : int How many MCMC samples to generate - thin : int, optional - Subsample MCMC chain by this amount (default 1). + thin : int, optional, default 1 + Subsample MCMC chain by this amount. + burn : int, optional, default 0 + Discard first `burn` MCMC samples. filename : str, optional If not None, store results as HDF file rather than in RAM. @@ -53,7 +55,7 @@ def sample(self, num_steps, thin=1, filename=None): db = "hdf5" self.mcmc = pymc.MCMC(self, db=db, dbname=filename) - self.mcmc.sample(num_steps, thin=thin) + self.mcmc.sample(num_steps, thin=thin, burn=burn) class TransitionSampler(Sampler): diff --git a/tests/test_mcmc.py b/tests/test_mcmc.py new file mode 100644 index 00000000..f372d820 --- /dev/null +++ b/tests/test_mcmc.py @@ -0,0 +1,53 @@ +import numpy as np +from msmbuilder.testing import eq +from msmbuilder import mcmc, MSMLib +import scipy.linalg +from nose.tools import nottest +from nose.plugins.skip import SkipTest + +SKIP_TESTS = True + +num_states = 2 +num_steps = 1000000 +thin = 500 +burn = 50000 + +C = 1000000 * np.ones((num_states,num_states)) +C[0,0] *= 1000 +C[0,1] += 1 + +T0 = MSMLib.estimate_transition_matrix(C) +X0 = MSMLib.mle_reversible_count_matrix(scipy.sparse.csr_matrix(C)).toarray() +T0_rev = MSMLib.estimate_transition_matrix(X0) + +def test_TransitionSampler(): + if SKIP_TESTS == True: + raise(SkipTest("MCMC test skipped")) + sampler = mcmc.TransitionSampler(C) + sampler.sample(num_steps, thin=thin, burn=burn) + T = sampler.mcmc.trace("T")[:].mean(0) + eq(T0, T, decimal=5) + +def test_ReversibleTransitionSampler(): + if SKIP_TESTS == True: + raise(SkipTest("MCMC test skipped")) + sampler = mcmc.ReversibleTransitionSampler(C) + sampler.sample(1000000,thin=500, burn=burn) + T_rev = sampler.mcmc.trace("T")[:].mean(0) + eq(T0_rev, T_rev, decimal=5) + +def test_RateSampler(): + if SKIP_TESTS == True: + raise(SkipTest("MCMC test skipped")) + sampler = mcmc.RateSampler(C) + sampler.sample(1000000,thin=500, burn=burn) + T_K = sampler.mcmc.trace("T")[:].mean(0) + eq(T0, T_K, decimal=5) + +def test_ReversibleRateSampler(): + if SKIP_TESTS == True: + raise(SkipTest("MCMC test skipped")) + sampler = mcmc.ReversibleRateSampler(C) + sampler.sample(1000000,thin=500, burn=burn) + T_K_rev = sampler.mcmc.trace("T")[:].mean(0) + eq(T0_rev, T_K_rev, decimal=5)