From 0953746fbfa4dc7e8fcd5f8dd1a33f5c954f922d Mon Sep 17 00:00:00 2001
From: Kyle Beauchamp <kyleabeauchamp@gmail.com>
Date: Fri, 26 Apr 2013 12:17:02 -0700
Subject: [PATCH 1/5] First mcmc code added

---
 src/python/mcmc.py | 141 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 141 insertions(+)
 create mode 100644 src/python/mcmc.py

diff --git a/src/python/mcmc.py b/src/python/mcmc.py
new file mode 100644
index 00000000..18018879
--- /dev/null
+++ b/src/python/mcmc.py
@@ -0,0 +1,141 @@
+import pymc  # pymc 2.2
+import numpy as np
+import scipy.linalg
+
+class TransitionSampler():
+    def __init__(self, C):
+        self.C = C
+        self.num_states = C.shape[0]
+        initial = (1 / float(self.num_states)) * np.ones((self.num_states, self.num_states - 1))
+        self.T_partial = pymc.Uninformative("T_partial",size=(self.num_states, self.num_states - 1),value=initial)
+
+        @pymc.dtrm
+        def T(T_partial=self.T_partial):
+            T = np.zeros((self.num_states, self.num_states))
+            T[:,:-1] = T_partial
+            T[:,-1] = 1 - T_partial.sum(1)
+            return T
+        self.T = T
+
+        @pymc.potential
+        def log_likelihood(T=self.T):
+            return np.sum(self.C * np.log(T))
+        self.log_likelihood = log_likelihood
+        
+        @pymc.potential
+        def normalization_constraint(T=self.T):
+            if T.min() < 0 or T.sum(1).max() > 1:
+                return -1 * np.inf
+            else:
+                return 0.0
+        self.normalization_constraint = normalization_constraint
+
+    def sample(self, num_steps, thin=1, filename=None):
+
+        if filename == None:
+            db = "ram"
+        else:
+            db = "hdf5"
+            
+        self.mcmc = pymc.MCMC(self, db=db, dbname=filename)
+        self.mcmc.sample(num_steps)
+        
+    def iterate_T(self):
+        """Iterate over the sampled transition matrices."""
+        Ti = self.mcmc.trace("T")
+        for i, T in enumerate(Ti):
+            yield T
+
+class RateSampler():
+    def __init__(self, C):
+        self.C = C
+        self.num_states = C.shape[0]
+        initial = (1 / float(self.num_states)) * np.ones((self.num_states, self.num_states))
+        self.K_unnormalized = pymc.Uninformative("K_unnormalized",size=(self.num_states, self.num_states),value=initial)
+
+        @pymc.dtrm
+        def K(K_unnormalized=self.K_unnormalized):
+            K = K_unnormalized.copy()
+            diags = K.diagonal() - K.sum(1)
+            np.fill_diagonal(K, diags)
+            return K
+        self.K = K
+
+        @pymc.dtrm
+        def T(K=self.K):
+            T = scipy.linalg.expm(K)
+            return T
+        self.T = T
+
+        @pymc.potential
+        def log_likelihood(T=self.T):
+            return np.sum(self.C * np.log(T))
+        self.log_likelihood = log_likelihood
+        
+        @pymc.potential
+        def normalization_constraint(K=self.K):
+            K = K.copy()
+            np.fill_diagonal(K, np.zeros(self.num_states))
+            if K.min() < 0.0:
+                return -1 * np.inf
+            else:
+                return 0.0
+        self.normalization_constraint = normalization_constraint
+
+    def sample(self, num_steps, thin=1, filename=None):
+
+        if filename == None:
+            db = "ram"
+        else:
+            db = "hdf5"
+            
+        self.mcmc = pymc.MCMC(self, db=db, dbname=filename)
+        self.mcmc.sample(num_steps)
+        
+    def iterate_K(self):
+        """Iterate over the sampled transition matrices."""
+        Ki = self.mcmc.trace("K")
+        for i, K in enumerate(Ki):
+            yield K
+
+class ReversibleTransitionSampler():
+    def __init__(self, C):
+        self.C = C
+        self.num_states = C.shape[0]
+        initial = (1 / float(self.num_states)) * np.ones((self.num_states, self.num_states - 1))
+        self.X_partial = pymc.Uninformative("X_partial",size=(self.num_states, self.num_states - 1),value= initial)
+
+        @pymc.dtrm
+        def X(X_partial=self.X_partial):
+            X = np.zeros((self.num_states, self.num_states))
+            X[:,:-1] = X_partial
+            X[:,-1] = 1 - X_partial.sum(1)
+            X = 0.5 * (X + X.T)
+            return X
+        self.X = X
+
+        @pymc.dtrm
+        def T(X=self.X):
+            Ni = X.sum(1)
+            D = np.diag(Ni**-1.)
+            T = D.dot(X)
+            return T
+        self.T = T
+
+        @pymc.potential
+        def log_likelihood(T=self.T):
+            return np.sum(self.C * np.log(T))
+        self.log_likelihood = log_likelihood
+        
+        @pymc.potential
+        def normalization_constraint(T=self.T):
+            if T.min() < 0 or T.sum(1).max() > 1:
+                return -1 * np.inf
+            else:
+                return 0.0
+        self.normalization_constraint = normalization_constraint
+
+    def sample(self, num_steps, thin=1):
+        self.mcmc = pymc.MCMC(self)
+        self.mcmc.sample(num_steps)
+        return self.mcmc.trace("T")[:]

From a7313d78456f2b3c186db88d6a8b6326c0e80973 Mon Sep 17 00:00:00 2001
From: Kyle Beauchamp <kyleabeauchamp@gmail.com>
Date: Fri, 26 Apr 2013 18:30:03 -0700
Subject: [PATCH 2/5] Now use Dirichlet RVs to sample TProb

---
 src/python/mcmc.py | 143 ++++++++++++++++++++++++++++++++++++---------
 1 file changed, 115 insertions(+), 28 deletions(-)

diff --git a/src/python/mcmc.py b/src/python/mcmc.py
index 18018879..77df4701 100644
--- a/src/python/mcmc.py
+++ b/src/python/mcmc.py
@@ -1,19 +1,21 @@
 import pymc  # pymc 2.2
 import numpy as np
 import scipy.linalg
+from msmbuilder import MSMLib
 
 class TransitionSampler():
-    def __init__(self, C):
+    def __init__(self, C, alpha=1.0):
         self.C = C
         self.num_states = C.shape[0]
-        initial = (1 / float(self.num_states)) * np.ones((self.num_states, self.num_states - 1))
-        self.T_partial = pymc.Uninformative("T_partial",size=(self.num_states, self.num_states - 1),value=initial)
+
+        self.T_incomplete = np.array([pymc.Dirichlet("T%d_incomplete" % i, alpha * np.ones(self.num_states)) for i in xrange(self.num_states)])
+        self.T0 = np.array([pymc.CompletedDirichlet("T%d" % i,self.T_incomplete[i])[0] for i in xrange(self.num_states)])
 
         @pymc.dtrm
-        def T(T_partial=self.T_partial):
+        def T(T0=self.T0):
             T = np.zeros((self.num_states, self.num_states))
-            T[:,:-1] = T_partial
-            T[:,-1] = 1 - T_partial.sum(1)
+            for i in xrange(self.num_states):
+                T[i] = T0[i]
             return T
         self.T = T
 
@@ -21,14 +23,6 @@ def T(T_partial=self.T_partial):
         def log_likelihood(T=self.T):
             return np.sum(self.C * np.log(T))
         self.log_likelihood = log_likelihood
-        
-        @pymc.potential
-        def normalization_constraint(T=self.T):
-            if T.min() < 0 or T.sum(1).max() > 1:
-                return -1 * np.inf
-            else:
-                return 0.0
-        self.normalization_constraint = normalization_constraint
 
     def sample(self, num_steps, thin=1, filename=None):
 
@@ -50,8 +44,8 @@ class RateSampler():
     def __init__(self, C):
         self.C = C
         self.num_states = C.shape[0]
-        initial = (1 / float(self.num_states)) * np.ones((self.num_states, self.num_states))
-        self.K_unnormalized = pymc.Uninformative("K_unnormalized",size=(self.num_states, self.num_states),value=initial)
+        initial_value = (1 / float(self.num_states)) * np.ones((self.num_states, self.num_states))
+        self.K_unnormalized = pymc.Uninformative("K_unnormalized",size=(self.num_states, self.num_states),value=initial_value)
 
         @pymc.dtrm
         def K(K_unnormalized=self.K_unnormalized):
@@ -101,24 +95,34 @@ def iterate_K(self):
 class ReversibleTransitionSampler():
     def __init__(self, C):
         self.C = C
+        self.N = C.sum()
+        
         self.num_states = C.shape[0]
-        initial = (1 / float(self.num_states)) * np.ones((self.num_states, self.num_states - 1))
-        self.X_partial = pymc.Uninformative("X_partial",size=(self.num_states, self.num_states - 1),value= initial)
+        self.num_tril = np.tril_indices_from(C)[0].shape[0]
+        
+        self.X_flat = pymc.Uninformative("S_tril",size=(self.num_tril),value=np.ones(self.num_tril))
+        self.tril_indices = np.tril_indices_from(C)
+        
+        self.p_incomplete = pymc.Dirichlet("p_incomplete", np.ones(self.num_states))
+        self.p = pymc.CompletedDirichlet("p",self.p_incomplete)[0]
 
         @pymc.dtrm
-        def X(X_partial=self.X_partial):
+        def X(X_flat=self.X_flat):
+
             X = np.zeros((self.num_states, self.num_states))
-            X[:,:-1] = X_partial
-            X[:,-1] = 1 - X_partial.sum(1)
-            X = 0.5 * (X + X.T)
+
+            X[self.tril_indices] = X_flat
+            X.T[self.tril_indices] = X_flat            
+
+            X /= X.sum()
+            X *= self.N
+            
             return X
         self.X = X
 
         @pymc.dtrm
         def T(X=self.X):
-            Ni = X.sum(1)
-            D = np.diag(Ni**-1.)
-            T = D.dot(X)
+            T = MSMLib.estimate_transition_matrix(X)
             return T
         self.T = T
 
@@ -135,7 +139,90 @@ def normalization_constraint(T=self.T):
                 return 0.0
         self.normalization_constraint = normalization_constraint
 
-    def sample(self, num_steps, thin=1):
-        self.mcmc = pymc.MCMC(self)
+    def sample(self, num_steps, thin=1, filename=None):
+
+        if filename == None:
+            db = "ram"
+        else:
+            db = "hdf5"
+            
+        self.mcmc = pymc.MCMC(self, db=db, dbname=filename)
+        self.mcmc.sample(num_steps)
+        
+    def iterate_T(self):
+        """Iterate over the sampled transition matrices."""
+        Ti = self.mcmc.trace("T")
+        for i, T in enumerate(Ti):
+            yield T
+
+
+class ReversibleRateSampler():
+    def __init__(self, C):
+        self.C = C
+        self.num_states = C.shape[0]
+        self.num_tril = np.tril_indices_from(C,-1)[0].shape[0]
+        
+        self.S_flat = pymc.Uninformative("S_tril",size=(self.num_tril),value=np.ones(self.num_tril))
+        self.tril_indices = np.tril_indices_from(C,-1)
+        
+        self.p_incomplete = pymc.Dirichlet("p_incomplete", np.ones(self.num_states))
+        self.p = pymc.CompletedDirichlet("p",self.p_incomplete)[0]
+
+        @pymc.dtrm
+        def S(S_flat=self.S_flat):
+
+            S = np.zeros((self.num_states, self.num_states))
+
+            S[self.tril_indices] = S_flat
+            S.T[self.tril_indices] = S_flat            
+
+            return S
+        self.S = S
+
+        @pymc.dtrm
+        def K(S=self.S, p=self.p):
+            D_pre = np.diag(p ** 0.5)
+            D_post = np.diag(p ** -0.5)
+            K = (D_pre.dot(S)).dot(D_post)
+            
+            np.fill_diagonal(K, -1 * K.sum(1))
+            
+            return K
+        self.K = K
+
+        @pymc.dtrm
+        def T(K=self.K):
+            T = scipy.linalg.expm(K)
+            return T
+        self.T = T
+
+        @pymc.potential
+        def log_likelihood(T=self.T):
+            return np.sum(self.C * np.log(T))
+        self.log_likelihood = log_likelihood
+        
+        @pymc.potential
+        def normalization_constraint(K=self.K):
+            K = K.copy()
+            np.fill_diagonal(K, np.zeros(self.num_states))
+            if K.min() < 0.0:
+                return -1 * np.inf
+            else:
+                return 0.0
+        self.normalization_constraint = normalization_constraint
+
+    def sample(self, num_steps, thin=1, filename=None):
+
+        if filename == None:
+            db = "ram"
+        else:
+            db = "hdf5"
+            
+        self.mcmc = pymc.MCMC(self, db=db, dbname=filename)
         self.mcmc.sample(num_steps)
-        return self.mcmc.trace("T")[:]
+        
+    def iterate_K(self):
+        """Iterate over the sampled transition matrices."""
+        Ki = self.mcmc.trace("K")
+        for i, K in enumerate(Ki):
+            yield K

From 34ffd140e0e41dc820322f0fc26228ef7b83d11f Mon Sep 17 00:00:00 2001
From: Kyle Beauchamp <kyleabeauchamp@gmail.com>
Date: Mon, 29 Apr 2013 11:17:59 -0700
Subject: [PATCH 3/5] Updated MCMC code

---
 src/python/mcmc.py | 257 ++++++++++++++++++++++++---------------------
 1 file changed, 140 insertions(+), 117 deletions(-)

diff --git a/src/python/mcmc.py b/src/python/mcmc.py
index 77df4701..f6c85c87 100644
--- a/src/python/mcmc.py
+++ b/src/python/mcmc.py
@@ -1,14 +1,77 @@
-import pymc  # pymc 2.2
+import abc
+import pymc
 import numpy as np
 import scipy.linalg
 from msmbuilder import MSMLib
 
-class TransitionSampler():
-    def __init__(self, C, alpha=1.0):
+# Default values of prior parameters for Dirichlet and Exponential RVs
+ALPHA = 1.0 # 1 pseudocount
+BETA = 1.0E-6  # A nearly uninformative exponential.
+
+def log_likelihood_T(C, T):
+    """Get the log likelihood of a transition matrix given counts.
+
+    Parameters
+    ----------
+    C : ndarray
+        Count matrix
+    T : ndarray
+        Transition matrix
+
+    Returns
+    -------
+    log_likelihood : float
+        log-likelihood
+    """    
+    return np.sum(C * np.log(T))
+
+class Sampler():
+    """Base class for all MCMC samplers, provides `sample()` function."""
+    __metaclass__ = abc.ABCMeta
+
+    def __init__(self, C):
+        """Create Sampler and store the counts and num_states."""
         self.C = C
         self.num_states = C.shape[0]
 
-        self.T_incomplete = np.array([pymc.Dirichlet("T%d_incomplete" % i, alpha * np.ones(self.num_states)) for i in xrange(self.num_states)])
+    def sample(self, num_steps, thin=1, filename=None):
+        """Sample the MCMC chain and store the results.
+
+        Parameters
+        ----------
+        num_steps : int
+            How many MCMC samples to generate
+        thin : int, optional
+            Subsample MCMC chain by this amount (default 1).
+        filename : str, optional
+            If not None, store results as HDF file rather than in RAM.
+
+        """        
+        if filename == None:
+            db = "ram"
+        else:
+            db = "hdf5"
+            
+        self.mcmc = pymc.MCMC(self, db=db, dbname=filename)
+        self.mcmc.sample(num_steps, thin=thin)    
+
+
+class TransitionSampler(Sampler):
+    """Sample transition matrices with Dirichlet prior."""
+    def __init__(self, C, alpha=ALPHA):
+        """Create an object to sample transition matrices.
+
+        Parameters
+        ----------
+        C : ndarray
+            Count matrix
+        alpha : float
+            Strength of Dirichlet prior.  Default %f
+        """ % ALPHA
+        Sampler.__init__(self, C)
+
+        initial = MSMLib.estimate_transition_matrix(C)
+        self.T_incomplete = np.array([pymc.Dirichlet("T%d_incomplete" % i, alpha * np.ones(self.num_states), value=initial[i,:-1]) for i in xrange(self.num_states)])
         self.T0 = np.array([pymc.CompletedDirichlet("T%d" % i,self.T_incomplete[i])[0] for i in xrange(self.num_states)])
 
         @pymc.dtrm
@@ -21,32 +84,29 @@ def T(T0=self.T0):
 
         @pymc.potential
         def log_likelihood(T=self.T):
-            return np.sum(self.C * np.log(T))
+            return log_likelihood_T(self.C, T)
         self.log_likelihood = log_likelihood
 
-    def sample(self, num_steps, thin=1, filename=None):
-
-        if filename == None:
-            db = "ram"
-        else:
-            db = "hdf5"
-            
-        self.mcmc = pymc.MCMC(self, db=db, dbname=filename)
-        self.mcmc.sample(num_steps)
         
-    def iterate_T(self):
-        """Iterate over the sampled transition matrices."""
-        Ti = self.mcmc.trace("T")
-        for i, T in enumerate(Ti):
-            yield T
-
-class RateSampler():
-    def __init__(self, C):
-        self.C = C
-        self.num_states = C.shape[0]
-        initial_value = (1 / float(self.num_states)) * np.ones((self.num_states, self.num_states))
-        self.K_unnormalized = pymc.Uninformative("K_unnormalized",size=(self.num_states, self.num_states),value=initial_value)
-
+class RateSampler(Sampler):
+    """Sample rate matrices with an exponential prior."""
+    def __init__(self, C, beta=BETA):
+        """Create an object to sample rate matrices.
+
+        Parameters
+        ----------
+        C : ndarray
+            Count matrix
+        beta : float
+            Parameter of exponential prior.  Default %f
+        """ % BETA
+        Sampler.__init__(self, C)
+
+        T = MSMLib.estimate_transition_matrix(C)
+        K0 = scipy.linalg.logm(T)
+        np.fill_diagonal(K0, np.zeros(self.num_states))
+        self.K_unnormalized = pymc.Exponential("K_unnormalized", beta, size=(self.num_states, self.num_states), value=K0)
+ 
         @pymc.dtrm
         def K(K_unnormalized=self.K_unnormalized):
             K = K_unnormalized.copy()
@@ -63,49 +123,37 @@ def T(K=self.K):
 
         @pymc.potential
         def log_likelihood(T=self.T):
-            return np.sum(self.C * np.log(T))
+            return log_likelihood_T(self.C, T)
         self.log_likelihood = log_likelihood
         
-        @pymc.potential
-        def normalization_constraint(K=self.K):
-            K = K.copy()
-            np.fill_diagonal(K, np.zeros(self.num_states))
-            if K.min() < 0.0:
-                return -1 * np.inf
-            else:
-                return 0.0
-        self.normalization_constraint = normalization_constraint
 
-    def sample(self, num_steps, thin=1, filename=None):
+class ReversibleTransitionSampler(Sampler):
+    """Sample reversible transition matrices."""
+    def __init__(self, C):
+        """Create an object to sample reversible transition matrices.
 
-        if filename == None:
-            db = "ram"
-        else:
-            db = "hdf5"
+        Parameters
+        ----------
+        C : ndarray
+            Count matrix
             
-        self.mcmc = pymc.MCMC(self, db=db, dbname=filename)
-        self.mcmc.sample(num_steps)
+        Notes
+        -----
+        To parameterize reversible transition matrices, we work with
+        symmetric count matrices X.  The parameters are the upper (inclusive)
+        triangle of X, which we (arbitrarily) throw a uniform(0,1) prior on.        
+        """
+        Sampler.__init__(self, C)
         
-    def iterate_K(self):
-        """Iterate over the sampled transition matrices."""
-        Ki = self.mcmc.trace("K")
-        for i, K in enumerate(Ki):
-            yield K
-
-class ReversibleTransitionSampler():
-    def __init__(self, C):
-        self.C = C
-        self.N = C.sum()
+        C_sym = C + C.T
+        C_sym /= C_sym.sum()
         
         self.num_states = C.shape[0]
         self.num_tril = np.tril_indices_from(C)[0].shape[0]
         
-        self.X_flat = pymc.Uninformative("S_tril",size=(self.num_tril),value=np.ones(self.num_tril))
         self.tril_indices = np.tril_indices_from(C)
+        self.X_flat = pymc.Uniform("X_flat", 0, 1, size=(self.num_tril),value=C_sym[self.tril_indices])       
         
-        self.p_incomplete = pymc.Dirichlet("p_incomplete", np.ones(self.num_states))
-        self.p = pymc.CompletedDirichlet("p",self.p_incomplete)[0]
-
         @pymc.dtrm
         def X(X_flat=self.X_flat):
 
@@ -113,9 +161,6 @@ def X(X_flat=self.X_flat):
 
             X[self.tril_indices] = X_flat
             X.T[self.tril_indices] = X_flat            
-
-            X /= X.sum()
-            X *= self.N
             
             return X
         self.X = X
@@ -128,44 +173,47 @@ def T(X=self.X):
 
         @pymc.potential
         def log_likelihood(T=self.T):
-            return np.sum(self.C * np.log(T))
+            return log_likelihood_T(self.C, T)
         self.log_likelihood = log_likelihood
-        
-        @pymc.potential
-        def normalization_constraint(T=self.T):
-            if T.min() < 0 or T.sum(1).max() > 1:
-                return -1 * np.inf
-            else:
-                return 0.0
-        self.normalization_constraint = normalization_constraint
 
-    def sample(self, num_steps, thin=1, filename=None):
 
-        if filename == None:
-            db = "ram"
-        else:
-            db = "hdf5"
-            
-        self.mcmc = pymc.MCMC(self, db=db, dbname=filename)
-        self.mcmc.sample(num_steps)
+class ReversibleRateSampler(Sampler):
+    def __init__(self, C, beta=BETA, alpha=ALPHA):
+        """Create an object to sample reversible rate matrices.
+
+        Parameters
+        ----------
+        C : ndarray
+            Count matrix
+        beta : float
+            Parameter of exponential prior, default = %f
+        alpha : float
+            Parameter of Dirichlet prior, default = %f
+
+        Notes
+        -----
+        To parameterize reversible rate matrices, we work with both the
+        equilibrium populations (p) and the symmetrized rate matrix, 
+        S = (diag(p**-0.5)) K (diag(p**0.5))
         
-    def iterate_T(self):
-        """Iterate over the sampled transition matrices."""
-        Ti = self.mcmc.trace("T")
-        for i, T in enumerate(Ti):
-            yield T
-
+        We model the upper triangle (exclusive) of S using independent 
+        exponential variables.  We model the equilibrium populations 
+        as a Dirichlet variable.  
+        """ % (BETA, ALPHA)
+        Sampler.__init__(self, C)
 
-class ReversibleRateSampler():
-    def __init__(self, C):
-        self.C = C
-        self.num_states = C.shape[0]
         self.num_tril = np.tril_indices_from(C,-1)[0].shape[0]
+
+        T = MSMLib.estimate_transition_matrix(C + C.T)
+        K0 = scipy.linalg.logm(T)
+        l,v = np.linalg.eig(K0.T)
+        p0 = v[:,l.argmax()]
+        p0 /= p0.sum()        
         
-        self.S_flat = pymc.Uninformative("S_tril",size=(self.num_tril),value=np.ones(self.num_tril))
-        self.tril_indices = np.tril_indices_from(C,-1)
+        self.tril_indices = np.tril_indices_from(C, -1)
+        self.S_flat = pymc.Exponential("S_tril", beta, size=(self.num_tril), value=K0[self.tril_indices])        
         
-        self.p_incomplete = pymc.Dirichlet("p_incomplete", np.ones(self.num_states))
+        self.p_incomplete = pymc.Dirichlet("p_incomplete", alpha * np.ones(self.num_states), value=p0[:-1])
         self.p = pymc.CompletedDirichlet("p",self.p_incomplete)[0]
 
         @pymc.dtrm
@@ -198,31 +246,6 @@ def T(K=self.K):
 
         @pymc.potential
         def log_likelihood(T=self.T):
-            return np.sum(self.C * np.log(T))
+            return log_likelihood_T(self.C, T)
         self.log_likelihood = log_likelihood
         
-        @pymc.potential
-        def normalization_constraint(K=self.K):
-            K = K.copy()
-            np.fill_diagonal(K, np.zeros(self.num_states))
-            if K.min() < 0.0:
-                return -1 * np.inf
-            else:
-                return 0.0
-        self.normalization_constraint = normalization_constraint
-
-    def sample(self, num_steps, thin=1, filename=None):
-
-        if filename == None:
-            db = "ram"
-        else:
-            db = "hdf5"
-            
-        self.mcmc = pymc.MCMC(self, db=db, dbname=filename)
-        self.mcmc.sample(num_steps)
-        
-    def iterate_K(self):
-        """Iterate over the sampled transition matrices."""
-        Ki = self.mcmc.trace("K")
-        for i, K in enumerate(Ki):
-            yield K

From aa53d11779653e2c12721b082184e0ef6399c399 Mon Sep 17 00:00:00 2001
From: Kyle Beauchamp <kyleabeauchamp@gmail.com>
Date: Mon, 29 Apr 2013 11:38:10 -0700
Subject: [PATCH 4/5] fixed docstrings

---
 src/python/mcmc.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/python/mcmc.py b/src/python/mcmc.py
index f6c85c87..c92b6df1 100644
--- a/src/python/mcmc.py
+++ b/src/python/mcmc.py
@@ -66,8 +66,8 @@ def __init__(self, C, alpha=ALPHA):
         C : ndarray
             Count matrix
         alpha : float
-            Strength of Dirichlet prior.  Default %f
-        """ % ALPHA
+            Strength of Dirichlet prior.
+        """
         Sampler.__init__(self, C)
 
         initial = MSMLib.estimate_transition_matrix(C)
@@ -98,8 +98,8 @@ def __init__(self, C, beta=BETA):
         C : ndarray
             Count matrix
         beta : float
-            Parameter of exponential prior.  Default %f
-        """ % BETA
+            Parameter of exponential prior. 
+        """
         Sampler.__init__(self, C)
 
         T = MSMLib.estimate_transition_matrix(C)
@@ -186,9 +186,9 @@ def __init__(self, C, beta=BETA, alpha=ALPHA):
         C : ndarray
             Count matrix
         beta : float
-            Parameter of exponential prior, default = %f
+            Parameter of exponential prior
         alpha : float
-            Parameter of Dirichlet prior, default = %f
+            Parameter of Dirichlet prior
 
         Notes
         -----
@@ -199,7 +199,7 @@ def __init__(self, C, beta=BETA, alpha=ALPHA):
         We model the upper triangle (exclusive) of S using independent 
         exponential variables.  We model the equilibrium populations 
         as a Dirichlet variable.  
-        """ % (BETA, ALPHA)
+        """ 
         Sampler.__init__(self, C)
 
         self.num_tril = np.tril_indices_from(C,-1)[0].shape[0]

From 407d1354dcf5e9f548a5ac8ea1ed06f587650f7e Mon Sep 17 00:00:00 2001
From: Kyle Beauchamp <kyleabeauchamp@gmail.com>
Date: Mon, 6 May 2013 11:44:46 -0700
Subject: [PATCH 5/5] Added tests for MCMC

---
 src/python/mcmc.py | 10 +++++----
 tests/test_mcmc.py | 53 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 59 insertions(+), 4 deletions(-)
 create mode 100644 tests/test_mcmc.py

diff --git a/src/python/mcmc.py b/src/python/mcmc.py
index c92b6df1..184d8d78 100644
--- a/src/python/mcmc.py
+++ b/src/python/mcmc.py
@@ -34,15 +34,17 @@ def __init__(self, C):
         self.C = C
         self.num_states = C.shape[0]
 
-    def sample(self, num_steps, thin=1, filename=None):
+    def sample(self, num_steps, thin=1, burn=0, filename=None):
         """Sample the MCMC chain and store the results.
 
         Parameters
         ----------
         num_steps : int
             How many MCMC samples to generate
-        thin : int, optional
-            Subsample MCMC chain by this amount (default 1).
+        thin : int, optional, default 1
+            Subsample MCMC chain by this amount.
+        burn : int, optional, default 0
+            Discard first `burn` MCMC samples.  
         filename : str, optional
             If not None, store results as HDF file rather than in RAM.
 
@@ -53,7 +55,7 @@ def sample(self, num_steps, thin=1, filename=None):
             db = "hdf5"
             
         self.mcmc = pymc.MCMC(self, db=db, dbname=filename)
-        self.mcmc.sample(num_steps, thin=thin)    
+        self.mcmc.sample(num_steps, thin=thin, burn=burn)    
 
 
 class TransitionSampler(Sampler):
diff --git a/tests/test_mcmc.py b/tests/test_mcmc.py
new file mode 100644
index 00000000..f372d820
--- /dev/null
+++ b/tests/test_mcmc.py
@@ -0,0 +1,53 @@
+import numpy as np
+from msmbuilder.testing import eq
+from msmbuilder import mcmc, MSMLib
+import scipy.linalg
+from nose.tools import nottest
+from nose.plugins.skip import SkipTest
+
+SKIP_TESTS = True
+
+num_states = 2
+num_steps = 1000000
+thin = 500
+burn = 50000
+
+C = 1000000 * np.ones((num_states,num_states))
+C[0,0] *= 1000
+C[0,1] += 1
+
+T0 = MSMLib.estimate_transition_matrix(C)
+X0 = MSMLib.mle_reversible_count_matrix(scipy.sparse.csr_matrix(C)).toarray()
+T0_rev = MSMLib.estimate_transition_matrix(X0)
+
+def test_TransitionSampler():
+    if SKIP_TESTS == True:
+        raise(SkipTest("MCMC test skipped"))
+    sampler = mcmc.TransitionSampler(C)
+    sampler.sample(num_steps, thin=thin, burn=burn)
+    T = sampler.mcmc.trace("T")[:].mean(0)
+    eq(T0, T, decimal=5)
+    
+def test_ReversibleTransitionSampler():
+    if SKIP_TESTS == True:
+        raise(SkipTest("MCMC test skipped"))
+    sampler = mcmc.ReversibleTransitionSampler(C)
+    sampler.sample(1000000,thin=500, burn=burn)
+    T_rev = sampler.mcmc.trace("T")[:].mean(0)
+    eq(T0_rev, T_rev, decimal=5)
+    
+def test_RateSampler():
+    if SKIP_TESTS == True:
+        raise(SkipTest("MCMC test skipped"))
+    sampler = mcmc.RateSampler(C)
+    sampler.sample(1000000,thin=500, burn=burn)
+    T_K = sampler.mcmc.trace("T")[:].mean(0)
+    eq(T0, T_K, decimal=5)
+    
+def test_ReversibleRateSampler():
+    if SKIP_TESTS == True:
+        raise(SkipTest("MCMC test skipped"))
+    sampler = mcmc.ReversibleRateSampler(C)
+    sampler.sample(1000000,thin=500, burn=burn)
+    T_K_rev = sampler.mcmc.trace("T")[:].mean(0)
+    eq(T0_rev, T_K_rev, decimal=5)