
Commit 19b2453
Merge pull request #344 from lnccbrown/342-align-non-decision-time-treatment

Replace negative non-decision time in likelihood computation with a small value
digicosmos86 authored Jan 19, 2024
2 parents 65f86e7 + f1567f8 commit 19b2453
Showing 7 changed files with 84 additions and 23 deletions.
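The substance of the change, in sketch form: wherever a trial's response time `rt` does not exceed the non-decision time `t` (up to the `1e-15` epsilon used by the analytical likelihood), the log-likelihood is floored at the same lower bound already used for out-of-bounds parameters, instead of being evaluated at a negative `rt - t`. A minimal, self-contained illustration of the idea, not the library code itself (the real implementation is `ensure_positive_ndt` in `src/hssm/distribution_utils/dist.py` below):

```python
import numpy as np
import pytensor.tensor as pt

LOGP_LB = -66.1  # the library wraps this as pm.floatX(-66.1)

rt = np.array([0.3, 0.8, 1.2])  # response times; the first is <= t below
t = 0.5                         # a hypothetical non-decision time value
logp = pt.as_tensor_variable(np.array([-1.0, -2.0, -3.0]))

# Floor the log-likelihood wherever rt - t <= 1e-15, mirroring the
# ensure_positive_ndt guard added in this commit.
clamped = pt.where(rt - t <= 1e-15, LOGP_LB, logp)
print(clamped.eval())  # [-66.1  -2.   -3. ]
```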
6 changes: 3 additions & 3 deletions README.md
@@ -31,13 +31,13 @@ HSSM is a Python toolbox that provides a seamless combination of state-of-the-ar

## Installation

-`hssm` is available through PyPI. You can install it with Pip via:
+`hssm` is available through PyPI. You can install it with pip via:

```
pip install hssm
```

-You can also install the bleeding edge version of `hssm` directly from this repo:
+You can also install the bleeding-edge version of `hssm` directly from this repo:

```
pip install git+https://github.com/lnccbrown/HSSM.git
@@ -51,7 +51,7 @@ for more detailed instructions.
[here](https://github.com/lnccbrown/HSSM/discussions). We recommend leveraging an
environment manager with Python 3.10~3.11 to prevent any problems with dependencies
during the installation process. Please note that hssm is tested for python 3.10,
-3.11. As of HSSM v0.1.6, support for Python 3.9 is dropped. Use other python
+3.11. As of HSSM v0.2.0, support for Python 3.9 is dropped. Use other python
versions with caution.

## Example
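The environment-manager advice in the README above, sketched concretely; conda is just one such manager, and the environment name here is arbitrary:

```bash
# Hypothetical setup following the README's recommendation of Python 3.10~3.11
conda create -n hssm python=3.11
conda activate hssm
pip install hssm
```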
6 changes: 2 additions & 4 deletions docs/index.md
@@ -15,7 +15,6 @@

**HSSM** (Hierarchical Sequential Sampling Modeling) is a modern Python toolbox that provides state-of-the-art likelihood approximation methods within the Python Bayesian ecosystem. It facilitates hierarchical model building and inference via fast and robust MCMC samplers. User-friendly, extensible, and flexible, HSSM can rigorously estimate the impact of neural and other trial-by-trial covariates through parameter-wise mixed-effects models for a large variety of cognitive process models.


HSSM is a [BRAINSTORM](https://ccbs.carney.brown.edu/brainstorm) project in collaboration with the [Center for Computation and Visualization (CCV)](https://ccv.brown.edu/) and the [Center for Computational Brain Science](https://ccbs.carney.brown.edu/) within the [Carney Institute at Brown University](https://www.brown.edu/carney/).

## Features
@@ -30,7 +29,7 @@ HSSM is a [BRAINSTORM](https://ccbs.carney.brown.edu/brainstorm) project in coll

## Installation

-`hssm` is available through PyPI. You can install it with Pip via:
+`hssm` is available through PyPI. You can install it with pip via:

```bash
pip install hssm
@@ -50,10 +49,9 @@ For more detailed guidance, please check out our [installation guide](getting_st
[here](https://github.com/lnccbrown/HSSM/discussions). We recommend leveraging an
environment manager with Python 3.10~3.11 to prevent any problems with dependencies
during the installation process. Please note that hssm is tested for python 3.10,
-3.11. As of HSSM v0.1.6, support for Python 3.9 is dropped. Use other python
+3.11. As of HSSM v0.2.0, support for Python 3.9 is dropped. Use other python
versions with caution.


### Setting global float type

Using the analytical DDM (Drift Diffusion Model) likelihood in PyMC without forcing float type to `"float32"` in PyTensor may result in warning messages during sampling, which is a known bug in PyMC v5.6.0 and earlier versions. We can use `hssm.set_floatX("float32")` to get around this for now.
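The float-type workaround mentioned in the docs above is a one-liner; this sketch uses the same `hssm.set_floatX` call that appears in this commit's test suite:

```python
import hssm

# Force PyTensor's global float type to float32 before building models,
# working around the PyMC warning described above.
hssm.set_floatX("float32")
```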
2 changes: 1 addition & 1 deletion docs/overrides/main.html
@@ -5,7 +5,7 @@
</span>
Navigate the site here!
</span>
-<span class="right-margin"> v0.2.0b1 is released! </span>
+<span class="right-margin"> v0.2.0 is released! </span>
<span>
<span class="twemoji">
{% include ".icons/material/head-question.svg" %}
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "HSSM"
-version = "0.2.0b1"
+version = "0.2.0"
description = "Bayesian inference for hierarchical sequential sampling models."
authors = [
"Alexander Fengler <[email protected]>",
4 changes: 2 additions & 2 deletions src/hssm/defaults.py
@@ -251,7 +251,7 @@ class DefaultConfig(TypedDict):
},
},
"race_no_bias_angle_4": {
"list_params": ["v0", "v1", "v2", "v3", "a", "z", "ndt", "theta"],
"list_params": ["v0", "v1", "v2", "v3", "a", "z", "t", "theta"],
"description": None,
"likelihoods": {
"approx_differentiable": {
@@ -265,7 +265,7 @@
"v3": (0.0, 2.5),
"a": (1.0, 3.0),
"z": (0.0, 0.9),
"ndt": (0.0, 2.0),
"t": (0.0, 2.0),
"theta": (-0.1, 1.45),
},
"extra_fields": None,
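The `ndt` → `t` rename matters beyond naming consistency: the new guard in `dist.py` below looks up a parameter literally named `"t"` in `list_params`, so a config that still used `"ndt"` would silently bypass the check. A hedged sketch of constructing the affected model, assuming the standard `hssm.HSSM` constructor (the DataFrame here is hypothetical stand-in data, not a working dataset):

```python
import numpy as np
import pandas as pd
import hssm

# Hypothetical stand-in data: HSSM expects "rt" and "response" columns;
# a 4-accumulator race model has four response options.
my_data = pd.DataFrame({
    "rt": np.random.uniform(0.2, 2.0, size=500),
    "response": np.random.choice([0, 1, 2, 3], size=500),
})

# After this commit the non-decision time of this model is named "t",
# so the ensure_positive_ndt guard applies to it as well.
model = hssm.HSSM(data=my_data, model="race_no_bias_angle_4")
```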
56 changes: 49 additions & 7 deletions src/hssm/distribution_utils/dist.py
@@ -29,7 +29,7 @@

_logger = logging.getLogger("hssm")

-OUT_OF_BOUNDS_VAL = pm.floatX(-66.1)
+LOGP_LB = pm.floatX(-66.1)


def apply_param_bounds_to_loglik(
@@ -71,11 +71,51 @@ def apply_param_bounds_to_loglik(
param_mask = pt.bitwise_or(pt.lt(param, lower_bound), pt.gt(param, upper_bound))
out_of_bounds_mask = pt.bitwise_or(out_of_bounds_mask, param_mask)

-logp = pt.where(out_of_bounds_mask, OUT_OF_BOUNDS_VAL, logp)
+logp = pt.where(out_of_bounds_mask, LOGP_LB, logp)

return logp


+def ensure_positive_ndt(data, logp, list_params, dist_params):
+    """Ensure that the non-decision time is always positive.
+
+    Replaces the log probability of the model with a lower bound if the non-decision
+    time is not positive.
+
+    Parameters
+    ----------
+    data
+        A two-column numpy array with response time and response.
+    logp
+        The log-likelihoods.
+    list_params
+        A list of parameters that the log-likelihood accepts. The order of the
+        parameters in the list will determine the order in which the parameters
+        are passed to the log-likelihood function.
+    dist_params
+        A list of parameters used in the likelihood computation. The parameters
+        can be both scalars and arrays.
+
+    Returns
+    -------
+    float
+        The log-likelihood of the model.
+    """
+    rt = data[:, 0]
+
+    if "t" not in list_params:
+        return logp
+
+    t = dist_params[list_params.index("t")]
+
+    return pt.where(
+        # consistent with the epsilon in the analytical likelihood
+        rt - t <= 1e-15,
+        LOGP_LB,
+        logp,
+    )


def make_ssm_rv(
model_name: str, list_params: list[str], lapse: bmb.Prior | None = None
) -> Type[RandomVariable]:
@@ -404,12 +444,14 @@ def logp(data, *dist_params): # pylint: disable=E0213
else:
logp = loglik(data, *dist_params, *extra_fields)

-if bounds is None:
-    return logp
+if bounds is not None:
+    logp = apply_param_bounds_to_loglik(
+        logp, list_params, *dist_params, bounds=bounds
+    )

-return apply_param_bounds_to_loglik(
-    logp, list_params, *dist_params, bounds=bounds
-)
+# Ensure that non-decision time is always smaller than rt.
+# Assuming that the non-decision time parameter is always named "t".
+return ensure_positive_ndt(data, logp, list_params, dist_params)

return SSMDistribution

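For reference, the bounds masking that feeds the renamed `LOGP_LB` constant (from `apply_param_bounds_to_loglik` above) can be sketched in isolation; a minimal, self-contained example, not the library code:

```python
import numpy as np
import pytensor.tensor as pt

LOGP_LB = -66.1  # pm.floatX(-66.1) in the library

# Parameter values outside [lower, upper] get their log-likelihood floored,
# the same per-parameter pattern used in the function above.
param = np.array([0.2, 1.5, -0.3])
lower_bound, upper_bound = 0.0, 1.0
logp = pt.as_tensor_variable(np.array([-1.0, -2.0, -3.0]))

param_mask = pt.bitwise_or(pt.lt(param, lower_bound), pt.gt(param, upper_bound))
print(pt.where(param_mask, LOGP_LB, logp).eval())  # [ -1.  -66.1 -66.1]
```

In the reworked `logp`, this bounds clamp runs first and `ensure_positive_ndt` runs last, so either condition alone is enough to floor a trial's log-likelihood.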
31 changes: 26 additions & 5 deletions tests/test_distribution_utils.py
@@ -6,7 +6,11 @@

import hssm
from hssm import distribution_utils
-from hssm.distribution_utils.dist import apply_param_bounds_to_loglik, make_distribution
+from hssm.distribution_utils.dist import (
+    apply_param_bounds_to_loglik,
+    make_distribution,
+    ensure_positive_ndt,
+)
from hssm.likelihoods.analytical import logp_ddm, DDM

hssm.set_floatX("float32")
@@ -123,9 +127,10 @@ def test_apply_param_bounds_to_loglik():
def test_make_distribution():
def fake_logp_function(data, param1, param2):
"""Make up a fake log likelihood function for this test only."""
-return data * param1 * param2
+return data[:, 0] * param1 * param2

-data = np.random.normal(size=1000)
+data = np.zeros((1000, 2))
+data[:, 0] = np.random.normal(size=1000)
bounds = {"param1": [-1.0, 1.0], "param2": [-1.0, 1.0]}

Dist = make_distribution(
@@ -143,7 +148,7 @@ def fake_logp_function(data, param1, param2):

np.testing.assert_array_equal(
Dist.logp(data, scalar_in_bound, scalar_in_bound).eval(),
-data * scalar_in_bound * scalar_in_bound,
+data[:, 0] * scalar_in_bound * scalar_in_bound,
)

np.testing.assert_array_equal(
@@ -157,7 +162,7 @@ def fake_logp_function(data, param1, param2):

np.testing.assert_array_equal(
results_vector[~out_of_bound_indices],
-data[~out_of_bound_indices]
+data[:, 0][~out_of_bound_indices]
* scalar_in_bound
* random_vector[~out_of_bound_indices],
)
@@ -227,3 +232,19 @@ def logp_ddm_extra_fields(data, v, a, z, t, x, y):
).eval(),
ddm_model_p_logp_lapse.eval(),
)


+def test_ensure_positive_ndt():
+    data = np.zeros((1000, 2))
+    data[:, 0] = np.random.uniform(size=1000)
+
+    logp = np.random.uniform(size=1000)
+
+    list_params = ["v", "a", "z", "t"]
+    dist_params = [0.5] * 4
+
+    after_replacement = ensure_positive_ndt(data, logp, list_params, dist_params).eval()
+    mask = data[:, 0] - 0.5 <= 1e-15
+
+    assert np.all(after_replacement[mask] == np.array(-66.1))
+    assert np.all(after_replacement[~mask] == logp[~mask])
