From 95b80d71842158c101c58941921174a03db095b5 Mon Sep 17 00:00:00 2001
From: Abhishek Sarkar <aksarkar@alum.mit.edu>
Date: Fri, 27 Dec 2019 11:10:08 -0600
Subject: [PATCH] Fix Poisson log link PMF

Add test cases for Poisson-uniform mixture likelihood computations

Compute marginal PMF over a point mass using dpois directly

This fixes issue #114
---
 R/lik.R                    | 15 +++++--
 tests/testthat/test_lik.R  | 82 +++++++++++++++++++++++++++++++++++++-
 tests/testthat/test_pois.R | 14 +++----
 3 files changed, 100 insertions(+), 11 deletions(-)

diff --git a/R/lik.R b/R/lik.R
index 5087e86..acfb39c 100644
--- a/R/lik.R
+++ b/R/lik.R
@@ -58,7 +58,7 @@ lik_logF = function(df1,df2){
 #' @details Suppose we have Poisson observations \code{y} where \eqn{y_i\sim Poisson(c_i\lambda_i)}. 
 #'    We either put an unimodal prior g on the (scaled) intensities \eqn{\lambda_i\sim g} 
 #'    (by specifying \code{link="identity"}) or on the log intensities 
-#'    \eqn{logit(\lambda_i)\sim g} (by specifying \code{link="log"}). Either way, 
+#'    \eqn{log(\lambda_i)\sim g} (by specifying \code{link="log"}). Either way, 
 #'    ASH with this Poisson likelihood function will compute the posterior mean of the 
 #'    intensities \eqn{\lambda_i}.
 #' @param y Poisson observations.
@@ -81,17 +81,26 @@ lik_pois = function(y, scale=1, link=c("identity","log")){
   if (link=="identity"){
     list(name = "pois",
          const = TRUE,
+         ## Important: in comp_dens_conv, this will be called like
+         ##
+         ## lcdfFUN((data$x - a) / data$s)
+         ##
+         ## where data is an ash data object with data$x = 0 and data$s =
+         ## 1. Therefore, we need to take absolute values
          lcdfFUN = function(x){pgamma(abs(x),shape=y+1,rate=scale,log.p=TRUE)-log(scale)},
          lpdfFUN = function(x){dgamma(abs(x),shape=y+1,rate=scale,log=TRUE)-log(scale)},
          etruncFUN = function(a,b){-my_etruncgamma(-b,-a,y+1,scale)},
          e2truncFUN = function(a,b){my_e2truncgamma(-b,-a,y+1,scale)},
          data=list(y=y, scale=scale, link=link))
-  }else if (link=="log"){
+  }
+  else if (link=="log"){
     y1 = y+1e-5 # add pseudocount
     list(name = "pois",
          const = TRUE,
+         ## comp_dens_conv calls this with negative of the expected arguments
          lcdfFUN = function(x){pgamma(exp(-x),shape=y1,rate=scale,log.p=TRUE)-log(y1)},
-         lpdfFUN = function(x){dgamma(exp(-x),shape=y1,rate=scale,log=TRUE)-log(y1)},
+         ## This is PMF marginalizing over a point mass on x
+         lpdfFUN = function(x){dpois(y, exp(-x), log=TRUE)},
          etruncFUN = function(a,b){-my_etruncgamma(exp(-b),exp(-a),y1,scale)},
          e2truncFUN = function(a,b){my_e2truncgamma(exp(-b),exp(-a),y1,scale)},
          data=list(y=y, scale=scale, link=link))
diff --git a/tests/testthat/test_lik.R b/tests/testthat/test_lik.R
index 0f3a4dd..f447681 100644
--- a/tests/testthat/test_lik.R
+++ b/tests/testthat/test_lik.R
@@ -30,6 +30,86 @@ test_that("general likelihood with multiple df works", {
   
   data =set_data(betahat,s,lik = lik_t(df),alpha=0)
   expect_equal(calc_null_loglik(data),sum(dt(betahat/s,df=df,log=TRUE)-log(s)))
-  
+})
+
+test_that("Poisson (identity link) marginal PMF is correct", {
+  y = seq(0, 100)
+  g = unimix(1, .1, .2)
+  lik = lik_pois(y=y, link="identity")
+  data = set_data(rep(0, length(y)), 1, lik)
+  py = drop(comp_dens_conv(g, data))
+  true_py = unlist(lapply(y, function (z) {integrate(function(lam) {dpois(z, lam)}, g$a[1], g$b[1])$value / (g$b[1] - g$a[1])}))
+  expect_equal(py, true_py)
+})
+
+test_that("Poisson (identity link) marginal PMF for point mass is correct", {
+  y = seq(0, 100)
+  g = unimix(1, .1, .1)
+  lik = lik_pois(y=y, link="identity")
+  data = set_data(rep(0, length(y)), 1, lik)
+  py = drop(comp_dens_conv(g, data))
+  true_py = unlist(lapply(y, function(x) {dpois(x, g$a[1])}))
+  expect_equal(py, true_py)
+})
+
+test_that("Poisson (identity link) marginal PMF is correct with scale factors", {
+  y = seq(0, 100)
+  s = 100
+  g = unimix(1, 1e-3, 2e-3)
+  lik = lik_pois(y=y, scale=s, link="identity")
+  data = set_data(rep(0, length(y)), 1, lik)
+  py = drop(comp_dens_conv(g, data))
+  true_py = unlist(lapply(y, function (z) {integrate(function(lam) {dpois(z, s * lam)}, g$a[1], g$b[1])$value / (g$b[1] - g$a[1])}))
+  expect_equal(py, true_py, tolerance=1e-5, scale=1)
+})
+
+test_that("Poisson (log link) marginal PMF is correct", {
+  y = seq(0, 100)
+  g = unimix(1, log(.1), log(.2))
+  lik = lik_pois(y=y, link="log")
+  data = set_data(rep(0, length(y)), 1, lik)
+  py = drop(comp_dens_conv(g, data))
+  true_py = unlist(lapply(y, function (z) {integrate(function(log_lam) {dpois(z, exp(log_lam))}, g$a[1], g$b[1])$value / (g$b[1] - g$a[1])}))
+  expect_equal(py, true_py, tolerance=1e-5, scale=1)
+})
+
+test_that("Poisson (log link) marginal PMF for point mass is correct", {
+  y = seq(0, 100)
+  g = unimix(1, log(.1), log(.1))
+  lik = lik_pois(y=y, link="log")
+  data = set_data(rep(0, length(y)), 1, lik)
+  py = drop(comp_dens_conv(g, data))
+  true_py = unlist(lapply(y, function(x) {dpois(x, exp(g$a[1]))}))
+  expect_equal(py, true_py)
+})
+
+test_that("Poisson (log link) marginal PMF is correct with scale factors", {
+  y = seq(0, 100)
+  s = 100
+  g = unimix(1, log(1e-3), log(2e-3))
+  lik = lik_pois(y=y, scale=s, link="log")
+  data = set_data(rep(0, length(y)), 1, lik)
+  py = drop(comp_dens_conv(g, data))
+  true_py = unlist(lapply(y, function (z) {integrate(function(log_lam) {dpois(z, s * exp(log_lam))}, g$a[1], g$b[1])$value / (g$b[1] - g$a[1])}))
+  expect_equal(py, true_py, tolerance=1e-5, scale=1)
+})
+
+test_that("Poisson (identity link) returns sensible marginal PMF on real data", {
+  skip("save time")
+  dat = readRDS("test_pois_data.Rds")
+  fit <- ash_pois(dat$x, dat$scale, link="identity", mixcompdist="halfuniform")
+  F = comp_dens_conv(fit$fitted_g, fit$data)
+  expect_true(all(F < 1))
+})
 
+test_that("Poisson (log link) returns sensible marginal PMF on real data", {
+  dat = readRDS("test_pois_data.Rds")
+  lam <- dat$x / dat$scale
+  eps = 1 / mean(dat$scale)
+  log_lam <- log(lam + eps)
+  se_log_lam <- sqrt(var(lam) / (lam + eps)^2)
+  mixsd <- seq(.1 * min(se_log_lam), max(2 * sqrt(log_lam^2 - se_log_lam^2)), by=.5 * log(2))
+  fit <- ash_pois(dat$x, dat$scale, link="log", mixcompdist="halfuniform", mixsd=mixsd)
+  F = comp_dens_conv(fit$fitted_g, fit$data)
+  expect_true(all(F < 1))
 })
diff --git a/tests/testthat/test_pois.R b/tests/testthat/test_pois.R
index e06b572..64ed247 100644
--- a/tests/testthat/test_pois.R
+++ b/tests/testthat/test_pois.R
@@ -78,11 +78,11 @@ test_that("lik_pois (log link) fitted mode is close to true mode",{
 })
 
 test_that("Mode estimation for pois_lik finds an acceptable solution", {
-    set.seed(1)
-    # Load example 10X Genomics data
-    dat = readRDS("test_pois_data.Rds")
-    m0 = ashr::ash(rep(0, nrow(dat)), 1, lik=ashr::lik_pois(dat$x, scale=dat$scale, link="identity"), mode="estimate")
-    lam = dat$x / dat$scale
-    m1 = ashr::ash(rep(0, nrow(dat)), 1, lik=ashr::lik_pois(dat$x, scale=dat$scale, link="identity"), mode=c(min(lam), max(lam)))
-    expect_equal(m0$loglik, m1$loglik, tolerance=1, scale=1)
+  set.seed(1)
+  ## Load example 10X Genomics data
+  dat = readRDS("test_pois_data.Rds")
+  m0 = ashr::ash(rep(0, nrow(dat)), 1, lik=ashr::lik_pois(dat$x, scale=dat$scale, link="identity"), mode="estimate")
+  lam = dat$x / dat$scale
+  m1 = ashr::ash(rep(0, nrow(dat)), 1, lik=ashr::lik_pois(dat$x, scale=dat$scale, link="identity"), mode=c(min(lam), max(lam)))
+  expect_equal(m0$loglik, m1$loglik, tolerance=1, scale=1)
 })