From 95b80d71842158c101c58941921174a03db095b5 Mon Sep 17 00:00:00 2001 From: Abhishek Sarkar Date: Fri, 27 Dec 2019 11:10:08 -0600 Subject: [PATCH] Fix Poisson log link PMF Add test cases for Poisson-uniform mixture likelihood computations Compute marginal PMF over a point mass using dpois directly This fixes issue #114 --- R/lik.R | 15 +++++-- tests/testthat/test_lik.R | 82 +++++++++++++++++++++++++++++++++++++- tests/testthat/test_pois.R | 14 +++---- 3 files changed, 100 insertions(+), 11 deletions(-) diff --git a/R/lik.R b/R/lik.R index 5087e86..acfb39c 100644 --- a/R/lik.R +++ b/R/lik.R @@ -58,7 +58,7 @@ lik_logF = function(df1,df2){ #' @details Suppose we have Poisson observations \code{y} where \eqn{y_i\sim Poisson(c_i\lambda_i)}. #' We either put an unimodal prior g on the (scaled) intensities \eqn{\lambda_i\sim g} #' (by specifying \code{link="identity"}) or on the log intensities -#' \eqn{logit(\lambda_i)\sim g} (by specifying \code{link="log"}). Either way, +#' \eqn{log(\lambda_i)\sim g} (by specifying \code{link="log"}). Either way, #' ASH with this Poisson likelihood function will compute the posterior mean of the #' intensities \eqn{\lambda_i}. #' @param y Poisson observations. @@ -81,17 +81,26 @@ lik_pois = function(y, scale=1, link=c("identity","log")){ if (link=="identity"){ list(name = "pois", const = TRUE, + ## Important: in comp_dens_conv, this will be called like + ## + ## lcdfFUN((data$x - a) / data$s) + ## + ## where data is an ash data object with data$x = 0 and data$s = + ## 1. Therefore, we need to take absolute values lcdfFUN = function(x){pgamma(abs(x),shape=y+1,rate=scale,log.p=TRUE)-log(scale)}, lpdfFUN = function(x){dgamma(abs(x),shape=y+1,rate=scale,log=TRUE)-log(scale)}, etruncFUN = function(a,b){-my_etruncgamma(-b,-a,y+1,scale)}, e2truncFUN = function(a,b){my_e2truncgamma(-b,-a,y+1,scale)}, data=list(y=y, scale=scale, link=link)) - }else if (link=="log"){ + } + else if (link=="log"){ y1 = y+1e-5 # add pseudocount list(name = "pois", const = TRUE, + ## comp_dens_conv calls this with negative of the expected arguments lcdfFUN = function(x){pgamma(exp(-x),shape=y1,rate=scale,log.p=TRUE)-log(y1)}, - lpdfFUN = function(x){dgamma(exp(-x),shape=y1,rate=scale,log=TRUE)-log(y1)}, + ## This is PMF marginalizing over a point mass on x + lpdfFUN = function(x){dpois(y, exp(-x), log=TRUE)}, etruncFUN = function(a,b){-my_etruncgamma(exp(-b),exp(-a),y1,scale)}, e2truncFUN = function(a,b){my_e2truncgamma(exp(-b),exp(-a),y1,scale)}, data=list(y=y, scale=scale, link=link)) diff --git a/tests/testthat/test_lik.R b/tests/testthat/test_lik.R index 0f3a4dd..f447681 100644 --- a/tests/testthat/test_lik.R +++ b/tests/testthat/test_lik.R @@ -30,6 +30,86 @@ test_that("general likelihood with multiple df works", { data =set_data(betahat,s,lik = lik_t(df),alpha=0) expect_equal(calc_null_loglik(data),sum(dt(betahat/s,df=df,log=TRUE)-log(s))) - +}) + +test_that("Poisson (identity link) marginal PMF is correct", { + y = seq(0, 100) + g = unimix(1, .1, .2) + lik = lik_pois(y=y, link="identity") + data = set_data(rep(0, length(y)), 1, lik) + py = drop(comp_dens_conv(g, data)) + true_py = unlist(lapply(y, function (z) {integrate(function(lam) {dpois(z, lam)}, g$a[1], g$b[1])$value / (g$b[1] - g$a[1])})) + expect_equal(py, true_py) +}) + +test_that("Poisson (identity link) marginal PMF for point mass is correct", { + y = seq(0, 100) + g = unimix(1, .1, .1) + lik = lik_pois(y=y, link="identity") + data = set_data(rep(0, length(y)), 1, lik) + py = drop(comp_dens_conv(g, data)) + true_py = unlist(lapply(y, function(x) {dpois(x, g$a[1])})) + expect_equal(py, true_py) +}) + +test_that("Poisson (identity link) marginal PMF is correct with scale factors", { + y = seq(0, 100) + s = 100 + g = unimix(1, 1e-3, 2e-3) + lik = lik_pois(y=y, scale=s, link="identity") + data = set_data(rep(0, length(y)), 1, lik) + py = drop(comp_dens_conv(g, data)) + true_py = unlist(lapply(y, function (z) {integrate(function(lam) {dpois(z, s * lam)}, g$a[1], g$b[1])$value / (g$b[1] - g$a[1])})) + expect_equal(py, true_py, tolerance=1e-5, scale=1) +}) + +test_that("Poisson (log link) marginal PMF is correct", { + y = seq(0, 100) + g = unimix(1, log(.1), log(.2)) + lik = lik_pois(y=y, link="log") + data = set_data(rep(0, length(y)), 1, lik) + py = drop(comp_dens_conv(g, data)) + true_py = unlist(lapply(y, function (z) {integrate(function(log_lam) {dpois(z, exp(log_lam))}, g$a[1], g$b[1])$value / (g$b[1] - g$a[1])})) + expect_equal(py, true_py, tolerance=1e-5, scale=1) +}) + +test_that("Poisson (log link) marginal PMF for point mass is correct", { + y = seq(0, 100) + g = unimix(1, log(.1), log(.1)) + lik = lik_pois(y=y, link="log") + data = set_data(rep(0, length(y)), 1, lik) + py = drop(comp_dens_conv(g, data)) + true_py = unlist(lapply(y, function(x) {dpois(x, exp(g$a[1]))})) + expect_equal(py, true_py) +}) + +test_that("Poisson (log link) marginal PMF is correct with scale factors", { + y = seq(0, 100) + s = 100 + g = unimix(1, log(1e-3), log(2e-3)) + lik = lik_pois(y=y, scale=s, link="log") + data = set_data(rep(0, length(y)), 1, lik) + py = drop(comp_dens_conv(g, data)) + true_py = unlist(lapply(y, function (z) {integrate(function(log_lam) {dpois(z, s * exp(log_lam))}, g$a[1], g$b[1])$value / (g$b[1] - g$a[1])})) + expect_equal(py, true_py, tolerance=1e-5, scale=1) +}) + +test_that("Poisson (identity link) returns sensible marginal PMF on real data", { + skip("save time") + dat = readRDS("test_pois_data.Rds") + fit <- ash_pois(dat$x, dat$scale, link="identity", mixcompdist="halfuniform") + F = comp_dens_conv(fit$fitted_g, fit$data) + expect_true(all(F < 1)) +}) +test_that("Poisson (log link) returns sensible marginal PMF on real data", { + dat = readRDS("test_pois_data.Rds") + lam <- dat$x / dat$scale + eps = 1 / mean(dat$scale) + log_lam <- log(lam + eps) + se_log_lam <- sqrt(var(lam) / (lam + eps)^2) + mixsd <- seq(.1 * min(se_log_lam), max(2 * sqrt(log_lam^2 - se_log_lam^2)), by=.5 * log(2)) + fit <- ash_pois(dat$x, dat$scale, link="log", mixcompdist="halfuniform", mixsd=mixsd) + F = comp_dens_conv(fit$fitted_g, fit$data) + expect_true(all(F < 1)) }) diff --git a/tests/testthat/test_pois.R b/tests/testthat/test_pois.R index e06b572..64ed247 100644 --- a/tests/testthat/test_pois.R +++ b/tests/testthat/test_pois.R @@ -78,11 +78,11 @@ test_that("lik_pois (log link) fitted mode is close to true mode",{ }) test_that("Mode estimation for pois_lik finds an acceptable solution", { - set.seed(1) - # Load example 10X Genomics data - dat = readRDS("test_pois_data.Rds") - m0 = ashr::ash(rep(0, nrow(dat)), 1, lik=ashr::lik_pois(dat$x, scale=dat$scale, link="identity"), mode="estimate") - lam = dat$x / dat$scale - m1 = ashr::ash(rep(0, nrow(dat)), 1, lik=ashr::lik_pois(dat$x, scale=dat$scale, link="identity"), mode=c(min(lam), max(lam))) - expect_equal(m0$loglik, m1$loglik, tolerance=1, scale=1) + set.seed(1) + ## Load example 10X Genomics data + dat = readRDS("test_pois_data.Rds") + m0 = ashr::ash(rep(0, nrow(dat)), 1, lik=ashr::lik_pois(dat$x, scale=dat$scale, link="identity"), mode="estimate") + lam = dat$x / dat$scale + m1 = ashr::ash(rep(0, nrow(dat)), 1, lik=ashr::lik_pois(dat$x, scale=dat$scale, link="identity"), mode=c(min(lam), max(lam))) + expect_equal(m0$loglik, m1$loglik, tolerance=1, scale=1) })