Skip to content

Commit

Permalink
Fix Poisson log link PMF
Browse files Browse the repository at this point in the history
Add test cases for Poisson-uniform mixture likelihood computations

Compute marginal PMF over a point mass using dpois directly

This fixes issue stephens999#114
  • Loading branch information
aksarkar committed Dec 27, 2019
1 parent e8a7abc commit 95b80d7
Show file tree
Hide file tree
Showing 3 changed files with 100 additions and 11 deletions.
15 changes: 12 additions & 3 deletions R/lik.R
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ lik_logF = function(df1,df2){
#' @details Suppose we have Poisson observations \code{y} where \eqn{y_i\sim Poisson(c_i\lambda_i)}.
#' We either put an unimodal prior g on the (scaled) intensities \eqn{\lambda_i\sim g}
#' (by specifying \code{link="identity"}) or on the log intensities
#' \eqn{logit(\lambda_i)\sim g} (by specifying \code{link="log"}). Either way,
#' \eqn{log(\lambda_i)\sim g} (by specifying \code{link="log"}). Either way,
#' ASH with this Poisson likelihood function will compute the posterior mean of the
#' intensities \eqn{\lambda_i}.
#' @param y Poisson observations.
Expand All @@ -81,17 +81,26 @@ lik_pois = function(y, scale=1, link=c("identity","log")){
if (link=="identity"){
list(name = "pois",
const = TRUE,
## Important: in comp_dens_conv, this will be called like
##
## lcdfFUN((data$x - a) / data$s)
##
## where data is an ash data object with data$x = 0 and data$s =
## 1. Therefore, we need to take absolute values
lcdfFUN = function(x){pgamma(abs(x),shape=y+1,rate=scale,log.p=TRUE)-log(scale)},
lpdfFUN = function(x){dgamma(abs(x),shape=y+1,rate=scale,log=TRUE)-log(scale)},
etruncFUN = function(a,b){-my_etruncgamma(-b,-a,y+1,scale)},
e2truncFUN = function(a,b){my_e2truncgamma(-b,-a,y+1,scale)},
data=list(y=y, scale=scale, link=link))
}else if (link=="log"){
}
else if (link=="log"){
y1 = y+1e-5 # add pseudocount
list(name = "pois",
const = TRUE,
## comp_dens_conv calls this with negative of the expected arguments
lcdfFUN = function(x){pgamma(exp(-x),shape=y1,rate=scale,log.p=TRUE)-log(y1)},
lpdfFUN = function(x){dgamma(exp(-x),shape=y1,rate=scale,log=TRUE)-log(y1)},
## This is PMF marginalizing over a point mass on x
lpdfFUN = function(x){dpois(y, exp(-x), log=TRUE)},
etruncFUN = function(a,b){-my_etruncgamma(exp(-b),exp(-a),y1,scale)},
e2truncFUN = function(a,b){my_e2truncgamma(exp(-b),exp(-a),y1,scale)},
data=list(y=y, scale=scale, link=link))
Expand Down
82 changes: 81 additions & 1 deletion tests/testthat/test_lik.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,86 @@ test_that("general likelihood with multiple df works", {

data =set_data(betahat,s,lik = lik_t(df),alpha=0)
expect_equal(calc_null_loglik(data),sum(dt(betahat/s,df=df,log=TRUE)-log(s)))

})

test_that("Poisson (identity link) marginal PMF is correct", {
y = seq(0, 100)
g = unimix(1, .1, .2)
lik = lik_pois(y=y, link="identity")
data = set_data(rep(0, length(y)), 1, lik)
py = drop(comp_dens_conv(g, data))
true_py = unlist(lapply(y, function (z) {integrate(function(lam) {dpois(z, lam)}, g$a[1], g$b[1])$value / (g$b[1] - g$a[1])}))
expect_equal(py, true_py)
})

test_that("Poisson (identity link) marginal PMF for point mass is correct", {
y = seq(0, 100)
g = unimix(1, .1, .1)
lik = lik_pois(y=y, link="identity")
data = set_data(rep(0, length(y)), 1, lik)
py = drop(comp_dens_conv(g, data))
true_py = unlist(lapply(y, function(x) {dpois(x, g$a[1])}))
expect_equal(py, true_py)
})

test_that("Poisson (identity link) marginal PMF is correct with scale factors", {
y = seq(0, 100)
s = 100
g = unimix(1, 1e-3, 2e-3)
lik = lik_pois(y=y, scale=s, link="identity")
data = set_data(rep(0, length(y)), 1, lik)
py = drop(comp_dens_conv(g, data))
true_py = unlist(lapply(y, function (z) {integrate(function(lam) {dpois(z, s * lam)}, g$a[1], g$b[1])$value / (g$b[1] - g$a[1])}))
expect_equal(py, true_py, tolerance=1e-5, scale=1)
})

test_that("Poisson (log link) marginal PMF is correct", {
y = seq(0, 100)
g = unimix(1, log(.1), log(.2))
lik = lik_pois(y=y, link="log")
data = set_data(rep(0, length(y)), 1, lik)
py = drop(comp_dens_conv(g, data))
true_py = unlist(lapply(y, function (z) {integrate(function(log_lam) {dpois(z, exp(log_lam))}, g$a[1], g$b[1])$value / (g$b[1] - g$a[1])}))
expect_equal(py, true_py, tolerance=1e-5, scale=1)
})

test_that("Poisson (log link) marginal PMF for point mass is correct", {
y = seq(0, 100)
g = unimix(1, log(.1), log(.1))
lik = lik_pois(y=y, link="log")
data = set_data(rep(0, length(y)), 1, lik)
py = drop(comp_dens_conv(g, data))
true_py = unlist(lapply(y, function(x) {dpois(x, exp(g$a[1]))}))
expect_equal(py, true_py)
})

test_that("Poisson (log link) marginal PMF is correct with scale factors", {
y = seq(0, 100)
s = 100
g = unimix(1, log(1e-3), log(2e-3))
lik = lik_pois(y=y, scale=s, link="log")
data = set_data(rep(0, length(y)), 1, lik)
py = drop(comp_dens_conv(g, data))
true_py = unlist(lapply(y, function (z) {integrate(function(log_lam) {dpois(z, s * exp(log_lam))}, g$a[1], g$b[1])$value / (g$b[1] - g$a[1])}))
expect_equal(py, true_py, tolerance=1e-5, scale=1)
})

test_that("Poisson (identity link) returns sensible marginal PMF on real data", {
skip("save time")
dat = readRDS("test_pois_data.Rds")
fit <- ash_pois(dat$x, dat$scale, link="identity", mixcompdist="halfuniform")
F = comp_dens_conv(fit$fitted_g, fit$data)
expect_true(all(F < 1))
})

test_that("Poisson (log link) returns sensible marginal PMF on real data", {
dat = readRDS("test_pois_data.Rds")
lam <- dat$x / dat$scale
eps = 1 / mean(dat$scale)
log_lam <- log(lam + eps)
se_log_lam <- sqrt(var(lam) / (lam + eps)^2)
mixsd <- seq(.1 * min(se_log_lam), max(2 * sqrt(log_lam^2 - se_log_lam^2)), by=.5 * log(2))
fit <- ash_pois(dat$x, dat$scale, link="log", mixcompdist="halfuniform", mixsd=mixsd)
F = comp_dens_conv(fit$fitted_g, fit$data)
expect_true(all(F < 1))
})
14 changes: 7 additions & 7 deletions tests/testthat/test_pois.R
Original file line number Diff line number Diff line change
Expand Up @@ -78,11 +78,11 @@ test_that("lik_pois (log link) fitted mode is close to true mode",{
})

test_that("Mode estimation for pois_lik finds an acceptable solution", {
set.seed(1)
# Load example 10X Genomics data
dat = readRDS("test_pois_data.Rds")
m0 = ashr::ash(rep(0, nrow(dat)), 1, lik=ashr::lik_pois(dat$x, scale=dat$scale, link="identity"), mode="estimate")
lam = dat$x / dat$scale
m1 = ashr::ash(rep(0, nrow(dat)), 1, lik=ashr::lik_pois(dat$x, scale=dat$scale, link="identity"), mode=c(min(lam), max(lam)))
expect_equal(m0$loglik, m1$loglik, tolerance=1, scale=1)
set.seed(1)
## Load example 10X Genomics data
dat = readRDS("test_pois_data.Rds")
m0 = ashr::ash(rep(0, nrow(dat)), 1, lik=ashr::lik_pois(dat$x, scale=dat$scale, link="identity"), mode="estimate")
lam = dat$x / dat$scale
m1 = ashr::ash(rep(0, nrow(dat)), 1, lik=ashr::lik_pois(dat$x, scale=dat$scale, link="identity"), mode=c(min(lam), max(lam)))
expect_equal(m0$loglik, m1$loglik, tolerance=1, scale=1)
})

0 comments on commit 95b80d7

Please sign in to comment.