Skip to content

Commit

Permalink
update tests to avoid issues because of w/ different architectures
Browse files Browse the repository at this point in the history
  • Loading branch information
boennecd committed Jul 6, 2020
1 parent daf0613 commit ee324a1
Showing 1 changed file with 95 additions and 91 deletions.
186 changes: 95 additions & 91 deletions tests/testthat/test_parglm_fams.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,102 +3,89 @@ context("testing results with varrious link functions agianst `glm.fit`")
to_check <- c(
"coefficients", "residuals", "fitted.values", "rank",
"family", "linear.predictors", "deviance", "aic", "null.deviance",
"iter", "prior.weights", "df.residual", "df.null",
"converged", "boundary", "formula", "terms", "data",
"prior.weights", "df.residual", "df.null",
"boundary", "formula", "terms", "data",
"offset", "contrasts", "xlevels")

sim_func <- function(family, n, p){
nam <- paste0(family$family, family$link)
x_vals <- seq(-pi, pi, length.out = n)
X <- outer(x_vals, 1:p, function(x, y) sin(x * y))
rg <- range(rowSums(X))
X <- X * 2 / diff(rg)

set.seed(77311413)
if(nam %in% c("binomiallogit", "binomialprobit", "binomialcauchit",
"binomialcloglog")){
sds <- p:1
sds <- sqrt(sds^2 / sum(sds^2))
X <- matrix(nrow = n, ncol = p)
set.seed(77311413)
for(i in 1:p)
X[, i] <- rnorm(n, 0, sds[i])
y <- family$linkinv(rowSums(X) + 1) > runif(n)
inter <- 1.
y <- family$linkinv(rowSums(X) + inter) > runif(n)

} else if(nam %in% "binomiallog"){
X <- matrix(runif(n * p, -1 / (4 * p), 1 / (4 * p)), n) - 3/(2 * p)
y <- family$linkinv(rowSums(X) + 1) > runif(n)
inter <- -.5
X <- -abs(X)
X <- X * .25 / diff(range(rowSums(X)))
y <- family$linkinv(rowSums(X) + inter) > runif(n)

} else if(nam %in% c("gaussianidentity", "gaussianlog")){
sds <- p:1
sds <- sqrt(sds^2 / sum(sds^2))
X <- matrix(nrow = n, ncol = p)
set.seed(77311413)
for(i in 1:p)
X[, i] <- rnorm(n, 5/p, sds[i])
y <- rnorm(n, family$linkinv(rowSums(X) + 1))
inter <- 0
y <- rnorm(n, family$linkinv(rowSums(X)), sd = .1)

} else if(nam %in% c("gaussianinverse")){
sds <- p:1
sds <- sqrt(sds^2 / sum(sds^2))
X <- matrix(nrow = n, ncol = p)
set.seed(77311413)
for(i in 1:p)
X[, i] <- rnorm(n, -1/p, sds[i])
y <- rnorm(n, family$linkinv(rowSums(X) + 1))
inter <- 1.
X <- abs(X)
y <- rnorm(n, family$linkinv(rowSums(X) + inter), sd = .1)

} else if(nam %in% c("Gammainverse", "Gammaidentity", "Gammalog")){
sds <- p:1
sds <- sqrt(sds^2 / sum(sds^2))
X <- matrix(nrow = n, ncol = p)
set.seed(77311413)
for(i in 1:p)
X[, i] <- rnorm(n, 5/p, sds[i])
y <- rgamma(n, family$linkinv(rowSums(X) + 1))
inter <- .5
X <- abs(X)
y <- rgamma(n, shape = 1, rate = 1 / family$linkinv(rowSums(X) + inter))

} else if(nam %in% c("poissonlog", "poissonidentity", "poissonsqrt")){
sds <- p:1
sds <- sqrt(sds^2 / sum(sds^2))
X <- matrix(nrow = n, ncol = p)
set.seed(77311413)
for(i in 1:p)
X[, i] <- rnorm(n, 4/p, sds[i])
y <- rpois(n, family$linkinv(rowSums(X) + 1))
inter <- 1.5
X <- abs(X)
y <- rpois(n, family$linkinv(rowSums(X) + inter))

} else if(nam %in% c("inverse.gaussian1/mu^2", "inverse.gaussianinverse",
"inverse.gaussianidentity", "inverse.gaussianlog")){
set.seed(77311413)
X <- matrix(runif(n * p, 1e-5, 1/p), nrow = n, ncol = p)
y <- SuppDists::rinvGauss(n, family$linkinv(rowSums(X) + 1), 1)
inter <- 1.5
X <- abs(X)
y <- SuppDists::rinvGauss(n, family$linkinv(rowSums(X) + inter), 1)

} else stop("family not implemented")

list(X = X, y = y)
list(X = X, y = y, inter = inter)
}

test_expr <- expression({
is_FAST <- method == "FAST"
tol <- if(is_FAST) .Machine$double.eps^(1/5) else
.Machine$double.eps^(1/4)
expect_equal(f1[to_check], f2[to_check], label = lab,
tolerance = if(is_FAST) 1e-6 else 1e-8)
tolerance = tol)
# these may differ as `glm.fit` uses the weights from the iteration prior
# to convergence
expect_equal(f1$weights, f2$weights, tolerance = sqrt(1e-7), label = lab)
# expect_equal(f1$weights, f2$weights, tolerance = sqrt(1e-7), label = lab)

s2 <- summary(f2)
s1 <- summary(f1)

excl <- c("call", "coefficients", "cov.unscaled", "cov.scaled",
"dispersion")
"dispersion", "iter")
expect_equal(s1[!names(s1) %in% excl], s2[!names(s2) %in% excl],
label = lab, tolerance = if(is_FAST) 5e-6 else 1e-7)
label = lab, tolerance = tol)
na <- rownames(s1$coefficients)
expect_equal(s1$coefficients[na, 1:2], s2$coefficients[na, 1:2],
label = lab, tolerance = sqrt(1e-7))
label = lab, tolerance = tol)

# may also differ as the weights are not computed at the final estimates
expect_equal(s1$dispersion, s2$dispersion, label = lab,
tolerance = sqrt(1e-7))
tolerance = tol)
})

test_that("works with different families", {
skip_if_not_installed("SuppDists")
n <- 1000L
p <- 5L
set.seed(77311413)
n <- 500L
p <- 2L
for(method in c("LAPACK", "LINPACK", "FAST"))
for(fa in list(
binomial("logit"), binomial("probit"), binomial("cauchit"),
Expand All @@ -107,7 +94,7 @@ test_that("works with different families", {
gaussian("identity"), gaussian("inverse"),
gaussian("log"),

Gamma("identity"), Gamma("log"),
Gamma("log"),

poisson("log"), poisson("sqrt")))
{
Expand All @@ -117,92 +104,100 @@ test_that("works with different families", {
#####
# no weights, no offset
lab <- paste0(fa$family, "_", fa$link, "_", method)
frm <- y ~ X1 + X2 + X3 + X4 + X5
frm <- y ~ X1 + X2
glm_control <- list(maxit = 25L, epsilon = .Machine$double.xmin)
parglm_control <- parglm.control(
nthreads = 2, method = method, maxit = 25L,
epsilon = .Machine$double.xmin)
suppressWarnings({
f2 <- glm(frm, family = fa, data = df)
f2 <- glm(frm, family = fa, data = df, control = glm_control)
f1 <- parglm(frm, family = fa, data = df,
control = parglm.control(nthreads = 2, method = method))
control = parglm_control)
})

eval(test_expr)

#####
# no weights, offset
lab <- paste0(fa$family, "_", fa$link, "_", method, " w/ offset")
frm_off <- update(frm, . ~ . - X1)
offs <- seq(0, .05, length.out = n)
suppressWarnings({
f2 <- glm(frm_off, family = fa, offset = X1, data = df)
f1 <- parglm(frm_off, family = fa, offset = X1, data = df,
control = parglm.control(nthreads = 2, method = method))
f2 <- glm(frm, family = fa, offset = offs, data = df,
control = glm_control)
f1 <- parglm(frm, family = fa, offset = offs, data = df,
control = parglm_control)
})

eval(test_expr)

#####
# weights, no offset
lab <- paste0(fa$family, "_", fa$link, "_", method, " w/ weigths")
w <- runif(n)
df$w <- n * w / sum(w)
df$w <- seq(.5, 1.5, length.out = n)
suppressWarnings({
f2 <- glm(frm, family = fa, weights = w, data = df)
f2 <- glm(frm, family = fa, weights = w, data = df,
control = glm_control)
f1 <- parglm(frm, family = fa, weights = w, data = df,
control = parglm.control(nthreads = 2, method = method))
control = parglm_control)
})

eval(test_expr)
}
})


test_that("works with different families w/ starting values", {
skip_if_not_installed("SuppDists")
n <- 1000L
p <- 5L
n <- 500L
p <- 2L
set.seed(77311413)
for(method in c("LAPACK", "LINPACK", "FAST"))
for(fa in list(
binomial("logit"), binomial("probit"), binomial("cauchit"),
binomial("cloglog"), binomial("log"),
binomial("cloglog"),

gaussian("identity"), gaussian("inverse"),
gaussian("log"),

Gamma("identity"), Gamma("log"), Gamma("log"),
Gamma("log"),

poisson("log"), poisson("sqrt"), poisson("identity"),

inverse.gaussian("1/mu^2"), inverse.gaussian("inverse"),
inverse.gaussian("identity"), inverse.gaussian("log")))
inverse.gaussian("identity")))
{
tmp <- sim_func(fa, n, p)
df <- data.frame(y = tmp$y, tmp$X)
df$INTER <- 1. # hack to avoid issues with NULL fit

#####
# no weights, no offset
lab <- paste0(fa$family, "_", fa$link, "_", method)
sta <- rep(1, p + 1L)
frm <- y ~ X1 + X2 + X3 + X4 + X5
sta[1] <- tmp$inter
frm <- y ~ X1 + X2 + INTER - 1
glm_control <- list(maxit = 25L, epsilon = .Machine$double.xmin)
parglm_control <- parglm.control(
nthreads = 2, method = method, maxit = 25L,
epsilon = .Machine$double.xmin)
suppressWarnings({
f2 <- glm(frm, family = fa, start = sta, data = df)
f2 <- glm(frm, family = fa, start = sta, data = df,
control = glm_control)
f1 <- parglm(frm, family = fa, data = df,
control = parglm.control(nthreads = 2, method = method),
start = sta)
control = parglm_control, start = sta)
})

eval(test_expr)

#####
# no weights, offset
lab <- paste0(fa$family, "_", fa$link, "_", method, " w/ offset")
sta <- rep(1, p)
sta[1] <- tmp$inter
frm_off <- update(frm, . ~ . - X1)
suppressWarnings({
f2 <- glm(frm_off, family = fa, offset = X1, start = sta, data = df)
f2 <- glm(frm_off, family = fa, offset = X1, start = sta, data = df,
control = glm_control)
f1 <- parglm(frm_off, family = fa, offset = X1, data = df,
control = parglm.control(nthreads = 2, method = method),
start = sta)
control = parglm_control, start = sta)
})

eval(test_expr)

#####
Expand All @@ -211,10 +206,12 @@ test_that("works with different families w/ starting values", {
w <- runif(n)
df$w <- n * w / sum(w)
sta <- rep(1, p + 1L)
sta[1] <- tmp$inter
suppressWarnings({
f2 <- glm(frm, family = fa, weights = w, start = sta, data = df)
f2 <- glm(frm, family = fa, weights = w, start = sta, data = df,
control = glm_control)
f1 <- parglm(frm, family = fa, weights = w, start = sta, data = df,
control = parglm.control(nthreads = 2, method = method))
control = parglm_control)
})

eval(test_expr)
Expand All @@ -223,23 +220,25 @@ test_that("works with different families w/ starting values", {

test_that("'method' equal to 'LINPACK' behaves as 'glm'", {
set.seed(73640893)
n <- 1000
n <- 500
p <- 5
X <- matrix(nrow = n, ncol = p)
for(i in 1:p)
X[, i] <- rnorm(n, sd = sqrt(p - i + 1L))
y <- rnorm(n) + rowSums(X)
X <- cbind(X[, 1:3], X[, 3:p])

f1 <- glm(y ~ X)
f2 <- parglm(y ~ X, control = parglm.control(method = "LINPACK"))
glm_control <- list(maxit = 25L, epsilon = .Machine$double.xmin)
parglm_control <- parglm.control(
nthreads = 2, maxit = 25L,
epsilon = .Machine$double.xmin, method = "LINPACK")
f1 <- glm(y ~ X, control = glm_control)
f2 <- parglm(y ~ X, control = parglm_control)

expect_equal(f1[to_check], f2[to_check])

s1 <- summary(f1)
s2 <- summary(f2)


excl <- c("call", "coefficients", "cov.unscaled", "cov.scaled",
"dispersion")
expect_equal(s1[!names(s1) %in% excl], s2[!names(s2) %in% excl])
Expand Down Expand Up @@ -272,17 +271,22 @@ test_that("'parglm' yields the same as 'glm' also when one observations is not '

W <- rep(1, length(Y))
W[1] <- 0
fit <- suppressWarnings(glm(Y ~ X, binomial(), weights = W))
glm_control <- list(maxit = 25L, epsilon = .Machine$double.xmin)
parglm_control <- parglm.control(
nthreads = 2, maxit = 25L, epsilon = .Machine$double.xmin)
fit <- suppressWarnings(glm(Y ~ X, binomial(), weights = W,
control = glm_control))
pfit <- parglm(Y ~ X, binomial(), weights = W,
control = parglm.control(nthreads = 2))
control = parglm_control)
expect_equal(fit[to_check], pfit[to_check])

Y <- rev(Y)
X <- rev(X)
W <- rev(W)
fit <- suppressWarnings(glm(Y ~ X, binomial(), weights = W))
fit <- suppressWarnings(glm(Y ~ X, binomial(), weights = W,
control = glm_control))
pfit <- parglm(Y ~ X, binomial(), weights = W,
control = parglm.control(nthreads = 2))
control = parglm_control)
expect_equal(fit[to_check], pfit[to_check])
})

Expand Down

0 comments on commit ee324a1

Please sign in to comment.