Skip to content

Commit

Permalink
fix #8 and #7
Browse files Browse the repository at this point in the history
  • Loading branch information
boennecd committed Jun 1, 2019
1 parent 1d1644b commit f2e8696
Show file tree
Hide file tree
Showing 7 changed files with 57 additions and 14 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: parglm
Type: Package
Title: Parallel GLM
Version: 0.1.3
Version: 0.1.4
Authors@R: c(
person("Benjamin", "Christoffersen",
email = "[email protected]",
Expand Down
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# pargm 0.1.4
* `stop`s when there are more variables than observations. Previously, this
caused a crash.

# pargm 0.1.3
* Fix bug found with Valgrind.

Expand Down
16 changes: 14 additions & 2 deletions R/parglm.R
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,15 @@ NULL
#' element. The \code{qr} element in the object returned by \code{parglm}(\code{.fit}) only has the \eqn{R}
#' matrix from the QR decomposition.
#'
#' @details
#' The current implementation uses \code{min(as.integer(n / p), nthreads)}
#' threads where \code{n} is the number observations, \code{p} is the
#' number of covariates, and \code{nthreads} is the \code{nthreads} element of
#' the list
#' returned by \code{\link{parglm.control}}. Thus, there is likely little (if
#' any) reduction in computation time if \code{p} is almost equal to \code{n}.
#' The current implementation cannot handle \code{p > n}.
#'
#' @examples
#' # small example from `help('glm')`. Fitting this model in parallel does
#' # not matter as the data set is small
Expand Down Expand Up @@ -140,11 +149,13 @@ parglm.control <- function(
#' @importFrom stats gaussian binomial Gamma inverse.gaussian poisson
#' @export
parglm.fit <- function(
x, y, weights = rep(1, nobs), start = NULL, etastart = NULL,
mustart = NULL, offset = rep(0, nobs), family = gaussian(),
x, y, weights = rep(1, NROW(x)), start = NULL, etastart = NULL,
mustart = NULL, offset = rep(0, NROW(x)), family = gaussian(),
control = list(), intercept = TRUE, ...){
.check_fam(family)
stopifnot(nrow(x) == length(y))
if(NCOL(x) > NROW(x))
stop("not implemented with more variables than observations")

if(!is.null(mustart))
warning(sQuote("mustart"), " will not be used")
Expand Down Expand Up @@ -193,6 +204,7 @@ parglm.fit <- function(
if(control$nthreads > 1L)
max(nrow(x) / control$nthreads, control$nthreads) else
nrow(x)
block_size <- max(block_size, NCOL(x))

use_start <- !is.null(start)
fit <- parallelglm(
Expand Down
13 changes: 11 additions & 2 deletions man/parglm.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 0 additions & 7 deletions src/arma_n_rcpp.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,6 @@
#endif

#define ARMA_NO_DEBUG
// Note: This also disables the check in inv(A, B) for whether inversion is succesfull it seems http://arma.sourceforge.net/docs.html#inv
// from armadillo config.hpp
//// Uncomment the above line if you want to disable all run-time checks.
//// This will result in faster code, but you first need to make sure that your code runs correctly!
//// We strongly recommend to have the run-time checks enabled during development,
//// as this greatly aids in finding mistakes in your code, and hence speeds up development.
//// We recommend that run-time checks be disabled _only_ for the shipped version of your program.

#include <RcppArmadillo.h>

Expand Down
5 changes: 4 additions & 1 deletion src/parallel_qr.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include "parallel_qr.h"
#include "LAPACK_wrappers.h"
#include <algorithm>

arma::mat R_F::R_rev_piv() const {
arma::uvec piv = pivot;
Expand All @@ -14,7 +15,9 @@ qr_parallel::worker::worker
R_F qr_parallel::worker::operator()(){
qr_work_chunk my_chunk = my_generator->get_chunk();
QR_factorization qr(my_chunk.X);
arma::mat F = qr.qy(my_chunk.Y, true).rows(0, my_chunk.X.n_cols - 1);
const unsigned n_rows =
std::min(my_chunk.X.n_cols - 1, my_chunk.Y.n_rows - 1);
arma::mat F = qr.qy(my_chunk.Y, true).rows(0, n_rows);

return R_F { qr.R(), qr.pivot(), std::move(F), my_chunk.dev };
}
Expand Down
24 changes: 23 additions & 1 deletion tests/testthat/test_parglm_fams.R
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,7 @@ test_that("'FASTs' fail when design matrix is singular", {
f2 <- parglm(y ~ X, control = parglm.control(method = "FAST"))))
})

test_that("'parglm' yields the same as 'glm' also when one observations is not 'good'",{
test_that("'parglm' yields the same as 'glm' also when one observations is not 'good'", {
phat <- seq(.01, .99, by = .01)
X <- log(phat / (1 - phat)) - 2
set.seed(47313714)
Expand All @@ -285,3 +285,25 @@ test_that("'parglm' yields the same as 'glm' also when one observations is not '
control = parglm.control(nthreads = 2))
expect_equal(fit[to_check], pfit[to_check])
})

test_that("'stop's when there are more variables than observations", {
set.seed(1)
n <- 20L
dframe <- cbind(data.frame(y = 1:n), replicate(n, rnorm(n)))

expect_error(
parglm(y ~ ., gaussian(), dframe),
"not implemented with more variables than observations", fixed = TRUE)

# check that it works with same number of observations as variables
dframe <- dframe[, 1:n]
fpar <- parglm(y ~ ., gaussian(), dframe, nthreads = 2)
fglm <- glm(y ~ ., gaussian(), dframe)
expect_equal(coef(fpar), coef(fglm))

# and with almost the same number of variables as observations
dframe <- dframe[, 1:(n - 3L)]
fpar <- parglm(y ~ ., gaussian(), dframe, nthreads = 2)
fglm <- glm(y ~ ., gaussian(), dframe)
expect_equal(coef(fpar), coef(fglm))
})

0 comments on commit f2e8696

Please sign in to comment.