Skip to content

Commit

Permalink
Merge pull request #18 from SchisslerGroup/develop
Browse files Browse the repository at this point in the history
passes Windows installation and checks
  • Loading branch information
Alex Knudson authored Dec 24, 2020
2 parents 5211354 + 6e8f06a commit 1b6ac20
Show file tree
Hide file tree
Showing 22 changed files with 743 additions and 769 deletions.
1 change: 1 addition & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,4 @@
^docs$
^pkgdown$
^\.github$
^LICENSE\.md$
17 changes: 9 additions & 8 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,20 +1,23 @@
Package: bigsimr
Title: An R Package for Generating High-Dimensional Random Vectors
Version: 0.8.0
Title: Fast Generation of High-Dimensional Random Vectors
Version: 0.9.0
Authors@R:
c(person(given = "Alex",
family = "Knudson",
role = c("aut", "cre"),
email = "[email protected]"),
person(given = "Grant",
family = "Schissler",
role = "aut"),
person(given = "Duc",
family = "Tran",
role = "aut"))
Maintainer: Alex Knudson <[email protected]>
Description: Simulate multivariate data with arbitrary marginal distributions.
URL: https://github.com/SchisslerGroup/bigsimr
BugReports: https://github.com/SchisslerGroup/bigsimr/issues
Depends: R (>= 3.5.0)
License: GPL-3 + file LICENSE
Depends: R (>= 3.6.0)
License: GPL-3
Encoding: UTF-8
LazyData: true
RoxygenNote: 7.1.1
Expand All @@ -31,13 +34,11 @@ Imports:
pcaPP,
Rcpp,
RcppArmadillo,
foreach,
parallel,
doParallel,
rlang,
assertthat,
reticulate,
rstudioapi
rstudioapi,
mvnfast
LinkingTo:
Rcpp,
RcppArmadillo
674 changes: 0 additions & 674 deletions LICENSE

This file was deleted.

595 changes: 595 additions & 0 deletions LICENSE.md

Large diffs are not rendered by default.

5 changes: 4 additions & 1 deletion R/cor_fast.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@ cor_fast <- function(x, y = NULL, method = c("pearson", "kendall", "spearman"))
x <- as.matrix(x)
}

storage.mode(x) <- "double"
storage.mode(x) <- "numeric"
if (!is.null(y))
storage.mode(y) <- "numeric"

if (method == "pearson") {
if (is.null(y)) {
Expand Down Expand Up @@ -42,6 +44,7 @@ cor_fast <- function(x, y = NULL, method = c("pearson", "kendall", "spearman"))

}


# Thin R-level wrapper around the compiled routine "fastrank_num_avg_"
# looked up in the bigsimr shared library.
#
# x: the object forwarded unchanged to the native routine.
#    NOTE(review): presumably a numeric vector whose ranks are computed with
#    ties averaged (inferred from the routine name) -- confirm against the
#    compiled source.
#
# Returns whatever the native routine returns.
fastrank_num_avg <- function(x) {
  ranks <- .Call("fastrank_num_avg_", x, PACKAGE = "bigsimr")
  ranks
}
4 changes: 2 additions & 2 deletions R/cor_rand.R
Original file line number Diff line number Diff line change
Expand Up @@ -51,14 +51,14 @@ cor_randPD <- function(d, a=1.0) {
#' This method is generally faster than \code{\link{cor_randPD}}
#'
#' @param d A positive integer number of dimensions
#' @param k A tuning parameter
#' @param k A tuning parameter between 1 and d
#' @importFrom stats rnorm runif cov2cor
#' @export
cor_randPSD <- function(d, k=d) {
if (d == 1) {
return(matrix(1, 1, 1))
}
assertthat::assert_that(1 <= k && k <= d)
stopifnot(1 <= k && k <= d)
W <- matrix(rnorm(d * k), d, k)
S <- tcrossprod(W, W) + diag(runif(d))
S2 <- diag(1 / sqrt(diag(S)))
Expand Down
39 changes: 19 additions & 20 deletions R/rand_vec.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,27 +4,24 @@
eval(margin)
}

#' Simulate correlated multivariate data
#'
#' Creates \code{n} observations from a multivariate distribution with the
#' given marginals and correlation matrix.
#' given marginals and correlation matrix. The correlation matrix is always
#' assumed to be a Pearson correlation matrix.
#'
#' @param n The number of random vectors to generate.
#' @param rho The input correlation matrix.
#' @param rho The input (Pearson) correlation matrix.
#' @param margins The marginal distributions (Typically R's "quantile functions")
#' @param type The type of correlation matrix that is being passed (Assumed to
#' be Pearson by default).
#' @param cores The number of cores to run on.
#' @param ensure_PSD Ensure that the converted correlation matrix is positive
#' semi-definite. More important if the input correlation type is Kendall or
#' Spearman.
#' @param cores The number of cores to run on
#' semi-definite.
#' @return A matrix of random vectors generated from the specified marginals
#' and parameters.
#' @export
rvec <- function(n, rho, margins, type = c("pearson", "kendall", "spearman"),
ensure_PSD=FALSE, cores = 1L){
rvec <- function(n, rho, margins, cores = 1L, ensure_PSD = FALSE) {

type <- match.arg(type)
rho <- cor_convert(rho, from = type, to = "pearson")
d <- length(margins)
d <- length(margins)

if (ensure_PSD)
rho <- cor_nearPSD(rho)
Expand All @@ -35,27 +32,29 @@ rvec <- function(n, rho, margins, type = c("pearson", "kendall", "spearman"),
}

# generate multivariate uniform distribution (via Z -> U)
U <- .rmvuu(n, rho)
if (getOption("use_jax", FALSE)) {
U <- .rmvuu(n, rho)
} else {
U <- stats::pnorm(mvnfast::rmvn(n, rep(0, d), rho, cores))
}

# Apply the copula algorithm
d <- nrow(rho)

if (cores <= 1L) {

rv <- sapply(1:d, function(i){
rv <- vapply(1:d, function(i){
.u2m(U[,i], margins[[i]])
})
}, numeric(n))


} else {

`%dopar%` <- foreach::`%dopar%`
cl <- parallel::makeCluster(cores, type = "FORK")
doParallel::registerDoParallel(cl)
cl <- parallel::makeCluster(spec = cores, type = "FORK")

rv <- foreach::foreach(i = 1:d, .combine = 'cbind') %dopar% {
rv <- parallel::parSapply(cl = cl, 1:d, function(i) {
.u2m(U[,i], margins[[i]])
}
}, simplify = TRUE)

parallel::stopCluster(cl)

Expand Down
6 changes: 6 additions & 0 deletions R/zzz.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,10 @@ numpy <- NULL
delay_load = TRUE,
convert = FALSE)

if (.Platform$OS.type == "windows" || !have_jax()) {
options(use_jax = FALSE)
} else {
options(use_jax = getOption("use_jax", TRUE))
}

}
4 changes: 2 additions & 2 deletions README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ knitr::opts_chunk$set(

### bigsimr is an R package for simulating high-dimensional multivariate data with arbitrary marginal distributions

bigsimr lets you simulate multivariate data given a correlation matrix and a list of distributions. The correlation matrix can be of type Pearson, Spearman, or Kendall, and we use a matching algorithm to ensure that the estimated correlation of the simulated data is the same as the input correlation.
bigsimr lets you simulate multivariate data given a correlation matrix and a list of distributions. The correlation matrix can be of type Pearson, Spearman, or Kendall.

### See the [website](https://schisslergroup.github.io/bigsimr/) for more information, including [installation instructions](https://schisslergroup.github.io/bigsimr/articles/install-bigsimr.html), [tutorials](https://schisslergroup.github.io/bigsimr/articles/using-rvec.html), and [package documentation](https://schisslergroup.github.io/bigsimr/reference/index.html).

Expand All @@ -35,7 +35,7 @@ devtools::install_github("SchisslerGroup/bigsimr", ref="develop")

This package depends on
[reticulate](https://rstudio.github.io/reticulate/) to draw on the speed
of Google’s [jax](https://github.com/google/jax) library. Please see the [bigsimr installation instructions](#) for more details.
of Google’s [jax](https://github.com/google/jax) library. Please see the [bigsimr installation instructions](https://schisslergroup.github.io/bigsimr/articles/install-bigsimr.html) for more details.

---

Expand Down
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,7 @@

bigsimr lets you simulate multivariate data given a correlation matrix
and a list of distributions. The correlation matrix can be of type
Pearson, Spearman, or Kendall, and we use a matching algorithm to ensure
that the estimated correlation of the simulated data is the same as the
input correlation.
Pearson, Spearman, or Kendall.

### See the [website](https://schisslergroup.github.io/bigsimr/) for more information, including [installation instructions](https://schisslergroup.github.io/bigsimr/articles/install-bigsimr.html), [tutorials](https://schisslergroup.github.io/bigsimr/articles/using-rvec.html), and [package documentation](https://schisslergroup.github.io/bigsimr/reference/index.html).

Expand All @@ -29,7 +27,9 @@ devtools::install_github("SchisslerGroup/bigsimr", ref="develop")
This package depends on
[reticulate](https://rstudio.github.io/reticulate/) to draw on the speed
of Google’s [jax](https://github.com/google/jax) library. Please see the
[bigsimr installation instructions](#) for more details.
[bigsimr installation
instructions](https://schisslergroup.github.io/bigsimr/articles/install-bigsimr.html)
for more details.

-----

Expand Down
2 changes: 1 addition & 1 deletion man/cor_randPSD.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

25 changes: 7 additions & 18 deletions man/rvec.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 3 additions & 2 deletions pkgdown/_pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ authors:
href: https://github.com/adknudson
Grant Schissler:
href: http://www.grantschissler.com/
Richard Foote:
href: https://github.com/rdlfoote
Duc Tran:
href: https://github.com/duct317

reference:
- title: Primary Functions
Expand Down Expand Up @@ -39,5 +39,6 @@ articles:
- title: Articles
navbar: Get Started
contents:
- bigsimr
- install-bigsimr
- using-rvec
27 changes: 26 additions & 1 deletion pkgdown/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,30 @@ Status](https://travis-ci.com/SchisslerGroup/bigsimr.svg?branch=master)](https:/

`bigsimr` is an R package for simulating high-dimensional multivariate data with arbitrary marginal distributions

bigsimr lets you simulate multivariate data given a correlation matrix and a list of distributions. The correlation matrix can be of type Pearson, Spearman, or Kendall, and we use a matching algorithm to ensure that the estimated correlation of the simulated data is the same as the input correlation.
bigsimr lets you simulate multivariate data given a correlation matrix and a list of distributions. The correlation matrix can be of type Pearson, Spearman, or Kendall.

You can install the release version of the package from GitHub:

```r
devtools::install_github("SchisslerGroup/bigsimr")
```

To get a bug fix or to use a new feature, you can install the development version from GitHub:

```r
devtools::install_github("SchisslerGroup/bigsimr", ref="develop")
```

This package depends on
[reticulate](https://rstudio.github.io/reticulate/) to draw on the speed
of Google’s [jax](https://github.com/google/jax) library. Please see the [bigsimr installation instructions](https://schisslergroup.github.io/bigsimr/articles/install-bigsimr.html) for more details.

On Windows, or on a system where Python/jax is not available, the package falls back to alternative (non-jax) methods. This behavior is controlled by the `use_jax` option, which can be set with

```r
options(use_jax = FALSE)
```

Currently Google's jax library does not have ready-to-use binaries for Windows. The recommendation is to use Windows Subsystem for Linux (WSL), otherwise you will need to build `jaxlib` from source ([see here](https://jax.readthedocs.io/en/latest/developer.html#additional-notes-for-building-jaxlib-from-source-on-windows)).

Additionally, Windows does not allow for forked multiprocessing, so there will be no performance enhancement on a multicore Windows machine.
2 changes: 0 additions & 2 deletions tests/testthat.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
reticulate::py_config()

library(testthat)
library(bigsimr)

Expand Down
2 changes: 2 additions & 0 deletions tests/testthat/test_cor_convert.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
test_that("cor_convert works for scalars, vectors, and matrices", {

S <- "spearman"
K <- "kendall"
P <- "pearson"
Expand Down Expand Up @@ -48,4 +49,5 @@ test_that("cor_convert works for scalars, vectors, and matrices", {
expect_equal(cor_convert(-1.0, P, K), -1.0)
expect_equal(cor_convert(-1.0, P, P), -1.0)
expect_equal(cor_convert(-1.0, S, S), -1.0)

})
8 changes: 5 additions & 3 deletions tests/testthat/test_cor_nearPSD.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
test_that("cor_nearPSD returns a positive semidefinite correlation matrix", {

# define a negative definite correlation matrix
p <- matrix(c(
1.00, 0.82, 0.56, 0.44,
Expand All @@ -10,13 +11,14 @@ test_that("cor_nearPSD returns a positive semidefinite correlation matrix", {
r <- cor_nearPSD(p)
e <- eigen(r)

# Correlation matrices must be:
# 1. symmetric
# Correlation matrices must:
# 1. be symmetric
# 2. have ones on diagonal
# 3. be positive semidefinite
# 3. be positive semi-definite
# 4. have all values in the domain [-1, 1]
expect_true(Matrix::isSymmetric(r))
expect_true(all(diag(r) == 1))
expect_true(all(e$values >= 0))
expect_true(all((-1 <= r) & (r <= 1)))

})
2 changes: 2 additions & 0 deletions tests/testthat/test_cor_rand.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
test_that("cor_randPD and cor_randPSD generate random correlation matrices", {

# Correlation matrices must:
# 1. be symmetric
# 2. have ones on diagonal
Expand All @@ -19,4 +20,5 @@ test_that("cor_randPD and cor_randPSD generate random correlation matrices", {
expect_true(all(diag(rho_PSD) == 1))
expect_true(all(e$values >= 0))
expect_true(all((-1 <= rho_PSD) & (rho_PSD <= 1)))

})
3 changes: 3 additions & 0 deletions tests/testthat/test_jax_rand.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
test_that("jax_rmvn generates random multivariate normal data", {

skip_if_not(have_jax(), "jax is not available for testing")

# Should fail for negative definite matrices
nd_cor <- matrix(c(
1.00, 0.82, 0.56, 0.44,
Expand Down
Loading

0 comments on commit 1b6ac20

Please sign in to comment.