Skip to content

Commit

Permalink
Merge branch 'develop'
Browse files Browse the repository at this point in the history
  • Loading branch information
Alex Knudson committed Jun 8, 2020
2 parents cbcb773 + dabb449 commit fc86d13
Show file tree
Hide file tree
Showing 22 changed files with 153 additions and 616 deletions.
2 changes: 1 addition & 1 deletion .Rprofile
Original file line number Diff line number Diff line change
@@ -1 +1 @@
reticulate::use_condaenv("bigsimr")
# reticulate::use_condaenv("bigsimr")
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ vignettes/*.pdf
notebooks/*_cache/
notebooks/*_files/
notebooks/*.html
notebooks/sims*
/cache/

# Temporary files created by R markdown
Expand All @@ -38,3 +39,6 @@ notebooks/*.html
rsconnect/
.Rproj.user
inst/doc

/tests
/sims_nb
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: bigsimr
Title: An R Package for Generating High-Dimensional Random Vectors
Version: 0.4.0
Version: 0.5.0
Authors@R:
person(given = "Alexander",
family = "Knudson",
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ export(convertCor)
export(fastCor)
export(install_bigsimr)
export(rcor)
export(rmvn)
export(rnbinom_params)
export(rvec)
export(which_corInBounds)
Expand Down
56 changes: 47 additions & 9 deletions R/correlation_utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,7 @@ convertCor <- function(rho,
function(r) r
)

tmp <- as.matrix(Matrix::nearPD(A(rho))$mat)
rownames(tmp) <- colnames(tmp) <- colnames(rho)
tmp
Matrix::nearPD(A(rho), corr = TRUE)$mat
}


Expand Down Expand Up @@ -65,6 +63,7 @@ adjustForDiscrete <- function(rho, params, nSigmas) {

rho[lower.tri(rho)] <- rho_adj
rho[upper.tri(rho)] <- t(rho)[upper.tri(rho)]
diag(rho) <- 1.0
rho
}

Expand Down Expand Up @@ -242,25 +241,26 @@ fastCor <- function(x, y = NULL, method = c("pearson", "kendall", "spearman")) {

stopifnot(method %in% c("pearson", "kendall", "spearman"))

if (method == "pearson") {
if (is.data.frame(x)) {
x <- as.matrix(x)
}

if (method == "pearson") {
if (is.null(y)) {
coop::pcor(x)
} else {
coop::pcor(x, y)
}

} else if (method == "spearman") {

if (is.null(y)) {
coop::pcor(apply(x, 2, fastrank::fastrank_num_avg))
coop::pcor(apply(x, 2, fastrank::fastrank_average))
} else {
coop::pcor(fastrank::fastrank_num_avg(x),
fastrank::fastrank_num_avg(y))
coop::pcor(fastrank::fastrank_average(x),
fastrank::fastrank_average(y))
}

} else {

if (is.null(y)) {
pcaPP::cor.fk(x)
} else {
Expand All @@ -270,3 +270,41 @@ fastCor <- function(x, y = NULL, method = c("pearson", "kendall", "spearman")) {
}

}


#' Return the nearest positive definite correlation matrix
validcorr <- function(A) {
eye <- function(d) {
A <- matrix(0, d, d)
diag(A) <- 1.0
A
}

Ps <- function(A) {
eig <- eigen(A)
Q <- eig$vectors
D <- eig$values
D <- diag(ifelse(D < 0, 0, D))
Q %*% D %*% t(Q)
}

Pu <- function(X) {
X - diag(diag(X)) + eye(ncol(X))
}

d <- dim(A)
stopifnot(length(d) == 2, d[1] == d[2])

S <- matrix(0, d[1], d[2])
Y <- A

for (k in 1:(ncol(A)*100)) {
R <- Y - S
X <- Ps(R)
S <- X - R
Y <- Pu(X)
}

Pu(X)
}

2 changes: 1 addition & 1 deletion R/normal2margin.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
#' uniform intermediate transformation.
#'
#' @param x A normal random vector.
#' @param param A list ccontaining the marginal and its parameters.
#' @param param A list containing the marginal and its parameters.
normal2marginal <- function(x, param) {
do.call(what = paste0("q", param[[1]]),
args = c(list(p = pnorm(x)), param[-1]))
Expand Down
60 changes: 43 additions & 17 deletions R/rmvn.R
Original file line number Diff line number Diff line change
@@ -1,25 +1,51 @@
.rmvn_jax <- function(seed, mu, Sigma, n) {
.rmvn <- function(rho, n) {

# Any RNG in jax requires a key. If none is supplied by the user, then it
# should be generated at random.
if (is.null(seed)) {
set.seed(seed)
seed <- as.integer(sample(1:1000, 1))
}

rmvn <- jax$random$multivariate_normal
R = reticulate::np_array(rho)
S = numpy$linalg$cholesky(R)
S_d = jax$device_put(numpy$transpose(S))

Sigma = numpy$array(Sigma, dtype=numpy$float32)
mu = numpy$array(mu, dtype=numpy$float32)
key = jax$random$PRNGKey(sample(.Random.seed, 1))

key = jax$random$PRNGKey(as.integer(seed))
size = reticulate::tuple(as.integer(n),
as.integer(ncol(rho)))

dev <- reticulate::py_to_r(jax$lib$xla_bridge$get_backend()$platform)
z_d = jax$random$normal(key, size)
x_d = jax$numpy$matmul(z_d, S_d)

# mu = jax$device_put(mu)
# Sigma = jax$device_put(Sigma)
x = rmvn(key, mu, Sigma, reticulate::tuple(list(n)))$block_until_ready()
x = jax$device_get(x)
x = jax$device_get(x_d)

reticulate::py_to_r(x)

}


#' Simulate random data from a multivariate normal distribution
#'
#' This function is designed to be 'bare bones' and does not check that the
#' given covariance matrix is positive semi-definite.
#'
#' @param n number of random vectors to be simulated.
#' @param mu vector of length d, representing the mean.
#' @param sigma covariance matrix (d x d). Alternatively is can be the cholesky
#' decomposition of the covariance. In that case isChol should be set to TRUE.
#' @param isChol boolean set to true if sigma is the cholesky decomposition of
#' the covariance matrix.
#' @export
rmvn <- function(n, mu, sigma, isChol = FALSE) {

S = jax$device_put(reticulate::np_array(sigma)) #
m = jax$device_put(reticulate::np_array(mu))
key = jax$random$PRNGKey(sample(.Random.seed, 1))

if (isChol) {
size = reticulate::tuple(as.integer(n), as.integer(ncol(sigma)))
z = jax$numpy$add(jax$numpy$matmul(jax$random$normal(key, size), S), m)
} else {
size = reticulate::tuple(as.integer(n))
z = jax$random$multivariate_normal(key, m, S, size)
}

reticulate::py_to_r(jax$device_get(z))

}

33 changes: 6 additions & 27 deletions R/rvec.R
Original file line number Diff line number Diff line change
Expand Up @@ -38,56 +38,35 @@ rvec <- function(n,
rho,
params,
cores = 1,
type = c("pearson", "kendall", "spearman"),
# adjustForDiscrete = TRUE,
checkBounds = FALSE,
nSigmas = 10){
type = c("pearson", "kendall", "spearman")){

# Handle different types of dependencies
# NOT YET IMPLEMENTED
# if (type == "spearman" &&
# adjustForDiscrete &&
# any(my_dists %in% discrete_dists)) {
# my_dists <- unlist(lapply(params, '[[', 1))
# rho <- adjustForDiscrete(rho, params, nSigmas)
# }
type <- match.arg(type)

# TODO: Warn if using Pearson correlation

if (checkBounds) {
stopifnot(all_corInBounds(rho, params, cores, type))
}

# Correlation matrix must be a Pearson correlation
rho <- convertCor(rho, from = type, to = "pearson")

d <- NROW(rho)

# generate MVN sample
mvn_sim <- .rmvn_jax(NULL, rep(0, d), rho, as.integer(n))
mvn_sim <- .rmvn(rho, n)

# Apply the NORTA algorithm
d <- NROW(rho)
if (cores == 1) {

mv_sim <- sapply(1:d, function(i){
normal2marginal(mvn_sim[,i], params[[i]])
})

} else {

`%dopar%` <- foreach::`%dopar%`

cl <- parallel::makeCluster(cores, type = "FORK")
doParallel::registerDoParallel(cl)

mv_sim <- foreach::foreach(i = 1:d, .combine = 'cbind') %dopar% {
normal2marginal(mvn_sim[,i], params[[i]])
}

parallel::stopCluster(cl)

}

colnames(mv_sim) <- rownames(rho)

# return
mv_sim
}
2 changes: 1 addition & 1 deletion R/zzz.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ numpy <- NULL
jax <<- reticulate::import("jax",
delay_load = TRUE,
convert = FALSE)
numpy <<- reticulate::import("jax.numpy",
numpy <<- reticulate::import("numpy",
as = "numpy",
delay_load = TRUE,
convert = FALSE)
Expand Down
18 changes: 9 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -116,25 +116,25 @@ x <- rvec(100, rho = rho, params = margins, type = "pearson")
# Sample correlation
cor(x)
#> [,1] [,2] [,3]
#> [1,] 1.0000000 0.4296601 0.3491483
#> [2,] 0.4296601 1.0000000 0.3924128
#> [3,] 0.3491483 0.3924128 1.0000000
#> [1,] 1.0000000 0.5275571 0.5018824
#> [2,] 0.5275571 1.0000000 0.5280112
#> [3,] 0.5018824 0.5280112 1.0000000
```

``` r
# Estimated upper and lower correlation bounds
computeCorBounds(margins, type = "pearson")
#> $upper
#> [,1] [,2] [,3]
#> [1,] 1.0000000 0.9531578 0.9706689
#> [2,] 0.9531578 1.0000000 0.9827171
#> [3,] 0.9706689 0.9827171 1.0000000
#> [1,] 1.0000000 0.9532376 0.9698262
#> [2,] 0.9532376 1.0000000 0.9829682
#> [3,] 0.9698262 0.9829682 1.0000000
#>
#> $lower
#> [,1] [,2] [,3]
#> [1,] 1.0000000 -0.9535620 -0.9710851
#> [2,] -0.9535620 1.0000000 -0.8697967
#> [3,] -0.9710851 -0.8697967 1.0000000
#> [1,] 1.0000000 -0.9544697 -0.9708227
#> [2,] -0.9544697 1.0000000 -0.8690555
#> [3,] -0.9708227 -0.8690555 1.0000000
```

## Appendix
Expand Down
Binary file modified man/figures/logo.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 1 addition & 1 deletion man/normal2marginal.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

23 changes: 23 additions & 0 deletions man/rmvn.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 4 additions & 12 deletions man/rvec.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 11 additions & 0 deletions man/validcorr.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit fc86d13

Please sign in to comment.