diff --git a/nobuild/jss/jss_sparseMVN_manuscript.R b/nobuild/jss/jss_sparseMVN_manuscript.R index a0b94b9..b78ed46 100644 --- a/nobuild/jss/jss_sparseMVN_manuscript.R +++ b/nobuild/jss/jss_sparseMVN_manuscript.R @@ -75,6 +75,8 @@ all.equal(logf, logf_dense) ## ----------------- +load("runtimes.Rdata") ## copy file to working directory + tab1 <- filter(runtimes, stat %in% c("density","rand")) %>% group_by(N, k, stat, pattern, type) %>% summarize(mean_ms=mean(time/1000000), @@ -98,9 +100,9 @@ mutate(cases, tmp=tmp) %>% format.args=list(big.mark=",")) ## ------ median compute times ---------- -tmp <- filter(tab1, N==min(tab1[['N']]) & k==min(tab1[['k']]) +tmp2 <- filter(tab1, N==min(tab1[['N']]) & k==min(tab1[['k']]) & time=="mean_ms" & stat=="density") -sm <- with(tmp, c(dense_cov,sparse_cov)) +sm <- with(tmp2, c(dense_cov,sparse_cov)) ## ---Figure 1 ------------------------ theme_set(theme_bw()) diff --git a/nobuild/jss/jss_sparseMVN_repl.R b/nobuild/jss/jss_sparseMVN_repl.R index e575a7e..feb94f0 100644 --- a/nobuild/jss/jss_sparseMVN_repl.R +++ b/nobuild/jss/jss_sparseMVN_repl.R @@ -65,21 +65,11 @@ run_bench <- function(D, reps=10) { data.frame(s=s, N=N, k=k, vals) } - -get_batch <- function(i, cases, reps=10) { - plyr::ddply(cases, c("s","N","k"), run_bench, reps=reps) %>% - mutate(batch=i) -} - - -cores <- 10 reps <- 200 -times <- ceiling(reps/cores) - ## times in milliseconds -cases <- expand.grid(s = 100,#0, - N = c(10, 20), # 50, 100, 200, 300, 400, 500), +cases <- expand.grid(s = 1000, + N = c(10, 20, 50, 100, 200, 300, 400, 500), k = c(2,4)) %>% mutate(nvars=(N+1)*k, nels = nvars^2, @@ -87,7 +77,8 @@ cases <- expand.grid(s = 100,#0, nnzLT = (N+1) * k*(k+1)/2 + N*k*k, pct.nnz = nnz/nels) -RT <- plyr::ldply(1:cores, get_batch, cases, reps=times) +RT <- plyr::ddply(cases, c("s","N","k"), run_bench, reps=reps) + labs <- str_split_fixed(RT[['expr']],"_",3) colnames(labs) <- c("stat","pattern","type") diff --git a/nobuild/jss3138_prescreened.zip b/nobuild/jss3138_prescreened.zip new file mode 100644 index 0000000..fe77f57 Binary files /dev/null and b/nobuild/jss3138_prescreened.zip differ diff --git a/nobuild/jss3138_prescreened/jss_sparseMVN_manuscript.R b/nobuild/jss3138_prescreened/jss_sparseMVN_manuscript.R new file mode 100644 index 0000000..b78ed46 --- /dev/null +++ b/nobuild/jss3138_prescreened/jss_sparseMVN_manuscript.R @@ -0,0 +1,166 @@ +## ----echo=FALSE, cache=FALSE--------------------------------------------- +suppressPackageStartupMessages(library("sparseMVN")) +suppressPackageStartupMessages(library("Matrix")) +suppressPackageStartupMessages(library("mvtnorm")) +suppressPackageStartupMessages(library("dplyr")) +suppressPackageStartupMessages(library("xtable")) +suppressPackageStartupMessages(library("ggplot2")) + +sanitize <- function(x) x + +## -------------------------------------- +N <- 5 +k <- 2 +p <- k ## dimension of mu +Q <- 1000 +Mat <- as(kronecker(diag(N), matrix(1, k, k)),"sparseMatrix") +Mat <- rBind(Mat, Matrix(1, p, N*k)) +Mat <- cBind(Mat, Matrix(1, k*N+p, p)) +printSpMatrix(as(Mat,"nMatrix")) + +## -------------------------------------- +Mat <- kronecker(Matrix(1, k, k), diag(N)) +Mat <- rBind(Mat, Matrix(1, p, N * k)) +Mat <- cBind(Mat, Matrix(1, k*N+p, p)) +printSpMatrix(as(Mat,"nMatrix")) + +## ------------------------- +Mat2 <- as(kronecker(diag(Q),matrix(1,k,k)),"lMatrix") %>% + rBind(Matrix(TRUE,p,Q*k)) %>% + cBind(Matrix(TRUE, k*Q+p, p)) %>% + as("dgCMatrix") %>% + as("symmetricMatrix") +A2 <- as(Mat2,"matrix") +format(object.size(A2), units='Mb') +format(object.size(Mat2), units='Kb') + +## ----------------------- + +D <- sparseMVN::binary.sim(N=50, k=2, T=50) +priors <- list(inv.A=diag(2), inv.Omega=diag(2)) +start <- rep(c(-1,1),51) +opt <- trustOptim::trust.optim(start, + fn=sparseMVN::binary.f, + gr=sparseMVN::binary.grad, + hs=sparseMVN::binary.hess, + data=D, priors=priors, + method="Sparse", + control=list(function.scale.factor=-1)) + +## ------------------------ +R <- 100 +pm <- opt[["solution"]] +H <- -opt[["hessian"]] +CH <- Cholesky(H) + + +## ------------------------- +samples <- rmvn.sparse(R, pm, CH, prec=TRUE) +logf <- dmvn.sparse(samples, pm, CH, prec=TRUE) + +## ------------------------- +Matrix::nnzero(H) +Hinv <- drop0(solve(H)) +Matrix::nnzero(Hinv) + +## -------------------------- + +logf_dense <- dmvnorm(samples, pm, as.matrix(Hinv), log=TRUE) +all.equal(logf, logf_dense) + +## --------------- + +## The following code chunks use the cases and runtimes objects +## created in the accompanying replication file. + +## ----------------- + +load("runtimes.Rdata") ## copy file to working directory + +tab1 <- filter(runtimes, stat %in% c("density","rand")) %>% + group_by(N, k, stat, pattern, type) %>% + summarize(mean_ms=mean(time/1000000), + sd_ms=sd(time/1000000)) %>% + tidyr::gather(time, value, c(mean_ms, sd_ms)) %>% + reshape2::dcast(N+k+stat+time~pattern+type) + + +## ----- Table 1 ------ +tmp <- c("\\multirow{8}{*}{k=2}",rep(NA,7), + "\\multirow{8}{*}{k=4}",rep(NA,7)) +mutate(cases, tmp=tmp) %>% + select(tmp,N, nvars,nels, + nnz, nnzLT, pct.nnz) %>% + xtable::xtable(digits=c(rep(0,7),3)) %>% + print(include.rownames=FALSE,only.contents=TRUE, + include.colnames=FALSE, + floating=FALSE, sanitize.colnames.function=sanitize, + sanitize.text.function=sanitize, + hline.after=8, + format.args=list(big.mark=",")) + +## ------ median compute times ---------- +tmp2 <- filter(tab1, N==min(tab1[['N']]) & k==min(tab1[['k']]) + & time=="mean_ms" & stat=="density") +sm <- with(tmp2, c(dense_cov,sparse_cov)) + +## ---Figure 1 ------------------------ +theme_set(theme_bw()) +fig1 <- filter(tab1, time=="mean_ms") %>% + mutate(stat=plyr::revalue(stat, + c(density="density", + rand="random"))) %>% + rename(dense=dense_cov, sparse=sparse_cov) %>% + tidyr::gather(pattern, value, c(dense, sparse)) %>% + ggplot(aes(x=N, y=value, color=pattern, + shape=pattern, linetype=pattern)) %>% + + geom_line() %>% + + geom_point(size=2) %>% + + scale_x_continuous("Number of blocks (N)") %>% + + scale_y_continuous("Computation time (milliseconds)", + labels=scales::comma) %>% + + scale_color_manual("Pattern", + values=c(dense='red', sparse='blue')) %>% + + scale_shape("Pattern") %>% + + scale_linetype("Pattern") %>% + + facet_grid(stat~k, scales="free_y", + labeller=label_bquote(cols = k==.(k))) %>% + + theme(strip.background=element_rect(fill='white')) +print(fig1) + +## ---- Figure 2 ------------------ + +tab2 <- filter(runtimes, stat %in% c("chol","solve")) %>% + group_by(N, k, stat, pattern, type) %>% + summarize(mean_ms=mean(time/1000000), + sd_ms=sd(time/1000000)) %>% + tidyr::gather(time, value, c(mean_ms, sd_ms)) %>% + reshape2::dcast(N+k+time~stat+pattern) + + +fig2 <- filter(tab2, time=="mean_ms") %>% + rename(`dense inversion`=solve_dense, + `dense Cholesky`=chol_dense, + `sparse Cholesky`=chol_sparse) %>% + tidyr::gather(pattern, value, + c(`dense inversion`, + `sparse Cholesky`, + `dense Cholesky`)) %>% + ggplot(aes(x=N, y=value, color=pattern, + shape=pattern, linetype=pattern)) %>% + + geom_line() %>% + + geom_point(size=2) %>% + + scale_x_continuous("Number of blocks (N)") %>% + + scale_y_continuous("Computation time (milliseconds)", + labels=scales::comma) %>% + + scale_color_manual("Pattern/Operation", + values=c(`dense Cholesky`='red', + `sparse Cholesky`='blue', + `dense inversion`='black')) %>% + + scale_shape("Pattern/Operation") %>% + + scale_linetype("Pattern/Operation") %>% + + facet_grid(.~k, scales="free_y", + labeller=label_bquote(cols = k==.(k))) %>% + + theme(strip.background=element_rect(fill='white')) +print(fig2) + diff --git a/nobuild/jss3138_prescreened/jss_sparseMVN_repl.R b/nobuild/jss3138_prescreened/jss_sparseMVN_repl.R new file mode 100644 index 0000000..feb94f0 --- /dev/null +++ b/nobuild/jss3138_prescreened/jss_sparseMVN_repl.R @@ -0,0 +1,86 @@ +library("sparseMVN") +library("microbenchmark") +library("Matrix") +library("mvtnorm") +library("dplyr") +library("tidyr") +library("stringr") +library("reshape2") +set.seed(123) + + +build_mat <- function(N, k) { + t1 <- exp(rnorm(k*k)) + Q1 <- tril(kronecker(diag(N),Matrix(t1,k,k))) + Q2 <- cBind(Q1,Matrix(0, N*k, k)) + Q3 <- rBind(Q2,cBind(Matrix(rnorm(N*k*k), k, N*k), Diagonal(k))) + tcrossprod(Q3) +} + +check_density <- function(CV.sparse, prec) { + chol.CV <- Cholesky(CV.sparse) + if (prec) sigma <- solve(CV.dense) else sigma <- CV.dense + x.sp <- rmvn.sparse(s, mu, chol.CV, prec=prec) + d.sp <- dmvn.sparse(x.sp, mu, chol.CV, prec=prec) + d.dens <- dmvnorm(x.sp, mu, sigma, log=TRUE) + all.equal(d.sp,d.dens) +} + +run_bench <- function(D, reps=10) { + + s <- D$s ## number of random samples + k <- D$k ## heterogeneous variables + N <- D$N ## number of agents + + mu <- rep(0,k*N + k) ## assume mean at origin + + CV.sparse <- build_mat(N, k) + CV.dense <- as(CV.sparse, "matrix") ## dense covariance + chol.CV <- Cholesky(CV.sparse) + + ## check_cov <- check_density(CV.sparse, FALSE) + ## check_prec <- check_density(CV.sparse, TRUE) + ## stopifnot(check_cov & check_prec) + + x <- rmvn.sparse(s, mu, chol.CV, prec=FALSE) + + bench <- microbenchmark( + chol_sparse = Cholesky(CV.sparse), + chol_dense = chol(CV.dense), + solve_dense = solve(CV.dense), + rand_sparse_cov = rmvn.sparse(s, mu, chol.CV, prec=FALSE), + rand_sparse_prec = rmvn.sparse(s, mu, chol.CV, prec=TRUE), + density_sparse_cov = dmvn.sparse(x, mu, chol.CV, prec=FALSE), + density_sparse_prec = dmvn.sparse(x, mu, chol.CV, prec=TRUE), + rand_dense_cov = rmvnorm(s, mu, CV.dense, method="chol"), + density_dense_cov = dmvnorm(x, mu, CV.dense, log=TRUE), + times = reps + ) + + vals <- plyr::ddply(data.frame(bench), "expr", + function(x) return(data.frame(expr=x$expr, + time=x$time, + rep=1:length(x$expr)))) + + data.frame(s=s, N=N, k=k, vals) +} + +reps <- 200 + +## times in milliseconds +cases <- expand.grid(s = 1000, + N = c(10, 20, 50, 100, 200, 300, 400, 500), + k = c(2,4)) %>% + mutate(nvars=(N+1)*k, + nels = nvars^2, + nnz = N*k^2 + k^2 + 2*N*k*k, + nnzLT = (N+1) * k*(k+1)/2 + N*k*k, + pct.nnz = nnz/nels) + +RT <- plyr::ddply(cases, c("s","N","k"), run_bench, reps=reps) + + +labs <- str_split_fixed(RT[['expr']],"_",3) +colnames(labs) <- c("stat","pattern","type") +runtimes <- cbind(RT, labs) + diff --git a/nobuild/jss3138_prescreened/runtimes.Rdata b/nobuild/jss3138_prescreened/runtimes.Rdata new file mode 100644 index 0000000..f24f6c6 Binary files /dev/null and b/nobuild/jss3138_prescreened/runtimes.Rdata differ diff --git a/nobuild/jss3138_prescreened/sparseMVN.pdf b/nobuild/jss3138_prescreened/sparseMVN.pdf new file mode 100644 index 0000000..0b74653 Binary files /dev/null and b/nobuild/jss3138_prescreened/sparseMVN.pdf differ diff --git a/nobuild/jss3138_prescreened/sparseMVN_0.2.1.tar.gz b/nobuild/jss3138_prescreened/sparseMVN_0.2.1.tar.gz new file mode 100644 index 0000000..8337fd9 Binary files /dev/null and b/nobuild/jss3138_prescreened/sparseMVN_0.2.1.tar.gz differ