Skip to content

Commit

Permalink
prescreened version resubmitted
Browse files Browse the repository at this point in the history
  • Loading branch information
braunm committed Jun 21, 2017
1 parent 9927f2c commit 332f140
Show file tree
Hide file tree
Showing 8 changed files with 260 additions and 15 deletions.
6 changes: 4 additions & 2 deletions nobuild/jss/jss_sparseMVN_manuscript.R
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,8 @@ all.equal(logf, logf_dense)

## -----------------

load("runtimes.Rdata") ## copy file to working directory

tab1 <- filter(runtimes, stat %in% c("density","rand")) %>%
group_by(N, k, stat, pattern, type) %>%
summarize(mean_ms=mean(time/1000000),
Expand All @@ -98,9 +100,9 @@ mutate(cases, tmp=tmp) %>%
format.args=list(big.mark=","))

## ------ median compute times ----------
tmp <- filter(tab1, N==min(tab1[['N']]) & k==min(tab1[['k']])
tmp2 <- filter(tab1, N==min(tab1[['N']]) & k==min(tab1[['k']])
& time=="mean_ms" & stat=="density")
sm <- with(tmp, c(dense_cov,sparse_cov))
sm <- with(tmp2, c(dense_cov,sparse_cov))

## ---Figure 1 ------------------------
theme_set(theme_bw())
Expand Down
17 changes: 4 additions & 13 deletions nobuild/jss/jss_sparseMVN_repl.R
Original file line number Diff line number Diff line change
Expand Up @@ -65,29 +65,20 @@ run_bench <- function(D, reps=10) {
data.frame(s=s, N=N, k=k, vals)
}


get_batch <- function(i, cases, reps=10) {
plyr::ddply(cases, c("s","N","k"), run_bench, reps=reps) %>%
mutate(batch=i)
}


cores <- 10
reps <- 200
times <- ceiling(reps/cores)


## times in milliseconds
cases <- expand.grid(s = 100,#0,
N = c(10, 20), # 50, 100, 200, 300, 400, 500),
cases <- expand.grid(s = 1000,
N = c(10, 20, 50, 100, 200, 300, 400, 500),
k = c(2,4)) %>%
mutate(nvars=(N+1)*k,
nels = nvars^2,
nnz = N*k^2 + k^2 + 2*N*k*k,
nnzLT = (N+1) * k*(k+1)/2 + N*k*k,
pct.nnz = nnz/nels)

RT <- plyr::ldply(1:cores, get_batch, cases, reps=times)
RT <- plyr::ddply(cases, c("s","N","k"), run_bench, reps=reps)


labs <- str_split_fixed(RT[['expr']],"_",3)
colnames(labs) <- c("stat","pattern","type")
Expand Down
Binary file added nobuild/jss3138_prescreened.zip
Binary file not shown.
166 changes: 166 additions & 0 deletions nobuild/jss3138_prescreened/jss_sparseMVN_manuscript.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
## ----echo=FALSE, cache=FALSE---------------------------------------------
suppressPackageStartupMessages(library("sparseMVN"))
suppressPackageStartupMessages(library("Matrix"))
suppressPackageStartupMessages(library("mvtnorm"))
suppressPackageStartupMessages(library("dplyr"))
suppressPackageStartupMessages(library("xtable"))
suppressPackageStartupMessages(library("ggplot2"))

sanitize <- function(x) x

## --------------------------------------
N <- 5
k <- 2
p <- k ## dimension of mu
Q <- 1000
Mat <- as(kronecker(diag(N), matrix(1, k, k)),"sparseMatrix")
Mat <- rBind(Mat, Matrix(1, p, N*k))
Mat <- cBind(Mat, Matrix(1, k*N+p, p))
printSpMatrix(as(Mat,"nMatrix"))

## --------------------------------------
Mat <- kronecker(Matrix(1, k, k), diag(N))
Mat <- rBind(Mat, Matrix(1, p, N * k))
Mat <- cBind(Mat, Matrix(1, k*N+p, p))
printSpMatrix(as(Mat,"nMatrix"))

## -------------------------
Mat2 <- as(kronecker(diag(Q),matrix(1,k,k)),"lMatrix") %>%
rBind(Matrix(TRUE,p,Q*k)) %>%
cBind(Matrix(TRUE, k*Q+p, p)) %>%
as("dgCMatrix") %>%
as("symmetricMatrix")
A2 <- as(Mat2,"matrix")
format(object.size(A2), units='Mb')
format(object.size(Mat2), units='Kb')

## -----------------------

D <- sparseMVN::binary.sim(N=50, k=2, T=50)
priors <- list(inv.A=diag(2), inv.Omega=diag(2))
start <- rep(c(-1,1),51)
opt <- trustOptim::trust.optim(start,
fn=sparseMVN::binary.f,
gr=sparseMVN::binary.grad,
hs=sparseMVN::binary.hess,
data=D, priors=priors,
method="Sparse",
control=list(function.scale.factor=-1))

## ------------------------
R <- 100
pm <- opt[["solution"]]
H <- -opt[["hessian"]]
CH <- Cholesky(H)


## -------------------------
samples <- rmvn.sparse(R, pm, CH, prec=TRUE)
logf <- dmvn.sparse(samples, pm, CH, prec=TRUE)

## -------------------------
Matrix::nnzero(H)
Hinv <- drop0(solve(H))
Matrix::nnzero(Hinv)

## --------------------------

logf_dense <- dmvnorm(samples, pm, as.matrix(Hinv), log=TRUE)
all.equal(logf, logf_dense)

## ---------------

## The following code chunks use the cases and runtimes objects
## created in the accompanying replication file.

## -----------------

load("runtimes.Rdata") ## copy file to working directory

tab1 <- filter(runtimes, stat %in% c("density","rand")) %>%
group_by(N, k, stat, pattern, type) %>%
summarize(mean_ms=mean(time/1000000),
sd_ms=sd(time/1000000)) %>%
tidyr::gather(time, value, c(mean_ms, sd_ms)) %>%
reshape2::dcast(N+k+stat+time~pattern+type)


## ----- Table 1 ------
tmp <- c("\\multirow{8}{*}{k=2}",rep(NA,7),
"\\multirow{8}{*}{k=4}",rep(NA,7))
mutate(cases, tmp=tmp) %>%
select(tmp,N, nvars,nels,
nnz, nnzLT, pct.nnz) %>%
xtable::xtable(digits=c(rep(0,7),3)) %>%
print(include.rownames=FALSE,only.contents=TRUE,
include.colnames=FALSE,
floating=FALSE, sanitize.colnames.function=sanitize,
sanitize.text.function=sanitize,
hline.after=8,
format.args=list(big.mark=","))

## ------ median compute times ----------
tmp2 <- filter(tab1, N==min(tab1[['N']]) & k==min(tab1[['k']])
& time=="mean_ms" & stat=="density")
sm <- with(tmp2, c(dense_cov,sparse_cov))

## ---Figure 1 ------------------------
theme_set(theme_bw())
fig1 <- filter(tab1, time=="mean_ms") %>%
mutate(stat=plyr::revalue(stat,
c(density="density",
rand="random"))) %>%
rename(dense=dense_cov, sparse=sparse_cov) %>%
tidyr::gather(pattern, value, c(dense, sparse)) %>%
ggplot(aes(x=N, y=value, color=pattern,
shape=pattern, linetype=pattern)) %>%
+ geom_line() %>%
+ geom_point(size=2) %>%
+ scale_x_continuous("Number of blocks (N)") %>%
+ scale_y_continuous("Computation time (milliseconds)",
labels=scales::comma) %>%
+ scale_color_manual("Pattern",
values=c(dense='red', sparse='blue')) %>%
+ scale_shape("Pattern") %>%
+ scale_linetype("Pattern") %>%
+ facet_grid(stat~k, scales="free_y",
labeller=label_bquote(cols = k==.(k))) %>%
+ theme(strip.background=element_rect(fill='white'))
print(fig1)

## ---- Figure 2 ------------------

tab2 <- filter(runtimes, stat %in% c("chol","solve")) %>%
group_by(N, k, stat, pattern, type) %>%
summarize(mean_ms=mean(time/1000000),
sd_ms=sd(time/1000000)) %>%
tidyr::gather(time, value, c(mean_ms, sd_ms)) %>%
reshape2::dcast(N+k+time~stat+pattern)


fig2 <- filter(tab2, time=="mean_ms") %>%
rename(`dense inversion`=solve_dense,
`dense Cholesky`=chol_dense,
`sparse Cholesky`=chol_sparse) %>%
tidyr::gather(pattern, value,
c(`dense inversion`,
`sparse Cholesky`,
`dense Cholesky`)) %>%
ggplot(aes(x=N, y=value, color=pattern,
shape=pattern, linetype=pattern)) %>%
+ geom_line() %>%
+ geom_point(size=2) %>%
+ scale_x_continuous("Number of blocks (N)") %>%
+ scale_y_continuous("Computation time (milliseconds)",
labels=scales::comma) %>%
+ scale_color_manual("Pattern/Operation",
values=c(`dense Cholesky`='red',
`sparse Cholesky`='blue',
`dense inversion`='black')) %>%
+ scale_shape("Pattern/Operation") %>%
+ scale_linetype("Pattern/Operation") %>%
+ facet_grid(.~k, scales="free_y",
labeller=label_bquote(cols = k==.(k))) %>%
+ theme(strip.background=element_rect(fill='white'))
print(fig2)

86 changes: 86 additions & 0 deletions nobuild/jss3138_prescreened/jss_sparseMVN_repl.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
library("sparseMVN")
library("microbenchmark")
library("Matrix")
library("mvtnorm")
library("dplyr")
library("tidyr")
library("stringr")
library("reshape2")
set.seed(123)


build_mat <- function(N, k) {
t1 <- exp(rnorm(k*k))
Q1 <- tril(kronecker(diag(N),Matrix(t1,k,k)))
Q2 <- cBind(Q1,Matrix(0, N*k, k))
Q3 <- rBind(Q2,cBind(Matrix(rnorm(N*k*k), k, N*k), Diagonal(k)))
tcrossprod(Q3)
}

check_density <- function(CV.sparse, prec) {
chol.CV <- Cholesky(CV.sparse)
if (prec) sigma <- solve(CV.dense) else sigma <- CV.dense
x.sp <- rmvn.sparse(s, mu, chol.CV, prec=prec)
d.sp <- dmvn.sparse(x.sp, mu, chol.CV, prec=prec)
d.dens <- dmvnorm(x.sp, mu, sigma, log=TRUE)
all.equal(d.sp,d.dens)
}

run_bench <- function(D, reps=10) {

s <- D$s ## number of random samples
k <- D$k ## heterogeneous variables
N <- D$N ## number of agents

mu <- rep(0,k*N + k) ## assume mean at origin

CV.sparse <- build_mat(N, k)
CV.dense <- as(CV.sparse, "matrix") ## dense covariance
chol.CV <- Cholesky(CV.sparse)

## check_cov <- check_density(CV.sparse, FALSE)
## check_prec <- check_density(CV.sparse, TRUE)
## stopifnot(check_cov & check_prec)

x <- rmvn.sparse(s, mu, chol.CV, prec=FALSE)

bench <- microbenchmark(
chol_sparse = Cholesky(CV.sparse),
chol_dense = chol(CV.dense),
solve_dense = solve(CV.dense),
rand_sparse_cov = rmvn.sparse(s, mu, chol.CV, prec=FALSE),
rand_sparse_prec = rmvn.sparse(s, mu, chol.CV, prec=TRUE),
density_sparse_cov = dmvn.sparse(x, mu, chol.CV, prec=FALSE),
density_sparse_prec = dmvn.sparse(x, mu, chol.CV, prec=TRUE),
rand_dense_cov = rmvnorm(s, mu, CV.dense, method="chol"),
density_dense_cov = dmvnorm(x, mu, CV.dense, log=TRUE),
times = reps
)

vals <- plyr::ddply(data.frame(bench), "expr",
function(x) return(data.frame(expr=x$expr,
time=x$time,
rep=1:length(x$expr))))

data.frame(s=s, N=N, k=k, vals)
}

reps <- 200

## times in milliseconds
cases <- expand.grid(s = 1000,
N = c(10, 20, 50, 100, 200, 300, 400, 500),
k = c(2,4)) %>%
mutate(nvars=(N+1)*k,
nels = nvars^2,
nnz = N*k^2 + k^2 + 2*N*k*k,
nnzLT = (N+1) * k*(k+1)/2 + N*k*k,
pct.nnz = nnz/nels)

RT <- plyr::ddply(cases, c("s","N","k"), run_bench, reps=reps)


labs <- str_split_fixed(RT[['expr']],"_",3)
colnames(labs) <- c("stat","pattern","type")
runtimes <- cbind(RT, labs)

Binary file added nobuild/jss3138_prescreened/runtimes.Rdata
Binary file not shown.
Binary file added nobuild/jss3138_prescreened/sparseMVN.pdf
Binary file not shown.
Binary file not shown.

0 comments on commit 332f140

Please sign in to comment.