benchmarks_column_sort.R

#  Stephan Frickenhaus, Alfred Wegener Institute (2022)
#
# # microbenchmark effects of matrix re-orderings on matrix vector products
#   approach: use approx. Nearest Neighbor to create a 
#   pseudo-FE-linear-problem-matrix with random non-zeros; 
#  a reordering is generated by a SFC on the mesh nodes,
#   and the matrix is reordered accordingly, to show identical numerics.
#  
#
#
# SFC-code is based on:
# Rakowsky N. & Fuchs A. Efficient local resorting techniques with space filling curves applied
# to the tsunami simulation model TsunAWI. In IMUM 2011 - The 10th International Workshop
# on Multiscale (Un-)structured Mesh Numerical Modelling for coastal, shelf and global ocean
# dynamics, August 2011. http://hdl.handle.net/10013/epic.39576.d001

#install.packages("SparseM")
#install.packages("RANN")
require(RANN)

require(SparseM)
require(Rcpp)
require(microbenchmark)

sourceCpp("resortgrid_SFC_Rcpp.cpp",cacheDir = ".cpp-cache")

# run this under WSL or Linux
#require(parallel)
#mcaffinity(3) # choose core to bind R-execution to

N=1500000 # 2d nodes
n=15 # entries per row, non-symmetric


#plot(x,y,pch=".",type="l")
# create a matrix from n nearest neighbors
#   typical for Differential equations linear problems
d=data.frame(x=rnorm(N),y=rnorm(N))
system.time(nn2(d,k=n)$nn.idx -> nn)

#for (i in sample(1:N,1000))  cat(i," ",length(intersect(order((x[i]-x)^2+(y[i]-y)^2)[1:n],nn[i,])),"\n")
#for (i in sample(1:N,1000))  cat(i," ",all(order((x[i]-x)^2+(y[i]-y)^2)[1:n]==nn[i,]),"\n")

# apply sorting for potential cache-reuse/ prefetch
nn[2,]
system.time({nn.s<-t(apply(nn,1,sort))})
system.time({for (i in 1:N) nn[i,]<-sort(nn[i,])})

nn[1,]
nn[2,]

dim(nn)
nnz=prod(dim(nn))
ja=t(nn) # row-wise assembly in a vector
ja[1:(2*n)]
length(ja)-nnz

ia=(1:(N+1))*n-(n-1)
#ia[1:10]
set.seed(10)
ra=rnorm(nnz,mean=1.0)

m=new("matrix.csr",ra,as.integer(ja),as.integer(ia),dimension=as.integer(c(N,N)))
v=rnorm(N)
#m%*%v
# resorting by p: p[i] is target index
mysfc(d$x,d$y) -> p
p[1:5]
# and the inverse
p.i<-rep(NA,N)
p.i[p]<-1:N 
# check for correctness of inverse 
all(p.i[p]==1:N)
p[1:3]
p.i[1:3]

# reordering by SFC:
# step 1 
nnp=nn[p,] # reorder neighbor blocks (rows)
dim(nnp)
# step 2: 
# reorder indices in each block by inverse p
nnp2<-t(apply(nnp,1,function(x) p.i[x]))
nnp2[1,]
nnp2.s.ind<-t(apply(nnp2,1,order)) # for sorting indices and later values
nnp2.s.ind[1,]
nnp3<-t(apply(nnp2,1,sort)) # now sort indices
nnp3[1,]
# check by stored order
nnp3[3,]
nnp2[3,nnp2.s.ind[3,]]

dim(nnp3)

jap=t(nnp3) # transpose to assemble in a vector by as.integer
jap[1:(2*n)]
# values: must be resorted by index order
rap=matrix(ra,ncol=n,byrow = TRUE) # important to assemble row-wise

rap1=rap[p,] # reorder rows of values first

# reorder in-place by sort order of col. indices
for (i in 1:N) rap1[i,]<-rap1[i,nnp2.s.ind[i,]]

rap2=t(rap1) # for correct assembly in a vector
dim(rap2)

m1<-new("matrix.csr",as.numeric(rap2),as.integer(jap),as.integer(ia),dimension=as.integer(c(N,N)))
v[]=1:n
v1<-v[p] # use same vector, permuted
(mb0=microbenchmark({v.0<-(m%*%v)[,1]},{v.1<-(m1%*%v1)[,1]},times=50))
# N=1.5e6,n=15
#Unit: milliseconds
#expr      min       lq     mean   median       uq      max neval
#{     v.0 <- (m %*% v)[, 1] }   230.4816 257.3719 288.5977 275.8483 297.0142 499.4441    50
#{     v.1 <- (m1 %*% v1)[, 1] } 129.0277 140.7763 151.1027 150.9353 158.1047 187.3399    50

mb<-microbenchmark({v.0<-(m%*%v)[,1]},times=150)
mb1<-microbenchmark({v.1<-(m1%*%v1)[,1]},times=150)
sum(abs(v.0[p]-v.1))

save(m,file="Pseudo-FE-Matrix_m_rnd_1.5mio_n15_colsort.rdat")
save(m1,file="Pseudo-FE-Matrix_m1_sfc_1.5mio_n15_colsort.rdat")

plot(mb0)
rbind(mb,mb1)

# locality in ja index (standard dev of column indices)
summary(apply(ja,2,sd))
summary(apply(jap,2,sd))


#> summary(apply(ja,2,sd))
#Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
#126100  393481  431423  429500  467697  645868 
#> summary(apply(jap,2,sd))
#Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
#4.5     11.4     28.2   2911.7    126.2 774387.3 

plot((apply(ja,2,sd)),
     (apply(jap,2,sd)),pch=".")