sgibb · piplus2 · Oct 16, 2021 · Oct 20, 2021 · Oct 20, 2021 · sgibb
diff --git a/.Rhistory b/.Rhistory
@@ -0,0 +1 @@
+library(MALDIquant)
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -8,7 +8,8 @@ Authors@R: c(person("Sebastian", "Gibb", role=c("aut", "cre"),
         "Strimmer", role="ths",
         comment=c(ORCID="0000-0001-7917-2056")))
 Depends: R (>= 4.0.0), methods
-Imports: parallel
+Imports: parallel, Matrix, Rcpp, RcppArmadillo
+LinkingTo: Rcpp, RcppArmadillo
 Suggests: knitr, testthat (>= 0.8)
 Description: A complete analysis pipeline for matrix-assisted laser
         desorption/ionization-time-of-flight (MALDI-TOF) and other

diff --git a/NAMESPACE b/NAMESPACE
@@ -1,5 +1,16 @@
 import("methods")
 
+import("Rcpp")
+
+importFrom("Matrix",
+              "t",
+              "Matrix",
+              "sparseMatrix",
+              "rowSums",
+              "rowMeans",
+              "colSums",
+              "colMeans")
+
 importFrom("parallel",
               "mclapply",
               "mcmapply")
@@ -9,7 +20,7 @@ importFrom("graphics",
               "arrows",
               "lines",
               "par",
-              "plot.default",           
+              "plot.default",
               "points",
               "rasterImage",
               "rect",
@@ -103,4 +114,6 @@ exportMethods("as.matrix",
               "transformIntensity",
               "trim")
 
-useDynLib("MALDIquant")
+useDynLib("MALDIquant", .registration=TRUE)
+importFrom(Rcpp, evalCpp)
+exportPattern("^[[:alpha:]]+")
diff --git a/R/RcppExports.R b/R/RcppExports.R
@@ -0,0 +1,11 @@
+# Generated by using Rcpp::compileAttributes() -> do not edit by hand
+# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
+
+colMediansArma <- function(x, ignore_missing) {
+    .Call(`_MALDIquant_colMediansArma`, x, ignore_missing)
+}
+
+colMeansArma <- function(x, ignore_missing) {
+    .Call(`_MALDIquant_colMeansArma`, x, ignore_missing)
+}
+
diff --git a/R/as.matrix-functions.R b/R/as.matrix-functions.R
@@ -17,10 +17,9 @@
   r <- rep.int(seq_along(l), n)
 
   i <- findInterval(mass, uniqueMass)
-
-  m <- matrix(NA_real_, nrow=length(l), ncol=length(uniqueMass),
-              dimnames=list(NULL, uniqueMass))
-  m[cbind(r, i)] <- intensity
+
+  m <- sparseMatrixNA(r, i, intensity, c(length(l), length(uniqueMass)),
+                      list(NULL, uniqueMass))
   attr(m, "mass") <- uniqueMass
   m
 }
@@ -34,10 +33,16 @@
 ## returns:
 ##  a binary matrix
 .as.binary.matrix <- function(m) {
-  stopifnot(is.matrix(m))
-  isNA <- which(is.na(m))
-  m[] <- 1L
-  m[isNA] <- 0L
-  mode(m) <- "integer"
+  stopifnot(is.matrix(m) || is(m, "sparseMatrix"))
+  if (is(m, "sparseMatrix")) {
+    mass <- attr(m, "mass")  # re-attached below, after the assignment
+    m[m != 0] <- 1           # every non-zero entry is flagged with 1
+    attr(m, "mass") <- mass
+  } else {
+    isNA <- which(is.na(m))
+    m[] <- 1L
+    m[isNA] <- 0L
+    mode(m) <- "integer"     # dense result stays an integer matrix as before
+  }
   m
 }
diff --git a/R/colMedians-functions.R b/R/colMedians-functions.R
@@ -1,6 +1,22 @@
 .colMedians <- function(x, na.rm=FALSE) {
-  stopifnot(is.matrix(x), is.logical(na.rm))
-  .Call("C_colMedians", x, na.rm)
+  stopifnot(is.logical(na.rm))
+  if (is(x, "sparseMatrix")) {
+    ret <- .Call("_MALDIquant_colMediansArma", x, na.rm)
+  } else {
+    stopifnot(is.matrix(x))  # the C routine is only called with a base matrix
+    ret <- .Call("C_colMedians", x, na.rm)
+  }
+  as.numeric(ret)
+}
+
+.colMeans <- function(x, na.rm=FALSE) {
+  stopifnot(is.logical(na.rm))
+  means <- if (is(x, "sparseMatrix")) {
+    .Call("_MALDIquant_colMeansArma", x, na.rm)  # compiled sparse path
+  } else {
+    colMeans(x, na.rm=na.rm)
+  }
+  as.numeric(means)  # always hand back a plain numeric vector
 }
 
 #' .colMaxs
@@ -12,7 +28,11 @@
 #' @author Sebastian Gibb <mail@@sebastiangibb.de>
 #' @noRd
 .colMaxs <- function(x) {
-  x[max.col(t(x), ties.method="first") + 0L:(ncol(x) - 1L) * nrow(x)]
+  if (!is(x, "sparseMatrix")) {
+    x[max.col(t(x), ties.method="first") + 0L:(ncol(x) - 1L) * nrow(x)]
+  } else {
+    apply(x, 2, max)  # column-wise maximum for sparse input
+  }
 }
 
 #' .colCors
@@ -24,19 +44,33 @@
 #' @author Sebastian Gibb <mail@@sebastiangibb.de>
 #' @noRd
 .colCors <- function(x, y, na.rm=FALSE) {
-  stopifnot(is.matrix(x) && is.matrix(y))
+  ## x and y may each be a base matrix or a sparse Matrix object
   stopifnot(all(dim(x) == dim(y)))
 
   if (na.rm) {
-    isNA <- is.na(x) | is.na(y)
-    x[isNA] <- NA_real_
-    y[isNA] <- NA_real_
+    if (is(x, "sparseMatrix") && is.matrix(y)) {
+      isMissing <- as.matrix((x == 0) | is.na(y))  # 0 marks missing in x
+      x[isMissing] <- 0
+      y[isMissing] <- NA_real_
+    } else if (is.matrix(x) && is(y, "sparseMatrix")) {
+      isMissing <- as.matrix(is.na(x) | (y == 0))  # 0 marks missing in y
+      x[isMissing] <- NA_real_
+      y[isMissing] <- 0
+    } else if (is(x, "sparseMatrix") && is(y, "sparseMatrix")) {
+      isMissing <- (x == 0) | (y == 0)
+      x[isMissing] <- 0
+      y[isMissing] <- 0
+    } else {
+      isMissing <- is.na(x) | is.na(y)
+      x[isMissing] <- NA_real_
+      y[isMissing] <- NA_real_
+    }
   }
 
-  cmX <- colMeans(x, na.rm=na.rm)
-  cmY <- colMeans(y, na.rm=na.rm)
+  cmX <- .colMeans(x, na.rm=na.rm)
+  cmY <- .colMeans(y, na.rm=na.rm)
 
-  (colMeans(x * y, na.rm=na.rm) - (cmX * cmY)) /
-    (sqrt(colMeans(x * x, na.rm=na.rm) - cmX * cmX) *
-     sqrt(colMeans(y * y, na.rm=na.rm) - cmY * cmY))
+  (.colMeans(x * y, na.rm=na.rm) - (cmX * cmY)) /
+    (sqrt(.colMeans(x * x, na.rm=na.rm) - cmX * cmX) *
+     sqrt(.colMeans(y * y, na.rm=na.rm) - cmY * cmY))
 }
diff --git a/R/filterPeaks-functions.R b/R/filterPeaks-functions.R
@@ -55,7 +55,7 @@ filterPeaks <- function(l, minFrequency, minNumber, labels,
   m <- .as.binary.matrix(.as.matrix.MassObjectList(l))
 
   ## whitelist
-  w <- matrix(0L, nrow=nrow(m), ncol=ncol(m))
+  w <- Matrix(nrow = nrow(m), ncol = ncol(m), data = 0, sparse = TRUE)
 
   ## group indices by labels
   idx <- lapply(ll, function(x)which(labels == x))
@@ -131,5 +131,5 @@ filterPeaks <- function(l, minFrequency, minNumber, labels,
   ## calculate minimal number of peaks
   minPeakNumber <- max(minFrequency * length(rows), minNumber, na.rm=TRUE)
 
-  colSums(m[rows, , drop=FALSE]) >= minPeakNumber
+  colSums(m[rows, , drop = FALSE]) >= minPeakNumber
 }
diff --git a/R/intensityMatrix-functions.R b/R/intensityMatrix-functions.R
@@ -23,15 +23,21 @@ intensityMatrix <- function(peaks, spectra) {
       stop("Incompatible number of spectra!")
     }
 
-    isNa <- is.na(m)
+    if (is(m, "sparseMatrix")) {
+      isNa <- as.matrix(m == 0)
+    } else {
+      isNa <- is.na(m)
+    }
     uniqueMass <- as.double(colnames(m))
 
     approxSpectra <- lapply(spectra, approxfun, yleft=0L, yright=0L)
 
     for (i in seq_along(approxSpectra)) {
       m[i, isNa[i, ]] <- approxSpectra[[i]](uniqueMass[isNa[i, ]])
     }
+
+    attr(m, "mass") <- uniqueMass
   }
-
+  
   m
 }
diff --git a/R/merge-functions.R b/R/merge-functions.R
@@ -19,7 +19,7 @@ mergeMassPeaks <- function(l, labels, method=c("mean", "median", "sum"),
 
   fun <- switch(method,
               "mean" = {
-                colMeans
+                .colMeans
               },
               "median" = {
                 .colMedians
@@ -43,36 +43,46 @@ mergeMassPeaks <- function(l, labels, method=c("mean", "median", "sum"),
 ## returns:
 ##  a new MassPeaks object
 ##
-.mergeMassPeaks <- function(l, fun=colMeans, ignore.na=TRUE) {
+.mergeMassPeaks <- function(l, fun=.colMeans, ignore.na=TRUE) {
 
   fun <- match.fun(fun)
 
   ## create a matrix which could merged
   m <- .as.matrix.MassObjectList(l)
-
+
   mass <- attr(m, "mass")
+
+  ## (row, column) of every stored peak, row by row to match unlist(snr)
+  ij <- do.call(rbind, lapply(seq_len(nrow(m)), function(r) {
+    cols <- which(m[r, ] != 0, useNames=FALSE)
+    cbind(rep.int(r, length(cols)), cols)  # 0-row matrix for empty spectra
+  }))
+
+  if (!ignore.na) {
+    m[m == 0] <- .Machine$double.xmin
+  }
 
   ## avoid named intensity/snr slot
   colnames(m) <- NULL
 
-  isNA <- is.na(m)
-  if (!ignore.na) {
-    m[isNA] <- 0L
-  }
-
   ## merge intensities
   intensity <- fun(m, na.rm=TRUE)
 
   ## merge snr
-  for (i in seq_along(l)) {
-    m[i, !isNA[i, ]] <- l[[i]]@snr
+  snrValues <- unlist(lapply(l, function(z) z@snr))
+  if (ignore.na) {
+    m <- sparseMatrixNA(i=ij[, 1], j=ij[, 2], snrValues, dims=dim(m))
+  } else {
+    m <- sparseMatrix(i=ij[, 1], j=ij[, 2], x=snrValues, dims=dim(m))
+    m[m == 0] <- .Machine$double.xmin
   }
-  snr <- fun(m, na.rm=TRUE)
+  snr <- fun(m, na.rm = TRUE)
 
   ## merge metaData
   metaData <- .mergeMetaData(lapply(l, function(x)x@metaData))
 
-  createMassPeaks(mass=mass, intensity=intensity, snr=snr, metaData=metaData)
+  createMassPeaks(mass=mass, intensity=as.numeric(intensity),
+                  snr=as.numeric(snr), metaData=metaData)
 }
 
 ## merge different metaData by equal list names

diff --git a/R/sparseMatrix-functions.R b/R/sparseMatrix-functions.R
@@ -0,0 +1,20 @@
+## Wrapper around sparseMatrix(); callers treat zero cells as missing values.
+sparseMatrixNA <- function(i, j, x, dims, dimnames = NULL,
+                           keep.zeros = TRUE) {
+  if (keep.zeros) {
+    ## store exact zeros as the smallest positive normalised double
+    x[x == 0] <- .Machine$double.xmin
+  }
+  sparseMatrix(i = i, j = j, x = x, dims = dims, dimnames = dimnames)
+}
+
+
+## Convert x to a sparse matrix; NA entries are turned into zeros first.
+as.sparseMatrixNA <- function(x, keep.zeros = TRUE) {
+  if (keep.zeros) {
+    ## exact zeros become the smallest positive normalised double
+    x[x == 0] <- .Machine$double.xmin
+  }
+  dense <- replace(x, is.na(x), 0)
+  as(dense, "sparseMatrix")
+}
diff --git a/man/msiSlices-functions.Rd b/man/msiSlices-functions.Rd
@@ -70,7 +70,7 @@ data("fiedler2009subset", package="MALDIquant")
 coordinates(fiedler2009subset) <- cbind(x=rep(1:4, 2), y=rep(1:2, each=4))
 
 slices <- msiSlices(fiedler2009subset, center=c(5864.49, 8936.97),
-                    tolerance=0.25)
+                    tolerance=0.25, method="mean")
 
 slices
 }
diff --git a/src/Makevars b/src/Makevars
@@ -0,0 +1,14 @@
+
+## With R 3.1.0 or later, you can uncomment the following line to tell R to 
+## enable compilation with C++11 (where available)
+##
+## Also, OpenMP support in Armadillo prefers C++11 support. However, for wider
+## availability of the package we do not yet enforce this here.  It is however
+## recommended for client packages to set it.
+##
+## And with R 3.4.0, and RcppArmadillo 0.7.960.*, we turn C++11 on as OpenMP
+## support within Armadillo prefers / requires it
+CXX_STD = CXX11
+
+PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS) 
+PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS)
diff --git a/src/Makevars.win b/src/Makevars.win
@@ -0,0 +1,14 @@
+
+## With R 3.1.0 or later, you can uncomment the following line to tell R to 
+## enable compilation with C++11 (where available)
+##
+## Also, OpenMP support in Armadillo prefers C++11 support. However, for wider
+## availability of the package we do not yet enforce this here.  It is however
+## recommended for client packages to set it.
+##
+## And with R 3.4.0, and RcppArmadillo 0.7.960.*, we turn C++11 on as OpenMP
+## support within Armadillo prefers / requires it
+CXX_STD = CXX11
+
+PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS) 
+PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS)