From 0b59f34bf66f823c640e7723b1b72172b3e196ec Mon Sep 17 00:00:00 2001
From: paoloinglese <p.inglese@outlook.com>
Date: Sat, 16 Oct 2021 16:09:07 +0100
Subject: [PATCH 1/3] using sparse matrix

---
 NAMESPACE                 |  9 ++++++++-
 R/as.matrix-functions.R   | 15 +++++++--------
 R/filterPeaks-functions.R |  4 ++--
 3 files changed, 17 insertions(+), 11 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index de363e1..74ef1dd 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,5 +1,12 @@
 import("methods")
 
+importFrom("Matrix",
+              "t",
+              "Matrix",
+              "sparseMatrix",
+               "colSums",
+               "colMeans")
+
 importFrom("parallel",
               "mclapply",
               "mcmapply")
@@ -9,7 +16,7 @@ importFrom("graphics",
               "arrows",
               "lines",
               "par",
-              "plot.default",           
+              "plot.default",
               "points",
               "rasterImage",
               "rect",
diff --git a/R/as.matrix-functions.R b/R/as.matrix-functions.R
index e9472de..abf7b31 100644
--- a/R/as.matrix-functions.R
+++ b/R/as.matrix-functions.R
@@ -18,9 +18,9 @@
 
   i <- findInterval(mass, uniqueMass)
 
-  m <- matrix(NA_real_, nrow=length(l), ncol=length(uniqueMass),
-              dimnames=list(NULL, uniqueMass))
-  m[cbind(r, i)] <- intensity
+  m <- sparseMatrix(i = r, j = i, x = intensity,
+                    dims = c(length(l), length(uniqueMass)),
+                    dimnames = list(NULL, uniqueMass))
   attr(m, "mass") <- uniqueMass
   m
 }
@@ -34,10 +34,9 @@
 ## returns:
 ##  a binary matrix
 .as.binary.matrix <- function(m) {
-  stopifnot(is.matrix(m))
-  isNA <- which(is.na(m))
-  m[] <- 1L
-  m[isNA] <- 0L
-  mode(m) <- "integer"
+  stopifnot(is.matrix(m) | is(m, 'sparseMatrix'))
+  mass <- attr(m, 'mass')
+  m[m != 0] <- 1
+  attr(m, 'mass') <- mass
   m
 }
diff --git a/R/filterPeaks-functions.R b/R/filterPeaks-functions.R
index 9595939..9f65266 100644
--- a/R/filterPeaks-functions.R
+++ b/R/filterPeaks-functions.R
@@ -55,7 +55,7 @@ filterPeaks <- function(l, minFrequency, minNumber, labels,
   m <- .as.binary.matrix(.as.matrix.MassObjectList(l))
 
   ## whitelist
-  w <- matrix(0L, nrow=nrow(m), ncol=ncol(m))
+  w <- Matrix(nrow = nrow(m), ncol = ncol(m), data = 0, sparse = TRUE)
 
   ## group indices by labels
   idx <- lapply(ll, function(x)which(labels == x))
@@ -131,5 +131,5 @@ filterPeaks <- function(l, minFrequency, minNumber, labels,
   ## calculate minimal number of peaks
   minPeakNumber <- max(minFrequency * length(rows), minNumber, na.rm=TRUE)
 
-  colSums(m[rows, , drop=FALSE]) >= minPeakNumber
+  colSums(m[rows, , drop = FALSE]) >= minPeakNumber
 }

From d2aef0cb2316ebf00a3acbb7f2fbcff9bf6cd189 Mon Sep 17 00:00:00 2001
From: pi514 <p.inglese@outlook.com>
Date: Wed, 20 Oct 2021 14:07:37 +0100
Subject: [PATCH 2/3] Added Matrix dep

---
 DESCRIPTION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index f619a64..1eb5fc1 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -8,7 +8,7 @@ Authors@R: c(person("Sebastian", "Gibb", role=c("aut", "cre"),
         "Strimmer", role="ths",
         comment=c(ORCID="0000-0001-7917-2056")))
 Depends: R (>= 4.0.0), methods
-Imports: parallel
+Imports: parallel, Matrix
 Suggests: knitr, testthat (>= 0.8)
 Description: A complete analysis pipeline for matrix-assisted laser
         desorption/ionization-time-of-flight (MALDI-TOF) and other

From aa59b28dbd554ff1552f76dd3eb7de29531dcc78 Mon Sep 17 00:00:00 2001
From: pi514 <p.inglese@outlook.com>
Date: Thu, 21 Oct 2021 00:59:23 +0100
Subject: [PATCH 3/3] support to sparse matrices

Added support to sparse matrices and tests.
---
 .Rhistory                                     |   1 +
 DESCRIPTION                                   |   3 +-
 NAMESPACE                                     |  12 +-
 R/RcppExports.R                               |  11 ++
 R/as.matrix-functions.R                       |  20 ++-
 R/colMedians-functions.R                      |  58 +++++--
 R/intensityMatrix-functions.R                 |  10 +-
 R/merge-functions.R                           |  34 ++--
 R/sparseMatrix-functions.R                    |  20 +++
 man/msiSlices-functions.Rd                    |   2 +-
 src/Makevars                                  |  14 ++
 src/Makevars.win                              |  14 ++
 src/RcppExports.cpp                           |  61 ++++++++
 src/colMediansArma.cpp                        | 147 ++++++++++++++++++
 src/init.c                                    |  40 ++---
 tests/testthat/test_as.matrix-functions.R     |   3 +
 tests/testthat/test_colMedians-functions.R    |  36 ++++-
 .../testthat/test_intensityMatrix-functions.R |   2 +
 18 files changed, 427 insertions(+), 61 deletions(-)
 create mode 100644 .Rhistory
 create mode 100644 R/RcppExports.R
 create mode 100644 R/sparseMatrix-functions.R
 create mode 100644 src/Makevars
 create mode 100644 src/Makevars.win
 create mode 100644 src/RcppExports.cpp
 create mode 100644 src/colMediansArma.cpp

diff --git a/.Rhistory b/.Rhistory
new file mode 100644
index 0000000..0dca60a
--- /dev/null
+++ b/.Rhistory
@@ -0,0 +1 @@
+library(MALDIquant)
diff --git a/DESCRIPTION b/DESCRIPTION
index 1eb5fc1..24f2816 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -8,7 +8,8 @@ Authors@R: c(person("Sebastian", "Gibb", role=c("aut", "cre"),
         "Strimmer", role="ths",
         comment=c(ORCID="0000-0001-7917-2056")))
 Depends: R (>= 4.0.0), methods
-Imports: parallel, Matrix
+Imports: parallel, Matrix, Rcpp, RcppArmadillo
+LinkingTo: Rcpp, RcppArmadillo
 Suggests: knitr, testthat (>= 0.8)
 Description: A complete analysis pipeline for matrix-assisted laser
         desorption/ionization-time-of-flight (MALDI-TOF) and other
diff --git a/NAMESPACE b/NAMESPACE
index 74ef1dd..6f23d0d 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,11 +1,15 @@
 import("methods")
 
+import("Rcpp")
+
 importFrom("Matrix",
               "t",
               "Matrix",
               "sparseMatrix",
-               "colSums",
-               "colMeans")
+              "rowSums",
+              "rowMeans",
+              "colSums",
+              "colMeans")
 
 importFrom("parallel",
               "mclapply",
@@ -110,4 +114,6 @@ exportMethods("as.matrix",
               "transformIntensity",
               "trim")
 
-useDynLib("MALDIquant")
+useDynLib("MALDIquant", .registration=TRUE)
+importFrom(Rcpp, evalCpp)
+exportPattern("^[[:alpha:]]+")
\ No newline at end of file
diff --git a/R/RcppExports.R b/R/RcppExports.R
new file mode 100644
index 0000000..6e30f57
--- /dev/null
+++ b/R/RcppExports.R
@@ -0,0 +1,11 @@
+# Generated by using Rcpp::compileAttributes() -> do not edit by hand
+# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
+
+colMediansArma <- function(x, ignore_missing) {
+    .Call(`_MALDIquant_colMediansArma`, x, ignore_missing)
+}
+
+colMeansArma <- function(x, ignore_missing) {
+    .Call(`_MALDIquant_colMeansArma`, x, ignore_missing)
+}
+
diff --git a/R/as.matrix-functions.R b/R/as.matrix-functions.R
index abf7b31..107af04 100644
--- a/R/as.matrix-functions.R
+++ b/R/as.matrix-functions.R
@@ -17,10 +17,9 @@
   r <- rep.int(seq_along(l), n)
 
   i <- findInterval(mass, uniqueMass)
-
-  m <- sparseMatrix(i = r, j = i, x = intensity,
-                    dims = c(length(l), length(uniqueMass)),
-                    dimnames = list(NULL, uniqueMass))
+  
+  m <- sparseMatrixNA(r, i, intensity, c(length(l), length(uniqueMass)),
+                      list(NULL, uniqueMass))
   attr(m, "mass") <- uniqueMass
   m
 }
@@ -35,8 +34,15 @@
 ##  a binary matrix
 .as.binary.matrix <- function(m) {
   stopifnot(is.matrix(m) | is(m, 'sparseMatrix'))
-  mass <- attr(m, 'mass')
-  m[m != 0] <- 1
-  attr(m, 'mass') <- mass
+  if (is(m, 'sparseMatrix')) {
+    mass <- attr(m, 'mass')
+    m[m != 0] <- 1
+    attr(m, 'mass') <- mass
+  } else {
+    stopifnot(is.matrix(m))
+    isNA <- which(is.na(m))
+    m[] <- 1L
+    m[isNA] <- 0L
+  }
   m
 }
diff --git a/R/colMedians-functions.R b/R/colMedians-functions.R
index 065c128..48beba0 100644
--- a/R/colMedians-functions.R
+++ b/R/colMedians-functions.R
@@ -1,6 +1,22 @@
 .colMedians <- function(x, na.rm=FALSE) {
-  stopifnot(is.matrix(x), is.logical(na.rm))
-  .Call("C_colMedians", x, na.rm)
+  stopifnot(is.logical(na.rm))
+  # stopifnot(is.matrix(x), is.logical(na.rm))
+  if (is(x, 'sparseMatrix')) {
+    ret <- .Call("_MALDIquant_colMediansArma", x, na.rm) 
+  } else {
+    ret <- .Call("C_colMedians", x, na.rm)
+  }
+  return(as.numeric(ret))
+}
+
+.colMeans <- function(x, na.rm=FALSE) {
+  stopifnot(is.logical(na.rm))
+  if (is(x, 'sparseMatrix')) {
+    ret <- .Call("_MALDIquant_colMeansArma", x, na.rm)
+  } else {
+    ret <- colMeans(x, na.rm=na.rm)
+  }
+  return(as.numeric(ret))
 }
 
 #' .colMaxs
@@ -12,7 +28,11 @@
 #' @author Sebastian Gibb <mail@@sebastiangibb.de>
 #' @noRd
 .colMaxs <- function(x) {
-  x[max.col(t(x), ties.method="first") + 0L:(ncol(x) - 1L) * nrow(x)]
+  if (is(x, 'sparseMatrix')) {
+    apply(x, 2, max)
+  } else {
+    x[max.col(t(x), ties.method="first") + 0L:(ncol(x) - 1L) * nrow(x)]
+  }
 }
 
 #' .colCors
@@ -24,19 +44,33 @@
 #' @author Sebastian Gibb <mail@@sebastiangibb.de>
 #' @noRd
 .colCors <- function(x, y, na.rm=FALSE) {
-  stopifnot(is.matrix(x) && is.matrix(y))
+  # stopifnot(is.matrix(x) && is.matrix(y))
   stopifnot(all(dim(x) == dim(y)))
 
   if (na.rm) {
-    isNA <- is.na(x) | is.na(y)
-    x[isNA] <- NA_real_
-    y[isNA] <- NA_real_
+    if (is(x, "sparseMatrix") & is.matrix(y)) {
+      isMissing <- as.matrix((x == 0) | is.na(y))
+      x[isMissing] <- 0
+      y[isMissing] <- NA_real_
+    } else if (is.matrix(x) & is(y, "sparseMatrix")) {
+      isMissing <- as.matrix(is.na(x) | (y == 0))
+      x[isMissing] <- NA_real_
+      y[isMissing] <- 0
+    } else if (is(x, "sparseMatrix") & is(y, "sparseMatrix")) {
+      isMissing <- (x == 0) | (y == 0)
+      x[isMissing] <- 0
+      y[isMissing] <- 0
+    } else {
+      isMissing <- is.na(x) | is.na(y)
+      x[isMissing] <- NA_real_
+      y[isMissing] <- NA_real_
+    }
   }
 
-  cmX <- colMeans(x, na.rm=na.rm)
-  cmY <- colMeans(y, na.rm=na.rm)
+  cmX <- .colMeans(x, na.rm=na.rm)
+  cmY <- .colMeans(y, na.rm=na.rm)
 
-  (colMeans(x * y, na.rm=na.rm) - (cmX * cmY)) /
-    (sqrt(colMeans(x * x, na.rm=na.rm) - cmX * cmX) *
-     sqrt(colMeans(y * y, na.rm=na.rm) - cmY * cmY))
+  (.colMeans(x * y, na.rm=na.rm) - (cmX * cmY)) /
+    (sqrt(.colMeans(x * x, na.rm=na.rm) - cmX * cmX) *
+     sqrt(.colMeans(y * y, na.rm=na.rm) - cmY * cmY))
 }
diff --git a/R/intensityMatrix-functions.R b/R/intensityMatrix-functions.R
index dbd6473..f32fa52 100644
--- a/R/intensityMatrix-functions.R
+++ b/R/intensityMatrix-functions.R
@@ -23,7 +23,11 @@ intensityMatrix <- function(peaks, spectra) {
       stop("Incompatible number of spectra!")
     }
 
-    isNa <- is.na(m)
+    if (is(m, "sparseMatrix")) {
+      isNa <- as.matrix(m == 0)
+    } else {
+      isNa <- is.na(m)
+    }
     uniqueMass <- as.double(colnames(m))
 
     approxSpectra <- lapply(spectra, approxfun, yleft=0L, yright=0L)
@@ -31,7 +35,9 @@ intensityMatrix <- function(peaks, spectra) {
     for (i in seq_along(approxSpectra)) {
       m[i, isNa[i, ]] <- approxSpectra[[i]](uniqueMass[isNa[i, ]])
     }
+    
+    attr(m, "mass") <- uniqueMass
   }
-
+  
   m
 }
diff --git a/R/merge-functions.R b/R/merge-functions.R
index 6b83456..775ab91 100644
--- a/R/merge-functions.R
+++ b/R/merge-functions.R
@@ -19,7 +19,7 @@ mergeMassPeaks <- function(l, labels, method=c("mean", "median", "sum"),
 
   fun <- switch(method,
               "mean" = {
-                colMeans
+                .colMeans
               },
               "median" = {
                 .colMedians
@@ -43,36 +43,46 @@ mergeMassPeaks <- function(l, labels, method=c("mean", "median", "sum"),
 ## returns:
 ##  a new MassPeaks object
 ##
-.mergeMassPeaks <- function(l, fun=colMeans, ignore.na=TRUE) {
+.mergeMassPeaks <- function(l, fun=.colMeans, ignore.na=TRUE) {
 
   fun <- match.fun(fun)
 
   ## create a matrix which could merged
   m <- .as.matrix.MassObjectList(l)
-
+  
   mass <- attr(m, "mass")
+  
+  if (!ignore.na) {
+    m[m == 0] <- .Machine$double.xmin
+  }
 
   ## avoid named intensity/snr slot
   colnames(m) <- NULL
 
-  isNA <- is.na(m)
-  if (!ignore.na) {
-    m[isNA] <- 0L
-  }
-
   ## merge intensities
   intensity <- fun(m, na.rm=TRUE)
 
   ## merge snr
-  for (i in seq_along(l)) {
-    m[i, !isNA[i, ]] <- l[[i]]@snr
+  ij <- lapply(1:nrow(m), function(r) {
+    cbind(r, which(m[r, ] > .Machine$double.xmin))
+  })
+  ij <- Reduce(rbind, ij)
+  
+  if (ignore.na) {
+    m <- sparseMatrixNA(i=ij[, 1], j=ij[, 2], unlist(lapply(l, function(z) z@snr)), 
+                        dims=dim(m), keep.zeros=TRUE)
+  } else {
+    m <- sparseMatrix(i=ij[, 1], j=ij[, 2], x=unlist(lapply(l, function(z) z@snr)),
+                      dims=dim(m))
+    m[m == 0] <- .Machine$double.xmin
   }
-  snr <- fun(m, na.rm=TRUE)
+
+  snr <- fun(m, na.rm = TRUE)
 
   ## merge metaData
   metaData <- .mergeMetaData(lapply(l, function(x)x@metaData))
 
-  createMassPeaks(mass=mass, intensity=intensity, snr=snr, metaData=metaData)
+  createMassPeaks(mass=mass, intensity=as.numeric(intensity), snr=as.numeric(snr), metaData=metaData)
 }
 
 ## merge different metaData by equal list names
diff --git a/R/sparseMatrix-functions.R b/R/sparseMatrix-functions.R
new file mode 100644
index 0000000..f825538
--- /dev/null
+++ b/R/sparseMatrix-functions.R
@@ -0,0 +1,20 @@
+sparseMatrixNA <- function(i, j, x, dims, dimnames,
+                           keep.zeros = TRUE) {
+  if (keep.zeros) {
+    x[x == 0] <- .Machine$double.xmin
+  }
+  M <- sparseMatrix(i=i, j=j, x=x, dims = dims, dimnames = dimnames)
+
+  return(M)
+}
+
+
+as.sparseMatrixNA <- function(x, keep.zeros = TRUE) {
+  if (keep.zeros) {
+    x[x == 0] <- .Machine$double.xmin
+  }
+  x[is.na(x)] <- 0
+  x <- as(x, 'sparseMatrix')
+  
+  return(x)
+}
\ No newline at end of file
diff --git a/man/msiSlices-functions.Rd b/man/msiSlices-functions.Rd
index ae9a024..0c61720 100644
--- a/man/msiSlices-functions.Rd
+++ b/man/msiSlices-functions.Rd
@@ -70,7 +70,7 @@ data("fiedler2009subset", package="MALDIquant")
 coordinates(fiedler2009subset) <- cbind(x=rep(1:4, 2), y=rep(1:2, each=4))
 
 slices <- msiSlices(fiedler2009subset, center=c(5864.49, 8936.97),
-                    tolerance=0.25)
+                    tolerance=0.25, method="mean")
 
 slices
 }
diff --git a/src/Makevars b/src/Makevars
new file mode 100644
index 0000000..d3e3f41
--- /dev/null
+++ b/src/Makevars
@@ -0,0 +1,14 @@
+
+## With R 3.1.0 or later, you can uncomment the following line to tell R to 
+## enable compilation with C++11 (where available)
+##
+## Also, OpenMP support in Armadillo prefers C++11 support. However, for wider
+## availability of the package we do not yet enforce this here.  It is however
+## recommended for client packages to set it.
+##
+## And with R 3.4.0, and RcppArmadillo 0.7.960.*, we turn C++11 on as OpenMP
+## support within Armadillo prefers / requires it
+CXX_STD = CXX11
+
+PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS) 
+PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS)
diff --git a/src/Makevars.win b/src/Makevars.win
new file mode 100644
index 0000000..d3e3f41
--- /dev/null
+++ b/src/Makevars.win
@@ -0,0 +1,14 @@
+
+## With R 3.1.0 or later, you can uncomment the following line to tell R to 
+## enable compilation with C++11 (where available)
+##
+## Also, OpenMP support in Armadillo prefers C++11 support. However, for wider
+## availability of the package we do not yet enforce this here.  It is however
+## recommended for client packages to set it.
+##
+## And with R 3.4.0, and RcppArmadillo 0.7.960.*, we turn C++11 on as OpenMP
+## support within Armadillo prefers / requires it
+CXX_STD = CXX11
+
+PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS) 
+PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS)
diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp
new file mode 100644
index 0000000..ecc1af0
--- /dev/null
+++ b/src/RcppExports.cpp
@@ -0,0 +1,61 @@
+// Generated by using Rcpp::compileAttributes() -> do not edit by hand
+// Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
+
+#include <RcppArmadillo.h>
+#include <Rcpp.h>
+
+using namespace Rcpp;
+
+#ifdef RCPP_USE_GLOBAL_ROSTREAM
+Rcpp::Rostream<true>&  Rcpp::Rcout = Rcpp::Rcpp_cout_get();
+Rcpp::Rostream<false>& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get();
+#endif
+
+// colMediansArma
+arma::vec colMediansArma(arma::sp_mat& x, bool ignore_missing);
+RcppExport SEXP _MALDIquant_colMediansArma(SEXP xSEXP, SEXP ignore_missingSEXP) {
+BEGIN_RCPP
+    Rcpp::RObject rcpp_result_gen;
+    Rcpp::RNGScope rcpp_rngScope_gen;
+    Rcpp::traits::input_parameter< arma::sp_mat& >::type x(xSEXP);
+    Rcpp::traits::input_parameter< bool >::type ignore_missing(ignore_missingSEXP);
+    rcpp_result_gen = Rcpp::wrap(colMediansArma(x, ignore_missing));
+    return rcpp_result_gen;
+END_RCPP
+}
+// colMeansArma
+arma::vec colMeansArma(arma::sp_mat& x, bool ignore_missing);
+RcppExport SEXP _MALDIquant_colMeansArma(SEXP xSEXP, SEXP ignore_missingSEXP) {
+BEGIN_RCPP
+    Rcpp::RObject rcpp_result_gen;
+    Rcpp::RNGScope rcpp_rngScope_gen;
+    Rcpp::traits::input_parameter< arma::sp_mat& >::type x(xSEXP);
+    Rcpp::traits::input_parameter< bool >::type ignore_missing(ignore_missingSEXP);
+    rcpp_result_gen = Rcpp::wrap(colMeansArma(x, ignore_missing));
+    return rcpp_result_gen;
+END_RCPP
+}
+
+RcppExport SEXP C_colMedians(SEXP, SEXP);
+RcppExport SEXP C_dilation(SEXP, SEXP);
+RcppExport SEXP C_erosion(SEXP, SEXP);
+RcppExport SEXP C_localMaxima(SEXP, SEXP);
+RcppExport SEXP C_lowerConvexHull(SEXP, SEXP);
+RcppExport SEXP C_snip(SEXP, SEXP, SEXP);
+
+static const R_CallMethodDef CallEntries[] = {
+    {"_MALDIquant_colMediansArma", (DL_FUNC) &_MALDIquant_colMediansArma, 2},
+    {"_MALDIquant_colMeansArma", (DL_FUNC) &_MALDIquant_colMeansArma, 2},
+    {"C_colMedians",               (DL_FUNC) &C_colMedians,               2},
+    {"C_dilation",                 (DL_FUNC) &C_dilation,                 2},
+    {"C_erosion",                  (DL_FUNC) &C_erosion,                  2},
+    {"C_localMaxima",              (DL_FUNC) &C_localMaxima,              2},
+    {"C_lowerConvexHull",          (DL_FUNC) &C_lowerConvexHull,          2},
+    {"C_snip",                     (DL_FUNC) &C_snip,                     3},
+    {NULL, NULL, 0}
+};
+
+RcppExport void R_init_MALDIquant(DllInfo *dll) {
+    R_registerRoutines(dll, NULL, CallEntries, NULL, NULL);
+    R_useDynamicSymbols(dll, FALSE);
+}
diff --git a/src/colMediansArma.cpp b/src/colMediansArma.cpp
new file mode 100644
index 0000000..421df33
--- /dev/null
+++ b/src/colMediansArma.cpp
@@ -0,0 +1,147 @@
+// [[Rcpp::depends(RcppArmadillo)]]
+
+#include <RcppArmadillo.h>
+
+using namespace Rcpp;
+
+// [[Rcpp::export]]
+arma::vec colMediansArma(arma::sp_mat& x, bool ignore_missing) {
+
+  bool is_even;
+  bool is_na;
+  int half;
+  int numel;
+  arma::vec vals;
+  arma::sp_mat colvals;
+  
+  arma::vec med = arma::zeros<arma::vec>(x.n_cols);
+
+  if (!ignore_missing) {
+    is_even=((x.n_rows) % 2 == 0);
+    half=(int)((x.n_rows)/2);
+  }
+
+  for (size_t i = 0; i != x.n_cols; ++i) {
+
+    colvals = x.col(i);
+    is_na = FALSE;
+    
+    if (ignore_missing) {
+      
+      vals = arma::zeros<arma::vec>(x.n_rows);
+      
+      numel = 0;
+      for (int j = 0; j != x.n_rows; ++j) {
+        if (colvals[j] != 0) {
+          vals[numel] = colvals[j];
+          numel++;
+        }
+      }
+      
+      is_even = ((numel) % 2 == 0);
+      half = (int)((numel) / 2);
+      
+    } else {
+      
+      vals = arma::zeros<arma::vec>(x.n_rows);
+      for (int j = 0; j != x.n_rows; ++j) {
+        if (colvals[j] == 0) {
+          med[i] = NA_REAL;
+          is_na = TRUE;
+          break;
+        }
+        vals[j] = colvals[j];
+      }
+      
+      if (is_na) {
+        continue;
+      }
+      
+      numel = x.n_rows;
+    }
+    
+    if (numel == 0) {
+      med[i] = 0;
+    } else {
+      
+      if (numel < x.n_rows) {
+        vals = vals.head(numel);
+      }
+      vals = arma::sort(vals);
+      
+      if (is_even) {
+        med[i] = (vals[half] + vals[half - 1]) / 2;
+      } else {
+        med[i] = vals[half];
+      }
+      
+    }
+
+  }
+  
+  return(med);
+}
+
+
+// [[Rcpp::export]]
+arma::vec colMeansArma(arma::sp_mat& x, bool ignore_missing) {
+  int numel;
+  double sigma;
+  bool is_na;
+  arma::sp_mat colvals;
+  
+  arma::vec mu = arma::zeros<arma::vec>(x.n_cols);
+  
+  for (size_t i = 0; i != x.n_cols; ++i) {
+    
+    colvals = x.col(i);
+    is_na = FALSE;
+    
+    if (ignore_missing) {
+      numel = 0;
+      for (int j = 0; j != x.n_rows; ++j) {
+        if (colvals[j] != 0) {
+          sigma += colvals[j];
+          numel++;
+        }
+      }
+      
+    } else {
+      
+      // If any element is zero = missing, return NA
+      for (int j = 0; j != x.n_rows; ++j) {
+        if (colvals[j] == 0) {
+          mu[i] = NA_REAL;
+          is_na = TRUE;
+          break;
+        }
+      }
+      
+      if (is_na) {
+        continue;
+      }
+      
+      numel = x.n_rows;  // In this case use all elements
+      
+    }
+    
+    // If no element is different from zero, return NA
+    if (numel == 0) {
+      
+      mu[i] = NA_REAL;
+      
+    } else {
+      
+      sigma = 0;
+      for (int j = 0; j != x.n_rows; ++j) {
+        sigma += colvals[j];
+      }
+      
+      mu[i] = sigma / numel;
+      
+    }
+    
+  }
+  
+  return(mu);
+}
\ No newline at end of file
diff --git a/src/init.c b/src/init.c
index 5e24b91..29e046b 100644
--- a/src/init.c
+++ b/src/init.c
@@ -23,24 +23,24 @@
 #include <Rinternals.h>
 #include <R_ext/Rdynload.h>
 
-static const R_CallMethodDef callMethods[] = {
-    {"C_colMedians", (DL_FUNC) &C_colMedians, 2},
-    {"C_snip", (DL_FUNC) &C_snip, 3},
-    {"C_lowerConvexHull", (DL_FUNC) &C_lowerConvexHull, 2},
-    {"C_dilation", (DL_FUNC) &C_dilation, 2},
-    {"C_erosion", (DL_FUNC) &C_erosion, 2},
-    {"C_localMaxima", (DL_FUNC) &C_localMaxima, 2},
-    { NULL, NULL, 0 } /* mark last element */
-};
+// static const R_CallMethodDef callMethods[] = {
+//     {"C_colMedians", (DL_FUNC) &C_colMedians, 2},
+//     {"C_snip", (DL_FUNC) &C_snip, 3},
+//     {"C_lowerConvexHull", (DL_FUNC) &C_lowerConvexHull, 2},
+//     {"C_dilation", (DL_FUNC) &C_dilation, 2},
+//     {"C_erosion", (DL_FUNC) &C_erosion, 2},
+//     {"C_localMaxima", (DL_FUNC) &C_localMaxima, 2},
+//     { NULL, NULL, 0 } /* mark last element */
+// };
 
-void
-#ifdef HAVE_VISIBILITY_ATTRIBUTE
-__attribute__ ((visibility ("default")))
-#endif
-R_init_MALDIquant(DllInfo *dll)
-{
-    /* no .C, .Fortran, or .External routines => NULL */
-    R_registerRoutines(dll, NULL, callMethods, NULL, NULL);
-    R_useDynamicSymbols(dll, FALSE);
-    R_forceSymbols(dll, TRUE);
-}
+// void
+// #ifdef HAVE_VISIBILITY_ATTRIBUTE
+// __attribute__ ((visibility ("default")))
+// #endif
+// R_init_MALDIquant(DllInfo *dll)
+// {
+//     /* no .C, .Fortran, or .External routines => NULL */
+//     R_registerRoutines(dll, NULL, callMethods, NULL, NULL);
+//     R_useDynamicSymbols(dll, FALSE);
+//     R_forceSymbols(dll, TRUE);
+// }
diff --git a/tests/testthat/test_as.matrix-functions.R b/tests/testthat/test_as.matrix-functions.R
index cf3b6f4..f4e0fb4 100644
--- a/tests/testthat/test_as.matrix-functions.R
+++ b/tests/testthat/test_as.matrix-functions.R
@@ -7,11 +7,14 @@ s <- list(createMassSpectrum(mass=1:5, intensity=11:15),
 
 mp <- matrix(c(11:14, NA_real_, NA_real_, 22:25), byrow=TRUE, ncol=5, nrow=2,
              dimnames=list(NULL, 1:5))
+mp <- as.sparseMatrixNA(mp)
 ms <- matrix(as.double(c(11:15, 21:25)), byrow=TRUE, ncol=5, nrow=2,
              dimnames=list(NULL, 1:5))
+ms <- as.sparseMatrixNA(ms)
 
 mb <- matrix(c(rep(1L, 4), 0L, 0L, rep(1L, 4)), byrow=TRUE, ncol=5, nrow=2,
              dimnames=list(NULL, 1:5))
+mb <- as(mb, 'sparseMatrix')
 attr(mp, "mass") <- attr(ms, "mass") <- attr(mb, "mass") <- 1:5
 
 test_that(".as.matrix.MassObjectsList throws errors", {
diff --git a/tests/testthat/test_colMedians-functions.R b/tests/testthat/test_colMedians-functions.R
index 60c0d4b..771e1bc 100644
--- a/tests/testthat/test_colMedians-functions.R
+++ b/tests/testthat/test_colMedians-functions.R
@@ -28,12 +28,22 @@ test_that(".colMaxs", {
   ## even nrow
   m <- matrix(rnorm(1e5), ncol=1e2)
   expect_equal(MALDIquant:::.colMaxs(m), colMaxs(m))
+  ## sparse
+  m[sample(round(length(m) * 0.3))] <- 0
+  m <- as(m, "sparseMatrix")
+  expect_equal(MALDIquant:::.colMaxs(m), colMaxs(m))
 })
 
 test_that(".colCors", {
   colCors <- function(x, y, use="everything") {
     z <- double(ncol(x))
     for (i in 1:ncol(x)) {
+      if (is(x, 'sparseMatrix')) {
+        x[x[, i] == 0, i] <- NA
+      }
+      if (is(y, 'sparseMatrix')) {
+        y[y[, i] == 0, i] <- NA
+      }
       z[i] <- stats::cor(x[,i], y[,i], use=use)
     }
     z
@@ -45,9 +55,29 @@ test_that(".colCors", {
   nna <- n
   mna[sample(1e5, 1e3)] <- NA
   nna[sample(1e5, 1e3)] <- NA
-  expect_equal(MALDIquant:::.colCors(m, n), colCors(m, n))
-  expect_equal(MALDIquant:::.colCors(mna, nna), colCors(mna, nna))
+  expect_equal(MALDIquant:::.colCors(m, n), colCors(m, n), tolerance=testthat_tolerance())
+  expect_equal(MALDIquant:::.colCors(mna, nna), colCors(mna, nna), tolerance=testthat_tolerance())
+  expect_equal(MALDIquant:::.colCors(mna, nna, na.rm=TRUE),
+               colCors(mna, nna, use="na.or.complete"), tolerance=testthat_tolerance())
+  ## sparse
+  mna <- as.sparseMatrixNA(mna)
+  nna <- as.sparseMatrixNA(nna)
+  expect_equal(MALDIquant:::.colCors(mna, nna), colCors(mna, nna), tolerance=testthat_tolerance())
+  expect_equal(MALDIquant:::.colCors(mna, nna, na.rm=TRUE),
+               colCors(mna, nna, use="na.or.complete"), tolerance=testthat_tolerance())
+  
+  mna[mna == 0] <- NA
+  mna <- as.matrix(mna)
+  expect_equal(MALDIquant:::.colCors(mna, nna), colCors(mna, nna), tolerance=testthat_tolerance())
+  expect_equal(MALDIquant:::.colCors(mna, nna, na.rm=TRUE),
+               colCors(mna, nna, use="na.or.complete"), tolerance=testthat_tolerance())
+  
+  mna[is.na(mna)] <- 0
+  mna <- as(mna, 'sparseMatrix')
+  nna[nna == 0] <- NA
+  nna <- as.matrix(nna)
+  expect_equal(MALDIquant:::.colCors(mna, nna), colCors(mna, nna), tolerance=testthat_tolerance())
   expect_equal(MALDIquant:::.colCors(mna, nna, na.rm=TRUE),
-               colCors(mna, nna, use="na.or.complete"))
+               colCors(mna, nna, use="na.or.complete"), tolerance=testthat_tolerance())
 })
 
diff --git a/tests/testthat/test_intensityMatrix-functions.R b/tests/testthat/test_intensityMatrix-functions.R
index 08b8886..b76969f 100644
--- a/tests/testthat/test_intensityMatrix-functions.R
+++ b/tests/testthat/test_intensityMatrix-functions.R
@@ -7,8 +7,10 @@ s <- list(createMassSpectrum(mass=1:5, intensity=11:15),
 
 m <- matrix(c(11:14, NA_real_, NA_real_, 22:25), byrow=TRUE, ncol=5, nrow=2,
             dimnames=list(NULL, 1:5))
+m <- as.sparseMatrixNA(m, keep.zeros = TRUE)
 e <- matrix(c(11:15, 21:25), byrow=TRUE, ncol=5, nrow=2,
             dimnames=list(NULL, 1:5))
+e <- as.sparseMatrixNA(e, keep.zeros = TRUE)
 attr(m, "mass") <- attr(e, "mass") <- 1:5
 
 test_that("intensityMatrix throws errors", {