Merge branch 'develop'

svkucheryavski · Aug 3, 2024 · 909ef43 · 909ef43
2 parents 0e8d954 + 7fc0374
commit 909ef43
Show file tree

Hide file tree

Showing 8 changed files with 88 additions and 16 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,14 +1,14 @@
 Package: mdatools
-Version: 0.14.1
+Version: 0.14.2
 Title: Multivariate Data Analysis for Chemometrics
-Date: 2023-08-12
+Date: 2024-08-02
 Authors@R: c(person("Sergey", "Kucheryavskiy", role = c("aut", "cre"), email="[email protected]", comment = c(ORCID = "0000-0002-3145-7244")))
 Maintainer: Sergey Kucheryavskiy <[email protected]>
 Description: Projection based methods for preprocessing, exploring and analysis of multivariate data used in chemometrics. S. Kucheryavskiy (2020) <doi:10.1016/j.chemolab.2020.103937>.
 Encoding: UTF-8
 License: MIT + file LICENSE
 Imports: methods, graphics, grDevices, stats, Matrix
-RoxygenNote: 7.2.3
+RoxygenNote: 7.3.2
 Suggests: testthat, pcv
 NeedsCompilation: no
 Packaged: 2019-05-24 11:03:33 UTC; svkucheryavski

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,9 @@
+v. 0.14.2
+=========
+* fixed bug [#118](https://github.com/svkucheryavski/mdatools/issues/118) (thanks to @gongyh).
+* added additional sanity checks to preprocessing matrix (most of them work correctly only with matirces).
+* added automatic data frame to matrix conversion to methods for model training.
+
 v. 0.14.1
 =========
 

diff --git a/R/constraints.R b/R/constraints.R
@@ -132,13 +132,17 @@ constraintUnimod <- function(x, d, tol = 0) {
    # process each component separately
    for (a in seq_len(ncol(x))) {
 
-      # flatten peaks to the left of maximum
-      left_part <- (peak.ind[a] - 1):1
-      x[, a] <- f(x[, a], max = x[peak.ind[a], a], indseq = left_part, step = +1)
+      if (peak.ind[a] != 1) {
+          # flatten peaks to the left of maximum
+          left_part <- (peak.ind[a] - 1):1
+          x[, a] <- f(x[, a], max = x[peak.ind[a], a], indseq = left_part, step = +1)
+      }
 
-      # flatten peaks to the right of maximum
-      right_part <- (peak.ind[a] + 1):nvar
-      x[, a] <- f(x[, a], max = x[peak.ind[a], a], indseq = right_part, step = -1)
+      if (peak.ind[a] != nvar) {
+          # flatten peaks to the right of maximum
+          right_part <- (peak.ind[a] + 1):nvar
+          x[, a] <- f(x[, a], max = x[peak.ind[a], a], indseq = right_part, step = -1)
+      }
    }
 
    return(x)

diff --git a/R/misc.R b/R/misc.R
@@ -844,5 +844,11 @@ prepCalData <- function(x, exclrows = NULL, exclcols = NULL, min.nrows = 1, min.
       stop(sprintf("Dataset should contain at least %d variables (columns).", min.ncols))
    }
 
+   if (is.data.frame((x))) {
+      nvar <- ncol(x)
+      x <- mda.df2mat(x)
+      stopifnot("The provided data frame has non-numeric columns, convert them to the numbers first." = ncol(x) == nvar)
+   }
+
    return(x)
 }
diff --git a/R/prep.R b/R/prep.R
@@ -527,6 +527,7 @@ pinv <- function(data) {
 #'
 #'
 prep.generic <- function(x, f, ...) {
+   stopifnot("First argument of preprocessing function should be a matrix." = is.matrix(x))
    attrs <- mda.getattr(x)
    dimnames <- dimnames(x)
    x.p <- f(x, ...)

diff --git a/README.md b/README.md
@@ -18,7 +18,7 @@ If you want to cite the package, please use the following: Sergey Kucheryavskiy,
 What is new
 -----------
 
-Latest release (0.14.1, August 2023) is available both from GitHub and CRAN. You can see the full list of changes [here](NEWS.md). The Bookdown tutorial has been also updated and contains the description of new methods added in the last release.
+Latest release (0.14.2, August 2024) is available both from GitHub and CRAN. You can see the full list of changes [here](NEWS.md). The Bookdown tutorial has been also updated and contains the description of new methods added in the last release.
 
 
 How to install

diff --git a/tests/testthat/test-constraints.R b/tests/testthat/test-constraints.R
@@ -112,8 +112,10 @@ x  <- 1:500
 y1 <- dnorm(x, m = 100, s = 20) * 0.8  + dnorm(x, m = 200, s = 10) * 0.2
 y2 <- dnorm(x, m = 100, s = 10) * 0.2  + dnorm(x, m = 200, s = 20) * 0.8
 y3 <- dnorm(x, m = 250, s = 20)
-y <- cbind(y1, y2, y3)
-y <- y + matrix(rnorm(length(y), 0, max(y) * 0.05), nrow(y), ncol(y))
+y4 <- dnorm(x, m = 1, s = 20) * 0.7 + dnorm(x, m = 500, s = 20) * 0.3 ## main peak at first column
+y5 <- dnorm(x, m = 1, s = 20) * 0.3 + dnorm(x, m = 500, s = 20) * 0.7 ## main peak at last column
+y <- cbind(y1, y2, y3, y4, y5)
+yn <- y + matrix(rnorm(length(y), 0, max(y) * 0.01), nrow(y), ncol(y))
 
 check_unimodality <- function(y, tol = 0) {
    n <- length(y)
@@ -123,16 +125,39 @@ check_unimodality <- function(y, tol = 0) {
    return(sum(dl > tol) + sum(dr > tol))
 }
 
-test_that("Unimodality constraint works correctly", {
+test_that("Unimodality constraint works correctly for data without noise", {
    cn1 <- constraint("unimod")
    y.new1 <- employ.constraint(cn1, y, NULL)
-
    cn2 <- constraint("unimod", params = list(tol = 0.2))
    y.new2 <- employ.constraint(cn2, y, NULL)
+   expect_true(sum(apply(y, 2, check_unimodality, tol = 0) > 0) == 4)
+   expect_true(all(apply(y.new1, 2, check_unimodality, tol = 0) < 0.00000001))
+   expect_true(all(apply(y.new2, 2, check_unimodality, tol = 0.2) < 0.20))
+
+   # uncomment for visual inspection
+   # par(mfrow = c(3, 1))
+   # matplot(y, type = "l")
+   # matplot(y.new1, type = "l")
+   # matplot(y.new2, type = "l")
+
+})
+
+test_that("Unimodality constraint works correctly for data with noise", {
+   cn1 <- constraint("unimod")
+   y.new1 <- employ.constraint(cn1, yn, NULL)
+   cn2 <- constraint("unimod", params = list(tol = 0.2))
+   y.new2 <- employ.constraint(cn2, yn, NULL)
 
-   expect_true(all(apply(y, 2, check_unimodality, tol = 0) > 0))
+   expect_true(all(apply(yn, 2, check_unimodality, tol = 0) > 0))
    expect_true(all(apply(y.new1, 2, check_unimodality, tol = 0) < 0.00000001))
    expect_true(all(apply(y.new2, 2, check_unimodality, tol = 0.2) < 0.20))
+
+   # uncomment for visual inspection
+   # par(mfrow = c(3, 1))
+   # matplot(yn, type = "l")
+   # matplot(y.new1, type = "l")
+   # matplot(y.new2, type = "l")
+
 })
 
 

diff --git a/tests/testthat/test-prep.R b/tests/testthat/test-prep.R
@@ -1,9 +1,39 @@
 # new tests on top
 
-context("prep: autoscale")
+context("prep: checks")
 
 data(simdata)
 
+# spectra as data frame or vector — should show error
+Xe <- as.data.frame(simdata$spectra.c)
+xe <- 1:10
+
+test_that("Preprocessing methods raise error if data is not a matrix", {
+
+   expect_error(prep.snv((Xe)))
+   expect_error(prep.snv((xe)))
+
+   expect_error(prep.msc((Xe)))
+   expect_error(prep.msc((xe)))
+
+   expect_error(prep.norm((Xe)))
+   expect_error(prep.norm((xe)))
+
+   expect_error(prep.km((Xe)))
+   expect_error(prep.km((xe)))
+
+   expect_error(prep.savgol((Xe)))
+   expect_error(prep.savgol((xe)))
+
+   expect_error(prep.alsbasecorr((Xe)))
+   expect_error(prep.alsbasecorr((xe)))
+
+   expect_error(prep.varsel((Xe)))
+   expect_error(prep.varsel((xe)))
+})
+
+context("prep: autoscale")
+
 # normal spectra
 X1 <- simdata$spectra.c
 p11 <- prep.autoscale(X1)