From d5864929f20eddfd1f06c0a8c9288f897e7df155 Mon Sep 17 00:00:00 2001 From: Sergey Kucheryavskiy Date: Thu, 1 Aug 2024 21:00:11 +0200 Subject: [PATCH 1/6] added tests to reproduce bug #118 --- tests/testthat/test-constraints.R | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/tests/testthat/test-constraints.R b/tests/testthat/test-constraints.R index 303fde1..4442385 100644 --- a/tests/testthat/test-constraints.R +++ b/tests/testthat/test-constraints.R @@ -112,8 +112,10 @@ x <- 1:500 y1 <- dnorm(x, m = 100, s = 20) * 0.8 + dnorm(x, m = 200, s = 10) * 0.2 y2 <- dnorm(x, m = 100, s = 10) * 0.2 + dnorm(x, m = 200, s = 20) * 0.8 y3 <- dnorm(x, m = 250, s = 20) -y <- cbind(y1, y2, y3) -y <- y + matrix(rnorm(length(y), 0, max(y) * 0.05), nrow(y), ncol(y)) +y4 <- dnorm(x, m = 1, s = 20) * 0.7 + dnorm(x, m = 500, s = 20) * 0.3 ## main peak at first column +y5 <- dnorm(x, m = 1, s = 20) * 0.3 + dnorm(x, m = 500, s = 20) * 0.7 ## main peak at last column +y <- cbind(y1, y2, y3, y4, y5) +yn <- y + matrix(rnorm(length(y), 0, max(y) * 0.01), nrow(y), ncol(y)) check_unimodality <- function(y, tol = 0) { n <- length(y) @@ -123,10 +125,10 @@ check_unimodality <- function(y, tol = 0) { return(sum(dl > tol) + sum(dr > tol)) } -test_that("Unimodality constraint works correctly", { +matplot(y, type = "l") +test_that("Unimodality constraint works correctly for data without noise", { cn1 <- constraint("unimod") y.new1 <- employ.constraint(cn1, y, NULL) - cn2 <- constraint("unimod", params = list(tol = 0.2)) y.new2 <- employ.constraint(cn2, y, NULL) @@ -135,6 +137,17 @@ test_that("Unimodality constraint works correctly", { expect_true(all(apply(y.new2, 2, check_unimodality, tol = 0.2) < 0.20)) }) +test_that("Unimodality constraint works correctly for data with noise", { + cn1 <- constraint("unimod") + y.new1 <- employ.constraint(cn1, yn, NULL) + cn2 <- constraint("unimod", params = list(tol = 0.2)) + y.new2 <- employ.constraint(cn2, yn, NULL) + + expect_true(all(apply(yn, 2, check_unimodality, tol = 0) > 0)) + expect_true(all(apply(y.new1, 2, check_unimodality, tol = 0) < 0.00000001)) + expect_true(all(apply(y.new2, 2, check_unimodality, tol = 0.2) < 0.20)) +}) + ################ # Closure # From 21c3ea8e67043a3d036b83240a1d1a32da28cf36 Mon Sep 17 00:00:00 2001 From: gongyh Date: Fri, 2 Aug 2024 08:46:17 +0800 Subject: [PATCH 2/6] fix bugs in constraintUnimod --- R/constraints.R | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/R/constraints.R b/R/constraints.R index c02846a..e9c2dbf 100644 --- a/R/constraints.R +++ b/R/constraints.R @@ -132,13 +132,17 @@ constraintUnimod <- function(x, d, tol = 0) { # process each component separately for (a in seq_len(ncol(x))) { - # flatten peaks to the left of maximum - left_part <- (peak.ind[a] - 1):1 - x[, a] <- f(x[, a], max = x[peak.ind[a], a], indseq = left_part, step = +1) + if (peak.ind[a] != 1) { + # flatten peaks to the left of maximum + left_part <- (peak.ind[a] - 1):1 + x[, a] <- f(x[, a], max = x[peak.ind[a], a], indseq = left_part, step = +1) + } - # flatten peaks to the right of maximum - right_part <- (peak.ind[a] + 1):nvar - x[, a] <- f(x[, a], max = x[peak.ind[a], a], indseq = right_part, step = -1) + if (peak.ind[a] != nvar) { + # flatten peaks to the right of maximum + right_part <- (peak.ind[a] + 1):nvar + x[, a] <- f(x[, a], max = x[peak.ind[a], a], indseq = right_part, step = -1) + } } return(x) From 8aaf6ef83f14358042da3ffd39b3757c106ada70 Mon Sep 17 00:00:00 2001 From: Sergey Kucheryavskiy Date: Fri, 2 Aug 2024 06:51:33 +0200 Subject: [PATCH 3/6] closes #116 --- R/prep.R | 1 + tests/testthat/test-prep.R | 32 +++++++++++++++++++++++++++++++- 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/R/prep.R b/R/prep.R index 33b8da5..8f7d8e9 100755 --- a/R/prep.R +++ b/R/prep.R @@ -527,6 +527,7 @@ pinv <- function(data) { #' #' prep.generic <- function(x, f, ...) { + stopifnot("First argument of preprocessing function should be a matrix." = is.matrix(x)) attrs <- mda.getattr(x) dimnames <- dimnames(x) x.p <- f(x, ...) diff --git a/tests/testthat/test-prep.R b/tests/testthat/test-prep.R index 50a86c0..b5cdb1d 100644 --- a/tests/testthat/test-prep.R +++ b/tests/testthat/test-prep.R @@ -1,9 +1,39 @@ # new tests on top -context("prep: autoscale") +context("prep: checks") data(simdata) +# spectra as data frame or vector — should show error +Xe <- as.data.frame(simdata$spectra.c) +xe <- 1:10 + +test_that("Preprocessing methods raise error if data is not a matrix", { + + expect_error(prep.snv((Xe))) + expect_error(prep.snv((xe))) + + expect_error(prep.msc((Xe))) + expect_error(prep.msc((xe))) + + expect_error(prep.norm((Xe))) + expect_error(prep.norm((xe))) + + expect_error(prep.km((Xe))) + expect_error(prep.km((xe))) + + expect_error(prep.savgol((Xe))) + expect_error(prep.savgol((xe))) + + expect_error(prep.alsbasecorr((Xe))) + expect_error(prep.alsbasecorr((xe))) + + expect_error(prep.varsel((Xe))) + expect_error(prep.varsel((xe))) +}) + +context("prep: autoscale") + # normal spectra X1 <- simdata$spectra.c p11 <- prep.autoscale(X1) From f0396d6055723d60e9af630dfdb01f65bb2752a7 Mon Sep 17 00:00:00 2001 From: Sergey Kucheryavskiy Date: Fri, 2 Aug 2024 07:10:59 +0200 Subject: [PATCH 4/6] amended tests for unimodality --- tests/testthat/test-constraints.R | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/tests/testthat/test-constraints.R b/tests/testthat/test-constraints.R index 4442385..ddf98b3 100644 --- a/tests/testthat/test-constraints.R +++ b/tests/testthat/test-constraints.R @@ -125,16 +125,21 @@ check_unimodality <- function(y, tol = 0) { return(sum(dl > tol) + sum(dr > tol)) } -matplot(y, type = "l") test_that("Unimodality constraint works correctly for data without noise", { cn1 <- constraint("unimod") y.new1 <- employ.constraint(cn1, y, NULL) cn2 <- constraint("unimod", params = list(tol = 0.2)) y.new2 <- employ.constraint(cn2, y, NULL) - - expect_true(all(apply(y, 2, check_unimodality, tol = 0) > 0)) + expect_true(sum(apply(y, 2, check_unimodality, tol = 0) > 0) == 4) expect_true(all(apply(y.new1, 2, check_unimodality, tol = 0) < 0.00000001)) expect_true(all(apply(y.new2, 2, check_unimodality, tol = 0.2) < 0.20)) + + # uncomment for visual inspection + # par(mfrow = c(3, 1)) + # matplot(y, type = "l") + # matplot(y.new1, type = "l") + # matplot(y.new2, type = "l") + }) test_that("Unimodality constraint works correctly for data with noise", { @@ -146,6 +151,13 @@ test_that("Unimodality constraint works correctly for data with noise", { expect_true(all(apply(yn, 2, check_unimodality, tol = 0) > 0)) expect_true(all(apply(y.new1, 2, check_unimodality, tol = 0) < 0.00000001)) expect_true(all(apply(y.new2, 2, check_unimodality, tol = 0.2) < 0.20)) + + # uncomment for visual inspection + # par(mfrow = c(3, 1)) + # matplot(yn, type = "l") + # matplot(y.new1, type = "l") + # matplot(y.new2, type = "l") + }) From 4555878ff9f0fb355f85cae29496deaa45cfb594 Mon Sep 17 00:00:00 2001 From: Sergey Kucheryavskiy Date: Fri, 2 Aug 2024 09:32:50 +0200 Subject: [PATCH 5/6] added df to matrix conversion to prepCalData method --- R/misc.R | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/R/misc.R b/R/misc.R index d50ee40..705d85d 100755 --- a/R/misc.R +++ b/R/misc.R @@ -844,5 +844,11 @@ prepCalData <- function(x, exclrows = NULL, exclcols = NULL, min.nrows = 1, min. stop(sprintf("Dataset should contain at least %d variables (columns).", min.ncols)) } + if (is.data.frame((x))) { + nvar <- ncol(x) + x <- mda.df2mat(x) + stopifnot("The provided data frame has non-numeric columns, convert them to the numbers first." = ncol(x) == nvar) + } + return(x) } From 7fc0374e4a7e07837dcfeb9969ffe38c68250372 Mon Sep 17 00:00:00 2001 From: Sergey Kucheryavskiy Date: Fri, 2 Aug 2024 14:01:32 +0200 Subject: [PATCH 6/6] preparation for new release --- DESCRIPTION | 6 +++--- NEWS.md | 6 ++++++ README.md | 2 +- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 66cbad6..0d2ace0 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,14 +1,14 @@ Package: mdatools -Version: 0.14.1 +Version: 0.14.2 Title: Multivariate Data Analysis for Chemometrics -Date: 2023-08-12 +Date: 2024-08-02 Authors@R: c(person("Sergey", "Kucheryavskiy", role = c("aut", "cre"), email="svkucheryavski@gmail.com", comment = c(ORCID = "0000-0002-3145-7244"))) Maintainer: Sergey Kucheryavskiy Description: Projection based methods for preprocessing, exploring and analysis of multivariate data used in chemometrics. S. Kucheryavskiy (2020) . Encoding: UTF-8 License: MIT + file LICENSE Imports: methods, graphics, grDevices, stats, Matrix -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.2 Suggests: testthat, pcv NeedsCompilation: no Packaged: 2019-05-24 11:03:33 UTC; svkucheryavski diff --git a/NEWS.md b/NEWS.md index 4ecb254..0fd9b2d 100755 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,9 @@ +v. 0.14.2 +========= +* fixed bug [#118](https://github.com/svkucheryavski/mdatools/issues/118) (thanks to @gongyh). +* added additional sanity checks to preprocessing matrix (most of them work correctly only with matirces). +* added automatic data frame to matrix conversion to methods for model training. + v. 0.14.1 ========= diff --git a/README.md b/README.md index bc55b5c..04720dd 100755 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ If you want to cite the package, please use the following: Sergey Kucheryavskiy, What is new ----------- -Latest release (0.14.1, August 2023) is available both from GitHub and CRAN. You can see the full list of changes [here](NEWS.md). The Bookdown tutorial has been also updated and contains the description of new methods added in the last release. +Latest release (0.14.2, August 2024) is available both from GitHub and CRAN. You can see the full list of changes [here](NEWS.md). The Bookdown tutorial has been also updated and contains the description of new methods added in the last release. How to install