diff --git a/DESCRIPTION b/DESCRIPTION index 3bbf9c8..9013bcb 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: mdatools Title: Multivariate Data Analysis for Chemometrics -Version: 0.11.5 -Date: 2021-04-26 +Version: 0.12.0 +Date: 2021-09-12 Author: Sergey Kucheryavskiy () Maintainer: Sergey Kucheryavskiy Description: Projection based methods for preprocessing, diff --git a/NAMESPACE b/NAMESPACE index 808c243..0215c11 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -11,7 +11,6 @@ S3method(as.matrix,simcares) S3method(categorize,pca) S3method(categorize,pls) S3method(confint,regcoeffs) -S3method(employ,constraint) S3method(getCalibrationData,simcam) S3method(getConfusionMatrix,classres) S3method(getProbabilities,pca) @@ -171,12 +170,14 @@ export(dd.crit) export(ddmoments.param) export(ddrobust.param) export(ellipse) -export(employ) +export(employ.constraint) +export(employ.prep) export(eye) export(fprintf) export(getCalibrationData) export(getConfusionMatrix) export(getImplementedConstraints) +export(getImplementedPrepMethods) export(getProbabilities) export(getPureVariables) export(getRegcoeffs) @@ -295,13 +296,17 @@ export(pls.run) export(plsda) export(plsdares) export(plsres) +export(prep) export(prep.alsbasecorr) export(prep.autoscale) +export(prep.list) export(prep.msc) export(prep.norm) export(prep.ref2km) export(prep.savgol) export(prep.snv) +export(prep.transform) +export(prep.varsel) export(prepCalData) export(randtest) export(regcoeffs) diff --git a/NEWS.md b/NEWS.md index df6d10e..bc35708 100755 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,35 @@ +v.0.12.0 +======== +This release is mostly about preprocessing: it adds some new methods, improves the existing ones and makes it possible to combine preprocessing methods (including their parameter values) and apply them all together in the correct sequence. See the [preprocessing section](https://mda.tools/docs/preprocessing.html) in the tutorials for details. + +## New features and improvements + +* method `prep.norm()` for normalization of spectra (or any other signals) is more versatile now and supports normalization to unit sum, length or area, to the height or area of an internal standard peak, and SNV. SNV via `prep.snv()` is still supported for compatibility. + +* `prep.savgol()` has been rewritten to fix a minor bug where the first derivative was inverted, but also to improve the handling of the edge points. See details in the help text for the function and in the tutorial. + +* added a new method `prep.transform()` which can be used for transformation of values of e.g. a response variable to handle non-linearity. + +* added a new method `prep.varsel()` which makes it possible to select particular variables as a part of the preprocessing framework. For example, you can apply baseline correction, normalization and noise suppression to the whole spectra and after that select only a particular part for modelling. + +* added a new method `prep()` which lets you combine several preprocessing methods and their parameters into a list and use it e.g. as a part of a model (see the example below).
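For example, a full preprocessing sequence can be defined and applied in one go (a minimal sketch; `spectra` stands for any numeric matrix with signals as rows, and the parameter values are arbitrary):

```r
# combine several preprocessing methods and their parameters into a list
myprep <- list(
   prep("alsbasecorr", list(plambda = 5, p = 0.01)),
   prep("norm", list(type = "area")),
   prep("savgol", list(width = 15, porder = 2, dorder = 0))
)

# apply the methods in the given order to the data
pspectra <- employ.prep(myprep, spectra)
```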
+ + +## Bug fixes + +* fixed a bug in `mcrals()` which on rare occasions could lead to a wrong error message. + +* fixed a bug where attribute `yaxis.name` was used as `ylab` when creating line and bar plots. + +* fixed an earlier reported issue with `plotXYResiduals()` ([#100](https://github.com/svkucheryavski/mdatools/issues/100)) + + +## Other changes + +* function `employ()` which was used to employ constraints in MCR-ALS has been renamed to `employ.constraint()`. The function is for internal use only, so this change should not cause any issues in your code. + +* the user guides have been revised and improved. + v.0.11.5 ======== @@ -108,7 +140,7 @@ Finally, all model results (calibration, cross-validation and test set validatio into a single list, `model$res`. This makes a lot of things easier. However, the old way of accessing the result objects (e.g. `model$calres` or `model$cvres`) still works, you can access e.g. calibration results both using `model$res$cal` and `model$calres`, so this change will not break the compatibility. -Below is more detailed list of changes. The [tutorial](https://mdatools.com/docs/) has been updated accordingly. +Below is a more detailed list of changes. The [tutorial](https://mda.tools/docs/) has been updated accordingly. ## Breaking changes diff --git a/R/constraints.R b/R/constraints.R index 5850ce0..c02846a 100644 --- a/R/constraints.R +++ b/R/constraints.R @@ -276,9 +276,11 @@ constraint <- function(name, params = NULL, method = NULL) { #' matrix with pure spectra or contributions #' @param d #' matrix with original spectral values +#' @param ... +#' other arguments #' #' @export -employ.constraint <- function(obj, x, d) { +employ.constraint <- function(obj, x, d, ...) { return(do.call(obj$method, c(list(x = x, d = d), obj$params))) } diff --git a/R/defaults.R b/R/defaults.R index d0275b5..594fae8 100755 --- a/R/defaults.R +++ b/R/defaults.R @@ -1,16 +1,3 @@ -#' Generic function to apply a method (e.g. constraint) -#' -#' @param obj -#' constraint object -#' @param x -#' data in question (e.g. resolved spectra) -#' @param d -#' matrix with original data -#' -#' @export -employ <- function(obj, x, d) { - UseMethod("employ") -} #' Plot purity spectra #' @param obj diff --git a/R/ldecomp.R b/R/ldecomp.R index d87c51b..3492ebe 100755 --- a/R/ldecomp.R +++ b/R/ldecomp.R @@ -107,17 +107,19 @@ plotCumVariance.ldecomp <- function(obj, type = "b", labels = "values", show.plo #' vector with ticks for x-axis #' @param show.plot #' logical, shall plot be created or just plot series object is needed +#' @param ylab +#' label for y-axis #' @param ... #' most of graphical parameters from \code{\link{mdaplot}} function can be used. #' #' @export plotVariance.ldecomp <- function(obj, type = "b", variance = "expvar", labels = "values", - xticks = seq_len(obj$ncomp), show.plot = TRUE, ...) { + xticks = seq_len(obj$ncomp), show.plot = TRUE, ylab = "Explained variance, %", ...) { if (!show.plot) return(obj[[variance]]) return( - mdaplot(obj[[variance]], xticks = xticks, labels = labels, type = type, ...) + mdaplot(obj[[variance]], xticks = xticks, labels = labels, type = type, ylab = ylab, ...)
) } @@ -387,7 +389,6 @@ ldecomp.getVariances <- function(scores, loadings, residuals, Q) { attr(expvar, "name") <- "Variance" attr(cumexpvar, "name") <- "Cumulative variance" attr(expvar, "xaxis.name") <- attr(cumexpvar, "xaxis.name") <- "Components" - attr(expvar, "yaxis.name") <- attr(cumexpvar, "yaxis.name") <- "Explained variance, %" return(list(expvar = expvar, cumexpvar = cumexpvar)) } @@ -826,7 +827,7 @@ ldecomp.getT2Limits <- function(lim.type, alpha, gamma, params) { if (lim.type %in% c("jm", "chisq")) DoF <- pT2$nobj lim <- switch(lim.type, - "jm" = , + "jm" = hotelling.crit(pT2$nobj, seq_len(ncomp), alpha, gamma), "chisq" = hotelling.crit(pT2$nobj, seq_len(ncomp), alpha, gamma), "ddmoments" = scale(dd.crit(pQ, pT2, alpha, gamma), center = FALSE, scale = DoF / pT2$u0), "ddrobust" = scale(dd.crit(pQ, pT2, alpha, gamma), center = FALSE, scale = DoF / pT2$u0), @@ -1001,7 +1002,7 @@ ldecomp.plotResiduals <- function(res, Qlim, T2lim, ncomp, log = FALSE, norm = F getPlotLim <- function(lim, pd, ld, dim) { if (!is.null(lim) || all(!show.limits)) return(lim) limits <- if (show.limits[[2]]) max(ld$outliers[, dim]) else max(ld$extremes[, dim]) - return( c(0, max(sapply(pd, function(x) { max(c(getValues(x, dim), limits)) * 1.05}))) ) + return(c(0, max(sapply(pd, function(x) max(c(getValues(x, dim), limits)) * 1.05)))) } # check that show.limits is logical diff --git a/R/mcrals.R b/R/mcrals.R index 0e54177..780ece5 100644 --- a/R/mcrals.R +++ b/R/mcrals.R @@ -462,7 +462,7 @@ mcrals.cal <- function(D, ncomp, cont.constraints, spec.constraints, spec.ini, ## apply constraints to the resolved contributions for (cc in cont.constraints) { - Ct <- employ(cc, x = Ct, d = D) + Ct <- employ.constraint(cc, x = Ct, d = D) } ## resolve spectra @@ -481,7 +481,7 @@ mcrals.cal <- function(D, ncomp, cont.constraints, spec.constraints, spec.ini, ## apply constraints to the resolved spectra for (sc in spec.constraints) { - St <- employ.constraint(sc, x = St, d = D) } if (verbose) { @@ -489,7 +489,15 @@ mcrals.cal <- function(D, ncomp, cont.constraints, spec.constraints, spec.ini, } var_old <- var - var <- 1 - sum((D - tcrossprod(cont.solver(D, St), St))^2) / totvar + var <- tryCatch( + 1 - sum((D - tcrossprod(cont.solver(D, St), St))^2) / totvar, + error = function(e) { + print(e) + stop("Unable to resolve the components: perhaps 'ncomp' is too large or the initial estimates for spectra are not good enough.", call. = FALSE) + } + ) + if ( (var - var_old) < tol) { if (verbose) cat("No more improvements.\n") break diff --git a/R/pca.R b/R/pca.R index 7e2bfac..555335d 100755 --- a/R/pca.R +++ b/R/pca.R @@ -1023,6 +1023,8 @@ pca.cal <- function(x, ncomp, center, scale, method, rand = NULL) { #' vector with ticks for x-axis #' @param res #' list with result objects to show the variance for +#' @param ylab +#' label for y-axis #' @param ... #' other plot parameters (see \code{mdaplotg} for details) #' @@ -1031,11 +1033,11 @@ pca.cal <- function(x, ncomp, center, scale, method, rand = NULL) { #' #' @export plotVariance.pca <- function(obj, type = "b", labels = "values", variance = "expvar", - xticks = seq_len(obj$ncomp), res = obj$res, ...) { + xticks = seq_len(obj$ncomp), res = obj$res, ylab = "Explained variance, %", ...) { res <- getRes(res, "ldecomp") plot_data <- lapply(res, plotVariance, variance = variance, show.plot = FALSE) - mdaplotg(plot_data, xticks = xticks, labels = labels, type = type, ...)
+ mdaplotg(plot_data, xticks = xticks, labels = labels, type = type, ylab = ylab, ...) } #' Cumulative explained variance plot for PCA model @@ -1302,6 +1304,8 @@ plotBiplot.pca <- function(obj, comp = c(1, 2), pch = c(16, NA), col = mdaplot.g #' what to show as data points labels #' @param xticks #' vector with tick values for x-axis +#' @param ylab +#' label for y-axis #' @param ... #' other plot parameters (see \code{mdaplotg} for details) #' @@ -1309,7 +1313,7 @@ plotBiplot.pca <- function(obj, comp = c(1, 2), pch = c(16, NA), col = mdaplot.g #' Work only if parameter \code{lim.type} equal to "ddmoments" or "ddrobust". #' #' @export -plotT2DoF <- function(obj, type = "b", labels = "values", xticks = seq_len(obj$ncomp), ...) { +plotT2DoF <- function(obj, type = "b", labels = "values", xticks = seq_len(obj$ncomp), ylab = "Nh", ...) { if (!(obj$lim.type %in% c("ddrobust", "ddmoments", "chisq"))) { stop("This plot can not be made for selected 'lim.type' method.") @@ -1318,8 +1322,7 @@ plotT2DoF <- function(obj, type = "b", labels = "values", xticks = seq_len(obj$n plot_data <- mda.subset(obj$T2lim, subset = 4) attr(plot_data, "name") <- "Degrees of freedom" attr(plot_data, "xaxis.name") <- attr(obj$loadings, "xaxis.name") - attr(plot_data, "yaxis.name") <- "Nh" - mdaplot(plot_data, xticks = xticks, labels = labels, type = type, ...) + mdaplot(plot_data, xticks = xticks, labels = labels, type = type, ylab = ylab, ...) } #' Degrees of freedom plot for orthogonal distance (Nh) @@ -1336,6 +1339,8 @@ plotT2DoF <- function(obj, type = "b", labels = "values", xticks = seq_len(obj$n #' what to show as data points labels #' @param xticks #' vector with tick values for x-axis +#' @param ylab +#' label for y-axis #' @param ... #' other plot parameters (see \code{mdaplotg} for details) #' @@ -1343,7 +1348,7 @@ plotT2DoF <- function(obj, type = "b", labels = "values", xticks = seq_len(obj$n #' Work only if parameter \code{lim.type} equal to "ddmoments" or "ddrobust". #' #' @export -plotQDoF <- function(obj, type = "b", labels = "values", xticks = seq_len(obj$ncomp), ...) { +plotQDoF <- function(obj, type = "b", labels = "values", xticks = seq_len(obj$ncomp), ylab = "Nq", ...) { if (!(obj$lim.type %in% c("ddrobust", "ddmoments", "chisq"))) { stop("This plot can not be made for selected 'lim.type' method.") @@ -1352,8 +1357,7 @@ plotQDoF <- function(obj, type = "b", labels = "values", xticks = seq_len(obj$nc plot_data <- mda.subset(obj$Qlim, subset = 4) attr(plot_data, "name") <- "Degrees of freedom" attr(plot_data, "xaxis.name") <- attr(obj$loadings, "xaxis.name") - attr(plot_data, "yaxis.name") <- "Nq" - mdaplot(plot_data, type = type, labels = labels, xticks = xticks, ...) + mdaplot(plot_data, type = type, labels = labels, xticks = xticks, ylab = ylab, ...) 
} #' Degrees of freedom plot for both distances diff --git a/R/plotseries.R b/R/plotseries.R index 988361f..c77b112 100644 --- a/R/plotseries.R +++ b/R/plotseries.R @@ -71,6 +71,7 @@ plotseries <- function(data, type, cgroup = NULL, col = NULL, opacity = 1, ps$xlim <- range(ps$x_values) ps$ylim <- range(ps$y_values) class(ps) <- "plotseries" + return(ps) } @@ -126,7 +127,7 @@ preparePlotData <- function(data) { excluded_cols <- NULL if (length(attrs$exclcols) > 0) { excluded_cols <- mda.getexclind(attrs$exclcols, colnames(data), ncol(data)) - data <- data[, -excluded_cols, drop = F] + data <- data[, -excluded_cols, drop = FALSE] attrs$xaxis.values <- attrs$xaxis.values[-excluded_cols] } @@ -140,10 +141,10 @@ preparePlotData <- function(data) { excluded_rows <- NULL if (length(attrs$exclrows > 0)) { excluded_rows <- mda.getexclind(attrs$exclrows, rownames(data), nrow(data)) - excluded_data <- data[excluded_rows, , drop = F] + excluded_data <- data[excluded_rows, , drop = FALSE] excluded_yaxis_values <- attrs$yaxis.values[excluded_rows] - data <- data[-excluded_rows, , drop = F] + data <- data[-excluded_rows, , drop = FALSE] attrs$yaxis.values <- attrs$yaxis.values[-excluded_rows] } @@ -206,6 +207,12 @@ splitPlotData <- function(data, type) { ) } + + # 0.12.0: yaxis.name must not be used as axis label in line and bar plots + if (type %in% c("b", "l", "e", "h")) { + attrs$yaxis.name = NULL + } + # prepare x-axis values for other types of plots y_values <- data x_values <- attrs$xaxis.values diff --git a/R/pls.R b/R/pls.R index c185a84..e750caf 100755 --- a/R/pls.R +++ b/R/pls.R @@ -946,6 +946,8 @@ plotYCumVariance.pls <- function(obj, type = "b", main = "Cumulative variance (Y #' what to show as labels for plot objects. #' @param res #' list with result objects to show the plot for (by defaul, model results are used) +#' @param ylab +#' label for y-axis #' @param ... #' other plot parameters (see \code{mdaplotg} for details) #' @@ -954,10 +956,10 @@ plotYCumVariance.pls <- function(obj, type = "b", main = "Cumulative variance (Y #' #' @export plotVariance.pls <- function(obj, decomp = "xdecomp", variance = "expvar", type = "b", - labels = "values", res = obj$res, ...) { + labels = "values", res = obj$res, ylab = "Explained variance, %", ...) { plot_data <- lapply(res, plotVariance, decomp = decomp, variance = variance, show.plot = FALSE) - mdaplotg(plot_data, labels = labels, type = type, ...) + mdaplotg(plot_data, labels = labels, type = type, ylab = ylab, ...) } #' X scores plot for PLS @@ -1178,9 +1180,9 @@ plotXYResiduals.pls <- function(obj, ncomp = obj$ncomp.selected, norm = TRUE, lo # make plot if (length(plot_data) == 1) { - mdaplot(plot_data[[1]], type = "p", xlim = xlim, ylim = ylim, cgroup = cgroup, ...) + mdaplot(plot_data[[1]], type = "p", xlim = xlim, ylim = ylim, cgroup = cgroup, main = main, ...) } else { - mdaplotg(plot_data, type = "p", xlim = xlim, ylim = ylim, show.legend = show.legend, + mdaplotg(plot_data, type = "p", xlim = xlim, ylim = ylim, show.legend = show.legend, main = main, legend.position = legend.position, ...) } diff --git a/R/plsres.R b/R/plsres.R index 71749e0..12a4993 100755 --- a/R/plsres.R +++ b/R/plsres.R @@ -466,7 +466,7 @@ plotXYScores.plsres <- function(obj, ncomp = 1, show.plot = TRUE, ...) { #' #' @export plotXResiduals.plsres <- function(obj, ncomp = obj$ncomp.selected, norm = TRUE, log = FALSE, - main = sprintf("X-residuals (ncomp = %d)", ncomp), ...) { + main = sprintf("X-distances (ncomp = %d)", ncomp), ...) 
{ if (is.null(obj$xdecomp)) return(invisible(NULL)) @@ -487,8 +494,6 @@ plotXResiduals.plsres <- function(obj, ncomp = obj$ncomp.selected, norm = TRUE, #' PLS results (object of class \code{plsres}) #' @param ncomp #' how many components to use (if NULL - user selected optimal value will be used) -#' @param main -#' main title for the plot #' @param ... #' other plot parameters (see \code{mdaplot} for details) #' @@ -496,11 +494,10 @@ plotXResiduals.plsres <- function(obj, ncomp = obj$ncomp.selected, #' Proxy for \code{\link{plotResiduals.regres}} function. #' #' @export -plotYResiduals.plsres <- function(obj, ncomp = obj$ncomp.selected, - main = sprintf("Y-residuals (ncomp = %d)", ncomp), ...) { +plotYResiduals.plsres <- function(obj, ncomp = obj$ncomp.selected, ...) { if (is.null(obj$y.ref)) return(invisible(NULL)) - return(plotResiduals.regres(obj, ncomp = ncomp, main = main, ...)) + return(plotResiduals.regres(obj, ncomp = ncomp, ...)) } diff --git a/R/prep.R b/R/prep.R index fa8f2be..ef43e85 100755 --- a/R/prep.R +++ b/R/prep.R @@ -103,33 +103,60 @@ prep.snv <- function(data) { #' Normalization #' #' @description -#' Normalizes signals (rows of data matrix) to unit area, unit length or unit sum +#' Normalizes signals (rows of data matrix). #' #' @param data #' a matrix with data values #' @param type -#' type of normalization \code{"area"}, \code{"length"} or \code{"sum"}. +#' type of normalization \code{"area"}, \code{"length"}, \code{"sum"}, \code{"snv"}, or \code{"is"}. +#' @param col.ind +#' indices of columns (can be either integer or logical values) for normalization to internal +#' standard peak. +#' +#' @details +#' The \code{"area"}, \code{"length"} and \code{"sum"} types normalize every row of the data matrix +#' to unit area (sum of absolute values), unit length or unit sum, respectively. Type \code{"snv"} +#' does the Standard Normal Variate normalization, same as \code{\link{prep.snv}}. Type +#' \code{"is"} normalizes the rows to an internal standard peak, whose position is defined by +#' parameter `col.ind`. If the position is a single value, the rows are normalized to the height +#' of this peak. If `col.ind` points to several adjacent values, the rows are normalized to the area +#' under the peak, i.e. the sum of the corresponding intensities. #' #' @return #' data matrix with normalized values #' #' @export -prep.norm <- function(data, type = "area") { + +prep.norm <- function(data, type = "area", col.ind = NULL) { + + if (type == "snv") return(prep.snv(data)) + + if (type == "is" && is.null(col.ind) ) { + stop("For 'is' normalization you need to provide indices for IS peak.") + } + + if (is.logical(col.ind)) { + col.ind <- which(col.ind) + } - f <- function(data, type) { + if (!is.null(col.ind) && (min(col.ind) < 1 || max(col.ind) > ncol(data))) { + stop("Values for 'col.ind' seem to be wrong.") + } + + f <- function(data, type, col.ind) { w <- switch( type, "area" = apply(abs(data), 1, sum), "length" = sqrt(apply(data^2, 1, sum)), - "sum" = apply(data, 1, sum) + "sum" = apply(data, 1, sum), + "is" = apply(data[, col.ind, drop = FALSE], 1, sum) ) if (is.null(w)) stop("Wrong value for argument 'type'.") return(sweep(data, 1, w, "/")) } - return(prep.generic(data, f, type = type)) + return(prep.generic(data, f, type = type, col.ind = col.ind)) }
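For illustration, normalization to an internal standard peak could look like this (a minimal sketch; `spectra` stands for any numeric matrix of signals and the peak position 510:520 is a hypothetical example):

```r
# normalize every row of `spectra` to the area (sum of intensities) of the
# internal standard peak assumed to be located in columns 510 to 520
pspectra <- prep.norm(spectra, type = "is", col.ind = 510:520)
```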
#' Savytzky-Golay filter @@ -146,26 +173,73 @@ prep.norm <- function(data, type = "area") { #' @param dorder #' order of derivative to take (0 - no derivative) #' +#' @details +#' The function implements the algorithm described in [1], which handles the edge points correctly and +#' does not require cutting the spectra. +#' +#' @references +#' 1. Peter A. Gorry. General least-squares smoothing and differentiation by the convolution +#' (Savitzky-Golay) method. Anal. Chem. 1990, 62, 6, 570–573, https://doi.org/10.1021/ac00205a007. +#' #' @export prep.savgol <- function(data, width = 3, porder = 1, dorder = 0) { + stopifnot("Filter width ('width') should be at least 3." = width > 2) + stopifnot("Filter width ('width') should be an odd integer number." = width %% 2 == 1) + stopifnot("Wrong value for the derivative order (should be 0, 1, or 2)." = dorder %in% (0:2)) + stopifnot("Wrong value for the polynomial degree (should be integer number between 0 and 4)." = porder %in% (0:4)) + stopifnot("Polynomial degree ('porder') should not be smaller than the derivative order ('dorder')." = porder >= dorder) + + # compute Gram polynomials + gram <- function(i, m, k, s) { + if (k > 0) { + return( (4 * k - 2) / (k * (2 * m - k + 1)) * (i * gram(i, m, k - 1, s) + s * gram(i, m, k - 1, s - 1)) - + ((k - 1) * (2 * m + k)) / (k * (2 * m - k + 1)) * gram(i, m, k - 2, s) ) + } + if (k == 0 && s == 0) return(1) + return(0) + } + + # compute generalized factorial + genfact <- function(a, b) { + f <- 1; + if ((a - b + 1) > a) return(f) + + for (i in (a - b + 1):a) { + f <- f * i; + } + + return(f) + } + + # compute weights for convolution depending on position + weight <- function(i, t, m, n, s) { + sum <- 0 + for (k in 0:n) { + sum <- sum + (2 * k + 1) * (genfact(2 * m, k) / genfact(2 * m + k + 1, k + 1)) * gram(i, m, k, 0) * gram(t, m, k, s) + } + return(sum) + } + f <- function(data, width, porder, dorder) { - stopifnot("Filter width ('width') should be equal at least to 3." = width > 2) - stopifnot("Filter width ('width') should be an odd integer number." = width %% 2 == 1) - stopifnot("Wrong value for the derivative order (should be 0, 1, or 2)." = dorder %in% (0:2)) - stopifnot("Wrong value for the polynomial degree (should be integer number between 0 and 4)." = porder %in% (0:4)) - stopifnot("Polynomal degree ('porder') should not be smaller the derivative order ('dorder')."
= porder >= dorder) nobj <- nrow(data) nvar <- ncol(data) pdata <- matrix(0, ncol = nvar, nrow = nobj) - for (i in seq_len(nobj)) { - d <- data[i, ] - w <- (width - 1) / 2 - f <- pinv(outer(-w:w, 0:porder, FUN = "^")) - d <- convolve(d, rev(f[dorder + 1, ]), type = "o") - pdata[i, ] <- d[(w + 1) : (length(d) - w)] + m <- (width - 1) / 2 + w <- outer(-m:m, -m:m, function(x, y) weight(x, y, m, porder, dorder)) + n <- ncol(data) + + for (j in seq_len(nobj)) { + for (i in 1:m) { + pdata[j, i] <- convolve(data[j, 1:(2 * m + 1)], w[, i], type = "filter") + pdata[j, n - i + 1] <- convolve(data[j, (n - 2 * m):n], w[, width - i + 1], type = "filter") + } + + for (i in (m+1):(n-m)) { + pdata[j, i] <- convolve(data[j, (i - m):(i + m)], w[, m + 1], type = "filter") + } } return(pdata) @@ -179,8 +253,8 @@ prep.savgol <- function(data, width = 3, porder = 1, dorder = 0) { #' @description #' Applies Multiplicative Scatter Correction (MSC) transformation to data matrix (spectra) #' -#' @param spectra -#' a matrix with spectra values +#' @param data +#' a matrix with data values (spectra) #' @param mspectrum #' mean spectrum (if NULL will be calculated from \code{spectra}) #' @@ -206,7 +280,7 @@ prep.savgol <- function(data, width = 3, porder = 1, dorder = 0) { #' mdaplot(cspectra, type = "l", main = "After MSC") #' #' @export -prep.msc <- function(spectra, mspectrum = NULL) { +prep.msc <- function(data, mspectrum = NULL) { f <- function(spectra, mspectrum) { if (is.null(mspectrum)) { @@ -231,7 +305,7 @@ prep.msc <- function(spectra, mspectrum = NULL) { return(pspectra) } - return(prep.generic(spectra, f, mspectrum = mspectrum)) + return(prep.generic(data, f, mspectrum = mspectrum)) } @@ -240,7 +314,7 @@ prep.msc <- function(spectra, mspectrum = NULL) { #' @description #' Applies Kubelka-Munk (km) transformation to data matrix (spectra) #' -#' @param spectra +#' @param data #' a matrix with spectra values (absolute reflectance values) #' #' @return @@ -252,16 +326,16 @@ prep.msc <- function(spectra, mspectrum = NULL) { #' (1 - R)^2 / 2R #' #' @export -prep.ref2km <- function(spectra) { - stopifnot("Can't use Kubelka-Munk transformation as some of the values are zeros or negative." = all(spectra > 0)) +prep.ref2km <- function(data) { + stopifnot("Can't use Kubelka-Munk transformation as some of the values are zeros or negative." = all(data > 0)) f <- function(x) (1 - x)^2 / (2 * x) - return(prep.generic(spectra, f)) + return(prep.generic(data, f)) } #' Baseline correction using assymetric least squares #' -#' @param spectra +#' @param data #' matrix with spectra (rows correspond to individual spectra) #' @param plambda #' power of the penalty parameter (e.g. if plambda = 5, lambda = 10^5) @@ -270,6 +344,9 @@ prep.ref2km <- function(spectra) { #' @param max.niter #' maximum number of iterations #' +#' @return +#' preprocessed spectra. +#' #' @details #' The function implements baseline correction algorithm based on Whittaker smoother. The method #' was first shown in [1]. 
The function has two main parameters - power of a penalty parameter @@ -297,9 +374,9 @@ prep.ref2km <- function(spectra) { #' @importFrom Matrix Matrix Diagonal #' #' @export -prep.alsbasecorr <- function(spectra, plambda = 5, p = 0.1, max.niter = 10) { - attrs <- mda.getattr(spectra) - dimnames <- dimnames(spectra) +prep.alsbasecorr <- function(data, plambda = 5, p = 0.1, max.niter = 10) { + attrs <- mda.getattr(data) + dimnames <- dimnames(data) baseline <- function(y) { @@ -322,7 +399,7 @@ prep.alsbasecorr <- function(spectra, plambda = 5, p = 0.1, max.niter = 10) { return(as.numeric(z)) } - pspectra <- t(apply(spectra, 1, function(x) x - baseline(x))) + pspectra <- t(apply(data, 1, function(x) x - baseline(x))) pspectra <- mda.setattr(pspectra, attrs) dimnames(pspectra) <- dimnames @@ -330,6 +407,66 @@ } +#' Transformation +#' +#' @description +#' Transforms values using any mathematical function (e.g. log). +#' +#' @param data +#' a matrix with data values +#' @param fun +#' reference to a transformation function, e.g. `log` or `function(x) x^2`. +#' @param ... +#' optional parameters for the transformation function +#' +#' @return +#' data matrix with transformed values +#' +#' @examples +#' # generate a matrix with two columns +#' y <- cbind(rnorm(100, 10, 1), rnorm(100, 20, 2)) +#' +#' # apply log transformation +#' py1 = prep.transform(y, log) +#' +#' # apply power transformation +#' py2 = prep.transform(y, function(x) x^-1.25) +#' +#' # show distributions +#' par(mfrow = c(2, 3)) +#' for (i in 1:2) { +#' hist(y[, i], main = paste0("Original values, column #", i)) +#' hist(py1[, i], main = paste0("Log-transformed, column #", i)) +#' hist(py2[, i], main = paste0("Power-transformed, column #", i)) +#' } +#' +#' @export +prep.transform <- function(data, fun, ...) { + return(prep.generic(data, fun, ...)) +} + + +#' Variable selection +#' +#' @description +#' Returns dataset with selected variables +#' +#' @param data +#' a matrix with data values +#' @param var.ind +#' indices of variables (columns) to select, can be either numeric or logical +#' +#' @return +#' data matrix with the selected variables (columns) +#' +#' @export +prep.varsel <- function(data, var.ind) { + if (!is.null(attr(data, "exclcols"))) { + stop("prep.varsel() can not be used for dataset with excluded (hidden) columns.") + } + return(mda.subset(data, select = var.ind)) +} #' Pseudo-inverse matrix #' #' @description @@ -363,3 +500,219 @@ prep.generic <- function(x, f, ...) { dimnames(x.p) <- dimnames return(x.p) } + + +#' Shows a list with implemented preprocessing methods +#' +#' @export +getImplementedPrepMethods <- function() { + list( + # prep.snv <- function(data) + "snv" = list( + name = "snv", + method = prep.snv, + params = list(), + params.info = list(), + info = "Standard normal variate normalization." + ), + + # prep.ref2km <- function(spectra) + "ref2km" = list( + name = "ref2km", + method = prep.ref2km, + params = list(), + params.info = list(), + info = "Transform reflectance spectra to Kubelka-Munk units." + ), + + # prep.msc <- function(spectra, mspectrum = NULL) + "msc" = list( + name = "msc", + method = prep.msc, + params = list(mspectrum = NULL), + params.info = list(mspectrum = "mean spectrum (if NULL will be computed based on data)."), + info = "Multiplicative scatter correction." + ), + + # prep.transform <- function(data, fun, ...)
+ "transform" = list( + name = "transform", + method = prep.transform, + params = list(fun = log), + params.info = list(fun = "function to transform the values (e.g. 'log')"), + info = "Transformation of data values using math functions (log, sqrt, etc.)." + ), + + # prep.varsel <- function(data, var.ind) + "varsel" = list( + name = "varsel", + method = prep.varsel, + params = list(var.ind = NULL), + params.info = list(var.ind = "indices of variables (columns) to select."), + info = "Select user-defined variables (columns of dataset)." + ), + + # prep.norm <- function(data, type = "area", col.ind = NULL) + "norm" = list( + name = "norm", + method = prep.norm, + params = list(type = "area", col.ind = NULL), + params.info = list( + type = "type of normalization ('area', 'sum', 'length', 'is', 'snv').", + col.ind = "indices of columns (variables) for normalization to internal standard peak." + ), + info = "Normalization." + ), + + # prep.savgol <- function(data, width = 3, porder = 1, dorder = 0) + "savgol" = list( + name = "savgol", + method = prep.savgol, + params = list(width = 3, porder = 1, dorder = 0), + params.info = list( + width = "width of the filter.", + porder = "polynomial order.", + dorder = "derivative order." + ), + info = "Savitzky-Golay filter." + ), + + # prep.alsbasecorr <- function(spectra, plambda = 5, p = 0.1, max.niter = 10) + "alsbasecorr" = list( + name = "alsbasecorr", + method = prep.alsbasecorr, + params = list(plambda = 5, p = 0.1, max.niter = 10), + params.info = list( + plambda = "power of the penalty parameter (e.g. if plambda = 5, lambda = 10^5)", + p = "assymetry ratio (should be between 0 and 1)", + max.niter = "maximum number of iterations" + ), + info = "Asymmetric least squares baseline correction." + ), + + # prep.autoscale <- function(data, center = TRUE, scale = FALSE, max.cov = 0) + "autoscale" = list( + name = "autoscale", + method = prep.autoscale, + params = list(center = TRUE, scale = FALSE, max.cov = 0), + params.info = list( + center = "a logical value or vector with numbers for centering.", + scale = "a logical value or vector with numbers for weighting.", + max.cov = "columns with coefficient of variation (in percent) below `max.cov` will not be scaled" + ) + ) + ) +} + + +#' Shows information about all implemented preprocessing methods. +#' +#' @export +prep.list <- function() { + p <- getImplementedPrepMethods() + + cat("\n\nList of available preprocessing methods:\n") + lapply(p, function(c) { + cat("\n\n") + fprintf(" %s\n", c$info) + cat(" ---------------\n") + fprintf(" name: '%s'\n", c$name) + + if (length(c$params.info) == 0) { + cat(" no parameters required\n") + } else { + cat(" parameters:\n") + for (i in seq_along(c$params.info)) { + fprintf(" '%s': %s\n", names(c$params.info)[i], c$params.info[[i]]) + } + } + }) + + invisible() +} + + +#' Class for preprocessing object +#' +#' @param name +#' short text with name for the preprocessing method. +#' @param params +#' a list with parameters for the method (if NULL - default parameters will be used). +#' @param method +#' method to call when applying the preprocessing, provide it only for user defined methods. +#' +#' @details +#' Use this class to create a list with a sequence of preprocessing methods to keep them together +#' in right order and with defined parameters. The list/object can be provided as an extra argument +#' to any modelling function (e.g. 
\code{pca}, \code{pls}, etc), so the optimal model parameters and +#' the optimal preprocessing will be stored together and can be applied to raw data by using +#' the method \code{predict}. +#' +#' For your own preprocessing method you need to create a function which takes a matrix with values +#' (dataset) as the first argument, does something with it and then returns a matrix with the same +#' dimensions and attributes as the result. The method can have any number of optional parameters. +#' +#' See Bookdown tutorial for details. +#' +#' @export +prep <- function(name, params = NULL, method = NULL) { + + if (!is.null(params) && !is.list(params)) { + stop("prep: argument 'params' must be a list with parameter names and values.") + } + + if (is.null(method)) { + # assuming it is one of the standard preprocessing methods + # 1. first check the name + item <- getImplementedPrepMethods()[[name]] + stopifnot("prep: either the name of the preprocessing method is wrong or you need to provide a reference to + the function implementing the method if it is user defined." = !is.null(item)) + + # 2. check the parameters + if (is.null(params)) params <- item$params + if (length(params) > 0 && !all(names(params) %in% names(item$params))) { + stop("prep: provided preprocessing parameters have wrong names.") + } + + method <- item$method + } else { + # user defined method, check that it works + res <- tryCatch( + do.call(method, c(list(data = matrix(runif(50, 1, 10), 5, 10)), params)), + error = function(m) stop("prep: the method you provided raises an error: \n", m), + warning = function(m) stop("prep: the method you provided raises a warning: \n", m) + ) + + stopifnot("prep: the method you provided does not return a matrix with correct dimensions." = + dim(res) == c(5, 10)) + } + + obj <- list( + name = name, + method = method, + params = params + ) + + class(obj) <- c("prep") + return(obj) +} + +#' Applies a list with preprocessing methods to a dataset +#' +#' @param obj +#' list with preprocessing methods (created using \code{prep} function). +#' @param x +#' matrix with dataset +#' @param ... +#' other arguments +#' +#' @export +employ.prep <- function(obj, x, ...) { + + stopifnot("employ.prep: the first argument must be a list with preprocessing methods" = is.list(obj) && class(obj[[1]]) == "prep") + for (p in obj) { + x <- do.call(p$method, c(list(data = x), p$params)) + } + + return(x) +}
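The user defined case described above can be sketched as follows (a minimal illustration; the method `prep.plus` and its parameter `offset` are hypothetical names, not part of the package):

```r
# user defined method: takes the data matrix as the first argument and returns
# a matrix with the same dimensions and attributes
prep.plus <- function(data, offset = 0) {
   return(data + offset)
}

# wrap it into a prep object (a reference to the function must be provided)
# and combine it with one of the standard methods
myprep <- list(
   prep("plus", params = list(offset = 1), method = prep.plus),
   prep("norm", params = list(type = "sum"))
)

# apply both methods to a dataset
pdata <- employ.prep(myprep, matrix(runif(50, 1, 10), 5, 10))
```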
diff --git a/R/regcoeffs.R b/R/regcoeffs.R index 59cb7ec..2fbad97 100755 --- a/R/regcoeffs.R +++ b/R/regcoeffs.R @@ -334,13 +334,15 @@ regcoeffs.getStats <- function(coeffs, ci.coeffs = NULL, use.mean = TRUE) { #' @param alpha #' significance level for confidence intervals (a number between 0 and 1, e.g. #' for 95\% alpha = 0.05) +#' @param ylab +#' label for y-axis #' @param ... #' other arguments for plotting methods (e.g. main, xlab, etc) #' #' @export plot.regcoeffs <- function(x, ncomp = 1, ny = 1, type = (if (x$nvar > 30) "l" else "h"), col = c(mdaplot.getColors(1), "lightgray"), show.lines = c(NA, 0), show.ci = FALSE, - alpha = 0.05, ...) { + alpha = 0.05, ylab = paste0("Coefficients (", x$respnames[ny], ")"), ...) { if (length(ncomp) != 1) { stop("Parameter 'ncomp' should be just one value.") } @@ -365,21 +367,20 @@ plot.regcoeffs <- function(x, ncomp = 1, ny = 1, type = (if (x$nvar > 30) "l" el colnames(plot_data) <- rownames(x$values) attr(plot_data, "name") <- paste0("Regression coefficients (ncomp = ", ncomp, ")") - attr(plot_data, "yaxis.name") <- paste0("Coefficients (", x$respnames[ny], ")") if (!(show.ci && !is.null(x$se))) { - return(mdaplot(plot_data, col = col[1], type = type, show.lines = show.lines, ...)) + return(mdaplot(plot_data, col = col[1], type = type, show.lines = show.lines, ylab = ylab, ...)) } ci <- confint(x, ncomp = ncomp, ny = ny, level = 1 - alpha) if (type == "l") { return( mdaplot(mda.rbind(plot_data, mda.t(ci)), type = "l", col = col[c(2, 1, 1)], - show.lines = show.lines, ...) + show.lines = show.lines, ylab = ylab, ...) ) } err <- (ci[, 2] - ci[, 1]) / 2 mdaplotg(list(plot_data, mda.rbind(plot_data, err)), type = c(type, "e"), show.legend = FALSE, - col = col[c(2, 1)], show.grid = T, show.lines = show.lines, ...) + col = col[c(2, 1)], show.grid = TRUE, show.lines = show.lines, ylab = ylab, ...) } diff --git a/R/regmodel.R b/R/regmodel.R index 84eb0bc..cc33657 100755 --- a/R/regmodel.R +++ b/R/regmodel.R @@ -288,15 +288,17 @@ print.regmodel <- function(x, ...) { #' vector with ticks for x-axis values #' @param res #' list with result objects +#' @param ylab +#' label for y-axis #' @param ... #' other plot parameters (see \code{mdaplotg} for details) #' #' @export plotRMSE.regmodel <- function(obj, ny = 1, type = "b", labels = "values", - xticks = seq_len(obj$ncomp), res = obj$res, ...) { + xticks = seq_len(obj$ncomp), res = obj$res, ylab = paste0("RMSE (", obj$res$cal$respnames[ny], ")"), ...) { plot_data <- lapply(res, plotRMSE, ny = ny, show.plot = FALSE) - mdaplotg(plot_data, type = type, xticks = xticks, labels = labels, ...) + mdaplotg(plot_data, type = type, xticks = xticks, labels = labels, ylab = ylab, ...) } #' Predictions plot for regression model @@ -365,7 +367,7 @@ plotYResiduals.regmodel <- function(obj, ncomp = obj$ncomp.selected, ny = 1, sho } plot_data <- lapply(res, plotResiduals, ny = ny, ncomp = ncomp, show.plot = FALSE) - attr(plot_data[[1]], "name") <- sprintf("Y-distance (ncomp = %d)", ncomp) + attr(plot_data[[1]], "name") <- sprintf("Y-residuals (ncomp = %d)", ncomp) mdaplotg(plot_data, show.lines = show.lines, ...) } diff --git a/R/regres.R b/R/regres.R index 363f7ee..f2b882f 100755 --- a/R/regres.R +++ b/R/regres.R @@ -407,7 +407,7 @@ plotResiduals.regres <- function(obj, ny = 1, ncomp = obj$ncomp.selected, show.lines = c(NA, 0), show.plot = TRUE, ...)
{ if (is.null(obj$y.ref)) { - stop("Y residuals can not be plotted without reference values.") + stop("Y-residuals can not be plotted without reference values.") } if (length(ny) != 1) { @@ -420,8 +420,11 @@ plotResiduals.regres <- function(obj, ny = 1, ncomp = obj$ncomp.selected, plot_data <- cbind(obj$y.ref[, ny], obj$y.ref[, ny] - obj$y.pred[, ncomp, ny]) plot_data <- mda.setattr(plot_data, mda.getattr(obj$y.pred)) - colnames(plot_data) <- c(sprintf("%s, reference", obj$respnames[ny]), "Residuals") - attr(plot_data, "name") <- sprintf("Y residuals (ncomp = %d)", ncomp) + colnames(plot_data) <- c( + sprintf("%s, reference", obj$respnames[ny]), + sprintf("%s, residuals", obj$respnames[ny]) + ) + attr(plot_data, "name") <- sprintf("Y-residuals (ncomp = %d)", ncomp) if (!show.plot) { return(plot_data) @@ -447,12 +450,14 @@ plotResiduals.regres <- function(obj, ny = 1, ncomp = obj$ncomp.selected, #' what to use as labels ("names", "values" or "indices") #' @param show.plot #' logical, show plot or just return plot data +#' @param ylab +#' label for y-axis #' @param ... #' other plot parameters (see \code{mdaplot} for details) #' #' @export plotRMSE.regres <- function(obj, ny = 1, type = "b", xticks = seq_len(obj$ncomp), - labels = "values", show.plot = TRUE, ...) { + labels = "values", show.plot = TRUE, ylab = paste0("RMSE (", obj$respnames[ny], ")"), ...) { if (is.null(obj$rmse)) { stop("RMSE values are not available.") @@ -463,14 +468,13 @@ plotRMSE.regres <- function(obj, ny = 1, type = "b", xticks = seq_len(obj$ncomp) } plot_data <- mda.subset(obj$rmse, ny) - attr(plot_data, "yaxis.name") <- paste0("RMSE (", obj$respnames[ny], ")") attr(plot_data, "name") <- "RMSE" if (!show.plot) { return(plot_data) } - return(mdaplot(plot_data, type = type, xticks = xticks, labels = labels, ...)) + return(mdaplot(plot_data, type = type, xticks = xticks, labels = labels, ylab = ylab, ...)) } #' Plot method for regression results diff --git a/R/simca.R b/R/simca.R index 09763e2..44812d5 100755 --- a/R/simca.R +++ b/R/simca.R @@ -34,7 +34,6 @@ #' @return #' Returns an object of \code{simca} class with following fields: #' \item{classname }{a short text with class name.} -#' \item{modpower }{a matrix with modelling power of variables.} #' \item{calres }{an object of class \code{\link{simcares}} with classification results for a #' calibration data.} #' \item{testres }{an object of class \code{\link{simcares}} with classification results for a test diff --git a/README.md b/README.md index 5dae6cd..4b0f0f2 100755 --- a/README.md +++ b/README.md @@ -5,11 +5,11 @@ Multivariate Data Analysis Tools ![Downloads (CRAN)](https://cranlogs.r-pkg.org/badges/grand-total/mdatools?color=blue&logo=R&style=flat-square "Downloads from CRAN") [![codecov](https://codecov.io/gh/svkucheryavski/mdatools/branch/0.10.0/graph/badge.svg?style=flat-square)](https://codecov.io/gh/svkucheryavski/mdatools) - + *mdatools* is an R package for preprocessing, exploring and analysis of multivariate data. The package provides methods mostly common for [Chemometrics](https://en.wikipedia.org/wiki/Chemometrics). It was created for an introductory PhD course on Chemometrics given at Section of Chemical Engineering, Aalborg University. The general idea of the package is to collect most widespread chemometric methods and give a similar "user interface" (or rather API) for using them. So if a user knows how to make a model and visualize results for one method, he or she can easily do this for the others. 
-For more details and examples read a [Bookdown tutorial](https://mdatools.com/docs/). The project website, [mdatools.com](https://mdatools.com), contains additional information about supplementary materials and tools. +For more details and examples read the [Bookdown tutorial](https://mda.tools/docs/). The project website, [mda.tools](https://mda.tools), contains additional information about supplementary materials and tools. You can also take video-lectures from [YouTube channel](https://www.youtube.com/channel/UCox0H4utfMq4FIu2kymuyTA) devoted to introductory Chemometric course I give to master students. The lectures explain theory behind basic Chemometric methods but also show how to use them in *mdatools*. @@ -19,16 +19,16 @@ If you want to cite the package, please use the following: Sergey Kucheryavskiy, What is new ----------- -Latest release (0.11.5) is available both from GitHub and CRAN. You can see the full list of changes [here](NEWS.md). The Bookdown tutorial has been also updated and contains the description of new methods added in the last release. +Latest release (0.12.0) is available both from GitHub and CRAN. You can see the full list of changes [here](NEWS.md). The Bookdown tutorial has also been updated and describes the new methods added in the last release. How to install -------------- -The package is available from CRAN by usual installing procedure. However, due to restrictions in CRAN politics regarding number of submissions (one in 3-4 month), mostly major releases will be published there (with 2-3 weeks delay after GitHub release as more thorough testing is needed). You can [download](https://github.com/svkucheryavski/mdatools/releases) a zip-file with source package and install it using the `install.packages` command, e.g. if the downloaded file is `mdatools_0.11.5.tar.gz` and it is located in a current working directory, just run the following: +The package is available from CRAN via the usual installation procedure. However, due to restrictions in CRAN policies regarding the number of submissions (one in 3-4 months), mostly major releases will be published there (with a 2-3 week delay after the GitHub release, as more thorough testing is needed). You can [download](https://github.com/svkucheryavski/mdatools/releases) a zip-file with the source package and install it using the `install.packages` command, e.g. if the downloaded file is `mdatools_0.12.0.tar.gz` and it is located in the current working directory, just run the following: ``` -install.packages("mdatools_0.11.5.tar.gz") +install.packages("mdatools_0.12.0.tar.gz") ``` If you have `devtools` package installed, the following command will install the current developer version from the master branch of GitHub repository (do not forget to load the `devtools` package first): diff --git a/man/employ.Rd b/man/employ.Rd deleted file mode 100644 index f4810d4..0000000 --- a/man/employ.Rd +++ /dev/null @@ -1,18 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/defaults.R -\name{employ} -\alias{employ} -\title{Generic function to apply a method (e.g. constraint)} -\usage{ -employ(obj, x, d) -} -\arguments{ -\item{obj}{constraint object} - -\item{x}{data in question (e.g. resolved spectra)} - -\item{d}{matrix with original data} -} -\description{ -Generic function to apply a method (e.g. 
constraint) -} diff --git a/man/employ.constraint.Rd b/man/employ.constraint.Rd index bd14f86..fb561a0 100644 --- a/man/employ.constraint.Rd +++ b/man/employ.constraint.Rd @@ -4,7 +4,7 @@ \alias{employ.constraint} \title{Applies constraint to a dataset} \usage{ -\method{employ}{constraint}(obj, x, d) +employ.constraint(obj, x, d, ...) } \arguments{ \item{obj}{object with constraint} @@ -12,6 +12,8 @@ \item{x}{matrix with pure spectra or contributions} \item{d}{matrix with original spectral values} + +\item{...}{other arguments} } \description{ Applies constraint to a dataset diff --git a/man/employ.prep.Rd b/man/employ.prep.Rd new file mode 100644 index 0000000..f752f66 --- /dev/null +++ b/man/employ.prep.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/prep.R +\name{employ.prep} +\alias{employ.prep} +\title{Applies a list with preprocessing methods to a dataset} +\usage{ +employ.prep(obj, x, ...) +} +\arguments{ +\item{obj}{list with preprocessing methods (created using \code{prep} function).} + +\item{x}{matrix with dataset} + +\item{...}{other arguments} +} +\description{ +Applies a list with preprocessing methods to a dataset +} diff --git a/man/getImplementedPrepMethods.Rd b/man/getImplementedPrepMethods.Rd new file mode 100644 index 0000000..394d3b3 --- /dev/null +++ b/man/getImplementedPrepMethods.Rd @@ -0,0 +1,11 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/prep.R +\name{getImplementedPrepMethods} +\alias{getImplementedPrepMethods} +\title{Shows a list with implemented preprocessing methods} +\usage{ +getImplementedPrepMethods() +} +\description{ +Shows a list with implemented preprocessing methods +} diff --git a/man/plot.regcoeffs.Rd b/man/plot.regcoeffs.Rd index 88f3a4f..6c0f576 100644 --- a/man/plot.regcoeffs.Rd +++ b/man/plot.regcoeffs.Rd @@ -13,6 +13,7 @@ show.lines = c(NA, 0), show.ci = FALSE, alpha = 0.05, + ylab = paste0("Coefficients (", x$respnames[ny], ")"), ... ) } @@ -35,6 +36,8 @@ show confidence intervals)} \item{alpha}{significance level for confidence intervals (a number between 0 and 1, e.g. for 95\% alpha = 0.05)} +\item{ylab}{label for y-axis} + \item{...}{other arguments for plotting methods (e.g. main, xlab, etc)} } \description{ diff --git a/man/plotQDoF.Rd b/man/plotQDoF.Rd index 261abf5..464d299 100644 --- a/man/plotQDoF.Rd +++ b/man/plotQDoF.Rd @@ -4,7 +4,14 @@ \alias{plotQDoF} \title{Degrees of freedom plot for orthogonal distance (Nh)} \usage{ -plotQDoF(obj, type = "b", labels = "values", xticks = seq_len(obj$ncomp), ...) +plotQDoF( + obj, + type = "b", + labels = "values", + xticks = seq_len(obj$ncomp), + ylab = "Nq", + ... +) } \arguments{ \item{obj}{a PCA model (object of class \code{pca})} @@ -15,6 +22,8 @@ plotQDoF(obj, type = "b", labels = "values", xticks = seq_len(obj$ncomp), ...) \item{xticks}{vector with tick values for x-axis} +\item{ylab}{label for y-axis} + \item{...}{other plot parameters (see \code{mdaplotg} for details)} } \description{ diff --git a/man/plotRMSE.regmodel.Rd b/man/plotRMSE.regmodel.Rd index f32deef..756da8d 100644 --- a/man/plotRMSE.regmodel.Rd +++ b/man/plotRMSE.regmodel.Rd @@ -11,6 +11,7 @@ labels = "values", xticks = seq_len(obj$ncomp), res = obj$res, + ylab = paste0("RMSE (", obj$res$cal$respnames[ny], ")"), ... 
) } @@ -27,6 +28,8 @@ \item{res}{list with result objects} +\item{ylab}{label for y-axis} + \item{...}{other plot parameters (see \code{mdaplotg} for details)} } \description{ diff --git a/man/plotRMSE.regres.Rd b/man/plotRMSE.regres.Rd index 3ef2fdf..90cfb85 100644 --- a/man/plotRMSE.regres.Rd +++ b/man/plotRMSE.regres.Rd @@ -11,6 +11,7 @@ xticks = seq_len(obj$ncomp), labels = "values", show.plot = TRUE, + ylab = paste0("RMSE (", obj$respnames[ny], ")"), ... ) } @@ -27,6 +28,8 @@ \item{show.plot}{logical, show plot or just return plot data} +\item{ylab}{label for y-axis} + \item{...}{other plot parameters (see \code{mdaplot} for details)} } \description{ diff --git a/man/plotT2DoF.Rd b/man/plotT2DoF.Rd index f902218..f946b7a 100644 --- a/man/plotT2DoF.Rd +++ b/man/plotT2DoF.Rd @@ -4,7 +4,14 @@ \alias{plotT2DoF} \title{Degrees of freedom plot for score distance (Nh)} \usage{ -plotT2DoF(obj, type = "b", labels = "values", xticks = seq_len(obj$ncomp), ...) +plotT2DoF( + obj, + type = "b", + labels = "values", + xticks = seq_len(obj$ncomp), + ylab = "Nh", + ... +) } \arguments{ \item{obj}{a PCA model (object of class \code{pca})} @@ -15,6 +22,8 @@ plotT2DoF(obj, type = "b", labels = "values", xticks = seq_len(obj$ncomp), ...) \item{xticks}{vector with tick values for x-axis} +\item{ylab}{label for y-axis} + \item{...}{other plot parameters (see \code{mdaplotg} for details)} } \description{ diff --git a/man/plotVariance.ldecomp.Rd b/man/plotVariance.ldecomp.Rd index 483c380..17baa71 100644 --- a/man/plotVariance.ldecomp.Rd +++ b/man/plotVariance.ldecomp.Rd @@ -11,6 +11,7 @@ labels = "values", xticks = seq_len(obj$ncomp), show.plot = TRUE, + ylab = "Explained variance, \%", ... ) } @@ -27,6 +28,8 @@ \item{show.plot}{logical, shall plot be created or just plot series object is needed} +\item{ylab}{label for y-axis} + \item{...}{most of graphical parameters from \code{\link{mdaplot}} function can be used.} } \description{ diff --git a/man/plotVariance.pca.Rd b/man/plotVariance.pca.Rd index 2420677..90ef031 100644 --- a/man/plotVariance.pca.Rd +++ b/man/plotVariance.pca.Rd @@ -11,6 +11,7 @@ variance = "expvar", xticks = seq_len(obj$ncomp), res = obj$res, + ylab = "Explained variance, \%", ... ) } @@ -27,6 +28,8 @@ \item{res}{list with result objects to show the variance for} +\item{ylab}{label for y-axis} + \item{...}{other plot parameters (see \code{mdaplotg} for details)} } \description{ diff --git a/man/plotVariance.pls.Rd b/man/plotVariance.pls.Rd index 41a8be2..79cd275 100644 --- a/man/plotVariance.pls.Rd +++ b/man/plotVariance.pls.Rd @@ -11,6 +11,7 @@ type = "b", labels = "values", res = obj$res, + ylab = "Explained variance, \%", ... ) } @@ -27,6 +28,8 @@ \item{res}{list with result objects to show the plot for (by defaul, model results are used)} +\item{ylab}{label for y-axis} + \item{...}{other plot parameters (see \code{mdaplotg} for details)} } \description{ diff --git a/man/plotXResiduals.plsres.Rd b/man/plotXResiduals.plsres.Rd index 8ee5222..339a4ba 100644 --- a/man/plotXResiduals.plsres.Rd +++ b/man/plotXResiduals.plsres.Rd @@ -9,7 +9,7 @@ ncomp = obj$ncomp.selected, norm = TRUE, log = FALSE, - main = sprintf("X-residuals (ncomp = \%d)", ncomp), + main = sprintf("X-distances (ncomp = \%d)", ncomp), ... 
) } diff --git a/man/plotYResiduals.plsres.Rd b/man/plotYResiduals.plsres.Rd index 65fbc86..a11ba1b 100644 --- a/man/plotYResiduals.plsres.Rd +++ b/man/plotYResiduals.plsres.Rd @@ -4,20 +4,13 @@ \alias{plotYResiduals.plsres} \title{Y residuals plot for PLS results} \usage{ -\method{plotYResiduals}{plsres}( - obj, - ncomp = obj$ncomp.selected, - main = sprintf("Y-residuals (ncomp = \%d)", ncomp), - ... -) +\method{plotYResiduals}{plsres}(obj, ncomp = obj$ncomp.selected, ...) } \arguments{ \item{obj}{PLS results (object of class \code{plsres})} \item{ncomp}{how many components to use (if NULL - user selected optimal value will be used)} -\item{main}{main title for the plot} - \item{...}{other plot parameters (see \code{mdaplot} for details)} } \description{ diff --git a/man/prep.Rd b/man/prep.Rd new file mode 100644 index 0000000..3818520 --- /dev/null +++ b/man/prep.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/prep.R +\name{prep} +\alias{prep} +\title{Class for preprocessing object} +\usage{ +prep(name, params = NULL, method = NULL) +} +\arguments{ +\item{name}{short text with name for the preprocessing method.} + +\item{params}{a list with parameters for the method (if NULL - default parameters will be used).} + +\item{method}{method to call when applying the preprocessing, provide it only for user defined methods.} +} +\description{ +Class for preprocessing object +} +\details{ +Use this class to create a list with a sequence of preprocessing methods to keep them together +in the right order and with defined parameters. The list/object can be provided as an extra argument +to any modelling function (e.g. \code{pca}, \code{pls}, etc), so the optimal model parameters and +the optimal preprocessing will be stored together and can be applied to raw data by using +the method \code{predict}. + +For your own preprocessing method you need to create a function which takes a matrix with values +(dataset) as the first argument, does something with it and then returns a matrix with the same +dimensions and attributes as the result. The method can have any number of optional parameters. + +See Bookdown tutorial for details. +} diff --git a/man/prep.alsbasecorr.Rd b/man/prep.alsbasecorr.Rd index af3bb60..b34d0f0 100644 --- a/man/prep.alsbasecorr.Rd +++ b/man/prep.alsbasecorr.Rd @@ -4,10 +4,10 @@ \alias{prep.alsbasecorr} \title{Baseline correction using assymetric least squares} \usage{ -prep.alsbasecorr(spectra, plambda = 5, p = 0.1, max.niter = 10) +prep.alsbasecorr(data, plambda = 5, p = 0.1, max.niter = 10) } \arguments{ -\item{spectra}{matrix with spectra (rows correspond to individual spectra)} +\item{data}{matrix with spectra (rows correspond to individual spectra)} \item{plambda}{power of the penalty parameter (e.g. if plambda = 5, lambda = 10^5)} @@ -15,6 +15,9 @@ prep.alsbasecorr(spectra, plambda = 5, p = 0.1, max.niter = 10) \item{max.niter}{maximum number of iterations} } +\value{ +preprocessed spectra. +} \description{ Baseline correction using assymetric least squares } diff --git a/man/prep.list.Rd b/man/prep.list.Rd new file mode 100644 index 0000000..154d950 --- /dev/null +++ b/man/prep.list.Rd @@ -0,0 +1,11 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/prep.R +\name{prep.list} +\alias{prep.list} +\title{Shows information about all implemented preprocessing methods.} +\usage{ +prep.list() +} +\description{ +Shows information about all implemented preprocessing methods. 
+} diff --git a/man/prep.msc.Rd b/man/prep.msc.Rd index b851939..cb4c306 100644 --- a/man/prep.msc.Rd +++ b/man/prep.msc.Rd @@ -4,10 +4,10 @@ \alias{prep.msc} \title{Multiplicative Scatter Correction transformation} \usage{ -prep.msc(spectra, mspectrum = NULL) +prep.msc(data, mspectrum = NULL) } \arguments{ -\item{spectra}{a matrix with spectra values} +\item{data}{a matrix with data values (spectra)} \item{mspectrum}{mean spectrum (if NULL will be calculated from \code{spectra})} } diff --git a/man/prep.norm.Rd b/man/prep.norm.Rd index 10163d5..5e93c5e 100644 --- a/man/prep.norm.Rd +++ b/man/prep.norm.Rd @@ -4,16 +4,28 @@ \alias{prep.norm} \title{Normalization} \usage{ -prep.norm(data, type = "area") +prep.norm(data, type = "area", col.ind = NULL) } \arguments{ \item{data}{a matrix with data values} -\item{type}{type of normalization \code{"area"}, \code{"length"} or \code{"sum"}.} +\item{type}{type of normalization \code{"area"}, \code{"length"}, \code{"sum"}, \code{"snv"}, or \code{"is"}.} + +\item{col.ind}{indices of columns (can be either integer or logical values) for normalization to internal standard peak.} } \value{ data matrix with normalized values } \description{ -Normalizes signals (rows of data matrix) to unit area, unit length or unit sum +Normalizes signals (rows of data matrix). +} +\details{ +The \code{"area"}, \code{"length"} and \code{"sum"} types normalize every row of the data matrix +to unit area (sum of absolute values), unit length or unit sum, respectively. Type \code{"snv"} +does the Standard Normal Variate normalization, same as \code{\link{prep.snv}}. Type +\code{"is"} normalizes the rows to an internal standard peak, whose position is defined by +parameter `col.ind`. If the position is a single value, the rows are normalized to the height +of this peak. If `col.ind` points to several adjacent values, the rows are normalized to the area +under the peak, i.e. the sum of the corresponding intensities. } diff --git a/man/prep.ref2km.Rd b/man/prep.ref2km.Rd index 3b16ea7..3bf6cf7 100644 --- a/man/prep.ref2km.Rd +++ b/man/prep.ref2km.Rd @@ -4,10 +4,10 @@ \alias{prep.ref2km} \title{Kubelka-Munk transformation} \usage{ -prep.ref2km(spectra) +prep.ref2km(data) } \arguments{ -\item{spectra}{a matrix with spectra values (absolute reflectance values)} +\item{data}{a matrix with spectra values (absolute reflectance values)} } \value{ preprocessed spectra. diff --git a/man/prep.savgol.Rd b/man/prep.savgol.Rd index 66a1fca..e95b3a0 100644 --- a/man/prep.savgol.Rd +++ b/man/prep.savgol.Rd @@ -18,3 +18,11 @@ prep.savgol(data, width = 3, porder = 1, dorder = 0) \description{ Applies Savytzky-Golay filter to the rows of data matrix } +\details{ +The function implements the algorithm described in [1], which handles the edge points correctly and +does not require cutting the spectra. +} +\references{ +1. Peter A. Gorry. General least-squares smoothing and differentiation by the convolution +(Savitzky-Golay) method. Anal. Chem. 1990, 62, 6, 570–573, https://doi.org/10.1021/ac00205a007. +} diff --git a/man/prep.transform.Rd b/man/prep.transform.Rd new file mode 100644 index 0000000..57a6d03 --- /dev/null +++ b/man/prep.transform.Rd @@ -0,0 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/prep.R +\name{prep.transform} +\alias{prep.transform} +\title{Transformation} +\usage{ +prep.transform(data, fun, ...) +} +\arguments{ +\item{data}{a matrix with data values} + +\item{fun}{reference to a transformation function, e.g. 
diff --git a/man/prep.ref2km.Rd b/man/prep.ref2km.Rd
index 3b16ea7..3bf6cf7 100644
--- a/man/prep.ref2km.Rd
+++ b/man/prep.ref2km.Rd
@@ -4,10 +4,10 @@
 \alias{prep.ref2km}
 \title{Kubelka-Munk transformation}
 \usage{
-prep.ref2km(spectra)
+prep.ref2km(data)
 }
 \arguments{
-\item{spectra}{a matrix with spectra values (absolute reflectance values)}
+\item{data}{a matrix with spectra values (absolute reflectance values)}
 }
 \value{
 preprocessed spectra.
diff --git a/man/prep.savgol.Rd b/man/prep.savgol.Rd
index 66a1fca..e95b3a0 100644
--- a/man/prep.savgol.Rd
+++ b/man/prep.savgol.Rd
@@ -18,3 +18,11 @@ prep.savgol(data, width = 3, porder = 1, dorder = 0)
 \description{
 Applies Savitzky-Golay filter to the rows of data matrix
 }
+\details{
+The function implements the algorithm described in [1], which handles the edge points correctly and
+does not require cutting the spectra.
+}
+\references{
+1. Peter A. Gorry. General least-squares smoothing and differentiation by the convolution
+(Savitzky-Golay) method. Anal. Chem. 1990, 62, 6, 570-573, https://doi.org/10.1021/ac00205a007.
+}
diff --git a/man/prep.transform.Rd b/man/prep.transform.Rd
new file mode 100644
index 0000000..57a6d03
--- /dev/null
+++ b/man/prep.transform.Rd
@@ -0,0 +1,40 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/prep.R
+\name{prep.transform}
+\alias{prep.transform}
+\title{Transformation}
+\usage{
+prep.transform(data, fun, ...)
+}
+\arguments{
+\item{data}{a matrix with data values}
+
+\item{fun}{reference to a transformation function, e.g. `log` or `function(x) x^2`.}
+
+\item{...}{optional parameters for the transformation function}
+}
+\value{
+data matrix with transformed values
+}
+\description{
+Transforms values using any mathematical function (e.g. log).
+}
+\examples{
+# generate a matrix with two columns
+y <- cbind(rnorm(100, 10, 1), rnorm(100, 20, 2))

+# apply log transformation
+py1 = prep.transform(y, log)
+
+# apply power transformation
+py2 = prep.transform(y, function(x) x^-1.25)
+
+# show distributions
+par(mfrow = c(2, 3))
+for (i in 1:2) {
+  hist(y[, i], main = paste0("Original values, column #", i))
+  hist(py1[, i], main = paste0("Log-transformed, column #", i))
+  hist(py2[, i], main = paste0("Power-transformed, column #", i))
+}
+
+}
diff --git a/man/prep.varsel.Rd b/man/prep.varsel.Rd
new file mode 100644
index 0000000..a2febed
--- /dev/null
+++ b/man/prep.varsel.Rd
@@ -0,0 +1,19 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/prep.R
+\name{prep.varsel}
+\alias{prep.varsel}
+\title{Variable selection}
+\usage{
+prep.varsel(data, var.ind)
+}
+\arguments{
+\item{data}{a matrix with data values}
+
+\item{var.ind}{indices of variables (columns) to select, can be either numeric or logical}
+}
+\value{
+data matrix with the selected variables (columns)
+}
+\description{
+Returns dataset with selected variables
+}
diff --git a/man/simca.Rd b/man/simca.Rd
index 2a4dd1b..b0fc59c 100644
--- a/man/simca.Rd
+++ b/man/simca.Rd
@@ -32,7 +32,6 @@ simca(
 \value{
 Returns an object of \code{simca} class with following fields:
 \item{classname }{a short text with class name.}
-\item{modpower }{a matrix with modelling power of variables.}
 \item{calres }{an object of class \code{\link{simcares}} with classification results for a calibration data.}
 \item{testres }{an object of class \code{\link{simcares}} with classification results for a test
diff --git a/tests/testthat/test-constraints.R b/tests/testthat/test-constraints.R
index c4fb12a..303fde1 100644
--- a/tests/testthat/test-constraints.R
+++ b/tests/testthat/test-constraints.R
@@ -8,21 +8,21 @@ context("mcrals: test implemented constrains")
 for (cl in getImplementedConstraints()) {
   test_that(sprintf("%s works correctly with default parameters", cl$name), {
     expect_silent(cn <- constraint(cl$name, cl$params))
-    expect_silent(Sr <- employ(cn, S, D))
+    expect_silent(Sr <- employ.constraint(cn, S, D))
     expect_true(is.matrix(Sr))
     expect_equal(dim(Sr), dim(S))
   })
 }

-#!############################
-#! Non-negativity constraint #
-#!############################
+#############################
+# Non-negativity constraint #
+#############################

 # add one of the existing constraints with default parameters
 test_that("Non-negativity constraint works correctly", {
   S[sample(1:length(S))[1:10]] <- -10
   cn <- constraint("nonneg")
-  Sr <- employ(cn, S, D)
+  Sr <- employ.constraint(cn, S, D)
   expect_equal(sum(Sr < 0), 0)
 })

@@ -30,14 +30,14 @@ test_that("Non-negativity constraint with user defined parameters (wrong)", {
   expect_error(cn <- constraint("non-negativity", params = list(type = "xxx")))
 })

-#!###########################
-#! Normalization constraint #
-#!###########################
+############################
+# Normalization constraint #
+############################
 # add one of the existing constraints with default parameters
 test_that("Normalization constraint works correctly", {
   cn <- constraint("norm")
-  Sr <- employ(cn, S, D)
+  Sr <- employ.constraint(cn, S, D)

   # all columns should have unit length
   expect_equivalent(colSums(Sr^2), rep(1, ncol(Sr)))
@@ -45,13 +45,13 @@ test_that("Normalization constraint works correctly", {

 test_that("Normalization constraint with user defined parameters (correct)", {
   cn <- constraint("norm", params = list(type = "area"))
-  Sr <- employ(cn, S, D)
+  Sr <- employ.constraint(cn, S, D)

   # all columns should have unit area
   expect_equivalent(colSums(abs(Sr)), rep(1, ncol(Sr)))

   cn <- constraint("norm", params = list(type = "sum"))
-  Sr <- employ(cn, S, D)
+  Sr <- employ.constraint(cn, S, D)

   # all columns should have unit sum
   expect_equivalent(colSums(Sr), rep(1, ncol(Sr)))

@@ -62,14 +62,14 @@ test_that("Normalization constraint with user defined parameters (wrong)", {
 })

-#!###################
-#! Angle constraint #
-#!###################
+####################
+# Angle constraint #
+####################

 # add one of the existing constraints with default parameters
 test_that("Angle constraint works correctly", {
   cn <- constraint("angle")
-  Sr <- employ(cn, S, D)
+  Sr <- employ.constraint(cn, S, D)

   # compute the expected values
   m <- apply(D, 2, mean)
@@ -83,13 +83,13 @@ test_that("Angle constraint works correctly", {

 test_that("Angle constraint with user defined parameters (correct)", {
   cn <- constraint("angle", params = list(weight = 0))
-  Sr <- employ(cn, S, D)
+  Sr <- employ.constraint(cn, S, D)

   # all columns should have unit length
   expect_equivalent(Sr, t(prep.norm(t(S), "length")))

   cn <- constraint("angle", params = list(weight = 1))
-  Sr <- employ(cn, S, D)
+  Sr <- employ.constraint(cn, S, D)

   # compute the expected values
   m <- apply(D, 2, mean)
@@ -103,9 +103,9 @@ test_that("Angle constraint with user defined parameters (wrong)", {
 })

-#!###############
-#! Unimodality #
-#!###############
+################
+# Unimodality #
+################

 # generate data with extra peaks
 x <- 1:500
@@ -125,10 +125,10 @@ check_unimodality <- function(y, tol = 0) {

 test_that("Unimodality constraint works correctly", {
   cn1 <- constraint("unimod")
-  y.new1 <- employ(cn1, y, NULL)
+  y.new1 <- employ.constraint(cn1, y, NULL)

   cn2 <- constraint("unimod", params = list(tol = 0.2))
-  y.new2 <- employ(cn2, y, NULL)
+  y.new2 <- employ.constraint(cn2, y, NULL)

   expect_true(all(apply(y, 2, check_unimodality, tol = 0) > 0))
   expect_true(all(apply(y.new1, 2, check_unimodality, tol = 0) < 0.00000001))
@@ -136,9 +136,9 @@
 })

-#!###############
-#! Closure #
-#!###############
+################
+# Closure #
+################

 # generate data with violation of the closure
 x <- 1:50
@@ -150,10 +150,10 @@
 y <- y + matrix(rnorm(length(y), 0, max(y) * 0.01), nrow(y), ncol(y))

 test_that("Closure constraint works correctly", {
   cn1 <- constraint("closure")
-  y.new1 <- employ(cn1, y, NULL)
+  y.new1 <- employ.constraint(cn1, y, NULL)

   cn2 <- constraint("closure", params = list(sum = 10))
-  y.new2 <- employ(cn2, y, NULL)
+  y.new2 <- employ.constraint(cn2, y, NULL)

   expect_true(length(unique(rowSums(y))) > 1)
   expect_true(all(abs(rowSums(y.new1) - 1) < 10^-8))
diff --git a/tests/testthat/test-plotseries.R b/tests/testthat/test-plotseries.R
index 7b8d2bf..c5a007b 100644
--- a/tests/testthat/test-plotseries.R
+++ b/tests/testthat/test-plotseries.R
@@ -213,11 +213,20 @@ test_that("xaxis.name argument is handled correctly", {
 test_that("yaxis.name argument is handled correctly", {
   yaxis.name <- "My y variable"
   attr(x, "yaxis.name") <- yaxis.name
+
+  # for non-scatter plots it is ignored
   for (type in nonscatter_plots) {
     res <- splitPlotData(preparePlotData(x)$visible, type)
-    expect_equal(attr(res$y_values, "name"), yaxis.name)
+    expect_equal(attr(res$y_values, "name"), "")
+  }
+
+  # if transposed it becomes xaxis.name
+  for (type in nonscatter_plots) {
+    res <- splitPlotData(preparePlotData(mda.t(x))$visible, type)
+    expect_equal(attr(res$x_values, "name"), yaxis.name)
   }

+  # for scatter plots it is ignored and column names are used instead
   for (type in scatter_plots) {
     res <- splitPlotData(preparePlotData(x)$visible, type)
     expect_equal(attr(res$y_values, "name"), "X2")
diff --git a/tests/testthat/test-prep.R b/tests/testthat/test-prep.R
index 3a33763..c0d7531 100644
--- a/tests/testthat/test-prep.R
+++ b/tests/testthat/test-prep.R
@@ -65,6 +65,13 @@ test_that("SNV works correctly", {
   expect_equal(mda.getattr(spectra), mda.getattr(pspectra))
   expect_equivalent(apply(pspectra, 1, mean), rep(0, nrow(spectra)))
   expect_equivalent(apply(pspectra, 1, sd), rep(1, nrow(spectra)))
+
+  spectra <- simdata$spectra.c
+  expect_silent(pspectra <- prep.norm(spectra, type = "snv"))
+  expect_equal(mda.getattr(spectra), mda.getattr(pspectra))
+  expect_equivalent(apply(pspectra, 1, mean), rep(0, nrow(spectra)))
+  expect_equivalent(apply(pspectra, 1, sd), rep(1, nrow(spectra)))
+
 })

 test_that("MSC works correctly", {
@@ -110,7 +117,38 @@ test_that("Normalization to unit length works correctly", {
   expect_equivalent(apply(pspectra, 1, function(x) sqrt(sum(x^2))), rep(1, nrow(pspectra)))
 })

+test_that("Normalization to unit sum works correctly", {
+  spectra <- simdata$spectra.c
+  expect_silent(pspectra <- prep.norm(spectra, "sum"))
+  expect_equal(mda.getattr(spectra), mda.getattr(pspectra))
+  expect_equivalent(apply(pspectra, 1, function(x) sum(x)), rep(1, nrow(pspectra)))
+})
+
+test_that("Normalization to IS peak works (one value)", {
+  spectra <- simdata$spectra.c
+  expect_error(prep.norm(spectra, "is"))
+  expect_error(prep.norm(spectra, "is", col.ind = -1))
+  expect_error(prep.norm(spectra, "is", col.ind = 1000))
+  expect_error(prep.norm(spectra, "is", col.ind = c(1, 1000)))
+  expect_error(prep.norm(spectra, "is", col.ind = c(-1, 100)))
+
+
+  col.ind <- 100
+  expect_silent(pspectra <- prep.norm(spectra, type = "is", col.ind))
+  expect_equal(mda.getattr(spectra), mda.getattr(pspectra))
+  expect_equivalent(apply(pspectra[, col.ind, drop = FALSE], 1, function(x) sum(x)), rep(1, nrow(pspectra)))
+
+  col.ind <- 100:110
+  expect_silent(pspectra <- prep.norm(spectra, type = "is", col.ind))
+  expect_equal(mda.getattr(spectra), mda.getattr(pspectra))
+  expect_equivalent(apply(pspectra[, col.ind, drop = FALSE], 1, function(x) sum(x)), rep(1, nrow(pspectra)))
+
+  col.ind <- 1:ncol(spectra)
+  pspectra1 <- prep.norm(spectra, type = "is", col.ind)
+  pspectra2 <- prep.norm(spectra, type = "sum")
+  expect_equivalent(pspectra1, pspectra2)
+})

 test_that("Kubelka-Munk works correctly", {
   spectra <- simdata$spectra.c
@@ -151,6 +189,22 @@ test_that("SavGol smoothing works correctly", {
   expect_error(pspectra <- prep.savgol(spectra, 3, 1, 2))
   expect_error(pspectra <- prep.savgol(spectra, 3, 2, 4))

+  # check that the derivatives are correct based on sin(x) and cos(x)
+  x <- pi * seq(-2.4, 2.4, by = 0.1)
+  y <- matrix(sin(x), nrow = 1)
+  zeros <- which((abs(y) < 10^-12))
+  ones <- which((abs(y) > 0.999999))
+
+  # first derivative should have zeros where the original function has extrema
+  dy2 <- prep.savgol(y, 3, 1, 1)
+  expect_equal(dim(dy2), dim(y))
+  expect_equivalent(which((abs(dy2) < 10^-12)), ones)
+
+  # second derivative should have zeros where the original function has zeros
+  dy3 <- prep.savgol(y, 3, 2, 2)
+  expect_equal(dim(dy3), dim(y))
+  expect_equivalent(which((abs(dy3) < 10^-12)), zeros)
+
 })

 context("prep: alsbasecorr")
@@ -185,3 +239,196 @@ test_that("ALS baseline correction works correctly", {
   expect_true(err_after3 < err_after1)
   expect_true(err_after3 < err_after2)
 })
+
+context("prep: transform")
+
+test_that("Transformation works correctly", {
+  x <- -5:5
+  y <- cbind(exp(x), x^2)
+  colnames(y) <- c("Y1", "Y2")
+  attr(y, "yaxis.values") <- x
+  attr(y, "yaxis.name") <- "Time to sleep"
+
+  # errors
+
+  # normal behaviour, no extra parameters
+  yp <- prep.transform(y, log)
+  expect_equal(dim(yp), dim(y))
+  expect_equal(colnames(yp), colnames(y))
+  expect_equal(rownames(yp), rownames(y))
+  expect_equal(mda.getattr(yp), mda.getattr(y))
+  expect_equivalent(yp, log(y))
+
+  # normal behaviour, extra parameters
+  yp <- prep.transform(y, log, 2)
+  expect_equal(dim(yp), dim(y))
+  expect_equal(colnames(yp), colnames(y))
+  expect_equal(rownames(yp), rownames(y))
+  expect_equal(mda.getattr(yp), mda.getattr(y))
+  expect_equivalent(yp, log2(y))
+
+  # normal behaviour, extra parameters, user defined function
+  yp <- prep.transform(y, function(x, p) x^p, 1.25)
+  expect_equal(dim(yp), dim(y))
+  expect_equal(colnames(yp), colnames(y))
+  expect_equal(rownames(yp), rownames(y))
+  expect_equal(mda.getattr(yp), mda.getattr(y))
+  expect_equivalent(yp, y^1.25)
+
+});
+
+context("prep: varsel")
+
+test_that("Variable selection works correctly", {
+
+  checkprep <- function(x, xp, ind) {
+    expect_equal(ncol(xp), length(ind))
+    expect_equal(nrow(xp), nrow(x))
+    expect_equal(attr(xp, "yaxis.name"), attr(x, "yaxis.name"))
+    expect_equal(attr(xp, "yaxis.values"), attr(x, "yaxis.values"))
+    expect_equal(attr(xp, "xaxis.name"), attr(x, "xaxis.name"))
+    expect_equal(attr(xp, "xaxis.values"), attr(x, "xaxis.values")[ind])
+    expect_equal(attr(xp, "exclrows"), attr(x, "exclrows"))
+  }
+
+  x <- simdata$spectra.c
+  attr(x, "xaxis.values") <- simdata$wavelength
+  attr(x, "xaxis.name") <- "Wavelength, nm"
+  attr(x, "yaxis.values") <- seq_len(nrow(x)) * 10
+  attr(x, "yaxis.name") <- "Time, s"
+  x <- mda.exclrows(x, c(1, 20, 30, 70))
+
+  ind <- 21:140
+  expect_silent(xp <- prep.varsel(x, ind))
+  checkprep(x, xp, ind)
+
+  ind <- c(21:30, 130:140)
+  expect_silent(xp <- prep.varsel(x, ind))
+  checkprep(x, xp, ind)
+
+  ind <- rep(FALSE, ncol(x))
+  ind[21:140] <- TRUE
+  expect_silent(xp <- prep.varsel(x, ind))
+  checkprep(x, xp, which(ind))
+
+  ind <- rep(FALSE, ncol(x))
+  ind[c(11:20, 130:140)] <- TRUE
+  expect_silent(xp <- prep.varsel(x, ind))
+  checkprep(x, xp, which(ind))
+
+  # errors
+  x <- mda.exclcols(x, c(1, 150))
+  expect_error(xp <- prep.varsel(x, 20:140))
+
+});
+
+
+context("prep: combine methods together")
+
+test_that("List of available methods is shown correctly", {
+  expect_output(prep.list())
+})
+
+test_that("Errors are raised when necessary", {
+  expect_error(prep("varsel", var.ind = 50:150))
+  expect_error(prep("snv120"))
+})
+
+test_that("Method works with one preprocessing method in the list", {
+
+  x <- simdata$spectra.c
+  attr(x, "xaxis.values") <- simdata$wavelength
+  attr(x, "xaxis.name") <- "Wavelength, nm"
+  attr(x, "yaxis.values") <- seq_len(nrow(x)) * 10
+  attr(x, "yaxis.name") <- "Time, s"
+  x <- mda.exclrows(x, c(1, 20, 30, 70))
+
+  p <- list(
+    prep("savgol", list(width = 11, porder = 2, dorder = 1))
+  )
+
+  px1 <- employ.prep(p, x)
+  px2 <- prep.savgol(x, width = 11, porder = 2, dorder = 1)
+
+  expect_equal(px1, px2)
+  expect_equal(mda.getattr(px1), mda.getattr(x))
+  expect_equal(mda.getattr(px2), mda.getattr(x))
+})
+
+test_that("Method works with several preprocessing methods in the list", {
+
+  x <- simdata$spectra.c
+  attr(x, "xaxis.values") <- simdata$wavelength
+  attr(x, "xaxis.name") <- "Wavelength, nm"
+  attr(x, "yaxis.values") <- seq_len(nrow(x)) * 10
+  attr(x, "yaxis.name") <- "Time, s"
+  x <- mda.exclrows(x, c(1, 20, 30, 70))
+
+  p <- list(
+    prep("savgol", list(width = 11, porder = 2, dorder = 1)),
+    prep("snv"),
+    prep("autoscale", list(center = TRUE, scale = TRUE))
+  )
+
+  px1 <- employ.prep(p, x)
+  px2 <- x
+  px2 <- prep.savgol(px2, width = 11, porder = 2, dorder = 1)
+  px2 <- prep.snv(px2)
+  px2 <- prep.autoscale(px2, center = TRUE, scale = TRUE)
+
+  expect_equal(px1, px2)
+  expect_equal(mda.getattr(px1), mda.getattr(x))
+  expect_equal(mda.getattr(px2), mda.getattr(x))
+})
+
+test_that("Method works with preprocessing methods and variable selection", {
+
+  x <- simdata$spectra.c
+  attr(x, "xaxis.values") <- simdata$wavelength
+  attr(x, "xaxis.name") <- "Wavelength, nm"
+  attr(x, "yaxis.values") <- seq_len(nrow(x)) * 10
+  attr(x, "yaxis.name") <- "Time, s"
+  x <- mda.exclrows(x, c(1, 20, 30, 70))
+
+  p <- list(
+    prep("savgol", list(width = 11, porder = 2, dorder = 1)),
+    prep("snv"),
+    prep("autoscale", list(center = TRUE, scale = TRUE)),
+    prep("varsel", list(var.ind = 50:130))
+  )
+
+  px1 <- employ.prep(p, x)
+  px2 <- x
+  px2 <- prep.savgol(px2, width = 11, porder = 2, dorder = 1)
+  px2 <- prep.snv(px2)
+  px2 <- prep.autoscale(px2, center = TRUE, scale = TRUE)
+  px2 <- mda.subset(px2, select = 50:130)
+
+  expect_equal(px1, px2)
+  expect_equal(mda.getattr(px1), mda.getattr(px2))
+})
+
+
+test_that("Method works with user defined preprocessing method", {
+
+  x <- simdata$spectra.c
+  attr(x, "xaxis.values") <- simdata$wavelength
+  attr(x, "xaxis.name") <- "Wavelength, nm"
+  attr(x, "yaxis.values") <- seq_len(nrow(x)) * 10
+  attr(x, "yaxis.name") <- "Time, s"
+  x <- mda.exclrows(x, c(1, 20, 30, 70))
+
+  p <- list(
+    prep("r2a", method = function(data) log(1/abs(data))),
+    prep("savgol", list(width = 11, porder = 2, dorder = 1)),
+    prep("snv")
+  )
+
+  px1 <- employ.prep(p, x)
+  px2 <- log(1/abs(x))
+  px2 <- prep.savgol(px2, width = 11, porder = 2, dorder = 1)
+  px2 <- prep.snv(px2)
+
+  expect_equal(px1, px2)
+  expect_equal(mda.getattr(px1), mda.getattr(px2))
+})
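As the last test above shows, a user defined method only has to take the data matrix as its first argument and return a matrix with the same dimension and attributes (the contract stated in man/prep.Rd). A minimal sketch of such a method; the name `offset` and its parameter are made-up illustrations, and it is assumed that parameters given via `params` are passed on to user defined methods the same way as to built-in ones:

```r
library(mdatools)

# hypothetical user defined preprocessing method: adds a constant offset;
# attributes are copied explicitly so the mda.* metadata survives the step
offset <- function(data, value = 1) {
  out <- data + value
  attributes(out) <- attributes(data)
  return(out)
}

p <- list(
  prep("offset", list(value = 10), method = offset),
  prep("snv")
)

data(simdata)
pspectra <- employ.prep(p, simdata$spectra.c)
```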