From 4aed860e5292a0a5b7df0c99981c7e004fbfde24 Mon Sep 17 00:00:00 2001 From: Sergey Kucheryavskiy Date: Fri, 22 Jan 2021 10:03:00 +0100 Subject: [PATCH] refactoring og preprocessing methods and added new: prep.ref2km() --- R/prep.R | 250 ++++++++++++++++++++----------------- man/prep.generic.Rd | 18 +++ man/prep.ref2km.Rd | 22 ++++ tests/testthat/test-prep.R | 19 ++- 4 files changed, 191 insertions(+), 118 deletions(-) create mode 100644 man/prep.generic.Rd create mode 100644 man/prep.ref2km.Rd diff --git a/R/prep.R b/R/prep.R index c20474b..1024792 100755 --- a/R/prep.R +++ b/R/prep.R @@ -26,42 +26,40 @@ #' @export prep.autoscale <- function(data, center = TRUE, scale = FALSE, max.cov = 0) { - attrs <- mda.getattr(data) - dimnames <- dimnames(data) + f <- function(data, center, scale, max.cov) { + # define values for centering + if (is.logical(center) && center) center <- apply(data, 2, mean) - # define values for centering - if (is.logical(center) && center) center <- apply(data, 2, mean) - - if (is.numeric(center) && length(center) != ncol(data)) { - stop("Number of values in 'center' should be the same as number of columns in 'daata'") - } + if (is.numeric(center) && length(center) != ncol(data)) { + stop("Number of values in 'center' should be the same as number of columns in 'daata'") + } - # define values for weigting - if (is.logical(scale) && scale) scale <- apply(data, 2, sd) + # define values for weigting + if (is.logical(scale) && scale) scale <- apply(data, 2, sd) - if (is.numeric(scale) && length(scale) != ncol(data)) { - stop("Number of values in 'scale' should be the same as number of columns in 'daata'") - } + if (is.numeric(scale) && length(scale) != ncol(data)) { + stop("Number of values in 'scale' should be the same as number of columns in 'daata'") + } - # compute coefficient of variation and set scale to 1 if it is below - # a user defined threshold - if (is.numeric(scale)) { - m <- if (is.numeric(center)) center else apply(data, 2, mean) - cv <- scale / abs(m) * 100 - scale[is.nan(cv) | cv <= max.cov] <- 1 - } + # compute coefficient of variation and set scale to 1 if it is below + # a user defined threshold + if (is.numeric(scale)) { + m <- if (is.numeric(center)) center else apply(data, 2, mean) + cv <- scale / abs(m) * 100 + scale[is.nan(cv) | cv <= max.cov] <- 1 + } - # make autoscaling and attach preprocessing attributes - data <- scale(data, center = center, scale = scale) + # make autoscaling and attach preprocessing attributes + data <- scale(data, center = center, scale = scale) + attr(data, "scaled:center") <- NULL + attr(data, "scaled:scale") <- NULL + attr(data, "prep:center") <- center + attr(data, "prep:scale") <- scale - data <- mda.setattr(data, attrs) - attr(data, "scaled:center") <- NULL - attr(data, "scaled:scale") <- NULL - attr(data, "prep:center") <- center - attr(data, "prep:scale") <- scale - dimnames(data) <- dimnames + return(data) + } - return(data) + return(prep.generic(data, f, center = center, scale = scale, max.cov = max.cov)) } #' Standard Normal Variate transformation @@ -97,14 +95,9 @@ prep.autoscale <- function(data, center = TRUE, scale = FALSE, max.cov = 0) { #' #' @export prep.snv <- function(data) { - attrs <- mda.getattr(data) - dimnames <- dimnames(data) - - data <- t(scale(t(data), center = TRUE, scale = TRUE)) - data <- mda.setattr(data, attrs) - dimnames(data) <- dimnames - return(data) + f <- function(data) t(scale(t(data), center = TRUE, scale = TRUE)) + return(prep.generic(data, f)) } #' Normalization @@ -122,25 +115,21 @@ prep.snv <- function(data) { #' #' @export prep.norm <- function(data, type = "area") { - attrs <- mda.getattr(data) - dimnames <- dimnames(data) - - w <- switch( - type, - "area" = apply(abs(data), 1, sum), - "length" = sqrt(apply(data^2, 1, sum)), - "sum" = apply(data, 1, sum) - ) - - if (is.null(w)) { - stop("Wrong value for argument 'type'.") - } - data <- sweep(data, 1, w, "/") - data <- mda.setattr(data, attrs) - dimnames(data) <- dimnames + f <- function(data, type) { + + w <- switch( + type, + "area" = apply(abs(data), 1, sum), + "length" = sqrt(apply(data^2, 1, sum)), + "sum" = apply(data, 1, sum) + ) - return(data) + if (is.null(w)) stop("Wrong value for argument 'type'.") + return(sweep(data, 1, w, "/")) + } + + return(prep.generic(data, f, type = type)) } #' Savytzky-Golay filter @@ -159,45 +148,30 @@ prep.norm <- function(data, type = "area") { #' #' @export prep.savgol <- function(data, width = 3, porder = 1, dorder = 0) { - attrs <- mda.getattr(data) - dimnames <- dimnames(data) - - if (width < 3) { - stop("Filter width ('width') should be equal at least to 3.") - } - - if (width %% 2 != 1) { - stop("Filter width ('width') should be an odd integer number.") - } - - if (dorder < 0 || dorder > 2) { - stop("Wrong value for the derivative order (should be 0, 1, or 2).") - } - - if (porder < 0 || porder > 4) { - stop("Wrong value for the polynomial degree (should be integer number between 0 and 4).") - } - - if (porder < dorder) { - stop("Polynomal degree ('porder') should not be smaller the derivative order ('dorder').") - } - nobj <- nrow(data) - nvar <- ncol(data) - pdata <- matrix(0, ncol = nvar, nrow = nobj) + f <- function(data, width, porder, dorder) { + stopifnot("Filter width ('width') should be equal at least to 3." = width > 2) + stopifnot("Filter width ('width') should be an odd integer number." = width %% 2 == 1) + stopifnot("Wrong value for the derivative order (should be 0, 1, or 2)." = dorder %in% (0:2)) + stopifnot("Wrong value for the polynomial degree (should be integer number between 0 and 4)." = porder %in% (0:4)) + stopifnot("Polynomal degree ('porder') should not be smaller the derivative order ('dorder')." = porder >= dorder) + + nobj <- nrow(data) + nvar <- ncol(data) + pdata <- matrix(0, ncol = nvar, nrow = nobj) + + for (i in seq_len(nobj)) { + d <- data[i, ] + w <- (width - 1) / 2 + f <- pinv(outer(-w:w, 0:porder, FUN = "^")) + d <- convolve(d, rev(f[dorder + 1, ]), type = "o") + pdata[i, ] <- d[(w + 1) : (length(d) - w)] + } - for (i in seq_len(nobj)) { - d <- data[i, ] - w <- (width - 1) / 2 - f <- pinv(outer(-w:w, 0:porder, FUN = "^")) - d <- convolve(d, rev(f[dorder + 1, ]), type = "o") - pdata[i, ] <- d[(w + 1) : (length(d) - w)] + return(pdata) } - pdata <- mda.setattr(pdata, attrs) - dimnames(pdata) <- dimnames - - return(pdata) + return(prep.generic(data, f, width = width, porder = porder, dorder = dorder)) } #' Multiplicative Scatter Correction transformation @@ -233,49 +207,57 @@ prep.savgol <- function(data, width = 3, porder = 1, dorder = 0) { #' #' @export prep.msc <- function(spectra, mspectrum = NULL) { - attrs <- mda.getattr(spectra) - dimnames <- dimnames(spectra) - if (is.null(mspectrum)) { - mspectrum <- apply(spectra, 2, mean) - } + f <- function(spectra, mspectrum) { + if (is.null(mspectrum)) { + mspectrum <- apply(spectra, 2, mean) + } - if (!is.null(mspectrum)) { - dim(mspectrum) <- NULL - } + if (!is.null(mspectrum)) { + dim(mspectrum) <- NULL + } - if (length(mspectrum) != ncol(spectra)) { - stop("Length of 'mspectrum' should be the same as number of columns in 'spectra'.") - } + if (length(mspectrum) != ncol(spectra)) { + stop("Length of 'mspectrum' should be the same as number of columns in 'spectra'.") + } - cspectra <- matrix(0, nrow = nrow(spectra), ncol = ncol(spectra)) - for (i in seq_len(nrow(spectra))) { - coef <- coef(lm(spectra[i, ] ~ mspectrum)) - cspectra[i, ] <- (spectra[i, ] - coef[1]) / coef[2] - } + pspectra <- matrix(0, nrow = nrow(spectra), ncol = ncol(spectra)) + for (i in seq_len(nrow(spectra))) { + coef <- coef(lm(spectra[i, ] ~ mspectrum)) + pspectra[i, ] <- (spectra[i, ] - coef[1]) / coef[2] + } - cspectra <- mda.setattr(cspectra, attrs) - attr(cspectra, "mspectrum") <- mspectrum - dimnames(cspectra) <- dimnames + attr(pspectra, "mspectrum") <- mspectrum + return(pspectra) + } - return(cspectra) + return(prep.generic(spectra, f, mspectrum = mspectrum)) } -#' Pseudo-inverse matrix + +#' Kubelka-Munk transformation #' #' @description -#' Computes pseudo-inverse matrix using SVD +#' Applies Kubelka-Munk (km) transformation to data matrix (spectra) #' -#' @param data -#' a matrix with data values to compute inverse for +#' @param spectra +#' a matrix with spectra values (absolute reflectance values) +#' +#' @return +#' preprocessed spectra. +#' +#' @details +#' Kubelka-Munk is useful preprocessing method for diffuse reflection spectra (e.g. taken for +#' powders or rough surface). It transforms the reflectance spectra R to K/M units as follows: +#' (1 - R)^2 / 2R #' #' @export -pinv <- function(data) { - # Calculates pseudo-inverse of data matrix - s <- svd(data) - s$v %*% diag(1 / s$d) %*% t(s$u) -} +prep.ref2km <- function(spectra) { + stopifnot("Can't use Kubelka-Munk transformation as some of the values are zeros or negative." = all(spectra > 0)) + f <- function(x) (1 - x)^2 / (2 * x) + return(prep.generic(spectra, f)) +} #' Baseline correction using assymetric least squares #' @@ -347,3 +329,37 @@ prep.alsbasecorr <- function(spectra, plambda = 5, p = 0.1, max.niter = 10) { return(pspectra) } + +#' Pseudo-inverse matrix +#' +#' @description +#' Computes pseudo-inverse matrix using SVD +#' +#' @param data +#' a matrix with data values to compute inverse for +#' +#' @export +pinv <- function(data) { + # Calculates pseudo-inverse of data matrix + s <- svd(data) + s$v %*% diag(1 / s$d) %*% t(s$u) +} + +#' Generic function for preprocessing +#' +#' @param x +#' data matrix to be preprocessed +#' @param f +#' function for preprocessing +#' @param ... +#' arguments for the function f +#' +#' +prep.generic <- function(x, f, ...) { + attrs <- mda.getattr(x) + dimnames <- dimnames(x) + x.p <- f(x, ...) + x.p <- mda.setattr(x.p, attrs) + dimnames(x.p) <- dimnames + return(x.p) +} \ No newline at end of file diff --git a/man/prep.generic.Rd b/man/prep.generic.Rd new file mode 100644 index 0000000..400ca75 --- /dev/null +++ b/man/prep.generic.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/prep.R +\name{prep.generic} +\alias{prep.generic} +\title{Generic function for preprocessing} +\usage{ +prep.generic(x, f, ...) +} +\arguments{ +\item{x}{data matrix to be preprocessed} + +\item{f}{function for preprocessing} + +\item{...}{arguments for the function f} +} +\description{ +Generic function for preprocessing +} diff --git a/man/prep.ref2km.Rd b/man/prep.ref2km.Rd new file mode 100644 index 0000000..3b16ea7 --- /dev/null +++ b/man/prep.ref2km.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/prep.R +\name{prep.ref2km} +\alias{prep.ref2km} +\title{Kubelka-Munk transformation} +\usage{ +prep.ref2km(spectra) +} +\arguments{ +\item{spectra}{a matrix with spectra values (absolute reflectance values)} +} +\value{ +preprocessed spectra. +} +\description{ +Applies Kubelka-Munk (km) transformation to data matrix (spectra) +} +\details{ +Kubelka-Munk is useful preprocessing method for diffuse reflection spectra (e.g. taken for +powders or rough surface). It transforms the reflectance spectra R to K/M units as follows: +(1 - R)^2 / 2R +} diff --git a/tests/testthat/test-prep.R b/tests/testthat/test-prep.R index a866d1f..3a33763 100644 --- a/tests/testthat/test-prep.R +++ b/tests/testthat/test-prep.R @@ -57,7 +57,7 @@ test_that("Full autoscaling with limits for max.cov is correct for negative data }) -context("prep: snv, msc, norm") +context("prep: snv, msc, norm, km") test_that("SNV works correctly", { spectra <- simdata$spectra.c @@ -110,6 +110,23 @@ test_that("Normalization to unit length works correctly", { expect_equivalent(apply(pspectra, 1, function(x) sqrt(sum(x^2))), rep(1, nrow(pspectra))) }) + + +test_that("Kubelka-Munk works correctly", { + spectra <- simdata$spectra.c + spectra <- spectra - min(spectra) + 0.01 * max(spectra) + expect_silent(pspectra <- prep.ref2km(spectra)) + expect_equal(mda.getattr(spectra), mda.getattr(pspectra)) + expect_equal(dim(spectra), dim(pspectra)) + expect_true(all(pspectra > 0)) + + spectra[10, 10] <- 0 + expect_error(prep.km(spectra)) + + spectra[10, 10] <- -1 + expect_error(prep.km(spectra)) +}) + context("prep: savgol") test_that("SavGol smoothing works correctly", {