From 8c29a2acc6af22b0e8014bcf650f8962e99dbf70 Mon Sep 17 00:00:00 2001 From: Sergey Kucheryavskiy Date: Mon, 14 Apr 2014 22:08:42 +0200 Subject: [PATCH] v. 0.5.2 --- DESCRIPTION | 6 +++--- NEWS | 5 +++++ R/classmodel.R | 2 +- R/classres.R | 18 +++++------------- R/simca.R | 14 ++++---------- R/simcam.R | 2 +- README.md | 8 +++++--- man/pls.Rd | 1 + man/predict.simca.Rd | 3 ++- man/predict.simcam.Rd | 3 ++- man/simca.Rd | 4 ++-- man/simcam.Rd | 8 ++++---- test/test_plsda.R | 22 +++++++++++++++++++++- test/test_simca.R | 14 +++++++------- test/test_simcam.R | 14 +++++++------- 15 files changed, 70 insertions(+), 54 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 14f5c81..98c911f 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,10 +1,10 @@ Package: mdatools Title: Multivariate data analysis for chemometrics -Version: 0.5.1 -Date: 2014-04-13 +Version: 0.5.2 +Date: 2014-04-15 Author: Sergey Kucheryavskiy Maintainer: Sergey Kucheryavskiy Description: The package implements projection based methods for preprocessing, exploring and analysis of multivariate data used in chemometrics. Suggests: -Depends: methods +Depends: License: GPL-3 diff --git a/NEWS b/NEWS index d7d8c67..a0d87c0 100755 --- a/NEWS +++ b/NEWS @@ -1,3 +1,8 @@ +v. 0.5.2 +======== +* fixed bug for computing classification performance for numeric class names +* improvements to SIMCA implementation + v. 0.5.1 ======== * added more details to documentation diff --git a/R/classmodel.R b/R/classmodel.R index 59636bc..f698865 100755 --- a/R/classmodel.R +++ b/R/classmodel.R @@ -165,7 +165,7 @@ plotPerformance.classmodel = function(obj, nc = NULL, param = 'specificity', typ classname = sprintf('(%s)', obj$classnames[nc]) } - + data = cbind(1:obj$ncomp, obj$calres[[param]][nc, ]) labels = matrix(mdaplot.formatValues(obj$calres[[param]][nc, ]), ncol = 1) legend_str = 'cal' diff --git a/R/classres.R b/R/classres.R index 5d94cc3..bd155a0 100755 --- a/R/classres.R +++ b/R/classres.R @@ -87,21 +87,13 @@ getClassificationPerformance = function(c.ref, c.pred) sensitivity = matrix(0, nrow = nclasses + 1, ncol = ncomp) misclassified = matrix(0, nrow = nclasses + 1, ncol = ncomp) + classnames = dimnames(c.pred)[[3]] + for (i in 1:nclasses) { - if (is.numeric(c.ref)) - { - fn[i, ] = colSums((c.ref[, 1] == i) & (c.pred[, , i, drop = F] == -1)) - fp[i, ] = colSums((c.ref[, 1] != i) & (c.pred[, , i, drop = F] == 1)) - tp[i, ] = colSums((c.ref[, 1] == i) & (c.pred[, , i, drop = F] == 1)) - } - else - { - cname = dimnames(c.pred)[[3]][i] - fn[i, ] = colSums((c.ref[, 1] == cname) & (c.pred[, , i, drop = F] == -1)) - fp[i, ] = colSums((c.ref[, 1] != cname) & (c.pred[, , i, drop = F] == 1)) - tp[i, ] = colSums((c.ref[, 1] == cname) & (c.pred[, , i, drop = F] == 1)) - } + fn[i, ] = colSums((c.ref[, 1] == classnames[i]) & (c.pred[, , i, drop = F] == -1)) + fp[i, ] = colSums((c.ref[, 1] != classnames[i]) & (c.pred[, , i, drop = F] == 1)) + tp[i, ] = colSums((c.ref[, 1] == classnames[i]) & (c.pred[, , i, drop = F] == 1)) sensitivity[i, ] = tp[i, ] / (tp[i, ] + fn[i, ]) specificity[i, ] = tp[i, ] / (tp[i, ] + fp[i, ]) diff --git a/R/simca.R b/R/simca.R index e0f0f8d..b4ee8ba 100755 --- a/R/simca.R +++ b/R/simca.R @@ -57,7 +57,7 @@ simca = function(x, classname, ncomp = 15, center = T, scale = F, cv = NULL, x.t class(model) = c("simca", "classmodel", "pca") # apply model to calibration set - model$calres = predict.simca(model, x, c.ref = rep(1, nrow(x))) + model$calres = predict.simca(model, x, c.ref = rep(classname, nrow(x))) model$modpower = model$calres$modpower # do cross-validation if needed @@ -68,7 +68,7 @@ simca = function(x, classname, ncomp = 15, center = T, scale = F, cv = NULL, x.t if (!is.null(x.test)) { if (is.null(c.test)) - c.test = matrix(T, nrow(x.test), 1) + c.test = rep(classname, nrow(x.test)) model$testres = predict.simca(model, x.test, c.ref = c.test) } @@ -86,7 +86,7 @@ simca = function(x, classname, ncomp = 15, center = T, scale = F, cv = NULL, x.t #' @param x #' a matrix with x values (predictors) #' @param c.ref -#' a vector with reference class values +#' a vector with reference class names (same as class names for models) #' @param cv #' logical, are predictions for cross-validation or not #' @param ... @@ -117,12 +117,6 @@ predict.simca = function(object, x, c.ref = NULL, cv = F, ...) # check c.ref values and add dimnames if (!is.null(c.ref)) { - if (is.character(c.ref)) - c.ref = c.ref == object$classname - - if (is.logical(c.ref)) - c.ref = c.ref * 2 - 1 - c.ref = as.matrix(c.ref) rownames(c.ref) = rownames(x) colnames(c.ref) = object$classname @@ -200,7 +194,7 @@ simca.crossval = function(model, x, cv, center = T, scale = F) Q2 = matrix(0, ncol = ncomp, nrow = nobj) T2 = matrix(0, ncol = ncomp, nrow = nobj) c.pred = array(0, dim = c(nobj, ncomp, 1)) - c.ref = matrix(1, ncol = 1, nrow = nobj) + c.ref = matrix(model$classname, ncol = 1, nrow = nobj) # loop over segments for (i in 1:nrow(idx)) diff --git a/R/simcam.R b/R/simcam.R index 62593ad..b9f8755 100755 --- a/R/simcam.R +++ b/R/simcam.R @@ -50,7 +50,7 @@ simcam = function(models, info = '') #' @param x #' a matrix with x values (predictors) #' @param c.ref -#' a vector with reference class values +#' a vector with reference class names (same as class names in models) #' @param cv #' logical, are predictions for cross-validation or not #' @param ... diff --git a/README.md b/README.md index 3bbe329..23e6d30 100755 --- a/README.md +++ b/README.md @@ -11,13 +11,15 @@ For more details read a [short wiki tutorial](https://github.com/svkucheryavski/ How to install -------------- -The package is in beta testing and therefore is not yet available from CRAN. The easiest way to use the package is to +The package is available from CRAN with usual installing procedure. + +It can be also installed from sources, just [download](https://github.com/svkucheryavski/mdatools/releases) a source package archive from GitHub and install it using -the `install.packages` command, e.g. if the downloaded file is `mdatools_0.5.1.tar.gz` and it is located in a current +the `install.packages` command, e.g. if the downloaded file is `mdatools_0.5.2.tar.gz` and it is located in a current working directory, just run the following: ``` -install.packages('mdatools_0.5.1.tar.gz') +install.packages('mdatools_0.5.2.tar.gz') ``` If you have `devtools` package installed, the following command will install the latest release from the GitHub (do not forget to load the `devtools` package first): diff --git a/man/pls.Rd b/man/pls.Rd index 25e6630..a0c66c4 100755 --- a/man/pls.Rd +++ b/man/pls.Rd @@ -91,6 +91,7 @@ Methods for \code{pls} objects: \code{\link{plotYResiduals.pls}} \tab shows residuals plot for y values.\cr \code{\link{getSelectivityRatio.pls}} \tab returns vector with selectivity ratio values.\cr \code{\link{plotSelectivityRatio.pls}} \tab shows plot with selectivity ratio values.\cr + \code{\link{plotVIPScores.pls}} \tab shows plot with VIP scores values.\cr } Most of the methods for plotting data (except loadings and regression coefficients) are also available for PLS results diff --git a/man/predict.simca.Rd b/man/predict.simca.Rd index d5688ed..62e28aa 100644 --- a/man/predict.simca.Rd +++ b/man/predict.simca.Rd @@ -10,7 +10,8 @@ \item{x}{a matrix with x values (predictors)} - \item{c.ref}{a vector with reference class values} + \item{c.ref}{a vector with reference class names (same as + class names for models)} \item{cv}{logical, are predictions for cross-validation or not} diff --git a/man/predict.simcam.Rd b/man/predict.simcam.Rd index 0b37e26..df0f8be 100644 --- a/man/predict.simcam.Rd +++ b/man/predict.simcam.Rd @@ -10,7 +10,8 @@ \item{x}{a matrix with x values (predictors)} - \item{c.ref}{a vector with reference class values} + \item{c.ref}{a vector with reference class names (same as + class names in models)} \item{cv}{logical, are predictions for cross-validation or not} diff --git a/man/simca.Rd b/man/simca.Rd index 87526f7..81eea48 100755 --- a/man/simca.Rd +++ b/man/simca.Rd @@ -20,7 +20,7 @@ simca(x, classname, ncomp = 15, center = T, scale = F, cv = NULL, x.test = NULL, \item{scale}{logical, do sdandardization of data or not.} \item{cv}{number of segments for random cross-validation (1 for full cross-validation).} \item{x.test}{a numerical matrix with test data.} - \item{c.test}{a vector with logical values for classes of test data objects.} + \item{c.test}{a vector with text values (names of classes) of test data objects.} \item{alpha}{significance level for calculating limit for T2 and Q2 residuals.} \item{method}{method to compute principal components.} \item{info}{text with information about the model} @@ -98,7 +98,7 @@ class = iris[, 5] se = data[1:20, ] # make SIMCA model and apply to test set -model = simca(se, 'Se', cv = 1) +model = simca(se, 'setosa', cv = 1) model = selectCompNum(model, 1) # show infromation, summary and plot overview diff --git a/man/simcam.Rd b/man/simcam.Rd index 6e16420..b40e1f1 100755 --- a/man/simcam.Rd +++ b/man/simcam.Rd @@ -79,16 +79,16 @@ ve = caldata[26:50, ] vi = caldata[51:75, ] testdata = iris[seq(2, nrow(iris), 2), 1:4] -testdata.cref = rbind(matrix('Se', 25, 1), matrix('Vi', 25, 1), matrix('Ve', 25, 1)) +testdata.cref = iris[seq(2, nrow(iris), 2), 5] # create individual models -semodel = simca(se, classname = 'Se') +semodel = simca(se, classname = 'setosa') semodel = selectCompNum(semodel, 1) -vimodel = simca(vi, classname = 'Vi') +vimodel = simca(vi, classname = 'virginica') vimodel = selectCompNum(vimodel, 1) -vemodel = simca(ve, classname = 'Ve') +vemodel = simca(ve, classname = 'versicolor') vemodel = selectCompNum(vemodel, 1) # combine models into SIMCAM objects, show statistics and plots diff --git a/test/test_plsda.R b/test/test_plsda.R index 161ccaa..d0da16b 100755 --- a/test/test_plsda.R +++ b/test/test_plsda.R @@ -6,7 +6,27 @@ calset.c = iris[seq(1, nrow(iris), 2), 5] testset = iris[seq(2, nrow(iris), 2), 1:4] # test set, 3 classes testset.c = iris[seq(2, nrow(iris), 2), 5] # test set, 3 classes -model = plsda(calset, calset.c, ncomp = 3, cv = 1, info = 'IRIS data example') +cc = as.numeric(calset.c) +ct = as.numeric(testset.c) +model = plsda(calset, cc, ncomp = 3, cv = 1, info = 'IRIS data example') +plot(model) +readline('Press enter to continue...') + +cc = as.numeric(calset.c) +cc[cc == 1] = 10 +cc[cc == 3] = 30 +ct = as.numeric(testset.c) +ct[ct == 1] = 10 +ct[ct == 3] = 30 +model = plsda(calset, cc, ncomp = 3, cv = 1, info = 'IRIS data example') +plot(model) +readline('Press enter to continue...') + +cc = calset.c +ct = testset.c +model = plsda(calset, cc, ncomp = 3, cv = 1, info = 'IRIS data example') +plot(model) +readline('Press enter to continue...') res = predict(model, testset, testset.c) #res = predict(model, testset[1:50, ], testset.c[1:50, ]) diff --git a/test/test_simca.R b/test/test_simca.R index 6c06173..deb5458 100755 --- a/test/test_simca.R +++ b/test/test_simca.R @@ -8,7 +8,7 @@ ve = calset[calset[, 5] == 'versicolor', 1:4] vi = calset[calset[, 5] == 'virginica', 1:4] test.data = tstset[, 1:4] -test.c = c(matrix(1, 1, 25), matrix(2, 1, 25), matrix(3, 1, 25)) +test.c = tstset[, 5] # Select which model to calculate @@ -17,19 +17,19 @@ option = 2 if (option == 1) { # Setosa model with cross-validation - model = simca(se, 'Se', ncomp = 4, alpha = 0.01, cv = 1) + model = simca(se, 'setosa', ncomp = 4, alpha = 0.01, cv = 1) model = selectCompNum(model, 1) - pred = predict(model, test.data, test.c == 1) + pred = predict(model, test.data, test.c) } else if (option == 2) { # Virginica model with test set validation (no CV) - model = simca(vi, 'Vi', x.test = test.data[test.c == 3, ]) + model = simca(vi, 'virginica', x.test = test.data[test.c == 'virginica', ]) model = selectCompNum(model, 3) - pred = predict(model, test.data, test.c == 3) + pred = predict(model, test.data, test.c) } else { # Versicolor model with 5 segments CV and test set - model = simca(ve, 'Ve', ncomp = 4, cv = 5, x.test = test.data, c.test = test.c == 2) + model = simca(ve, 'versicolor', ncomp = 4, cv = 5, x.test = test.data, c.test = test.c == 'versicolor') model = selectCompNum(model, 3) - pred = predict(model, test.data, test.c == 2) + pred = predict(model, test.data, test.c) } cat('1. Show print and summary') diff --git a/test/test_simcam.R b/test/test_simcam.R index 29ddf4c..c955853 100755 --- a/test/test_simcam.R +++ b/test/test_simcam.R @@ -10,30 +10,30 @@ vi = calset[calset[, 5] == 'virginica', 1:4] # calibration data with reference cal.data = calset[, 1:4] -cal.c = c(matrix('Se', 1, 25), matrix('Ve', 1, 25), matrix('Vi', 1, 25)) +cal.c = calset[, 5] # test data with reference test.data = tstset[, 1:4] -test.c = c(matrix(1, 1, 25), matrix(2, 1, 25), matrix(3, 1, 25)) +test.c = tstset[, 5] # test data with reference for two classes test2c.data = tstset2c[, 1:4] -test2c.c = c(matrix(1, 1, 25), matrix(2, 1, 25)) +test2c.c = tstset2c[, 5] # test data with reference for with unknown classes test2cu.data = tstset2c[, 1:4] -test2cu.c = c(matrix('Se', 1, 25), matrix('None', 1, 10), matrix('Ar', 1, 15)) +test2cu.c = c(matrix('setosa', 1, 25), matrix('none', 1, 10), matrix('Ar', 1, 15)) # make individual models -semodel = simca(se, 'Se', ncomp = 4, alpha = 0.01, cv = 1) +semodel = simca(se, 'setosa', ncomp = 4, alpha = 0.01, cv = 1) semodel = selectCompNum(semodel, 1) # make individual models -vimodel = simca(vi, 'Vi') +vimodel = simca(vi, 'virginica') vimodel = selectCompNum(vimodel, 3) # make individual models -vemodel = simca(ve, 'Ve', ncomp = 4, cv = 5, test.data = test.data, test.c = test.c == 2) +vemodel = simca(ve, 'versicolor', ncomp = 4, cv = 5, x.test = test.data, c.test = test.c) vemodel = selectCompNum(vemodel, 3) # make group models