Merge branch 'develop'

svkucheryavski · Apr 14, 2014 · d54e8ba · d54e8ba
2 parents 8b1bc2d + 8c29a2a
commit d54e8ba
Show file tree

Hide file tree

Showing 15 changed files with 70 additions and 54 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,10 +1,10 @@
 Package: mdatools
 Title: Multivariate data analysis for chemometrics
-Version: 0.5.1
-Date: 2014-04-13
+Version: 0.5.2
+Date: 2014-04-15
 Author: Sergey Kucheryavskiy 
 Maintainer:  Sergey Kucheryavskiy <[email protected]>
 Description: The package implements projection based methods for preprocessing, exploring and analysis of multivariate data used in chemometrics.
 Suggests: 
-Depends: methods
+Depends: 
 License: GPL-3
diff --git a/NEWS b/NEWS
@@ -1,3 +1,8 @@
+v. 0.5.2
+========
+* fixed bug for computing classification performance for numeric class names
+* improvements to SIMCA implementation
+
 v. 0.5.1
 ========
 * added more details to documentation

diff --git a/R/classmodel.R b/R/classmodel.R
@@ -165,7 +165,7 @@ plotPerformance.classmodel = function(obj, nc = NULL, param = 'specificity', typ
 
       classname = sprintf('(%s)', obj$classnames[nc])
    }
-
+   
    data = cbind(1:obj$ncomp, obj$calres[[param]][nc, ])
    labels = matrix(mdaplot.formatValues(obj$calres[[param]][nc, ]), ncol = 1)
    legend_str = 'cal'

diff --git a/R/classres.R b/R/classres.R
@@ -87,21 +87,13 @@ getClassificationPerformance = function(c.ref, c.pred)
    sensitivity = matrix(0, nrow = nclasses + 1, ncol = ncomp)
    misclassified = matrix(0, nrow = nclasses + 1, ncol = ncomp)
 
+   classnames = dimnames(c.pred)[[3]]
+
    for (i in 1:nclasses)         
    {   
-      if (is.numeric(c.ref))
-      {   
-         fn[i, ] = colSums((c.ref[, 1] == i) & (c.pred[, , i, drop = F] == -1))
-         fp[i, ] = colSums((c.ref[, 1] != i) & (c.pred[, , i, drop = F] == 1))
-         tp[i, ] = colSums((c.ref[, 1] == i) & (c.pred[, , i, drop = F] == 1))
-      }
-      else
-      {
-         cname = dimnames(c.pred)[[3]][i]
-         fn[i, ] = colSums((c.ref[, 1] == cname) & (c.pred[, , i, drop = F] == -1))
-         fp[i, ] = colSums((c.ref[, 1] != cname) & (c.pred[, , i, drop = F] == 1))
-         tp[i, ] = colSums((c.ref[, 1] == cname) & (c.pred[, , i, drop = F] == 1))         
-      }   
+      fn[i, ] = colSums((c.ref[, 1] == classnames[i]) & (c.pred[, , i, drop = F] == -1))
+      fp[i, ] = colSums((c.ref[, 1] != classnames[i]) & (c.pred[, , i, drop = F] == 1))
+      tp[i, ] = colSums((c.ref[, 1] == classnames[i]) & (c.pred[, , i, drop = F] == 1))
 
       sensitivity[i, ] = tp[i, ] / (tp[i, ] + fn[i, ])
       specificity[i, ] = tp[i, ] / (tp[i, ] + fp[i, ])

diff --git a/R/simca.R b/R/simca.R
@@ -57,7 +57,7 @@ simca = function(x, classname, ncomp = 15, center = T, scale = F, cv = NULL, x.t
    class(model) = c("simca", "classmodel", "pca")
 
    # apply model to calibration set
-   model$calres = predict.simca(model, x, c.ref = rep(1, nrow(x)))
+   model$calres = predict.simca(model, x, c.ref = rep(classname, nrow(x)))
    model$modpower = model$calres$modpower
 
    # do cross-validation if needed
@@ -68,7 +68,7 @@ simca = function(x, classname, ncomp = 15, center = T, scale = F, cv = NULL, x.t
    if (!is.null(x.test))
    {
       if (is.null(c.test))
-         c.test = matrix(T, nrow(x.test), 1)
+         c.test = rep(classname, nrow(x.test))
 
       model$testres = predict.simca(model, x.test, c.ref = c.test)
    }
@@ -86,7 +86,7 @@ simca = function(x, classname, ncomp = 15, center = T, scale = F, cv = NULL, x.t
 #' @param x
 #' a matrix with x values (predictors)
 #' @param c.ref
-#' a vector with reference class values
+#' a vector with reference class names (same as class names for models)
 #' @param cv
 #' logical, are predictions for cross-validation or not
 #' @param ...
@@ -117,12 +117,6 @@ predict.simca = function(object, x, c.ref = NULL, cv = F, ...)
    # check c.ref values and add dimnames
    if (!is.null(c.ref))
    {   
-      if (is.character(c.ref))
-         c.ref = c.ref == object$classname
-
-      if (is.logical(c.ref))
-         c.ref = c.ref * 2 - 1
-
       c.ref = as.matrix(c.ref)
       rownames(c.ref) = rownames(x)
       colnames(c.ref) = object$classname
@@ -200,7 +194,7 @@ simca.crossval = function(model, x, cv, center = T, scale = F)
    Q2 = matrix(0, ncol = ncomp, nrow = nobj)   
    T2 = matrix(0, ncol = ncomp, nrow = nobj)   
    c.pred = array(0, dim = c(nobj, ncomp, 1))
-   c.ref = matrix(1, ncol = 1, nrow = nobj)
+   c.ref = matrix(model$classname, ncol = 1, nrow = nobj)
 
    # loop over segments
    for (i in 1:nrow(idx))

diff --git a/R/simcam.R b/R/simcam.R
@@ -50,7 +50,7 @@ simcam = function(models, info = '')
 #' @param x
 #' a matrix with x values (predictors)
 #' @param c.ref
-#' a vector with reference class values
+#' a vector with reference class names (same as class names in models)
 #' @param cv
 #' logical, are predictions for cross-validation or not
 #' @param ...

diff --git a/README.md b/README.md
@@ -11,13 +11,15 @@ For more details read a [short wiki tutorial](https://github.com/svkucheryavski/
 How to install
 --------------
 
-The package is in beta testing and therefore is not yet available from CRAN. The easiest way to use the package is to 
+The package is available from CRAN with usual installing procedure.
+
+It can be also installed from sources, just  
 [download](https://github.com/svkucheryavski/mdatools/releases) a source package archive from GitHub and install it using 
-the `install.packages` command, e.g. if the downloaded file is `mdatools_0.5.1.tar.gz` and it is located in a current 
+the `install.packages` command, e.g. if the downloaded file is `mdatools_0.5.2.tar.gz` and it is located in a current 
 working directory, just run the following:
 
 ```
-install.packages('mdatools_0.5.1.tar.gz')
+install.packages('mdatools_0.5.2.tar.gz')
 ```
 
 If you have `devtools` package installed, the following command will install the latest release from the GitHub (do not forget to load the `devtools` package first):

diff --git a/man/pls.Rd b/man/pls.Rd
@@ -91,6 +91,7 @@ Methods for \code{pls} objects:
       \code{\link{plotYResiduals.pls}} \tab shows residuals plot for y values.\cr
       \code{\link{getSelectivityRatio.pls}} \tab returns vector with selectivity ratio values.\cr
       \code{\link{plotSelectivityRatio.pls}} \tab shows plot with selectivity ratio values.\cr
+      \code{\link{plotVIPScores.pls}} \tab shows plot with VIP scores values.\cr
    }
 
    Most of the methods for plotting data (except loadings and regression coefficients) are also available for PLS results 

diff --git a/man/predict.simca.Rd b/man/predict.simca.Rd
@@ -10,7 +10,8 @@
 
   \item{x}{a matrix with x values (predictors)}
 
-  \item{c.ref}{a vector with reference class values}
+  \item{c.ref}{a vector with reference class names (same as
+  class names for models)}
 
   \item{cv}{logical, are predictions for cross-validation
   or not}

diff --git a/man/predict.simcam.Rd b/man/predict.simcam.Rd
@@ -10,7 +10,8 @@
 
   \item{x}{a matrix with x values (predictors)}
 
-  \item{c.ref}{a vector with reference class values}
+  \item{c.ref}{a vector with reference class names (same as
+  class names in models)}
 
   \item{cv}{logical, are predictions for cross-validation
   or not}

diff --git a/man/simca.Rd b/man/simca.Rd
@@ -20,7 +20,7 @@ simca(x, classname, ncomp = 15, center = T, scale = F, cv = NULL, x.test = NULL,
   \item{scale}{logical, do sdandardization of data or not.}
   \item{cv}{number of segments for random cross-validation (1 for full cross-validation).}
   \item{x.test}{a numerical matrix with test data.}
-  \item{c.test}{a vector with logical values for classes of test data objects.}
+  \item{c.test}{a vector with text values (names of classes) of test data objects.}
   \item{alpha}{significance level for calculating limit for T2 and Q2 residuals.}
   \item{method}{method to compute principal components.}
   \item{info}{text with information about the model}
@@ -98,7 +98,7 @@ class = iris[, 5]
 se = data[1:20, ]
 
 # make SIMCA model and apply to test set
-model = simca(se, 'Se', cv = 1)
+model = simca(se, 'setosa', cv = 1)
 model = selectCompNum(model, 1)
 
 # show infromation, summary and plot overview

diff --git a/man/simcam.Rd b/man/simcam.Rd
@@ -79,16 +79,16 @@ ve = caldata[26:50, ]
 vi = caldata[51:75, ]
 
 testdata = iris[seq(2, nrow(iris), 2), 1:4]
-testdata.cref = rbind(matrix('Se', 25, 1), matrix('Vi', 25, 1), matrix('Ve', 25, 1))
+testdata.cref = iris[seq(2, nrow(iris), 2), 5]
 
 # create individual models
-semodel = simca(se, classname = 'Se')
+semodel = simca(se, classname = 'setosa')
 semodel = selectCompNum(semodel, 1)
 
-vimodel = simca(vi, classname = 'Vi')
+vimodel = simca(vi, classname = 'virginica')
 vimodel = selectCompNum(vimodel, 1)
 
-vemodel = simca(ve, classname = 'Ve')
+vemodel = simca(ve, classname = 'versicolor')
 vemodel = selectCompNum(vemodel, 1)
 
 # combine models into SIMCAM objects, show statistics and plots

diff --git a/test/test_plsda.R b/test/test_plsda.R
@@ -6,7 +6,27 @@ calset.c = iris[seq(1, nrow(iris), 2), 5]
 testset = iris[seq(2, nrow(iris), 2), 1:4] # test set, 3 classes
 testset.c = iris[seq(2, nrow(iris), 2), 5] # test set, 3 classes
 
-model = plsda(calset, calset.c, ncomp = 3, cv = 1, info = 'IRIS data example')
+cc = as.numeric(calset.c)
+ct = as.numeric(testset.c)
+model = plsda(calset, cc, ncomp = 3, cv = 1, info = 'IRIS data example')
+plot(model)
+readline('Press enter to continue...')
+
+cc = as.numeric(calset.c)
+cc[cc == 1] = 10
+cc[cc == 3] = 30
+ct = as.numeric(testset.c)
+ct[ct == 1] = 10
+ct[ct == 3] = 30
+model = plsda(calset, cc, ncomp = 3, cv = 1, info = 'IRIS data example')
+plot(model)
+readline('Press enter to continue...')
+
+cc = calset.c
+ct = testset.c
+model = plsda(calset, cc, ncomp = 3, cv = 1, info = 'IRIS data example')
+plot(model)
+readline('Press enter to continue...')
 
 res = predict(model, testset, testset.c)
 #res = predict(model, testset[1:50, ], testset.c[1:50, ])

diff --git a/test/test_simca.R b/test/test_simca.R
@@ -8,7 +8,7 @@ ve = calset[calset[, 5] == 'versicolor', 1:4]
 vi = calset[calset[, 5] == 'virginica', 1:4]
 
 test.data = tstset[, 1:4]
-test.c = c(matrix(1, 1, 25), matrix(2, 1, 25), matrix(3, 1, 25))
+test.c = tstset[, 5]
 
 
 # Select which model to calculate
@@ -17,19 +17,19 @@ option = 2
 if (option == 1)
 {
    # Setosa model with cross-validation
-   model = simca(se, 'Se', ncomp = 4, alpha = 0.01, cv = 1)
+   model = simca(se, 'setosa', ncomp = 4, alpha = 0.01, cv = 1)
    model = selectCompNum(model, 1)
-   pred = predict(model, test.data, test.c == 1)   
+   pred = predict(model, test.data, test.c)   
 } else if (option == 2) {  
    # Virginica model with test set validation (no CV)
-   model = simca(vi, 'Vi', x.test = test.data[test.c == 3, ])
+   model = simca(vi, 'virginica', x.test = test.data[test.c == 'virginica', ])
    model = selectCompNum(model, 3)
-   pred = predict(model, test.data, test.c == 3)   
+   pred = predict(model, test.data, test.c)   
 } else {
    # Versicolor model with 5 segments CV and test set
-   model = simca(ve, 'Ve', ncomp = 4, cv = 5, x.test = test.data, c.test = test.c == 2)
+   model = simca(ve, 'versicolor', ncomp = 4, cv = 5, x.test = test.data, c.test = test.c == 'versicolor')
    model = selectCompNum(model, 3)
-   pred = predict(model, test.data, test.c == 2)   
+   pred = predict(model, test.data, test.c)   
 }
 
 cat('1. Show print and summary')

diff --git a/test/test_simcam.R b/test/test_simcam.R
@@ -10,30 +10,30 @@ vi = calset[calset[, 5] == 'virginica', 1:4]
 
 # calibration data with reference
 cal.data = calset[, 1:4]
-cal.c = c(matrix('Se', 1, 25), matrix('Ve', 1, 25), matrix('Vi', 1, 25))
+cal.c = calset[, 5]
 
 # test data with reference
 test.data = tstset[, 1:4]
-test.c = c(matrix(1, 1, 25), matrix(2, 1, 25), matrix(3, 1, 25))
+test.c = tstset[, 5]
 
 # test data with reference for two classes
 test2c.data = tstset2c[, 1:4]
-test2c.c = c(matrix(1, 1, 25), matrix(2, 1, 25))
+test2c.c = tstset2c[, 5]
 
 # test data with reference for with unknown classes
 test2cu.data = tstset2c[, 1:4]
-test2cu.c = c(matrix('Se', 1, 25), matrix('None', 1, 10), matrix('Ar', 1, 15))
+test2cu.c = c(matrix('setosa', 1, 25), matrix('none', 1, 10), matrix('Ar', 1, 15))
 
 # make individual models
-semodel = simca(se, 'Se', ncomp = 4, alpha = 0.01, cv = 1)
+semodel = simca(se, 'setosa', ncomp = 4, alpha = 0.01, cv = 1)
 semodel = selectCompNum(semodel, 1)
 
 # make individual models
-vimodel = simca(vi, 'Vi')
+vimodel = simca(vi, 'virginica')
 vimodel = selectCompNum(vimodel, 3)
 
 # make individual models
-vemodel = simca(ve, 'Ve', ncomp = 4, cv = 5, test.data = test.data, test.c = test.c == 2)
+vemodel = simca(ve, 'versicolor', ncomp = 4, cv = 5, x.test = test.data, c.test = test.c)
 vemodel = selectCompNum(vemodel, 3)
 
 # make group models