From 8c29a2acc6af22b0e8014bcf650f8962e99dbf70 Mon Sep 17 00:00:00 2001
From: Sergey Kucheryavskiy <svkucheryavski@gmail.com>
Date: Mon, 14 Apr 2014 22:08:42 +0200
Subject: [PATCH] v. 0.5.2

---
 DESCRIPTION           |  6 +++---
 NEWS                  |  5 +++++
 R/classmodel.R        |  2 +-
 R/classres.R          | 18 +++++-------------
 R/simca.R             | 14 ++++----------
 R/simcam.R            |  2 +-
 README.md             |  8 +++++---
 man/pls.Rd            |  1 +
 man/predict.simca.Rd  |  3 ++-
 man/predict.simcam.Rd |  3 ++-
 man/simca.Rd          |  4 ++--
 man/simcam.Rd         |  8 ++++----
 test/test_plsda.R     | 22 +++++++++++++++++++++-
 test/test_simca.R     | 14 +++++++-------
 test/test_simcam.R    | 14 +++++++-------
 15 files changed, 70 insertions(+), 54 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 14f5c81..98c911f 100755
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,10 +1,10 @@
 Package: mdatools
 Title: Multivariate data analysis for chemometrics
-Version: 0.5.1
-Date: 2014-04-13
+Version: 0.5.2
+Date: 2014-04-15
 Author: Sergey Kucheryavskiy 
 Maintainer:  Sergey Kucheryavskiy <svkucheryavski@gmail.com>
 Description: The package implements projection based methods for preprocessing, exploring and analysis of multivariate data used in chemometrics.
 Suggests: 
-Depends: methods
+Depends: 
 License: GPL-3
diff --git a/NEWS b/NEWS
index d7d8c67..a0d87c0 100755
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,8 @@
+v. 0.5.2
+========
+* fixed bug for computing classification performance for numeric class names
+* improvements to SIMCA implementation
+
 v. 0.5.1
 ========
 * added more details to documentation
diff --git a/R/classmodel.R b/R/classmodel.R
index 59636bc..f698865 100755
--- a/R/classmodel.R
+++ b/R/classmodel.R
@@ -165,7 +165,7 @@ plotPerformance.classmodel = function(obj, nc = NULL, param = 'specificity', typ
 
       classname = sprintf('(%s)', obj$classnames[nc])
    }
-
+   
    data = cbind(1:obj$ncomp, obj$calres[[param]][nc, ])
    labels = matrix(mdaplot.formatValues(obj$calres[[param]][nc, ]), ncol = 1)
    legend_str = 'cal'
diff --git a/R/classres.R b/R/classres.R
index 5d94cc3..bd155a0 100755
--- a/R/classres.R
+++ b/R/classres.R
@@ -87,21 +87,13 @@ getClassificationPerformance = function(c.ref, c.pred)
    sensitivity = matrix(0, nrow = nclasses + 1, ncol = ncomp)
    misclassified = matrix(0, nrow = nclasses + 1, ncol = ncomp)
 
+   classnames = dimnames(c.pred)[[3]]
+   
    for (i in 1:nclasses)         
    {   
-      if (is.numeric(c.ref))
-      {   
-         fn[i, ] = colSums((c.ref[, 1] == i) & (c.pred[, , i, drop = F] == -1))
-         fp[i, ] = colSums((c.ref[, 1] != i) & (c.pred[, , i, drop = F] == 1))
-         tp[i, ] = colSums((c.ref[, 1] == i) & (c.pred[, , i, drop = F] == 1))
-      }
-      else
-      {
-         cname = dimnames(c.pred)[[3]][i]
-         fn[i, ] = colSums((c.ref[, 1] == cname) & (c.pred[, , i, drop = F] == -1))
-         fp[i, ] = colSums((c.ref[, 1] != cname) & (c.pred[, , i, drop = F] == 1))
-         tp[i, ] = colSums((c.ref[, 1] == cname) & (c.pred[, , i, drop = F] == 1))         
-      }   
+      fn[i, ] = colSums((c.ref[, 1] == classnames[i]) & (c.pred[, , i, drop = F] == -1))
+      fp[i, ] = colSums((c.ref[, 1] != classnames[i]) & (c.pred[, , i, drop = F] == 1))
+      tp[i, ] = colSums((c.ref[, 1] == classnames[i]) & (c.pred[, , i, drop = F] == 1))
 
       sensitivity[i, ] = tp[i, ] / (tp[i, ] + fn[i, ])
       specificity[i, ] = tp[i, ] / (tp[i, ] + fp[i, ])
diff --git a/R/simca.R b/R/simca.R
index e0f0f8d..b4ee8ba 100755
--- a/R/simca.R
+++ b/R/simca.R
@@ -57,7 +57,7 @@ simca = function(x, classname, ncomp = 15, center = T, scale = F, cv = NULL, x.t
    class(model) = c("simca", "classmodel", "pca")
    
    # apply model to calibration set
-   model$calres = predict.simca(model, x, c.ref = rep(1, nrow(x)))
+   model$calres = predict.simca(model, x, c.ref = rep(classname, nrow(x)))
    model$modpower = model$calres$modpower
    
    # do cross-validation if needed
@@ -68,7 +68,7 @@ simca = function(x, classname, ncomp = 15, center = T, scale = F, cv = NULL, x.t
    if (!is.null(x.test))
    {
       if (is.null(c.test))
-         c.test = matrix(T, nrow(x.test), 1)
+         c.test = rep(classname, nrow(x.test))
       
       model$testres = predict.simca(model, x.test, c.ref = c.test)
    }
@@ -86,7 +86,7 @@ simca = function(x, classname, ncomp = 15, center = T, scale = F, cv = NULL, x.t
 #' @param x
 #' a matrix with x values (predictors)
 #' @param c.ref
-#' a vector with reference class values
+#' a vector with reference class names (same as class names for models)
 #' @param cv
 #' logical, are predictions for cross-validation or not
 #' @param ...
@@ -117,12 +117,6 @@ predict.simca = function(object, x, c.ref = NULL, cv = F, ...)
    # check c.ref values and add dimnames
    if (!is.null(c.ref))
    {   
-      if (is.character(c.ref))
-         c.ref = c.ref == object$classname
-      
-      if (is.logical(c.ref))
-         c.ref = c.ref * 2 - 1
-      
       c.ref = as.matrix(c.ref)
       rownames(c.ref) = rownames(x)
       colnames(c.ref) = object$classname
@@ -200,7 +194,7 @@ simca.crossval = function(model, x, cv, center = T, scale = F)
    Q2 = matrix(0, ncol = ncomp, nrow = nobj)   
    T2 = matrix(0, ncol = ncomp, nrow = nobj)   
    c.pred = array(0, dim = c(nobj, ncomp, 1))
-   c.ref = matrix(1, ncol = 1, nrow = nobj)
+   c.ref = matrix(model$classname, ncol = 1, nrow = nobj)
    
    # loop over segments
    for (i in 1:nrow(idx))
diff --git a/R/simcam.R b/R/simcam.R
index 62593ad..b9f8755 100755
--- a/R/simcam.R
+++ b/R/simcam.R
@@ -50,7 +50,7 @@ simcam = function(models, info = '')
 #' @param x
 #' a matrix with x values (predictors)
 #' @param c.ref
-#' a vector with reference class values
+#' a vector with reference class names (same as class names in models)
 #' @param cv
 #' logical, are predictions for cross-validation or not
 #' @param ...
diff --git a/README.md b/README.md
index 3bbe329..23e6d30 100755
--- a/README.md
+++ b/README.md
@@ -11,13 +11,15 @@ For more details read a [short wiki tutorial](https://github.com/svkucheryavski/
 How to install
 --------------
 
-The package is in beta testing and therefore is not yet available from CRAN. The easiest way to use the package is to 
+The package is available from CRAN with usual installing procedure.
+
+It can be also installed from sources, just  
 [download](https://github.com/svkucheryavski/mdatools/releases) a source package archive from GitHub and install it using 
-the `install.packages` command, e.g. if the downloaded file is `mdatools_0.5.1.tar.gz` and it is located in a current 
+the `install.packages` command, e.g. if the downloaded file is `mdatools_0.5.2.tar.gz` and it is located in a current 
 working directory, just run the following:
 
 ```
-install.packages('mdatools_0.5.1.tar.gz')
+install.packages('mdatools_0.5.2.tar.gz')
 ```
 
 If you have `devtools` package installed, the following command will install the latest release from the GitHub (do not forget to load the `devtools` package first):
diff --git a/man/pls.Rd b/man/pls.Rd
index 25e6630..a0c66c4 100755
--- a/man/pls.Rd
+++ b/man/pls.Rd
@@ -91,6 +91,7 @@ Methods for \code{pls} objects:
       \code{\link{plotYResiduals.pls}} \tab shows residuals plot for y values.\cr
       \code{\link{getSelectivityRatio.pls}} \tab returns vector with selectivity ratio values.\cr
       \code{\link{plotSelectivityRatio.pls}} \tab shows plot with selectivity ratio values.\cr
+      \code{\link{plotVIPScores.pls}} \tab shows plot with VIP scores values.\cr
    }
    
    Most of the methods for plotting data (except loadings and regression coefficients) are also available for PLS results 
diff --git a/man/predict.simca.Rd b/man/predict.simca.Rd
index d5688ed..62e28aa 100644
--- a/man/predict.simca.Rd
+++ b/man/predict.simca.Rd
@@ -10,7 +10,8 @@
 
   \item{x}{a matrix with x values (predictors)}
 
-  \item{c.ref}{a vector with reference class values}
+  \item{c.ref}{a vector with reference class names (same as
+  class names for models)}
 
   \item{cv}{logical, are predictions for cross-validation
   or not}
diff --git a/man/predict.simcam.Rd b/man/predict.simcam.Rd
index 0b37e26..df0f8be 100644
--- a/man/predict.simcam.Rd
+++ b/man/predict.simcam.Rd
@@ -10,7 +10,8 @@
 
   \item{x}{a matrix with x values (predictors)}
 
-  \item{c.ref}{a vector with reference class values}
+  \item{c.ref}{a vector with reference class names (same as
+  class names in models)}
 
   \item{cv}{logical, are predictions for cross-validation
   or not}
diff --git a/man/simca.Rd b/man/simca.Rd
index 87526f7..81eea48 100755
--- a/man/simca.Rd
+++ b/man/simca.Rd
@@ -20,7 +20,7 @@ simca(x, classname, ncomp = 15, center = T, scale = F, cv = NULL, x.test = NULL,
   \item{scale}{logical, do sdandardization of data or not.}
   \item{cv}{number of segments for random cross-validation (1 for full cross-validation).}
   \item{x.test}{a numerical matrix with test data.}
-  \item{c.test}{a vector with logical values for classes of test data objects.}
+  \item{c.test}{a vector with text values (names of classes) of test data objects.}
   \item{alpha}{significance level for calculating limit for T2 and Q2 residuals.}
   \item{method}{method to compute principal components.}
   \item{info}{text with information about the model}
@@ -98,7 +98,7 @@ class = iris[, 5]
 se = data[1:20, ]
 
 # make SIMCA model and apply to test set
-model = simca(se, 'Se', cv = 1)
+model = simca(se, 'setosa', cv = 1)
 model = selectCompNum(model, 1)
 
 # show infromation, summary and plot overview
diff --git a/man/simcam.Rd b/man/simcam.Rd
index 6e16420..b40e1f1 100755
--- a/man/simcam.Rd
+++ b/man/simcam.Rd
@@ -79,16 +79,16 @@ ve = caldata[26:50, ]
 vi = caldata[51:75, ]
 
 testdata = iris[seq(2, nrow(iris), 2), 1:4]
-testdata.cref = rbind(matrix('Se', 25, 1), matrix('Vi', 25, 1), matrix('Ve', 25, 1))
+testdata.cref = iris[seq(2, nrow(iris), 2), 5]
 
 # create individual models
-semodel = simca(se, classname = 'Se')
+semodel = simca(se, classname = 'setosa')
 semodel = selectCompNum(semodel, 1)
 
-vimodel = simca(vi, classname = 'Vi')
+vimodel = simca(vi, classname = 'virginica')
 vimodel = selectCompNum(vimodel, 1)
 
-vemodel = simca(ve, classname = 'Ve')
+vemodel = simca(ve, classname = 'versicolor')
 vemodel = selectCompNum(vemodel, 1)
 
 # combine models into SIMCAM objects, show statistics and plots
diff --git a/test/test_plsda.R b/test/test_plsda.R
index 161ccaa..d0da16b 100755
--- a/test/test_plsda.R
+++ b/test/test_plsda.R
@@ -6,7 +6,27 @@ calset.c = iris[seq(1, nrow(iris), 2), 5]
 testset = iris[seq(2, nrow(iris), 2), 1:4] # test set, 3 classes
 testset.c = iris[seq(2, nrow(iris), 2), 5] # test set, 3 classes
 
-model = plsda(calset, calset.c, ncomp = 3, cv = 1, info = 'IRIS data example')
+cc = as.numeric(calset.c)
+ct = as.numeric(testset.c)
+model = plsda(calset, cc, ncomp = 3, cv = 1, info = 'IRIS data example')
+plot(model)
+readline('Press enter to continue...')
+
+cc = as.numeric(calset.c)
+cc[cc == 1] = 10
+cc[cc == 3] = 30
+ct = as.numeric(testset.c)
+ct[ct == 1] = 10
+ct[ct == 3] = 30
+model = plsda(calset, cc, ncomp = 3, cv = 1, info = 'IRIS data example')
+plot(model)
+readline('Press enter to continue...')
+
+cc = calset.c
+ct = testset.c
+model = plsda(calset, cc, ncomp = 3, cv = 1, info = 'IRIS data example')
+plot(model)
+readline('Press enter to continue...')
 
 res = predict(model, testset, testset.c)
 #res = predict(model, testset[1:50, ], testset.c[1:50, ])
diff --git a/test/test_simca.R b/test/test_simca.R
index 6c06173..deb5458 100755
--- a/test/test_simca.R
+++ b/test/test_simca.R
@@ -8,7 +8,7 @@ ve = calset[calset[, 5] == 'versicolor', 1:4]
 vi = calset[calset[, 5] == 'virginica', 1:4]
 
 test.data = tstset[, 1:4]
-test.c = c(matrix(1, 1, 25), matrix(2, 1, 25), matrix(3, 1, 25))
+test.c = tstset[, 5]
 
 
 # Select which model to calculate
@@ -17,19 +17,19 @@ option = 2
 if (option == 1)
 {
    # Setosa model with cross-validation
-   model = simca(se, 'Se', ncomp = 4, alpha = 0.01, cv = 1)
+   model = simca(se, 'setosa', ncomp = 4, alpha = 0.01, cv = 1)
    model = selectCompNum(model, 1)
-   pred = predict(model, test.data, test.c == 1)   
+   pred = predict(model, test.data, test.c)   
 } else if (option == 2) {  
    # Virginica model with test set validation (no CV)
-   model = simca(vi, 'Vi', x.test = test.data[test.c == 3, ])
+   model = simca(vi, 'virginica', x.test = test.data[test.c == 'virginica', ])
    model = selectCompNum(model, 3)
-   pred = predict(model, test.data, test.c == 3)   
+   pred = predict(model, test.data, test.c)   
 } else {
    # Versicolor model with 5 segments CV and test set
-   model = simca(ve, 'Ve', ncomp = 4, cv = 5, x.test = test.data, c.test = test.c == 2)
+   model = simca(ve, 'versicolor', ncomp = 4, cv = 5, x.test = test.data, c.test = test.c == 'versicolor')
    model = selectCompNum(model, 3)
-   pred = predict(model, test.data, test.c == 2)   
+   pred = predict(model, test.data, test.c)   
 }
 
 cat('1. Show print and summary')
diff --git a/test/test_simcam.R b/test/test_simcam.R
index 29ddf4c..c955853 100755
--- a/test/test_simcam.R
+++ b/test/test_simcam.R
@@ -10,30 +10,30 @@ vi = calset[calset[, 5] == 'virginica', 1:4]
 
 # calibration data with reference
 cal.data = calset[, 1:4]
-cal.c = c(matrix('Se', 1, 25), matrix('Ve', 1, 25), matrix('Vi', 1, 25))
+cal.c = calset[, 5]
 
 # test data with reference
 test.data = tstset[, 1:4]
-test.c = c(matrix(1, 1, 25), matrix(2, 1, 25), matrix(3, 1, 25))
+test.c = tstset[, 5]
 
 # test data with reference for two classes
 test2c.data = tstset2c[, 1:4]
-test2c.c = c(matrix(1, 1, 25), matrix(2, 1, 25))
+test2c.c = tstset2c[, 5]
 
 # test data with reference for with unknown classes
 test2cu.data = tstset2c[, 1:4]
-test2cu.c = c(matrix('Se', 1, 25), matrix('None', 1, 10), matrix('Ar', 1, 15))
+test2cu.c = c(matrix('setosa', 1, 25), matrix('none', 1, 10), matrix('Ar', 1, 15))
 
 # make individual models
-semodel = simca(se, 'Se', ncomp = 4, alpha = 0.01, cv = 1)
+semodel = simca(se, 'setosa', ncomp = 4, alpha = 0.01, cv = 1)
 semodel = selectCompNum(semodel, 1)
 
 # make individual models
-vimodel = simca(vi, 'Vi')
+vimodel = simca(vi, 'virginica')
 vimodel = selectCompNum(vimodel, 3)
 
 # make individual models
-vemodel = simca(ve, 'Ve', ncomp = 4, cv = 5, test.data = test.data, test.c = test.c == 2)
+vemodel = simca(ve, 'versicolor', ncomp = 4, cv = 5, x.test = test.data, c.test = test.c)
 vemodel = selectCompNum(vemodel, 3)
 
 # make group models