v. 0.6.1
svkucheryavski committed Jan 22, 2015
2 parents e7bf7ba + 21942db commit 0c47a9a
Showing 12 changed files with 135 additions and 39 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
@@ -1,7 +1,7 @@
Package: mdatools
Title: Multivariate Data Analysis for Chemometrics
Version: 0.6.0
Date: 2014-01-19
Version: 0.6.1
Date: 2014-01-22
Author: Sergey Kucheryavskiy
Maintainer: Sergey Kucheryavskiy <[email protected]>
Description: Package implements projection based methods for preprocessing, exploring and analysis of multivariate data used in chemometrics.
6 changes: 6 additions & 0 deletions NEWS
@@ -1,3 +1,9 @@
v.0.6.1
========
* fixed a bug that led to incorrect calculation of specificity
* jack-knife confidence intervals now also calculated for PLS-DA models
* confidence intervals for regression coefficients are shown by default if calculated

v.0.6.0
========
* randomization test for PLS has been added, see `?randtest`
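The v.0.6.1 entries above cover the specificity fix, jack-knife confidence intervals for PLS-DA models, and intervals being shown by default. A minimal usage sketch of the new behaviour, assuming mdatools 0.6.1 is installed; the synthetic data and the choice of ten cross-validation segments are illustrative, not taken from the commit:

    # sketch: PLS-DA with jack-knife confidence intervals (new in v.0.6.1);
    # the data is synthetic and purely illustrative
    library(mdatools)

    set.seed(42)
    x = matrix(rnorm(60 * 10), 60, 10)
    c = rep(c('A', 'B'), each = 30)
    x[c == 'A', 1:3] = x[c == 'A', 1:3] + 2   # make the two classes separable

    # coeffs.ci = 'jk' requests jack-knifing; with fewer than 10 segments
    # the model issues a warning (see the pls.R and plsda.R hunks below)
    model = plsda(x, c, ncomp = 3, cv = 10, coeffs.ci = 'jk')

    # since v.0.6.1 confidence intervals are drawn by default (show.ci = T)
    plotRegcoeffs(model, ncomp = 2)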
17 changes: 11 additions & 6 deletions R/classres.R
@@ -83,6 +83,8 @@ getClassificationPerformance = function(c.ref, c.pred)
tp = matrix(0, nrow = nclasses, ncol = ncomp)
fp = matrix(0, nrow = nclasses, ncol = ncomp)
fn = matrix(0, nrow = nclasses, ncol = ncomp)
tn = matrix(0, nrow = nclasses, ncol = ncomp)

specificity = matrix(0, nrow = nclasses + 1, ncol = ncomp)
sensitivity = matrix(0, nrow = nclasses + 1, ncol = ncomp)
misclassified = matrix(0, nrow = nclasses + 1, ncol = ncomp)
@@ -94,14 +96,15 @@ getClassificationPerformance = function(c.ref, c.pred)
fn[i, ] = colSums((c.ref[, 1] == classnames[i]) & (c.pred[, , i, drop = F] == -1))
fp[i, ] = colSums((c.ref[, 1] != classnames[i]) & (c.pred[, , i, drop = F] == 1))
tp[i, ] = colSums((c.ref[, 1] == classnames[i]) & (c.pred[, , i, drop = F] == 1))

tn[i, ] = colSums((c.ref[, 1] != classnames[i]) & (c.pred[, , i, drop = F] == -1))

sensitivity[i, ] = tp[i, ] / (tp[i, ] + fn[i, ])
specificity[i, ] = tp[i, ] / (tp[i, ] + fp[i, ])
specificity[i, ] = tn[i, ] / (tn[i, ] + fp[i, ])
misclassified[i, ] = (fp[i, ] + fn[i, ]) / nobj
}

sensitivity[nclasses + 1, ] = colSums(tp) / (colSums(tp) + colSums(fn))
specificity[nclasses + 1, ] = colSums(tp) / (colSums(tp) + colSums(fp))
specificity[nclasses + 1, ] = colSums(tn) / (colSums(tn) + colSums(fp))
misclassified[nclasses + 1, ] = (colSums(fp) + colSums(fn)) / (nclasses * nobj)

rownames(fn) = rownames(fp) = rownames(tp) = dimnames(c.pred)[[3]]
@@ -112,6 +115,7 @@ getClassificationPerformance = function(c.ref, c.pred)
obj$fn = fn
obj$fp = fp
obj$tp = tp
obj$tn = tn
obj$sensitivity = sensitivity
obj$specificity = specificity
obj$misclassified = misclassified
@@ -505,13 +509,14 @@ as.matrix.classres = function(x, ncomp = NULL, nc = 1, ...)
if (!is.null(obj$c.ref))
{
if (is.null(ncomp))
res = cbind(obj$tp[nc, ], obj$fp[nc, ], obj$fn[nc, ], obj$specificity[nc, ], obj$sensitivity[nc, ])
res = cbind(obj$tp[nc, ], obj$fp[nc, ], obj$tn[nc, ], obj$fn[nc, ],
obj$specificity[nc, ], obj$sensitivity[nc, ])
else
res = cbind(obj$tp[nc, ncomp], obj$fp[nc, ncomp], obj$fn[nc, ncomp],
res = cbind(obj$tp[nc, ncomp], obj$fp[nc, ncomp], obj$tn[nc, ncomp], obj$fn[nc, ncomp],
round(obj$specificity[nc, ncomp], 3), round(obj$sensitivity[nc, ncomp], 3))

res[, 4:5] = round(res[, 4:5], 3)
colnames(res) = c('TP', 'FP', 'FN', 'Spec', 'Sens')
colnames(res) = c('TP', 'FP', 'TN', 'FN', 'Spec', 'Sens')
}
else
{
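The substance of the change above: specificity is now computed from true negatives, tn / (tn + fp), instead of the former tp / (tp + fp), and the tn counts are stored and reported alongside tp, fp and fn. A standalone base-R illustration with toy labels (not package code):

    # toy reference and predicted labels for a single class 'A'
    ref  = c('A', 'A', 'A', 'B', 'B', 'B', 'B', 'B')
    pred = c('A', 'A', 'B', 'A', 'B', 'B', 'B', 'B')

    tp = sum(ref == 'A' & pred == 'A')   # 2
    fp = sum(ref != 'A' & pred == 'A')   # 1
    fn = sum(ref == 'A' & pred != 'A')   # 1
    tn = sum(ref != 'A' & pred != 'A')   # 4

    sensitivity = tp / (tp + fn)   # 2/3
    specificity = tn / (tn + fp)   # 4/5 with the corrected formula
    # the pre-0.6.1 code returned tp / (tp + fp) = 2/3 here instead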
24 changes: 13 additions & 11 deletions R/mdaplots.R
@@ -462,18 +462,20 @@ mdaplot.showLabels = function(data, pos = 3, cex = 0.65, col = 'darkgray', type
rownames(data) = 1:nrow(data)

# show labels
if (!is.null(type) && type == 'h')
{
# show labels properly for bars with positive and negative values
neg = data[, 2] < 0
if (sum(neg) > 0)
text(data[neg, 1], data[neg, 2], rownames(data)[neg], cex = cex, pos = 1, col = col)
if (sum(!neg) > 0)
text(data[!neg, 1], data[!neg, 2], rownames(data)[!neg], cex = cex, pos = 3, col = col)
if (!any(is.nan(data)))
{
if (!is.null(type) && type == 'h')
{
# show labels properly for bars with positive and negative values
neg = data[, 2] < 0
if (sum(neg) > 0)
text(data[neg, 1], data[neg, 2], rownames(data)[neg], cex = cex, pos = 1, col = col)
if (sum(!neg) > 0)
text(data[!neg, 1], data[!neg, 2], rownames(data)[!neg], cex = cex, pos = 3, col = col)
}
else
text(data[, 1], data[, 2], rownames(data), cex = cex, pos = pos, col = col)
}
else
text(data[, 1], data[, 2], rownames(data), cex = cex, pos = pos, col = col)

}


26 changes: 20 additions & 6 deletions R/pls.R
@@ -37,9 +37,22 @@ pls = function(x, y, ncomp = 15, center = T, scale = F, cv = NULL,
{
cv = 1
}
else if (cv < 10)
else
{
warning('Number of segments in cross-validation is too small for jack-knifing!')
if (is.list(cv))
{
if (length(cv) == 1 && cv[[1]] == 'loo')
cvnseg = nrow(y)
else
cvnseg = cv[[2]]
}
else
{
cvnseg = cv
}

if (cvnseg > 1 && cvnseg < 10)
warning('Number of segments in cross-validation is too small for jack-knifing!')
}
}

@@ -71,8 +84,7 @@
# do cross-validation if needed
if (!is.null(cv))
{
res = pls.crossval(model, x, y, cv, center = center, scale = scale,
jack.knife = T)
res = pls.crossval(model, x, y, cv, center = center, scale = scale, jack.knife = jack.knife)
if (jack.knife == T)
{
model$coeffs = regcoeffs(model$coeffs$values, res$jkcoeffs, coeffs.alpha)
@@ -1469,13 +1481,15 @@ plotYResiduals.pls = function(obj, ncomp = NULL, ny = 1, type = 'p',
#' main plot title
#' @param ylab
#' label for y axis
#' @param show.ci
#' logical, show or not confidence intervals if they are available
#' @param ...
#' other plot parameters (see \code{mdaplotg} for details)
#'
#' @details
#' See examples in help for \code{\link{pls}} function.
#'
plotRegcoeffs.pls = function(obj, ncomp = NULL, ny = 1, main = NULL, ylab = NULL, ...)
plotRegcoeffs.pls = function(obj, ncomp = NULL, ny = 1, main = NULL, ylab = NULL, show.ci = T, ...)
{
if (is.null(main))
{
@@ -1498,7 +1512,7 @@ plotRegcoeffs.pls = function(obj, ncomp = NULL, ny = 1, main = NULL, ylab = NULL
else if (ncomp <= 0 || ncomp > obj$ncomp)
stop('Wrong value for number of components!')

plot(obj$coeffs, ncomp = ncomp, ny = ny, main = main, ylab = ylab, ...)
plot(obj$coeffs, ncomp = ncomp, ny = ny, main = main, ylab = ylab, show.ci = show.ci, ...)
}

#' X loadings plot for PLS
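The first hunk above works out the number of cross-validation segments from the cv argument before deciding whether to warn that jack-knifing needs more segments. A standalone sketch of that logic; the helper name cv_nseg and the 'rand' method string in the usage lines are assumptions for illustration, only 'loo' appears in the diff:

    # illustrative helper mirroring the segment-count logic added to pls();
    # not part of the package
    cv_nseg = function(cv, nobj)
    {
       if (is.list(cv))
       {
          # list('loo') means leave-one-out: one segment per object;
          # otherwise the second element holds the number of segments
          if (length(cv) == 1 && cv[[1]] == 'loo')
             return(nobj)
          return(cv[[2]])
       }
       # a plain number is already the number of segments
       cv
    }

    cv_nseg(list('loo'), nobj = 20)   # 20
    cv_nseg(list('rand', 4), 20)      # 4  (fewer than 10: jack-knife warning)
    cv_nseg(12, 20)                   # 12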
74 changes: 67 additions & 7 deletions R/plsda.R
@@ -1,7 +1,8 @@
# class and methods for Partial Least Squares Discriminant Analysis #

plsda = function(x, c, ncomp = 15, center = T, scale = F, cv = NULL,
x.test = NULL, c.test = NULL, method = 'simpls', alpha = 0.05, info = '')
x.test = NULL, c.test = NULL, method = 'simpls', alpha = 0.05,
coeffs.ci = NULL, coeffs.alpha = 0.1, info = '')
{
# Calibrate and validate a PLS-DA model.
#
@@ -25,8 +26,53 @@ plsda = function(x, c, ncomp = 15, center = T, scale = F, cv = NULL,
c = as.matrix(c)
y = plsda.getReferenceValues(c)

# set LOO cross-validation if jack.knife is selected
jack.knife = F
if (!is.null(coeffs.ci) && coeffs.ci == 'jk')
{
jack.knife = T
if (is.null(cv))
{
cv = 1
}
else
{
if (is.list(cv))
{
if (length(cv) == 1 && cv[[1]] == 'loo')
cvnseg = nrow(y)
else
cvnseg = cv[[2]]
}
else
{
cvnseg = cv
}

if (cvnseg > 1 && cvnseg < 10)
warning('Number of segments in cross-validation is too small for jack-knifing!')
}
}

# correct maximum number of components
ncomp = min(ncol(x), nrow(x) - 1, ncomp)
if (!is.null(cv))
{
if (!is.numeric(cv))
nseg = cv[[2]]
else
nseg = cv

if (nseg == 1)
nobj.cv = 1
else
nobj.cv = ceiling(nrow(x)/nseg)
}
else
{
nobj.cv = 0
}

ncomp = min(ncol(x), nrow(x) - 1 - nobj.cv, ncomp)

# build a model and apply to calibration set
model = pls.cal(x, y, ncomp, center = center, scale = scale, method = method)
@@ -38,9 +84,21 @@ plsda = function(x, c, ncomp = 15, center = T, scale = F, cv = NULL,
model = selectCompNum.pls(model, ncomp)

# do cross-validation if needed
if (!is.null(cv))
model$cvres = plsda.crossval(model, x, c, cv, center = center, scale = scale)

if (!is.null(cv))
{
res = plsda.crossval(model, x, c, cv, center = center, scale = scale, jack.knife = jack.knife)
if (jack.knife == TRUE)
{
model$coeffs = regcoeffs(model$coeffs$values, res$jkcoeffs, coeffs.alpha)
res[['jkcoeffs']] = NULL
model$cvres = res
}
else
{
model$cvres = res;
}

}
# do test set validation if provided
if (!is.null(x.test) && !is.null(c.test))
{
@@ -167,14 +225,16 @@ classify.plsda = function(model, y, c.ref = NULL)
#' logical, do mean centering or not
#' @param scale
#' logical, do standardization or not
#' @param jack.knife
#' logical, do jack-knifing or not
#'
#' @return
#' object of class \code{plsdares} with results of cross-validation
#'
plsda.crossval = function(model, x, c, cv, center = T, scale = F)
plsda.crossval = function(model, x, c, cv, center = T, scale = F, jack.knife = T)
{
y = plsda.getReferenceValues(c, model$classnames)
plsres = pls.crossval(model, x, y, cv, center, scale)
plsres = pls.crossval(model, x, y, cv, center, scale, jack.knife)
cres = classify.plsda(model, plsres$y.pred, c)

res = plsdares(plsres, cres)
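Besides mirroring the jack-knife handling from pls(), the plsda() hunks above cap the number of components by the size of one cross-validation segment: ncomp = min(ncol(x), nrow(x) - 1 - nobj.cv, ncomp). A quick numeric illustration with made-up figures:

    # made-up figures: 50 objects, 12 variables, 4 CV segments, 15 components requested
    nobj = 50; nvar = 12; nseg = 4; ncomp = 15

    nobj.cv = ceiling(nobj / nseg)           # 13 objects held out per segment
    min(nvar, nobj - 1 - nobj.cv, ncomp)     # min(12, 36, 15) = 12 components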
2 changes: 1 addition & 1 deletion R/regcoeffs.R
@@ -176,7 +176,7 @@ print.regcoeffs = function(x, ncomp = 1, ny = 1, digits = 3, ...)
plot.regcoeffs = function(x, ncomp = 1, ny = 1, type = NULL, col = NULL,
main = 'Regression coefficients',
xlab = 'Variables', ylab = 'Coefficients', show.line = T,
show.ci = F, ...)
show.ci = T, ...)
{
obj = x

2 changes: 1 addition & 1 deletion man/plot.regcoeffs.Rd
@@ -6,7 +6,7 @@
\usage{
\method{plot}{regcoeffs}(x, ncomp = 1, ny = 1, type = NULL, col = NULL,
main = "Regression coefficients", xlab = "Variables",
ylab = "Coefficients", show.line = T, show.ci = F, ...)
ylab = "Coefficients", show.line = T, show.ci = T, ...)
}
\arguments{
\item{x}{regression coefficients object (class \code{regcoeffs})}
4 changes: 3 additions & 1 deletion man/plotRegcoeffs.pls.Rd
@@ -5,7 +5,7 @@
\title{Regression coefficient plot for PLS}
\usage{
\method{plotRegcoeffs}{pls}(obj, ncomp = NULL, ny = 1, main = NULL,
ylab = NULL, ...)
ylab = NULL, show.ci = T, ...)
}
\arguments{
\item{obj}{a PLS model (object of class \code{pls})}
@@ -18,6 +18,8 @@

\item{ylab}{label for y axis}

\item{show.ci}{logical, show or not confidence intervals if they are available}

\item{...}{other plot parameters (see \code{mdaplotg} for details)}
}
\description{
3 changes: 1 addition & 2 deletions man/pls.Rd
@@ -24,8 +24,7 @@
\item{method }{method for calculating PLS model.}
\item{alpha }{significance level for calculating statistical limits for residuals.}
\item{coeffs.ci }{method to calculate p-values and confidence intervals for regression coefficients (so far only jack-knifing is available: \code{='jk'}).}
\item{coeffs.alpha }{significance level for calculating confidence intervals for
regression coefficients.}
\item{coeffs.alpha }{significance level for calculating confidence intervals for regression coefficients.}
\item{info }{short text with information about the model.}
}

8 changes: 7 additions & 1 deletion man/plsda.Rd
@@ -4,7 +4,8 @@

\usage{
plsda(x, c, ncomp = 15, center = T, scale = F, cv = NULL,
x.test = NULL, c.test = NULL, method = 'simpls', alpha = 0.05, info = '')
x.test = NULL, c.test = NULL, method = 'simpls', alpha = 0.05,
coeffs.ci = NULL, coeffs.alpha = 0.1, info = '')
}

\description{
Expand All @@ -22,6 +23,8 @@
\item{c.test }{vector with reference class values for test set (same format as calibration values).}
\item{method }{method for calculating PLS model.}
\item{alpha }{significance level for calculating statistical limits for residuals.}
\item{coeffs.ci }{method to calculate p-values and confidence intervals for regression coefficients (so far only jack-knifing is available: \code{='jk'}).}
\item{coeffs.alpha }{significance level for calculating confidence intervals for regression coefficients.}
\item{info }{short text with information about the model.}
}

Expand All @@ -43,6 +46,9 @@ Returns an object of \code{plsda} class with following fields (most inherited fr
The \code{plsda} class is based on \code{pls} with extra functions and plots covering classification functionality. All plots for \code{pls}
can be used. E.g. if you want to see the real predicted values (y in PLS) instead of classes use \code{plotPredictions.pls(model)} instead
of \code{plotPredictions(model)}.

Calculation of confidence intervals and p-values for regression coefficients is available
only by jack-knifing so far. See help for \code{\link{regcoeffs}} objects for details.
}

\seealso{
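The details section above points out that every pls plot still works on a plsda model, with plotPredictions.pls() giving the raw predicted y values instead of classes. A short call sketch; model stands for any fitted plsda object, such as the one in the sketch after the NEWS section:

    # `model` is assumed to be a fitted plsda object
    plotPredictions(model)        # predicted vs. reference classes
    plotPredictions.pls(model)    # underlying predicted y values, as for a pls model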
4 changes: 3 additions & 1 deletion man/plsda.crossval.Rd
@@ -4,7 +4,7 @@
\alias{plsda.crossval}
\title{Cross-validation of a PLS-DA model}
\usage{
plsda.crossval(model, x, c, cv, center = T, scale = F)
plsda.crossval(model, x, c, cv, center = T, scale = F, jack.knife = T)
}
\arguments{
\item{model}{a PLS-DA model (object of class \code{plsda})}
@@ -18,6 +18,8 @@ plsda.crossval(model, x, c, cv, center = T, scale = F)
\item{center}{logical, do mean centering or not}

\item{scale}{logical, do standardization or not}

\item{jack.knife}{logical, do jack-knifing or not}
}
\value{
object of class \code{plsdares} with results of cross-validation