Skip to content

Commit

Permalink
Merge branch 'develop'
Browse files Browse the repository at this point in the history
  • Loading branch information
svkucheryavski committed Apr 14, 2014
2 parents 8b1bc2d + 8c29a2a commit d54e8ba
Show file tree
Hide file tree
Showing 15 changed files with 70 additions and 54 deletions.
6 changes: 3 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
Package: mdatools
Title: Multivariate data analysis for chemometrics
Version: 0.5.1
Date: 2014-04-13
Version: 0.5.2
Date: 2014-04-15
Author: Sergey Kucheryavskiy
Maintainer: Sergey Kucheryavskiy <[email protected]>
Description: The package implements projection-based methods for preprocessing, exploration and analysis of multivariate data used in chemometrics.
Suggests:
Depends: methods
Depends:
License: GPL-3
5 changes: 5 additions & 0 deletions NEWS
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
v. 0.5.2
========
* fixed a bug in computing classification performance when class names are numeric
* improvements to SIMCA implementation

v. 0.5.1
========
* added more details to documentation
Expand Down
2 changes: 1 addition & 1 deletion R/classmodel.R
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ plotPerformance.classmodel = function(obj, nc = NULL, param = 'specificity', typ

classname = sprintf('(%s)', obj$classnames[nc])
}

data = cbind(1:obj$ncomp, obj$calres[[param]][nc, ])
labels = matrix(mdaplot.formatValues(obj$calres[[param]][nc, ]), ncol = 1)
legend_str = 'cal'
Expand Down
18 changes: 5 additions & 13 deletions R/classres.R
Original file line number Diff line number Diff line change
Expand Up @@ -87,21 +87,13 @@ getClassificationPerformance = function(c.ref, c.pred)
sensitivity = matrix(0, nrow = nclasses + 1, ncol = ncomp)
misclassified = matrix(0, nrow = nclasses + 1, ncol = ncomp)

classnames = dimnames(c.pred)[[3]]

for (i in 1:nclasses)
{
if (is.numeric(c.ref))
{
fn[i, ] = colSums((c.ref[, 1] == i) & (c.pred[, , i, drop = F] == -1))
fp[i, ] = colSums((c.ref[, 1] != i) & (c.pred[, , i, drop = F] == 1))
tp[i, ] = colSums((c.ref[, 1] == i) & (c.pred[, , i, drop = F] == 1))
}
else
{
cname = dimnames(c.pred)[[3]][i]
fn[i, ] = colSums((c.ref[, 1] == cname) & (c.pred[, , i, drop = F] == -1))
fp[i, ] = colSums((c.ref[, 1] != cname) & (c.pred[, , i, drop = F] == 1))
tp[i, ] = colSums((c.ref[, 1] == cname) & (c.pred[, , i, drop = F] == 1))
}
fn[i, ] = colSums((c.ref[, 1] == classnames[i]) & (c.pred[, , i, drop = F] == -1))
fp[i, ] = colSums((c.ref[, 1] != classnames[i]) & (c.pred[, , i, drop = F] == 1))
tp[i, ] = colSums((c.ref[, 1] == classnames[i]) & (c.pred[, , i, drop = F] == 1))

sensitivity[i, ] = tp[i, ] / (tp[i, ] + fn[i, ])
specificity[i, ] = tp[i, ] / (tp[i, ] + fp[i, ])
Expand Down
14 changes: 4 additions & 10 deletions R/simca.R
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ simca = function(x, classname, ncomp = 15, center = T, scale = F, cv = NULL, x.t
class(model) = c("simca", "classmodel", "pca")

# apply model to calibration set
model$calres = predict.simca(model, x, c.ref = rep(1, nrow(x)))
model$calres = predict.simca(model, x, c.ref = rep(classname, nrow(x)))
model$modpower = model$calres$modpower

# do cross-validation if needed
Expand All @@ -68,7 +68,7 @@ simca = function(x, classname, ncomp = 15, center = T, scale = F, cv = NULL, x.t
if (!is.null(x.test))
{
if (is.null(c.test))
c.test = matrix(T, nrow(x.test), 1)
c.test = rep(classname, nrow(x.test))

model$testres = predict.simca(model, x.test, c.ref = c.test)
}
Expand All @@ -86,7 +86,7 @@ simca = function(x, classname, ncomp = 15, center = T, scale = F, cv = NULL, x.t
#' @param x
#' a matrix with x values (predictors)
#' @param c.ref
#' a vector with reference class values
#' a vector with reference class names (same as class names for models)
#' @param cv
#' logical, are predictions for cross-validation or not
#' @param ...
Expand Down Expand Up @@ -117,12 +117,6 @@ predict.simca = function(object, x, c.ref = NULL, cv = F, ...)
# check c.ref values and add dimnames
if (!is.null(c.ref))
{
if (is.character(c.ref))
c.ref = c.ref == object$classname

if (is.logical(c.ref))
c.ref = c.ref * 2 - 1

c.ref = as.matrix(c.ref)
rownames(c.ref) = rownames(x)
colnames(c.ref) = object$classname
Expand Down Expand Up @@ -200,7 +194,7 @@ simca.crossval = function(model, x, cv, center = T, scale = F)
Q2 = matrix(0, ncol = ncomp, nrow = nobj)
T2 = matrix(0, ncol = ncomp, nrow = nobj)
c.pred = array(0, dim = c(nobj, ncomp, 1))
c.ref = matrix(1, ncol = 1, nrow = nobj)
c.ref = matrix(model$classname, ncol = 1, nrow = nobj)

# loop over segments
for (i in 1:nrow(idx))
Expand Down
2 changes: 1 addition & 1 deletion R/simcam.R
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ simcam = function(models, info = '')
#' @param x
#' a matrix with x values (predictors)
#' @param c.ref
#' a vector with reference class values
#' a vector with reference class names (same as class names in models)
#' @param cv
#' logical, are predictions for cross-validation or not
#' @param ...
Expand Down
8 changes: 5 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,15 @@ For more details read a [short wiki tutorial](https://github.com/svkucheryavski/
How to install
--------------

The package is in beta testing and therefore is not yet available from CRAN. The easiest way to use the package is to
The package is available from CRAN and can be installed using the usual procedure.

It can also be installed from source: just
[download](https://github.com/svkucheryavski/mdatools/releases) a source package archive from GitHub and install it using
the `install.packages` command, e.g. if the downloaded file is `mdatools_0.5.1.tar.gz` and it is located in a current
the `install.packages` command, e.g. if the downloaded file is `mdatools_0.5.2.tar.gz` and it is located in the current
working directory, just run the following:

```
install.packages('mdatools_0.5.1.tar.gz')
install.packages('mdatools_0.5.2.tar.gz')
```

If you have the `devtools` package installed, the following command will install the latest release from GitHub (do not forget to load the `devtools` package first):
Expand Down
1 change: 1 addition & 0 deletions man/pls.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ Methods for \code{pls} objects:
\code{\link{plotYResiduals.pls}} \tab shows residuals plot for y values.\cr
\code{\link{getSelectivityRatio.pls}} \tab returns vector with selectivity ratio values.\cr
\code{\link{plotSelectivityRatio.pls}} \tab shows plot with selectivity ratio values.\cr
\code{\link{plotVIPScores.pls}} \tab shows plot with VIP scores values.\cr
}

Most of the methods for plotting data (except loadings and regression coefficients) are also available for PLS results
Expand Down
3 changes: 2 additions & 1 deletion man/predict.simca.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@

\item{x}{a matrix with x values (predictors)}

\item{c.ref}{a vector with reference class values}
\item{c.ref}{a vector with reference class names (same as
class names for models)}

\item{cv}{logical, are predictions for cross-validation
or not}
Expand Down
3 changes: 2 additions & 1 deletion man/predict.simcam.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@

\item{x}{a matrix with x values (predictors)}

\item{c.ref}{a vector with reference class values}
\item{c.ref}{a vector with reference class names (same as
class names in models)}

\item{cv}{logical, are predictions for cross-validation
or not}
Expand Down
4 changes: 2 additions & 2 deletions man/simca.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ simca(x, classname, ncomp = 15, center = T, scale = F, cv = NULL, x.test = NULL,
\item{scale}{logical, do standardization of data or not.}
\item{cv}{number of segments for random cross-validation (1 for full cross-validation).}
\item{x.test}{a numerical matrix with test data.}
\item{c.test}{a vector with logical values for classes of test data objects.}
\item{c.test}{a vector with text values (names of classes) of test data objects.}
\item{alpha}{significance level for calculating limit for T2 and Q2 residuals.}
\item{method}{method to compute principal components.}
\item{info}{text with information about the model}
Expand Down Expand Up @@ -98,7 +98,7 @@ class = iris[, 5]
se = data[1:20, ]

# make SIMCA model and apply to test set
model = simca(se, 'Se', cv = 1)
model = simca(se, 'setosa', cv = 1)
model = selectCompNum(model, 1)

# show information, summary and plot overview
Expand Down
8 changes: 4 additions & 4 deletions man/simcam.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -79,16 +79,16 @@ ve = caldata[26:50, ]
vi = caldata[51:75, ]

testdata = iris[seq(2, nrow(iris), 2), 1:4]
testdata.cref = rbind(matrix('Se', 25, 1), matrix('Vi', 25, 1), matrix('Ve', 25, 1))
testdata.cref = iris[seq(2, nrow(iris), 2), 5]

# create individual models
semodel = simca(se, classname = 'Se')
semodel = simca(se, classname = 'setosa')
semodel = selectCompNum(semodel, 1)

vimodel = simca(vi, classname = 'Vi')
vimodel = simca(vi, classname = 'virginica')
vimodel = selectCompNum(vimodel, 1)

vemodel = simca(ve, classname = 'Ve')
vemodel = simca(ve, classname = 'versicolor')
vemodel = selectCompNum(vemodel, 1)

# combine models into SIMCAM objects, show statistics and plots
Expand Down
22 changes: 21 additions & 1 deletion test/test_plsda.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,27 @@ calset.c = iris[seq(1, nrow(iris), 2), 5]
testset = iris[seq(2, nrow(iris), 2), 1:4] # test set, 3 classes
testset.c = iris[seq(2, nrow(iris), 2), 5] # test set, 3 classes

model = plsda(calset, calset.c, ncomp = 3, cv = 1, info = 'IRIS data example')
cc = as.numeric(calset.c)
ct = as.numeric(testset.c)
model = plsda(calset, cc, ncomp = 3, cv = 1, info = 'IRIS data example')
plot(model)
readline('Press enter to continue...')

cc = as.numeric(calset.c)
cc[cc == 1] = 10
cc[cc == 3] = 30
ct = as.numeric(testset.c)
ct[ct == 1] = 10
ct[ct == 3] = 30
model = plsda(calset, cc, ncomp = 3, cv = 1, info = 'IRIS data example')
plot(model)
readline('Press enter to continue...')

cc = calset.c
ct = testset.c
model = plsda(calset, cc, ncomp = 3, cv = 1, info = 'IRIS data example')
plot(model)
readline('Press enter to continue...')

res = predict(model, testset, testset.c)
#res = predict(model, testset[1:50, ], testset.c[1:50, ])
Expand Down
14 changes: 7 additions & 7 deletions test/test_simca.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ ve = calset[calset[, 5] == 'versicolor', 1:4]
vi = calset[calset[, 5] == 'virginica', 1:4]

test.data = tstset[, 1:4]
test.c = c(matrix(1, 1, 25), matrix(2, 1, 25), matrix(3, 1, 25))
test.c = tstset[, 5]


# Select which model to calculate
Expand All @@ -17,19 +17,19 @@ option = 2
if (option == 1)
{
# Setosa model with cross-validation
model = simca(se, 'Se', ncomp = 4, alpha = 0.01, cv = 1)
model = simca(se, 'setosa', ncomp = 4, alpha = 0.01, cv = 1)
model = selectCompNum(model, 1)
pred = predict(model, test.data, test.c == 1)
pred = predict(model, test.data, test.c)
} else if (option == 2) {
# Virginica model with test set validation (no CV)
model = simca(vi, 'Vi', x.test = test.data[test.c == 3, ])
model = simca(vi, 'virginica', x.test = test.data[test.c == 'virginica', ])
model = selectCompNum(model, 3)
pred = predict(model, test.data, test.c == 3)
pred = predict(model, test.data, test.c)
} else {
# Versicolor model with 5 segments CV and test set
model = simca(ve, 'Ve', ncomp = 4, cv = 5, x.test = test.data, c.test = test.c == 2)
model = simca(ve, 'versicolor', ncomp = 4, cv = 5, x.test = test.data, c.test = test.c == 'versicolor')
model = selectCompNum(model, 3)
pred = predict(model, test.data, test.c == 2)
pred = predict(model, test.data, test.c)
}

cat('1. Show print and summary')
Expand Down
14 changes: 7 additions & 7 deletions test/test_simcam.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,30 +10,30 @@ vi = calset[calset[, 5] == 'virginica', 1:4]

# calibration data with reference
cal.data = calset[, 1:4]
cal.c = c(matrix('Se', 1, 25), matrix('Ve', 1, 25), matrix('Vi', 1, 25))
cal.c = calset[, 5]

# test data with reference
test.data = tstset[, 1:4]
test.c = c(matrix(1, 1, 25), matrix(2, 1, 25), matrix(3, 1, 25))
test.c = tstset[, 5]

# test data with reference for two classes
test2c.data = tstset2c[, 1:4]
test2c.c = c(matrix(1, 1, 25), matrix(2, 1, 25))
test2c.c = tstset2c[, 5]

# test data with reference including unknown classes
test2cu.data = tstset2c[, 1:4]
test2cu.c = c(matrix('Se', 1, 25), matrix('None', 1, 10), matrix('Ar', 1, 15))
test2cu.c = c(matrix('setosa', 1, 25), matrix('none', 1, 10), matrix('Ar', 1, 15))

# make individual models
semodel = simca(se, 'Se', ncomp = 4, alpha = 0.01, cv = 1)
semodel = simca(se, 'setosa', ncomp = 4, alpha = 0.01, cv = 1)
semodel = selectCompNum(semodel, 1)

# make individual models
vimodel = simca(vi, 'Vi')
vimodel = simca(vi, 'virginica')
vimodel = selectCompNum(vimodel, 3)

# make individual models
vemodel = simca(ve, 'Ve', ncomp = 4, cv = 5, test.data = test.data, test.c = test.c == 2)
vemodel = simca(ve, 'versicolor', ncomp = 4, cv = 5, x.test = test.data, c.test = test.c)
vemodel = selectCompNum(vemodel, 3)

# make group models
Expand Down

0 comments on commit d54e8ba

Please sign in to comment.