From 019874bb6f90f22c25c95e7d45a946e6ac8e50ad Mon Sep 17 00:00:00 2001 From: svonallmen <68689173+svonallmen@users.noreply.github.com> Date: Thu, 23 Jul 2020 12:49:18 +0200 Subject: [PATCH 1/6] Update simcam.R --- R/simcam.R | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/R/simcam.R b/R/simcam.R index 843f52c..af7153a 100755 --- a/R/simcam.R +++ b/R/simcam.R @@ -324,7 +324,7 @@ simcam.getPerformanceStats <- function(models, classnames) { # calculate variance for the residuals s11 <- colSums(e11^2) / (nrow(d1) - m1$ncomp.selected - 1) - s22 <- colSums(e12^2) / (nrow(d2) - m2$ncomp.selected - 1) + s22 <- colSums(e22^2) / (nrow(d2) - m2$ncomp.selected - 1) s12 <- colSums(e12^2) / (nrow(d1)) s21 <- colSums(e21^2) / (nrow(d2)) @@ -386,18 +386,20 @@ simcam.getPerformanceStats <- function(models, classnames) { #' using calibration sets X1 and X2 with number of rows n1 and n2. #' Then we do the following: #' -#' 1. Project X2 to model m1 and compute residuals, E12 -#' 2. Compute variance of the residuals as s12 = sum(E12^2) / n1 -#' 3. Project X1 to model m2 and compute residuals, E21 -#' 4. Compute variance of the residuals as s21 = sum(E21^2) / n2 -#' 5. Compute variance of residuals for m1 as s1 = sum(E1^2) / (n1 - A1 - 1) -#' 6. 
Compute variance of residuals for m2 as s2 = sum(E2^2) / (n2 - A2 - 1) +#' \enumerate{ +#' \item Project X2 to model m1 and compute residuals, E12 +#' \item Compute variance of the residuals as s12 = sum(E12^2) / n1 +#' \item Project X1 to model m2 and compute residuals, E21 +#' \item Compute variance of the residuals as s21 = sum(E21^2) / n2 +#' \item Compute variance of residuals for m1 as s1 = sum(E1^2) / (n1 - A1 - 1) +#' \item Compute variance of residuals for m2 as s2 = sum(E2^2) / (n2 - A2 - 1) +#' } #' #' The model distance then can be computed as: d = sqrt((s12 + s21) / (s1 + s2)) #' #' As one can see, if the two models and corresponding calibration sets are identical, then the #' distance will be sqrt((n - A - 1) / n). For example, if n = 25 and A = 2, then the distance -#' between the model and itself is sqrt(25/22) = sqrt(0.88) = 0.938. This case is demonstrated +#' between the model and itself is sqrt(22/25) = sqrt(0.88) = 0.938. This case is demonstrated #' in the example section. #' #' In general, if distance between models is below one classes are overlapping. 
If it is above 3 From 30380834844435e112b7fb1bfe6fbdadf4ca7378 Mon Sep 17 00:00:00 2001 From: Sergey Kucheryavskiy Date: Thu, 22 Oct 2020 13:09:47 +0200 Subject: [PATCH 2/6] fixed an issue in plotPerformance that caused warnings in CRAN tests --- R/classres.R | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/R/classres.R b/R/classres.R index 9c7d31f..064382a 100755 --- a/R/classres.R +++ b/R/classres.R @@ -534,10 +534,8 @@ plotPerformance.classres <- function(obj, nc = 1, type = "b", # prepare plot data plot_data <- do.call(rbind, lapply(obj[param], function(x) x[nc, , drop = FALSE])) - attr(plot_data, "name") <- sprintf( - if (length(param) == 1) capitalize(param) else "Classification performance (%s)", - obj$classnames[[nc]] - ) + attr(plot_data, "name") <- if (length(param) == 1) capitalize(param) else sprintf( + "Classification performance (%s)", obj$classnames[[nc]]) attr(plot_data, "xaxis.name") <- "Components" rownames(plot_data) <- param From e4c4f721b44fd7fd3f157ae78c5d1343e40f4df5 Mon Sep 17 00:00:00 2001 From: Sergey Kucheryavskiy Date: Thu, 22 Oct 2020 13:10:10 +0200 Subject: [PATCH 3/6] improvements to help text --- man/plotModelDistance.simcam.Rd | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/man/plotModelDistance.simcam.Rd b/man/plotModelDistance.simcam.Rd index 1c11412..cec9d26 100644 --- a/man/plotModelDistance.simcam.Rd +++ b/man/plotModelDistance.simcam.Rd @@ -45,18 +45,20 @@ have optimal number of components A1 and A2. The models have been calibrated using calibration sets X1 and X2 with number of rows n1 and n2. Then we do the following: -1. Project X2 to model m1 and compute residuals, E12 -2. Compute variance of the residuals as s12 = sum(E12^2) / n1 -3. Project X1 to model m2 and compute residuals, E21 -4. Compute variance of the residuals as s21 = sum(E21^2) / n2 -5. Compute variance of residuals for m1 as s1 = sum(E1^2) / (n1 - A1 - 1) -6. 
Compute variance of residuals for m2 as s2 = sum(E2^2) / (n2 - A2 - 1) +\enumerate{ +\item Project X2 to model m1 and compute residuals, E12 +\item Compute variance of the residuals as s12 = sum(E12^2) / n1 +\item Project X1 to model m2 and compute residuals, E21 +\item Compute variance of the residuals as s21 = sum(E21^2) / n2 +\item Compute variance of residuals for m1 as s1 = sum(E1^2) / (n1 - A1 - 1) +\item Compute variance of residuals for m2 as s2 = sum(E2^2) / (n2 - A2 - 1) +} The model distance then can be computed as: d = sqrt((s12 + s21) / (s1 + s2)) As one can see, if the two models and corresponding calibration sets are identical, then the distance will be sqrt((n - A - 1) / n). For example, if n = 25 and A = 2, then the distance -between the model and itself is sqrt(25/22) = sqrt(0.88) = 0.938. This case is demonstrated +between the model and itself is sqrt(22/25) = sqrt(0.88) = 0.938. This case is demonstrated in the example section. In general, if distance between models is below one classes are overlapping. If it is above 3 From 3c240c410edb26b8943ec6a42a23d90dc5406f75 Mon Sep 17 00:00:00 2001 From: Sergey Kucheryavskiy Date: Thu, 22 Oct 2020 13:10:26 +0200 Subject: [PATCH 4/6] small improvements to text --- NEWS.md | 9 ++++++++- README.md | 10 +++++----- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/NEWS.md b/NEWS.md index b22307a..175db33 100755 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,10 @@ +v.0.11.2 +======== + +* fixed an issue which led to a bug in `simcam.getPerformanceStats`, returning implausible and asymmetrical results (thanks to @svonallmen). + +* fixed a small issue sometimes giving a warning when running tests on CRAN (did not influence the user experience though). + v.0.11.1 ======== @@ -66,7 +73,7 @@ Finally, all model results (calibration, cross-validation and test set validatio into a single list, `model$res`. This makes a lot of things easier. However, the old way of accessing the result objects (e.g. 
`model$calres` or `model$cvres`) still works, you can access e.g. calibration results both using `model$res$cal` and `model$calres`, so this change will not break the compatibility. -Below is more detailed list of changes. The [tutorial](http://mdatools.com/docs/) has been updated accordingly. +Below is more detailed list of changes. The [tutorial](https://mdatools.com/docs/) has been updated accordingly. ## Breaking changes diff --git a/README.md b/README.md index 40c8d92..0598e52 100755 --- a/README.md +++ b/README.md @@ -7,9 +7,9 @@ Multivariate Data Analysis Tools -*mdatools* is an R package for preprocessing, exploring and analysis of multivariate data. The package provides methods mostly common for [Chemometrics](http://en.wikipedia.org/wiki/Chemometrics). It was created for an introductory PhD course on Chemometrics given at Section of Chemical Engineering, Aalborg University. The general idea of the package is to collect most widespread chemometric methods and give a similar "user interface" (or rather API) for using them. So if a user knows how to make a model and visualize results for one method, he or she can easily do this for the others. +*mdatools* is an R package for preprocessing, exploring and analysis of multivariate data. The package provides methods mostly common for [Chemometrics](https://en.wikipedia.org/wiki/Chemometrics). It was created for an introductory PhD course on Chemometrics given at Section of Chemical Engineering, Aalborg University. The general idea of the package is to collect most widespread chemometric methods and give a similar "user interface" (or rather API) for using them. So if a user knows how to make a model and visualize results for one method, he or she can easily do this for the others. -For more details and examples read a [Bookdown tutorial](http://mdatools.com/docs/). The project website, [mdatools.com](https://mdatools.com), contains additional information about supplementary materials and tools. 
+For more details and examples read a [Bookdown tutorial](https://mdatools.com/docs/). The project website, [mdatools.com](https://mdatools.com), contains additional information about supplementary materials and tools. If you want to cite the package, please use the following: Sergey Kucheryavskiy, *mdatools – R package for chemometrics*, Chemometrics and Intelligent Laboratory Systems, Volume 198, 2020 (DOI: [10.1016/j.chemolab.2020.103937](https://doi.org/10.1016/j.chemolab.2020.103937)). @@ -17,16 +17,16 @@ If you want to cite the package, please use the following: Sergey Kucheryavskiy, What is new ----------- -Latest release (0.11.1) is available from GitHub. It will be available on CRAN soon (release is expected for *25.07.2020*). You can see the full list of changes [here](NEWS.md). The Bookdown tutorial has been also updated and contains the description of new methods added in the last release. +Latest release (0.11.2) is available both from GitHub and CRAN. You can see the full list of changes [here](NEWS.md). The Bookdown tutorial has been also updated and contains the description of new methods added in the last release. How to install -------------- -The package is available from CRAN by usual installing procedure. However, due to restrictions in CRAN politics regarding number of submissions (one in 3-4 month), mostly major releases will be published there (with 2-3 weeks delay after GitHub release as more thorough testing is needed). You can [download](https://github.com/svkucheryavski/mdatools/releases) a zip-file with source package and install it using the `install.packages` command, e.g. if the downloaded file is `mdatools_0.11.1.tar.gz` and it is located in a current working directory, just run the following: +The package is available from CRAN by usual installing procedure. 
However, due to restrictions in CRAN policy regarding the number of submissions (one in 3-4 months), mostly major releases will be published there (with 2-3 weeks delay after GitHub release as more thorough testing is needed). You can [download](https://github.com/svkucheryavski/mdatools/releases) a zip-file with source package and install it using the `install.packages` command, e.g. if the downloaded file is `mdatools_0.11.2.tar.gz` and it is located in a current working directory, just run the following: ``` -install.packages("mdatools_0.11.1.tar.gz") +install.packages("mdatools_0.11.2.tar.gz") ``` If you have `devtools` package installed, the following command will install the current developer version from the master branch of GitHub repository (do not forget to load the `devtools` package first): From 84a86cb39db7eb73e6d1d3e09090a164dd654db3 Mon Sep 17 00:00:00 2001 From: Sergey Kucheryavskiy Date: Thu, 22 Oct 2020 13:10:40 +0200 Subject: [PATCH 5/6] changed version number --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 5c63f59..f7fc4fb 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: mdatools Title: Multivariate Data Analysis for Chemometrics -Version: 0.11.1 -Date: 2020-07-23 +Version: 0.11.2 +Date: 2020-10-22 Author: Sergey Kucheryavskiy Maintainer: Sergey Kucheryavskiy Description: Projection based methods for preprocessing, From b85369c5c35bafc0792c5bcae35f2760a7d6c669 Mon Sep 17 00:00:00 2001 From: Sergey Kucheryavskiy Date: Thu, 22 Oct 2020 17:27:07 +0200 Subject: [PATCH 6/6] added reference to the description --- DESCRIPTION | 1 + 1 file changed, 1 insertion(+) diff --git a/DESCRIPTION b/DESCRIPTION index f7fc4fb..69a9f0d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -6,6 +6,7 @@ Author: Sergey Kucheryavskiy Maintainer: Sergey Kucheryavskiy Description: Projection based methods for preprocessing, exploring and analysis of multivariate data used in chemometrics. + S. 
Kucheryavskiy (2020) <doi:10.1016/j.chemolab.2020.103937>. Encoding: UTF-8 License: MIT + file LICENSE Imports: methods, graphics, grDevices, stats, Matrix