From e59ded611b3c1c57e69a8f4d7286427c663929d8 Mon Sep 17 00:00:00 2001 From: Larisa Yanceva Date: Thu, 2 May 2019 20:11:10 +0300 Subject: [PATCH 1/9] Added documentation for windsorize() --- R/windsorize.R | 11 ++++++++++- man/windsorize.Rd | 16 +++++++++++++++- 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/R/windsorize.R b/R/windsorize.R index b4e15e6..c3fc6e3 100644 --- a/R/windsorize.R +++ b/R/windsorize.R @@ -1,6 +1,15 @@ #' Windsorize #' -#' Do some windsorization. +#' set all outliers to a specified percentile of the data; +#' a 90% winsorization would see all data below the 5th percentile set +#' to the 5th percentile, +#' and data above the 95th percentile set to the 95th percentile. +#' +#' @param x A vector. +#' @param p A quantile. +#' @return dataset with trimmed outliers with 10% percentile +#' @examples +#' windsorize(c(92, 19, 101, 58, 101, 91, 26, 78, 10, 13, −5, 101, 86, −5)) #' @export windsorize <- function(x, p = .90) { q <- quantile(x, p) diff --git a/man/windsorize.Rd b/man/windsorize.Rd index 832c3cb..b060543 100644 --- a/man/windsorize.Rd +++ b/man/windsorize.Rd @@ -6,6 +6,20 @@ \usage{ windsorize(x, p = 0.9) } +\arguments{ +\item{x}{A vector.} + +\item{p}{A quantile.} +} +\value{ +dataset with trimmed outliers with 10% percentile +} \description{ -Do some windsorization. +set all outliers to a specified percentile of the data; +a 90% winsorization would see all data below the 5th percentile set +to the 5th percentile, +and data above the 95th percentile set to the 95th percentile. 
+} +\examples{ +windsorize(c(92, 19, 101, 58, 101, 91, 26, 78, 10, 13, −5, 101, 86, −5)) } From 4c1393d5879a736994de2d656b4836c1ba52dd6c Mon Sep 17 00:00:00 2001 From: Larisa Yanceva Date: Thu, 2 May 2019 20:59:35 +0300 Subject: [PATCH 2/9] Added arguments' checks into windsorize() Fix usage example of windsorize() --- R/windsorize.R | 6 +++++- man/windsorize.Rd | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/R/windsorize.R b/R/windsorize.R index c3fc6e3..fe0b4f7 100644 --- a/R/windsorize.R +++ b/R/windsorize.R @@ -9,9 +9,13 @@ #' @param p A quantile. #' @return dataset with trimmed outliers with 10% percentile #' @examples -#' windsorize(c(92, 19, 101, 58, 101, 91, 26, 78, 10, 13, −5, 101, 86, −5)) +#' windsorize(c(3,4,4,3,4,5,1)) #' @export windsorize <- function(x, p = .90) { + if (length(x) == 0) stop("argument should not be a empty vector") + if (is.na(x)) { + stop("argument should not be a vector containing NA") + } q <- quantile(x, p) x[x >= q] <- q x diff --git a/man/windsorize.Rd b/man/windsorize.Rd index b060543..2a0ff5d 100644 --- a/man/windsorize.Rd +++ b/man/windsorize.Rd @@ -21,5 +21,5 @@ to the 5th percentile, and data above the 95th percentile set to the 95th percentile. 
} \examples{ -windsorize(c(92, 19, 101, 58, 101, 91, 26, 78, 10, 13, −5, 101, 86, −5)) +windsorize(c(3,4,4,3,4,5,1)) } From 6dcb8afd900eefde1f798f328c21f2206339659c Mon Sep 17 00:00:00 2001 From: Larisa Yanceva Date: Thu, 2 May 2019 21:09:49 +0300 Subject: [PATCH 3/9] Fixed is.na() for all(is.na()) --- R/windsorize.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/windsorize.R b/R/windsorize.R index fe0b4f7..0182bdd 100644 --- a/R/windsorize.R +++ b/R/windsorize.R @@ -13,8 +13,8 @@ #' @export windsorize <- function(x, p = .90) { if (length(x) == 0) stop("argument should not be a empty vector") - if (is.na(x)) { - stop("argument should not be a vector containing NA") + if (all(is.na(x))) { + stop("argument should not be a vector containing only NA") } q <- quantile(x, p) x[x >= q] <- q From c2ec4977fd38488defdd6a7a7e1d37a5c45722cb Mon Sep 17 00:00:00 2001 From: Larisa Yanceva Date: Thu, 2 May 2019 21:21:29 +0300 Subject: [PATCH 4/9] Fixed windsorize logic --- R/windsorize.R | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/R/windsorize.R b/R/windsorize.R index 0182bdd..54c0c1b 100644 --- a/R/windsorize.R +++ b/R/windsorize.R @@ -16,8 +16,13 @@ windsorize <- function(x, p = .90) { if (all(is.na(x))) { stop("argument should not be a vector containing only NA") } - q <- quantile(x, p) - x[x >= q] <- q + if (p < 0 || p > 1) { + stop("p invalid percentale. 
Expected values from 0 to 1") + } + q_lower <- quantile(x, (1-p)/2) + q_upper <- quantile(x, 1 - (1-p)/2) + x[x <= q_lower] <- q_lower + x[x >= q_upper] <- q_upper x } From b447bafb24eb3b010a8822287a958bc4e37427da Mon Sep 17 00:00:00 2001 From: Larisa Yanceva Date: Thu, 2 May 2019 22:11:10 +0300 Subject: [PATCH 5/9] Added additional checks for arguments --- R/windsorize.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/R/windsorize.R b/R/windsorize.R index 54c0c1b..de18782 100644 --- a/R/windsorize.R +++ b/R/windsorize.R @@ -16,6 +16,8 @@ windsorize <- function(x, p = .90) { if (all(is.na(x))) { stop("argument should not be a vector containing only NA") } + if (!is.numeric(x)) stop("argument should be a numeric vector") + if (!is.numeric(p)) stop("argument should be a number from 0 to 1") if (p < 0 || p > 1) { stop("p invalid percentale. Expected values from 0 to 1") } From 8260705f72a8e3b5d2e20ad344bd31822ace92d5 Mon Sep 17 00:00:00 2001 From: Larisa Yanceva Date: Thu, 2 May 2019 22:23:20 +0300 Subject: [PATCH 6/9] Add tests for windsorize() --- tests/testthat.R | 4 ++++ tests/testthat/test_windorize.R | 17 +++++++++++++++++ 2 files changed, 21 insertions(+) create mode 100644 tests/testthat.R create mode 100644 tests/testthat/test_windorize.R diff --git a/tests/testthat.R b/tests/testthat.R new file mode 100644 index 0000000..8adbfff --- /dev/null +++ b/tests/testthat.R @@ -0,0 +1,4 @@ +library(testthat) +library(datacleaner) + +test_check("datacleaner") diff --git a/tests/testthat/test_windorize.R b/tests/testthat/test_windorize.R new file mode 100644 index 0000000..5e0beb4 --- /dev/null +++ b/tests/testthat/test_windorize.R @@ -0,0 +1,17 @@ +context("Windsorize") +library(datacleaner) +test_that("windorizing is correct", { + expect_equal(windsorize(c(2,2,2,2,3),.9), c(2,2,2,2,2.8) ) + expect_equal(windsorize(c(2,2,2,2,1),.9), c(2,2,2,2,1.2) ) +}) + +test_that("unexpected parameters", { + expect_error(windsorize(c(NA,NA,NA), .9), "argument should not be a 
vector containing only NA") + expect_error(windsorize(c(), .9), "argument should not be a empty vector") + expect_error(windsorize(c(1,2,3,"4"), .9), "argument should be a numeric vector") + expect_error(windsorize(c(1,2,3,4), ".9"), "argument should be a number from 0 to 1") + expect_error(windsorize(c(1,2,3,4), -1), "p invalid percentale. Expected values from 0 to 1") + expect_error(windsorize(c(1,2,3,4), 1.2), "p invalid percentale. Expected values from 0 to 1") + expect_error(windsorize(c(1,2,3,NA), .9)) + +}) From e95213f6ad0c2ed646e8149a8428e65d8622afa7 Mon Sep 17 00:00:00 2001 From: Larisa Yanceva Date: Thu, 2 May 2019 23:22:24 +0300 Subject: [PATCH 7/9] Add tranform_log() together with tests --- R/transform_log.R | 19 +++++++++++++++++++ man/transform_log.Rd | 24 ++++++++++++++++++++++++ tests/testthat/test_tranform_log.R | 11 +++++++++++ 3 files changed, 54 insertions(+) create mode 100644 R/transform_log.R create mode 100644 man/transform_log.Rd create mode 100644 tests/testthat/test_tranform_log.R diff --git a/R/transform_log.R b/R/transform_log.R new file mode 100644 index 0000000..004f883 --- /dev/null +++ b/R/transform_log.R @@ -0,0 +1,19 @@ +#' transform_log +#' Transform numerical values into their log values +#' @param x A vector +#' @return logarithm of x +#' @export +#' @examples +#' transform_log(1) +#' transform_log(c(1, 2, 3, 4, 5)) +#' +transform_log <- function(x) { + if (!is.numeric(x)) { + warning("transform_log: Expecting numeric argument") + } + x_badval <- is.na(suppressWarnings(as.numeric(x))) + x[x_badval] <- 1 + y <- log(as.numeric(x)) + y[x_badval] <- NA + y +} \ No newline at end of file diff --git a/man/transform_log.Rd b/man/transform_log.Rd new file mode 100644 index 0000000..c5b287c --- /dev/null +++ b/man/transform_log.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/transform_log.R +\name{transform_log} +\alias{transform_log} +\title{transform_log +Transform numerical values 
into their log values} +\usage{ +transform_log(x) +} +\arguments{ +\item{x}{A vector} +} +\value{ +logarithm of x +} +\description{ +transform_log +Transform numerical values into their log values +} +\examples{ +transform_log(1) +transform_log(c(1, 2, 3, 4, 5)) + +} diff --git a/tests/testthat/test_tranform_log.R b/tests/testthat/test_tranform_log.R new file mode 100644 index 0000000..70bae1c --- /dev/null +++ b/tests/testthat/test_tranform_log.R @@ -0,0 +1,11 @@ +context("tranform_log") +library(datacleaner) +test_that("tranform_log is correct", { + expect_equal(as.character(transform_log(c(2, 2, 2, 2, 3))), as.character(c(0.693147180559945, 0.693147180559945, 0.693147180559945, 0.693147180559945, 1.09861228866811))) + expect_equal(as.character(transform_log(c(2, 3, NA))), as.character(c(0.693147180559945, 1.09861228866811, NA))) +}) + +test_that("unexpected parameters", { + expect_warning(t <- transform_log(c(2, 3, "NA")), "transform_log: Expecting numeric argument") + expect_equal(as.character(t), as.character(c(0.693147180559945, 1.09861228866811, NA))) +}) \ No newline at end of file From 049c52df57ec9b60763c265edbb5854a682a21e0 Mon Sep 17 00:00:00 2001 From: Larisa Yanceva Date: Fri, 3 May 2019 21:49:52 +0300 Subject: [PATCH 8/9] Add parament for meanimpute() --- R/meanimpute.R | 1 + man/meanimpute.Rd | 3 +++ 2 files changed, 4 insertions(+) diff --git a/R/meanimpute.R b/R/meanimpute.R index cc7cf5e..f7f79e5 100644 --- a/R/meanimpute.R +++ b/R/meanimpute.R @@ -1,4 +1,5 @@ #' Meanimputation +#' @param x A vector #' @export meanimpute <- function(x) { x[is.na(x)] <- mean(x, na.rm = TRUE) diff --git a/man/meanimpute.Rd b/man/meanimpute.Rd index 8139e8f..77f2fbc 100644 --- a/man/meanimpute.Rd +++ b/man/meanimpute.Rd @@ -6,6 +6,9 @@ \usage{ meanimpute(x) } +\arguments{ +\item{x}{A vector} +} \description{ Meanimputation } From 5c2ce83b438d1e269ee6d83489f093fd5da35cf1 Mon Sep 17 00:00:00 2001 From: Larisa Yanceva Date: Fri, 3 May 2019 22:08:38 +0300 Subject: 
[PATCH 9/9] Import quantile() for windsorize() --- NAMESPACE | 1 + 1 file changed, 1 insertion(+) diff --git a/NAMESPACE b/NAMESPACE index d75f824..c1a7cea 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1 +1,2 @@ exportPattern("^[[:alpha:]]+") +importFrom("stats", "quantile")