Currency printing, backports, new release

msberends · Dec 2, 2020 · 63f6287 · 63f6287
1 parent b13ec45
commit 63f6287
Show file tree

Hide file tree

Showing 30 changed files with 653 additions and 573 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: cleaner
 Title: Fast and Easy Data Cleaning
-Version: 1.5.9000
-Date: 2020-10-19
+Version: 1.5.1
+Date: 2020-12-02
 Authors@R: 
   person(
     given = c("Matthijs", "S."),

diff --git a/NEWS.md b/NEWS.md
@@ -1,12 +1,21 @@
-# cleaner 1.5.9xxx
+# cleaner 1.5.1
 
 * New function `format_p_value()` to format raw p values according to the APA guideline
+* `clean_Date()` now works with POSIX standards:
+  ```r
+  clean_Date("2020-11-12 12:24:12")
+  clean_Date(c("2020-11-12 12:24:12", "2020-11-13"), guess_each = TRUE)
+  ```
+* Currency now prints and formats without symbols as default, use `as_symbol = TRUE` to print/format with currency symbols
+* Support for older versions of R (v3.2)
 
 # cleaner 1.5.0
 
 * New function `format_names()` to quickly and easily change names of `data.frame` columns, `list`s or `character` vectors.
   ```r
+  df <- data.frame(old.name = "test1", value = "test2")
   format_names(df, snake_case = TRUE)
+  format_names(df, camelCase = TRUE)
   format_names(df, c(old.name = "new_name", value = "measurement"))
 
   library(dplyr)

diff --git a/R/clean.R b/R/clean.R
@@ -70,7 +70,7 @@
 #'   \item{\code{clean_POSIXct()}: classes \code{POSIXct/POSIXt}}
 #' }
 #' @export
-#' @source \href{http://www.bis.org/publ/rpfx16fx.pdf}{Triennial Central Bank Survey Foreign exchange turnover in April 2016} (PDF). Bank for International Settlements. 11 December 2016. p. 10.
+#' @source \href{https://www.bis.org/publ/rpfx16fx.pdf}{Triennial Central Bank Survey Foreign exchange turnover in April 2016} (PDF). Bank for International Settlements. 11 December 2016. p. 10.
 #' @examples 
 #' clean_logical(c("Yes", "No"))   # English
 #' clean_logical(c("Oui", "Non"))  # French
@@ -355,7 +355,7 @@ clean_Date <- function(x, format = NULL, guess_each = FALSE, max_date = Sys.Date
     }
   } else {
     if (guess_each == FALSE) {
-      final_result <- guess_Date(x = x, throw_note = TRUE)
+      final_result <- guess_Date(x = x, throw_note = TRUE, guess_each = guess_each)
     } else {
       if (length(format) > 1) {
         # checking date according to set vector of format options
@@ -426,7 +426,7 @@ clean_POSIXct <- function(x, tz = "", remove = "[^.0-9 :/-]", fixed = FALSE, max
   as.POSIXct(x)
 }
 
-guess_Date <- function(x, throw_note = TRUE, format_options = NULL) {
+guess_Date <- function(x, throw_note = TRUE, format_options = NULL, guess_each = guess_each) {
   msg_clean_as <- function(format_set, sep = " ") {
     if (throw_note == TRUE) {
       if (tolower(format_set) == "excel") {
@@ -444,6 +444,12 @@ guess_Date <- function(x, throw_note = TRUE, format_options = NULL) {
     return(as.Date(x_numeric, origin = "1899-12-30"))
   }
 
+  # check for POSIX (yyyy-mm-dd HH:MM:SS)
+  if (all(grepl("^[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{1,2}:[0-9]{2}:[0-9]{2}$", x))) {
+    msg_clean_as("yyyy-mm-dd HH:MM:SS", sep = " ")
+    return(as.Date(as.POSIXct(x)))
+  }
+
   # replace any non-number/separators ("-", ".", etc.) with space
   separator <- ifelse(grepl("[0-9]-", x) & !grepl("[0-9]-$", x), "-",
                       ifelse(grepl("[0-9][.]", x) & !grepl("[0-9][.]$", x), ".",
@@ -545,6 +551,7 @@ guess_Date <- function(x, throw_note = TRUE, format_options = NULL) {
   if (!is.null(new_format)) {
     return(as.Date(as.character(x), format = new_format))
   }
-  warning("Date/time format could not be determined automatically, returning NAs", call. = FALSE)
+  warning(ifelse(guess_each == FALSE, "Try guess_each = TRUE to guess the format for each value.\n", ""),
+          "Date/time format could not be determined automatically, returning NAs", call. = FALSE)
   as.Date(rep(NA, length(x)))
 }
diff --git a/R/currency.R b/R/currency.R
@@ -39,7 +39,7 @@
 #' 
 #' format(money, currency_symbol = "USD")
 #' format(money, currency_symbol = "EUR", decimal.mark = ",")
-#' format(money, currency_symbol = "EUR", as_symbol = FALSE)
+#' format(money, currency_symbol = "EUR", as_symbol = TRUE)
 #' 
 #' as.currency(2.5e+04)
 as.currency <- function(x, currency_symbol = Sys.localeconv()["int_curr_symbol"], ...) {
@@ -101,8 +101,15 @@ txt2symb <- function(txt) {
          "EUR" = "\u20ac",
          "JPY" = "\u00a5",
          "GBP" = "\u00a3",
-         "CNY" = "\u5143",
-         "KRW" = "\u20a9", 
+         txt)
+}
+
+symb2txt <- function(txt) {
+  switch(txt,
+         "\u0024" = "USD",
+         "\u20ac" = "EUR",
+         "\u00a5" = "JPY",
+         "\u00a3" = "GBP",
          txt)
 }
 
@@ -112,7 +119,7 @@ txt2symb <- function(txt) {
 print.currency <- function(x, 
                            decimal.mark = getOption("OutDec"),
                            big.mark = ifelse(decimal.mark == ",", ".", ","),
-                           as_symbol = TRUE,
+                           as_symbol = FALSE,
                            ...) {
   currency_symbol <- toupper(trimws(attributes(x)$currency_symbol))
   if (isTRUE(as_symbol)) {
@@ -137,7 +144,7 @@ format.currency <- function(x,
                             currency_symbol = attributes(x)$currency_symbol,
                             decimal.mark = getOption("OutDec"),
                             big.mark = ifelse(decimal.mark == ",", ".", ","),
-                            as_symbol = TRUE,
+                            as_symbol = FALSE,
                             ...) {
   currency_symbol <- toupper(trimws(currency_symbol))
   if (isTRUE(as_symbol)) {
@@ -193,7 +200,7 @@ median.currency <- function(x, ...) {
 #' @method summary currency
 #' @export
 summary.currency <- function(object, ...) {
-  c("Class" = paste0("currency", txt2symb(trimws(attributes(object)$currency_symbol))),
+  c("Class" = paste0("currency", symb2txt(trimws(attributes(object)$currency_symbol))),
     "<NA>" = length(object[is.na(object)]),
     "Min." = format(min(object)),
     "Mean" = format(mean(object)),
@@ -203,13 +210,13 @@ summary.currency <- function(object, ...) {
 #' @importFrom vctrs vec_ptype_abbr
 #' @export
 vec_ptype_abbr.currency <- function(x, ...) {
-  paste0("crncy/", txt2symb(trimws(attributes(x)$currency_symbol)))
+  paste0("crncy/", symb2txt(trimws(attributes(x)$currency_symbol)))
 }
 
 #' @importFrom vctrs vec_ptype_full
 #' @export
 vec_ptype_full.currency <- function(x, ...) {
-  paste0("currency/", txt2symb(trimws(attributes(x)$currency_symbol)))
+  paste0("currency/", symb2txt(trimws(attributes(x)$currency_symbol)))
 }
 
 #' @importFrom pillar pillar_shaft

diff --git a/R/format_names.R b/R/format_names.R
@@ -20,7 +20,7 @@
 #' Format names and values
 #' 
 #' This function can be used on any \code{data.frame}, \code{list} or character vector to format their names or values. It supports \href{https://en.wikipedia.org/wiki/Snake_case}{snake case} and \href{https://en.wikipedia.org/wiki/Camel_case}{camel case}. 
-#' @param x a \code{data.frame}, \code{list} or character vector
+#' @param x a \code{data.frame}, \code{list} or \code{character} vector
 #' @param ... when \code{x} is a \code{data.frame}: new column names to set, which can be named (in the form \code{old = "new"}). The original column names do not need to be quoted, see Examples.
 #' @param snake_case logical to indicate whether the column names must be in \href{https://en.wikipedia.org/wiki/Snake_case}{snake case}. This will have no effect on manually set column names.
 #' @param camelCase logical to indicate whether the column names must be in \href{https://en.wikipedia.org/wiki/Camel_case}{camel case}. This will have no effect on manually set column names.

diff --git a/README.md b/README.md
@@ -2,13 +2,11 @@
 
 # `cleaner`: Fast and Easy Data Cleaning
 
-**(Previously called `clean`, but renamed to `cleaner` upon CRAN request)**
-
 **Website of this package: https://msberends.github.io/cleaner**
 
 [![CRAN_Badge](https://www.r-pkg.org/badges/version/cleaner)](https://CRAN.R-project.org/package=cleaner)
 
-The R package for **cleaning and checking data columns** in a fast and easy way. Relying on very few dependencies, it provides **smart guessing**, but with user options to override anything if needed.
+The small R package for **cleaning and checking data columns** in a fast and easy way. Relying on very few dependencies, it provides **smart guessing**, but with user options to override anything if needed.
 
 It also provides two new data types that are not available in base R: `currency` and `percentage`.
 
@@ -26,7 +24,7 @@ Contents:
 ----
 
 ## Why this package
-As a data scientist, I'm often served with data that is not clean, not tidy and consquently not ready for analysis at all. For tidying data, there's of course the `tidyverse` (https://www.tidyverse.org), which lets you manipulate data in any way you can think of. But for *cleaning*, I think our community was still lacking a neat solution that makes data cleaning fast and easy with functions that kind of 'think on their own' to do that.
+As a data scientist, I'm often served with data that is not clean, not tidy and consequently not ready for analysis at all. For tidying data, there's of course the `tidyverse` (https://www.tidyverse.org), which lets you manipulate data in any way you can think of. But for *cleaning*, I think our community was still lacking a neat solution that makes data cleaning fast and easy with functions that kind of 'think on their own' to do that.
 
 If the CRAN button at the top of this page is green, install the package with:
 
@@ -36,8 +34,8 @@ install.packages("cleaner")
 
 Otherwise, or if you are looking for the latest stable development version, install the package with:
 ```r
-install.packages("devtools") # if you haven't already
-devtools::install_github("msberends/cleaner")
+install.packages("remotes") # if you haven't already
+remotes::install_github("msberends/cleaner")
 ```
 
 ## How it works
@@ -213,26 +211,29 @@ Use `clean()` to clean data. It guesses what kind of data class would best fit y
 
   format(sum(received), 
          currency_symbol = "€", decimal.mark = ",")
-  #> [1] "€ 56,40"
+  #> [1] "EUR 56,40"
   ```
 
   This new class also comes with support for printing in `tibble`s, used by the [`tidyverse`](https://www.tidyverse.org):
 
   ```r
-  library(tibble)
-  tibble(money = clean_currency(c("Jack sent £ 25", "Bill sent £ 31.40")))
-  #> # A tibble: 2 x 1
-  #>         money
-  #>   <crncy/GBP>
-  #> 1       25.00
-  #> 2       31.40
+  library(dplyr)
+  tibble(money = c("Jack sent £ 25", "Bill sent £ 31.40")) %>% 
+    mutate(mutate_cleaner = clean_currency(money))
+  #> # A tibble: 2 x 2
+  #>   money               mutate_cleaner
+  #>   <chr>                  <crncy/GBP>
+  #> 1 Jack sent £ 25               25.00
+  #> 2 Bill sent £ 31.40            31.40
   ```
 
 #### Other cleaning
 
 * Use `format_names()` to quickly and easily change names of `data.frame` columns, `list`s or `character` vectors.
   ```r
+  df <- data.frame(old.name = "test1", value = "test2")
   format_names(df, snake_case = TRUE)
+  format_names(df, camelCase = TRUE)
   format_names(df, c(old.name = "new_name", value = "measurement"))
 
   library(dplyr)
@@ -272,38 +273,51 @@ Use `clean()` to clean data. It guesses what kind of data class would best fit y
     summarise(n = n())
   ```
 
-### Checking
+* Use the function `format_p_value()` to format p values according to the international APA guideline. It tries to round to two decimals, but has a exception for values that would round to `alpha` (defaults to 0.05):
 
-Any idea why in R `as.numeric()` and `is.numeric()` and `as.Date()` exist, but `is.Date()` doesn't? Me neither, but now it does. And you probably know `runif()` to create random numeric values. Now `rdate()` exists as well, for generating random dates.
+   ```r
+   format_p_value(c(0.345678, 0.123))
+   #> [1] "0.35" "0.12"
+
+   # a value of 0.0499 must not be "0.05", but is not "0.049" either,
+   # so the function will add as many decimals as needed:
+   format_p_value(0.04993) 
+   #> [1] "0.0499"
+   ```
+
+### Checking
 
-The easiest and most comprehensive way to check the data of a column/variable is to create frequency tables. Use `freq()` to do this. It supports a lot of different classes (types of data) and is even extendible by other packages.
+The easiest and most comprehensive way to check the data of a column/variable is to create frequency tables. Use `freq()` to do this. It supports a lot of different classes (types of data) and is even extendible by other packages. In markdown documents (like this README file), it formats as real markdown.
 
 ```r
 freq(unclean$gender)
-#> Frequency table 
-#> 
-#> Class:      character
-#> Length:     500
-#> Available:  500 (100%, NA: 0 = 0%)
-#> Unique:     5
-#> 
-#> Shortest:   1
-#> Longest:    6
-#> 
-#>      Item      Count   Percent   Cum. Count   Cum. Percent
-#> ---  -------  ------  --------  -----------  -------------
-#> 1    male        240     48.0%          240          48.0%
-#> 2    female      220     44.0%          460          92.0%
-#> 3    man          22      4.4%          482          96.4%
-#> 4    m            15      3.0%          497          99.4%
-#> 5    F             3      0.6%          500         100.0%
 ```
 
-Clean it and check again:
+**Frequency table**   
+
+Class:      character  
+Length:     500  
+Available:  500 (100%, NA: 0 = 0%)  
+Unique:     5  
+
+Shortest:   1  
+Longest:    6
+
+
+|   |Item   | Count| Percent| Cum. Count| Cum. Percent|
+|:--|:------|-----:|-------:|----------:|------------:|
+|1  |male   |   240|   48.0%|        240|        48.0%|
+|2  |female |   220|   44.0%|        460|        92.0%|
+|3  |man    |    22|    4.4%|        482|        96.4%|
+|4  |m      |    15|    3.0%|        497|        99.4%|
+|5  |F      |     3|    0.6%|        500|       100.0%|
+
+Clean it and check again (using `markdown = FALSE` to show how it would look in the R console):
 
 ```r
 freq(clean_factor(unclean$gender, 
-                  levels = c("^m" = "Male", "^f" = "Female")))
+                  levels = c("^m" = "Male", "^f" = "Female")),
+     markdown = FALSE)
 #> Frequency table 
 #> 
 #> Class:      factor (numeric)

diff --git a/docs/404.html b/docs/404.html
diff --git a/docs/authors.html b/docs/authors.html